 Documentation/kernel-parameters.txt |   5
 include/linux/init_task.h           |   4
 include/linux/key.h                 |   4
 include/linux/rcupdate.h            |  50
 include/trace/events/rcu.h          |  45
 kernel/rcupdate.c                   |  44
 kernel/rcutiny.c                    |   4
 kernel/rcutiny_plugin.h             |  49
 kernel/rcutree.c                    | 471
 kernel/rcutree.h                    |  46
 kernel/rcutree_plugin.h             | 209
 kernel/rcutree_trace.c              | 125
 kernel/time/tick-sched.c            |   2
 13 files changed, 559 insertions(+), 499 deletions(-)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index a92c5ebf373e..12783fa833c3 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2367,6 +2367,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			Set maximum number of finished RCU callbacks to process
 			in one batch.
 
+	rcutree.fanout_leaf=	[KNL,BOOT]
+			Increase the number of CPUs assigned to each
+			leaf rcu_node structure.  Useful for very large
+			systems.
+
 	rcutree.qhimark=	[KNL,BOOT]
 			Set threshold of queued
 			RCU callbacks over which batch limiting is disabled.
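
For illustration, a very large system might raise the leaf-level fanout from the kernel command line; the value below is only an example:

	rcutree.fanout_leaf=64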
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 9e65eff6af3b..8a7476186990 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -168,8 +168,8 @@ extern struct cred init_cred;
 	.children	= LIST_HEAD_INIT(tsk.children),			\
 	.sibling	= LIST_HEAD_INIT(tsk.sibling),			\
 	.group_leader	= &tsk,						\
-	RCU_INIT_POINTER(.real_cred, &init_cred),			\
-	RCU_INIT_POINTER(.cred, &init_cred),				\
+	RCU_POINTER_INITIALIZER(real_cred, &init_cred),			\
+	RCU_POINTER_INITIALIZER(cred, &init_cred),			\
 	.comm		= INIT_TASK_COMM,				\
 	.thread		= INIT_THREAD,					\
 	.fs		= &init_fs,					\
diff --git a/include/linux/key.h b/include/linux/key.h
index 4cd22ed627ef..cef3b315ba7c 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -303,7 +303,9 @@ static inline bool key_is_instantiated(const struct key *key)
 			       rwsem_is_locked(&((struct key *)(KEY))->sem)))
 
 #define rcu_assign_keypointer(KEY, PAYLOAD)				\
-	(rcu_assign_pointer((KEY)->payload.rcudata, PAYLOAD))
+do {									\
+	rcu_assign_pointer((KEY)->payload.rcudata, (PAYLOAD));		\
+} while (0)
 
 #ifdef CONFIG_SYSCTL
 extern ctl_table key_sysctls[];
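
Since rcu_assign_pointer() now expands to a do { } while (0) block, rcu_assign_keypointer() can only be used as a full statement. A minimal sketch of the intended call style; the key and payload names are hypothetical:

/* Sketch only: "key" and "new_payload" are hypothetical. */
static void install_payload(struct key *key, void *new_payload)
{
	if (new_payload)
		rcu_assign_keypointer(key, new_payload);	/* statement use: fine */
	/* x = rcu_assign_keypointer(key, new_payload);  expression use: no longer compiles */
}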
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 9cac722b169c..c2c0d86dd3ac 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -147,6 +147,7 @@ extern void synchronize_sched(void);
 
 extern void __rcu_read_lock(void);
 extern void __rcu_read_unlock(void);
+extern void rcu_read_unlock_special(struct task_struct *t);
 void synchronize_rcu(void);
 
 /*
@@ -255,6 +256,10 @@ static inline void destroy_rcu_head_on_stack(struct rcu_head *head)
 }
 #endif	/* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
 
+#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SMP)
+extern int rcu_is_cpu_idle(void);
+#endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SMP) */
+
 #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU)
 bool rcu_lockdep_current_cpu_online(void);
 #else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */
@@ -266,15 +271,6 @@ static inline bool rcu_lockdep_current_cpu_online(void)
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 
-#ifdef CONFIG_PROVE_RCU
-extern int rcu_is_cpu_idle(void);
-#else /* !CONFIG_PROVE_RCU */
-static inline int rcu_is_cpu_idle(void)
-{
-	return 0;
-}
-#endif /* else !CONFIG_PROVE_RCU */
-
 static inline void rcu_lock_acquire(struct lockdep_map *map)
 {
 	lock_acquire(map, 0, 0, 2, 1, NULL, _THIS_IP_);
@@ -513,10 +509,10 @@ static inline void rcu_preempt_sleep_check(void)
 		(_________p1); \
 	})
 #define __rcu_assign_pointer(p, v, space) \
-	({ \
+	do { \
 		smp_wmb(); \
 		(p) = (typeof(*v) __force space *)(v); \
-	})
+	} while (0)
 
 
 /**
@@ -851,7 +847,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
  *
  * Assigns the specified value to the specified RCU-protected
  * pointer, ensuring that any concurrent RCU readers will see
- * any prior initialization.  Returns the value assigned.
+ * any prior initialization.
  *
  * Inserts memory barriers on architectures that require them
  * (which is most of them), and also prevents the compiler from
@@ -903,25 +899,17 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
  * the reader-accessible portions of the linked structure.
  */
 #define RCU_INIT_POINTER(p, v) \
-	p = (typeof(*v) __force __rcu *)(v)
-
-static __always_inline bool __is_kfree_rcu_offset(unsigned long offset)
-{
-	return offset < 4096;
-}
-
-static __always_inline
-void __kfree_rcu(struct rcu_head *head, unsigned long offset)
-{
-	typedef void (*rcu_callback)(struct rcu_head *);
-
-	BUILD_BUG_ON(!__builtin_constant_p(offset));
-
-	/* See the kfree_rcu() header comment. */
-	BUILD_BUG_ON(!__is_kfree_rcu_offset(offset));
-
-	kfree_call_rcu(head, (rcu_callback)offset);
-}
+	do { \
+		p = (typeof(*v) __force __rcu *)(v); \
+	} while (0)
+
+/**
+ * RCU_POINTER_INITIALIZER() - statically initialize an RCU protected pointer
+ *
+ * GCC-style initialization for an RCU-protected pointer in a structure field.
+ */
+#define RCU_POINTER_INITIALIZER(p, v) \
+		.p = (typeof(*v) __force __rcu *)(v)
 
 /*
  * Does the specified offset indicate that the corresponding rcu_head
@@ -935,7 +923,7 @@ void __kfree_rcu(struct rcu_head *head, unsigned long offset)
 #define __kfree_rcu(head, offset) \
 	do { \
 		BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); \
-		call_rcu(head, (void (*)(struct rcu_head *))(unsigned long)(offset)); \
+		kfree_call_rcu(head, (void (*)(struct rcu_head *))(unsigned long)(offset)); \
 	} while (0)
 
 /**
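
RCU_INIT_POINTER() is now statement-only, and RCU_POINTER_INITIALIZER() covers the static-initializer case (as used in the init_task.h hunk above). A small usage sketch with hypothetical structure names:

struct foo {
	int a;
};

struct bar {
	struct foo __rcu *fp;
};

static struct foo default_foo;

/* Compile-time initialization of an RCU-protected field. */
static struct bar global_bar = {
	RCU_POINTER_INITIALIZER(fp, &default_foo),
};

static void reset_bar(struct bar *b)
{
	/* Run-time initialization, before readers can see b. */
	RCU_INIT_POINTER(b->fp, NULL);
}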
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index d274734b2aa4..5bde94d8585b 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -541,6 +541,50 @@ TRACE_EVENT(rcu_torture_read,
 		  __entry->rcutorturename, __entry->rhp)
 );
 
+/*
+ * Tracepoint for _rcu_barrier() execution.  The string "s" describes
+ * the _rcu_barrier phase:
+ *	"Begin": rcu_barrier_callback() started.
+ *	"Check": rcu_barrier_callback() checking for piggybacking.
+ *	"EarlyExit": rcu_barrier_callback() piggybacked, thus early exit.
+ *	"Inc1": rcu_barrier_callback() piggyback check counter incremented.
+ *	"Offline": rcu_barrier_callback() found offline CPU
+ *	"OnlineQ": rcu_barrier_callback() found online CPU with callbacks.
+ *	"OnlineNQ": rcu_barrier_callback() found online CPU, no callbacks.
+ *	"IRQ": An rcu_barrier_callback() callback posted on remote CPU.
+ *	"CB": An rcu_barrier_callback() invoked a callback, not the last.
+ *	"LastCB": An rcu_barrier_callback() invoked the last callback.
+ *	"Inc2": rcu_barrier_callback() piggyback check counter incremented.
+ * The "cpu" argument is the CPU or -1 if meaningless, the "cnt" argument
+ * is the count of remaining callbacks, and "done" is the piggybacking count.
+ */
+TRACE_EVENT(rcu_barrier,
+
+	TP_PROTO(char *rcuname, char *s, int cpu, int cnt, unsigned long done),
+
+	TP_ARGS(rcuname, s, cpu, cnt, done),
+
+	TP_STRUCT__entry(
+		__field(char *, rcuname)
+		__field(char *, s)
+		__field(int, cpu)
+		__field(int, cnt)
+		__field(unsigned long, done)
+	),
+
+	TP_fast_assign(
+		__entry->rcuname = rcuname;
+		__entry->s = s;
+		__entry->cpu = cpu;
+		__entry->cnt = cnt;
+		__entry->done = done;
+	),
+
+	TP_printk("%s %s cpu %d remaining %d # %lu",
+		  __entry->rcuname, __entry->s, __entry->cpu, __entry->cnt,
+		  __entry->done)
+);
+
 #else /* #ifdef CONFIG_RCU_TRACE */
 
 #define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0)
@@ -564,6 +608,7 @@ TRACE_EVENT(rcu_torture_read,
 #define trace_rcu_batch_end(rcuname, callbacks_invoked, cb, nr, iit, risk) \
 	do { } while (0)
 #define trace_rcu_torture_read(rcutorturename, rhp) do { } while (0)
+#define trace_rcu_barrier(name, s, cpu, cnt, done) do { } while (0)
 
 #endif /* #else #ifdef CONFIG_RCU_TRACE */
 
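
The TP_printk() format above produces one line per _rcu_barrier() phase. A stand-alone illustration of what such a line looks like (the values are made up):

#include <stdio.h>

int main(void)
{
	/* Mirrors TP_printk("%s %s cpu %d remaining %d # %lu", ...). */
	printf("%s %s cpu %d remaining %d # %lu\n",
	       "rcu_sched", "Begin", -1, 1, 4UL);
	return 0;
}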
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 95cba41ce1e9..4e6a61b15e86 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -54,6 +54,50 @@
 #ifdef CONFIG_PREEMPT_RCU
 
 /*
+ * Preemptible RCU implementation for rcu_read_lock().
+ * Just increment ->rcu_read_lock_nesting, shared state will be updated
+ * if we block.
+ */
+void __rcu_read_lock(void)
+{
+	current->rcu_read_lock_nesting++;
+	barrier();  /* critical section after entry code. */
+}
+EXPORT_SYMBOL_GPL(__rcu_read_lock);
+
+/*
+ * Preemptible RCU implementation for rcu_read_unlock().
+ * Decrement ->rcu_read_lock_nesting.  If the result is zero (outermost
+ * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
+ * invoke rcu_read_unlock_special() to clean up after a context switch
+ * in an RCU read-side critical section and other special cases.
+ */
+void __rcu_read_unlock(void)
+{
+	struct task_struct *t = current;
+
+	if (t->rcu_read_lock_nesting != 1) {
+		--t->rcu_read_lock_nesting;
+	} else {
+		barrier();  /* critical section before exit code. */
+		t->rcu_read_lock_nesting = INT_MIN;
+		barrier();  /* assign before ->rcu_read_unlock_special load */
+		if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
+			rcu_read_unlock_special(t);
+		barrier();  /* ->rcu_read_unlock_special load before assign */
+		t->rcu_read_lock_nesting = 0;
+	}
+#ifdef CONFIG_PROVE_LOCKING
+	{
+		int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting);
+
+		WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
+	}
+#endif /* #ifdef CONFIG_PROVE_LOCKING */
+}
+EXPORT_SYMBOL_GPL(__rcu_read_unlock);
+
+/*
  * Check for a task exiting while in a preemptible-RCU read-side
  * critical section, clean up if so.  No need to issue warnings,
  * as debug_check_no_locks_held() already does this if lockdep
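
Nesting is tracked solely in ->rcu_read_lock_nesting, so only the outermost rcu_read_unlock() can reach rcu_read_unlock_special(). A minimal reader sketch; the protected structure and global pointer are hypothetical:

struct foo {
	int val;
};

static struct foo __rcu *gp;	/* hypothetical RCU-protected pointer */

static int read_it(void)
{
	struct foo *p;
	int val = 0;

	rcu_read_lock();		/* nesting 0 -> 1 */
	rcu_read_lock();		/* nesting 1 -> 2 */
	p = rcu_dereference(gp);
	if (p)
		val = p->val;
	rcu_read_unlock();		/* 2 -> 1: only decrements */
	rcu_read_unlock();		/* 1 -> 0: may call rcu_read_unlock_special() */
	return val;
}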
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index 37a5444204d2..547b1fe5b052 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -172,7 +172,7 @@ void rcu_irq_enter(void)
 	local_irq_restore(flags);
 }
 
-#ifdef CONFIG_PROVE_RCU
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
 
 /*
  * Test whether RCU thinks that the current CPU is idle.
@@ -183,7 +183,7 @@ int rcu_is_cpu_idle(void)
 }
 EXPORT_SYMBOL(rcu_is_cpu_idle);
 
-#endif /* #ifdef CONFIG_PROVE_RCU */
+#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
 /*
  * Test whether the current CPU was interrupted from idle.  Nested
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index fc31a2d65100..116725b5edfb 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
@@ -132,7 +132,6 @@ static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = {
 	RCU_TRACE(.rcb.name = "rcu_preempt")
 };
 
-static void rcu_read_unlock_special(struct task_struct *t);
 static int rcu_preempted_readers_exp(void);
 static void rcu_report_exp_done(void);
 
@@ -527,23 +526,11 @@ void rcu_preempt_note_context_switch(void)
 }
 
 /*
- * Tiny-preemptible RCU implementation for rcu_read_lock().
- * Just increment ->rcu_read_lock_nesting, shared state will be updated
- * if we block.
- */
-void __rcu_read_lock(void)
-{
-	current->rcu_read_lock_nesting++;
-	barrier();  /* needed if we ever invoke rcu_read_lock in rcutiny.c */
-}
-EXPORT_SYMBOL_GPL(__rcu_read_lock);
-
-/*
  * Handle special cases during rcu_read_unlock(), such as needing to
  * notify RCU core processing or task having blocked during the RCU
  * read-side critical section.
  */
-static noinline void rcu_read_unlock_special(struct task_struct *t)
+void rcu_read_unlock_special(struct task_struct *t)
 {
 	int empty;
 	int empty_exp;
@@ -627,38 +614,6 @@ static noinline void rcu_read_unlock_special(struct task_struct *t)
 }
 
 /*
- * Tiny-preemptible RCU implementation for rcu_read_unlock().
- * Decrement ->rcu_read_lock_nesting.  If the result is zero (outermost
- * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
- * invoke rcu_read_unlock_special() to clean up after a context switch
- * in an RCU read-side critical section and other special cases.
- */
-void __rcu_read_unlock(void)
-{
-	struct task_struct *t = current;
-
-	barrier();  /* needed if we ever invoke rcu_read_unlock in rcutiny.c */
-	if (t->rcu_read_lock_nesting != 1)
-		--t->rcu_read_lock_nesting;
-	else {
-		t->rcu_read_lock_nesting = INT_MIN;
-		barrier();  /* assign before ->rcu_read_unlock_special load */
-		if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
-			rcu_read_unlock_special(t);
-		barrier();  /* ->rcu_read_unlock_special load before assign */
-		t->rcu_read_lock_nesting = 0;
-	}
-#ifdef CONFIG_PROVE_LOCKING
-	{
-		int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting);
-
-		WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
-	}
-#endif /* #ifdef CONFIG_PROVE_LOCKING */
-}
-EXPORT_SYMBOL_GPL(__rcu_read_unlock);
-
-/*
  * Check for a quiescent state from the current CPU.  When a task blocks,
  * the task is recorded in the rcu_preempt_ctrlblk structure, which is
  * checked elsewhere.  This is called from the scheduling-clock interrupt.
@@ -846,8 +801,6 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
  */
 int rcu_preempt_needs_cpu(void)
 {
-	if (!rcu_preempt_running_reader())
-		rcu_preempt_cpu_qs();
 	return rcu_preempt_ctrlblk.rcb.rcucblist != NULL;
 }
 
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 4b97bba7396e..117218a43724 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -60,36 +60,44 @@
 
 /* Data structures. */
 
-static struct lock_class_key rcu_node_class[NUM_RCU_LVLS];
+static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
 
-#define RCU_STATE_INITIALIZER(structname) { \
-	.level = { &structname##_state.node[0] }, \
-	.levelcnt = { \
-		NUM_RCU_LVL_0,  /* root of hierarchy. */ \
-		NUM_RCU_LVL_1, \
-		NUM_RCU_LVL_2, \
-		NUM_RCU_LVL_3, \
-		NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \
-	}, \
+#define RCU_STATE_INITIALIZER(sname, cr) { \
+	.level = { &sname##_state.node[0] }, \
+	.call = cr, \
 	.fqs_state = RCU_GP_IDLE, \
 	.gpnum = -300, \
 	.completed = -300, \
-	.onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \
-	.orphan_nxttail = &structname##_state.orphan_nxtlist, \
-	.orphan_donetail = &structname##_state.orphan_donelist, \
-	.fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.fqslock), \
-	.n_force_qs = 0, \
-	.n_force_qs_ngp = 0, \
-	.name = #structname, \
+	.onofflock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.onofflock), \
+	.orphan_nxttail = &sname##_state.orphan_nxtlist, \
+	.orphan_donetail = &sname##_state.orphan_donelist, \
+	.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
+	.fqslock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.fqslock), \
+	.name = #sname, \
 }
 
-struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched);
+struct rcu_state rcu_sched_state =
+	RCU_STATE_INITIALIZER(rcu_sched, call_rcu_sched);
 DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
 
-struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh);
+struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, call_rcu_bh);
 DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
 
 static struct rcu_state *rcu_state;
+LIST_HEAD(rcu_struct_flavors);
+
+/* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */
+static int rcu_fanout_leaf = CONFIG_RCU_FANOUT_LEAF;
+module_param(rcu_fanout_leaf, int, 0);
+int rcu_num_lvls __read_mostly = RCU_NUM_LVLS;
+static int num_rcu_lvl[] = {  /* Number of rcu_nodes at specified level. */
+	NUM_RCU_LVL_0,
+	NUM_RCU_LVL_1,
+	NUM_RCU_LVL_2,
+	NUM_RCU_LVL_3,
+	NUM_RCU_LVL_4,
+};
+int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */
 
 /*
  * The rcu_scheduler_active variable transitions from zero to one just
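
for_each_rcu_flavor() itself is not part of this excerpt; it presumably reduces to a list_for_each_entry() walk of rcu_struct_flavors via the ->flavors list head registered in rcu_init_one(). A sketch under that assumption:

/* Assumed shape of the iterator; the real definition lives in rcutree.h. */
#define for_each_rcu_flavor(rsp) \
	list_for_each_entry((rsp), &rcu_struct_flavors, flavors)

/* Per-flavor loops then replace the old hard-coded sched/bh/preempt triples: */
static void example_reset_stalls(void)
{
	struct rcu_state *rsp;

	for_each_rcu_flavor(rsp)
		rsp->jiffies_stall = jiffies + ULONG_MAX / 2;
}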
@@ -147,13 +155,6 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp);
 unsigned long rcutorture_testseq;
 unsigned long rcutorture_vernum;
 
-/* State information for rcu_barrier() and friends. */
-
-static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
-static atomic_t rcu_barrier_cpu_count;
-static DEFINE_MUTEX(rcu_barrier_mutex);
-static struct completion rcu_barrier_completion;
-
 /*
  * Return true if an RCU grace period is in progress.  The ACCESS_ONCE()s
  * permit this function to be invoked without holding the root rcu_node
@@ -358,7 +359,7 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval)
 		struct task_struct *idle = idle_task(smp_processor_id());
 
 		trace_rcu_dyntick("Error on entry: not idle task", oldval, 0);
-		ftrace_dump(DUMP_ALL);
+		ftrace_dump(DUMP_ORIG);
 		WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
 			  current->pid, current->comm,
 			  idle->pid, idle->comm); /* must be idle task! */
@@ -468,7 +469,7 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval)
 
 		trace_rcu_dyntick("Error on exit: not idle task",
 				  oldval, rdtp->dynticks_nesting);
-		ftrace_dump(DUMP_ALL);
+		ftrace_dump(DUMP_ORIG);
 		WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
 			  current->pid, current->comm,
 			  idle->pid, idle->comm); /* must be idle task! */
@@ -585,8 +586,6 @@ void rcu_nmi_exit(void)
 	WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
 }
 
-#ifdef CONFIG_PROVE_RCU
-
 /**
  * rcu_is_cpu_idle - see if RCU thinks that the current CPU is idle
  *
@@ -604,7 +603,7 @@ int rcu_is_cpu_idle(void)
 }
 EXPORT_SYMBOL(rcu_is_cpu_idle);
 
-#ifdef CONFIG_HOTPLUG_CPU
+#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)
 
 /*
  * Is the current CPU online?  Disable preemption to avoid false positives
@@ -645,9 +644,7 @@ bool rcu_lockdep_current_cpu_online(void)
 }
 EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
 
-#endif /* #ifdef CONFIG_HOTPLUG_CPU */
-
-#endif /* #ifdef CONFIG_PROVE_RCU */
+#endif /* #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) */
 
 /**
  * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle
@@ -733,7 +730,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
 	int cpu;
 	long delta;
 	unsigned long flags;
-	int ndetected;
+	int ndetected = 0;
 	struct rcu_node *rnp = rcu_get_root(rsp);
 
 	/* Only let one CPU complain about others per time interval. */
@@ -774,7 +771,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
 	 */
 	rnp = rcu_get_root(rsp);
 	raw_spin_lock_irqsave(&rnp->lock, flags);
-	ndetected = rcu_print_task_stall(rnp);
+	ndetected += rcu_print_task_stall(rnp);
 	raw_spin_unlock_irqrestore(&rnp->lock, flags);
 
 	print_cpu_stall_info_end();
@@ -860,9 +857,10 @@ static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
  */
 void rcu_cpu_stall_reset(void)
 {
-	rcu_sched_state.jiffies_stall = jiffies + ULONG_MAX / 2;
-	rcu_bh_state.jiffies_stall = jiffies + ULONG_MAX / 2;
-	rcu_preempt_stall_reset();
+	struct rcu_state *rsp;
+
+	for_each_rcu_flavor(rsp)
+		rsp->jiffies_stall = jiffies + ULONG_MAX / 2;
 }
 
 static struct notifier_block rcu_panic_block = {
@@ -937,6 +935,18 @@ check_for_new_grace_period(struct rcu_state *rsp, struct rcu_data *rdp)
 }
 
 /*
+ * Initialize the specified rcu_data structure's callback list to empty.
+ */
+static void init_callback_list(struct rcu_data *rdp)
+{
+	int i;
+
+	rdp->nxtlist = NULL;
+	for (i = 0; i < RCU_NEXT_SIZE; i++)
+		rdp->nxttail[i] = &rdp->nxtlist;
+}
+
+/*
  * Advance this CPU's callbacks, but only if the current grace period
  * has ended.  This may be called only from the CPU to whom the rdp
  * belongs.  In addition, the corresponding leaf rcu_node structure's
@@ -1328,8 +1338,6 @@ static void
 rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
 			  struct rcu_node *rnp, struct rcu_data *rdp)
 {
-	int i;
-
 	/*
 	 * Orphan the callbacks.  First adjust the counts.  This is safe
 	 * because ->onofflock excludes _rcu_barrier()'s adoption of
@@ -1340,7 +1348,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
 		rsp->qlen += rdp->qlen;
 		rdp->n_cbs_orphaned += rdp->qlen;
 		rdp->qlen_lazy = 0;
-		rdp->qlen = 0;
+		ACCESS_ONCE(rdp->qlen) = 0;
 	}
 
 	/*
@@ -1369,9 +1377,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
 	}
 
 	/* Finally, initialize the rcu_data structure's list to empty.  */
-	rdp->nxtlist = NULL;
-	for (i = 0; i < RCU_NEXT_SIZE; i++)
-		rdp->nxttail[i] = &rdp->nxtlist;
+	init_callback_list(rdp);
 }
 
 /*
@@ -1505,6 +1511,9 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 	raw_spin_unlock_irqrestore(&rnp->lock, flags);
 	if (need_report & RCU_OFL_TASKS_EXP_GP)
 		rcu_report_exp_rnp(rsp, rnp, true);
+	WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL,
+		  "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n",
+		  cpu, rdp->qlen, rdp->nxtlist);
 }
 
 #else /* #ifdef CONFIG_HOTPLUG_CPU */
@@ -1592,7 +1601,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
 	}
 	smp_mb(); /* List handling before counting for rcu_barrier(). */
 	rdp->qlen_lazy -= count_lazy;
-	rdp->qlen -= count;
+	ACCESS_ONCE(rdp->qlen) -= count;
 	rdp->n_cbs_invoked += count;
 
 	/* Reinstate batch limit if we have worked down the excess. */
@@ -1605,6 +1614,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
 		rdp->n_force_qs_snap = rsp->n_force_qs;
 	} else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark)
 		rdp->qlen_last_fqs_check = rdp->qlen;
+	WARN_ON_ONCE((rdp->nxtlist == NULL) != (rdp->qlen == 0));
 
 	local_irq_restore(flags);
 
@@ -1745,8 +1755,6 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
 		break; /* grace period idle or initializing, ignore. */
 
 	case RCU_SAVE_DYNTICK:
-		if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK)
-			break; /* So gcc recognizes the dead code. */
 
 		raw_spin_unlock(&rnp->lock);  /* irqs remain disabled */
 
@@ -1788,9 +1796,10 @@ unlock_fqs_ret:
  * whom the rdp belongs.
  */
 static void
-__rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
+__rcu_process_callbacks(struct rcu_state *rsp)
 {
 	unsigned long flags;
+	struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
 
 	WARN_ON_ONCE(rdp->beenonline == 0);
 
@@ -1826,11 +1835,11 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
  */
 static void rcu_process_callbacks(struct softirq_action *unused)
 {
+	struct rcu_state *rsp;
+
 	trace_rcu_utilization("Start RCU core");
-	__rcu_process_callbacks(&rcu_sched_state,
-				&__get_cpu_var(rcu_sched_data));
-	__rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
-	rcu_preempt_process_callbacks();
+	for_each_rcu_flavor(rsp)
+		__rcu_process_callbacks(rsp);
 	trace_rcu_utilization("End RCU core");
 }
 
@@ -1857,6 +1866,56 @@ static void invoke_rcu_core(void)
 	raise_softirq(RCU_SOFTIRQ);
 }
 
+/*
+ * Handle any core-RCU processing required by a call_rcu() invocation.
+ */
+static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
+			    struct rcu_head *head, unsigned long flags)
+{
+	/*
+	 * If called from an extended quiescent state, invoke the RCU
+	 * core in order to force a re-evaluation of RCU's idleness.
+	 */
+	if (rcu_is_cpu_idle() && cpu_online(smp_processor_id()))
+		invoke_rcu_core();
+
+	/* If interrupts were disabled or CPU offline, don't invoke RCU core. */
+	if (irqs_disabled_flags(flags) || cpu_is_offline(smp_processor_id()))
+		return;
+
+	/*
+	 * Force the grace period if too many callbacks or too long waiting.
+	 * Enforce hysteresis, and don't invoke force_quiescent_state()
+	 * if some other CPU has recently done so.  Also, don't bother
+	 * invoking force_quiescent_state() if the newly enqueued callback
+	 * is the only one waiting for a grace period to complete.
+	 */
+	if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) {
+
+		/* Are we ignoring a completed grace period? */
+		rcu_process_gp_end(rsp, rdp);
+		check_for_new_grace_period(rsp, rdp);
+
+		/* Start a new grace period if one not already started. */
+		if (!rcu_gp_in_progress(rsp)) {
+			unsigned long nestflag;
+			struct rcu_node *rnp_root = rcu_get_root(rsp);
+
+			raw_spin_lock_irqsave(&rnp_root->lock, nestflag);
+			rcu_start_gp(rsp, nestflag);  /* rlses rnp_root->lock */
+		} else {
+			/* Give the grace period a kick. */
+			rdp->blimit = LONG_MAX;
+			if (rsp->n_force_qs == rdp->n_force_qs_snap &&
+			    *rdp->nxttail[RCU_DONE_TAIL] != head)
+				force_quiescent_state(rsp, 0);
+			rdp->n_force_qs_snap = rsp->n_force_qs;
+			rdp->qlen_last_fqs_check = rdp->qlen;
+		}
+	} else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies))
+		force_quiescent_state(rsp, 1);
+}
+
 static void
 __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
 	   struct rcu_state *rsp, bool lazy)
@@ -1881,7 +1940,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
 	rdp = this_cpu_ptr(rsp->rda);
 
 	/* Add the callback to our list. */
-	rdp->qlen++;
+	ACCESS_ONCE(rdp->qlen)++;
 	if (lazy)
 		rdp->qlen_lazy++;
 	else
@@ -1896,43 +1955,8 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
 	else
 		trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen);
 
-	/* If interrupts were disabled, don't dive into RCU core. */
-	if (irqs_disabled_flags(flags)) {
-		local_irq_restore(flags);
-		return;
-	}
-
-	/*
-	 * Force the grace period if too many callbacks or too long waiting.
-	 * Enforce hysteresis, and don't invoke force_quiescent_state()
-	 * if some other CPU has recently done so.  Also, don't bother
-	 * invoking force_quiescent_state() if the newly enqueued callback
-	 * is the only one waiting for a grace period to complete.
-	 */
-	if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) {
-
-		/* Are we ignoring a completed grace period? */
-		rcu_process_gp_end(rsp, rdp);
-		check_for_new_grace_period(rsp, rdp);
-
-		/* Start a new grace period if one not already started. */
-		if (!rcu_gp_in_progress(rsp)) {
-			unsigned long nestflag;
-			struct rcu_node *rnp_root = rcu_get_root(rsp);
-
-			raw_spin_lock_irqsave(&rnp_root->lock, nestflag);
-			rcu_start_gp(rsp, nestflag);  /* rlses rnp_root->lock */
-		} else {
-			/* Give the grace period a kick. */
-			rdp->blimit = LONG_MAX;
-			if (rsp->n_force_qs == rdp->n_force_qs_snap &&
-			    *rdp->nxttail[RCU_DONE_TAIL] != head)
-				force_quiescent_state(rsp, 0);
-			rdp->n_force_qs_snap = rsp->n_force_qs;
-			rdp->qlen_last_fqs_check = rdp->qlen;
-		}
-	} else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies))
-		force_quiescent_state(rsp, 1);
+	/* Go handle any RCU core processing required. */
+	__call_rcu_core(rsp, rdp, head, flags);
 	local_irq_restore(flags);
 }
 
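
__call_rcu() now hands the grace-period nudging off to __call_rcu_core(); callers are unaffected. For reference, a typical call_rcu() usage sketch (the structure and helpers are hypothetical):

struct foo {
	struct rcu_head rcu;
	int val;
};

static void foo_reclaim(struct rcu_head *head)
{
	struct foo *fp = container_of(head, struct foo, rcu);

	kfree(fp);
}

static void foo_retire(struct foo *fp)
{
	/* Queues fp->rcu; __call_rcu_core() decides whether to poke the core. */
	call_rcu(&fp->rcu, foo_reclaim);
}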
@@ -1962,28 +1986,16 @@ EXPORT_SYMBOL_GPL(call_rcu_bh);
  * occasionally incorrectly indicate that there are multiple CPUs online
  * when there was in fact only one the whole time, as this just adds
  * some overhead: RCU still operates correctly.
- *
- * Of course, sampling num_online_cpus() with preemption enabled can
- * give erroneous results if there are concurrent CPU-hotplug operations.
- * For example, given a demonic sequence of preemptions in num_online_cpus()
- * and CPU-hotplug operations, there could be two or more CPUs online at
- * all times, but num_online_cpus() might well return one (or even zero).
- *
- * However, all such demonic sequences require at least one CPU-offline
- * operation.  Furthermore, rcu_blocking_is_gp() giving the wrong answer
- * is only a problem if there is an RCU read-side critical section executing
- * throughout.  But RCU-sched and RCU-bh read-side critical sections
- * disable either preemption or bh, which prevents a CPU from going offline.
- * Therefore, the only way that rcu_blocking_is_gp() can incorrectly return
- * that there is only one CPU when in fact there was more than one throughout
- * is when there were no RCU readers in the system.  If there are no
- * RCU readers, the grace period by definition can be of zero length,
- * regardless of the number of online CPUs.
  */
 static inline int rcu_blocking_is_gp(void)
 {
+	int ret;
+
 	might_sleep();  /* Check for RCU read-side critical section. */
-	return num_online_cpus() <= 1;
+	preempt_disable();
+	ret = num_online_cpus() <= 1;
+	preempt_enable();
+	return ret;
 }
 
 /**
@@ -2241,9 +2253,12 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
  */
 static int rcu_pending(int cpu)
 {
-	return __rcu_pending(&rcu_sched_state, &per_cpu(rcu_sched_data, cpu)) ||
-	       __rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu)) ||
-	       rcu_preempt_pending(cpu);
+	struct rcu_state *rsp;
+
+	for_each_rcu_flavor(rsp)
+		if (__rcu_pending(rsp, per_cpu_ptr(rsp->rda, cpu)))
+			return 1;
+	return 0;
 }
 
 /*
@@ -2253,20 +2268,41 @@ static int rcu_pending(int cpu)
  */
 static int rcu_cpu_has_callbacks(int cpu)
 {
+	struct rcu_state *rsp;
+
 	/* RCU callbacks either ready or pending? */
-	return per_cpu(rcu_sched_data, cpu).nxtlist ||
-	       per_cpu(rcu_bh_data, cpu).nxtlist ||
-	       rcu_preempt_cpu_has_callbacks(cpu);
+	for_each_rcu_flavor(rsp)
+		if (per_cpu_ptr(rsp->rda, cpu)->nxtlist)
+			return 1;
+	return 0;
+}
+
+/*
+ * Helper function for _rcu_barrier() tracing.  If tracing is disabled,
+ * the compiler is expected to optimize this away.
+ */
+static void _rcu_barrier_trace(struct rcu_state *rsp, char *s,
+			       int cpu, unsigned long done)
+{
+	trace_rcu_barrier(rsp->name, s, cpu,
+			  atomic_read(&rsp->barrier_cpu_count), done);
 }
 
 /*
  * RCU callback function for _rcu_barrier().  If we are last, wake
  * up the task executing _rcu_barrier().
  */
-static void rcu_barrier_callback(struct rcu_head *notused)
+static void rcu_barrier_callback(struct rcu_head *rhp)
 {
-	if (atomic_dec_and_test(&rcu_barrier_cpu_count))
-		complete(&rcu_barrier_completion);
+	struct rcu_data *rdp = container_of(rhp, struct rcu_data, barrier_head);
+	struct rcu_state *rsp = rdp->rsp;
+
+	if (atomic_dec_and_test(&rsp->barrier_cpu_count)) {
+		_rcu_barrier_trace(rsp, "LastCB", -1, rsp->n_barrier_done);
+		complete(&rsp->barrier_completion);
+	} else {
+		_rcu_barrier_trace(rsp, "CB", -1, rsp->n_barrier_done);
+	}
 }
 
 /*
@@ -2274,35 +2310,63 @@ static void rcu_barrier_callback(struct rcu_head *notused)
  */
 static void rcu_barrier_func(void *type)
 {
-	int cpu = smp_processor_id();
-	struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu);
-	void (*call_rcu_func)(struct rcu_head *head,
-			      void (*func)(struct rcu_head *head));
+	struct rcu_state *rsp = type;
+	struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
 
-	atomic_inc(&rcu_barrier_cpu_count);
-	call_rcu_func = type;
-	call_rcu_func(head, rcu_barrier_callback);
+	_rcu_barrier_trace(rsp, "IRQ", -1, rsp->n_barrier_done);
+	atomic_inc(&rsp->barrier_cpu_count);
+	rsp->call(&rdp->barrier_head, rcu_barrier_callback);
 }
 
 /*
  * Orchestrate the specified type of RCU barrier, waiting for all
  * RCU callbacks of the specified type to complete.
  */
-static void _rcu_barrier(struct rcu_state *rsp,
-			 void (*call_rcu_func)(struct rcu_head *head,
-					       void (*func)(struct rcu_head *head)))
+static void _rcu_barrier(struct rcu_state *rsp)
 {
 	int cpu;
 	unsigned long flags;
 	struct rcu_data *rdp;
-	struct rcu_head rh;
+	struct rcu_data rd;
+	unsigned long snap = ACCESS_ONCE(rsp->n_barrier_done);
+	unsigned long snap_done;
 
-	init_rcu_head_on_stack(&rh);
+	init_rcu_head_on_stack(&rd.barrier_head);
+	_rcu_barrier_trace(rsp, "Begin", -1, snap);
 
 	/* Take mutex to serialize concurrent rcu_barrier() requests. */
-	mutex_lock(&rcu_barrier_mutex);
+	mutex_lock(&rsp->barrier_mutex);
+
+	/*
+	 * Ensure that all prior references, including to ->n_barrier_done,
+	 * are ordered before the _rcu_barrier() machinery.
+	 */
+	smp_mb();  /* See above block comment. */
+
+	/*
+	 * Recheck ->n_barrier_done to see if others did our work for us.
+	 * This means checking ->n_barrier_done for an even-to-odd-to-even
+	 * transition.  The "if" expression below therefore rounds the old
+	 * value up to the next even number and adds two before comparing.
+	 */
+	snap_done = ACCESS_ONCE(rsp->n_barrier_done);
+	_rcu_barrier_trace(rsp, "Check", -1, snap_done);
+	if (ULONG_CMP_GE(snap_done, ((snap + 1) & ~0x1) + 2)) {
+		_rcu_barrier_trace(rsp, "EarlyExit", -1, snap_done);
+		smp_mb(); /* caller's subsequent code after above check. */
+		mutex_unlock(&rsp->barrier_mutex);
+		return;
+	}
 
-	smp_mb();  /* Prevent any prior operations from leaking in. */
+	/*
+	 * Increment ->n_barrier_done to avoid duplicate work.  Use
+	 * ACCESS_ONCE() to prevent the compiler from speculating
+	 * the increment to precede the early-exit check.
+	 */
+	ACCESS_ONCE(rsp->n_barrier_done)++;
+	WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 1);
+	_rcu_barrier_trace(rsp, "Inc1", -1, rsp->n_barrier_done);
+	smp_mb(); /* Order ->n_barrier_done increment with below mechanism. */
 
 	/*
 	 * Initialize the count to one rather than to zero in order to
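
A worked example of the even-to-odd-to-even check above, as a stand-alone illustration (the snapshot value is arbitrary):

#include <stdio.h>

int main(void)
{
	unsigned long snap = 5;	/* odd: a barrier was in flight at snapshot time */
	unsigned long need = ((snap + 1) & ~0x1UL) + 2;

	/*
	 * need == 8: only a full Inc1/Inc2 cycle that began after our
	 * snapshot lets _rcu_barrier() piggyback and return early.
	 */
	printf("snap=%lu, piggyback once n_barrier_done >= %lu\n", snap, need);
	return 0;
}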
@@ -2321,8 +2385,8 @@ static void _rcu_barrier(struct rcu_state *rsp,
 	 * 6.	Both rcu_barrier_callback() callbacks are invoked, awakening
 	 *	us -- but before CPU 1's orphaned callbacks are invoked!!!
 	 */
-	init_completion(&rcu_barrier_completion);
-	atomic_set(&rcu_barrier_cpu_count, 1);
+	init_completion(&rsp->barrier_completion);
+	atomic_set(&rsp->barrier_cpu_count, 1);
 	raw_spin_lock_irqsave(&rsp->onofflock, flags);
 	rsp->rcu_barrier_in_progress = current;
 	raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
@@ -2338,14 +2402,19 @@ static void _rcu_barrier(struct rcu_state *rsp,
 		preempt_disable();
 		rdp = per_cpu_ptr(rsp->rda, cpu);
 		if (cpu_is_offline(cpu)) {
+			_rcu_barrier_trace(rsp, "Offline", cpu,
+					   rsp->n_barrier_done);
 			preempt_enable();
 			while (cpu_is_offline(cpu) && ACCESS_ONCE(rdp->qlen))
 				schedule_timeout_interruptible(1);
 		} else if (ACCESS_ONCE(rdp->qlen)) {
-			smp_call_function_single(cpu, rcu_barrier_func,
-						 (void *)call_rcu_func, 1);
+			_rcu_barrier_trace(rsp, "OnlineQ", cpu,
+					   rsp->n_barrier_done);
+			smp_call_function_single(cpu, rcu_barrier_func, rsp, 1);
 			preempt_enable();
 		} else {
+			_rcu_barrier_trace(rsp, "OnlineNQ", cpu,
+					   rsp->n_barrier_done);
 			preempt_enable();
 		}
 	}
@@ -2362,24 +2431,32 @@ static void _rcu_barrier(struct rcu_state *rsp,
 	rcu_adopt_orphan_cbs(rsp);
 	rsp->rcu_barrier_in_progress = NULL;
 	raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
-	atomic_inc(&rcu_barrier_cpu_count);
+	atomic_inc(&rsp->barrier_cpu_count);
 	smp_mb__after_atomic_inc(); /* Ensure atomic_inc() before callback. */
-	call_rcu_func(&rh, rcu_barrier_callback);
+	rd.rsp = rsp;
+	rsp->call(&rd.barrier_head, rcu_barrier_callback);
 
 	/*
 	 * Now that we have an rcu_barrier_callback() callback on each
 	 * CPU, and thus each counted, remove the initial count.
 	 */
-	if (atomic_dec_and_test(&rcu_barrier_cpu_count))
-		complete(&rcu_barrier_completion);
+	if (atomic_dec_and_test(&rsp->barrier_cpu_count))
+		complete(&rsp->barrier_completion);
+
+	/* Increment ->n_barrier_done to prevent duplicate work. */
+	smp_mb(); /* Keep increment after above mechanism. */
+	ACCESS_ONCE(rsp->n_barrier_done)++;
+	WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 0);
+	_rcu_barrier_trace(rsp, "Inc2", -1, rsp->n_barrier_done);
+	smp_mb(); /* Keep increment before caller's subsequent code. */
 
 	/* Wait for all rcu_barrier_callback() callbacks to be invoked. */
-	wait_for_completion(&rcu_barrier_completion);
+	wait_for_completion(&rsp->barrier_completion);
 
 	/* Other rcu_barrier() invocations can now safely proceed. */
-	mutex_unlock(&rcu_barrier_mutex);
+	mutex_unlock(&rsp->barrier_mutex);
 
-	destroy_rcu_head_on_stack(&rh);
+	destroy_rcu_head_on_stack(&rd.barrier_head);
 }
 
 /**
@@ -2387,7 +2464,7 @@ static void _rcu_barrier(struct rcu_state *rsp,
  */
 void rcu_barrier_bh(void)
 {
-	_rcu_barrier(&rcu_bh_state, call_rcu_bh);
+	_rcu_barrier(&rcu_bh_state);
 }
 EXPORT_SYMBOL_GPL(rcu_barrier_bh);
 
@@ -2396,7 +2473,7 @@ EXPORT_SYMBOL_GPL(rcu_barrier_bh);
  */
 void rcu_barrier_sched(void)
 {
-	_rcu_barrier(&rcu_sched_state, call_rcu_sched);
+	_rcu_barrier(&rcu_sched_state);
 }
 EXPORT_SYMBOL_GPL(rcu_barrier_sched);
 
@@ -2407,18 +2484,15 @@ static void __init
 rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
 {
 	unsigned long flags;
-	int i;
 	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
 	struct rcu_node *rnp = rcu_get_root(rsp);
 
 	/* Set up local state, ensuring consistent view of global state. */
 	raw_spin_lock_irqsave(&rnp->lock, flags);
 	rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo);
-	rdp->nxtlist = NULL;
-	for (i = 0; i < RCU_NEXT_SIZE; i++)
-		rdp->nxttail[i] = &rdp->nxtlist;
+	init_callback_list(rdp);
 	rdp->qlen_lazy = 0;
-	rdp->qlen = 0;
+	ACCESS_ONCE(rdp->qlen) = 0;
 	rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
 	WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);
 	WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
@@ -2492,9 +2566,11 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
 
 static void __cpuinit rcu_prepare_cpu(int cpu)
 {
-	rcu_init_percpu_data(cpu, &rcu_sched_state, 0);
-	rcu_init_percpu_data(cpu, &rcu_bh_state, 0);
-	rcu_preempt_init_percpu_data(cpu);
+	struct rcu_state *rsp;
+
+	for_each_rcu_flavor(rsp)
+		rcu_init_percpu_data(cpu, rsp,
+				     strcmp(rsp->name, "rcu_preempt") == 0);
 }
 
 /*
@@ -2506,6 +2582,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
 	long cpu = (long)hcpu;
 	struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
 	struct rcu_node *rnp = rdp->mynode;
+	struct rcu_state *rsp;
 
 	trace_rcu_utilization("Start CPU hotplug");
 	switch (action) {
@@ -2530,18 +2607,16 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
 		 * touch any data without introducing corruption.  We send the
 		 * dying CPU's callbacks to an arbitrarily chosen online CPU.
 		 */
-		rcu_cleanup_dying_cpu(&rcu_bh_state);
-		rcu_cleanup_dying_cpu(&rcu_sched_state);
-		rcu_preempt_cleanup_dying_cpu();
+		for_each_rcu_flavor(rsp)
+			rcu_cleanup_dying_cpu(rsp);
 		rcu_cleanup_after_idle(cpu);
 		break;
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
 	case CPU_UP_CANCELED:
 	case CPU_UP_CANCELED_FROZEN:
-		rcu_cleanup_dead_cpu(cpu, &rcu_bh_state);
-		rcu_cleanup_dead_cpu(cpu, &rcu_sched_state);
-		rcu_preempt_cleanup_dead_cpu(cpu);
+		for_each_rcu_flavor(rsp)
+			rcu_cleanup_dead_cpu(cpu, rsp);
 		break;
 	default:
 		break;
@@ -2574,9 +2649,9 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
 {
 	int i;
 
-	for (i = NUM_RCU_LVLS - 1; i > 0; i--)
+	for (i = rcu_num_lvls - 1; i > 0; i--)
 		rsp->levelspread[i] = CONFIG_RCU_FANOUT;
-	rsp->levelspread[0] = CONFIG_RCU_FANOUT_LEAF;
+	rsp->levelspread[0] = rcu_fanout_leaf;
 }
 #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
 static void __init rcu_init_levelspread(struct rcu_state *rsp)
@@ -2586,7 +2661,7 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
 	int i;
 
 	cprv = NR_CPUS;
-	for (i = NUM_RCU_LVLS - 1; i >= 0; i--) {
+	for (i = rcu_num_lvls - 1; i >= 0; i--) {
 		ccur = rsp->levelcnt[i];
 		rsp->levelspread[i] = (cprv + ccur - 1) / ccur;
 		cprv = ccur;
@@ -2613,13 +2688,15 @@ static void __init rcu_init_one(struct rcu_state *rsp,
 
 	/* Initialize the level-tracking arrays. */
 
-	for (i = 1; i < NUM_RCU_LVLS; i++)
+	for (i = 0; i < rcu_num_lvls; i++)
+		rsp->levelcnt[i] = num_rcu_lvl[i];
+	for (i = 1; i < rcu_num_lvls; i++)
 		rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1];
 	rcu_init_levelspread(rsp);
 
 	/* Initialize the elements themselves, starting from the leaves. */
 
-	for (i = NUM_RCU_LVLS - 1; i >= 0; i--) {
+	for (i = rcu_num_lvls - 1; i >= 0; i--) {
 		cpustride *= rsp->levelspread[i];
 		rnp = rsp->level[i];
 		for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) {
@@ -2649,13 +2726,74 @@ static void __init rcu_init_one(struct rcu_state *rsp,
 	}
 
 	rsp->rda = rda;
-	rnp = rsp->level[NUM_RCU_LVLS - 1];
+	rnp = rsp->level[rcu_num_lvls - 1];
 	for_each_possible_cpu(i) {
 		while (i > rnp->grphi)
 			rnp++;
 		per_cpu_ptr(rsp->rda, i)->mynode = rnp;
 		rcu_boot_init_percpu_data(i, rsp);
 	}
+	list_add(&rsp->flavors, &rcu_struct_flavors);
+}
+
+/*
+ * Compute the rcu_node tree geometry from kernel parameters.  This cannot
+ * replace the definitions in rcutree.h because those are needed to size
+ * the ->node array in the rcu_state structure.
+ */
+static void __init rcu_init_geometry(void)
+{
+	int i;
+	int j;
+	int n = nr_cpu_ids;
+	int rcu_capacity[MAX_RCU_LVLS + 1];
+
+	/* If the compile-time values are accurate, just leave. */
+	if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF)
+		return;
+
+	/*
+	 * Compute number of nodes that can be handled an rcu_node tree
+	 * with the given number of levels.  Setting rcu_capacity[0] makes
+	 * some of the arithmetic easier.
+	 */
+	rcu_capacity[0] = 1;
+	rcu_capacity[1] = rcu_fanout_leaf;
+	for (i = 2; i <= MAX_RCU_LVLS; i++)
+		rcu_capacity[i] = rcu_capacity[i - 1] * CONFIG_RCU_FANOUT;
+
+	/*
+	 * The boot-time rcu_fanout_leaf parameter is only permitted
+	 * to increase the leaf-level fanout, not decrease it.  Of course,
+	 * the leaf-level fanout cannot exceed the number of bits in
+	 * the rcu_node masks.  Finally, the tree must be able to accommodate
+	 * the configured number of CPUs.  Complain and fall back to the
+	 * compile-time values if these limits are exceeded.
+	 */
+	if (rcu_fanout_leaf < CONFIG_RCU_FANOUT_LEAF ||
+	    rcu_fanout_leaf > sizeof(unsigned long) * 8 ||
+	    n > rcu_capacity[MAX_RCU_LVLS]) {
+		WARN_ON(1);
+		return;
+	}
+
+	/* Calculate the number of rcu_nodes at each level of the tree. */
+	for (i = 1; i <= MAX_RCU_LVLS; i++)
+		if (n <= rcu_capacity[i]) {
+			for (j = 0; j <= i; j++)
+				num_rcu_lvl[j] =
+					DIV_ROUND_UP(n, rcu_capacity[i - j]);
+			rcu_num_lvls = i;
+			for (j = i + 1; j <= MAX_RCU_LVLS; j++)
+				num_rcu_lvl[j] = 0;
+			break;
+		}
+
+	/* Calculate the total number of rcu_node structures. */
+	rcu_num_nodes = 0;
+	for (i = 0; i <= MAX_RCU_LVLS; i++)
+		rcu_num_nodes += num_rcu_lvl[i];
+	rcu_num_nodes -= n;
 }
 
 void __init rcu_init(void)
@@ -2663,6 +2801,7 @@ void __init rcu_init(void)
2663 int cpu; 2801 int cpu;
2664 2802
2665 rcu_bootup_announce(); 2803 rcu_bootup_announce();
2804 rcu_init_geometry();
2666 rcu_init_one(&rcu_sched_state, &rcu_sched_data); 2805 rcu_init_one(&rcu_sched_state, &rcu_sched_data);
2667 rcu_init_one(&rcu_bh_state, &rcu_bh_data); 2806 rcu_init_one(&rcu_bh_state, &rcu_bh_data);
2668 __rcu_init_preempt(); 2807 __rcu_init_preempt();
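For reference, the new geometry code can be checked in isolation. The standalone sketch below mirrors the rcu_init_geometry() arithmetic for an assumed configuration (CONFIG_RCU_FANOUT=64, MAX_RCU_LVLS=4, 4096 possible CPUs, rcutree.fanout_leaf=32); DIV_ROUND_UP() and the constants are reimplemented only so the example builds outside the kernel tree, and none of the numbers are kernel defaults.

/*
 * Standalone sketch of the rcu_init_geometry() arithmetic above.
 * All values are illustrative assumptions, not kernel defaults.
 */
#include <stdio.h>

#define DIV_ROUND_UP(n, d)      (((n) + (d) - 1) / (d))
#define MAX_RCU_LVLS            4
#define CONFIG_RCU_FANOUT       64

int main(void)
{
        int rcu_fanout_leaf = 32;       /* e.g. booted with rcutree.fanout_leaf=32 */
        int n = 4096;                   /* stands in for nr_cpu_ids */
        int rcu_capacity[MAX_RCU_LVLS + 1];
        int num_rcu_lvl[MAX_RCU_LVLS + 1] = { 0 };
        int rcu_num_lvls = 0, rcu_num_nodes = 0;
        int i, j;

        /* Capacity of a tree with i levels, exactly as in the patch. */
        rcu_capacity[0] = 1;
        rcu_capacity[1] = rcu_fanout_leaf;
        for (i = 2; i <= MAX_RCU_LVLS; i++)
                rcu_capacity[i] = rcu_capacity[i - 1] * CONFIG_RCU_FANOUT;

        /* Pick the shallowest tree that covers n CPUs and size each level. */
        for (i = 1; i <= MAX_RCU_LVLS; i++)
                if (n <= rcu_capacity[i]) {
                        for (j = 0; j <= i; j++)
                                num_rcu_lvl[j] = DIV_ROUND_UP(n, rcu_capacity[i - j]);
                        rcu_num_lvls = i;
                        break;
                }

        /* Total rcu_node count; the last level counts CPUs, so subtract n. */
        for (i = 0; i <= MAX_RCU_LVLS; i++)
                rcu_num_nodes += num_rcu_lvl[i];
        rcu_num_nodes -= n;

        printf("levels=%d nodes=%d\n", rcu_num_lvls, rcu_num_nodes);
        return 0;
}

With these inputs the result is levels=3 nodes=131: one root, two interior nodes, and 128 leaves covering up to 32 CPUs each.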
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 19b61ac1079f..4d29169f2124 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -42,28 +42,28 @@
42#define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT) 42#define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT)
43 43
44#if NR_CPUS <= RCU_FANOUT_1 44#if NR_CPUS <= RCU_FANOUT_1
45# define NUM_RCU_LVLS 1 45# define RCU_NUM_LVLS 1
46# define NUM_RCU_LVL_0 1 46# define NUM_RCU_LVL_0 1
47# define NUM_RCU_LVL_1 (NR_CPUS) 47# define NUM_RCU_LVL_1 (NR_CPUS)
48# define NUM_RCU_LVL_2 0 48# define NUM_RCU_LVL_2 0
49# define NUM_RCU_LVL_3 0 49# define NUM_RCU_LVL_3 0
50# define NUM_RCU_LVL_4 0 50# define NUM_RCU_LVL_4 0
51#elif NR_CPUS <= RCU_FANOUT_2 51#elif NR_CPUS <= RCU_FANOUT_2
52# define NUM_RCU_LVLS 2 52# define RCU_NUM_LVLS 2
53# define NUM_RCU_LVL_0 1 53# define NUM_RCU_LVL_0 1
54# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) 54# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
55# define NUM_RCU_LVL_2 (NR_CPUS) 55# define NUM_RCU_LVL_2 (NR_CPUS)
56# define NUM_RCU_LVL_3 0 56# define NUM_RCU_LVL_3 0
57# define NUM_RCU_LVL_4 0 57# define NUM_RCU_LVL_4 0
58#elif NR_CPUS <= RCU_FANOUT_3 58#elif NR_CPUS <= RCU_FANOUT_3
59# define NUM_RCU_LVLS 3 59# define RCU_NUM_LVLS 3
60# define NUM_RCU_LVL_0 1 60# define NUM_RCU_LVL_0 1
61# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) 61# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
62# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) 62# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
63# define NUM_RCU_LVL_3 (NR_CPUS) 63# define NUM_RCU_LVL_3 (NR_CPUS)
64# define NUM_RCU_LVL_4 0 64# define NUM_RCU_LVL_4 0
65#elif NR_CPUS <= RCU_FANOUT_4 65#elif NR_CPUS <= RCU_FANOUT_4
66# define NUM_RCU_LVLS 4 66# define RCU_NUM_LVLS 4
67# define NUM_RCU_LVL_0 1 67# define NUM_RCU_LVL_0 1
68# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3) 68# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3)
69# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) 69# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
@@ -76,6 +76,9 @@
76#define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4) 76#define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4)
77#define NUM_RCU_NODES (RCU_SUM - NR_CPUS) 77#define NUM_RCU_NODES (RCU_SUM - NR_CPUS)
78 78
79extern int rcu_num_lvls;
80extern int rcu_num_nodes;
81
79/* 82/*
80 * Dynticks per-CPU state. 83 * Dynticks per-CPU state.
81 */ 84 */
@@ -97,6 +100,7 @@ struct rcu_dynticks {
97 /* # times non-lazy CBs posted to CPU. */ 100 /* # times non-lazy CBs posted to CPU. */
98 unsigned long nonlazy_posted_snap; 101 unsigned long nonlazy_posted_snap;
99 /* idle-period nonlazy_posted snapshot. */ 102 /* idle-period nonlazy_posted snapshot. */
103 int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */
100#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ 104#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
101}; 105};
102 106
@@ -206,7 +210,7 @@ struct rcu_node {
206 */ 210 */
207#define rcu_for_each_node_breadth_first(rsp, rnp) \ 211#define rcu_for_each_node_breadth_first(rsp, rnp) \
208 for ((rnp) = &(rsp)->node[0]; \ 212 for ((rnp) = &(rsp)->node[0]; \
209 (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++) 213 (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)
210 214
211/* 215/*
212 * Do a breadth-first scan of the non-leaf rcu_node structures for the 216 * Do a breadth-first scan of the non-leaf rcu_node structures for the
@@ -215,7 +219,7 @@ struct rcu_node {
215 */ 219 */
216#define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \ 220#define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \
217 for ((rnp) = &(rsp)->node[0]; \ 221 for ((rnp) = &(rsp)->node[0]; \
218 (rnp) < (rsp)->level[NUM_RCU_LVLS - 1]; (rnp)++) 222 (rnp) < (rsp)->level[rcu_num_lvls - 1]; (rnp)++)
219 223
220/* 224/*
221 * Scan the leaves of the rcu_node hierarchy for the specified rcu_state 225 * Scan the leaves of the rcu_node hierarchy for the specified rcu_state
@@ -224,8 +228,8 @@ struct rcu_node {
224 * It is still a leaf node, even if it is also the root node. 228 * It is still a leaf node, even if it is also the root node.
225 */ 229 */
226#define rcu_for_each_leaf_node(rsp, rnp) \ 230#define rcu_for_each_leaf_node(rsp, rnp) \
227 for ((rnp) = (rsp)->level[NUM_RCU_LVLS - 1]; \ 231 for ((rnp) = (rsp)->level[rcu_num_lvls - 1]; \
228 (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++) 232 (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)
229 233
230/* Index values for nxttail array in struct rcu_data. */ 234/* Index values for nxttail array in struct rcu_data. */
231#define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */ 235#define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */
@@ -311,6 +315,9 @@ struct rcu_data {
311 unsigned long n_rp_need_fqs; 315 unsigned long n_rp_need_fqs;
312 unsigned long n_rp_need_nothing; 316 unsigned long n_rp_need_nothing;
313 317
318 /* 6) _rcu_barrier() callback. */
319 struct rcu_head barrier_head;
320
314 int cpu; 321 int cpu;
315 struct rcu_state *rsp; 322 struct rcu_state *rsp;
316}; 323};
@@ -357,10 +364,12 @@ do { \
357 */ 364 */
358struct rcu_state { 365struct rcu_state {
359 struct rcu_node node[NUM_RCU_NODES]; /* Hierarchy. */ 366 struct rcu_node node[NUM_RCU_NODES]; /* Hierarchy. */
360 struct rcu_node *level[NUM_RCU_LVLS]; /* Hierarchy levels. */ 367 struct rcu_node *level[RCU_NUM_LVLS]; /* Hierarchy levels. */
361 u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */ 368 u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */
362 u8 levelspread[NUM_RCU_LVLS]; /* kids/node in each level. */ 369 u8 levelspread[RCU_NUM_LVLS]; /* kids/node in each level. */
363 struct rcu_data __percpu *rda; /* pointer to per-CPU rcu_data. */ 370 struct rcu_data __percpu *rda; /* pointer to per-CPU rcu_data. */
371 void (*call)(struct rcu_head *head, /* call_rcu() flavor. */
372 void (*func)(struct rcu_head *head));
364 373
365 /* The following fields are guarded by the root rcu_node's lock. */ 374 /* The following fields are guarded by the root rcu_node's lock. */
366 375
@@ -392,6 +401,11 @@ struct rcu_state {
392 struct task_struct *rcu_barrier_in_progress; 401 struct task_struct *rcu_barrier_in_progress;
393 /* Task doing rcu_barrier(), */ 402 /* Task doing rcu_barrier(), */
394 /* or NULL if no barrier. */ 403 /* or NULL if no barrier. */
404 struct mutex barrier_mutex; /* Guards barrier fields. */
405 atomic_t barrier_cpu_count; /* # CPUs waiting on. */
406 struct completion barrier_completion; /* Wake at barrier end. */
407 unsigned long n_barrier_done; /* ++ at start and end of */
408 /* _rcu_barrier(). */
395 raw_spinlock_t fqslock; /* Only one task forcing */ 409 raw_spinlock_t fqslock; /* Only one task forcing */
396 /* quiescent states. */ 410 /* quiescent states. */
397 unsigned long jiffies_force_qs; /* Time at which to invoke */ 411 unsigned long jiffies_force_qs; /* Time at which to invoke */
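The barrier fields above back a consolidated _rcu_barrier() in rcutree.c, which is not part of this hunk. As a hedged sketch of the general shape only: one rdp->barrier_head callback is queued per CPU, barrier_cpu_count tracks how many are still outstanding, and the callback that drains the count wakes the task sleeping on barrier_completion. The actual code in rcutree.c may differ in details such as tracing and the initial count.

/*
 * Sketch of a barrier callback using the fields added above; this is
 * the general shape, not necessarily the exact code in rcutree.c.
 */
static void rcu_barrier_callback(struct rcu_head *rhp)
{
        struct rcu_data *rdp = container_of(rhp, struct rcu_data, barrier_head);
        struct rcu_state *rsp = rdp->rsp;

        /* The last callback to finish wakes the _rcu_barrier() caller. */
        if (atomic_dec_and_test(&rsp->barrier_cpu_count))
                complete(&rsp->barrier_completion);
}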
@@ -409,8 +423,13 @@ struct rcu_state {
409 unsigned long gp_max; /* Maximum GP duration in */ 423 unsigned long gp_max; /* Maximum GP duration in */
410 /* jiffies. */ 424 /* jiffies. */
411 char *name; /* Name of structure. */ 425 char *name; /* Name of structure. */
426 struct list_head flavors; /* List of RCU flavors. */
412}; 427};
413 428
429extern struct list_head rcu_struct_flavors;
430#define for_each_rcu_flavor(rsp) \
431 list_for_each_entry((rsp), &rcu_struct_flavors, flavors)
432
414/* Return values for rcu_preempt_offline_tasks(). */ 433/* Return values for rcu_preempt_offline_tasks(). */
415 434
416#define RCU_OFL_TASKS_NORM_GP 0x1 /* Tasks blocking normal */ 435#define RCU_OFL_TASKS_NORM_GP 0x1 /* Tasks blocking normal */
@@ -453,25 +472,18 @@ static void rcu_stop_cpu_kthread(int cpu);
453#endif /* #ifdef CONFIG_HOTPLUG_CPU */ 472#endif /* #ifdef CONFIG_HOTPLUG_CPU */
454static void rcu_print_detail_task_stall(struct rcu_state *rsp); 473static void rcu_print_detail_task_stall(struct rcu_state *rsp);
455static int rcu_print_task_stall(struct rcu_node *rnp); 474static int rcu_print_task_stall(struct rcu_node *rnp);
456static void rcu_preempt_stall_reset(void);
457static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); 475static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp);
458#ifdef CONFIG_HOTPLUG_CPU 476#ifdef CONFIG_HOTPLUG_CPU
459static int rcu_preempt_offline_tasks(struct rcu_state *rsp, 477static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
460 struct rcu_node *rnp, 478 struct rcu_node *rnp,
461 struct rcu_data *rdp); 479 struct rcu_data *rdp);
462#endif /* #ifdef CONFIG_HOTPLUG_CPU */ 480#endif /* #ifdef CONFIG_HOTPLUG_CPU */
463static void rcu_preempt_cleanup_dead_cpu(int cpu);
464static void rcu_preempt_check_callbacks(int cpu); 481static void rcu_preempt_check_callbacks(int cpu);
465static void rcu_preempt_process_callbacks(void);
466void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); 482void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
467#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) 483#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU)
468static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, 484static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
469 bool wake); 485 bool wake);
470#endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */ 486#endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */
471static int rcu_preempt_pending(int cpu);
472static int rcu_preempt_cpu_has_callbacks(int cpu);
473static void __cpuinit rcu_preempt_init_percpu_data(int cpu);
474static void rcu_preempt_cleanup_dying_cpu(void);
475static void __init __rcu_init_preempt(void); 487static void __init __rcu_init_preempt(void);
476static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); 488static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
477static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); 489static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
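The flavors list and for_each_rcu_flavor() added above let common code walk rcu_sched, rcu_bh, and (when configured) rcu_preempt instead of repeating per-flavor #ifdef blocks, with rsp->call() supplying the flavor-specific call_rcu(). The standalone model below imitates only the shape of that pattern; every name in it is illustrative and none of it is kernel code.

/*
 * Standalone model of the flavor-list pattern: each state structure
 * carries a name, a flavor-specific call() hook, and a list link, and
 * generic code iterates the list.  Illustrative names throughout.
 */
#include <stdio.h>

struct flavor {
        const char *name;
        void (*call)(const char *msg);  /* models rcu_state.call */
        struct flavor *next;            /* models the ->flavors list node */
};

static struct flavor *flavor_list;      /* models rcu_struct_flavors */

#define for_each_flavor(f) \
        for ((f) = flavor_list; (f); (f) = (f)->next)

static void register_flavor(struct flavor *f)
{
        f->next = flavor_list;          /* models list_add() in rcu_init_one() */
        flavor_list = f;
}

static void sched_call(const char *msg) { printf("rcu_sched: %s\n", msg); }
static void bh_call(const char *msg)    { printf("rcu_bh: %s\n", msg); }

int main(void)
{
        static struct flavor sched = { "rcu_sched", sched_call };
        static struct flavor bh    = { "rcu_bh",    bh_call };
        struct flavor *f;

        register_flavor(&sched);
        register_flavor(&bh);

        /* Generic code no longer needs per-flavor #ifdef blocks: */
        for_each_flavor(f)
                f->call("queued a callback");
        return 0;
}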
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 3e4899459f3d..a9194d5606c4 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -68,17 +68,21 @@ static void __init rcu_bootup_announce_oddness(void)
68 printk(KERN_INFO "\tAdditional per-CPU info printed with stalls.\n"); 68 printk(KERN_INFO "\tAdditional per-CPU info printed with stalls.\n");
69#endif 69#endif
70#if NUM_RCU_LVL_4 != 0 70#if NUM_RCU_LVL_4 != 0
71 printk(KERN_INFO "\tExperimental four-level hierarchy is enabled.\n"); 71 printk(KERN_INFO "\tFour-level hierarchy is enabled.\n");
72#endif 72#endif
73 if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF)
74 printk(KERN_INFO "\tExperimental boot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf);
75 if (nr_cpu_ids != NR_CPUS)
76 printk(KERN_INFO "\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids);
73} 77}
74 78
75#ifdef CONFIG_TREE_PREEMPT_RCU 79#ifdef CONFIG_TREE_PREEMPT_RCU
76 80
77struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt); 81struct rcu_state rcu_preempt_state =
82 RCU_STATE_INITIALIZER(rcu_preempt, call_rcu);
78DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); 83DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
79static struct rcu_state *rcu_state = &rcu_preempt_state; 84static struct rcu_state *rcu_state = &rcu_preempt_state;
80 85
81static void rcu_read_unlock_special(struct task_struct *t);
82static int rcu_preempted_readers_exp(struct rcu_node *rnp); 86static int rcu_preempted_readers_exp(struct rcu_node *rnp);
83 87
84/* 88/*
@@ -233,18 +237,6 @@ static void rcu_preempt_note_context_switch(int cpu)
233} 237}
234 238
235/* 239/*
236 * Tree-preemptible RCU implementation for rcu_read_lock().
237 * Just increment ->rcu_read_lock_nesting, shared state will be updated
238 * if we block.
239 */
240void __rcu_read_lock(void)
241{
242 current->rcu_read_lock_nesting++;
243 barrier(); /* needed if we ever invoke rcu_read_lock in rcutree.c */
244}
245EXPORT_SYMBOL_GPL(__rcu_read_lock);
246
247/*
248 * Check for preempted RCU readers blocking the current grace period 240 * Check for preempted RCU readers blocking the current grace period
249 * for the specified rcu_node structure. If the caller needs a reliable 241 * for the specified rcu_node structure. If the caller needs a reliable
250 * answer, it must hold the rcu_node's ->lock. 242 * answer, it must hold the rcu_node's ->lock.
@@ -310,7 +302,7 @@ static struct list_head *rcu_next_node_entry(struct task_struct *t,
310 * notify RCU core processing or task having blocked during the RCU 302 * notify RCU core processing or task having blocked during the RCU
311 * read-side critical section. 303 * read-side critical section.
312 */ 304 */
313static noinline void rcu_read_unlock_special(struct task_struct *t) 305void rcu_read_unlock_special(struct task_struct *t)
314{ 306{
315 int empty; 307 int empty;
316 int empty_exp; 308 int empty_exp;
@@ -418,38 +410,6 @@ static noinline void rcu_read_unlock_special(struct task_struct *t)
418 } 410 }
419} 411}
420 412
421/*
422 * Tree-preemptible RCU implementation for rcu_read_unlock().
423 * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
424 * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
425 * invoke rcu_read_unlock_special() to clean up after a context switch
426 * in an RCU read-side critical section and other special cases.
427 */
428void __rcu_read_unlock(void)
429{
430 struct task_struct *t = current;
431
432 if (t->rcu_read_lock_nesting != 1)
433 --t->rcu_read_lock_nesting;
434 else {
435 barrier(); /* critical section before exit code. */
436 t->rcu_read_lock_nesting = INT_MIN;
437 barrier(); /* assign before ->rcu_read_unlock_special load */
438 if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
439 rcu_read_unlock_special(t);
440 barrier(); /* ->rcu_read_unlock_special load before assign */
441 t->rcu_read_lock_nesting = 0;
442 }
443#ifdef CONFIG_PROVE_LOCKING
444 {
445 int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting);
446
447 WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
448 }
449#endif /* #ifdef CONFIG_PROVE_LOCKING */
450}
451EXPORT_SYMBOL_GPL(__rcu_read_unlock);
452
453#ifdef CONFIG_RCU_CPU_STALL_VERBOSE 413#ifdef CONFIG_RCU_CPU_STALL_VERBOSE
454 414
455/* 415/*
@@ -540,16 +500,6 @@ static int rcu_print_task_stall(struct rcu_node *rnp)
540} 500}
541 501
542/* 502/*
543 * Suppress preemptible RCU's CPU stall warnings by pushing the
544 * time of the next stall-warning message comfortably far into the
545 * future.
546 */
547static void rcu_preempt_stall_reset(void)
548{
549 rcu_preempt_state.jiffies_stall = jiffies + ULONG_MAX / 2;
550}
551
552/*
553 * Check that the list of blocked tasks for the newly completed grace 503 * Check that the list of blocked tasks for the newly completed grace
554 * period is in fact empty. It is a serious bug to complete a grace 504 * period is in fact empty. It is a serious bug to complete a grace
555 * period that still has RCU readers blocked! This function must be 505 * period that still has RCU readers blocked! This function must be
@@ -650,14 +600,6 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
650#endif /* #ifdef CONFIG_HOTPLUG_CPU */ 600#endif /* #ifdef CONFIG_HOTPLUG_CPU */
651 601
652/* 602/*
653 * Do CPU-offline processing for preemptible RCU.
654 */
655static void rcu_preempt_cleanup_dead_cpu(int cpu)
656{
657 rcu_cleanup_dead_cpu(cpu, &rcu_preempt_state);
658}
659
660/*
661 * Check for a quiescent state from the current CPU. When a task blocks, 603 * Check for a quiescent state from the current CPU. When a task blocks,
662 * the task is recorded in the corresponding CPU's rcu_node structure, 604 * the task is recorded in the corresponding CPU's rcu_node structure,
663 * which is checked elsewhere. 605 * which is checked elsewhere.
@@ -677,15 +619,6 @@ static void rcu_preempt_check_callbacks(int cpu)
677 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; 619 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
678} 620}
679 621
680/*
681 * Process callbacks for preemptible RCU.
682 */
683static void rcu_preempt_process_callbacks(void)
684{
685 __rcu_process_callbacks(&rcu_preempt_state,
686 &__get_cpu_var(rcu_preempt_data));
687}
688
689#ifdef CONFIG_RCU_BOOST 622#ifdef CONFIG_RCU_BOOST
690 623
691static void rcu_preempt_do_callbacks(void) 624static void rcu_preempt_do_callbacks(void)
@@ -917,51 +850,16 @@ mb_ret:
917} 850}
918EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); 851EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
919 852
920/*
921 * Check to see if there is any immediate preemptible-RCU-related work
922 * to be done.
923 */
924static int rcu_preempt_pending(int cpu)
925{
926 return __rcu_pending(&rcu_preempt_state,
927 &per_cpu(rcu_preempt_data, cpu));
928}
929
930/*
931 * Does preemptible RCU have callbacks on this CPU?
932 */
933static int rcu_preempt_cpu_has_callbacks(int cpu)
934{
935 return !!per_cpu(rcu_preempt_data, cpu).nxtlist;
936}
937
938/** 853/**
939 * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete. 854 * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
940 */ 855 */
941void rcu_barrier(void) 856void rcu_barrier(void)
942{ 857{
943 _rcu_barrier(&rcu_preempt_state, call_rcu); 858 _rcu_barrier(&rcu_preempt_state);
944} 859}
945EXPORT_SYMBOL_GPL(rcu_barrier); 860EXPORT_SYMBOL_GPL(rcu_barrier);
946 861
947/* 862/*
948 * Initialize preemptible RCU's per-CPU data.
949 */
950static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
951{
952 rcu_init_percpu_data(cpu, &rcu_preempt_state, 1);
953}
954
955/*
956 * Move preemptible RCU's callbacks from dying CPU to other online CPU
957 * and record a quiescent state.
958 */
959static void rcu_preempt_cleanup_dying_cpu(void)
960{
961 rcu_cleanup_dying_cpu(&rcu_preempt_state);
962}
963
964/*
965 * Initialize preemptible RCU's state structures. 863 * Initialize preemptible RCU's state structures.
966 */ 864 */
967static void __init __rcu_init_preempt(void) 865static void __init __rcu_init_preempt(void)
@@ -1046,14 +944,6 @@ static int rcu_print_task_stall(struct rcu_node *rnp)
1046} 944}
1047 945
1048/* 946/*
1049 * Because preemptible RCU does not exist, there is no need to suppress
1050 * its CPU stall warnings.
1051 */
1052static void rcu_preempt_stall_reset(void)
1053{
1054}
1055
1056/*
1057 * Because there is no preemptible RCU, there can be no readers blocked, 947 * Because there is no preemptible RCU, there can be no readers blocked,
1058 * so there is no need to check for blocked tasks. So check only for 948 * so there is no need to check for blocked tasks. So check only for
1059 * bogus qsmask values. 949 * bogus qsmask values.
@@ -1081,14 +971,6 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
1081#endif /* #ifdef CONFIG_HOTPLUG_CPU */ 971#endif /* #ifdef CONFIG_HOTPLUG_CPU */
1082 972
1083/* 973/*
1084 * Because preemptible RCU does not exist, it never needs CPU-offline
1085 * processing.
1086 */
1087static void rcu_preempt_cleanup_dead_cpu(int cpu)
1088{
1089}
1090
1091/*
1092 * Because preemptible RCU does not exist, it never has any callbacks 974 * Because preemptible RCU does not exist, it never has any callbacks
1093 * to check. 975 * to check.
1094 */ 976 */
@@ -1097,14 +979,6 @@ static void rcu_preempt_check_callbacks(int cpu)
1097} 979}
1098 980
1099/* 981/*
1100 * Because preemptible RCU does not exist, it never has any callbacks
1101 * to process.
1102 */
1103static void rcu_preempt_process_callbacks(void)
1104{
1105}
1106
1107/*
1108 * Queue an RCU callback for lazy invocation after a grace period. 982 * Queue an RCU callback for lazy invocation after a grace period.
1109 * This will likely be later named something like "call_rcu_lazy()", 983 * This will likely be later named something like "call_rcu_lazy()",
1110 * but this change will require some way of tagging the lazy RCU 984 * but this change will require some way of tagging the lazy RCU
@@ -1145,22 +1019,6 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
1145#endif /* #ifdef CONFIG_HOTPLUG_CPU */ 1019#endif /* #ifdef CONFIG_HOTPLUG_CPU */
1146 1020
1147/* 1021/*
1148 * Because preemptible RCU does not exist, it never has any work to do.
1149 */
1150static int rcu_preempt_pending(int cpu)
1151{
1152 return 0;
1153}
1154
1155/*
1156 * Because preemptible RCU does not exist, it never has callbacks
1157 */
1158static int rcu_preempt_cpu_has_callbacks(int cpu)
1159{
1160 return 0;
1161}
1162
1163/*
1164 * Because preemptible RCU does not exist, rcu_barrier() is just 1022 * Because preemptible RCU does not exist, rcu_barrier() is just
1165 * another name for rcu_barrier_sched(). 1023 * another name for rcu_barrier_sched().
1166 */ 1024 */
@@ -1171,21 +1029,6 @@ void rcu_barrier(void)
1171EXPORT_SYMBOL_GPL(rcu_barrier); 1029EXPORT_SYMBOL_GPL(rcu_barrier);
1172 1030
1173/* 1031/*
1174 * Because preemptible RCU does not exist, there is no per-CPU
1175 * data to initialize.
1176 */
1177static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
1178{
1179}
1180
1181/*
1182 * Because there is no preemptible RCU, there is no cleanup to do.
1183 */
1184static void rcu_preempt_cleanup_dying_cpu(void)
1185{
1186}
1187
1188/*
1189 * Because preemptible RCU does not exist, it need not be initialized. 1032 * Because preemptible RCU does not exist, it need not be initialized.
1190 */ 1033 */
1191static void __init __rcu_init_preempt(void) 1034static void __init __rcu_init_preempt(void)
@@ -1968,9 +1811,11 @@ static void rcu_idle_count_callbacks_posted(void)
1968 */ 1811 */
1969#define RCU_IDLE_FLUSHES 5 /* Number of dyntick-idle tries. */ 1812#define RCU_IDLE_FLUSHES 5 /* Number of dyntick-idle tries. */
1970#define RCU_IDLE_OPT_FLUSHES 3 /* Optional dyntick-idle tries. */ 1813#define RCU_IDLE_OPT_FLUSHES 3 /* Optional dyntick-idle tries. */
1971#define RCU_IDLE_GP_DELAY 6 /* Roughly one grace period. */ 1814#define RCU_IDLE_GP_DELAY 4 /* Roughly one grace period. */
1972#define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */ 1815#define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */
1973 1816
1817extern int tick_nohz_enabled;
1818
1974/* 1819/*
1975 * Does the specified flavor of RCU have non-lazy callbacks pending on 1820 * Does the specified flavor of RCU have non-lazy callbacks pending on
1976 * the specified CPU? Both RCU flavor and CPU are specified by the 1821 * the specified CPU? Both RCU flavor and CPU are specified by the
@@ -2047,10 +1892,13 @@ int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
2047 return 1; 1892 return 1;
2048 } 1893 }
2049 /* Set up for the possibility that RCU will post a timer. */ 1894 /* Set up for the possibility that RCU will post a timer. */
2050 if (rcu_cpu_has_nonlazy_callbacks(cpu)) 1895 if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
2051 *delta_jiffies = RCU_IDLE_GP_DELAY; 1896 *delta_jiffies = round_up(RCU_IDLE_GP_DELAY + jiffies,
2052 else 1897 RCU_IDLE_GP_DELAY) - jiffies;
2053 *delta_jiffies = RCU_IDLE_LAZY_GP_DELAY; 1898 } else {
1899 *delta_jiffies = jiffies + RCU_IDLE_LAZY_GP_DELAY;
1900 *delta_jiffies = round_jiffies(*delta_jiffies) - jiffies;
1901 }
2054 return 0; 1902 return 0;
2055} 1903}
2056 1904
@@ -2109,6 +1957,7 @@ static void rcu_cleanup_after_idle(int cpu)
2109 1957
2110 del_timer(&rdtp->idle_gp_timer); 1958 del_timer(&rdtp->idle_gp_timer);
2111 trace_rcu_prep_idle("Cleanup after idle"); 1959 trace_rcu_prep_idle("Cleanup after idle");
1960 rdtp->tick_nohz_enabled_snap = ACCESS_ONCE(tick_nohz_enabled);
2112} 1961}
2113 1962
2114/* 1963/*
@@ -2134,6 +1983,18 @@ static void rcu_prepare_for_idle(int cpu)
2134{ 1983{
2135 struct timer_list *tp; 1984 struct timer_list *tp;
2136 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); 1985 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
1986 int tne;
1987
1988 /* Handle nohz enablement switches conservatively. */
1989 tne = ACCESS_ONCE(tick_nohz_enabled);
1990 if (tne != rdtp->tick_nohz_enabled_snap) {
1991 if (rcu_cpu_has_callbacks(cpu))
1992 invoke_rcu_core(); /* force nohz to see update. */
1993 rdtp->tick_nohz_enabled_snap = tne;
1994 return;
1995 }
1996 if (!tne)
1997 return;
2137 1998
2138 /* 1999 /*
2139 * If this is an idle re-entry, for example, due to use of 2000 * If this is an idle re-entry, for example, due to use of
@@ -2187,10 +2048,11 @@ static void rcu_prepare_for_idle(int cpu)
2187 if (rcu_cpu_has_nonlazy_callbacks(cpu)) { 2048 if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
2188 trace_rcu_prep_idle("Dyntick with callbacks"); 2049 trace_rcu_prep_idle("Dyntick with callbacks");
2189 rdtp->idle_gp_timer_expires = 2050 rdtp->idle_gp_timer_expires =
2190 jiffies + RCU_IDLE_GP_DELAY; 2051 round_up(jiffies + RCU_IDLE_GP_DELAY,
2052 RCU_IDLE_GP_DELAY);
2191 } else { 2053 } else {
2192 rdtp->idle_gp_timer_expires = 2054 rdtp->idle_gp_timer_expires =
2193 jiffies + RCU_IDLE_LAZY_GP_DELAY; 2055 round_jiffies(jiffies + RCU_IDLE_LAZY_GP_DELAY);
2194 trace_rcu_prep_idle("Dyntick with lazy callbacks"); 2056 trace_rcu_prep_idle("Dyntick with lazy callbacks");
2195 } 2057 }
2196 tp = &rdtp->idle_gp_timer; 2058 tp = &rdtp->idle_gp_timer;
@@ -2269,6 +2131,7 @@ static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
2269 2131
2270static void print_cpu_stall_fast_no_hz(char *cp, int cpu) 2132static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
2271{ 2133{
2134 *cp = '\0';
2272} 2135}
2273 2136
2274#endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */ 2137#endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */
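The timer changes above replace plain "jiffies + delay" expiries with rounded ones: round_up() for the short non-lazy delay and round_jiffies() for the six-second lazy delay, so that CPUs entering dyntick-idle at slightly different times tend to wake on shared jiffies boundaries. A small standalone illustration, with round_up() reimplemented only so the example builds on its own and with made-up jiffies values:

/*
 * Illustration of the rounded idle-timer expiry; round_up() is
 * reproduced here for the example and the jiffies values are made up.
 */
#include <stdio.h>

#define round_up(x, y)          ((((x) + (y) - 1) / (y)) * (y))
#define RCU_IDLE_GP_DELAY       4       /* as in the patch: roughly one GP */

int main(void)
{
        unsigned long jiffies;

        /* CPUs going idle at slightly different times... */
        for (jiffies = 1000; jiffies < 1004; jiffies++)
                /* ...mostly land their idle_gp_timer on a shared boundary. */
                printf("idle at %lu -> timer at %lu\n", jiffies,
                       round_up(jiffies + RCU_IDLE_GP_DELAY, RCU_IDLE_GP_DELAY));
        return 0;
}

Here the entries at jiffies 1001, 1002, and 1003 all round to 1008, so those CPUs can be woken together; only the entry already on a boundary (1000) keeps the earlier expiry of 1004.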
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index d4bc16ddd1d4..a16ddbd6fdc4 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -46,6 +46,31 @@
46#define RCU_TREE_NONCORE 46#define RCU_TREE_NONCORE
47#include "rcutree.h" 47#include "rcutree.h"
48 48
49static int show_rcubarrier(struct seq_file *m, void *unused)
50{
51 struct rcu_state *rsp;
52
53 for_each_rcu_flavor(rsp)
54 seq_printf(m, "%s: %c bcc: %d nbd: %lu\n",
55 rsp->name, rsp->rcu_barrier_in_progress ? 'B' : '.',
56 atomic_read(&rsp->barrier_cpu_count),
57 rsp->n_barrier_done);
58 return 0;
59}
60
61static int rcubarrier_open(struct inode *inode, struct file *file)
62{
63 return single_open(file, show_rcubarrier, NULL);
64}
65
66static const struct file_operations rcubarrier_fops = {
67 .owner = THIS_MODULE,
68 .open = rcubarrier_open,
69 .read = seq_read,
70 .llseek = seq_lseek,
71 .release = single_release,
72};
73
49#ifdef CONFIG_RCU_BOOST 74#ifdef CONFIG_RCU_BOOST
50 75
51static char convert_kthread_status(unsigned int kthread_status) 76static char convert_kthread_status(unsigned int kthread_status)
@@ -95,24 +120,16 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
95 rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted); 120 rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted);
96} 121}
97 122
98#define PRINT_RCU_DATA(name, func, m) \
99 do { \
100 int _p_r_d_i; \
101 \
102 for_each_possible_cpu(_p_r_d_i) \
103 func(m, &per_cpu(name, _p_r_d_i)); \
104 } while (0)
105
106static int show_rcudata(struct seq_file *m, void *unused) 123static int show_rcudata(struct seq_file *m, void *unused)
107{ 124{
108#ifdef CONFIG_TREE_PREEMPT_RCU 125 int cpu;
109 seq_puts(m, "rcu_preempt:\n"); 126 struct rcu_state *rsp;
110 PRINT_RCU_DATA(rcu_preempt_data, print_one_rcu_data, m); 127
111#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 128 for_each_rcu_flavor(rsp) {
112 seq_puts(m, "rcu_sched:\n"); 129 seq_printf(m, "%s:\n", rsp->name);
113 PRINT_RCU_DATA(rcu_sched_data, print_one_rcu_data, m); 130 for_each_possible_cpu(cpu)
114 seq_puts(m, "rcu_bh:\n"); 131 print_one_rcu_data(m, per_cpu_ptr(rsp->rda, cpu));
115 PRINT_RCU_DATA(rcu_bh_data, print_one_rcu_data, m); 132 }
116 return 0; 133 return 0;
117} 134}
118 135
@@ -166,6 +183,9 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
166 183
167static int show_rcudata_csv(struct seq_file *m, void *unused) 184static int show_rcudata_csv(struct seq_file *m, void *unused)
168{ 185{
186 int cpu;
187 struct rcu_state *rsp;
188
169 seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pgp\",\"pq\","); 189 seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pgp\",\"pq\",");
170 seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\","); 190 seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\",");
171 seq_puts(m, "\"of\",\"qll\",\"ql\",\"qs\""); 191 seq_puts(m, "\"of\",\"qll\",\"ql\",\"qs\"");
@@ -173,14 +193,11 @@ static int show_rcudata_csv(struct seq_file *m, void *unused)
173 seq_puts(m, "\"kt\",\"ktl\""); 193 seq_puts(m, "\"kt\",\"ktl\"");
174#endif /* #ifdef CONFIG_RCU_BOOST */ 194#endif /* #ifdef CONFIG_RCU_BOOST */
175 seq_puts(m, ",\"b\",\"ci\",\"co\",\"ca\"\n"); 195 seq_puts(m, ",\"b\",\"ci\",\"co\",\"ca\"\n");
176#ifdef CONFIG_TREE_PREEMPT_RCU 196 for_each_rcu_flavor(rsp) {
177 seq_puts(m, "\"rcu_preempt:\"\n"); 197 seq_printf(m, "\"%s:\"\n", rsp->name);
178 PRINT_RCU_DATA(rcu_preempt_data, print_one_rcu_data_csv, m); 198 for_each_possible_cpu(cpu)
179#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 199 print_one_rcu_data_csv(m, per_cpu_ptr(rsp->rda, cpu));
180 seq_puts(m, "\"rcu_sched:\"\n"); 200 }
181 PRINT_RCU_DATA(rcu_sched_data, print_one_rcu_data_csv, m);
182 seq_puts(m, "\"rcu_bh:\"\n");
183 PRINT_RCU_DATA(rcu_bh_data, print_one_rcu_data_csv, m);
184 return 0; 201 return 0;
185} 202}
186 203
@@ -270,15 +287,15 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
270 struct rcu_node *rnp; 287 struct rcu_node *rnp;
271 288
272 gpnum = rsp->gpnum; 289 gpnum = rsp->gpnum;
273 seq_printf(m, "c=%lu g=%lu s=%d jfq=%ld j=%x " 290 seq_printf(m, "%s: c=%lu g=%lu s=%d jfq=%ld j=%x "
274 "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n", 291 "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n",
275 rsp->completed, gpnum, rsp->fqs_state, 292 rsp->name, rsp->completed, gpnum, rsp->fqs_state,
276 (long)(rsp->jiffies_force_qs - jiffies), 293 (long)(rsp->jiffies_force_qs - jiffies),
277 (int)(jiffies & 0xffff), 294 (int)(jiffies & 0xffff),
278 rsp->n_force_qs, rsp->n_force_qs_ngp, 295 rsp->n_force_qs, rsp->n_force_qs_ngp,
279 rsp->n_force_qs - rsp->n_force_qs_ngp, 296 rsp->n_force_qs - rsp->n_force_qs_ngp,
280 rsp->n_force_qs_lh, rsp->qlen_lazy, rsp->qlen); 297 rsp->n_force_qs_lh, rsp->qlen_lazy, rsp->qlen);
281 for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < NUM_RCU_NODES; rnp++) { 298 for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < rcu_num_nodes; rnp++) {
282 if (rnp->level != level) { 299 if (rnp->level != level) {
283 seq_puts(m, "\n"); 300 seq_puts(m, "\n");
284 level = rnp->level; 301 level = rnp->level;
@@ -295,14 +312,10 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
295 312
296static int show_rcuhier(struct seq_file *m, void *unused) 313static int show_rcuhier(struct seq_file *m, void *unused)
297{ 314{
298#ifdef CONFIG_TREE_PREEMPT_RCU 315 struct rcu_state *rsp;
299 seq_puts(m, "rcu_preempt:\n"); 316
300 print_one_rcu_state(m, &rcu_preempt_state); 317 for_each_rcu_flavor(rsp)
301#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 318 print_one_rcu_state(m, rsp);
302 seq_puts(m, "rcu_sched:\n");
303 print_one_rcu_state(m, &rcu_sched_state);
304 seq_puts(m, "rcu_bh:\n");
305 print_one_rcu_state(m, &rcu_bh_state);
306 return 0; 319 return 0;
307} 320}
308 321
@@ -343,11 +356,10 @@ static void show_one_rcugp(struct seq_file *m, struct rcu_state *rsp)
343 356
344static int show_rcugp(struct seq_file *m, void *unused) 357static int show_rcugp(struct seq_file *m, void *unused)
345{ 358{
346#ifdef CONFIG_TREE_PREEMPT_RCU 359 struct rcu_state *rsp;
347 show_one_rcugp(m, &rcu_preempt_state); 360
348#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 361 for_each_rcu_flavor(rsp)
349 show_one_rcugp(m, &rcu_sched_state); 362 show_one_rcugp(m, rsp);
350 show_one_rcugp(m, &rcu_bh_state);
351 return 0; 363 return 0;
352} 364}
353 365
@@ -382,28 +394,20 @@ static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp)
382 rdp->n_rp_need_nothing); 394 rdp->n_rp_need_nothing);
383} 395}
384 396
385static void print_rcu_pendings(struct seq_file *m, struct rcu_state *rsp) 397static int show_rcu_pending(struct seq_file *m, void *unused)
386{ 398{
387 int cpu; 399 int cpu;
388 struct rcu_data *rdp; 400 struct rcu_data *rdp;
389 401 struct rcu_state *rsp;
390 for_each_possible_cpu(cpu) { 402
391 rdp = per_cpu_ptr(rsp->rda, cpu); 403 for_each_rcu_flavor(rsp) {
392 if (rdp->beenonline) 404 seq_printf(m, "%s:\n", rsp->name);
393 print_one_rcu_pending(m, rdp); 405 for_each_possible_cpu(cpu) {
406 rdp = per_cpu_ptr(rsp->rda, cpu);
407 if (rdp->beenonline)
408 print_one_rcu_pending(m, rdp);
409 }
394 } 410 }
395}
396
397static int show_rcu_pending(struct seq_file *m, void *unused)
398{
399#ifdef CONFIG_TREE_PREEMPT_RCU
400 seq_puts(m, "rcu_preempt:\n");
401 print_rcu_pendings(m, &rcu_preempt_state);
402#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
403 seq_puts(m, "rcu_sched:\n");
404 print_rcu_pendings(m, &rcu_sched_state);
405 seq_puts(m, "rcu_bh:\n");
406 print_rcu_pendings(m, &rcu_bh_state);
407 return 0; 411 return 0;
408} 412}
409 413
@@ -453,6 +457,11 @@ static int __init rcutree_trace_init(void)
453 if (!rcudir) 457 if (!rcudir)
454 goto free_out; 458 goto free_out;
455 459
460 retval = debugfs_create_file("rcubarrier", 0444, rcudir,
461 NULL, &rcubarrier_fops);
462 if (!retval)
463 goto free_out;
464
456 retval = debugfs_create_file("rcudata", 0444, rcudir, 465 retval = debugfs_create_file("rcudata", 0444, rcudir,
457 NULL, &rcudata_fops); 466 NULL, &rcudata_fops);
458 if (!retval) 467 if (!retval)
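The new "rcubarrier" debugfs entry registered above prints one line per flavor in the format used by show_rcubarrier(): "%s: %c bcc: %d nbd: %lu". The userspace reader below is a sketch; the /sys/kernel/debug mount point and the sample line in the comment are assumptions, not output captured from a running kernel, and the file only exists when RCU tracing is built in.

/*
 * Userspace sketch: read the rcubarrier file added above.  The path
 * assumes debugfs is mounted at /sys/kernel/debug and that the kernel
 * was built with RCU tracing; the sample line below is illustrative.
 */
#include <stdio.h>

int main(void)
{
        char line[128];
        FILE *f = fopen("/sys/kernel/debug/rcu/rcubarrier", "r");

        if (!f) {
                perror("rcubarrier");
                return 1;
        }
        /* Expect one line per flavor, e.g. "rcu_sched: . bcc: 0 nbd: 4" */
        while (fgets(line, sizeof(line), f))
                fputs(line, stdout);
        fclose(f);
        return 0;
}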
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 869997833928..66ff07f6184c 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -105,7 +105,7 @@ static ktime_t tick_init_jiffy_update(void)
105/* 105/*
106 * NO HZ enabled ? 106 * NO HZ enabled ?
107 */ 107 */
108static int tick_nohz_enabled __read_mostly = 1; 108int tick_nohz_enabled __read_mostly = 1;
109 109
110/* 110/*
111 * Enable / Disable tickless mode 111 * Enable / Disable tickless mode
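Making tick_nohz_enabled non-static pairs with the ->tick_nohz_enabled_snap field added to rcu_dynticks: rcu_prepare_for_idle() takes a single ACCESS_ONCE() snapshot of the knob and reacts only when the cached value goes stale, poking the RCU core once (when callbacks are pending) so the new setting is noticed. The standalone sketch below models just that snapshot-and-compare pattern; all names are illustrative and ACCESS_ONCE() is reproduced only so the example compiles on its own.

/*
 * Standalone model of the snapshot-and-compare pattern used for
 * tick_nohz_enabled above.  Names are illustrative; ACCESS_ONCE() is
 * redefined here only to keep the example self-contained.
 */
#include <stdio.h>

#define ACCESS_ONCE(x)  (*(volatile __typeof__(x) *)&(x))

static int my_flag = 1;                 /* stands in for tick_nohz_enabled */

struct my_state {
        int flag_snap;                  /* like rcu_dynticks.tick_nohz_enabled_snap */
};

static void my_react(void)
{
        /* Stands in for invoke_rcu_core() when the knob changes. */
        printf("flag changed; forcing the slow path once\n");
}

static void my_check(struct my_state *s)
{
        int cur = ACCESS_ONCE(my_flag); /* one read, used consistently below */

        if (cur != s->flag_snap) {      /* someone flipped the knob... */
                my_react();             /* ...so resynchronize conservatively */
                s->flag_snap = cur;
                return;
        }
        if (!cur)
                return;                 /* feature off: nothing to do */
        printf("fast path with a current snapshot\n");
}

int main(void)
{
        struct my_state s = { .flag_snap = 1 };

        my_check(&s);                   /* snapshot is current: fast path */
        my_flag = 0;                    /* emulate flipping the sysfs knob */
        my_check(&s);                   /* detects the change, resynchronizes */
        my_check(&s);                   /* now sees the feature disabled */
        return 0;
}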