-rw-r--r--  Documentation/RCU/checklist.txt            |    6
-rw-r--r--  Documentation/RCU/torture.txt              |    6
-rw-r--r--  Documentation/RCU/trace.txt                |  100
-rw-r--r--  Documentation/RCU/whatisRCU.txt            |   22
-rw-r--r--  Documentation/kernel-per-CPU-kthreads.txt  |   47
-rw-r--r--  Documentation/timers/NO_HZ.txt             |   79
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c               |    2
-rw-r--r--  include/linux/hardirq.h                    |    2
-rw-r--r--  include/linux/rcupdate.h                   |    5
-rw-r--r--  include/linux/rcutiny.h                    |   41
-rw-r--r--  include/linux/rcutree.h                    |    3
-rw-r--r--  include/linux/srcu.h                       |   43
-rw-r--r--  init/Kconfig                               |   46
-rw-r--r--  kernel/rcupdate.c                          |   29
-rw-r--r--  kernel/rcutiny.c                           |   21
-rw-r--r--  kernel/rcutiny_plugin.h                    | 1009
-rw-r--r--  kernel/rcutorture.c                        |   39
-rw-r--r--  kernel/rcutree.c                           |  168
-rw-r--r--  kernel/rcutree.h                           |   15
-rw-r--r--  kernel/rcutree_plugin.h                    |   81
20 files changed, 316 insertions(+), 1448 deletions(-)
diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt
index 79e789b8b8ea..7703ec73a9bb 100644
--- a/Documentation/RCU/checklist.txt
+++ b/Documentation/RCU/checklist.txt
@@ -354,12 +354,6 @@ over a rather long period of time, but improvements are always welcome!
354 using RCU rather than SRCU, because RCU is almost always faster 354 using RCU rather than SRCU, because RCU is almost always faster
355 and easier to use than is SRCU. 355 and easier to use than is SRCU.
356 356
357 If you need to enter your read-side critical section in a
358 hardirq or exception handler, and then exit that same read-side
359 critical section in the task that was interrupted, then you need
360 to use srcu_read_lock_raw() and srcu_read_unlock_raw(), which avoid
361 the lockdep checking that would otherwise make this practice illegal.
362
363 Also unlike other forms of RCU, explicit initialization 357 Also unlike other forms of RCU, explicit initialization
364 and cleanup is required via init_srcu_struct() and 358 and cleanup is required via init_srcu_struct() and
365 cleanup_srcu_struct(). These are passed a "struct srcu_struct" 359 cleanup_srcu_struct(). These are passed a "struct srcu_struct"
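
For reference, the explicit SRCU setup and teardown that the remaining checklist text refers to follows roughly the pattern below. This is a minimal sketch, not code from this patch; my_srcu, my_setup() and my_teardown() are illustrative names, and the teardown assumes no new readers or call_srcu() callbacks can still be launched.

    #include <linux/srcu.h>

    static struct srcu_struct my_srcu;      /* illustrative SRCU domain */

    static int my_setup(void)
    {
            /* Unlike other RCU flavors, SRCU needs explicit initialization. */
            return init_srcu_struct(&my_srcu);
    }

    static void my_teardown(void)
    {
            /* Wait for any call_srcu() callbacks, then release resources. */
            srcu_barrier(&my_srcu);
            cleanup_srcu_struct(&my_srcu);
    }
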
diff --git a/Documentation/RCU/torture.txt b/Documentation/RCU/torture.txt
index 7dce8a17eac2..d8a502387397 100644
--- a/Documentation/RCU/torture.txt
+++ b/Documentation/RCU/torture.txt
@@ -182,12 +182,6 @@ torture_type The type of RCU to test, with string values as follows:
182 "srcu_expedited": srcu_read_lock(), srcu_read_unlock() and 182 "srcu_expedited": srcu_read_lock(), srcu_read_unlock() and
183 synchronize_srcu_expedited(). 183 synchronize_srcu_expedited().
184 184
185 "srcu_raw": srcu_read_lock_raw(), srcu_read_unlock_raw(),
186 and call_srcu().
187
188 "srcu_raw_sync": srcu_read_lock_raw(), srcu_read_unlock_raw(),
189 and synchronize_srcu().
190
191 "sched": preempt_disable(), preempt_enable(), and 185 "sched": preempt_disable(), preempt_enable(), and
192 call_rcu_sched(). 186 call_rcu_sched().
193 187
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt
index c776968f4463..f3778f8952da 100644
--- a/Documentation/RCU/trace.txt
+++ b/Documentation/RCU/trace.txt
@@ -530,113 +530,21 @@ o "nos" counts the number of times we balked for other
530 reasons, e.g., the grace period ended first. 530 reasons, e.g., the grace period ended first.
531 531
532 532
533CONFIG_TINY_RCU and CONFIG_TINY_PREEMPT_RCU debugfs Files and Formats 533CONFIG_TINY_RCU debugfs Files and Formats
534 534
535These implementations of RCU provide a single debugfs file under the 535This implementation of RCU provides a single debugfs file under the
536top-level directory RCU, namely rcu/rcudata, which displays fields in 536top-level directory RCU, namely rcu/rcudata, which displays fields in
537rcu_bh_ctrlblk, rcu_sched_ctrlblk and, for CONFIG_TINY_PREEMPT_RCU, 537rcu_bh_ctrlblk and rcu_sched_ctrlblk.
538rcu_preempt_ctrlblk.
539 538
540The output of "cat rcu/rcudata" is as follows: 539The output of "cat rcu/rcudata" is as follows:
541 540
542rcu_preempt: qlen=24 gp=1097669 g197/p197/c197 tasks=...
543 ttb=. btg=no ntb=184 neb=0 nnb=183 j=01f7 bt=0274
544 normal balk: nt=1097669 gt=0 bt=371 b=0 ny=25073378 nos=0
545 exp balk: bt=0 nos=0
546rcu_sched: qlen: 0 541rcu_sched: qlen: 0
547rcu_bh: qlen: 0 542rcu_bh: qlen: 0
548 543
549This is split into rcu_preempt, rcu_sched, and rcu_bh sections, with the 544This is split into rcu_sched and rcu_bh sections. The field is as
550rcu_preempt section appearing only in CONFIG_TINY_PREEMPT_RCU builds. 545follows:
551The last three lines of the rcu_preempt section appear only in
552CONFIG_RCU_BOOST kernel builds. The fields are as follows:
553 546
554o "qlen" is the number of RCU callbacks currently waiting either 547o "qlen" is the number of RCU callbacks currently waiting either
555 for an RCU grace period or waiting to be invoked. This is the 548 for an RCU grace period or waiting to be invoked. This is the
556 only field present for rcu_sched and rcu_bh, due to the 549 only field present for rcu_sched and rcu_bh, due to the
557 short-circuiting of grace period in those two cases. 550 short-circuiting of grace period in those two cases.
558
559o "gp" is the number of grace periods that have completed.
560
561o "g197/p197/c197" displays the grace-period state, with the
562 "g" number being the number of grace periods that have started
563 (mod 256), the "p" number being the number of grace periods
564 that the CPU has responded to (also mod 256), and the "c"
565 number being the number of grace periods that have completed
566 (once again mod 256).
567
568 Why have both "gp" and "g"? Because the data flowing into
569 "gp" is only present in a CONFIG_RCU_TRACE kernel.
570
571o "tasks" is a set of bits. The first bit is "T" if there are
572 currently tasks that have recently blocked within an RCU
573 read-side critical section, the second bit is "N" if any of the
574 aforementioned tasks are blocking the current RCU grace period,
575 and the third bit is "E" if any of the aforementioned tasks are
576 blocking the current expedited grace period. Each bit is "."
577 if the corresponding condition does not hold.
578
579o "ttb" is a single bit. It is "B" if any of the blocked tasks
580 need to be priority boosted and "." otherwise.
581
582o "btg" indicates whether boosting has been carried out during
583 the current grace period, with "exp" indicating that boosting
584 is in progress for an expedited grace period, "no" indicating
585 that boosting has not yet started for a normal grace period,
586 "begun" indicating that boosting has bebug for a normal grace
587 period, and "done" indicating that boosting has completed for
588 a normal grace period.
589
590o "ntb" is the total number of tasks subjected to RCU priority boosting
591 periods since boot.
592
593o "neb" is the number of expedited grace periods that have had
594 to resort to RCU priority boosting since boot.
595
596o "nnb" is the number of normal grace periods that have had
597 to resort to RCU priority boosting since boot.
598
599o "j" is the low-order 16 bits of the jiffies counter in hexadecimal.
600
601o "bt" is the low-order 16 bits of the value that the jiffies counter
602 will have at the next time that boosting is scheduled to begin.
603
604o In the line beginning with "normal balk", the fields are as follows:
605
606 o "nt" is the number of times that the system balked from
607 boosting because there were no blocked tasks to boost.
608 Note that the system will balk from boosting even if the
609 grace period is overdue when the currently running task
610 is looping within an RCU read-side critical section.
611 There is no point in boosting in this case, because
612 boosting a running task won't make it run any faster.
613
614 o "gt" is the number of times that the system balked
615 from boosting because, although there were blocked tasks,
616 none of them were preventing the current grace period
617 from completing.
618
619 o "bt" is the number of times that the system balked
620 from boosting because boosting was already in progress.
621
622 o "b" is the number of times that the system balked from
623 boosting because boosting had already completed for
624 the grace period in question.
625
626 o "ny" is the number of times that the system balked from
627 boosting because it was not yet time to start boosting
628 the grace period in question.
629
630 o "nos" is the number of times that the system balked from
631 boosting for inexplicable ("not otherwise specified")
632 reasons. This can actually happen due to races involving
633 increments of the jiffies counter.
634
635o In the line beginning with "exp balk", the fields are as follows:
636
637 o "bt" is the number of times that the system balked from
638 boosting because there were no blocked tasks to boost.
639
640 o "nos" is the number of times that the system balked from
641 boosting for inexplicable ("not otherwise specified")
642 reasons.
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index 10df0b82f459..0f0fb7c432c2 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -842,9 +842,7 @@ SRCU: Critical sections Grace period Barrier
842 842
843 srcu_read_lock synchronize_srcu srcu_barrier 843 srcu_read_lock synchronize_srcu srcu_barrier
844 srcu_read_unlock call_srcu 844 srcu_read_unlock call_srcu
845 srcu_read_lock_raw synchronize_srcu_expedited 845 srcu_dereference synchronize_srcu_expedited
846 srcu_read_unlock_raw
847 srcu_dereference
848 846
849SRCU: Initialization/cleanup 847SRCU: Initialization/cleanup
850 init_srcu_struct 848 init_srcu_struct
@@ -865,38 +863,32 @@ list can be helpful:
865 863
866a. Will readers need to block? If so, you need SRCU. 864a. Will readers need to block? If so, you need SRCU.
867 865
868b. Is it necessary to start a read-side critical section in a 866b. What about the -rt patchset? If readers would need to block
869 hardirq handler or exception handler, and then to complete
870 this read-side critical section in the task that was
871 interrupted? If so, you need SRCU's srcu_read_lock_raw() and
872 srcu_read_unlock_raw() primitives.
873
874c. What about the -rt patchset? If readers would need to block
875 in a non-rt kernel, you need SRCU. If readers would block 867 in a non-rt kernel, you need SRCU. If readers would block
876 in a -rt kernel, but not in a non-rt kernel, SRCU is not 868 in a -rt kernel, but not in a non-rt kernel, SRCU is not
877 necessary. 869 necessary.
878 870
879d. Do you need to treat NMI handlers, hardirq handlers, 871c. Do you need to treat NMI handlers, hardirq handlers,
880 and code segments with preemption disabled (whether 872 and code segments with preemption disabled (whether
881 via preempt_disable(), local_irq_save(), local_bh_disable(), 873 via preempt_disable(), local_irq_save(), local_bh_disable(),
882 or some other mechanism) as if they were explicit RCU readers? 874 or some other mechanism) as if they were explicit RCU readers?
883 If so, RCU-sched is the only choice that will work for you. 875 If so, RCU-sched is the only choice that will work for you.
884 876
885e. Do you need RCU grace periods to complete even in the face 877d. Do you need RCU grace periods to complete even in the face
886 of softirq monopolization of one or more of the CPUs? For 878 of softirq monopolization of one or more of the CPUs? For
887 example, is your code subject to network-based denial-of-service 879 example, is your code subject to network-based denial-of-service
888 attacks? If so, you need RCU-bh. 880 attacks? If so, you need RCU-bh.
889 881
890f. Is your workload too update-intensive for normal use of 882e. Is your workload too update-intensive for normal use of
891 RCU, but inappropriate for other synchronization mechanisms? 883 RCU, but inappropriate for other synchronization mechanisms?
892 If so, consider SLAB_DESTROY_BY_RCU. But please be careful! 884 If so, consider SLAB_DESTROY_BY_RCU. But please be careful!
893 885
894g. Do you need read-side critical sections that are respected 886f. Do you need read-side critical sections that are respected
895 even though they are in the middle of the idle loop, during 887 even though they are in the middle of the idle loop, during
896 user-mode execution, or on an offlined CPU? If so, SRCU is the 888 user-mode execution, or on an offlined CPU? If so, SRCU is the
897 only choice that will work for you. 889 only choice that will work for you.
898 890
899h. Otherwise, use RCU. 891g. Otherwise, use RCU.
900 892
901Of course, this all assumes that you have determined that RCU is in fact 893Of course, this all assumes that you have determined that RCU is in fact
902the right tool for your job. 894the right tool for your job.
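
The checklist above ends with "Otherwise, use RCU"; the classic pattern that choice implies looks roughly like the sketch below. Illustrative names only (struct foo, gbl_foo, foo_lock, read_a() and update_a() are not from this patch); the updater is assumed to be serialized by foo_lock.

    #include <linux/rcupdate.h>
    #include <linux/slab.h>
    #include <linux/spinlock.h>

    struct foo {
            int a;
    };

    static DEFINE_SPINLOCK(foo_lock);               /* serializes updaters */
    static struct foo __rcu *gbl_foo;

    static int read_a(void)                         /* reader: cheap, never blocks */
    {
            struct foo *fp;
            int a = -1;

            rcu_read_lock();
            fp = rcu_dereference(gbl_foo);
            if (fp)
                    a = fp->a;
            rcu_read_unlock();
            return a;
    }

    static void update_a(struct foo *new_fp)        /* updater: publish, then wait */
    {
            struct foo *old_fp;

            spin_lock(&foo_lock);
            old_fp = rcu_dereference_protected(gbl_foo,
                                               lockdep_is_held(&foo_lock));
            rcu_assign_pointer(gbl_foo, new_fp);
            spin_unlock(&foo_lock);
            synchronize_rcu();      /* wait for pre-existing readers to finish */
            kfree(old_fp);
    }

If item (a) applies instead (readers must block), the same shape holds, but with srcu_read_lock()/srcu_read_unlock() and synchronize_srcu() against a specific srcu_struct.
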
diff --git a/Documentation/kernel-per-CPU-kthreads.txt b/Documentation/kernel-per-CPU-kthreads.txt
index cbf7ae412da4..5f39ef55c6f6 100644
--- a/Documentation/kernel-per-CPU-kthreads.txt
+++ b/Documentation/kernel-per-CPU-kthreads.txt
@@ -157,6 +157,53 @@ RCU_SOFTIRQ: Do at least one of the following:
157 calls and by forcing both kernel threads and interrupts 157 calls and by forcing both kernel threads and interrupts
158 to execute elsewhere. 158 to execute elsewhere.
159 159
160Name: kworker/%u:%d%s (cpu, id, priority)
161Purpose: Execute workqueue requests
162To reduce its OS jitter, do any of the following:
1631. Run your workload at a real-time priority, which will allow
164 preempting the kworker daemons.
1652. Do any of the following needed to avoid jitter that your
166 application cannot tolerate:
167 a. Build your kernel with CONFIG_SLUB=y rather than
168 CONFIG_SLAB=y, thus avoiding the slab allocator's periodic
169 use of each CPU's workqueues to run its cache_reap()
170 function.
171 b. Avoid using oprofile, thus avoiding OS jitter from
172 wq_sync_buffer().
173 c. Limit your CPU frequency so that a CPU-frequency
174 governor is not required, possibly enlisting the aid of
175 special heatsinks or other cooling technologies. If done
176 correctly, and if your CPU architecture permits, you should
177 be able to build your kernel with CONFIG_CPU_FREQ=n to
178 avoid the CPU-frequency governor periodically running
179 on each CPU, including cs_dbs_timer() and od_dbs_timer().
180 WARNING: Please check your CPU specifications to
181 make sure that this is safe on your particular system.
182 d. It is not possible to entirely get rid of OS jitter
183 from vmstat_update() on CONFIG_SMP=y systems, but you
184 can decrease its frequency by writing a large value to
185 /proc/sys/vm/stat_interval. The default value is HZ,
186 for an interval of one second. Of course, larger values
187 will make your virtual-memory statistics update more
188 slowly. Of course, you can also run your workload at
189 a real-time priority, thus preempting vmstat_update().
190 e. If running on high-end powerpc servers, build with
191 CONFIG_PPC_RTAS_DAEMON=n. This prevents the RTAS
192 daemon from running on each CPU every second or so.
193 (This will require editing Kconfig files and will defeat
194 this platform's RAS functionality.) This avoids jitter
195 due to the rtas_event_scan() function.
196 WARNING: Please check your CPU specifications to
197 make sure that this is safe on your particular system.
198 f. If running on Cell Processor, build your kernel with
199 CBE_CPUFREQ_SPU_GOVERNOR=n to avoid OS jitter from
200 spu_gov_work().
201 WARNING: Please check your CPU specifications to
202 make sure that this is safe on your particular system.
203 g. If running on PowerMAC, build your kernel with
204 CONFIG_PMAC_RACKMETER=n to disable the CPU-meter,
205 avoiding OS jitter from rackmeter_do_timer().
206
160Name: rcuc/%u 207Name: rcuc/%u
161Purpose: Execute RCU callbacks in CONFIG_RCU_BOOST=y kernels. 208Purpose: Execute RCU callbacks in CONFIG_RCU_BOOST=y kernels.
162To reduce its OS jitter, do at least one of the following: 209To reduce its OS jitter, do at least one of the following:
diff --git a/Documentation/timers/NO_HZ.txt b/Documentation/timers/NO_HZ.txt
index 5b5322024067..88697584242b 100644
--- a/Documentation/timers/NO_HZ.txt
+++ b/Documentation/timers/NO_HZ.txt
@@ -7,21 +7,59 @@ efficiency and reducing OS jitter. Reducing OS jitter is important for
7some types of computationally intensive high-performance computing (HPC) 7some types of computationally intensive high-performance computing (HPC)
8applications and for real-time applications. 8applications and for real-time applications.
9 9
10There are two main contexts in which the number of scheduling-clock 10There are three main ways of managing scheduling-clock interrupts
11interrupts can be reduced compared to the old-school approach of sending 11(also known as "scheduling-clock ticks" or simply "ticks"):
12a scheduling-clock interrupt to all CPUs every jiffy whether they need
13it or not (CONFIG_HZ_PERIODIC=y or CONFIG_NO_HZ=n for older kernels):
14 12
151. Idle CPUs (CONFIG_NO_HZ_IDLE=y or CONFIG_NO_HZ=y for older kernels). 131. Never omit scheduling-clock ticks (CONFIG_HZ_PERIODIC=y or
14 CONFIG_NO_HZ=n for older kernels). You normally will -not-
15 want to choose this option.
16 16
172. CPUs having only one runnable task (CONFIG_NO_HZ_FULL=y). 172. Omit scheduling-clock ticks on idle CPUs (CONFIG_NO_HZ_IDLE=y or
18 CONFIG_NO_HZ=y for older kernels). This is the most common
19 approach, and should be the default.
18 20
19These two cases are described in the following two sections, followed 213. Omit scheduling-clock ticks on CPUs that are either idle or that
22 have only one runnable task (CONFIG_NO_HZ_FULL=y). Unless you
23 are running realtime applications or certain types of HPC
24 workloads, you will normally -not- want this option.
25
26These three cases are described in the following three sections, followed
20by a third section on RCU-specific considerations and a fourth and final 27by a fourth section on RCU-specific considerations and a fifth and final
21section listing known issues. 28section listing known issues.
22 29
23 30
24IDLE CPUs 31NEVER OMIT SCHEDULING-CLOCK TICKS
32
33Very old versions of Linux from the 1990s and the very early 2000s
34are incapable of omitting scheduling-clock ticks. It turns out that
35there are some situations where this old-school approach is still the
36right approach, for example, in heavy workloads with lots of tasks
37that use short bursts of CPU, where there are very frequent idle
38periods, but where these idle periods are also quite short (tens or
39hundreds of microseconds). For these types of workloads, scheduling
40clock interrupts will normally be delivered anyway because there
41will frequently be multiple runnable tasks per CPU. In these cases,
42attempting to turn off the scheduling clock interrupt will have no effect
43other than increasing the overhead of switching to and from idle and
44transitioning between user and kernel execution.
45
46This mode of operation can be selected using CONFIG_HZ_PERIODIC=y (or
47CONFIG_NO_HZ=n for older kernels).
48
49However, if you are instead running a light workload with long idle
50periods, failing to omit scheduling-clock interrupts will result in
51excessive power consumption. This is especially bad on battery-powered
52devices, where it results in extremely short battery lifetimes. If you
53are running light workloads, you should therefore read the following
54section.
55
56In addition, if you are running either a real-time workload or an HPC
57workload with short iterations, the scheduling-clock interrupts can
58degrade your application's performance. If this describes your workload,
59you should read the following two sections.
60
61
62OMIT SCHEDULING-CLOCK TICKS FOR IDLE CPUs
25 63
26If a CPU is idle, there is little point in sending it a scheduling-clock 64If a CPU is idle, there is little point in sending it a scheduling-clock
27interrupt. After all, the primary purpose of a scheduling-clock interrupt 65interrupt. After all, the primary purpose of a scheduling-clock interrupt
@@ -59,10 +97,12 @@ By default, CONFIG_NO_HZ_IDLE=y kernels boot with "nohz=on", enabling
59dyntick-idle mode. 97dyntick-idle mode.
60 98
61 99
62CPUs WITH ONLY ONE RUNNABLE TASK 100OMIT SCHEDULING-CLOCK TICKS FOR CPUs WITH ONLY ONE RUNNABLE TASK
63 101
64If a CPU has only one runnable task, there is little point in sending it 102If a CPU has only one runnable task, there is little point in sending it
65a scheduling-clock interrupt because there is no other task to switch to. 103a scheduling-clock interrupt because there is no other task to switch to.
104Note that omitting scheduling-clock ticks for CPUs with only one runnable
105task implies also omitting them for idle CPUs.
66 106
67The CONFIG_NO_HZ_FULL=y Kconfig option causes the kernel to avoid 107The CONFIG_NO_HZ_FULL=y Kconfig option causes the kernel to avoid
68sending scheduling-clock interrupts to CPUs with a single runnable task, 108sending scheduling-clock interrupts to CPUs with a single runnable task,
@@ -238,6 +278,11 @@ o Adaptive-ticks does not do anything unless there is only one
238 single runnable SCHED_FIFO task and multiple runnable SCHED_OTHER 278 single runnable SCHED_FIFO task and multiple runnable SCHED_OTHER
239 tasks, even though these interrupts are unnecessary. 279 tasks, even though these interrupts are unnecessary.
240 280
281 And even when there are multiple runnable tasks on a given CPU,
282 there is little point in interrupting that CPU until the current
283 running task's timeslice expires, which is almost always way
284 longer than the time of the next scheduling-clock interrupt.
285
241 Better handling of these sorts of situations is future work. 286 Better handling of these sorts of situations is future work.
242 287
243o A reboot is required to reconfigure both adaptive idle and RCU 288o A reboot is required to reconfigure both adaptive idle and RCU
@@ -268,6 +313,16 @@ o Unless all CPUs are idle, at least one CPU must keep the
268 scheduling-clock interrupt going in order to support accurate 313 scheduling-clock interrupt going in order to support accurate
269 timekeeping. 314 timekeeping.
270 315
271o If there are adaptive-ticks CPUs, there will be at least one 316o If there might potentially be some adaptive-ticks CPUs, there
272 CPU keeping the scheduling-clock interrupt going, even if all 317 will be at least one CPU keeping the scheduling-clock interrupt
273 CPUs are otherwise idle. 318 going, even if all CPUs are otherwise idle.
319
320 Better handling of this situation is ongoing work.
321
322o Some process-handling operations still require the occasional
323 scheduling-clock tick. These operations include calculating CPU
324 load, maintaining sched average, computing CFS entity vruntime,
325 computing avenrun, and carrying out load balancing. They are
326 currently accommodated by a scheduling-clock tick every second
327 or so. Ongoing work will eliminate the need even for these
328 infrequent scheduling-clock ticks.
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 550f5928b394..2efa9dde741a 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1864,7 +1864,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
1864 1864
1865 up_out: 1865 up_out:
1866 up_read(&current->mm->mmap_sem); 1866 up_read(&current->mm->mmap_sem);
1867 goto out; 1867 goto out_srcu;
1868} 1868}
1869 1869
1870int kvmppc_core_init_vm(struct kvm *kvm) 1870int kvmppc_core_init_vm(struct kvm *kvm)
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index c1d6555d2567..05bcc0903766 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -128,7 +128,7 @@ extern void synchronize_irq(unsigned int irq);
128# define synchronize_irq(irq) barrier() 128# define synchronize_irq(irq) barrier()
129#endif 129#endif
130 130
131#if defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU) 131#if defined(CONFIG_TINY_RCU)
132 132
133static inline void rcu_nmi_enter(void) 133static inline void rcu_nmi_enter(void)
134{ 134{
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index ddcc7826d907..4b14bdc911d7 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -216,6 +216,7 @@ static inline int rcu_preempt_depth(void)
216#endif /* #else #ifdef CONFIG_PREEMPT_RCU */ 216#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
217 217
218/* Internal to kernel */ 218/* Internal to kernel */
219extern void rcu_init(void);
219extern void rcu_sched_qs(int cpu); 220extern void rcu_sched_qs(int cpu);
220extern void rcu_bh_qs(int cpu); 221extern void rcu_bh_qs(int cpu);
221extern void rcu_check_callbacks(int cpu, int user); 222extern void rcu_check_callbacks(int cpu, int user);
@@ -239,8 +240,6 @@ static inline void rcu_user_hooks_switch(struct task_struct *prev,
239 struct task_struct *next) { } 240 struct task_struct *next) { }
240#endif /* CONFIG_RCU_USER_QS */ 241#endif /* CONFIG_RCU_USER_QS */
241 242
242extern void exit_rcu(void);
243
244/** 243/**
245 * RCU_NONIDLE - Indicate idle-loop code that needs RCU readers 244 * RCU_NONIDLE - Indicate idle-loop code that needs RCU readers
246 * @a: Code that RCU needs to pay attention to. 245 * @a: Code that RCU needs to pay attention to.
@@ -277,7 +276,7 @@ void wait_rcu_gp(call_rcu_func_t crf);
277 276
278#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) 277#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU)
279#include <linux/rcutree.h> 278#include <linux/rcutree.h>
280#elif defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU) 279#elif defined(CONFIG_TINY_RCU)
281#include <linux/rcutiny.h> 280#include <linux/rcutiny.h>
282#else 281#else
283#error "Unknown RCU implementation specified to kernel configuration" 282#error "Unknown RCU implementation specified to kernel configuration"
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 4e56a9c69a35..e31005ee339e 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -27,10 +27,6 @@
27 27
28#include <linux/cache.h> 28#include <linux/cache.h>
29 29
30static inline void rcu_init(void)
31{
32}
33
34static inline void rcu_barrier_bh(void) 30static inline void rcu_barrier_bh(void)
35{ 31{
36 wait_rcu_gp(call_rcu_bh); 32 wait_rcu_gp(call_rcu_bh);
@@ -41,8 +37,6 @@ static inline void rcu_barrier_sched(void)
41 wait_rcu_gp(call_rcu_sched); 37 wait_rcu_gp(call_rcu_sched);
42} 38}
43 39
44#ifdef CONFIG_TINY_RCU
45
46static inline void synchronize_rcu_expedited(void) 40static inline void synchronize_rcu_expedited(void)
47{ 41{
48 synchronize_sched(); /* Only one CPU, so pretty fast anyway!!! */ 42 synchronize_sched(); /* Only one CPU, so pretty fast anyway!!! */
@@ -53,17 +47,6 @@ static inline void rcu_barrier(void)
53 rcu_barrier_sched(); /* Only one CPU, so only one list of callbacks! */ 47 rcu_barrier_sched(); /* Only one CPU, so only one list of callbacks! */
54} 48}
55 49
56#else /* #ifdef CONFIG_TINY_RCU */
57
58void synchronize_rcu_expedited(void);
59
60static inline void rcu_barrier(void)
61{
62 wait_rcu_gp(call_rcu);
63}
64
65#endif /* #else #ifdef CONFIG_TINY_RCU */
66
67static inline void synchronize_rcu_bh(void) 50static inline void synchronize_rcu_bh(void)
68{ 51{
69 synchronize_sched(); 52 synchronize_sched();
@@ -85,35 +68,15 @@ static inline void kfree_call_rcu(struct rcu_head *head,
85 call_rcu(head, func); 68 call_rcu(head, func);
86} 69}
87 70
88#ifdef CONFIG_TINY_RCU
89
90static inline void rcu_preempt_note_context_switch(void)
91{
92}
93
94static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) 71static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
95{ 72{
96 *delta_jiffies = ULONG_MAX; 73 *delta_jiffies = ULONG_MAX;
97 return 0; 74 return 0;
98} 75}
99 76
100#else /* #ifdef CONFIG_TINY_RCU */
101
102void rcu_preempt_note_context_switch(void);
103int rcu_preempt_needs_cpu(void);
104
105static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
106{
107 *delta_jiffies = ULONG_MAX;
108 return rcu_preempt_needs_cpu();
109}
110
111#endif /* #else #ifdef CONFIG_TINY_RCU */
112
113static inline void rcu_note_context_switch(int cpu) 77static inline void rcu_note_context_switch(int cpu)
114{ 78{
115 rcu_sched_qs(cpu); 79 rcu_sched_qs(cpu);
116 rcu_preempt_note_context_switch();
117} 80}
118 81
119/* 82/*
@@ -156,6 +119,10 @@ static inline void rcu_cpu_stall_reset(void)
156{ 119{
157} 120}
158 121
122static inline void exit_rcu(void)
123{
124}
125
159#ifdef CONFIG_DEBUG_LOCK_ALLOC 126#ifdef CONFIG_DEBUG_LOCK_ALLOC
160extern int rcu_scheduler_active __read_mostly; 127extern int rcu_scheduler_active __read_mostly;
161extern void rcu_scheduler_starting(void); 128extern void rcu_scheduler_starting(void);
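
With TINY_PREEMPT_RCU gone, the rcu_needs_cpu() shown above unconditionally reports that RCU does not need the CPU and returns the maximum hold-off in *delta_jiffies. A rough sketch of how a dyntick-idle path might consult it is below; rcu_allows_tick_stop() is a hypothetical helper, not code from this patch, and the caller is assumed to have preemption disabled.

    #include <linux/rcupdate.h>
    #include <linux/smp.h>
    #include <linux/types.h>

    /* May the scheduling-clock tick be stopped, as far as RCU is concerned? */
    static bool rcu_allows_tick_stop(void)
    {
            unsigned long dj;       /* jiffies until RCU next needs this CPU */

            return !rcu_needs_cpu(smp_processor_id(), &dj);
    }
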
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 952b79339304..226169d1bd2b 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -30,7 +30,6 @@
30#ifndef __LINUX_RCUTREE_H 30#ifndef __LINUX_RCUTREE_H
31#define __LINUX_RCUTREE_H 31#define __LINUX_RCUTREE_H
32 32
33extern void rcu_init(void);
34extern void rcu_note_context_switch(int cpu); 33extern void rcu_note_context_switch(int cpu);
35extern int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies); 34extern int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies);
36extern void rcu_cpu_stall_reset(void); 35extern void rcu_cpu_stall_reset(void);
@@ -86,6 +85,8 @@ extern void rcu_force_quiescent_state(void);
86extern void rcu_bh_force_quiescent_state(void); 85extern void rcu_bh_force_quiescent_state(void);
87extern void rcu_sched_force_quiescent_state(void); 86extern void rcu_sched_force_quiescent_state(void);
88 87
88extern void exit_rcu(void);
89
89extern void rcu_scheduler_starting(void); 90extern void rcu_scheduler_starting(void);
90extern int rcu_scheduler_active __read_mostly; 91extern int rcu_scheduler_active __read_mostly;
91 92
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 04f4121a23ae..c114614ed172 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -237,47 +237,4 @@ static inline void srcu_read_unlock(struct srcu_struct *sp, int idx)
237 __srcu_read_unlock(sp, idx); 237 __srcu_read_unlock(sp, idx);
238} 238}
239 239
240/**
241 * srcu_read_lock_raw - register a new reader for an SRCU-protected structure.
242 * @sp: srcu_struct in which to register the new reader.
243 *
244 * Enter an SRCU read-side critical section. Similar to srcu_read_lock(),
245 * but avoids the RCU-lockdep checking. This means that it is legal to
246 * use srcu_read_lock_raw() in one context, for example, in an exception
247 * handler, and then have the matching srcu_read_unlock_raw() in another
248 * context, for example in the task that took the exception.
249 *
250 * However, the entire SRCU read-side critical section must reside within a
251 * single task. For example, beware of using srcu_read_lock_raw() in
252 * a device interrupt handler and srcu_read_unlock() in the interrupted
253 * task: This will not work if interrupts are threaded.
254 */
255static inline int srcu_read_lock_raw(struct srcu_struct *sp)
256{
257 unsigned long flags;
258 int ret;
259
260 local_irq_save(flags);
261 ret = __srcu_read_lock(sp);
262 local_irq_restore(flags);
263 return ret;
264}
265
266/**
267 * srcu_read_unlock_raw - unregister reader from an SRCU-protected structure.
268 * @sp: srcu_struct in which to unregister the old reader.
269 * @idx: return value from corresponding srcu_read_lock_raw().
270 *
271 * Exit an SRCU read-side critical section without lockdep-RCU checking.
272 * See srcu_read_lock_raw() for more details.
273 */
274static inline void srcu_read_unlock_raw(struct srcu_struct *sp, int idx)
275{
276 unsigned long flags;
277
278 local_irq_save(flags);
279 __srcu_read_unlock(sp, idx);
280 local_irq_restore(flags);
281}
282
283#endif 240#endif
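
With srcu_read_lock_raw() and srcu_read_unlock_raw() removed, an SRCU read-side critical section must begin and end in the same context, passing the index returned by srcu_read_lock() back to srcu_read_unlock(). A minimal sketch follows, with illustrative names (my_srcu, struct blob, gbl_blob, read_data()) that are not from this patch; my_srcu is assumed to have been set up with init_srcu_struct().

    #include <linux/srcu.h>

    struct blob {
            int data;
    };

    static struct srcu_struct my_srcu;
    static struct blob __rcu *gbl_blob;

    static int read_data(void)
    {
            int idx, data = -1;
            struct blob *p;

            idx = srcu_read_lock(&my_srcu);         /* begin read-side critical section */
            p = srcu_dereference(gbl_blob, &my_srcu);
            if (p)
                    data = p->data;                 /* SRCU readers may block here */
            srcu_read_unlock(&my_srcu, idx);        /* same task and context as the lock */
            return data;
    }
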
diff --git a/init/Kconfig b/init/Kconfig
index 1e825c299ea5..118895cc1f67 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -473,18 +473,10 @@ config TINY_RCU
473 is not required. This option greatly reduces the 473 is not required. This option greatly reduces the
474 memory footprint of RCU. 474 memory footprint of RCU.
475 475
476config TINY_PREEMPT_RCU
477 bool "Preemptible UP-only small-memory-footprint RCU"
478 depends on PREEMPT && !SMP
479 help
480 This option selects the RCU implementation that is designed
481 for real-time UP systems. This option greatly reduces the
482 memory footprint of RCU.
483
484endchoice 476endchoice
485 477
486config PREEMPT_RCU 478config PREEMPT_RCU
487 def_bool ( TREE_PREEMPT_RCU || TINY_PREEMPT_RCU ) 479 def_bool TREE_PREEMPT_RCU
488 help 480 help
489 This option enables preemptible-RCU code that is common between 481 This option enables preemptible-RCU code that is common between
490 the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations. 482 the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations.
@@ -670,7 +662,7 @@ config RCU_BOOST_DELAY
670 Accept the default if unsure. 662 Accept the default if unsure.
671 663
672config RCU_NOCB_CPU 664config RCU_NOCB_CPU
673 bool "Offload RCU callback processing from boot-selected CPUs (EXPERIMENTAL" 665 bool "Offload RCU callback processing from boot-selected CPUs"
674 depends on TREE_RCU || TREE_PREEMPT_RCU 666 depends on TREE_RCU || TREE_PREEMPT_RCU
675 default n 667 default n
676 help 668 help
@@ -696,9 +688,10 @@ choice
696 prompt "Build-forced no-CBs CPUs" 688 prompt "Build-forced no-CBs CPUs"
697 default RCU_NOCB_CPU_NONE 689 default RCU_NOCB_CPU_NONE
698 help 690 help
699 This option allows no-CBs CPUs to be specified at build time. 691 This option allows no-CBs CPUs (whose RCU callbacks are invoked
700 Additional no-CBs CPUs may be specified by the rcu_nocbs= 692 from kthreads rather than from softirq context) to be specified
701 boot parameter. 693 at build time. Additional no-CBs CPUs may be specified by
694 the rcu_nocbs= boot parameter.
702 695
703config RCU_NOCB_CPU_NONE 696config RCU_NOCB_CPU_NONE
704 bool "No build_forced no-CBs CPUs" 697 bool "No build_forced no-CBs CPUs"
@@ -706,25 +699,40 @@ config RCU_NOCB_CPU_NONE
706 help 699 help
707 This option does not force any of the CPUs to be no-CBs CPUs. 700 This option does not force any of the CPUs to be no-CBs CPUs.
708 Only CPUs designated by the rcu_nocbs= boot parameter will be 701 Only CPUs designated by the rcu_nocbs= boot parameter will be
709 no-CBs CPUs. 702 no-CBs CPUs, whose RCU callbacks will be invoked by per-CPU
703 kthreads whose names begin with "rcuo". All other CPUs will
704 invoke their own RCU callbacks in softirq context.
705
706 Select this option if you want to choose no-CBs CPUs at
707 boot time, for example, to allow testing of different no-CBs
708 configurations without having to rebuild the kernel each time.
710 709
711config RCU_NOCB_CPU_ZERO 710config RCU_NOCB_CPU_ZERO
712 bool "CPU 0 is a build_forced no-CBs CPU" 711 bool "CPU 0 is a build_forced no-CBs CPU"
713 depends on RCU_NOCB_CPU && !NO_HZ_FULL 712 depends on RCU_NOCB_CPU && !NO_HZ_FULL
714 help 713 help
715 This option forces CPU 0 to be a no-CBs CPU. Additional CPUs 714 This option forces CPU 0 to be a no-CBs CPU, so that its RCU
716 may be designated as no-CBs CPUs using the rcu_nocbs= boot 715 callbacks are invoked by a per-CPU kthread whose name begins
717 parameter will be no-CBs CPUs. 716 with "rcuo". Additional CPUs may be designated as no-CBs
717 CPUs using the rcu_nocbs= boot parameter will be no-CBs CPUs.
718 All other CPUs will invoke their own RCU callbacks in softirq
719 context.
718 720
719 Select this if CPU 0 needs to be a no-CBs CPU for real-time 721 Select this if CPU 0 needs to be a no-CBs CPU for real-time
720 or energy-efficiency reasons. 722 or energy-efficiency reasons, but the real reason it exists
723 is to ensure that randconfig testing covers mixed systems.
721 724
722config RCU_NOCB_CPU_ALL 725config RCU_NOCB_CPU_ALL
723 bool "All CPUs are build_forced no-CBs CPUs" 726 bool "All CPUs are build_forced no-CBs CPUs"
724 depends on RCU_NOCB_CPU 727 depends on RCU_NOCB_CPU
725 help 728 help
726 This option forces all CPUs to be no-CBs CPUs. The rcu_nocbs= 729 This option forces all CPUs to be no-CBs CPUs. The rcu_nocbs=
727 boot parameter will be ignored. 730 boot parameter will be ignored. All CPUs' RCU callbacks will
731 be executed in the context of per-CPU rcuo kthreads created for
732 this purpose. Assuming that the kthreads whose names start with
733 "rcuo" are bound to "housekeeping" CPUs, this reduces OS jitter
734 on the remaining CPUs, but might decrease memory locality during
735 RCU-callback invocation, thus potentially degrading throughput.
728 736
729 Select this if all CPUs need to be no-CBs CPUs for real-time 737 Select this if all CPUs need to be no-CBs CPUs for real-time
730 or energy-efficiency reasons. 738 or energy-efficiency reasons.
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 48ab70384a4c..cce6ba8bbace 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -104,31 +104,7 @@ void __rcu_read_unlock(void)
104} 104}
105EXPORT_SYMBOL_GPL(__rcu_read_unlock); 105EXPORT_SYMBOL_GPL(__rcu_read_unlock);
106 106
107/* 107#endif /* #ifdef CONFIG_PREEMPT_RCU */
108 * Check for a task exiting while in a preemptible-RCU read-side
109 * critical section, clean up if so. No need to issue warnings,
110 * as debug_check_no_locks_held() already does this if lockdep
111 * is enabled.
112 */
113void exit_rcu(void)
114{
115 struct task_struct *t = current;
116
117 if (likely(list_empty(&current->rcu_node_entry)))
118 return;
119 t->rcu_read_lock_nesting = 1;
120 barrier();
121 t->rcu_read_unlock_special = RCU_READ_UNLOCK_BLOCKED;
122 __rcu_read_unlock();
123}
124
125#else /* #ifdef CONFIG_PREEMPT_RCU */
126
127void exit_rcu(void)
128{
129}
130
131#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
132 108
133#ifdef CONFIG_DEBUG_LOCK_ALLOC 109#ifdef CONFIG_DEBUG_LOCK_ALLOC
134static struct lock_class_key rcu_lock_key; 110static struct lock_class_key rcu_lock_key;
@@ -145,9 +121,6 @@ static struct lock_class_key rcu_sched_lock_key;
145struct lockdep_map rcu_sched_lock_map = 121struct lockdep_map rcu_sched_lock_map =
146 STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_sched", &rcu_sched_lock_key); 122 STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_sched", &rcu_sched_lock_key);
147EXPORT_SYMBOL_GPL(rcu_sched_lock_map); 123EXPORT_SYMBOL_GPL(rcu_sched_lock_map);
148#endif
149
150#ifdef CONFIG_DEBUG_LOCK_ALLOC
151 124
152int debug_lockdep_rcu_enabled(void) 125int debug_lockdep_rcu_enabled(void)
153{ 126{
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index a0714a51b6d7..aa344111de3e 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -44,7 +44,6 @@
44 44
45/* Forward declarations for rcutiny_plugin.h. */ 45/* Forward declarations for rcutiny_plugin.h. */
46struct rcu_ctrlblk; 46struct rcu_ctrlblk;
47static void invoke_rcu_callbacks(void);
48static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp); 47static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp);
49static void rcu_process_callbacks(struct softirq_action *unused); 48static void rcu_process_callbacks(struct softirq_action *unused);
50static void __call_rcu(struct rcu_head *head, 49static void __call_rcu(struct rcu_head *head,
@@ -205,7 +204,7 @@ static int rcu_is_cpu_rrupt_from_idle(void)
205 */ 204 */
206static int rcu_qsctr_help(struct rcu_ctrlblk *rcp) 205static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
207{ 206{
208 reset_cpu_stall_ticks(rcp); 207 RCU_TRACE(reset_cpu_stall_ticks(rcp));
209 if (rcp->rcucblist != NULL && 208 if (rcp->rcucblist != NULL &&
210 rcp->donetail != rcp->curtail) { 209 rcp->donetail != rcp->curtail) {
211 rcp->donetail = rcp->curtail; 210 rcp->donetail = rcp->curtail;
@@ -227,7 +226,7 @@ void rcu_sched_qs(int cpu)
227 local_irq_save(flags); 226 local_irq_save(flags);
228 if (rcu_qsctr_help(&rcu_sched_ctrlblk) + 227 if (rcu_qsctr_help(&rcu_sched_ctrlblk) +
229 rcu_qsctr_help(&rcu_bh_ctrlblk)) 228 rcu_qsctr_help(&rcu_bh_ctrlblk))
230 invoke_rcu_callbacks(); 229 raise_softirq(RCU_SOFTIRQ);
231 local_irq_restore(flags); 230 local_irq_restore(flags);
232} 231}
233 232
@@ -240,7 +239,7 @@ void rcu_bh_qs(int cpu)
240 239
241 local_irq_save(flags); 240 local_irq_save(flags);
242 if (rcu_qsctr_help(&rcu_bh_ctrlblk)) 241 if (rcu_qsctr_help(&rcu_bh_ctrlblk))
243 invoke_rcu_callbacks(); 242 raise_softirq(RCU_SOFTIRQ);
244 local_irq_restore(flags); 243 local_irq_restore(flags);
245} 244}
246 245
@@ -252,12 +251,11 @@ void rcu_bh_qs(int cpu)
252 */ 251 */
253void rcu_check_callbacks(int cpu, int user) 252void rcu_check_callbacks(int cpu, int user)
254{ 253{
255 check_cpu_stalls(); 254 RCU_TRACE(check_cpu_stalls());
256 if (user || rcu_is_cpu_rrupt_from_idle()) 255 if (user || rcu_is_cpu_rrupt_from_idle())
257 rcu_sched_qs(cpu); 256 rcu_sched_qs(cpu);
258 else if (!in_softirq()) 257 else if (!in_softirq())
259 rcu_bh_qs(cpu); 258 rcu_bh_qs(cpu);
260 rcu_preempt_check_callbacks();
261} 259}
262 260
263/* 261/*
@@ -278,7 +276,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
278 ACCESS_ONCE(rcp->rcucblist), 276 ACCESS_ONCE(rcp->rcucblist),
279 need_resched(), 277 need_resched(),
280 is_idle_task(current), 278 is_idle_task(current),
281 rcu_is_callbacks_kthread())); 279 false));
282 return; 280 return;
283 } 281 }
284 282
@@ -290,7 +288,6 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
290 *rcp->donetail = NULL; 288 *rcp->donetail = NULL;
291 if (rcp->curtail == rcp->donetail) 289 if (rcp->curtail == rcp->donetail)
292 rcp->curtail = &rcp->rcucblist; 290 rcp->curtail = &rcp->rcucblist;
293 rcu_preempt_remove_callbacks(rcp);
294 rcp->donetail = &rcp->rcucblist; 291 rcp->donetail = &rcp->rcucblist;
295 local_irq_restore(flags); 292 local_irq_restore(flags);
296 293
@@ -309,14 +306,13 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
309 RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count)); 306 RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count));
310 RCU_TRACE(trace_rcu_batch_end(rcp->name, cb_count, 0, need_resched(), 307 RCU_TRACE(trace_rcu_batch_end(rcp->name, cb_count, 0, need_resched(),
311 is_idle_task(current), 308 is_idle_task(current),
312 rcu_is_callbacks_kthread())); 309 false));
313} 310}
314 311
315static void rcu_process_callbacks(struct softirq_action *unused) 312static void rcu_process_callbacks(struct softirq_action *unused)
316{ 313{
317 __rcu_process_callbacks(&rcu_sched_ctrlblk); 314 __rcu_process_callbacks(&rcu_sched_ctrlblk);
318 __rcu_process_callbacks(&rcu_bh_ctrlblk); 315 __rcu_process_callbacks(&rcu_bh_ctrlblk);
319 rcu_preempt_process_callbacks();
320} 316}
321 317
322/* 318/*
@@ -382,3 +378,8 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
382 __call_rcu(head, func, &rcu_bh_ctrlblk); 378 __call_rcu(head, func, &rcu_bh_ctrlblk);
383} 379}
384EXPORT_SYMBOL_GPL(call_rcu_bh); 380EXPORT_SYMBOL_GPL(call_rcu_bh);
381
382void rcu_init(void)
383{
384 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
385}
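
The rcutiny.c hunks above drop the plugin's invoke_rcu_callbacks() in favor of the stock softirq machinery: rcu_init() registers rcu_process_callbacks() on RCU_SOFTIRQ, and the quiescent-state paths raise that softirq when callbacks become ready. The general open_softirq()/raise_softirq() pairing looks like the sketch below; my_handler(), my_subsys_init(), my_note_quiescent_state() and my_work_ready are illustrative stand-ins rather than the actual rcutiny code, and RCU_SOFTIRQ appears only because it is the vector this patch uses (softirq vectors are a fixed enum in <linux/interrupt.h>, not dynamically allocated).

    #include <linux/interrupt.h>
    #include <linux/types.h>

    static bool my_work_ready;              /* stand-in for "callbacks are ready" */

    static void my_handler(struct softirq_action *unused)
    {
            /* Runs in softirq context on the CPU that raised the softirq;
             * this is where ready callbacks would be invoked. */
    }

    static void my_subsys_init(void)
    {
            /* Register the handler once, typically during boot. */
            open_softirq(RCU_SOFTIRQ, my_handler);
    }

    static void my_note_quiescent_state(void)
    {
            if (my_work_ready)
                    raise_softirq(RCU_SOFTIRQ);     /* run my_handler() on this CPU soon */
    }
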
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index 8a233002faeb..0cd385acccfa 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
@@ -53,958 +53,10 @@ static struct rcu_ctrlblk rcu_bh_ctrlblk = {
53}; 53};
54 54
55#ifdef CONFIG_DEBUG_LOCK_ALLOC 55#ifdef CONFIG_DEBUG_LOCK_ALLOC
56#include <linux/kernel_stat.h>
57
56int rcu_scheduler_active __read_mostly; 58int rcu_scheduler_active __read_mostly;
57EXPORT_SYMBOL_GPL(rcu_scheduler_active); 59EXPORT_SYMBOL_GPL(rcu_scheduler_active);
58#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
59
60#ifdef CONFIG_RCU_TRACE
61
62static void check_cpu_stall(struct rcu_ctrlblk *rcp)
63{
64 unsigned long j;
65 unsigned long js;
66
67 if (rcu_cpu_stall_suppress)
68 return;
69 rcp->ticks_this_gp++;
70 j = jiffies;
71 js = rcp->jiffies_stall;
72 if (*rcp->curtail && ULONG_CMP_GE(j, js)) {
73 pr_err("INFO: %s stall on CPU (%lu ticks this GP) idle=%llx (t=%lu jiffies q=%ld)\n",
74 rcp->name, rcp->ticks_this_gp, rcu_dynticks_nesting,
75 jiffies - rcp->gp_start, rcp->qlen);
76 dump_stack();
77 }
78 if (*rcp->curtail && ULONG_CMP_GE(j, js))
79 rcp->jiffies_stall = jiffies +
80 3 * rcu_jiffies_till_stall_check() + 3;
81 else if (ULONG_CMP_GE(j, js))
82 rcp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check();
83}
84
85static void check_cpu_stall_preempt(void);
86
87#endif /* #ifdef CONFIG_RCU_TRACE */
88
89static void reset_cpu_stall_ticks(struct rcu_ctrlblk *rcp)
90{
91#ifdef CONFIG_RCU_TRACE
92 rcp->ticks_this_gp = 0;
93 rcp->gp_start = jiffies;
94 rcp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check();
95#endif /* #ifdef CONFIG_RCU_TRACE */
96}
97
98static void check_cpu_stalls(void)
99{
100 RCU_TRACE(check_cpu_stall(&rcu_bh_ctrlblk));
101 RCU_TRACE(check_cpu_stall(&rcu_sched_ctrlblk));
102 RCU_TRACE(check_cpu_stall_preempt());
103}
104
105#ifdef CONFIG_TINY_PREEMPT_RCU
106
107#include <linux/delay.h>
108
109/* Global control variables for preemptible RCU. */
110struct rcu_preempt_ctrlblk {
111 struct rcu_ctrlblk rcb; /* curtail: ->next ptr of last CB for GP. */
112 struct rcu_head **nexttail;
113 /* Tasks blocked in a preemptible RCU */
114 /* read-side critical section while an */
115 /* preemptible-RCU grace period is in */
116 /* progress must wait for a later grace */
117 /* period. This pointer points to the */
118 /* ->next pointer of the last task that */
119 /* must wait for a later grace period, or */
120 /* to &->rcb.rcucblist if there is no */
121 /* such task. */
122 struct list_head blkd_tasks;
123 /* Tasks blocked in RCU read-side critical */
124 /* section. Tasks are placed at the head */
125 /* of this list and age towards the tail. */
126 struct list_head *gp_tasks;
127 /* Pointer to the first task blocking the */
128 /* current grace period, or NULL if there */
129 /* is no such task. */
130 struct list_head *exp_tasks;
131 /* Pointer to first task blocking the */
132 /* current expedited grace period, or NULL */
133 /* if there is no such task. If there */
134 /* is no current expedited grace period, */
135 /* then there cannot be any such task. */
136#ifdef CONFIG_RCU_BOOST
137 struct list_head *boost_tasks;
138 /* Pointer to first task that needs to be */
139 /* priority-boosted, or NULL if no priority */
140 /* boosting is needed. If there is no */
141 /* current or expedited grace period, there */
142 /* can be no such task. */
143#endif /* #ifdef CONFIG_RCU_BOOST */
144 u8 gpnum; /* Current grace period. */
145 u8 gpcpu; /* Last grace period blocked by the CPU. */
146 u8 completed; /* Last grace period completed. */
147 /* If all three are equal, RCU is idle. */
148#ifdef CONFIG_RCU_BOOST
149 unsigned long boost_time; /* When to start boosting (jiffies) */
150#endif /* #ifdef CONFIG_RCU_BOOST */
151#ifdef CONFIG_RCU_TRACE
152 unsigned long n_grace_periods;
153#ifdef CONFIG_RCU_BOOST
154 unsigned long n_tasks_boosted;
155 /* Total number of tasks boosted. */
156 unsigned long n_exp_boosts;
157 /* Number of tasks boosted for expedited GP. */
158 unsigned long n_normal_boosts;
159 /* Number of tasks boosted for normal GP. */
160 unsigned long n_balk_blkd_tasks;
161 /* Refused to boost: no blocked tasks. */
162 unsigned long n_balk_exp_gp_tasks;
163 /* Refused to boost: nothing blocking GP. */
164 unsigned long n_balk_boost_tasks;
165 /* Refused to boost: already boosting. */
166 unsigned long n_balk_notyet;
167 /* Refused to boost: not yet time. */
168 unsigned long n_balk_nos;
169 /* Refused to boost: not sure why, though. */
170 /* This can happen due to race conditions. */
171#endif /* #ifdef CONFIG_RCU_BOOST */
172#endif /* #ifdef CONFIG_RCU_TRACE */
173};
174
175static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = {
176 .rcb.donetail = &rcu_preempt_ctrlblk.rcb.rcucblist,
177 .rcb.curtail = &rcu_preempt_ctrlblk.rcb.rcucblist,
178 .nexttail = &rcu_preempt_ctrlblk.rcb.rcucblist,
179 .blkd_tasks = LIST_HEAD_INIT(rcu_preempt_ctrlblk.blkd_tasks),
180 RCU_TRACE(.rcb.name = "rcu_preempt")
181};
182
183static int rcu_preempted_readers_exp(void);
184static void rcu_report_exp_done(void);
185
186/*
187 * Return true if the CPU has not yet responded to the current grace period.
188 */
189static int rcu_cpu_blocking_cur_gp(void)
190{
191 return rcu_preempt_ctrlblk.gpcpu != rcu_preempt_ctrlblk.gpnum;
192}
193
194/*
195 * Check for a running RCU reader. Because there is only one CPU,
196 * there can be but one running RCU reader at a time. ;-)
197 *
198 * Returns zero if there are no running readers. Returns a positive
199 * number if there is at least one reader within its RCU read-side
200 * critical section. Returns a negative number if an outermost reader
201 * is in the midst of exiting from its RCU read-side critical section.
207 */
208static int rcu_preempt_running_reader(void)
209{
210 return current->rcu_read_lock_nesting;
211}
212
213/*
214 * Check for preempted RCU readers blocking any grace period.
215 * If the caller needs a reliable answer, it must disable hard irqs.
216 */
217static int rcu_preempt_blocked_readers_any(void)
218{
219 return !list_empty(&rcu_preempt_ctrlblk.blkd_tasks);
220}
221
222/*
223 * Check for preempted RCU readers blocking the current grace period.
224 * If the caller needs a reliable answer, it must disable hard irqs.
225 */
226static int rcu_preempt_blocked_readers_cgp(void)
227{
228 return rcu_preempt_ctrlblk.gp_tasks != NULL;
229}
230
231/*
232 * Return true if another preemptible-RCU grace period is needed.
233 */
234static int rcu_preempt_needs_another_gp(void)
235{
236 return *rcu_preempt_ctrlblk.rcb.curtail != NULL;
237}
238
239/*
240 * Return true if a preemptible-RCU grace period is in progress.
241 * The caller must disable hardirqs.
242 */
243static int rcu_preempt_gp_in_progress(void)
244{
245 return rcu_preempt_ctrlblk.completed != rcu_preempt_ctrlblk.gpnum;
246}
247
248/*
249 * Advance a ->blkd_tasks-list pointer to the next entry, instead
250 * returning NULL if at the end of the list.
251 */
252static struct list_head *rcu_next_node_entry(struct task_struct *t)
253{
254 struct list_head *np;
255
256 np = t->rcu_node_entry.next;
257 if (np == &rcu_preempt_ctrlblk.blkd_tasks)
258 np = NULL;
259 return np;
260}
261
262#ifdef CONFIG_RCU_TRACE
263
264#ifdef CONFIG_RCU_BOOST
265static void rcu_initiate_boost_trace(void);
266#endif /* #ifdef CONFIG_RCU_BOOST */
267
268/*
269 * Dump additional statistics for TINY_PREEMPT_RCU.
270 */
271static void show_tiny_preempt_stats(struct seq_file *m)
272{
273 seq_printf(m, "rcu_preempt: qlen=%ld gp=%lu g%u/p%u/c%u tasks=%c%c%c\n",
274 rcu_preempt_ctrlblk.rcb.qlen,
275 rcu_preempt_ctrlblk.n_grace_periods,
276 rcu_preempt_ctrlblk.gpnum,
277 rcu_preempt_ctrlblk.gpcpu,
278 rcu_preempt_ctrlblk.completed,
279 "T."[list_empty(&rcu_preempt_ctrlblk.blkd_tasks)],
280 "N."[!rcu_preempt_ctrlblk.gp_tasks],
281 "E."[!rcu_preempt_ctrlblk.exp_tasks]);
282#ifdef CONFIG_RCU_BOOST
283 seq_printf(m, "%sttb=%c ntb=%lu neb=%lu nnb=%lu j=%04x bt=%04x\n",
284 " ",
285 "B."[!rcu_preempt_ctrlblk.boost_tasks],
286 rcu_preempt_ctrlblk.n_tasks_boosted,
287 rcu_preempt_ctrlblk.n_exp_boosts,
288 rcu_preempt_ctrlblk.n_normal_boosts,
289 (int)(jiffies & 0xffff),
290 (int)(rcu_preempt_ctrlblk.boost_time & 0xffff));
291 seq_printf(m, "%s: nt=%lu egt=%lu bt=%lu ny=%lu nos=%lu\n",
292 " balk",
293 rcu_preempt_ctrlblk.n_balk_blkd_tasks,
294 rcu_preempt_ctrlblk.n_balk_exp_gp_tasks,
295 rcu_preempt_ctrlblk.n_balk_boost_tasks,
296 rcu_preempt_ctrlblk.n_balk_notyet,
297 rcu_preempt_ctrlblk.n_balk_nos);
298#endif /* #ifdef CONFIG_RCU_BOOST */
299}
300
301#endif /* #ifdef CONFIG_RCU_TRACE */
302
303#ifdef CONFIG_RCU_BOOST
304
305#include "rtmutex_common.h"
306
307#define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO
308
309/* Controls for rcu_kthread() kthread. */
310static struct task_struct *rcu_kthread_task;
311static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq);
312static unsigned long have_rcu_kthread_work;
313
314/*
315 * Carry out RCU priority boosting on the task indicated by ->boost_tasks,
316 * and advance ->boost_tasks to the next task in the ->blkd_tasks list.
317 */
318static int rcu_boost(void)
319{
320 unsigned long flags;
321 struct rt_mutex mtx;
322 struct task_struct *t;
323 struct list_head *tb;
324
325 if (rcu_preempt_ctrlblk.boost_tasks == NULL &&
326 rcu_preempt_ctrlblk.exp_tasks == NULL)
327 return 0; /* Nothing to boost. */
328
329 local_irq_save(flags);
330
331 /*
332 * Recheck with irqs disabled: all tasks in need of boosting
333 * might exit their RCU read-side critical sections on their own
334 * if we are preempted just before disabling irqs.
335 */
336 if (rcu_preempt_ctrlblk.boost_tasks == NULL &&
337 rcu_preempt_ctrlblk.exp_tasks == NULL) {
338 local_irq_restore(flags);
339 return 0;
340 }
341
342 /*
343 * Preferentially boost tasks blocking expedited grace periods.
344 * This cannot starve the normal grace periods because a second
345 * expedited grace period must boost all blocked tasks, including
346 * those blocking the pre-existing normal grace period.
347 */
348 if (rcu_preempt_ctrlblk.exp_tasks != NULL) {
349 tb = rcu_preempt_ctrlblk.exp_tasks;
350 RCU_TRACE(rcu_preempt_ctrlblk.n_exp_boosts++);
351 } else {
352 tb = rcu_preempt_ctrlblk.boost_tasks;
353 RCU_TRACE(rcu_preempt_ctrlblk.n_normal_boosts++);
354 }
355 RCU_TRACE(rcu_preempt_ctrlblk.n_tasks_boosted++);
356
357 /*
358 * We boost task t by manufacturing an rt_mutex that appears to
359 * be held by task t. We leave a pointer to that rt_mutex where
360 * task t can find it, and task t will release the mutex when it
361 * exits its outermost RCU read-side critical section. Then
362 * simply acquiring this artificial rt_mutex will boost task
363 * t's priority. (Thanks to tglx for suggesting this approach!)
364 */
365 t = container_of(tb, struct task_struct, rcu_node_entry);
366 rt_mutex_init_proxy_locked(&mtx, t);
367 t->rcu_boost_mutex = &mtx;
368 local_irq_restore(flags);
369 rt_mutex_lock(&mtx);
370 rt_mutex_unlock(&mtx); /* Keep lockdep happy. */
371
372 return ACCESS_ONCE(rcu_preempt_ctrlblk.boost_tasks) != NULL ||
373 ACCESS_ONCE(rcu_preempt_ctrlblk.exp_tasks) != NULL;
374}
375
376/*
377 * Check to see if it is now time to start boosting RCU readers blocking
378 * the current grace period, and, if so, tell the rcu_kthread_task to
379 * start boosting them. If there is an expedited boost in progress,
380 * we wait for it to complete.
381 *
382 * If there are no blocked readers blocking the current grace period,
383 * return 0 to let the caller know, otherwise return 1. Note that this
384 * return value is independent of whether or not boosting was done.
385 */
386static int rcu_initiate_boost(void)
387{
388 if (!rcu_preempt_blocked_readers_cgp() &&
389 rcu_preempt_ctrlblk.exp_tasks == NULL) {
390 RCU_TRACE(rcu_preempt_ctrlblk.n_balk_exp_gp_tasks++);
391 return 0;
392 }
393 if (rcu_preempt_ctrlblk.exp_tasks != NULL ||
394 (rcu_preempt_ctrlblk.gp_tasks != NULL &&
395 rcu_preempt_ctrlblk.boost_tasks == NULL &&
396 ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time))) {
397 if (rcu_preempt_ctrlblk.exp_tasks == NULL)
398 rcu_preempt_ctrlblk.boost_tasks =
399 rcu_preempt_ctrlblk.gp_tasks;
400 invoke_rcu_callbacks();
401 } else {
402 RCU_TRACE(rcu_initiate_boost_trace());
403 }
404 return 1;
405}
406
407#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)
408
409/*
410 * Do priority-boost accounting for the start of a new grace period.
411 */
412static void rcu_preempt_boost_start_gp(void)
413{
414 rcu_preempt_ctrlblk.boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES;
415}
416
417#else /* #ifdef CONFIG_RCU_BOOST */
418
419/*
420 * If there is no RCU priority boosting, we don't initiate boosting,
421 * but we do indicate whether there are blocked readers blocking the
422 * current grace period.
423 */
424static int rcu_initiate_boost(void)
425{
426 return rcu_preempt_blocked_readers_cgp();
427}
428
429/*
430 * If there is no RCU priority boosting, nothing to do at grace-period start.
431 */
432static void rcu_preempt_boost_start_gp(void)
433{
434}
435
436#endif /* else #ifdef CONFIG_RCU_BOOST */
437
438/*
439 * Record a preemptible-RCU quiescent state for the specified CPU. Note
440 * that this just means that the task currently running on the CPU is
441 * in a quiescent state. There might be any number of tasks blocked
442 * while in an RCU read-side critical section.
443 *
444 * Unlike the other rcu_*_qs() functions, callers to this function
445 * must disable irqs in order to protect the assignment to
446 * ->rcu_read_unlock_special.
447 *
448 * Because this is a single-CPU implementation, the only way a grace
449 * period can end is if the CPU is in a quiescent state. The reason is
450 * that a blocked preemptible-RCU reader can exit its critical section
451 * only if the CPU is running it at the time. Therefore, when the
452 * last task blocking the current grace period exits its RCU read-side
453 * critical section, neither the CPU nor blocked tasks will be stopping
454 * the current grace period. (In contrast, SMP implementations
455 * might have CPUs running in RCU read-side critical sections that
456 * block later grace periods -- but this is not possible given only
457 * one CPU.)
458 */
459static void rcu_preempt_cpu_qs(void)
460{
461 /* Record both CPU and task as having responded to current GP. */
462 rcu_preempt_ctrlblk.gpcpu = rcu_preempt_ctrlblk.gpnum;
463 current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
464
465 /* If there is no GP then there is nothing more to do. */
466 if (!rcu_preempt_gp_in_progress())
467 return;
468 /*
469 * Check up on boosting. If there are readers blocking the
470 * current grace period, leave.
471 */
472 if (rcu_initiate_boost())
473 return;
474
475 /* Advance callbacks. */
476 rcu_preempt_ctrlblk.completed = rcu_preempt_ctrlblk.gpnum;
477 rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.rcb.curtail;
478 rcu_preempt_ctrlblk.rcb.curtail = rcu_preempt_ctrlblk.nexttail;
479
480 /* If there are no blocked readers, next GP is done instantly. */
481 if (!rcu_preempt_blocked_readers_any())
482 rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.nexttail;
483
484 /* If there are done callbacks, cause them to be invoked. */
485 if (*rcu_preempt_ctrlblk.rcb.donetail != NULL)
486 invoke_rcu_callbacks();
487}
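
The two tail-pointer assignments above are easier to follow with the segmented callback list spelled out. Here is a minimal illustrative sketch of the same data structure and advancement step; the names cb, cblist and advance_on_gp_end() are invented for the example and are not part of the patch:

	struct cb {
		struct cb *next;
	};

	struct cblist {
		struct cb *head;	/* singly linked list of callbacks */
		struct cb **donetail;	/* ->next ending the "ready to invoke" segment */
		struct cb **curtail;	/* ->next ending the "waiting on current GP" segment */
		struct cb **nexttail;	/* ->next ending the "waiting on next GP" segment */
	};

	/*
	 * When a grace period completes, callbacks that were waiting for
	 * it become ready to invoke, and callbacks queued since it started
	 * must now wait for the next grace period.
	 */
	static void advance_on_gp_end(struct cblist *l)
	{
		l->donetail = l->curtail;
		l->curtail = l->nexttail;
	}
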
488
489/*
490 * Start a new RCU grace period if warranted. Hard irqs must be disabled.
491 */
492static void rcu_preempt_start_gp(void)
493{
494 if (!rcu_preempt_gp_in_progress() && rcu_preempt_needs_another_gp()) {
495
496 /* Official start of GP. */
497 rcu_preempt_ctrlblk.gpnum++;
498 RCU_TRACE(rcu_preempt_ctrlblk.n_grace_periods++);
499 reset_cpu_stall_ticks(&rcu_preempt_ctrlblk.rcb);
500
501 /* Any blocked RCU readers block new GP. */
502 if (rcu_preempt_blocked_readers_any())
503 rcu_preempt_ctrlblk.gp_tasks =
504 rcu_preempt_ctrlblk.blkd_tasks.next;
505
506 /* Set up for RCU priority boosting. */
507 rcu_preempt_boost_start_gp();
508
509 /* If there is no running reader, CPU is done with GP. */
510 if (!rcu_preempt_running_reader())
511 rcu_preempt_cpu_qs();
512 }
513}
514
515/*
516 * We have entered the scheduler, and the current task might soon be
517 * context-switched away from. If this task is in an RCU read-side
518 * critical section, we will no longer be able to rely on the CPU to
519 * record that fact, so we enqueue the task on the blkd_tasks list.
520 * If the task started after the current grace period began, as recorded
 521 * by ->gpcpu, we enqueue it at the beginning of the list. Otherwise, we
 522 * enqueue it before the element referenced by ->gp_tasks (or at the tail
 523 * if ->gp_tasks is NULL) and point ->gp_tasks at the newly added element.
524 * The task will dequeue itself when it exits the outermost enclosing
525 * RCU read-side critical section. Therefore, the current grace period
526 * cannot be permitted to complete until the ->gp_tasks pointer becomes
527 * NULL.
528 *
529 * Caller must disable preemption.
530 */
531void rcu_preempt_note_context_switch(void)
532{
533 struct task_struct *t = current;
534 unsigned long flags;
535
536 local_irq_save(flags); /* must exclude scheduler_tick(). */
537 if (rcu_preempt_running_reader() > 0 &&
538 (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
539
540 /* Possibly blocking in an RCU read-side critical section. */
541 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
542
543 /*
544 * If this CPU has already checked in, then this task
545 * will hold up the next grace period rather than the
546 * current grace period. Queue the task accordingly.
547 * If the task is queued for the current grace period
548 * (i.e., this CPU has not yet passed through a quiescent
549 * state for the current grace period), then as long
550 * as that task remains queued, the current grace period
551 * cannot end.
552 */
553 list_add(&t->rcu_node_entry, &rcu_preempt_ctrlblk.blkd_tasks);
554 if (rcu_cpu_blocking_cur_gp())
555 rcu_preempt_ctrlblk.gp_tasks = &t->rcu_node_entry;
556 } else if (rcu_preempt_running_reader() < 0 &&
557 t->rcu_read_unlock_special) {
558 /*
559 * Complete exit from RCU read-side critical section on
560 * behalf of preempted instance of __rcu_read_unlock().
561 */
562 rcu_read_unlock_special(t);
563 }
564
565 /*
566 * Either we were not in an RCU read-side critical section to
567 * begin with, or we have now recorded that critical section
568 * globally. Either way, we can now note a quiescent state
569 * for this CPU. Again, if we were in an RCU read-side critical
570 * section, and if that critical section was blocking the current
571 * grace period, then the fact that the task has been enqueued
572 * means that current grace period continues to be blocked.
573 */
574 rcu_preempt_cpu_qs();
575 local_irq_restore(flags);
576}
577
578/*
579 * Handle special cases during rcu_read_unlock(), such as needing to
580 * notify RCU core processing or task having blocked during the RCU
581 * read-side critical section.
582 */
583void rcu_read_unlock_special(struct task_struct *t)
584{
585 int empty;
586 int empty_exp;
587 unsigned long flags;
588 struct list_head *np;
589#ifdef CONFIG_RCU_BOOST
590 struct rt_mutex *rbmp = NULL;
591#endif /* #ifdef CONFIG_RCU_BOOST */
592 int special;
593
594 /*
595 * NMI handlers cannot block and cannot safely manipulate state.
596 * They therefore cannot possibly be special, so just leave.
597 */
598 if (in_nmi())
599 return;
600
601 local_irq_save(flags);
602
603 /*
604 * If RCU core is waiting for this CPU to exit critical section,
605 * let it know that we have done so.
606 */
607 special = t->rcu_read_unlock_special;
608 if (special & RCU_READ_UNLOCK_NEED_QS)
609 rcu_preempt_cpu_qs();
610
611 /* Hardware IRQ handlers cannot block. */
612 if (in_irq() || in_serving_softirq()) {
613 local_irq_restore(flags);
614 return;
615 }
616
617 /* Clean up if blocked during RCU read-side critical section. */
618 if (special & RCU_READ_UNLOCK_BLOCKED) {
619 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED;
620
621 /*
622 * Remove this task from the ->blkd_tasks list and adjust
623 * any pointers that might have been referencing it.
624 */
625 empty = !rcu_preempt_blocked_readers_cgp();
626 empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL;
627 np = rcu_next_node_entry(t);
628 list_del_init(&t->rcu_node_entry);
629 if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks)
630 rcu_preempt_ctrlblk.gp_tasks = np;
631 if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks)
632 rcu_preempt_ctrlblk.exp_tasks = np;
633#ifdef CONFIG_RCU_BOOST
634 if (&t->rcu_node_entry == rcu_preempt_ctrlblk.boost_tasks)
635 rcu_preempt_ctrlblk.boost_tasks = np;
636#endif /* #ifdef CONFIG_RCU_BOOST */
637
638 /*
639 * If this was the last task on the current list, and if
640 * we aren't waiting on the CPU, report the quiescent state
641 * and start a new grace period if needed.
642 */
643 if (!empty && !rcu_preempt_blocked_readers_cgp()) {
644 rcu_preempt_cpu_qs();
645 rcu_preempt_start_gp();
646 }
647
648 /*
649 * If this was the last task on the expedited lists,
650 * then we need wake up the waiting task.
651 */
652 if (!empty_exp && rcu_preempt_ctrlblk.exp_tasks == NULL)
653 rcu_report_exp_done();
654 }
655#ifdef CONFIG_RCU_BOOST
656 /* Unboost self if was boosted. */
657 if (t->rcu_boost_mutex != NULL) {
658 rbmp = t->rcu_boost_mutex;
659 t->rcu_boost_mutex = NULL;
660 rt_mutex_unlock(rbmp);
661 }
662#endif /* #ifdef CONFIG_RCU_BOOST */
663 local_irq_restore(flags);
664}
665
666/*
667 * Check for a quiescent state from the current CPU. When a task blocks,
668 * the task is recorded in the rcu_preempt_ctrlblk structure, which is
669 * checked elsewhere. This is called from the scheduling-clock interrupt.
670 *
671 * Caller must disable hard irqs.
672 */
673static void rcu_preempt_check_callbacks(void)
674{
675 struct task_struct *t = current;
676
677 if (rcu_preempt_gp_in_progress() &&
678 (!rcu_preempt_running_reader() ||
679 !rcu_cpu_blocking_cur_gp()))
680 rcu_preempt_cpu_qs();
681 if (&rcu_preempt_ctrlblk.rcb.rcucblist !=
682 rcu_preempt_ctrlblk.rcb.donetail)
683 invoke_rcu_callbacks();
684 if (rcu_preempt_gp_in_progress() &&
685 rcu_cpu_blocking_cur_gp() &&
686 rcu_preempt_running_reader() > 0)
687 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
688}
689
690/*
691 * TINY_PREEMPT_RCU has an extra callback-list tail pointer to
692 * update, so this is invoked from rcu_process_callbacks() to
693 * handle that case. Of course, it is invoked for all flavors of
694 * RCU, but RCU callbacks can appear only on one of the lists, and
695 * neither ->nexttail nor ->donetail can possibly be NULL, so there
696 * is no need for an explicit check.
697 */
698static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp)
699{
700 if (rcu_preempt_ctrlblk.nexttail == rcp->donetail)
701 rcu_preempt_ctrlblk.nexttail = &rcp->rcucblist;
702}
703
704/*
705 * Process callbacks for preemptible RCU.
706 */
707static void rcu_preempt_process_callbacks(void)
708{
709 __rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb);
710}
711
712/*
 713 * Queue a preemptible-RCU callback for invocation after a grace period.
714 */
715void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
716{
717 unsigned long flags;
718
719 debug_rcu_head_queue(head);
720 head->func = func;
721 head->next = NULL;
722
723 local_irq_save(flags);
724 *rcu_preempt_ctrlblk.nexttail = head;
725 rcu_preempt_ctrlblk.nexttail = &head->next;
726 RCU_TRACE(rcu_preempt_ctrlblk.rcb.qlen++);
727 rcu_preempt_start_gp(); /* checks to see if GP needed. */
728 local_irq_restore(flags);
729}
730EXPORT_SYMBOL_GPL(call_rcu);
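
For reference, a typical caller of the call_rcu() defined above looks like the following minimal sketch; struct foo, foo_reclaim() and foo_remove() are hypothetical names used only for illustration and are not part of this patch:

	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	struct foo {
		int data;
		struct rcu_head rcu;
	};

	/* Runs after a grace period: no reader can still hold a reference. */
	static void foo_reclaim(struct rcu_head *head)
	{
		struct foo *fp = container_of(head, struct foo, rcu);

		kfree(fp);
	}

	/* Updater: unpublish the element, then defer freeing to foo_reclaim(). */
	static void foo_remove(struct foo **slotp)
	{
		struct foo *fp = *slotp;

		rcu_assign_pointer(*slotp, NULL);
		if (fp)
			call_rcu(&fp->rcu, foo_reclaim);
	}
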
731
732/*
733 * synchronize_rcu - wait until a grace period has elapsed.
734 *
735 * Control will return to the caller some time after a full grace
736 * period has elapsed, in other words after all currently executing RCU
737 * read-side critical sections have completed. RCU read-side critical
738 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
739 * and may be nested.
740 */
741void synchronize_rcu(void)
742{
743 rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
744 !lock_is_held(&rcu_lock_map) &&
745 !lock_is_held(&rcu_sched_lock_map),
746 "Illegal synchronize_rcu() in RCU read-side critical section");
747
748#ifdef CONFIG_DEBUG_LOCK_ALLOC
749 if (!rcu_scheduler_active)
750 return;
751#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
752
753 WARN_ON_ONCE(rcu_preempt_running_reader());
754 if (!rcu_preempt_blocked_readers_any())
755 return;
756
757 /* Once we get past the fastpath checks, same code as rcu_barrier(). */
758 if (rcu_expedited)
759 synchronize_rcu_expedited();
760 else
761 rcu_barrier();
762}
763EXPORT_SYMBOL_GPL(synchronize_rcu);
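
And for comparison with the callback-based sketch above, a blocking updater built on the synchronize_rcu() defined here; this is again purely illustrative and reuses the hypothetical struct foo and includes from the call_rcu() sketch:

	/* Reader: may run concurrently with the updater below. */
	static int foo_get_data(struct foo **slotp)
	{
		struct foo *fp;
		int ret = -1;

		rcu_read_lock();
		fp = rcu_dereference(*slotp);
		if (fp)
			ret = fp->data;
		rcu_read_unlock();
		return ret;
	}

	/* Updater: publish the new version, wait out old readers, then free. */
	static void foo_replace(struct foo **slotp, struct foo *newp)
	{
		struct foo *oldp = *slotp;

		rcu_assign_pointer(*slotp, newp);
		synchronize_rcu();	/* All pre-existing readers are done. */
		kfree(oldp);
	}

Note that, as the fastpath checks above show, on a TINY_PREEMPT_RCU kernel this synchronize_rcu() returns immediately unless some reader has actually blocked.
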
764
765static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq);
766static unsigned long sync_rcu_preempt_exp_count;
767static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex);
768
769/*
770 * Return non-zero if there are any tasks in RCU read-side critical
771 * sections blocking the current preemptible-RCU expedited grace period.
772 * If there is no preemptible-RCU expedited grace period currently in
773 * progress, returns zero unconditionally.
774 */
775static int rcu_preempted_readers_exp(void)
776{
777 return rcu_preempt_ctrlblk.exp_tasks != NULL;
778}
779
780/*
781 * Report the exit from RCU read-side critical section for the last task
782 * that queued itself during or before the current expedited preemptible-RCU
783 * grace period.
784 */
785static void rcu_report_exp_done(void)
786{
787 wake_up(&sync_rcu_preempt_exp_wq);
788}
789
790/*
791 * Wait for an rcu-preempt grace period, but expedite it. The basic idea
 792 * is to rely on the fact that there is but one CPU, and that it is
793 * illegal for a task to invoke synchronize_rcu_expedited() while in a
794 * preemptible-RCU read-side critical section. Therefore, any such
795 * critical sections must correspond to blocked tasks, which must therefore
796 * be on the ->blkd_tasks list. So just record the current head of the
797 * list in the ->exp_tasks pointer, and wait for all tasks including and
798 * after the task pointed to by ->exp_tasks to drain.
799 */
800void synchronize_rcu_expedited(void)
801{
802 unsigned long flags;
803 struct rcu_preempt_ctrlblk *rpcp = &rcu_preempt_ctrlblk;
804 unsigned long snap;
805
806 barrier(); /* ensure prior action seen before grace period. */
807
808 WARN_ON_ONCE(rcu_preempt_running_reader());
809
810 /*
811 * Acquire lock so that there is only one preemptible RCU grace
812 * period in flight. Of course, if someone does the expedited
813 * grace period for us while we are acquiring the lock, just leave.
814 */
815 snap = sync_rcu_preempt_exp_count + 1;
816 mutex_lock(&sync_rcu_preempt_exp_mutex);
817 if (ULONG_CMP_LT(snap, sync_rcu_preempt_exp_count))
818 goto unlock_mb_ret; /* Others did our work for us. */
819
820 local_irq_save(flags);
821
822 /*
 823 * All RCU readers must already be on the ->blkd_tasks list because
824 * we cannot legally be executing in an RCU read-side critical
825 * section.
826 */
827
828 /* Snapshot current head of ->blkd_tasks list. */
829 rpcp->exp_tasks = rpcp->blkd_tasks.next;
830 if (rpcp->exp_tasks == &rpcp->blkd_tasks)
831 rpcp->exp_tasks = NULL;
832
833 /* Wait for tail of ->blkd_tasks list to drain. */
834 if (!rcu_preempted_readers_exp()) {
835 local_irq_restore(flags);
836 } else {
837 rcu_initiate_boost();
838 local_irq_restore(flags);
839 wait_event(sync_rcu_preempt_exp_wq,
840 !rcu_preempted_readers_exp());
841 }
842
843 /* Clean up and exit. */
844 barrier(); /* ensure expedited GP seen before counter increment. */
845 sync_rcu_preempt_exp_count++;
846unlock_mb_ret:
847 mutex_unlock(&sync_rcu_preempt_exp_mutex);
848 barrier(); /* ensure subsequent action seen after grace period. */
849}
850EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
851
852/*
853 * Does preemptible RCU need the CPU to stay out of dynticks mode?
854 */
855int rcu_preempt_needs_cpu(void)
856{
857 return rcu_preempt_ctrlblk.rcb.rcucblist != NULL;
858}
859
860#else /* #ifdef CONFIG_TINY_PREEMPT_RCU */
861
862#ifdef CONFIG_RCU_TRACE
863
864/*
865 * Because preemptible RCU does not exist, it is not necessary to
866 * dump out its statistics.
867 */
868static void show_tiny_preempt_stats(struct seq_file *m)
869{
870}
871
872#endif /* #ifdef CONFIG_RCU_TRACE */
873
874/*
875 * Because preemptible RCU does not exist, it never has any callbacks
876 * to check.
877 */
878static void rcu_preempt_check_callbacks(void)
879{
880}
881
882/*
883 * Because preemptible RCU does not exist, it never has any callbacks
884 * to remove.
885 */
886static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp)
887{
888}
889
890/*
891 * Because preemptible RCU does not exist, it never has any callbacks
892 * to process.
893 */
894static void rcu_preempt_process_callbacks(void)
895{
896}
897
898#endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */
899
900#ifdef CONFIG_RCU_BOOST
901
902/*
903 * Wake up rcu_kthread() to process callbacks now eligible for invocation
904 * or to boost readers.
905 */
906static void invoke_rcu_callbacks(void)
907{
908 have_rcu_kthread_work = 1;
909 if (rcu_kthread_task != NULL)
910 wake_up(&rcu_kthread_wq);
911}
912
913#ifdef CONFIG_RCU_TRACE
914
915/*
916 * Is the current CPU running the RCU-callbacks kthread?
917 * Caller must have preemption disabled.
918 */
919static bool rcu_is_callbacks_kthread(void)
920{
921 return rcu_kthread_task == current;
922}
923
924#endif /* #ifdef CONFIG_RCU_TRACE */
925
926/*
927 * This kthread invokes RCU callbacks whose grace periods have
928 * elapsed. It is awakened as needed, and takes the place of the
929 * RCU_SOFTIRQ that is used for this purpose when boosting is disabled.
930 * This is a kthread, but it is never stopped, at least not until
931 * the system goes down.
932 */
933static int rcu_kthread(void *arg)
934{
935 unsigned long work;
936 unsigned long morework;
937 unsigned long flags;
938
939 for (;;) {
940 wait_event_interruptible(rcu_kthread_wq,
941 have_rcu_kthread_work != 0);
942 morework = rcu_boost();
943 local_irq_save(flags);
944 work = have_rcu_kthread_work;
945 have_rcu_kthread_work = morework;
946 local_irq_restore(flags);
947 if (work)
948 rcu_process_callbacks(NULL);
949 schedule_timeout_interruptible(1); /* Leave CPU for others. */
950 }
951
952 return 0; /* Not reached, but needed to shut gcc up. */
953}
954
955/*
956 * Spawn the kthread that invokes RCU callbacks.
957 */
958static int __init rcu_spawn_kthreads(void)
959{
960 struct sched_param sp;
961
962 rcu_kthread_task = kthread_run(rcu_kthread, NULL, "rcu_kthread");
963 sp.sched_priority = RCU_BOOST_PRIO;
964 sched_setscheduler_nocheck(rcu_kthread_task, SCHED_FIFO, &sp);
965 return 0;
966}
967early_initcall(rcu_spawn_kthreads);
968
969#else /* #ifdef CONFIG_RCU_BOOST */
970
971/* Hold off callback invocation until early_initcall() time. */
972static int rcu_scheduler_fully_active __read_mostly;
973
974/*
975 * Start up softirq processing of callbacks.
976 */
977void invoke_rcu_callbacks(void)
978{
979 if (rcu_scheduler_fully_active)
980 raise_softirq(RCU_SOFTIRQ);
981}
982
983#ifdef CONFIG_RCU_TRACE
984
985/*
986 * There is no callback kthread, so this thread is never it.
987 */
988static bool rcu_is_callbacks_kthread(void)
989{
990 return false;
991}
992
993#endif /* #ifdef CONFIG_RCU_TRACE */
994
995static int __init rcu_scheduler_really_started(void)
996{
997 rcu_scheduler_fully_active = 1;
998 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
999 raise_softirq(RCU_SOFTIRQ); /* Invoke any callbacks from early boot. */
1000 return 0;
1001}
1002early_initcall(rcu_scheduler_really_started);
1003
1004#endif /* #else #ifdef CONFIG_RCU_BOOST */
1005
1006#ifdef CONFIG_DEBUG_LOCK_ALLOC
1007#include <linux/kernel_stat.h>
1008 60
1009/* 61/*
1010 * During boot, we forgive RCU lockdep issues. After this function is 62 * During boot, we forgive RCU lockdep issues. After this function is
@@ -1020,25 +72,6 @@ void __init rcu_scheduler_starting(void)
1020 72
1021#ifdef CONFIG_RCU_TRACE 73#ifdef CONFIG_RCU_TRACE
1022 74
1023#ifdef CONFIG_RCU_BOOST
1024
1025static void rcu_initiate_boost_trace(void)
1026{
1027 if (list_empty(&rcu_preempt_ctrlblk.blkd_tasks))
1028 rcu_preempt_ctrlblk.n_balk_blkd_tasks++;
1029 else if (rcu_preempt_ctrlblk.gp_tasks == NULL &&
1030 rcu_preempt_ctrlblk.exp_tasks == NULL)
1031 rcu_preempt_ctrlblk.n_balk_exp_gp_tasks++;
1032 else if (rcu_preempt_ctrlblk.boost_tasks != NULL)
1033 rcu_preempt_ctrlblk.n_balk_boost_tasks++;
1034 else if (!ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time))
1035 rcu_preempt_ctrlblk.n_balk_notyet++;
1036 else
1037 rcu_preempt_ctrlblk.n_balk_nos++;
1038}
1039
1040#endif /* #ifdef CONFIG_RCU_BOOST */
1041
1042static void rcu_trace_sub_qlen(struct rcu_ctrlblk *rcp, int n) 75static void rcu_trace_sub_qlen(struct rcu_ctrlblk *rcp, int n)
1043{ 76{
1044 unsigned long flags; 77 unsigned long flags;
@@ -1053,7 +86,6 @@ static void rcu_trace_sub_qlen(struct rcu_ctrlblk *rcp, int n)
1053 */ 86 */
1054static int show_tiny_stats(struct seq_file *m, void *unused) 87static int show_tiny_stats(struct seq_file *m, void *unused)
1055{ 88{
1056 show_tiny_preempt_stats(m);
1057 seq_printf(m, "rcu_sched: qlen: %ld\n", rcu_sched_ctrlblk.qlen); 89 seq_printf(m, "rcu_sched: qlen: %ld\n", rcu_sched_ctrlblk.qlen);
1058 seq_printf(m, "rcu_bh: qlen: %ld\n", rcu_bh_ctrlblk.qlen); 90 seq_printf(m, "rcu_bh: qlen: %ld\n", rcu_bh_ctrlblk.qlen);
1059 return 0; 91 return 0;
@@ -1103,11 +135,40 @@ MODULE_AUTHOR("Paul E. McKenney");
1103MODULE_DESCRIPTION("Read-Copy Update tracing for tiny implementation"); 135MODULE_DESCRIPTION("Read-Copy Update tracing for tiny implementation");
1104MODULE_LICENSE("GPL"); 136MODULE_LICENSE("GPL");
1105 137
1106static void check_cpu_stall_preempt(void) 138static void check_cpu_stall(struct rcu_ctrlblk *rcp)
1107{ 139{
1108#ifdef CONFIG_TINY_PREEMPT_RCU 140 unsigned long j;
1109 check_cpu_stall(&rcu_preempt_ctrlblk.rcb); 141 unsigned long js;
1110#endif /* #ifdef CONFIG_TINY_PREEMPT_RCU */ 142
143 if (rcu_cpu_stall_suppress)
144 return;
145 rcp->ticks_this_gp++;
146 j = jiffies;
147 js = rcp->jiffies_stall;
148 if (*rcp->curtail && ULONG_CMP_GE(j, js)) {
149 pr_err("INFO: %s stall on CPU (%lu ticks this GP) idle=%llx (t=%lu jiffies q=%ld)\n",
150 rcp->name, rcp->ticks_this_gp, rcu_dynticks_nesting,
151 jiffies - rcp->gp_start, rcp->qlen);
152 dump_stack();
153 }
154 if (*rcp->curtail && ULONG_CMP_GE(j, js))
155 rcp->jiffies_stall = jiffies +
156 3 * rcu_jiffies_till_stall_check() + 3;
157 else if (ULONG_CMP_GE(j, js))
158 rcp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check();
159}
160
161static void reset_cpu_stall_ticks(struct rcu_ctrlblk *rcp)
162{
163 rcp->ticks_this_gp = 0;
164 rcp->gp_start = jiffies;
165 rcp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check();
166}
167
168static void check_cpu_stalls(void)
169{
170 RCU_TRACE(check_cpu_stall(&rcu_bh_ctrlblk));
171 RCU_TRACE(check_cpu_stall(&rcu_sched_ctrlblk));
1111} 172}
1112 173
1113#endif /* #ifdef CONFIG_RCU_TRACE */ 174#endif /* #ifdef CONFIG_RCU_TRACE */
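
To put the new stall-warning arithmetic in concrete terms, here is a rough illustration; it assumes the common CONFIG_RCU_CPU_STALL_TIMEOUT default of 21 seconds, HZ=1000, and rcu_cpu_stall_suppress left clear:

	/* reset_cpu_stall_ticks() at GP start:                               */
	/*   jiffies_stall = gp_start + ~21000 jiffies                        */
	/* First warning: roughly 21 s into a stalled grace period.           */
	/* check_cpu_stall() then re-arms:                                    */
	/*   jiffies_stall = jiffies + 3 * ~21000 + 3                         */
	/* so repeat warnings appear roughly every 63 s while the GP persists.*/
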
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index e1f3a8c96724..b1fa5510388d 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -695,44 +695,6 @@ static struct rcu_torture_ops srcu_sync_ops = {
695 .name = "srcu_sync" 695 .name = "srcu_sync"
696}; 696};
697 697
698static int srcu_torture_read_lock_raw(void) __acquires(&srcu_ctl)
699{
700 return srcu_read_lock_raw(&srcu_ctl);
701}
702
703static void srcu_torture_read_unlock_raw(int idx) __releases(&srcu_ctl)
704{
705 srcu_read_unlock_raw(&srcu_ctl, idx);
706}
707
708static struct rcu_torture_ops srcu_raw_ops = {
709 .init = rcu_sync_torture_init,
710 .readlock = srcu_torture_read_lock_raw,
711 .read_delay = srcu_read_delay,
712 .readunlock = srcu_torture_read_unlock_raw,
713 .completed = srcu_torture_completed,
714 .deferred_free = srcu_torture_deferred_free,
715 .sync = srcu_torture_synchronize,
716 .call = NULL,
717 .cb_barrier = NULL,
718 .stats = srcu_torture_stats,
719 .name = "srcu_raw"
720};
721
722static struct rcu_torture_ops srcu_raw_sync_ops = {
723 .init = rcu_sync_torture_init,
724 .readlock = srcu_torture_read_lock_raw,
725 .read_delay = srcu_read_delay,
726 .readunlock = srcu_torture_read_unlock_raw,
727 .completed = srcu_torture_completed,
728 .deferred_free = rcu_sync_torture_deferred_free,
729 .sync = srcu_torture_synchronize,
730 .call = NULL,
731 .cb_barrier = NULL,
732 .stats = srcu_torture_stats,
733 .name = "srcu_raw_sync"
734};
735
736static void srcu_torture_synchronize_expedited(void) 698static void srcu_torture_synchronize_expedited(void)
737{ 699{
738 synchronize_srcu_expedited(&srcu_ctl); 700 synchronize_srcu_expedited(&srcu_ctl);
@@ -1983,7 +1945,6 @@ rcu_torture_init(void)
1983 { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops, 1945 { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops,
1984 &rcu_bh_ops, &rcu_bh_sync_ops, &rcu_bh_expedited_ops, 1946 &rcu_bh_ops, &rcu_bh_sync_ops, &rcu_bh_expedited_ops,
1985 &srcu_ops, &srcu_sync_ops, &srcu_expedited_ops, 1947 &srcu_ops, &srcu_sync_ops, &srcu_expedited_ops,
1986 &srcu_raw_ops, &srcu_raw_sync_ops,
1987 &sched_ops, &sched_sync_ops, &sched_expedited_ops, }; 1948 &sched_ops, &sched_sync_ops, &sched_expedited_ops, };
1988 1949
1989 mutex_lock(&fullstop_mutex); 1950 mutex_lock(&fullstop_mutex);
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 35380019f0fc..cf3adc6fe001 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -218,8 +218,8 @@ module_param(blimit, long, 0444);
218module_param(qhimark, long, 0444); 218module_param(qhimark, long, 0444);
219module_param(qlowmark, long, 0444); 219module_param(qlowmark, long, 0444);
220 220
221static ulong jiffies_till_first_fqs = RCU_JIFFIES_TILL_FORCE_QS; 221static ulong jiffies_till_first_fqs = ULONG_MAX;
222static ulong jiffies_till_next_fqs = RCU_JIFFIES_TILL_FORCE_QS; 222static ulong jiffies_till_next_fqs = ULONG_MAX;
223 223
224module_param(jiffies_till_first_fqs, ulong, 0644); 224module_param(jiffies_till_first_fqs, ulong, 0644);
225module_param(jiffies_till_next_fqs, ulong, 0644); 225module_param(jiffies_till_next_fqs, ulong, 0644);
@@ -866,7 +866,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
866 * See Documentation/RCU/stallwarn.txt for info on how to debug 866 * See Documentation/RCU/stallwarn.txt for info on how to debug
867 * RCU CPU stall warnings. 867 * RCU CPU stall warnings.
868 */ 868 */
869 printk(KERN_ERR "INFO: %s detected stalls on CPUs/tasks:", 869 pr_err("INFO: %s detected stalls on CPUs/tasks:",
870 rsp->name); 870 rsp->name);
871 print_cpu_stall_info_begin(); 871 print_cpu_stall_info_begin();
872 rcu_for_each_leaf_node(rsp, rnp) { 872 rcu_for_each_leaf_node(rsp, rnp) {
@@ -899,7 +899,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
899 smp_processor_id(), (long)(jiffies - rsp->gp_start), 899 smp_processor_id(), (long)(jiffies - rsp->gp_start),
900 rsp->gpnum, rsp->completed, totqlen); 900 rsp->gpnum, rsp->completed, totqlen);
901 if (ndetected == 0) 901 if (ndetected == 0)
902 printk(KERN_ERR "INFO: Stall ended before state dump start\n"); 902 pr_err("INFO: Stall ended before state dump start\n");
903 else if (!trigger_all_cpu_backtrace()) 903 else if (!trigger_all_cpu_backtrace())
904 rcu_dump_cpu_stacks(rsp); 904 rcu_dump_cpu_stacks(rsp);
905 905
@@ -922,7 +922,7 @@ static void print_cpu_stall(struct rcu_state *rsp)
922 * See Documentation/RCU/stallwarn.txt for info on how to debug 922 * See Documentation/RCU/stallwarn.txt for info on how to debug
923 * RCU CPU stall warnings. 923 * RCU CPU stall warnings.
924 */ 924 */
925 printk(KERN_ERR "INFO: %s self-detected stall on CPU", rsp->name); 925 pr_err("INFO: %s self-detected stall on CPU", rsp->name);
926 print_cpu_stall_info_begin(); 926 print_cpu_stall_info_begin();
927 print_cpu_stall_info(rsp, smp_processor_id()); 927 print_cpu_stall_info(rsp, smp_processor_id());
928 print_cpu_stall_info_end(); 928 print_cpu_stall_info_end();
@@ -985,65 +985,6 @@ void rcu_cpu_stall_reset(void)
985} 985}
986 986
987/* 987/*
988 * Update CPU-local rcu_data state to record the newly noticed grace period.
989 * This is used both when we started the grace period and when we notice
990 * that someone else started the grace period. The caller must hold the
991 * ->lock of the leaf rcu_node structure corresponding to the current CPU,
992 * and must have irqs disabled.
993 */
994static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
995{
996 if (rdp->gpnum != rnp->gpnum) {
997 /*
998 * If the current grace period is waiting for this CPU,
999 * set up to detect a quiescent state, otherwise don't
1000 * go looking for one.
1001 */
1002 rdp->gpnum = rnp->gpnum;
1003 trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpustart");
1004 rdp->passed_quiesce = 0;
1005 rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask);
1006 zero_cpu_stall_ticks(rdp);
1007 }
1008}
1009
1010static void note_new_gpnum(struct rcu_state *rsp, struct rcu_data *rdp)
1011{
1012 unsigned long flags;
1013 struct rcu_node *rnp;
1014
1015 local_irq_save(flags);
1016 rnp = rdp->mynode;
1017 if (rdp->gpnum == ACCESS_ONCE(rnp->gpnum) || /* outside lock. */
1018 !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */
1019 local_irq_restore(flags);
1020 return;
1021 }
1022 __note_new_gpnum(rsp, rnp, rdp);
1023 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1024}
1025
1026/*
1027 * Did someone else start a new RCU grace period start since we last
1028 * checked? Update local state appropriately if so. Must be called
1029 * on the CPU corresponding to rdp.
1030 */
1031static int
1032check_for_new_grace_period(struct rcu_state *rsp, struct rcu_data *rdp)
1033{
1034 unsigned long flags;
1035 int ret = 0;
1036
1037 local_irq_save(flags);
1038 if (rdp->gpnum != rsp->gpnum) {
1039 note_new_gpnum(rsp, rdp);
1040 ret = 1;
1041 }
1042 local_irq_restore(flags);
1043 return ret;
1044}
1045
1046/*
1047 * Initialize the specified rcu_data structure's callback list to empty. 988 * Initialize the specified rcu_data structure's callback list to empty.
1048 */ 989 */
1049static void init_callback_list(struct rcu_data *rdp) 990static void init_callback_list(struct rcu_data *rdp)
@@ -1313,18 +1254,16 @@ static void rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1313} 1254}
1314 1255
1315/* 1256/*
1316 * Advance this CPU's callbacks, but only if the current grace period 1257 * Update CPU-local rcu_data state to record the beginnings and ends of
1317 * has ended. This may be called only from the CPU to whom the rdp 1258 * grace periods. The caller must hold the ->lock of the leaf rcu_node
1318 * belongs. In addition, the corresponding leaf rcu_node structure's 1259 * structure corresponding to the current CPU, and must have irqs disabled.
1319 * ->lock must be held by the caller, with irqs disabled.
1320 */ 1260 */
1321static void 1261static void __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
1322__rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
1323{ 1262{
1324 /* Did another grace period end? */ 1263 /* Handle the ends of any preceding grace periods first. */
1325 if (rdp->completed == rnp->completed) { 1264 if (rdp->completed == rnp->completed) {
1326 1265
1327 /* No, so just accelerate recent callbacks. */ 1266 /* No grace period end, so just accelerate recent callbacks. */
1328 rcu_accelerate_cbs(rsp, rnp, rdp); 1267 rcu_accelerate_cbs(rsp, rnp, rdp);
1329 1268
1330 } else { 1269 } else {
@@ -1335,68 +1274,40 @@ __rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_dat
1335 /* Remember that we saw this grace-period completion. */ 1274 /* Remember that we saw this grace-period completion. */
1336 rdp->completed = rnp->completed; 1275 rdp->completed = rnp->completed;
1337 trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuend"); 1276 trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuend");
1277 }
1338 1278
1279 if (rdp->gpnum != rnp->gpnum) {
1339 /* 1280 /*
1340 * If we were in an extended quiescent state, we may have 1281 * If the current grace period is waiting for this CPU,
1341 * missed some grace periods that other CPUs handled on 1282 * set up to detect a quiescent state, otherwise don't
1342 * our behalf. Catch up with this state to avoid noting 1283 * go looking for one.
1343 * spurious new grace periods. If another grace period
1344 * has started, then rnp->gpnum will have advanced, so
1345 * we will detect this later on. Of course, any quiescent
1346 * states we found for the old GP are now invalid.
1347 */
1348 if (ULONG_CMP_LT(rdp->gpnum, rdp->completed)) {
1349 rdp->gpnum = rdp->completed;
1350 rdp->passed_quiesce = 0;
1351 }
1352
1353 /*
1354 * If RCU does not need a quiescent state from this CPU,
1355 * then make sure that this CPU doesn't go looking for one.
1356 */ 1284 */
1357 if ((rnp->qsmask & rdp->grpmask) == 0) 1285 rdp->gpnum = rnp->gpnum;
1358 rdp->qs_pending = 0; 1286 trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpustart");
1287 rdp->passed_quiesce = 0;
1288 rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask);
1289 zero_cpu_stall_ticks(rdp);
1359 } 1290 }
1360} 1291}
1361 1292
1362/* 1293static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp)
1363 * Advance this CPU's callbacks, but only if the current grace period
1364 * has ended. This may be called only from the CPU to whom the rdp
1365 * belongs.
1366 */
1367static void
1368rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp)
1369{ 1294{
1370 unsigned long flags; 1295 unsigned long flags;
1371 struct rcu_node *rnp; 1296 struct rcu_node *rnp;
1372 1297
1373 local_irq_save(flags); 1298 local_irq_save(flags);
1374 rnp = rdp->mynode; 1299 rnp = rdp->mynode;
1375 if (rdp->completed == ACCESS_ONCE(rnp->completed) || /* outside lock. */ 1300 if ((rdp->gpnum == ACCESS_ONCE(rnp->gpnum) &&
1301 rdp->completed == ACCESS_ONCE(rnp->completed)) || /* w/out lock. */
1376 !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */ 1302 !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */
1377 local_irq_restore(flags); 1303 local_irq_restore(flags);
1378 return; 1304 return;
1379 } 1305 }
1380 __rcu_process_gp_end(rsp, rnp, rdp); 1306 __note_gp_changes(rsp, rnp, rdp);
1381 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1307 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1382} 1308}
1383 1309
1384/* 1310/*
1385 * Do per-CPU grace-period initialization for running CPU. The caller
1386 * must hold the lock of the leaf rcu_node structure corresponding to
1387 * this CPU.
1388 */
1389static void
1390rcu_start_gp_per_cpu(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
1391{
1392 /* Prior grace period ended, so advance callbacks for current CPU. */
1393 __rcu_process_gp_end(rsp, rnp, rdp);
1394
1395 /* Set state so that this CPU will detect the next quiescent state. */
1396 __note_new_gpnum(rsp, rnp, rdp);
1397}
1398
1399/*
1400 * Initialize a new grace period. 1311 * Initialize a new grace period.
1401 */ 1312 */
1402static int rcu_gp_init(struct rcu_state *rsp) 1313static int rcu_gp_init(struct rcu_state *rsp)
@@ -1444,7 +1355,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
1444 WARN_ON_ONCE(rnp->completed != rsp->completed); 1355 WARN_ON_ONCE(rnp->completed != rsp->completed);
1445 ACCESS_ONCE(rnp->completed) = rsp->completed; 1356 ACCESS_ONCE(rnp->completed) = rsp->completed;
1446 if (rnp == rdp->mynode) 1357 if (rnp == rdp->mynode)
1447 rcu_start_gp_per_cpu(rsp, rnp, rdp); 1358 __note_gp_changes(rsp, rnp, rdp);
1448 rcu_preempt_boost_start_gp(rnp); 1359 rcu_preempt_boost_start_gp(rnp);
1449 trace_rcu_grace_period_init(rsp->name, rnp->gpnum, 1360 trace_rcu_grace_period_init(rsp->name, rnp->gpnum,
1450 rnp->level, rnp->grplo, 1361 rnp->level, rnp->grplo,
@@ -1527,7 +1438,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
1527 ACCESS_ONCE(rnp->completed) = rsp->gpnum; 1438 ACCESS_ONCE(rnp->completed) = rsp->gpnum;
1528 rdp = this_cpu_ptr(rsp->rda); 1439 rdp = this_cpu_ptr(rsp->rda);
1529 if (rnp == rdp->mynode) 1440 if (rnp == rdp->mynode)
1530 __rcu_process_gp_end(rsp, rnp, rdp); 1441 __note_gp_changes(rsp, rnp, rdp);
1531 nocb += rcu_future_gp_cleanup(rsp, rnp); 1442 nocb += rcu_future_gp_cleanup(rsp, rnp);
1532 raw_spin_unlock_irq(&rnp->lock); 1443 raw_spin_unlock_irq(&rnp->lock);
1533 cond_resched(); 1444 cond_resched();
@@ -1805,9 +1716,8 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
1805static void 1716static void
1806rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) 1717rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
1807{ 1718{
1808 /* If there is now a new grace period, record and return. */ 1719 /* Check for grace-period ends and beginnings. */
1809 if (check_for_new_grace_period(rsp, rdp)) 1720 note_gp_changes(rsp, rdp);
1810 return;
1811 1721
1812 /* 1722 /*
1813 * Does this CPU still need to do its part for current grace period? 1723 * Does this CPU still need to do its part for current grace period?
@@ -2271,9 +2181,6 @@ __rcu_process_callbacks(struct rcu_state *rsp)
2271 2181
2272 WARN_ON_ONCE(rdp->beenonline == 0); 2182 WARN_ON_ONCE(rdp->beenonline == 0);
2273 2183
2274 /* Handle the end of a grace period that some other CPU ended. */
2275 rcu_process_gp_end(rsp, rdp);
2276
2277 /* Update RCU state based on any recent quiescent states. */ 2184 /* Update RCU state based on any recent quiescent states. */
2278 rcu_check_quiescent_state(rsp, rdp); 2185 rcu_check_quiescent_state(rsp, rdp);
2279 2186
@@ -2358,8 +2265,7 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
2358 if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) { 2265 if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) {
2359 2266
2360 /* Are we ignoring a completed grace period? */ 2267 /* Are we ignoring a completed grace period? */
2361 rcu_process_gp_end(rsp, rdp); 2268 note_gp_changes(rsp, rdp);
2362 check_for_new_grace_period(rsp, rdp);
2363 2269
2364 /* Start a new grace period if one not already started. */ 2270 /* Start a new grace period if one not already started. */
2365 if (!rcu_gp_in_progress(rsp)) { 2271 if (!rcu_gp_in_progress(rsp)) {
@@ -3265,11 +3171,25 @@ static void __init rcu_init_one(struct rcu_state *rsp,
3265 */ 3171 */
3266static void __init rcu_init_geometry(void) 3172static void __init rcu_init_geometry(void)
3267{ 3173{
3174 ulong d;
3268 int i; 3175 int i;
3269 int j; 3176 int j;
3270 int n = nr_cpu_ids; 3177 int n = nr_cpu_ids;
3271 int rcu_capacity[MAX_RCU_LVLS + 1]; 3178 int rcu_capacity[MAX_RCU_LVLS + 1];
3272 3179
3180 /*
3181 * Initialize any unspecified boot parameters.
3182 * The default values of jiffies_till_first_fqs and
3183 * jiffies_till_next_fqs are set to the RCU_JIFFIES_TILL_FORCE_QS
3184 * value, which is a function of HZ, then adding one for each
3185 * RCU_JIFFIES_FQS_DIV CPUs that might be on the system.
3186 */
3187 d = RCU_JIFFIES_TILL_FORCE_QS + nr_cpu_ids / RCU_JIFFIES_FQS_DIV;
3188 if (jiffies_till_first_fqs == ULONG_MAX)
3189 jiffies_till_first_fqs = d;
3190 if (jiffies_till_next_fqs == ULONG_MAX)
3191 jiffies_till_next_fqs = d;
3192
3273 /* If the compile-time values are accurate, just leave. */ 3193 /* If the compile-time values are accurate, just leave. */
3274 if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF && 3194 if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF &&
3275 nr_cpu_ids == NR_CPUS) 3195 nr_cpu_ids == NR_CPUS)
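
A worked example of the new default computation above (illustrative arithmetic only; it uses the HZ-dependent RCU_JIFFIES_TILL_FORCE_QS value from the rcutree.h hunk below):

	/* HZ=1000: RCU_JIFFIES_TILL_FORCE_QS = 1 + 1 + 1 = 3 jiffies       */
	/* nr_cpu_ids = 4096, RCU_JIFFIES_FQS_DIV = 256:                    */
	/*   d = 3 + 4096 / 256 = 3 + 16 = 19 jiffies                       */
	/* so a 4096-CPU HZ=1000 system waits about 19 ms between bouts of  */
	/* quiescent-state forcing unless overridden on the boot line.      */
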
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 4df503470e42..4a39d364493c 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -343,12 +343,17 @@ struct rcu_data {
343#define RCU_FORCE_QS 3 /* Need to force quiescent state. */ 343#define RCU_FORCE_QS 3 /* Need to force quiescent state. */
344#define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK 344#define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK
345 345
346#define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */ 346#define RCU_JIFFIES_TILL_FORCE_QS (1 + (HZ > 250) + (HZ > 500))
347 /* For jiffies_till_first_fqs and */
348 /* and jiffies_till_next_fqs. */
347 349
348#define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */ 350#define RCU_JIFFIES_FQS_DIV 256 /* Very large systems need more */
349 /* to take at least one */ 351 /* delay between bouts of */
350 /* scheduling clock irq */ 352 /* quiescent-state forcing. */
351 /* before ratting on them. */ 353
354#define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time to take */
355 /* at least one scheduling clock */
356 /* irq before ratting on them. */
352 357
353#define rcu_wait(cond) \ 358#define rcu_wait(cond) \
354do { \ 359do { \
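
The HZ-dependent definition above keeps the force-quiescent-state delay in the low-milliseconds range regardless of HZ, where the old fixed value of 3 jiffies ranged from 3 ms at HZ=1000 to 30 ms at HZ=100. For instance (illustrative arithmetic only):

	/* HZ=100:  1 + (100 > 250)  + (100 > 500)  = 1 jiffy   = 10 ms   */
	/* HZ=250:  1 + (250 > 250)  + (250 > 500)  = 1 jiffy   =  4 ms   */
	/* HZ=300:  1 + (300 > 250)  + (300 > 500)  = 2 jiffies ~  6.7 ms */
	/* HZ=1000: 1 + (1000 > 250) + (1000 > 500) = 3 jiffies =  3 ms   */
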
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 3db5a375d8dd..63098a59216e 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -53,38 +53,37 @@ static char __initdata nocb_buf[NR_CPUS * 5];
53static void __init rcu_bootup_announce_oddness(void) 53static void __init rcu_bootup_announce_oddness(void)
54{ 54{
55#ifdef CONFIG_RCU_TRACE 55#ifdef CONFIG_RCU_TRACE
56 printk(KERN_INFO "\tRCU debugfs-based tracing is enabled.\n"); 56 pr_info("\tRCU debugfs-based tracing is enabled.\n");
57#endif 57#endif
58#if (defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) || (!defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32) 58#if (defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) || (!defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32)
59 printk(KERN_INFO "\tCONFIG_RCU_FANOUT set to non-default value of %d\n", 59 pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d\n",
60 CONFIG_RCU_FANOUT); 60 CONFIG_RCU_FANOUT);
61#endif 61#endif
62#ifdef CONFIG_RCU_FANOUT_EXACT 62#ifdef CONFIG_RCU_FANOUT_EXACT
63 printk(KERN_INFO "\tHierarchical RCU autobalancing is disabled.\n"); 63 pr_info("\tHierarchical RCU autobalancing is disabled.\n");
64#endif 64#endif
65#ifdef CONFIG_RCU_FAST_NO_HZ 65#ifdef CONFIG_RCU_FAST_NO_HZ
66 printk(KERN_INFO 66 pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n");
67 "\tRCU dyntick-idle grace-period acceleration is enabled.\n");
68#endif 67#endif
69#ifdef CONFIG_PROVE_RCU 68#ifdef CONFIG_PROVE_RCU
70 printk(KERN_INFO "\tRCU lockdep checking is enabled.\n"); 69 pr_info("\tRCU lockdep checking is enabled.\n");
71#endif 70#endif
72#ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE 71#ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE
73 printk(KERN_INFO "\tRCU torture testing starts during boot.\n"); 72 pr_info("\tRCU torture testing starts during boot.\n");
74#endif 73#endif
75#if defined(CONFIG_TREE_PREEMPT_RCU) && !defined(CONFIG_RCU_CPU_STALL_VERBOSE) 74#if defined(CONFIG_TREE_PREEMPT_RCU) && !defined(CONFIG_RCU_CPU_STALL_VERBOSE)
76 printk(KERN_INFO "\tDump stacks of tasks blocking RCU-preempt GP.\n"); 75 pr_info("\tDump stacks of tasks blocking RCU-preempt GP.\n");
77#endif 76#endif
78#if defined(CONFIG_RCU_CPU_STALL_INFO) 77#if defined(CONFIG_RCU_CPU_STALL_INFO)
79 printk(KERN_INFO "\tAdditional per-CPU info printed with stalls.\n"); 78 pr_info("\tAdditional per-CPU info printed with stalls.\n");
80#endif 79#endif
81#if NUM_RCU_LVL_4 != 0 80#if NUM_RCU_LVL_4 != 0
82 printk(KERN_INFO "\tFour-level hierarchy is enabled.\n"); 81 pr_info("\tFour-level hierarchy is enabled.\n");
83#endif 82#endif
84 if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF) 83 if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF)
85 printk(KERN_INFO "\tExperimental boot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf); 84 pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf);
86 if (nr_cpu_ids != NR_CPUS) 85 if (nr_cpu_ids != NR_CPUS)
87 printk(KERN_INFO "\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids); 86 pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids);
88#ifdef CONFIG_RCU_NOCB_CPU 87#ifdef CONFIG_RCU_NOCB_CPU
89#ifndef CONFIG_RCU_NOCB_CPU_NONE 88#ifndef CONFIG_RCU_NOCB_CPU_NONE
90 if (!have_rcu_nocb_mask) { 89 if (!have_rcu_nocb_mask) {
@@ -92,19 +91,19 @@ static void __init rcu_bootup_announce_oddness(void)
92 have_rcu_nocb_mask = true; 91 have_rcu_nocb_mask = true;
93 } 92 }
94#ifdef CONFIG_RCU_NOCB_CPU_ZERO 93#ifdef CONFIG_RCU_NOCB_CPU_ZERO
95 pr_info("\tExperimental no-CBs CPU 0\n"); 94 pr_info("\tOffload RCU callbacks from CPU 0\n");
96 cpumask_set_cpu(0, rcu_nocb_mask); 95 cpumask_set_cpu(0, rcu_nocb_mask);
97#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ZERO */ 96#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ZERO */
98#ifdef CONFIG_RCU_NOCB_CPU_ALL 97#ifdef CONFIG_RCU_NOCB_CPU_ALL
99 pr_info("\tExperimental no-CBs for all CPUs\n"); 98 pr_info("\tOffload RCU callbacks from all CPUs\n");
100 cpumask_setall(rcu_nocb_mask); 99 cpumask_setall(rcu_nocb_mask);
101#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ALL */ 100#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ALL */
102#endif /* #ifndef CONFIG_RCU_NOCB_CPU_NONE */ 101#endif /* #ifndef CONFIG_RCU_NOCB_CPU_NONE */
103 if (have_rcu_nocb_mask) { 102 if (have_rcu_nocb_mask) {
104 cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask); 103 cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask);
105 pr_info("\tExperimental no-CBs CPUs: %s.\n", nocb_buf); 104 pr_info("\tOffload RCU callbacks from CPUs: %s.\n", nocb_buf);
106 if (rcu_nocb_poll) 105 if (rcu_nocb_poll)
107 pr_info("\tExperimental polled no-CBs CPUs.\n"); 106 pr_info("\tPoll for callbacks from no-CBs CPUs.\n");
108 } 107 }
109#endif /* #ifdef CONFIG_RCU_NOCB_CPU */ 108#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
110} 109}
@@ -123,7 +122,7 @@ static int rcu_preempted_readers_exp(struct rcu_node *rnp);
123 */ 122 */
124static void __init rcu_bootup_announce(void) 123static void __init rcu_bootup_announce(void)
125{ 124{
126 printk(KERN_INFO "Preemptible hierarchical RCU implementation.\n"); 125 pr_info("Preemptible hierarchical RCU implementation.\n");
127 rcu_bootup_announce_oddness(); 126 rcu_bootup_announce_oddness();
128} 127}
129 128
@@ -490,13 +489,13 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp)
490 489
491static void rcu_print_task_stall_begin(struct rcu_node *rnp) 490static void rcu_print_task_stall_begin(struct rcu_node *rnp)
492{ 491{
493 printk(KERN_ERR "\tTasks blocked on level-%d rcu_node (CPUs %d-%d):", 492 pr_err("\tTasks blocked on level-%d rcu_node (CPUs %d-%d):",
494 rnp->level, rnp->grplo, rnp->grphi); 493 rnp->level, rnp->grplo, rnp->grphi);
495} 494}
496 495
497static void rcu_print_task_stall_end(void) 496static void rcu_print_task_stall_end(void)
498{ 497{
499 printk(KERN_CONT "\n"); 498 pr_cont("\n");
500} 499}
501 500
502#else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */ 501#else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
@@ -526,7 +525,7 @@ static int rcu_print_task_stall(struct rcu_node *rnp)
526 t = list_entry(rnp->gp_tasks, 525 t = list_entry(rnp->gp_tasks,
527 struct task_struct, rcu_node_entry); 526 struct task_struct, rcu_node_entry);
528 list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { 527 list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
529 printk(KERN_CONT " P%d", t->pid); 528 pr_cont(" P%d", t->pid);
530 ndetected++; 529 ndetected++;
531 } 530 }
532 rcu_print_task_stall_end(); 531 rcu_print_task_stall_end();
@@ -933,6 +932,24 @@ static void __init __rcu_init_preempt(void)
933 rcu_init_one(&rcu_preempt_state, &rcu_preempt_data); 932 rcu_init_one(&rcu_preempt_state, &rcu_preempt_data);
934} 933}
935 934
935/*
936 * Check for a task exiting while in a preemptible-RCU read-side
 937 * critical section, and clean up if so. No need to issue warnings,
938 * as debug_check_no_locks_held() already does this if lockdep
939 * is enabled.
940 */
941void exit_rcu(void)
942{
943 struct task_struct *t = current;
944
945 if (likely(list_empty(&current->rcu_node_entry)))
946 return;
947 t->rcu_read_lock_nesting = 1;
948 barrier();
949 t->rcu_read_unlock_special = RCU_READ_UNLOCK_BLOCKED;
950 __rcu_read_unlock();
951}
952
936#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 953#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
937 954
938static struct rcu_state *rcu_state = &rcu_sched_state; 955static struct rcu_state *rcu_state = &rcu_sched_state;
@@ -942,7 +959,7 @@ static struct rcu_state *rcu_state = &rcu_sched_state;
942 */ 959 */
943static void __init rcu_bootup_announce(void) 960static void __init rcu_bootup_announce(void)
944{ 961{
945 printk(KERN_INFO "Hierarchical RCU implementation.\n"); 962 pr_info("Hierarchical RCU implementation.\n");
946 rcu_bootup_announce_oddness(); 963 rcu_bootup_announce_oddness();
947} 964}
948 965
@@ -1101,6 +1118,14 @@ static void __init __rcu_init_preempt(void)
1101{ 1118{
1102} 1119}
1103 1120
1121/*
1122 * Because preemptible RCU does not exist, tasks cannot possibly exit
1123 * while in preemptible RCU read-side critical sections.
1124 */
1125void exit_rcu(void)
1126{
1127}
1128
1104#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ 1129#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
1105 1130
1106#ifdef CONFIG_RCU_BOOST 1131#ifdef CONFIG_RCU_BOOST
@@ -1629,7 +1654,7 @@ static bool rcu_try_advance_all_cbs(void)
1629 */ 1654 */
1630 if (rdp->completed != rnp->completed && 1655 if (rdp->completed != rnp->completed &&
1631 rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_NEXT_TAIL]) 1656 rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_NEXT_TAIL])
1632 rcu_process_gp_end(rsp, rdp); 1657 note_gp_changes(rsp, rdp);
1633 1658
1634 if (cpu_has_callbacks_ready_to_invoke(rdp)) 1659 if (cpu_has_callbacks_ready_to_invoke(rdp))
1635 cbs_ready = true; 1660 cbs_ready = true;
@@ -1883,7 +1908,7 @@ static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
1883/* Initiate the stall-info list. */ 1908/* Initiate the stall-info list. */
1884static void print_cpu_stall_info_begin(void) 1909static void print_cpu_stall_info_begin(void)
1885{ 1910{
1886 printk(KERN_CONT "\n"); 1911 pr_cont("\n");
1887} 1912}
1888 1913
1889/* 1914/*
@@ -1914,7 +1939,7 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
1914 ticks_value = rsp->gpnum - rdp->gpnum; 1939 ticks_value = rsp->gpnum - rdp->gpnum;
1915 } 1940 }
1916 print_cpu_stall_fast_no_hz(fast_no_hz, cpu); 1941 print_cpu_stall_fast_no_hz(fast_no_hz, cpu);
1917 printk(KERN_ERR "\t%d: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u %s\n", 1942 pr_err("\t%d: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u %s\n",
1918 cpu, ticks_value, ticks_title, 1943 cpu, ticks_value, ticks_title,
1919 atomic_read(&rdtp->dynticks) & 0xfff, 1944 atomic_read(&rdtp->dynticks) & 0xfff,
1920 rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting, 1945 rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting,
@@ -1925,7 +1950,7 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
1925/* Terminate the stall-info list. */ 1950/* Terminate the stall-info list. */
1926static void print_cpu_stall_info_end(void) 1951static void print_cpu_stall_info_end(void)
1927{ 1952{
1928 printk(KERN_ERR "\t"); 1953 pr_err("\t");
1929} 1954}
1930 1955
1931/* Zero ->ticks_this_gp for all flavors of RCU. */ 1956/* Zero ->ticks_this_gp for all flavors of RCU. */
@@ -1948,17 +1973,17 @@ static void increment_cpu_stall_ticks(void)
1948 1973
1949static void print_cpu_stall_info_begin(void) 1974static void print_cpu_stall_info_begin(void)
1950{ 1975{
1951 printk(KERN_CONT " {"); 1976 pr_cont(" {");
1952} 1977}
1953 1978
1954static void print_cpu_stall_info(struct rcu_state *rsp, int cpu) 1979static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
1955{ 1980{
1956 printk(KERN_CONT " %d", cpu); 1981 pr_cont(" %d", cpu);
1957} 1982}
1958 1983
1959static void print_cpu_stall_info_end(void) 1984static void print_cpu_stall_info_end(void)
1960{ 1985{
1961 printk(KERN_CONT "} "); 1986 pr_cont("} ");
1962} 1987}
1963 1988
1964static void zero_cpu_stall_ticks(struct rcu_data *rdp) 1989static void zero_cpu_stall_ticks(struct rcu_data *rdp)