-rw-r--r--   Documentation/RCU/RTFP.txt         77
-rw-r--r--   Documentation/RCU/UP.txt           34
-rw-r--r--   Documentation/RCU/checklist.txt    20
-rw-r--r--   Documentation/RCU/rcubarrier.txt    7
-rw-r--r--   Documentation/RCU/torture.txt      23
-rw-r--r--   Documentation/RCU/whatisRCU.txt    14
-rw-r--r--   include/linux/rcuclassic.h        178
-rw-r--r--   include/linux/rcupdate.h           29
-rw-r--r--   include/linux/rcupreempt.h         10
-rw-r--r--   include/linux/rcutree.h            12
-rw-r--r--   init/Kconfig                       12
-rw-r--r--   kernel/Makefile                     1
-rw-r--r--   kernel/rcuclassic.c               807
-rw-r--r--   kernel/rcupdate.c                  25
-rw-r--r--   kernel/rcutorture.c               202
-rw-r--r--   kernel/sched.c                    129
16 files changed, 455 insertions, 1125 deletions
diff --git a/Documentation/RCU/RTFP.txt b/Documentation/RCU/RTFP.txt
index 9f711d2df91b..d2b85237c76e 100644
--- a/Documentation/RCU/RTFP.txt
+++ b/Documentation/RCU/RTFP.txt
@@ -743,3 +743,80 @@ Revised:
743 RCU, realtime RCU, sleepable RCU, performance. 743 RCU, realtime RCU, sleepable RCU, performance.
744" 744"
745} 745}
746
747@article{PaulEMcKenney2008RCUOSR
748,author="Paul E. McKenney and Jonathan Walpole"
749,title="Introducing technology into the {Linux} kernel: a case study"
750,Year="2008"
751,journal="SIGOPS Oper. Syst. Rev."
752,volume="42"
753,number="5"
754,pages="4--17"
755,issn="0163-5980"
756,doi={http://doi.acm.org/10.1145/1400097.1400099}
757,publisher="ACM"
758,address="New York, NY, USA"
759,annotation={
760 Linux changed RCU to a far greater degree than RCU has changed Linux.
761}
762}
763
764@unpublished{PaulEMcKenney2008HierarchicalRCU
765,Author="Paul E. McKenney"
766,Title="Hierarchical {RCU}"
767,month="November"
768,day="3"
769,year="2008"
770,note="Available:
771\url{http://lwn.net/Articles/305782/}
772[Viewed November 6, 2008]"
773,annotation="
774 RCU with combining-tree-based grace-period detection,
775 permitting it to handle thousands of CPUs.
776"
777}
778
779@conference{PaulEMcKenney2009MaliciousURCU
780,Author="Paul E. McKenney"
781,Title="Using a Malicious User-Level {RCU} to Torture {RCU}-Based Algorithms"
782,Booktitle="linux.conf.au 2009"
783,month="January"
784,year="2009"
785,address="Hobart, Australia"
786,note="Available:
787\url{http://www.rdrop.com/users/paulmck/RCU/urcutorture.2009.01.22a.pdf}
788[Viewed February 2, 2009]"
789,annotation="
790 Realtime RCU and torture-testing RCU uses.
791"
792}
793
794@unpublished{MathieuDesnoyers2009URCU
795,Author="Mathieu Desnoyers"
796,Title="[{RFC} git tree] Userspace {RCU} (urcu) for {Linux}"
797,month="February"
798,day="5"
799,year="2009"
800,note="Available:
801\url{http://lkml.org/lkml/2009/2/5/572}
802\url{git://lttng.org/userspace-rcu.git}
803[Viewed February 20, 2009]"
804,annotation="
805 Mathieu Desnoyers's user-space RCU implementation.
806 git://lttng.org/userspace-rcu.git
807"
808}
809
810@unpublished{PaulEMcKenney2009BloatWatchRCU
811,Author="Paul E. McKenney"
812,Title="{RCU}: The {Bloatwatch} Edition"
813,month="March"
814,day="17"
815,year="2009"
816,note="Available:
817\url{http://lwn.net/Articles/323929/}
818[Viewed March 20, 2009]"
819,annotation="
820 Uniprocessor assumptions allow simplified RCU implementation.
821"
822}
diff --git a/Documentation/RCU/UP.txt b/Documentation/RCU/UP.txt
index aab4a9ec3931..90ec5341ee98 100644
--- a/Documentation/RCU/UP.txt
+++ b/Documentation/RCU/UP.txt
@@ -2,14 +2,13 @@ RCU on Uniprocessor Systems
2 2
3 3
4A common misconception is that, on UP systems, the call_rcu() primitive 4A common misconception is that, on UP systems, the call_rcu() primitive
5may immediately invoke its function, and that the synchronize_rcu() 5may immediately invoke its function. The basis of this misconception
6primitive may return immediately. The basis of this misconception
7is that since there is only one CPU, it should not be necessary to 6is that since there is only one CPU, it should not be necessary to
8wait for anything else to get done, since there are no other CPUs for 7wait for anything else to get done, since there are no other CPUs for
9anything else to be happening on. Although this approach will -sort- -of- 8anything else to be happening on. Although this approach will -sort- -of-
10work a surprising amount of the time, it is a very bad idea in general. 9work a surprising amount of the time, it is a very bad idea in general.
11This document presents three examples that demonstrate exactly how bad an 10This document presents three examples that demonstrate exactly how bad
12idea this is. 11an idea this is.
13 12
14 13
15Example 1: softirq Suicide 14Example 1: softirq Suicide
@@ -82,11 +81,18 @@ Quick Quiz #2: What locking restriction must RCU callbacks respect?
82 81
83Summary 82Summary
84 83
85Permitting call_rcu() to immediately invoke its arguments or permitting 84Permitting call_rcu() to immediately invoke its arguments breaks RCU,
86synchronize_rcu() to immediately return breaks RCU, even on a UP system. 85even on a UP system. So do not do it! Even on a UP system, the RCU
87So do not do it! Even on a UP system, the RCU infrastructure -must- 86infrastructure -must- respect grace periods, and -must- invoke callbacks
88respect grace periods, and -must- invoke callbacks from a known environment 87from a known environment in which no locks are held.
89in which no locks are held. 88
89It -is- safe for synchronize_sched() and synchronize_rcu_bh() to return
 90immediately on a UP system. It is also safe for synchronize_rcu()
91to return immediately on UP systems, except when running preemptable
92RCU.
93
94Quick Quiz #3: Why can't synchronize_rcu() return immediately on
95 UP systems running preemptable RCU?
90 96
91 97
92Answer to Quick Quiz #1: 98Answer to Quick Quiz #1:
@@ -117,3 +123,13 @@ Answer to Quick Quiz #2:
117 callbacks acquire locks directly. However, a great many RCU 123 callbacks acquire locks directly. However, a great many RCU
118 callbacks do acquire locks -indirectly-, for example, via 124 callbacks do acquire locks -indirectly-, for example, via
119 the kfree() primitive. 125 the kfree() primitive.
126
127Answer to Quick Quiz #3:
128 Why can't synchronize_rcu() return immediately on UP systems
129 running preemptable RCU?
130
131 Because some other task might have been preempted in the middle
132 of an RCU read-side critical section. If synchronize_rcu()
133 simply immediately returned, it would prematurely signal the
134 end of the grace period, which would come as a nasty shock to
135 that other thread when it started running again.
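
A minimal kernel-style sketch of the deferred-free pattern this document is
defending (the struct foo type, foo_lock, and foo_reclaim() names are
illustrative, not taken from the patch). Even on UP, the callback must run
later, from a context holding no locks, because a preempted reader or a
softirq reader on the same CPU may still reference the item:

#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct foo {
        struct list_head list;
        struct rcu_head rcu;
        int data;
};

static LIST_HEAD(foo_list);             /* fp is assumed to be on this list */
static DEFINE_SPINLOCK(foo_lock);

static void foo_reclaim(struct rcu_head *rcu)
{
        struct foo *fp = container_of(rcu, struct foo, rcu);

        kfree(fp);      /* may acquire locks indirectly; see Quick Quiz #2 */
}

static void foo_del(struct foo *fp)
{
        spin_lock(&foo_lock);
        list_del_rcu(&fp->list);
        /*
         * If call_rcu() invoked foo_reclaim() right here, it would run
         * with foo_lock held and while pre-existing readers might still
         * hold references to *fp -- exactly the failures described above.
         */
        call_rcu(&fp->rcu, foo_reclaim);
        spin_unlock(&foo_lock);
}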
diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt
index accfe2f5247d..51525a30e8b4 100644
--- a/Documentation/RCU/checklist.txt
+++ b/Documentation/RCU/checklist.txt
@@ -11,7 +11,10 @@ over a rather long period of time, but improvements are always welcome!
11 structure is updated more than about 10% of the time, then 11 structure is updated more than about 10% of the time, then
12 you should strongly consider some other approach, unless 12 you should strongly consider some other approach, unless
13 detailed performance measurements show that RCU is nonetheless 13 detailed performance measurements show that RCU is nonetheless
14 the right tool for the job. 14 the right tool for the job. Yes, you might think of RCU
15 as simply cutting overhead off of the readers and imposing it
16 on the writers. That is exactly why normal uses of RCU will
17 do much more reading than updating.
15 18
16 Another exception is where performance is not an issue, and RCU 19 Another exception is where performance is not an issue, and RCU
17 provides a simpler implementation. An example of this situation 20 provides a simpler implementation. An example of this situation
@@ -240,10 +243,11 @@ over a rather long period of time, but improvements are always welcome!
240 instead need to use synchronize_irq() or synchronize_sched(). 243 instead need to use synchronize_irq() or synchronize_sched().
241 244
24212. Any lock acquired by an RCU callback must be acquired elsewhere 24512. Any lock acquired by an RCU callback must be acquired elsewhere
243 with irq disabled, e.g., via spin_lock_irqsave(). Failing to 246 with softirq disabled, e.g., via spin_lock_irqsave(),
244 disable irq on a given acquisition of that lock will result in 247 spin_lock_bh(), etc. Failing to disable irq on a given
245 deadlock as soon as the RCU callback happens to interrupt that 248 acquisition of that lock will result in deadlock as soon as the
246 acquisition's critical section. 249 RCU callback happens to interrupt that acquisition's critical
250 section.
247 251
24813. RCU callbacks can be and are executed in parallel. In many cases, 25213. RCU callbacks can be and are executed in parallel. In many cases,
249 the callback code simply wrappers around kfree(), so that this 253 the callback code simply wrappers around kfree(), so that this
@@ -310,3 +314,9 @@ over a rather long period of time, but improvements are always welcome!
310 Because these primitives only wait for pre-existing readers, 314 Because these primitives only wait for pre-existing readers,
311 it is the caller's responsibility to guarantee safety to 315 it is the caller's responsibility to guarantee safety to
312 any subsequent readers. 316 any subsequent readers.
317
31816. The various RCU read-side primitives do -not- contain memory
319 barriers. The CPU (and in some cases, the compiler) is free
320 to reorder code into and out of RCU read-side critical sections.
321 It is the responsibility of the RCU update-side primitives to
322 deal with this.
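
A hedged sketch of the locking rule in item 12 above (mylock and my_rcu_cb()
are invented names): because RCU callbacks run from softirq context, any lock
they take must be acquired with softirqs (or irqs) disabled everywhere else,
otherwise the callback can interrupt the other critical section on the same
CPU and self-deadlock:

#include <linux/rcupdate.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(mylock);         /* also acquired by the RCU callback */

static void my_rcu_cb(struct rcu_head *unused)
{
        spin_lock(&mylock);             /* runs in softirq context */
        /* ... clean up shared state ... */
        spin_unlock(&mylock);
}

static void my_update(void)
{
        spin_lock_bh(&mylock);          /* correct: softirqs blocked here */
        /* ... */
        spin_unlock_bh(&mylock);

        /*
         * Plain spin_lock(&mylock) here would be a bug: the RCU softirq
         * could fire on this CPU while mylock is held, and my_rcu_cb()
         * would then spin on the lock forever.
         */
}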
diff --git a/Documentation/RCU/rcubarrier.txt b/Documentation/RCU/rcubarrier.txt
index 909602d409bb..e439a0edee22 100644
--- a/Documentation/RCU/rcubarrier.txt
+++ b/Documentation/RCU/rcubarrier.txt
@@ -170,6 +170,13 @@ module invokes call_rcu() from timers, you will need to first cancel all
170the timers, and only then invoke rcu_barrier() to wait for any remaining 170the timers, and only then invoke rcu_barrier() to wait for any remaining
171RCU callbacks to complete. 171RCU callbacks to complete.
172 172
 173Of course, if your module uses call_rcu_bh(), you will need to invoke
174rcu_barrier_bh() before unloading. Similarly, if your module uses
175call_rcu_sched(), you will need to invoke rcu_barrier_sched() before
176unloading. If your module uses call_rcu(), call_rcu_bh(), -and-
177call_rcu_sched(), then you will need to invoke each of rcu_barrier(),
178rcu_barrier_bh(), and rcu_barrier_sched().
179
173 180
174Implementing rcu_barrier() 181Implementing rcu_barrier()
175 182
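
A short exit-handler sketch of the module-unload rule added above (my_timer
and my_module_exit() are hypothetical): first stop everything that can post
new callbacks, then invoke the matching barrier for each callback-posting API
the module actually used:

#include <linux/module.h>
#include <linux/rcupdate.h>
#include <linux/timer.h>

static struct timer_list my_timer;      /* hypothetical callback source */

static void __exit my_module_exit(void)
{
        del_timer_sync(&my_timer);      /* stop posting new callbacks */

        rcu_barrier();                  /* waits for call_rcu() callbacks */
        rcu_barrier_bh();               /* waits for call_rcu_bh() callbacks */
        rcu_barrier_sched();            /* waits for call_rcu_sched() callbacks */
}
module_exit(my_module_exit);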
diff --git a/Documentation/RCU/torture.txt b/Documentation/RCU/torture.txt
index a342b6e1cc10..9dba3bb90e60 100644
--- a/Documentation/RCU/torture.txt
+++ b/Documentation/RCU/torture.txt
@@ -76,8 +76,10 @@ torture_type The type of RCU to test: "rcu" for the rcu_read_lock() API,
76 "rcu_sync" for rcu_read_lock() with synchronous reclamation, 76 "rcu_sync" for rcu_read_lock() with synchronous reclamation,
77 "rcu_bh" for the rcu_read_lock_bh() API, "rcu_bh_sync" for 77 "rcu_bh" for the rcu_read_lock_bh() API, "rcu_bh_sync" for
78 rcu_read_lock_bh() with synchronous reclamation, "srcu" for 78 rcu_read_lock_bh() with synchronous reclamation, "srcu" for
79 the "srcu_read_lock()" API, and "sched" for the use of 79 the "srcu_read_lock()" API, "sched" for the use of
80 preempt_disable() together with synchronize_sched(). 80 preempt_disable() together with synchronize_sched(),
81 and "sched_expedited" for the use of preempt_disable()
82 with synchronize_sched_expedited().
81 83
82verbose Enable debug printk()s. Default is disabled. 84verbose Enable debug printk()s. Default is disabled.
83 85
@@ -162,6 +164,23 @@ of the "old" and "current" counters for the corresponding CPU. The
162"idx" value maps the "old" and "current" values to the underlying array, 164"idx" value maps the "old" and "current" values to the underlying array,
163and is useful for debugging. 165and is useful for debugging.
164 166
167Similarly, sched_expedited RCU provides the following:
168
169 sched_expedited-torture: rtc: d0000000016c1880 ver: 1090796 tfle: 0 rta: 1090796 rtaf: 0 rtf: 1090787 rtmbe: 0 nt: 27713319
170 sched_expedited-torture: Reader Pipe: 12660320201 95875 0 0 0 0 0 0 0 0 0
171 sched_expedited-torture: Reader Batch: 12660424885 0 0 0 0 0 0 0 0 0 0
172 sched_expedited-torture: Free-Block Circulation: 1090795 1090795 1090794 1090793 1090792 1090791 1090790 1090789 1090788 1090787 0
173 state: -1 / 0:0 3:0 4:0
174
175As before, the first four lines are similar to those for RCU.
176The last line shows the task-migration state. The first number is
177-1 if synchronize_sched_expedited() is idle, -2 if in the process of
178posting wakeups to the migration kthreads, and N when waiting on CPU N.
179Each of the colon-separated fields following the "/" is a CPU:state pair.
180Valid states are "0" for idle, "1" for waiting for quiescent state,
181"2" for passed through quiescent state, and "3" when a race with a
182CPU-hotplug event forces use of the synchronize_sched() primitive.
183
165 184
166USAGE 185USAGE
167 186
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index 96170824a717..97ded2432c59 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -785,6 +785,7 @@ RCU pointer/list traversal:
785 rcu_dereference 785 rcu_dereference
786 list_for_each_entry_rcu 786 list_for_each_entry_rcu
787 hlist_for_each_entry_rcu 787 hlist_for_each_entry_rcu
788 hlist_nulls_for_each_entry_rcu
788 789
789 list_for_each_continue_rcu (to be deprecated in favor of new 790 list_for_each_continue_rcu (to be deprecated in favor of new
790 list_for_each_entry_continue_rcu) 791 list_for_each_entry_continue_rcu)
@@ -807,19 +808,23 @@ RCU: Critical sections Grace period Barrier
807 808
808 rcu_read_lock synchronize_net rcu_barrier 809 rcu_read_lock synchronize_net rcu_barrier
809 rcu_read_unlock synchronize_rcu 810 rcu_read_unlock synchronize_rcu
811 synchronize_rcu_expedited
810 call_rcu 812 call_rcu
811 813
812 814
813bh: Critical sections Grace period Barrier 815bh: Critical sections Grace period Barrier
814 816
815 rcu_read_lock_bh call_rcu_bh rcu_barrier_bh 817 rcu_read_lock_bh call_rcu_bh rcu_barrier_bh
816 rcu_read_unlock_bh 818 rcu_read_unlock_bh synchronize_rcu_bh
819 synchronize_rcu_bh_expedited
817 820
818 821
819sched: Critical sections Grace period Barrier 822sched: Critical sections Grace period Barrier
820 823
821 [preempt_disable] synchronize_sched rcu_barrier_sched 824 rcu_read_lock_sched synchronize_sched rcu_barrier_sched
822 [and friends] call_rcu_sched 825 rcu_read_unlock_sched call_rcu_sched
826 [preempt_disable] synchronize_sched_expedited
827 [and friends]
823 828
824 829
825SRCU: Critical sections Grace period Barrier 830SRCU: Critical sections Grace period Barrier
@@ -827,6 +832,9 @@ SRCU: Critical sections Grace period Barrier
827 srcu_read_lock synchronize_srcu N/A 832 srcu_read_lock synchronize_srcu N/A
828 srcu_read_unlock 833 srcu_read_unlock
829 834
835SRCU: Initialization/cleanup
836 init_srcu_struct
837 cleanup_srcu_struct
830 838
831See the comment headers in the source code (or the docbook generated 839See the comment headers in the source code (or the docbook generated
832from them) for more information. 840from them) for more information.
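
Since the table now lists init_srcu_struct() and cleanup_srcu_struct(), here
is a brief usage sketch (my_srcu and the surrounding functions are
illustrative): unlike the other flavors, each SRCU domain is an object that
must be initialized before use and cleaned up after all readers and updaters
are done with it:

#include <linux/srcu.h>

static struct srcu_struct my_srcu;

static int my_setup(void)
{
        return init_srcu_struct(&my_srcu);
}

static void my_reader(void)
{
        int idx;

        idx = srcu_read_lock(&my_srcu);
        /* ... access SRCU-protected data; sleeping is permitted ... */
        srcu_read_unlock(&my_srcu, idx);
}

static void my_updater(void)
{
        /* ... unlink the old data ... */
        synchronize_srcu(&my_srcu);     /* wait for pre-existing readers */
        /* ... free the old data ... */
}

static void my_teardown(void)
{
        cleanup_srcu_struct(&my_srcu);
}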
diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h
deleted file mode 100644
index bfd92e1e5d2c..000000000000
--- a/include/linux/rcuclassic.h
+++ /dev/null
@@ -1,178 +0,0 @@
1/*
2 * Read-Copy Update mechanism for mutual exclusion (classic version)
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright IBM Corporation, 2001
19 *
20 * Author: Dipankar Sarma <dipankar@in.ibm.com>
21 *
22 * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
23 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
24 * Papers:
25 * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
26 * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
27 *
28 * For detailed explanation of Read-Copy Update mechanism see -
29 * Documentation/RCU
30 *
31 */
32
33#ifndef __LINUX_RCUCLASSIC_H
34#define __LINUX_RCUCLASSIC_H
35
36#include <linux/cache.h>
37#include <linux/spinlock.h>
38#include <linux/threads.h>
39#include <linux/cpumask.h>
40#include <linux/seqlock.h>
41
42#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
43#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ) /* for rcp->jiffies_stall */
44#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rcp->jiffies_stall */
45#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
46
47/* Global control variables for rcupdate callback mechanism. */
48struct rcu_ctrlblk {
49 long cur; /* Current batch number. */
50 long completed; /* Number of the last completed batch */
51 long pending; /* Number of the last pending batch */
52#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
53 unsigned long gp_start; /* Time at which GP started in jiffies. */
54 unsigned long jiffies_stall;
55 /* Time at which to check for CPU stalls. */
56#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
57
58 int signaled;
59
60 spinlock_t lock ____cacheline_internodealigned_in_smp;
61 DECLARE_BITMAP(cpumask, NR_CPUS); /* CPUs that need to switch for */
62 /* current batch to proceed. */
63} ____cacheline_internodealigned_in_smp;
64
65/* Is batch a before batch b ? */
66static inline int rcu_batch_before(long a, long b)
67{
68 return (a - b) < 0;
69}
70
71/* Is batch a after batch b ? */
72static inline int rcu_batch_after(long a, long b)
73{
74 return (a - b) > 0;
75}
76
77/* Per-CPU data for Read-Copy UPdate. */
78struct rcu_data {
79 /* 1) quiescent state handling : */
80 long quiescbatch; /* Batch # for grace period */
81 int passed_quiesc; /* User-mode/idle loop etc. */
82 int qs_pending; /* core waits for quiesc state */
83
84 /* 2) batch handling */
85 /*
86 * if nxtlist is not NULL, then:
87 * batch:
88 * The batch # for the last entry of nxtlist
89 * [*nxttail[1], NULL = *nxttail[2]):
90 * Entries that batch # <= batch
91 * [*nxttail[0], *nxttail[1]):
92 * Entries that batch # <= batch - 1
93 * [nxtlist, *nxttail[0]):
94 * Entries that batch # <= batch - 2
95 * The grace period for these entries has completed, and
96 * the other grace-period-completed entries may be moved
97 * here temporarily in rcu_process_callbacks().
98 */
99 long batch;
100 struct rcu_head *nxtlist;
101 struct rcu_head **nxttail[3];
102 long qlen; /* # of queued callbacks */
103 struct rcu_head *donelist;
104 struct rcu_head **donetail;
105 long blimit; /* Upper limit on a processed batch */
106 int cpu;
107 struct rcu_head barrier;
108};
109
110/*
111 * Increment the quiescent state counter.
112 * The counter is a bit degenerated: We do not need to know
113 * how many quiescent states passed, just if there was at least
114 * one since the start of the grace period. Thus just a flag.
115 */
116extern void rcu_qsctr_inc(int cpu);
117extern void rcu_bh_qsctr_inc(int cpu);
118
119extern int rcu_pending(int cpu);
120extern int rcu_needs_cpu(int cpu);
121
122#ifdef CONFIG_DEBUG_LOCK_ALLOC
123extern struct lockdep_map rcu_lock_map;
124# define rcu_read_acquire() \
125 lock_acquire(&rcu_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_)
126# define rcu_read_release() lock_release(&rcu_lock_map, 1, _THIS_IP_)
127#else
128# define rcu_read_acquire() do { } while (0)
129# define rcu_read_release() do { } while (0)
130#endif
131
132#define __rcu_read_lock() \
133 do { \
134 preempt_disable(); \
135 __acquire(RCU); \
136 rcu_read_acquire(); \
137 } while (0)
138#define __rcu_read_unlock() \
139 do { \
140 rcu_read_release(); \
141 __release(RCU); \
142 preempt_enable(); \
143 } while (0)
144#define __rcu_read_lock_bh() \
145 do { \
146 local_bh_disable(); \
147 __acquire(RCU_BH); \
148 rcu_read_acquire(); \
149 } while (0)
150#define __rcu_read_unlock_bh() \
151 do { \
152 rcu_read_release(); \
153 __release(RCU_BH); \
154 local_bh_enable(); \
155 } while (0)
156
157#define __synchronize_sched() synchronize_rcu()
158
159#define call_rcu_sched(head, func) call_rcu(head, func)
160
161extern void __rcu_init(void);
162#define rcu_init_sched() do { } while (0)
163extern void rcu_check_callbacks(int cpu, int user);
164extern void rcu_restart_cpu(int cpu);
165
166extern long rcu_batches_completed(void);
167extern long rcu_batches_completed_bh(void);
168
169#define rcu_enter_nohz() do { } while (0)
170#define rcu_exit_nohz() do { } while (0)
171
172/* A context switch is a grace period for rcuclassic. */
173static inline int rcu_blocking_is_gp(void)
174{
175 return num_online_cpus() == 1;
176}
177
178#endif /* __LINUX_RCUCLASSIC_H */
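
The rcu_batch_before()/rcu_batch_after() helpers deleted above order batch
numbers by the sign of their difference rather than with a plain "<". A small
standalone illustration (ordinary userspace C, not kernel code; it uses
unsigned counters plus a signed cast so the wraparound case is well defined
here, whereas the kernel's long-based version relies on its wrapping build
flags for the same effect):

#include <stdio.h>

/* Same idea as the deleted rcu_batch_before(): the sign of the difference
 * keeps giving the intended ordering even after the counter wraps. */
static int batch_before(unsigned long a, unsigned long b)
{
        return (long)(a - b) < 0;
}

int main(void)
{
        unsigned long a = (unsigned long)-2;    /* just before wraparound */
        unsigned long b = a + 5;                /* just after wraparound */

        printf("a < b        : %d\n", a < b);                   /* 0: naive test fails */
        printf("batch_before : %d\n", batch_before(a, b));      /* 1: correct ordering */
        return 0;
}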
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 15fbb3ca634d..3c89d6a2591f 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -51,12 +51,22 @@ struct rcu_head {
51 void (*func)(struct rcu_head *head); 51 void (*func)(struct rcu_head *head);
52}; 52};
53 53
54/* Internal to kernel, but needed by rcupreempt.h. */ 54/* Exported common interfaces */
55extern void synchronize_rcu(void);
56extern void synchronize_rcu_bh(void);
57extern void rcu_barrier(void);
58extern void rcu_barrier_bh(void);
59extern void rcu_barrier_sched(void);
60extern void synchronize_sched_expedited(void);
61extern int sched_expedited_torture_stats(char *page);
62
63/* Internal to kernel */
64extern void rcu_init(void);
65extern void rcu_scheduler_starting(void);
66extern int rcu_needs_cpu(int cpu);
55extern int rcu_scheduler_active; 67extern int rcu_scheduler_active;
56 68
57#if defined(CONFIG_CLASSIC_RCU) 69#if defined(CONFIG_TREE_RCU)
58#include <linux/rcuclassic.h>
59#elif defined(CONFIG_TREE_RCU)
60#include <linux/rcutree.h> 70#include <linux/rcutree.h>
61#elif defined(CONFIG_PREEMPT_RCU) 71#elif defined(CONFIG_PREEMPT_RCU)
62#include <linux/rcupreempt.h> 72#include <linux/rcupreempt.h>
@@ -259,15 +269,4 @@ extern void call_rcu(struct rcu_head *head,
259extern void call_rcu_bh(struct rcu_head *head, 269extern void call_rcu_bh(struct rcu_head *head,
260 void (*func)(struct rcu_head *head)); 270 void (*func)(struct rcu_head *head));
261 271
262/* Exported common interfaces */
263extern void synchronize_rcu(void);
264extern void rcu_barrier(void);
265extern void rcu_barrier_bh(void);
266extern void rcu_barrier_sched(void);
267
268/* Internal to kernel */
269extern void rcu_init(void);
270extern void rcu_scheduler_starting(void);
271extern int rcu_needs_cpu(int cpu);
272
273#endif /* __LINUX_RCUPDATE_H */ 272#endif /* __LINUX_RCUPDATE_H */
diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h
index fce522782ffa..f164ac9b7807 100644
--- a/include/linux/rcupreempt.h
+++ b/include/linux/rcupreempt.h
@@ -74,6 +74,16 @@ extern int rcu_needs_cpu(int cpu);
74 74
75extern void __synchronize_sched(void); 75extern void __synchronize_sched(void);
76 76
77static inline void synchronize_rcu_expedited(void)
78{
79 synchronize_rcu(); /* Placeholder for new rcupreempt implementation. */
80}
81
82static inline void synchronize_rcu_bh_expedited(void)
83{
84 synchronize_rcu_bh(); /* Placeholder for new rcupreempt impl. */
85}
86
77extern void __rcu_init(void); 87extern void __rcu_init(void);
78extern void rcu_init_sched(void); 88extern void rcu_init_sched(void);
79extern void rcu_check_callbacks(int cpu, int user); 89extern void rcu_check_callbacks(int cpu, int user);
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 5a5153806c42..d4dfd2489633 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -286,8 +286,14 @@ static inline void __rcu_read_unlock_bh(void)
286 286
287#define call_rcu_sched(head, func) call_rcu(head, func) 287#define call_rcu_sched(head, func) call_rcu(head, func)
288 288
289static inline void rcu_init_sched(void) 289static inline void synchronize_rcu_expedited(void)
290{
291 synchronize_sched_expedited();
292}
293
294static inline void synchronize_rcu_bh_expedited(void)
290{ 295{
296 synchronize_sched_expedited();
291} 297}
292 298
293extern void __rcu_init(void); 299extern void __rcu_init(void);
@@ -297,6 +303,10 @@ extern void rcu_restart_cpu(int cpu);
297extern long rcu_batches_completed(void); 303extern long rcu_batches_completed(void);
298extern long rcu_batches_completed_bh(void); 304extern long rcu_batches_completed_bh(void);
299 305
306static inline void rcu_init_sched(void)
307{
308}
309
300#ifdef CONFIG_NO_HZ 310#ifdef CONFIG_NO_HZ
301void rcu_enter_nohz(void); 311void rcu_enter_nohz(void);
302void rcu_exit_nohz(void); 312void rcu_exit_nohz(void);
diff --git a/init/Kconfig b/init/Kconfig
index 3f7e60995c80..25373cf32672 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -316,21 +316,13 @@ choice
316 prompt "RCU Implementation" 316 prompt "RCU Implementation"
317 default TREE_RCU 317 default TREE_RCU
318 318
319config CLASSIC_RCU
320 bool "Classic RCU"
321 help
322 This option selects the classic RCU implementation that is
323 designed for best read-side performance on non-realtime
324 systems.
325
326 Select this option if you are unsure.
327
328config TREE_RCU 319config TREE_RCU
329 bool "Tree-based hierarchical RCU" 320 bool "Tree-based hierarchical RCU"
330 help 321 help
331 This option selects the RCU implementation that is 322 This option selects the RCU implementation that is
332 designed for very large SMP system with hundreds or 323 designed for very large SMP system with hundreds or
333 thousands of CPUs. 324 thousands of CPUs. It also scales down nicely to
325 smaller systems.
334 326
335config PREEMPT_RCU 327config PREEMPT_RCU
336 bool "Preemptible RCU" 328 bool "Preemptible RCU"
diff --git a/kernel/Makefile b/kernel/Makefile
index 2093a691f1c2..2419c9d43918 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -80,7 +80,6 @@ obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
80obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ 80obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
81obj-$(CONFIG_SECCOMP) += seccomp.o 81obj-$(CONFIG_SECCOMP) += seccomp.o
82obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o 82obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
83obj-$(CONFIG_CLASSIC_RCU) += rcuclassic.o
84obj-$(CONFIG_TREE_RCU) += rcutree.o 83obj-$(CONFIG_TREE_RCU) += rcutree.o
85obj-$(CONFIG_PREEMPT_RCU) += rcupreempt.o 84obj-$(CONFIG_PREEMPT_RCU) += rcupreempt.o
86obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o 85obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
deleted file mode 100644
index 0f2b0b311304..000000000000
--- a/kernel/rcuclassic.c
+++ /dev/null
@@ -1,807 +0,0 @@
1/*
2 * Read-Copy Update mechanism for mutual exclusion
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright IBM Corporation, 2001
19 *
20 * Authors: Dipankar Sarma <dipankar@in.ibm.com>
21 * Manfred Spraul <manfred@colorfullife.com>
22 *
23 * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
24 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
25 * Papers:
26 * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
27 * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
28 *
29 * For detailed explanation of Read-Copy Update mechanism see -
30 * Documentation/RCU
31 *
32 */
33#include <linux/types.h>
34#include <linux/kernel.h>
35#include <linux/init.h>
36#include <linux/spinlock.h>
37#include <linux/smp.h>
38#include <linux/rcupdate.h>
39#include <linux/interrupt.h>
40#include <linux/sched.h>
41#include <asm/atomic.h>
42#include <linux/bitops.h>
43#include <linux/module.h>
44#include <linux/completion.h>
45#include <linux/moduleparam.h>
46#include <linux/percpu.h>
47#include <linux/notifier.h>
48#include <linux/cpu.h>
49#include <linux/mutex.h>
50#include <linux/time.h>
51
52#ifdef CONFIG_DEBUG_LOCK_ALLOC
53static struct lock_class_key rcu_lock_key;
54struct lockdep_map rcu_lock_map =
55 STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key);
56EXPORT_SYMBOL_GPL(rcu_lock_map);
57#endif
58
59
60/* Definition for rcupdate control block. */
61static struct rcu_ctrlblk rcu_ctrlblk = {
62 .cur = -300,
63 .completed = -300,
64 .pending = -300,
65 .lock = __SPIN_LOCK_UNLOCKED(&rcu_ctrlblk.lock),
66 .cpumask = CPU_BITS_NONE,
67};
68
69static struct rcu_ctrlblk rcu_bh_ctrlblk = {
70 .cur = -300,
71 .completed = -300,
72 .pending = -300,
73 .lock = __SPIN_LOCK_UNLOCKED(&rcu_bh_ctrlblk.lock),
74 .cpumask = CPU_BITS_NONE,
75};
76
77static DEFINE_PER_CPU(struct rcu_data, rcu_data);
78static DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
79
80/*
81 * Increment the quiescent state counter.
82 * The counter is a bit degenerated: We do not need to know
83 * how many quiescent states passed, just if there was at least
84 * one since the start of the grace period. Thus just a flag.
85 */
86void rcu_qsctr_inc(int cpu)
87{
88 struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
89 rdp->passed_quiesc = 1;
90}
91
92void rcu_bh_qsctr_inc(int cpu)
93{
94 struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
95 rdp->passed_quiesc = 1;
96}
97
98static int blimit = 10;
99static int qhimark = 10000;
100static int qlowmark = 100;
101
102#ifdef CONFIG_SMP
103static void force_quiescent_state(struct rcu_data *rdp,
104 struct rcu_ctrlblk *rcp)
105{
106 int cpu;
107 unsigned long flags;
108
109 set_need_resched();
110 spin_lock_irqsave(&rcp->lock, flags);
111 if (unlikely(!rcp->signaled)) {
112 rcp->signaled = 1;
113 /*
114 * Don't send IPI to itself. With irqs disabled,
115 * rdp->cpu is the current cpu.
116 *
117 * cpu_online_mask is updated by the _cpu_down()
118 * using __stop_machine(). Since we're in irqs disabled
119 * section, __stop_machine() is not exectuting, hence
120 * the cpu_online_mask is stable.
121 *
122 * However, a cpu might have been offlined _just_ before
123 * we disabled irqs while entering here.
124 * And rcu subsystem might not yet have handled the CPU_DEAD
125 * notification, leading to the offlined cpu's bit
126 * being set in the rcp->cpumask.
127 *
128 * Hence cpumask = (rcp->cpumask & cpu_online_mask) to prevent
129 * sending smp_reschedule() to an offlined CPU.
130 */
131 for_each_cpu_and(cpu,
132 to_cpumask(rcp->cpumask), cpu_online_mask) {
133 if (cpu != rdp->cpu)
134 smp_send_reschedule(cpu);
135 }
136 }
137 spin_unlock_irqrestore(&rcp->lock, flags);
138}
139#else
140static inline void force_quiescent_state(struct rcu_data *rdp,
141 struct rcu_ctrlblk *rcp)
142{
143 set_need_resched();
144}
145#endif
146
147static void __call_rcu(struct rcu_head *head, struct rcu_ctrlblk *rcp,
148 struct rcu_data *rdp)
149{
150 long batch;
151
152 head->next = NULL;
153 smp_mb(); /* Read of rcu->cur must happen after any change by caller. */
154
155 /*
156 * Determine the batch number of this callback.
157 *
158 * Using ACCESS_ONCE to avoid the following error when gcc eliminates
159 * local variable "batch" and emits codes like this:
160 * 1) rdp->batch = rcp->cur + 1 # gets old value
161 * ......
162 * 2)rcu_batch_after(rcp->cur + 1, rdp->batch) # gets new value
163 * then [*nxttail[0], *nxttail[1]) may contain callbacks
164 * that batch# = rdp->batch, see the comment of struct rcu_data.
165 */
166 batch = ACCESS_ONCE(rcp->cur) + 1;
167
168 if (rdp->nxtlist && rcu_batch_after(batch, rdp->batch)) {
169 /* process callbacks */
170 rdp->nxttail[0] = rdp->nxttail[1];
171 rdp->nxttail[1] = rdp->nxttail[2];
172 if (rcu_batch_after(batch - 1, rdp->batch))
173 rdp->nxttail[0] = rdp->nxttail[2];
174 }
175
176 rdp->batch = batch;
177 *rdp->nxttail[2] = head;
178 rdp->nxttail[2] = &head->next;
179
180 if (unlikely(++rdp->qlen > qhimark)) {
181 rdp->blimit = INT_MAX;
182 force_quiescent_state(rdp, &rcu_ctrlblk);
183 }
184}
185
186#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
187
188static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
189{
190 rcp->gp_start = jiffies;
191 rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK;
192}
193
194static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
195{
196 int cpu;
197 long delta;
198 unsigned long flags;
199
200 /* Only let one CPU complain about others per time interval. */
201
202 spin_lock_irqsave(&rcp->lock, flags);
203 delta = jiffies - rcp->jiffies_stall;
204 if (delta < 2 || rcp->cur != rcp->completed) {
205 spin_unlock_irqrestore(&rcp->lock, flags);
206 return;
207 }
208 rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
209 spin_unlock_irqrestore(&rcp->lock, flags);
210
211 /* OK, time to rat on our buddy... */
212
213 printk(KERN_ERR "INFO: RCU detected CPU stalls:");
214 for_each_possible_cpu(cpu) {
215 if (cpumask_test_cpu(cpu, to_cpumask(rcp->cpumask)))
216 printk(" %d", cpu);
217 }
218 printk(" (detected by %d, t=%ld jiffies)\n",
219 smp_processor_id(), (long)(jiffies - rcp->gp_start));
220}
221
222static void print_cpu_stall(struct rcu_ctrlblk *rcp)
223{
224 unsigned long flags;
225
226 printk(KERN_ERR "INFO: RCU detected CPU %d stall (t=%lu/%lu jiffies)\n",
227 smp_processor_id(), jiffies,
228 jiffies - rcp->gp_start);
229 dump_stack();
230 spin_lock_irqsave(&rcp->lock, flags);
231 if ((long)(jiffies - rcp->jiffies_stall) >= 0)
232 rcp->jiffies_stall =
233 jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
234 spin_unlock_irqrestore(&rcp->lock, flags);
235 set_need_resched(); /* kick ourselves to get things going. */
236}
237
238static void check_cpu_stall(struct rcu_ctrlblk *rcp)
239{
240 long delta;
241
242 delta = jiffies - rcp->jiffies_stall;
243 if (cpumask_test_cpu(smp_processor_id(), to_cpumask(rcp->cpumask)) &&
244 delta >= 0) {
245
246 /* We haven't checked in, so go dump stack. */
247 print_cpu_stall(rcp);
248
249 } else if (rcp->cur != rcp->completed && delta >= 2) {
250
251 /* They had two seconds to dump stack, so complain. */
252 print_other_cpu_stall(rcp);
253 }
254}
255
256#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
257
258static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
259{
260}
261
262static inline void check_cpu_stall(struct rcu_ctrlblk *rcp)
263{
264}
265
266#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
267
268/**
269 * call_rcu - Queue an RCU callback for invocation after a grace period.
270 * @head: structure to be used for queueing the RCU updates.
271 * @func: actual update function to be invoked after the grace period
272 *
273 * The update function will be invoked some time after a full grace
274 * period elapses, in other words after all currently executing RCU
275 * read-side critical sections have completed. RCU read-side critical
276 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
277 * and may be nested.
278 */
279void call_rcu(struct rcu_head *head,
280 void (*func)(struct rcu_head *rcu))
281{
282 unsigned long flags;
283
284 head->func = func;
285 local_irq_save(flags);
286 __call_rcu(head, &rcu_ctrlblk, &__get_cpu_var(rcu_data));
287 local_irq_restore(flags);
288}
289EXPORT_SYMBOL_GPL(call_rcu);
290
291/**
292 * call_rcu_bh - Queue an RCU for invocation after a quicker grace period.
293 * @head: structure to be used for queueing the RCU updates.
294 * @func: actual update function to be invoked after the grace period
295 *
296 * The update function will be invoked some time after a full grace
297 * period elapses, in other words after all currently executing RCU
298 * read-side critical sections have completed. call_rcu_bh() assumes
299 * that the read-side critical sections end on completion of a softirq
300 * handler. This means that read-side critical sections in process
301 * context must not be interrupted by softirqs. This interface is to be
302 * used when most of the read-side critical sections are in softirq context.
303 * RCU read-side critical sections are delimited by rcu_read_lock() and
304 * rcu_read_unlock(), * if in interrupt context or rcu_read_lock_bh()
305 * and rcu_read_unlock_bh(), if in process context. These may be nested.
306 */
307void call_rcu_bh(struct rcu_head *head,
308 void (*func)(struct rcu_head *rcu))
309{
310 unsigned long flags;
311
312 head->func = func;
313 local_irq_save(flags);
314 __call_rcu(head, &rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data));
315 local_irq_restore(flags);
316}
317EXPORT_SYMBOL_GPL(call_rcu_bh);
318
319/*
320 * Return the number of RCU batches processed thus far. Useful
321 * for debug and statistics.
322 */
323long rcu_batches_completed(void)
324{
325 return rcu_ctrlblk.completed;
326}
327EXPORT_SYMBOL_GPL(rcu_batches_completed);
328
329/*
330 * Return the number of RCU batches processed thus far. Useful
331 * for debug and statistics.
332 */
333long rcu_batches_completed_bh(void)
334{
335 return rcu_bh_ctrlblk.completed;
336}
337EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
338
339/* Raises the softirq for processing rcu_callbacks. */
340static inline void raise_rcu_softirq(void)
341{
342 raise_softirq(RCU_SOFTIRQ);
343}
344
345/*
346 * Invoke the completed RCU callbacks. They are expected to be in
347 * a per-cpu list.
348 */
349static void rcu_do_batch(struct rcu_data *rdp)
350{
351 unsigned long flags;
352 struct rcu_head *next, *list;
353 int count = 0;
354
355 list = rdp->donelist;
356 while (list) {
357 next = list->next;
358 prefetch(next);
359 list->func(list);
360 list = next;
361 if (++count >= rdp->blimit)
362 break;
363 }
364 rdp->donelist = list;
365
366 local_irq_save(flags);
367 rdp->qlen -= count;
368 local_irq_restore(flags);
369 if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark)
370 rdp->blimit = blimit;
371
372 if (!rdp->donelist)
373 rdp->donetail = &rdp->donelist;
374 else
375 raise_rcu_softirq();
376}
377
378/*
379 * Grace period handling:
380 * The grace period handling consists out of two steps:
381 * - A new grace period is started.
382 * This is done by rcu_start_batch. The start is not broadcasted to
383 * all cpus, they must pick this up by comparing rcp->cur with
384 * rdp->quiescbatch. All cpus are recorded in the
385 * rcu_ctrlblk.cpumask bitmap.
386 * - All cpus must go through a quiescent state.
387 * Since the start of the grace period is not broadcasted, at least two
388 * calls to rcu_check_quiescent_state are required:
389 * The first call just notices that a new grace period is running. The
390 * following calls check if there was a quiescent state since the beginning
391 * of the grace period. If so, it updates rcu_ctrlblk.cpumask. If
392 * the bitmap is empty, then the grace period is completed.
393 * rcu_check_quiescent_state calls rcu_start_batch(0) to start the next grace
394 * period (if necessary).
395 */
396
397/*
398 * Register a new batch of callbacks, and start it up if there is currently no
399 * active batch and the batch to be registered has not already occurred.
400 * Caller must hold rcu_ctrlblk.lock.
401 */
402static void rcu_start_batch(struct rcu_ctrlblk *rcp)
403{
404 if (rcp->cur != rcp->pending &&
405 rcp->completed == rcp->cur) {
406 rcp->cur++;
407 record_gp_stall_check_time(rcp);
408
409 /*
410 * Accessing nohz_cpu_mask before incrementing rcp->cur needs a
411 * Barrier Otherwise it can cause tickless idle CPUs to be
412 * included in rcp->cpumask, which will extend graceperiods
413 * unnecessarily.
414 */
415 smp_mb();
416 cpumask_andnot(to_cpumask(rcp->cpumask),
417 cpu_online_mask, nohz_cpu_mask);
418
419 rcp->signaled = 0;
420 }
421}
422
423/*
424 * cpu went through a quiescent state since the beginning of the grace period.
425 * Clear it from the cpu mask and complete the grace period if it was the last
426 * cpu. Start another grace period if someone has further entries pending
427 */
428static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp)
429{
430 cpumask_clear_cpu(cpu, to_cpumask(rcp->cpumask));
431 if (cpumask_empty(to_cpumask(rcp->cpumask))) {
432 /* batch completed ! */
433 rcp->completed = rcp->cur;
434 rcu_start_batch(rcp);
435 }
436}
437
438/*
439 * Check if the cpu has gone through a quiescent state (say context
440 * switch). If so and if it already hasn't done so in this RCU
441 * quiescent cycle, then indicate that it has done so.
442 */
443static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
444 struct rcu_data *rdp)
445{
446 unsigned long flags;
447
448 if (rdp->quiescbatch != rcp->cur) {
449 /* start new grace period: */
450 rdp->qs_pending = 1;
451 rdp->passed_quiesc = 0;
452 rdp->quiescbatch = rcp->cur;
453 return;
454 }
455
456 /* Grace period already completed for this cpu?
457 * qs_pending is checked instead of the actual bitmap to avoid
458 * cacheline trashing.
459 */
460 if (!rdp->qs_pending)
461 return;
462
463 /*
464 * Was there a quiescent state since the beginning of the grace
465 * period? If no, then exit and wait for the next call.
466 */
467 if (!rdp->passed_quiesc)
468 return;
469 rdp->qs_pending = 0;
470
471 spin_lock_irqsave(&rcp->lock, flags);
472 /*
473 * rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync
474 * during cpu startup. Ignore the quiescent state.
475 */
476 if (likely(rdp->quiescbatch == rcp->cur))
477 cpu_quiet(rdp->cpu, rcp);
478
479 spin_unlock_irqrestore(&rcp->lock, flags);
480}
481
482
483#ifdef CONFIG_HOTPLUG_CPU
484
485/* warning! helper for rcu_offline_cpu. do not use elsewhere without reviewing
486 * locking requirements, the list it's pulling from has to belong to a cpu
487 * which is dead and hence not processing interrupts.
488 */
489static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list,
490 struct rcu_head **tail, long batch)
491{
492 unsigned long flags;
493
494 if (list) {
495 local_irq_save(flags);
496 this_rdp->batch = batch;
497 *this_rdp->nxttail[2] = list;
498 this_rdp->nxttail[2] = tail;
499 local_irq_restore(flags);
500 }
501}
502
503static void __rcu_offline_cpu(struct rcu_data *this_rdp,
504 struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
505{
506 unsigned long flags;
507
508 /*
509 * if the cpu going offline owns the grace period
510 * we can block indefinitely waiting for it, so flush
511 * it here
512 */
513 spin_lock_irqsave(&rcp->lock, flags);
514 if (rcp->cur != rcp->completed)
515 cpu_quiet(rdp->cpu, rcp);
516 rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail, rcp->cur + 1);
517 rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail[2], rcp->cur + 1);
518 spin_unlock(&rcp->lock);
519
520 this_rdp->qlen += rdp->qlen;
521 local_irq_restore(flags);
522}
523
524static void rcu_offline_cpu(int cpu)
525{
526 struct rcu_data *this_rdp = &get_cpu_var(rcu_data);
527 struct rcu_data *this_bh_rdp = &get_cpu_var(rcu_bh_data);
528
529 __rcu_offline_cpu(this_rdp, &rcu_ctrlblk,
530 &per_cpu(rcu_data, cpu));
531 __rcu_offline_cpu(this_bh_rdp, &rcu_bh_ctrlblk,
532 &per_cpu(rcu_bh_data, cpu));
533 put_cpu_var(rcu_data);
534 put_cpu_var(rcu_bh_data);
535}
536
537#else
538
539static void rcu_offline_cpu(int cpu)
540{
541}
542
543#endif
544
545/*
546 * This does the RCU processing work from softirq context.
547 */
548static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
549 struct rcu_data *rdp)
550{
551 unsigned long flags;
552 long completed_snap;
553
554 if (rdp->nxtlist) {
555 local_irq_save(flags);
556 completed_snap = ACCESS_ONCE(rcp->completed);
557
558 /*
559 * move the other grace-period-completed entries to
560 * [rdp->nxtlist, *rdp->nxttail[0]) temporarily
561 */
562 if (!rcu_batch_before(completed_snap, rdp->batch))
563 rdp->nxttail[0] = rdp->nxttail[1] = rdp->nxttail[2];
564 else if (!rcu_batch_before(completed_snap, rdp->batch - 1))
565 rdp->nxttail[0] = rdp->nxttail[1];
566
567 /*
568 * the grace period for entries in
569 * [rdp->nxtlist, *rdp->nxttail[0]) has completed and
570 * move these entries to donelist
571 */
572 if (rdp->nxttail[0] != &rdp->nxtlist) {
573 *rdp->donetail = rdp->nxtlist;
574 rdp->donetail = rdp->nxttail[0];
575 rdp->nxtlist = *rdp->nxttail[0];
576 *rdp->donetail = NULL;
577
578 if (rdp->nxttail[1] == rdp->nxttail[0])
579 rdp->nxttail[1] = &rdp->nxtlist;
580 if (rdp->nxttail[2] == rdp->nxttail[0])
581 rdp->nxttail[2] = &rdp->nxtlist;
582 rdp->nxttail[0] = &rdp->nxtlist;
583 }
584
585 local_irq_restore(flags);
586
587 if (rcu_batch_after(rdp->batch, rcp->pending)) {
588 unsigned long flags2;
589
590 /* and start it/schedule start if it's a new batch */
591 spin_lock_irqsave(&rcp->lock, flags2);
592 if (rcu_batch_after(rdp->batch, rcp->pending)) {
593 rcp->pending = rdp->batch;
594 rcu_start_batch(rcp);
595 }
596 spin_unlock_irqrestore(&rcp->lock, flags2);
597 }
598 }
599
600 rcu_check_quiescent_state(rcp, rdp);
601 if (rdp->donelist)
602 rcu_do_batch(rdp);
603}
604
605static void rcu_process_callbacks(struct softirq_action *unused)
606{
607 /*
608 * Memory references from any prior RCU read-side critical sections
609 * executed by the interrupted code must be see before any RCU
610 * grace-period manupulations below.
611 */
612
613 smp_mb(); /* See above block comment. */
614
615 __rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data));
616 __rcu_process_callbacks(&rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data));
617
618 /*
619 * Memory references from any later RCU read-side critical sections
620 * executed by the interrupted code must be see after any RCU
621 * grace-period manupulations above.
622 */
623
624 smp_mb(); /* See above block comment. */
625}
626
627static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
628{
629 /* Check for CPU stalls, if enabled. */
630 check_cpu_stall(rcp);
631
632 if (rdp->nxtlist) {
633 long completed_snap = ACCESS_ONCE(rcp->completed);
634
635 /*
636 * This cpu has pending rcu entries and the grace period
637 * for them has completed.
638 */
639 if (!rcu_batch_before(completed_snap, rdp->batch))
640 return 1;
641 if (!rcu_batch_before(completed_snap, rdp->batch - 1) &&
642 rdp->nxttail[0] != rdp->nxttail[1])
643 return 1;
644 if (rdp->nxttail[0] != &rdp->nxtlist)
645 return 1;
646
647 /*
648 * This cpu has pending rcu entries and the new batch
649 * for then hasn't been started nor scheduled start
650 */
651 if (rcu_batch_after(rdp->batch, rcp->pending))
652 return 1;
653 }
654
655 /* This cpu has finished callbacks to invoke */
656 if (rdp->donelist)
657 return 1;
658
659 /* The rcu core waits for a quiescent state from the cpu */
660 if (rdp->quiescbatch != rcp->cur || rdp->qs_pending)
661 return 1;
662
663 /* nothing to do */
664 return 0;
665}
666
667/*
668 * Check to see if there is any immediate RCU-related work to be done
669 * by the current CPU, returning 1 if so. This function is part of the
670 * RCU implementation; it is -not- an exported member of the RCU API.
671 */
672int rcu_pending(int cpu)
673{
674 return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu)) ||
675 __rcu_pending(&rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu));
676}
677
678/*
679 * Check to see if any future RCU-related work will need to be done
680 * by the current CPU, even if none need be done immediately, returning
681 * 1 if so. This function is part of the RCU implementation; it is -not-
682 * an exported member of the RCU API.
683 */
684int rcu_needs_cpu(int cpu)
685{
686 struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
687 struct rcu_data *rdp_bh = &per_cpu(rcu_bh_data, cpu);
688
689 return !!rdp->nxtlist || !!rdp_bh->nxtlist || rcu_pending(cpu);
690}
691
692/*
693 * Top-level function driving RCU grace-period detection, normally
694 * invoked from the scheduler-clock interrupt. This function simply
695 * increments counters that are read only from softirq by this same
696 * CPU, so there are no memory barriers required.
697 */
698void rcu_check_callbacks(int cpu, int user)
699{
700 if (user ||
701 (idle_cpu(cpu) && rcu_scheduler_active &&
702 !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
703
704 /*
705 * Get here if this CPU took its interrupt from user
706 * mode or from the idle loop, and if this is not a
707 * nested interrupt. In this case, the CPU is in
708 * a quiescent state, so count it.
709 *
710 * Also do a memory barrier. This is needed to handle
711 * the case where writes from a preempt-disable section
712 * of code get reordered into schedule() by this CPU's
713 * write buffer. The memory barrier makes sure that
714 * the rcu_qsctr_inc() and rcu_bh_qsctr_inc() are see
715 * by other CPUs to happen after any such write.
716 */
717
718 smp_mb(); /* See above block comment. */
719 rcu_qsctr_inc(cpu);
720 rcu_bh_qsctr_inc(cpu);
721
722 } else if (!in_softirq()) {
723
724 /*
725 * Get here if this CPU did not take its interrupt from
726 * softirq, in other words, if it is not interrupting
727 * a rcu_bh read-side critical section. This is an _bh
728 * critical section, so count it. The memory barrier
729 * is needed for the same reason as is the above one.
730 */
731
732 smp_mb(); /* See above block comment. */
733 rcu_bh_qsctr_inc(cpu);
734 }
735 raise_rcu_softirq();
736}
737
738static void __cpuinit rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
739 struct rcu_data *rdp)
740{
741 unsigned long flags;
742
743 spin_lock_irqsave(&rcp->lock, flags);
744 memset(rdp, 0, sizeof(*rdp));
745 rdp->nxttail[0] = rdp->nxttail[1] = rdp->nxttail[2] = &rdp->nxtlist;
746 rdp->donetail = &rdp->donelist;
747 rdp->quiescbatch = rcp->completed;
748 rdp->qs_pending = 0;
749 rdp->cpu = cpu;
750 rdp->blimit = blimit;
751 spin_unlock_irqrestore(&rcp->lock, flags);
752}
753
754static void __cpuinit rcu_online_cpu(int cpu)
755{
756 struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
757 struct rcu_data *bh_rdp = &per_cpu(rcu_bh_data, cpu);
758
759 rcu_init_percpu_data(cpu, &rcu_ctrlblk, rdp);
760 rcu_init_percpu_data(cpu, &rcu_bh_ctrlblk, bh_rdp);
761 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
762}
763
764static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
765 unsigned long action, void *hcpu)
766{
767 long cpu = (long)hcpu;
768
769 switch (action) {
770 case CPU_UP_PREPARE:
771 case CPU_UP_PREPARE_FROZEN:
772 rcu_online_cpu(cpu);
773 break;
774 case CPU_DEAD:
775 case CPU_DEAD_FROZEN:
776 rcu_offline_cpu(cpu);
777 break;
778 default:
779 break;
780 }
781 return NOTIFY_OK;
782}
783
784static struct notifier_block __cpuinitdata rcu_nb = {
785 .notifier_call = rcu_cpu_notify,
786};
787
788/*
789 * Initializes rcu mechanism. Assumed to be called early.
790 * That is before local timer(SMP) or jiffie timer (uniproc) is setup.
791 * Note that rcu_qsctr and friends are implicitly
792 * initialized due to the choice of ``0'' for RCU_CTR_INVALID.
793 */
794void __init __rcu_init(void)
795{
796#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
797 printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
798#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
799 rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE,
800 (void *)(long)smp_processor_id());
801 /* Register notifier for non-boot CPUs */
802 register_cpu_notifier(&rcu_nb);
803}
804
805module_param(blimit, int, 0);
806module_param(qhimark, int, 0);
807module_param(qlowmark, int, 0);
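
The __call_rcu() comment in the file deleted above explains why rcp->cur is
read with ACCESS_ONCE(): without it, the compiler may re-load the variable and
use two different values for what the code treats as a single snapshot. A
standalone sketch of the idiom (ACCESS_ONCE is reproduced to match the kernel
macro of that era; the surrounding code is illustrative only):

#include <stdio.h>

/* Force exactly one load of x, as the kernel macro did. */
#define ACCESS_ONCE(x) (*(volatile __typeof__(x) *)&(x))

static long cur;                /* imagine this is updated concurrently */

static long pick_batch(void)
{
        /* Single snapshot: every later use of 'batch' sees the same value,
         * which is what the deleted __call_rcu() depends on. */
        long batch = ACCESS_ONCE(cur) + 1;

        return batch;
}

int main(void)
{
        cur = 41;
        printf("batch = %ld\n", pick_batch());  /* prints 42 */
        return 0;
}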
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index a967c9feb90a..eae29c25fb14 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -98,6 +98,30 @@ void synchronize_rcu(void)
98} 98}
99EXPORT_SYMBOL_GPL(synchronize_rcu); 99EXPORT_SYMBOL_GPL(synchronize_rcu);
100 100
101/**
102 * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.
103 *
104 * Control will return to the caller some time after a full rcu_bh grace
105 * period has elapsed, in other words after all currently executing rcu_bh
106 * read-side critical sections have completed. RCU read-side critical
107 * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(),
108 * and may be nested.
109 */
110void synchronize_rcu_bh(void)
111{
112 struct rcu_synchronize rcu;
113
114 if (rcu_blocking_is_gp())
115 return;
116
117 init_completion(&rcu.completion);
118 /* Will wake me after RCU finished. */
119 call_rcu_bh(&rcu.head, wakeme_after_rcu);
120 /* Wait for it. */
121 wait_for_completion(&rcu.completion);
122}
123EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
124
101static void rcu_barrier_callback(struct rcu_head *notused) 125static void rcu_barrier_callback(struct rcu_head *notused)
102{ 126{
103 if (atomic_dec_and_test(&rcu_barrier_cpu_count)) 127 if (atomic_dec_and_test(&rcu_barrier_cpu_count))
@@ -129,6 +153,7 @@ static void rcu_barrier_func(void *type)
129static inline void wait_migrated_callbacks(void) 153static inline void wait_migrated_callbacks(void)
130{ 154{
131 wait_event(rcu_migrate_wq, !atomic_read(&rcu_migrate_type_count)); 155 wait_event(rcu_migrate_wq, !atomic_read(&rcu_migrate_type_count));
156 smp_mb(); /* In case we didn't sleep. */
132} 157}
133 158
134/* 159/*
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 9b4a975a4b4a..b33db539a8ad 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -257,14 +257,14 @@ struct rcu_torture_ops {
257 void (*init)(void); 257 void (*init)(void);
258 void (*cleanup)(void); 258 void (*cleanup)(void);
259 int (*readlock)(void); 259 int (*readlock)(void);
260 void (*readdelay)(struct rcu_random_state *rrsp); 260 void (*read_delay)(struct rcu_random_state *rrsp);
261 void (*readunlock)(int idx); 261 void (*readunlock)(int idx);
262 int (*completed)(void); 262 int (*completed)(void);
263 void (*deferredfree)(struct rcu_torture *p); 263 void (*deferred_free)(struct rcu_torture *p);
264 void (*sync)(void); 264 void (*sync)(void);
265 void (*cb_barrier)(void); 265 void (*cb_barrier)(void);
266 int (*stats)(char *page); 266 int (*stats)(char *page);
267 int irqcapable; 267 int irq_capable;
268 char *name; 268 char *name;
269}; 269};
270static struct rcu_torture_ops *cur_ops = NULL; 270static struct rcu_torture_ops *cur_ops = NULL;
@@ -320,7 +320,7 @@ rcu_torture_cb(struct rcu_head *p)
320 rp->rtort_mbtest = 0; 320 rp->rtort_mbtest = 0;
321 rcu_torture_free(rp); 321 rcu_torture_free(rp);
322 } else 322 } else
323 cur_ops->deferredfree(rp); 323 cur_ops->deferred_free(rp);
324} 324}
325 325
326static void rcu_torture_deferred_free(struct rcu_torture *p) 326static void rcu_torture_deferred_free(struct rcu_torture *p)
@@ -329,18 +329,18 @@ static void rcu_torture_deferred_free(struct rcu_torture *p)
329} 329}
330 330
331static struct rcu_torture_ops rcu_ops = { 331static struct rcu_torture_ops rcu_ops = {
332 .init = NULL, 332 .init = NULL,
333 .cleanup = NULL, 333 .cleanup = NULL,
334 .readlock = rcu_torture_read_lock, 334 .readlock = rcu_torture_read_lock,
335 .readdelay = rcu_read_delay, 335 .read_delay = rcu_read_delay,
336 .readunlock = rcu_torture_read_unlock, 336 .readunlock = rcu_torture_read_unlock,
337 .completed = rcu_torture_completed, 337 .completed = rcu_torture_completed,
338 .deferredfree = rcu_torture_deferred_free, 338 .deferred_free = rcu_torture_deferred_free,
339 .sync = synchronize_rcu, 339 .sync = synchronize_rcu,
340 .cb_barrier = rcu_barrier, 340 .cb_barrier = rcu_barrier,
341 .stats = NULL, 341 .stats = NULL,
342 .irqcapable = 1, 342 .irq_capable = 1,
343 .name = "rcu" 343 .name = "rcu"
344}; 344};
345 345
346static void rcu_sync_torture_deferred_free(struct rcu_torture *p) 346static void rcu_sync_torture_deferred_free(struct rcu_torture *p)
@@ -370,18 +370,18 @@ static void rcu_sync_torture_init(void)
370} 370}
371 371
372static struct rcu_torture_ops rcu_sync_ops = { 372static struct rcu_torture_ops rcu_sync_ops = {
373 .init = rcu_sync_torture_init, 373 .init = rcu_sync_torture_init,
374 .cleanup = NULL, 374 .cleanup = NULL,
375 .readlock = rcu_torture_read_lock, 375 .readlock = rcu_torture_read_lock,
376 .readdelay = rcu_read_delay, 376 .read_delay = rcu_read_delay,
377 .readunlock = rcu_torture_read_unlock, 377 .readunlock = rcu_torture_read_unlock,
378 .completed = rcu_torture_completed, 378 .completed = rcu_torture_completed,
379 .deferredfree = rcu_sync_torture_deferred_free, 379 .deferred_free = rcu_sync_torture_deferred_free,
380 .sync = synchronize_rcu, 380 .sync = synchronize_rcu,
381 .cb_barrier = NULL, 381 .cb_barrier = NULL,
382 .stats = NULL, 382 .stats = NULL,
383 .irqcapable = 1, 383 .irq_capable = 1,
384 .name = "rcu_sync" 384 .name = "rcu_sync"
385}; 385};
386 386
387/* 387/*
@@ -432,33 +432,33 @@ static void rcu_bh_torture_synchronize(void)
432} 432}
433 433
434static struct rcu_torture_ops rcu_bh_ops = { 434static struct rcu_torture_ops rcu_bh_ops = {
435 .init = NULL, 435 .init = NULL,
436 .cleanup = NULL, 436 .cleanup = NULL,
437 .readlock = rcu_bh_torture_read_lock, 437 .readlock = rcu_bh_torture_read_lock,
438 .readdelay = rcu_read_delay, /* just reuse rcu's version. */ 438 .read_delay = rcu_read_delay, /* just reuse rcu's version. */
439 .readunlock = rcu_bh_torture_read_unlock, 439 .readunlock = rcu_bh_torture_read_unlock,
440 .completed = rcu_bh_torture_completed, 440 .completed = rcu_bh_torture_completed,
441 .deferredfree = rcu_bh_torture_deferred_free, 441 .deferred_free = rcu_bh_torture_deferred_free,
442 .sync = rcu_bh_torture_synchronize, 442 .sync = rcu_bh_torture_synchronize,
443 .cb_barrier = rcu_barrier_bh, 443 .cb_barrier = rcu_barrier_bh,
444 .stats = NULL, 444 .stats = NULL,
445 .irqcapable = 1, 445 .irq_capable = 1,
446 .name = "rcu_bh" 446 .name = "rcu_bh"
447}; 447};
448 448
449static struct rcu_torture_ops rcu_bh_sync_ops = { 449static struct rcu_torture_ops rcu_bh_sync_ops = {
450 .init = rcu_sync_torture_init, 450 .init = rcu_sync_torture_init,
451 .cleanup = NULL, 451 .cleanup = NULL,
452 .readlock = rcu_bh_torture_read_lock, 452 .readlock = rcu_bh_torture_read_lock,
453 .readdelay = rcu_read_delay, /* just reuse rcu's version. */ 453 .read_delay = rcu_read_delay, /* just reuse rcu's version. */
454 .readunlock = rcu_bh_torture_read_unlock, 454 .readunlock = rcu_bh_torture_read_unlock,
455 .completed = rcu_bh_torture_completed, 455 .completed = rcu_bh_torture_completed,
456 .deferredfree = rcu_sync_torture_deferred_free, 456 .deferred_free = rcu_sync_torture_deferred_free,
457 .sync = rcu_bh_torture_synchronize, 457 .sync = rcu_bh_torture_synchronize,
458 .cb_barrier = NULL, 458 .cb_barrier = NULL,
459 .stats = NULL, 459 .stats = NULL,
460 .irqcapable = 1, 460 .irq_capable = 1,
461 .name = "rcu_bh_sync" 461 .name = "rcu_bh_sync"
462}; 462};
463 463
464/* 464/*
@@ -530,17 +530,17 @@ static int srcu_torture_stats(char *page)
530} 530}
531 531
532static struct rcu_torture_ops srcu_ops = { 532static struct rcu_torture_ops srcu_ops = {
533 .init = srcu_torture_init, 533 .init = srcu_torture_init,
534 .cleanup = srcu_torture_cleanup, 534 .cleanup = srcu_torture_cleanup,
535 .readlock = srcu_torture_read_lock, 535 .readlock = srcu_torture_read_lock,
536 .readdelay = srcu_read_delay, 536 .read_delay = srcu_read_delay,
537 .readunlock = srcu_torture_read_unlock, 537 .readunlock = srcu_torture_read_unlock,
538 .completed = srcu_torture_completed, 538 .completed = srcu_torture_completed,
539 .deferredfree = rcu_sync_torture_deferred_free, 539 .deferred_free = rcu_sync_torture_deferred_free,
540 .sync = srcu_torture_synchronize, 540 .sync = srcu_torture_synchronize,
541 .cb_barrier = NULL, 541 .cb_barrier = NULL,
542 .stats = srcu_torture_stats, 542 .stats = srcu_torture_stats,
543 .name = "srcu" 543 .name = "srcu"
544}; 544};
545 545
546/* 546/*
@@ -574,32 +574,49 @@ static void sched_torture_synchronize(void)
574} 574}
575 575
576static struct rcu_torture_ops sched_ops = { 576static struct rcu_torture_ops sched_ops = {
577 .init = rcu_sync_torture_init, 577 .init = rcu_sync_torture_init,
578 .cleanup = NULL, 578 .cleanup = NULL,
579 .readlock = sched_torture_read_lock, 579 .readlock = sched_torture_read_lock,
580 .readdelay = rcu_read_delay, /* just reuse rcu's version. */ 580 .read_delay = rcu_read_delay, /* just reuse rcu's version. */
581 .readunlock = sched_torture_read_unlock, 581 .readunlock = sched_torture_read_unlock,
582 .completed = sched_torture_completed, 582 .completed = sched_torture_completed,
583 .deferredfree = rcu_sched_torture_deferred_free, 583 .deferred_free = rcu_sched_torture_deferred_free,
584 .sync = sched_torture_synchronize, 584 .sync = sched_torture_synchronize,
585 .cb_barrier = rcu_barrier_sched, 585 .cb_barrier = rcu_barrier_sched,
586 .stats = NULL, 586 .stats = NULL,
587 .irqcapable = 1, 587 .irq_capable = 1,
588 .name = "sched" 588 .name = "sched"
589}; 589};
590 590
591static struct rcu_torture_ops sched_ops_sync = { 591static struct rcu_torture_ops sched_ops_sync = {
592 .init = rcu_sync_torture_init, 592 .init = rcu_sync_torture_init,
593 .cleanup = NULL, 593 .cleanup = NULL,
594 .readlock = sched_torture_read_lock, 594 .readlock = sched_torture_read_lock,
595 .readdelay = rcu_read_delay, /* just reuse rcu's version. */ 595 .read_delay = rcu_read_delay, /* just reuse rcu's version. */
596 .readunlock = sched_torture_read_unlock, 596 .readunlock = sched_torture_read_unlock,
597 .completed = sched_torture_completed, 597 .completed = sched_torture_completed,
598 .deferredfree = rcu_sync_torture_deferred_free, 598 .deferred_free = rcu_sync_torture_deferred_free,
599 .sync = sched_torture_synchronize, 599 .sync = sched_torture_synchronize,
600 .cb_barrier = NULL, 600 .cb_barrier = NULL,
601 .stats = NULL, 601 .stats = NULL,
602 .name = "sched_sync" 602 .name = "sched_sync"
603};
604
605extern int rcu_expedited_torture_stats(char *page);
606
607static struct rcu_torture_ops sched_expedited_ops = {
608 .init = rcu_sync_torture_init,
609 .cleanup = NULL,
610 .readlock = sched_torture_read_lock,
611 .read_delay = rcu_read_delay, /* just reuse rcu's version. */
612 .readunlock = sched_torture_read_unlock,
613 .completed = sched_torture_completed,
614 .deferred_free = rcu_sync_torture_deferred_free,
615 .sync = synchronize_sched_expedited,
616 .cb_barrier = NULL,
617 .stats = rcu_expedited_torture_stats,
618 .irq_capable = 1,
619 .name = "sched_expedited"
603}; 620};
604 621
605/* 622/*
@@ -635,7 +652,7 @@ rcu_torture_writer(void *arg)
635 i = RCU_TORTURE_PIPE_LEN; 652 i = RCU_TORTURE_PIPE_LEN;
636 atomic_inc(&rcu_torture_wcount[i]); 653 atomic_inc(&rcu_torture_wcount[i]);
637 old_rp->rtort_pipe_count++; 654 old_rp->rtort_pipe_count++;
638 cur_ops->deferredfree(old_rp); 655 cur_ops->deferred_free(old_rp);
639 } 656 }
640 rcu_torture_current_version++; 657 rcu_torture_current_version++;
641 oldbatch = cur_ops->completed(); 658 oldbatch = cur_ops->completed();
@@ -700,7 +717,7 @@ static void rcu_torture_timer(unsigned long unused)
700 if (p->rtort_mbtest == 0) 717 if (p->rtort_mbtest == 0)
701 atomic_inc(&n_rcu_torture_mberror); 718 atomic_inc(&n_rcu_torture_mberror);
702 spin_lock(&rand_lock); 719 spin_lock(&rand_lock);
703 cur_ops->readdelay(&rand); 720 cur_ops->read_delay(&rand);
704 n_rcu_torture_timers++; 721 n_rcu_torture_timers++;
705 spin_unlock(&rand_lock); 722 spin_unlock(&rand_lock);
706 preempt_disable(); 723 preempt_disable();
@@ -738,11 +755,11 @@ rcu_torture_reader(void *arg)
738 755
739 VERBOSE_PRINTK_STRING("rcu_torture_reader task started"); 756 VERBOSE_PRINTK_STRING("rcu_torture_reader task started");
740 set_user_nice(current, 19); 757 set_user_nice(current, 19);
741 if (irqreader && cur_ops->irqcapable) 758 if (irqreader && cur_ops->irq_capable)
742 setup_timer_on_stack(&t, rcu_torture_timer, 0); 759 setup_timer_on_stack(&t, rcu_torture_timer, 0);
743 760
744 do { 761 do {
745 if (irqreader && cur_ops->irqcapable) { 762 if (irqreader && cur_ops->irq_capable) {
746 if (!timer_pending(&t)) 763 if (!timer_pending(&t))
747 mod_timer(&t, 1); 764 mod_timer(&t, 1);
748 } 765 }
@@ -757,7 +774,7 @@ rcu_torture_reader(void *arg)
757 } 774 }
758 if (p->rtort_mbtest == 0) 775 if (p->rtort_mbtest == 0)
759 atomic_inc(&n_rcu_torture_mberror); 776 atomic_inc(&n_rcu_torture_mberror);
760 cur_ops->readdelay(&rand); 777 cur_ops->read_delay(&rand);
761 preempt_disable(); 778 preempt_disable();
762 pipe_count = p->rtort_pipe_count; 779 pipe_count = p->rtort_pipe_count;
763 if (pipe_count > RCU_TORTURE_PIPE_LEN) { 780 if (pipe_count > RCU_TORTURE_PIPE_LEN) {
@@ -778,7 +795,7 @@ rcu_torture_reader(void *arg)
778 } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); 795 } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
779 VERBOSE_PRINTK_STRING("rcu_torture_reader task stopping"); 796 VERBOSE_PRINTK_STRING("rcu_torture_reader task stopping");
780 rcutorture_shutdown_absorb("rcu_torture_reader"); 797 rcutorture_shutdown_absorb("rcu_torture_reader");
781 if (irqreader && cur_ops->irqcapable) 798 if (irqreader && cur_ops->irq_capable)
782 del_timer_sync(&t); 799 del_timer_sync(&t);
783 while (!kthread_should_stop()) 800 while (!kthread_should_stop())
784 schedule_timeout_uninterruptible(1); 801 schedule_timeout_uninterruptible(1);
@@ -1078,6 +1095,7 @@ rcu_torture_init(void)
1078 int firsterr = 0; 1095 int firsterr = 0;
1079 static struct rcu_torture_ops *torture_ops[] = 1096 static struct rcu_torture_ops *torture_ops[] =
1080 { &rcu_ops, &rcu_sync_ops, &rcu_bh_ops, &rcu_bh_sync_ops, 1097 { &rcu_ops, &rcu_sync_ops, &rcu_bh_ops, &rcu_bh_sync_ops,
1098 &sched_expedited_ops,
1081 &srcu_ops, &sched_ops, &sched_ops_sync, }; 1099 &srcu_ops, &sched_ops, &sched_ops_sync, };
1082 1100
1083 mutex_lock(&fullstop_mutex); 1101 mutex_lock(&fullstop_mutex);
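Registering sched_expedited_ops in torture_ops[] lets rcutorture exercise synchronize_sched_expedited() like any other flavor. The framework picks an ops table by matching its ->name field against the torture_type module parameter, so the new flavor would be requested with torture_type=sched_expedited at module load time. A simplified sketch of that by-name selection; the helper below is illustrative, not the actual code in rcu_torture_init():

	#include <linux/string.h>

	/* Return the ops table whose ->name matches the requested torture_type. */
	static struct rcu_torture_ops *select_torture_ops(const char *torture_type,
							  struct rcu_torture_ops **ops,
							  int nr_ops)
	{
		int i;

		for (i = 0; i < nr_ops; i++)
			if (strcmp(torture_type, ops[i]->name) == 0)
				return ops[i];
		return NULL;	/* unknown torture_type; caller prints an error and bails */
	}
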
diff --git a/kernel/sched.c b/kernel/sched.c
index 1b59e265273b..cda8b81f8801 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -7051,6 +7051,11 @@ fail:
7051 return ret; 7051 return ret;
7052} 7052}
7053 7053
7054#define RCU_MIGRATION_IDLE 0
7055#define RCU_MIGRATION_NEED_QS 1
7056#define RCU_MIGRATION_GOT_QS 2
7057#define RCU_MIGRATION_MUST_SYNC 3
7058
7054/* 7059/*
7055 * migration_thread - this is a highprio system thread that performs 7060 * migration_thread - this is a highprio system thread that performs
7056 * thread migration by bumping thread off CPU then 'pushing' onto 7061 * thread migration by bumping thread off CPU then 'pushing' onto
@@ -7058,6 +7063,7 @@ fail:
7058 */ 7063 */
7059static int migration_thread(void *data) 7064static int migration_thread(void *data)
7060{ 7065{
7066 int badcpu;
7061 int cpu = (long)data; 7067 int cpu = (long)data;
7062 struct rq *rq; 7068 struct rq *rq;
7063 7069
@@ -7092,8 +7098,17 @@ static int migration_thread(void *data)
7092 req = list_entry(head->next, struct migration_req, list); 7098 req = list_entry(head->next, struct migration_req, list);
7093 list_del_init(head->next); 7099 list_del_init(head->next);
7094 7100
7095 spin_unlock(&rq->lock); 7101 if (req->task != NULL) {
7096 __migrate_task(req->task, cpu, req->dest_cpu); 7102 spin_unlock(&rq->lock);
7103 __migrate_task(req->task, cpu, req->dest_cpu);
7104 } else if (likely(cpu == (badcpu = smp_processor_id()))) {
7105 req->dest_cpu = RCU_MIGRATION_GOT_QS;
7106 spin_unlock(&rq->lock);
7107 } else {
7108 req->dest_cpu = RCU_MIGRATION_MUST_SYNC;
7109 spin_unlock(&rq->lock);
7110 WARN_ONCE(1, "migration_thread() on CPU %d, expected %d\n", badcpu, cpu);
7111 }
7097 local_irq_enable(); 7112 local_irq_enable();
7098 7113
7099 complete(&req->done); 7114 complete(&req->done);
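The req->task == NULL case is new: synchronize_sched_expedited() (added below) queues a dummy migration request on every CPU, and each CPU's migration thread answers through req->dest_cpu using the RCU_MIGRATION_* values defined above. Roughly, based on how the two sides use them:

	/*
	 * Sketch of the req->dest_cpu handshake for expedited grace periods:
	 *
	 *   RCU_MIGRATION_IDLE      - slot not in use
	 *   RCU_MIGRATION_NEED_QS   - requester is waiting for this CPU to pass
	 *                             through a quiescent state
	 *   RCU_MIGRATION_GOT_QS    - the migration thread ran on the expected CPU,
	 *                             which is itself a quiescent state
	 *   RCU_MIGRATION_MUST_SYNC - the migration thread ran on the wrong CPU, so
	 *                             the requester falls back to synchronize_sched()
	 */
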
@@ -10581,3 +10596,113 @@ struct cgroup_subsys cpuacct_subsys = {
10581 .subsys_id = cpuacct_subsys_id, 10596 .subsys_id = cpuacct_subsys_id,
10582}; 10597};
10583#endif /* CONFIG_CGROUP_CPUACCT */ 10598#endif /* CONFIG_CGROUP_CPUACCT */
10599
10600#ifndef CONFIG_SMP
10601
10602int rcu_expedited_torture_stats(char *page)
10603{
10604 return 0;
10605}
10606EXPORT_SYMBOL_GPL(rcu_expedited_torture_stats);
10607
10608void synchronize_sched_expedited(void)
10609{
10610}
10611EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
10612
10613#else /* #ifndef CONFIG_SMP */
10614
10615static DEFINE_PER_CPU(struct migration_req, rcu_migration_req);
10616static DEFINE_MUTEX(rcu_sched_expedited_mutex);
10617
10618#define RCU_EXPEDITED_STATE_POST -2
10619#define RCU_EXPEDITED_STATE_IDLE -1
10620
10621static int rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE;
10622
10623int rcu_expedited_torture_stats(char *page)
10624{
10625 int cnt = 0;
10626 int cpu;
10627
10628 cnt += sprintf(&page[cnt], "state: %d /", rcu_expedited_state);
10629 for_each_online_cpu(cpu) {
10630 cnt += sprintf(&page[cnt], " %d:%d",
10631 cpu, per_cpu(rcu_migration_req, cpu).dest_cpu);
10632 }
10633 cnt += sprintf(&page[cnt], "\n");
10634 return cnt;
10635}
10636EXPORT_SYMBOL_GPL(rcu_expedited_torture_stats);
10637
10638static long synchronize_sched_expedited_count;
10639
10640/*
10641 * Wait for an rcu-sched grace period to elapse, but use a "big hammer"
10642 * approach to force the grace period to end quickly. This consumes
10643 * significant time on all CPUs, and is thus not recommended for
10644 * any sort of common-case code.
10645 *
10646 * Note that it is illegal to call this function while holding any
10647 * lock that is acquired by a CPU-hotplug notifier. Failing to
10648 * observe this restriction will result in deadlock.
10649 */
10650void synchronize_sched_expedited(void)
10651{
10652 int cpu;
10653 unsigned long flags;
10654 bool need_full_sync = 0;
10655 struct rq *rq;
10656 struct migration_req *req;
10657 long snap;
10658 int trycount = 0;
10659
10660 smp_mb(); /* ensure prior mod happens before capturing snap. */
10661 snap = ACCESS_ONCE(synchronize_sched_expedited_count) + 1;
10662 get_online_cpus();
10663 while (!mutex_trylock(&rcu_sched_expedited_mutex)) {
10664 put_online_cpus();
10665 if (trycount++ < 10)
10666 udelay(trycount * num_online_cpus());
10667 else {
10668 synchronize_sched();
10669 return;
10670 }
10671 if (ACCESS_ONCE(synchronize_sched_expedited_count) - snap > 0) {
10672 smp_mb(); /* ensure test happens before caller kfree */
10673 return;
10674 }
10675 get_online_cpus();
10676 }
10677 rcu_expedited_state = RCU_EXPEDITED_STATE_POST;
10678 for_each_online_cpu(cpu) {
10679 rq = cpu_rq(cpu);
10680 req = &per_cpu(rcu_migration_req, cpu);
10681 init_completion(&req->done);
10682 req->task = NULL;
10683 req->dest_cpu = RCU_MIGRATION_NEED_QS;
10684 spin_lock_irqsave(&rq->lock, flags);
10685 list_add(&req->list, &rq->migration_queue);
10686 spin_unlock_irqrestore(&rq->lock, flags);
10687 wake_up_process(rq->migration_thread);
10688 }
10689 for_each_online_cpu(cpu) {
10690 rcu_expedited_state = cpu;
10691 req = &per_cpu(rcu_migration_req, cpu);
10692 rq = cpu_rq(cpu);
10693 wait_for_completion(&req->done);
10694 spin_lock_irqsave(&rq->lock, flags);
10695 if (unlikely(req->dest_cpu == RCU_MIGRATION_MUST_SYNC))
10696 need_full_sync = 1;
10697 req->dest_cpu = RCU_MIGRATION_IDLE;
10698 spin_unlock_irqrestore(&rq->lock, flags);
10699 }
10700 rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE;
10701 mutex_unlock(&rcu_sched_expedited_mutex);
10702 put_online_cpus();
10703 if (need_full_sync)
10704 synchronize_sched();
10705}
10706EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
10707
10708#endif /* #else #ifndef CONFIG_SMP */
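
Taken together, synchronize_sched_expedited() gives updaters a drop-in, but far more expensive per call, replacement for synchronize_sched() when grace-period latency matters more than CPU overhead. A usage sketch with hypothetical data structures; only synchronize_sched_expedited() itself comes from the code above, while the list, lock, and reader discipline (rcu-sched readers running with preemption or interrupts disabled) are illustrative assumptions:

	#include <linux/list.h>
	#include <linux/rculist.h>
	#include <linux/slab.h>
	#include <linux/spinlock.h>

	struct foo {
		struct list_head list;
		int data;
	};

	static LIST_HEAD(foo_list);		/* traversed by rcu-sched readers */
	static DEFINE_SPINLOCK(foo_lock);	/* serializes updaters */

	static void foo_remove(struct foo *p)
	{
		spin_lock(&foo_lock);
		list_del_rcu(&p->list);		/* readers may still hold references... */
		spin_unlock(&foo_lock);
		synchronize_sched_expedited();	/* ...so wait, quickly, for them to finish */
		kfree(p);			/* no reader can now reach p */
	}

As the block comment above warns, this hammers every online CPU, so it belongs only on rare, latency-sensitive update paths, and it must not be called while holding any lock acquired by a CPU-hotplug notifier.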