aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/RCU/torture.txt15
-rw-r--r--Documentation/kernel-parameters.txt88
-rw-r--r--MAINTAINERS14
-rw-r--r--arch/um/drivers/mconsole_kern.c1
-rw-r--r--include/linux/rculist.h40
-rw-r--r--include/linux/rcupdate.h20
-rw-r--r--include/linux/rcutiny.h11
-rw-r--r--include/linux/rcutree.h19
-rw-r--r--include/linux/sched.h10
-rw-r--r--include/linux/srcu.h48
-rw-r--r--init/Kconfig50
-rw-r--r--kernel/rcupdate.c28
-rw-r--r--kernel/rcutiny_plugin.h16
-rw-r--r--kernel/rcutorture.c257
-rw-r--r--kernel/rcutree.c35
-rw-r--r--kernel/rcutree.h11
-rw-r--r--kernel/rcutree_plugin.h30
-rw-r--r--kernel/sched/core.c1
-rw-r--r--kernel/srcu.c548
-rw-r--r--kernel/timer.c8
-rw-r--r--lib/list_debug.c22
21 files changed, 1035 insertions, 237 deletions
diff --git a/Documentation/RCU/torture.txt b/Documentation/RCU/torture.txt
index 375d3fb71437..4ddf3913fd8c 100644
--- a/Documentation/RCU/torture.txt
+++ b/Documentation/RCU/torture.txt
@@ -47,6 +47,16 @@ irqreader Says to invoke RCU readers from irq level. This is currently
47 permit this. (Or, more accurately, variants of RCU that do 47 permit this. (Or, more accurately, variants of RCU that do
48 -not- permit this know to ignore this variable.) 48 -not- permit this know to ignore this variable.)
49 49
50n_barrier_cbs If this is nonzero, RCU barrier testing will be conducted,
51 in which case n_barrier_cbs specifies the number of
52 RCU callbacks (and corresponding kthreads) to use for
53 this testing. The value cannot be negative. If you
54 specify this to be non-zero when torture_type indicates a
55 synchronous RCU implementation (one for which a member of
56 the synchronize_rcu() rather than the call_rcu() family is
57 used -- see the documentation for torture_type below), an
58 error will be reported and no testing will be carried out.
59
50nfakewriters This is the number of RCU fake writer threads to run. Fake 60nfakewriters This is the number of RCU fake writer threads to run. Fake
51 writer threads repeatedly use the synchronous "wait for 61 writer threads repeatedly use the synchronous "wait for
52 current readers" function of the interface selected by 62 current readers" function of the interface selected by
@@ -188,7 +198,7 @@ OUTPUT
188The statistics output is as follows: 198The statistics output is as follows:
189 199
190 rcu-torture:--- Start of test: nreaders=16 nfakewriters=4 stat_interval=30 verbose=0 test_no_idle_hz=1 shuffle_interval=3 stutter=5 irqreader=1 fqs_duration=0 fqs_holdoff=0 fqs_stutter=3 test_boost=1/0 test_boost_interval=7 test_boost_duration=4 200 rcu-torture:--- Start of test: nreaders=16 nfakewriters=4 stat_interval=30 verbose=0 test_no_idle_hz=1 shuffle_interval=3 stutter=5 irqreader=1 fqs_duration=0 fqs_holdoff=0 fqs_stutter=3 test_boost=1/0 test_boost_interval=7 test_boost_duration=4
191 rcu-torture: rtc: (null) ver: 155441 tfle: 0 rta: 155441 rtaf: 8884 rtf: 155440 rtmbe: 0 rtbke: 0 rtbre: 0 rtbf: 0 rtb: 0 nt: 3055767 201 rcu-torture: rtc: (null) ver: 155441 tfle: 0 rta: 155441 rtaf: 8884 rtf: 155440 rtmbe: 0 rtbe: 0 rtbke: 0 rtbre: 0 rtbf: 0 rtb: 0 nt: 3055767
192 rcu-torture: Reader Pipe: 727860534 34213 0 0 0 0 0 0 0 0 0 202 rcu-torture: Reader Pipe: 727860534 34213 0 0 0 0 0 0 0 0 0
193 rcu-torture: Reader Batch: 727877838 17003 0 0 0 0 0 0 0 0 0 203 rcu-torture: Reader Batch: 727877838 17003 0 0 0 0 0 0 0 0 0
194 rcu-torture: Free-Block Circulation: 155440 155440 155440 155440 155440 155440 155440 155440 155440 155440 0 204 rcu-torture: Free-Block Circulation: 155440 155440 155440 155440 155440 155440 155440 155440 155440 155440 0
@@ -230,6 +240,9 @@ o "rtmbe": A non-zero value indicates that rcutorture believes that
230 rcu_assign_pointer() and rcu_dereference() are not working 240 rcu_assign_pointer() and rcu_dereference() are not working
231 correctly. This value should be zero. 241 correctly. This value should be zero.
232 242
243o "rtbe": A non-zero value indicates that one of the rcu_barrier()
244 family of functions is not working correctly.
245
233o "rtbke": rcutorture was unable to create the real-time kthreads 246o "rtbke": rcutorture was unable to create the real-time kthreads
234 used to force RCU priority inversion. This value should be zero. 247 used to force RCU priority inversion. This value should be zero.
235 248
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index c1601e5a8b71..ab84a01c8d68 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2330,18 +2330,100 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
2330 ramdisk_size= [RAM] Sizes of RAM disks in kilobytes 2330 ramdisk_size= [RAM] Sizes of RAM disks in kilobytes
2331 See Documentation/blockdev/ramdisk.txt. 2331 See Documentation/blockdev/ramdisk.txt.
2332 2332
2333 rcupdate.blimit= [KNL,BOOT] 2333 rcutree.blimit= [KNL,BOOT]
2334 Set maximum number of finished RCU callbacks to process 2334 Set maximum number of finished RCU callbacks to process
2335 in one batch. 2335 in one batch.
2336 2336
2337 rcupdate.qhimark= [KNL,BOOT] 2337 rcutree.qhimark= [KNL,BOOT]
2338 Set threshold of queued 2338 Set threshold of queued
2339 RCU callbacks over which batch limiting is disabled. 2339 RCU callbacks over which batch limiting is disabled.
2340 2340
2341 rcupdate.qlowmark= [KNL,BOOT] 2341 rcutree.qlowmark= [KNL,BOOT]
2342 Set threshold of queued RCU callbacks below which 2342 Set threshold of queued RCU callbacks below which
2343 batch limiting is re-enabled. 2343 batch limiting is re-enabled.
2344 2344
2345 rcutree.rcu_cpu_stall_suppress= [KNL,BOOT]
2346 Suppress RCU CPU stall warning messages.
2347
2348 rcutree.rcu_cpu_stall_timeout= [KNL,BOOT]
2349 Set timeout for RCU CPU stall warning messages.
2350
2351 rcutorture.fqs_duration= [KNL,BOOT]
2352 Set duration of force_quiescent_state bursts.
2353
2354 rcutorture.fqs_holdoff= [KNL,BOOT]
2355 Set holdoff time within force_quiescent_state bursts.
2356
2357 rcutorture.fqs_stutter= [KNL,BOOT]
2358 Set wait time between force_quiescent_state bursts.
2359
2360 rcutorture.irqreader= [KNL,BOOT]
2361 Test RCU readers from irq handlers.
2362
2363 rcutorture.n_barrier_cbs= [KNL,BOOT]
2364 Set callbacks/threads for rcu_barrier() testing.
2365
2366 rcutorture.nfakewriters= [KNL,BOOT]
2367 Set number of concurrent RCU writers. These just
2368 stress RCU, they don't participate in the actual
2369 test, hence the "fake".
2370
2371 rcutorture.nreaders= [KNL,BOOT]
2372 Set number of RCU readers.
2373
2374 rcutorture.onoff_holdoff= [KNL,BOOT]
2375 Set time (s) after boot for CPU-hotplug testing.
2376
2377 rcutorture.onoff_interval= [KNL,BOOT]
2378 Set time (s) between CPU-hotplug operations, or
2379 zero to disable CPU-hotplug testing.
2380
2381 rcutorture.shuffle_interval= [KNL,BOOT]
2382 Set task-shuffle interval (s). Shuffling tasks
2383 allows some CPUs to go into dyntick-idle mode
2384 during the rcutorture test.
2385
2386 rcutorture.shutdown_secs= [KNL,BOOT]
2387 Set time (s) after boot system shutdown. This
2388 is useful for hands-off automated testing.
2389
2390 rcutorture.stall_cpu= [KNL,BOOT]
2391 Duration of CPU stall (s) to test RCU CPU stall
2392 warnings, zero to disable.
2393
2394 rcutorture.stall_cpu_holdoff= [KNL,BOOT]
2395 Time to wait (s) after boot before inducing stall.
2396
2397 rcutorture.stat_interval= [KNL,BOOT]
2398 Time (s) between statistics printk()s.
2399
2400 rcutorture.stutter= [KNL,BOOT]
2401 Time (s) to stutter testing, for example, specifying
2402 five seconds causes the test to run for five seconds,
2403 wait for five seconds, and so on. This tests RCU's
2404 ability to transition abruptly to and from idle.
2405
2406 rcutorture.test_boost= [KNL,BOOT]
2407 Test RCU priority boosting? 0=no, 1=maybe, 2=yes.
2408 "Maybe" means test if the RCU implementation
2409 under test support RCU priority boosting.
2410
2411 rcutorture.test_boost_duration= [KNL,BOOT]
2412 Duration (s) of each individual boost test.
2413
2414 rcutorture.test_boost_interval= [KNL,BOOT]
2415 Interval (s) between each boost test.
2416
2417 rcutorture.test_no_idle_hz= [KNL,BOOT]
2418 Test RCU's dyntick-idle handling. See also the
2419 rcutorture.shuffle_interval parameter.
2420
2421 rcutorture.torture_type= [KNL,BOOT]
2422 Specify the RCU implementation to test.
2423
2424 rcutorture.verbose= [KNL,BOOT]
2425 Enable additional printk() statements.
2426
2345 rdinit= [KNL] 2427 rdinit= [KNL]
2346 Format: <full_path> 2428 Format: <full_path>
2347 Run specified binary instead of /init from the ramdisk, 2429 Run specified binary instead of /init from the ramdisk,
diff --git a/MAINTAINERS b/MAINTAINERS
index 1a2f8f5823e0..a1c2ab2c6d60 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5607,14 +5607,13 @@ F: net/rds/
5607READ-COPY UPDATE (RCU) 5607READ-COPY UPDATE (RCU)
5608M: Dipankar Sarma <dipankar@in.ibm.com> 5608M: Dipankar Sarma <dipankar@in.ibm.com>
5609M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> 5609M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
5610W: http://www.rdrop.com/users/paulmck/rclock/ 5610W: http://www.rdrop.com/users/paulmck/RCU/
5611S: Supported 5611S: Supported
5612T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git 5612T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git
5613F: Documentation/RCU/ 5613F: Documentation/RCU/
5614X: Documentation/RCU/torture.txt
5614F: include/linux/rcu* 5615F: include/linux/rcu*
5615F: include/linux/srcu*
5616F: kernel/rcu* 5616F: kernel/rcu*
5617F: kernel/srcu*
5618X: kernel/rcutorture.c 5617X: kernel/rcutorture.c
5619 5618
5620REAL TIME CLOCK (RTC) SUBSYSTEM 5619REAL TIME CLOCK (RTC) SUBSYSTEM
@@ -6131,6 +6130,15 @@ S: Maintained
6131F: include/linux/sl?b*.h 6130F: include/linux/sl?b*.h
6132F: mm/sl?b.c 6131F: mm/sl?b.c
6133 6132
6133SLEEPABLE READ-COPY UPDATE (SRCU)
6134M: Lai Jiangshan <laijs@cn.fujitsu.com>
6135M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
6136W: http://www.rdrop.com/users/paulmck/RCU/
6137S: Supported
6138T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git
6139F: include/linux/srcu*
6140F: kernel/srcu*
6141
6134SMC91x ETHERNET DRIVER 6142SMC91x ETHERNET DRIVER
6135M: Nicolas Pitre <nico@fluxnic.net> 6143M: Nicolas Pitre <nico@fluxnic.net>
6136S: Odd Fixes 6144S: Odd Fixes
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index 43b39d61b538..88e466b159dc 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -705,6 +705,7 @@ static void stack_proc(void *arg)
705 struct task_struct *from = current, *to = arg; 705 struct task_struct *from = current, *to = arg;
706 706
707 to->thread.saved_task = from; 707 to->thread.saved_task = from;
708 rcu_switch_from(from);
708 switch_to(from, to, from); 709 switch_to(from, to, from);
709} 710}
710 711
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index d079290843a9..e0f0fab20415 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -30,6 +30,7 @@
30 * This is only for internal list manipulation where we know 30 * This is only for internal list manipulation where we know
31 * the prev/next entries already! 31 * the prev/next entries already!
32 */ 32 */
33#ifndef CONFIG_DEBUG_LIST
33static inline void __list_add_rcu(struct list_head *new, 34static inline void __list_add_rcu(struct list_head *new,
34 struct list_head *prev, struct list_head *next) 35 struct list_head *prev, struct list_head *next)
35{ 36{
@@ -38,6 +39,10 @@ static inline void __list_add_rcu(struct list_head *new,
38 rcu_assign_pointer(list_next_rcu(prev), new); 39 rcu_assign_pointer(list_next_rcu(prev), new);
39 next->prev = new; 40 next->prev = new;
40} 41}
42#else
43extern void __list_add_rcu(struct list_head *new,
44 struct list_head *prev, struct list_head *next);
45#endif
41 46
42/** 47/**
43 * list_add_rcu - add a new entry to rcu-protected list 48 * list_add_rcu - add a new entry to rcu-protected list
@@ -108,7 +113,7 @@ static inline void list_add_tail_rcu(struct list_head *new,
108 */ 113 */
109static inline void list_del_rcu(struct list_head *entry) 114static inline void list_del_rcu(struct list_head *entry)
110{ 115{
111 __list_del(entry->prev, entry->next); 116 __list_del_entry(entry);
112 entry->prev = LIST_POISON2; 117 entry->prev = LIST_POISON2;
113} 118}
114 119
@@ -228,18 +233,43 @@ static inline void list_splice_init_rcu(struct list_head *list,
228 }) 233 })
229 234
230/** 235/**
231 * list_first_entry_rcu - get the first element from a list 236 * Where are list_empty_rcu() and list_first_entry_rcu()?
237 *
238 * Implementing those functions following their counterparts list_empty() and
239 * list_first_entry() is not advisable because they lead to subtle race
240 * conditions as the following snippet shows:
241 *
242 * if (!list_empty_rcu(mylist)) {
243 * struct foo *bar = list_first_entry_rcu(mylist, struct foo, list_member);
244 * do_something(bar);
245 * }
246 *
247 * The list may not be empty when list_empty_rcu checks it, but it may be when
248 * list_first_entry_rcu rereads the ->next pointer.
249 *
250 * Rereading the ->next pointer is not a problem for list_empty() and
251 * list_first_entry() because they would be protected by a lock that blocks
252 * writers.
253 *
254 * See list_first_or_null_rcu for an alternative.
255 */
256
257/**
258 * list_first_or_null_rcu - get the first element from a list
232 * @ptr: the list head to take the element from. 259 * @ptr: the list head to take the element from.
233 * @type: the type of the struct this is embedded in. 260 * @type: the type of the struct this is embedded in.
234 * @member: the name of the list_struct within the struct. 261 * @member: the name of the list_struct within the struct.
235 * 262 *
236 * Note, that list is expected to be not empty. 263 * Note that if the list is empty, it returns NULL.
237 * 264 *
238 * This primitive may safely run concurrently with the _rcu list-mutation 265 * This primitive may safely run concurrently with the _rcu list-mutation
239 * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). 266 * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock().
240 */ 267 */
241#define list_first_entry_rcu(ptr, type, member) \ 268#define list_first_or_null_rcu(ptr, type, member) \
242 list_entry_rcu((ptr)->next, type, member) 269 ({struct list_head *__ptr = (ptr); \
270 struct list_head __rcu *__next = list_next_rcu(__ptr); \
271 likely(__ptr != __next) ? container_of(__next, type, member) : NULL; \
272 })
243 273
244/** 274/**
245 * list_for_each_entry_rcu - iterate over rcu list of given type 275 * list_for_each_entry_rcu - iterate over rcu list of given type
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 20fb776a1d4a..26d1a47591f1 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -184,12 +184,14 @@ static inline int rcu_preempt_depth(void)
184/* Internal to kernel */ 184/* Internal to kernel */
185extern void rcu_sched_qs(int cpu); 185extern void rcu_sched_qs(int cpu);
186extern void rcu_bh_qs(int cpu); 186extern void rcu_bh_qs(int cpu);
187extern void rcu_preempt_note_context_switch(void);
187extern void rcu_check_callbacks(int cpu, int user); 188extern void rcu_check_callbacks(int cpu, int user);
188struct notifier_block; 189struct notifier_block;
189extern void rcu_idle_enter(void); 190extern void rcu_idle_enter(void);
190extern void rcu_idle_exit(void); 191extern void rcu_idle_exit(void);
191extern void rcu_irq_enter(void); 192extern void rcu_irq_enter(void);
192extern void rcu_irq_exit(void); 193extern void rcu_irq_exit(void);
194extern void exit_rcu(void);
193 195
194/** 196/**
195 * RCU_NONIDLE - Indicate idle-loop code that needs RCU readers 197 * RCU_NONIDLE - Indicate idle-loop code that needs RCU readers
@@ -922,6 +924,21 @@ void __kfree_rcu(struct rcu_head *head, unsigned long offset)
922 kfree_call_rcu(head, (rcu_callback)offset); 924 kfree_call_rcu(head, (rcu_callback)offset);
923} 925}
924 926
927/*
928 * Does the specified offset indicate that the corresponding rcu_head
929 * structure can be handled by kfree_rcu()?
930 */
931#define __is_kfree_rcu_offset(offset) ((offset) < 4096)
932
933/*
934 * Helper macro for kfree_rcu() to prevent argument-expansion eyestrain.
935 */
936#define __kfree_rcu(head, offset) \
937 do { \
938 BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); \
939 call_rcu(head, (void (*)(struct rcu_head *))(unsigned long)(offset)); \
940 } while (0)
941
925/** 942/**
926 * kfree_rcu() - kfree an object after a grace period. 943 * kfree_rcu() - kfree an object after a grace period.
927 * @ptr: pointer to kfree 944 * @ptr: pointer to kfree
@@ -944,6 +961,9 @@ void __kfree_rcu(struct rcu_head *head, unsigned long offset)
944 * 961 *
945 * Note that the allowable offset might decrease in the future, for example, 962 * Note that the allowable offset might decrease in the future, for example,
946 * to allow something like kmem_cache_free_rcu(). 963 * to allow something like kmem_cache_free_rcu().
964 *
965 * The BUILD_BUG_ON check must not involve any function calls, hence the
966 * checks are done in macros here.
947 */ 967 */
948#define kfree_rcu(ptr, rcu_head) \ 968#define kfree_rcu(ptr, rcu_head) \
949 __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head)) 969 __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head))
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index e93df77176d1..adb5e5a38cae 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -87,14 +87,6 @@ static inline void kfree_call_rcu(struct rcu_head *head,
87 87
88#ifdef CONFIG_TINY_RCU 88#ifdef CONFIG_TINY_RCU
89 89
90static inline void rcu_preempt_note_context_switch(void)
91{
92}
93
94static inline void exit_rcu(void)
95{
96}
97
98static inline int rcu_needs_cpu(int cpu) 90static inline int rcu_needs_cpu(int cpu)
99{ 91{
100 return 0; 92 return 0;
@@ -102,8 +94,6 @@ static inline int rcu_needs_cpu(int cpu)
102 94
103#else /* #ifdef CONFIG_TINY_RCU */ 95#else /* #ifdef CONFIG_TINY_RCU */
104 96
105void rcu_preempt_note_context_switch(void);
106extern void exit_rcu(void);
107int rcu_preempt_needs_cpu(void); 97int rcu_preempt_needs_cpu(void);
108 98
109static inline int rcu_needs_cpu(int cpu) 99static inline int rcu_needs_cpu(int cpu)
@@ -116,7 +106,6 @@ static inline int rcu_needs_cpu(int cpu)
116static inline void rcu_note_context_switch(int cpu) 106static inline void rcu_note_context_switch(int cpu)
117{ 107{
118 rcu_sched_qs(cpu); 108 rcu_sched_qs(cpu);
119 rcu_preempt_note_context_switch();
120} 109}
121 110
122/* 111/*
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index e8ee5dd0854c..3c6083cde4fc 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -45,18 +45,6 @@ static inline void rcu_virt_note_context_switch(int cpu)
45 rcu_note_context_switch(cpu); 45 rcu_note_context_switch(cpu);
46} 46}
47 47
48#ifdef CONFIG_TREE_PREEMPT_RCU
49
50extern void exit_rcu(void);
51
52#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
53
54static inline void exit_rcu(void)
55{
56}
57
58#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
59
60extern void synchronize_rcu_bh(void); 48extern void synchronize_rcu_bh(void);
61extern void synchronize_sched_expedited(void); 49extern void synchronize_sched_expedited(void);
62extern void synchronize_rcu_expedited(void); 50extern void synchronize_rcu_expedited(void);
@@ -98,13 +86,6 @@ extern void rcu_force_quiescent_state(void);
98extern void rcu_bh_force_quiescent_state(void); 86extern void rcu_bh_force_quiescent_state(void);
99extern void rcu_sched_force_quiescent_state(void); 87extern void rcu_sched_force_quiescent_state(void);
100 88
101/* A context switch is a grace period for RCU-sched and RCU-bh. */
102static inline int rcu_blocking_is_gp(void)
103{
104 might_sleep(); /* Check for RCU read-side critical section. */
105 return num_online_cpus() == 1;
106}
107
108extern void rcu_scheduler_starting(void); 89extern void rcu_scheduler_starting(void);
109extern int rcu_scheduler_active __read_mostly; 90extern int rcu_scheduler_active __read_mostly;
110 91
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 81a173c0897d..8f3fd945070f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1905,12 +1905,22 @@ static inline void rcu_copy_process(struct task_struct *p)
1905 INIT_LIST_HEAD(&p->rcu_node_entry); 1905 INIT_LIST_HEAD(&p->rcu_node_entry);
1906} 1906}
1907 1907
1908static inline void rcu_switch_from(struct task_struct *prev)
1909{
1910 if (prev->rcu_read_lock_nesting != 0)
1911 rcu_preempt_note_context_switch();
1912}
1913
1908#else 1914#else
1909 1915
1910static inline void rcu_copy_process(struct task_struct *p) 1916static inline void rcu_copy_process(struct task_struct *p)
1911{ 1917{
1912} 1918}
1913 1919
1920static inline void rcu_switch_from(struct task_struct *prev)
1921{
1922}
1923
1914#endif 1924#endif
1915 1925
1916#ifdef CONFIG_SMP 1926#ifdef CONFIG_SMP
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index d3d5fa54f25e..55a5c52cbb25 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -29,26 +29,35 @@
29 29
30#include <linux/mutex.h> 30#include <linux/mutex.h>
31#include <linux/rcupdate.h> 31#include <linux/rcupdate.h>
32#include <linux/workqueue.h>
32 33
33struct srcu_struct_array { 34struct srcu_struct_array {
34 int c[2]; 35 unsigned long c[2];
36 unsigned long seq[2];
37};
38
39struct rcu_batch {
40 struct rcu_head *head, **tail;
35}; 41};
36 42
37struct srcu_struct { 43struct srcu_struct {
38 int completed; 44 unsigned completed;
39 struct srcu_struct_array __percpu *per_cpu_ref; 45 struct srcu_struct_array __percpu *per_cpu_ref;
40 struct mutex mutex; 46 spinlock_t queue_lock; /* protect ->batch_queue, ->running */
47 bool running;
48 /* callbacks just queued */
49 struct rcu_batch batch_queue;
50 /* callbacks try to do the first check_zero */
51 struct rcu_batch batch_check0;
52 /* callbacks done with the first check_zero and the flip */
53 struct rcu_batch batch_check1;
54 struct rcu_batch batch_done;
55 struct delayed_work work;
41#ifdef CONFIG_DEBUG_LOCK_ALLOC 56#ifdef CONFIG_DEBUG_LOCK_ALLOC
42 struct lockdep_map dep_map; 57 struct lockdep_map dep_map;
43#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ 58#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
44}; 59};
45 60
46#ifndef CONFIG_PREEMPT
47#define srcu_barrier() barrier()
48#else /* #ifndef CONFIG_PREEMPT */
49#define srcu_barrier()
50#endif /* #else #ifndef CONFIG_PREEMPT */
51
52#ifdef CONFIG_DEBUG_LOCK_ALLOC 61#ifdef CONFIG_DEBUG_LOCK_ALLOC
53 62
54int __init_srcu_struct(struct srcu_struct *sp, const char *name, 63int __init_srcu_struct(struct srcu_struct *sp, const char *name,
@@ -67,12 +76,33 @@ int init_srcu_struct(struct srcu_struct *sp);
67 76
68#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ 77#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
69 78
79/**
80 * call_srcu() - Queue a callback for invocation after an SRCU grace period
81 * @sp: srcu_struct in queue the callback
82 * @head: structure to be used for queueing the SRCU callback.
83 * @func: function to be invoked after the SRCU grace period
84 *
85 * The callback function will be invoked some time after a full SRCU
86 * grace period elapses, in other words after all pre-existing SRCU
87 * read-side critical sections have completed. However, the callback
88 * function might well execute concurrently with other SRCU read-side
89 * critical sections that started after call_srcu() was invoked. SRCU
90 * read-side critical sections are delimited by srcu_read_lock() and
91 * srcu_read_unlock(), and may be nested.
92 *
93 * The callback will be invoked from process context, but must nevertheless
94 * be fast and must not block.
95 */
96void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
97 void (*func)(struct rcu_head *head));
98
70void cleanup_srcu_struct(struct srcu_struct *sp); 99void cleanup_srcu_struct(struct srcu_struct *sp);
71int __srcu_read_lock(struct srcu_struct *sp) __acquires(sp); 100int __srcu_read_lock(struct srcu_struct *sp) __acquires(sp);
72void __srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp); 101void __srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp);
73void synchronize_srcu(struct srcu_struct *sp); 102void synchronize_srcu(struct srcu_struct *sp);
74void synchronize_srcu_expedited(struct srcu_struct *sp); 103void synchronize_srcu_expedited(struct srcu_struct *sp);
75long srcu_batches_completed(struct srcu_struct *sp); 104long srcu_batches_completed(struct srcu_struct *sp);
105void srcu_barrier(struct srcu_struct *sp);
76 106
77#ifdef CONFIG_DEBUG_LOCK_ALLOC 107#ifdef CONFIG_DEBUG_LOCK_ALLOC
78 108
diff --git a/init/Kconfig b/init/Kconfig
index 6cfd71d06463..6d18ef8071b5 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -458,6 +458,33 @@ config RCU_FANOUT
458 Select a specific number if testing RCU itself. 458 Select a specific number if testing RCU itself.
459 Take the default if unsure. 459 Take the default if unsure.
460 460
461config RCU_FANOUT_LEAF
462 int "Tree-based hierarchical RCU leaf-level fanout value"
463 range 2 RCU_FANOUT if 64BIT
464 range 2 RCU_FANOUT if !64BIT
465 depends on TREE_RCU || TREE_PREEMPT_RCU
466 default 16
467 help
468 This option controls the leaf-level fanout of hierarchical
469 implementations of RCU, and allows trading off cache misses
470 against lock contention. Systems that synchronize their
471 scheduling-clock interrupts for energy-efficiency reasons will
472 want the default because the smaller leaf-level fanout keeps
473 lock contention levels acceptably low. Very large systems
474 (hundreds or thousands of CPUs) will instead want to set this
475 value to the maximum value possible in order to reduce the
476 number of cache misses incurred during RCU's grace-period
477 initialization. These systems tend to run CPU-bound, and thus
478 are not helped by synchronized interrupts, and thus tend to
479 skew them, which reduces lock contention enough that large
480 leaf-level fanouts work well.
481
482 Select a specific number if testing RCU itself.
483
484 Select the maximum permissible value for large systems.
485
486 Take the default if unsure.
487
461config RCU_FANOUT_EXACT 488config RCU_FANOUT_EXACT
462 bool "Disable tree-based hierarchical RCU auto-balancing" 489 bool "Disable tree-based hierarchical RCU auto-balancing"
463 depends on TREE_RCU || TREE_PREEMPT_RCU 490 depends on TREE_RCU || TREE_PREEMPT_RCU
@@ -515,10 +542,25 @@ config RCU_BOOST_PRIO
515 depends on RCU_BOOST 542 depends on RCU_BOOST
516 default 1 543 default 1
517 help 544 help
518 This option specifies the real-time priority to which preempted 545 This option specifies the real-time priority to which long-term
519 RCU readers are to be boosted. If you are working with CPU-bound 546 preempted RCU readers are to be boosted. If you are working
520 real-time applications, you should specify a priority higher then 547 with a real-time application that has one or more CPU-bound
521 the highest-priority CPU-bound application. 548 threads running at a real-time priority level, you should set
549 RCU_BOOST_PRIO to a priority higher then the highest-priority
550 real-time CPU-bound thread. The default RCU_BOOST_PRIO value
551 of 1 is appropriate in the common case, which is real-time
552 applications that do not have any CPU-bound threads.
553
554 Some real-time applications might not have a single real-time
555 thread that saturates a given CPU, but instead might have
556 multiple real-time threads that, taken together, fully utilize
557 that CPU. In this case, you should set RCU_BOOST_PRIO to
558 a priority higher than the lowest-priority thread that is
559 conspiring to prevent the CPU from running any non-real-time
560 tasks. For example, if one thread at priority 10 and another
561 thread at priority 5 are between themselves fully consuming
562 the CPU time on a given CPU, then RCU_BOOST_PRIO should be
563 set to priority 6 or higher.
522 564
523 Specify the real-time priority, or take the default if unsure. 565 Specify the real-time priority, or take the default if unsure.
524 566
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index a86f1741cc27..95cba41ce1e9 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -51,6 +51,34 @@
51 51
52#include "rcu.h" 52#include "rcu.h"
53 53
54#ifdef CONFIG_PREEMPT_RCU
55
56/*
57 * Check for a task exiting while in a preemptible-RCU read-side
58 * critical section, clean up if so. No need to issue warnings,
59 * as debug_check_no_locks_held() already does this if lockdep
60 * is enabled.
61 */
62void exit_rcu(void)
63{
64 struct task_struct *t = current;
65
66 if (likely(list_empty(&current->rcu_node_entry)))
67 return;
68 t->rcu_read_lock_nesting = 1;
69 barrier();
70 t->rcu_read_unlock_special = RCU_READ_UNLOCK_BLOCKED;
71 __rcu_read_unlock();
72}
73
74#else /* #ifdef CONFIG_PREEMPT_RCU */
75
76void exit_rcu(void)
77{
78}
79
80#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
81
54#ifdef CONFIG_DEBUG_LOCK_ALLOC 82#ifdef CONFIG_DEBUG_LOCK_ALLOC
55static struct lock_class_key rcu_lock_key; 83static struct lock_class_key rcu_lock_key;
56struct lockdep_map rcu_lock_map = 84struct lockdep_map rcu_lock_map =
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index 22ecea0dfb62..fc31a2d65100 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
@@ -851,22 +851,6 @@ int rcu_preempt_needs_cpu(void)
851 return rcu_preempt_ctrlblk.rcb.rcucblist != NULL; 851 return rcu_preempt_ctrlblk.rcb.rcucblist != NULL;
852} 852}
853 853
854/*
855 * Check for a task exiting while in a preemptible -RCU read-side
856 * critical section, clean up if so. No need to issue warnings,
857 * as debug_check_no_locks_held() already does this if lockdep
858 * is enabled.
859 */
860void exit_rcu(void)
861{
862 struct task_struct *t = current;
863
864 if (t->rcu_read_lock_nesting == 0)
865 return;
866 t->rcu_read_lock_nesting = 1;
867 __rcu_read_unlock();
868}
869
870#else /* #ifdef CONFIG_TINY_PREEMPT_RCU */ 854#else /* #ifdef CONFIG_TINY_PREEMPT_RCU */
871 855
872#ifdef CONFIG_RCU_TRACE 856#ifdef CONFIG_RCU_TRACE
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index a89b381a8c6e..e66b34ab7555 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -64,6 +64,7 @@ static int irqreader = 1; /* RCU readers from irq (timers). */
64static int fqs_duration; /* Duration of bursts (us), 0 to disable. */ 64static int fqs_duration; /* Duration of bursts (us), 0 to disable. */
65static int fqs_holdoff; /* Hold time within burst (us). */ 65static int fqs_holdoff; /* Hold time within burst (us). */
66static int fqs_stutter = 3; /* Wait time between bursts (s). */ 66static int fqs_stutter = 3; /* Wait time between bursts (s). */
67static int n_barrier_cbs; /* Number of callbacks to test RCU barriers. */
67static int onoff_interval; /* Wait time between CPU hotplugs, 0=disable. */ 68static int onoff_interval; /* Wait time between CPU hotplugs, 0=disable. */
68static int onoff_holdoff; /* Seconds after boot before CPU hotplugs. */ 69static int onoff_holdoff; /* Seconds after boot before CPU hotplugs. */
69static int shutdown_secs; /* Shutdown time (s). <=0 for no shutdown. */ 70static int shutdown_secs; /* Shutdown time (s). <=0 for no shutdown. */
@@ -96,6 +97,8 @@ module_param(fqs_holdoff, int, 0444);
96MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)"); 97MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)");
97module_param(fqs_stutter, int, 0444); 98module_param(fqs_stutter, int, 0444);
98MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)"); 99MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)");
100module_param(n_barrier_cbs, int, 0444);
101MODULE_PARM_DESC(n_barrier_cbs, "# of callbacks/kthreads for barrier testing");
99module_param(onoff_interval, int, 0444); 102module_param(onoff_interval, int, 0444);
100MODULE_PARM_DESC(onoff_interval, "Time between CPU hotplugs (s), 0=disable"); 103MODULE_PARM_DESC(onoff_interval, "Time between CPU hotplugs (s), 0=disable");
101module_param(onoff_holdoff, int, 0444); 104module_param(onoff_holdoff, int, 0444);
@@ -139,6 +142,8 @@ static struct task_struct *shutdown_task;
139static struct task_struct *onoff_task; 142static struct task_struct *onoff_task;
140#endif /* #ifdef CONFIG_HOTPLUG_CPU */ 143#endif /* #ifdef CONFIG_HOTPLUG_CPU */
141static struct task_struct *stall_task; 144static struct task_struct *stall_task;
145static struct task_struct **barrier_cbs_tasks;
146static struct task_struct *barrier_task;
142 147
143#define RCU_TORTURE_PIPE_LEN 10 148#define RCU_TORTURE_PIPE_LEN 10
144 149
@@ -164,6 +169,7 @@ static atomic_t n_rcu_torture_alloc_fail;
164static atomic_t n_rcu_torture_free; 169static atomic_t n_rcu_torture_free;
165static atomic_t n_rcu_torture_mberror; 170static atomic_t n_rcu_torture_mberror;
166static atomic_t n_rcu_torture_error; 171static atomic_t n_rcu_torture_error;
172static long n_rcu_torture_barrier_error;
167static long n_rcu_torture_boost_ktrerror; 173static long n_rcu_torture_boost_ktrerror;
168static long n_rcu_torture_boost_rterror; 174static long n_rcu_torture_boost_rterror;
169static long n_rcu_torture_boost_failure; 175static long n_rcu_torture_boost_failure;
@@ -173,6 +179,8 @@ static long n_offline_attempts;
173static long n_offline_successes; 179static long n_offline_successes;
174static long n_online_attempts; 180static long n_online_attempts;
175static long n_online_successes; 181static long n_online_successes;
182static long n_barrier_attempts;
183static long n_barrier_successes;
176static struct list_head rcu_torture_removed; 184static struct list_head rcu_torture_removed;
177static cpumask_var_t shuffle_tmp_mask; 185static cpumask_var_t shuffle_tmp_mask;
178 186
@@ -197,6 +205,10 @@ static unsigned long shutdown_time; /* jiffies to system shutdown. */
197static unsigned long boost_starttime; /* jiffies of next boost test start. */ 205static unsigned long boost_starttime; /* jiffies of next boost test start. */
198DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */ 206DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */
199 /* and boost task create/destroy. */ 207 /* and boost task create/destroy. */
208static atomic_t barrier_cbs_count; /* Barrier callbacks registered. */
209static atomic_t barrier_cbs_invoked; /* Barrier callbacks invoked. */
210static wait_queue_head_t *barrier_cbs_wq; /* Coordinate barrier testing. */
211static DECLARE_WAIT_QUEUE_HEAD(barrier_wq);
200 212
201/* Mediate rmmod and system shutdown. Concurrent rmmod & shutdown illegal! */ 213/* Mediate rmmod and system shutdown. Concurrent rmmod & shutdown illegal! */
202 214
@@ -327,6 +339,7 @@ struct rcu_torture_ops {
327 int (*completed)(void); 339 int (*completed)(void);
328 void (*deferred_free)(struct rcu_torture *p); 340 void (*deferred_free)(struct rcu_torture *p);
329 void (*sync)(void); 341 void (*sync)(void);
342 void (*call)(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
330 void (*cb_barrier)(void); 343 void (*cb_barrier)(void);
331 void (*fqs)(void); 344 void (*fqs)(void);
332 int (*stats)(char *page); 345 int (*stats)(char *page);
@@ -417,6 +430,7 @@ static struct rcu_torture_ops rcu_ops = {
417 .completed = rcu_torture_completed, 430 .completed = rcu_torture_completed,
418 .deferred_free = rcu_torture_deferred_free, 431 .deferred_free = rcu_torture_deferred_free,
419 .sync = synchronize_rcu, 432 .sync = synchronize_rcu,
433 .call = call_rcu,
420 .cb_barrier = rcu_barrier, 434 .cb_barrier = rcu_barrier,
421 .fqs = rcu_force_quiescent_state, 435 .fqs = rcu_force_quiescent_state,
422 .stats = NULL, 436 .stats = NULL,
@@ -460,6 +474,7 @@ static struct rcu_torture_ops rcu_sync_ops = {
460 .completed = rcu_torture_completed, 474 .completed = rcu_torture_completed,
461 .deferred_free = rcu_sync_torture_deferred_free, 475 .deferred_free = rcu_sync_torture_deferred_free,
462 .sync = synchronize_rcu, 476 .sync = synchronize_rcu,
477 .call = NULL,
463 .cb_barrier = NULL, 478 .cb_barrier = NULL,
464 .fqs = rcu_force_quiescent_state, 479 .fqs = rcu_force_quiescent_state,
465 .stats = NULL, 480 .stats = NULL,
@@ -477,6 +492,7 @@ static struct rcu_torture_ops rcu_expedited_ops = {
477 .completed = rcu_no_completed, 492 .completed = rcu_no_completed,
478 .deferred_free = rcu_sync_torture_deferred_free, 493 .deferred_free = rcu_sync_torture_deferred_free,
479 .sync = synchronize_rcu_expedited, 494 .sync = synchronize_rcu_expedited,
495 .call = NULL,
480 .cb_barrier = NULL, 496 .cb_barrier = NULL,
481 .fqs = rcu_force_quiescent_state, 497 .fqs = rcu_force_quiescent_state,
482 .stats = NULL, 498 .stats = NULL,
@@ -519,6 +535,7 @@ static struct rcu_torture_ops rcu_bh_ops = {
519 .completed = rcu_bh_torture_completed, 535 .completed = rcu_bh_torture_completed,
520 .deferred_free = rcu_bh_torture_deferred_free, 536 .deferred_free = rcu_bh_torture_deferred_free,
521 .sync = synchronize_rcu_bh, 537 .sync = synchronize_rcu_bh,
538 .call = call_rcu_bh,
522 .cb_barrier = rcu_barrier_bh, 539 .cb_barrier = rcu_barrier_bh,
523 .fqs = rcu_bh_force_quiescent_state, 540 .fqs = rcu_bh_force_quiescent_state,
524 .stats = NULL, 541 .stats = NULL,
@@ -535,6 +552,7 @@ static struct rcu_torture_ops rcu_bh_sync_ops = {
535 .completed = rcu_bh_torture_completed, 552 .completed = rcu_bh_torture_completed,
536 .deferred_free = rcu_sync_torture_deferred_free, 553 .deferred_free = rcu_sync_torture_deferred_free,
537 .sync = synchronize_rcu_bh, 554 .sync = synchronize_rcu_bh,
555 .call = NULL,
538 .cb_barrier = NULL, 556 .cb_barrier = NULL,
539 .fqs = rcu_bh_force_quiescent_state, 557 .fqs = rcu_bh_force_quiescent_state,
540 .stats = NULL, 558 .stats = NULL,
@@ -551,6 +569,7 @@ static struct rcu_torture_ops rcu_bh_expedited_ops = {
551 .completed = rcu_bh_torture_completed, 569 .completed = rcu_bh_torture_completed,
552 .deferred_free = rcu_sync_torture_deferred_free, 570 .deferred_free = rcu_sync_torture_deferred_free,
553 .sync = synchronize_rcu_bh_expedited, 571 .sync = synchronize_rcu_bh_expedited,
572 .call = NULL,
554 .cb_barrier = NULL, 573 .cb_barrier = NULL,
555 .fqs = rcu_bh_force_quiescent_state, 574 .fqs = rcu_bh_force_quiescent_state,
556 .stats = NULL, 575 .stats = NULL,
@@ -606,6 +625,11 @@ static int srcu_torture_completed(void)
606 return srcu_batches_completed(&srcu_ctl); 625 return srcu_batches_completed(&srcu_ctl);
607} 626}
608 627
628static void srcu_torture_deferred_free(struct rcu_torture *rp)
629{
630 call_srcu(&srcu_ctl, &rp->rtort_rcu, rcu_torture_cb);
631}
632
609static void srcu_torture_synchronize(void) 633static void srcu_torture_synchronize(void)
610{ 634{
611 synchronize_srcu(&srcu_ctl); 635 synchronize_srcu(&srcu_ctl);
@@ -620,7 +644,7 @@ static int srcu_torture_stats(char *page)
620 cnt += sprintf(&page[cnt], "%s%s per-CPU(idx=%d):", 644 cnt += sprintf(&page[cnt], "%s%s per-CPU(idx=%d):",
621 torture_type, TORTURE_FLAG, idx); 645 torture_type, TORTURE_FLAG, idx);
622 for_each_possible_cpu(cpu) { 646 for_each_possible_cpu(cpu) {
623 cnt += sprintf(&page[cnt], " %d(%d,%d)", cpu, 647 cnt += sprintf(&page[cnt], " %d(%lu,%lu)", cpu,
624 per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx], 648 per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx],
625 per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx]); 649 per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx]);
626 } 650 }
@@ -635,13 +659,29 @@ static struct rcu_torture_ops srcu_ops = {
635 .read_delay = srcu_read_delay, 659 .read_delay = srcu_read_delay,
636 .readunlock = srcu_torture_read_unlock, 660 .readunlock = srcu_torture_read_unlock,
637 .completed = srcu_torture_completed, 661 .completed = srcu_torture_completed,
638 .deferred_free = rcu_sync_torture_deferred_free, 662 .deferred_free = srcu_torture_deferred_free,
639 .sync = srcu_torture_synchronize, 663 .sync = srcu_torture_synchronize,
664 .call = NULL,
640 .cb_barrier = NULL, 665 .cb_barrier = NULL,
641 .stats = srcu_torture_stats, 666 .stats = srcu_torture_stats,
642 .name = "srcu" 667 .name = "srcu"
643}; 668};
644 669
670static struct rcu_torture_ops srcu_sync_ops = {
671 .init = srcu_torture_init,
672 .cleanup = srcu_torture_cleanup,
673 .readlock = srcu_torture_read_lock,
674 .read_delay = srcu_read_delay,
675 .readunlock = srcu_torture_read_unlock,
676 .completed = srcu_torture_completed,
677 .deferred_free = rcu_sync_torture_deferred_free,
678 .sync = srcu_torture_synchronize,
679 .call = NULL,
680 .cb_barrier = NULL,
681 .stats = srcu_torture_stats,
682 .name = "srcu_sync"
683};
684
645static int srcu_torture_read_lock_raw(void) __acquires(&srcu_ctl) 685static int srcu_torture_read_lock_raw(void) __acquires(&srcu_ctl)
646{ 686{
647 return srcu_read_lock_raw(&srcu_ctl); 687 return srcu_read_lock_raw(&srcu_ctl);
@@ -659,13 +699,29 @@ static struct rcu_torture_ops srcu_raw_ops = {
659 .read_delay = srcu_read_delay, 699 .read_delay = srcu_read_delay,
660 .readunlock = srcu_torture_read_unlock_raw, 700 .readunlock = srcu_torture_read_unlock_raw,
661 .completed = srcu_torture_completed, 701 .completed = srcu_torture_completed,
662 .deferred_free = rcu_sync_torture_deferred_free, 702 .deferred_free = srcu_torture_deferred_free,
663 .sync = srcu_torture_synchronize, 703 .sync = srcu_torture_synchronize,
704 .call = NULL,
664 .cb_barrier = NULL, 705 .cb_barrier = NULL,
665 .stats = srcu_torture_stats, 706 .stats = srcu_torture_stats,
666 .name = "srcu_raw" 707 .name = "srcu_raw"
667}; 708};
668 709
710static struct rcu_torture_ops srcu_raw_sync_ops = {
711 .init = srcu_torture_init,
712 .cleanup = srcu_torture_cleanup,
713 .readlock = srcu_torture_read_lock_raw,
714 .read_delay = srcu_read_delay,
715 .readunlock = srcu_torture_read_unlock_raw,
716 .completed = srcu_torture_completed,
717 .deferred_free = rcu_sync_torture_deferred_free,
718 .sync = srcu_torture_synchronize,
719 .call = NULL,
720 .cb_barrier = NULL,
721 .stats = srcu_torture_stats,
722 .name = "srcu_raw_sync"
723};
724
669static void srcu_torture_synchronize_expedited(void) 725static void srcu_torture_synchronize_expedited(void)
670{ 726{
671 synchronize_srcu_expedited(&srcu_ctl); 727 synchronize_srcu_expedited(&srcu_ctl);
@@ -680,6 +736,7 @@ static struct rcu_torture_ops srcu_expedited_ops = {
680 .completed = srcu_torture_completed, 736 .completed = srcu_torture_completed,
681 .deferred_free = rcu_sync_torture_deferred_free, 737 .deferred_free = rcu_sync_torture_deferred_free,
682 .sync = srcu_torture_synchronize_expedited, 738 .sync = srcu_torture_synchronize_expedited,
739 .call = NULL,
683 .cb_barrier = NULL, 740 .cb_barrier = NULL,
684 .stats = srcu_torture_stats, 741 .stats = srcu_torture_stats,
685 .name = "srcu_expedited" 742 .name = "srcu_expedited"
@@ -1129,7 +1186,8 @@ rcu_torture_printk(char *page)
1129 "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d " 1186 "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d "
1130 "rtmbe: %d rtbke: %ld rtbre: %ld " 1187 "rtmbe: %d rtbke: %ld rtbre: %ld "
1131 "rtbf: %ld rtb: %ld nt: %ld " 1188 "rtbf: %ld rtb: %ld nt: %ld "
1132 "onoff: %ld/%ld:%ld/%ld", 1189 "onoff: %ld/%ld:%ld/%ld "
1190 "barrier: %ld/%ld:%ld",
1133 rcu_torture_current, 1191 rcu_torture_current,
1134 rcu_torture_current_version, 1192 rcu_torture_current_version,
1135 list_empty(&rcu_torture_freelist), 1193 list_empty(&rcu_torture_freelist),
@@ -1145,14 +1203,17 @@ rcu_torture_printk(char *page)
1145 n_online_successes, 1203 n_online_successes,
1146 n_online_attempts, 1204 n_online_attempts,
1147 n_offline_successes, 1205 n_offline_successes,
1148 n_offline_attempts); 1206 n_offline_attempts,
1207 n_barrier_successes,
1208 n_barrier_attempts,
1209 n_rcu_torture_barrier_error);
1210 cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG);
1149 if (atomic_read(&n_rcu_torture_mberror) != 0 || 1211 if (atomic_read(&n_rcu_torture_mberror) != 0 ||
1212 n_rcu_torture_barrier_error != 0 ||
1150 n_rcu_torture_boost_ktrerror != 0 || 1213 n_rcu_torture_boost_ktrerror != 0 ||
1151 n_rcu_torture_boost_rterror != 0 || 1214 n_rcu_torture_boost_rterror != 0 ||
1152 n_rcu_torture_boost_failure != 0) 1215 n_rcu_torture_boost_failure != 0 ||
1153 cnt += sprintf(&page[cnt], " !!!"); 1216 i > 1) {
1154 cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG);
1155 if (i > 1) {
1156 cnt += sprintf(&page[cnt], "!!! "); 1217 cnt += sprintf(&page[cnt], "!!! ");
1157 atomic_inc(&n_rcu_torture_error); 1218 atomic_inc(&n_rcu_torture_error);
1158 WARN_ON_ONCE(1); 1219 WARN_ON_ONCE(1);
@@ -1337,6 +1398,7 @@ static void rcutorture_booster_cleanup(int cpu)
1337 1398
1338 /* This must be outside of the mutex, otherwise deadlock! */ 1399 /* This must be outside of the mutex, otherwise deadlock! */
1339 kthread_stop(t); 1400 kthread_stop(t);
1401 boost_tasks[cpu] = NULL;
1340} 1402}
1341 1403
1342static int rcutorture_booster_init(int cpu) 1404static int rcutorture_booster_init(int cpu)
@@ -1484,13 +1546,15 @@ static void rcu_torture_onoff_cleanup(void)
1484 return; 1546 return;
1485 VERBOSE_PRINTK_STRING("Stopping rcu_torture_onoff task"); 1547 VERBOSE_PRINTK_STRING("Stopping rcu_torture_onoff task");
1486 kthread_stop(onoff_task); 1548 kthread_stop(onoff_task);
1549 onoff_task = NULL;
1487} 1550}
1488 1551
1489#else /* #ifdef CONFIG_HOTPLUG_CPU */ 1552#else /* #ifdef CONFIG_HOTPLUG_CPU */
1490 1553
1491static void 1554static int
1492rcu_torture_onoff_init(void) 1555rcu_torture_onoff_init(void)
1493{ 1556{
1557 return 0;
1494} 1558}
1495 1559
1496static void rcu_torture_onoff_cleanup(void) 1560static void rcu_torture_onoff_cleanup(void)
@@ -1554,6 +1618,152 @@ static void rcu_torture_stall_cleanup(void)
1554 return; 1618 return;
1555 VERBOSE_PRINTK_STRING("Stopping rcu_torture_stall_task."); 1619 VERBOSE_PRINTK_STRING("Stopping rcu_torture_stall_task.");
1556 kthread_stop(stall_task); 1620 kthread_stop(stall_task);
1621 stall_task = NULL;
1622}
1623
1624/* Callback function for RCU barrier testing. */
1625void rcu_torture_barrier_cbf(struct rcu_head *rcu)
1626{
1627 atomic_inc(&barrier_cbs_invoked);
1628}
1629
1630/* kthread function to register callbacks used to test RCU barriers. */
1631static int rcu_torture_barrier_cbs(void *arg)
1632{
1633 long myid = (long)arg;
1634 struct rcu_head rcu;
1635
1636 init_rcu_head_on_stack(&rcu);
1637 VERBOSE_PRINTK_STRING("rcu_torture_barrier_cbs task started");
1638 set_user_nice(current, 19);
1639 do {
1640 wait_event(barrier_cbs_wq[myid],
1641 atomic_read(&barrier_cbs_count) == n_barrier_cbs ||
1642 kthread_should_stop() ||
1643 fullstop != FULLSTOP_DONTSTOP);
1644 if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP)
1645 break;
1646 cur_ops->call(&rcu, rcu_torture_barrier_cbf);
1647 if (atomic_dec_and_test(&barrier_cbs_count))
1648 wake_up(&barrier_wq);
1649 } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
1650 VERBOSE_PRINTK_STRING("rcu_torture_barrier_cbs task stopping");
1651 rcutorture_shutdown_absorb("rcu_torture_barrier_cbs");
1652 while (!kthread_should_stop())
1653 schedule_timeout_interruptible(1);
1654 cur_ops->cb_barrier();
1655 destroy_rcu_head_on_stack(&rcu);
1656 return 0;
1657}
1658
1659/* kthread function to drive and coordinate RCU barrier testing. */
1660static int rcu_torture_barrier(void *arg)
1661{
1662 int i;
1663
1664 VERBOSE_PRINTK_STRING("rcu_torture_barrier task starting");
1665 do {
1666 atomic_set(&barrier_cbs_invoked, 0);
1667 atomic_set(&barrier_cbs_count, n_barrier_cbs);
1668 /* wake_up() path contains the required barriers. */
1669 for (i = 0; i < n_barrier_cbs; i++)
1670 wake_up(&barrier_cbs_wq[i]);
1671 wait_event(barrier_wq,
1672 atomic_read(&barrier_cbs_count) == 0 ||
1673 kthread_should_stop() ||
1674 fullstop != FULLSTOP_DONTSTOP);
1675 if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP)
1676 break;
1677 n_barrier_attempts++;
1678 cur_ops->cb_barrier();
1679 if (atomic_read(&barrier_cbs_invoked) != n_barrier_cbs) {
1680 n_rcu_torture_barrier_error++;
1681 WARN_ON_ONCE(1);
1682 }
1683 n_barrier_successes++;
1684 schedule_timeout_interruptible(HZ / 10);
1685 } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
1686 VERBOSE_PRINTK_STRING("rcu_torture_barrier task stopping");
1687 rcutorture_shutdown_absorb("rcu_torture_barrier_cbs");
1688 while (!kthread_should_stop())
1689 schedule_timeout_interruptible(1);
1690 return 0;
1691}
1692
1693/* Initialize RCU barrier testing. */
1694static int rcu_torture_barrier_init(void)
1695{
1696 int i;
1697 int ret;
1698
1699 if (n_barrier_cbs == 0)
1700 return 0;
1701 if (cur_ops->call == NULL || cur_ops->cb_barrier == NULL) {
1702 printk(KERN_ALERT "%s" TORTURE_FLAG
1703 " Call or barrier ops missing for %s,\n",
1704 torture_type, cur_ops->name);
1705 printk(KERN_ALERT "%s" TORTURE_FLAG
1706 " RCU barrier testing omitted from run.\n",
1707 torture_type);
1708 return 0;
1709 }
1710 atomic_set(&barrier_cbs_count, 0);
1711 atomic_set(&barrier_cbs_invoked, 0);
1712 barrier_cbs_tasks =
1713 kzalloc(n_barrier_cbs * sizeof(barrier_cbs_tasks[0]),
1714 GFP_KERNEL);
1715 barrier_cbs_wq =
1716 kzalloc(n_barrier_cbs * sizeof(barrier_cbs_wq[0]),
1717 GFP_KERNEL);
1718 if (barrier_cbs_tasks == NULL || barrier_cbs_wq == 0)
1719 return -ENOMEM;
1720 for (i = 0; i < n_barrier_cbs; i++) {
1721 init_waitqueue_head(&barrier_cbs_wq[i]);
1722 barrier_cbs_tasks[i] = kthread_run(rcu_torture_barrier_cbs,
1723 (void *)(long)i,
1724 "rcu_torture_barrier_cbs");
1725 if (IS_ERR(barrier_cbs_tasks[i])) {
1726 ret = PTR_ERR(barrier_cbs_tasks[i]);
1727 VERBOSE_PRINTK_ERRSTRING("Failed to create rcu_torture_barrier_cbs");
1728 barrier_cbs_tasks[i] = NULL;
1729 return ret;
1730 }
1731 }
1732 barrier_task = kthread_run(rcu_torture_barrier, NULL,
1733 "rcu_torture_barrier");
1734 if (IS_ERR(barrier_task)) {
1735 ret = PTR_ERR(barrier_task);
1736 VERBOSE_PRINTK_ERRSTRING("Failed to create rcu_torture_barrier");
1737 barrier_task = NULL;
1738 }
1739 return 0;
1740}
1741
1742/* Clean up after RCU barrier testing. */
1743static void rcu_torture_barrier_cleanup(void)
1744{
1745 int i;
1746
1747 if (barrier_task != NULL) {
1748 VERBOSE_PRINTK_STRING("Stopping rcu_torture_barrier task");
1749 kthread_stop(barrier_task);
1750 barrier_task = NULL;
1751 }
1752 if (barrier_cbs_tasks != NULL) {
1753 for (i = 0; i < n_barrier_cbs; i++) {
1754 if (barrier_cbs_tasks[i] != NULL) {
1755 VERBOSE_PRINTK_STRING("Stopping rcu_torture_barrier_cbs task");
1756 kthread_stop(barrier_cbs_tasks[i]);
1757 barrier_cbs_tasks[i] = NULL;
1758 }
1759 }
1760 kfree(barrier_cbs_tasks);
1761 barrier_cbs_tasks = NULL;
1762 }
1763 if (barrier_cbs_wq != NULL) {
1764 kfree(barrier_cbs_wq);
1765 barrier_cbs_wq = NULL;
1766 }
1557} 1767}
1558 1768
1559static int rcutorture_cpu_notify(struct notifier_block *self, 1769static int rcutorture_cpu_notify(struct notifier_block *self,
@@ -1598,6 +1808,7 @@ rcu_torture_cleanup(void)
1598 fullstop = FULLSTOP_RMMOD; 1808 fullstop = FULLSTOP_RMMOD;
1599 mutex_unlock(&fullstop_mutex); 1809 mutex_unlock(&fullstop_mutex);
1600 unregister_reboot_notifier(&rcutorture_shutdown_nb); 1810 unregister_reboot_notifier(&rcutorture_shutdown_nb);
1811 rcu_torture_barrier_cleanup();
1601 rcu_torture_stall_cleanup(); 1812 rcu_torture_stall_cleanup();
1602 if (stutter_task) { 1813 if (stutter_task) {
1603 VERBOSE_PRINTK_STRING("Stopping rcu_torture_stutter task"); 1814 VERBOSE_PRINTK_STRING("Stopping rcu_torture_stutter task");
@@ -1665,6 +1876,7 @@ rcu_torture_cleanup(void)
1665 VERBOSE_PRINTK_STRING("Stopping rcu_torture_shutdown task"); 1876 VERBOSE_PRINTK_STRING("Stopping rcu_torture_shutdown task");
1666 kthread_stop(shutdown_task); 1877 kthread_stop(shutdown_task);
1667 } 1878 }
1879 shutdown_task = NULL;
1668 rcu_torture_onoff_cleanup(); 1880 rcu_torture_onoff_cleanup();
1669 1881
1670 /* Wait for all RCU callbacks to fire. */ 1882 /* Wait for all RCU callbacks to fire. */
@@ -1676,7 +1888,7 @@ rcu_torture_cleanup(void)
1676 1888
1677 if (cur_ops->cleanup) 1889 if (cur_ops->cleanup)
1678 cur_ops->cleanup(); 1890 cur_ops->cleanup();
1679 if (atomic_read(&n_rcu_torture_error)) 1891 if (atomic_read(&n_rcu_torture_error) || n_rcu_torture_barrier_error)
1680 rcu_torture_print_module_parms(cur_ops, "End of test: FAILURE"); 1892 rcu_torture_print_module_parms(cur_ops, "End of test: FAILURE");
1681 else if (n_online_successes != n_online_attempts || 1893 else if (n_online_successes != n_online_attempts ||
1682 n_offline_successes != n_offline_attempts) 1894 n_offline_successes != n_offline_attempts)
@@ -1692,10 +1904,12 @@ rcu_torture_init(void)
1692 int i; 1904 int i;
1693 int cpu; 1905 int cpu;
1694 int firsterr = 0; 1906 int firsterr = 0;
1907 int retval;
1695 static struct rcu_torture_ops *torture_ops[] = 1908 static struct rcu_torture_ops *torture_ops[] =
1696 { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops, 1909 { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops,
1697 &rcu_bh_ops, &rcu_bh_sync_ops, &rcu_bh_expedited_ops, 1910 &rcu_bh_ops, &rcu_bh_sync_ops, &rcu_bh_expedited_ops,
1698 &srcu_ops, &srcu_raw_ops, &srcu_expedited_ops, 1911 &srcu_ops, &srcu_sync_ops, &srcu_raw_ops,
1912 &srcu_raw_sync_ops, &srcu_expedited_ops,
1699 &sched_ops, &sched_sync_ops, &sched_expedited_ops, }; 1913 &sched_ops, &sched_sync_ops, &sched_expedited_ops, };
1700 1914
1701 mutex_lock(&fullstop_mutex); 1915 mutex_lock(&fullstop_mutex);
@@ -1749,6 +1963,7 @@ rcu_torture_init(void)
1749 atomic_set(&n_rcu_torture_free, 0); 1963 atomic_set(&n_rcu_torture_free, 0);
1750 atomic_set(&n_rcu_torture_mberror, 0); 1964 atomic_set(&n_rcu_torture_mberror, 0);
1751 atomic_set(&n_rcu_torture_error, 0); 1965 atomic_set(&n_rcu_torture_error, 0);
1966 n_rcu_torture_barrier_error = 0;
1752 n_rcu_torture_boost_ktrerror = 0; 1967 n_rcu_torture_boost_ktrerror = 0;
1753 n_rcu_torture_boost_rterror = 0; 1968 n_rcu_torture_boost_rterror = 0;
1754 n_rcu_torture_boost_failure = 0; 1969 n_rcu_torture_boost_failure = 0;
@@ -1872,7 +2087,6 @@ rcu_torture_init(void)
1872 test_boost_duration = 2; 2087 test_boost_duration = 2;
1873 if ((test_boost == 1 && cur_ops->can_boost) || 2088 if ((test_boost == 1 && cur_ops->can_boost) ||
1874 test_boost == 2) { 2089 test_boost == 2) {
1875 int retval;
1876 2090
1877 boost_starttime = jiffies + test_boost_interval * HZ; 2091 boost_starttime = jiffies + test_boost_interval * HZ;
1878 register_cpu_notifier(&rcutorture_cpu_nb); 2092 register_cpu_notifier(&rcutorture_cpu_nb);
@@ -1897,9 +2111,22 @@ rcu_torture_init(void)
1897 goto unwind; 2111 goto unwind;
1898 } 2112 }
1899 } 2113 }
1900 rcu_torture_onoff_init(); 2114 i = rcu_torture_onoff_init();
2115 if (i != 0) {
2116 firsterr = i;
2117 goto unwind;
2118 }
1901 register_reboot_notifier(&rcutorture_shutdown_nb); 2119 register_reboot_notifier(&rcutorture_shutdown_nb);
1902 rcu_torture_stall_init(); 2120 i = rcu_torture_stall_init();
2121 if (i != 0) {
2122 firsterr = i;
2123 goto unwind;
2124 }
2125 retval = rcu_torture_barrier_init();
2126 if (retval != 0) {
2127 firsterr = retval;
2128 goto unwind;
2129 }
1903 rcutorture_record_test_transition(); 2130 rcutorture_record_test_transition();
1904 mutex_unlock(&fullstop_mutex); 2131 mutex_unlock(&fullstop_mutex);
1905 return 0; 2132 return 0;
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index e578dd327c64..b3ea3ac3a2b5 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -201,7 +201,6 @@ void rcu_note_context_switch(int cpu)
201{ 201{
202 trace_rcu_utilization("Start context switch"); 202 trace_rcu_utilization("Start context switch");
203 rcu_sched_qs(cpu); 203 rcu_sched_qs(cpu);
204 rcu_preempt_note_context_switch(cpu);
205 trace_rcu_utilization("End context switch"); 204 trace_rcu_utilization("End context switch");
206} 205}
207EXPORT_SYMBOL_GPL(rcu_note_context_switch); 206EXPORT_SYMBOL_GPL(rcu_note_context_switch);
@@ -1953,6 +1952,38 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
1953} 1952}
1954EXPORT_SYMBOL_GPL(call_rcu_bh); 1953EXPORT_SYMBOL_GPL(call_rcu_bh);
1955 1954
1955/*
1956 * Because a context switch is a grace period for RCU-sched and RCU-bh,
1957 * any blocking grace-period wait automatically implies a grace period
1958 * if there is only one CPU online at any point time during execution
1959 * of either synchronize_sched() or synchronize_rcu_bh(). It is OK to
1960 * occasionally incorrectly indicate that there are multiple CPUs online
1961 * when there was in fact only one the whole time, as this just adds
1962 * some overhead: RCU still operates correctly.
1963 *
1964 * Of course, sampling num_online_cpus() with preemption enabled can
1965 * give erroneous results if there are concurrent CPU-hotplug operations.
1966 * For example, given a demonic sequence of preemptions in num_online_cpus()
1967 * and CPU-hotplug operations, there could be two or more CPUs online at
1968 * all times, but num_online_cpus() might well return one (or even zero).
1969 *
1970 * However, all such demonic sequences require at least one CPU-offline
1971 * operation. Furthermore, rcu_blocking_is_gp() giving the wrong answer
1972 * is only a problem if there is an RCU read-side critical section executing
1973 * throughout. But RCU-sched and RCU-bh read-side critical sections
1974 * disable either preemption or bh, which prevents a CPU from going offline.
1975 * Therefore, the only way that rcu_blocking_is_gp() can incorrectly return
1976 * that there is only one CPU when in fact there was more than one throughout
1977 * is when there were no RCU readers in the system. If there are no
1978 * RCU readers, the grace period by definition can be of zero length,
1979 * regardless of the number of online CPUs.
1980 */
1981static inline int rcu_blocking_is_gp(void)
1982{
1983 might_sleep(); /* Check for RCU read-side critical section. */
1984 return num_online_cpus() <= 1;
1985}
1986
1956/** 1987/**
1957 * synchronize_sched - wait until an rcu-sched grace period has elapsed. 1988 * synchronize_sched - wait until an rcu-sched grace period has elapsed.
1958 * 1989 *
@@ -2543,7 +2574,7 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
2543 2574
2544 for (i = NUM_RCU_LVLS - 1; i > 0; i--) 2575 for (i = NUM_RCU_LVLS - 1; i > 0; i--)
2545 rsp->levelspread[i] = CONFIG_RCU_FANOUT; 2576 rsp->levelspread[i] = CONFIG_RCU_FANOUT;
2546 rsp->levelspread[0] = RCU_FANOUT_LEAF; 2577 rsp->levelspread[0] = CONFIG_RCU_FANOUT_LEAF;
2547} 2578}
2548#else /* #ifdef CONFIG_RCU_FANOUT_EXACT */ 2579#else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
2549static void __init rcu_init_levelspread(struct rcu_state *rsp) 2580static void __init rcu_init_levelspread(struct rcu_state *rsp)
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 1e49c5685960..7f5d138dedf5 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -29,18 +29,14 @@
29#include <linux/seqlock.h> 29#include <linux/seqlock.h>
30 30
31/* 31/*
32 * Define shape of hierarchy based on NR_CPUS and CONFIG_RCU_FANOUT. 32 * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and
33 * CONFIG_RCU_FANOUT_LEAF.
33 * In theory, it should be possible to add more levels straightforwardly. 34 * In theory, it should be possible to add more levels straightforwardly.
34 * In practice, this did work well going from three levels to four. 35 * In practice, this did work well going from three levels to four.
35 * Of course, your mileage may vary. 36 * Of course, your mileage may vary.
36 */ 37 */
37#define MAX_RCU_LVLS 4 38#define MAX_RCU_LVLS 4
38#if CONFIG_RCU_FANOUT > 16 39#define RCU_FANOUT_1 (CONFIG_RCU_FANOUT_LEAF)
39#define RCU_FANOUT_LEAF 16
40#else /* #if CONFIG_RCU_FANOUT > 16 */
41#define RCU_FANOUT_LEAF (CONFIG_RCU_FANOUT)
42#endif /* #else #if CONFIG_RCU_FANOUT > 16 */
43#define RCU_FANOUT_1 (RCU_FANOUT_LEAF)
44#define RCU_FANOUT_2 (RCU_FANOUT_1 * CONFIG_RCU_FANOUT) 40#define RCU_FANOUT_2 (RCU_FANOUT_1 * CONFIG_RCU_FANOUT)
45#define RCU_FANOUT_3 (RCU_FANOUT_2 * CONFIG_RCU_FANOUT) 41#define RCU_FANOUT_3 (RCU_FANOUT_2 * CONFIG_RCU_FANOUT)
46#define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT) 42#define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT)
@@ -434,7 +430,6 @@ DECLARE_PER_CPU(char, rcu_cpu_has_work);
434/* Forward declarations for rcutree_plugin.h */ 430/* Forward declarations for rcutree_plugin.h */
435static void rcu_bootup_announce(void); 431static void rcu_bootup_announce(void);
436long rcu_batches_completed(void); 432long rcu_batches_completed(void);
437static void rcu_preempt_note_context_switch(int cpu);
438static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp); 433static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp);
439#ifdef CONFIG_HOTPLUG_CPU 434#ifdef CONFIG_HOTPLUG_CPU
440static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, 435static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp,
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 7082ea93566f..2411000d9869 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -153,7 +153,7 @@ static void rcu_preempt_qs(int cpu)
153 * 153 *
154 * Caller must disable preemption. 154 * Caller must disable preemption.
155 */ 155 */
156static void rcu_preempt_note_context_switch(int cpu) 156void rcu_preempt_note_context_switch(void)
157{ 157{
158 struct task_struct *t = current; 158 struct task_struct *t = current;
159 unsigned long flags; 159 unsigned long flags;
@@ -164,7 +164,7 @@ static void rcu_preempt_note_context_switch(int cpu)
164 (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { 164 (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
165 165
166 /* Possibly blocking in an RCU read-side critical section. */ 166 /* Possibly blocking in an RCU read-side critical section. */
167 rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu); 167 rdp = __this_cpu_ptr(rcu_preempt_state.rda);
168 rnp = rdp->mynode; 168 rnp = rdp->mynode;
169 raw_spin_lock_irqsave(&rnp->lock, flags); 169 raw_spin_lock_irqsave(&rnp->lock, flags);
170 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; 170 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
@@ -228,7 +228,7 @@ static void rcu_preempt_note_context_switch(int cpu)
228 * means that we continue to block the current grace period. 228 * means that we continue to block the current grace period.
229 */ 229 */
230 local_irq_save(flags); 230 local_irq_save(flags);
231 rcu_preempt_qs(cpu); 231 rcu_preempt_qs(smp_processor_id());
232 local_irq_restore(flags); 232 local_irq_restore(flags);
233} 233}
234 234
@@ -969,22 +969,6 @@ static void __init __rcu_init_preempt(void)
969 rcu_init_one(&rcu_preempt_state, &rcu_preempt_data); 969 rcu_init_one(&rcu_preempt_state, &rcu_preempt_data);
970} 970}
971 971
972/*
973 * Check for a task exiting while in a preemptible-RCU read-side
974 * critical section, clean up if so. No need to issue warnings,
975 * as debug_check_no_locks_held() already does this if lockdep
976 * is enabled.
977 */
978void exit_rcu(void)
979{
980 struct task_struct *t = current;
981
982 if (t->rcu_read_lock_nesting == 0)
983 return;
984 t->rcu_read_lock_nesting = 1;
985 __rcu_read_unlock();
986}
987
988#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 972#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
989 973
990static struct rcu_state *rcu_state = &rcu_sched_state; 974static struct rcu_state *rcu_state = &rcu_sched_state;
@@ -1018,14 +1002,6 @@ void rcu_force_quiescent_state(void)
1018EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); 1002EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
1019 1003
1020/* 1004/*
1021 * Because preemptible RCU does not exist, we never have to check for
1022 * CPUs being in quiescent states.
1023 */
1024static void rcu_preempt_note_context_switch(int cpu)
1025{
1026}
1027
1028/*
1029 * Because preemptible RCU does not exist, there are never any preempted 1005 * Because preemptible RCU does not exist, there are never any preempted
1030 * RCU readers. 1006 * RCU readers.
1031 */ 1007 */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 4603b9d8f30a..5d89eb93f7e4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2083,6 +2083,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
2083#endif 2083#endif
2084 2084
2085 /* Here we just switch the register state and the stack. */ 2085 /* Here we just switch the register state and the stack. */
2086 rcu_switch_from(prev);
2086 switch_to(prev, next, prev); 2087 switch_to(prev, next, prev);
2087 2088
2088 barrier(); 2089 barrier();
diff --git a/kernel/srcu.c b/kernel/srcu.c
index ba35f3a4a1f4..2095be3318d5 100644
--- a/kernel/srcu.c
+++ b/kernel/srcu.c
@@ -34,10 +34,77 @@
34#include <linux/delay.h> 34#include <linux/delay.h>
35#include <linux/srcu.h> 35#include <linux/srcu.h>
36 36
37/*
38 * Initialize an rcu_batch structure to empty.
39 */
40static inline void rcu_batch_init(struct rcu_batch *b)
41{
42 b->head = NULL;
43 b->tail = &b->head;
44}
45
46/*
47 * Enqueue a callback onto the tail of the specified rcu_batch structure.
48 */
49static inline void rcu_batch_queue(struct rcu_batch *b, struct rcu_head *head)
50{
51 *b->tail = head;
52 b->tail = &head->next;
53}
54
55/*
56 * Is the specified rcu_batch structure empty?
57 */
58static inline bool rcu_batch_empty(struct rcu_batch *b)
59{
60 return b->tail == &b->head;
61}
62
63/*
64 * Remove the callback at the head of the specified rcu_batch structure
65 * and return a pointer to it, or return NULL if the structure is empty.
66 */
67static inline struct rcu_head *rcu_batch_dequeue(struct rcu_batch *b)
68{
69 struct rcu_head *head;
70
71 if (rcu_batch_empty(b))
72 return NULL;
73
74 head = b->head;
75 b->head = head->next;
76 if (b->tail == &head->next)
77 rcu_batch_init(b);
78
79 return head;
80}
81
82/*
83 * Move all callbacks from the rcu_batch structure specified by "from" to
84 * the structure specified by "to".
85 */
86static inline void rcu_batch_move(struct rcu_batch *to, struct rcu_batch *from)
87{
88 if (!rcu_batch_empty(from)) {
89 *to->tail = from->head;
90 to->tail = from->tail;
91 rcu_batch_init(from);
92 }
93}
94
95/* single-thread state-machine */
96static void process_srcu(struct work_struct *work);
97
37static int init_srcu_struct_fields(struct srcu_struct *sp) 98static int init_srcu_struct_fields(struct srcu_struct *sp)
38{ 99{
39 sp->completed = 0; 100 sp->completed = 0;
40 mutex_init(&sp->mutex); 101 spin_lock_init(&sp->queue_lock);
102 sp->running = false;
103 rcu_batch_init(&sp->batch_queue);
104 rcu_batch_init(&sp->batch_check0);
105 rcu_batch_init(&sp->batch_check1);
106 rcu_batch_init(&sp->batch_done);
107 INIT_DELAYED_WORK(&sp->work, process_srcu);
41 sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array); 108 sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array);
42 return sp->per_cpu_ref ? 0 : -ENOMEM; 109 return sp->per_cpu_ref ? 0 : -ENOMEM;
43} 110}
@@ -73,21 +140,116 @@ EXPORT_SYMBOL_GPL(init_srcu_struct);
73#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ 140#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
74 141
75/* 142/*
76 * srcu_readers_active_idx -- returns approximate number of readers 143 * Returns approximate total of the readers' ->seq[] values for the
77 * active on the specified rank of per-CPU counters. 144 * rank of per-CPU counters specified by idx.
78 */ 145 */
146static unsigned long srcu_readers_seq_idx(struct srcu_struct *sp, int idx)
147{
148 int cpu;
149 unsigned long sum = 0;
150 unsigned long t;
79 151
80static int srcu_readers_active_idx(struct srcu_struct *sp, int idx) 152 for_each_possible_cpu(cpu) {
153 t = ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->seq[idx]);
154 sum += t;
155 }
156 return sum;
157}
158
159/*
160 * Returns approximate number of readers active on the specified rank
161 * of the per-CPU ->c[] counters.
162 */
163static unsigned long srcu_readers_active_idx(struct srcu_struct *sp, int idx)
81{ 164{
82 int cpu; 165 int cpu;
83 int sum; 166 unsigned long sum = 0;
167 unsigned long t;
84 168
85 sum = 0; 169 for_each_possible_cpu(cpu) {
86 for_each_possible_cpu(cpu) 170 t = ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[idx]);
87 sum += per_cpu_ptr(sp->per_cpu_ref, cpu)->c[idx]; 171 sum += t;
172 }
88 return sum; 173 return sum;
89} 174}
90 175
176/*
177 * Return true if the number of pre-existing readers is determined to
178 * be stably zero. An example unstable zero can occur if the call
179 * to srcu_readers_active_idx() misses an __srcu_read_lock() increment,
180 * but due to task migration, sees the corresponding __srcu_read_unlock()
181 * decrement. This can happen because srcu_readers_active_idx() takes
182 * time to sum the array, and might in fact be interrupted or preempted
183 * partway through the summation.
184 */
185static bool srcu_readers_active_idx_check(struct srcu_struct *sp, int idx)
186{
187 unsigned long seq;
188
189 seq = srcu_readers_seq_idx(sp, idx);
190
191 /*
192 * The following smp_mb() A pairs with the smp_mb() B located in
193 * __srcu_read_lock(). This pairing ensures that if an
194 * __srcu_read_lock() increments its counter after the summation
195 * in srcu_readers_active_idx(), then the corresponding SRCU read-side
196 * critical section will see any changes made prior to the start
197 * of the current SRCU grace period.
198 *
199 * Also, if the above call to srcu_readers_seq_idx() saw the
200 * increment of ->seq[], then the call to srcu_readers_active_idx()
201 * must see the increment of ->c[].
202 */
203 smp_mb(); /* A */
204
205 /*
206 * Note that srcu_readers_active_idx() can incorrectly return
207 * zero even though there is a pre-existing reader throughout.
208 * To see this, suppose that task A is in a very long SRCU
209 * read-side critical section that started on CPU 0, and that
210 * no other reader exists, so that the sum of the counters
211 * is equal to one. Then suppose that task B starts executing
212 * srcu_readers_active_idx(), summing up to CPU 1, and then that
213 * task C starts reading on CPU 0, so that its increment is not
214 * summed, but finishes reading on CPU 2, so that its decrement
215 * -is- summed. Then when task B completes its sum, it will
216 * incorrectly get zero, despite the fact that task A has been
217 * in its SRCU read-side critical section the whole time.
218 *
219 * We therefore do a validation step should srcu_readers_active_idx()
220 * return zero.
221 */
222 if (srcu_readers_active_idx(sp, idx) != 0)
223 return false;
224
225 /*
226 * The remainder of this function is the validation step.
227 * The following smp_mb() D pairs with the smp_mb() C in
228 * __srcu_read_unlock(). If the __srcu_read_unlock() was seen
229 * by srcu_readers_active_idx() above, then any destructive
230 * operation performed after the grace period will happen after
231 * the corresponding SRCU read-side critical section.
232 *
233 * Note that there can be at most NR_CPUS worth of readers using
234 * the old index, which is not enough to overflow even a 32-bit
235 * integer. (Yes, this does mean that systems having more than
236 * a billion or so CPUs need to be 64-bit systems.) Therefore,
237 * the sum of the ->seq[] counters cannot possibly overflow.
238 * Therefore, the only way that the return values of the two
239 * calls to srcu_readers_seq_idx() can be equal is if there were
240 * no increments of the corresponding rank of ->seq[] counts
241 * in the interim. But the missed-increment scenario laid out
242 * above includes an increment of the ->seq[] counter by
243 * the corresponding __srcu_read_lock(). Therefore, if this
244 * scenario occurs, the return values from the two calls to
245 * srcu_readers_seq_idx() will differ, and thus the validation
246 * step below suffices.
247 */
248 smp_mb(); /* D */
249
250 return srcu_readers_seq_idx(sp, idx) == seq;
251}
252
91/** 253/**
92 * srcu_readers_active - returns approximate number of readers. 254 * srcu_readers_active - returns approximate number of readers.
93 * @sp: which srcu_struct to count active readers (holding srcu_read_lock). 255 * @sp: which srcu_struct to count active readers (holding srcu_read_lock).
@@ -98,7 +260,14 @@ static int srcu_readers_active_idx(struct srcu_struct *sp, int idx)
98 */ 260 */
99static int srcu_readers_active(struct srcu_struct *sp) 261static int srcu_readers_active(struct srcu_struct *sp)
100{ 262{
101 return srcu_readers_active_idx(sp, 0) + srcu_readers_active_idx(sp, 1); 263 int cpu;
264 unsigned long sum = 0;
265
266 for_each_possible_cpu(cpu) {
267 sum += ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[0]);
268 sum += ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[1]);
269 }
270 return sum;
102} 271}
103 272
104/** 273/**
@@ -131,10 +300,11 @@ int __srcu_read_lock(struct srcu_struct *sp)
131 int idx; 300 int idx;
132 301
133 preempt_disable(); 302 preempt_disable();
134 idx = sp->completed & 0x1; 303 idx = rcu_dereference_index_check(sp->completed,
135 barrier(); /* ensure compiler looks -once- at sp->completed. */ 304 rcu_read_lock_sched_held()) & 0x1;
136 per_cpu_ptr(sp->per_cpu_ref, smp_processor_id())->c[idx]++; 305 ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->c[idx]) += 1;
137 srcu_barrier(); /* ensure compiler won't misorder critical section. */ 306 smp_mb(); /* B */ /* Avoid leaking the critical section. */
307 ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->seq[idx]) += 1;
138 preempt_enable(); 308 preempt_enable();
139 return idx; 309 return idx;
140} 310}
@@ -149,8 +319,8 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock);
149void __srcu_read_unlock(struct srcu_struct *sp, int idx) 319void __srcu_read_unlock(struct srcu_struct *sp, int idx)
150{ 320{
151 preempt_disable(); 321 preempt_disable();
152 srcu_barrier(); /* ensure compiler won't misorder critical section. */ 322 smp_mb(); /* C */ /* Avoid leaking the critical section. */
153 per_cpu_ptr(sp->per_cpu_ref, smp_processor_id())->c[idx]--; 323 ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->c[idx]) -= 1;
154 preempt_enable(); 324 preempt_enable();
155} 325}
156EXPORT_SYMBOL_GPL(__srcu_read_unlock); 326EXPORT_SYMBOL_GPL(__srcu_read_unlock);
@@ -163,106 +333,119 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock);
163 * we repeatedly block for 1-millisecond time periods. This approach 333 * we repeatedly block for 1-millisecond time periods. This approach
164 * has done well in testing, so there is no need for a config parameter. 334 * has done well in testing, so there is no need for a config parameter.
165 */ 335 */
166#define SYNCHRONIZE_SRCU_READER_DELAY 10 336#define SRCU_RETRY_CHECK_DELAY 5
337#define SYNCHRONIZE_SRCU_TRYCOUNT 2
338#define SYNCHRONIZE_SRCU_EXP_TRYCOUNT 12
167 339
168/* 340/*
169 * Helper function for synchronize_srcu() and synchronize_srcu_expedited(). 341 * @@@ Wait until all pre-existing readers complete. Such readers
342 * will have used the index specified by "idx".
343 * the caller should ensures the ->completed is not changed while checking
344 * and idx = (->completed & 1) ^ 1
170 */ 345 */
171static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void)) 346static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount)
172{ 347{
173 int idx; 348 for (;;) {
174 349 if (srcu_readers_active_idx_check(sp, idx))
175 rcu_lockdep_assert(!lock_is_held(&sp->dep_map) && 350 return true;
176 !lock_is_held(&rcu_bh_lock_map) && 351 if (--trycount <= 0)
177 !lock_is_held(&rcu_lock_map) && 352 return false;
178 !lock_is_held(&rcu_sched_lock_map), 353 udelay(SRCU_RETRY_CHECK_DELAY);
179 "Illegal synchronize_srcu() in same-type SRCU (or RCU) read-side critical section"); 354 }
180 355}
181 idx = sp->completed;
182 mutex_lock(&sp->mutex);
183 356
184 /* 357/*
185 * Check to see if someone else did the work for us while we were 358 * Increment the ->completed counter so that future SRCU readers will
186 * waiting to acquire the lock. We need -two- advances of 359 * use the other rank of the ->c[] and ->seq[] arrays. This allows
187 * the counter, not just one. If there was but one, we might have 360 * us to wait for pre-existing readers in a starvation-free manner.
188 * shown up -after- our helper's first synchronize_sched(), thus 361 */
189 * having failed to prevent CPU-reordering races with concurrent 362static void srcu_flip(struct srcu_struct *sp)
190 * srcu_read_unlock()s on other CPUs (see comment below). So we 363{
191 * either (1) wait for two or (2) supply the second ourselves. 364 sp->completed++;
192 */ 365}
193 366
194 if ((sp->completed - idx) >= 2) { 367/*
195 mutex_unlock(&sp->mutex); 368 * Enqueue an SRCU callback on the specified srcu_struct structure,
196 return; 369 * initiating grace-period processing if it is not already running.
370 */
371void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
372 void (*func)(struct rcu_head *head))
373{
374 unsigned long flags;
375
376 head->next = NULL;
377 head->func = func;
378 spin_lock_irqsave(&sp->queue_lock, flags);
379 rcu_batch_queue(&sp->batch_queue, head);
380 if (!sp->running) {
381 sp->running = true;
382 queue_delayed_work(system_nrt_wq, &sp->work, 0);
197 } 383 }
384 spin_unlock_irqrestore(&sp->queue_lock, flags);
385}
386EXPORT_SYMBOL_GPL(call_srcu);
198 387
199 sync_func(); /* Force memory barrier on all CPUs. */ 388struct rcu_synchronize {
389 struct rcu_head head;
390 struct completion completion;
391};
200 392
201 /* 393/*
202 * The preceding synchronize_sched() ensures that any CPU that 394 * Awaken the corresponding synchronize_srcu() instance now that a
203 * sees the new value of sp->completed will also see any preceding 395 * grace period has elapsed.
204 * changes to data structures made by this CPU. This prevents 396 */
205 * some other CPU from reordering the accesses in its SRCU 397static void wakeme_after_rcu(struct rcu_head *head)
206 * read-side critical section to precede the corresponding 398{
207 * srcu_read_lock() -- ensuring that such references will in 399 struct rcu_synchronize *rcu;
208 * fact be protected.
209 *
210 * So it is now safe to do the flip.
211 */
212 400
213 idx = sp->completed & 0x1; 401 rcu = container_of(head, struct rcu_synchronize, head);
214 sp->completed++; 402 complete(&rcu->completion);
403}
215 404
216 sync_func(); /* Force memory barrier on all CPUs. */ 405static void srcu_advance_batches(struct srcu_struct *sp, int trycount);
406static void srcu_reschedule(struct srcu_struct *sp);
217 407
218 /* 408/*
219 * At this point, because of the preceding synchronize_sched(), 409 * Helper function for synchronize_srcu() and synchronize_srcu_expedited().
220 * all srcu_read_lock() calls using the old counters have completed. 410 */
221 * Their corresponding critical sections might well be still 411static void __synchronize_srcu(struct srcu_struct *sp, int trycount)
222 * executing, but the srcu_read_lock() primitives themselves 412{
223 * will have finished executing. We initially give readers 413 struct rcu_synchronize rcu;
224 * an arbitrarily chosen 10 microseconds to get out of their 414 struct rcu_head *head = &rcu.head;
225 * SRCU read-side critical sections, then loop waiting 1/HZ 415 bool done = false;
226 * seconds per iteration. The 10-microsecond value has done
227 * very well in testing.
228 */
229
230 if (srcu_readers_active_idx(sp, idx))
231 udelay(SYNCHRONIZE_SRCU_READER_DELAY);
232 while (srcu_readers_active_idx(sp, idx))
233 schedule_timeout_interruptible(1);
234 416
235 sync_func(); /* Force memory barrier on all CPUs. */ 417 rcu_lockdep_assert(!lock_is_held(&sp->dep_map) &&
418 !lock_is_held(&rcu_bh_lock_map) &&
419 !lock_is_held(&rcu_lock_map) &&
420 !lock_is_held(&rcu_sched_lock_map),
421 "Illegal synchronize_srcu() in same-type SRCU (or RCU) read-side critical section");
236 422
237 /* 423 init_completion(&rcu.completion);
238 * The preceding synchronize_sched() forces all srcu_read_unlock() 424
239 * primitives that were executing concurrently with the preceding 425 head->next = NULL;
240 * for_each_possible_cpu() loop to have completed by this point. 426 head->func = wakeme_after_rcu;
241 * More importantly, it also forces the corresponding SRCU read-side 427 spin_lock_irq(&sp->queue_lock);
242 * critical sections to have also completed, and the corresponding 428 if (!sp->running) {
243 * references to SRCU-protected data items to be dropped. 429 /* steal the processing owner */
244 * 430 sp->running = true;
245 * Note: 431 rcu_batch_queue(&sp->batch_check0, head);
246 * 432 spin_unlock_irq(&sp->queue_lock);
247 * Despite what you might think at first glance, the 433
248 * preceding synchronize_sched() -must- be within the 434 srcu_advance_batches(sp, trycount);
249 * critical section ended by the following mutex_unlock(). 435 if (!rcu_batch_empty(&sp->batch_done)) {
250 * Otherwise, a task taking the early exit can race 436 BUG_ON(sp->batch_done.head != head);
251 * with a srcu_read_unlock(), which might have executed 437 rcu_batch_dequeue(&sp->batch_done);
252 * just before the preceding srcu_readers_active() check, 438 done = true;
253 * and whose CPU might have reordered the srcu_read_unlock() 439 }
254 * with the preceding critical section. In this case, there 440 /* give the processing owner to work_struct */
255 * is nothing preventing the synchronize_sched() task that is 441 srcu_reschedule(sp);
256 * taking the early exit from freeing a data structure that 442 } else {
257 * is still being referenced (out of order) by the task 443 rcu_batch_queue(&sp->batch_queue, head);
258 * doing the srcu_read_unlock(). 444 spin_unlock_irq(&sp->queue_lock);
259 * 445 }
260 * Alternatively, the comparison with "2" on the early exit
261 * could be changed to "3", but this increases synchronize_srcu()
262 * latency for bulk loads. So the current code is preferred.
263 */
264 446
265 mutex_unlock(&sp->mutex); 447 if (!done)
448 wait_for_completion(&rcu.completion);
266} 449}
267 450
268/** 451/**
@@ -281,7 +464,7 @@ static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void))
281 */ 464 */
282void synchronize_srcu(struct srcu_struct *sp) 465void synchronize_srcu(struct srcu_struct *sp)
283{ 466{
284 __synchronize_srcu(sp, synchronize_sched); 467 __synchronize_srcu(sp, SYNCHRONIZE_SRCU_TRYCOUNT);
285} 468}
286EXPORT_SYMBOL_GPL(synchronize_srcu); 469EXPORT_SYMBOL_GPL(synchronize_srcu);
287 470
@@ -289,18 +472,11 @@ EXPORT_SYMBOL_GPL(synchronize_srcu);
289 * synchronize_srcu_expedited - Brute-force SRCU grace period 472 * synchronize_srcu_expedited - Brute-force SRCU grace period
290 * @sp: srcu_struct with which to synchronize. 473 * @sp: srcu_struct with which to synchronize.
291 * 474 *
292 * Wait for an SRCU grace period to elapse, but use a "big hammer" 475 * Wait for an SRCU grace period to elapse, but be more aggressive about
293 * approach to force the grace period to end quickly. This consumes 476 * spinning rather than blocking when waiting.
294 * significant time on all CPUs and is unfriendly to real-time workloads,
295 * so is thus not recommended for any sort of common-case code. In fact,
296 * if you are using synchronize_srcu_expedited() in a loop, please
297 * restructure your code to batch your updates, and then use a single
298 * synchronize_srcu() instead.
299 * 477 *
300 * Note that it is illegal to call this function while holding any lock 478 * Note that it is illegal to call this function while holding any lock
301 * that is acquired by a CPU-hotplug notifier. And yes, it is also illegal 479 * that is acquired by a CPU-hotplug notifier. It is also illegal to call
302 * to call this function from a CPU-hotplug notifier. Failing to observe
303 * these restriction will result in deadlock. It is also illegal to call
304 * synchronize_srcu_expedited() from the corresponding SRCU read-side 480 * synchronize_srcu_expedited() from the corresponding SRCU read-side
305 * critical section; doing so will result in deadlock. However, it is 481 * critical section; doing so will result in deadlock. However, it is
306 * perfectly legal to call synchronize_srcu_expedited() on one srcu_struct 482 * perfectly legal to call synchronize_srcu_expedited() on one srcu_struct
@@ -309,20 +485,166 @@ EXPORT_SYMBOL_GPL(synchronize_srcu);
309 */ 485 */
310void synchronize_srcu_expedited(struct srcu_struct *sp) 486void synchronize_srcu_expedited(struct srcu_struct *sp)
311{ 487{
312 __synchronize_srcu(sp, synchronize_sched_expedited); 488 __synchronize_srcu(sp, SYNCHRONIZE_SRCU_EXP_TRYCOUNT);
313} 489}
314EXPORT_SYMBOL_GPL(synchronize_srcu_expedited); 490EXPORT_SYMBOL_GPL(synchronize_srcu_expedited);
315 491
316/** 492/**
493 * srcu_barrier - Wait until all in-flight call_srcu() callbacks complete.
494 */
495void srcu_barrier(struct srcu_struct *sp)
496{
497 synchronize_srcu(sp);
498}
499EXPORT_SYMBOL_GPL(srcu_barrier);
500
501/**
317 * srcu_batches_completed - return batches completed. 502 * srcu_batches_completed - return batches completed.
318 * @sp: srcu_struct on which to report batch completion. 503 * @sp: srcu_struct on which to report batch completion.
319 * 504 *
320 * Report the number of batches, correlated with, but not necessarily 505 * Report the number of batches, correlated with, but not necessarily
321 * precisely the same as, the number of grace periods that have elapsed. 506 * precisely the same as, the number of grace periods that have elapsed.
322 */ 507 */
323
324long srcu_batches_completed(struct srcu_struct *sp) 508long srcu_batches_completed(struct srcu_struct *sp)
325{ 509{
326 return sp->completed; 510 return sp->completed;
327} 511}
328EXPORT_SYMBOL_GPL(srcu_batches_completed); 512EXPORT_SYMBOL_GPL(srcu_batches_completed);
513
514#define SRCU_CALLBACK_BATCH 10
515#define SRCU_INTERVAL 1
516
517/*
518 * Move any new SRCU callbacks to the first stage of the SRCU grace
519 * period pipeline.
520 */
521static void srcu_collect_new(struct srcu_struct *sp)
522{
523 if (!rcu_batch_empty(&sp->batch_queue)) {
524 spin_lock_irq(&sp->queue_lock);
525 rcu_batch_move(&sp->batch_check0, &sp->batch_queue);
526 spin_unlock_irq(&sp->queue_lock);
527 }
528}
529
530/*
531 * Core SRCU state machine. Advance callbacks from ->batch_check0 to
532 * ->batch_check1 and then to ->batch_done as readers drain.
533 */
534static void srcu_advance_batches(struct srcu_struct *sp, int trycount)
535{
536 int idx = 1 ^ (sp->completed & 1);
537
538 /*
539 * Because readers might be delayed for an extended period after
540 * fetching ->completed for their index, at any point in time there
541 * might well be readers using both idx=0 and idx=1. We therefore
542 * need to wait for readers to clear from both index values before
543 * invoking a callback.
544 */
545
546 if (rcu_batch_empty(&sp->batch_check0) &&
547 rcu_batch_empty(&sp->batch_check1))
548 return; /* no callbacks need to be advanced */
549
550 if (!try_check_zero(sp, idx, trycount))
551 return; /* failed to advance, will try after SRCU_INTERVAL */
552
553 /*
554 * The callbacks in ->batch_check1 have already done with their
555 * first zero check and flip back when they were enqueued on
556 * ->batch_check0 in a previous invocation of srcu_advance_batches().
557 * (Presumably try_check_zero() returned false during that
558 * invocation, leaving the callbacks stranded on ->batch_check1.)
559 * They are therefore ready to invoke, so move them to ->batch_done.
560 */
561 rcu_batch_move(&sp->batch_done, &sp->batch_check1);
562
563 if (rcu_batch_empty(&sp->batch_check0))
564 return; /* no callbacks need to be advanced */
565 srcu_flip(sp);
566
567 /*
568 * The callbacks in ->batch_check0 just finished their
569 * first check zero and flip, so move them to ->batch_check1
570 * for future checking on the other idx.
571 */
572 rcu_batch_move(&sp->batch_check1, &sp->batch_check0);
573
574 /*
575 * SRCU read-side critical sections are normally short, so check
576 * at least twice in quick succession after a flip.
577 */
578 trycount = trycount < 2 ? 2 : trycount;
579 if (!try_check_zero(sp, idx^1, trycount))
580 return; /* failed to advance, will try after SRCU_INTERVAL */
581
582 /*
583 * The callbacks in ->batch_check1 have now waited for all
584 * pre-existing readers using both idx values. They are therefore
585 * ready to invoke, so move them to ->batch_done.
586 */
587 rcu_batch_move(&sp->batch_done, &sp->batch_check1);
588}
589
590/*
591 * Invoke a limited number of SRCU callbacks that have passed through
592 * their grace period. If there are more to do, SRCU will reschedule
593 * the workqueue.
594 */
595static void srcu_invoke_callbacks(struct srcu_struct *sp)
596{
597 int i;
598 struct rcu_head *head;
599
600 for (i = 0; i < SRCU_CALLBACK_BATCH; i++) {
601 head = rcu_batch_dequeue(&sp->batch_done);
602 if (!head)
603 break;
604 local_bh_disable();
605 head->func(head);
606 local_bh_enable();
607 }
608}
609
610/*
611 * Finished one round of SRCU grace period. Start another if there are
612 * more SRCU callbacks queued, otherwise put SRCU into not-running state.
613 */
614static void srcu_reschedule(struct srcu_struct *sp)
615{
616 bool pending = true;
617
618 if (rcu_batch_empty(&sp->batch_done) &&
619 rcu_batch_empty(&sp->batch_check1) &&
620 rcu_batch_empty(&sp->batch_check0) &&
621 rcu_batch_empty(&sp->batch_queue)) {
622 spin_lock_irq(&sp->queue_lock);
623 if (rcu_batch_empty(&sp->batch_done) &&
624 rcu_batch_empty(&sp->batch_check1) &&
625 rcu_batch_empty(&sp->batch_check0) &&
626 rcu_batch_empty(&sp->batch_queue)) {
627 sp->running = false;
628 pending = false;
629 }
630 spin_unlock_irq(&sp->queue_lock);
631 }
632
633 if (pending)
634 queue_delayed_work(system_nrt_wq, &sp->work, SRCU_INTERVAL);
635}
636
637/*
638 * This is the work-queue function that handles SRCU grace periods.
639 */
640static void process_srcu(struct work_struct *work)
641{
642 struct srcu_struct *sp;
643
644 sp = container_of(work, struct srcu_struct, work.work);
645
646 srcu_collect_new(sp);
647 srcu_advance_batches(sp, 1);
648 srcu_invoke_callbacks(sp);
649 srcu_reschedule(sp);
650}
diff --git a/kernel/timer.c b/kernel/timer.c
index a297ffcf888e..837c552fe838 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -861,7 +861,13 @@ EXPORT_SYMBOL(mod_timer);
861 * 861 *
862 * mod_timer_pinned() is a way to update the expire field of an 862 * mod_timer_pinned() is a way to update the expire field of an
863 * active timer (if the timer is inactive it will be activated) 863 * active timer (if the timer is inactive it will be activated)
864 * and not allow the timer to be migrated to a different CPU. 864 * and to ensure that the timer is scheduled on the current CPU.
865 *
866 * Note that this does not prevent the timer from being migrated
867 * when the current CPU goes offline. If this is a problem for
868 * you, use CPU-hotplug notifiers to handle it correctly, for
869 * example, cancelling the timer when the corresponding CPU goes
870 * offline.
865 * 871 *
866 * mod_timer_pinned(timer, expires) is equivalent to: 872 * mod_timer_pinned(timer, expires) is equivalent to:
867 * 873 *
diff --git a/lib/list_debug.c b/lib/list_debug.c
index 982b850d4e7a..3810b481f940 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -10,6 +10,7 @@
10#include <linux/list.h> 10#include <linux/list.h>
11#include <linux/bug.h> 11#include <linux/bug.h>
12#include <linux/kernel.h> 12#include <linux/kernel.h>
13#include <linux/rculist.h>
13 14
14/* 15/*
15 * Insert a new entry between two known consecutive entries. 16 * Insert a new entry between two known consecutive entries.
@@ -75,3 +76,24 @@ void list_del(struct list_head *entry)
75 entry->prev = LIST_POISON2; 76 entry->prev = LIST_POISON2;
76} 77}
77EXPORT_SYMBOL(list_del); 78EXPORT_SYMBOL(list_del);
79
80/*
81 * RCU variants.
82 */
83void __list_add_rcu(struct list_head *new,
84 struct list_head *prev, struct list_head *next)
85{
86 WARN(next->prev != prev,
87 "list_add_rcu corruption. next->prev should be "
88 "prev (%p), but was %p. (next=%p).\n",
89 prev, next->prev, next);
90 WARN(prev->next != next,
91 "list_add_rcu corruption. prev->next should be "
92 "next (%p), but was %p. (prev=%p).\n",
93 next, prev->next, prev);
94 new->next = next;
95 new->prev = prev;
96 rcu_assign_pointer(list_next_rcu(prev), new);
97 next->prev = new;
98}
99EXPORT_SYMBOL(__list_add_rcu);