-rw-r--r-- | Documentation/RCU/torture.txt | 15
-rw-r--r-- | Documentation/kernel-parameters.txt | 88
-rw-r--r-- | MAINTAINERS | 14
-rw-r--r-- | arch/um/drivers/mconsole_kern.c | 1
-rw-r--r-- | include/linux/rculist.h | 40
-rw-r--r-- | include/linux/rcupdate.h | 20
-rw-r--r-- | include/linux/rcutiny.h | 11
-rw-r--r-- | include/linux/rcutree.h | 19
-rw-r--r-- | include/linux/sched.h | 10
-rw-r--r-- | include/linux/srcu.h | 48
-rw-r--r-- | include/trace/events/rcu.h | 2
-rw-r--r-- | init/Kconfig | 50
-rw-r--r-- | kernel/rcupdate.c | 28
-rw-r--r-- | kernel/rcutiny_plugin.h | 16
-rw-r--r-- | kernel/rcutorture.c | 257
-rw-r--r-- | kernel/rcutree.c | 332
-rw-r--r-- | kernel/rcutree.h | 23
-rw-r--r-- | kernel/rcutree_plugin.h | 154
-rw-r--r-- | kernel/rcutree_trace.c | 4
-rw-r--r-- | kernel/sched/core.c | 1
-rw-r--r-- | kernel/srcu.c | 548
-rw-r--r-- | kernel/timer.c | 8
-rw-r--r-- | lib/list_debug.c | 22
23 files changed, 1358 insertions, 353 deletions
diff --git a/Documentation/RCU/torture.txt b/Documentation/RCU/torture.txt index 375d3fb71437..4ddf3913fd8c 100644 --- a/Documentation/RCU/torture.txt +++ b/Documentation/RCU/torture.txt | |||
@@ -47,6 +47,16 @@ irqreader Says to invoke RCU readers from irq level. This is currently | |||
47 | permit this. (Or, more accurately, variants of RCU that do | 47 | permit this. (Or, more accurately, variants of RCU that do |
48 | -not- permit this know to ignore this variable.) | 48 | -not- permit this know to ignore this variable.) |
49 | 49 | ||
50 | n_barrier_cbs If this is nonzero, RCU barrier testing will be conducted, | ||
51 | in which case n_barrier_cbs specifies the number of | ||
52 | RCU callbacks (and corresponding kthreads) to use for | ||
53 | this testing. The value cannot be negative. If you | ||
54 | specify this to be non-zero when torture_type indicates a | ||
55 | synchronous RCU implementation (one for which a member of | ||
56 | the synchronize_rcu() rather than the call_rcu() family is | ||
57 | used -- see the documentation for torture_type below), an | ||
58 | error will be reported and no testing will be carried out. | ||
59 | |||
50 | nfakewriters This is the number of RCU fake writer threads to run. Fake | 60 | nfakewriters This is the number of RCU fake writer threads to run. Fake |
51 | writer threads repeatedly use the synchronous "wait for | 61 | writer threads repeatedly use the synchronous "wait for |
52 | current readers" function of the interface selected by | 62 | current readers" function of the interface selected by |
@@ -188,7 +198,7 @@ OUTPUT | |||
188 | The statistics output is as follows: | 198 | The statistics output is as follows: |
189 | 199 | ||
190 | rcu-torture:--- Start of test: nreaders=16 nfakewriters=4 stat_interval=30 verbose=0 test_no_idle_hz=1 shuffle_interval=3 stutter=5 irqreader=1 fqs_duration=0 fqs_holdoff=0 fqs_stutter=3 test_boost=1/0 test_boost_interval=7 test_boost_duration=4 | 200 | rcu-torture:--- Start of test: nreaders=16 nfakewriters=4 stat_interval=30 verbose=0 test_no_idle_hz=1 shuffle_interval=3 stutter=5 irqreader=1 fqs_duration=0 fqs_holdoff=0 fqs_stutter=3 test_boost=1/0 test_boost_interval=7 test_boost_duration=4 |
191 | rcu-torture: rtc: (null) ver: 155441 tfle: 0 rta: 155441 rtaf: 8884 rtf: 155440 rtmbe: 0 rtbke: 0 rtbre: 0 rtbf: 0 rtb: 0 nt: 3055767 | 201 | rcu-torture: rtc: (null) ver: 155441 tfle: 0 rta: 155441 rtaf: 8884 rtf: 155440 rtmbe: 0 rtbe: 0 rtbke: 0 rtbre: 0 rtbf: 0 rtb: 0 nt: 3055767 |
192 | rcu-torture: Reader Pipe: 727860534 34213 0 0 0 0 0 0 0 0 0 | 202 | rcu-torture: Reader Pipe: 727860534 34213 0 0 0 0 0 0 0 0 0 |
193 | rcu-torture: Reader Batch: 727877838 17003 0 0 0 0 0 0 0 0 0 | 203 | rcu-torture: Reader Batch: 727877838 17003 0 0 0 0 0 0 0 0 0 |
194 | rcu-torture: Free-Block Circulation: 155440 155440 155440 155440 155440 155440 155440 155440 155440 155440 0 | 204 | rcu-torture: Free-Block Circulation: 155440 155440 155440 155440 155440 155440 155440 155440 155440 155440 0 |
@@ -230,6 +240,9 @@ o "rtmbe": A non-zero value indicates that rcutorture believes that | |||
230 | rcu_assign_pointer() and rcu_dereference() are not working | 240 | rcu_assign_pointer() and rcu_dereference() are not working |
231 | correctly. This value should be zero. | 241 | correctly. This value should be zero. |
232 | 242 | ||
243 | o "rtbe": A non-zero value indicates that one of the rcu_barrier() | ||
244 | family of functions is not working correctly. | ||
245 | |||
233 | o "rtbke": rcutorture was unable to create the real-time kthreads | 246 | o "rtbke": rcutorture was unable to create the real-time kthreads |
234 | used to force RCU priority inversion. This value should be zero. | 247 | used to force RCU priority inversion. This value should be zero. |
235 | 248 | ||
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index f995195409fd..0e90453e4acb 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -2333,18 +2333,100 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
2333 | ramdisk_size= [RAM] Sizes of RAM disks in kilobytes | 2333 | ramdisk_size= [RAM] Sizes of RAM disks in kilobytes |
2334 | See Documentation/blockdev/ramdisk.txt. | 2334 | See Documentation/blockdev/ramdisk.txt. |
2335 | 2335 | ||
2336 | rcupdate.blimit= [KNL,BOOT] | 2336 | rcutree.blimit= [KNL,BOOT] |
2337 | Set maximum number of finished RCU callbacks to process | 2337 | Set maximum number of finished RCU callbacks to process |
2338 | in one batch. | 2338 | in one batch. |
2339 | 2339 | ||
2340 | rcupdate.qhimark= [KNL,BOOT] | 2340 | rcutree.qhimark= [KNL,BOOT] |
2341 | Set threshold of queued | 2341 | Set threshold of queued |
2342 | RCU callbacks over which batch limiting is disabled. | 2342 | RCU callbacks over which batch limiting is disabled. |
2343 | 2343 | ||
2344 | rcupdate.qlowmark= [KNL,BOOT] | 2344 | rcutree.qlowmark= [KNL,BOOT] |
2345 | Set threshold of queued RCU callbacks below which | 2345 | Set threshold of queued RCU callbacks below which |
2346 | batch limiting is re-enabled. | 2346 | batch limiting is re-enabled. |
2347 | 2347 | ||
2348 | rcutree.rcu_cpu_stall_suppress= [KNL,BOOT] | ||
2349 | Suppress RCU CPU stall warning messages. | ||
2350 | |||
2351 | rcutree.rcu_cpu_stall_timeout= [KNL,BOOT] | ||
2352 | Set timeout for RCU CPU stall warning messages. | ||
2353 | |||
2354 | rcutorture.fqs_duration= [KNL,BOOT] | ||
2355 | Set duration of force_quiescent_state bursts. | ||
2356 | |||
2357 | rcutorture.fqs_holdoff= [KNL,BOOT] | ||
2358 | Set holdoff time within force_quiescent_state bursts. | ||
2359 | |||
2360 | rcutorture.fqs_stutter= [KNL,BOOT] | ||
2361 | Set wait time between force_quiescent_state bursts. | ||
2362 | |||
2363 | rcutorture.irqreader= [KNL,BOOT] | ||
2364 | Test RCU readers from irq handlers. | ||
2365 | |||
2366 | rcutorture.n_barrier_cbs= [KNL,BOOT] | ||
2367 | Set callbacks/threads for rcu_barrier() testing. | ||
2368 | |||
2369 | rcutorture.nfakewriters= [KNL,BOOT] | ||
2370 | Set number of concurrent RCU writers. These just | ||
2371 | stress RCU; they don't participate in the actual ||
2372 | test, hence the "fake". | ||
2373 | |||
2374 | rcutorture.nreaders= [KNL,BOOT] | ||
2375 | Set number of RCU readers. | ||
2376 | |||
2377 | rcutorture.onoff_holdoff= [KNL,BOOT] | ||
2378 | Set time (s) after boot for CPU-hotplug testing. | ||
2379 | |||
2380 | rcutorture.onoff_interval= [KNL,BOOT] | ||
2381 | Set time (s) between CPU-hotplug operations, or | ||
2382 | zero to disable CPU-hotplug testing. | ||
2383 | |||
2384 | rcutorture.shuffle_interval= [KNL,BOOT] | ||
2385 | Set task-shuffle interval (s). Shuffling tasks | ||
2386 | allows some CPUs to go into dyntick-idle mode | ||
2387 | during the rcutorture test. | ||
2388 | |||
2389 | rcutorture.shutdown_secs= [KNL,BOOT] | ||
2390 | Set time (s) after boot at which to shut down the ||
2391 | system. This is useful for hands-off automated testing. ||
2392 | |||
2393 | rcutorture.stall_cpu= [KNL,BOOT] | ||
2394 | Duration of CPU stall (s) to test RCU CPU stall | ||
2395 | warnings, zero to disable. | ||
2396 | |||
2397 | rcutorture.stall_cpu_holdoff= [KNL,BOOT] | ||
2398 | Time to wait (s) after boot before inducing stall. | ||
2399 | |||
2400 | rcutorture.stat_interval= [KNL,BOOT] | ||
2401 | Time (s) between statistics printk()s. | ||
2402 | |||
2403 | rcutorture.stutter= [KNL,BOOT] | ||
2404 | Time (s) to stutter testing; for example, specifying ||
2405 | five seconds causes the test to run for five seconds, | ||
2406 | wait for five seconds, and so on. This tests RCU's | ||
2407 | ability to transition abruptly to and from idle. | ||
2408 | |||
2409 | rcutorture.test_boost= [KNL,BOOT] | ||
2410 | Test RCU priority boosting? 0=no, 1=maybe, 2=yes. | ||
2411 | "Maybe" means test if the RCU implementation | ||
2412 | under test supports RCU priority boosting. ||
2413 | |||
2414 | rcutorture.test_boost_duration= [KNL,BOOT] | ||
2415 | Duration (s) of each individual boost test. | ||
2416 | |||
2417 | rcutorture.test_boost_interval= [KNL,BOOT] | ||
2418 | Interval (s) between each boost test. | ||
2419 | |||
2420 | rcutorture.test_no_idle_hz= [KNL,BOOT] | ||
2421 | Test RCU's dyntick-idle handling. See also the | ||
2422 | rcutorture.shuffle_interval parameter. | ||
2423 | |||
2424 | rcutorture.torture_type= [KNL,BOOT] | ||
2425 | Specify the RCU implementation to test. | ||
2426 | |||
2427 | rcutorture.verbose= [KNL,BOOT] | ||
2428 | Enable additional printk() statements. | ||
2429 | |||
2348 | rdinit= [KNL] | 2430 | rdinit= [KNL] |
2349 | Format: <full_path> | 2431 | Format: <full_path> |
2350 | Run specified binary instead of /init from the ramdisk, | 2432 | Run specified binary instead of /init from the ramdisk, |
diff --git a/MAINTAINERS b/MAINTAINERS index 73a8b561414b..5ccca1ca0077 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
@@ -5598,14 +5598,13 @@ F: net/rds/ | |||
5598 | READ-COPY UPDATE (RCU) | 5598 | READ-COPY UPDATE (RCU) |
5599 | M: Dipankar Sarma <dipankar@in.ibm.com> | 5599 | M: Dipankar Sarma <dipankar@in.ibm.com> |
5600 | M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> | 5600 | M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> |
5601 | W: http://www.rdrop.com/users/paulmck/rclock/ | 5601 | W: http://www.rdrop.com/users/paulmck/RCU/ |
5602 | S: Supported | 5602 | S: Supported |
5603 | T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git | 5603 | T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git |
5604 | F: Documentation/RCU/ | 5604 | F: Documentation/RCU/ |
5605 | X: Documentation/RCU/torture.txt | ||
5605 | F: include/linux/rcu* | 5606 | F: include/linux/rcu* |
5606 | F: include/linux/srcu* | ||
5607 | F: kernel/rcu* | 5607 | F: kernel/rcu* |
5608 | F: kernel/srcu* | ||
5609 | X: kernel/rcutorture.c | 5608 | X: kernel/rcutorture.c |
5610 | 5609 | ||
5611 | REAL TIME CLOCK (RTC) SUBSYSTEM | 5610 | REAL TIME CLOCK (RTC) SUBSYSTEM |
@@ -6122,6 +6121,15 @@ S: Maintained | |||
6122 | F: include/linux/sl?b*.h | 6121 | F: include/linux/sl?b*.h |
6123 | F: mm/sl?b.c | 6122 | F: mm/sl?b.c |
6124 | 6123 | ||
6124 | SLEEPABLE READ-COPY UPDATE (SRCU) | ||
6125 | M: Lai Jiangshan <laijs@cn.fujitsu.com> | ||
6126 | M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> | ||
6127 | W: http://www.rdrop.com/users/paulmck/RCU/ | ||
6128 | S: Supported | ||
6129 | T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git | ||
6130 | F: include/linux/srcu* | ||
6131 | F: kernel/srcu* | ||
6132 | |||
6125 | SMC91x ETHERNET DRIVER | 6133 | SMC91x ETHERNET DRIVER |
6126 | M: Nicolas Pitre <nico@fluxnic.net> | 6134 | M: Nicolas Pitre <nico@fluxnic.net> |
6127 | S: Odd Fixes | 6135 | S: Odd Fixes |
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index 43b39d61b538..88e466b159dc 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c | |||
@@ -705,6 +705,7 @@ static void stack_proc(void *arg) | |||
705 | struct task_struct *from = current, *to = arg; | 705 | struct task_struct *from = current, *to = arg; |
706 | 706 | ||
707 | to->thread.saved_task = from; | 707 | to->thread.saved_task = from; |
708 | rcu_switch_from(from); | ||
708 | switch_to(from, to, from); | 709 | switch_to(from, to, from); |
709 | } | 710 | } |
710 | 711 | ||
diff --git a/include/linux/rculist.h b/include/linux/rculist.h index d079290843a9..e0f0fab20415 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h | |||
@@ -30,6 +30,7 @@ | |||
30 | * This is only for internal list manipulation where we know | 30 | * This is only for internal list manipulation where we know |
31 | * the prev/next entries already! | 31 | * the prev/next entries already! |
32 | */ | 32 | */ |
33 | #ifndef CONFIG_DEBUG_LIST | ||
33 | static inline void __list_add_rcu(struct list_head *new, | 34 | static inline void __list_add_rcu(struct list_head *new, |
34 | struct list_head *prev, struct list_head *next) | 35 | struct list_head *prev, struct list_head *next) |
35 | { | 36 | { |
@@ -38,6 +39,10 @@ static inline void __list_add_rcu(struct list_head *new, | |||
38 | rcu_assign_pointer(list_next_rcu(prev), new); | 39 | rcu_assign_pointer(list_next_rcu(prev), new); |
39 | next->prev = new; | 40 | next->prev = new; |
40 | } | 41 | } |
42 | #else | ||
43 | extern void __list_add_rcu(struct list_head *new, | ||
44 | struct list_head *prev, struct list_head *next); | ||
45 | #endif | ||
41 | 46 | ||
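When CONFIG_DEBUG_LIST is set, the inline version above gives way to an out-of-line definition in lib/list_debug.c (that file appears in the diffstat but its hunk is not shown here). A plausible sketch of the checked variant, assuming it mirrors the sanity checks already done for the non-RCU __list_add(); the exact WARN() wording is an assumption:

	void __list_add_rcu(struct list_head *new,
			    struct list_head *prev, struct list_head *next)
	{
		/* Catch corrupted neighbours before splicing in the new element. */
		WARN(next->prev != prev,
		     "list_add_rcu corruption: next->prev should be prev (%p), was %p\n",
		     prev, next->prev);
		WARN(prev->next != next,
		     "list_add_rcu corruption: prev->next should be next (%p), was %p\n",
		     next, prev->next);
		new->next = next;
		new->prev = prev;
		rcu_assign_pointer(list_next_rcu(prev), new);
		next->prev = new;
	}
	EXPORT_SYMBOL(__list_add_rcu);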
42 | /** | 47 | /** |
43 | * list_add_rcu - add a new entry to rcu-protected list | 48 | * list_add_rcu - add a new entry to rcu-protected list |
@@ -108,7 +113,7 @@ static inline void list_add_tail_rcu(struct list_head *new, | |||
108 | */ | 113 | */ |
109 | static inline void list_del_rcu(struct list_head *entry) | 114 | static inline void list_del_rcu(struct list_head *entry) |
110 | { | 115 | { |
111 | __list_del(entry->prev, entry->next); | 116 | __list_del_entry(entry); |
112 | entry->prev = LIST_POISON2; | 117 | entry->prev = LIST_POISON2; |
113 | } | 118 | } |
114 | 119 | ||
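For reference, a minimal writer-side removal pattern built on list_del_rcu(); the struct, lock, and free path are hypothetical illustrations (the struct echoes the "struct foo" used in the comments below), not part of this patch:

	struct foo {
		struct list_head list_member;
		struct rcu_head rcu;
	};

	static DEFINE_SPINLOCK(mylist_lock);	/* writers serialize among themselves */

	static void foo_remove(struct foo *p)
	{
		spin_lock(&mylist_lock);
		list_del_rcu(&p->list_member);	/* concurrent readers may still see p */
		spin_unlock(&mylist_lock);
		kfree_rcu(p, rcu);		/* free only after a grace period elapses */
	}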
@@ -228,18 +233,43 @@ static inline void list_splice_init_rcu(struct list_head *list, | |||
228 | }) | 233 | }) |
229 | 234 | ||
230 | /** | 235 | /** |
231 | * list_first_entry_rcu - get the first element from a list | 236 | * Where are list_empty_rcu() and list_first_entry_rcu()? |
237 | * | ||
238 | * Implementing those functions following their counterparts list_empty() and | ||
239 | * list_first_entry() is not advisable because they lead to subtle race | ||
240 | * conditions as the following snippet shows: | ||
241 | * | ||
242 | * if (!list_empty_rcu(mylist)) { | ||
243 | * struct foo *bar = list_first_entry_rcu(mylist, struct foo, list_member); | ||
244 | * do_something(bar); | ||
245 | * } | ||
246 | * | ||
247 | * The list may not be empty when list_empty_rcu checks it, but it may be when | ||
248 | * list_first_entry_rcu rereads the ->next pointer. | ||
249 | * | ||
250 | * Rereading the ->next pointer is not a problem for list_empty() and | ||
251 | * list_first_entry() because they would be protected by a lock that blocks | ||
252 | * writers. | ||
253 | * | ||
254 | * See list_first_or_null_rcu for an alternative. | ||
255 | */ | ||
256 | |||
257 | /** | ||
258 | * list_first_or_null_rcu - get the first element from a list | ||
232 | * @ptr: the list head to take the element from. | 259 | * @ptr: the list head to take the element from. |
233 | * @type: the type of the struct this is embedded in. | 260 | * @type: the type of the struct this is embedded in. |
234 | * @member: the name of the list_struct within the struct. | 261 | * @member: the name of the list_struct within the struct. |
235 | * | 262 | * |
236 | * Note, that list is expected to be not empty. | 263 | * Note that if the list is empty, it returns NULL. |
237 | * | 264 | * |
238 | * This primitive may safely run concurrently with the _rcu list-mutation | 265 | * This primitive may safely run concurrently with the _rcu list-mutation |
239 | * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). | 266 | * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). |
240 | */ | 267 | */ |
241 | #define list_first_entry_rcu(ptr, type, member) \ | 268 | #define list_first_or_null_rcu(ptr, type, member) \ |
242 | list_entry_rcu((ptr)->next, type, member) | 269 | ({struct list_head *__ptr = (ptr); \ |
270 | struct list_head __rcu *__next = list_next_rcu(__ptr); \ | ||
271 | likely(__ptr != __next) ? container_of(__next, type, member) : NULL; \ | ||
272 | }) | ||
243 | 273 | ||
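The reader side then collapses the empty check and the first-element fetch into a single RCU-safe access. A short sketch, reusing the hypothetical struct foo, mylist, and do_something() names from the comment above:

	static void foo_consume_first(struct list_head *mylist)
	{
		struct foo *bar;

		rcu_read_lock();
		bar = list_first_or_null_rcu(mylist, struct foo, list_member);
		if (bar)
			do_something(bar);	/* one dereference; no empty-vs-first race */
		rcu_read_unlock();
	}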
244 | /** | 274 | /** |
245 | * list_for_each_entry_rcu - iterate over rcu list of given type | 275 | * list_for_each_entry_rcu - iterate over rcu list of given type |
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 20fb776a1d4a..26d1a47591f1 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h | |||
@@ -184,12 +184,14 @@ static inline int rcu_preempt_depth(void) | |||
184 | /* Internal to kernel */ | 184 | /* Internal to kernel */ |
185 | extern void rcu_sched_qs(int cpu); | 185 | extern void rcu_sched_qs(int cpu); |
186 | extern void rcu_bh_qs(int cpu); | 186 | extern void rcu_bh_qs(int cpu); |
187 | extern void rcu_preempt_note_context_switch(void); | ||
187 | extern void rcu_check_callbacks(int cpu, int user); | 188 | extern void rcu_check_callbacks(int cpu, int user); |
188 | struct notifier_block; | 189 | struct notifier_block; |
189 | extern void rcu_idle_enter(void); | 190 | extern void rcu_idle_enter(void); |
190 | extern void rcu_idle_exit(void); | 191 | extern void rcu_idle_exit(void); |
191 | extern void rcu_irq_enter(void); | 192 | extern void rcu_irq_enter(void); |
192 | extern void rcu_irq_exit(void); | 193 | extern void rcu_irq_exit(void); |
194 | extern void exit_rcu(void); | ||
193 | 195 | ||
194 | /** | 196 | /** |
195 | * RCU_NONIDLE - Indicate idle-loop code that needs RCU readers | 197 | * RCU_NONIDLE - Indicate idle-loop code that needs RCU readers |
@@ -922,6 +924,21 @@ void __kfree_rcu(struct rcu_head *head, unsigned long offset) | |||
922 | kfree_call_rcu(head, (rcu_callback)offset); | 924 | kfree_call_rcu(head, (rcu_callback)offset); |
923 | } | 925 | } |
924 | 926 | ||
927 | /* | ||
928 | * Does the specified offset indicate that the corresponding rcu_head | ||
929 | * structure can be handled by kfree_rcu()? | ||
930 | */ | ||
931 | #define __is_kfree_rcu_offset(offset) ((offset) < 4096) | ||
932 | |||
933 | /* | ||
934 | * Helper macro for kfree_rcu() to prevent argument-expansion eyestrain. | ||
935 | */ | ||
936 | #define __kfree_rcu(head, offset) \ | ||
937 | do { \ | ||
938 | BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); \ | ||
939 | call_rcu(head, (void (*)(struct rcu_head *))(unsigned long)(offset)); \ | ||
940 | } while (0) | ||
941 | |||
925 | /** | 942 | /** |
926 | * kfree_rcu() - kfree an object after a grace period. | 943 | * kfree_rcu() - kfree an object after a grace period. |
927 | * @ptr: pointer to kfree | 944 | * @ptr: pointer to kfree |
@@ -944,6 +961,9 @@ void __kfree_rcu(struct rcu_head *head, unsigned long offset) | |||
944 | * | 961 | * |
945 | * Note that the allowable offset might decrease in the future, for example, | 962 | * Note that the allowable offset might decrease in the future, for example, |
946 | * to allow something like kmem_cache_free_rcu(). | 963 | * to allow something like kmem_cache_free_rcu(). |
964 | * | ||
965 | * The BUILD_BUG_ON check must not involve any function calls, hence the | ||
966 | * checks are done in macros here. | ||
947 | */ | 967 | */ |
948 | #define kfree_rcu(ptr, rcu_head) \ | 968 | #define kfree_rcu(ptr, rcu_head) \ |
949 | __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head)) | 969 | __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head)) |
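As a concrete illustration (hypothetical struct, not part of the patch): an rcu_head embedded near the start of a structure has an offset far below 4096, so __is_kfree_rcu_offset() accepts it and kfree_rcu() can encode the offset in place of the callback pointer:

	struct blob {
		int a;
		struct rcu_head rh;	/* small offset, well under the 4096 limit */
	};

	static void blob_release(struct blob *p)
	{
		/* Expands to __kfree_rcu(&p->rh, offsetof(struct blob, rh)); the
		 * offset rides in place of the callback pointer and is decoded
		 * by the RCU core once the grace period ends. */
		kfree_rcu(p, rh);
	}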
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index e93df77176d1..adb5e5a38cae 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h | |||
@@ -87,14 +87,6 @@ static inline void kfree_call_rcu(struct rcu_head *head, | |||
87 | 87 | ||
88 | #ifdef CONFIG_TINY_RCU | 88 | #ifdef CONFIG_TINY_RCU |
89 | 89 | ||
90 | static inline void rcu_preempt_note_context_switch(void) | ||
91 | { | ||
92 | } | ||
93 | |||
94 | static inline void exit_rcu(void) | ||
95 | { | ||
96 | } | ||
97 | |||
98 | static inline int rcu_needs_cpu(int cpu) | 90 | static inline int rcu_needs_cpu(int cpu) |
99 | { | 91 | { |
100 | return 0; | 92 | return 0; |
@@ -102,8 +94,6 @@ static inline int rcu_needs_cpu(int cpu) | |||
102 | 94 | ||
103 | #else /* #ifdef CONFIG_TINY_RCU */ | 95 | #else /* #ifdef CONFIG_TINY_RCU */ |
104 | 96 | ||
105 | void rcu_preempt_note_context_switch(void); | ||
106 | extern void exit_rcu(void); | ||
107 | int rcu_preempt_needs_cpu(void); | 97 | int rcu_preempt_needs_cpu(void); |
108 | 98 | ||
109 | static inline int rcu_needs_cpu(int cpu) | 99 | static inline int rcu_needs_cpu(int cpu) |
@@ -116,7 +106,6 @@ static inline int rcu_needs_cpu(int cpu) | |||
116 | static inline void rcu_note_context_switch(int cpu) | 106 | static inline void rcu_note_context_switch(int cpu) |
117 | { | 107 | { |
118 | rcu_sched_qs(cpu); | 108 | rcu_sched_qs(cpu); |
119 | rcu_preempt_note_context_switch(); | ||
120 | } | 109 | } |
121 | 110 | ||
122 | /* | 111 | /* |
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index e8ee5dd0854c..3c6083cde4fc 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h | |||
@@ -45,18 +45,6 @@ static inline void rcu_virt_note_context_switch(int cpu) | |||
45 | rcu_note_context_switch(cpu); | 45 | rcu_note_context_switch(cpu); |
46 | } | 46 | } |
47 | 47 | ||
48 | #ifdef CONFIG_TREE_PREEMPT_RCU | ||
49 | |||
50 | extern void exit_rcu(void); | ||
51 | |||
52 | #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ | ||
53 | |||
54 | static inline void exit_rcu(void) | ||
55 | { | ||
56 | } | ||
57 | |||
58 | #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ | ||
59 | |||
60 | extern void synchronize_rcu_bh(void); | 48 | extern void synchronize_rcu_bh(void); |
61 | extern void synchronize_sched_expedited(void); | 49 | extern void synchronize_sched_expedited(void); |
62 | extern void synchronize_rcu_expedited(void); | 50 | extern void synchronize_rcu_expedited(void); |
@@ -98,13 +86,6 @@ extern void rcu_force_quiescent_state(void); | |||
98 | extern void rcu_bh_force_quiescent_state(void); | 86 | extern void rcu_bh_force_quiescent_state(void); |
99 | extern void rcu_sched_force_quiescent_state(void); | 87 | extern void rcu_sched_force_quiescent_state(void); |
100 | 88 | ||
101 | /* A context switch is a grace period for RCU-sched and RCU-bh. */ | ||
102 | static inline int rcu_blocking_is_gp(void) | ||
103 | { | ||
104 | might_sleep(); /* Check for RCU read-side critical section. */ | ||
105 | return num_online_cpus() == 1; | ||
106 | } | ||
107 | |||
108 | extern void rcu_scheduler_starting(void); | 89 | extern void rcu_scheduler_starting(void); |
109 | extern int rcu_scheduler_active __read_mostly; | 90 | extern int rcu_scheduler_active __read_mostly; |
110 | 91 | ||
diff --git a/include/linux/sched.h b/include/linux/sched.h index 81a173c0897d..8f3fd945070f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -1905,12 +1905,22 @@ static inline void rcu_copy_process(struct task_struct *p) | |||
1905 | INIT_LIST_HEAD(&p->rcu_node_entry); | 1905 | INIT_LIST_HEAD(&p->rcu_node_entry); |
1906 | } | 1906 | } |
1907 | 1907 | ||
1908 | static inline void rcu_switch_from(struct task_struct *prev) | ||
1909 | { | ||
1910 | if (prev->rcu_read_lock_nesting != 0) | ||
1911 | rcu_preempt_note_context_switch(); | ||
1912 | } | ||
1913 | |||
1908 | #else | 1914 | #else |
1909 | 1915 | ||
1910 | static inline void rcu_copy_process(struct task_struct *p) | 1916 | static inline void rcu_copy_process(struct task_struct *p) |
1911 | { | 1917 | { |
1912 | } | 1918 | } |
1913 | 1919 | ||
1920 | static inline void rcu_switch_from(struct task_struct *prev) | ||
1921 | { | ||
1922 | } | ||
1923 | |||
1914 | #endif | 1924 | #endif |
1915 | 1925 | ||
1916 | #ifdef CONFIG_SMP | 1926 | #ifdef CONFIG_SMP |
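The intended call site for rcu_switch_from() is immediately before the low-level context switch, as the mconsole_kern.c hunk above already shows; the one-line kernel/sched/core.c change in the diffstat presumably follows the same pattern. A hedged sketch of that caller shape, with illustrative names since the scheduler hunk itself is not shown:

	static inline void example_context_switch(struct task_struct *prev,
						  struct task_struct *next)
	{
		/* ...scheduler bookkeeping elided... */
		rcu_switch_from(prev);		/* no-op unless prev is inside an RCU read-side section */
		switch_to(prev, next, prev);
		/* ...post-switch cleanup elided... */
	}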
diff --git a/include/linux/srcu.h b/include/linux/srcu.h index d3d5fa54f25e..55a5c52cbb25 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h | |||
@@ -29,26 +29,35 @@ | |||
29 | 29 | ||
30 | #include <linux/mutex.h> | 30 | #include <linux/mutex.h> |
31 | #include <linux/rcupdate.h> | 31 | #include <linux/rcupdate.h> |
32 | #include <linux/workqueue.h> | ||
32 | 33 | ||
33 | struct srcu_struct_array { | 34 | struct srcu_struct_array { |
34 | int c[2]; | 35 | unsigned long c[2]; |
36 | unsigned long seq[2]; | ||
37 | }; | ||
38 | |||
39 | struct rcu_batch { | ||
40 | struct rcu_head *head, **tail; | ||
35 | }; | 41 | }; |
36 | 42 | ||
37 | struct srcu_struct { | 43 | struct srcu_struct { |
38 | int completed; | 44 | unsigned completed; |
39 | struct srcu_struct_array __percpu *per_cpu_ref; | 45 | struct srcu_struct_array __percpu *per_cpu_ref; |
40 | struct mutex mutex; | 46 | spinlock_t queue_lock; /* protect ->batch_queue, ->running */ |
47 | bool running; | ||
48 | /* callbacks just queued */ | ||
49 | struct rcu_batch batch_queue; | ||
50 | /* callbacks try to do the first check_zero */ | ||
51 | struct rcu_batch batch_check0; | ||
52 | /* callbacks done with the first check_zero and the flip */ | ||
53 | struct rcu_batch batch_check1; | ||
54 | struct rcu_batch batch_done; | ||
55 | struct delayed_work work; | ||
41 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 56 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
42 | struct lockdep_map dep_map; | 57 | struct lockdep_map dep_map; |
43 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | 58 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ |
44 | }; | 59 | }; |
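The four rcu_batch stages above form a simple singly linked queue with a tail pointer; callbacks advance from batch_queue through the two check_zero stages to batch_done. The helpers in kernel/srcu.c presumably look roughly like this sketch (names and exact shape assumed):

	static inline void rcu_batch_init(struct rcu_batch *b)
	{
		b->head = NULL;
		b->tail = &b->head;
	}

	static inline void rcu_batch_queue(struct rcu_batch *b, struct rcu_head *head)
	{
		*b->tail = head;		/* append at the tail */
		b->tail = &head->next;
	}

	static inline bool rcu_batch_empty(struct rcu_batch *b)
	{
		return b->tail == &b->head;
	}

	static inline struct rcu_head *rcu_batch_dequeue(struct rcu_batch *b)
	{
		struct rcu_head *head;

		if (rcu_batch_empty(b))
			return NULL;
		head = b->head;
		b->head = head->next;
		if (b->tail == &head->next)	/* removed the last element */
			rcu_batch_init(b);
		return head;
	}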
45 | 60 | ||
46 | #ifndef CONFIG_PREEMPT | ||
47 | #define srcu_barrier() barrier() | ||
48 | #else /* #ifndef CONFIG_PREEMPT */ | ||
49 | #define srcu_barrier() | ||
50 | #endif /* #else #ifndef CONFIG_PREEMPT */ | ||
51 | |||
52 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 61 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
53 | 62 | ||
54 | int __init_srcu_struct(struct srcu_struct *sp, const char *name, | 63 | int __init_srcu_struct(struct srcu_struct *sp, const char *name, |
@@ -67,12 +76,33 @@ int init_srcu_struct(struct srcu_struct *sp); | |||
67 | 76 | ||
68 | #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | 77 | #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ |
69 | 78 | ||
79 | /** | ||
80 | * call_srcu() - Queue a callback for invocation after an SRCU grace period | ||
81 | * @sp: srcu_struct on which to queue the callback ||
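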
82 | * @head: structure to be used for queueing the SRCU callback. | ||
83 | * @func: function to be invoked after the SRCU grace period | ||
84 | * | ||
85 | * The callback function will be invoked some time after a full SRCU | ||
86 | * grace period elapses, in other words after all pre-existing SRCU | ||
87 | * read-side critical sections have completed. However, the callback | ||
88 | * function might well execute concurrently with other SRCU read-side | ||
89 | * critical sections that started after call_srcu() was invoked. SRCU | ||
90 | * read-side critical sections are delimited by srcu_read_lock() and | ||
91 | * srcu_read_unlock(), and may be nested. | ||
92 | * | ||
93 | * The callback will be invoked from process context, but must nevertheless | ||
94 | * be fast and must not block. | ||
95 | */ | ||
96 | void call_srcu(struct srcu_struct *sp, struct rcu_head *head, | ||
97 | void (*func)(struct rcu_head *head)); | ||
98 | |||
70 | void cleanup_srcu_struct(struct srcu_struct *sp); | 99 | void cleanup_srcu_struct(struct srcu_struct *sp); |
71 | int __srcu_read_lock(struct srcu_struct *sp) __acquires(sp); | 100 | int __srcu_read_lock(struct srcu_struct *sp) __acquires(sp); |
72 | void __srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp); | 101 | void __srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp); |
73 | void synchronize_srcu(struct srcu_struct *sp); | 102 | void synchronize_srcu(struct srcu_struct *sp); |
74 | void synchronize_srcu_expedited(struct srcu_struct *sp); | 103 | void synchronize_srcu_expedited(struct srcu_struct *sp); |
75 | long srcu_batches_completed(struct srcu_struct *sp); | 104 | long srcu_batches_completed(struct srcu_struct *sp); |
105 | void srcu_barrier(struct srcu_struct *sp); | ||
76 | 106 | ||
77 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 107 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
78 | 108 | ||
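A minimal usage sketch of the new asynchronous interface; the srcu_struct, object type, and free routine here are hypothetical:

	static struct srcu_struct my_srcu;	/* init_srcu_struct(&my_srcu) at setup time */

	struct my_obj {
		struct rcu_head rcu;
		/* ...payload... */
	};

	static void my_obj_free(struct rcu_head *rcu)
	{
		kfree(container_of(rcu, struct my_obj, rcu));
	}

	static void my_obj_release(struct my_obj *p)
	{
		call_srcu(&my_srcu, &p->rcu, my_obj_free);	/* deferred free after SRCU GP */
	}

	static void my_subsystem_exit(void)
	{
		srcu_barrier(&my_srcu);		/* wait for all queued callbacks to run */
		cleanup_srcu_struct(&my_srcu);
	}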
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 337099783f37..1480900c511c 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h | |||
@@ -292,6 +292,8 @@ TRACE_EVENT(rcu_dyntick, | |||
292 | * "More callbacks": Still more callbacks, try again to clear them out. | 292 | * "More callbacks": Still more callbacks, try again to clear them out. |
293 | * "Callbacks drained": All callbacks processed, off to dyntick idle! | 293 | * "Callbacks drained": All callbacks processed, off to dyntick idle! |
294 | * "Timer": Timer fired to cause CPU to continue processing callbacks. | 294 | * "Timer": Timer fired to cause CPU to continue processing callbacks. |
295 | * "Demigrate": Timer fired on wrong CPU, woke up correct CPU. | ||
296 | * "Cleanup after idle": Idle exited, timer canceled. | ||
295 | */ | 297 | */ |
296 | TRACE_EVENT(rcu_prep_idle, | 298 | TRACE_EVENT(rcu_prep_idle, |
297 | 299 | ||
diff --git a/init/Kconfig b/init/Kconfig index 6cfd71d06463..6d18ef8071b5 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
@@ -458,6 +458,33 @@ config RCU_FANOUT | |||
458 | Select a specific number if testing RCU itself. | 458 | Select a specific number if testing RCU itself. |
459 | Take the default if unsure. | 459 | Take the default if unsure. |
460 | 460 | ||
461 | config RCU_FANOUT_LEAF | ||
462 | int "Tree-based hierarchical RCU leaf-level fanout value" | ||
463 | range 2 RCU_FANOUT if 64BIT | ||
464 | range 2 RCU_FANOUT if !64BIT | ||
465 | depends on TREE_RCU || TREE_PREEMPT_RCU | ||
466 | default 16 | ||
467 | help | ||
468 | This option controls the leaf-level fanout of hierarchical | ||
469 | implementations of RCU, and allows trading off cache misses | ||
470 | against lock contention. Systems that synchronize their | ||
471 | scheduling-clock interrupts for energy-efficiency reasons will | ||
472 | want the default because the smaller leaf-level fanout keeps | ||
473 | lock contention levels acceptably low. Very large systems | ||
474 | (hundreds or thousands of CPUs) will instead want to set this | ||
475 | value to the maximum value possible in order to reduce the | ||
476 | number of cache misses incurred during RCU's grace-period | ||
477 | initialization. These systems tend to run CPU-bound, and thus | ||
478 | are not helped by synchronized interrupts, so they tend to ||
479 | skew them, which reduces lock contention enough that large | ||
480 | leaf-level fanouts work well. | ||
481 | |||
482 | Select a specific number if testing RCU itself. | ||
483 | |||
484 | Select the maximum permissible value for large systems. | ||
485 | |||
486 | Take the default if unsure. | ||
487 | |||
461 | config RCU_FANOUT_EXACT | 488 | config RCU_FANOUT_EXACT |
462 | bool "Disable tree-based hierarchical RCU auto-balancing" | 489 | bool "Disable tree-based hierarchical RCU auto-balancing" |
463 | depends on TREE_RCU || TREE_PREEMPT_RCU | 490 | depends on TREE_RCU || TREE_PREEMPT_RCU |
@@ -515,10 +542,25 @@ config RCU_BOOST_PRIO | |||
515 | depends on RCU_BOOST | 542 | depends on RCU_BOOST |
516 | default 1 | 543 | default 1 |
517 | help | 544 | help |
518 | This option specifies the real-time priority to which preempted | 545 | This option specifies the real-time priority to which long-term |
519 | RCU readers are to be boosted. If you are working with CPU-bound | 546 | preempted RCU readers are to be boosted. If you are working |
520 | real-time applications, you should specify a priority higher then | 547 | with a real-time application that has one or more CPU-bound |
521 | the highest-priority CPU-bound application. | 548 | threads running at a real-time priority level, you should set |
549 | RCU_BOOST_PRIO to a priority higher than the highest-priority ||
550 | real-time CPU-bound thread. The default RCU_BOOST_PRIO value | ||
551 | of 1 is appropriate in the common case, which is real-time | ||
552 | applications that do not have any CPU-bound threads. | ||
553 | |||
554 | Some real-time applications might not have a single real-time | ||
555 | thread that saturates a given CPU, but instead might have | ||
556 | multiple real-time threads that, taken together, fully utilize | ||
557 | that CPU. In this case, you should set RCU_BOOST_PRIO to | ||
558 | a priority higher than the lowest-priority thread that is | ||
559 | conspiring to prevent the CPU from running any non-real-time | ||
560 | tasks. For example, if one thread at priority 10 and another | ||
561 | thread at priority 5 together fully consume ||
562 | the CPU time on a given CPU, then RCU_BOOST_PRIO should be | ||
563 | set to priority 6 or higher. | ||
522 | 564 | ||
523 | Specify the real-time priority, or take the default if unsure. | 565 | Specify the real-time priority, or take the default if unsure. |
524 | 566 | ||
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index a86f1741cc27..95cba41ce1e9 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c | |||
@@ -51,6 +51,34 @@ | |||
51 | 51 | ||
52 | #include "rcu.h" | 52 | #include "rcu.h" |
53 | 53 | ||
54 | #ifdef CONFIG_PREEMPT_RCU | ||
55 | |||
56 | /* | ||
57 | * Check for a task exiting while in a preemptible-RCU read-side | ||
58 | * critical section, clean up if so. No need to issue warnings, | ||
59 | * as debug_check_no_locks_held() already does this if lockdep | ||
60 | * is enabled. | ||
61 | */ | ||
62 | void exit_rcu(void) | ||
63 | { | ||
64 | struct task_struct *t = current; | ||
65 | |||
66 | if (likely(list_empty(¤t->rcu_node_entry))) | ||
67 | return; | ||
68 | t->rcu_read_lock_nesting = 1; | ||
69 | barrier(); | ||
70 | t->rcu_read_unlock_special = RCU_READ_UNLOCK_BLOCKED; | ||
71 | __rcu_read_unlock(); | ||
72 | } | ||
73 | |||
74 | #else /* #ifdef CONFIG_PREEMPT_RCU */ | ||
75 | |||
76 | void exit_rcu(void) | ||
77 | { | ||
78 | } | ||
79 | |||
80 | #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ | ||
81 | |||
54 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 82 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
55 | static struct lock_class_key rcu_lock_key; | 83 | static struct lock_class_key rcu_lock_key; |
56 | struct lockdep_map rcu_lock_map = | 84 | struct lockdep_map rcu_lock_map = |
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index 22ecea0dfb62..fc31a2d65100 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h | |||
@@ -851,22 +851,6 @@ int rcu_preempt_needs_cpu(void) | |||
851 | return rcu_preempt_ctrlblk.rcb.rcucblist != NULL; | 851 | return rcu_preempt_ctrlblk.rcb.rcucblist != NULL; |
852 | } | 852 | } |
853 | 853 | ||
854 | /* | ||
855 | * Check for a task exiting while in a preemptible -RCU read-side | ||
856 | * critical section, clean up if so. No need to issue warnings, | ||
857 | * as debug_check_no_locks_held() already does this if lockdep | ||
858 | * is enabled. | ||
859 | */ | ||
860 | void exit_rcu(void) | ||
861 | { | ||
862 | struct task_struct *t = current; | ||
863 | |||
864 | if (t->rcu_read_lock_nesting == 0) | ||
865 | return; | ||
866 | t->rcu_read_lock_nesting = 1; | ||
867 | __rcu_read_unlock(); | ||
868 | } | ||
869 | |||
870 | #else /* #ifdef CONFIG_TINY_PREEMPT_RCU */ | 854 | #else /* #ifdef CONFIG_TINY_PREEMPT_RCU */ |
871 | 855 | ||
872 | #ifdef CONFIG_RCU_TRACE | 856 | #ifdef CONFIG_RCU_TRACE |
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index a89b381a8c6e..e66b34ab7555 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c | |||
@@ -64,6 +64,7 @@ static int irqreader = 1; /* RCU readers from irq (timers). */ | |||
64 | static int fqs_duration; /* Duration of bursts (us), 0 to disable. */ | 64 | static int fqs_duration; /* Duration of bursts (us), 0 to disable. */ |
65 | static int fqs_holdoff; /* Hold time within burst (us). */ | 65 | static int fqs_holdoff; /* Hold time within burst (us). */ |
66 | static int fqs_stutter = 3; /* Wait time between bursts (s). */ | 66 | static int fqs_stutter = 3; /* Wait time between bursts (s). */ |
67 | static int n_barrier_cbs; /* Number of callbacks to test RCU barriers. */ | ||
67 | static int onoff_interval; /* Wait time between CPU hotplugs, 0=disable. */ | 68 | static int onoff_interval; /* Wait time between CPU hotplugs, 0=disable. */ |
68 | static int onoff_holdoff; /* Seconds after boot before CPU hotplugs. */ | 69 | static int onoff_holdoff; /* Seconds after boot before CPU hotplugs. */ |
69 | static int shutdown_secs; /* Shutdown time (s). <=0 for no shutdown. */ | 70 | static int shutdown_secs; /* Shutdown time (s). <=0 for no shutdown. */ |
@@ -96,6 +97,8 @@ module_param(fqs_holdoff, int, 0444); | |||
96 | MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)"); | 97 | MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)"); |
97 | module_param(fqs_stutter, int, 0444); | 98 | module_param(fqs_stutter, int, 0444); |
98 | MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)"); | 99 | MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)"); |
100 | module_param(n_barrier_cbs, int, 0444); | ||
101 | MODULE_PARM_DESC(n_barrier_cbs, "# of callbacks/kthreads for barrier testing"); | ||
99 | module_param(onoff_interval, int, 0444); | 102 | module_param(onoff_interval, int, 0444); |
100 | MODULE_PARM_DESC(onoff_interval, "Time between CPU hotplugs (s), 0=disable"); | 103 | MODULE_PARM_DESC(onoff_interval, "Time between CPU hotplugs (s), 0=disable"); |
101 | module_param(onoff_holdoff, int, 0444); | 104 | module_param(onoff_holdoff, int, 0444); |
@@ -139,6 +142,8 @@ static struct task_struct *shutdown_task; | |||
139 | static struct task_struct *onoff_task; | 142 | static struct task_struct *onoff_task; |
140 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | 143 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ |
141 | static struct task_struct *stall_task; | 144 | static struct task_struct *stall_task; |
145 | static struct task_struct **barrier_cbs_tasks; | ||
146 | static struct task_struct *barrier_task; | ||
142 | 147 | ||
143 | #define RCU_TORTURE_PIPE_LEN 10 | 148 | #define RCU_TORTURE_PIPE_LEN 10 |
144 | 149 | ||
@@ -164,6 +169,7 @@ static atomic_t n_rcu_torture_alloc_fail; | |||
164 | static atomic_t n_rcu_torture_free; | 169 | static atomic_t n_rcu_torture_free; |
165 | static atomic_t n_rcu_torture_mberror; | 170 | static atomic_t n_rcu_torture_mberror; |
166 | static atomic_t n_rcu_torture_error; | 171 | static atomic_t n_rcu_torture_error; |
172 | static long n_rcu_torture_barrier_error; | ||
167 | static long n_rcu_torture_boost_ktrerror; | 173 | static long n_rcu_torture_boost_ktrerror; |
168 | static long n_rcu_torture_boost_rterror; | 174 | static long n_rcu_torture_boost_rterror; |
169 | static long n_rcu_torture_boost_failure; | 175 | static long n_rcu_torture_boost_failure; |
@@ -173,6 +179,8 @@ static long n_offline_attempts; | |||
173 | static long n_offline_successes; | 179 | static long n_offline_successes; |
174 | static long n_online_attempts; | 180 | static long n_online_attempts; |
175 | static long n_online_successes; | 181 | static long n_online_successes; |
182 | static long n_barrier_attempts; | ||
183 | static long n_barrier_successes; | ||
176 | static struct list_head rcu_torture_removed; | 184 | static struct list_head rcu_torture_removed; |
177 | static cpumask_var_t shuffle_tmp_mask; | 185 | static cpumask_var_t shuffle_tmp_mask; |
178 | 186 | ||
@@ -197,6 +205,10 @@ static unsigned long shutdown_time; /* jiffies to system shutdown. */ | |||
197 | static unsigned long boost_starttime; /* jiffies of next boost test start. */ | 205 | static unsigned long boost_starttime; /* jiffies of next boost test start. */ |
198 | DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */ | 206 | DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */ |
199 | /* and boost task create/destroy. */ | 207 | /* and boost task create/destroy. */ |
208 | static atomic_t barrier_cbs_count; /* Barrier callbacks registered. */ | ||
209 | static atomic_t barrier_cbs_invoked; /* Barrier callbacks invoked. */ | ||
210 | static wait_queue_head_t *barrier_cbs_wq; /* Coordinate barrier testing. */ | ||
211 | static DECLARE_WAIT_QUEUE_HEAD(barrier_wq); | ||
200 | 212 | ||
201 | /* Mediate rmmod and system shutdown. Concurrent rmmod & shutdown illegal! */ | 213 | /* Mediate rmmod and system shutdown. Concurrent rmmod & shutdown illegal! */ |
202 | 214 | ||
@@ -327,6 +339,7 @@ struct rcu_torture_ops { | |||
327 | int (*completed)(void); | 339 | int (*completed)(void); |
328 | void (*deferred_free)(struct rcu_torture *p); | 340 | void (*deferred_free)(struct rcu_torture *p); |
329 | void (*sync)(void); | 341 | void (*sync)(void); |
342 | void (*call)(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); | ||
330 | void (*cb_barrier)(void); | 343 | void (*cb_barrier)(void); |
331 | void (*fqs)(void); | 344 | void (*fqs)(void); |
332 | int (*stats)(char *page); | 345 | int (*stats)(char *page); |
@@ -417,6 +430,7 @@ static struct rcu_torture_ops rcu_ops = { | |||
417 | .completed = rcu_torture_completed, | 430 | .completed = rcu_torture_completed, |
418 | .deferred_free = rcu_torture_deferred_free, | 431 | .deferred_free = rcu_torture_deferred_free, |
419 | .sync = synchronize_rcu, | 432 | .sync = synchronize_rcu, |
433 | .call = call_rcu, | ||
420 | .cb_barrier = rcu_barrier, | 434 | .cb_barrier = rcu_barrier, |
421 | .fqs = rcu_force_quiescent_state, | 435 | .fqs = rcu_force_quiescent_state, |
422 | .stats = NULL, | 436 | .stats = NULL, |
@@ -460,6 +474,7 @@ static struct rcu_torture_ops rcu_sync_ops = { | |||
460 | .completed = rcu_torture_completed, | 474 | .completed = rcu_torture_completed, |
461 | .deferred_free = rcu_sync_torture_deferred_free, | 475 | .deferred_free = rcu_sync_torture_deferred_free, |
462 | .sync = synchronize_rcu, | 476 | .sync = synchronize_rcu, |
477 | .call = NULL, | ||
463 | .cb_barrier = NULL, | 478 | .cb_barrier = NULL, |
464 | .fqs = rcu_force_quiescent_state, | 479 | .fqs = rcu_force_quiescent_state, |
465 | .stats = NULL, | 480 | .stats = NULL, |
@@ -477,6 +492,7 @@ static struct rcu_torture_ops rcu_expedited_ops = { | |||
477 | .completed = rcu_no_completed, | 492 | .completed = rcu_no_completed, |
478 | .deferred_free = rcu_sync_torture_deferred_free, | 493 | .deferred_free = rcu_sync_torture_deferred_free, |
479 | .sync = synchronize_rcu_expedited, | 494 | .sync = synchronize_rcu_expedited, |
495 | .call = NULL, | ||
480 | .cb_barrier = NULL, | 496 | .cb_barrier = NULL, |
481 | .fqs = rcu_force_quiescent_state, | 497 | .fqs = rcu_force_quiescent_state, |
482 | .stats = NULL, | 498 | .stats = NULL, |
@@ -519,6 +535,7 @@ static struct rcu_torture_ops rcu_bh_ops = { | |||
519 | .completed = rcu_bh_torture_completed, | 535 | .completed = rcu_bh_torture_completed, |
520 | .deferred_free = rcu_bh_torture_deferred_free, | 536 | .deferred_free = rcu_bh_torture_deferred_free, |
521 | .sync = synchronize_rcu_bh, | 537 | .sync = synchronize_rcu_bh, |
538 | .call = call_rcu_bh, | ||
522 | .cb_barrier = rcu_barrier_bh, | 539 | .cb_barrier = rcu_barrier_bh, |
523 | .fqs = rcu_bh_force_quiescent_state, | 540 | .fqs = rcu_bh_force_quiescent_state, |
524 | .stats = NULL, | 541 | .stats = NULL, |
@@ -535,6 +552,7 @@ static struct rcu_torture_ops rcu_bh_sync_ops = { | |||
535 | .completed = rcu_bh_torture_completed, | 552 | .completed = rcu_bh_torture_completed, |
536 | .deferred_free = rcu_sync_torture_deferred_free, | 553 | .deferred_free = rcu_sync_torture_deferred_free, |
537 | .sync = synchronize_rcu_bh, | 554 | .sync = synchronize_rcu_bh, |
555 | .call = NULL, | ||
538 | .cb_barrier = NULL, | 556 | .cb_barrier = NULL, |
539 | .fqs = rcu_bh_force_quiescent_state, | 557 | .fqs = rcu_bh_force_quiescent_state, |
540 | .stats = NULL, | 558 | .stats = NULL, |
@@ -551,6 +569,7 @@ static struct rcu_torture_ops rcu_bh_expedited_ops = { | |||
551 | .completed = rcu_bh_torture_completed, | 569 | .completed = rcu_bh_torture_completed, |
552 | .deferred_free = rcu_sync_torture_deferred_free, | 570 | .deferred_free = rcu_sync_torture_deferred_free, |
553 | .sync = synchronize_rcu_bh_expedited, | 571 | .sync = synchronize_rcu_bh_expedited, |
572 | .call = NULL, | ||
554 | .cb_barrier = NULL, | 573 | .cb_barrier = NULL, |
555 | .fqs = rcu_bh_force_quiescent_state, | 574 | .fqs = rcu_bh_force_quiescent_state, |
556 | .stats = NULL, | 575 | .stats = NULL, |
@@ -606,6 +625,11 @@ static int srcu_torture_completed(void) | |||
606 | return srcu_batches_completed(&srcu_ctl); | 625 | return srcu_batches_completed(&srcu_ctl); |
607 | } | 626 | } |
608 | 627 | ||
628 | static void srcu_torture_deferred_free(struct rcu_torture *rp) | ||
629 | { | ||
630 | call_srcu(&srcu_ctl, &rp->rtort_rcu, rcu_torture_cb); | ||
631 | } | ||
632 | |||
609 | static void srcu_torture_synchronize(void) | 633 | static void srcu_torture_synchronize(void) |
610 | { | 634 | { |
611 | synchronize_srcu(&srcu_ctl); | 635 | synchronize_srcu(&srcu_ctl); |
@@ -620,7 +644,7 @@ static int srcu_torture_stats(char *page) | |||
620 | cnt += sprintf(&page[cnt], "%s%s per-CPU(idx=%d):", | 644 | cnt += sprintf(&page[cnt], "%s%s per-CPU(idx=%d):", |
621 | torture_type, TORTURE_FLAG, idx); | 645 | torture_type, TORTURE_FLAG, idx); |
622 | for_each_possible_cpu(cpu) { | 646 | for_each_possible_cpu(cpu) { |
623 | cnt += sprintf(&page[cnt], " %d(%d,%d)", cpu, | 647 | cnt += sprintf(&page[cnt], " %d(%lu,%lu)", cpu, |
624 | per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx], | 648 | per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx], |
625 | per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx]); | 649 | per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx]); |
626 | } | 650 | } |
@@ -635,13 +659,29 @@ static struct rcu_torture_ops srcu_ops = { | |||
635 | .read_delay = srcu_read_delay, | 659 | .read_delay = srcu_read_delay, |
636 | .readunlock = srcu_torture_read_unlock, | 660 | .readunlock = srcu_torture_read_unlock, |
637 | .completed = srcu_torture_completed, | 661 | .completed = srcu_torture_completed, |
638 | .deferred_free = rcu_sync_torture_deferred_free, | 662 | .deferred_free = srcu_torture_deferred_free, |
639 | .sync = srcu_torture_synchronize, | 663 | .sync = srcu_torture_synchronize, |
664 | .call = NULL, | ||
640 | .cb_barrier = NULL, | 665 | .cb_barrier = NULL, |
641 | .stats = srcu_torture_stats, | 666 | .stats = srcu_torture_stats, |
642 | .name = "srcu" | 667 | .name = "srcu" |
643 | }; | 668 | }; |
644 | 669 | ||
670 | static struct rcu_torture_ops srcu_sync_ops = { | ||
671 | .init = srcu_torture_init, | ||
672 | .cleanup = srcu_torture_cleanup, | ||
673 | .readlock = srcu_torture_read_lock, | ||
674 | .read_delay = srcu_read_delay, | ||
675 | .readunlock = srcu_torture_read_unlock, | ||
676 | .completed = srcu_torture_completed, | ||
677 | .deferred_free = rcu_sync_torture_deferred_free, | ||
678 | .sync = srcu_torture_synchronize, | ||
679 | .call = NULL, | ||
680 | .cb_barrier = NULL, | ||
681 | .stats = srcu_torture_stats, | ||
682 | .name = "srcu_sync" | ||
683 | }; | ||
684 | |||
645 | static int srcu_torture_read_lock_raw(void) __acquires(&srcu_ctl) | 685 | static int srcu_torture_read_lock_raw(void) __acquires(&srcu_ctl) |
646 | { | 686 | { |
647 | return srcu_read_lock_raw(&srcu_ctl); | 687 | return srcu_read_lock_raw(&srcu_ctl); |
@@ -659,13 +699,29 @@ static struct rcu_torture_ops srcu_raw_ops = { | |||
659 | .read_delay = srcu_read_delay, | 699 | .read_delay = srcu_read_delay, |
660 | .readunlock = srcu_torture_read_unlock_raw, | 700 | .readunlock = srcu_torture_read_unlock_raw, |
661 | .completed = srcu_torture_completed, | 701 | .completed = srcu_torture_completed, |
662 | .deferred_free = rcu_sync_torture_deferred_free, | 702 | .deferred_free = srcu_torture_deferred_free, |
663 | .sync = srcu_torture_synchronize, | 703 | .sync = srcu_torture_synchronize, |
704 | .call = NULL, | ||
664 | .cb_barrier = NULL, | 705 | .cb_barrier = NULL, |
665 | .stats = srcu_torture_stats, | 706 | .stats = srcu_torture_stats, |
666 | .name = "srcu_raw" | 707 | .name = "srcu_raw" |
667 | }; | 708 | }; |
668 | 709 | ||
710 | static struct rcu_torture_ops srcu_raw_sync_ops = { | ||
711 | .init = srcu_torture_init, | ||
712 | .cleanup = srcu_torture_cleanup, | ||
713 | .readlock = srcu_torture_read_lock_raw, | ||
714 | .read_delay = srcu_read_delay, | ||
715 | .readunlock = srcu_torture_read_unlock_raw, | ||
716 | .completed = srcu_torture_completed, | ||
717 | .deferred_free = rcu_sync_torture_deferred_free, | ||
718 | .sync = srcu_torture_synchronize, | ||
719 | .call = NULL, | ||
720 | .cb_barrier = NULL, | ||
721 | .stats = srcu_torture_stats, | ||
722 | .name = "srcu_raw_sync" | ||
723 | }; | ||
724 | |||
669 | static void srcu_torture_synchronize_expedited(void) | 725 | static void srcu_torture_synchronize_expedited(void) |
670 | { | 726 | { |
671 | synchronize_srcu_expedited(&srcu_ctl); | 727 | synchronize_srcu_expedited(&srcu_ctl); |
@@ -680,6 +736,7 @@ static struct rcu_torture_ops srcu_expedited_ops = { | |||
680 | .completed = srcu_torture_completed, | 736 | .completed = srcu_torture_completed, |
681 | .deferred_free = rcu_sync_torture_deferred_free, | 737 | .deferred_free = rcu_sync_torture_deferred_free, |
682 | .sync = srcu_torture_synchronize_expedited, | 738 | .sync = srcu_torture_synchronize_expedited, |
739 | .call = NULL, | ||
683 | .cb_barrier = NULL, | 740 | .cb_barrier = NULL, |
684 | .stats = srcu_torture_stats, | 741 | .stats = srcu_torture_stats, |
685 | .name = "srcu_expedited" | 742 | .name = "srcu_expedited" |
@@ -1129,7 +1186,8 @@ rcu_torture_printk(char *page) | |||
1129 | "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d " | 1186 | "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d " |
1130 | "rtmbe: %d rtbke: %ld rtbre: %ld " | 1187 | "rtmbe: %d rtbke: %ld rtbre: %ld " |
1131 | "rtbf: %ld rtb: %ld nt: %ld " | 1188 | "rtbf: %ld rtb: %ld nt: %ld " |
1132 | "onoff: %ld/%ld:%ld/%ld", | 1189 | "onoff: %ld/%ld:%ld/%ld " |
1190 | "barrier: %ld/%ld:%ld", | ||
1133 | rcu_torture_current, | 1191 | rcu_torture_current, |
1134 | rcu_torture_current_version, | 1192 | rcu_torture_current_version, |
1135 | list_empty(&rcu_torture_freelist), | 1193 | list_empty(&rcu_torture_freelist), |
@@ -1145,14 +1203,17 @@ rcu_torture_printk(char *page) | |||
1145 | n_online_successes, | 1203 | n_online_successes, |
1146 | n_online_attempts, | 1204 | n_online_attempts, |
1147 | n_offline_successes, | 1205 | n_offline_successes, |
1148 | n_offline_attempts); | 1206 | n_offline_attempts, |
1207 | n_barrier_successes, | ||
1208 | n_barrier_attempts, | ||
1209 | n_rcu_torture_barrier_error); | ||
1210 | cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG); | ||
1149 | if (atomic_read(&n_rcu_torture_mberror) != 0 || | 1211 | if (atomic_read(&n_rcu_torture_mberror) != 0 || |
1212 | n_rcu_torture_barrier_error != 0 || | ||
1150 | n_rcu_torture_boost_ktrerror != 0 || | 1213 | n_rcu_torture_boost_ktrerror != 0 || |
1151 | n_rcu_torture_boost_rterror != 0 || | 1214 | n_rcu_torture_boost_rterror != 0 || |
1152 | n_rcu_torture_boost_failure != 0) | 1215 | n_rcu_torture_boost_failure != 0 || |
1153 | cnt += sprintf(&page[cnt], " !!!"); | 1216 | i > 1) { |
1154 | cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG); | ||
1155 | if (i > 1) { | ||
1156 | cnt += sprintf(&page[cnt], "!!! "); | 1217 | cnt += sprintf(&page[cnt], "!!! "); |
1157 | atomic_inc(&n_rcu_torture_error); | 1218 | atomic_inc(&n_rcu_torture_error); |
1158 | WARN_ON_ONCE(1); | 1219 | WARN_ON_ONCE(1); |
@@ -1337,6 +1398,7 @@ static void rcutorture_booster_cleanup(int cpu) | |||
1337 | 1398 | ||
1338 | /* This must be outside of the mutex, otherwise deadlock! */ | 1399 | /* This must be outside of the mutex, otherwise deadlock! */ |
1339 | kthread_stop(t); | 1400 | kthread_stop(t); |
1401 | boost_tasks[cpu] = NULL; | ||
1340 | } | 1402 | } |
1341 | 1403 | ||
1342 | static int rcutorture_booster_init(int cpu) | 1404 | static int rcutorture_booster_init(int cpu) |
@@ -1484,13 +1546,15 @@ static void rcu_torture_onoff_cleanup(void) | |||
1484 | return; | 1546 | return; |
1485 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_onoff task"); | 1547 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_onoff task"); |
1486 | kthread_stop(onoff_task); | 1548 | kthread_stop(onoff_task); |
1549 | onoff_task = NULL; | ||
1487 | } | 1550 | } |
1488 | 1551 | ||
1489 | #else /* #ifdef CONFIG_HOTPLUG_CPU */ | 1552 | #else /* #ifdef CONFIG_HOTPLUG_CPU */ |
1490 | 1553 | ||
1491 | static void | 1554 | static int |
1492 | rcu_torture_onoff_init(void) | 1555 | rcu_torture_onoff_init(void) |
1493 | { | 1556 | { |
1557 | return 0; | ||
1494 | } | 1558 | } |
1495 | 1559 | ||
1496 | static void rcu_torture_onoff_cleanup(void) | 1560 | static void rcu_torture_onoff_cleanup(void) |
@@ -1554,6 +1618,152 @@ static void rcu_torture_stall_cleanup(void) | |||
1554 | return; | 1618 | return; |
1555 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_stall_task."); | 1619 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_stall_task."); |
1556 | kthread_stop(stall_task); | 1620 | kthread_stop(stall_task); |
1621 | stall_task = NULL; | ||
1622 | } | ||
1623 | |||
1624 | /* Callback function for RCU barrier testing. */ | ||
1625 | void rcu_torture_barrier_cbf(struct rcu_head *rcu) | ||
1626 | { | ||
1627 | atomic_inc(&barrier_cbs_invoked); | ||
1628 | } | ||
1629 | |||
1630 | /* kthread function to register callbacks used to test RCU barriers. */ | ||
1631 | static int rcu_torture_barrier_cbs(void *arg) | ||
1632 | { | ||
1633 | long myid = (long)arg; | ||
1634 | struct rcu_head rcu; | ||
1635 | |||
1636 | init_rcu_head_on_stack(&rcu); | ||
1637 | VERBOSE_PRINTK_STRING("rcu_torture_barrier_cbs task started"); | ||
1638 | set_user_nice(current, 19); | ||
1639 | do { | ||
1640 | wait_event(barrier_cbs_wq[myid], | ||
1641 | atomic_read(&barrier_cbs_count) == n_barrier_cbs || | ||
1642 | kthread_should_stop() || | ||
1643 | fullstop != FULLSTOP_DONTSTOP); | ||
1644 | if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP) | ||
1645 | break; | ||
1646 | cur_ops->call(&rcu, rcu_torture_barrier_cbf); | ||
1647 | if (atomic_dec_and_test(&barrier_cbs_count)) | ||
1648 | wake_up(&barrier_wq); | ||
1649 | } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); | ||
1650 | VERBOSE_PRINTK_STRING("rcu_torture_barrier_cbs task stopping"); | ||
1651 | rcutorture_shutdown_absorb("rcu_torture_barrier_cbs"); | ||
1652 | while (!kthread_should_stop()) | ||
1653 | schedule_timeout_interruptible(1); | ||
1654 | cur_ops->cb_barrier(); | ||
1655 | destroy_rcu_head_on_stack(&rcu); | ||
1656 | return 0; | ||
1657 | } | ||
1658 | |||
1659 | /* kthread function to drive and coordinate RCU barrier testing. */ | ||
1660 | static int rcu_torture_barrier(void *arg) | ||
1661 | { | ||
1662 | int i; | ||
1663 | |||
1664 | VERBOSE_PRINTK_STRING("rcu_torture_barrier task starting"); | ||
1665 | do { | ||
1666 | atomic_set(&barrier_cbs_invoked, 0); | ||
1667 | atomic_set(&barrier_cbs_count, n_barrier_cbs); | ||
1668 | /* wake_up() path contains the required barriers. */ | ||
1669 | for (i = 0; i < n_barrier_cbs; i++) | ||
1670 | wake_up(&barrier_cbs_wq[i]); | ||
1671 | wait_event(barrier_wq, | ||
1672 | atomic_read(&barrier_cbs_count) == 0 || | ||
1673 | kthread_should_stop() || | ||
1674 | fullstop != FULLSTOP_DONTSTOP); | ||
1675 | if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP) | ||
1676 | break; | ||
1677 | n_barrier_attempts++; | ||
1678 | cur_ops->cb_barrier(); | ||
1679 | if (atomic_read(&barrier_cbs_invoked) != n_barrier_cbs) { | ||
1680 | n_rcu_torture_barrier_error++; | ||
1681 | WARN_ON_ONCE(1); | ||
1682 | } | ||
1683 | n_barrier_successes++; | ||
1684 | schedule_timeout_interruptible(HZ / 10); | ||
1685 | } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); | ||
1686 | VERBOSE_PRINTK_STRING("rcu_torture_barrier task stopping"); | ||
1687 | rcutorture_shutdown_absorb("rcu_torture_barrier"); ||
1688 | while (!kthread_should_stop()) | ||
1689 | schedule_timeout_interruptible(1); | ||
1690 | return 0; | ||
1691 | } | ||
1692 | |||
1693 | /* Initialize RCU barrier testing. */ | ||
1694 | static int rcu_torture_barrier_init(void) | ||
1695 | { | ||
1696 | int i; | ||
1697 | int ret; | ||
1698 | |||
1699 | if (n_barrier_cbs == 0) | ||
1700 | return 0; | ||
1701 | if (cur_ops->call == NULL || cur_ops->cb_barrier == NULL) { | ||
1702 | printk(KERN_ALERT "%s" TORTURE_FLAG | ||
1703 | " Call or barrier ops missing for %s,\n", | ||
1704 | torture_type, cur_ops->name); | ||
1705 | printk(KERN_ALERT "%s" TORTURE_FLAG | ||
1706 | " RCU barrier testing omitted from run.\n", | ||
1707 | torture_type); | ||
1708 | return 0; | ||
1709 | } | ||
1710 | atomic_set(&barrier_cbs_count, 0); | ||
1711 | atomic_set(&barrier_cbs_invoked, 0); | ||
1712 | barrier_cbs_tasks = | ||
1713 | kzalloc(n_barrier_cbs * sizeof(barrier_cbs_tasks[0]), | ||
1714 | GFP_KERNEL); | ||
1715 | barrier_cbs_wq = | ||
1716 | kzalloc(n_barrier_cbs * sizeof(barrier_cbs_wq[0]), | ||
1717 | GFP_KERNEL); | ||
1718 | if (barrier_cbs_tasks == NULL || barrier_cbs_wq == NULL) | ||
1719 | return -ENOMEM; | ||
1720 | for (i = 0; i < n_barrier_cbs; i++) { | ||
1721 | init_waitqueue_head(&barrier_cbs_wq[i]); | ||
1722 | barrier_cbs_tasks[i] = kthread_run(rcu_torture_barrier_cbs, | ||
1723 | (void *)(long)i, | ||
1724 | "rcu_torture_barrier_cbs"); | ||
1725 | if (IS_ERR(barrier_cbs_tasks[i])) { | ||
1726 | ret = PTR_ERR(barrier_cbs_tasks[i]); | ||
1727 | VERBOSE_PRINTK_ERRSTRING("Failed to create rcu_torture_barrier_cbs"); | ||
1728 | barrier_cbs_tasks[i] = NULL; | ||
1729 | return ret; | ||
1730 | } | ||
1731 | } | ||
1732 | barrier_task = kthread_run(rcu_torture_barrier, NULL, | ||
1733 | "rcu_torture_barrier"); | ||
1734 | if (IS_ERR(barrier_task)) { | ||
1735 | ret = PTR_ERR(barrier_task); | ||
1736 | VERBOSE_PRINTK_ERRSTRING("Failed to create rcu_torture_barrier"); | ||
1737 | barrier_task = NULL; | ||
1738 | } | ||
1739 | return 0; | ||
1740 | } | ||
1741 | |||
1742 | /* Clean up after RCU barrier testing. */ | ||
1743 | static void rcu_torture_barrier_cleanup(void) | ||
1744 | { | ||
1745 | int i; | ||
1746 | |||
1747 | if (barrier_task != NULL) { | ||
1748 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_barrier task"); | ||
1749 | kthread_stop(barrier_task); | ||
1750 | barrier_task = NULL; | ||
1751 | } | ||
1752 | if (barrier_cbs_tasks != NULL) { | ||
1753 | for (i = 0; i < n_barrier_cbs; i++) { | ||
1754 | if (barrier_cbs_tasks[i] != NULL) { | ||
1755 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_barrier_cbs task"); | ||
1756 | kthread_stop(barrier_cbs_tasks[i]); | ||
1757 | barrier_cbs_tasks[i] = NULL; | ||
1758 | } | ||
1759 | } | ||
1760 | kfree(barrier_cbs_tasks); | ||
1761 | barrier_cbs_tasks = NULL; | ||
1762 | } | ||
1763 | if (barrier_cbs_wq != NULL) { | ||
1764 | kfree(barrier_cbs_wq); | ||
1765 | barrier_cbs_wq = NULL; | ||
1766 | } | ||
1557 | } | 1767 | } |
1558 | 1768 | ||
1559 | static int rcutorture_cpu_notify(struct notifier_block *self, | 1769 | static int rcutorture_cpu_notify(struct notifier_block *self, |
@@ -1598,6 +1808,7 @@ rcu_torture_cleanup(void) | |||
1598 | fullstop = FULLSTOP_RMMOD; | 1808 | fullstop = FULLSTOP_RMMOD; |
1599 | mutex_unlock(&fullstop_mutex); | 1809 | mutex_unlock(&fullstop_mutex); |
1600 | unregister_reboot_notifier(&rcutorture_shutdown_nb); | 1810 | unregister_reboot_notifier(&rcutorture_shutdown_nb); |
1811 | rcu_torture_barrier_cleanup(); | ||
1601 | rcu_torture_stall_cleanup(); | 1812 | rcu_torture_stall_cleanup(); |
1602 | if (stutter_task) { | 1813 | if (stutter_task) { |
1603 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_stutter task"); | 1814 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_stutter task"); |
@@ -1665,6 +1876,7 @@ rcu_torture_cleanup(void) | |||
1665 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_shutdown task"); | 1876 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_shutdown task"); |
1666 | kthread_stop(shutdown_task); | 1877 | kthread_stop(shutdown_task); |
1667 | } | 1878 | } |
1879 | shutdown_task = NULL; | ||
1668 | rcu_torture_onoff_cleanup(); | 1880 | rcu_torture_onoff_cleanup(); |
1669 | 1881 | ||
1670 | /* Wait for all RCU callbacks to fire. */ | 1882 | /* Wait for all RCU callbacks to fire. */ |
@@ -1676,7 +1888,7 @@ rcu_torture_cleanup(void) | |||
1676 | 1888 | ||
1677 | if (cur_ops->cleanup) | 1889 | if (cur_ops->cleanup) |
1678 | cur_ops->cleanup(); | 1890 | cur_ops->cleanup(); |
1679 | if (atomic_read(&n_rcu_torture_error)) | 1891 | if (atomic_read(&n_rcu_torture_error) || n_rcu_torture_barrier_error) |
1680 | rcu_torture_print_module_parms(cur_ops, "End of test: FAILURE"); | 1892 | rcu_torture_print_module_parms(cur_ops, "End of test: FAILURE"); |
1681 | else if (n_online_successes != n_online_attempts || | 1893 | else if (n_online_successes != n_online_attempts || |
1682 | n_offline_successes != n_offline_attempts) | 1894 | n_offline_successes != n_offline_attempts) |
@@ -1692,10 +1904,12 @@ rcu_torture_init(void) | |||
1692 | int i; | 1904 | int i; |
1693 | int cpu; | 1905 | int cpu; |
1694 | int firsterr = 0; | 1906 | int firsterr = 0; |
1907 | int retval; | ||
1695 | static struct rcu_torture_ops *torture_ops[] = | 1908 | static struct rcu_torture_ops *torture_ops[] = |
1696 | { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops, | 1909 | { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops, |
1697 | &rcu_bh_ops, &rcu_bh_sync_ops, &rcu_bh_expedited_ops, | 1910 | &rcu_bh_ops, &rcu_bh_sync_ops, &rcu_bh_expedited_ops, |
1698 | &srcu_ops, &srcu_raw_ops, &srcu_expedited_ops, | 1911 | &srcu_ops, &srcu_sync_ops, &srcu_raw_ops, |
1912 | &srcu_raw_sync_ops, &srcu_expedited_ops, | ||
1699 | &sched_ops, &sched_sync_ops, &sched_expedited_ops, }; | 1913 | &sched_ops, &sched_sync_ops, &sched_expedited_ops, }; |
1700 | 1914 | ||
1701 | mutex_lock(&fullstop_mutex); | 1915 | mutex_lock(&fullstop_mutex); |
@@ -1749,6 +1963,7 @@ rcu_torture_init(void) | |||
1749 | atomic_set(&n_rcu_torture_free, 0); | 1963 | atomic_set(&n_rcu_torture_free, 0); |
1750 | atomic_set(&n_rcu_torture_mberror, 0); | 1964 | atomic_set(&n_rcu_torture_mberror, 0); |
1751 | atomic_set(&n_rcu_torture_error, 0); | 1965 | atomic_set(&n_rcu_torture_error, 0); |
1966 | n_rcu_torture_barrier_error = 0; | ||
1752 | n_rcu_torture_boost_ktrerror = 0; | 1967 | n_rcu_torture_boost_ktrerror = 0; |
1753 | n_rcu_torture_boost_rterror = 0; | 1968 | n_rcu_torture_boost_rterror = 0; |
1754 | n_rcu_torture_boost_failure = 0; | 1969 | n_rcu_torture_boost_failure = 0; |
@@ -1872,7 +2087,6 @@ rcu_torture_init(void) | |||
1872 | test_boost_duration = 2; | 2087 | test_boost_duration = 2; |
1873 | if ((test_boost == 1 && cur_ops->can_boost) || | 2088 | if ((test_boost == 1 && cur_ops->can_boost) || |
1874 | test_boost == 2) { | 2089 | test_boost == 2) { |
1875 | int retval; | ||
1876 | 2090 | ||
1877 | boost_starttime = jiffies + test_boost_interval * HZ; | 2091 | boost_starttime = jiffies + test_boost_interval * HZ; |
1878 | register_cpu_notifier(&rcutorture_cpu_nb); | 2092 | register_cpu_notifier(&rcutorture_cpu_nb); |
@@ -1897,9 +2111,22 @@ rcu_torture_init(void) | |||
1897 | goto unwind; | 2111 | goto unwind; |
1898 | } | 2112 | } |
1899 | } | 2113 | } |
1900 | rcu_torture_onoff_init(); | 2114 | i = rcu_torture_onoff_init(); |
2115 | if (i != 0) { | ||
2116 | firsterr = i; | ||
2117 | goto unwind; | ||
2118 | } | ||
1901 | register_reboot_notifier(&rcutorture_shutdown_nb); | 2119 | register_reboot_notifier(&rcutorture_shutdown_nb); |
1902 | rcu_torture_stall_init(); | 2120 | i = rcu_torture_stall_init(); |
2121 | if (i != 0) { | ||
2122 | firsterr = i; | ||
2123 | goto unwind; | ||
2124 | } | ||
2125 | retval = rcu_torture_barrier_init(); | ||
2126 | if (retval != 0) { | ||
2127 | firsterr = retval; | ||
2128 | goto unwind; | ||
2129 | } | ||
1903 | rcutorture_record_test_transition(); | 2130 | rcutorture_record_test_transition(); |
1904 | mutex_unlock(&fullstop_mutex); | 2131 | mutex_unlock(&fullstop_mutex); |
1905 | return 0; | 2132 | return 0; |
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index d0c5baf1ab18..0da7b88d92d0 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
@@ -75,6 +75,8 @@ static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; | |||
75 | .gpnum = -300, \ | 75 | .gpnum = -300, \ |
76 | .completed = -300, \ | 76 | .completed = -300, \ |
77 | .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \ | 77 | .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \ |
78 | .orphan_nxttail = &structname##_state.orphan_nxtlist, \ | ||
79 | .orphan_donetail = &structname##_state.orphan_donelist, \ | ||
78 | .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.fqslock), \ | 80 | .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.fqslock), \ |
79 | .n_force_qs = 0, \ | 81 | .n_force_qs = 0, \ |
80 | .n_force_qs_ngp = 0, \ | 82 | .n_force_qs_ngp = 0, \ |
@@ -145,6 +147,13 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); | |||
145 | unsigned long rcutorture_testseq; | 147 | unsigned long rcutorture_testseq; |
146 | unsigned long rcutorture_vernum; | 148 | unsigned long rcutorture_vernum; |
147 | 149 | ||
150 | /* State information for rcu_barrier() and friends. */ | ||
151 | |||
152 | static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; | ||
153 | static atomic_t rcu_barrier_cpu_count; | ||
154 | static DEFINE_MUTEX(rcu_barrier_mutex); | ||
155 | static struct completion rcu_barrier_completion; | ||
156 | |||
148 | /* | 157 | /* |
149 | * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s | 158 | * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s |
150 | * permit this function to be invoked without holding the root rcu_node | 159 | * permit this function to be invoked without holding the root rcu_node |
@@ -192,7 +201,6 @@ void rcu_note_context_switch(int cpu) | |||
192 | { | 201 | { |
193 | trace_rcu_utilization("Start context switch"); | 202 | trace_rcu_utilization("Start context switch"); |
194 | rcu_sched_qs(cpu); | 203 | rcu_sched_qs(cpu); |
195 | rcu_preempt_note_context_switch(cpu); | ||
196 | trace_rcu_utilization("End context switch"); | 204 | trace_rcu_utilization("End context switch"); |
197 | } | 205 | } |
198 | EXPORT_SYMBOL_GPL(rcu_note_context_switch); | 206 | EXPORT_SYMBOL_GPL(rcu_note_context_switch); |
@@ -1311,95 +1319,133 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1311 | #ifdef CONFIG_HOTPLUG_CPU | 1319 | #ifdef CONFIG_HOTPLUG_CPU |
1312 | 1320 | ||
1313 | /* | 1321 | /* |
1314 | * Move a dying CPU's RCU callbacks to online CPU's callback list. | 1322 | * Send the specified CPU's RCU callbacks to the orphanage. The |
1315 | * Also record a quiescent state for this CPU for the current grace period. | 1323 | * specified CPU must be offline, and the caller must hold the |
1316 | * Synchronization and interrupt disabling are not required because | 1324 | * ->onofflock. |
1317 | * this function executes in stop_machine() context. Therefore, cleanup | ||
1318 | * operations that might block must be done later from the CPU_DEAD | ||
1319 | * notifier. | ||
1320 | * | ||
1321 | * Note that the outgoing CPU's bit has already been cleared in the | ||
1322 | * cpu_online_mask. This allows us to randomly pick a callback | ||
1323 | * destination from the bits set in that mask. | ||
1324 | */ | 1325 | */ |
1325 | static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) | 1326 | static void |
1327 | rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, | ||
1328 | struct rcu_node *rnp, struct rcu_data *rdp) | ||
1326 | { | 1329 | { |
1327 | int i; | 1330 | int i; |
1328 | unsigned long mask; | ||
1329 | int receive_cpu = cpumask_any(cpu_online_mask); | ||
1330 | struct rcu_data *rdp = this_cpu_ptr(rsp->rda); | ||
1331 | struct rcu_data *receive_rdp = per_cpu_ptr(rsp->rda, receive_cpu); | ||
1332 | RCU_TRACE(struct rcu_node *rnp = rdp->mynode); /* For dying CPU. */ | ||
1333 | 1331 | ||
1334 | /* First, adjust the counts. */ | 1332 | /* |
1333 | * Orphan the callbacks. First adjust the counts. This is safe | ||
1334 | * because ->onofflock excludes _rcu_barrier()'s adoption of | ||
1335 | * the callbacks, thus no memory barrier is required. | ||
1336 | */ | ||
1335 | if (rdp->nxtlist != NULL) { | 1337 | if (rdp->nxtlist != NULL) { |
1336 | receive_rdp->qlen_lazy += rdp->qlen_lazy; | 1338 | rsp->qlen_lazy += rdp->qlen_lazy; |
1337 | receive_rdp->qlen += rdp->qlen; | 1339 | rsp->qlen += rdp->qlen; |
1340 | rdp->n_cbs_orphaned += rdp->qlen; | ||
1338 | rdp->qlen_lazy = 0; | 1341 | rdp->qlen_lazy = 0; |
1339 | rdp->qlen = 0; | 1342 | rdp->qlen = 0; |
1340 | } | 1343 | } |
1341 | 1344 | ||
1342 | /* | 1345 | /* |
1343 | * Next, move ready-to-invoke callbacks to be invoked on some | 1346 | * Next, move those callbacks still needing a grace period to |
1344 | * other CPU. These will not be required to pass through another | 1347 | * the orphanage, where some other CPU will pick them up. |
1345 | * grace period: They are done, regardless of CPU. | 1348 | * Some of the callbacks might have gone partway through a grace |
1349 | * period, but that is too bad. They get to start over because we | ||
1350 | * cannot assume that grace periods are synchronized across CPUs. | ||
1351 | * We don't bother updating the ->nxttail[] array yet, instead | ||
1352 | * we just reset the whole thing later on. | ||
1346 | */ | 1353 | */ |
1347 | if (rdp->nxtlist != NULL && | 1354 | if (*rdp->nxttail[RCU_DONE_TAIL] != NULL) { |
1348 | rdp->nxttail[RCU_DONE_TAIL] != &rdp->nxtlist) { | 1355 | *rsp->orphan_nxttail = *rdp->nxttail[RCU_DONE_TAIL]; |
1349 | struct rcu_head *oldhead; | 1356 | rsp->orphan_nxttail = rdp->nxttail[RCU_NEXT_TAIL]; |
1350 | struct rcu_head **oldtail; | 1357 | *rdp->nxttail[RCU_DONE_TAIL] = NULL; |
1351 | struct rcu_head **newtail; | ||
1352 | |||
1353 | oldhead = rdp->nxtlist; | ||
1354 | oldtail = receive_rdp->nxttail[RCU_DONE_TAIL]; | ||
1355 | rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; | ||
1356 | *rdp->nxttail[RCU_DONE_TAIL] = *oldtail; | ||
1357 | *receive_rdp->nxttail[RCU_DONE_TAIL] = oldhead; | ||
1358 | newtail = rdp->nxttail[RCU_DONE_TAIL]; | ||
1359 | for (i = RCU_DONE_TAIL; i < RCU_NEXT_SIZE; i++) { | ||
1360 | if (receive_rdp->nxttail[i] == oldtail) | ||
1361 | receive_rdp->nxttail[i] = newtail; | ||
1362 | if (rdp->nxttail[i] == newtail) | ||
1363 | rdp->nxttail[i] = &rdp->nxtlist; | ||
1364 | } | ||
1365 | } | 1358 | } |
1366 | 1359 | ||
1367 | /* | 1360 | /* |
1368 | * Finally, put the rest of the callbacks at the end of the list. | 1361 | * Then move the ready-to-invoke callbacks to the orphanage, |
1369 | * The ones that made it partway through get to start over: We | 1362 | * where some other CPU will pick them up. These will not be |
1370 | * cannot assume that grace periods are synchronized across CPUs. | 1363 | * required to pass through another grace period: They are done. |
1371 | * (We could splice RCU_WAIT_TAIL into RCU_NEXT_READY_TAIL, but | ||
1372 | * this does not seem compelling. Not yet, anyway.) | ||
1373 | */ | 1364 | */ |
1374 | if (rdp->nxtlist != NULL) { | 1365 | if (rdp->nxtlist != NULL) { |
1375 | *receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist; | 1366 | *rsp->orphan_donetail = rdp->nxtlist; |
1376 | receive_rdp->nxttail[RCU_NEXT_TAIL] = | 1367 | rsp->orphan_donetail = rdp->nxttail[RCU_DONE_TAIL]; |
1377 | rdp->nxttail[RCU_NEXT_TAIL]; | ||
1378 | receive_rdp->n_cbs_adopted += rdp->qlen; | ||
1379 | rdp->n_cbs_orphaned += rdp->qlen; | ||
1380 | |||
1381 | rdp->nxtlist = NULL; | ||
1382 | for (i = 0; i < RCU_NEXT_SIZE; i++) | ||
1383 | rdp->nxttail[i] = &rdp->nxtlist; | ||
1384 | } | 1368 | } |
1385 | 1369 | ||
1370 | /* Finally, initialize the rcu_data structure's list to empty. */ | ||
1371 | rdp->nxtlist = NULL; | ||
1372 | for (i = 0; i < RCU_NEXT_SIZE; i++) | ||
1373 | rdp->nxttail[i] = &rdp->nxtlist; | ||
1374 | } | ||
1375 | |||
1376 | /* | ||
1377 | * Adopt the RCU callbacks from the specified rcu_state structure's | ||
1378 | * orphanage. The caller must hold the ->onofflock. | ||
1379 | */ | ||
1380 | static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) | ||
1381 | { | ||
1382 | int i; | ||
1383 | struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); | ||
1384 | |||
1386 | /* | 1385 | /* |
1387 | * Record a quiescent state for the dying CPU. This is safe | 1386 | * If there is an rcu_barrier() operation in progress, then |
1388 | * only because we have already cleared out the callbacks. | 1387 | * only the task doing that operation is permitted to adopt |
1389 | * (Otherwise, the RCU core might try to schedule the invocation | 1388 | * callbacks. To do otherwise breaks rcu_barrier() and friends |
1390 | * of callbacks on this now-offline CPU, which would be bad.) | 1389 | * by causing them to fail to wait for the callbacks in the |
1390 | * orphanage. | ||
1391 | */ | 1391 | */ |
1392 | mask = rdp->grpmask; /* rnp->grplo is constant. */ | 1392 | if (rsp->rcu_barrier_in_progress && |
1393 | rsp->rcu_barrier_in_progress != current) | ||
1394 | return; | ||
1395 | |||
1396 | /* Do the accounting first. */ | ||
1397 | rdp->qlen_lazy += rsp->qlen_lazy; | ||
1398 | rdp->qlen += rsp->qlen; | ||
1399 | rdp->n_cbs_adopted += rsp->qlen; | ||
1400 | rsp->qlen_lazy = 0; | ||
1401 | rsp->qlen = 0; | ||
1402 | |||
1403 | /* | ||
1404 | * We do not need a memory barrier here because the only way we | ||
1405 | * can get here while an rcu_barrier() is in flight is if | ||
1406 | * we are the task doing the rcu_barrier(). | ||
1407 | */ | ||
1408 | |||
1409 | /* First adopt the ready-to-invoke callbacks. */ | ||
1410 | if (rsp->orphan_donelist != NULL) { | ||
1411 | *rsp->orphan_donetail = *rdp->nxttail[RCU_DONE_TAIL]; | ||
1412 | *rdp->nxttail[RCU_DONE_TAIL] = rsp->orphan_donelist; | ||
1413 | for (i = RCU_NEXT_SIZE - 1; i >= RCU_DONE_TAIL; i--) | ||
1414 | if (rdp->nxttail[i] == rdp->nxttail[RCU_DONE_TAIL]) | ||
1415 | rdp->nxttail[i] = rsp->orphan_donetail; | ||
1416 | rsp->orphan_donelist = NULL; | ||
1417 | rsp->orphan_donetail = &rsp->orphan_donelist; | ||
1418 | } | ||
1419 | |||
1420 | /* And then adopt the callbacks that still need a grace period. */ | ||
1421 | if (rsp->orphan_nxtlist != NULL) { | ||
1422 | *rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_nxtlist; | ||
1423 | rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_nxttail; | ||
1424 | rsp->orphan_nxtlist = NULL; | ||
1425 | rsp->orphan_nxttail = &rsp->orphan_nxtlist; | ||
1426 | } | ||
1427 | } | ||
1428 | |||
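The donation and adoption above rely on each callback list being tracked by a head pointer plus a pointer to the last element's ->next field, so splicing a dead CPU's callbacks into the orphanage, and later splicing the orphanage into a surviving CPU's list, are both constant-time pointer moves. The standalone sketch below uses simplified types and invented names (it is not the kernel's segmented rcu_data list), but shows the same tail-pointer splice.

#include <stdio.h>

struct cb {
        struct cb *next;
        int id;
};

struct cblist {
        struct cb *head;
        struct cb **tail;  /* points at head, or at the last element's ->next */
};

static void cblist_init(struct cblist *l)
{
        l->head = NULL;
        l->tail = &l->head;
}

static void cblist_enqueue(struct cblist *l, struct cb *c)
{
        c->next = NULL;
        *l->tail = c;
        l->tail = &c->next;
}

/* Splice everything on "from" onto the end of "to", leaving "from" empty. */
static void cblist_splice(struct cblist *to, struct cblist *from)
{
        if (from->head == NULL)
                return;
        *to->tail = from->head;
        to->tail = from->tail;
        cblist_init(from);
}

int main(void)
{
        struct cblist cpu_list, orphanage;
        struct cb cbs[3] = { { NULL, 0 }, { NULL, 1 }, { NULL, 2 } };
        struct cb *c;
        int i;

        cblist_init(&cpu_list);
        cblist_init(&orphanage);
        for (i = 0; i < 3; i++)
                cblist_enqueue(&cpu_list, &cbs[i]);

        cblist_splice(&orphanage, &cpu_list);  /* CPU dies: donate its callbacks */
        cblist_splice(&cpu_list, &orphanage);  /* a survivor adopts them */

        for (c = cpu_list.head; c != NULL; c = c->next)
                printf("callback %d survived\n", c->id);
        return 0;
}

Note how cblist_splice() leaves the source list reinitialized, mirroring the way rcu_adopt_orphan_cbs() resets ->orphan_donelist and ->orphan_nxtlist after adopting them.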
1429 | /* | ||
1430 | * Trace the fact that this CPU is going offline. | ||
1431 | */ | ||
1432 | static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) | ||
1433 | { | ||
1434 | RCU_TRACE(unsigned long mask); | ||
1435 | RCU_TRACE(struct rcu_data *rdp = this_cpu_ptr(rsp->rda)); | ||
1436 | RCU_TRACE(struct rcu_node *rnp = rdp->mynode); | ||
1437 | |||
1438 | RCU_TRACE(mask = rdp->grpmask); | ||
1393 | trace_rcu_grace_period(rsp->name, | 1439 | trace_rcu_grace_period(rsp->name, |
1394 | rnp->gpnum + 1 - !!(rnp->qsmask & mask), | 1440 | rnp->gpnum + 1 - !!(rnp->qsmask & mask), |
1395 | "cpuofl"); | 1441 | "cpuofl"); |
1396 | rcu_report_qs_rdp(smp_processor_id(), rsp, rdp, rsp->gpnum); | ||
1397 | /* Note that rcu_report_qs_rdp() might call trace_rcu_grace_period(). */ | ||
1398 | } | 1442 | } |
1399 | 1443 | ||
1400 | /* | 1444 | /* |
1401 | * The CPU has been completely removed, and some other CPU is reporting | 1445 | * The CPU has been completely removed, and some other CPU is reporting |
1402 | * this fact from process context. Do the remainder of the cleanup. | 1446 | * this fact from process context. Do the remainder of the cleanup, |
1447 | * including orphaning the outgoing CPU's RCU callbacks, and also | ||
1448 | * adopting them, if there is no _rcu_barrier() instance running. | ||
1403 | * There can only be one CPU hotplug operation at a time, so no other | 1449 | * There can only be one CPU hotplug operation at a time, so no other |
1404 | * CPU can be attempting to update rcu_cpu_kthread_task. | 1450 | * CPU can be attempting to update rcu_cpu_kthread_task. |
1405 | */ | 1451 | */ |
@@ -1409,17 +1455,21 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) | |||
1409 | unsigned long mask; | 1455 | unsigned long mask; |
1410 | int need_report = 0; | 1456 | int need_report = 0; |
1411 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); | 1457 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); |
1412 | struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rnp. */ | 1458 | struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ |
1413 | 1459 | ||
1414 | /* Adjust any no-longer-needed kthreads. */ | 1460 | /* Adjust any no-longer-needed kthreads. */ |
1415 | rcu_stop_cpu_kthread(cpu); | 1461 | rcu_stop_cpu_kthread(cpu); |
1416 | rcu_node_kthread_setaffinity(rnp, -1); | 1462 | rcu_node_kthread_setaffinity(rnp, -1); |
1417 | 1463 | ||
1418 | /* Remove the dying CPU from the bitmasks in the rcu_node hierarchy. */ | 1464 | /* Remove the dead CPU from the bitmasks in the rcu_node hierarchy. */ |
1419 | 1465 | ||
1420 | /* Exclude any attempts to start a new grace period. */ | 1466 | /* Exclude any attempts to start a new grace period. */ |
1421 | raw_spin_lock_irqsave(&rsp->onofflock, flags); | 1467 | raw_spin_lock_irqsave(&rsp->onofflock, flags); |
1422 | 1468 | ||
1469 | /* Orphan the dead CPU's callbacks, and adopt them if appropriate. */ | ||
1470 | rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp); | ||
1471 | rcu_adopt_orphan_cbs(rsp); | ||
1472 | |||
1423 | /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */ | 1473 | /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */ |
1424 | mask = rdp->grpmask; /* rnp->grplo is constant. */ | 1474 | mask = rdp->grpmask; /* rnp->grplo is constant. */ |
1425 | do { | 1475 | do { |
@@ -1456,6 +1506,10 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) | |||
1456 | 1506 | ||
1457 | #else /* #ifdef CONFIG_HOTPLUG_CPU */ | 1507 | #else /* #ifdef CONFIG_HOTPLUG_CPU */ |
1458 | 1508 | ||
1509 | static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) | ||
1510 | { | ||
1511 | } | ||
1512 | |||
1459 | static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) | 1513 | static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) |
1460 | { | 1514 | { |
1461 | } | 1515 | } |
@@ -1524,9 +1578,6 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1524 | rcu_is_callbacks_kthread()); | 1578 | rcu_is_callbacks_kthread()); |
1525 | 1579 | ||
1526 | /* Update count, and requeue any remaining callbacks. */ | 1580 | /* Update count, and requeue any remaining callbacks. */ |
1527 | rdp->qlen_lazy -= count_lazy; | ||
1528 | rdp->qlen -= count; | ||
1529 | rdp->n_cbs_invoked += count; | ||
1530 | if (list != NULL) { | 1581 | if (list != NULL) { |
1531 | *tail = rdp->nxtlist; | 1582 | *tail = rdp->nxtlist; |
1532 | rdp->nxtlist = list; | 1583 | rdp->nxtlist = list; |
@@ -1536,6 +1587,10 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1536 | else | 1587 | else |
1537 | break; | 1588 | break; |
1538 | } | 1589 | } |
1590 | smp_mb(); /* List handling before counting for rcu_barrier(). */ | ||
1591 | rdp->qlen_lazy -= count_lazy; | ||
1592 | rdp->qlen -= count; | ||
1593 | rdp->n_cbs_invoked += count; | ||
1539 | 1594 | ||
1540 | /* Reinstate batch limit if we have worked down the excess. */ | 1595 | /* Reinstate batch limit if we have worked down the excess. */ |
1541 | if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark) | 1596 | if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark) |
@@ -1823,11 +1878,14 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), | |||
1823 | rdp = this_cpu_ptr(rsp->rda); | 1878 | rdp = this_cpu_ptr(rsp->rda); |
1824 | 1879 | ||
1825 | /* Add the callback to our list. */ | 1880 | /* Add the callback to our list. */ |
1826 | *rdp->nxttail[RCU_NEXT_TAIL] = head; | ||
1827 | rdp->nxttail[RCU_NEXT_TAIL] = &head->next; | ||
1828 | rdp->qlen++; | 1881 | rdp->qlen++; |
1829 | if (lazy) | 1882 | if (lazy) |
1830 | rdp->qlen_lazy++; | 1883 | rdp->qlen_lazy++; |
1884 | else | ||
1885 | rcu_idle_count_callbacks_posted(); | ||
1886 | smp_mb(); /* Count before adding callback for rcu_barrier(). */ | ||
1887 | *rdp->nxttail[RCU_NEXT_TAIL] = head; | ||
1888 | rdp->nxttail[RCU_NEXT_TAIL] = &head->next; | ||
1831 | 1889 | ||
1832 | if (__is_kfree_rcu_offset((unsigned long)func)) | 1890 | if (__is_kfree_rcu_offset((unsigned long)func)) |
1833 | trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func, | 1891 | trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func, |
@@ -1893,6 +1951,38 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | |||
1893 | } | 1951 | } |
1894 | EXPORT_SYMBOL_GPL(call_rcu_bh); | 1952 | EXPORT_SYMBOL_GPL(call_rcu_bh); |
1895 | 1953 | ||
1954 | /* | ||
1955 | * Because a context switch is a grace period for RCU-sched and RCU-bh, | ||
1956 | * any blocking grace-period wait automatically implies a grace period | ||
1957 | * if there is only one CPU online at any point in time during execution | ||
1958 | * of either synchronize_sched() or synchronize_rcu_bh(). It is OK to | ||
1959 | * occasionally incorrectly indicate that there are multiple CPUs online | ||
1960 | * when there was in fact only one the whole time, as this just adds | ||
1961 | * some overhead: RCU still operates correctly. | ||
1962 | * | ||
1963 | * Of course, sampling num_online_cpus() with preemption enabled can | ||
1964 | * give erroneous results if there are concurrent CPU-hotplug operations. | ||
1965 | * For example, given a demonic sequence of preemptions in num_online_cpus() | ||
1966 | * and CPU-hotplug operations, there could be two or more CPUs online at | ||
1967 | * all times, but num_online_cpus() might well return one (or even zero). | ||
1968 | * | ||
1969 | * However, all such demonic sequences require at least one CPU-offline | ||
1970 | * operation. Furthermore, rcu_blocking_is_gp() giving the wrong answer | ||
1971 | * is only a problem if there is an RCU read-side critical section executing | ||
1972 | * throughout. But RCU-sched and RCU-bh read-side critical sections | ||
1973 | * disable either preemption or bh, which prevents a CPU from going offline. | ||
1974 | * Therefore, the only way that rcu_blocking_is_gp() can incorrectly return | ||
1975 | * that there is only one CPU when in fact there was more than one throughout | ||
1976 | * is when there were no RCU readers in the system. If there are no | ||
1977 | * RCU readers, the grace period by definition can be of zero length, | ||
1978 | * regardless of the number of online CPUs. | ||
1979 | */ | ||
1980 | static inline int rcu_blocking_is_gp(void) | ||
1981 | { | ||
1982 | might_sleep(); /* Check for RCU read-side critical section. */ | ||
1983 | return num_online_cpus() <= 1; | ||
1984 | } | ||
1985 | |||
1896 | /** | 1986 | /** |
1897 | * synchronize_sched - wait until an rcu-sched grace period has elapsed. | 1987 | * synchronize_sched - wait until an rcu-sched grace period has elapsed. |
1898 | * | 1988 | * |
@@ -2166,11 +2256,10 @@ static int rcu_cpu_has_callbacks(int cpu) | |||
2166 | rcu_preempt_cpu_has_callbacks(cpu); | 2256 | rcu_preempt_cpu_has_callbacks(cpu); |
2167 | } | 2257 | } |
2168 | 2258 | ||
2169 | static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; | 2259 | /* |
2170 | static atomic_t rcu_barrier_cpu_count; | 2260 | * RCU callback function for _rcu_barrier(). If we are last, wake |
2171 | static DEFINE_MUTEX(rcu_barrier_mutex); | 2261 | * up the task executing _rcu_barrier(). |
2172 | static struct completion rcu_barrier_completion; | 2262 | */ |
2173 | |||
2174 | static void rcu_barrier_callback(struct rcu_head *notused) | 2263 | static void rcu_barrier_callback(struct rcu_head *notused) |
2175 | { | 2264 | { |
2176 | if (atomic_dec_and_test(&rcu_barrier_cpu_count)) | 2265 | if (atomic_dec_and_test(&rcu_barrier_cpu_count)) |
@@ -2200,27 +2289,94 @@ static void _rcu_barrier(struct rcu_state *rsp, | |||
2200 | void (*call_rcu_func)(struct rcu_head *head, | 2289 | void (*call_rcu_func)(struct rcu_head *head, |
2201 | void (*func)(struct rcu_head *head))) | 2290 | void (*func)(struct rcu_head *head))) |
2202 | { | 2291 | { |
2203 | BUG_ON(in_interrupt()); | 2292 | int cpu; |
2293 | unsigned long flags; | ||
2294 | struct rcu_data *rdp; | ||
2295 | struct rcu_head rh; | ||
2296 | |||
2297 | init_rcu_head_on_stack(&rh); | ||
2298 | |||
2204 | /* Take mutex to serialize concurrent rcu_barrier() requests. */ | 2299 | /* Take mutex to serialize concurrent rcu_barrier() requests. */ |
2205 | mutex_lock(&rcu_barrier_mutex); | 2300 | mutex_lock(&rcu_barrier_mutex); |
2206 | init_completion(&rcu_barrier_completion); | 2301 | |
2302 | smp_mb(); /* Prevent any prior operations from leaking in. */ | ||
2303 | |||
2207 | /* | 2304 | /* |
2208 | * Initialize rcu_barrier_cpu_count to 1, then invoke | 2305 | * Initialize the count to one rather than to zero in order to |
2209 | * rcu_barrier_func() on each CPU, so that each CPU also has | 2306 | * avoid a too-soon return to zero in case of a short grace period |
2210 | * incremented rcu_barrier_cpu_count. Only then is it safe to | 2307 | * (or preemption of this task). Also flag this task as doing |
2211 | * decrement rcu_barrier_cpu_count -- otherwise the first CPU | 2308 | * an rcu_barrier(). This will prevent anyone else from adopting |
2212 | * might complete its grace period before all of the other CPUs | 2309 | * orphaned callbacks, which could otherwise cause failure if a |
2213 | * did their increment, causing this function to return too | 2310 | * CPU went offline and quickly came back online. To see this, |
2214 | * early. Note that on_each_cpu() disables irqs, which prevents | 2311 | * consider the following sequence of events: |
2215 | * any CPUs from coming online or going offline until each online | 2312 | * |
2216 | * CPU has queued its RCU-barrier callback. | 2313 | * 1. We cause CPU 0 to post an rcu_barrier_callback() callback. |
2314 | * 2. CPU 1 goes offline, orphaning its callbacks. | ||
2315 | * 3. CPU 0 adopts CPU 1's orphaned callbacks. | ||
2316 | * 4. CPU 1 comes back online. | ||
2317 | * 5. We cause CPU 1 to post an rcu_barrier_callback() callback. | ||
2318 | * 6. Both rcu_barrier_callback() callbacks are invoked, awakening | ||
2319 | * us -- but before CPU 1's orphaned callbacks are invoked!!! | ||
2217 | */ | 2320 | */ |
2321 | init_completion(&rcu_barrier_completion); | ||
2218 | atomic_set(&rcu_barrier_cpu_count, 1); | 2322 | atomic_set(&rcu_barrier_cpu_count, 1); |
2219 | on_each_cpu(rcu_barrier_func, (void *)call_rcu_func, 1); | 2323 | raw_spin_lock_irqsave(&rsp->onofflock, flags); |
2324 | rsp->rcu_barrier_in_progress = current; | ||
2325 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); | ||
2326 | |||
2327 | /* | ||
2328 | * Force every CPU with callbacks to register a new callback | ||
2329 | * that will tell us when all the preceding callbacks have | ||
2330 | * been invoked. If an offline CPU has callbacks, wait for | ||
2331 | * it to either come back online or to finish orphaning those | ||
2332 | * callbacks. | ||
2333 | */ | ||
2334 | for_each_possible_cpu(cpu) { | ||
2335 | preempt_disable(); | ||
2336 | rdp = per_cpu_ptr(rsp->rda, cpu); | ||
2337 | if (cpu_is_offline(cpu)) { | ||
2338 | preempt_enable(); | ||
2339 | while (cpu_is_offline(cpu) && ACCESS_ONCE(rdp->qlen)) | ||
2340 | schedule_timeout_interruptible(1); | ||
2341 | } else if (ACCESS_ONCE(rdp->qlen)) { | ||
2342 | smp_call_function_single(cpu, rcu_barrier_func, | ||
2343 | (void *)call_rcu_func, 1); | ||
2344 | preempt_enable(); | ||
2345 | } else { | ||
2346 | preempt_enable(); | ||
2347 | } | ||
2348 | } | ||
2349 | |||
2350 | /* | ||
2351 | * Now that all online CPUs have rcu_barrier_callback() callbacks | ||
2352 | * posted, we can adopt all of the orphaned callbacks and place | ||
2353 | * an rcu_barrier_callback() callback after them. When that is done, | ||
2354 | * we are guaranteed to have an rcu_barrier_callback() callback | ||
2355 | * following every callback that could possibly have been | ||
2356 | * registered before _rcu_barrier() was called. | ||
2357 | */ | ||
2358 | raw_spin_lock_irqsave(&rsp->onofflock, flags); | ||
2359 | rcu_adopt_orphan_cbs(rsp); | ||
2360 | rsp->rcu_barrier_in_progress = NULL; | ||
2361 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); | ||
2362 | atomic_inc(&rcu_barrier_cpu_count); | ||
2363 | smp_mb__after_atomic_inc(); /* Ensure atomic_inc() before callback. */ | ||
2364 | call_rcu_func(&rh, rcu_barrier_callback); | ||
2365 | |||
2366 | /* | ||
2367 | * Now that we have an rcu_barrier_callback() callback on each | ||
2368 | * CPU, and thus each counted, remove the initial count. | ||
2369 | */ | ||
2220 | if (atomic_dec_and_test(&rcu_barrier_cpu_count)) | 2370 | if (atomic_dec_and_test(&rcu_barrier_cpu_count)) |
2221 | complete(&rcu_barrier_completion); | 2371 | complete(&rcu_barrier_completion); |
2372 | |||
2373 | /* Wait for all rcu_barrier_callback() callbacks to be invoked. */ | ||
2222 | wait_for_completion(&rcu_barrier_completion); | 2374 | wait_for_completion(&rcu_barrier_completion); |
2375 | |||
2376 | /* Other rcu_barrier() invocations can now safely proceed. */ | ||
2223 | mutex_unlock(&rcu_barrier_mutex); | 2377 | mutex_unlock(&rcu_barrier_mutex); |
2378 | |||
2379 | destroy_rcu_head_on_stack(&rh); | ||
2224 | } | 2380 | } |
2225 | 2381 | ||
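The long comment above is the key to _rcu_barrier(): the count starts at one rather than zero so it cannot drain to zero until every rcu_barrier_callback() has been posted and the initiating task drops its own reference at the end. The single-threaded sketch below uses C11 atomics and invented names rather than the kernel's primitives, but it exercises the same reference-holding idiom.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int barrier_count;
static bool barrier_done;

/* Invoked once per posted callback; the final decrement signals completion. */
static void barrier_callback(void)
{
        if (atomic_fetch_sub(&barrier_count, 1) == 1)
                barrier_done = true;
}

int main(void)
{
        int i;

        atomic_store(&barrier_count, 1);             /* the poster's own reference */
        for (i = 0; i < 3; i++)
                atomic_fetch_add(&barrier_count, 1); /* one per posted callback */

        /* The posted callbacks may run at any time after being queued... */
        for (i = 0; i < 3; i++)
                barrier_callback();

        /* ...but completion cannot be signaled before this final decrement. */
        barrier_callback();
        printf("barrier_done = %d\n", barrier_done);
        return 0;
}

Had the count started at zero, the first few callback invocations could drive it back to zero while later callbacks were still being posted, signaling completion too early -- exactly the failure mode the initial reference prevents.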
2226 | /** | 2382 | /** |
@@ -2417,7 +2573,7 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp) | |||
2417 | 2573 | ||
2418 | for (i = NUM_RCU_LVLS - 1; i > 0; i--) | 2574 | for (i = NUM_RCU_LVLS - 1; i > 0; i--) |
2419 | rsp->levelspread[i] = CONFIG_RCU_FANOUT; | 2575 | rsp->levelspread[i] = CONFIG_RCU_FANOUT; |
2420 | rsp->levelspread[0] = RCU_FANOUT_LEAF; | 2576 | rsp->levelspread[0] = CONFIG_RCU_FANOUT_LEAF; |
2421 | } | 2577 | } |
2422 | #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */ | 2578 | #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */ |
2423 | static void __init rcu_init_levelspread(struct rcu_state *rsp) | 2579 | static void __init rcu_init_levelspread(struct rcu_state *rsp) |
diff --git a/kernel/rcutree.h b/kernel/rcutree.h index cdd1be0a4072..7f5d138dedf5 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h | |||
@@ -29,18 +29,14 @@ | |||
29 | #include <linux/seqlock.h> | 29 | #include <linux/seqlock.h> |
30 | 30 | ||
31 | /* | 31 | /* |
32 | * Define shape of hierarchy based on NR_CPUS and CONFIG_RCU_FANOUT. | 32 | * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and |
33 | * CONFIG_RCU_FANOUT_LEAF. | ||
33 | * In theory, it should be possible to add more levels straightforwardly. | 34 | * In theory, it should be possible to add more levels straightforwardly. |
34 | * In practice, this did work well going from three levels to four. | 35 | * In practice, this did work well going from three levels to four. |
35 | * Of course, your mileage may vary. | 36 | * Of course, your mileage may vary. |
36 | */ | 37 | */ |
37 | #define MAX_RCU_LVLS 4 | 38 | #define MAX_RCU_LVLS 4 |
38 | #if CONFIG_RCU_FANOUT > 16 | 39 | #define RCU_FANOUT_1 (CONFIG_RCU_FANOUT_LEAF) |
39 | #define RCU_FANOUT_LEAF 16 | ||
40 | #else /* #if CONFIG_RCU_FANOUT > 16 */ | ||
41 | #define RCU_FANOUT_LEAF (CONFIG_RCU_FANOUT) | ||
42 | #endif /* #else #if CONFIG_RCU_FANOUT > 16 */ | ||
43 | #define RCU_FANOUT_1 (RCU_FANOUT_LEAF) | ||
44 | #define RCU_FANOUT_2 (RCU_FANOUT_1 * CONFIG_RCU_FANOUT) | 40 | #define RCU_FANOUT_2 (RCU_FANOUT_1 * CONFIG_RCU_FANOUT) |
45 | #define RCU_FANOUT_3 (RCU_FANOUT_2 * CONFIG_RCU_FANOUT) | 41 | #define RCU_FANOUT_3 (RCU_FANOUT_2 * CONFIG_RCU_FANOUT) |
46 | #define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT) | 42 | #define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT) |
@@ -371,6 +367,17 @@ struct rcu_state { | |||
371 | 367 | ||
372 | raw_spinlock_t onofflock; /* exclude on/offline and */ | 368 | raw_spinlock_t onofflock; /* exclude on/offline and */ |
373 | /* starting new GP. */ | 369 | /* starting new GP. */ |
370 | struct rcu_head *orphan_nxtlist; /* Orphaned callbacks that */ | ||
371 | /* need a grace period. */ | ||
372 | struct rcu_head **orphan_nxttail; /* Tail of above. */ | ||
373 | struct rcu_head *orphan_donelist; /* Orphaned callbacks that */ | ||
374 | /* are ready to invoke. */ | ||
375 | struct rcu_head **orphan_donetail; /* Tail of above. */ | ||
376 | long qlen_lazy; /* Number of lazy callbacks. */ | ||
377 | long qlen; /* Total number of callbacks. */ | ||
378 | struct task_struct *rcu_barrier_in_progress; | ||
379 | /* Task doing rcu_barrier(), */ | ||
380 | /* or NULL if no barrier. */ | ||
374 | raw_spinlock_t fqslock; /* Only one task forcing */ | 381 | raw_spinlock_t fqslock; /* Only one task forcing */ |
375 | /* quiescent states. */ | 382 | /* quiescent states. */ |
376 | unsigned long jiffies_force_qs; /* Time at which to invoke */ | 383 | unsigned long jiffies_force_qs; /* Time at which to invoke */ |
@@ -423,7 +430,6 @@ DECLARE_PER_CPU(char, rcu_cpu_has_work); | |||
423 | /* Forward declarations for rcutree_plugin.h */ | 430 | /* Forward declarations for rcutree_plugin.h */ |
424 | static void rcu_bootup_announce(void); | 431 | static void rcu_bootup_announce(void); |
425 | long rcu_batches_completed(void); | 432 | long rcu_batches_completed(void); |
426 | static void rcu_preempt_note_context_switch(int cpu); | ||
427 | static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp); | 433 | static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp); |
428 | #ifdef CONFIG_HOTPLUG_CPU | 434 | #ifdef CONFIG_HOTPLUG_CPU |
429 | static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, | 435 | static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, |
@@ -471,6 +477,7 @@ static void __cpuinit rcu_prepare_kthreads(int cpu); | |||
471 | static void rcu_prepare_for_idle_init(int cpu); | 477 | static void rcu_prepare_for_idle_init(int cpu); |
472 | static void rcu_cleanup_after_idle(int cpu); | 478 | static void rcu_cleanup_after_idle(int cpu); |
473 | static void rcu_prepare_for_idle(int cpu); | 479 | static void rcu_prepare_for_idle(int cpu); |
480 | static void rcu_idle_count_callbacks_posted(void); | ||
474 | static void print_cpu_stall_info_begin(void); | 481 | static void print_cpu_stall_info_begin(void); |
475 | static void print_cpu_stall_info(struct rcu_state *rsp, int cpu); | 482 | static void print_cpu_stall_info(struct rcu_state *rsp, int cpu); |
476 | static void print_cpu_stall_info_end(void); | 483 | static void print_cpu_stall_info_end(void); |
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index c023464816be..2411000d9869 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h | |||
@@ -153,7 +153,7 @@ static void rcu_preempt_qs(int cpu) | |||
153 | * | 153 | * |
154 | * Caller must disable preemption. | 154 | * Caller must disable preemption. |
155 | */ | 155 | */ |
156 | static void rcu_preempt_note_context_switch(int cpu) | 156 | void rcu_preempt_note_context_switch(void) |
157 | { | 157 | { |
158 | struct task_struct *t = current; | 158 | struct task_struct *t = current; |
159 | unsigned long flags; | 159 | unsigned long flags; |
@@ -164,7 +164,7 @@ static void rcu_preempt_note_context_switch(int cpu) | |||
164 | (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { | 164 | (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { |
165 | 165 | ||
166 | /* Possibly blocking in an RCU read-side critical section. */ | 166 | /* Possibly blocking in an RCU read-side critical section. */ |
167 | rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu); | 167 | rdp = __this_cpu_ptr(rcu_preempt_state.rda); |
168 | rnp = rdp->mynode; | 168 | rnp = rdp->mynode; |
169 | raw_spin_lock_irqsave(&rnp->lock, flags); | 169 | raw_spin_lock_irqsave(&rnp->lock, flags); |
170 | t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; | 170 | t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; |
@@ -228,7 +228,7 @@ static void rcu_preempt_note_context_switch(int cpu) | |||
228 | * means that we continue to block the current grace period. | 228 | * means that we continue to block the current grace period. |
229 | */ | 229 | */ |
230 | local_irq_save(flags); | 230 | local_irq_save(flags); |
231 | rcu_preempt_qs(cpu); | 231 | rcu_preempt_qs(smp_processor_id()); |
232 | local_irq_restore(flags); | 232 | local_irq_restore(flags); |
233 | } | 233 | } |
234 | 234 | ||
@@ -969,22 +969,6 @@ static void __init __rcu_init_preempt(void) | |||
969 | rcu_init_one(&rcu_preempt_state, &rcu_preempt_data); | 969 | rcu_init_one(&rcu_preempt_state, &rcu_preempt_data); |
970 | } | 970 | } |
971 | 971 | ||
972 | /* | ||
973 | * Check for a task exiting while in a preemptible-RCU read-side | ||
974 | * critical section, clean up if so. No need to issue warnings, | ||
975 | * as debug_check_no_locks_held() already does this if lockdep | ||
976 | * is enabled. | ||
977 | */ | ||
978 | void exit_rcu(void) | ||
979 | { | ||
980 | struct task_struct *t = current; | ||
981 | |||
982 | if (t->rcu_read_lock_nesting == 0) | ||
983 | return; | ||
984 | t->rcu_read_lock_nesting = 1; | ||
985 | __rcu_read_unlock(); | ||
986 | } | ||
987 | |||
988 | #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ | 972 | #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ |
989 | 973 | ||
990 | static struct rcu_state *rcu_state = &rcu_sched_state; | 974 | static struct rcu_state *rcu_state = &rcu_sched_state; |
@@ -1018,14 +1002,6 @@ void rcu_force_quiescent_state(void) | |||
1018 | EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); | 1002 | EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); |
1019 | 1003 | ||
1020 | /* | 1004 | /* |
1021 | * Because preemptible RCU does not exist, we never have to check for | ||
1022 | * CPUs being in quiescent states. | ||
1023 | */ | ||
1024 | static void rcu_preempt_note_context_switch(int cpu) | ||
1025 | { | ||
1026 | } | ||
1027 | |||
1028 | /* | ||
1029 | * Because preemptible RCU does not exist, there are never any preempted | 1005 | * Because preemptible RCU does not exist, there are never any preempted |
1030 | * RCU readers. | 1006 | * RCU readers. |
1031 | */ | 1007 | */ |
@@ -1938,6 +1914,14 @@ static void rcu_prepare_for_idle(int cpu) | |||
1938 | { | 1914 | { |
1939 | } | 1915 | } |
1940 | 1916 | ||
1917 | /* | ||
1918 | * Don't bother keeping a running count of the number of RCU callbacks | ||
1919 | * posted because CONFIG_RCU_FAST_NO_HZ=n. | ||
1920 | */ | ||
1921 | static void rcu_idle_count_callbacks_posted(void) | ||
1922 | { | ||
1923 | } | ||
1924 | |||
1941 | #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */ | 1925 | #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */ |
1942 | 1926 | ||
1943 | /* | 1927 | /* |
@@ -1978,11 +1962,20 @@ static void rcu_prepare_for_idle(int cpu) | |||
1978 | #define RCU_IDLE_GP_DELAY 6 /* Roughly one grace period. */ | 1962 | #define RCU_IDLE_GP_DELAY 6 /* Roughly one grace period. */ |
1979 | #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */ | 1963 | #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */ |
1980 | 1964 | ||
1965 | /* Loop counter for rcu_prepare_for_idle(). */ | ||
1981 | static DEFINE_PER_CPU(int, rcu_dyntick_drain); | 1966 | static DEFINE_PER_CPU(int, rcu_dyntick_drain); |
1967 | /* If rcu_dyntick_holdoff==jiffies, don't try to enter dyntick-idle mode. */ | ||
1982 | static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff); | 1968 | static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff); |
1983 | static DEFINE_PER_CPU(struct hrtimer, rcu_idle_gp_timer); | 1969 | /* Timer to awaken the CPU if it enters dyntick-idle mode with callbacks. */ |
1984 | static ktime_t rcu_idle_gp_wait; /* If some non-lazy callbacks. */ | 1970 | static DEFINE_PER_CPU(struct timer_list, rcu_idle_gp_timer); |
1985 | static ktime_t rcu_idle_lazy_gp_wait; /* If only lazy callbacks. */ | 1971 | /* Scheduled expiry time for rcu_idle_gp_timer to allow reposting. */ |
1972 | static DEFINE_PER_CPU(unsigned long, rcu_idle_gp_timer_expires); | ||
1973 | /* Enable special processing on first attempt to enter dyntick-idle mode. */ | ||
1974 | static DEFINE_PER_CPU(bool, rcu_idle_first_pass); | ||
1975 | /* Running count of non-lazy callbacks posted, never decremented. */ | ||
1976 | static DEFINE_PER_CPU(unsigned long, rcu_nonlazy_posted); | ||
1977 | /* Snapshot of rcu_nonlazy_posted to detect meaningful exits from idle. */ | ||
1978 | static DEFINE_PER_CPU(unsigned long, rcu_nonlazy_posted_snap); | ||
1986 | 1979 | ||
1987 | /* | 1980 | /* |
1988 | * Allow the CPU to enter dyntick-idle mode if either: (1) There are no | 1981 | * Allow the CPU to enter dyntick-idle mode if either: (1) There are no |
@@ -1995,6 +1988,8 @@ static ktime_t rcu_idle_lazy_gp_wait; /* If only lazy callbacks. */ | |||
1995 | */ | 1988 | */ |
1996 | int rcu_needs_cpu(int cpu) | 1989 | int rcu_needs_cpu(int cpu) |
1997 | { | 1990 | { |
1991 | /* Flag a new idle sojourn to the idle-entry state machine. */ | ||
1992 | per_cpu(rcu_idle_first_pass, cpu) = 1; | ||
1998 | /* If no callbacks, RCU doesn't need the CPU. */ | 1993 | /* If no callbacks, RCU doesn't need the CPU. */ |
1999 | if (!rcu_cpu_has_callbacks(cpu)) | 1994 | if (!rcu_cpu_has_callbacks(cpu)) |
2000 | return 0; | 1995 | return 0; |
@@ -2045,16 +2040,34 @@ static bool rcu_cpu_has_nonlazy_callbacks(int cpu) | |||
2045 | } | 2040 | } |
2046 | 2041 | ||
2047 | /* | 2042 | /* |
2043 | * Handler for smp_call_function_single(). The only point of this | ||
2044 | * handler is to wake the CPU up, so the handler does only tracing. | ||
2045 | */ | ||
2046 | void rcu_idle_demigrate(void *unused) | ||
2047 | { | ||
2048 | trace_rcu_prep_idle("Demigrate"); | ||
2049 | } | ||
2050 | |||
2051 | /* | ||
2048 | * Timer handler used to force CPU to start pushing its remaining RCU | 2052 | * Timer handler used to force CPU to start pushing its remaining RCU |
2049 | * callbacks in the case where it entered dyntick-idle mode with callbacks | 2053 | * callbacks in the case where it entered dyntick-idle mode with callbacks |
2050 | * pending. The handler doesn't really need to do anything because the | 2054 | * pending. The handler doesn't really need to do anything because the |
2051 | * real work is done upon re-entry to idle, or by the next scheduling-clock | 2055 | * real work is done upon re-entry to idle, or by the next scheduling-clock |
2052 | * interrupt should idle not be re-entered. | 2056 | * interrupt should idle not be re-entered. |
2057 | * | ||
2058 | * One special case: the timer gets migrated without awakening the CPU | ||
2059 | * on which the timer was scheduled on. In this case, we must wake up | ||
2060 | * that CPU. We do so with smp_call_function_single(). | ||
2053 | */ | 2061 | */ |
2054 | static enum hrtimer_restart rcu_idle_gp_timer_func(struct hrtimer *hrtp) | 2062 | static void rcu_idle_gp_timer_func(unsigned long cpu_in) |
2055 | { | 2063 | { |
2064 | int cpu = (int)cpu_in; | ||
2065 | |||
2056 | trace_rcu_prep_idle("Timer"); | 2066 | trace_rcu_prep_idle("Timer"); |
2057 | return HRTIMER_NORESTART; | 2067 | if (cpu != smp_processor_id()) |
2068 | smp_call_function_single(cpu, rcu_idle_demigrate, NULL, 0); | ||
2069 | else | ||
2070 | WARN_ON_ONCE(1); /* Getting here can hang the system... */ | ||
2058 | } | 2071 | } |
2059 | 2072 | ||
2060 | /* | 2073 | /* |
@@ -2062,19 +2075,11 @@ static enum hrtimer_restart rcu_idle_gp_timer_func(struct hrtimer *hrtp) | |||
2062 | */ | 2075 | */ |
2063 | static void rcu_prepare_for_idle_init(int cpu) | 2076 | static void rcu_prepare_for_idle_init(int cpu) |
2064 | { | 2077 | { |
2065 | static int firsttime = 1; | 2078 | per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; |
2066 | struct hrtimer *hrtp = &per_cpu(rcu_idle_gp_timer, cpu); | 2079 | setup_timer(&per_cpu(rcu_idle_gp_timer, cpu), |
2067 | 2080 | rcu_idle_gp_timer_func, cpu); | |
2068 | hrtimer_init(hrtp, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | 2081 | per_cpu(rcu_idle_gp_timer_expires, cpu) = jiffies - 1; |
2069 | hrtp->function = rcu_idle_gp_timer_func; | 2082 | per_cpu(rcu_idle_first_pass, cpu) = 1; |
2070 | if (firsttime) { | ||
2071 | unsigned int upj = jiffies_to_usecs(RCU_IDLE_GP_DELAY); | ||
2072 | |||
2073 | rcu_idle_gp_wait = ns_to_ktime(upj * (u64)1000); | ||
2074 | upj = jiffies_to_usecs(RCU_IDLE_LAZY_GP_DELAY); | ||
2075 | rcu_idle_lazy_gp_wait = ns_to_ktime(upj * (u64)1000); | ||
2076 | firsttime = 0; | ||
2077 | } | ||
2078 | } | 2083 | } |
2079 | 2084 | ||
2080 | /* | 2085 | /* |
@@ -2084,7 +2089,8 @@ static void rcu_prepare_for_idle_init(int cpu) | |||
2084 | */ | 2089 | */ |
2085 | static void rcu_cleanup_after_idle(int cpu) | 2090 | static void rcu_cleanup_after_idle(int cpu) |
2086 | { | 2091 | { |
2087 | hrtimer_cancel(&per_cpu(rcu_idle_gp_timer, cpu)); | 2092 | del_timer(&per_cpu(rcu_idle_gp_timer, cpu)); |
2093 | trace_rcu_prep_idle("Cleanup after idle"); | ||
2088 | } | 2094 | } |
2089 | 2095 | ||
2090 | /* | 2096 | /* |
@@ -2108,6 +2114,29 @@ static void rcu_cleanup_after_idle(int cpu) | |||
2108 | */ | 2114 | */ |
2109 | static void rcu_prepare_for_idle(int cpu) | 2115 | static void rcu_prepare_for_idle(int cpu) |
2110 | { | 2116 | { |
2117 | struct timer_list *tp; | ||
2118 | |||
2119 | /* | ||
2120 | * If this is an idle re-entry, for example, due to use of | ||
2121 | * RCU_NONIDLE() or the new idle-loop tracing API within the idle | ||
2122 | * loop, then don't take any state-machine actions, unless the | ||
2123 | * momentary exit from idle queued additional non-lazy callbacks. | ||
2124 | * Instead, repost the rcu_idle_gp_timer if this CPU has callbacks | ||
2125 | * pending. | ||
2126 | */ | ||
2127 | if (!per_cpu(rcu_idle_first_pass, cpu) && | ||
2128 | (per_cpu(rcu_nonlazy_posted, cpu) == | ||
2129 | per_cpu(rcu_nonlazy_posted_snap, cpu))) { | ||
2130 | if (rcu_cpu_has_callbacks(cpu)) { | ||
2131 | tp = &per_cpu(rcu_idle_gp_timer, cpu); | ||
2132 | mod_timer_pinned(tp, per_cpu(rcu_idle_gp_timer_expires, cpu)); | ||
2133 | } | ||
2134 | return; | ||
2135 | } | ||
2136 | per_cpu(rcu_idle_first_pass, cpu) = 0; | ||
2137 | per_cpu(rcu_nonlazy_posted_snap, cpu) = | ||
2138 | per_cpu(rcu_nonlazy_posted, cpu) - 1; | ||
2139 | |||
2111 | /* | 2140 | /* |
2112 | * If there are no callbacks on this CPU, enter dyntick-idle mode. | 2141 | * If there are no callbacks on this CPU, enter dyntick-idle mode. |
2113 | * Also reset state to avoid prejudicing later attempts. | 2142 | * Also reset state to avoid prejudicing later attempts. |
@@ -2140,11 +2169,15 @@ static void rcu_prepare_for_idle(int cpu) | |||
2140 | per_cpu(rcu_dyntick_drain, cpu) = 0; | 2169 | per_cpu(rcu_dyntick_drain, cpu) = 0; |
2141 | per_cpu(rcu_dyntick_holdoff, cpu) = jiffies; | 2170 | per_cpu(rcu_dyntick_holdoff, cpu) = jiffies; |
2142 | if (rcu_cpu_has_nonlazy_callbacks(cpu)) | 2171 | if (rcu_cpu_has_nonlazy_callbacks(cpu)) |
2143 | hrtimer_start(&per_cpu(rcu_idle_gp_timer, cpu), | 2172 | per_cpu(rcu_idle_gp_timer_expires, cpu) = |
2144 | rcu_idle_gp_wait, HRTIMER_MODE_REL); | 2173 | jiffies + RCU_IDLE_GP_DELAY; |
2145 | else | 2174 | else |
2146 | hrtimer_start(&per_cpu(rcu_idle_gp_timer, cpu), | 2175 | per_cpu(rcu_idle_gp_timer_expires, cpu) = |
2147 | rcu_idle_lazy_gp_wait, HRTIMER_MODE_REL); | 2176 | jiffies + RCU_IDLE_LAZY_GP_DELAY; |
2177 | tp = &per_cpu(rcu_idle_gp_timer, cpu); | ||
2178 | mod_timer_pinned(tp, per_cpu(rcu_idle_gp_timer_expires, cpu)); | ||
2179 | per_cpu(rcu_nonlazy_posted_snap, cpu) = | ||
2180 | per_cpu(rcu_nonlazy_posted, cpu); | ||
2148 | return; /* Nothing more to do immediately. */ | 2181 | return; /* Nothing more to do immediately. */ |
2149 | } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) { | 2182 | } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) { |
2150 | /* We have hit the limit, so time to give up. */ | 2183 | /* We have hit the limit, so time to give up. */ |
@@ -2184,6 +2217,19 @@ static void rcu_prepare_for_idle(int cpu) | |||
2184 | trace_rcu_prep_idle("Callbacks drained"); | 2217 | trace_rcu_prep_idle("Callbacks drained"); |
2185 | } | 2218 | } |
2186 | 2219 | ||
2220 | /* | ||
2221 | * Keep a running count of the number of non-lazy callbacks posted | ||
2222 | * on this CPU. This running counter (which is never decremented) allows | ||
2223 | * rcu_prepare_for_idle() to detect when something out of the idle loop | ||
2224 | * posts a callback, even if an equal number of callbacks are invoked. | ||
2225 | * Of course, callbacks should only be posted from within a trace event | ||
2226 | * designed to be called from idle or from within RCU_NONIDLE(). | ||
2227 | */ | ||
2228 | static void rcu_idle_count_callbacks_posted(void) | ||
2229 | { | ||
2230 | __this_cpu_add(rcu_nonlazy_posted, 1); | ||
2231 | } | ||
2232 | |||
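The counter maintained above is intentionally never decremented: rcu_prepare_for_idle() compares rcu_nonlazy_posted against the snapshot taken when the CPU last entered idle, so a fresh posting is noticed even if an equal number of callbacks has since been invoked. A tiny userspace sketch of that counter-plus-snapshot idiom, with invented names, follows.

#include <stdio.h>

static unsigned long nonlazy_posted;       /* only ever incremented */
static unsigned long nonlazy_posted_snap;  /* snapshot taken on idle entry */

static void post_callback(void)
{
        nonlazy_posted++;
}

static void enter_idle(void)
{
        nonlazy_posted_snap = nonlazy_posted;
}

/* True if any callback was posted since the last idle entry. */
static int new_work_since_idle(void)
{
        return nonlazy_posted != nonlazy_posted_snap;
}

int main(void)
{
        enter_idle();
        printf("new work? %d\n", new_work_since_idle());  /* 0 */
        post_callback();  /* e.g. posted from within RCU_NONIDLE() */
        printf("new work? %d\n", new_work_since_idle());  /* 1 */
        return 0;
}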
2187 | #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ | 2233 | #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ |
2188 | 2234 | ||
2189 | #ifdef CONFIG_RCU_CPU_STALL_INFO | 2235 | #ifdef CONFIG_RCU_CPU_STALL_INFO |
@@ -2192,14 +2238,12 @@ static void rcu_prepare_for_idle(int cpu) | |||
2192 | 2238 | ||
2193 | static void print_cpu_stall_fast_no_hz(char *cp, int cpu) | 2239 | static void print_cpu_stall_fast_no_hz(char *cp, int cpu) |
2194 | { | 2240 | { |
2195 | struct hrtimer *hrtp = &per_cpu(rcu_idle_gp_timer, cpu); | 2241 | struct timer_list *tltp = &per_cpu(rcu_idle_gp_timer, cpu); |
2196 | 2242 | ||
2197 | sprintf(cp, "drain=%d %c timer=%lld", | 2243 | sprintf(cp, "drain=%d %c timer=%lu", |
2198 | per_cpu(rcu_dyntick_drain, cpu), | 2244 | per_cpu(rcu_dyntick_drain, cpu), |
2199 | per_cpu(rcu_dyntick_holdoff, cpu) == jiffies ? 'H' : '.', | 2245 | per_cpu(rcu_dyntick_holdoff, cpu) == jiffies ? 'H' : '.', |
2200 | hrtimer_active(hrtp) | 2246 | timer_pending(tltp) ? tltp->expires - jiffies : -1); |
2201 | ? ktime_to_us(hrtimer_get_remaining(hrtp)) | ||
2202 | : -1); | ||
2203 | } | 2247 | } |
2204 | 2248 | ||
2205 | #else /* #ifdef CONFIG_RCU_FAST_NO_HZ */ | 2249 | #else /* #ifdef CONFIG_RCU_FAST_NO_HZ */ |
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index ed459edeff43..d4bc16ddd1d4 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c | |||
@@ -271,13 +271,13 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) | |||
271 | 271 | ||
272 | gpnum = rsp->gpnum; | 272 | gpnum = rsp->gpnum; |
273 | seq_printf(m, "c=%lu g=%lu s=%d jfq=%ld j=%x " | 273 | seq_printf(m, "c=%lu g=%lu s=%d jfq=%ld j=%x " |
274 | "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu\n", | 274 | "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n", |
275 | rsp->completed, gpnum, rsp->fqs_state, | 275 | rsp->completed, gpnum, rsp->fqs_state, |
276 | (long)(rsp->jiffies_force_qs - jiffies), | 276 | (long)(rsp->jiffies_force_qs - jiffies), |
277 | (int)(jiffies & 0xffff), | 277 | (int)(jiffies & 0xffff), |
278 | rsp->n_force_qs, rsp->n_force_qs_ngp, | 278 | rsp->n_force_qs, rsp->n_force_qs_ngp, |
279 | rsp->n_force_qs - rsp->n_force_qs_ngp, | 279 | rsp->n_force_qs - rsp->n_force_qs_ngp, |
280 | rsp->n_force_qs_lh); | 280 | rsp->n_force_qs_lh, rsp->qlen_lazy, rsp->qlen); |
281 | for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < NUM_RCU_NODES; rnp++) { | 281 | for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < NUM_RCU_NODES; rnp++) { |
282 | if (rnp->level != level) { | 282 | if (rnp->level != level) { |
283 | seq_puts(m, "\n"); | 283 | seq_puts(m, "\n"); |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index e5212ae294f6..eb4131b8ad60 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -2083,6 +2083,7 @@ context_switch(struct rq *rq, struct task_struct *prev, | |||
2083 | #endif | 2083 | #endif |
2084 | 2084 | ||
2085 | /* Here we just switch the register state and the stack. */ | 2085 | /* Here we just switch the register state and the stack. */ |
2086 | rcu_switch_from(prev); | ||
2086 | switch_to(prev, next, prev); | 2087 | switch_to(prev, next, prev); |
2087 | 2088 | ||
2088 | barrier(); | 2089 | barrier(); |
diff --git a/kernel/srcu.c b/kernel/srcu.c index ba35f3a4a1f4..2095be3318d5 100644 --- a/kernel/srcu.c +++ b/kernel/srcu.c | |||
@@ -34,10 +34,77 @@ | |||
34 | #include <linux/delay.h> | 34 | #include <linux/delay.h> |
35 | #include <linux/srcu.h> | 35 | #include <linux/srcu.h> |
36 | 36 | ||
37 | /* | ||
38 | * Initialize an rcu_batch structure to empty. | ||
39 | */ | ||
40 | static inline void rcu_batch_init(struct rcu_batch *b) | ||
41 | { | ||
42 | b->head = NULL; | ||
43 | b->tail = &b->head; | ||
44 | } | ||
45 | |||
46 | /* | ||
47 | * Enqueue a callback onto the tail of the specified rcu_batch structure. | ||
48 | */ | ||
49 | static inline void rcu_batch_queue(struct rcu_batch *b, struct rcu_head *head) | ||
50 | { | ||
51 | *b->tail = head; | ||
52 | b->tail = &head->next; | ||
53 | } | ||
54 | |||
55 | /* | ||
56 | * Is the specified rcu_batch structure empty? | ||
57 | */ | ||
58 | static inline bool rcu_batch_empty(struct rcu_batch *b) | ||
59 | { | ||
60 | return b->tail == &b->head; | ||
61 | } | ||
62 | |||
63 | /* | ||
64 | * Remove the callback at the head of the specified rcu_batch structure | ||
65 | * and return a pointer to it, or return NULL if the structure is empty. | ||
66 | */ | ||
67 | static inline struct rcu_head *rcu_batch_dequeue(struct rcu_batch *b) | ||
68 | { | ||
69 | struct rcu_head *head; | ||
70 | |||
71 | if (rcu_batch_empty(b)) | ||
72 | return NULL; | ||
73 | |||
74 | head = b->head; | ||
75 | b->head = head->next; | ||
76 | if (b->tail == &head->next) | ||
77 | rcu_batch_init(b); | ||
78 | |||
79 | return head; | ||
80 | } | ||
81 | |||
82 | /* | ||
83 | * Move all callbacks from the rcu_batch structure specified by "from" to | ||
84 | * the structure specified by "to". | ||
85 | */ | ||
86 | static inline void rcu_batch_move(struct rcu_batch *to, struct rcu_batch *from) | ||
87 | { | ||
88 | if (!rcu_batch_empty(from)) { | ||
89 | *to->tail = from->head; | ||
90 | to->tail = from->tail; | ||
91 | rcu_batch_init(from); | ||
92 | } | ||
93 | } | ||
94 | |||
95 | /* single-thread state-machine */ | ||
96 | static void process_srcu(struct work_struct *work); | ||
97 | |||
37 | static int init_srcu_struct_fields(struct srcu_struct *sp) | 98 | static int init_srcu_struct_fields(struct srcu_struct *sp) |
38 | { | 99 | { |
39 | sp->completed = 0; | 100 | sp->completed = 0; |
40 | mutex_init(&sp->mutex); | 101 | spin_lock_init(&sp->queue_lock); |
102 | sp->running = false; | ||
103 | rcu_batch_init(&sp->batch_queue); | ||
104 | rcu_batch_init(&sp->batch_check0); | ||
105 | rcu_batch_init(&sp->batch_check1); | ||
106 | rcu_batch_init(&sp->batch_done); | ||
107 | INIT_DELAYED_WORK(&sp->work, process_srcu); | ||
41 | sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array); | 108 | sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array); |
42 | return sp->per_cpu_ref ? 0 : -ENOMEM; | 109 | return sp->per_cpu_ref ? 0 : -ENOMEM; |
43 | } | 110 | } |
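
The rcu_batch helpers added above are a singly linked queue whose ->tail pointer always points at the last element's ->next field (or at ->head when empty), giving O(1) enqueue, dequeue, and whole-queue splice with no back links. A minimal stand-alone sketch of the same idiom, using hypothetical type names rather than the kernel's, is:

    #include <stdio.h>
    #include <stddef.h>

    struct node  { struct node *next; int val; };
    struct batch { struct node *head; struct node **tail; };

    static void batch_init(struct batch *b)
    {
        b->head = NULL;
        b->tail = &b->head;
    }

    static void batch_queue(struct batch *b, struct node *n)
    {
        n->next = NULL;
        *b->tail = n;           /* link after the current last element */
        b->tail = &n->next;     /* tail now points at the new ->next   */
    }

    static struct node *batch_dequeue(struct batch *b)
    {
        struct node *n = b->head;

        if (!n)
            return NULL;
        b->head = n->next;
        if (b->tail == &n->next)    /* removed the last element */
            batch_init(b);
        return n;
    }

    int main(void)
    {
        struct batch b;
        struct node n1 = { .val = 1 }, n2 = { .val = 2 };
        struct node *n;

        batch_init(&b);
        batch_queue(&b, &n1);
        batch_queue(&b, &n2);
        while ((n = batch_dequeue(&b)) != NULL)
            printf("%d\n", n->val);     /* prints 1 then 2 */
        return 0;
    }

The "b->tail == &n->next" test is what resets the queue to its empty state once the final element leaves, exactly as rcu_batch_dequeue() does above, and rcu_batch_move() is the corresponding O(1) splice.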
@@ -73,21 +140,116 @@ EXPORT_SYMBOL_GPL(init_srcu_struct); | |||
73 | #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | 140 | #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ |
74 | 141 | ||
75 | /* | 142 | /* |
76 | * srcu_readers_active_idx -- returns approximate number of readers | 143 | * Returns approximate total of the readers' ->seq[] values for the |
77 | * active on the specified rank of per-CPU counters. | 144 | * rank of per-CPU counters specified by idx. |
78 | */ | 145 | */ |
146 | static unsigned long srcu_readers_seq_idx(struct srcu_struct *sp, int idx) | ||
147 | { | ||
148 | int cpu; | ||
149 | unsigned long sum = 0; | ||
150 | unsigned long t; | ||
79 | 151 | ||
80 | static int srcu_readers_active_idx(struct srcu_struct *sp, int idx) | 152 | for_each_possible_cpu(cpu) { |
153 | t = ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->seq[idx]); | ||
154 | sum += t; | ||
155 | } | ||
156 | return sum; | ||
157 | } | ||
158 | |||
159 | /* | ||
160 | * Returns approximate number of readers active on the specified rank | ||
161 | * of the per-CPU ->c[] counters. | ||
162 | */ | ||
163 | static unsigned long srcu_readers_active_idx(struct srcu_struct *sp, int idx) | ||
81 | { | 164 | { |
82 | int cpu; | 165 | int cpu; |
83 | int sum; | 166 | unsigned long sum = 0; |
167 | unsigned long t; | ||
84 | 168 | ||
85 | sum = 0; | 169 | for_each_possible_cpu(cpu) { |
86 | for_each_possible_cpu(cpu) | 170 | t = ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[idx]); |
87 | sum += per_cpu_ptr(sp->per_cpu_ref, cpu)->c[idx]; | 171 | sum += t; |
172 | } | ||
88 | return sum; | 173 | return sum; |
89 | } | 174 | } |
90 | 175 | ||
176 | /* | ||
177 | * Return true if the number of pre-existing readers is determined to | ||
178 | * be stably zero. An example unstable zero can occur if the call | ||
179 | * to srcu_readers_active_idx() misses an __srcu_read_lock() increment, | ||
180 | * but due to task migration, sees the corresponding __srcu_read_unlock() | ||
181 | * decrement. This can happen because srcu_readers_active_idx() takes | ||
182 | * time to sum the array, and might in fact be interrupted or preempted | ||
183 | * partway through the summation. | ||
184 | */ | ||
185 | static bool srcu_readers_active_idx_check(struct srcu_struct *sp, int idx) | ||
186 | { | ||
187 | unsigned long seq; | ||
188 | |||
189 | seq = srcu_readers_seq_idx(sp, idx); | ||
190 | |||
191 | /* | ||
192 | * The following smp_mb() A pairs with the smp_mb() B located in | ||
193 | * __srcu_read_lock(). This pairing ensures that if an | ||
194 | * __srcu_read_lock() increments its counter after the summation | ||
195 | * in srcu_readers_active_idx(), then the corresponding SRCU read-side | ||
196 | * critical section will see any changes made prior to the start | ||
197 | * of the current SRCU grace period. | ||
198 | * | ||
199 | * Also, if the above call to srcu_readers_seq_idx() saw the | ||
200 | * increment of ->seq[], then the call to srcu_readers_active_idx() | ||
201 | * must see the increment of ->c[]. | ||
202 | */ | ||
203 | smp_mb(); /* A */ | ||
204 | |||
205 | /* | ||
206 | * Note that srcu_readers_active_idx() can incorrectly return | ||
207 | * zero even though there is a pre-existing reader throughout. | ||
208 | * To see this, suppose that task A is in a very long SRCU | ||
209 | * read-side critical section that started on CPU 0, and that | ||
210 | * no other reader exists, so that the sum of the counters | ||
211 | * is equal to one. Then suppose that task B starts executing | ||
212 | * srcu_readers_active_idx(), summing up to CPU 1, and then that | ||
213 | * task C starts reading on CPU 0, so that its increment is not | ||
214 | * summed, but finishes reading on CPU 2, so that its decrement | ||
215 | * -is- summed. Then when task B completes its sum, it will | ||
216 | * incorrectly get zero, despite the fact that task A has been | ||
217 | * in its SRCU read-side critical section the whole time. | ||
218 | * | ||
219 | * We therefore do a validation step should srcu_readers_active_idx() | ||
220 | * return zero. | ||
221 | */ | ||
222 | if (srcu_readers_active_idx(sp, idx) != 0) | ||
223 | return false; | ||
224 | |||
225 | /* | ||
226 | * The remainder of this function is the validation step. | ||
227 | * The following smp_mb() D pairs with the smp_mb() C in | ||
228 | * __srcu_read_unlock(). If the __srcu_read_unlock() was seen | ||
229 | * by srcu_readers_active_idx() above, then any destructive | ||
230 | * operation performed after the grace period will happen after | ||
231 | * the corresponding SRCU read-side critical section. | ||
232 | * | ||
233 | * Note that there can be at most NR_CPUS worth of readers using | ||
234 | * the old index, which is not enough to overflow even a 32-bit | ||
235 | * integer. (Yes, this does mean that systems having more than | ||
236 | * a billion or so CPUs need to be 64-bit systems.) Therefore, | ||
237 | * the sum of the ->seq[] counters cannot possibly overflow. | ||
238 | * Therefore, the only way that the return values of the two | ||
239 | * calls to srcu_readers_seq_idx() can be equal is if there were | ||
240 | * no increments of the corresponding rank of ->seq[] counts | ||
241 | * in the interim. But the missed-increment scenario laid out | ||
242 | * above includes an increment of the ->seq[] counter by | ||
243 | * the corresponding __srcu_read_lock(). Therefore, if this | ||
244 | * scenario occurs, the return values from the two calls to | ||
245 | * srcu_readers_seq_idx() will differ, and thus the validation | ||
246 | * step below suffices. | ||
247 | */ | ||
248 | smp_mb(); /* D */ | ||
249 | |||
250 | return srcu_readers_seq_idx(sp, idx) == seq; | ||
251 | } | ||
252 | |||
91 | /** | 253 | /** |
92 | * srcu_readers_active - returns approximate number of readers. | 254 | * srcu_readers_active - returns approximate number of readers. |
93 | * @sp: which srcu_struct to count active readers (holding srcu_read_lock). | 255 | * @sp: which srcu_struct to count active readers (holding srcu_read_lock). |
@@ -98,7 +260,14 @@ static int srcu_readers_active_idx(struct srcu_struct *sp, int idx) | |||
98 | */ | 260 | */ |
99 | static int srcu_readers_active(struct srcu_struct *sp) | 261 | static int srcu_readers_active(struct srcu_struct *sp) |
100 | { | 262 | { |
101 | return srcu_readers_active_idx(sp, 0) + srcu_readers_active_idx(sp, 1); | 263 | int cpu; |
264 | unsigned long sum = 0; | ||
265 | |||
266 | for_each_possible_cpu(cpu) { | ||
267 | sum += ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[0]); | ||
268 | sum += ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[1]); | ||
269 | } | ||
270 | return sum; | ||
102 | } | 271 | } |
103 | 272 | ||
104 | /** | 273 | /** |
@@ -131,10 +300,11 @@ int __srcu_read_lock(struct srcu_struct *sp) | |||
131 | int idx; | 300 | int idx; |
132 | 301 | ||
133 | preempt_disable(); | 302 | preempt_disable(); |
134 | idx = sp->completed & 0x1; | 303 | idx = rcu_dereference_index_check(sp->completed, |
135 | barrier(); /* ensure compiler looks -once- at sp->completed. */ | 304 | rcu_read_lock_sched_held()) & 0x1; |
136 | per_cpu_ptr(sp->per_cpu_ref, smp_processor_id())->c[idx]++; | 305 | ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->c[idx]) += 1; |
137 | srcu_barrier(); /* ensure compiler won't misorder critical section. */ | 306 | smp_mb(); /* B */ /* Avoid leaking the critical section. */ |
307 | ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->seq[idx]) += 1; | ||
138 | preempt_enable(); | 308 | preempt_enable(); |
139 | return idx; | 309 | return idx; |
140 | } | 310 | } |
@@ -149,8 +319,8 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock); | |||
149 | void __srcu_read_unlock(struct srcu_struct *sp, int idx) | 319 | void __srcu_read_unlock(struct srcu_struct *sp, int idx) |
150 | { | 320 | { |
151 | preempt_disable(); | 321 | preempt_disable(); |
152 | srcu_barrier(); /* ensure compiler won't misorder critical section. */ | 322 | smp_mb(); /* C */ /* Avoid leaking the critical section. */ |
153 | per_cpu_ptr(sp->per_cpu_ref, smp_processor_id())->c[idx]--; | 323 | ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->c[idx]) -= 1; |
154 | preempt_enable(); | 324 | preempt_enable(); |
155 | } | 325 | } |
156 | EXPORT_SYMBOL_GPL(__srcu_read_unlock); | 326 | EXPORT_SYMBOL_GPL(__srcu_read_unlock); |
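
From a reader's point of view nothing changes: srcu_read_lock() still returns the index of the counter pair it incremented, and that same index must be handed back to srcu_read_unlock(). A minimal reader sketch, assuming an illustrative srcu_struct my_srcu (initialized elsewhere with init_srcu_struct()) and an SRCU-protected pointer gp, neither of which is part of this patch:

    #include <linux/rcupdate.h>
    #include <linux/srcu.h>

    struct foo {
        int a;
    };

    /* Illustrative state; my_srcu is assumed to be initialized elsewhere. */
    static struct srcu_struct my_srcu;
    static struct foo __rcu *gp;

    static int read_foo(void)
    {
        struct foo *p;
        int idx, val = -1;

        idx = srcu_read_lock(&my_srcu);         /* returns the index in use */
        p = srcu_dereference(gp, &my_srcu);     /* fetch the protected pointer */
        if (p)
            val = p->a;
        srcu_read_unlock(&my_srcu, idx);        /* pass the same index back */
        return val;
    }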
@@ -163,106 +333,119 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock); | |||
163 | * we repeatedly block for 1-millisecond time periods. This approach | 333 | * we repeatedly block for 1-millisecond time periods. This approach |
164 | * has done well in testing, so there is no need for a config parameter. | 334 | * has done well in testing, so there is no need for a config parameter. |
165 | */ | 335 | */ |
166 | #define SYNCHRONIZE_SRCU_READER_DELAY 10 | 336 | #define SRCU_RETRY_CHECK_DELAY 5 |
337 | #define SYNCHRONIZE_SRCU_TRYCOUNT 2 | ||
338 | #define SYNCHRONIZE_SRCU_EXP_TRYCOUNT 12 | ||
167 | 339 | ||
168 | /* | 340 | /* |
169 | * Helper function for synchronize_srcu() and synchronize_srcu_expedited(). | 341 | * Wait until all pre-existing readers complete. Such readers |
342 | * will have used the index specified by "idx". | ||
343 | * The caller should ensure that ->completed does not change while this | ||
344 | * check runs, and that idx == (->completed & 1) ^ 1. | ||
170 | */ | 345 | */ |
171 | static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void)) | 346 | static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount) |
172 | { | 347 | { |
173 | int idx; | 348 | for (;;) { |
174 | 349 | if (srcu_readers_active_idx_check(sp, idx)) | |
175 | rcu_lockdep_assert(!lock_is_held(&sp->dep_map) && | 350 | return true; |
176 | !lock_is_held(&rcu_bh_lock_map) && | 351 | if (--trycount <= 0) |
177 | !lock_is_held(&rcu_lock_map) && | 352 | return false; |
178 | !lock_is_held(&rcu_sched_lock_map), | 353 | udelay(SRCU_RETRY_CHECK_DELAY); |
179 | "Illegal synchronize_srcu() in same-type SRCU (or RCU) read-side critical section"); | 354 | } |
180 | 355 | } | |
181 | idx = sp->completed; | ||
182 | mutex_lock(&sp->mutex); | ||
183 | 356 | ||
184 | /* | 357 | /* |
185 | * Check to see if someone else did the work for us while we were | 358 | * Increment the ->completed counter so that future SRCU readers will |
186 | * waiting to acquire the lock. We need -two- advances of | 359 | * use the other rank of the ->c[] and ->seq[] arrays. This allows |
187 | * the counter, not just one. If there was but one, we might have | 360 | * us to wait for pre-existing readers in a starvation-free manner. |
188 | * shown up -after- our helper's first synchronize_sched(), thus | 361 | */ |
189 | * having failed to prevent CPU-reordering races with concurrent | 362 | static void srcu_flip(struct srcu_struct *sp) |
190 | * srcu_read_unlock()s on other CPUs (see comment below). So we | 363 | { |
191 | * either (1) wait for two or (2) supply the second ourselves. | 364 | sp->completed++; |
192 | */ | 365 | } |
193 | 366 | ||
194 | if ((sp->completed - idx) >= 2) { | 367 | /* |
195 | mutex_unlock(&sp->mutex); | 368 | * Enqueue an SRCU callback on the specified srcu_struct structure, |
196 | return; | 369 | * initiating grace-period processing if it is not already running. |
370 | */ | ||
371 | void call_srcu(struct srcu_struct *sp, struct rcu_head *head, | ||
372 | void (*func)(struct rcu_head *head)) | ||
373 | { | ||
374 | unsigned long flags; | ||
375 | |||
376 | head->next = NULL; | ||
377 | head->func = func; | ||
378 | spin_lock_irqsave(&sp->queue_lock, flags); | ||
379 | rcu_batch_queue(&sp->batch_queue, head); | ||
380 | if (!sp->running) { | ||
381 | sp->running = true; | ||
382 | queue_delayed_work(system_nrt_wq, &sp->work, 0); | ||
197 | } | 383 | } |
384 | spin_unlock_irqrestore(&sp->queue_lock, flags); | ||
385 | } | ||
386 | EXPORT_SYMBOL_GPL(call_srcu); | ||
198 | 387 | ||
199 | sync_func(); /* Force memory barrier on all CPUs. */ | 388 | struct rcu_synchronize { |
389 | struct rcu_head head; | ||
390 | struct completion completion; | ||
391 | }; | ||
200 | 392 | ||
201 | /* | 393 | /* |
202 | * The preceding synchronize_sched() ensures that any CPU that | 394 | * Awaken the corresponding synchronize_srcu() instance now that a |
203 | * sees the new value of sp->completed will also see any preceding | 395 | * grace period has elapsed. |
204 | * changes to data structures made by this CPU. This prevents | 396 | */ |
205 | * some other CPU from reordering the accesses in its SRCU | 397 | static void wakeme_after_rcu(struct rcu_head *head) |
206 | * read-side critical section to precede the corresponding | 398 | { |
207 | * srcu_read_lock() -- ensuring that such references will in | 399 | struct rcu_synchronize *rcu; |
208 | * fact be protected. | ||
209 | * | ||
210 | * So it is now safe to do the flip. | ||
211 | */ | ||
212 | 400 | ||
213 | idx = sp->completed & 0x1; | 401 | rcu = container_of(head, struct rcu_synchronize, head); |
214 | sp->completed++; | 402 | complete(&rcu->completion); |
403 | } | ||
215 | 404 | ||
216 | sync_func(); /* Force memory barrier on all CPUs. */ | 405 | static void srcu_advance_batches(struct srcu_struct *sp, int trycount); |
406 | static void srcu_reschedule(struct srcu_struct *sp); | ||
217 | 407 | ||
218 | /* | 408 | /* |
219 | * At this point, because of the preceding synchronize_sched(), | 409 | * Helper function for synchronize_srcu() and synchronize_srcu_expedited(). |
220 | * all srcu_read_lock() calls using the old counters have completed. | 410 | */ |
221 | * Their corresponding critical sections might well be still | 411 | static void __synchronize_srcu(struct srcu_struct *sp, int trycount) |
222 | * executing, but the srcu_read_lock() primitives themselves | 412 | { |
223 | * will have finished executing. We initially give readers | 413 | struct rcu_synchronize rcu; |
224 | * an arbitrarily chosen 10 microseconds to get out of their | 414 | struct rcu_head *head = &rcu.head; |
225 | * SRCU read-side critical sections, then loop waiting 1/HZ | 415 | bool done = false; |
226 | * seconds per iteration. The 10-microsecond value has done | ||
227 | * very well in testing. | ||
228 | */ | ||
229 | |||
230 | if (srcu_readers_active_idx(sp, idx)) | ||
231 | udelay(SYNCHRONIZE_SRCU_READER_DELAY); | ||
232 | while (srcu_readers_active_idx(sp, idx)) | ||
233 | schedule_timeout_interruptible(1); | ||
234 | 416 | ||
235 | sync_func(); /* Force memory barrier on all CPUs. */ | 417 | rcu_lockdep_assert(!lock_is_held(&sp->dep_map) && |
418 | !lock_is_held(&rcu_bh_lock_map) && | ||
419 | !lock_is_held(&rcu_lock_map) && | ||
420 | !lock_is_held(&rcu_sched_lock_map), | ||
421 | "Illegal synchronize_srcu() in same-type SRCU (or RCU) read-side critical section"); | ||
236 | 422 | ||
237 | /* | 423 | init_completion(&rcu.completion); |
238 | * The preceding synchronize_sched() forces all srcu_read_unlock() | 424 | |
239 | * primitives that were executing concurrently with the preceding | 425 | head->next = NULL; |
240 | * for_each_possible_cpu() loop to have completed by this point. | 426 | head->func = wakeme_after_rcu; |
241 | * More importantly, it also forces the corresponding SRCU read-side | 427 | spin_lock_irq(&sp->queue_lock); |
242 | * critical sections to have also completed, and the corresponding | 428 | if (!sp->running) { |
243 | * references to SRCU-protected data items to be dropped. | 429 | /* steal the processing owner */ |
244 | * | 430 | sp->running = true; |
245 | * Note: | 431 | rcu_batch_queue(&sp->batch_check0, head); |
246 | * | 432 | spin_unlock_irq(&sp->queue_lock); |
247 | * Despite what you might think at first glance, the | 433 | |
248 | * preceding synchronize_sched() -must- be within the | 434 | srcu_advance_batches(sp, trycount); |
249 | * critical section ended by the following mutex_unlock(). | 435 | if (!rcu_batch_empty(&sp->batch_done)) { |
250 | * Otherwise, a task taking the early exit can race | 436 | BUG_ON(sp->batch_done.head != head); |
251 | * with a srcu_read_unlock(), which might have executed | 437 | rcu_batch_dequeue(&sp->batch_done); |
252 | * just before the preceding srcu_readers_active() check, | 438 | done = true; |
253 | * and whose CPU might have reordered the srcu_read_unlock() | 439 | } |
254 | * with the preceding critical section. In this case, there | 440 | /* hand processing ownership back to the work_struct */ |
255 | * is nothing preventing the synchronize_sched() task that is | 441 | srcu_reschedule(sp); |
256 | * taking the early exit from freeing a data structure that | 442 | } else { |
257 | * is still being referenced (out of order) by the task | 443 | rcu_batch_queue(&sp->batch_queue, head); |
258 | * doing the srcu_read_unlock(). | 444 | spin_unlock_irq(&sp->queue_lock); |
259 | * | 445 | } |
260 | * Alternatively, the comparison with "2" on the early exit | ||
261 | * could be changed to "3", but this increases synchronize_srcu() | ||
262 | * latency for bulk loads. So the current code is preferred. | ||
263 | */ | ||
264 | 446 | ||
265 | mutex_unlock(&sp->mutex); | 447 | if (!done) |
448 | wait_for_completion(&rcu.completion); | ||
266 | } | 449 | } |
267 | 450 | ||
268 | /** | 451 | /** |
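
call_srcu() is the new asynchronous update-side interface: the caller embeds an rcu_head in its own structure and supplies a callback to run once an SRCU grace period for that srcu_struct has elapsed. A hedged usage sketch -- my_srcu, struct foo, and foo_reclaim() are illustrative names, not part of this patch:

    #include <linux/kernel.h>
    #include <linux/rcupdate.h>
    #include <linux/slab.h>
    #include <linux/srcu.h>

    struct foo {
        int a;
        struct rcu_head rh;         /* storage for the SRCU callback */
    };

    static struct srcu_struct my_srcu;  /* illustrative, initialized elsewhere */

    static void foo_reclaim(struct rcu_head *head)
    {
        /* Runs only after all readers that might still see this foo finish. */
        kfree(container_of(head, struct foo, rh));
    }

    static void retire_foo(struct foo *p)
    {
        /* Caller has already unpublished p, e.g. via rcu_assign_pointer(). */
        call_srcu(&my_srcu, &p->rh, foo_reclaim);
    }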
@@ -281,7 +464,7 @@ static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void)) | |||
281 | */ | 464 | */ |
282 | void synchronize_srcu(struct srcu_struct *sp) | 465 | void synchronize_srcu(struct srcu_struct *sp) |
283 | { | 466 | { |
284 | __synchronize_srcu(sp, synchronize_sched); | 467 | __synchronize_srcu(sp, SYNCHRONIZE_SRCU_TRYCOUNT); |
285 | } | 468 | } |
286 | EXPORT_SYMBOL_GPL(synchronize_srcu); | 469 | EXPORT_SYMBOL_GPL(synchronize_srcu); |
287 | 470 | ||
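
The synchronous path now rides on the same machinery: __synchronize_srcu() queues a wakeme_after_rcu() callback and blocks on a completion until the state machine invokes it. Callers of synchronize_srcu() are unaffected, so the classic unpublish/wait/free pattern still looks like this (illustrative names again, not part of the patch):

    #include <linux/rcupdate.h>
    #include <linux/slab.h>
    #include <linux/spinlock.h>
    #include <linux/srcu.h>

    struct foo {
        int a;
    };

    /* Illustrative globals. */
    static struct srcu_struct my_srcu;
    static struct foo __rcu *gp;
    static DEFINE_SPINLOCK(gp_lock);

    static void update_foo(struct foo *newp)
    {
        struct foo *oldp;

        spin_lock(&gp_lock);
        oldp = rcu_dereference_protected(gp, lockdep_is_held(&gp_lock));
        rcu_assign_pointer(gp, newp);   /* publish the new version */
        spin_unlock(&gp_lock);

        synchronize_srcu(&my_srcu);     /* wait for pre-existing readers */
        kfree(oldp);                    /* no reader can still hold oldp */
    }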
@@ -289,18 +472,11 @@ EXPORT_SYMBOL_GPL(synchronize_srcu); | |||
289 | * synchronize_srcu_expedited - Brute-force SRCU grace period | 472 | * synchronize_srcu_expedited - Brute-force SRCU grace period |
290 | * @sp: srcu_struct with which to synchronize. | 473 | * @sp: srcu_struct with which to synchronize. |
291 | * | 474 | * |
292 | * Wait for an SRCU grace period to elapse, but use a "big hammer" | 475 | * Wait for an SRCU grace period to elapse, but be more aggressive about |
293 | * approach to force the grace period to end quickly. This consumes | 476 | * spinning rather than blocking when waiting. |
294 | * significant time on all CPUs and is unfriendly to real-time workloads, | ||
295 | * so is thus not recommended for any sort of common-case code. In fact, | ||
296 | * if you are using synchronize_srcu_expedited() in a loop, please | ||
297 | * restructure your code to batch your updates, and then use a single | ||
298 | * synchronize_srcu() instead. | ||
299 | * | 477 | * |
300 | * Note that it is illegal to call this function while holding any lock | 478 | * Note that it is illegal to call this function while holding any lock |
301 | * that is acquired by a CPU-hotplug notifier. And yes, it is also illegal | 479 | * that is acquired by a CPU-hotplug notifier. It is also illegal to call |
302 | * to call this function from a CPU-hotplug notifier. Failing to observe | ||
303 | * these restriction will result in deadlock. It is also illegal to call | ||
304 | * synchronize_srcu_expedited() from the corresponding SRCU read-side | 480 | * synchronize_srcu_expedited() from the corresponding SRCU read-side |
305 | * critical section; doing so will result in deadlock. However, it is | 481 | * critical section; doing so will result in deadlock. However, it is |
306 | * perfectly legal to call synchronize_srcu_expedited() on one srcu_struct | 482 | * perfectly legal to call synchronize_srcu_expedited() on one srcu_struct |
@@ -309,20 +485,166 @@ EXPORT_SYMBOL_GPL(synchronize_srcu); | |||
309 | */ | 485 | */ |
310 | void synchronize_srcu_expedited(struct srcu_struct *sp) | 486 | void synchronize_srcu_expedited(struct srcu_struct *sp) |
311 | { | 487 | { |
312 | __synchronize_srcu(sp, synchronize_sched_expedited); | 488 | __synchronize_srcu(sp, SYNCHRONIZE_SRCU_EXP_TRYCOUNT); |
313 | } | 489 | } |
314 | EXPORT_SYMBOL_GPL(synchronize_srcu_expedited); | 490 | EXPORT_SYMBOL_GPL(synchronize_srcu_expedited); |
315 | 491 | ||
316 | /** | 492 | /** |
493 | * srcu_barrier - Wait until all in-flight call_srcu() callbacks complete. | ||
494 | */ | ||
495 | void srcu_barrier(struct srcu_struct *sp) | ||
496 | { | ||
497 | synchronize_srcu(sp); | ||
498 | } | ||
499 | EXPORT_SYMBOL_GPL(srcu_barrier); | ||
500 | |||
501 | /** | ||
317 | * srcu_batches_completed - return batches completed. | 502 | * srcu_batches_completed - return batches completed. |
318 | * @sp: srcu_struct on which to report batch completion. | 503 | * @sp: srcu_struct on which to report batch completion. |
319 | * | 504 | * |
320 | * Report the number of batches, correlated with, but not necessarily | 505 | * Report the number of batches, correlated with, but not necessarily |
321 | * precisely the same as, the number of grace periods that have elapsed. | 506 | * precisely the same as, the number of grace periods that have elapsed. |
322 | */ | 507 | */ |
323 | |||
324 | long srcu_batches_completed(struct srcu_struct *sp) | 508 | long srcu_batches_completed(struct srcu_struct *sp) |
325 | { | 509 | { |
326 | return sp->completed; | 510 | return sp->completed; |
327 | } | 511 | } |
328 | EXPORT_SYMBOL_GPL(srcu_batches_completed); | 512 | EXPORT_SYMBOL_GPL(srcu_batches_completed); |
513 | |||
514 | #define SRCU_CALLBACK_BATCH 10 | ||
515 | #define SRCU_INTERVAL 1 | ||
516 | |||
517 | /* | ||
518 | * Move any new SRCU callbacks to the first stage of the SRCU grace | ||
519 | * period pipeline. | ||
520 | */ | ||
521 | static void srcu_collect_new(struct srcu_struct *sp) | ||
522 | { | ||
523 | if (!rcu_batch_empty(&sp->batch_queue)) { | ||
524 | spin_lock_irq(&sp->queue_lock); | ||
525 | rcu_batch_move(&sp->batch_check0, &sp->batch_queue); | ||
526 | spin_unlock_irq(&sp->queue_lock); | ||
527 | } | ||
528 | } | ||
529 | |||
530 | /* | ||
531 | * Core SRCU state machine. Advance callbacks from ->batch_check0 to | ||
532 | * ->batch_check1 and then to ->batch_done as readers drain. | ||
533 | */ | ||
534 | static void srcu_advance_batches(struct srcu_struct *sp, int trycount) | ||
535 | { | ||
536 | int idx = 1 ^ (sp->completed & 1); | ||
537 | |||
538 | /* | ||
539 | * Because readers might be delayed for an extended period after | ||
540 | * fetching ->completed for their index, at any point in time there | ||
541 | * might well be readers using both idx=0 and idx=1. We therefore | ||
542 | * need to wait for readers to clear from both index values before | ||
543 | * invoking a callback. | ||
544 | */ | ||
545 | |||
546 | if (rcu_batch_empty(&sp->batch_check0) && | ||
547 | rcu_batch_empty(&sp->batch_check1)) | ||
548 | return; /* no callbacks need to be advanced */ | ||
549 | |||
550 | if (!try_check_zero(sp, idx, trycount)) | ||
551 | return; /* failed to advance, will try after SRCU_INTERVAL */ | ||
552 | |||
553 | /* | ||
554 | * The callbacks in ->batch_check1 already completed their first zero | ||
555 | * check and counter flip, back when they were enqueued on | ||
556 | * ->batch_check0 in a previous invocation of srcu_advance_batches(). | ||
557 | * (Presumably try_check_zero() returned false during that | ||
558 | * invocation, leaving the callbacks stranded on ->batch_check1.) | ||
559 | * They are therefore ready to invoke, so move them to ->batch_done. | ||
560 | */ | ||
561 | rcu_batch_move(&sp->batch_done, &sp->batch_check1); | ||
562 | |||
563 | if (rcu_batch_empty(&sp->batch_check0)) | ||
564 | return; /* no callbacks need to be advanced */ | ||
565 | srcu_flip(sp); | ||
566 | |||
567 | /* | ||
568 | * The callbacks in ->batch_check0 just finished their | ||
569 | * first zero check and flip, so move them to ->batch_check1 | ||
570 | * for future checking on the other idx. | ||
571 | */ | ||
572 | rcu_batch_move(&sp->batch_check1, &sp->batch_check0); | ||
573 | |||
574 | /* | ||
575 | * SRCU read-side critical sections are normally short, so check | ||
576 | * at least twice in quick succession after a flip. | ||
577 | */ | ||
578 | trycount = trycount < 2 ? 2 : trycount; | ||
579 | if (!try_check_zero(sp, idx^1, trycount)) | ||
580 | return; /* failed to advance, will try after SRCU_INTERVAL */ | ||
581 | |||
582 | /* | ||
583 | * The callbacks in ->batch_check1 have now waited for all | ||
584 | * pre-existing readers using both idx values. They are therefore | ||
585 | * ready to invoke, so move them to ->batch_done. | ||
586 | */ | ||
587 | rcu_batch_move(&sp->batch_done, &sp->batch_check1); | ||
588 | } | ||
589 | |||
590 | /* | ||
591 | * Invoke a limited number of SRCU callbacks that have passed through | ||
592 | * their grace period. If there are more to do, SRCU will reschedule | ||
593 | * the workqueue. | ||
594 | */ | ||
595 | static void srcu_invoke_callbacks(struct srcu_struct *sp) | ||
596 | { | ||
597 | int i; | ||
598 | struct rcu_head *head; | ||
599 | |||
600 | for (i = 0; i < SRCU_CALLBACK_BATCH; i++) { | ||
601 | head = rcu_batch_dequeue(&sp->batch_done); | ||
602 | if (!head) | ||
603 | break; | ||
604 | local_bh_disable(); | ||
605 | head->func(head); | ||
606 | local_bh_enable(); | ||
607 | } | ||
608 | } | ||
609 | |||
610 | /* | ||
611 | * Finished one round of SRCU grace period. Start another if there are | ||
612 | * more SRCU callbacks queued, otherwise put SRCU into not-running state. | ||
613 | */ | ||
614 | static void srcu_reschedule(struct srcu_struct *sp) | ||
615 | { | ||
616 | bool pending = true; | ||
617 | |||
618 | if (rcu_batch_empty(&sp->batch_done) && | ||
619 | rcu_batch_empty(&sp->batch_check1) && | ||
620 | rcu_batch_empty(&sp->batch_check0) && | ||
621 | rcu_batch_empty(&sp->batch_queue)) { | ||
622 | spin_lock_irq(&sp->queue_lock); | ||
623 | if (rcu_batch_empty(&sp->batch_done) && | ||
624 | rcu_batch_empty(&sp->batch_check1) && | ||
625 | rcu_batch_empty(&sp->batch_check0) && | ||
626 | rcu_batch_empty(&sp->batch_queue)) { | ||
627 | sp->running = false; | ||
628 | pending = false; | ||
629 | } | ||
630 | spin_unlock_irq(&sp->queue_lock); | ||
631 | } | ||
632 | |||
633 | if (pending) | ||
634 | queue_delayed_work(system_nrt_wq, &sp->work, SRCU_INTERVAL); | ||
635 | } | ||
636 | |||
637 | /* | ||
638 | * This is the work-queue function that handles SRCU grace periods. | ||
639 | */ | ||
640 | static void process_srcu(struct work_struct *work) | ||
641 | { | ||
642 | struct srcu_struct *sp; | ||
643 | |||
644 | sp = container_of(work, struct srcu_struct, work.work); | ||
645 | |||
646 | srcu_collect_new(sp); | ||
647 | srcu_advance_batches(sp, 1); | ||
648 | srcu_invoke_callbacks(sp); | ||
649 | srcu_reschedule(sp); | ||
650 | } | ||
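
process_srcu() is the single workqueue handler that drives the whole pipeline: collect newly queued callbacks, advance them through the two zero checks, invoke the ones whose grace period has ended, and reschedule itself while work remains. One practical consequence of callback support is that callbacks can still be in flight when a user tears down its srcu_struct, so the exit path should drain them first. A hedged lifecycle sketch (illustrative names; this assumes no further call_srcu() invocations can occur once the exit handler runs):

    #include <linux/init.h>
    #include <linux/module.h>
    #include <linux/srcu.h>

    static struct srcu_struct my_srcu;          /* illustrative */

    static int __init my_init(void)
    {
        return init_srcu_struct(&my_srcu);      /* allocates the per-CPU counters */
    }
    module_init(my_init);

    static void __exit my_exit(void)
    {
        /* Assumes no further call_srcu() invocations can happen by now. */
        srcu_barrier(&my_srcu);                 /* wait for in-flight callbacks */
        cleanup_srcu_struct(&my_srcu);          /* then release the per-CPU state */
    }
    module_exit(my_exit);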
diff --git a/kernel/timer.c b/kernel/timer.c index a297ffcf888e..837c552fe838 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -861,7 +861,13 @@ EXPORT_SYMBOL(mod_timer); | |||
861 | * | 861 | * |
862 | * mod_timer_pinned() is a way to update the expire field of an | 862 | * mod_timer_pinned() is a way to update the expire field of an |
863 | * active timer (if the timer is inactive it will be activated) | 863 | * active timer (if the timer is inactive it will be activated) |
864 | * and not allow the timer to be migrated to a different CPU. | 864 | * and to ensure that the timer is scheduled on the current CPU. |
865 | * | ||
866 | * Note that this does not prevent the timer from being migrated | ||
867 | * when the current CPU goes offline. If this is a problem for | ||
868 | * you, use CPU-hotplug notifiers to handle it correctly, for | ||
869 | * example, cancelling the timer when the corresponding CPU goes | ||
870 | * offline. | ||
865 | * | 871 | * |
866 | * mod_timer_pinned(timer, expires) is equivalent to: | 872 | * mod_timer_pinned(timer, expires) is equivalent to: |
867 | * | 873 | * |
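
The added wording spells out the caveat: mod_timer_pinned() keeps the timer on the current CPU only while that CPU remains online, so code that relies on the pinning must react to CPU hotplug itself. A hedged sketch of the suggested approach, cancelling the timer from a CPU-hotplug notifier (the timer, variable, and notifier names are illustrative, not part of this patch):

    #include <linux/cpu.h>
    #include <linux/notifier.h>
    #include <linux/timer.h>

    static struct timer_list my_pinned_timer;   /* illustrative pinned timer */
    static int my_timer_cpu;                    /* CPU it was last pinned to */

    static int my_cpu_notify(struct notifier_block *nb,
                             unsigned long action, void *hcpu)
    {
        long cpu = (long)hcpu;

        /*
         * If the CPU our timer is pinned to is going down, cancel the
         * timer first; it can be re-armed later from whichever CPU
         * takes over the work.
         */
        if ((action == CPU_DOWN_PREPARE || action == CPU_DOWN_PREPARE_FROZEN) &&
            cpu == my_timer_cpu)
            del_timer_sync(&my_pinned_timer);
        return NOTIFY_OK;
    }

    static struct notifier_block my_cpu_nb = {
        .notifier_call = my_cpu_notify,
    };

The notifier block would be registered once with register_cpu_notifier() during initialization.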
diff --git a/lib/list_debug.c b/lib/list_debug.c index 982b850d4e7a..3810b481f940 100644 --- a/lib/list_debug.c +++ b/lib/list_debug.c | |||
@@ -10,6 +10,7 @@ | |||
10 | #include <linux/list.h> | 10 | #include <linux/list.h> |
11 | #include <linux/bug.h> | 11 | #include <linux/bug.h> |
12 | #include <linux/kernel.h> | 12 | #include <linux/kernel.h> |
13 | #include <linux/rculist.h> | ||
13 | 14 | ||
14 | /* | 15 | /* |
15 | * Insert a new entry between two known consecutive entries. | 16 | * Insert a new entry between two known consecutive entries. |
@@ -75,3 +76,24 @@ void list_del(struct list_head *entry) | |||
75 | entry->prev = LIST_POISON2; | 76 | entry->prev = LIST_POISON2; |
76 | } | 77 | } |
77 | EXPORT_SYMBOL(list_del); | 78 | EXPORT_SYMBOL(list_del); |
79 | |||
80 | /* | ||
81 | * RCU variants. | ||
82 | */ | ||
83 | void __list_add_rcu(struct list_head *new, | ||
84 | struct list_head *prev, struct list_head *next) | ||
85 | { | ||
86 | WARN(next->prev != prev, | ||
87 | "list_add_rcu corruption. next->prev should be " | ||
88 | "prev (%p), but was %p. (next=%p).\n", | ||
89 | prev, next->prev, next); | ||
90 | WARN(prev->next != next, | ||
91 | "list_add_rcu corruption. prev->next should be " | ||
92 | "next (%p), but was %p. (prev=%p).\n", | ||
93 | next, prev->next, prev); | ||
94 | new->next = next; | ||
95 | new->prev = prev; | ||
96 | rcu_assign_pointer(list_next_rcu(prev), new); | ||
97 | next->prev = new; | ||
98 | } | ||
99 | EXPORT_SYMBOL(__list_add_rcu); | ||
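
__list_add_rcu() is the debug-checked backend behind list_add_rcu(): the corruption checks run first, and the rcu_assign_pointer() on the predecessor's ->next is what makes the new entry safely visible to lockless readers. A short usage sketch, with the list head, lock, and element type being illustrative names:

    #include <linux/rcupdate.h>
    #include <linux/rculist.h>
    #include <linux/spinlock.h>

    struct item {
        int key;
        struct list_head link;
    };

    static LIST_HEAD(item_list);            /* illustrative RCU-protected list */
    static DEFINE_SPINLOCK(item_lock);      /* serializes updaters only */

    static void add_item(struct item *it)
    {
        spin_lock(&item_lock);
        list_add_rcu(&it->link, &item_list);    /* ends up in __list_add_rcu() */
        spin_unlock(&item_lock);
    }

    static bool find_item(int key)
    {
        struct item *it;
        bool found = false;

        rcu_read_lock();                    /* readers take no lock */
        list_for_each_entry_rcu(it, &item_list, link) {
            if (it->key == key) {
                found = true;
                break;
            }
        }
        rcu_read_unlock();
        return found;
    }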