Diffstat (limited to 'kernel/rcutree.c')
-rw-r--r--  kernel/rcutree.c | 353
1 file changed, 229 insertions, 124 deletions
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index dd4aea806f8e..ba06207b1dd3 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -36,7 +36,7 @@ | |||
36 | #include <linux/interrupt.h> | 36 | #include <linux/interrupt.h> |
37 | #include <linux/sched.h> | 37 | #include <linux/sched.h> |
38 | #include <linux/nmi.h> | 38 | #include <linux/nmi.h> |
39 | #include <asm/atomic.h> | 39 | #include <linux/atomic.h> |
40 | #include <linux/bitops.h> | 40 | #include <linux/bitops.h> |
41 | #include <linux/module.h> | 41 | #include <linux/module.h> |
42 | #include <linux/completion.h> | 42 | #include <linux/completion.h> |
@@ -47,6 +47,9 @@ | |||
47 | #include <linux/mutex.h> | 47 | #include <linux/mutex.h> |
48 | #include <linux/time.h> | 48 | #include <linux/time.h> |
49 | #include <linux/kernel_stat.h> | 49 | #include <linux/kernel_stat.h> |
50 | #include <linux/wait.h> | ||
51 | #include <linux/kthread.h> | ||
52 | #include <linux/prefetch.h> | ||
50 | 53 | ||
51 | #include "rcutree.h" | 54 | #include "rcutree.h" |
52 | 55 | ||
@@ -79,10 +82,67 @@ DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); | |||
79 | struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); | 82 | struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); |
80 | DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); | 83 | DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); |
81 | 84 | ||
85 | static struct rcu_state *rcu_state; | ||
86 | |||
87 | /* | ||
88 | * The rcu_scheduler_active variable transitions from zero to one just | ||
89 | * before the first task is spawned. So when this variable is zero, RCU | ||
90 | * can assume that there is but one task, allowing RCU to (for example) | ||
91 | * optimize synchronize_sched() to a simple barrier(). When this variable | ||
92 | * is one, RCU must actually do all the hard work required to detect real | ||
93 | * grace periods. This variable is also used to suppress boot-time false | ||
94 | * positives from lockdep-RCU error checking. | ||
95 | */ | ||
82 | int rcu_scheduler_active __read_mostly; | 96 | int rcu_scheduler_active __read_mostly; |
83 | EXPORT_SYMBOL_GPL(rcu_scheduler_active); | 97 | EXPORT_SYMBOL_GPL(rcu_scheduler_active); |
84 | 98 | ||
85 | /* | 99 | /* |
100 | * The rcu_scheduler_fully_active variable transitions from zero to one | ||
101 | * during the early_initcall() processing, which is after the scheduler | ||
102 | * is capable of creating new tasks. So RCU processing (for example, | ||
103 | * creating tasks for RCU priority boosting) must be delayed until after | ||
104 | * rcu_scheduler_fully_active transitions from zero to one. We also | ||
105 | * currently delay invocation of any RCU callbacks until after this point. | ||
106 | * | ||
107 | * It might later prove better for people registering RCU callbacks during | ||
108 | * early boot to take responsibility for these callbacks, but one step at | ||
109 | * a time. | ||
110 | */ | ||
111 | static int rcu_scheduler_fully_active __read_mostly; | ||
112 | |||
113 | #ifdef CONFIG_RCU_BOOST | ||
114 | |||
115 | /* | ||
116 | * Control variables for per-CPU and per-rcu_node kthreads. These | ||
117 | * handle all flavors of RCU. | ||
118 | */ | ||
119 | static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); | ||
120 | DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); | ||
121 | DEFINE_PER_CPU(int, rcu_cpu_kthread_cpu); | ||
122 | DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); | ||
123 | DEFINE_PER_CPU(char, rcu_cpu_has_work); | ||
124 | |||
125 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
126 | |||
127 | static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); | ||
128 | static void invoke_rcu_core(void); | ||
129 | static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); | ||
130 | |||
131 | #define RCU_KTHREAD_PRIO 1 /* RT priority for per-CPU kthreads. */ | ||
132 | |||
133 | /* | ||
134 | * Track the rcutorture test sequence number and the update version | ||
135 | * number within a given test. The rcutorture_testseq is incremented | ||
136 | * on every rcutorture module load and unload, so has an odd value | ||
137 | * when a test is running. The rcutorture_vernum is set to zero | ||
138 | * when rcutorture starts and is incremented on each rcutorture update. | ||
139 | * These variables enable correlating rcutorture output with the | ||
140 | * RCU tracing information. | ||
141 | */ | ||
142 | unsigned long rcutorture_testseq; | ||
143 | unsigned long rcutorture_vernum; | ||
144 | |||
145 | /* | ||
86 | * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s | 146 | * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s |
87 | * permit this function to be invoked without holding the root rcu_node | 147 | * permit this function to be invoked without holding the root rcu_node |
88 | * structure's ->lock, but of course results can be subject to change. | 148 | * structure's ->lock, but of course results can be subject to change. |
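The ACCESS_ONCE() mentioned in this comment forces the compiler to perform exactly one load of the location instead of caching or re-reading it, which is what makes the lockless check safe without holding the root rcu_node lock. A user-space sketch of the idea, with the macro body written the way kernels of this era define it and a hypothetical gp_state structure standing in for rcu_state:

#include <stdio.h>

/* Force a single, non-cached access to x (kernel-style definition). */
#define ACCESS_ONCE(x) (*(volatile __typeof__(x) *)&(x))

struct gp_state {
    unsigned long gpnum;     /* number of the current grace period */
    unsigned long completed; /* number of the last completed grace period */
};

/* Lockless check: a grace period is in progress when the two differ. */
static int gp_in_progress(struct gp_state *s)
{
    return ACCESS_ONCE(s->completed) != ACCESS_ONCE(s->gpnum);
}

int main(void)
{
    struct gp_state s = { .gpnum = 5, .completed = 4 };

    printf("in progress: %d\n", gp_in_progress(&s)); /* prints 1 */
    s.completed = 5;
    printf("in progress: %d\n", gp_in_progress(&s)); /* prints 0 */
    return 0;
}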
@@ -124,11 +184,12 @@ void rcu_note_context_switch(int cpu) | |||
124 | rcu_sched_qs(cpu); | 184 | rcu_sched_qs(cpu); |
125 | rcu_preempt_note_context_switch(cpu); | 185 | rcu_preempt_note_context_switch(cpu); |
126 | } | 186 | } |
187 | EXPORT_SYMBOL_GPL(rcu_note_context_switch); | ||
127 | 188 | ||
128 | #ifdef CONFIG_NO_HZ | 189 | #ifdef CONFIG_NO_HZ |
129 | DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { | 190 | DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { |
130 | .dynticks_nesting = 1, | 191 | .dynticks_nesting = 1, |
131 | .dynticks = 1, | 192 | .dynticks = ATOMIC_INIT(1), |
132 | }; | 193 | }; |
133 | #endif /* #ifdef CONFIG_NO_HZ */ | 194 | #endif /* #ifdef CONFIG_NO_HZ */ |
134 | 195 | ||
@@ -140,10 +201,8 @@ module_param(blimit, int, 0); | |||
140 | module_param(qhimark, int, 0); | 201 | module_param(qhimark, int, 0); |
141 | module_param(qlowmark, int, 0); | 202 | module_param(qlowmark, int, 0); |
142 | 203 | ||
143 | #ifdef CONFIG_RCU_CPU_STALL_DETECTOR | 204 | int rcu_cpu_stall_suppress __read_mostly; |
144 | int rcu_cpu_stall_suppress __read_mostly = RCU_CPU_STALL_SUPPRESS_INIT; | ||
145 | module_param(rcu_cpu_stall_suppress, int, 0644); | 205 | module_param(rcu_cpu_stall_suppress, int, 0644); |
146 | #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ | ||
147 | 206 | ||
148 | static void force_quiescent_state(struct rcu_state *rsp, int relaxed); | 207 | static void force_quiescent_state(struct rcu_state *rsp, int relaxed); |
149 | static int rcu_pending(int cpu); | 208 | static int rcu_pending(int cpu); |
@@ -176,6 +235,31 @@ void rcu_bh_force_quiescent_state(void) | |||
176 | EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state); | 235 | EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state); |
177 | 236 | ||
178 | /* | 237 | /* |
238 | * Record the number of times rcutorture tests have been initiated and | ||
239 | * terminated. This information allows the debugfs tracing stats to be | ||
240 | * correlated to the rcutorture messages, even when the rcutorture module | ||
241 | * is being repeatedly loaded and unloaded. In other words, we cannot | ||
242 | * store this state in rcutorture itself. | ||
243 | */ | ||
244 | void rcutorture_record_test_transition(void) | ||
245 | { | ||
246 | rcutorture_testseq++; | ||
247 | rcutorture_vernum = 0; | ||
248 | } | ||
249 | EXPORT_SYMBOL_GPL(rcutorture_record_test_transition); | ||
250 | |||
251 | /* | ||
252 | * Record the number of writer passes through the current rcutorture test. | ||
253 | * This is also used to correlate debugfs tracing stats with the rcutorture | ||
254 | * messages. | ||
255 | */ | ||
256 | void rcutorture_record_progress(unsigned long vernum) | ||
257 | { | ||
258 | rcutorture_vernum++; | ||
259 | } | ||
260 | EXPORT_SYMBOL_GPL(rcutorture_record_progress); | ||
261 | |||
262 | /* | ||
179 | * Force a quiescent state for RCU-sched. | 263 | * Force a quiescent state for RCU-sched. |
180 | */ | 264 | */ |
181 | void rcu_sched_force_quiescent_state(void) | 265 | void rcu_sched_force_quiescent_state(void) |
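Per the comments above, rcutorture_testseq is incremented on both module load and unload (so an odd value means a test is running) and rcutorture_vernum counts writer passes within the current test. A sketch of how a consumer of the debugfs tracing output might correlate two snapshots; the structure and field names are hypothetical, only the odd/even convention comes from the code above:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical snapshot of the two counters as seen via tracing. */
struct rcu_trace_snap {
    unsigned long testseq; /* odd => an rcutorture test is running */
    unsigned long vernum;  /* writer passes within the current test */
};

static bool test_running(const struct rcu_trace_snap *s)
{
    return s->testseq & 1;
}

/* Two snapshots belong to the same test run if testseq matches. */
static bool same_test(const struct rcu_trace_snap *a,
                      const struct rcu_trace_snap *b)
{
    return a->testseq == b->testseq && test_running(a);
}

int main(void)
{
    struct rcu_trace_snap a = { .testseq = 3, .vernum = 10 };
    struct rcu_trace_snap b = { .testseq = 3, .vernum = 42 };

    printf("running: %d, same test: %d, writer progress: %lu\n",
           test_running(&a), same_test(&a, &b), b.vernum - a.vernum);
    return 0;
}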
@@ -234,8 +318,8 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp) | |||
234 | return 1; | 318 | return 1; |
235 | } | 319 | } |
236 | 320 | ||
237 | /* If preemptable RCU, no point in sending reschedule IPI. */ | 321 | /* If preemptible RCU, no point in sending reschedule IPI. */ |
238 | if (rdp->preemptable) | 322 | if (rdp->preemptible) |
239 | return 0; | 323 | return 0; |
240 | 324 | ||
241 | /* The CPU is online, so send it a reschedule IPI. */ | 325 | /* The CPU is online, so send it a reschedule IPI. */ |
@@ -264,13 +348,25 @@ void rcu_enter_nohz(void) | |||
264 | unsigned long flags; | 348 | unsigned long flags; |
265 | struct rcu_dynticks *rdtp; | 349 | struct rcu_dynticks *rdtp; |
266 | 350 | ||
267 | smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ | ||
268 | local_irq_save(flags); | 351 | local_irq_save(flags); |
269 | rdtp = &__get_cpu_var(rcu_dynticks); | 352 | rdtp = &__get_cpu_var(rcu_dynticks); |
270 | rdtp->dynticks++; | 353 | if (--rdtp->dynticks_nesting) { |
271 | rdtp->dynticks_nesting--; | 354 | local_irq_restore(flags); |
272 | WARN_ON_ONCE(rdtp->dynticks & 0x1); | 355 | return; |
356 | } | ||
357 | /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ | ||
358 | smp_mb__before_atomic_inc(); /* See above. */ | ||
359 | atomic_inc(&rdtp->dynticks); | ||
360 | smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */ | ||
361 | WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); | ||
273 | local_irq_restore(flags); | 362 | local_irq_restore(flags); |
363 | |||
364 | /* If the interrupt queued a callback, get out of dyntick mode. */ | ||
365 | if (in_irq() && | ||
366 | (__get_cpu_var(rcu_sched_data).nxtlist || | ||
367 | __get_cpu_var(rcu_bh_data).nxtlist || | ||
368 | rcu_preempt_needs_cpu(smp_processor_id()))) | ||
369 | set_need_resched(); | ||
274 | } | 370 | } |
275 | 371 | ||
276 | /* | 372 | /* |
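The hunk above is part of the switch from the old pair of free-running counters (->dynticks and ->dynticks_nmi) to a single atomic_t plus a nesting count: the counter is even while the CPU is in dyntick-idle (no RCU read-side critical sections possible) and odd otherwise, and each transition is bracketed by full memory barriers. A minimal user-space sketch of that protocol with C11 atomics, assuming sequentially consistent read-modify-writes in place of smp_mb__{before,after}_atomic_inc():

#include <assert.h>
#include <stdatomic.h>

struct dynticks {
    long nesting;        /* process-level nesting depth */
    atomic_int dynticks; /* even: in dyntick-idle; odd: not idle */
};

/* Called on the way into idle: the last exit flips the counter to even. */
static void enter_idle(struct dynticks *d)
{
    if (--d->nesting)
        return;
    /* seq_cst RMW orders prior accesses before the idle sojourn. */
    int old = atomic_fetch_add(&d->dynticks, 1);
    assert(old & 1);      /* counter was odd (non-idle) before */
}

/* Called on the way out of idle: the first entry flips it back to odd. */
static void exit_idle(struct dynticks *d)
{
    if (d->nesting++)
        return;
    int old = atomic_fetch_add(&d->dynticks, 1);
    assert(!(old & 1));   /* counter was even (idle) before */
}

int main(void)
{
    struct dynticks d = { .nesting = 1, .dynticks = ATOMIC_VAR_INIT(1) };

    enter_idle(&d); /* counter becomes 2: idle */
    exit_idle(&d);  /* counter becomes 3: running again */
    assert(atomic_load(&d.dynticks) == 3 && d.nesting == 1);
    return 0;
}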
@@ -286,11 +382,16 @@ void rcu_exit_nohz(void) | |||
286 | 382 | ||
287 | local_irq_save(flags); | 383 | local_irq_save(flags); |
288 | rdtp = &__get_cpu_var(rcu_dynticks); | 384 | rdtp = &__get_cpu_var(rcu_dynticks); |
289 | rdtp->dynticks++; | 385 | if (rdtp->dynticks_nesting++) { |
290 | rdtp->dynticks_nesting++; | 386 | local_irq_restore(flags); |
291 | WARN_ON_ONCE(!(rdtp->dynticks & 0x1)); | 387 | return; |
388 | } | ||
389 | smp_mb__before_atomic_inc(); /* Force ordering w/previous sojourn. */ | ||
390 | atomic_inc(&rdtp->dynticks); | ||
391 | /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ | ||
392 | smp_mb__after_atomic_inc(); /* See above. */ | ||
393 | WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); | ||
292 | local_irq_restore(flags); | 394 | local_irq_restore(flags); |
293 | smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ | ||
294 | } | 395 | } |
295 | 396 | ||
296 | /** | 397 | /** |
@@ -304,11 +405,15 @@ void rcu_nmi_enter(void) | |||
304 | { | 405 | { |
305 | struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); | 406 | struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); |
306 | 407 | ||
307 | if (rdtp->dynticks & 0x1) | 408 | if (rdtp->dynticks_nmi_nesting == 0 && |
409 | (atomic_read(&rdtp->dynticks) & 0x1)) | ||
308 | return; | 410 | return; |
309 | rdtp->dynticks_nmi++; | 411 | rdtp->dynticks_nmi_nesting++; |
310 | WARN_ON_ONCE(!(rdtp->dynticks_nmi & 0x1)); | 412 | smp_mb__before_atomic_inc(); /* Force delay from prior write. */ |
311 | smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ | 413 | atomic_inc(&rdtp->dynticks); |
414 | /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ | ||
415 | smp_mb__after_atomic_inc(); /* See above. */ | ||
416 | WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); | ||
312 | } | 417 | } |
313 | 418 | ||
314 | /** | 419 | /** |
@@ -322,11 +427,14 @@ void rcu_nmi_exit(void) | |||
322 | { | 427 | { |
323 | struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); | 428 | struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); |
324 | 429 | ||
325 | if (rdtp->dynticks & 0x1) | 430 | if (rdtp->dynticks_nmi_nesting == 0 || |
431 | --rdtp->dynticks_nmi_nesting != 0) | ||
326 | return; | 432 | return; |
327 | smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ | 433 | /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ |
328 | rdtp->dynticks_nmi++; | 434 | smp_mb__before_atomic_inc(); /* See above. */ |
329 | WARN_ON_ONCE(rdtp->dynticks_nmi & 0x1); | 435 | atomic_inc(&rdtp->dynticks); |
436 | smp_mb__after_atomic_inc(); /* Force delay to next write. */ | ||
437 | WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); | ||
330 | } | 438 | } |
331 | 439 | ||
332 | /** | 440 | /** |
@@ -337,13 +445,7 @@ void rcu_nmi_exit(void) | |||
337 | */ | 445 | */ |
338 | void rcu_irq_enter(void) | 446 | void rcu_irq_enter(void) |
339 | { | 447 | { |
340 | struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); | 448 | rcu_exit_nohz(); |
341 | |||
342 | if (rdtp->dynticks_nesting++) | ||
343 | return; | ||
344 | rdtp->dynticks++; | ||
345 | WARN_ON_ONCE(!(rdtp->dynticks & 0x1)); | ||
346 | smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ | ||
347 | } | 449 | } |
348 | 450 | ||
349 | /** | 451 | /** |
@@ -355,18 +457,7 @@ void rcu_irq_enter(void) | |||
355 | */ | 457 | */ |
356 | void rcu_irq_exit(void) | 458 | void rcu_irq_exit(void) |
357 | { | 459 | { |
358 | struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); | 460 | rcu_enter_nohz(); |
359 | |||
360 | if (--rdtp->dynticks_nesting) | ||
361 | return; | ||
362 | smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ | ||
363 | rdtp->dynticks++; | ||
364 | WARN_ON_ONCE(rdtp->dynticks & 0x1); | ||
365 | |||
366 | /* If the interrupt queued a callback, get out of dyntick mode. */ | ||
367 | if (__this_cpu_read(rcu_sched_data.nxtlist) || | ||
368 | __this_cpu_read(rcu_bh_data.nxtlist)) | ||
369 | set_need_resched(); | ||
370 | } | 461 | } |
371 | 462 | ||
372 | #ifdef CONFIG_SMP | 463 | #ifdef CONFIG_SMP |
@@ -378,19 +469,8 @@ void rcu_irq_exit(void) | |||
378 | */ | 469 | */ |
379 | static int dyntick_save_progress_counter(struct rcu_data *rdp) | 470 | static int dyntick_save_progress_counter(struct rcu_data *rdp) |
380 | { | 471 | { |
381 | int ret; | 472 | rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks); |
382 | int snap; | 473 | return 0; |
383 | int snap_nmi; | ||
384 | |||
385 | snap = rdp->dynticks->dynticks; | ||
386 | snap_nmi = rdp->dynticks->dynticks_nmi; | ||
387 | smp_mb(); /* Order sampling of snap with end of grace period. */ | ||
388 | rdp->dynticks_snap = snap; | ||
389 | rdp->dynticks_nmi_snap = snap_nmi; | ||
390 | ret = ((snap & 0x1) == 0) && ((snap_nmi & 0x1) == 0); | ||
391 | if (ret) | ||
392 | rdp->dynticks_fqs++; | ||
393 | return ret; | ||
394 | } | 474 | } |
395 | 475 | ||
396 | /* | 476 | /* |
@@ -401,16 +481,11 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp) | |||
401 | */ | 481 | */ |
402 | static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) | 482 | static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) |
403 | { | 483 | { |
404 | long curr; | 484 | unsigned long curr; |
405 | long curr_nmi; | 485 | unsigned long snap; |
406 | long snap; | ||
407 | long snap_nmi; | ||
408 | 486 | ||
409 | curr = rdp->dynticks->dynticks; | 487 | curr = (unsigned long)atomic_add_return(0, &rdp->dynticks->dynticks); |
410 | snap = rdp->dynticks_snap; | 488 | snap = (unsigned long)rdp->dynticks_snap; |
411 | curr_nmi = rdp->dynticks->dynticks_nmi; | ||
412 | snap_nmi = rdp->dynticks_nmi_snap; | ||
413 | smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ | ||
414 | 489 | ||
415 | /* | 490 | /* |
416 | * If the CPU passed through or entered a dynticks idle phase with | 491 | * If the CPU passed through or entered a dynticks idle phase with |
@@ -420,8 +495,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) | |||
420 | * read-side critical section that started before the beginning | 495 | * read-side critical section that started before the beginning |
421 | * of the current RCU grace period. | 496 | * of the current RCU grace period. |
422 | */ | 497 | */ |
423 | if ((curr != snap || (curr & 0x1) == 0) && | 498 | if ((curr & 0x1) == 0 || ULONG_CMP_GE(curr, snap + 2)) { |
424 | (curr_nmi != snap_nmi || (curr_nmi & 0x1) == 0)) { | ||
425 | rdp->dynticks_fqs++; | 499 | rdp->dynticks_fqs++; |
426 | return 1; | 500 | return 1; |
427 | } | 501 | } |
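With a single even/odd counter, a later pass over the CPUs only needs the snapshot taken by dyntick_save_progress_counter() and the current value: the CPU has passed a quiescent state if the counter is even right now, or if it has advanced by at least two since the snapshot (meaning it visited dyntick-idle in between). A sketch of that comparison in plain C; ulong_cmp_ge() is written along the lines of the kernel's ULONG_CMP_GE() so that counter wrap is harmless:

#include <limits.h>
#include <stdio.h>

/* Wrap-safe "a >= b" for free-running unsigned counters. */
static int ulong_cmp_ge(unsigned long a, unsigned long b)
{
    return ULONG_MAX / 2 >= a - b;
}

/*
 * Snapshot/compare check: quiescent if the counter is even now, or has
 * moved on by at least 2 since the snapshot (one full idle sojourn).
 */
static int in_quiescent_state(unsigned long curr, unsigned long snap)
{
    return (curr & 0x1) == 0 || ulong_cmp_ge(curr, snap + 2);
}

int main(void)
{
    printf("%d\n", in_quiescent_state(5, 5));         /* 0: still odd, no progress */
    printf("%d\n", in_quiescent_state(6, 5));         /* 1: even => idle right now */
    printf("%d\n", in_quiescent_state(7, 5));         /* 1: advanced by 2 */
    printf("%d\n", in_quiescent_state(1, ULONG_MAX)); /* 1: advanced by 2 across wrap */
    return 0;
}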
@@ -450,8 +524,6 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) | |||
450 | 524 | ||
451 | #endif /* #else #ifdef CONFIG_NO_HZ */ | 525 | #endif /* #else #ifdef CONFIG_NO_HZ */ |
452 | 526 | ||
453 | #ifdef CONFIG_RCU_CPU_STALL_DETECTOR | ||
454 | |||
455 | int rcu_cpu_stall_suppress __read_mostly; | 527 | int rcu_cpu_stall_suppress __read_mostly; |
456 | 528 | ||
457 | static void record_gp_stall_check_time(struct rcu_state *rsp) | 529 | static void record_gp_stall_check_time(struct rcu_state *rsp) |
@@ -537,21 +609,24 @@ static void print_cpu_stall(struct rcu_state *rsp) | |||
537 | 609 | ||
538 | static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) | 610 | static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) |
539 | { | 611 | { |
540 | long delta; | 612 | unsigned long j; |
613 | unsigned long js; | ||
541 | struct rcu_node *rnp; | 614 | struct rcu_node *rnp; |
542 | 615 | ||
543 | if (rcu_cpu_stall_suppress) | 616 | if (rcu_cpu_stall_suppress) |
544 | return; | 617 | return; |
545 | delta = jiffies - ACCESS_ONCE(rsp->jiffies_stall); | 618 | j = ACCESS_ONCE(jiffies); |
619 | js = ACCESS_ONCE(rsp->jiffies_stall); | ||
546 | rnp = rdp->mynode; | 620 | rnp = rdp->mynode; |
547 | if ((ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && delta >= 0) { | 621 | if ((ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && ULONG_CMP_GE(j, js)) { |
548 | 622 | ||
549 | /* We haven't checked in, so go dump stack. */ | 623 | /* We haven't checked in, so go dump stack. */ |
550 | print_cpu_stall(rsp); | 624 | print_cpu_stall(rsp); |
551 | 625 | ||
552 | } else if (rcu_gp_in_progress(rsp) && delta >= RCU_STALL_RAT_DELAY) { | 626 | } else if (rcu_gp_in_progress(rsp) && |
627 | ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY)) { | ||
553 | 628 | ||
554 | /* They had two time units to dump stack, so complain. */ | 629 | /* They had a few time units to dump stack, so complain. */ |
555 | print_other_cpu_stall(rsp); | 630 | print_other_cpu_stall(rsp); |
556 | } | 631 | } |
557 | } | 632 | } |
@@ -587,26 +662,6 @@ static void __init check_cpu_stall_init(void) | |||
587 | atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block); | 662 | atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block); |
588 | } | 663 | } |
589 | 664 | ||
590 | #else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ | ||
591 | |||
592 | static void record_gp_stall_check_time(struct rcu_state *rsp) | ||
593 | { | ||
594 | } | ||
595 | |||
596 | static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) | ||
597 | { | ||
598 | } | ||
599 | |||
600 | void rcu_cpu_stall_reset(void) | ||
601 | { | ||
602 | } | ||
603 | |||
604 | static void __init check_cpu_stall_init(void) | ||
605 | { | ||
606 | } | ||
607 | |||
608 | #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ | ||
609 | |||
610 | /* | 665 | /* |
611 | * Update CPU-local rcu_data state to record the newly noticed grace period. | 666 | * Update CPU-local rcu_data state to record the newly noticed grace period. |
612 | * This is used both when we started the grace period and when we notice | 667 | * This is used both when we started the grace period and when we notice |
@@ -809,6 +864,7 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) | |||
809 | rnp->completed = rsp->completed; | 864 | rnp->completed = rsp->completed; |
810 | rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */ | 865 | rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */ |
811 | rcu_start_gp_per_cpu(rsp, rnp, rdp); | 866 | rcu_start_gp_per_cpu(rsp, rnp, rdp); |
867 | rcu_preempt_boost_start_gp(rnp); | ||
812 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 868 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
813 | return; | 869 | return; |
814 | } | 870 | } |
@@ -844,6 +900,7 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) | |||
844 | rnp->completed = rsp->completed; | 900 | rnp->completed = rsp->completed; |
845 | if (rnp == rdp->mynode) | 901 | if (rnp == rdp->mynode) |
846 | rcu_start_gp_per_cpu(rsp, rnp, rdp); | 902 | rcu_start_gp_per_cpu(rsp, rnp, rdp); |
903 | rcu_preempt_boost_start_gp(rnp); | ||
847 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 904 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
848 | } | 905 | } |
849 | 906 | ||
@@ -864,7 +921,18 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) | |||
864 | static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) | 921 | static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) |
865 | __releases(rcu_get_root(rsp)->lock) | 922 | __releases(rcu_get_root(rsp)->lock) |
866 | { | 923 | { |
924 | unsigned long gp_duration; | ||
925 | |||
867 | WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); | 926 | WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); |
927 | |||
928 | /* | ||
929 | * Ensure that all grace-period and pre-grace-period activity | ||
930 | * is seen before the assignment to rsp->completed. | ||
931 | */ | ||
932 | smp_mb(); /* See above block comment. */ | ||
933 | gp_duration = jiffies - rsp->gp_start; | ||
934 | if (gp_duration > rsp->gp_max) | ||
935 | rsp->gp_max = gp_duration; | ||
868 | rsp->completed = rsp->gpnum; | 936 | rsp->completed = rsp->gpnum; |
869 | rsp->signaled = RCU_GP_IDLE; | 937 | rsp->signaled = RCU_GP_IDLE; |
870 | rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */ | 938 | rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */ |
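The new smp_mb() ensures that every grace-period and pre-grace-period access is globally visible before other CPUs can observe the updated rsp->completed. The same publish-after-the-work discipline can be expressed with C11 release/acquire ordering; this is only an analogy, since the kernel relies on smp_mb() and the rcu_node locking rather than these primitives:

#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned long completed;
static unsigned long shared_result; /* written during the "grace period" */

/* Writer: finish all grace-period work, then publish the new number. */
static void end_grace_period(unsigned long gpnum)
{
    shared_result = gpnum * 100; /* stands in for pre-completion work */
    /* Release ordering plays the role of the smp_mb() before ->completed. */
    atomic_store_explicit(&completed, gpnum, memory_order_release);
}

/* Reader: if it sees the new completed value, it must also see the work. */
static void check(unsigned long gpnum)
{
    if (atomic_load_explicit(&completed, memory_order_acquire) == gpnum)
        printf("result %lu is visible\n", shared_result);
}

int main(void)
{
    end_grace_period(1);
    check(1);
    return 0;
}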
@@ -894,7 +962,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, | |||
894 | return; | 962 | return; |
895 | } | 963 | } |
896 | rnp->qsmask &= ~mask; | 964 | rnp->qsmask &= ~mask; |
897 | if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) { | 965 | if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) { |
898 | 966 | ||
899 | /* Other bits still set at this level, so done. */ | 967 | /* Other bits still set at this level, so done. */ |
900 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 968 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
@@ -1037,6 +1105,8 @@ static void rcu_send_cbs_to_online(struct rcu_state *rsp) | |||
1037 | /* | 1105 | /* |
1038 | * Remove the outgoing CPU from the bitmasks in the rcu_node hierarchy | 1106 | * Remove the outgoing CPU from the bitmasks in the rcu_node hierarchy |
1039 | * and move all callbacks from the outgoing CPU to the current one. | 1107 | * and move all callbacks from the outgoing CPU to the current one. |
1108 | * There can only be one CPU hotplug operation at a time, so no other | ||
1109 | * CPU can be attempting to update rcu_cpu_kthread_task. | ||
1040 | */ | 1110 | */ |
1041 | static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) | 1111 | static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) |
1042 | { | 1112 | { |
@@ -1046,6 +1116,8 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) | |||
1046 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); | 1116 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); |
1047 | struct rcu_node *rnp; | 1117 | struct rcu_node *rnp; |
1048 | 1118 | ||
1119 | rcu_stop_cpu_kthread(cpu); | ||
1120 | |||
1049 | /* Exclude any attempts to start a new grace period. */ | 1121 | /* Exclude any attempts to start a new grace period. */ |
1050 | raw_spin_lock_irqsave(&rsp->onofflock, flags); | 1122 | raw_spin_lock_irqsave(&rsp->onofflock, flags); |
1051 | 1123 | ||
@@ -1082,6 +1154,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) | |||
1082 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1154 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1083 | if (need_report & RCU_OFL_TASKS_EXP_GP) | 1155 | if (need_report & RCU_OFL_TASKS_EXP_GP) |
1084 | rcu_report_exp_rnp(rsp, rnp); | 1156 | rcu_report_exp_rnp(rsp, rnp); |
1157 | rcu_node_kthread_setaffinity(rnp, -1); | ||
1085 | } | 1158 | } |
1086 | 1159 | ||
1087 | /* | 1160 | /* |
@@ -1143,7 +1216,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1143 | next = list->next; | 1216 | next = list->next; |
1144 | prefetch(next); | 1217 | prefetch(next); |
1145 | debug_rcu_head_unqueue(list); | 1218 | debug_rcu_head_unqueue(list); |
1146 | list->func(list); | 1219 | __rcu_reclaim(list); |
1147 | list = next; | 1220 | list = next; |
1148 | if (++count >= rdp->blimit) | 1221 | if (++count >= rdp->blimit) |
1149 | break; | 1222 | break; |
@@ -1179,7 +1252,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1179 | 1252 | ||
1180 | /* Re-raise the RCU softirq if there are callbacks remaining. */ | 1253 | /* Re-raise the RCU softirq if there are callbacks remaining. */ |
1181 | if (cpu_has_callbacks_ready_to_invoke(rdp)) | 1254 | if (cpu_has_callbacks_ready_to_invoke(rdp)) |
1182 | raise_softirq(RCU_SOFTIRQ); | 1255 | invoke_rcu_core(); |
1183 | } | 1256 | } |
1184 | 1257 | ||
1185 | /* | 1258 | /* |
@@ -1225,7 +1298,7 @@ void rcu_check_callbacks(int cpu, int user) | |||
1225 | } | 1298 | } |
1226 | rcu_preempt_check_callbacks(cpu); | 1299 | rcu_preempt_check_callbacks(cpu); |
1227 | if (rcu_pending(cpu)) | 1300 | if (rcu_pending(cpu)) |
1228 | raise_softirq(RCU_SOFTIRQ); | 1301 | invoke_rcu_core(); |
1229 | } | 1302 | } |
1230 | 1303 | ||
1231 | #ifdef CONFIG_SMP | 1304 | #ifdef CONFIG_SMP |
@@ -1233,6 +1306,8 @@ void rcu_check_callbacks(int cpu, int user) | |||
1233 | /* | 1306 | /* |
1234 | * Scan the leaf rcu_node structures, processing dyntick state for any that | 1307 | * Scan the leaf rcu_node structures, processing dyntick state for any that |
1235 | * have not yet encountered a quiescent state, using the function specified. | 1308 | * have not yet encountered a quiescent state, using the function specified. |
1309 | * Also initiate boosting for any threads blocked on the root rcu_node. | ||
1310 | * | ||
1236 | * The caller must have suppressed start of new grace periods. | 1311 | * The caller must have suppressed start of new grace periods. |
1237 | */ | 1312 | */ |
1238 | static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *)) | 1313 | static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *)) |
@@ -1251,7 +1326,7 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *)) | |||
1251 | return; | 1326 | return; |
1252 | } | 1327 | } |
1253 | if (rnp->qsmask == 0) { | 1328 | if (rnp->qsmask == 0) { |
1254 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1329 | rcu_initiate_boost(rnp, flags); /* releases rnp->lock */ |
1255 | continue; | 1330 | continue; |
1256 | } | 1331 | } |
1257 | cpu = rnp->grplo; | 1332 | cpu = rnp->grplo; |
@@ -1269,6 +1344,11 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *)) | |||
1269 | } | 1344 | } |
1270 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1345 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1271 | } | 1346 | } |
1347 | rnp = rcu_get_root(rsp); | ||
1348 | if (rnp->qsmask == 0) { | ||
1349 | raw_spin_lock_irqsave(&rnp->lock, flags); | ||
1350 | rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */ | ||
1351 | } | ||
1272 | } | 1352 | } |
1273 | 1353 | ||
1274 | /* | 1354 | /* |
@@ -1383,7 +1463,8 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1383 | } | 1463 | } |
1384 | 1464 | ||
1385 | /* If there are callbacks ready, invoke them. */ | 1465 | /* If there are callbacks ready, invoke them. */ |
1386 | rcu_do_batch(rsp, rdp); | 1466 | if (cpu_has_callbacks_ready_to_invoke(rdp)) |
1467 | invoke_rcu_callbacks(rsp, rdp); | ||
1387 | } | 1468 | } |
1388 | 1469 | ||
1389 | /* | 1470 | /* |
@@ -1391,29 +1472,37 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1391 | */ | 1472 | */ |
1392 | static void rcu_process_callbacks(struct softirq_action *unused) | 1473 | static void rcu_process_callbacks(struct softirq_action *unused) |
1393 | { | 1474 | { |
1394 | /* | ||
1395 | * Memory references from any prior RCU read-side critical sections | ||
1396 | * executed by the interrupted code must be seen before any RCU | ||
1397 | * grace-period manipulations below. | ||
1398 | */ | ||
1399 | smp_mb(); /* See above block comment. */ | ||
1400 | |||
1401 | __rcu_process_callbacks(&rcu_sched_state, | 1475 | __rcu_process_callbacks(&rcu_sched_state, |
1402 | &__get_cpu_var(rcu_sched_data)); | 1476 | &__get_cpu_var(rcu_sched_data)); |
1403 | __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); | 1477 | __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); |
1404 | rcu_preempt_process_callbacks(); | 1478 | rcu_preempt_process_callbacks(); |
1405 | 1479 | ||
1406 | /* | ||
1407 | * Memory references from any later RCU read-side critical sections | ||
1408 | * executed by the interrupted code must be seen after any RCU | ||
1409 | * grace-period manipulations above. | ||
1410 | */ | ||
1411 | smp_mb(); /* See above block comment. */ | ||
1412 | |||
1413 | /* If we are last CPU on way to dyntick-idle mode, accelerate it. */ | 1480 | /* If we are last CPU on way to dyntick-idle mode, accelerate it. */ |
1414 | rcu_needs_cpu_flush(); | 1481 | rcu_needs_cpu_flush(); |
1415 | } | 1482 | } |
1416 | 1483 | ||
1484 | /* | ||
1485 | * Wake up the current CPU's kthread. This replaces raise_softirq() | ||
1486 | * in earlier versions of RCU. Note that because we are running on | ||
1487 | * the current CPU with interrupts disabled, the rcu_cpu_kthread_task | ||
1488 | * cannot disappear out from under us. | ||
1489 | */ | ||
1490 | static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) | ||
1491 | { | ||
1492 | if (unlikely(!ACCESS_ONCE(rcu_scheduler_fully_active))) | ||
1493 | return; | ||
1494 | if (likely(!rsp->boost)) { | ||
1495 | rcu_do_batch(rsp, rdp); | ||
1496 | return; | ||
1497 | } | ||
1498 | invoke_rcu_callbacks_kthread(); | ||
1499 | } | ||
1500 | |||
1501 | static void invoke_rcu_core(void) | ||
1502 | { | ||
1503 | raise_softirq(RCU_SOFTIRQ); | ||
1504 | } | ||
1505 | |||
1417 | static void | 1506 | static void |
1418 | __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), | 1507 | __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), |
1419 | struct rcu_state *rsp) | 1508 | struct rcu_state *rsp) |
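The callback path is now split in two: invoke_rcu_core() still just raises RCU_SOFTIRQ, while invoke_rcu_callbacks() runs the batch directly unless the flavor has priority boosting enabled, in which case the per-CPU kthread is woken instead. A rough user-space sketch of that dispatch shape using a pthread worker; none of the names below are kernel APIs:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cond = PTHREAD_COND_INITIALIZER;
static bool have_work;
static bool boosted; /* stand-in for rsp->boost */

static void do_batch(const char *who)
{
    printf("%s: invoking callbacks\n", who);
}

/* Worker thread playing the role of the per-CPU kthread. */
static void *kthread(void *arg)
{
    (void)arg;
    pthread_mutex_lock(&lock);
    while (!have_work)
        pthread_cond_wait(&cond, &lock);
    have_work = false;
    pthread_mutex_unlock(&lock);
    do_batch("kthread");
    return NULL;
}

/* Either run the batch inline or hand it to the worker. */
static void invoke_callbacks(void)
{
    if (!boosted) {
        do_batch("inline");
        return;
    }
    pthread_mutex_lock(&lock);
    have_work = true;
    pthread_cond_signal(&cond);
    pthread_mutex_unlock(&lock);
}

int main(void)
{
    pthread_t tid;

    invoke_callbacks(); /* boosting off: runs inline */
    boosted = true;
    pthread_create(&tid, NULL, kthread, NULL);
    invoke_callbacks(); /* boosting on: wakes the worker */
    pthread_join(tid, NULL);
    return 0;
}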
@@ -1439,6 +1528,13 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), | |||
1439 | /* Add the callback to our list. */ | 1528 | /* Add the callback to our list. */ |
1440 | *rdp->nxttail[RCU_NEXT_TAIL] = head; | 1529 | *rdp->nxttail[RCU_NEXT_TAIL] = head; |
1441 | rdp->nxttail[RCU_NEXT_TAIL] = &head->next; | 1530 | rdp->nxttail[RCU_NEXT_TAIL] = &head->next; |
1531 | rdp->qlen++; | ||
1532 | |||
1533 | /* If interrupts were disabled, don't dive into RCU core. */ | ||
1534 | if (irqs_disabled_flags(flags)) { | ||
1535 | local_irq_restore(flags); | ||
1536 | return; | ||
1537 | } | ||
1442 | 1538 | ||
1443 | /* | 1539 | /* |
1444 | * Force the grace period if too many callbacks or too long waiting. | 1540 | * Force the grace period if too many callbacks or too long waiting. |
@@ -1447,7 +1543,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), | |||
1447 | * invoking force_quiescent_state() if the newly enqueued callback | 1543 | * invoking force_quiescent_state() if the newly enqueued callback |
1448 | * is the only one waiting for a grace period to complete. | 1544 | * is the only one waiting for a grace period to complete. |
1449 | */ | 1545 | */ |
1450 | if (unlikely(++rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) { | 1546 | if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) { |
1451 | 1547 | ||
1452 | /* Are we ignoring a completed grace period? */ | 1548 | /* Are we ignoring a completed grace period? */ |
1453 | rcu_process_gp_end(rsp, rdp); | 1549 | rcu_process_gp_end(rsp, rdp); |
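Note that ->qlen is now incremented before the early return for callers with interrupts disabled, and the expensive path that re-checks grace-period state only runs once the queue has grown by more than qhimark entries since the last such check. A small standalone sketch of that high-water-mark pattern; the threshold value and the slow_path() body are placeholders:

#include <stdio.h>

#define QHIMARK 10000 /* placeholder threshold for the sketch */

struct cb_queue {
    unsigned long qlen;                /* callbacks currently queued */
    unsigned long qlen_last_fqs_check; /* qlen when we last took the slow path */
};

/* Placeholder for the expensive "push the grace period along" work. */
static void slow_path(struct cb_queue *q)
{
    printf("slow path at qlen=%lu\n", q->qlen);
    q->qlen_last_fqs_check = q->qlen;
}

static void enqueue(struct cb_queue *q)
{
    q->qlen++; /* cheap common case: just count the new callback */
    if (q->qlen > q->qlen_last_fqs_check + QHIMARK)
        slow_path(q);
}

int main(void)
{
    struct cb_queue q = { 0, 0 };

    for (int i = 0; i < 25000; i++)
        enqueue(&q); /* slow path fires roughly every QHIMARK callbacks */
    return 0;
}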
@@ -1583,7 +1679,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1583 | * or RCU-bh, force a local reschedule. | 1679 | * or RCU-bh, force a local reschedule. |
1584 | */ | 1680 | */ |
1585 | rdp->n_rp_qs_pending++; | 1681 | rdp->n_rp_qs_pending++; |
1586 | if (!rdp->preemptable && | 1682 | if (!rdp->preemptible && |
1587 | ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs) - 1, | 1683 | ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs) - 1, |
1588 | jiffies)) | 1684 | jiffies)) |
1589 | set_need_resched(); | 1685 | set_need_resched(); |
@@ -1760,7 +1856,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) | |||
1760 | * that this CPU cannot possibly have any RCU callbacks in flight yet. | 1856 | * that this CPU cannot possibly have any RCU callbacks in flight yet. |
1761 | */ | 1857 | */ |
1762 | static void __cpuinit | 1858 | static void __cpuinit |
1763 | rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable) | 1859 | rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) |
1764 | { | 1860 | { |
1765 | unsigned long flags; | 1861 | unsigned long flags; |
1766 | unsigned long mask; | 1862 | unsigned long mask; |
@@ -1772,7 +1868,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable) | |||
1772 | rdp->passed_quiesc = 0; /* We could be racing with new GP, */ | 1868 | rdp->passed_quiesc = 0; /* We could be racing with new GP, */ |
1773 | rdp->qs_pending = 1; /* so set up to respond to current GP. */ | 1869 | rdp->qs_pending = 1; /* so set up to respond to current GP. */ |
1774 | rdp->beenonline = 1; /* We have now been online. */ | 1870 | rdp->beenonline = 1; /* We have now been online. */ |
1775 | rdp->preemptable = preemptable; | 1871 | rdp->preemptible = preemptible; |
1776 | rdp->qlen_last_fqs_check = 0; | 1872 | rdp->qlen_last_fqs_check = 0; |
1777 | rdp->n_force_qs_snap = rsp->n_force_qs; | 1873 | rdp->n_force_qs_snap = rsp->n_force_qs; |
1778 | rdp->blimit = blimit; | 1874 | rdp->blimit = blimit; |
@@ -1806,7 +1902,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable) | |||
1806 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); | 1902 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); |
1807 | } | 1903 | } |
1808 | 1904 | ||
1809 | static void __cpuinit rcu_online_cpu(int cpu) | 1905 | static void __cpuinit rcu_prepare_cpu(int cpu) |
1810 | { | 1906 | { |
1811 | rcu_init_percpu_data(cpu, &rcu_sched_state, 0); | 1907 | rcu_init_percpu_data(cpu, &rcu_sched_state, 0); |
1812 | rcu_init_percpu_data(cpu, &rcu_bh_state, 0); | 1908 | rcu_init_percpu_data(cpu, &rcu_bh_state, 0); |
@@ -1820,11 +1916,23 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, | |||
1820 | unsigned long action, void *hcpu) | 1916 | unsigned long action, void *hcpu) |
1821 | { | 1917 | { |
1822 | long cpu = (long)hcpu; | 1918 | long cpu = (long)hcpu; |
1919 | struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); | ||
1920 | struct rcu_node *rnp = rdp->mynode; | ||
1823 | 1921 | ||
1824 | switch (action) { | 1922 | switch (action) { |
1825 | case CPU_UP_PREPARE: | 1923 | case CPU_UP_PREPARE: |
1826 | case CPU_UP_PREPARE_FROZEN: | 1924 | case CPU_UP_PREPARE_FROZEN: |
1827 | rcu_online_cpu(cpu); | 1925 | rcu_prepare_cpu(cpu); |
1926 | rcu_prepare_kthreads(cpu); | ||
1927 | break; | ||
1928 | case CPU_ONLINE: | ||
1929 | case CPU_DOWN_FAILED: | ||
1930 | rcu_node_kthread_setaffinity(rnp, -1); | ||
1931 | rcu_cpu_kthread_setrt(cpu, 1); | ||
1932 | break; | ||
1933 | case CPU_DOWN_PREPARE: | ||
1934 | rcu_node_kthread_setaffinity(rnp, cpu); | ||
1935 | rcu_cpu_kthread_setrt(cpu, 0); | ||
1828 | break; | 1936 | break; |
1829 | case CPU_DYING: | 1937 | case CPU_DYING: |
1830 | case CPU_DYING_FROZEN: | 1938 | case CPU_DYING_FROZEN: |
@@ -1943,10 +2051,7 @@ static void __init rcu_init_one(struct rcu_state *rsp, | |||
1943 | j / rsp->levelspread[i - 1]; | 2051 | j / rsp->levelspread[i - 1]; |
1944 | } | 2052 | } |
1945 | rnp->level = i; | 2053 | rnp->level = i; |
1946 | INIT_LIST_HEAD(&rnp->blocked_tasks[0]); | 2054 | INIT_LIST_HEAD(&rnp->blkd_tasks); |
1947 | INIT_LIST_HEAD(&rnp->blocked_tasks[1]); | ||
1948 | INIT_LIST_HEAD(&rnp->blocked_tasks[2]); | ||
1949 | INIT_LIST_HEAD(&rnp->blocked_tasks[3]); | ||
1950 | } | 2055 | } |
1951 | } | 2056 | } |
1952 | 2057 | ||
@@ -1968,7 +2073,7 @@ void __init rcu_init(void) | |||
1968 | rcu_init_one(&rcu_sched_state, &rcu_sched_data); | 2073 | rcu_init_one(&rcu_sched_state, &rcu_sched_data); |
1969 | rcu_init_one(&rcu_bh_state, &rcu_bh_data); | 2074 | rcu_init_one(&rcu_bh_state, &rcu_bh_data); |
1970 | __rcu_init_preempt(); | 2075 | __rcu_init_preempt(); |
1971 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); | 2076 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); |
1972 | 2077 | ||
1973 | /* | 2078 | /* |
1974 | * We don't need protection against CPU-hotplug here because | 2079 | * We don't need protection against CPU-hotplug here because |