author     Linus Torvalds <torvalds@linux-foundation.org>   2011-05-28 15:56:32 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2011-05-28 15:56:32 -0400
commit     1ba4b8cb94e59b17fd0142a509eb583695c36db6 (patch)
tree       e42d1967025670401758d32964a5fa048f59f10a
parent     c4a227d89f758e582fd167bb15245f2704de99ef (diff)
parent     cc3ce5176d83cd8ae1134f86e208ea758d6cb78e (diff)
Merge branch 'core-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'core-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  rcu: Start RCU kthreads in TASK_INTERRUPTIBLE state
  rcu: Remove waitqueue usage for cpu, node, and boost kthreads
  rcu: Avoid acquiring rcu_node locks in timer functions
  atomic: Add atomic_or()
  Documentation: Add statistics about nested locks
  rcu: Decrease memory-barrier usage based on semi-formal proof
  rcu: Make rcu_enter_nohz() pay attention to nesting
  rcu: Don't do reschedule unless in irq
  rcu: Remove old memory barriers from rcu_process_callbacks()
  rcu: Add memory barriers
  rcu: Fix unpaired rcu_irq_enter() from locking selftests
 Documentation/RCU/trace.txt |  17
 Documentation/lockstat.txt  |  36
 include/linux/atomic.h      |  13
 kernel/rcutree.c            | 164
 kernel/rcutree.h            |  30
 kernel/rcutree_plugin.h     |  24
 kernel/rcutree_trace.c      |  12
 lib/locking-selftest.c      |   2
 8 files changed, 147 insertions(+), 151 deletions(-)
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt
index c078ad48f7a1..8173cec473aa 100644
--- a/Documentation/RCU/trace.txt
+++ b/Documentation/RCU/trace.txt
@@ -99,18 +99,11 @@ o	"qp" indicates that RCU still expects a quiescent state from
 
 o	"dt" is the current value of the dyntick counter that is incremented
 	when entering or leaving dynticks idle state, either by the
-	scheduler or by irq.  The number after the "/" is the interrupt
-	nesting depth when in dyntick-idle state, or one greater than
-	the interrupt-nesting depth otherwise.
-
-	This field is displayed only for CONFIG_NO_HZ kernels.
-
-o	"dn" is the current value of the dyntick counter that is incremented
-	when entering or leaving dynticks idle state via NMI.  If both
-	the "dt" and "dn" values are even, then this CPU is in dynticks
-	idle mode and may be ignored by RCU.  If either of these two
-	counters is odd, then RCU must be alert to the possibility of
-	an RCU read-side critical section running on this CPU.
+	scheduler or by irq.  This number is even if the CPU is in
+	dyntick idle mode and odd otherwise.  The number after the first
+	"/" is the interrupt nesting depth when in dyntick-idle state,
+	or one greater than the interrupt-nesting depth otherwise.
+	The number after the second "/" is the NMI nesting depth.
 
 	This field is displayed only for CONFIG_NO_HZ kernels.
 
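
Note: the even/odd convention documented above is what the force-quiescent-state
code later in this merge keys off of.  A standalone sketch of the two checks,
with hypothetical helper names (the kernel's real versions are
dyntick_save_progress_counter() and rcu_implicit_dynticks_qs() in
kernel/rcutree.c below):

	/* A CPU is in dyntick-idle mode while its counter is even. */
	static bool cpu_is_dyntick_idle(unsigned int dynticks)
	{
		return (dynticks & 0x1) == 0;
	}

	/*
	 * A CPU passed through a quiescent state since "snap" was taken
	 * if it is idle now, or if the counter advanced by at least one
	 * full enter/exit pair (two increments).
	 */
	static bool cpu_passed_qs(unsigned int curr, unsigned int snap)
	{
		return (curr & 0x1) == 0 || (curr - snap) >= 2;
	}
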
diff --git a/Documentation/lockstat.txt b/Documentation/lockstat.txt
index 9c0a80d17a23..cef00d42ed5b 100644
--- a/Documentation/lockstat.txt
+++ b/Documentation/lockstat.txt
@@ -12,8 +12,9 @@ Because things like lock contention can severely impact performance.
 - HOW
 
 Lockdep already has hooks in the lock functions and maps lock instances to
-lock classes. We build on that. The graph below shows the relation between
-the lock functions and the various hooks therein.
+lock classes. We build on that (see Documentation/lockdep-design.txt).
+The graph below shows the relation between the lock functions and the various
+hooks therein.
 
                 __acquire
                     |
@@ -128,6 +129,37 @@ points are the points we're contending with.
 
 The integer part of the time values is in us.
 
+Dealing with nested locks, subclasses may appear:
+
+32.......................................................................................................................................................................................................
+33
+34                             &rq->lock:      13128      13128       0.43     190.53  103881.26      97454    3453404       0.00     401.11  13224683.11
+35                             ---------
+36                             &rq->lock        645  [<ffffffff8103bfc4>] task_rq_lock+0x43/0x75
+37                             &rq->lock        297  [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a
+38                             &rq->lock        360  [<ffffffff8103c4c5>] select_task_rq_fair+0x1f0/0x74a
+39                             &rq->lock        428  [<ffffffff81045f98>] scheduler_tick+0x46/0x1fb
+40                             ---------
+41                             &rq->lock         77  [<ffffffff8103bfc4>] task_rq_lock+0x43/0x75
+42                             &rq->lock        174  [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a
+43                             &rq->lock       4715  [<ffffffff8103ed4b>] double_rq_lock+0x42/0x54
+44                             &rq->lock        893  [<ffffffff81340524>] schedule+0x157/0x7b8
+45
+46.......................................................................................................................................................................................................
+47
+48                           &rq->lock/1:      11526      11488       0.33     388.73  136294.31      21461      38404       0.00      37.93   109388.53
+49                           -----------
+50                           &rq->lock/1      11526  [<ffffffff8103ed58>] double_rq_lock+0x4f/0x54
+51                           -----------
+52                           &rq->lock/1       5645  [<ffffffff8103ed4b>] double_rq_lock+0x42/0x54
+53                           &rq->lock/1       1224  [<ffffffff81340524>] schedule+0x157/0x7b8
+54                           &rq->lock/1       4336  [<ffffffff8103ed58>] double_rq_lock+0x4f/0x54
+55                           &rq->lock/1        181  [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a
+
+Line 48 shows statistics for the second subclass (/1) of &rq->lock class
+(subclass starts from 0), since in this case, as line 50 suggests,
+double_rq_lock actually acquires a nested lock of two spinlocks.
+
 View the top contending locks:
 
 # grep : /proc/lock_stat | head
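
Note: the "/1" subclass above is the result of a nesting annotation, not a
second lock class.  A simplified sketch of the pattern (this mirrors the
scheduler's double_rq_lock(), but is not its exact code):

	/* Take two locks of the same class without tripping lockdep: */
	/* the second acquisition is marked as subclass 1.            */
	static void double_lock_sketch(raw_spinlock_t *a, raw_spinlock_t *b)
	{
		raw_spin_lock(a);				/* subclass 0 -> "&rq->lock"   */
		raw_spin_lock_nested(b, SINGLE_DEPTH_NESTING);	/* subclass 1 -> "&rq->lock/1" */
	}
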
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 96c038e43d66..ee456c79b0e6 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -34,4 +34,17 @@ static inline int atomic_inc_not_zero_hint(atomic_t *v, int hint)
 }
 #endif
 
+#ifndef CONFIG_ARCH_HAS_ATOMIC_OR
+static inline void atomic_or(int i, atomic_t *v)
+{
+	int old;
+	int new;
+
+	do {
+		old = atomic_read(v);
+		new = old | i;
+	} while (atomic_cmpxchg(v, old, new) != old);
+}
+#endif /* #ifndef CONFIG_ARCH_HAS_ATOMIC_OR */
+
 #endif /* _LINUX_ATOMIC_H */
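
Note: this generic fallback builds the OR out of a compare-and-exchange loop:
read the current value, OR in the requested bits, and retry if another CPU
changed the word in between.  Its caller in this merge is
rcu_cpu_kthread_timer() in kernel/rcutree.c, which uses it to set a bit in
rnp->wakemask without taking rnp->lock from a timer handler.  A minimal usage
sketch (hypothetical names):

	static atomic_t pending_work = ATOMIC_INIT(0);

	static void mark_work_pending(int bits)
	{
		atomic_or(bits, &pending_work);	/* loops until its cmpxchg wins */
	}
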
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index f07d2f03181a..77a7671dd147 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -36,7 +36,7 @@
 #include <linux/interrupt.h>
 #include <linux/sched.h>
 #include <linux/nmi.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
 #include <linux/bitops.h>
 #include <linux/module.h>
 #include <linux/completion.h>
@@ -95,7 +95,6 @@ static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
 DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
 DEFINE_PER_CPU(int, rcu_cpu_kthread_cpu);
 DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
-static DEFINE_PER_CPU(wait_queue_head_t, rcu_cpu_wq);
 DEFINE_PER_CPU(char, rcu_cpu_has_work);
 static char rcu_kthreads_spawnable;
 
@@ -163,7 +162,7 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch);
 #ifdef CONFIG_NO_HZ
 DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
 	.dynticks_nesting = 1,
-	.dynticks = 1,
+	.dynticks = ATOMIC_INIT(1),
 };
 #endif /* #ifdef CONFIG_NO_HZ */
 
@@ -322,13 +321,25 @@ void rcu_enter_nohz(void)
 	unsigned long flags;
 	struct rcu_dynticks *rdtp;
 
-	smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
 	local_irq_save(flags);
 	rdtp = &__get_cpu_var(rcu_dynticks);
-	rdtp->dynticks++;
-	rdtp->dynticks_nesting--;
-	WARN_ON_ONCE(rdtp->dynticks & 0x1);
+	if (--rdtp->dynticks_nesting) {
+		local_irq_restore(flags);
+		return;
+	}
+	/* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
+	smp_mb__before_atomic_inc();  /* See above. */
+	atomic_inc(&rdtp->dynticks);
+	smp_mb__after_atomic_inc();  /* Force ordering with next sojourn. */
+	WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
 	local_irq_restore(flags);
+
+	/* If the interrupt queued a callback, get out of dyntick mode. */
+	if (in_irq() &&
+	    (__get_cpu_var(rcu_sched_data).nxtlist ||
+	     __get_cpu_var(rcu_bh_data).nxtlist ||
+	     rcu_preempt_needs_cpu(smp_processor_id())))
+		set_need_resched();
 }
 
 /*
@@ -344,11 +355,16 @@ void rcu_exit_nohz(void)
 
 	local_irq_save(flags);
 	rdtp = &__get_cpu_var(rcu_dynticks);
-	rdtp->dynticks++;
-	rdtp->dynticks_nesting++;
-	WARN_ON_ONCE(!(rdtp->dynticks & 0x1));
+	if (rdtp->dynticks_nesting++) {
+		local_irq_restore(flags);
+		return;
+	}
+	smp_mb__before_atomic_inc();  /* Force ordering w/previous sojourn. */
+	atomic_inc(&rdtp->dynticks);
+	/* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
+	smp_mb__after_atomic_inc();  /* See above. */
+	WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
 	local_irq_restore(flags);
-	smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
 }
 
 /**
@@ -362,11 +378,15 @@ void rcu_nmi_enter(void)
 {
 	struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
 
-	if (rdtp->dynticks & 0x1)
+	if (rdtp->dynticks_nmi_nesting == 0 &&
+	    (atomic_read(&rdtp->dynticks) & 0x1))
 		return;
-	rdtp->dynticks_nmi++;
-	WARN_ON_ONCE(!(rdtp->dynticks_nmi & 0x1));
-	smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
+	rdtp->dynticks_nmi_nesting++;
+	smp_mb__before_atomic_inc();  /* Force delay from prior write. */
+	atomic_inc(&rdtp->dynticks);
+	/* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
+	smp_mb__after_atomic_inc();  /* See above. */
+	WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
 }
 
 /**
@@ -380,11 +400,14 @@ void rcu_nmi_exit(void)
 {
 	struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
 
-	if (rdtp->dynticks & 0x1)
+	if (rdtp->dynticks_nmi_nesting == 0 ||
+	    --rdtp->dynticks_nmi_nesting != 0)
 		return;
-	smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
-	rdtp->dynticks_nmi++;
-	WARN_ON_ONCE(rdtp->dynticks_nmi & 0x1);
+	/* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
+	smp_mb__before_atomic_inc();  /* See above. */
+	atomic_inc(&rdtp->dynticks);
+	smp_mb__after_atomic_inc();  /* Force delay to next write. */
+	WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
 }
 
 /**
@@ -395,13 +418,7 @@ void rcu_nmi_exit(void)
  */
 void rcu_irq_enter(void)
 {
-	struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
-
-	if (rdtp->dynticks_nesting++)
-		return;
-	rdtp->dynticks++;
-	WARN_ON_ONCE(!(rdtp->dynticks & 0x1));
-	smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
+	rcu_exit_nohz();
 }
 
 /**
@@ -413,18 +430,7 @@ void rcu_irq_enter(void)
  */
 void rcu_irq_exit(void)
 {
-	struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
-
-	if (--rdtp->dynticks_nesting)
-		return;
-	smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
-	rdtp->dynticks++;
-	WARN_ON_ONCE(rdtp->dynticks & 0x1);
-
-	/* If the interrupt queued a callback, get out of dyntick mode. */
-	if (__this_cpu_read(rcu_sched_data.nxtlist) ||
-	    __this_cpu_read(rcu_bh_data.nxtlist))
-		set_need_resched();
+	rcu_enter_nohz();
 }
 
 #ifdef CONFIG_SMP
@@ -436,19 +442,8 @@ void rcu_irq_exit(void)
  */
 static int dyntick_save_progress_counter(struct rcu_data *rdp)
 {
-	int ret;
-	int snap;
-	int snap_nmi;
-
-	snap = rdp->dynticks->dynticks;
-	snap_nmi = rdp->dynticks->dynticks_nmi;
-	smp_mb(); /* Order sampling of snap with end of grace period. */
-	rdp->dynticks_snap = snap;
-	rdp->dynticks_nmi_snap = snap_nmi;
-	ret = ((snap & 0x1) == 0) && ((snap_nmi & 0x1) == 0);
-	if (ret)
-		rdp->dynticks_fqs++;
-	return ret;
+	rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks);
+	return 0;
 }
 
 /*
@@ -459,16 +454,11 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp)
  */
 static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 {
-	long curr;
-	long curr_nmi;
-	long snap;
-	long snap_nmi;
+	unsigned long curr;
+	unsigned long snap;
 
-	curr = rdp->dynticks->dynticks;
-	snap = rdp->dynticks_snap;
-	curr_nmi = rdp->dynticks->dynticks_nmi;
-	snap_nmi = rdp->dynticks_nmi_snap;
-	smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
+	curr = (unsigned long)atomic_add_return(0, &rdp->dynticks->dynticks);
+	snap = (unsigned long)rdp->dynticks_snap;
 
 	/*
 	 * If the CPU passed through or entered a dynticks idle phase with
@@ -478,8 +468,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 	 * read-side critical section that started before the beginning
 	 * of the current RCU grace period.
 	 */
-	if ((curr != snap || (curr & 0x1) == 0) &&
-	    (curr_nmi != snap_nmi || (curr_nmi & 0x1) == 0)) {
+	if ((curr & 0x1) == 0 || ULONG_CMP_GE(curr, snap + 2)) {
 		rdp->dynticks_fqs++;
 		return 1;
 	}
@@ -908,6 +897,12 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
 	unsigned long gp_duration;
 
 	WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
+
+	/*
+	 * Ensure that all grace-period and pre-grace-period activity
+	 * is seen before the assignment to rsp->completed.
+	 */
+	smp_mb(); /* See above block comment. */
 	gp_duration = jiffies - rsp->gp_start;
 	if (gp_duration > rsp->gp_max)
 		rsp->gp_max = gp_duration;
@@ -1455,25 +1450,11 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
  */
 static void rcu_process_callbacks(void)
 {
-	/*
-	 * Memory references from any prior RCU read-side critical sections
-	 * executed by the interrupted code must be seen before any RCU
-	 * grace-period manipulations below.
-	 */
-	smp_mb(); /* See above block comment. */
-
 	__rcu_process_callbacks(&rcu_sched_state,
 				&__get_cpu_var(rcu_sched_data));
 	__rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
 	rcu_preempt_process_callbacks();
 
-	/*
-	 * Memory references from any later RCU read-side critical sections
-	 * executed by the interrupted code must be seen after any RCU
-	 * grace-period manipulations above.
-	 */
-	smp_mb(); /* See above block comment. */
-
 	/* If we are last CPU on way to dyntick-idle mode, accelerate it. */
 	rcu_needs_cpu_flush();
 }
@@ -1494,7 +1475,7 @@ static void invoke_rcu_cpu_kthread(void)
 		local_irq_restore(flags);
 		return;
 	}
-	wake_up(&__get_cpu_var(rcu_cpu_wq));
+	wake_up_process(__this_cpu_read(rcu_cpu_kthread_task));
 	local_irq_restore(flags);
 }
 
@@ -1544,13 +1525,10 @@ static void rcu_cpu_kthread_setrt(int cpu, int to_rt)
  */
 static void rcu_cpu_kthread_timer(unsigned long arg)
 {
-	unsigned long flags;
 	struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, arg);
 	struct rcu_node *rnp = rdp->mynode;
 
-	raw_spin_lock_irqsave(&rnp->lock, flags);
-	rnp->wakemask |= rdp->grpmask;
-	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	atomic_or(rdp->grpmask, &rnp->wakemask);
 	invoke_rcu_node_kthread(rnp);
 }
 
@@ -1617,14 +1595,12 @@ static int rcu_cpu_kthread(void *arg)
 	unsigned long flags;
 	int spincnt = 0;
 	unsigned int *statusp = &per_cpu(rcu_cpu_kthread_status, cpu);
-	wait_queue_head_t *wqp = &per_cpu(rcu_cpu_wq, cpu);
 	char work;
 	char *workp = &per_cpu(rcu_cpu_has_work, cpu);
 
 	for (;;) {
 		*statusp = RCU_KTHREAD_WAITING;
-		wait_event_interruptible(*wqp,
-					 *workp != 0 || kthread_should_stop());
+		rcu_wait(*workp != 0 || kthread_should_stop());
 		local_bh_disable();
 		if (rcu_cpu_kthread_should_stop(cpu)) {
 			local_bh_enable();
@@ -1672,10 +1648,10 @@ static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu)
 	if (IS_ERR(t))
 		return PTR_ERR(t);
 	kthread_bind(t, cpu);
+	set_task_state(t, TASK_INTERRUPTIBLE);
 	per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu;
 	WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL);
 	per_cpu(rcu_cpu_kthread_task, cpu) = t;
-	wake_up_process(t);
 	sp.sched_priority = RCU_KTHREAD_PRIO;
 	sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
 	return 0;
@@ -1698,11 +1674,10 @@ static int rcu_node_kthread(void *arg)
 
 	for (;;) {
 		rnp->node_kthread_status = RCU_KTHREAD_WAITING;
-		wait_event_interruptible(rnp->node_wq, rnp->wakemask != 0);
+		rcu_wait(atomic_read(&rnp->wakemask) != 0);
 		rnp->node_kthread_status = RCU_KTHREAD_RUNNING;
 		raw_spin_lock_irqsave(&rnp->lock, flags);
-		mask = rnp->wakemask;
-		rnp->wakemask = 0;
+		mask = atomic_xchg(&rnp->wakemask, 0);
 		rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */
 		for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) {
 			if ((mask & 0x1) == 0)
@@ -1781,9 +1756,9 @@ static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp,
 	if (IS_ERR(t))
 		return PTR_ERR(t);
 	raw_spin_lock_irqsave(&rnp->lock, flags);
+	set_task_state(t, TASK_INTERRUPTIBLE);
 	rnp->node_kthread_task = t;
 	raw_spin_unlock_irqrestore(&rnp->lock, flags);
-	wake_up_process(t);
 	sp.sched_priority = 99;
 	sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
 }
@@ -1800,21 +1775,16 @@ static int __init rcu_spawn_kthreads(void)
 
 	rcu_kthreads_spawnable = 1;
 	for_each_possible_cpu(cpu) {
-		init_waitqueue_head(&per_cpu(rcu_cpu_wq, cpu));
 		per_cpu(rcu_cpu_has_work, cpu) = 0;
 		if (cpu_online(cpu))
 			(void)rcu_spawn_one_cpu_kthread(cpu);
 	}
 	rnp = rcu_get_root(rcu_state);
-	init_waitqueue_head(&rnp->node_wq);
-	rcu_init_boost_waitqueue(rnp);
 	(void)rcu_spawn_one_node_kthread(rcu_state, rnp);
-	if (NUM_RCU_NODES > 1)
-		rcu_for_each_leaf_node(rcu_state, rnp) {
-			init_waitqueue_head(&rnp->node_wq);
-			rcu_init_boost_waitqueue(rnp);
+	if (NUM_RCU_NODES > 1) {
+		rcu_for_each_leaf_node(rcu_state, rnp)
 			(void)rcu_spawn_one_node_kthread(rcu_state, rnp);
-		}
+	}
 	return 0;
 }
 early_initcall(rcu_spawn_kthreads);
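
Note: both snapshot paths above read the counter with atomic_add_return(0, ...)
rather than atomic_read().  The difference is ordering: atomic_read() is a
plain load, while value-returning atomic operations imply full memory barriers
before and after the access, so the sample is ordered against the grace-period
machinery without the separate smp_mb() calls the old code needed.  A minimal
sketch of the idiom (hypothetical helper name):

	/* Read an atomic_t with full barriers on both sides. */
	static int ordered_sample(atomic_t *v)
	{
		return atomic_add_return(0, v);	/* adds nothing, orders everything */
	}
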
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 257664815d5d..7b9a08b4aaea 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -84,11 +84,9 @@
  * Dynticks per-CPU state.
  */
 struct rcu_dynticks {
-	int dynticks_nesting;	/* Track nesting level, sort of. */
-	int dynticks;		/* Even value for dynticks-idle, else odd. */
-	int dynticks_nmi;	/* Even value for either dynticks-idle or */
-				/*  not in nmi handler, else odd.  So this */
-				/*  remains even for nmi from irq handler. */
+	int dynticks_nesting;	/* Track irq/process nesting level. */
+	int dynticks_nmi_nesting; /* Track NMI nesting level. */
+	atomic_t dynticks;	/* Even value for dynticks-idle, else odd. */
 };
 
 /* RCU's kthread states for tracing. */
@@ -121,7 +119,9 @@ struct rcu_node {
 				/*  elements that need to drain to allow the */
 				/*  current expedited grace period to */
 				/*  complete (only for TREE_PREEMPT_RCU). */
-	unsigned long wakemask;	/* CPUs whose kthread needs to be awakened. */
+	atomic_t wakemask;	/* CPUs whose kthread needs to be awakened. */
+				/*  Since this has meaning only for leaf */
+				/*  rcu_node structures, 32 bits suffices. */
 	unsigned long qsmaskinit;
 				/* Per-GP initial value for qsmask & expmask. */
 	unsigned long grpmask;	/* Mask to apply to parent qsmask. */
@@ -159,9 +159,6 @@ struct rcu_node {
 	struct task_struct *boost_kthread_task;
 				/* kthread that takes care of priority */
 				/*  boosting for this rcu_node structure. */
-	wait_queue_head_t boost_wq;
-				/* Wait queue on which to park the boost */
-				/*  kthread. */
 	unsigned int boost_kthread_status;
 				/* State of boost_kthread_task for tracing. */
 	unsigned long n_tasks_boosted;
@@ -188,9 +185,6 @@ struct rcu_node {
 				/* kthread that takes care of this rcu_node */
 				/*  structure, for example, awakening the */
 				/*  per-CPU kthreads as needed. */
-	wait_queue_head_t node_wq;
-				/* Wait queue on which to park the per-node */
-				/*  kthread. */
 	unsigned int node_kthread_status;
 				/* State of node_kthread_task for tracing. */
 } ____cacheline_internodealigned_in_smp;
@@ -284,7 +278,6 @@ struct rcu_data {
 	/* 3) dynticks interface. */
 	struct rcu_dynticks *dynticks;	/* Shared per-CPU dynticks state. */
 	int dynticks_snap;		/* Per-GP tracking for dynticks. */
-	int dynticks_nmi_snap;		/* Per-GP tracking for dynticks_nmi. */
 #endif /* #ifdef CONFIG_NO_HZ */
 
 	/* 4) reasons this CPU needed to be kicked by force_quiescent_state */
@@ -337,6 +330,16 @@ struct rcu_data {
 						/*  scheduling clock irq */
 						/*  before ratting on them. */
 
+#define rcu_wait(cond)							\
+do {									\
+	for (;;) {							\
+		set_current_state(TASK_INTERRUPTIBLE);			\
+		if (cond)						\
+			break;						\
+		schedule();						\
+	}								\
+	__set_current_state(TASK_RUNNING);				\
+} while (0)
 
 /*
  * RCU global state, including node hierarchy.  This hierarchy is
@@ -446,7 +449,6 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu);
 static void rcu_preempt_send_cbs_to_online(void);
 static void __init __rcu_init_preempt(void);
 static void rcu_needs_cpu_flush(void);
-static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp);
 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
 static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp,
 					  cpumask_var_t cm);
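
Note: rcu_wait() open-codes what the removed waitqueues provided, namely the
set_current_state()-before-test ordering that makes a bare wake_up_process()
safe.  A condensed sketch of the pairing (illustrative variable names; the
real waiters are the kthread loops in this merge):

	/* Waiter: the rcu_wait() pattern. */
	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE);	/* 1: publish intent */
		if (*workp)				/* 2: then re-check  */
			break;
		schedule();				/* 3: really sleep   */
	}
	__set_current_state(TASK_RUNNING);

	/* Waker: make the condition true, then wake. */
	*workp = 1;					/* A */
	wake_up_process(t);				/* B */

If the waker runs between steps 2 and 3, step B moves the task back to
TASK_RUNNING, so the schedule() in step 3 returns immediately instead of
sleeping; the wakeup cannot be lost.
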
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 3f6559a5f5cd..a767b7dac365 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -1196,8 +1196,7 @@ static int rcu_boost_kthread(void *arg)
 
 	for (;;) {
 		rnp->boost_kthread_status = RCU_KTHREAD_WAITING;
-		wait_event_interruptible(rnp->boost_wq, rnp->boost_tasks ||
-							rnp->exp_tasks);
+		rcu_wait(rnp->boost_tasks || rnp->exp_tasks);
 		rnp->boost_kthread_status = RCU_KTHREAD_RUNNING;
 		more2boost = rcu_boost(rnp);
 		if (more2boost)
@@ -1275,14 +1274,6 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
 }
 
 /*
- * Initialize the RCU-boost waitqueue.
- */
-static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp)
-{
-	init_waitqueue_head(&rnp->boost_wq);
-}
-
-/*
  * Create an RCU-boost kthread for the specified node if one does not
  * already exist.  We only create this kthread for preemptible RCU.
  * Returns zero if all is well, a negated errno otherwise.
@@ -1304,9 +1295,9 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
 	if (IS_ERR(t))
 		return PTR_ERR(t);
 	raw_spin_lock_irqsave(&rnp->lock, flags);
+	set_task_state(t, TASK_INTERRUPTIBLE);
 	rnp->boost_kthread_task = t;
 	raw_spin_unlock_irqrestore(&rnp->lock, flags);
-	wake_up_process(t);
 	sp.sched_priority = RCU_KTHREAD_PRIO;
 	sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
 	return 0;
@@ -1328,10 +1319,6 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
 {
 }
 
-static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp)
-{
-}
-
 static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
 						 struct rcu_node *rnp,
 						 int rnp_index)
@@ -1520,7 +1507,6 @@ int rcu_needs_cpu(int cpu)
 {
 	int c = 0;
 	int snap;
-	int snap_nmi;
 	int thatcpu;
 
 	/* Check for being in the holdoff period. */
@@ -1531,10 +1517,10 @@ int rcu_needs_cpu(int cpu)
 	for_each_online_cpu(thatcpu) {
 		if (thatcpu == cpu)
 			continue;
-		snap = per_cpu(rcu_dynticks, thatcpu).dynticks;
-		snap_nmi = per_cpu(rcu_dynticks, thatcpu).dynticks_nmi;
+		snap = atomic_add_return(0, &per_cpu(rcu_dynticks,
+						     thatcpu).dynticks);
 		smp_mb(); /* Order sampling of snap with end of grace period. */
-		if (((snap & 0x1) != 0) || ((snap_nmi & 0x1) != 0)) {
+		if ((snap & 0x1) != 0) {
 			per_cpu(rcu_dyntick_drain, cpu) = 0;
 			per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
 			return rcu_needs_cpu_quick_check(cpu);
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index aa0fd72b4bc7..9678cc3650f5 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -69,10 +69,10 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
 		   rdp->passed_quiesc, rdp->passed_quiesc_completed,
 		   rdp->qs_pending);
 #ifdef CONFIG_NO_HZ
-	seq_printf(m, " dt=%d/%d dn=%d df=%lu",
-		   rdp->dynticks->dynticks,
+	seq_printf(m, " dt=%d/%d/%d df=%lu",
+		   atomic_read(&rdp->dynticks->dynticks),
 		   rdp->dynticks->dynticks_nesting,
-		   rdp->dynticks->dynticks_nmi,
+		   rdp->dynticks->dynticks_nmi_nesting,
 		   rdp->dynticks_fqs);
 #endif /* #ifdef CONFIG_NO_HZ */
 	seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi);
@@ -141,9 +141,9 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
 		   rdp->qs_pending);
 #ifdef CONFIG_NO_HZ
 	seq_printf(m, ",%d,%d,%d,%lu",
-		   rdp->dynticks->dynticks,
+		   atomic_read(&rdp->dynticks->dynticks),
 		   rdp->dynticks->dynticks_nesting,
-		   rdp->dynticks->dynticks_nmi,
+		   rdp->dynticks->dynticks_nmi_nesting,
 		   rdp->dynticks_fqs);
 #endif /* #ifdef CONFIG_NO_HZ */
 	seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi);
@@ -167,7 +167,7 @@ static int show_rcudata_csv(struct seq_file *m, void *unused)
 {
 	seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pqc\",\"pq\",");
 #ifdef CONFIG_NO_HZ
-	seq_puts(m, "\"dt\",\"dt nesting\",\"dn\",\"df\",");
+	seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\",");
 #endif /* #ifdef CONFIG_NO_HZ */
 	seq_puts(m, "\"of\",\"ri\",\"ql\",\"b\",\"ci\",\"co\",\"ca\"\n");
 #ifdef CONFIG_TREE_PREEMPT_RCU
diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index 619313ed6c46..507a22fab738 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -144,7 +144,7 @@ static void init_shared_classes(void)
 
 #define HARDIRQ_ENTER()				\
 	local_irq_disable();			\
-	irq_enter();				\
+	__irq_enter();				\
 	WARN_ON(!in_irq());
 
 #define HARDIRQ_EXIT()				\
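
Note on the selftest fix: in this era of the kernel, irq_enter() calls
rcu_irq_enter() before doing the preempt-count and lockdep bookkeeping,
while the bare __irq_enter() macro does only the bookkeeping.  Assuming the
selftests tear their fake hardirq context down with the matching "__" flavor
(which never calls rcu_irq_exit()), entering with the full irq_enter() left
an unpaired rcu_irq_enter() behind.  A sketch of the asymmetry being fixed:

	/* Before: RCU sees an enter with no matching exit. */
	irq_enter();	/* rcu_irq_enter() + preempt/lockdep bookkeeping */
	...
	__irq_exit();	/* preempt/lockdep bookkeeping only, no RCU */

	/* After: both sides skip RCU, so nothing is left unpaired. */
	__irq_enter();
	...
	__irq_exit();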