path: root/kernel
author    Linus Torvalds <torvalds@linux-foundation.org>  2009-09-21 12:06:52 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2009-09-21 12:06:52 -0400
commit    b8c7f1dc5ca4e0d10709182233cdab932cef593d (patch)
tree      28c5f79d61ce7167511f18eaed5c9fca87c68ab4 /kernel
parent    f4eccb6d979e0cc5a719a50af5f9a56e79092a2d (diff)
parent    a71fca58b7f4abca551ae2256ac08dd9123a03f9 (diff)
Merge branch 'core-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'core-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  rcu: Fix whitespace inconsistencies
  rcu: Fix thinko, actually initialize full tree
  rcu: Apply results of code inspection of kernel/rcutree_plugin.h
  rcu: Add WARN_ON_ONCE() consistency checks covering state transitions
  rcu: Fix synchronize_rcu() for TREE_PREEMPT_RCU
  rcu: Simplify rcu_read_unlock_special() quiescent-state accounting
  rcu: Add debug checks to TREE_PREEMPT_RCU for premature grace periods
  rcu: Kconfig help needs to say that TREE_PREEMPT_RCU scales down
  rcutorture: Occasionally delay readers enough to make RCU force_quiescent_state
  rcu: Initialize multi-level RCU grace periods holding locks
  rcu: Need to update rnp->gpnum if preemptable RCU is to be reliable
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/rcupdate.c         48
-rw-r--r--  kernel/rcutorture.c       43
-rw-r--r--  kernel/rcutree.c         105
-rw-r--r--  kernel/rcutree.h           2
-rw-r--r--  kernel/rcutree_plugin.h  110
-rw-r--r--  kernel/rcutree_trace.c     2
6 files changed, 181 insertions, 129 deletions
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index bd5d5c8e5140..37ac45483082 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -19,7 +19,7 @@
  *
  * Authors: Dipankar Sarma <dipankar@in.ibm.com>
  *          Manfred Spraul <manfred@colorfullife.com>
  *
  * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
  * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
  * Papers:
@@ -27,7 +27,7 @@
  * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
  *
  * For detailed explanation of Read-Copy Update mechanism see -
  * http://lse.sourceforge.net/locking/rcupdate.html
  *
  */
 #include <linux/types.h>
@@ -74,6 +74,8 @@ void wakeme_after_rcu(struct rcu_head *head)
 	complete(&rcu->completion);
 }
 
+#ifdef CONFIG_TREE_PREEMPT_RCU
+
 /**
  * synchronize_rcu - wait until a grace period has elapsed.
  *
@@ -87,7 +89,7 @@ void synchronize_rcu(void)
 {
 	struct rcu_synchronize rcu;
 
-	if (rcu_blocking_is_gp())
+	if (!rcu_scheduler_active)
 		return;
 
 	init_completion(&rcu.completion);
@@ -98,6 +100,46 @@ void synchronize_rcu(void)
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu);
 
+#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
+
+/**
+ * synchronize_sched - wait until an rcu-sched grace period has elapsed.
+ *
+ * Control will return to the caller some time after a full rcu-sched
+ * grace period has elapsed, in other words after all currently executing
+ * rcu-sched read-side critical sections have completed. These read-side
+ * critical sections are delimited by rcu_read_lock_sched() and
+ * rcu_read_unlock_sched(), and may be nested. Note that preempt_disable(),
+ * local_irq_disable(), and so on may be used in place of
+ * rcu_read_lock_sched().
+ *
+ * This means that all preempt_disable code sequences, including NMI and
+ * hardware-interrupt handlers, in progress on entry will have completed
+ * before this primitive returns. However, this does not guarantee that
+ * softirq handlers will have completed, since in some kernels, these
+ * handlers can run in process context, and can block.
+ *
+ * This primitive provides the guarantees made by the (now removed)
+ * synchronize_kernel() API. In contrast, synchronize_rcu() only
+ * guarantees that rcu_read_lock() sections will have completed.
+ * In "classic RCU", these two guarantees happen to be one and
+ * the same, but can differ in realtime RCU implementations.
+ */
+void synchronize_sched(void)
+{
+	struct rcu_synchronize rcu;
+
+	if (rcu_blocking_is_gp())
+		return;
+
+	init_completion(&rcu.completion);
+	/* Will wake me after RCU finished. */
+	call_rcu_sched(&rcu.head, wakeme_after_rcu);
+	/* Wait for it. */
+	wait_for_completion(&rcu.completion);
+}
+EXPORT_SYMBOL_GPL(synchronize_sched);
+
 /**
  * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.
  *
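The synchronize_sched() kernel-doc added above describes the usual update-side pattern: publish a new version of a data item, wait until every reader that might still be running with preemption disabled has finished, then reclaim the old version. Below is a minimal, hypothetical sketch of that pattern; it is not taken from this patch, struct foo, gp, read_foo_a() and update_foo() are made-up names, and a single updater is assumed.

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
	int a;
};

static struct foo *gp;	/* current version, read under rcu-sched */

/* Reader side: rcu_read_lock_sched() disables preemption, so this
 * section is covered by the synchronize_sched() guarantee above. */
static int read_foo_a(void)
{
	struct foo *p;
	int a;

	rcu_read_lock_sched();
	p = rcu_dereference(gp);
	a = p ? p->a : -1;
	rcu_read_unlock_sched();
	return a;
}

/* Updater side (single updater assumed): publish the new version, wait
 * for all pre-existing preempt-disabled readers, then free the old one. */
static void update_foo(struct foo *newp)
{
	struct foo *oldp = gp;

	rcu_assign_pointer(gp, newp);
	synchronize_sched();	/* all readers that could see oldp are done */
	kfree(oldp);
}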
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index b33db539a8ad..233768f21f97 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -18,7 +18,7 @@
  * Copyright (C) IBM Corporation, 2005, 2006
  *
  * Authors: Paul E. McKenney <paulmck@us.ibm.com>
  *          Josh Triplett <josh@freedesktop.org>
  *
  * See also: Documentation/RCU/torture.txt
  */
@@ -50,7 +50,7 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and "
 	      "Josh Triplett <josh@freedesktop.org>");
 
 static int nreaders = -1;	/* # reader threads, defaults to 2*ncpus */
 static int nfakewriters = 4;	/* # fake writer threads */
@@ -110,8 +110,8 @@ struct rcu_torture {
 };
 
 static LIST_HEAD(rcu_torture_freelist);
-static struct rcu_torture *rcu_torture_current = NULL;
-static long rcu_torture_current_version = 0;
+static struct rcu_torture *rcu_torture_current;
+static long rcu_torture_current_version;
 static struct rcu_torture rcu_tortures[10 * RCU_TORTURE_PIPE_LEN];
 static DEFINE_SPINLOCK(rcu_torture_lock);
 static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_count) =
@@ -124,11 +124,11 @@ static atomic_t n_rcu_torture_alloc_fail;
 static atomic_t n_rcu_torture_free;
 static atomic_t n_rcu_torture_mberror;
 static atomic_t n_rcu_torture_error;
-static long n_rcu_torture_timers = 0;
+static long n_rcu_torture_timers;
 static struct list_head rcu_torture_removed;
 static cpumask_var_t shuffle_tmp_mask;
 
-static int stutter_pause_test = 0;
+static int stutter_pause_test;
 
 #if defined(MODULE) || defined(CONFIG_RCU_TORTURE_TEST_RUNNABLE)
 #define RCUTORTURE_RUNNABLE_INIT 1
@@ -267,7 +267,8 @@ struct rcu_torture_ops {
 	int irq_capable;
 	char *name;
 };
-static struct rcu_torture_ops *cur_ops = NULL;
+
+static struct rcu_torture_ops *cur_ops;
 
 /*
  * Definitions for rcu torture testing.
@@ -281,14 +282,17 @@ static int rcu_torture_read_lock(void) __acquires(RCU)
 
 static void rcu_read_delay(struct rcu_random_state *rrsp)
 {
-	long delay;
-	const long longdelay = 200;
+	const unsigned long shortdelay_us = 200;
+	const unsigned long longdelay_ms = 50;
 
-	/* We want there to be long-running readers, but not all the time. */
+	/* We want a short delay sometimes to make a reader delay the grace
+	 * period, and we want a long delay occasionally to trigger
+	 * force_quiescent_state. */
 
-	delay = rcu_random(rrsp) % (nrealreaders * 2 * longdelay);
-	if (!delay)
-		udelay(longdelay);
+	if (!(rcu_random(rrsp) % (nrealreaders * 2000 * longdelay_ms)))
+		mdelay(longdelay_ms);
+	if (!(rcu_random(rrsp) % (nrealreaders * 2 * shortdelay_us)))
+		udelay(shortdelay_us);
 }
 
 static void rcu_torture_read_unlock(int idx) __releases(RCU)
@@ -339,8 +343,8 @@ static struct rcu_torture_ops rcu_ops = {
 	.sync = synchronize_rcu,
 	.cb_barrier = rcu_barrier,
 	.stats = NULL,
 	.irq_capable = 1,
 	.name = "rcu"
 };
 
 static void rcu_sync_torture_deferred_free(struct rcu_torture *p)
@@ -638,7 +642,8 @@ rcu_torture_writer(void *arg)
 
 	do {
 		schedule_timeout_uninterruptible(1);
-		if ((rp = rcu_torture_alloc()) == NULL)
+		rp = rcu_torture_alloc();
+		if (rp == NULL)
 			continue;
 		rp->rtort_pipe_count = 0;
 		udelay(rcu_random(&rand) & 0x3ff);
@@ -1110,7 +1115,7 @@ rcu_torture_init(void)
 		printk(KERN_ALERT "rcutorture: invalid torture type: \"%s\"\n",
 		       torture_type);
 		mutex_unlock(&fullstop_mutex);
-		return (-EINVAL);
+		return -EINVAL;
 	}
 	if (cur_ops->init)
 		cur_ops->init(); /* no "goto unwind" prior to this point!!! */
@@ -1161,7 +1166,7 @@ rcu_torture_init(void)
 		goto unwind;
 	}
 	fakewriter_tasks = kzalloc(nfakewriters * sizeof(fakewriter_tasks[0]),
 				   GFP_KERNEL);
 	if (fakewriter_tasks == NULL) {
 		VERBOSE_PRINTK_ERRSTRING("out of memory");
 		firsterr = -ENOMEM;
@@ -1170,7 +1175,7 @@ rcu_torture_init(void)
 	for (i = 0; i < nfakewriters; i++) {
 		VERBOSE_PRINTK_STRING("Creating rcu_torture_fakewriter task");
 		fakewriter_tasks[i] = kthread_run(rcu_torture_fakewriter, NULL,
 						  "rcu_torture_fakewriter");
 		if (IS_ERR(fakewriter_tasks[i])) {
 			firsterr = PTR_ERR(fakewriter_tasks[i]);
 			VERBOSE_PRINTK_ERRSTRING("Failed to create fakewriter");
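As a rough worked example of the new rcu_read_delay() odds above, assuming rcu_random() is close to uniform and taking a hypothetical nrealreaders of 4 together with the constants from the patch (shortdelay_us = 200, longdelay_ms = 50):

P(mdelay(longdelay_ms))  = 1 / (nrealreaders * 2000 * longdelay_ms) = 1 / (4 * 2000 * 50) = 1 / 400,000 per call
P(udelay(shortdelay_us)) = 1 / (nrealreaders * 2 * shortdelay_us)   = 1 / (4 * 2 * 200)   = 1 / 1,600 per call

Because both denominators scale with nrealreaders, the aggregate delay rate stays roughly constant as reader threads are added: the long delay stays rare enough not to stall the test, yet occurs often enough to exercise force_quiescent_state.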
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 6b11b07cfe7f..52b06f6e158c 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -25,7 +25,7 @@
  * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
  *
  * For detailed explanation of Read-Copy Update mechanism see -
  * Documentation/RCU
  */
 #include <linux/types.h>
 #include <linux/kernel.h>
@@ -107,27 +107,23 @@ static void __cpuinit rcu_init_percpu_data(int cpu, struct rcu_state *rsp,
  */
 void rcu_sched_qs(int cpu)
 {
-	unsigned long flags;
 	struct rcu_data *rdp;
 
-	local_irq_save(flags);
 	rdp = &per_cpu(rcu_sched_data, cpu);
-	rdp->passed_quiesc = 1;
 	rdp->passed_quiesc_completed = rdp->completed;
-	rcu_preempt_qs(cpu);
-	local_irq_restore(flags);
+	barrier();
+	rdp->passed_quiesc = 1;
+	rcu_preempt_note_context_switch(cpu);
 }
 
 void rcu_bh_qs(int cpu)
 {
-	unsigned long flags;
 	struct rcu_data *rdp;
 
-	local_irq_save(flags);
 	rdp = &per_cpu(rcu_bh_data, cpu);
-	rdp->passed_quiesc = 1;
 	rdp->passed_quiesc_completed = rdp->completed;
-	local_irq_restore(flags);
+	barrier();
+	rdp->passed_quiesc = 1;
 }
 
 #ifdef CONFIG_NO_HZ
@@ -605,8 +601,6 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
 {
 	struct rcu_data *rdp = rsp->rda[smp_processor_id()];
 	struct rcu_node *rnp = rcu_get_root(rsp);
-	struct rcu_node *rnp_cur;
-	struct rcu_node *rnp_end;
 
 	if (!cpu_needs_another_gp(rsp, rdp)) {
 		spin_unlock_irqrestore(&rnp->lock, flags);
@@ -615,6 +609,7 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
 
 	/* Advance to a new grace period and initialize state. */
 	rsp->gpnum++;
+	WARN_ON_ONCE(rsp->signaled == RCU_GP_INIT);
 	rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */
 	rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
 	record_gp_stall_check_time(rsp);
@@ -631,7 +626,9 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
 
 	/* Special-case the common single-level case. */
 	if (NUM_RCU_NODES == 1) {
+		rcu_preempt_check_blocked_tasks(rnp);
 		rnp->qsmask = rnp->qsmaskinit;
+		rnp->gpnum = rsp->gpnum;
 		rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */
 		spin_unlock_irqrestore(&rnp->lock, flags);
 		return;
@@ -644,42 +641,28 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
 	spin_lock(&rsp->onofflock); /* irqs already disabled. */
 
 	/*
-	 * Set the quiescent-state-needed bits in all the non-leaf RCU
-	 * nodes for all currently online CPUs. This operation relies
-	 * on the layout of the hierarchy within the rsp->node[] array.
-	 * Note that other CPUs will access only the leaves of the
-	 * hierarchy, which still indicate that no grace period is in
-	 * progress. In addition, we have excluded CPU-hotplug operations.
-	 *
-	 * We therefore do not need to hold any locks. Any required
-	 * memory barriers will be supplied by the locks guarding the
-	 * leaf rcu_nodes in the hierarchy.
-	 */
-
-	rnp_end = rsp->level[NUM_RCU_LVLS - 1];
-	for (rnp_cur = &rsp->node[0]; rnp_cur < rnp_end; rnp_cur++)
-		rnp_cur->qsmask = rnp_cur->qsmaskinit;
-
-	/*
-	 * Now set up the leaf nodes. Here we must be careful. First,
-	 * we need to hold the lock in order to exclude other CPUs, which
-	 * might be contending for the leaf nodes' locks. Second, as
-	 * soon as we initialize a given leaf node, its CPUs might run
-	 * up the rest of the hierarchy. We must therefore acquire locks
-	 * for each node that we touch during this stage. (But we still
-	 * are excluding CPU-hotplug operations.)
+	 * Set the quiescent-state-needed bits in all the rcu_node
+	 * structures for all currently online CPUs in breadth-first
+	 * order, starting from the root rcu_node structure. This
+	 * operation relies on the layout of the hierarchy within the
+	 * rsp->node[] array. Note that other CPUs will access only
+	 * the leaves of the hierarchy, which still indicate that no
+	 * grace period is in progress, at least until the corresponding
+	 * leaf node has been initialized. In addition, we have excluded
+	 * CPU-hotplug operations.
 	 *
 	 * Note that the grace period cannot complete until we finish
 	 * the initialization process, as there will be at least one
 	 * qsmask bit set in the root node until that time, namely the
-	 * one corresponding to this CPU.
+	 * one corresponding to this CPU, due to the fact that we have
+	 * irqs disabled.
 	 */
-	rnp_end = &rsp->node[NUM_RCU_NODES];
-	rnp_cur = rsp->level[NUM_RCU_LVLS - 1];
-	for (; rnp_cur < rnp_end; rnp_cur++) {
-		spin_lock(&rnp_cur->lock);	/* irqs already disabled. */
-		rnp_cur->qsmask = rnp_cur->qsmaskinit;
-		spin_unlock(&rnp_cur->lock);	/* irqs already disabled. */
+	for (rnp = &rsp->node[0]; rnp < &rsp->node[NUM_RCU_NODES]; rnp++) {
+		spin_lock(&rnp->lock);		/* irqs already disabled. */
+		rcu_preempt_check_blocked_tasks(rnp);
+		rnp->qsmask = rnp->qsmaskinit;
+		rnp->gpnum = rsp->gpnum;
+		spin_unlock(&rnp->lock);	/* irqs already disabled. */
 	}
 
 	rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */
@@ -722,6 +705,7 @@ rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp)
 static void cpu_quiet_msk_finish(struct rcu_state *rsp, unsigned long flags)
 	__releases(rnp->lock)
 {
+	WARN_ON_ONCE(rsp->completed == rsp->gpnum);
 	rsp->completed = rsp->gpnum;
 	rcu_process_gp_end(rsp, rsp->rda[smp_processor_id()]);
 	rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */
@@ -739,6 +723,8 @@ cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, struct rcu_node *rnp,
 	      unsigned long flags)
 	__releases(rnp->lock)
 {
+	struct rcu_node *rnp_c;
+
 	/* Walk up the rcu_node hierarchy. */
 	for (;;) {
 		if (!(rnp->qsmask & mask)) {
@@ -762,8 +748,10 @@ cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, struct rcu_node *rnp,
 			break;
 		}
 		spin_unlock_irqrestore(&rnp->lock, flags);
+		rnp_c = rnp;
 		rnp = rnp->parent;
 		spin_lock_irqsave(&rnp->lock, flags);
+		WARN_ON_ONCE(rnp_c->qsmask);
 	}
 
 	/*
@@ -776,10 +764,10 @@ cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, struct rcu_node *rnp,
 
 /*
  * Record a quiescent state for the specified CPU, which must either be
- * the current CPU or an offline CPU. The lastcomp argument is used to
- * make sure we are still in the grace period of interest. We don't want
- * to end the current grace period based on quiescent states detected in
- * an earlier grace period!
+ * the current CPU. The lastcomp argument is used to make sure we are
+ * still in the grace period of interest. We don't want to end the current
+ * grace period based on quiescent states detected in an earlier grace
+ * period!
  */
 static void
 cpu_quiet(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp)
@@ -814,7 +802,6 @@ cpu_quiet(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp)
 		 * This GP can't end until cpu checks in, so all of our
 		 * callbacks can be processed during the next GP.
 		 */
-		rdp = rsp->rda[smp_processor_id()];
 		rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
 
 		cpu_quiet_msk(mask, rsp, rnp, flags); /* releases rnp->lock */
@@ -872,7 +859,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
 	spin_lock_irqsave(&rsp->onofflock, flags);
 
 	/* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */
-	rnp = rdp->mynode;
+	rnp = rdp->mynode;	/* this is the outgoing CPU's rnp. */
 	mask = rdp->grpmask;	/* rnp->grplo is constant. */
 	do {
 		spin_lock(&rnp->lock);		/* irqs already disabled. */
@@ -881,7 +868,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
 			spin_unlock(&rnp->lock); /* irqs remain disabled. */
 			break;
 		}
-		rcu_preempt_offline_tasks(rsp, rnp);
+		rcu_preempt_offline_tasks(rsp, rnp, rdp);
 		mask = rnp->grpmask;
 		spin_unlock(&rnp->lock);	/* irqs remain disabled. */
 		rnp = rnp->parent;
@@ -890,9 +877,6 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
 
 	spin_unlock(&rsp->onofflock);		/* irqs remain disabled. */
 
-	/* Being offline is a quiescent state, so go record it. */
-	cpu_quiet(cpu, rsp, rdp, lastcomp);
-
 	/*
 	 * Move callbacks from the outgoing CPU to the running CPU.
 	 * Note that the outgoing CPU is now quiscent, so it is now
@@ -1457,20 +1441,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable)
 		rnp = rnp->parent;
 	} while (rnp != NULL && !(rnp->qsmaskinit & mask));
 
-	spin_unlock(&rsp->onofflock);		/* irqs remain disabled. */
-
-	/*
-	 * A new grace period might start here. If so, we will be part of
-	 * it, and its gpnum will be greater than ours, so we will
-	 * participate. It is also possible for the gpnum to have been
-	 * incremented before this function was called, and the bitmasks
-	 * to not be filled out until now, in which case we will also
-	 * participate due to our gpnum being behind.
-	 */
-
-	/* Since it is coming online, the CPU is in a quiescent state. */
-	cpu_quiet(cpu, rsp, rdp, lastcomp);
-	local_irq_restore(flags);
+	spin_unlock_irqrestore(&rsp->onofflock, flags);
 }
 
 static void __cpuinit rcu_online_cpu(int cpu)
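The reworked rcu_sched_qs()/rcu_bh_qs() above now store ->passed_quiesc_completed before ->passed_quiesc, with a barrier() between the two so the compiler cannot reorder the stores: anything on this CPU that later sees ->passed_quiesc set also sees which grace period the quiescent state belongs to. A minimal sketch of that store/check pattern follows; the consuming side is simplified and hypothetical (the real consumers live elsewhere in rcutree.c), and record_quiescent_state()/qs_counts_for() are illustrative names only.

/* Producer, as in rcu_sched_qs()/rcu_bh_qs() above: record the grace
 * period first, then set the flag.  barrier() is only a compiler
 * barrier, which suffices because the observer is this same CPU
 * (for example a later interrupt or softirq). */
static void record_quiescent_state(struct rcu_data *rdp)
{
	rdp->passed_quiesc_completed = rdp->completed;
	barrier();
	rdp->passed_quiesc = 1;
}

/* Hypothetical, simplified consumer: if the flag is observed set, the
 * ->passed_quiesc_completed snapshot is already in place, so the
 * quiescent state is attributed to the intended grace period. */
static int qs_counts_for(struct rcu_data *rdp, long lastcomp)
{
	if (!rdp->passed_quiesc)
		return 0;
	return rdp->passed_quiesc_completed == lastcomp;
}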
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index bf8a6f9f134d..8e8287a983c2 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -142,7 +142,7 @@ struct rcu_data {
 	 */
 	struct rcu_head *nxtlist;
 	struct rcu_head **nxttail[RCU_NEXT_SIZE];
 	long qlen;	/* # of queued callbacks */
 	long blimit;	/* Upper limit on a processed batch */
 
 #ifdef CONFIG_NO_HZ
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 47789369ea59..1cee04f627eb 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -64,22 +64,31 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed);
  * not in a quiescent state. There might be any number of tasks blocked
  * while in an RCU read-side critical section.
  */
-static void rcu_preempt_qs_record(int cpu)
+static void rcu_preempt_qs(int cpu)
 {
 	struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
-	rdp->passed_quiesc = 1;
 	rdp->passed_quiesc_completed = rdp->completed;
+	barrier();
+	rdp->passed_quiesc = 1;
 }
 
 /*
- * We have entered the scheduler or are between softirqs in ksoftirqd.
- * If we are in an RCU read-side critical section, we need to reflect
- * that in the state of the rcu_node structure corresponding to this CPU.
- * Caller must disable hardirqs.
+ * We have entered the scheduler, and the current task might soon be
+ * context-switched away from. If this task is in an RCU read-side
+ * critical section, we will no longer be able to rely on the CPU to
+ * record that fact, so we enqueue the task on the appropriate entry
+ * of the blocked_tasks[] array. The task will dequeue itself when
+ * it exits the outermost enclosing RCU read-side critical section.
+ * Therefore, the current grace period cannot be permitted to complete
+ * until the blocked_tasks[] entry indexed by the low-order bit of
+ * rnp->gpnum empties.
+ *
+ * Caller must disable preemption.
  */
-static void rcu_preempt_qs(int cpu)
+static void rcu_preempt_note_context_switch(int cpu)
 {
 	struct task_struct *t = current;
+	unsigned long flags;
 	int phase;
 	struct rcu_data *rdp;
 	struct rcu_node *rnp;
@@ -90,7 +99,7 @@ static void rcu_preempt_qs(int cpu)
 		/* Possibly blocking in an RCU read-side critical section. */
 		rdp = rcu_preempt_state.rda[cpu];
 		rnp = rdp->mynode;
-		spin_lock(&rnp->lock);
+		spin_lock_irqsave(&rnp->lock, flags);
 		t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
 		t->rcu_blocked_node = rnp;
 
@@ -103,11 +112,15 @@ static void rcu_preempt_qs(int cpu)
 		 * state for the current grace period), then as long
 		 * as that task remains queued, the current grace period
 		 * cannot end.
+		 *
+		 * But first, note that the current CPU must still be
+		 * on line!
 		 */
-		phase = !(rnp->qsmask & rdp->grpmask) ^ (rnp->gpnum & 0x1);
+		WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0);
+		WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
+		phase = (rnp->gpnum + !(rnp->qsmask & rdp->grpmask)) & 0x1;
 		list_add(&t->rcu_node_entry, &rnp->blocked_tasks[phase]);
-		smp_mb(); /* Ensure later ctxt swtch seen after above. */
-		spin_unlock(&rnp->lock);
+		spin_unlock_irqrestore(&rnp->lock, flags);
 	}
 
 	/*
@@ -119,9 +132,10 @@ static void rcu_preempt_qs(int cpu)
 	 * grace period, then the fact that the task has been enqueued
 	 * means that we continue to block the current grace period.
 	 */
-	rcu_preempt_qs_record(cpu);
-	t->rcu_read_unlock_special &= ~(RCU_READ_UNLOCK_NEED_QS |
-					RCU_READ_UNLOCK_GOT_QS);
+	rcu_preempt_qs(cpu);
+	local_irq_save(flags);
+	t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
+	local_irq_restore(flags);
 }
 
 /*
@@ -157,7 +171,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
 	special = t->rcu_read_unlock_special;
 	if (special & RCU_READ_UNLOCK_NEED_QS) {
 		t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
-		t->rcu_read_unlock_special |= RCU_READ_UNLOCK_GOT_QS;
+		rcu_preempt_qs(smp_processor_id());
 	}
 
 	/* Hardware IRQ handlers cannot block. */
@@ -177,10 +191,10 @@ static void rcu_read_unlock_special(struct task_struct *t)
 	 */
 	for (;;) {
 		rnp = t->rcu_blocked_node;
-		spin_lock(&rnp->lock);
+		spin_lock(&rnp->lock);  /* irqs already disabled. */
 		if (rnp == t->rcu_blocked_node)
 			break;
-		spin_unlock(&rnp->lock);
+		spin_unlock(&rnp->lock);  /* irqs remain disabled. */
 	}
 	empty = list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]);
 	list_del_init(&t->rcu_node_entry);
@@ -194,9 +208,8 @@ static void rcu_read_unlock_special(struct task_struct *t)
 	 */
 	if (!empty && rnp->qsmask == 0 &&
 	    list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1])) {
-		t->rcu_read_unlock_special &=
-			~(RCU_READ_UNLOCK_NEED_QS |
-			  RCU_READ_UNLOCK_GOT_QS);
+		struct rcu_node *rnp_p;
+
 		if (rnp->parent == NULL) {
 			/* Only one rcu_node in the tree. */
 			cpu_quiet_msk_finish(&rcu_preempt_state, flags);
@@ -205,9 +218,10 @@ static void rcu_read_unlock_special(struct task_struct *t)
 		/* Report up the rest of the hierarchy. */
 		mask = rnp->grpmask;
 		spin_unlock_irqrestore(&rnp->lock, flags);
-		rnp = rnp->parent;
-		spin_lock_irqsave(&rnp->lock, flags);
-		cpu_quiet_msk(mask, &rcu_preempt_state, rnp, flags);
+		rnp_p = rnp->parent;
+		spin_lock_irqsave(&rnp_p->lock, flags);
+		WARN_ON_ONCE(rnp->qsmask);
+		cpu_quiet_msk(mask, &rcu_preempt_state, rnp_p, flags);
 		return;
 	}
 	spin_unlock(&rnp->lock);
@@ -259,6 +273,19 @@ static void rcu_print_task_stall(struct rcu_node *rnp)
 #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
 
 /*
+ * Check that the list of blocked tasks for the newly completed grace
+ * period is in fact empty. It is a serious bug to complete a grace
+ * period that still has RCU readers blocked! This function must be
+ * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock
+ * must be held by the caller.
+ */
+static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
+{
+	WARN_ON_ONCE(!list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]));
+	WARN_ON_ONCE(rnp->qsmask);
+}
+
+/*
  * Check for preempted RCU readers for the specified rcu_node structure.
  * If the caller needs a reliable answer, it must hold the rcu_node's
  * ->lock.
@@ -280,7 +307,8 @@ static int rcu_preempted_readers(struct rcu_node *rnp)
  * The caller must hold rnp->lock with irqs disabled.
  */
 static void rcu_preempt_offline_tasks(struct rcu_state *rsp,
-				      struct rcu_node *rnp)
+				      struct rcu_node *rnp,
+				      struct rcu_data *rdp)
 {
 	int i;
 	struct list_head *lp;
@@ -292,6 +320,9 @@ static void rcu_preempt_offline_tasks(struct rcu_state *rsp,
 		WARN_ONCE(1, "Last CPU thought to be offlined?");
 		return;  /* Shouldn't happen: at least one CPU online. */
 	}
+	WARN_ON_ONCE(rnp != rdp->mynode &&
+		     (!list_empty(&rnp->blocked_tasks[0]) ||
+		      !list_empty(&rnp->blocked_tasks[1])));
 
 	/*
 	 * Move tasks up to root rcu_node. Rely on the fact that the
@@ -335,20 +366,12 @@ static void rcu_preempt_check_callbacks(int cpu)
 	struct task_struct *t = current;
 
 	if (t->rcu_read_lock_nesting == 0) {
-		t->rcu_read_unlock_special &=
-			~(RCU_READ_UNLOCK_NEED_QS | RCU_READ_UNLOCK_GOT_QS);
-		rcu_preempt_qs_record(cpu);
+		t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
+		rcu_preempt_qs(cpu);
 		return;
 	}
-	if (per_cpu(rcu_preempt_data, cpu).qs_pending) {
-		if (t->rcu_read_unlock_special & RCU_READ_UNLOCK_GOT_QS) {
-			rcu_preempt_qs_record(cpu);
-			t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_GOT_QS;
-		} else if (!(t->rcu_read_unlock_special &
-			     RCU_READ_UNLOCK_NEED_QS)) {
-			t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
-		}
-	}
+	if (per_cpu(rcu_preempt_data, cpu).qs_pending)
+		t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
 }
 
 /*
@@ -434,7 +457,7 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed);
  * Because preemptable RCU does not exist, we never have to check for
  * CPUs being in quiescent states.
  */
-static void rcu_preempt_qs(int cpu)
+static void rcu_preempt_note_context_switch(int cpu)
 {
 }
 
@@ -451,6 +474,16 @@ static void rcu_print_task_stall(struct rcu_node *rnp)
 #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
 
 /*
+ * Because there is no preemptable RCU, there can be no readers blocked,
+ * so there is no need to check for blocked tasks. So check only for
+ * bogus qsmask values.
+ */
+static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
+{
+	WARN_ON_ONCE(rnp->qsmask);
+}
+
+/*
  * Because preemptable RCU does not exist, there are never any preempted
  * RCU readers.
  */
@@ -466,7 +499,8 @@ static int rcu_preempted_readers(struct rcu_node *rnp)
  * tasks that were blocked within RCU read-side critical sections.
  */
 static void rcu_preempt_offline_tasks(struct rcu_state *rsp,
-				      struct rcu_node *rnp)
+				      struct rcu_node *rnp,
+				      struct rcu_data *rdp)
 {
 }
 
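The phase computation added in rcu_preempt_note_context_switch() above selects which rnp->blocked_tasks[] list a newly blocked task joins, based on whether this CPU still owes a quiescent state for the current grace period. A standalone sketch of the same rule follows; it is illustrative only, and blocked_tasks_phase() is not a function in this patch.

/*
 * Which blocked_tasks[] index a newly blocked task should use.  The
 * current grace period waits on blocked_tasks[gpnum & 0x1], so:
 *
 *  - if this CPU has not yet passed a quiescent state (its bit is
 *    still set in rnp->qsmask), the task can block the current grace
 *    period and goes on list (gpnum & 0x1);
 *  - if the CPU has already checked in, the task can only block the
 *    next grace period and goes on list ((gpnum + 1) & 0x1).
 */
static int blocked_tasks_phase(long gpnum, int cpu_owes_qs)
{
	return (gpnum + !cpu_owes_qs) & 0x1;
}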
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index 0ea1bff69727..c89f5e9fd173 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -20,7 +20,7 @@
  * Papers: http://www.rdrop.com/users/paulmck/RCU
  *
  * For detailed explanation of Read-Copy Update mechanism see -
  * Documentation/RCU
  *
  */
 #include <linux/types.h>