aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cgroup.c27
-rw-r--r--kernel/events/core.c20
-rw-r--r--kernel/rcupdate.c32
-rw-r--r--kernel/rcutiny.c45
-rw-r--r--kernel/rcutiny_plugin.h203
-rw-r--r--kernel/rcutorture.c26
-rw-r--r--kernel/rcutree.c526
-rw-r--r--kernel/rcutree.h104
-rw-r--r--kernel/rcutree_plugin.h568
-rw-r--r--kernel/rcutree_trace.c180
-rw-r--r--kernel/softirq.c2
11 files changed, 1333 insertions, 400 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 25c7eb52de1a..909a35510af5 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -326,12 +326,6 @@ static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
326 return &css_set_table[index]; 326 return &css_set_table[index];
327} 327}
328 328
329static void free_css_set_rcu(struct rcu_head *obj)
330{
331 struct css_set *cg = container_of(obj, struct css_set, rcu_head);
332 kfree(cg);
333}
334
335/* We don't maintain the lists running through each css_set to its 329/* We don't maintain the lists running through each css_set to its
336 * task until after the first call to cgroup_iter_start(). This 330 * task until after the first call to cgroup_iter_start(). This
337 * reduces the fork()/exit() overhead for people who have cgroups 331 * reduces the fork()/exit() overhead for people who have cgroups
@@ -375,7 +369,7 @@ static void __put_css_set(struct css_set *cg, int taskexit)
375 } 369 }
376 370
377 write_unlock(&css_set_lock); 371 write_unlock(&css_set_lock);
378 call_rcu(&cg->rcu_head, free_css_set_rcu); 372 kfree_rcu(cg, rcu_head);
379} 373}
380 374
381/* 375/*
@@ -812,13 +806,6 @@ static int cgroup_call_pre_destroy(struct cgroup *cgrp)
812 return ret; 806 return ret;
813} 807}
814 808
815static void free_cgroup_rcu(struct rcu_head *obj)
816{
817 struct cgroup *cgrp = container_of(obj, struct cgroup, rcu_head);
818
819 kfree(cgrp);
820}
821
822static void cgroup_diput(struct dentry *dentry, struct inode *inode) 809static void cgroup_diput(struct dentry *dentry, struct inode *inode)
823{ 810{
824 /* is dentry a directory ? if so, kfree() associated cgroup */ 811 /* is dentry a directory ? if so, kfree() associated cgroup */
@@ -856,7 +843,7 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
856 */ 843 */
857 BUG_ON(!list_empty(&cgrp->pidlists)); 844 BUG_ON(!list_empty(&cgrp->pidlists));
858 845
859 call_rcu(&cgrp->rcu_head, free_cgroup_rcu); 846 kfree_rcu(cgrp, rcu_head);
860 } 847 }
861 iput(inode); 848 iput(inode);
862} 849}
@@ -4623,14 +4610,6 @@ bool css_is_ancestor(struct cgroup_subsys_state *child,
4623 return ret; 4610 return ret;
4624} 4611}
4625 4612
4626static void __free_css_id_cb(struct rcu_head *head)
4627{
4628 struct css_id *id;
4629
4630 id = container_of(head, struct css_id, rcu_head);
4631 kfree(id);
4632}
4633
4634void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css) 4613void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
4635{ 4614{
4636 struct css_id *id = css->id; 4615 struct css_id *id = css->id;
@@ -4645,7 +4624,7 @@ void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
4645 spin_lock(&ss->id_lock); 4624 spin_lock(&ss->id_lock);
4646 idr_remove(&ss->idr, id->id); 4625 idr_remove(&ss->idr, id->id);
4647 spin_unlock(&ss->id_lock); 4626 spin_unlock(&ss->id_lock);
4648 call_rcu(&id->rcu_head, __free_css_id_cb); 4627 kfree_rcu(id, rcu_head);
4649} 4628}
4650EXPORT_SYMBOL_GPL(free_css_id); 4629EXPORT_SYMBOL_GPL(free_css_id);
4651 4630
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 0fc34a370ba4..c09767f7db3e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -586,14 +586,6 @@ static void get_ctx(struct perf_event_context *ctx)
586 WARN_ON(!atomic_inc_not_zero(&ctx->refcount)); 586 WARN_ON(!atomic_inc_not_zero(&ctx->refcount));
587} 587}
588 588
589static void free_ctx(struct rcu_head *head)
590{
591 struct perf_event_context *ctx;
592
593 ctx = container_of(head, struct perf_event_context, rcu_head);
594 kfree(ctx);
595}
596
597static void put_ctx(struct perf_event_context *ctx) 589static void put_ctx(struct perf_event_context *ctx)
598{ 590{
599 if (atomic_dec_and_test(&ctx->refcount)) { 591 if (atomic_dec_and_test(&ctx->refcount)) {
@@ -601,7 +593,7 @@ static void put_ctx(struct perf_event_context *ctx)
601 put_ctx(ctx->parent_ctx); 593 put_ctx(ctx->parent_ctx);
602 if (ctx->task) 594 if (ctx->task)
603 put_task_struct(ctx->task); 595 put_task_struct(ctx->task);
604 call_rcu(&ctx->rcu_head, free_ctx); 596 kfree_rcu(ctx, rcu_head);
605 } 597 }
606} 598}
607 599
@@ -5331,14 +5323,6 @@ swevent_hlist_deref(struct swevent_htable *swhash)
5331 lockdep_is_held(&swhash->hlist_mutex)); 5323 lockdep_is_held(&swhash->hlist_mutex));
5332} 5324}
5333 5325
5334static void swevent_hlist_release_rcu(struct rcu_head *rcu_head)
5335{
5336 struct swevent_hlist *hlist;
5337
5338 hlist = container_of(rcu_head, struct swevent_hlist, rcu_head);
5339 kfree(hlist);
5340}
5341
5342static void swevent_hlist_release(struct swevent_htable *swhash) 5326static void swevent_hlist_release(struct swevent_htable *swhash)
5343{ 5327{
5344 struct swevent_hlist *hlist = swevent_hlist_deref(swhash); 5328 struct swevent_hlist *hlist = swevent_hlist_deref(swhash);
@@ -5347,7 +5331,7 @@ static void swevent_hlist_release(struct swevent_htable *swhash)
5347 return; 5331 return;
5348 5332
5349 rcu_assign_pointer(swhash->swevent_hlist, NULL); 5333 rcu_assign_pointer(swhash->swevent_hlist, NULL);
5350 call_rcu(&hlist->rcu_head, swevent_hlist_release_rcu); 5334 kfree_rcu(hlist, rcu_head);
5351} 5335}
5352 5336
5353static void swevent_hlist_put_cpu(struct perf_event *event, int cpu) 5337static void swevent_hlist_put_cpu(struct perf_event *event, int cpu)
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index f3240e987928..7784bd216b6a 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -142,10 +142,17 @@ static int rcuhead_fixup_init(void *addr, enum debug_obj_state state)
142 * Ensure that queued callbacks are all executed. 142 * Ensure that queued callbacks are all executed.
143 * If we detect that we are nested in a RCU read-side critical 143 * If we detect that we are nested in a RCU read-side critical
144 * section, we should simply fail, otherwise we would deadlock. 144 * section, we should simply fail, otherwise we would deadlock.
145 * In !PREEMPT configurations, there is no way to tell if we are
146 * in a RCU read-side critical section or not, so we never
147 * attempt any fixup and just print a warning.
145 */ 148 */
149#ifndef CONFIG_PREEMPT
150 WARN_ON_ONCE(1);
151 return 0;
152#endif
146 if (rcu_preempt_depth() != 0 || preempt_count() != 0 || 153 if (rcu_preempt_depth() != 0 || preempt_count() != 0 ||
147 irqs_disabled()) { 154 irqs_disabled()) {
148 WARN_ON(1); 155 WARN_ON_ONCE(1);
149 return 0; 156 return 0;
150 } 157 }
151 rcu_barrier(); 158 rcu_barrier();
@@ -184,10 +191,17 @@ static int rcuhead_fixup_activate(void *addr, enum debug_obj_state state)
184 * Ensure that queued callbacks are all executed. 191 * Ensure that queued callbacks are all executed.
185 * If we detect that we are nested in a RCU read-side critical 192 * If we detect that we are nested in a RCU read-side critical
186 * section, we should simply fail, otherwise we would deadlock. 193 * section, we should simply fail, otherwise we would deadlock.
194 * In !PREEMPT configurations, there is no way to tell if we are
195 * in a RCU read-side critical section or not, so we never
196 * attempt any fixup and just print a warning.
187 */ 197 */
198#ifndef CONFIG_PREEMPT
199 WARN_ON_ONCE(1);
200 return 0;
201#endif
188 if (rcu_preempt_depth() != 0 || preempt_count() != 0 || 202 if (rcu_preempt_depth() != 0 || preempt_count() != 0 ||
189 irqs_disabled()) { 203 irqs_disabled()) {
190 WARN_ON(1); 204 WARN_ON_ONCE(1);
191 return 0; 205 return 0;
192 } 206 }
193 rcu_barrier(); 207 rcu_barrier();
@@ -214,15 +228,17 @@ static int rcuhead_fixup_free(void *addr, enum debug_obj_state state)
214 * Ensure that queued callbacks are all executed. 228 * Ensure that queued callbacks are all executed.
215 * If we detect that we are nested in a RCU read-side critical 229 * If we detect that we are nested in a RCU read-side critical
216 * section, we should simply fail, otherwise we would deadlock. 230 * section, we should simply fail, otherwise we would deadlock.
217 * Note that the machinery to reliably determine whether 231 * In !PREEMPT configurations, there is no way to tell if we are
218 * or not we are in an RCU read-side critical section 232 * in a RCU read-side critical section or not, so we never
219 * exists only in the preemptible RCU implementations 233 * attempt any fixup and just print a warning.
220 * (TINY_PREEMPT_RCU and TREE_PREEMPT_RCU), which is why
221 * DEBUG_OBJECTS_RCU_HEAD is disallowed if !PREEMPT.
222 */ 234 */
235#ifndef CONFIG_PREEMPT
236 WARN_ON_ONCE(1);
237 return 0;
238#endif
223 if (rcu_preempt_depth() != 0 || preempt_count() != 0 || 239 if (rcu_preempt_depth() != 0 || preempt_count() != 0 ||
224 irqs_disabled()) { 240 irqs_disabled()) {
225 WARN_ON(1); 241 WARN_ON_ONCE(1);
226 return 0; 242 return 0;
227 } 243 }
228 rcu_barrier(); 244 rcu_barrier();
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index 0c343b9a46d5..421abfd3641d 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -40,10 +40,10 @@
40static struct task_struct *rcu_kthread_task; 40static struct task_struct *rcu_kthread_task;
41static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq); 41static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq);
42static unsigned long have_rcu_kthread_work; 42static unsigned long have_rcu_kthread_work;
43static void invoke_rcu_kthread(void);
44 43
45/* Forward declarations for rcutiny_plugin.h. */ 44/* Forward declarations for rcutiny_plugin.h. */
46struct rcu_ctrlblk; 45struct rcu_ctrlblk;
46static void invoke_rcu_kthread(void);
47static void rcu_process_callbacks(struct rcu_ctrlblk *rcp); 47static void rcu_process_callbacks(struct rcu_ctrlblk *rcp);
48static int rcu_kthread(void *arg); 48static int rcu_kthread(void *arg);
49static void __call_rcu(struct rcu_head *head, 49static void __call_rcu(struct rcu_head *head,
@@ -79,36 +79,45 @@ void rcu_exit_nohz(void)
79#endif /* #ifdef CONFIG_NO_HZ */ 79#endif /* #ifdef CONFIG_NO_HZ */
80 80
81/* 81/*
82 * Helper function for rcu_qsctr_inc() and rcu_bh_qsctr_inc(). 82 * Helper function for rcu_sched_qs() and rcu_bh_qs().
83 * Also disable irqs to avoid confusion due to interrupt handlers 83 * Also irqs are disabled to avoid confusion due to interrupt handlers
84 * invoking call_rcu(). 84 * invoking call_rcu().
85 */ 85 */
86static int rcu_qsctr_help(struct rcu_ctrlblk *rcp) 86static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
87{ 87{
88 unsigned long flags;
89
90 local_irq_save(flags);
91 if (rcp->rcucblist != NULL && 88 if (rcp->rcucblist != NULL &&
92 rcp->donetail != rcp->curtail) { 89 rcp->donetail != rcp->curtail) {
93 rcp->donetail = rcp->curtail; 90 rcp->donetail = rcp->curtail;
94 local_irq_restore(flags);
95 return 1; 91 return 1;
96 } 92 }
97 local_irq_restore(flags);
98 93
99 return 0; 94 return 0;
100} 95}
101 96
102/* 97/*
98 * Wake up rcu_kthread() to process callbacks now eligible for invocation
99 * or to boost readers.
100 */
101static void invoke_rcu_kthread(void)
102{
103 have_rcu_kthread_work = 1;
104 wake_up(&rcu_kthread_wq);
105}
106
107/*
103 * Record an rcu quiescent state. And an rcu_bh quiescent state while we 108 * Record an rcu quiescent state. And an rcu_bh quiescent state while we
104 * are at it, given that any rcu quiescent state is also an rcu_bh 109 * are at it, given that any rcu quiescent state is also an rcu_bh
105 * quiescent state. Use "+" instead of "||" to defeat short circuiting. 110 * quiescent state. Use "+" instead of "||" to defeat short circuiting.
106 */ 111 */
107void rcu_sched_qs(int cpu) 112void rcu_sched_qs(int cpu)
108{ 113{
114 unsigned long flags;
115
116 local_irq_save(flags);
109 if (rcu_qsctr_help(&rcu_sched_ctrlblk) + 117 if (rcu_qsctr_help(&rcu_sched_ctrlblk) +
110 rcu_qsctr_help(&rcu_bh_ctrlblk)) 118 rcu_qsctr_help(&rcu_bh_ctrlblk))
111 invoke_rcu_kthread(); 119 invoke_rcu_kthread();
120 local_irq_restore(flags);
112} 121}
113 122
114/* 123/*
@@ -116,8 +125,12 @@ void rcu_sched_qs(int cpu)
116 */ 125 */
117void rcu_bh_qs(int cpu) 126void rcu_bh_qs(int cpu)
118{ 127{
128 unsigned long flags;
129
130 local_irq_save(flags);
119 if (rcu_qsctr_help(&rcu_bh_ctrlblk)) 131 if (rcu_qsctr_help(&rcu_bh_ctrlblk))
120 invoke_rcu_kthread(); 132 invoke_rcu_kthread();
133 local_irq_restore(flags);
121} 134}
122 135
123/* 136/*
@@ -167,7 +180,7 @@ static void rcu_process_callbacks(struct rcu_ctrlblk *rcp)
167 prefetch(next); 180 prefetch(next);
168 debug_rcu_head_unqueue(list); 181 debug_rcu_head_unqueue(list);
169 local_bh_disable(); 182 local_bh_disable();
170 list->func(list); 183 __rcu_reclaim(list);
171 local_bh_enable(); 184 local_bh_enable();
172 list = next; 185 list = next;
173 RCU_TRACE(cb_count++); 186 RCU_TRACE(cb_count++);
@@ -208,20 +221,6 @@ static int rcu_kthread(void *arg)
208} 221}
209 222
210/* 223/*
211 * Wake up rcu_kthread() to process callbacks now eligible for invocation
212 * or to boost readers.
213 */
214static void invoke_rcu_kthread(void)
215{
216 unsigned long flags;
217
218 local_irq_save(flags);
219 have_rcu_kthread_work = 1;
220 wake_up(&rcu_kthread_wq);
221 local_irq_restore(flags);
222}
223
224/*
225 * Wait for a grace period to elapse. But it is illegal to invoke 224 * Wait for a grace period to elapse. But it is illegal to invoke
226 * synchronize_sched() from within an RCU read-side critical section. 225 * synchronize_sched() from within an RCU read-side critical section.
227 * Therefore, any legal call to synchronize_sched() is a quiescent 226 * Therefore, any legal call to synchronize_sched() is a quiescent
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index 3cb8e362e883..f259c676195f 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
@@ -100,23 +100,28 @@ struct rcu_preempt_ctrlblk {
100 u8 completed; /* Last grace period completed. */ 100 u8 completed; /* Last grace period completed. */
101 /* If all three are equal, RCU is idle. */ 101 /* If all three are equal, RCU is idle. */
102#ifdef CONFIG_RCU_BOOST 102#ifdef CONFIG_RCU_BOOST
103 s8 boosted_this_gp; /* Has boosting already happened? */
104 unsigned long boost_time; /* When to start boosting (jiffies) */ 103 unsigned long boost_time; /* When to start boosting (jiffies) */
105#endif /* #ifdef CONFIG_RCU_BOOST */ 104#endif /* #ifdef CONFIG_RCU_BOOST */
106#ifdef CONFIG_RCU_TRACE 105#ifdef CONFIG_RCU_TRACE
107 unsigned long n_grace_periods; 106 unsigned long n_grace_periods;
108#ifdef CONFIG_RCU_BOOST 107#ifdef CONFIG_RCU_BOOST
109 unsigned long n_tasks_boosted; 108 unsigned long n_tasks_boosted;
109 /* Total number of tasks boosted. */
110 unsigned long n_exp_boosts; 110 unsigned long n_exp_boosts;
111 /* Number of tasks boosted for expedited GP. */
111 unsigned long n_normal_boosts; 112 unsigned long n_normal_boosts;
112 unsigned long n_normal_balk_blkd_tasks; 113 /* Number of tasks boosted for normal GP. */
113 unsigned long n_normal_balk_gp_tasks; 114 unsigned long n_balk_blkd_tasks;
114 unsigned long n_normal_balk_boost_tasks; 115 /* Refused to boost: no blocked tasks. */
115 unsigned long n_normal_balk_boosted; 116 unsigned long n_balk_exp_gp_tasks;
116 unsigned long n_normal_balk_notyet; 117 /* Refused to boost: nothing blocking GP. */
117 unsigned long n_normal_balk_nos; 118 unsigned long n_balk_boost_tasks;
118 unsigned long n_exp_balk_blkd_tasks; 119 /* Refused to boost: already boosting. */
119 unsigned long n_exp_balk_nos; 120 unsigned long n_balk_notyet;
121 /* Refused to boost: not yet time. */
122 unsigned long n_balk_nos;
123 /* Refused to boost: not sure why, though. */
124 /* This can happen due to race conditions. */
120#endif /* #ifdef CONFIG_RCU_BOOST */ 125#endif /* #ifdef CONFIG_RCU_BOOST */
121#endif /* #ifdef CONFIG_RCU_TRACE */ 126#endif /* #ifdef CONFIG_RCU_TRACE */
122}; 127};
@@ -201,7 +206,6 @@ static struct list_head *rcu_next_node_entry(struct task_struct *t)
201 206
202#ifdef CONFIG_RCU_BOOST 207#ifdef CONFIG_RCU_BOOST
203static void rcu_initiate_boost_trace(void); 208static void rcu_initiate_boost_trace(void);
204static void rcu_initiate_exp_boost_trace(void);
205#endif /* #ifdef CONFIG_RCU_BOOST */ 209#endif /* #ifdef CONFIG_RCU_BOOST */
206 210
207/* 211/*
@@ -219,41 +223,21 @@ static void show_tiny_preempt_stats(struct seq_file *m)
219 "N."[!rcu_preempt_ctrlblk.gp_tasks], 223 "N."[!rcu_preempt_ctrlblk.gp_tasks],
220 "E."[!rcu_preempt_ctrlblk.exp_tasks]); 224 "E."[!rcu_preempt_ctrlblk.exp_tasks]);
221#ifdef CONFIG_RCU_BOOST 225#ifdef CONFIG_RCU_BOOST
222 seq_printf(m, " ttb=%c btg=", 226 seq_printf(m, "%sttb=%c ntb=%lu neb=%lu nnb=%lu j=%04x bt=%04x\n",
223 "B."[!rcu_preempt_ctrlblk.boost_tasks]); 227 " ",
224 switch (rcu_preempt_ctrlblk.boosted_this_gp) { 228 "B."[!rcu_preempt_ctrlblk.boost_tasks],
225 case -1:
226 seq_puts(m, "exp");
227 break;
228 case 0:
229 seq_puts(m, "no");
230 break;
231 case 1:
232 seq_puts(m, "begun");
233 break;
234 case 2:
235 seq_puts(m, "done");
236 break;
237 default:
238 seq_printf(m, "?%d?", rcu_preempt_ctrlblk.boosted_this_gp);
239 }
240 seq_printf(m, " ntb=%lu neb=%lu nnb=%lu j=%04x bt=%04x\n",
241 rcu_preempt_ctrlblk.n_tasks_boosted, 229 rcu_preempt_ctrlblk.n_tasks_boosted,
242 rcu_preempt_ctrlblk.n_exp_boosts, 230 rcu_preempt_ctrlblk.n_exp_boosts,
243 rcu_preempt_ctrlblk.n_normal_boosts, 231 rcu_preempt_ctrlblk.n_normal_boosts,
244 (int)(jiffies & 0xffff), 232 (int)(jiffies & 0xffff),
245 (int)(rcu_preempt_ctrlblk.boost_time & 0xffff)); 233 (int)(rcu_preempt_ctrlblk.boost_time & 0xffff));
246 seq_printf(m, " %s: nt=%lu gt=%lu bt=%lu b=%lu ny=%lu nos=%lu\n", 234 seq_printf(m, "%s: nt=%lu egt=%lu bt=%lu ny=%lu nos=%lu\n",
247 "normal balk", 235 " balk",
248 rcu_preempt_ctrlblk.n_normal_balk_blkd_tasks, 236 rcu_preempt_ctrlblk.n_balk_blkd_tasks,
249 rcu_preempt_ctrlblk.n_normal_balk_gp_tasks, 237 rcu_preempt_ctrlblk.n_balk_exp_gp_tasks,
250 rcu_preempt_ctrlblk.n_normal_balk_boost_tasks, 238 rcu_preempt_ctrlblk.n_balk_boost_tasks,
251 rcu_preempt_ctrlblk.n_normal_balk_boosted, 239 rcu_preempt_ctrlblk.n_balk_notyet,
252 rcu_preempt_ctrlblk.n_normal_balk_notyet, 240 rcu_preempt_ctrlblk.n_balk_nos);
253 rcu_preempt_ctrlblk.n_normal_balk_nos);
254 seq_printf(m, " exp balk: bt=%lu nos=%lu\n",
255 rcu_preempt_ctrlblk.n_exp_balk_blkd_tasks,
256 rcu_preempt_ctrlblk.n_exp_balk_nos);
257#endif /* #ifdef CONFIG_RCU_BOOST */ 241#endif /* #ifdef CONFIG_RCU_BOOST */
258} 242}
259 243
@@ -271,25 +255,59 @@ static int rcu_boost(void)
271{ 255{
272 unsigned long flags; 256 unsigned long flags;
273 struct rt_mutex mtx; 257 struct rt_mutex mtx;
274 struct list_head *np;
275 struct task_struct *t; 258 struct task_struct *t;
259 struct list_head *tb;
276 260
277 if (rcu_preempt_ctrlblk.boost_tasks == NULL) 261 if (rcu_preempt_ctrlblk.boost_tasks == NULL &&
262 rcu_preempt_ctrlblk.exp_tasks == NULL)
278 return 0; /* Nothing to boost. */ 263 return 0; /* Nothing to boost. */
264
279 raw_local_irq_save(flags); 265 raw_local_irq_save(flags);
280 rcu_preempt_ctrlblk.boosted_this_gp++; 266
281 t = container_of(rcu_preempt_ctrlblk.boost_tasks, struct task_struct, 267 /*
282 rcu_node_entry); 268 * Recheck with irqs disabled: all tasks in need of boosting
283 np = rcu_next_node_entry(t); 269 * might exit their RCU read-side critical sections on their own
270 * if we are preempted just before disabling irqs.
271 */
272 if (rcu_preempt_ctrlblk.boost_tasks == NULL &&
273 rcu_preempt_ctrlblk.exp_tasks == NULL) {
274 raw_local_irq_restore(flags);
275 return 0;
276 }
277
278 /*
279 * Preferentially boost tasks blocking expedited grace periods.
280 * This cannot starve the normal grace periods because a second
281 * expedited grace period must boost all blocked tasks, including
282 * those blocking the pre-existing normal grace period.
283 */
284 if (rcu_preempt_ctrlblk.exp_tasks != NULL) {
285 tb = rcu_preempt_ctrlblk.exp_tasks;
286 RCU_TRACE(rcu_preempt_ctrlblk.n_exp_boosts++);
287 } else {
288 tb = rcu_preempt_ctrlblk.boost_tasks;
289 RCU_TRACE(rcu_preempt_ctrlblk.n_normal_boosts++);
290 }
291 RCU_TRACE(rcu_preempt_ctrlblk.n_tasks_boosted++);
292
293 /*
294 * We boost task t by manufacturing an rt_mutex that appears to
295 * be held by task t. We leave a pointer to that rt_mutex where
296 * task t can find it, and task t will release the mutex when it
297 * exits its outermost RCU read-side critical section. Then
298 * simply acquiring this artificial rt_mutex will boost task
299 * t's priority. (Thanks to tglx for suggesting this approach!)
300 */
301 t = container_of(tb, struct task_struct, rcu_node_entry);
284 rt_mutex_init_proxy_locked(&mtx, t); 302 rt_mutex_init_proxy_locked(&mtx, t);
285 t->rcu_boost_mutex = &mtx; 303 t->rcu_boost_mutex = &mtx;
286 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED; 304 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED;
287 raw_local_irq_restore(flags); 305 raw_local_irq_restore(flags);
288 rt_mutex_lock(&mtx); 306 rt_mutex_lock(&mtx);
289 RCU_TRACE(rcu_preempt_ctrlblk.n_tasks_boosted++); 307 rt_mutex_unlock(&mtx); /* Keep lockdep happy. */
290 rcu_preempt_ctrlblk.boosted_this_gp++; 308
291 rt_mutex_unlock(&mtx); 309 return rcu_preempt_ctrlblk.boost_tasks != NULL ||
292 return rcu_preempt_ctrlblk.boost_tasks != NULL; 310 rcu_preempt_ctrlblk.exp_tasks != NULL;
293} 311}
294 312
295/* 313/*
@@ -304,42 +322,25 @@ static int rcu_boost(void)
304 */ 322 */
305static int rcu_initiate_boost(void) 323static int rcu_initiate_boost(void)
306{ 324{
307 if (!rcu_preempt_blocked_readers_cgp()) { 325 if (!rcu_preempt_blocked_readers_cgp() &&
308 RCU_TRACE(rcu_preempt_ctrlblk.n_normal_balk_blkd_tasks++); 326 rcu_preempt_ctrlblk.exp_tasks == NULL) {
327 RCU_TRACE(rcu_preempt_ctrlblk.n_balk_exp_gp_tasks++);
309 return 0; 328 return 0;
310 } 329 }
311 if (rcu_preempt_ctrlblk.gp_tasks != NULL && 330 if (rcu_preempt_ctrlblk.exp_tasks != NULL ||
312 rcu_preempt_ctrlblk.boost_tasks == NULL && 331 (rcu_preempt_ctrlblk.gp_tasks != NULL &&
313 rcu_preempt_ctrlblk.boosted_this_gp == 0 && 332 rcu_preempt_ctrlblk.boost_tasks == NULL &&
314 ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time)) { 333 ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time))) {
315 rcu_preempt_ctrlblk.boost_tasks = rcu_preempt_ctrlblk.gp_tasks; 334 if (rcu_preempt_ctrlblk.exp_tasks == NULL)
335 rcu_preempt_ctrlblk.boost_tasks =
336 rcu_preempt_ctrlblk.gp_tasks;
316 invoke_rcu_kthread(); 337 invoke_rcu_kthread();
317 RCU_TRACE(rcu_preempt_ctrlblk.n_normal_boosts++);
318 } else 338 } else
319 RCU_TRACE(rcu_initiate_boost_trace()); 339 RCU_TRACE(rcu_initiate_boost_trace());
320 return 1; 340 return 1;
321} 341}
322 342
323/* 343#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)
324 * Initiate boosting for an expedited grace period.
325 */
326static void rcu_initiate_expedited_boost(void)
327{
328 unsigned long flags;
329
330 raw_local_irq_save(flags);
331 if (!list_empty(&rcu_preempt_ctrlblk.blkd_tasks)) {
332 rcu_preempt_ctrlblk.boost_tasks =
333 rcu_preempt_ctrlblk.blkd_tasks.next;
334 rcu_preempt_ctrlblk.boosted_this_gp = -1;
335 invoke_rcu_kthread();
336 RCU_TRACE(rcu_preempt_ctrlblk.n_exp_boosts++);
337 } else
338 RCU_TRACE(rcu_initiate_exp_boost_trace());
339 raw_local_irq_restore(flags);
340}
341
342#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000);
343 344
344/* 345/*
345 * Do priority-boost accounting for the start of a new grace period. 346 * Do priority-boost accounting for the start of a new grace period.
@@ -347,8 +348,6 @@ static void rcu_initiate_expedited_boost(void)
347static void rcu_preempt_boost_start_gp(void) 348static void rcu_preempt_boost_start_gp(void)
348{ 349{
349 rcu_preempt_ctrlblk.boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES; 350 rcu_preempt_ctrlblk.boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES;
350 if (rcu_preempt_ctrlblk.boosted_this_gp > 0)
351 rcu_preempt_ctrlblk.boosted_this_gp = 0;
352} 351}
353 352
354#else /* #ifdef CONFIG_RCU_BOOST */ 353#else /* #ifdef CONFIG_RCU_BOOST */
@@ -372,13 +371,6 @@ static int rcu_initiate_boost(void)
372} 371}
373 372
374/* 373/*
375 * If there is no RCU priority boosting, we don't initiate expedited boosting.
376 */
377static void rcu_initiate_expedited_boost(void)
378{
379}
380
381/*
382 * If there is no RCU priority boosting, nothing to do at grace-period start. 374 * If there is no RCU priority boosting, nothing to do at grace-period start.
383 */ 375 */
384static void rcu_preempt_boost_start_gp(void) 376static void rcu_preempt_boost_start_gp(void)
@@ -418,7 +410,7 @@ static void rcu_preempt_cpu_qs(void)
418 if (!rcu_preempt_gp_in_progress()) 410 if (!rcu_preempt_gp_in_progress())
419 return; 411 return;
420 /* 412 /*
421 * Check up on boosting. If there are no readers blocking the 413 * Check up on boosting. If there are readers blocking the
422 * current grace period, leave. 414 * current grace period, leave.
423 */ 415 */
424 if (rcu_initiate_boost()) 416 if (rcu_initiate_boost())
@@ -578,7 +570,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
578 empty = !rcu_preempt_blocked_readers_cgp(); 570 empty = !rcu_preempt_blocked_readers_cgp();
579 empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL; 571 empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL;
580 np = rcu_next_node_entry(t); 572 np = rcu_next_node_entry(t);
581 list_del(&t->rcu_node_entry); 573 list_del_init(&t->rcu_node_entry);
582 if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks) 574 if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks)
583 rcu_preempt_ctrlblk.gp_tasks = np; 575 rcu_preempt_ctrlblk.gp_tasks = np;
584 if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks) 576 if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks)
@@ -587,7 +579,6 @@ static void rcu_read_unlock_special(struct task_struct *t)
587 if (&t->rcu_node_entry == rcu_preempt_ctrlblk.boost_tasks) 579 if (&t->rcu_node_entry == rcu_preempt_ctrlblk.boost_tasks)
588 rcu_preempt_ctrlblk.boost_tasks = np; 580 rcu_preempt_ctrlblk.boost_tasks = np;
589#endif /* #ifdef CONFIG_RCU_BOOST */ 581#endif /* #ifdef CONFIG_RCU_BOOST */
590 INIT_LIST_HEAD(&t->rcu_node_entry);
591 582
592 /* 583 /*
593 * If this was the last task on the current list, and if 584 * If this was the last task on the current list, and if
@@ -812,13 +803,16 @@ void synchronize_rcu_expedited(void)
812 rpcp->exp_tasks = rpcp->blkd_tasks.next; 803 rpcp->exp_tasks = rpcp->blkd_tasks.next;
813 if (rpcp->exp_tasks == &rpcp->blkd_tasks) 804 if (rpcp->exp_tasks == &rpcp->blkd_tasks)
814 rpcp->exp_tasks = NULL; 805 rpcp->exp_tasks = NULL;
815 local_irq_restore(flags);
816 806
817 /* Wait for tail of ->blkd_tasks list to drain. */ 807 /* Wait for tail of ->blkd_tasks list to drain. */
818 if (rcu_preempted_readers_exp()) 808 if (!rcu_preempted_readers_exp())
819 rcu_initiate_expedited_boost(); 809 local_irq_restore(flags);
810 else {
811 rcu_initiate_boost();
812 local_irq_restore(flags);
820 wait_event(sync_rcu_preempt_exp_wq, 813 wait_event(sync_rcu_preempt_exp_wq,
821 !rcu_preempted_readers_exp()); 814 !rcu_preempted_readers_exp());
815 }
822 816
823 /* Clean up and exit. */ 817 /* Clean up and exit. */
824 barrier(); /* ensure expedited GP seen before counter increment. */ 818 barrier(); /* ensure expedited GP seen before counter increment. */
@@ -931,24 +925,17 @@ void __init rcu_scheduler_starting(void)
931 925
932static void rcu_initiate_boost_trace(void) 926static void rcu_initiate_boost_trace(void)
933{ 927{
934 if (rcu_preempt_ctrlblk.gp_tasks == NULL) 928 if (list_empty(&rcu_preempt_ctrlblk.blkd_tasks))
935 rcu_preempt_ctrlblk.n_normal_balk_gp_tasks++; 929 rcu_preempt_ctrlblk.n_balk_blkd_tasks++;
930 else if (rcu_preempt_ctrlblk.gp_tasks == NULL &&
931 rcu_preempt_ctrlblk.exp_tasks == NULL)
932 rcu_preempt_ctrlblk.n_balk_exp_gp_tasks++;
936 else if (rcu_preempt_ctrlblk.boost_tasks != NULL) 933 else if (rcu_preempt_ctrlblk.boost_tasks != NULL)
937 rcu_preempt_ctrlblk.n_normal_balk_boost_tasks++; 934 rcu_preempt_ctrlblk.n_balk_boost_tasks++;
938 else if (rcu_preempt_ctrlblk.boosted_this_gp != 0)
939 rcu_preempt_ctrlblk.n_normal_balk_boosted++;
940 else if (!ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time)) 935 else if (!ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time))
941 rcu_preempt_ctrlblk.n_normal_balk_notyet++; 936 rcu_preempt_ctrlblk.n_balk_notyet++;
942 else
943 rcu_preempt_ctrlblk.n_normal_balk_nos++;
944}
945
946static void rcu_initiate_exp_boost_trace(void)
947{
948 if (list_empty(&rcu_preempt_ctrlblk.blkd_tasks))
949 rcu_preempt_ctrlblk.n_exp_balk_blkd_tasks++;
950 else 937 else
951 rcu_preempt_ctrlblk.n_exp_balk_nos++; 938 rcu_preempt_ctrlblk.n_balk_nos++;
952} 939}
953 940
954#endif /* #ifdef CONFIG_RCU_BOOST */ 941#endif /* #ifdef CONFIG_RCU_BOOST */
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index c224da41890c..2e138db03382 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -131,7 +131,7 @@ struct rcu_torture {
131 131
132static LIST_HEAD(rcu_torture_freelist); 132static LIST_HEAD(rcu_torture_freelist);
133static struct rcu_torture __rcu *rcu_torture_current; 133static struct rcu_torture __rcu *rcu_torture_current;
134static long rcu_torture_current_version; 134static unsigned long rcu_torture_current_version;
135static struct rcu_torture rcu_tortures[10 * RCU_TORTURE_PIPE_LEN]; 135static struct rcu_torture rcu_tortures[10 * RCU_TORTURE_PIPE_LEN];
136static DEFINE_SPINLOCK(rcu_torture_lock); 136static DEFINE_SPINLOCK(rcu_torture_lock);
137static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_count) = 137static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_count) =
@@ -146,8 +146,6 @@ static atomic_t n_rcu_torture_mberror;
146static atomic_t n_rcu_torture_error; 146static atomic_t n_rcu_torture_error;
147static long n_rcu_torture_boost_ktrerror; 147static long n_rcu_torture_boost_ktrerror;
148static long n_rcu_torture_boost_rterror; 148static long n_rcu_torture_boost_rterror;
149static long n_rcu_torture_boost_allocerror;
150static long n_rcu_torture_boost_afferror;
151static long n_rcu_torture_boost_failure; 149static long n_rcu_torture_boost_failure;
152static long n_rcu_torture_boosts; 150static long n_rcu_torture_boosts;
153static long n_rcu_torture_timers; 151static long n_rcu_torture_timers;
@@ -163,11 +161,11 @@ static int stutter_pause_test;
163#endif 161#endif
164int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT; 162int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT;
165 163
166#ifdef CONFIG_RCU_BOOST 164#if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU)
167#define rcu_can_boost() 1 165#define rcu_can_boost() 1
168#else /* #ifdef CONFIG_RCU_BOOST */ 166#else /* #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) */
169#define rcu_can_boost() 0 167#define rcu_can_boost() 0
170#endif /* #else #ifdef CONFIG_RCU_BOOST */ 168#endif /* #else #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) */
171 169
172static unsigned long boost_starttime; /* jiffies of next boost test start. */ 170static unsigned long boost_starttime; /* jiffies of next boost test start. */
173DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */ 171DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */
@@ -751,6 +749,7 @@ static int rcu_torture_boost(void *arg)
751 n_rcu_torture_boost_rterror++; 749 n_rcu_torture_boost_rterror++;
752 } 750 }
753 751
752 init_rcu_head_on_stack(&rbi.rcu);
754 /* Each pass through the following loop does one boost-test cycle. */ 753 /* Each pass through the following loop does one boost-test cycle. */
755 do { 754 do {
756 /* Wait for the next test interval. */ 755 /* Wait for the next test interval. */
@@ -810,6 +809,7 @@ checkwait: rcu_stutter_wait("rcu_torture_boost");
810 809
811 /* Clean up and exit. */ 810 /* Clean up and exit. */
812 VERBOSE_PRINTK_STRING("rcu_torture_boost task stopping"); 811 VERBOSE_PRINTK_STRING("rcu_torture_boost task stopping");
812 destroy_rcu_head_on_stack(&rbi.rcu);
813 rcutorture_shutdown_absorb("rcu_torture_boost"); 813 rcutorture_shutdown_absorb("rcu_torture_boost");
814 while (!kthread_should_stop() || rbi.inflight) 814 while (!kthread_should_stop() || rbi.inflight)
815 schedule_timeout_uninterruptible(1); 815 schedule_timeout_uninterruptible(1);
@@ -886,7 +886,7 @@ rcu_torture_writer(void *arg)
886 old_rp->rtort_pipe_count++; 886 old_rp->rtort_pipe_count++;
887 cur_ops->deferred_free(old_rp); 887 cur_ops->deferred_free(old_rp);
888 } 888 }
889 rcu_torture_current_version++; 889 rcutorture_record_progress(++rcu_torture_current_version);
890 oldbatch = cur_ops->completed(); 890 oldbatch = cur_ops->completed();
891 rcu_stutter_wait("rcu_torture_writer"); 891 rcu_stutter_wait("rcu_torture_writer");
892 } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); 892 } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
@@ -1066,8 +1066,8 @@ rcu_torture_printk(char *page)
1066 } 1066 }
1067 cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG); 1067 cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG);
1068 cnt += sprintf(&page[cnt], 1068 cnt += sprintf(&page[cnt],
1069 "rtc: %p ver: %ld tfle: %d rta: %d rtaf: %d rtf: %d " 1069 "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d "
1070 "rtmbe: %d rtbke: %ld rtbre: %ld rtbae: %ld rtbafe: %ld " 1070 "rtmbe: %d rtbke: %ld rtbre: %ld "
1071 "rtbf: %ld rtb: %ld nt: %ld", 1071 "rtbf: %ld rtb: %ld nt: %ld",
1072 rcu_torture_current, 1072 rcu_torture_current,
1073 rcu_torture_current_version, 1073 rcu_torture_current_version,
@@ -1078,16 +1078,12 @@ rcu_torture_printk(char *page)
1078 atomic_read(&n_rcu_torture_mberror), 1078 atomic_read(&n_rcu_torture_mberror),
1079 n_rcu_torture_boost_ktrerror, 1079 n_rcu_torture_boost_ktrerror,
1080 n_rcu_torture_boost_rterror, 1080 n_rcu_torture_boost_rterror,
1081 n_rcu_torture_boost_allocerror,
1082 n_rcu_torture_boost_afferror,
1083 n_rcu_torture_boost_failure, 1081 n_rcu_torture_boost_failure,
1084 n_rcu_torture_boosts, 1082 n_rcu_torture_boosts,
1085 n_rcu_torture_timers); 1083 n_rcu_torture_timers);
1086 if (atomic_read(&n_rcu_torture_mberror) != 0 || 1084 if (atomic_read(&n_rcu_torture_mberror) != 0 ||
1087 n_rcu_torture_boost_ktrerror != 0 || 1085 n_rcu_torture_boost_ktrerror != 0 ||
1088 n_rcu_torture_boost_rterror != 0 || 1086 n_rcu_torture_boost_rterror != 0 ||
1089 n_rcu_torture_boost_allocerror != 0 ||
1090 n_rcu_torture_boost_afferror != 0 ||
1091 n_rcu_torture_boost_failure != 0) 1087 n_rcu_torture_boost_failure != 0)
1092 cnt += sprintf(&page[cnt], " !!!"); 1088 cnt += sprintf(&page[cnt], " !!!");
1093 cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG); 1089 cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG);
@@ -1331,6 +1327,7 @@ rcu_torture_cleanup(void)
1331 int i; 1327 int i;
1332 1328
1333 mutex_lock(&fullstop_mutex); 1329 mutex_lock(&fullstop_mutex);
1330 rcutorture_record_test_transition();
1334 if (fullstop == FULLSTOP_SHUTDOWN) { 1331 if (fullstop == FULLSTOP_SHUTDOWN) {
1335 printk(KERN_WARNING /* but going down anyway, so... */ 1332 printk(KERN_WARNING /* but going down anyway, so... */
1336 "Concurrent 'rmmod rcutorture' and shutdown illegal!\n"); 1333 "Concurrent 'rmmod rcutorture' and shutdown illegal!\n");
@@ -1486,8 +1483,6 @@ rcu_torture_init(void)
1486 atomic_set(&n_rcu_torture_error, 0); 1483 atomic_set(&n_rcu_torture_error, 0);
1487 n_rcu_torture_boost_ktrerror = 0; 1484 n_rcu_torture_boost_ktrerror = 0;
1488 n_rcu_torture_boost_rterror = 0; 1485 n_rcu_torture_boost_rterror = 0;
1489 n_rcu_torture_boost_allocerror = 0;
1490 n_rcu_torture_boost_afferror = 0;
1491 n_rcu_torture_boost_failure = 0; 1486 n_rcu_torture_boost_failure = 0;
1492 n_rcu_torture_boosts = 0; 1487 n_rcu_torture_boosts = 0;
1493 for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) 1488 for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++)
@@ -1624,6 +1619,7 @@ rcu_torture_init(void)
1624 } 1619 }
1625 } 1620 }
1626 register_reboot_notifier(&rcutorture_shutdown_nb); 1621 register_reboot_notifier(&rcutorture_shutdown_nb);
1622 rcutorture_record_test_transition();
1627 mutex_unlock(&fullstop_mutex); 1623 mutex_unlock(&fullstop_mutex);
1628 return 0; 1624 return 0;
1629 1625
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index dd4aea806f8e..e486f7c3ffb8 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -47,6 +47,8 @@
47#include <linux/mutex.h> 47#include <linux/mutex.h>
48#include <linux/time.h> 48#include <linux/time.h>
49#include <linux/kernel_stat.h> 49#include <linux/kernel_stat.h>
50#include <linux/wait.h>
51#include <linux/kthread.h>
50 52
51#include "rcutree.h" 53#include "rcutree.h"
52 54
@@ -79,10 +81,41 @@ DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
79struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); 81struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state);
80DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); 82DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
81 83
84static struct rcu_state *rcu_state;
85
82int rcu_scheduler_active __read_mostly; 86int rcu_scheduler_active __read_mostly;
83EXPORT_SYMBOL_GPL(rcu_scheduler_active); 87EXPORT_SYMBOL_GPL(rcu_scheduler_active);
84 88
85/* 89/*
90 * Control variables for per-CPU and per-rcu_node kthreads. These
91 * handle all flavors of RCU.
92 */
93static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
94DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
95DEFINE_PER_CPU(int, rcu_cpu_kthread_cpu);
96DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
97static DEFINE_PER_CPU(wait_queue_head_t, rcu_cpu_wq);
98DEFINE_PER_CPU(char, rcu_cpu_has_work);
99static char rcu_kthreads_spawnable;
100
101static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
102static void invoke_rcu_cpu_kthread(void);
103
104#define RCU_KTHREAD_PRIO 1 /* RT priority for per-CPU kthreads. */
105
106/*
107 * Track the rcutorture test sequence number and the update version
108 * number within a given test. The rcutorture_testseq is incremented
109 * on every rcutorture module load and unload, so has an odd value
110 * when a test is running. The rcutorture_vernum is set to zero
111 * when rcutorture starts and is incremented on each rcutorture update.
112 * These variables enable correlating rcutorture output with the
113 * RCU tracing information.
114 */
115unsigned long rcutorture_testseq;
116unsigned long rcutorture_vernum;
117
118/*
86 * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s 119 * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s
87 * permit this function to be invoked without holding the root rcu_node 120 * permit this function to be invoked without holding the root rcu_node
88 * structure's ->lock, but of course results can be subject to change. 121 * structure's ->lock, but of course results can be subject to change.
@@ -124,6 +157,7 @@ void rcu_note_context_switch(int cpu)
124 rcu_sched_qs(cpu); 157 rcu_sched_qs(cpu);
125 rcu_preempt_note_context_switch(cpu); 158 rcu_preempt_note_context_switch(cpu);
126} 159}
160EXPORT_SYMBOL_GPL(rcu_note_context_switch);
127 161
128#ifdef CONFIG_NO_HZ 162#ifdef CONFIG_NO_HZ
129DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { 163DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
@@ -140,10 +174,8 @@ module_param(blimit, int, 0);
140module_param(qhimark, int, 0); 174module_param(qhimark, int, 0);
141module_param(qlowmark, int, 0); 175module_param(qlowmark, int, 0);
142 176
143#ifdef CONFIG_RCU_CPU_STALL_DETECTOR 177int rcu_cpu_stall_suppress __read_mostly;
144int rcu_cpu_stall_suppress __read_mostly = RCU_CPU_STALL_SUPPRESS_INIT;
145module_param(rcu_cpu_stall_suppress, int, 0644); 178module_param(rcu_cpu_stall_suppress, int, 0644);
146#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
147 179
148static void force_quiescent_state(struct rcu_state *rsp, int relaxed); 180static void force_quiescent_state(struct rcu_state *rsp, int relaxed);
149static int rcu_pending(int cpu); 181static int rcu_pending(int cpu);
@@ -176,6 +208,31 @@ void rcu_bh_force_quiescent_state(void)
176EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state); 208EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
177 209
178/* 210/*
211 * Record the number of times rcutorture tests have been initiated and
212 * terminated. This information allows the debugfs tracing stats to be
213 * correlated to the rcutorture messages, even when the rcutorture module
214 * is being repeatedly loaded and unloaded. In other words, we cannot
215 * store this state in rcutorture itself.
216 */
217void rcutorture_record_test_transition(void)
218{
219 rcutorture_testseq++;
220 rcutorture_vernum = 0;
221}
222EXPORT_SYMBOL_GPL(rcutorture_record_test_transition);
223
224/*
225 * Record the number of writer passes through the current rcutorture test.
226 * This is also used to correlate debugfs tracing stats with the rcutorture
227 * messages.
228 */
229void rcutorture_record_progress(unsigned long vernum)
230{
231 rcutorture_vernum++;
232}
233EXPORT_SYMBOL_GPL(rcutorture_record_progress);
234
235/*
179 * Force a quiescent state for RCU-sched. 236 * Force a quiescent state for RCU-sched.
180 */ 237 */
181void rcu_sched_force_quiescent_state(void) 238void rcu_sched_force_quiescent_state(void)
@@ -234,8 +291,8 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp)
234 return 1; 291 return 1;
235 } 292 }
236 293
237 /* If preemptable RCU, no point in sending reschedule IPI. */ 294 /* If preemptible RCU, no point in sending reschedule IPI. */
238 if (rdp->preemptable) 295 if (rdp->preemptible)
239 return 0; 296 return 0;
240 297
241 /* The CPU is online, so send it a reschedule IPI. */ 298 /* The CPU is online, so send it a reschedule IPI. */
@@ -450,8 +507,6 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
450 507
451#endif /* #else #ifdef CONFIG_NO_HZ */ 508#endif /* #else #ifdef CONFIG_NO_HZ */
452 509
453#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
454
455int rcu_cpu_stall_suppress __read_mostly; 510int rcu_cpu_stall_suppress __read_mostly;
456 511
457static void record_gp_stall_check_time(struct rcu_state *rsp) 512static void record_gp_stall_check_time(struct rcu_state *rsp)
@@ -537,21 +592,24 @@ static void print_cpu_stall(struct rcu_state *rsp)
537 592
538static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) 593static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
539{ 594{
540 long delta; 595 unsigned long j;
596 unsigned long js;
541 struct rcu_node *rnp; 597 struct rcu_node *rnp;
542 598
543 if (rcu_cpu_stall_suppress) 599 if (rcu_cpu_stall_suppress)
544 return; 600 return;
545 delta = jiffies - ACCESS_ONCE(rsp->jiffies_stall); 601 j = ACCESS_ONCE(jiffies);
602 js = ACCESS_ONCE(rsp->jiffies_stall);
546 rnp = rdp->mynode; 603 rnp = rdp->mynode;
547 if ((ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && delta >= 0) { 604 if ((ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && ULONG_CMP_GE(j, js)) {
548 605
549 /* We haven't checked in, so go dump stack. */ 606 /* We haven't checked in, so go dump stack. */
550 print_cpu_stall(rsp); 607 print_cpu_stall(rsp);
551 608
552 } else if (rcu_gp_in_progress(rsp) && delta >= RCU_STALL_RAT_DELAY) { 609 } else if (rcu_gp_in_progress(rsp) &&
610 ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY)) {
553 611
554 /* They had two time units to dump stack, so complain. */ 612 /* They had a few time units to dump stack, so complain. */
555 print_other_cpu_stall(rsp); 613 print_other_cpu_stall(rsp);
556 } 614 }
557} 615}
@@ -587,26 +645,6 @@ static void __init check_cpu_stall_init(void)
587 atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block); 645 atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block);
588} 646}
589 647
590#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
591
592static void record_gp_stall_check_time(struct rcu_state *rsp)
593{
594}
595
596static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
597{
598}
599
600void rcu_cpu_stall_reset(void)
601{
602}
603
604static void __init check_cpu_stall_init(void)
605{
606}
607
608#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
609
610/* 648/*
611 * Update CPU-local rcu_data state to record the newly noticed grace period. 649 * Update CPU-local rcu_data state to record the newly noticed grace period.
612 * This is used both when we started the grace period and when we notice 650 * This is used both when we started the grace period and when we notice
@@ -809,6 +847,7 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
809 rnp->completed = rsp->completed; 847 rnp->completed = rsp->completed;
810 rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */ 848 rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */
811 rcu_start_gp_per_cpu(rsp, rnp, rdp); 849 rcu_start_gp_per_cpu(rsp, rnp, rdp);
850 rcu_preempt_boost_start_gp(rnp);
812 raw_spin_unlock_irqrestore(&rnp->lock, flags); 851 raw_spin_unlock_irqrestore(&rnp->lock, flags);
813 return; 852 return;
814 } 853 }
@@ -844,6 +883,7 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
844 rnp->completed = rsp->completed; 883 rnp->completed = rsp->completed;
845 if (rnp == rdp->mynode) 884 if (rnp == rdp->mynode)
846 rcu_start_gp_per_cpu(rsp, rnp, rdp); 885 rcu_start_gp_per_cpu(rsp, rnp, rdp);
886 rcu_preempt_boost_start_gp(rnp);
847 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 887 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
848 } 888 }
849 889
@@ -864,7 +904,12 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
864static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) 904static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
865 __releases(rcu_get_root(rsp)->lock) 905 __releases(rcu_get_root(rsp)->lock)
866{ 906{
907 unsigned long gp_duration;
908
867 WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); 909 WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
910 gp_duration = jiffies - rsp->gp_start;
911 if (gp_duration > rsp->gp_max)
912 rsp->gp_max = gp_duration;
868 rsp->completed = rsp->gpnum; 913 rsp->completed = rsp->gpnum;
869 rsp->signaled = RCU_GP_IDLE; 914 rsp->signaled = RCU_GP_IDLE;
870 rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */ 915 rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */
@@ -894,7 +939,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
894 return; 939 return;
895 } 940 }
896 rnp->qsmask &= ~mask; 941 rnp->qsmask &= ~mask;
897 if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) { 942 if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
898 943
899 /* Other bits still set at this level, so done. */ 944 /* Other bits still set at this level, so done. */
900 raw_spin_unlock_irqrestore(&rnp->lock, flags); 945 raw_spin_unlock_irqrestore(&rnp->lock, flags);
@@ -1037,6 +1082,8 @@ static void rcu_send_cbs_to_online(struct rcu_state *rsp)
1037/* 1082/*
1038 * Remove the outgoing CPU from the bitmasks in the rcu_node hierarchy 1083 * Remove the outgoing CPU from the bitmasks in the rcu_node hierarchy
1039 * and move all callbacks from the outgoing CPU to the current one. 1084 * and move all callbacks from the outgoing CPU to the current one.
1085 * There can only be one CPU hotplug operation at a time, so no other
1086 * CPU can be attempting to update rcu_cpu_kthread_task.
1040 */ 1087 */
1041static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) 1088static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
1042{ 1089{
@@ -1045,6 +1092,14 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
1045 int need_report = 0; 1092 int need_report = 0;
1046 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 1093 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
1047 struct rcu_node *rnp; 1094 struct rcu_node *rnp;
1095 struct task_struct *t;
1096
1097 /* Stop the CPU's kthread. */
1098 t = per_cpu(rcu_cpu_kthread_task, cpu);
1099 if (t != NULL) {
1100 per_cpu(rcu_cpu_kthread_task, cpu) = NULL;
1101 kthread_stop(t);
1102 }
1048 1103
1049 /* Exclude any attempts to start a new grace period. */ 1104 /* Exclude any attempts to start a new grace period. */
1050 raw_spin_lock_irqsave(&rsp->onofflock, flags); 1105 raw_spin_lock_irqsave(&rsp->onofflock, flags);
@@ -1082,6 +1137,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
1082 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1137 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1083 if (need_report & RCU_OFL_TASKS_EXP_GP) 1138 if (need_report & RCU_OFL_TASKS_EXP_GP)
1084 rcu_report_exp_rnp(rsp, rnp); 1139 rcu_report_exp_rnp(rsp, rnp);
1140 rcu_node_kthread_setaffinity(rnp, -1);
1085} 1141}
1086 1142
1087/* 1143/*
@@ -1143,7 +1199,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
1143 next = list->next; 1199 next = list->next;
1144 prefetch(next); 1200 prefetch(next);
1145 debug_rcu_head_unqueue(list); 1201 debug_rcu_head_unqueue(list);
1146 list->func(list); 1202 __rcu_reclaim(list);
1147 list = next; 1203 list = next;
1148 if (++count >= rdp->blimit) 1204 if (++count >= rdp->blimit)
1149 break; 1205 break;
@@ -1179,7 +1235,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
1179 1235
1180 /* Re-raise the RCU softirq if there are callbacks remaining. */ 1236 /* Re-raise the RCU softirq if there are callbacks remaining. */
1181 if (cpu_has_callbacks_ready_to_invoke(rdp)) 1237 if (cpu_has_callbacks_ready_to_invoke(rdp))
1182 raise_softirq(RCU_SOFTIRQ); 1238 invoke_rcu_cpu_kthread();
1183} 1239}
1184 1240
1185/* 1241/*
@@ -1225,7 +1281,7 @@ void rcu_check_callbacks(int cpu, int user)
1225 } 1281 }
1226 rcu_preempt_check_callbacks(cpu); 1282 rcu_preempt_check_callbacks(cpu);
1227 if (rcu_pending(cpu)) 1283 if (rcu_pending(cpu))
1228 raise_softirq(RCU_SOFTIRQ); 1284 invoke_rcu_cpu_kthread();
1229} 1285}
1230 1286
1231#ifdef CONFIG_SMP 1287#ifdef CONFIG_SMP
@@ -1233,6 +1289,8 @@ void rcu_check_callbacks(int cpu, int user)
1233/* 1289/*
1234 * Scan the leaf rcu_node structures, processing dyntick state for any that 1290 * Scan the leaf rcu_node structures, processing dyntick state for any that
1235 * have not yet encountered a quiescent state, using the function specified. 1291 * have not yet encountered a quiescent state, using the function specified.
1292 * Also initiate boosting for any threads blocked on the root rcu_node.
1293 *
1236 * The caller must have suppressed start of new grace periods. 1294 * The caller must have suppressed start of new grace periods.
1237 */ 1295 */
1238static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *)) 1296static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *))
@@ -1251,7 +1309,7 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *))
1251 return; 1309 return;
1252 } 1310 }
1253 if (rnp->qsmask == 0) { 1311 if (rnp->qsmask == 0) {
1254 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1312 rcu_initiate_boost(rnp, flags); /* releases rnp->lock */
1255 continue; 1313 continue;
1256 } 1314 }
1257 cpu = rnp->grplo; 1315 cpu = rnp->grplo;
@@ -1269,6 +1327,11 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *))
1269 } 1327 }
1270 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1328 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1271 } 1329 }
1330 rnp = rcu_get_root(rsp);
1331 if (rnp->qsmask == 0) {
1332 raw_spin_lock_irqsave(&rnp->lock, flags);
1333 rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */
1334 }
1272} 1335}
1273 1336
1274/* 1337/*
@@ -1389,7 +1452,7 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
1389/* 1452/*
1390 * Do softirq processing for the current CPU. 1453 * Do softirq processing for the current CPU.
1391 */ 1454 */
1392static void rcu_process_callbacks(struct softirq_action *unused) 1455static void rcu_process_callbacks(void)
1393{ 1456{
1394 /* 1457 /*
1395 * Memory references from any prior RCU read-side critical sections 1458 * Memory references from any prior RCU read-side critical sections
@@ -1414,6 +1477,347 @@ static void rcu_process_callbacks(struct softirq_action *unused)
1414 rcu_needs_cpu_flush(); 1477 rcu_needs_cpu_flush();
1415} 1478}
1416 1479
1480/*
1481 * Wake up the current CPU's kthread. This replaces raise_softirq()
1482 * in earlier versions of RCU. Note that because we are running on
1483 * the current CPU with interrupts disabled, the rcu_cpu_kthread_task
1484 * cannot disappear out from under us.
1485 */
1486static void invoke_rcu_cpu_kthread(void)
1487{
1488 unsigned long flags;
1489
1490 local_irq_save(flags);
1491 __this_cpu_write(rcu_cpu_has_work, 1);
1492 if (__this_cpu_read(rcu_cpu_kthread_task) == NULL) {
1493 local_irq_restore(flags);
1494 return;
1495 }
1496 wake_up(&__get_cpu_var(rcu_cpu_wq));
1497 local_irq_restore(flags);
1498}
1499
1500/*
1501 * Wake up the specified per-rcu_node-structure kthread.
1502 * Because the per-rcu_node kthreads are immortal, we don't need
1503 * to do anything to keep them alive.
1504 */
1505static void invoke_rcu_node_kthread(struct rcu_node *rnp)
1506{
1507 struct task_struct *t;
1508
1509 t = rnp->node_kthread_task;
1510 if (t != NULL)
1511 wake_up_process(t);
1512}
1513
1514/*
1515 * Set the specified CPU's kthread to run RT or not, as specified by
1516 * the to_rt argument. The CPU-hotplug locks are held, so the task
1517 * is not going away.
1518 */
1519static void rcu_cpu_kthread_setrt(int cpu, int to_rt)
1520{
1521 int policy;
1522 struct sched_param sp;
1523 struct task_struct *t;
1524
1525 t = per_cpu(rcu_cpu_kthread_task, cpu);
1526 if (t == NULL)
1527 return;
1528 if (to_rt) {
1529 policy = SCHED_FIFO;
1530 sp.sched_priority = RCU_KTHREAD_PRIO;
1531 } else {
1532 policy = SCHED_NORMAL;
1533 sp.sched_priority = 0;
1534 }
1535 sched_setscheduler_nocheck(t, policy, &sp);
1536}
1537
1538/*
1539 * Timer handler to initiate the waking up of per-CPU kthreads that
1540 * have yielded the CPU due to excess numbers of RCU callbacks.
1541 * We wake up the per-rcu_node kthread, which in turn will wake up
1542 * the booster kthread.
1543 */
1544static void rcu_cpu_kthread_timer(unsigned long arg)
1545{
1546 unsigned long flags;
1547 struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, arg);
1548 struct rcu_node *rnp = rdp->mynode;
1549
1550 raw_spin_lock_irqsave(&rnp->lock, flags);
1551 rnp->wakemask |= rdp->grpmask;
1552 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1553 invoke_rcu_node_kthread(rnp);
1554}
1555
1556/*
1557 * Drop to non-real-time priority and yield, but only after posting a
1558 * timer that will cause us to regain our real-time priority if we
1559 * remain preempted. Either way, we restore our real-time priority
1560 * before returning.
1561 */
1562static void rcu_yield(void (*f)(unsigned long), unsigned long arg)
1563{
1564 struct sched_param sp;
1565 struct timer_list yield_timer;
1566
1567 setup_timer_on_stack(&yield_timer, f, arg);
1568 mod_timer(&yield_timer, jiffies + 2);
1569 sp.sched_priority = 0;
1570 sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp);
1571 set_user_nice(current, 19);
1572 schedule();
1573 sp.sched_priority = RCU_KTHREAD_PRIO;
1574 sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
1575 del_timer(&yield_timer);
1576}
1577
1578/*
1579 * Handle cases where the rcu_cpu_kthread() ends up on the wrong CPU.
1580 * This can happen while the corresponding CPU is either coming online
1581 * or going offline. We cannot wait until the CPU is fully online
1582 * before starting the kthread, because the various notifier functions
1583 * can wait for RCU grace periods. So we park rcu_cpu_kthread() until
1584 * the corresponding CPU is online.
1585 *
1586 * Return 1 if the kthread needs to stop, 0 otherwise.
1587 *
1588 * Caller must disable bh. This function can momentarily enable it.
1589 */
1590static int rcu_cpu_kthread_should_stop(int cpu)
1591{
1592 while (cpu_is_offline(cpu) ||
1593 !cpumask_equal(&current->cpus_allowed, cpumask_of(cpu)) ||
1594 smp_processor_id() != cpu) {
1595 if (kthread_should_stop())
1596 return 1;
1597 per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
1598 per_cpu(rcu_cpu_kthread_cpu, cpu) = raw_smp_processor_id();
1599 local_bh_enable();
1600 schedule_timeout_uninterruptible(1);
1601 if (!cpumask_equal(&current->cpus_allowed, cpumask_of(cpu)))
1602 set_cpus_allowed_ptr(current, cpumask_of(cpu));
1603 local_bh_disable();
1604 }
1605 per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu;
1606 return 0;
1607}
1608
1609/*
1610 * Per-CPU kernel thread that invokes RCU callbacks. This replaces the
1611 * earlier RCU softirq.
1612 */
1613static int rcu_cpu_kthread(void *arg)
1614{
1615 int cpu = (int)(long)arg;
1616 unsigned long flags;
1617 int spincnt = 0;
1618 unsigned int *statusp = &per_cpu(rcu_cpu_kthread_status, cpu);
1619 wait_queue_head_t *wqp = &per_cpu(rcu_cpu_wq, cpu);
1620 char work;
1621 char *workp = &per_cpu(rcu_cpu_has_work, cpu);
1622
1623 for (;;) {
1624 *statusp = RCU_KTHREAD_WAITING;
1625 wait_event_interruptible(*wqp,
1626 *workp != 0 || kthread_should_stop());
1627 local_bh_disable();
1628 if (rcu_cpu_kthread_should_stop(cpu)) {
1629 local_bh_enable();
1630 break;
1631 }
1632 *statusp = RCU_KTHREAD_RUNNING;
1633 per_cpu(rcu_cpu_kthread_loops, cpu)++;
1634 local_irq_save(flags);
1635 work = *workp;
1636 *workp = 0;
1637 local_irq_restore(flags);
1638 if (work)
1639 rcu_process_callbacks();
1640 local_bh_enable();
1641 if (*workp != 0)
1642 spincnt++;
1643 else
1644 spincnt = 0;
1645 if (spincnt > 10) {
1646 *statusp = RCU_KTHREAD_YIELDING;
1647 rcu_yield(rcu_cpu_kthread_timer, (unsigned long)cpu);
1648 spincnt = 0;
1649 }
1650 }
1651 *statusp = RCU_KTHREAD_STOPPED;
1652 return 0;
1653}
1654
1655/*
1656 * Spawn a per-CPU kthread, setting up affinity and priority.
1657 * Because the CPU hotplug lock is held, no other CPU will be attempting
1658 * to manipulate rcu_cpu_kthread_task. There might be another CPU
1659 * attempting to access it during boot, but the locking in kthread_bind()
1660 * will enforce sufficient ordering.
1661 */
1662static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu)
1663{
1664 struct sched_param sp;
1665 struct task_struct *t;
1666
1667 if (!rcu_kthreads_spawnable ||
1668 per_cpu(rcu_cpu_kthread_task, cpu) != NULL)
1669 return 0;
1670 t = kthread_create(rcu_cpu_kthread, (void *)(long)cpu, "rcuc%d", cpu);
1671 if (IS_ERR(t))
1672 return PTR_ERR(t);
1673 kthread_bind(t, cpu);
1674 per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu;
1675 WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL);
1676 per_cpu(rcu_cpu_kthread_task, cpu) = t;
1677 wake_up_process(t);
1678 sp.sched_priority = RCU_KTHREAD_PRIO;
1679 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
1680 return 0;
1681}
1682
1683/*
1684 * Per-rcu_node kthread, which is in charge of waking up the per-CPU
1685 * kthreads when needed. We ignore requests to wake up kthreads
1686 * for offline CPUs, which is OK because force_quiescent_state()
1687 * takes care of this case.
1688 */
1689static int rcu_node_kthread(void *arg)
1690{
1691 int cpu;
1692 unsigned long flags;
1693 unsigned long mask;
1694 struct rcu_node *rnp = (struct rcu_node *)arg;
1695 struct sched_param sp;
1696 struct task_struct *t;
1697
1698 for (;;) {
1699 rnp->node_kthread_status = RCU_KTHREAD_WAITING;
1700 wait_event_interruptible(rnp->node_wq, rnp->wakemask != 0);
1701 rnp->node_kthread_status = RCU_KTHREAD_RUNNING;
1702 raw_spin_lock_irqsave(&rnp->lock, flags);
1703 mask = rnp->wakemask;
1704 rnp->wakemask = 0;
1705 rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */
1706 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) {
1707 if ((mask & 0x1) == 0)
1708 continue;
1709 preempt_disable();
1710 t = per_cpu(rcu_cpu_kthread_task, cpu);
1711 if (!cpu_online(cpu) || t == NULL) {
1712 preempt_enable();
1713 continue;
1714 }
1715 per_cpu(rcu_cpu_has_work, cpu) = 1;
1716 sp.sched_priority = RCU_KTHREAD_PRIO;
1717 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
1718 preempt_enable();
1719 }
1720 }
1721 /* NOTREACHED */
1722 rnp->node_kthread_status = RCU_KTHREAD_STOPPED;
1723 return 0;
1724}
1725
1726/*
1727 * Set the per-rcu_node kthread's affinity to cover all CPUs that are
1728 * served by the rcu_node in question. The CPU hotplug lock is still
1729 * held, so the value of rnp->qsmaskinit will be stable.
1730 *
1731 * We don't include outgoingcpu in the affinity set, use -1 if there is
1732 * no outgoing CPU. If there are no CPUs left in the affinity set,
1733 * this function allows the kthread to execute on any CPU.
1734 */
1735static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
1736{
1737 cpumask_var_t cm;
1738 int cpu;
1739 unsigned long mask = rnp->qsmaskinit;
1740
1741 if (rnp->node_kthread_task == NULL)
1742 return;
1743 if (!alloc_cpumask_var(&cm, GFP_KERNEL))
1744 return;
1745 cpumask_clear(cm);
1746 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1)
1747 if ((mask & 0x1) && cpu != outgoingcpu)
1748 cpumask_set_cpu(cpu, cm);
1749 if (cpumask_weight(cm) == 0) {
1750 cpumask_setall(cm);
1751 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++)
1752 cpumask_clear_cpu(cpu, cm);
1753 WARN_ON_ONCE(cpumask_weight(cm) == 0);
1754 }
1755 set_cpus_allowed_ptr(rnp->node_kthread_task, cm);
1756 rcu_boost_kthread_setaffinity(rnp, cm);
1757 free_cpumask_var(cm);
1758}
1759
1760/*
1761 * Spawn a per-rcu_node kthread, setting priority and affinity.
1762 * Called during boot before online/offline can happen, or, if
1763 * during runtime, with the main CPU-hotplug locks held. So only
1764 * one of these can be executing at a time.
1765 */
1766static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp,
1767 struct rcu_node *rnp)
1768{
1769 unsigned long flags;
1770 int rnp_index = rnp - &rsp->node[0];
1771 struct sched_param sp;
1772 struct task_struct *t;
1773
1774 if (!rcu_kthreads_spawnable ||
1775 rnp->qsmaskinit == 0)
1776 return 0;
1777 if (rnp->node_kthread_task == NULL) {
1778 t = kthread_create(rcu_node_kthread, (void *)rnp,
1779 "rcun%d", rnp_index);
1780 if (IS_ERR(t))
1781 return PTR_ERR(t);
1782 raw_spin_lock_irqsave(&rnp->lock, flags);
1783 rnp->node_kthread_task = t;
1784 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1785 wake_up_process(t);
1786 sp.sched_priority = 99;
1787 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
1788 }
1789 return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index);
1790}
1791
1792/*
1793 * Spawn all kthreads -- called as soon as the scheduler is running.
1794 */
1795static int __init rcu_spawn_kthreads(void)
1796{
1797 int cpu;
1798 struct rcu_node *rnp;
1799
1800 rcu_kthreads_spawnable = 1;
1801 for_each_possible_cpu(cpu) {
1802 init_waitqueue_head(&per_cpu(rcu_cpu_wq, cpu));
1803 per_cpu(rcu_cpu_has_work, cpu) = 0;
1804 if (cpu_online(cpu))
1805 (void)rcu_spawn_one_cpu_kthread(cpu);
1806 }
1807 rnp = rcu_get_root(rcu_state);
1808 init_waitqueue_head(&rnp->node_wq);
1809 rcu_init_boost_waitqueue(rnp);
1810 (void)rcu_spawn_one_node_kthread(rcu_state, rnp);
1811 if (NUM_RCU_NODES > 1)
1812 rcu_for_each_leaf_node(rcu_state, rnp) {
1813 init_waitqueue_head(&rnp->node_wq);
1814 rcu_init_boost_waitqueue(rnp);
1815 (void)rcu_spawn_one_node_kthread(rcu_state, rnp);
1816 }
1817 return 0;
1818}
1819early_initcall(rcu_spawn_kthreads);
1820
1417static void 1821static void
1418__call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), 1822__call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
1419 struct rcu_state *rsp) 1823 struct rcu_state *rsp)
@@ -1439,6 +1843,13 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
1439 /* Add the callback to our list. */ 1843 /* Add the callback to our list. */
1440 *rdp->nxttail[RCU_NEXT_TAIL] = head; 1844 *rdp->nxttail[RCU_NEXT_TAIL] = head;
1441 rdp->nxttail[RCU_NEXT_TAIL] = &head->next; 1845 rdp->nxttail[RCU_NEXT_TAIL] = &head->next;
1846 rdp->qlen++;
1847
1848 /* If interrupts were disabled, don't dive into RCU core. */
1849 if (irqs_disabled_flags(flags)) {
1850 local_irq_restore(flags);
1851 return;
1852 }
1442 1853
1443 /* 1854 /*
1444 * Force the grace period if too many callbacks or too long waiting. 1855 * Force the grace period if too many callbacks or too long waiting.
@@ -1447,7 +1858,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
1447 * invoking force_quiescent_state() if the newly enqueued callback 1858 * invoking force_quiescent_state() if the newly enqueued callback
1448 * is the only one waiting for a grace period to complete. 1859 * is the only one waiting for a grace period to complete.
1449 */ 1860 */
1450 if (unlikely(++rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) { 1861 if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) {
1451 1862
1452 /* Are we ignoring a completed grace period? */ 1863 /* Are we ignoring a completed grace period? */
1453 rcu_process_gp_end(rsp, rdp); 1864 rcu_process_gp_end(rsp, rdp);
@@ -1583,7 +1994,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
1583 * or RCU-bh, force a local reschedule. 1994 * or RCU-bh, force a local reschedule.
1584 */ 1995 */
1585 rdp->n_rp_qs_pending++; 1996 rdp->n_rp_qs_pending++;
1586 if (!rdp->preemptable && 1997 if (!rdp->preemptible &&
1587 ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs) - 1, 1998 ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs) - 1,
1588 jiffies)) 1999 jiffies))
1589 set_need_resched(); 2000 set_need_resched();
@@ -1760,7 +2171,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
1760 * that this CPU cannot possibly have any RCU callbacks in flight yet. 2171 * that this CPU cannot possibly have any RCU callbacks in flight yet.
1761 */ 2172 */
1762static void __cpuinit 2173static void __cpuinit
1763rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable) 2174rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
1764{ 2175{
1765 unsigned long flags; 2176 unsigned long flags;
1766 unsigned long mask; 2177 unsigned long mask;
@@ -1772,7 +2183,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable)
1772 rdp->passed_quiesc = 0; /* We could be racing with new GP, */ 2183 rdp->passed_quiesc = 0; /* We could be racing with new GP, */
1773 rdp->qs_pending = 1; /* so set up to respond to current GP. */ 2184 rdp->qs_pending = 1; /* so set up to respond to current GP. */
1774 rdp->beenonline = 1; /* We have now been online. */ 2185 rdp->beenonline = 1; /* We have now been online. */
1775 rdp->preemptable = preemptable; 2186 rdp->preemptible = preemptible;
1776 rdp->qlen_last_fqs_check = 0; 2187 rdp->qlen_last_fqs_check = 0;
1777 rdp->n_force_qs_snap = rsp->n_force_qs; 2188 rdp->n_force_qs_snap = rsp->n_force_qs;
1778 rdp->blimit = blimit; 2189 rdp->blimit = blimit;
@@ -1813,6 +2224,19 @@ static void __cpuinit rcu_online_cpu(int cpu)
1813 rcu_preempt_init_percpu_data(cpu); 2224 rcu_preempt_init_percpu_data(cpu);
1814} 2225}
1815 2226
2227static void __cpuinit rcu_online_kthreads(int cpu)
2228{
2229 struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
2230 struct rcu_node *rnp = rdp->mynode;
2231
2232 /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */
2233 if (rcu_kthreads_spawnable) {
2234 (void)rcu_spawn_one_cpu_kthread(cpu);
2235 if (rnp->node_kthread_task == NULL)
2236 (void)rcu_spawn_one_node_kthread(rcu_state, rnp);
2237 }
2238}
2239
1816/* 2240/*
1817 * Handle CPU online/offline notification events. 2241 * Handle CPU online/offline notification events.
1818 */ 2242 */
@@ -1820,11 +2244,23 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
1820 unsigned long action, void *hcpu) 2244 unsigned long action, void *hcpu)
1821{ 2245{
1822 long cpu = (long)hcpu; 2246 long cpu = (long)hcpu;
2247 struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
2248 struct rcu_node *rnp = rdp->mynode;
1823 2249
1824 switch (action) { 2250 switch (action) {
1825 case CPU_UP_PREPARE: 2251 case CPU_UP_PREPARE:
1826 case CPU_UP_PREPARE_FROZEN: 2252 case CPU_UP_PREPARE_FROZEN:
1827 rcu_online_cpu(cpu); 2253 rcu_online_cpu(cpu);
2254 rcu_online_kthreads(cpu);
2255 break;
2256 case CPU_ONLINE:
2257 case CPU_DOWN_FAILED:
2258 rcu_node_kthread_setaffinity(rnp, -1);
2259 rcu_cpu_kthread_setrt(cpu, 1);
2260 break;
2261 case CPU_DOWN_PREPARE:
2262 rcu_node_kthread_setaffinity(rnp, cpu);
2263 rcu_cpu_kthread_setrt(cpu, 0);
1828 break; 2264 break;
1829 case CPU_DYING: 2265 case CPU_DYING:
1830 case CPU_DYING_FROZEN: 2266 case CPU_DYING_FROZEN:
@@ -1943,10 +2379,7 @@ static void __init rcu_init_one(struct rcu_state *rsp,
1943 j / rsp->levelspread[i - 1]; 2379 j / rsp->levelspread[i - 1];
1944 } 2380 }
1945 rnp->level = i; 2381 rnp->level = i;
1946 INIT_LIST_HEAD(&rnp->blocked_tasks[0]); 2382 INIT_LIST_HEAD(&rnp->blkd_tasks);
1947 INIT_LIST_HEAD(&rnp->blocked_tasks[1]);
1948 INIT_LIST_HEAD(&rnp->blocked_tasks[2]);
1949 INIT_LIST_HEAD(&rnp->blocked_tasks[3]);
1950 } 2383 }
1951 } 2384 }
1952 2385
@@ -1968,7 +2401,6 @@ void __init rcu_init(void)
1968 rcu_init_one(&rcu_sched_state, &rcu_sched_data); 2401 rcu_init_one(&rcu_sched_state, &rcu_sched_data);
1969 rcu_init_one(&rcu_bh_state, &rcu_bh_data); 2402 rcu_init_one(&rcu_bh_state, &rcu_bh_data);
1970 __rcu_init_preempt(); 2403 __rcu_init_preempt();
1971 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
1972 2404
1973 /* 2405 /*
1974 * We don't need protection against CPU-hotplug here because 2406 * We don't need protection against CPU-hotplug here because
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index e8f057e44e3e..257664815d5d 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -91,6 +91,14 @@ struct rcu_dynticks {
91 /* remains even for nmi from irq handler. */ 91 /* remains even for nmi from irq handler. */
92}; 92};
93 93
94/* RCU's kthread states for tracing. */
95#define RCU_KTHREAD_STOPPED 0
96#define RCU_KTHREAD_RUNNING 1
97#define RCU_KTHREAD_WAITING 2
98#define RCU_KTHREAD_OFFCPU 3
99#define RCU_KTHREAD_YIELDING 4
100#define RCU_KTHREAD_MAX 4
101
94/* 102/*
95 * Definition for node within the RCU grace-period-detection hierarchy. 103 * Definition for node within the RCU grace-period-detection hierarchy.
96 */ 104 */
@@ -109,10 +117,11 @@ struct rcu_node {
109 /* an rcu_data structure, otherwise, each */ 117 /* an rcu_data structure, otherwise, each */
110 /* bit corresponds to a child rcu_node */ 118 /* bit corresponds to a child rcu_node */
111 /* structure. */ 119 /* structure. */
112 unsigned long expmask; /* Groups that have ->blocked_tasks[] */ 120 unsigned long expmask; /* Groups that have ->blkd_tasks */
113 /* elements that need to drain to allow the */ 121 /* elements that need to drain to allow the */
114 /* current expedited grace period to */ 122 /* current expedited grace period to */
115 /* complete (only for TREE_PREEMPT_RCU). */ 123 /* complete (only for TREE_PREEMPT_RCU). */
124 unsigned long wakemask; /* CPUs whose kthread needs to be awakened. */
116 unsigned long qsmaskinit; 125 unsigned long qsmaskinit;
117 /* Per-GP initial value for qsmask & expmask. */ 126 /* Per-GP initial value for qsmask & expmask. */
118 unsigned long grpmask; /* Mask to apply to parent qsmask. */ 127 unsigned long grpmask; /* Mask to apply to parent qsmask. */
@@ -122,11 +131,68 @@ struct rcu_node {
122 u8 grpnum; /* CPU/group number for next level up. */ 131 u8 grpnum; /* CPU/group number for next level up. */
123 u8 level; /* root is at level 0. */ 132 u8 level; /* root is at level 0. */
124 struct rcu_node *parent; 133 struct rcu_node *parent;
125 struct list_head blocked_tasks[4]; 134 struct list_head blkd_tasks;
126 /* Tasks blocked in RCU read-side critsect. */ 135 /* Tasks blocked in RCU read-side critical */
127 /* Grace period number (->gpnum) x blocked */ 136 /* section. Tasks are placed at the head */
128 /* by tasks on the (x & 0x1) element of the */ 137 /* of this list and age towards the tail. */
129 /* blocked_tasks[] array. */ 138 struct list_head *gp_tasks;
139 /* Pointer to the first task blocking the */
140 /* current grace period, or NULL if there */
141 /* is no such task. */
142 struct list_head *exp_tasks;
143 /* Pointer to the first task blocking the */
144 /* current expedited grace period, or NULL */
145 /* if there is no such task. If there */
146 /* is no current expedited grace period, */
147 /* then there cannot be any such task. */
148#ifdef CONFIG_RCU_BOOST
149 struct list_head *boost_tasks;
150 /* Pointer to first task that needs to be */
151 /* priority boosted, or NULL if no priority */
152 /* boosting is needed for this rcu_node */
153 /* structure. If there are no tasks */
154 /* queued on this rcu_node structure that */
155 /* are blocking the current grace period, */
156 /* there can be no such task. */
157 unsigned long boost_time;
158 /* When to start boosting (jiffies). */
159 struct task_struct *boost_kthread_task;
160 /* kthread that takes care of priority */
161 /* boosting for this rcu_node structure. */
162 wait_queue_head_t boost_wq;
163 /* Wait queue on which to park the boost */
164 /* kthread. */
165 unsigned int boost_kthread_status;
166 /* State of boost_kthread_task for tracing. */
167 unsigned long n_tasks_boosted;
168 /* Total number of tasks boosted. */
169 unsigned long n_exp_boosts;
170 /* Number of tasks boosted for expedited GP. */
171 unsigned long n_normal_boosts;
172 /* Number of tasks boosted for normal GP. */
173 unsigned long n_balk_blkd_tasks;
174 /* Refused to boost: no blocked tasks. */
175 unsigned long n_balk_exp_gp_tasks;
176 /* Refused to boost: nothing blocking GP. */
177 unsigned long n_balk_boost_tasks;
178 /* Refused to boost: already boosting. */
179 unsigned long n_balk_notblocked;
180 /* Refused to boost: RCU RS CS still running. */
181 unsigned long n_balk_notyet;
182 /* Refused to boost: not yet time. */
183 unsigned long n_balk_nos;
184 /* Refused to boost: not sure why, though. */
185 /* This can happen due to race conditions. */
186#endif /* #ifdef CONFIG_RCU_BOOST */
187 struct task_struct *node_kthread_task;
188 /* kthread that takes care of this rcu_node */
189 /* structure, for example, awakening the */
190 /* per-CPU kthreads as needed. */
191 wait_queue_head_t node_wq;
192 /* Wait queue on which to park the per-node */
193 /* kthread. */
194 unsigned int node_kthread_status;
195 /* State of node_kthread_task for tracing. */
130} ____cacheline_internodealigned_in_smp; 196} ____cacheline_internodealigned_in_smp;
131 197
132/* 198/*
@@ -175,7 +241,7 @@ struct rcu_data {
175 bool passed_quiesc; /* User-mode/idle loop etc. */ 241 bool passed_quiesc; /* User-mode/idle loop etc. */
176 bool qs_pending; /* Core waits for quiesc state. */ 242 bool qs_pending; /* Core waits for quiesc state. */
177 bool beenonline; /* CPU online at least once. */ 243 bool beenonline; /* CPU online at least once. */
178 bool preemptable; /* Preemptable RCU? */ 244 bool preemptible; /* Preemptible RCU? */
179 struct rcu_node *mynode; /* This CPU's leaf of hierarchy */ 245 struct rcu_node *mynode; /* This CPU's leaf of hierarchy */
180 unsigned long grpmask; /* Mask to apply to leaf qsmask. */ 246 unsigned long grpmask; /* Mask to apply to leaf qsmask. */
181 247
@@ -254,7 +320,6 @@ struct rcu_data {
254#endif /* #else #ifdef CONFIG_NO_HZ */ 320#endif /* #else #ifdef CONFIG_NO_HZ */
255 321
256#define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */ 322#define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */
257#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
258 323
259#ifdef CONFIG_PROVE_RCU 324#ifdef CONFIG_PROVE_RCU
260#define RCU_STALL_DELAY_DELTA (5 * HZ) 325#define RCU_STALL_DELAY_DELTA (5 * HZ)
@@ -272,13 +337,6 @@ struct rcu_data {
272 /* scheduling clock irq */ 337 /* scheduling clock irq */
273 /* before ratting on them. */ 338 /* before ratting on them. */
274 339
275#ifdef CONFIG_RCU_CPU_STALL_DETECTOR_RUNNABLE
276#define RCU_CPU_STALL_SUPPRESS_INIT 0
277#else
278#define RCU_CPU_STALL_SUPPRESS_INIT 1
279#endif
280
281#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
282 340
283/* 341/*
284 * RCU global state, including node hierarchy. This hierarchy is 342 * RCU global state, including node hierarchy. This hierarchy is
@@ -325,12 +383,12 @@ struct rcu_state {
325 /* due to lock unavailable. */ 383 /* due to lock unavailable. */
326 unsigned long n_force_qs_ngp; /* Number of calls leaving */ 384 unsigned long n_force_qs_ngp; /* Number of calls leaving */
327 /* due to no GP active. */ 385 /* due to no GP active. */
328#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
329 unsigned long gp_start; /* Time at which GP started, */ 386 unsigned long gp_start; /* Time at which GP started, */
330 /* but in jiffies. */ 387 /* but in jiffies. */
331 unsigned long jiffies_stall; /* Time at which to check */ 388 unsigned long jiffies_stall; /* Time at which to check */
332 /* for CPU stalls. */ 389 /* for CPU stalls. */
333#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ 390 unsigned long gp_max; /* Maximum GP duration in */
391 /* jiffies. */
334 char *name; /* Name of structure. */ 392 char *name; /* Name of structure. */
335}; 393};
336 394
@@ -361,16 +419,14 @@ DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data);
361static void rcu_bootup_announce(void); 419static void rcu_bootup_announce(void);
362long rcu_batches_completed(void); 420long rcu_batches_completed(void);
363static void rcu_preempt_note_context_switch(int cpu); 421static void rcu_preempt_note_context_switch(int cpu);
364static int rcu_preempted_readers(struct rcu_node *rnp); 422static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp);
365#ifdef CONFIG_HOTPLUG_CPU 423#ifdef CONFIG_HOTPLUG_CPU
366static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, 424static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp,
367 unsigned long flags); 425 unsigned long flags);
368#endif /* #ifdef CONFIG_HOTPLUG_CPU */ 426#endif /* #ifdef CONFIG_HOTPLUG_CPU */
369#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
370static void rcu_print_detail_task_stall(struct rcu_state *rsp); 427static void rcu_print_detail_task_stall(struct rcu_state *rsp);
371static void rcu_print_task_stall(struct rcu_node *rnp); 428static void rcu_print_task_stall(struct rcu_node *rnp);
372static void rcu_preempt_stall_reset(void); 429static void rcu_preempt_stall_reset(void);
373#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
374static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); 430static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp);
375#ifdef CONFIG_HOTPLUG_CPU 431#ifdef CONFIG_HOTPLUG_CPU
376static int rcu_preempt_offline_tasks(struct rcu_state *rsp, 432static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
@@ -390,5 +446,13 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu);
390static void rcu_preempt_send_cbs_to_online(void); 446static void rcu_preempt_send_cbs_to_online(void);
391static void __init __rcu_init_preempt(void); 447static void __init __rcu_init_preempt(void);
392static void rcu_needs_cpu_flush(void); 448static void rcu_needs_cpu_flush(void);
449static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp);
450static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
451static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp,
452 cpumask_var_t cm);
453static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
454static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
455 struct rcu_node *rnp,
456 int rnp_index);
393 457
394#endif /* #ifndef RCU_TREE_NONCORE */ 458#endif /* #ifndef RCU_TREE_NONCORE */
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index a3638710dc67..3f6559a5f5cd 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -1,7 +1,7 @@
1/* 1/*
2 * Read-Copy Update mechanism for mutual exclusion (tree-based version) 2 * Read-Copy Update mechanism for mutual exclusion (tree-based version)
3 * Internal non-public definitions that provide either classic 3 * Internal non-public definitions that provide either classic
4 * or preemptable semantics. 4 * or preemptible semantics.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by 7 * it under the terms of the GNU General Public License as published by
@@ -54,10 +54,6 @@ static void __init rcu_bootup_announce_oddness(void)
54#ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE 54#ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE
55 printk(KERN_INFO "\tRCU torture testing starts during boot.\n"); 55 printk(KERN_INFO "\tRCU torture testing starts during boot.\n");
56#endif 56#endif
57#ifndef CONFIG_RCU_CPU_STALL_DETECTOR
58 printk(KERN_INFO
59 "\tRCU-based detection of stalled CPUs is disabled.\n");
60#endif
61#if defined(CONFIG_TREE_PREEMPT_RCU) && !defined(CONFIG_RCU_CPU_STALL_VERBOSE) 57#if defined(CONFIG_TREE_PREEMPT_RCU) && !defined(CONFIG_RCU_CPU_STALL_VERBOSE)
62 printk(KERN_INFO "\tVerbose stalled-CPUs detection is disabled.\n"); 58 printk(KERN_INFO "\tVerbose stalled-CPUs detection is disabled.\n");
63#endif 59#endif
@@ -70,6 +66,7 @@ static void __init rcu_bootup_announce_oddness(void)
70 66
71struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state); 67struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state);
72DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); 68DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
69static struct rcu_state *rcu_state = &rcu_preempt_state;
73 70
74static int rcu_preempted_readers_exp(struct rcu_node *rnp); 71static int rcu_preempted_readers_exp(struct rcu_node *rnp);
75 72
@@ -78,7 +75,7 @@ static int rcu_preempted_readers_exp(struct rcu_node *rnp);
78 */ 75 */
79static void __init rcu_bootup_announce(void) 76static void __init rcu_bootup_announce(void)
80{ 77{
81 printk(KERN_INFO "Preemptable hierarchical RCU implementation.\n"); 78 printk(KERN_INFO "Preemptible hierarchical RCU implementation.\n");
82 rcu_bootup_announce_oddness(); 79 rcu_bootup_announce_oddness();
83} 80}
84 81
@@ -111,7 +108,7 @@ void rcu_force_quiescent_state(void)
111EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); 108EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
112 109
113/* 110/*
114 * Record a preemptable-RCU quiescent state for the specified CPU. Note 111 * Record a preemptible-RCU quiescent state for the specified CPU. Note
115 * that this just means that the task currently running on the CPU is 112 * that this just means that the task currently running on the CPU is
116 * not in a quiescent state. There might be any number of tasks blocked 113 * not in a quiescent state. There might be any number of tasks blocked
117 * while in an RCU read-side critical section. 114 * while in an RCU read-side critical section.
@@ -134,12 +131,12 @@ static void rcu_preempt_qs(int cpu)
134 * We have entered the scheduler, and the current task might soon be 131 * We have entered the scheduler, and the current task might soon be
135 * context-switched away from. If this task is in an RCU read-side 132 * context-switched away from. If this task is in an RCU read-side
136 * critical section, we will no longer be able to rely on the CPU to 133 * critical section, we will no longer be able to rely on the CPU to
137 * record that fact, so we enqueue the task on the appropriate entry 134 * record that fact, so we enqueue the task on the blkd_tasks list.
138 * of the blocked_tasks[] array. The task will dequeue itself when 135 * The task will dequeue itself when it exits the outermost enclosing
139 * it exits the outermost enclosing RCU read-side critical section. 136 * RCU read-side critical section. Therefore, the current grace period
140 * Therefore, the current grace period cannot be permitted to complete 137 * cannot be permitted to complete until the blkd_tasks list entries
141 * until the blocked_tasks[] entry indexed by the low-order bit of 138 * predating the current grace period drain, in other words, until
142 * rnp->gpnum empties. 139 * rnp->gp_tasks becomes NULL.
143 * 140 *
144 * Caller must disable preemption. 141 * Caller must disable preemption.
145 */ 142 */
@@ -147,7 +144,6 @@ static void rcu_preempt_note_context_switch(int cpu)
147{ 144{
148 struct task_struct *t = current; 145 struct task_struct *t = current;
149 unsigned long flags; 146 unsigned long flags;
150 int phase;
151 struct rcu_data *rdp; 147 struct rcu_data *rdp;
152 struct rcu_node *rnp; 148 struct rcu_node *rnp;
153 149
@@ -169,15 +165,30 @@ static void rcu_preempt_note_context_switch(int cpu)
169 * (i.e., this CPU has not yet passed through a quiescent 165 * (i.e., this CPU has not yet passed through a quiescent
170 * state for the current grace period), then as long 166 * state for the current grace period), then as long
171 * as that task remains queued, the current grace period 167 * as that task remains queued, the current grace period
172 * cannot end. 168 * cannot end. Note that there is some uncertainty as
169 * to exactly when the current grace period started.
170 * We take a conservative approach, which can result
171 * in unnecessarily waiting on tasks that started very
172 * slightly after the current grace period began. C'est
173 * la vie!!!
173 * 174 *
174 * But first, note that the current CPU must still be 175 * But first, note that the current CPU must still be
175 * on line! 176 * on line!
176 */ 177 */
177 WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0); 178 WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0);
178 WARN_ON_ONCE(!list_empty(&t->rcu_node_entry)); 179 WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
179 phase = (rnp->gpnum + !(rnp->qsmask & rdp->grpmask)) & 0x1; 180 if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) {
180 list_add(&t->rcu_node_entry, &rnp->blocked_tasks[phase]); 181 list_add(&t->rcu_node_entry, rnp->gp_tasks->prev);
182 rnp->gp_tasks = &t->rcu_node_entry;
183#ifdef CONFIG_RCU_BOOST
184 if (rnp->boost_tasks != NULL)
185 rnp->boost_tasks = rnp->gp_tasks;
186#endif /* #ifdef CONFIG_RCU_BOOST */
187 } else {
188 list_add(&t->rcu_node_entry, &rnp->blkd_tasks);
189 if (rnp->qsmask & rdp->grpmask)
190 rnp->gp_tasks = &t->rcu_node_entry;
191 }
181 raw_spin_unlock_irqrestore(&rnp->lock, flags); 192 raw_spin_unlock_irqrestore(&rnp->lock, flags);
182 } 193 }
183 194
@@ -196,7 +207,7 @@ static void rcu_preempt_note_context_switch(int cpu)
196} 207}
197 208
198/* 209/*
199 * Tree-preemptable RCU implementation for rcu_read_lock(). 210 * Tree-preemptible RCU implementation for rcu_read_lock().
200 * Just increment ->rcu_read_lock_nesting, shared state will be updated 211 * Just increment ->rcu_read_lock_nesting, shared state will be updated
201 * if we block. 212 * if we block.
202 */ 213 */
@@ -212,12 +223,9 @@ EXPORT_SYMBOL_GPL(__rcu_read_lock);
212 * for the specified rcu_node structure. If the caller needs a reliable 223 * for the specified rcu_node structure. If the caller needs a reliable
213 * answer, it must hold the rcu_node's ->lock. 224 * answer, it must hold the rcu_node's ->lock.
214 */ 225 */
215static int rcu_preempted_readers(struct rcu_node *rnp) 226static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
216{ 227{
217 int phase = rnp->gpnum & 0x1; 228 return rnp->gp_tasks != NULL;
218
219 return !list_empty(&rnp->blocked_tasks[phase]) ||
220 !list_empty(&rnp->blocked_tasks[phase + 2]);
221} 229}
222 230
223/* 231/*
@@ -233,7 +241,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
233 unsigned long mask; 241 unsigned long mask;
234 struct rcu_node *rnp_p; 242 struct rcu_node *rnp_p;
235 243
236 if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) { 244 if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
237 raw_spin_unlock_irqrestore(&rnp->lock, flags); 245 raw_spin_unlock_irqrestore(&rnp->lock, flags);
238 return; /* Still need more quiescent states! */ 246 return; /* Still need more quiescent states! */
239 } 247 }
@@ -257,6 +265,21 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
257} 265}
258 266
259/* 267/*
268 * Advance a ->blkd_tasks-list pointer to the next entry, instead
269 * returning NULL if at the end of the list.
270 */
271static struct list_head *rcu_next_node_entry(struct task_struct *t,
272 struct rcu_node *rnp)
273{
274 struct list_head *np;
275
276 np = t->rcu_node_entry.next;
277 if (np == &rnp->blkd_tasks)
278 np = NULL;
279 return np;
280}
281
282/*
260 * Handle special cases during rcu_read_unlock(), such as needing to 283 * Handle special cases during rcu_read_unlock(), such as needing to
261 * notify RCU core processing or task having blocked during the RCU 284 * notify RCU core processing or task having blocked during the RCU
262 * read-side critical section. 285 * read-side critical section.
@@ -266,6 +289,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
266 int empty; 289 int empty;
267 int empty_exp; 290 int empty_exp;
268 unsigned long flags; 291 unsigned long flags;
292 struct list_head *np;
269 struct rcu_node *rnp; 293 struct rcu_node *rnp;
270 int special; 294 int special;
271 295
@@ -306,10 +330,19 @@ static void rcu_read_unlock_special(struct task_struct *t)
306 break; 330 break;
307 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 331 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
308 } 332 }
309 empty = !rcu_preempted_readers(rnp); 333 empty = !rcu_preempt_blocked_readers_cgp(rnp);
310 empty_exp = !rcu_preempted_readers_exp(rnp); 334 empty_exp = !rcu_preempted_readers_exp(rnp);
311 smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ 335 smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
336 np = rcu_next_node_entry(t, rnp);
312 list_del_init(&t->rcu_node_entry); 337 list_del_init(&t->rcu_node_entry);
338 if (&t->rcu_node_entry == rnp->gp_tasks)
339 rnp->gp_tasks = np;
340 if (&t->rcu_node_entry == rnp->exp_tasks)
341 rnp->exp_tasks = np;
342#ifdef CONFIG_RCU_BOOST
343 if (&t->rcu_node_entry == rnp->boost_tasks)
344 rnp->boost_tasks = np;
345#endif /* #ifdef CONFIG_RCU_BOOST */
313 t->rcu_blocked_node = NULL; 346 t->rcu_blocked_node = NULL;
314 347
315 /* 348 /*
@@ -322,6 +355,15 @@ static void rcu_read_unlock_special(struct task_struct *t)
322 else 355 else
323 rcu_report_unblock_qs_rnp(rnp, flags); 356 rcu_report_unblock_qs_rnp(rnp, flags);
324 357
358#ifdef CONFIG_RCU_BOOST
359 /* Unboost if we were boosted. */
360 if (special & RCU_READ_UNLOCK_BOOSTED) {
361 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BOOSTED;
362 rt_mutex_unlock(t->rcu_boost_mutex);
363 t->rcu_boost_mutex = NULL;
364 }
365#endif /* #ifdef CONFIG_RCU_BOOST */
366
325 /* 367 /*
326 * If this was the last task on the expedited lists, 368 * If this was the last task on the expedited lists,
327 * then we need to report up the rcu_node hierarchy. 369 * then we need to report up the rcu_node hierarchy.
@@ -334,7 +376,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
334} 376}
335 377
336/* 378/*
337 * Tree-preemptable RCU implementation for rcu_read_unlock(). 379 * Tree-preemptible RCU implementation for rcu_read_unlock().
338 * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost 380 * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
339 * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then 381 * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
340 * invoke rcu_read_unlock_special() to clean up after a context switch 382 * invoke rcu_read_unlock_special() to clean up after a context switch
@@ -356,8 +398,6 @@ void __rcu_read_unlock(void)
356} 398}
357EXPORT_SYMBOL_GPL(__rcu_read_unlock); 399EXPORT_SYMBOL_GPL(__rcu_read_unlock);
358 400
359#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
360
361#ifdef CONFIG_RCU_CPU_STALL_VERBOSE 401#ifdef CONFIG_RCU_CPU_STALL_VERBOSE
362 402
363/* 403/*
@@ -367,18 +407,16 @@ EXPORT_SYMBOL_GPL(__rcu_read_unlock);
367static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp) 407static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
368{ 408{
369 unsigned long flags; 409 unsigned long flags;
370 struct list_head *lp;
371 int phase;
372 struct task_struct *t; 410 struct task_struct *t;
373 411
374 if (rcu_preempted_readers(rnp)) { 412 if (!rcu_preempt_blocked_readers_cgp(rnp))
375 raw_spin_lock_irqsave(&rnp->lock, flags); 413 return;
376 phase = rnp->gpnum & 0x1; 414 raw_spin_lock_irqsave(&rnp->lock, flags);
377 lp = &rnp->blocked_tasks[phase]; 415 t = list_entry(rnp->gp_tasks,
378 list_for_each_entry(t, lp, rcu_node_entry) 416 struct task_struct, rcu_node_entry);
379 sched_show_task(t); 417 list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry)
380 raw_spin_unlock_irqrestore(&rnp->lock, flags); 418 sched_show_task(t);
381 } 419 raw_spin_unlock_irqrestore(&rnp->lock, flags);
382} 420}
383 421
384/* 422/*
@@ -408,16 +446,14 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp)
408 */ 446 */
409static void rcu_print_task_stall(struct rcu_node *rnp) 447static void rcu_print_task_stall(struct rcu_node *rnp)
410{ 448{
411 struct list_head *lp;
412 int phase;
413 struct task_struct *t; 449 struct task_struct *t;
414 450
415 if (rcu_preempted_readers(rnp)) { 451 if (!rcu_preempt_blocked_readers_cgp(rnp))
416 phase = rnp->gpnum & 0x1; 452 return;
417 lp = &rnp->blocked_tasks[phase]; 453 t = list_entry(rnp->gp_tasks,
418 list_for_each_entry(t, lp, rcu_node_entry) 454 struct task_struct, rcu_node_entry);
419 printk(" P%d", t->pid); 455 list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry)
420 } 456 printk(" P%d", t->pid);
421} 457}
422 458
423/* 459/*
@@ -430,18 +466,21 @@ static void rcu_preempt_stall_reset(void)
430 rcu_preempt_state.jiffies_stall = jiffies + ULONG_MAX / 2; 466 rcu_preempt_state.jiffies_stall = jiffies + ULONG_MAX / 2;
431} 467}
432 468
433#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
434
435/* 469/*
436 * Check that the list of blocked tasks for the newly completed grace 470 * Check that the list of blocked tasks for the newly completed grace
437 * period is in fact empty. It is a serious bug to complete a grace 471 * period is in fact empty. It is a serious bug to complete a grace
438 * period that still has RCU readers blocked! This function must be 472 * period that still has RCU readers blocked! This function must be
439 * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock 473 * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock
440 * must be held by the caller. 474 * must be held by the caller.
475 *
476 * Also, if there are blocked tasks on the list, they automatically
477 * block the newly created grace period, so set up ->gp_tasks accordingly.
441 */ 478 */
442static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) 479static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
443{ 480{
444 WARN_ON_ONCE(rcu_preempted_readers(rnp)); 481 WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp));
482 if (!list_empty(&rnp->blkd_tasks))
483 rnp->gp_tasks = rnp->blkd_tasks.next;
445 WARN_ON_ONCE(rnp->qsmask); 484 WARN_ON_ONCE(rnp->qsmask);
446} 485}
447 486
@@ -465,50 +504,68 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
465 struct rcu_node *rnp, 504 struct rcu_node *rnp,
466 struct rcu_data *rdp) 505 struct rcu_data *rdp)
467{ 506{
468 int i;
469 struct list_head *lp; 507 struct list_head *lp;
470 struct list_head *lp_root; 508 struct list_head *lp_root;
471 int retval = 0; 509 int retval = 0;
472 struct rcu_node *rnp_root = rcu_get_root(rsp); 510 struct rcu_node *rnp_root = rcu_get_root(rsp);
473 struct task_struct *tp; 511 struct task_struct *t;
474 512
475 if (rnp == rnp_root) { 513 if (rnp == rnp_root) {
476 WARN_ONCE(1, "Last CPU thought to be offlined?"); 514 WARN_ONCE(1, "Last CPU thought to be offlined?");
477 return 0; /* Shouldn't happen: at least one CPU online. */ 515 return 0; /* Shouldn't happen: at least one CPU online. */
478 } 516 }
479 WARN_ON_ONCE(rnp != rdp->mynode && 517
480 (!list_empty(&rnp->blocked_tasks[0]) || 518 /* If we are on an internal node, complain bitterly. */
481 !list_empty(&rnp->blocked_tasks[1]) || 519 WARN_ON_ONCE(rnp != rdp->mynode);
482 !list_empty(&rnp->blocked_tasks[2]) ||
483 !list_empty(&rnp->blocked_tasks[3])));
484 520
485 /* 521 /*
486 * Move tasks up to root rcu_node. Rely on the fact that the 522 * Move tasks up to root rcu_node. Don't try to get fancy for
487 * root rcu_node can be at most one ahead of the rest of the 523 * this corner-case operation -- just put this node's tasks
488 * rcu_nodes in terms of gp_num value. This fact allows us to 524 * at the head of the root node's list, and update the root node's
489 * move the blocked_tasks[] array directly, element by element. 525 * ->gp_tasks and ->exp_tasks pointers to those of this node's,
526 * if non-NULL. This might result in waiting for more tasks than
527 * absolutely necessary, but this is a good performance/complexity
528 * tradeoff.
490 */ 529 */
491 if (rcu_preempted_readers(rnp)) 530 if (rcu_preempt_blocked_readers_cgp(rnp))
492 retval |= RCU_OFL_TASKS_NORM_GP; 531 retval |= RCU_OFL_TASKS_NORM_GP;
493 if (rcu_preempted_readers_exp(rnp)) 532 if (rcu_preempted_readers_exp(rnp))
494 retval |= RCU_OFL_TASKS_EXP_GP; 533 retval |= RCU_OFL_TASKS_EXP_GP;
495 for (i = 0; i < 4; i++) { 534 lp = &rnp->blkd_tasks;
496 lp = &rnp->blocked_tasks[i]; 535 lp_root = &rnp_root->blkd_tasks;
497 lp_root = &rnp_root->blocked_tasks[i]; 536 while (!list_empty(lp)) {
498 while (!list_empty(lp)) { 537 t = list_entry(lp->next, typeof(*t), rcu_node_entry);
499 tp = list_entry(lp->next, typeof(*tp), rcu_node_entry); 538 raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
500 raw_spin_lock(&rnp_root->lock); /* irqs already disabled */ 539 list_del(&t->rcu_node_entry);
501 list_del(&tp->rcu_node_entry); 540 t->rcu_blocked_node = rnp_root;
502 tp->rcu_blocked_node = rnp_root; 541 list_add(&t->rcu_node_entry, lp_root);
503 list_add(&tp->rcu_node_entry, lp_root); 542 if (&t->rcu_node_entry == rnp->gp_tasks)
504 raw_spin_unlock(&rnp_root->lock); /* irqs remain disabled */ 543 rnp_root->gp_tasks = rnp->gp_tasks;
505 } 544 if (&t->rcu_node_entry == rnp->exp_tasks)
545 rnp_root->exp_tasks = rnp->exp_tasks;
546#ifdef CONFIG_RCU_BOOST
547 if (&t->rcu_node_entry == rnp->boost_tasks)
548 rnp_root->boost_tasks = rnp->boost_tasks;
549#endif /* #ifdef CONFIG_RCU_BOOST */
550 raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
506 } 551 }
552
553#ifdef CONFIG_RCU_BOOST
554 /* In case root is being boosted and leaf is not. */
555 raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
556 if (rnp_root->boost_tasks != NULL &&
557 rnp_root->boost_tasks != rnp_root->gp_tasks)
558 rnp_root->boost_tasks = rnp_root->gp_tasks;
559 raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
560#endif /* #ifdef CONFIG_RCU_BOOST */
561
562 rnp->gp_tasks = NULL;
563 rnp->exp_tasks = NULL;
507 return retval; 564 return retval;
508} 565}
509 566
510/* 567/*
511 * Do CPU-offline processing for preemptable RCU. 568 * Do CPU-offline processing for preemptible RCU.
512 */ 569 */
513static void rcu_preempt_offline_cpu(int cpu) 570static void rcu_preempt_offline_cpu(int cpu)
514{ 571{
@@ -537,7 +594,7 @@ static void rcu_preempt_check_callbacks(int cpu)
537} 594}
538 595
539/* 596/*
540 * Process callbacks for preemptable RCU. 597 * Process callbacks for preemptible RCU.
541 */ 598 */
542static void rcu_preempt_process_callbacks(void) 599static void rcu_preempt_process_callbacks(void)
543{ 600{
@@ -546,7 +603,7 @@ static void rcu_preempt_process_callbacks(void)
546} 603}
547 604
548/* 605/*
549 * Queue a preemptable-RCU callback for invocation after a grace period. 606 * Queue a preemptible-RCU callback for invocation after a grace period.
550 */ 607 */
551void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) 608void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
552{ 609{
@@ -594,8 +651,7 @@ static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex);
594 */ 651 */
595static int rcu_preempted_readers_exp(struct rcu_node *rnp) 652static int rcu_preempted_readers_exp(struct rcu_node *rnp)
596{ 653{
597 return !list_empty(&rnp->blocked_tasks[2]) || 654 return rnp->exp_tasks != NULL;
598 !list_empty(&rnp->blocked_tasks[3]);
599} 655}
600 656
601/* 657/*
@@ -655,13 +711,17 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
655static void 711static void
656sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp) 712sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
657{ 713{
658 int must_wait; 714 unsigned long flags;
715 int must_wait = 0;
659 716
660 raw_spin_lock(&rnp->lock); /* irqs already disabled */ 717 raw_spin_lock_irqsave(&rnp->lock, flags);
661 list_splice_init(&rnp->blocked_tasks[0], &rnp->blocked_tasks[2]); 718 if (list_empty(&rnp->blkd_tasks))
662 list_splice_init(&rnp->blocked_tasks[1], &rnp->blocked_tasks[3]); 719 raw_spin_unlock_irqrestore(&rnp->lock, flags);
663 must_wait = rcu_preempted_readers_exp(rnp); 720 else {
664 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ 721 rnp->exp_tasks = rnp->blkd_tasks.next;
722 rcu_initiate_boost(rnp, flags); /* releases rnp->lock */
723 must_wait = 1;
724 }
665 if (!must_wait) 725 if (!must_wait)
666 rcu_report_exp_rnp(rsp, rnp); 726 rcu_report_exp_rnp(rsp, rnp);
667} 727}
@@ -669,9 +729,7 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
669/* 729/*
670 * Wait for an rcu-preempt grace period, but expedite it. The basic idea 730 * Wait for an rcu-preempt grace period, but expedite it. The basic idea
671 * is to invoke synchronize_sched_expedited() to push all the tasks to 731 * is to invoke synchronize_sched_expedited() to push all the tasks to
672 * the ->blocked_tasks[] lists, move all entries from the first set of 732 * the ->blkd_tasks lists and wait for this list to drain.
673 * ->blocked_tasks[] lists to the second set, and finally wait for this
674 * second set to drain.
675 */ 733 */
676void synchronize_rcu_expedited(void) 734void synchronize_rcu_expedited(void)
677{ 735{
@@ -703,7 +761,7 @@ void synchronize_rcu_expedited(void)
703 if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0) 761 if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0)
704 goto unlock_mb_ret; /* Others did our work for us. */ 762 goto unlock_mb_ret; /* Others did our work for us. */
705 763
706 /* force all RCU readers onto blocked_tasks[]. */ 764 /* force all RCU readers onto ->blkd_tasks lists. */
707 synchronize_sched_expedited(); 765 synchronize_sched_expedited();
708 766
709 raw_spin_lock_irqsave(&rsp->onofflock, flags); 767 raw_spin_lock_irqsave(&rsp->onofflock, flags);
@@ -715,7 +773,7 @@ void synchronize_rcu_expedited(void)
715 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 773 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
716 } 774 }
717 775
718 /* Snapshot current state of ->blocked_tasks[] lists. */ 776 /* Snapshot current state of ->blkd_tasks lists. */
719 rcu_for_each_leaf_node(rsp, rnp) 777 rcu_for_each_leaf_node(rsp, rnp)
720 sync_rcu_preempt_exp_init(rsp, rnp); 778 sync_rcu_preempt_exp_init(rsp, rnp);
721 if (NUM_RCU_NODES > 1) 779 if (NUM_RCU_NODES > 1)
@@ -723,7 +781,7 @@ void synchronize_rcu_expedited(void)
723 781
724 raw_spin_unlock_irqrestore(&rsp->onofflock, flags); 782 raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
725 783
726 /* Wait for snapshotted ->blocked_tasks[] lists to drain. */ 784 /* Wait for snapshotted ->blkd_tasks lists to drain. */
727 rnp = rcu_get_root(rsp); 785 rnp = rcu_get_root(rsp);
728 wait_event(sync_rcu_preempt_exp_wq, 786 wait_event(sync_rcu_preempt_exp_wq,
729 sync_rcu_preempt_exp_done(rnp)); 787 sync_rcu_preempt_exp_done(rnp));
@@ -739,7 +797,7 @@ mb_ret:
739EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); 797EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
740 798
741/* 799/*
742 * Check to see if there is any immediate preemptable-RCU-related work 800 * Check to see if there is any immediate preemptible-RCU-related work
743 * to be done. 801 * to be done.
744 */ 802 */
745static int rcu_preempt_pending(int cpu) 803static int rcu_preempt_pending(int cpu)
@@ -749,7 +807,7 @@ static int rcu_preempt_pending(int cpu)
749} 807}
750 808
751/* 809/*
752 * Does preemptable RCU need the CPU to stay out of dynticks mode? 810 * Does preemptible RCU need the CPU to stay out of dynticks mode?
753 */ 811 */
754static int rcu_preempt_needs_cpu(int cpu) 812static int rcu_preempt_needs_cpu(int cpu)
755{ 813{
@@ -766,7 +824,7 @@ void rcu_barrier(void)
766EXPORT_SYMBOL_GPL(rcu_barrier); 824EXPORT_SYMBOL_GPL(rcu_barrier);
767 825
768/* 826/*
769 * Initialize preemptable RCU's per-CPU data. 827 * Initialize preemptible RCU's per-CPU data.
770 */ 828 */
771static void __cpuinit rcu_preempt_init_percpu_data(int cpu) 829static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
772{ 830{
@@ -774,7 +832,7 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
774} 832}
775 833
776/* 834/*
777 * Move preemptable RCU's callbacks from dying CPU to other online CPU. 835 * Move preemptible RCU's callbacks from dying CPU to other online CPU.
778 */ 836 */
779static void rcu_preempt_send_cbs_to_online(void) 837static void rcu_preempt_send_cbs_to_online(void)
780{ 838{
@@ -782,7 +840,7 @@ static void rcu_preempt_send_cbs_to_online(void)
782} 840}
783 841
784/* 842/*
785 * Initialize preemptable RCU's state structures. 843 * Initialize preemptible RCU's state structures.
786 */ 844 */
787static void __init __rcu_init_preempt(void) 845static void __init __rcu_init_preempt(void)
788{ 846{
@@ -790,7 +848,7 @@ static void __init __rcu_init_preempt(void)
790} 848}
791 849
792/* 850/*
793 * Check for a task exiting while in a preemptable-RCU read-side 851 * Check for a task exiting while in a preemptible-RCU read-side
794 * critical section, clean up if so. No need to issue warnings, 852 * critical section, clean up if so. No need to issue warnings,
795 * as debug_check_no_locks_held() already does this if lockdep 853 * as debug_check_no_locks_held() already does this if lockdep
796 * is enabled. 854 * is enabled.
@@ -802,11 +860,13 @@ void exit_rcu(void)
802 if (t->rcu_read_lock_nesting == 0) 860 if (t->rcu_read_lock_nesting == 0)
803 return; 861 return;
804 t->rcu_read_lock_nesting = 1; 862 t->rcu_read_lock_nesting = 1;
805 rcu_read_unlock(); 863 __rcu_read_unlock();
806} 864}
807 865
808#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 866#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
809 867
868static struct rcu_state *rcu_state = &rcu_sched_state;
869
810/* 870/*
811 * Tell them what RCU they are running. 871 * Tell them what RCU they are running.
812 */ 872 */
@@ -836,7 +896,7 @@ void rcu_force_quiescent_state(void)
836EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); 896EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
837 897
838/* 898/*
839 * Because preemptable RCU does not exist, we never have to check for 899 * Because preemptible RCU does not exist, we never have to check for
840 * CPUs being in quiescent states. 900 * CPUs being in quiescent states.
841 */ 901 */
842static void rcu_preempt_note_context_switch(int cpu) 902static void rcu_preempt_note_context_switch(int cpu)
@@ -844,10 +904,10 @@ static void rcu_preempt_note_context_switch(int cpu)
844} 904}
845 905
846/* 906/*
847 * Because preemptable RCU does not exist, there are never any preempted 907 * Because preemptible RCU does not exist, there are never any preempted
848 * RCU readers. 908 * RCU readers.
849 */ 909 */
850static int rcu_preempted_readers(struct rcu_node *rnp) 910static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
851{ 911{
852 return 0; 912 return 0;
853} 913}
@@ -862,10 +922,8 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
862 922
863#endif /* #ifdef CONFIG_HOTPLUG_CPU */ 923#endif /* #ifdef CONFIG_HOTPLUG_CPU */
864 924
865#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
866
867/* 925/*
868 * Because preemptable RCU does not exist, we never have to check for 926 * Because preemptible RCU does not exist, we never have to check for
869 * tasks blocked within RCU read-side critical sections. 927 * tasks blocked within RCU read-side critical sections.
870 */ 928 */
871static void rcu_print_detail_task_stall(struct rcu_state *rsp) 929static void rcu_print_detail_task_stall(struct rcu_state *rsp)
@@ -873,7 +931,7 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp)
873} 931}
874 932
875/* 933/*
876 * Because preemptable RCU does not exist, we never have to check for 934 * Because preemptible RCU does not exist, we never have to check for
877 * tasks blocked within RCU read-side critical sections. 935 * tasks blocked within RCU read-side critical sections.
878 */ 936 */
879static void rcu_print_task_stall(struct rcu_node *rnp) 937static void rcu_print_task_stall(struct rcu_node *rnp)
@@ -888,10 +946,8 @@ static void rcu_preempt_stall_reset(void)
888{ 946{
889} 947}
890 948
891#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
892
893/* 949/*
894 * Because there is no preemptable RCU, there can be no readers blocked, 950 * Because there is no preemptible RCU, there can be no readers blocked,
895 * so there is no need to check for blocked tasks. So check only for 951 * so there is no need to check for blocked tasks. So check only for
896 * bogus qsmask values. 952 * bogus qsmask values.
897 */ 953 */
@@ -903,7 +959,7 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
903#ifdef CONFIG_HOTPLUG_CPU 959#ifdef CONFIG_HOTPLUG_CPU
904 960
905/* 961/*
906 * Because preemptable RCU does not exist, it never needs to migrate 962 * Because preemptible RCU does not exist, it never needs to migrate
907 * tasks that were blocked within RCU read-side critical sections, and 963 * tasks that were blocked within RCU read-side critical sections, and
908 * such non-existent tasks cannot possibly have been blocking the current 964 * such non-existent tasks cannot possibly have been blocking the current
909 * grace period. 965 * grace period.
@@ -916,7 +972,7 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
916} 972}
917 973
918/* 974/*
919 * Because preemptable RCU does not exist, it never needs CPU-offline 975 * Because preemptible RCU does not exist, it never needs CPU-offline
920 * processing. 976 * processing.
921 */ 977 */
922static void rcu_preempt_offline_cpu(int cpu) 978static void rcu_preempt_offline_cpu(int cpu)
@@ -926,7 +982,7 @@ static void rcu_preempt_offline_cpu(int cpu)
926#endif /* #ifdef CONFIG_HOTPLUG_CPU */ 982#endif /* #ifdef CONFIG_HOTPLUG_CPU */
927 983
928/* 984/*
929 * Because preemptable RCU does not exist, it never has any callbacks 985 * Because preemptible RCU does not exist, it never has any callbacks
930 * to check. 986 * to check.
931 */ 987 */
932static void rcu_preempt_check_callbacks(int cpu) 988static void rcu_preempt_check_callbacks(int cpu)
@@ -934,7 +990,7 @@ static void rcu_preempt_check_callbacks(int cpu)
934} 990}
935 991
936/* 992/*
937 * Because preemptable RCU does not exist, it never has any callbacks 993 * Because preemptible RCU does not exist, it never has any callbacks
938 * to process. 994 * to process.
939 */ 995 */
940static void rcu_preempt_process_callbacks(void) 996static void rcu_preempt_process_callbacks(void)
@@ -943,7 +999,7 @@ static void rcu_preempt_process_callbacks(void)
943 999
944/* 1000/*
945 * Wait for an rcu-preempt grace period, but make it happen quickly. 1001 * Wait for an rcu-preempt grace period, but make it happen quickly.
946 * But because preemptable RCU does not exist, map to rcu-sched. 1002 * But because preemptible RCU does not exist, map to rcu-sched.
947 */ 1003 */
948void synchronize_rcu_expedited(void) 1004void synchronize_rcu_expedited(void)
949{ 1005{
@@ -954,7 +1010,7 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
954#ifdef CONFIG_HOTPLUG_CPU 1010#ifdef CONFIG_HOTPLUG_CPU
955 1011
956/* 1012/*
957 * Because preemptable RCU does not exist, there is never any need to 1013 * Because preemptible RCU does not exist, there is never any need to
958 * report on tasks preempted in RCU read-side critical sections during 1014 * report on tasks preempted in RCU read-side critical sections during
959 * expedited RCU grace periods. 1015 * expedited RCU grace periods.
960 */ 1016 */
@@ -966,7 +1022,7 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
966#endif /* #ifdef CONFIG_HOTPLUG_CPU */ 1022#endif /* #ifdef CONFIG_HOTPLUG_CPU */
967 1023
968/* 1024/*
969 * Because preemptable RCU does not exist, it never has any work to do. 1025 * Because preemptible RCU does not exist, it never has any work to do.
970 */ 1026 */
971static int rcu_preempt_pending(int cpu) 1027static int rcu_preempt_pending(int cpu)
972{ 1028{
@@ -974,7 +1030,7 @@ static int rcu_preempt_pending(int cpu)
974} 1030}
975 1031
976/* 1032/*
977 * Because preemptable RCU does not exist, it never needs any CPU. 1033 * Because preemptible RCU does not exist, it never needs any CPU.
978 */ 1034 */
979static int rcu_preempt_needs_cpu(int cpu) 1035static int rcu_preempt_needs_cpu(int cpu)
980{ 1036{
@@ -982,7 +1038,7 @@ static int rcu_preempt_needs_cpu(int cpu)
982} 1038}
983 1039
984/* 1040/*
985 * Because preemptable RCU does not exist, rcu_barrier() is just 1041 * Because preemptible RCU does not exist, rcu_barrier() is just
986 * another name for rcu_barrier_sched(). 1042 * another name for rcu_barrier_sched().
987 */ 1043 */
988void rcu_barrier(void) 1044void rcu_barrier(void)
@@ -992,7 +1048,7 @@ void rcu_barrier(void)
992EXPORT_SYMBOL_GPL(rcu_barrier); 1048EXPORT_SYMBOL_GPL(rcu_barrier);
993 1049
994/* 1050/*
995 * Because preemptable RCU does not exist, there is no per-CPU 1051 * Because preemptible RCU does not exist, there is no per-CPU
996 * data to initialize. 1052 * data to initialize.
997 */ 1053 */
998static void __cpuinit rcu_preempt_init_percpu_data(int cpu) 1054static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
@@ -1000,14 +1056,14 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
1000} 1056}
1001 1057
1002/* 1058/*
1003 * Because there is no preemptable RCU, there are no callbacks to move. 1059 * Because there is no preemptible RCU, there are no callbacks to move.
1004 */ 1060 */
1005static void rcu_preempt_send_cbs_to_online(void) 1061static void rcu_preempt_send_cbs_to_online(void)
1006{ 1062{
1007} 1063}
1008 1064
1009/* 1065/*
1010 * Because preemptable RCU does not exist, it need not be initialized. 1066 * Because preemptible RCU does not exist, it need not be initialized.
1011 */ 1067 */
1012static void __init __rcu_init_preempt(void) 1068static void __init __rcu_init_preempt(void)
1013{ 1069{
@@ -1015,6 +1071,276 @@ static void __init __rcu_init_preempt(void)
1015 1071
1016#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ 1072#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
1017 1073
1074#ifdef CONFIG_RCU_BOOST
1075
1076#include "rtmutex_common.h"
1077
1078#ifdef CONFIG_RCU_TRACE
1079
1080static void rcu_initiate_boost_trace(struct rcu_node *rnp)
1081{
1082 if (list_empty(&rnp->blkd_tasks))
1083 rnp->n_balk_blkd_tasks++;
1084 else if (rnp->exp_tasks == NULL && rnp->gp_tasks == NULL)
1085 rnp->n_balk_exp_gp_tasks++;
1086 else if (rnp->gp_tasks != NULL && rnp->boost_tasks != NULL)
1087 rnp->n_balk_boost_tasks++;
1088 else if (rnp->gp_tasks != NULL && rnp->qsmask != 0)
1089 rnp->n_balk_notblocked++;
1090 else if (rnp->gp_tasks != NULL &&
1091 ULONG_CMP_LT(jiffies, rnp->boost_time))
1092 rnp->n_balk_notyet++;
1093 else
1094 rnp->n_balk_nos++;
1095}
1096
1097#else /* #ifdef CONFIG_RCU_TRACE */
1098
/*
 * Without CONFIG_RCU_TRACE no per-reason balk counters are updated,
 * so there is nothing to do here.
 */
static void rcu_initiate_boost_trace(struct rcu_node *rnp)
{
}
1102
1103#endif /* #else #ifdef CONFIG_RCU_TRACE */
1104
1105/*
1106 * Carry out RCU priority boosting on the task indicated by ->exp_tasks
1107 * or ->boost_tasks, advancing the pointer to the next task in the
1108 * ->blkd_tasks list.
1109 *
1110 * Note that irqs must be enabled: boosting the task can block.
1111 * Returns 1 if there are more tasks needing to be boosted.
1112 */
1113static int rcu_boost(struct rcu_node *rnp)
1114{
1115 unsigned long flags;
1116 struct rt_mutex mtx;
1117 struct task_struct *t;
1118 struct list_head *tb;
1119
1120 if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL)
1121 return 0; /* Nothing left to boost. */
1122
1123 raw_spin_lock_irqsave(&rnp->lock, flags);
1124
1125 /*
1126 * Recheck under the lock: all tasks in need of boosting
1127 * might exit their RCU read-side critical sections on their own.
1128 */
1129 if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) {
1130 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1131 return 0;
1132 }
1133
1134 /*
1135 * Preferentially boost tasks blocking expedited grace periods.
1136 * This cannot starve the normal grace periods because a second
1137 * expedited grace period must boost all blocked tasks, including
1138 * those blocking the pre-existing normal grace period.
1139 */
1140 if (rnp->exp_tasks != NULL) {
1141 tb = rnp->exp_tasks;
1142 rnp->n_exp_boosts++;
1143 } else {
1144 tb = rnp->boost_tasks;
1145 rnp->n_normal_boosts++;
1146 }
1147 rnp->n_tasks_boosted++;
1148
1149 /*
1150 * We boost task t by manufacturing an rt_mutex that appears to
1151 * be held by task t. We leave a pointer to that rt_mutex where
1152 * task t can find it, and task t will release the mutex when it
1153 * exits its outermost RCU read-side critical section. Then
1154 * simply acquiring this artificial rt_mutex will boost task
1155 * t's priority. (Thanks to tglx for suggesting this approach!)
1156 *
1157 * Note that task t must acquire rnp->lock to remove itself from
1158 * the ->blkd_tasks list, which it will do from exit() if from
1159 * nowhere else. We therefore are guaranteed that task t will
1160 * stay around at least until we drop rnp->lock. Note that
1161 * rnp->lock also resolves races between our priority boosting
1162 * and task t's exiting its outermost RCU read-side critical
1163 * section.
1164 */
1165 t = container_of(tb, struct task_struct, rcu_node_entry);
1166 rt_mutex_init_proxy_locked(&mtx, t);
1167 t->rcu_boost_mutex = &mtx;
1168 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED;
1169 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1170 rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */
1171 rt_mutex_unlock(&mtx); /* Keep lockdep happy. */
1172
1173 return rnp->exp_tasks != NULL || rnp->boost_tasks != NULL;
1174}
1175
/*
 * Timer callback armed through rcu_yield() when a boost kthread gives
 * up the CPU after too many consecutive boost passes.  The timer
 * argument is the rcu_node; its per-node kthread is poked, which in
 * turn will wake up the booster kthread.
 */
static void rcu_boost_kthread_timer(unsigned long arg)
{
	struct rcu_node *rnp = (struct rcu_node *)arg;

	invoke_rcu_node_kthread(rnp);
}
1186
1187/*
1188 * Priority-boosting kthread. One per leaf rcu_node and one for the
1189 * root rcu_node.
1190 */
1191static int rcu_boost_kthread(void *arg)
1192{
1193 struct rcu_node *rnp = (struct rcu_node *)arg;
1194 int spincnt = 0;
1195 int more2boost;
1196
1197 for (;;) {
1198 rnp->boost_kthread_status = RCU_KTHREAD_WAITING;
1199 wait_event_interruptible(rnp->boost_wq, rnp->boost_tasks ||
1200 rnp->exp_tasks);
1201 rnp->boost_kthread_status = RCU_KTHREAD_RUNNING;
1202 more2boost = rcu_boost(rnp);
1203 if (more2boost)
1204 spincnt++;
1205 else
1206 spincnt = 0;
1207 if (spincnt > 10) {
1208 rcu_yield(rcu_boost_kthread_timer, (unsigned long)rnp);
1209 spincnt = 0;
1210 }
1211 }
1212 /* NOTREACHED */
1213 return 0;
1214}
1215
1216/*
1217 * Check to see if it is time to start boosting RCU readers that are
1218 * blocking the current grace period, and, if so, tell the per-rcu_node
1219 * kthread to start boosting them. If there is an expedited grace
1220 * period in progress, it is always time to boost.
1221 *
1222 * The caller must hold rnp->lock, which this function releases,
1223 * but irqs remain disabled. The ->boost_kthread_task is immortal,
1224 * so we don't need to worry about it going away.
1225 */
1226static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
1227{
1228 struct task_struct *t;
1229
1230 if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) {
1231 rnp->n_balk_exp_gp_tasks++;
1232 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1233 return;
1234 }
1235 if (rnp->exp_tasks != NULL ||
1236 (rnp->gp_tasks != NULL &&
1237 rnp->boost_tasks == NULL &&
1238 rnp->qsmask == 0 &&
1239 ULONG_CMP_GE(jiffies, rnp->boost_time))) {
1240 if (rnp->exp_tasks == NULL)
1241 rnp->boost_tasks = rnp->gp_tasks;
1242 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1243 t = rnp->boost_kthread_task;
1244 if (t != NULL)
1245 wake_up_process(t);
1246 } else {
1247 rcu_initiate_boost_trace(rnp);
1248 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1249 }
1250}
1251
1252/*
1253 * Set the affinity of the boost kthread. The CPU-hotplug locks are
1254 * held, so no one should be messing with the existence of the boost
1255 * kthread.
1256 */
1257static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp,
1258 cpumask_var_t cm)
1259{
1260 struct task_struct *t;
1261
1262 t = rnp->boost_kthread_task;
1263 if (t != NULL)
1264 set_cpus_allowed_ptr(rnp->boost_kthread_task, cm);
1265}
1266
1267#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)
1268
/*
 * Do priority-boost accounting for the start of a new grace period:
 * set ->boost_time to RCU_BOOST_DELAY_JIFFIES from now.  This is the
 * time that rcu_initiate_boost() compares against jiffies before it
 * will start boosting readers that block a normal grace period.
 */
static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
{
	rnp->boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES;
}
1276
/*
 * Initialize the RCU-boost waitqueue, ->boost_wq, which is the
 * waitqueue that rcu_boost_kthread() sleeps on for this rcu_node.
 */
static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp)
{
	init_waitqueue_head(&rnp->boost_wq);
}
1284
1285/*
1286 * Create an RCU-boost kthread for the specified node if one does not
1287 * already exist. We only create this kthread for preemptible RCU.
1288 * Returns zero if all is well, a negated errno otherwise.
1289 */
1290static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
1291 struct rcu_node *rnp,
1292 int rnp_index)
1293{
1294 unsigned long flags;
1295 struct sched_param sp;
1296 struct task_struct *t;
1297
1298 if (&rcu_preempt_state != rsp)
1299 return 0;
1300 if (rnp->boost_kthread_task != NULL)
1301 return 0;
1302 t = kthread_create(rcu_boost_kthread, (void *)rnp,
1303 "rcub%d", rnp_index);
1304 if (IS_ERR(t))
1305 return PTR_ERR(t);
1306 raw_spin_lock_irqsave(&rnp->lock, flags);
1307 rnp->boost_kthread_task = t;
1308 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1309 wake_up_process(t);
1310 sp.sched_priority = RCU_KTHREAD_PRIO;
1311 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
1312 return 0;
1313}
1314
1315#else /* #ifdef CONFIG_RCU_BOOST */
1316
/*
 * Without CONFIG_RCU_BOOST there is never anything to boost, but this
 * stub still releases rnp->lock and restores the interrupt state from
 * @flags, as the CONFIG_RCU_BOOST version does.
 */
static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
{
	raw_spin_unlock_irqrestore(&rnp->lock, flags);
}
1321
/*
 * Without CONFIG_RCU_BOOST there is no boost kthread, so there is no
 * affinity to set.
 */
static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp,
					  cpumask_var_t cm)
{
}
1326
/*
 * Without CONFIG_RCU_BOOST there is no boost accounting to do at the
 * start of a grace period.
 */
static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
{
}
1330
/*
 * Without CONFIG_RCU_BOOST there is no boost waitqueue to initialize.
 */
static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp)
{
}
1334
/*
 * Without CONFIG_RCU_BOOST no boost kthread is ever created, so just
 * report success.
 */
static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
						 struct rcu_node *rnp,
						 int rnp_index)
{
	return 0;
}
1341
1342#endif /* #else #ifdef CONFIG_RCU_BOOST */
1343
1018#ifndef CONFIG_SMP 1344#ifndef CONFIG_SMP
1019 1345
1020void synchronize_sched_expedited(void) 1346void synchronize_sched_expedited(void)
@@ -1187,8 +1513,8 @@ static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff);
1187 * 1513 *
1188 * Because it is not legal to invoke rcu_process_callbacks() with irqs 1514 * Because it is not legal to invoke rcu_process_callbacks() with irqs
1189 * disabled, we do one pass of force_quiescent_state(), then do a 1515 * disabled, we do one pass of force_quiescent_state(), then do a
1190 * raise_softirq() to cause rcu_process_callbacks() to be invoked later. 1516 * invoke_rcu_cpu_kthread() to cause rcu_process_callbacks() to be invoked
1191 * The per-cpu rcu_dyntick_drain variable controls the sequencing. 1517 * later. The per-cpu rcu_dyntick_drain variable controls the sequencing.
1192 */ 1518 */
1193int rcu_needs_cpu(int cpu) 1519int rcu_needs_cpu(int cpu)
1194{ 1520{
@@ -1239,7 +1565,7 @@ int rcu_needs_cpu(int cpu)
1239 1565
1240 /* If RCU callbacks are still pending, RCU still needs this CPU. */ 1566 /* If RCU callbacks are still pending, RCU still needs this CPU. */
1241 if (c) 1567 if (c)
1242 raise_softirq(RCU_SOFTIRQ); 1568 invoke_rcu_cpu_kthread();
1243 return c; 1569 return c;
1244} 1570}
1245 1571
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index c8e97853b970..aa0fd72b4bc7 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -46,6 +46,18 @@
46#define RCU_TREE_NONCORE 46#define RCU_TREE_NONCORE
47#include "rcutree.h" 47#include "rcutree.h"
48 48
49DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
50DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_cpu);
51DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
52DECLARE_PER_CPU(char, rcu_cpu_has_work);
53
54static char convert_kthread_status(unsigned int kthread_status)
55{
56 if (kthread_status > RCU_KTHREAD_MAX)
57 return '?';
58 return "SRWOY"[kthread_status];
59}
60
49static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) 61static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
50{ 62{
51 if (!rdp->beenonline) 63 if (!rdp->beenonline)
@@ -64,7 +76,21 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
64 rdp->dynticks_fqs); 76 rdp->dynticks_fqs);
65#endif /* #ifdef CONFIG_NO_HZ */ 77#endif /* #ifdef CONFIG_NO_HZ */
66 seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi); 78 seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi);
67 seq_printf(m, " ql=%ld b=%ld", rdp->qlen, rdp->blimit); 79 seq_printf(m, " ql=%ld qs=%c%c%c%c kt=%d/%c/%d ktl=%x b=%ld",
80 rdp->qlen,
81 ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] !=
82 rdp->nxttail[RCU_NEXT_TAIL]],
83 ".R"[rdp->nxttail[RCU_WAIT_TAIL] !=
84 rdp->nxttail[RCU_NEXT_READY_TAIL]],
85 ".W"[rdp->nxttail[RCU_DONE_TAIL] !=
86 rdp->nxttail[RCU_WAIT_TAIL]],
87 ".D"[&rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]],
88 per_cpu(rcu_cpu_has_work, rdp->cpu),
89 convert_kthread_status(per_cpu(rcu_cpu_kthread_status,
90 rdp->cpu)),
91 per_cpu(rcu_cpu_kthread_cpu, rdp->cpu),
92 per_cpu(rcu_cpu_kthread_loops, rdp->cpu) & 0xffff,
93 rdp->blimit);
68 seq_printf(m, " ci=%lu co=%lu ca=%lu\n", 94 seq_printf(m, " ci=%lu co=%lu ca=%lu\n",
69 rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted); 95 rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted);
70} 96}
@@ -121,7 +147,18 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
121 rdp->dynticks_fqs); 147 rdp->dynticks_fqs);
122#endif /* #ifdef CONFIG_NO_HZ */ 148#endif /* #ifdef CONFIG_NO_HZ */
123 seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi); 149 seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi);
124 seq_printf(m, ",%ld,%ld", rdp->qlen, rdp->blimit); 150 seq_printf(m, ",%ld,\"%c%c%c%c\",%d,\"%c\",%ld", rdp->qlen,
151 ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] !=
152 rdp->nxttail[RCU_NEXT_TAIL]],
153 ".R"[rdp->nxttail[RCU_WAIT_TAIL] !=
154 rdp->nxttail[RCU_NEXT_READY_TAIL]],
155 ".W"[rdp->nxttail[RCU_DONE_TAIL] !=
156 rdp->nxttail[RCU_WAIT_TAIL]],
157 ".D"[&rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]],
158 per_cpu(rcu_cpu_has_work, rdp->cpu),
159 convert_kthread_status(per_cpu(rcu_cpu_kthread_status,
160 rdp->cpu)),
161 rdp->blimit);
125 seq_printf(m, ",%lu,%lu,%lu\n", 162 seq_printf(m, ",%lu,%lu,%lu\n",
126 rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted); 163 rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted);
127} 164}
@@ -157,11 +194,76 @@ static const struct file_operations rcudata_csv_fops = {
157 .release = single_release, 194 .release = single_release,
158}; 195};
159 196
197#ifdef CONFIG_RCU_BOOST
198
199static void print_one_rcu_node_boost(struct seq_file *m, struct rcu_node *rnp)
200{
201 seq_printf(m, "%d:%d tasks=%c%c%c%c kt=%c ntb=%lu neb=%lu nnb=%lu "
202 "j=%04x bt=%04x\n",
203 rnp->grplo, rnp->grphi,
204 "T."[list_empty(&rnp->blkd_tasks)],
205 "N."[!rnp->gp_tasks],
206 "E."[!rnp->exp_tasks],
207 "B."[!rnp->boost_tasks],
208 convert_kthread_status(rnp->boost_kthread_status),
209 rnp->n_tasks_boosted, rnp->n_exp_boosts,
210 rnp->n_normal_boosts,
211 (int)(jiffies & 0xffff),
212 (int)(rnp->boost_time & 0xffff));
213 seq_printf(m, "%s: nt=%lu egt=%lu bt=%lu nb=%lu ny=%lu nos=%lu\n",
214 " balk",
215 rnp->n_balk_blkd_tasks,
216 rnp->n_balk_exp_gp_tasks,
217 rnp->n_balk_boost_tasks,
218 rnp->n_balk_notblocked,
219 rnp->n_balk_notyet,
220 rnp->n_balk_nos);
221}
222
223static int show_rcu_node_boost(struct seq_file *m, void *unused)
224{
225 struct rcu_node *rnp;
226
227 rcu_for_each_leaf_node(&rcu_preempt_state, rnp)
228 print_one_rcu_node_boost(m, rnp);
229 return 0;
230}
231
232static int rcu_node_boost_open(struct inode *inode, struct file *file)
233{
234 return single_open(file, show_rcu_node_boost, NULL);
235}
236
237static const struct file_operations rcu_node_boost_fops = {
238 .owner = THIS_MODULE,
239 .open = rcu_node_boost_open,
240 .read = seq_read,
241 .llseek = seq_lseek,
242 .release = single_release,
243};
244
245/*
246 * Create the rcuboost debugfs entry. Standard error return.
247 */
248static int rcu_boost_trace_create_file(struct dentry *rcudir)
249{
250 return !debugfs_create_file("rcuboost", 0444, rcudir, NULL,
251 &rcu_node_boost_fops);
252}
253
254#else /* #ifdef CONFIG_RCU_BOOST */
255
256static int rcu_boost_trace_create_file(struct dentry *rcudir)
257{
258 return 0; /* There cannot be an error if we didn't create it! */
259}
260
261#endif /* #else #ifdef CONFIG_RCU_BOOST */
262
160static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) 263static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
161{ 264{
162 unsigned long gpnum; 265 unsigned long gpnum;
163 int level = 0; 266 int level = 0;
164 int phase;
165 struct rcu_node *rnp; 267 struct rcu_node *rnp;
166 268
167 gpnum = rsp->gpnum; 269 gpnum = rsp->gpnum;
@@ -178,13 +280,11 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
178 seq_puts(m, "\n"); 280 seq_puts(m, "\n");
179 level = rnp->level; 281 level = rnp->level;
180 } 282 }
181 phase = gpnum & 0x1; 283 seq_printf(m, "%lx/%lx %c%c>%c %d:%d ^%d ",
182 seq_printf(m, "%lx/%lx %c%c>%c%c %d:%d ^%d ",
183 rnp->qsmask, rnp->qsmaskinit, 284 rnp->qsmask, rnp->qsmaskinit,
184 "T."[list_empty(&rnp->blocked_tasks[phase])], 285 ".G"[rnp->gp_tasks != NULL],
185 "E."[list_empty(&rnp->blocked_tasks[phase + 2])], 286 ".E"[rnp->exp_tasks != NULL],
186 "T."[list_empty(&rnp->blocked_tasks[!phase])], 287 ".T"[!list_empty(&rnp->blkd_tasks)],
187 "E."[list_empty(&rnp->blocked_tasks[!phase + 2])],
188 rnp->grplo, rnp->grphi, rnp->grpnum); 288 rnp->grplo, rnp->grphi, rnp->grpnum);
189 } 289 }
190 seq_puts(m, "\n"); 290 seq_puts(m, "\n");
@@ -216,16 +316,35 @@ static const struct file_operations rcuhier_fops = {
216 .release = single_release, 316 .release = single_release,
217}; 317};
218 318
319static void show_one_rcugp(struct seq_file *m, struct rcu_state *rsp)
320{
321 unsigned long flags;
322 unsigned long completed;
323 unsigned long gpnum;
324 unsigned long gpage;
325 unsigned long gpmax;
326 struct rcu_node *rnp = &rsp->node[0];
327
328 raw_spin_lock_irqsave(&rnp->lock, flags);
329 completed = rsp->completed;
330 gpnum = rsp->gpnum;
331 if (rsp->completed == rsp->gpnum)
332 gpage = 0;
333 else
334 gpage = jiffies - rsp->gp_start;
335 gpmax = rsp->gp_max;
336 raw_spin_unlock_irqrestore(&rnp->lock, flags);
337 seq_printf(m, "%s: completed=%ld gpnum=%lu age=%ld max=%ld\n",
338 rsp->name, completed, gpnum, gpage, gpmax);
339}
340
219static int show_rcugp(struct seq_file *m, void *unused) 341static int show_rcugp(struct seq_file *m, void *unused)
220{ 342{
221#ifdef CONFIG_TREE_PREEMPT_RCU 343#ifdef CONFIG_TREE_PREEMPT_RCU
222 seq_printf(m, "rcu_preempt: completed=%ld gpnum=%lu\n", 344 show_one_rcugp(m, &rcu_preempt_state);
223 rcu_preempt_state.completed, rcu_preempt_state.gpnum);
224#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 345#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
225 seq_printf(m, "rcu_sched: completed=%ld gpnum=%lu\n", 346 show_one_rcugp(m, &rcu_sched_state);
226 rcu_sched_state.completed, rcu_sched_state.gpnum); 347 show_one_rcugp(m, &rcu_bh_state);
227 seq_printf(m, "rcu_bh: completed=%ld gpnum=%lu\n",
228 rcu_bh_state.completed, rcu_bh_state.gpnum);
229 return 0; 348 return 0;
230} 349}
231 350
@@ -298,6 +417,29 @@ static const struct file_operations rcu_pending_fops = {
298 .release = single_release, 417 .release = single_release,
299}; 418};
300 419
420static int show_rcutorture(struct seq_file *m, void *unused)
421{
422 seq_printf(m, "rcutorture test sequence: %lu %s\n",
423 rcutorture_testseq >> 1,
424 (rcutorture_testseq & 0x1) ? "(test in progress)" : "");
425 seq_printf(m, "rcutorture update version number: %lu\n",
426 rcutorture_vernum);
427 return 0;
428}
429
430static int rcutorture_open(struct inode *inode, struct file *file)
431{
432 return single_open(file, show_rcutorture, NULL);
433}
434
435static const struct file_operations rcutorture_fops = {
436 .owner = THIS_MODULE,
437 .open = rcutorture_open,
438 .read = seq_read,
439 .llseek = seq_lseek,
440 .release = single_release,
441};
442
301static struct dentry *rcudir; 443static struct dentry *rcudir;
302 444
303static int __init rcutree_trace_init(void) 445static int __init rcutree_trace_init(void)
@@ -318,6 +460,9 @@ static int __init rcutree_trace_init(void)
318 if (!retval) 460 if (!retval)
319 goto free_out; 461 goto free_out;
320 462
463 if (rcu_boost_trace_create_file(rcudir))
464 goto free_out;
465
321 retval = debugfs_create_file("rcugp", 0444, rcudir, NULL, &rcugp_fops); 466 retval = debugfs_create_file("rcugp", 0444, rcudir, NULL, &rcugp_fops);
322 if (!retval) 467 if (!retval)
323 goto free_out; 468 goto free_out;
@@ -331,6 +476,11 @@ static int __init rcutree_trace_init(void)
331 NULL, &rcu_pending_fops); 476 NULL, &rcu_pending_fops);
332 if (!retval) 477 if (!retval)
333 goto free_out; 478 goto free_out;
479
480 retval = debugfs_create_file("rcutorture", 0444, rcudir,
481 NULL, &rcutorture_fops);
482 if (!retval)
483 goto free_out;
334 return 0; 484 return 0;
335free_out: 485free_out:
336 debugfs_remove_recursive(rcudir); 486 debugfs_remove_recursive(rcudir);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 174f976c2874..13960170cad4 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -58,7 +58,7 @@ DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
58 58
59char *softirq_to_name[NR_SOFTIRQS] = { 59char *softirq_to_name[NR_SOFTIRQS] = {
60 "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL", 60 "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
61 "TASKLET", "SCHED", "HRTIMER", "RCU" 61 "TASKLET", "SCHED", "HRTIMER"
62}; 62};
63 63
64/* 64/*