aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorPaul E. McKenney <paulmck@linux.vnet.ibm.com>2009-12-02 15:10:15 -0500
committerIngo Molnar <mingo@elte.hu>2009-12-03 05:35:25 -0500
commitd9a3da0699b24a589b27a61e1a5b5bd30d9db669 (patch)
treef7440e396a6c818f3cef514ccc31ab55d88025ef /kernel
parentcf244dc01bf68e1ad338b82447f8686d24ea4435 (diff)
rcu: Add expedited grace-period support for preemptible RCU
Implement a synchronize_rcu_expedited() for preemptible RCU that actually is expedited. This uses synchronize_sched_expedited() to force all threads currently running in a preemptible-RCU read-side critical section onto the appropriate ->blocked_tasks[] list, then takes a snapshot of all of these lists and waits for them to drain. Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1259784616158-git-send-email-> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/rcutorture.c34
-rw-r--r--kernel/rcutree.c10
-rw-r--r--kernel/rcutree.h35
-rw-r--r--kernel/rcutree_plugin.h198
-rw-r--r--kernel/rcutree_trace.c10
5 files changed, 260 insertions, 27 deletions
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 3dd0ca23e191..a621a67ef4e3 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -327,6 +327,11 @@ rcu_torture_cb(struct rcu_head *p)
327 cur_ops->deferred_free(rp); 327 cur_ops->deferred_free(rp);
328} 328}
329 329
330static int rcu_no_completed(void)
331{
332 return 0;
333}
334
330static void rcu_torture_deferred_free(struct rcu_torture *p) 335static void rcu_torture_deferred_free(struct rcu_torture *p)
331{ 336{
332 call_rcu(&p->rtort_rcu, rcu_torture_cb); 337 call_rcu(&p->rtort_rcu, rcu_torture_cb);
@@ -388,6 +393,21 @@ static struct rcu_torture_ops rcu_sync_ops = {
388 .name = "rcu_sync" 393 .name = "rcu_sync"
389}; 394};
390 395
396static struct rcu_torture_ops rcu_expedited_ops = {
397 .init = rcu_sync_torture_init,
398 .cleanup = NULL,
399 .readlock = rcu_torture_read_lock,
400 .read_delay = rcu_read_delay, /* just reuse rcu's version. */
401 .readunlock = rcu_torture_read_unlock,
402 .completed = rcu_no_completed,
403 .deferred_free = rcu_sync_torture_deferred_free,
404 .sync = synchronize_rcu_expedited,
405 .cb_barrier = NULL,
406 .stats = NULL,
407 .irq_capable = 1,
408 .name = "rcu_expedited"
409};
410
391/* 411/*
392 * Definitions for rcu_bh torture testing. 412 * Definitions for rcu_bh torture testing.
393 */ 413 */
@@ -581,11 +601,6 @@ static void sched_torture_read_unlock(int idx)
581 preempt_enable(); 601 preempt_enable();
582} 602}
583 603
584static int sched_torture_completed(void)
585{
586 return 0;
587}
588
589static void rcu_sched_torture_deferred_free(struct rcu_torture *p) 604static void rcu_sched_torture_deferred_free(struct rcu_torture *p)
590{ 605{
591 call_rcu_sched(&p->rtort_rcu, rcu_torture_cb); 606 call_rcu_sched(&p->rtort_rcu, rcu_torture_cb);
@@ -602,7 +617,7 @@ static struct rcu_torture_ops sched_ops = {
602 .readlock = sched_torture_read_lock, 617 .readlock = sched_torture_read_lock,
603 .read_delay = rcu_read_delay, /* just reuse rcu's version. */ 618 .read_delay = rcu_read_delay, /* just reuse rcu's version. */
604 .readunlock = sched_torture_read_unlock, 619 .readunlock = sched_torture_read_unlock,
605 .completed = sched_torture_completed, 620 .completed = rcu_no_completed,
606 .deferred_free = rcu_sched_torture_deferred_free, 621 .deferred_free = rcu_sched_torture_deferred_free,
607 .sync = sched_torture_synchronize, 622 .sync = sched_torture_synchronize,
608 .cb_barrier = rcu_barrier_sched, 623 .cb_barrier = rcu_barrier_sched,
@@ -617,7 +632,7 @@ static struct rcu_torture_ops sched_sync_ops = {
617 .readlock = sched_torture_read_lock, 632 .readlock = sched_torture_read_lock,
618 .read_delay = rcu_read_delay, /* just reuse rcu's version. */ 633 .read_delay = rcu_read_delay, /* just reuse rcu's version. */
619 .readunlock = sched_torture_read_unlock, 634 .readunlock = sched_torture_read_unlock,
620 .completed = sched_torture_completed, 635 .completed = rcu_no_completed,
621 .deferred_free = rcu_sync_torture_deferred_free, 636 .deferred_free = rcu_sync_torture_deferred_free,
622 .sync = sched_torture_synchronize, 637 .sync = sched_torture_synchronize,
623 .cb_barrier = NULL, 638 .cb_barrier = NULL,
@@ -631,7 +646,7 @@ static struct rcu_torture_ops sched_expedited_ops = {
631 .readlock = sched_torture_read_lock, 646 .readlock = sched_torture_read_lock,
632 .read_delay = rcu_read_delay, /* just reuse rcu's version. */ 647 .read_delay = rcu_read_delay, /* just reuse rcu's version. */
633 .readunlock = sched_torture_read_unlock, 648 .readunlock = sched_torture_read_unlock,
634 .completed = sched_torture_completed, 649 .completed = rcu_no_completed,
635 .deferred_free = rcu_sync_torture_deferred_free, 650 .deferred_free = rcu_sync_torture_deferred_free,
636 .sync = synchronize_sched_expedited, 651 .sync = synchronize_sched_expedited,
637 .cb_barrier = NULL, 652 .cb_barrier = NULL,
@@ -1116,7 +1131,8 @@ rcu_torture_init(void)
1116 int cpu; 1131 int cpu;
1117 int firsterr = 0; 1132 int firsterr = 0;
1118 static struct rcu_torture_ops *torture_ops[] = 1133 static struct rcu_torture_ops *torture_ops[] =
1119 { &rcu_ops, &rcu_sync_ops, &rcu_bh_ops, &rcu_bh_sync_ops, 1134 { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops,
1135 &rcu_bh_ops, &rcu_bh_sync_ops,
1120 &srcu_ops, &srcu_expedited_ops, 1136 &srcu_ops, &srcu_expedited_ops,
1121 &sched_ops, &sched_sync_ops, &sched_expedited_ops, }; 1137 &sched_ops, &sched_sync_ops, &sched_expedited_ops, };
1122 1138
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index d47e03e5792a..53ae9598f798 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -948,7 +948,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
948{ 948{
949 unsigned long flags; 949 unsigned long flags;
950 unsigned long mask; 950 unsigned long mask;
951 int need_quiet = 0; 951 int need_report = 0;
952 struct rcu_data *rdp = rsp->rda[cpu]; 952 struct rcu_data *rdp = rsp->rda[cpu];
953 struct rcu_node *rnp; 953 struct rcu_node *rnp;
954 954
@@ -967,7 +967,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
967 break; 967 break;
968 } 968 }
969 if (rnp == rdp->mynode) 969 if (rnp == rdp->mynode)
970 need_quiet = rcu_preempt_offline_tasks(rsp, rnp, rdp); 970 need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp);
971 else 971 else
972 spin_unlock(&rnp->lock); /* irqs remain disabled. */ 972 spin_unlock(&rnp->lock); /* irqs remain disabled. */
973 mask = rnp->grpmask; 973 mask = rnp->grpmask;
@@ -982,10 +982,12 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
982 */ 982 */
983 spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ 983 spin_unlock(&rsp->onofflock); /* irqs remain disabled. */
984 rnp = rdp->mynode; 984 rnp = rdp->mynode;
985 if (need_quiet) 985 if (need_report & RCU_OFL_TASKS_NORM_GP)
986 rcu_report_unblock_qs_rnp(rnp, flags); 986 rcu_report_unblock_qs_rnp(rnp, flags);
987 else 987 else
988 spin_unlock_irqrestore(&rnp->lock, flags); 988 spin_unlock_irqrestore(&rnp->lock, flags);
989 if (need_report & RCU_OFL_TASKS_EXP_GP)
990 rcu_report_exp_rnp(rsp, rnp);
989 991
990 rcu_adopt_orphan_cbs(rsp); 992 rcu_adopt_orphan_cbs(rsp);
991} 993}
@@ -1843,6 +1845,8 @@ static void __init rcu_init_one(struct rcu_state *rsp)
1843 rnp->level = i; 1845 rnp->level = i;
1844 INIT_LIST_HEAD(&rnp->blocked_tasks[0]); 1846 INIT_LIST_HEAD(&rnp->blocked_tasks[0]);
1845 INIT_LIST_HEAD(&rnp->blocked_tasks[1]); 1847 INIT_LIST_HEAD(&rnp->blocked_tasks[1]);
1848 INIT_LIST_HEAD(&rnp->blocked_tasks[2]);
1849 INIT_LIST_HEAD(&rnp->blocked_tasks[3]);
1846 } 1850 }
1847 } 1851 }
1848} 1852}
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index df2e0b694744..d2a0046f63b2 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -104,8 +104,12 @@ struct rcu_node {
104 /* an rcu_data structure, otherwise, each */ 104 /* an rcu_data structure, otherwise, each */
105 /* bit corresponds to a child rcu_node */ 105 /* bit corresponds to a child rcu_node */
106 /* structure. */ 106 /* structure. */
107 unsigned long expmask; /* Groups that have ->blocked_tasks[] */
108 /* elements that need to drain to allow the */
109 /* current expedited grace period to */
110 /* complete (only for TREE_PREEMPT_RCU). */
107 unsigned long qsmaskinit; 111 unsigned long qsmaskinit;
108 /* Per-GP initialization for qsmask. */ 112 /* Per-GP initial value for qsmask & expmask. */
109 unsigned long grpmask; /* Mask to apply to parent qsmask. */ 113 unsigned long grpmask; /* Mask to apply to parent qsmask. */
110 /* Only one bit will be set in this mask. */ 114 /* Only one bit will be set in this mask. */
111 int grplo; /* lowest-numbered CPU or group here. */ 115 int grplo; /* lowest-numbered CPU or group here. */
@@ -113,7 +117,7 @@ struct rcu_node {
113 u8 grpnum; /* CPU/group number for next level up. */ 117 u8 grpnum; /* CPU/group number for next level up. */
114 u8 level; /* root is at level 0. */ 118 u8 level; /* root is at level 0. */
115 struct rcu_node *parent; 119 struct rcu_node *parent;
116 struct list_head blocked_tasks[2]; 120 struct list_head blocked_tasks[4];
117 /* Tasks blocked in RCU read-side critsect. */ 121 /* Tasks blocked in RCU read-side critsect. */
118 /* Grace period number (->gpnum) x blocked */ 122 /* Grace period number (->gpnum) x blocked */
119 /* by tasks on the (x & 0x1) element of the */ 123 /* by tasks on the (x & 0x1) element of the */
@@ -128,6 +132,21 @@ struct rcu_node {
128 for ((rnp) = &(rsp)->node[0]; \ 132 for ((rnp) = &(rsp)->node[0]; \
129 (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++) 133 (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++)
130 134
135/*
136 * Do a breadth-first scan of the non-leaf rcu_node structures for the
137 * specified rcu_state structure. Note that if there is a singleton
138 * rcu_node tree with but one rcu_node structure, this loop is a no-op.
139 */
140#define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \
141 for ((rnp) = &(rsp)->node[0]; \
142 (rnp) < (rsp)->level[NUM_RCU_LVLS - 1]; (rnp)++)
143
144/*
145 * Scan the leaves of the rcu_node hierarchy for the specified rcu_state
146 * structure. Note that if there is a singleton rcu_node tree with but
147 * one rcu_node structure, this loop -will- visit the rcu_node structure.
148 * It is still a leaf node, even if it is also the root node.
149 */
131#define rcu_for_each_leaf_node(rsp, rnp) \ 150#define rcu_for_each_leaf_node(rsp, rnp) \
132 for ((rnp) = (rsp)->level[NUM_RCU_LVLS - 1]; \ 151 for ((rnp) = (rsp)->level[NUM_RCU_LVLS - 1]; \
133 (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++) 152 (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++)
@@ -261,7 +280,7 @@ struct rcu_state {
261 long gpnum; /* Current gp number. */ 280 long gpnum; /* Current gp number. */
262 long completed; /* # of last completed gp. */ 281 long completed; /* # of last completed gp. */
263 282
264 /* End of fields guarded by root rcu_node's lock. */ 283 /* End of fields guarded by root rcu_node's lock. */
265 284
266 spinlock_t onofflock; /* exclude on/offline and */ 285 spinlock_t onofflock; /* exclude on/offline and */
267 /* starting new GP. Also */ 286 /* starting new GP. Also */
@@ -293,6 +312,13 @@ struct rcu_state {
293#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ 312#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
294}; 313};
295 314
315/* Return values for rcu_preempt_offline_tasks(). */
316
317#define RCU_OFL_TASKS_NORM_GP 0x1 /* Tasks blocking normal */
318 /* GP were moved to root. */
319#define RCU_OFL_TASKS_EXP_GP 0x2 /* Tasks blocking expedited */
320 /* GP were moved to root. */
321
296#ifdef RCU_TREE_NONCORE 322#ifdef RCU_TREE_NONCORE
297 323
298/* 324/*
@@ -333,6 +359,9 @@ static void rcu_preempt_offline_cpu(int cpu);
333static void rcu_preempt_check_callbacks(int cpu); 359static void rcu_preempt_check_callbacks(int cpu);
334static void rcu_preempt_process_callbacks(void); 360static void rcu_preempt_process_callbacks(void);
335void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); 361void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
362#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU)
363static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp);
364#endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */
336static int rcu_preempt_pending(int cpu); 365static int rcu_preempt_pending(int cpu);
337static int rcu_preempt_needs_cpu(int cpu); 366static int rcu_preempt_needs_cpu(int cpu);
338static void __cpuinit rcu_preempt_init_percpu_data(int cpu); 367static void __cpuinit rcu_preempt_init_percpu_data(int cpu);
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index c9f0c975c003..37fbccdf41d5 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -24,12 +24,15 @@
24 * Paul E. McKenney <paulmck@linux.vnet.ibm.com> 24 * Paul E. McKenney <paulmck@linux.vnet.ibm.com>
25 */ 25 */
26 26
27#include <linux/delay.h>
27 28
28#ifdef CONFIG_TREE_PREEMPT_RCU 29#ifdef CONFIG_TREE_PREEMPT_RCU
29 30
30struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state); 31struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state);
31DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); 32DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
32 33
34static int rcu_preempted_readers_exp(struct rcu_node *rnp);
35
33/* 36/*
34 * Tell them what RCU they are running. 37 * Tell them what RCU they are running.
35 */ 38 */
@@ -157,7 +160,10 @@ EXPORT_SYMBOL_GPL(__rcu_read_lock);
157 */ 160 */
158static int rcu_preempted_readers(struct rcu_node *rnp) 161static int rcu_preempted_readers(struct rcu_node *rnp)
159{ 162{
160 return !list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]); 163 int phase = rnp->gpnum & 0x1;
164
165 return !list_empty(&rnp->blocked_tasks[phase]) ||
166 !list_empty(&rnp->blocked_tasks[phase + 2]);
161} 167}
162 168
163/* 169/*
@@ -204,6 +210,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
204static void rcu_read_unlock_special(struct task_struct *t) 210static void rcu_read_unlock_special(struct task_struct *t)
205{ 211{
206 int empty; 212 int empty;
213 int empty_exp;
207 unsigned long flags; 214 unsigned long flags;
208 struct rcu_node *rnp; 215 struct rcu_node *rnp;
209 int special; 216 int special;
@@ -247,6 +254,8 @@ static void rcu_read_unlock_special(struct task_struct *t)
247 spin_unlock(&rnp->lock); /* irqs remain disabled. */ 254 spin_unlock(&rnp->lock); /* irqs remain disabled. */
248 } 255 }
249 empty = !rcu_preempted_readers(rnp); 256 empty = !rcu_preempted_readers(rnp);
257 empty_exp = !rcu_preempted_readers_exp(rnp);
258 smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
250 list_del_init(&t->rcu_node_entry); 259 list_del_init(&t->rcu_node_entry);
251 t->rcu_blocked_node = NULL; 260 t->rcu_blocked_node = NULL;
252 261
@@ -259,6 +268,13 @@ static void rcu_read_unlock_special(struct task_struct *t)
259 spin_unlock_irqrestore(&rnp->lock, flags); 268 spin_unlock_irqrestore(&rnp->lock, flags);
260 else 269 else
261 rcu_report_unblock_qs_rnp(rnp, flags); 270 rcu_report_unblock_qs_rnp(rnp, flags);
271
272 /*
273 * If this was the last task on the expedited lists,
274 * then we need to report up the rcu_node hierarchy.
275 */
276 if (!empty_exp && !rcu_preempted_readers_exp(rnp))
277 rcu_report_exp_rnp(&rcu_preempt_state, rnp);
262 } else { 278 } else {
263 local_irq_restore(flags); 279 local_irq_restore(flags);
264 } 280 }
@@ -343,7 +359,7 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
343 int i; 359 int i;
344 struct list_head *lp; 360 struct list_head *lp;
345 struct list_head *lp_root; 361 struct list_head *lp_root;
346 int retval; 362 int retval = 0;
347 struct rcu_node *rnp_root = rcu_get_root(rsp); 363 struct rcu_node *rnp_root = rcu_get_root(rsp);
348 struct task_struct *tp; 364 struct task_struct *tp;
349 365
@@ -353,7 +369,9 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
353 } 369 }
354 WARN_ON_ONCE(rnp != rdp->mynode && 370 WARN_ON_ONCE(rnp != rdp->mynode &&
355 (!list_empty(&rnp->blocked_tasks[0]) || 371 (!list_empty(&rnp->blocked_tasks[0]) ||
356 !list_empty(&rnp->blocked_tasks[1]))); 372 !list_empty(&rnp->blocked_tasks[1]) ||
373 !list_empty(&rnp->blocked_tasks[2]) ||
374 !list_empty(&rnp->blocked_tasks[3])));
357 375
358 /* 376 /*
359 * Move tasks up to root rcu_node. Rely on the fact that the 377 * Move tasks up to root rcu_node. Rely on the fact that the
@@ -361,8 +379,11 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
361 * rcu_nodes in terms of gp_num value. This fact allows us to 379 * rcu_nodes in terms of gp_num value. This fact allows us to
362 * move the blocked_tasks[] array directly, element by element. 380 * move the blocked_tasks[] array directly, element by element.
363 */ 381 */
364 retval = rcu_preempted_readers(rnp); 382 if (rcu_preempted_readers(rnp))
365 for (i = 0; i < 2; i++) { 383 retval |= RCU_OFL_TASKS_NORM_GP;
384 if (rcu_preempted_readers_exp(rnp))
385 retval |= RCU_OFL_TASKS_EXP_GP;
386 for (i = 0; i < 4; i++) {
366 lp = &rnp->blocked_tasks[i]; 387 lp = &rnp->blocked_tasks[i];
367 lp_root = &rnp_root->blocked_tasks[i]; 388 lp_root = &rnp_root->blocked_tasks[i];
368 while (!list_empty(lp)) { 389 while (!list_empty(lp)) {
@@ -449,14 +470,159 @@ void synchronize_rcu(void)
449} 470}
450EXPORT_SYMBOL_GPL(synchronize_rcu); 471EXPORT_SYMBOL_GPL(synchronize_rcu);
451 472
473static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq);
474static long sync_rcu_preempt_exp_count;
475static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex);
476
477/*
478 * Return non-zero if there are any tasks in RCU read-side critical
479 * sections blocking the current preemptible-RCU expedited grace period.
480 * If there is no preemptible-RCU expedited grace period currently in
481 * progress, returns zero unconditionally.
482 */
483static int rcu_preempted_readers_exp(struct rcu_node *rnp)
484{
485 return !list_empty(&rnp->blocked_tasks[2]) ||
486 !list_empty(&rnp->blocked_tasks[3]);
487}
488
489/*
490 * Return non-zero if there is no RCU expedited grace period in progress
491 * for the specified rcu_node structure, in other words, if all CPUs and
492 * tasks covered by the specified rcu_node structure have done their bit
493 * for the current expedited grace period. Works only for preemptible
494 * RCU -- other RCU implementations use other means.
495 *
496 * Caller must hold sync_rcu_preempt_exp_mutex.
497 */
498static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
499{
500 return !rcu_preempted_readers_exp(rnp) &&
501 ACCESS_ONCE(rnp->expmask) == 0;
502}
503
504/*
505 * Report the exit from RCU read-side critical section for the last task
506 * that queued itself during or before the current expedited preemptible-RCU
507 * grace period. This event is reported either to the rcu_node structure on
508 * which the task was queued or to one of that rcu_node structure's ancestors,
509 * recursively up the tree. (Calm down, calm down, we do the recursion
510 * iteratively!)
511 *
512 * Caller must hold sync_rcu_preempt_exp_mutex.
513 */
514static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
515{
516 unsigned long flags;
517 unsigned long mask;
518
519 spin_lock_irqsave(&rnp->lock, flags);
520 for (;;) {
521 if (!sync_rcu_preempt_exp_done(rnp))
522 break;
523 if (rnp->parent == NULL) {
524 wake_up(&sync_rcu_preempt_exp_wq);
525 break;
526 }
527 mask = rnp->grpmask;
528 spin_unlock(&rnp->lock); /* irqs remain disabled */
529 rnp = rnp->parent;
530 spin_lock(&rnp->lock); /* irqs already disabled */
531 rnp->expmask &= ~mask;
532 }
533 spin_unlock_irqrestore(&rnp->lock, flags);
534}
535
536/*
537 * Snapshot the tasks blocking the newly started preemptible-RCU expedited
538 * grace period for the specified rcu_node structure. If there are no such
539 * tasks, report it up the rcu_node hierarchy.
540 *
541 * Caller must hold sync_rcu_preempt_exp_mutex and rsp->onofflock.
542 */
543static void
544sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
545{
546 int must_wait;
547
548 spin_lock(&rnp->lock); /* irqs already disabled */
549 list_splice_init(&rnp->blocked_tasks[0], &rnp->blocked_tasks[2]);
550 list_splice_init(&rnp->blocked_tasks[1], &rnp->blocked_tasks[3]);
551 must_wait = rcu_preempted_readers_exp(rnp);
552 spin_unlock(&rnp->lock); /* irqs remain disabled */
553 if (!must_wait)
554 rcu_report_exp_rnp(rsp, rnp);
555}
556
452/* 557/*
453 * Wait for an rcu-preempt grace period. We are supposed to expedite the 558 * Wait for an rcu-preempt grace period, but expedite it. The basic idea
454 * grace period, but this is the crude slow compatibility hack, so just 559 * is to invoke synchronize_sched_expedited() to push all the tasks to
455 * invoke synchronize_rcu(). 560 * the ->blocked_tasks[] lists, move all entries from the first set of
561 * ->blocked_tasks[] lists to the second set, and finally wait for this
562 * second set to drain.
456 */ 563 */
457void synchronize_rcu_expedited(void) 564void synchronize_rcu_expedited(void)
458{ 565{
459 synchronize_rcu(); 566 unsigned long flags;
567 struct rcu_node *rnp;
568 struct rcu_state *rsp = &rcu_preempt_state;
569 long snap;
570 int trycount = 0;
571
572 smp_mb(); /* Caller's modifications seen first by other CPUs. */
573 snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1;
574 smp_mb(); /* Above access cannot bleed into critical section. */
575
576 /*
577 * Acquire lock, falling back to synchronize_rcu() if too many
578 * lock-acquisition failures. Of course, if someone does the
579 * expedited grace period for us, just leave.
580 */
581 while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) {
582 if (trycount++ < 10)
583 udelay(trycount * num_online_cpus());
584 else {
585 synchronize_rcu();
586 return;
587 }
588 if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0)
589 goto mb_ret; /* Others did our work for us. */
590 }
591 if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0)
592 goto unlock_mb_ret; /* Others did our work for us. */
593
594 /* force all RCU readers onto blocked_tasks[]. */
595 synchronize_sched_expedited();
596
597 spin_lock_irqsave(&rsp->onofflock, flags);
598
599 /* Initialize ->expmask for all non-leaf rcu_node structures. */
600 rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) {
601 spin_lock(&rnp->lock); /* irqs already disabled. */
602 rnp->expmask = rnp->qsmaskinit;
603 spin_unlock(&rnp->lock); /* irqs remain disabled. */
604 }
605
606 /* Snapshot current state of ->blocked_tasks[] lists. */
607 rcu_for_each_leaf_node(rsp, rnp)
608 sync_rcu_preempt_exp_init(rsp, rnp);
609 if (NUM_RCU_NODES > 1)
610 sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp));
611
612 spin_unlock_irqrestore(&rsp->onofflock, flags);
613
614 /* Wait for snapshotted ->blocked_tasks[] lists to drain. */
615 rnp = rcu_get_root(rsp);
616 wait_event(sync_rcu_preempt_exp_wq,
617 sync_rcu_preempt_exp_done(rnp));
618
619 /* Clean up and exit. */
620 smp_mb(); /* ensure expedited GP seen before counter increment. */
621 ACCESS_ONCE(sync_rcu_preempt_exp_count)++;
622unlock_mb_ret:
623 mutex_unlock(&sync_rcu_preempt_exp_mutex);
624mb_ret:
625 smp_mb(); /* ensure subsequent action seen after grace period. */
460} 626}
461EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); 627EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
462 628
@@ -655,6 +821,20 @@ void synchronize_rcu_expedited(void)
655} 821}
656EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); 822EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
657 823
824#ifdef CONFIG_HOTPLUG_CPU
825
826/*
827 * Because preemptible RCU does not exist, there is never any need to
828 * report on tasks preempted in RCU read-side critical sections during
829 * expedited RCU grace periods.
830 */
831static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
832{
833 return;
834}
835
836#endif /* #ifdef CONFIG_HOTPLUG_CPU */
837
658/* 838/*
659 * Because preemptible RCU does not exist, it never has any work to do. 839 * Because preemptible RCU does not exist, it never has any work to do.
660 */ 840 */
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index 1984cdc51e9a..9d2c88423b31 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -157,6 +157,7 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
157{ 157{
158 long gpnum; 158 long gpnum;
159 int level = 0; 159 int level = 0;
160 int phase;
160 struct rcu_node *rnp; 161 struct rcu_node *rnp;
161 162
162 gpnum = rsp->gpnum; 163 gpnum = rsp->gpnum;
@@ -173,10 +174,13 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
173 seq_puts(m, "\n"); 174 seq_puts(m, "\n");
174 level = rnp->level; 175 level = rnp->level;
175 } 176 }
176 seq_printf(m, "%lx/%lx %c>%c %d:%d ^%d ", 177 phase = gpnum & 0x1;
178 seq_printf(m, "%lx/%lx %c%c>%c%c %d:%d ^%d ",
177 rnp->qsmask, rnp->qsmaskinit, 179 rnp->qsmask, rnp->qsmaskinit,
178 "T."[list_empty(&rnp->blocked_tasks[gpnum & 1])], 180 "T."[list_empty(&rnp->blocked_tasks[phase])],
179 "T."[list_empty(&rnp->blocked_tasks[!(gpnum & 1)])], 181 "E."[list_empty(&rnp->blocked_tasks[phase + 2])],
182 "T."[list_empty(&rnp->blocked_tasks[!phase])],
183 "E."[list_empty(&rnp->blocked_tasks[!phase + 2])],
180 rnp->grplo, rnp->grphi, rnp->grpnum); 184 rnp->grplo, rnp->grphi, rnp->grpnum);
181 } 185 }
182 seq_puts(m, "\n"); 186 seq_puts(m, "\n");