diff options
author | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2009-12-02 15:10:15 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-12-03 05:35:25 -0500 |
commit | d9a3da0699b24a589b27a61e1a5b5bd30d9db669 (patch) | |
tree | f7440e396a6c818f3cef514ccc31ab55d88025ef | |
parent | cf244dc01bf68e1ad338b82447f8686d24ea4435 (diff) |
rcu: Add expedited grace-period support for preemptible RCU
Implement a synchronize_rcu_expedited() for preemptible RCU
that actually is expedited. This uses
synchronize_sched_expedited() to force all threads currently
running in a preemptible-RCU read-side critical section onto the
appropriate ->blocked_tasks[] list, then takes a snapshot of all
of these lists and waits for them to drain.
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
LKML-Reference: <1259784616158-git-send-email->
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | kernel/rcutorture.c | 34 | ||||
-rw-r--r-- | kernel/rcutree.c | 10 | ||||
-rw-r--r-- | kernel/rcutree.h | 35 | ||||
-rw-r--r-- | kernel/rcutree_plugin.h | 198 | ||||
-rw-r--r-- | kernel/rcutree_trace.c | 10 |
5 files changed, 260 insertions, 27 deletions
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 3dd0ca23e191..a621a67ef4e3 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c | |||
@@ -327,6 +327,11 @@ rcu_torture_cb(struct rcu_head *p) | |||
327 | cur_ops->deferred_free(rp); | 327 | cur_ops->deferred_free(rp); |
328 | } | 328 | } |
329 | 329 | ||
330 | static int rcu_no_completed(void) | ||
331 | { | ||
332 | return 0; | ||
333 | } | ||
334 | |||
330 | static void rcu_torture_deferred_free(struct rcu_torture *p) | 335 | static void rcu_torture_deferred_free(struct rcu_torture *p) |
331 | { | 336 | { |
332 | call_rcu(&p->rtort_rcu, rcu_torture_cb); | 337 | call_rcu(&p->rtort_rcu, rcu_torture_cb); |
@@ -388,6 +393,21 @@ static struct rcu_torture_ops rcu_sync_ops = { | |||
388 | .name = "rcu_sync" | 393 | .name = "rcu_sync" |
389 | }; | 394 | }; |
390 | 395 | ||
396 | static struct rcu_torture_ops rcu_expedited_ops = { | ||
397 | .init = rcu_sync_torture_init, | ||
398 | .cleanup = NULL, | ||
399 | .readlock = rcu_torture_read_lock, | ||
400 | .read_delay = rcu_read_delay, /* just reuse rcu's version. */ | ||
401 | .readunlock = rcu_torture_read_unlock, | ||
402 | .completed = rcu_no_completed, | ||
403 | .deferred_free = rcu_sync_torture_deferred_free, | ||
404 | .sync = synchronize_rcu_expedited, | ||
405 | .cb_barrier = NULL, | ||
406 | .stats = NULL, | ||
407 | .irq_capable = 1, | ||
408 | .name = "rcu_expedited" | ||
409 | }; | ||
410 | |||
391 | /* | 411 | /* |
392 | * Definitions for rcu_bh torture testing. | 412 | * Definitions for rcu_bh torture testing. |
393 | */ | 413 | */ |
@@ -581,11 +601,6 @@ static void sched_torture_read_unlock(int idx) | |||
581 | preempt_enable(); | 601 | preempt_enable(); |
582 | } | 602 | } |
583 | 603 | ||
584 | static int sched_torture_completed(void) | ||
585 | { | ||
586 | return 0; | ||
587 | } | ||
588 | |||
589 | static void rcu_sched_torture_deferred_free(struct rcu_torture *p) | 604 | static void rcu_sched_torture_deferred_free(struct rcu_torture *p) |
590 | { | 605 | { |
591 | call_rcu_sched(&p->rtort_rcu, rcu_torture_cb); | 606 | call_rcu_sched(&p->rtort_rcu, rcu_torture_cb); |
@@ -602,7 +617,7 @@ static struct rcu_torture_ops sched_ops = { | |||
602 | .readlock = sched_torture_read_lock, | 617 | .readlock = sched_torture_read_lock, |
603 | .read_delay = rcu_read_delay, /* just reuse rcu's version. */ | 618 | .read_delay = rcu_read_delay, /* just reuse rcu's version. */ |
604 | .readunlock = sched_torture_read_unlock, | 619 | .readunlock = sched_torture_read_unlock, |
605 | .completed = sched_torture_completed, | 620 | .completed = rcu_no_completed, |
606 | .deferred_free = rcu_sched_torture_deferred_free, | 621 | .deferred_free = rcu_sched_torture_deferred_free, |
607 | .sync = sched_torture_synchronize, | 622 | .sync = sched_torture_synchronize, |
608 | .cb_barrier = rcu_barrier_sched, | 623 | .cb_barrier = rcu_barrier_sched, |
@@ -617,7 +632,7 @@ static struct rcu_torture_ops sched_sync_ops = { | |||
617 | .readlock = sched_torture_read_lock, | 632 | .readlock = sched_torture_read_lock, |
618 | .read_delay = rcu_read_delay, /* just reuse rcu's version. */ | 633 | .read_delay = rcu_read_delay, /* just reuse rcu's version. */ |
619 | .readunlock = sched_torture_read_unlock, | 634 | .readunlock = sched_torture_read_unlock, |
620 | .completed = sched_torture_completed, | 635 | .completed = rcu_no_completed, |
621 | .deferred_free = rcu_sync_torture_deferred_free, | 636 | .deferred_free = rcu_sync_torture_deferred_free, |
622 | .sync = sched_torture_synchronize, | 637 | .sync = sched_torture_synchronize, |
623 | .cb_barrier = NULL, | 638 | .cb_barrier = NULL, |
@@ -631,7 +646,7 @@ static struct rcu_torture_ops sched_expedited_ops = { | |||
631 | .readlock = sched_torture_read_lock, | 646 | .readlock = sched_torture_read_lock, |
632 | .read_delay = rcu_read_delay, /* just reuse rcu's version. */ | 647 | .read_delay = rcu_read_delay, /* just reuse rcu's version. */ |
633 | .readunlock = sched_torture_read_unlock, | 648 | .readunlock = sched_torture_read_unlock, |
634 | .completed = sched_torture_completed, | 649 | .completed = rcu_no_completed, |
635 | .deferred_free = rcu_sync_torture_deferred_free, | 650 | .deferred_free = rcu_sync_torture_deferred_free, |
636 | .sync = synchronize_sched_expedited, | 651 | .sync = synchronize_sched_expedited, |
637 | .cb_barrier = NULL, | 652 | .cb_barrier = NULL, |
@@ -1116,7 +1131,8 @@ rcu_torture_init(void) | |||
1116 | int cpu; | 1131 | int cpu; |
1117 | int firsterr = 0; | 1132 | int firsterr = 0; |
1118 | static struct rcu_torture_ops *torture_ops[] = | 1133 | static struct rcu_torture_ops *torture_ops[] = |
1119 | { &rcu_ops, &rcu_sync_ops, &rcu_bh_ops, &rcu_bh_sync_ops, | 1134 | { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops, |
1135 | &rcu_bh_ops, &rcu_bh_sync_ops, | ||
1120 | &srcu_ops, &srcu_expedited_ops, | 1136 | &srcu_ops, &srcu_expedited_ops, |
1121 | &sched_ops, &sched_sync_ops, &sched_expedited_ops, }; | 1137 | &sched_ops, &sched_sync_ops, &sched_expedited_ops, }; |
1122 | 1138 | ||
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index d47e03e5792a..53ae9598f798 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
@@ -948,7 +948,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) | |||
948 | { | 948 | { |
949 | unsigned long flags; | 949 | unsigned long flags; |
950 | unsigned long mask; | 950 | unsigned long mask; |
951 | int need_quiet = 0; | 951 | int need_report = 0; |
952 | struct rcu_data *rdp = rsp->rda[cpu]; | 952 | struct rcu_data *rdp = rsp->rda[cpu]; |
953 | struct rcu_node *rnp; | 953 | struct rcu_node *rnp; |
954 | 954 | ||
@@ -967,7 +967,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) | |||
967 | break; | 967 | break; |
968 | } | 968 | } |
969 | if (rnp == rdp->mynode) | 969 | if (rnp == rdp->mynode) |
970 | need_quiet = rcu_preempt_offline_tasks(rsp, rnp, rdp); | 970 | need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp); |
971 | else | 971 | else |
972 | spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 972 | spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
973 | mask = rnp->grpmask; | 973 | mask = rnp->grpmask; |
@@ -982,10 +982,12 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) | |||
982 | */ | 982 | */ |
983 | spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ | 983 | spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ |
984 | rnp = rdp->mynode; | 984 | rnp = rdp->mynode; |
985 | if (need_quiet) | 985 | if (need_report & RCU_OFL_TASKS_NORM_GP) |
986 | rcu_report_unblock_qs_rnp(rnp, flags); | 986 | rcu_report_unblock_qs_rnp(rnp, flags); |
987 | else | 987 | else |
988 | spin_unlock_irqrestore(&rnp->lock, flags); | 988 | spin_unlock_irqrestore(&rnp->lock, flags); |
989 | if (need_report & RCU_OFL_TASKS_EXP_GP) | ||
990 | rcu_report_exp_rnp(rsp, rnp); | ||
989 | 991 | ||
990 | rcu_adopt_orphan_cbs(rsp); | 992 | rcu_adopt_orphan_cbs(rsp); |
991 | } | 993 | } |
@@ -1843,6 +1845,8 @@ static void __init rcu_init_one(struct rcu_state *rsp) | |||
1843 | rnp->level = i; | 1845 | rnp->level = i; |
1844 | INIT_LIST_HEAD(&rnp->blocked_tasks[0]); | 1846 | INIT_LIST_HEAD(&rnp->blocked_tasks[0]); |
1845 | INIT_LIST_HEAD(&rnp->blocked_tasks[1]); | 1847 | INIT_LIST_HEAD(&rnp->blocked_tasks[1]); |
1848 | INIT_LIST_HEAD(&rnp->blocked_tasks[2]); | ||
1849 | INIT_LIST_HEAD(&rnp->blocked_tasks[3]); | ||
1846 | } | 1850 | } |
1847 | } | 1851 | } |
1848 | } | 1852 | } |
diff --git a/kernel/rcutree.h b/kernel/rcutree.h index df2e0b694744..d2a0046f63b2 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h | |||
@@ -104,8 +104,12 @@ struct rcu_node { | |||
104 | /* an rcu_data structure, otherwise, each */ | 104 | /* an rcu_data structure, otherwise, each */ |
105 | /* bit corresponds to a child rcu_node */ | 105 | /* bit corresponds to a child rcu_node */ |
106 | /* structure. */ | 106 | /* structure. */ |
107 | unsigned long expmask; /* Groups that have ->blocked_tasks[] */ | ||
108 | /* elements that need to drain to allow the */ | ||
109 | /* current expedited grace period to */ | ||
110 | /* complete (only for TREE_PREEMPT_RCU). */ | ||
107 | unsigned long qsmaskinit; | 111 | unsigned long qsmaskinit; |
108 | /* Per-GP initialization for qsmask. */ | 112 | /* Per-GP initial value for qsmask & expmask. */ |
109 | unsigned long grpmask; /* Mask to apply to parent qsmask. */ | 113 | unsigned long grpmask; /* Mask to apply to parent qsmask. */ |
110 | /* Only one bit will be set in this mask. */ | 114 | /* Only one bit will be set in this mask. */ |
111 | int grplo; /* lowest-numbered CPU or group here. */ | 115 | int grplo; /* lowest-numbered CPU or group here. */ |
@@ -113,7 +117,7 @@ struct rcu_node { | |||
113 | u8 grpnum; /* CPU/group number for next level up. */ | 117 | u8 grpnum; /* CPU/group number for next level up. */ |
114 | u8 level; /* root is at level 0. */ | 118 | u8 level; /* root is at level 0. */ |
115 | struct rcu_node *parent; | 119 | struct rcu_node *parent; |
116 | struct list_head blocked_tasks[2]; | 120 | struct list_head blocked_tasks[4]; |
117 | /* Tasks blocked in RCU read-side critsect. */ | 121 | /* Tasks blocked in RCU read-side critsect. */ |
118 | /* Grace period number (->gpnum) x blocked */ | 122 | /* Grace period number (->gpnum) x blocked */ |
119 | /* by tasks on the (x & 0x1) element of the */ | 123 | /* by tasks on the (x & 0x1) element of the */ |
@@ -128,6 +132,21 @@ struct rcu_node { | |||
128 | for ((rnp) = &(rsp)->node[0]; \ | 132 | for ((rnp) = &(rsp)->node[0]; \ |
129 | (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++) | 133 | (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++) |
130 | 134 | ||
135 | /* | ||
136 | * Do a breadth-first scan of the non-leaf rcu_node structures for the | ||
137 | * specified rcu_state structure. Note that if there is a singleton | ||
138 | * rcu_node tree with but one rcu_node structure, this loop is a no-op. | ||
139 | */ | ||
140 | #define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \ | ||
141 | for ((rnp) = &(rsp)->node[0]; \ | ||
142 | (rnp) < (rsp)->level[NUM_RCU_LVLS - 1]; (rnp)++) | ||
143 | |||
144 | /* | ||
145 | * Scan the leaves of the rcu_node hierarchy for the specified rcu_state | ||
146 | * structure. Note that if there is a singleton rcu_node tree with but | ||
147 | * one rcu_node structure, this loop -will- visit the rcu_node structure. | ||
148 | * It is still a leaf node, even if it is also the root node. | ||
149 | */ | ||
131 | #define rcu_for_each_leaf_node(rsp, rnp) \ | 150 | #define rcu_for_each_leaf_node(rsp, rnp) \ |
132 | for ((rnp) = (rsp)->level[NUM_RCU_LVLS - 1]; \ | 151 | for ((rnp) = (rsp)->level[NUM_RCU_LVLS - 1]; \ |
133 | (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++) | 152 | (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++) |
@@ -261,7 +280,7 @@ struct rcu_state { | |||
261 | long gpnum; /* Current gp number. */ | 280 | long gpnum; /* Current gp number. */ |
262 | long completed; /* # of last completed gp. */ | 281 | long completed; /* # of last completed gp. */ |
263 | 282 | ||
264 | /* End of fields guarded by root rcu_node's lock. */ | 283 | /* End of fields guarded by root rcu_node's lock. */ |
265 | 284 | ||
266 | spinlock_t onofflock; /* exclude on/offline and */ | 285 | spinlock_t onofflock; /* exclude on/offline and */ |
267 | /* starting new GP. Also */ | 286 | /* starting new GP. Also */ |
@@ -293,6 +312,13 @@ struct rcu_state { | |||
293 | #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ | 312 | #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ |
294 | }; | 313 | }; |
295 | 314 | ||
315 | /* Return values for rcu_preempt_offline_tasks(). */ | ||
316 | |||
317 | #define RCU_OFL_TASKS_NORM_GP 0x1 /* Tasks blocking normal */ | ||
318 | /* GP were moved to root. */ | ||
319 | #define RCU_OFL_TASKS_EXP_GP 0x2 /* Tasks blocking expedited */ | ||
320 | /* GP were moved to root. */ | ||
321 | |||
296 | #ifdef RCU_TREE_NONCORE | 322 | #ifdef RCU_TREE_NONCORE |
297 | 323 | ||
298 | /* | 324 | /* |
@@ -333,6 +359,9 @@ static void rcu_preempt_offline_cpu(int cpu); | |||
333 | static void rcu_preempt_check_callbacks(int cpu); | 359 | static void rcu_preempt_check_callbacks(int cpu); |
334 | static void rcu_preempt_process_callbacks(void); | 360 | static void rcu_preempt_process_callbacks(void); |
335 | void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); | 361 | void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); |
362 | #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) | ||
363 | static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp); | ||
364 | #endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */ | ||
336 | static int rcu_preempt_pending(int cpu); | 365 | static int rcu_preempt_pending(int cpu); |
337 | static int rcu_preempt_needs_cpu(int cpu); | 366 | static int rcu_preempt_needs_cpu(int cpu); |
338 | static void __cpuinit rcu_preempt_init_percpu_data(int cpu); | 367 | static void __cpuinit rcu_preempt_init_percpu_data(int cpu); |
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index c9f0c975c003..37fbccdf41d5 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h | |||
@@ -24,12 +24,15 @@ | |||
24 | * Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 24 | * Paul E. McKenney <paulmck@linux.vnet.ibm.com> |
25 | */ | 25 | */ |
26 | 26 | ||
27 | #include <linux/delay.h> | ||
27 | 28 | ||
28 | #ifdef CONFIG_TREE_PREEMPT_RCU | 29 | #ifdef CONFIG_TREE_PREEMPT_RCU |
29 | 30 | ||
30 | struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state); | 31 | struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state); |
31 | DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); | 32 | DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); |
32 | 33 | ||
34 | static int rcu_preempted_readers_exp(struct rcu_node *rnp); | ||
35 | |||
33 | /* | 36 | /* |
34 | * Tell them what RCU they are running. | 37 | * Tell them what RCU they are running. |
35 | */ | 38 | */ |
@@ -157,7 +160,10 @@ EXPORT_SYMBOL_GPL(__rcu_read_lock); | |||
157 | */ | 160 | */ |
158 | static int rcu_preempted_readers(struct rcu_node *rnp) | 161 | static int rcu_preempted_readers(struct rcu_node *rnp) |
159 | { | 162 | { |
160 | return !list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]); | 163 | int phase = rnp->gpnum & 0x1; |
164 | |||
165 | return !list_empty(&rnp->blocked_tasks[phase]) || | ||
166 | !list_empty(&rnp->blocked_tasks[phase + 2]); | ||
161 | } | 167 | } |
162 | 168 | ||
163 | /* | 169 | /* |
@@ -204,6 +210,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) | |||
204 | static void rcu_read_unlock_special(struct task_struct *t) | 210 | static void rcu_read_unlock_special(struct task_struct *t) |
205 | { | 211 | { |
206 | int empty; | 212 | int empty; |
213 | int empty_exp; | ||
207 | unsigned long flags; | 214 | unsigned long flags; |
208 | struct rcu_node *rnp; | 215 | struct rcu_node *rnp; |
209 | int special; | 216 | int special; |
@@ -247,6 +254,8 @@ static void rcu_read_unlock_special(struct task_struct *t) | |||
247 | spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 254 | spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
248 | } | 255 | } |
249 | empty = !rcu_preempted_readers(rnp); | 256 | empty = !rcu_preempted_readers(rnp); |
257 | empty_exp = !rcu_preempted_readers_exp(rnp); | ||
258 | smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ | ||
250 | list_del_init(&t->rcu_node_entry); | 259 | list_del_init(&t->rcu_node_entry); |
251 | t->rcu_blocked_node = NULL; | 260 | t->rcu_blocked_node = NULL; |
252 | 261 | ||
@@ -259,6 +268,13 @@ static void rcu_read_unlock_special(struct task_struct *t) | |||
259 | spin_unlock_irqrestore(&rnp->lock, flags); | 268 | spin_unlock_irqrestore(&rnp->lock, flags); |
260 | else | 269 | else |
261 | rcu_report_unblock_qs_rnp(rnp, flags); | 270 | rcu_report_unblock_qs_rnp(rnp, flags); |
271 | |||
272 | /* | ||
273 | * If this was the last task on the expedited lists, | ||
274 | * then we need to report up the rcu_node hierarchy. | ||
275 | */ | ||
276 | if (!empty_exp && !rcu_preempted_readers_exp(rnp)) | ||
277 | rcu_report_exp_rnp(&rcu_preempt_state, rnp); | ||
262 | } else { | 278 | } else { |
263 | local_irq_restore(flags); | 279 | local_irq_restore(flags); |
264 | } | 280 | } |
@@ -343,7 +359,7 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, | |||
343 | int i; | 359 | int i; |
344 | struct list_head *lp; | 360 | struct list_head *lp; |
345 | struct list_head *lp_root; | 361 | struct list_head *lp_root; |
346 | int retval; | 362 | int retval = 0; |
347 | struct rcu_node *rnp_root = rcu_get_root(rsp); | 363 | struct rcu_node *rnp_root = rcu_get_root(rsp); |
348 | struct task_struct *tp; | 364 | struct task_struct *tp; |
349 | 365 | ||
@@ -353,7 +369,9 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, | |||
353 | } | 369 | } |
354 | WARN_ON_ONCE(rnp != rdp->mynode && | 370 | WARN_ON_ONCE(rnp != rdp->mynode && |
355 | (!list_empty(&rnp->blocked_tasks[0]) || | 371 | (!list_empty(&rnp->blocked_tasks[0]) || |
356 | !list_empty(&rnp->blocked_tasks[1]))); | 372 | !list_empty(&rnp->blocked_tasks[1]) || |
373 | !list_empty(&rnp->blocked_tasks[2]) || | ||
374 | !list_empty(&rnp->blocked_tasks[3]))); | ||
357 | 375 | ||
358 | /* | 376 | /* |
359 | * Move tasks up to root rcu_node. Rely on the fact that the | 377 | * Move tasks up to root rcu_node. Rely on the fact that the |
@@ -361,8 +379,11 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, | |||
361 | * rcu_nodes in terms of gp_num value. This fact allows us to | 379 | * rcu_nodes in terms of gp_num value. This fact allows us to |
362 | * move the blocked_tasks[] array directly, element by element. | 380 | * move the blocked_tasks[] array directly, element by element. |
363 | */ | 381 | */ |
364 | retval = rcu_preempted_readers(rnp); | 382 | if (rcu_preempted_readers(rnp)) |
365 | for (i = 0; i < 2; i++) { | 383 | retval |= RCU_OFL_TASKS_NORM_GP; |
384 | if (rcu_preempted_readers_exp(rnp)) | ||
385 | retval |= RCU_OFL_TASKS_EXP_GP; | ||
386 | for (i = 0; i < 4; i++) { | ||
366 | lp = &rnp->blocked_tasks[i]; | 387 | lp = &rnp->blocked_tasks[i]; |
367 | lp_root = &rnp_root->blocked_tasks[i]; | 388 | lp_root = &rnp_root->blocked_tasks[i]; |
368 | while (!list_empty(lp)) { | 389 | while (!list_empty(lp)) { |
@@ -449,14 +470,159 @@ void synchronize_rcu(void) | |||
449 | } | 470 | } |
450 | EXPORT_SYMBOL_GPL(synchronize_rcu); | 471 | EXPORT_SYMBOL_GPL(synchronize_rcu); |
451 | 472 | ||
473 | static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq); | ||
474 | static long sync_rcu_preempt_exp_count; | ||
475 | static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex); | ||
476 | |||
477 | /* | ||
478 | * Return non-zero if there are any tasks in RCU read-side critical | ||
479 | * sections blocking the current preemptible-RCU expedited grace period. | ||
480 | * If there is no preemptible-RCU expedited grace period currently in | ||
481 | * progress, returns zero unconditionally. | ||
482 | */ | ||
483 | static int rcu_preempted_readers_exp(struct rcu_node *rnp) | ||
484 | { | ||
485 | return !list_empty(&rnp->blocked_tasks[2]) || | ||
486 | !list_empty(&rnp->blocked_tasks[3]); | ||
487 | } | ||
488 | |||
489 | /* | ||
489 | * Return non-zero if there is no RCU expedited grace period in progress | ||
490 | * for the specified rcu_node structure, in other words, if all CPUs and | ||
491 | * tasks covered by the specified rcu_node structure have done their bit | ||
492 | * for the current expedited grace period. Works only for preemptible | ||
493 | * RCU -- other RCU implementations use other means. | ||
495 | * | ||
496 | * Caller must hold sync_rcu_preempt_exp_mutex. | ||
497 | */ | ||
498 | static int sync_rcu_preempt_exp_done(struct rcu_node *rnp) | ||
499 | { | ||
500 | return !rcu_preempted_readers_exp(rnp) && | ||
501 | ACCESS_ONCE(rnp->expmask) == 0; | ||
502 | } | ||
503 | |||
504 | /* | ||
505 | * Report the exit from RCU read-side critical section for the last task | ||
506 | * that queued itself during or before the current expedited preemptible-RCU | ||
507 | * grace period. This event is reported either to the rcu_node structure on | ||
508 | * which the task was queued or to one of that rcu_node structure's ancestors, | ||
509 | * recursively up the tree. (Calm down, calm down, we do the recursion | ||
510 | * iteratively!) | ||
511 | * | ||
512 | * Caller must hold sync_rcu_preempt_exp_mutex. | ||
513 | */ | ||
514 | static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp) | ||
515 | { | ||
516 | unsigned long flags; | ||
517 | unsigned long mask; | ||
518 | |||
519 | spin_lock_irqsave(&rnp->lock, flags); | ||
520 | for (;;) { | ||
521 | if (!sync_rcu_preempt_exp_done(rnp)) | ||
522 | break; | ||
523 | if (rnp->parent == NULL) { | ||
524 | wake_up(&sync_rcu_preempt_exp_wq); | ||
525 | break; | ||
526 | } | ||
527 | mask = rnp->grpmask; | ||
528 | spin_unlock(&rnp->lock); /* irqs remain disabled */ | ||
529 | rnp = rnp->parent; | ||
530 | spin_lock(&rnp->lock); /* irqs already disabled */ | ||
531 | rnp->expmask &= ~mask; | ||
532 | } | ||
533 | spin_unlock_irqrestore(&rnp->lock, flags); | ||
534 | } | ||
535 | |||
536 | /* | ||
537 | * Snapshot the tasks blocking the newly started preemptible-RCU expedited | ||
538 | * grace period for the specified rcu_node structure. If there are no such | ||
539 | * tasks, report it up the rcu_node hierarchy. | ||
540 | * | ||
541 | * Caller must hold sync_rcu_preempt_exp_mutex and rsp->onofflock. | ||
542 | */ | ||
543 | static void | ||
544 | sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp) | ||
545 | { | ||
546 | int must_wait; | ||
547 | |||
548 | spin_lock(&rnp->lock); /* irqs already disabled */ | ||
549 | list_splice_init(&rnp->blocked_tasks[0], &rnp->blocked_tasks[2]); | ||
550 | list_splice_init(&rnp->blocked_tasks[1], &rnp->blocked_tasks[3]); | ||
551 | must_wait = rcu_preempted_readers_exp(rnp); | ||
552 | spin_unlock(&rnp->lock); /* irqs remain disabled */ | ||
553 | if (!must_wait) | ||
554 | rcu_report_exp_rnp(rsp, rnp); | ||
555 | } | ||
556 | |||
452 | /* | 557 | /* |
453 | * Wait for an rcu-preempt grace period. We are supposed to expedite the | 558 | * Wait for an rcu-preempt grace period, but expedite it. The basic idea |
454 | * grace period, but this is the crude slow compatability hack, so just | 559 | * is to invoke synchronize_sched_expedited() to push all the tasks to |
455 | * invoke synchronize_rcu(). | 560 | * the ->blocked_tasks[] lists, move all entries from the first set of |
561 | * ->blocked_tasks[] lists to the second set, and finally wait for this | ||
562 | * second set to drain. | ||
456 | */ | 563 | */ |
457 | void synchronize_rcu_expedited(void) | 564 | void synchronize_rcu_expedited(void) |
458 | { | 565 | { |
459 | synchronize_rcu(); | 566 | unsigned long flags; |
567 | struct rcu_node *rnp; | ||
568 | struct rcu_state *rsp = &rcu_preempt_state; | ||
569 | long snap; | ||
570 | int trycount = 0; | ||
571 | |||
572 | smp_mb(); /* Caller's modifications seen first by other CPUs. */ | ||
573 | snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1; | ||
574 | smp_mb(); /* Above access cannot bleed into critical section. */ | ||
575 | |||
576 | /* | ||
577 | * Acquire lock, falling back to synchronize_rcu() if too many | ||
578 | * lock-acquisition failures. Of course, if someone does the | ||
579 | * expedited grace period for us, just leave. | ||
580 | */ | ||
581 | while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) { | ||
582 | if (trycount++ < 10) | ||
583 | udelay(trycount * num_online_cpus()); | ||
584 | else { | ||
585 | synchronize_rcu(); | ||
586 | return; | ||
587 | } | ||
588 | if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0) | ||
589 | goto mb_ret; /* Others did our work for us. */ | ||
590 | } | ||
591 | if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0) | ||
592 | goto unlock_mb_ret; /* Others did our work for us. */ | ||
593 | |||
594 | /* force all RCU readers onto blocked_tasks[]. */ | ||
595 | synchronize_sched_expedited(); | ||
596 | |||
597 | spin_lock_irqsave(&rsp->onofflock, flags); | ||
598 | |||
599 | /* Initialize ->expmask for all non-leaf rcu_node structures. */ | ||
600 | rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) { | ||
601 | spin_lock(&rnp->lock); /* irqs already disabled. */ | ||
602 | rnp->expmask = rnp->qsmaskinit; | ||
603 | spin_unlock(&rnp->lock); /* irqs remain disabled. */ | ||
604 | } | ||
605 | |||
606 | /* Snapshot current state of ->blocked_tasks[] lists. */ | ||
607 | rcu_for_each_leaf_node(rsp, rnp) | ||
608 | sync_rcu_preempt_exp_init(rsp, rnp); | ||
609 | if (NUM_RCU_NODES > 1) | ||
610 | sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp)); | ||
611 | |||
612 | spin_unlock_irqrestore(&rsp->onofflock, flags); | ||
613 | |||
614 | /* Wait for snapshotted ->blocked_tasks[] lists to drain. */ | ||
615 | rnp = rcu_get_root(rsp); | ||
616 | wait_event(sync_rcu_preempt_exp_wq, | ||
617 | sync_rcu_preempt_exp_done(rnp)); | ||
618 | |||
619 | /* Clean up and exit. */ | ||
620 | smp_mb(); /* ensure expedited GP seen before counter increment. */ | ||
621 | ACCESS_ONCE(sync_rcu_preempt_exp_count)++; | ||
622 | unlock_mb_ret: | ||
623 | mutex_unlock(&sync_rcu_preempt_exp_mutex); | ||
624 | mb_ret: | ||
625 | smp_mb(); /* ensure subsequent action seen after grace period. */ | ||
460 | } | 626 | } |
461 | EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); | 627 | EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); |
462 | 628 | ||
@@ -655,6 +821,20 @@ void synchronize_rcu_expedited(void) | |||
655 | } | 821 | } |
656 | EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); | 822 | EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); |
657 | 823 | ||
824 | #ifdef CONFIG_HOTPLUG_CPU | ||
825 | |||
826 | /* | ||
827 | * Because preemptable RCU does not exist, there is never any need to | ||
828 | * report on tasks preempted in RCU read-side critical sections during | ||
829 | * expedited RCU grace periods. | ||
830 | */ | ||
831 | static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp) | ||
832 | { | ||
833 | return; | ||
834 | } | ||
835 | |||
836 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | ||
837 | |||
658 | /* | 838 | /* |
659 | * Because preemptable RCU does not exist, it never has any work to do. | 839 | * Because preemptable RCU does not exist, it never has any work to do. |
660 | */ | 840 | */ |
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index 1984cdc51e9a..9d2c88423b31 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c | |||
@@ -157,6 +157,7 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) | |||
157 | { | 157 | { |
158 | long gpnum; | 158 | long gpnum; |
159 | int level = 0; | 159 | int level = 0; |
160 | int phase; | ||
160 | struct rcu_node *rnp; | 161 | struct rcu_node *rnp; |
161 | 162 | ||
162 | gpnum = rsp->gpnum; | 163 | gpnum = rsp->gpnum; |
@@ -173,10 +174,13 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) | |||
173 | seq_puts(m, "\n"); | 174 | seq_puts(m, "\n"); |
174 | level = rnp->level; | 175 | level = rnp->level; |
175 | } | 176 | } |
176 | seq_printf(m, "%lx/%lx %c>%c %d:%d ^%d ", | 177 | phase = gpnum & 0x1; |
178 | seq_printf(m, "%lx/%lx %c%c>%c%c %d:%d ^%d ", | ||
177 | rnp->qsmask, rnp->qsmaskinit, | 179 | rnp->qsmask, rnp->qsmaskinit, |
178 | "T."[list_empty(&rnp->blocked_tasks[gpnum & 1])], | 180 | "T."[list_empty(&rnp->blocked_tasks[phase])], |
179 | "T."[list_empty(&rnp->blocked_tasks[!(gpnum & 1)])], | 181 | "E."[list_empty(&rnp->blocked_tasks[phase + 2])], |
182 | "T."[list_empty(&rnp->blocked_tasks[!phase])], | ||
183 | "E."[list_empty(&rnp->blocked_tasks[!phase + 2])], | ||
180 | rnp->grplo, rnp->grphi, rnp->grpnum); | 184 | rnp->grplo, rnp->grphi, rnp->grpnum); |
181 | } | 185 | } |
182 | seq_puts(m, "\n"); | 186 | seq_puts(m, "\n"); |