author     Andrea Bastoni <bastoni@cs.unc.edu>   2010-05-30 19:16:45 -0400
committer  Andrea Bastoni <bastoni@cs.unc.edu>   2010-05-30 19:16:45 -0400
commit     ada47b5fe13d89735805b566185f4885f5a3f750 (patch)
tree       644b88f8a71896307d71438e9b3af49126ffb22b /kernel/rcutree_plugin.h
parent     43e98717ad40a4ae64545b5ba047c7b86aa44f4f (diff)
parent     3280f21d43ee541f97f8cda5792150d2dbec20d5 (diff)

Merge branch 'wip-2.6.34' into old-private-master
Diffstat (limited to 'kernel/rcutree_plugin.h')

 -rw-r--r--   kernel/rcutree_plugin.h   512
 1 file changed, 468 insertions, 44 deletions
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index ef2a58c2b9d5..79b53bda8943 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -24,16 +24,19 @@
  * Paul E. McKenney <paulmck@linux.vnet.ibm.com>
  */
 
+#include <linux/delay.h>
 
 #ifdef CONFIG_TREE_PREEMPT_RCU
 
 struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state);
 DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
 
+static int rcu_preempted_readers_exp(struct rcu_node *rnp);
+
 /*
  * Tell them what RCU they are running.
  */
-static inline void rcu_bootup_announce(void)
+static void __init rcu_bootup_announce(void)
 {
 	printk(KERN_INFO
 	       "Experimental preemptable hierarchical RCU implementation.\n");
@@ -59,6 +62,15 @@ long rcu_batches_completed(void)
 EXPORT_SYMBOL_GPL(rcu_batches_completed);
 
 /*
+ * Force a quiescent state for preemptible RCU.
+ */
+void rcu_force_quiescent_state(void)
+{
+	force_quiescent_state(&rcu_preempt_state, 0);
+}
+EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
+
+/*
  * Record a preemptable-RCU quiescent state for the specified CPU. Note
  * that this just means that the task currently running on the CPU is
  * not in a quiescent state. There might be any number of tasks blocked
@@ -67,7 +79,7 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed);
 static void rcu_preempt_qs(int cpu)
 {
 	struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
-	rdp->passed_quiesc_completed = rdp->completed;
+	rdp->passed_quiesc_completed = rdp->gpnum - 1;
 	barrier();
 	rdp->passed_quiesc = 1;
 }
@@ -99,7 +111,7 @@ static void rcu_preempt_note_context_switch(int cpu)
 		/* Possibly blocking in an RCU read-side critical section. */
 		rdp = rcu_preempt_state.rda[cpu];
 		rnp = rdp->mynode;
-		spin_lock_irqsave(&rnp->lock, flags);
+		raw_spin_lock_irqsave(&rnp->lock, flags);
 		t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
 		t->rcu_blocked_node = rnp;
 
@@ -120,7 +132,7 @@ static void rcu_preempt_note_context_switch(int cpu)
 		WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
 		phase = (rnp->gpnum + !(rnp->qsmask & rdp->grpmask)) & 0x1;
 		list_add(&t->rcu_node_entry, &rnp->blocked_tasks[phase]);
-		spin_unlock_irqrestore(&rnp->lock, flags);
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 	}
 
 	/*
@@ -157,14 +169,58 @@ EXPORT_SYMBOL_GPL(__rcu_read_lock);
  */
 static int rcu_preempted_readers(struct rcu_node *rnp)
 {
-	return !list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]);
+	int phase = rnp->gpnum & 0x1;
+
+	return !list_empty(&rnp->blocked_tasks[phase]) ||
+	       !list_empty(&rnp->blocked_tasks[phase + 2]);
 }
 
+/*
+ * Record a quiescent state for all tasks that were previously queued
+ * on the specified rcu_node structure and that were blocking the current
+ * RCU grace period. The caller must hold the specified rnp->lock with
+ * irqs disabled, and this lock is released upon return, but irqs remain
+ * disabled.
+ */
+static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
+	__releases(rnp->lock)
+{
+	unsigned long mask;
+	struct rcu_node *rnp_p;
+
+	if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) {
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		return;  /* Still need more quiescent states! */
+	}
+
+	rnp_p = rnp->parent;
+	if (rnp_p == NULL) {
+		/*
+		 * Either there is only one rcu_node in the tree,
+		 * or tasks were kicked up to root rcu_node due to
+		 * CPUs going offline.
+		 */
+		rcu_report_qs_rsp(&rcu_preempt_state, flags);
+		return;
+	}
+
+	/* Report up the rest of the hierarchy. */
+	mask = rnp->grpmask;
+	raw_spin_unlock(&rnp->lock);	/* irqs remain disabled. */
+	raw_spin_lock(&rnp_p->lock);	/* irqs already disabled. */
+	rcu_report_qs_rnp(mask, &rcu_preempt_state, rnp_p, flags);
+}
+
+/*
+ * Handle special cases during rcu_read_unlock(), such as needing to
+ * notify RCU core processing or task having blocked during the RCU
+ * read-side critical section.
+ */
 static void rcu_read_unlock_special(struct task_struct *t)
 {
 	int empty;
+	int empty_exp;
 	unsigned long flags;
-	unsigned long mask;
 	struct rcu_node *rnp;
 	int special;
 
@@ -201,42 +257,36 @@ static void rcu_read_unlock_special(struct task_struct *t)
		 */
		for (;;) {
			rnp = t->rcu_blocked_node;
-			spin_lock(&rnp->lock);  /* irqs already disabled. */
+			raw_spin_lock(&rnp->lock);  /* irqs already disabled. */
			if (rnp == t->rcu_blocked_node)
				break;
-			spin_unlock(&rnp->lock);  /* irqs remain disabled. */
+			raw_spin_unlock(&rnp->lock);  /* irqs remain disabled. */
		}
		empty = !rcu_preempted_readers(rnp);
+		empty_exp = !rcu_preempted_readers_exp(rnp);
+		smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
		list_del_init(&t->rcu_node_entry);
		t->rcu_blocked_node = NULL;

		/*
		 * If this was the last task on the current list, and if
		 * we aren't waiting on any CPUs, report the quiescent state.
-		 * Note that both cpu_quiet_msk_finish() and cpu_quiet_msk()
-		 * drop rnp->lock and restore irq.
+		 * Note that rcu_report_unblock_qs_rnp() releases rnp->lock.
		 */
-		if (!empty && rnp->qsmask == 0 &&
-		    !rcu_preempted_readers(rnp)) {
-			struct rcu_node *rnp_p;
-
-			if (rnp->parent == NULL) {
-				/* Only one rcu_node in the tree. */
-				cpu_quiet_msk_finish(&rcu_preempt_state, flags);
-				return;
-			}
-			/* Report up the rest of the hierarchy. */
-			mask = rnp->grpmask;
-			spin_unlock_irqrestore(&rnp->lock, flags);
-			rnp_p = rnp->parent;
-			spin_lock_irqsave(&rnp_p->lock, flags);
-			WARN_ON_ONCE(rnp->qsmask);
-			cpu_quiet_msk(mask, &rcu_preempt_state, rnp_p, flags);
-			return;
-		}
-		spin_unlock(&rnp->lock);
+		if (empty)
+			raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		else
+			rcu_report_unblock_qs_rnp(rnp, flags);
+
+		/*
+		 * If this was the last task on the expedited lists,
+		 * then we need to report up the rcu_node hierarchy.
+		 */
+		if (!empty_exp && !rcu_preempted_readers_exp(rnp))
+			rcu_report_exp_rnp(&rcu_preempt_state, rnp);
+	} else {
+		local_irq_restore(flags);
	}
-	local_irq_restore(flags);
 }
 
 /*
@@ -254,29 +304,73 @@ void __rcu_read_unlock(void)
 	if (--ACCESS_ONCE(t->rcu_read_lock_nesting) == 0 &&
 	    unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
 		rcu_read_unlock_special(t);
+#ifdef CONFIG_PROVE_LOCKING
+	WARN_ON_ONCE(ACCESS_ONCE(t->rcu_read_lock_nesting) < 0);
+#endif /* #ifdef CONFIG_PROVE_LOCKING */
 }
 EXPORT_SYMBOL_GPL(__rcu_read_unlock);
 
 #ifdef CONFIG_RCU_CPU_STALL_DETECTOR
 
+#ifdef CONFIG_RCU_CPU_STALL_VERBOSE
+
+/*
+ * Dump detailed information for all tasks blocking the current RCU
+ * grace period on the specified rcu_node structure.
+ */
+static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
+{
+	unsigned long flags;
+	struct list_head *lp;
+	int phase;
+	struct task_struct *t;
+
+	if (rcu_preempted_readers(rnp)) {
+		raw_spin_lock_irqsave(&rnp->lock, flags);
+		phase = rnp->gpnum & 0x1;
+		lp = &rnp->blocked_tasks[phase];
+		list_for_each_entry(t, lp, rcu_node_entry)
+			sched_show_task(t);
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	}
+}
+
+/*
+ * Dump detailed information for all tasks blocking the current RCU
+ * grace period.
+ */
+static void rcu_print_detail_task_stall(struct rcu_state *rsp)
+{
+	struct rcu_node *rnp = rcu_get_root(rsp);
+
+	rcu_print_detail_task_stall_rnp(rnp);
+	rcu_for_each_leaf_node(rsp, rnp)
+		rcu_print_detail_task_stall_rnp(rnp);
+}
+
+#else /* #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */
+
+static void rcu_print_detail_task_stall(struct rcu_state *rsp)
+{
+}
+
+#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */
+
 /*
  * Scan the current list of tasks blocked within RCU read-side critical
  * sections, printing out the tid of each.
  */
 static void rcu_print_task_stall(struct rcu_node *rnp)
 {
-	unsigned long flags;
 	struct list_head *lp;
 	int phase;
 	struct task_struct *t;
 
 	if (rcu_preempted_readers(rnp)) {
-		spin_lock_irqsave(&rnp->lock, flags);
 		phase = rnp->gpnum & 0x1;
 		lp = &rnp->blocked_tasks[phase];
 		list_for_each_entry(t, lp, rcu_node_entry)
 			printk(" P%d", t->pid);
-		spin_unlock_irqrestore(&rnp->lock, flags);
 	}
 }
 
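Orientation note on the read-side API whose slow path the hunk above modifies: the following is a minimal reader-side sketch, not part of this patch. struct foo, foo_ptr and read_foo_val() are hypothetical names chosen for illustration; rcu_read_lock(), rcu_dereference() and rcu_read_unlock() are the real interfaces, and the unlock may fall into rcu_read_unlock_special() when the reader was preempted.

/* Illustrative reader-side sketch; struct foo and foo_ptr are hypothetical. */
#include <linux/rcupdate.h>

struct foo {
	int val;
};

static struct foo *foo_ptr;	/* assumed to be published with rcu_assign_pointer() */

static int read_foo_val(void)
{
	struct foo *p;
	int val = -1;

	rcu_read_lock();		/* begin RCU read-side critical section */
	p = rcu_dereference(foo_ptr);	/* safe access to the RCU-protected pointer */
	if (p)
		val = p->val;
	rcu_read_unlock();		/* may take the rcu_read_unlock_special() slow path */
	return val;
}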
@@ -303,6 +397,8 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
 * rcu_node. The reason for not just moving them to the immediate
 * parent is to remove the need for rcu_read_unlock_special() to
 * make more than two attempts to acquire the target rcu_node's lock.
+ * Returns true if there were tasks blocking the current RCU grace
+ * period.
 *
 * Returns 1 if there was previously a task blocking the current grace
 * period on the specified rcu_node structure.
@@ -316,7 +412,7 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
 	int i;
 	struct list_head *lp;
 	struct list_head *lp_root;
-	int retval = rcu_preempted_readers(rnp);
+	int retval = 0;
 	struct rcu_node *rnp_root = rcu_get_root(rsp);
 	struct task_struct *tp;
 
@@ -326,7 +422,9 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
 	}
 	WARN_ON_ONCE(rnp != rdp->mynode &&
 		     (!list_empty(&rnp->blocked_tasks[0]) ||
-		      !list_empty(&rnp->blocked_tasks[1])));
+		      !list_empty(&rnp->blocked_tasks[1]) ||
+		      !list_empty(&rnp->blocked_tasks[2]) ||
+		      !list_empty(&rnp->blocked_tasks[3])));
 
 	/*
 	 * Move tasks up to root rcu_node. Rely on the fact that the
@@ -334,19 +432,22 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
 	 * rcu_nodes in terms of gp_num value. This fact allows us to
 	 * move the blocked_tasks[] array directly, element by element.
 	 */
-	for (i = 0; i < 2; i++) {
+	if (rcu_preempted_readers(rnp))
+		retval |= RCU_OFL_TASKS_NORM_GP;
+	if (rcu_preempted_readers_exp(rnp))
+		retval |= RCU_OFL_TASKS_EXP_GP;
+	for (i = 0; i < 4; i++) {
 		lp = &rnp->blocked_tasks[i];
 		lp_root = &rnp_root->blocked_tasks[i];
 		while (!list_empty(lp)) {
 			tp = list_entry(lp->next, typeof(*tp), rcu_node_entry);
-			spin_lock(&rnp_root->lock); /* irqs already disabled */
+			raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
 			list_del(&tp->rcu_node_entry);
 			tp->rcu_blocked_node = rnp_root;
 			list_add(&tp->rcu_node_entry, lp_root);
-			spin_unlock(&rnp_root->lock); /* irqs remain disabled */
+			raw_spin_unlock(&rnp_root->lock); /* irqs remain disabled */
 		}
 	}
-
 	return retval;
 }
 
@@ -398,14 +499,183 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 }
 EXPORT_SYMBOL_GPL(call_rcu);
 
+/**
+ * synchronize_rcu - wait until a grace period has elapsed.
+ *
+ * Control will return to the caller some time after a full grace
+ * period has elapsed, in other words after all currently executing RCU
+ * read-side critical sections have completed. RCU read-side critical
+ * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
+ * and may be nested.
+ */
+void synchronize_rcu(void)
+{
+	struct rcu_synchronize rcu;
+
+	if (!rcu_scheduler_active)
+		return;
+
+	init_completion(&rcu.completion);
+	/* Will wake me after RCU finished. */
+	call_rcu(&rcu.head, wakeme_after_rcu);
+	/* Wait for it. */
+	wait_for_completion(&rcu.completion);
+}
+EXPORT_SYMBOL_GPL(synchronize_rcu);
+
+static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq);
+static long sync_rcu_preempt_exp_count;
+static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex);
+
+/*
+ * Return non-zero if there are any tasks in RCU read-side critical
+ * sections blocking the current preemptible-RCU expedited grace period.
+ * If there is no preemptible-RCU expedited grace period currently in
+ * progress, returns zero unconditionally.
+ */
+static int rcu_preempted_readers_exp(struct rcu_node *rnp)
+{
+	return !list_empty(&rnp->blocked_tasks[2]) ||
+	       !list_empty(&rnp->blocked_tasks[3]);
+}
+
+/*
+ * return non-zero if there is no RCU expedited grace period in progress
+ * for the specified rcu_node structure, in other words, if all CPUs and
+ * tasks covered by the specified rcu_node structure have done their bit
+ * for the current expedited grace period. Works only for preemptible
+ * RCU -- other RCU implementation use other means.
+ *
+ * Caller must hold sync_rcu_preempt_exp_mutex.
+ */
+static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
+{
+	return !rcu_preempted_readers_exp(rnp) &&
+	       ACCESS_ONCE(rnp->expmask) == 0;
+}
+
 /*
- * Wait for an rcu-preempt grace period. We are supposed to expedite the
- * grace period, but this is the crude slow compatability hack, so just
- * invoke synchronize_rcu().
+ * Report the exit from RCU read-side critical section for the last task
+ * that queued itself during or before the current expedited preemptible-RCU
+ * grace period. This event is reported either to the rcu_node structure on
+ * which the task was queued or to one of that rcu_node structure's ancestors,
+ * recursively up the tree. (Calm down, calm down, we do the recursion
+ * iteratively!)
+ *
+ * Caller must hold sync_rcu_preempt_exp_mutex.
+ */
+static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
+{
+	unsigned long flags;
+	unsigned long mask;
+
+	raw_spin_lock_irqsave(&rnp->lock, flags);
+	for (;;) {
+		if (!sync_rcu_preempt_exp_done(rnp))
+			break;
+		if (rnp->parent == NULL) {
+			wake_up(&sync_rcu_preempt_exp_wq);
+			break;
+		}
+		mask = rnp->grpmask;
+		raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
+		rnp = rnp->parent;
+		raw_spin_lock(&rnp->lock); /* irqs already disabled */
+		rnp->expmask &= ~mask;
+	}
+	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+}
+
+/*
+ * Snapshot the tasks blocking the newly started preemptible-RCU expedited
+ * grace period for the specified rcu_node structure. If there are no such
+ * tasks, report it up the rcu_node hierarchy.
+ *
+ * Caller must hold sync_rcu_preempt_exp_mutex and rsp->onofflock.
+ */
+static void
+sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
+{
+	int must_wait;
+
+	raw_spin_lock(&rnp->lock); /* irqs already disabled */
+	list_splice_init(&rnp->blocked_tasks[0], &rnp->blocked_tasks[2]);
+	list_splice_init(&rnp->blocked_tasks[1], &rnp->blocked_tasks[3]);
+	must_wait = rcu_preempted_readers_exp(rnp);
+	raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
+	if (!must_wait)
+		rcu_report_exp_rnp(rsp, rnp);
+}
+
+/*
+ * Wait for an rcu-preempt grace period, but expedite it. The basic idea
+ * is to invoke synchronize_sched_expedited() to push all the tasks to
+ * the ->blocked_tasks[] lists, move all entries from the first set of
+ * ->blocked_tasks[] lists to the second set, and finally wait for this
+ * second set to drain.
 */
 void synchronize_rcu_expedited(void)
 {
-	synchronize_rcu();
+	unsigned long flags;
+	struct rcu_node *rnp;
+	struct rcu_state *rsp = &rcu_preempt_state;
+	long snap;
+	int trycount = 0;
+
+	smp_mb(); /* Caller's modifications seen first by other CPUs. */
+	snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1;
+	smp_mb(); /* Above access cannot bleed into critical section. */
+
+	/*
+	 * Acquire lock, falling back to synchronize_rcu() if too many
+	 * lock-acquisition failures. Of course, if someone does the
+	 * expedited grace period for us, just leave.
+	 */
+	while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) {
+		if (trycount++ < 10)
+			udelay(trycount * num_online_cpus());
+		else {
+			synchronize_rcu();
+			return;
+		}
+		if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0)
+			goto mb_ret; /* Others did our work for us. */
+	}
+	if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0)
+		goto unlock_mb_ret; /* Others did our work for us. */
+
+	/* force all RCU readers onto blocked_tasks[]. */
+	synchronize_sched_expedited();
+
+	raw_spin_lock_irqsave(&rsp->onofflock, flags);
+
+	/* Initialize ->expmask for all non-leaf rcu_node structures. */
+	rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) {
+		raw_spin_lock(&rnp->lock); /* irqs already disabled. */
+		rnp->expmask = rnp->qsmaskinit;
+		raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+	}
+
+	/* Snapshot current state of ->blocked_tasks[] lists. */
+	rcu_for_each_leaf_node(rsp, rnp)
+		sync_rcu_preempt_exp_init(rsp, rnp);
+	if (NUM_RCU_NODES > 1)
+		sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp));
+
+	raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
+
+	/* Wait for snapshotted ->blocked_tasks[] lists to drain. */
+	rnp = rcu_get_root(rsp);
+	wait_event(sync_rcu_preempt_exp_wq,
+		   sync_rcu_preempt_exp_done(rnp));
+
+	/* Clean up and exit. */
+	smp_mb(); /* ensure expedited GP seen before counter increment. */
+	ACCESS_ONCE(sync_rcu_preempt_exp_count)++;
+unlock_mb_ret:
+	mutex_unlock(&sync_rcu_preempt_exp_mutex);
+mb_ret:
+	smp_mb(); /* ensure subsequent action seen after grace period. */
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
 
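For orientation on the updater-side primitives added above: a minimal sketch of how synchronize_rcu() or synchronize_rcu_expedited() is typically paired with pointer replacement. This is illustrative only and not part of the patch; struct foo, foo_ptr, foo_lock and update_foo_val() are hypothetical, while rcu_assign_pointer(), synchronize_rcu() and synchronize_rcu_expedited() are the real interfaces.

/* Illustrative updater-side sketch; struct foo, foo_ptr and foo_lock are hypothetical. */
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct foo {
	int val;
};

static struct foo *foo_ptr;
static DEFINE_SPINLOCK(foo_lock);

static void update_foo_val(int new_val)
{
	struct foo *new_p, *old_p;

	new_p = kmalloc(sizeof(*new_p), GFP_KERNEL);
	if (!new_p)
		return;
	new_p->val = new_val;

	spin_lock(&foo_lock);
	old_p = foo_ptr;
	rcu_assign_pointer(foo_ptr, new_p);	/* publish the new version */
	spin_unlock(&foo_lock);

	/*
	 * Wait for pre-existing readers before freeing the old version.
	 * synchronize_rcu_expedited() trades extra CPU work for lower
	 * latency, as the comments in the hunk above describe;
	 * synchronize_rcu() is the normal, cheaper choice.
	 */
	synchronize_rcu_expedited();
	kfree(old_p);
}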
@@ -481,7 +751,7 @@ void exit_rcu(void)
 /*
  * Tell them what RCU they are running.
  */
-static inline void rcu_bootup_announce(void)
+static void __init rcu_bootup_announce(void)
 {
 	printk(KERN_INFO "Hierarchical RCU implementation.\n");
 }
@@ -496,6 +766,16 @@ long rcu_batches_completed(void)
 EXPORT_SYMBOL_GPL(rcu_batches_completed);
 
 /*
+ * Force a quiescent state for RCU, which, because there is no preemptible
+ * RCU, becomes the same as rcu-sched.
+ */
+void rcu_force_quiescent_state(void)
+{
+	rcu_sched_force_quiescent_state();
+}
+EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
+
+/*
  * Because preemptable RCU does not exist, we never have to check for
  * CPUs being in quiescent states.
  */
@@ -512,12 +792,30 @@ static int rcu_preempted_readers(struct rcu_node *rnp)
 	return 0;
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+
+/* Because preemptible RCU does not exist, no quieting of tasks. */
+static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
+{
+	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+}
+
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */
+
 #ifdef CONFIG_RCU_CPU_STALL_DETECTOR
 
 /*
  * Because preemptable RCU does not exist, we never have to check for
  * tasks blocked within RCU read-side critical sections.
  */
+static void rcu_print_detail_task_stall(struct rcu_state *rsp)
+{
+}
+
+/*
+ * Because preemptable RCU does not exist, we never have to check for
+ * tasks blocked within RCU read-side critical sections.
+ */
 static void rcu_print_task_stall(struct rcu_node *rnp)
 {
 }
@@ -594,6 +892,20 @@ void synchronize_rcu_expedited(void)
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
 
+#ifdef CONFIG_HOTPLUG_CPU
+
+/*
+ * Because preemptable RCU does not exist, there is never any need to
+ * report on tasks preempted in RCU read-side critical sections during
+ * expedited RCU grace periods.
+ */
+static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
+{
+	return;
+}
+
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */
+
 /*
  * Because preemptable RCU does not exist, it never has any work to do.
  */
@@ -643,3 +955,115 @@ static void __init __rcu_init_preempt(void)
 }
 
 #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
+
+#if !defined(CONFIG_RCU_FAST_NO_HZ)
+
+/*
+ * Check to see if any future RCU-related work will need to be done
+ * by the current CPU, even if none need be done immediately, returning
+ * 1 if so. This function is part of the RCU implementation; it is -not-
+ * an exported member of the RCU API.
+ *
+ * Because we have preemptible RCU, just check whether this CPU needs
+ * any flavor of RCU. Do not chew up lots of CPU cycles with preemption
+ * disabled in a most-likely vain attempt to cause RCU not to need this CPU.
+ */
+int rcu_needs_cpu(int cpu)
+{
+	return rcu_needs_cpu_quick_check(cpu);
+}
+
+/*
+ * Check to see if we need to continue a callback-flush operations to
+ * allow the last CPU to enter dyntick-idle mode. But fast dyntick-idle
+ * entry is not configured, so we never do need to.
+ */
+static void rcu_needs_cpu_flush(void)
+{
+}
+
+#else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */
+
+#define RCU_NEEDS_CPU_FLUSHES 5
+static DEFINE_PER_CPU(int, rcu_dyntick_drain);
+static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff);
+
+/*
+ * Check to see if any future RCU-related work will need to be done
+ * by the current CPU, even if none need be done immediately, returning
+ * 1 if so. This function is part of the RCU implementation; it is -not-
+ * an exported member of the RCU API.
+ *
+ * Because we are not supporting preemptible RCU, attempt to accelerate
+ * any current grace periods so that RCU no longer needs this CPU, but
+ * only if all other CPUs are already in dynticks-idle mode. This will
+ * allow the CPU cores to be powered down immediately, as opposed to after
+ * waiting many milliseconds for grace periods to elapse.
+ *
+ * Because it is not legal to invoke rcu_process_callbacks() with irqs
+ * disabled, we do one pass of force_quiescent_state(), then do a
+ * raise_softirq() to cause rcu_process_callbacks() to be invoked later.
+ * The per-cpu rcu_dyntick_drain variable controls the sequencing.
+ */
+int rcu_needs_cpu(int cpu)
+{
+	int c = 0;
+	int thatcpu;
+
+	/* Check for being in the holdoff period. */
+	if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies)
+		return rcu_needs_cpu_quick_check(cpu);
+
+	/* Don't bother unless we are the last non-dyntick-idle CPU. */
+	for_each_cpu_not(thatcpu, nohz_cpu_mask)
+		if (thatcpu != cpu) {
+			per_cpu(rcu_dyntick_drain, cpu) = 0;
+			per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
+			return rcu_needs_cpu_quick_check(cpu);
+		}
+
+	/* Check and update the rcu_dyntick_drain sequencing. */
+	if (per_cpu(rcu_dyntick_drain, cpu) <= 0) {
+		/* First time through, initialize the counter. */
+		per_cpu(rcu_dyntick_drain, cpu) = RCU_NEEDS_CPU_FLUSHES;
+	} else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) {
+		/* We have hit the limit, so time to give up. */
+		per_cpu(rcu_dyntick_holdoff, cpu) = jiffies;
+		return rcu_needs_cpu_quick_check(cpu);
+	}
+
+	/* Do one step pushing remaining RCU callbacks through. */
+	if (per_cpu(rcu_sched_data, cpu).nxtlist) {
+		rcu_sched_qs(cpu);
+		force_quiescent_state(&rcu_sched_state, 0);
+		c = c || per_cpu(rcu_sched_data, cpu).nxtlist;
+	}
+	if (per_cpu(rcu_bh_data, cpu).nxtlist) {
+		rcu_bh_qs(cpu);
+		force_quiescent_state(&rcu_bh_state, 0);
+		c = c || per_cpu(rcu_bh_data, cpu).nxtlist;
+	}
+
+	/* If RCU callbacks are still pending, RCU still needs this CPU. */
+	if (c)
+		raise_softirq(RCU_SOFTIRQ);
+	return c;
+}
+
+/*
+ * Check to see if we need to continue a callback-flush operations to
+ * allow the last CPU to enter dyntick-idle mode.
+ */
+static void rcu_needs_cpu_flush(void)
+{
+	int cpu = smp_processor_id();
+	unsigned long flags;
+
+	if (per_cpu(rcu_dyntick_drain, cpu) <= 0)
+		return;
+	local_irq_save(flags);
+	(void)rcu_needs_cpu(cpu);
+	local_irq_restore(flags);
+}
+
+#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
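Closing orientation note, not part of the patch: callers that must not block can queue reclamation with call_rcu() instead of the synchronous primitives above; this is the same callback mechanism that synchronize_rcu() is built on via wakeme_after_rcu() earlier in this diff. struct foo and the helpers below are hypothetical names for illustration; call_rcu() and container_of() are the real interfaces.

/* Illustrative asynchronous-reclaim sketch; struct foo is hypothetical. */
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
	int val;
	struct rcu_head rcu;	/* embedded so the callback can recover foo */
};

static void free_foo_rcu(struct rcu_head *head)
{
	struct foo *p = container_of(head, struct foo, rcu);

	kfree(p);	/* runs after a grace period, so no readers remain */
}

static void retire_foo(struct foo *p)
{
	/* Queue the free and return immediately, instead of blocking
	 * the way synchronize_rcu() does. */
	call_rcu(&p->rcu, free_foo_rcu);
}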