diff options
author | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2009-12-02 15:10:15 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-12-03 05:35:25 -0500 |
commit | d9a3da0699b24a589b27a61e1a5b5bd30d9db669 (patch) | |
tree | f7440e396a6c818f3cef514ccc31ab55d88025ef /kernel/rcutree_plugin.h | |
parent | cf244dc01bf68e1ad338b82447f8686d24ea4435 (diff) |
rcu: Add expedited grace-period support for preemptible RCU
Implement an synchronize_rcu_expedited() for preemptible RCU
that actually is expedited. This uses
synchronize_sched_expedited() to force all threads currently
running in a preemptible-RCU read-side critical section onto the
appropriate ->blocked_tasks[] list, then takes a snapshot of all
of these lists and waits for them to drain.
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
LKML-Reference: <1259784616158-git-send-email->
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/rcutree_plugin.h')
-rw-r--r-- | kernel/rcutree_plugin.h | 198 |
1 files changed, 189 insertions, 9 deletions
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index c9f0c975c003..37fbccdf41d5 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h | |||
@@ -24,12 +24,15 @@ | |||
24 | * Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 24 | * Paul E. McKenney <paulmck@linux.vnet.ibm.com> |
25 | */ | 25 | */ |
26 | 26 | ||
27 | #include <linux/delay.h> | ||
27 | 28 | ||
28 | #ifdef CONFIG_TREE_PREEMPT_RCU | 29 | #ifdef CONFIG_TREE_PREEMPT_RCU |
29 | 30 | ||
30 | struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state); | 31 | struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state); |
31 | DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); | 32 | DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); |
32 | 33 | ||
34 | static int rcu_preempted_readers_exp(struct rcu_node *rnp); | ||
35 | |||
33 | /* | 36 | /* |
34 | * Tell them what RCU they are running. | 37 | * Tell them what RCU they are running. |
35 | */ | 38 | */ |
@@ -157,7 +160,10 @@ EXPORT_SYMBOL_GPL(__rcu_read_lock); | |||
157 | */ | 160 | */ |
158 | static int rcu_preempted_readers(struct rcu_node *rnp) | 161 | static int rcu_preempted_readers(struct rcu_node *rnp) |
159 | { | 162 | { |
160 | return !list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]); | 163 | int phase = rnp->gpnum & 0x1; |
164 | |||
165 | return !list_empty(&rnp->blocked_tasks[phase]) || | ||
166 | !list_empty(&rnp->blocked_tasks[phase + 2]); | ||
161 | } | 167 | } |
162 | 168 | ||
163 | /* | 169 | /* |
@@ -204,6 +210,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) | |||
204 | static void rcu_read_unlock_special(struct task_struct *t) | 210 | static void rcu_read_unlock_special(struct task_struct *t) |
205 | { | 211 | { |
206 | int empty; | 212 | int empty; |
213 | int empty_exp; | ||
207 | unsigned long flags; | 214 | unsigned long flags; |
208 | struct rcu_node *rnp; | 215 | struct rcu_node *rnp; |
209 | int special; | 216 | int special; |
@@ -247,6 +254,8 @@ static void rcu_read_unlock_special(struct task_struct *t) | |||
247 | spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 254 | spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
248 | } | 255 | } |
249 | empty = !rcu_preempted_readers(rnp); | 256 | empty = !rcu_preempted_readers(rnp); |
257 | empty_exp = !rcu_preempted_readers_exp(rnp); | ||
258 | smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ | ||
250 | list_del_init(&t->rcu_node_entry); | 259 | list_del_init(&t->rcu_node_entry); |
251 | t->rcu_blocked_node = NULL; | 260 | t->rcu_blocked_node = NULL; |
252 | 261 | ||
@@ -259,6 +268,13 @@ static void rcu_read_unlock_special(struct task_struct *t) | |||
259 | spin_unlock_irqrestore(&rnp->lock, flags); | 268 | spin_unlock_irqrestore(&rnp->lock, flags); |
260 | else | 269 | else |
261 | rcu_report_unblock_qs_rnp(rnp, flags); | 270 | rcu_report_unblock_qs_rnp(rnp, flags); |
271 | |||
272 | /* | ||
273 | * If this was the last task on the expedited lists, | ||
274 | * then we need to report up the rcu_node hierarchy. | ||
275 | */ | ||
276 | if (!empty_exp && !rcu_preempted_readers_exp(rnp)) | ||
277 | rcu_report_exp_rnp(&rcu_preempt_state, rnp); | ||
262 | } else { | 278 | } else { |
263 | local_irq_restore(flags); | 279 | local_irq_restore(flags); |
264 | } | 280 | } |
@@ -343,7 +359,7 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, | |||
343 | int i; | 359 | int i; |
344 | struct list_head *lp; | 360 | struct list_head *lp; |
345 | struct list_head *lp_root; | 361 | struct list_head *lp_root; |
346 | int retval; | 362 | int retval = 0; |
347 | struct rcu_node *rnp_root = rcu_get_root(rsp); | 363 | struct rcu_node *rnp_root = rcu_get_root(rsp); |
348 | struct task_struct *tp; | 364 | struct task_struct *tp; |
349 | 365 | ||
@@ -353,7 +369,9 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, | |||
353 | } | 369 | } |
354 | WARN_ON_ONCE(rnp != rdp->mynode && | 370 | WARN_ON_ONCE(rnp != rdp->mynode && |
355 | (!list_empty(&rnp->blocked_tasks[0]) || | 371 | (!list_empty(&rnp->blocked_tasks[0]) || |
356 | !list_empty(&rnp->blocked_tasks[1]))); | 372 | !list_empty(&rnp->blocked_tasks[1]) || |
373 | !list_empty(&rnp->blocked_tasks[2]) || | ||
374 | !list_empty(&rnp->blocked_tasks[3]))); | ||
357 | 375 | ||
358 | /* | 376 | /* |
359 | * Move tasks up to root rcu_node. Rely on the fact that the | 377 | * Move tasks up to root rcu_node. Rely on the fact that the |
@@ -361,8 +379,11 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, | |||
361 | * rcu_nodes in terms of gp_num value. This fact allows us to | 379 | * rcu_nodes in terms of gp_num value. This fact allows us to |
362 | * move the blocked_tasks[] array directly, element by element. | 380 | * move the blocked_tasks[] array directly, element by element. |
363 | */ | 381 | */ |
364 | retval = rcu_preempted_readers(rnp); | 382 | if (rcu_preempted_readers(rnp)) |
365 | for (i = 0; i < 2; i++) { | 383 | retval |= RCU_OFL_TASKS_NORM_GP; |
384 | if (rcu_preempted_readers_exp(rnp)) | ||
385 | retval |= RCU_OFL_TASKS_EXP_GP; | ||
386 | for (i = 0; i < 4; i++) { | ||
366 | lp = &rnp->blocked_tasks[i]; | 387 | lp = &rnp->blocked_tasks[i]; |
367 | lp_root = &rnp_root->blocked_tasks[i]; | 388 | lp_root = &rnp_root->blocked_tasks[i]; |
368 | while (!list_empty(lp)) { | 389 | while (!list_empty(lp)) { |
@@ -449,14 +470,159 @@ void synchronize_rcu(void) | |||
449 | } | 470 | } |
450 | EXPORT_SYMBOL_GPL(synchronize_rcu); | 471 | EXPORT_SYMBOL_GPL(synchronize_rcu); |
451 | 472 | ||
473 | static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq); | ||
474 | static long sync_rcu_preempt_exp_count; | ||
475 | static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex); | ||
476 | |||
477 | /* | ||
478 | * Return non-zero if there are any tasks in RCU read-side critical | ||
479 | * sections blocking the current preemptible-RCU expedited grace period. | ||
480 | * If there is no preemptible-RCU expedited grace period currently in | ||
481 | * progress, returns zero unconditionally. | ||
482 | */ | ||
483 | static int rcu_preempted_readers_exp(struct rcu_node *rnp) | ||
484 | { | ||
485 | return !list_empty(&rnp->blocked_tasks[2]) || | ||
486 | !list_empty(&rnp->blocked_tasks[3]); | ||
487 | } | ||
488 | |||
489 | /* | ||
490 | * return non-zero if there is no RCU expedited grace period in progress | ||
491 | * for the specified rcu_node structure, in other words, if all CPUs and | ||
492 | * tasks covered by the specified rcu_node structure have done their bit | ||
493 | * for the current expedited grace period. Works only for preemptible | ||
494 | * RCU -- other RCU implementation use other means. | ||
495 | * | ||
496 | * Caller must hold sync_rcu_preempt_exp_mutex. | ||
497 | */ | ||
498 | static int sync_rcu_preempt_exp_done(struct rcu_node *rnp) | ||
499 | { | ||
500 | return !rcu_preempted_readers_exp(rnp) && | ||
501 | ACCESS_ONCE(rnp->expmask) == 0; | ||
502 | } | ||
503 | |||
504 | /* | ||
505 | * Report the exit from RCU read-side critical section for the last task | ||
506 | * that queued itself during or before the current expedited preemptible-RCU | ||
507 | * grace period. This event is reported either to the rcu_node structure on | ||
508 | * which the task was queued or to one of that rcu_node structure's ancestors, | ||
509 | * recursively up the tree. (Calm down, calm down, we do the recursion | ||
510 | * iteratively!) | ||
511 | * | ||
512 | * Caller must hold sync_rcu_preempt_exp_mutex. | ||
513 | */ | ||
514 | static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp) | ||
515 | { | ||
516 | unsigned long flags; | ||
517 | unsigned long mask; | ||
518 | |||
519 | spin_lock_irqsave(&rnp->lock, flags); | ||
520 | for (;;) { | ||
521 | if (!sync_rcu_preempt_exp_done(rnp)) | ||
522 | break; | ||
523 | if (rnp->parent == NULL) { | ||
524 | wake_up(&sync_rcu_preempt_exp_wq); | ||
525 | break; | ||
526 | } | ||
527 | mask = rnp->grpmask; | ||
528 | spin_unlock(&rnp->lock); /* irqs remain disabled */ | ||
529 | rnp = rnp->parent; | ||
530 | spin_lock(&rnp->lock); /* irqs already disabled */ | ||
531 | rnp->expmask &= ~mask; | ||
532 | } | ||
533 | spin_unlock_irqrestore(&rnp->lock, flags); | ||
534 | } | ||
535 | |||
536 | /* | ||
537 | * Snapshot the tasks blocking the newly started preemptible-RCU expedited | ||
538 | * grace period for the specified rcu_node structure. If there are no such | ||
539 | * tasks, report it up the rcu_node hierarchy. | ||
540 | * | ||
541 | * Caller must hold sync_rcu_preempt_exp_mutex and rsp->onofflock. | ||
542 | */ | ||
543 | static void | ||
544 | sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp) | ||
545 | { | ||
546 | int must_wait; | ||
547 | |||
548 | spin_lock(&rnp->lock); /* irqs already disabled */ | ||
549 | list_splice_init(&rnp->blocked_tasks[0], &rnp->blocked_tasks[2]); | ||
550 | list_splice_init(&rnp->blocked_tasks[1], &rnp->blocked_tasks[3]); | ||
551 | must_wait = rcu_preempted_readers_exp(rnp); | ||
552 | spin_unlock(&rnp->lock); /* irqs remain disabled */ | ||
553 | if (!must_wait) | ||
554 | rcu_report_exp_rnp(rsp, rnp); | ||
555 | } | ||
556 | |||
452 | /* | 557 | /* |
453 | * Wait for an rcu-preempt grace period. We are supposed to expedite the | 558 | * Wait for an rcu-preempt grace period, but expedite it. The basic idea |
454 | * grace period, but this is the crude slow compatability hack, so just | 559 | * is to invoke synchronize_sched_expedited() to push all the tasks to |
455 | * invoke synchronize_rcu(). | 560 | * the ->blocked_tasks[] lists, move all entries from the first set of |
561 | * ->blocked_tasks[] lists to the second set, and finally wait for this | ||
562 | * second set to drain. | ||
456 | */ | 563 | */ |
457 | void synchronize_rcu_expedited(void) | 564 | void synchronize_rcu_expedited(void) |
458 | { | 565 | { |
459 | synchronize_rcu(); | 566 | unsigned long flags; |
567 | struct rcu_node *rnp; | ||
568 | struct rcu_state *rsp = &rcu_preempt_state; | ||
569 | long snap; | ||
570 | int trycount = 0; | ||
571 | |||
572 | smp_mb(); /* Caller's modifications seen first by other CPUs. */ | ||
573 | snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1; | ||
574 | smp_mb(); /* Above access cannot bleed into critical section. */ | ||
575 | |||
576 | /* | ||
577 | * Acquire lock, falling back to synchronize_rcu() if too many | ||
578 | * lock-acquisition failures. Of course, if someone does the | ||
579 | * expedited grace period for us, just leave. | ||
580 | */ | ||
581 | while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) { | ||
582 | if (trycount++ < 10) | ||
583 | udelay(trycount * num_online_cpus()); | ||
584 | else { | ||
585 | synchronize_rcu(); | ||
586 | return; | ||
587 | } | ||
588 | if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0) | ||
589 | goto mb_ret; /* Others did our work for us. */ | ||
590 | } | ||
591 | if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0) | ||
592 | goto unlock_mb_ret; /* Others did our work for us. */ | ||
593 | |||
594 | /* force all RCU readers onto blocked_tasks[]. */ | ||
595 | synchronize_sched_expedited(); | ||
596 | |||
597 | spin_lock_irqsave(&rsp->onofflock, flags); | ||
598 | |||
599 | /* Initialize ->expmask for all non-leaf rcu_node structures. */ | ||
600 | rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) { | ||
601 | spin_lock(&rnp->lock); /* irqs already disabled. */ | ||
602 | rnp->expmask = rnp->qsmaskinit; | ||
603 | spin_unlock(&rnp->lock); /* irqs remain disabled. */ | ||
604 | } | ||
605 | |||
606 | /* Snapshot current state of ->blocked_tasks[] lists. */ | ||
607 | rcu_for_each_leaf_node(rsp, rnp) | ||
608 | sync_rcu_preempt_exp_init(rsp, rnp); | ||
609 | if (NUM_RCU_NODES > 1) | ||
610 | sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp)); | ||
611 | |||
612 | spin_unlock_irqrestore(&rsp->onofflock, flags); | ||
613 | |||
614 | /* Wait for snapshotted ->blocked_tasks[] lists to drain. */ | ||
615 | rnp = rcu_get_root(rsp); | ||
616 | wait_event(sync_rcu_preempt_exp_wq, | ||
617 | sync_rcu_preempt_exp_done(rnp)); | ||
618 | |||
619 | /* Clean up and exit. */ | ||
620 | smp_mb(); /* ensure expedited GP seen before counter increment. */ | ||
621 | ACCESS_ONCE(sync_rcu_preempt_exp_count)++; | ||
622 | unlock_mb_ret: | ||
623 | mutex_unlock(&sync_rcu_preempt_exp_mutex); | ||
624 | mb_ret: | ||
625 | smp_mb(); /* ensure subsequent action seen after grace period. */ | ||
460 | } | 626 | } |
461 | EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); | 627 | EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); |
462 | 628 | ||
@@ -655,6 +821,20 @@ void synchronize_rcu_expedited(void) | |||
655 | } | 821 | } |
656 | EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); | 822 | EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); |
657 | 823 | ||
824 | #ifdef CONFIG_HOTPLUG_CPU | ||
825 | |||
826 | /* | ||
827 | * Because preemptable RCU does not exist, there is never any need to | ||
828 | * report on tasks preempted in RCU read-side critical sections during | ||
829 | * expedited RCU grace periods. | ||
830 | */ | ||
831 | static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp) | ||
832 | { | ||
833 | return; | ||
834 | } | ||
835 | |||
836 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | ||
837 | |||
658 | /* | 838 | /* |
659 | * Because preemptable RCU does not exist, it never has any work to do. | 839 | * Because preemptable RCU does not exist, it never has any work to do. |
660 | */ | 840 | */ |