| author | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2015-02-13 13:52:28 -0500 |
|---|---|---|
| committer | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2015-03-12 18:18:42 -0400 |
| commit | 8eb74b2b291e7bf6aa59fcb4e59f236382f00bf5 | |
| tree | 07abd44430c52bfe2f76ba62188b7958c3910c35 /kernel/rcu | |
| parent | 999c286347538388170f919146d7cfa58689472e | |
rcu: Rework preemptible expedited bitmask handling
Currently, the rcu_node tree ->expmask bitmasks are initially set to
reflect the online CPUs. This is pointless, because only the CPUs
preempted within RCU read-side critical sections by the preceding
synchronize_sched_expedited() need to be tracked. This commit therefore
instead sets up these bitmasks based on the state of the ->blkd_tasks
lists.
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
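
The heart of the rework is the new phase-1 pass, which propagates a "this subtree has blocked readers" bit from each leaf toward the root, stopping as soon as an ancestor already carries the bit. Below is a minimal user-space sketch of just that propagation, under loose assumptions: `struct toy_node`, `propagate_up()`, and the field layout are invented stand-ins for the rcu_node machinery, and the kernel's locking and memory barriers are deliberately omitted.

```c
#include <stdio.h>

/* Toy stand-in for struct rcu_node: parent link plus the two masks. */
struct toy_node {
	struct toy_node *parent;
	unsigned long grpmask;	/* This node's bit within parent->expmask. */
	unsigned long expmask;	/* Children (or tasks) still blocking the GP. */
};

/*
 * Modeled on the phase-1 loop in sync_rcu_preempt_exp_init1(): mark the
 * leaf, then walk upward setting this subtree's bit in each ancestor,
 * stopping early if another leaf's walk already set it.
 */
static void propagate_up(struct toy_node *rnp)
{
	struct toy_node *rnp_up = rnp;
	unsigned long mask;

	rnp->expmask = 1;		/* Tell phase 2 this leaf has work. */
	while (rnp_up->parent) {
		mask = rnp_up->grpmask;
		rnp_up = rnp_up->parent;
		if (rnp_up->expmask & mask)
			break;		/* An earlier walk got here first. */
		rnp_up->expmask |= mask;
	}
}

int main(void)
{
	struct toy_node root  = { NULL, 0, 0 };
	struct toy_node leaf0 = { &root, 0x1, 0 };
	struct toy_node leaf1 = { &root, 0x2, 0 };

	propagate_up(&leaf0);
	propagate_up(&leaf1);
	printf("root.expmask = %#lx\n", root.expmask);	/* prints 0x3 */
	return 0;
}
```

The early `break` means each tree edge is set at most once across all leaf walks, so the whole phase-1 pass touches each rcu_node structure only a bounded number of times.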
Diffstat (limited to 'kernel/rcu')
| -rw-r--r-- | kernel/rcu/tree_plugin.h | 98 |
1 file changed, 75 insertions(+), 23 deletions(-)
```diff
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 79376e2461c9..a22721547442 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -626,9 +626,6 @@ static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
  * recursively up the tree. (Calm down, calm down, we do the recursion
  * iteratively!)
  *
- * Most callers will set the "wake" flag, but the task initiating the
- * expedited grace period need not wake itself.
- *
  * Caller must hold sync_rcu_preempt_exp_mutex.
  */
 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
@@ -663,26 +660,85 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
 
 /*
  * Snapshot the tasks blocking the newly started preemptible-RCU expedited
- * grace period for the specified rcu_node structure. If there are no such
- * tasks, report it up the rcu_node hierarchy.
+ * grace period for the specified rcu_node structure, phase 1. If there
+ * are such tasks, set the ->expmask bits up the rcu_node tree and also
+ * set the ->expmask bits on the leaf rcu_node structures to tell phase 2
+ * that work is needed here.
  *
- * Caller must hold sync_rcu_preempt_exp_mutex and must exclude
- * CPU hotplug operations.
+ * Caller must hold sync_rcu_preempt_exp_mutex.
  */
 static void
-sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
+sync_rcu_preempt_exp_init1(struct rcu_state *rsp, struct rcu_node *rnp)
 {
 	unsigned long flags;
+	unsigned long mask;
+	struct rcu_node *rnp_up;
 
 	raw_spin_lock_irqsave(&rnp->lock, flags);
 	smp_mb__after_unlock_lock();
+	WARN_ON_ONCE(rnp->expmask);
+	WARN_ON_ONCE(rnp->exp_tasks);
 	if (!rcu_preempt_has_tasks(rnp)) {
+		/* No blocked tasks, nothing to do. */
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
-		rcu_report_exp_rnp(rsp, rnp, false); /* No tasks, report. */
-	} else {
+		return;
+	}
+	/* Call for Phase 2 and propagate ->expmask bits up the tree. */
+	rnp->expmask = 1;
+	rnp_up = rnp;
+	while (rnp_up->parent) {
+		mask = rnp_up->grpmask;
+		rnp_up = rnp_up->parent;
+		if (rnp_up->expmask & mask)
+			break;
+		raw_spin_lock(&rnp_up->lock); /* irqs already off */
+		smp_mb__after_unlock_lock();
+		rnp_up->expmask |= mask;
+		raw_spin_unlock(&rnp_up->lock); /* irqs still off */
+	}
+	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+}
+
+/*
+ * Snapshot the tasks blocking the newly started preemptible-RCU expedited
+ * grace period for the specified rcu_node structure, phase 2. If the
+ * leaf rcu_node structure has its ->expmask field set, check for tasks.
+ * If there are some, clear ->expmask and set ->exp_tasks accordingly,
+ * then initiate RCU priority boosting. Otherwise, clear ->expmask and
+ * invoke rcu_report_exp_rnp() to clear out the upper-level ->expmask bits,
+ * enabling rcu_read_unlock_special() to do the bit-clearing.
+ *
+ * Caller must hold sync_rcu_preempt_exp_mutex.
+ */
+static void
+sync_rcu_preempt_exp_init2(struct rcu_state *rsp, struct rcu_node *rnp)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&rnp->lock, flags);
+	smp_mb__after_unlock_lock();
+	if (!rnp->expmask) {
+		/* Phase 1 didn't do anything, so Phase 2 doesn't either. */
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		return;
+	}
+
+	/* Phase 1 is over. */
+	rnp->expmask = 0;
+
+	/*
+	 * If there are still blocked tasks, set up ->exp_tasks so that
+	 * rcu_read_unlock_special() will wake us and then boost them.
+	 */
+	if (rcu_preempt_has_tasks(rnp)) {
 		rnp->exp_tasks = rnp->blkd_tasks.next;
 		rcu_initiate_boost(rnp, flags);  /* releases rnp->lock */
+		return;
 	}
+
+	/* No longer any blocked tasks, so undo bit setting. */
+	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	rcu_report_exp_rnp(rsp, rnp, false);
 }
 
 /**
@@ -699,7 +755,6 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
  */
 void synchronize_rcu_expedited(void)
 {
-	unsigned long flags;
 	struct rcu_node *rnp;
 	struct rcu_state *rsp = &rcu_preempt_state;
 	unsigned long snap;
@@ -750,19 +805,16 @@ void synchronize_rcu_expedited(void)
 	/* force all RCU readers onto ->blkd_tasks lists. */
 	synchronize_sched_expedited();
 
-	/* Initialize ->expmask for all non-leaf rcu_node structures. */
-	rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) {
-		raw_spin_lock_irqsave(&rnp->lock, flags);
-		smp_mb__after_unlock_lock();
-		rnp->expmask = rnp->qsmaskinit;
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
-	}
-
-	/* Snapshot current state of ->blkd_tasks lists. */
+	/*
+	 * Snapshot current state of ->blkd_tasks lists into ->expmask.
+	 * Phase 1 sets bits and phase 2 permits rcu_read_unlock_special()
+	 * to start clearing them. Doing this in one phase leads to
+	 * strange races between setting and clearing bits, so just say "no"!
+	 */
+	rcu_for_each_leaf_node(rsp, rnp)
+		sync_rcu_preempt_exp_init1(rsp, rnp);
 	rcu_for_each_leaf_node(rsp, rnp)
-		sync_rcu_preempt_exp_init(rsp, rnp);
-	if (NUM_RCU_NODES > 1)
-		sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp));
+		sync_rcu_preempt_exp_init2(rsp, rnp);
 
 	put_online_cpus();
 
```
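
Why two separate passes over the leaves rather than one? As the comment now in synchronize_rcu_expedited() notes, every ->expmask bit must be set before any may be cleared. Below is a hedged sketch of that driver shape, reusing `struct toy_node` and `propagate_up()` from the earlier sketch; `has_blocked[]` is an invented stand-in for rcu_preempt_has_tasks(), and all names here are illustrative only, not kernel code.

```c
/*
 * Toy two-pass driver modeled on the new synchronize_rcu_expedited()
 * flow; reuses struct toy_node and propagate_up() from the earlier
 * sketch, with locking and wakeups omitted.
 */
static void toy_expedited_pass(struct toy_node **leaf, const int *has_blocked,
			       int nleaves)
{
	int i;

	/* Phase 1: set every needed ->expmask bit; nothing clears yet. */
	for (i = 0; i < nleaves; i++)
		if (has_blocked[i])
			propagate_up(leaf[i]);

	/*
	 * Phase 2: only now may bits be consumed, so a reader that
	 * unblocks on one leaf cannot race with bit-setting still in
	 * flight on another leaf.
	 */
	for (i = 0; i < nleaves; i++) {
		if (!leaf[i]->expmask)
			continue;	/* Phase 1 found no blocked tasks. */
		leaf[i]->expmask = 0;
		/* ...record/boost the still-blocked tasks for this leaf... */
	}
}
```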
