author	Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2018-05-01 13:26:57 -0400
committer	Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2018-07-12 18:37:48 -0400
commit	7a1d0f23ad70cd4813bf4b72735ea2c26a4f53fe (patch)
tree	83c5e674a73d05ebc38687b9754d1509366e29b5
parent	aebc82644b2c8eafa15e8c481fbafc1b41f4fbf9 (diff)
rcu: Move from ->need_future_gp[] to ->gp_seq_needed
One problem with the ->need_future_gp[] array is that the grace-period
assignment of each element changes as the grace periods complete.
This means that it is necessary to hold a lock when checking this
array to learn if a given grace period has already been requested.
This increases lock contention, which is the opposite of helpful.
This commit therefore replaces the ->need_future_gp[] array with a
single ->gp_seq_needed value, which is also tracked in the rcu_data
structure.  This will enable reliable lockless checking of whether or
not a given grace period has already been requested.
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
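
The lockless checking that this change enables rests on the kernel's
wrap-safe unsigned comparisons.  Below is a minimal, self-contained
sketch: the ULONG_CMP_* definitions mirror include/linux/rcupdate.h,
while gp_already_requested() is a hypothetical helper for illustration
only, not kernel code.

	#include <limits.h>
	#include <stdbool.h>
	#include <stdio.h>

	/* Wrap-safe comparisons, as in include/linux/rcupdate.h. */
	#define ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))
	#define ULONG_CMP_LT(a, b)	(ULONG_MAX / 2 < (a) - (b))

	/*
	 * Hypothetical lockless check: has grace period "c" already been
	 * requested?  Because ->gp_seq_needed is a single monotonic value,
	 * one plain load (READ_ONCE() in the kernel) suffices; the
	 * comparison stays meaningful even as the counter advances
	 * concurrently, which a multi-element array cannot guarantee.
	 */
	static bool gp_already_requested(unsigned long gp_seq_needed,
					 unsigned long c)
	{
		return ULONG_CMP_GE(gp_seq_needed, c);
	}

	int main(void)
	{
		printf("%d\n", gp_already_requested(5UL, 3UL));	/* 1: 5 covers 3 */
		printf("%d\n", gp_already_requested(3UL, 5UL));	/* 0: 5 not yet requested */
		printf("%d\n", gp_already_requested(1UL, ULONG_MAX)); /* 1: 1 is "after" the wrap */
		return 0;
	}

The same comparison appears in the patch below as
ULONG_CMP_GE(rnp_root->gp_seq_needed, c), there still under the
rcu_node ->lock; the single-value representation is what makes a later
lock-free fast path possible.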
-rw-r--r--	kernel/rcu/tree.c	34
-rw-r--r--	kernel/rcu/tree.h	19
2 files changed, 20 insertions(+), 33 deletions(-)
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 4a528a062cd4..1ede51690e4a 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1560,7 +1560,7 @@ static void trace_rcu_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
 /*
  * Start the specified grace period, as needed to handle newly arrived
  * callbacks.  The required future grace periods are recorded in each
- * rcu_node structure's ->need_future_gp[] field.  Returns true if there
+ * rcu_node structure's ->gp_seq_needed field.  Returns true if there
  * is reason to awaken the grace-period kthread.
  *
  * The caller must hold the specified rcu_node structure's ->lock, which
@@ -1589,14 +1589,14 @@ static bool rcu_start_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
 	for (rnp_root = rnp; 1; rnp_root = rnp_root->parent) {
 		if (rnp_root != rnp)
 			raw_spin_lock_rcu_node(rnp_root);
-		if (need_future_gp_element(rnp_root, c) ||
+		if (ULONG_CMP_GE(rnp_root->gp_seq_needed, c) ||
 		    rcu_seq_done(&rnp_root->gp_seq, c) ||
 		    (rnp != rnp_root &&
 		     rcu_seq_state(rcu_seq_current(&rnp_root->gp_seq)))) {
 			trace_rcu_this_gp(rnp_root, rdp, c, TPS("Prestarted"));
 			goto unlock_out;
 		}
-		need_future_gp_element(rnp_root, c) = true;
+		rnp_root->gp_seq_needed = c;
 		if (rnp_root != rnp && rnp_root->parent != NULL)
 			raw_spin_unlock_rcu_node(rnp_root);
 		if (!rnp_root->parent)
@@ -1633,8 +1633,9 @@ static bool rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
 	bool needmore;
 	struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
 
-	need_future_gp_element(rnp, c) = false;
-	needmore = need_any_future_gp(rnp);
+	needmore = ULONG_CMP_LT(rnp->gp_seq, rnp->gp_seq_needed);
+	if (!needmore)
+		rnp->gp_seq_needed = rnp->gp_seq; /* Avoid counter wrap. */
 	trace_rcu_this_gp(rnp, rdp, c,
 			  needmore ? TPS("CleanupMore") : TPS("Cleanup"));
 	return needmore;
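
The "Avoid counter wrap" reset in the hunk above guards against
->gp_seq_needed going stale: with wrap-safe comparisons, a value left
more than ULONG_MAX / 2 behind the advancing counter would eventually
compare as being in the future.  A toy demonstration of the failure the
reset prevents (self-contained and illustrative only, reusing the
ULONG_CMP_GE definition from include/linux/rcupdate.h):

	#include <limits.h>
	#include <stdio.h>

	#define ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))

	int main(void)
	{
		unsigned long stale = 8;			/* never reset */
		unsigned long c = stale + ULONG_MAX / 2 + 2;	/* far-future request */

		/* Falsely reports grace period c as already requested: */
		printf("%d\n", ULONG_CMP_GE(stale, c));		/* prints 1 -- wrong! */
		return 0;
	}

Snapping ->gp_seq_needed forward to ->gp_seq whenever no further grace
periods are needed keeps the two values within half the counter range
of each other, so this misfire cannot occur.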
@@ -2046,7 +2047,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
 	rsp->gp_state = RCU_GP_IDLE;
 	/* Check for GP requests since above loop. */
 	rdp = this_cpu_ptr(rsp->rda);
-	if (need_any_future_gp(rnp)) {
+	if (ULONG_CMP_LT(rnp->gp_seq, rnp->gp_seq_needed)) {
 		trace_rcu_this_gp(rnp, rdp, rsp->completed - 1,
 				  TPS("CleanupMore"));
 		needgp = true;
@@ -2700,8 +2701,8 @@ rcu_check_gp_start_stall(struct rcu_state *rsp, struct rcu_node *rnp,
 	struct rcu_node *rnp_root = rcu_get_root(rsp);
 	static atomic_t warned = ATOMIC_INIT(0);
 
-	if (!IS_ENABLED(CONFIG_PROVE_RCU) ||
-	    rcu_gp_in_progress(rsp) || !need_any_future_gp(rcu_get_root(rsp)))
+	if (!IS_ENABLED(CONFIG_PROVE_RCU) || rcu_gp_in_progress(rsp) ||
+	    ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed))
 		return;
 	j = jiffies; /* Expensive access, and in common case don't get here. */
 	if (time_before(j, READ_ONCE(rsp->gp_req_activity) + HZ) ||
@@ -2711,7 +2712,8 @@ rcu_check_gp_start_stall(struct rcu_state *rsp, struct rcu_node *rnp,
 
 	raw_spin_lock_irqsave_rcu_node(rnp, flags);
 	j = jiffies;
-	if (rcu_gp_in_progress(rsp) || !need_any_future_gp(rcu_get_root(rsp)) ||
+	if (rcu_gp_in_progress(rsp) ||
+	    ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed) ||
 	    time_before(j, READ_ONCE(rsp->gp_req_activity) + HZ) ||
 	    time_before(j, READ_ONCE(rsp->gp_activity) + HZ) ||
 	    atomic_read(&warned)) {
@@ -2723,7 +2725,8 @@ rcu_check_gp_start_stall(struct rcu_state *rsp, struct rcu_node *rnp,
 	if (rnp_root != rnp)
 		raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */
 	j = jiffies;
-	if (rcu_gp_in_progress(rsp) || !need_any_future_gp(rcu_get_root(rsp)) ||
+	if (rcu_gp_in_progress(rsp) ||
+	    ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed) ||
 	    time_before(j, rsp->gp_req_activity + HZ) ||
 	    time_before(j, rsp->gp_activity + HZ) ||
 	    atomic_xchg(&warned, 1)) {
@@ -2731,12 +2734,9 @@ rcu_check_gp_start_stall(struct rcu_state *rsp, struct rcu_node *rnp,
 		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		return;
 	}
-	pr_alert("%s: g%lu %d%d%d%d gar:%lu ga:%lu f%#x %s->state:%#lx\n",
-		 __func__, READ_ONCE(rsp->gpnum),
-		 need_future_gp_element(rcu_get_root(rsp), 0),
-		 need_future_gp_element(rcu_get_root(rsp), 1),
-		 need_future_gp_element(rcu_get_root(rsp), 2),
-		 need_future_gp_element(rcu_get_root(rsp), 3),
+	pr_alert("%s: g%ld->%ld gar:%lu ga:%lu f%#x %s->state:%#lx\n",
+		 __func__, (long)READ_ONCE(rsp->gp_seq),
+		 (long)READ_ONCE(rnp_root->gp_seq_needed),
 		 j - rsp->gp_req_activity, j - rsp->gp_activity,
 		 rsp->gp_flags, rsp->name,
 		 rsp->gp_kthread ? rsp->gp_kthread->state : 0x1ffffL);
@@ -3527,6 +3527,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
 	rdp->gpnum = rnp->completed; /* Make CPU later note any new GP. */
 	rdp->completed = rnp->completed;
 	rdp->gp_seq = rnp->gp_seq;
+	rdp->gp_seq_needed = rnp->gp_seq;
 	rdp->cpu_no_qs.b.norm = true;
 	rdp->rcu_qs_ctr_snap = per_cpu(rcu_dynticks.rcu_qs_ctr, cpu);
 	rdp->core_needs_qs = false;
@@ -3907,6 +3908,7 @@ static void __init rcu_init_one(struct rcu_state *rsp)
 	rnp->gpnum = rsp->gpnum;
 	rnp->completed = rsp->completed;
 	rnp->gp_seq = rsp->gp_seq;
+	rnp->gp_seq_needed = rsp->gp_seq;
 	rnp->completedqs = rsp->gp_seq;
 	rnp->qsmask = 0;
 	rnp->qsmaskinit = 0;
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index a21d403a6010..9329c1ff695f 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -88,6 +88,7 @@ struct rcu_node {
 					/*  This will either be equal to or one */
 					/*  behind the root rcu_node's gpnum. */
 	unsigned long gp_seq;		/* Track rsp->rcu_gp_seq. */
+	unsigned long gp_seq_needed;	/* Track rsp->rcu_gp_seq_needed. */
 	unsigned long completedqs;	/* All QSes done for this node. */
 	unsigned long qsmask;		/* CPUs or groups that need to switch in */
 					/*  order for current grace period to proceed.*/
@@ -160,7 +161,6 @@ struct rcu_node {
 	struct swait_queue_head nocb_gp_wq[2];
 				/* Place for rcu_nocb_kthread() to wait GP. */
 #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
-	u8 need_future_gp[4];	/* Counts of upcoming GP requests. */
 	raw_spinlock_t fqslock ____cacheline_internodealigned_in_smp;
 
 	spinlock_t exp_lock ____cacheline_internodealigned_in_smp;
@@ -170,22 +170,6 @@ struct rcu_node {
 	bool exp_need_flush;	/* Need to flush workitem? */
 } ____cacheline_internodealigned_in_smp;
 
-/* Accessors for ->need_future_gp[] array. */
-#define need_future_gp_mask() \
-	(ARRAY_SIZE(((struct rcu_node *)NULL)->need_future_gp) - 1)
-#define need_future_gp_element(rnp, c) \
-	((rnp)->need_future_gp[(c >> RCU_SEQ_CTR_SHIFT) & need_future_gp_mask()])
-#define need_any_future_gp(rnp) \
-({ \
-	int __i; \
-	bool __nonzero = false; \
-\
-	for (__i = 0; __i < ARRAY_SIZE((rnp)->need_future_gp); __i++) \
-		__nonzero = __nonzero || \
-			    READ_ONCE((rnp)->need_future_gp[__i]); \
-	__nonzero; \
-})
-
 /*
  * Bitmasks in an rcu_node cover the interval [grplo, grphi] of CPU IDs, and
  * are indexed relative to this interval rather than the global CPU ID space.
@@ -213,6 +197,7 @@ struct rcu_data {
 	unsigned long gpnum;		/* Highest gp number that this CPU */
 					/*  is aware of having started. */
 	unsigned long gp_seq;		/* Track rsp->rcu_gp_seq counter. */
+	unsigned long gp_seq_needed;	/* Track rsp->rcu_gp_seq_needed ctr. */
 	unsigned long rcu_qs_ctr_snap;	/* Snapshot of rcu_qs_ctr to check */
 					/*  for rcu_all_qs() invocations. */
 	union rcu_noqs cpu_no_qs;	/* No QSes yet for this CPU. */
