diff options
| author | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2014-03-14 19:37:08 -0400 |
|---|---|---|
| committer | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2014-03-20 20:12:25 -0400 |
| commit | 765a3f4fed708ae429ee095914a7897acb3a65bd (patch) | |
| tree | 1a33b4e0e88cdebf60855f6c3b102819ed6986b0 /kernel/rcu | |
| parent | f5604f67fe8cbd6f2088b20b9463f721aa613d4b (diff) | |
rcu: Provide grace-period piggybacking API
The following pattern is currently not well supported by RCU:
1. Make data element inaccessible to RCU readers.
2. Do work that probably lasts for more than one grace period.
3. Do something to make sure RCU readers in flight before #1 above
have completed.
Here are some things that could currently be done:
a. Do a synchronize_rcu() unconditionally at either #1 or #3 above.
This works, but imposes needless work and latency.
b. Post an RCU callback at #1 above that does a wakeup, then
wait for the wakeup at #3. This works well, but likely results
in an extra unneeded grace period. Open-coding this is also
a bit more semi-tricky code than would be good.
This commit therefore adds get_state_synchronize_rcu() and
cond_synchronize_rcu() APIs. Call get_state_synchronize_rcu() at #1
above and pass its return value to cond_synchronize_rcu() at #3 above.
This results in a call to synchronize_rcu() if no grace period has
elapsed between #1 and #3, but requires only a load, comparison, and
memory barrier if a full grace period did elapse.
Requested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Diffstat (limited to 'kernel/rcu')
| -rw-r--r-- | kernel/rcu/tree.c | 62 |
1 file changed, 58 insertions(+), 4 deletions(-)
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 351faba48b91..0c47e300210a 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
| @@ -1421,13 +1421,14 @@ static int rcu_gp_init(struct rcu_state *rsp) | |||
| 1421 | 1421 | ||
| 1422 | /* Advance to a new grace period and initialize state. */ | 1422 | /* Advance to a new grace period and initialize state. */ |
| 1423 | record_gp_stall_check_time(rsp); | 1423 | record_gp_stall_check_time(rsp); |
| 1424 | smp_wmb(); /* Record GP times before starting GP. */ | 1424 | /* Record GP times before starting GP, hence smp_store_release(). */ |
| 1425 | rsp->gpnum++; | 1425 | smp_store_release(&rsp->gpnum, rsp->gpnum + 1); |
| 1426 | trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("start")); | 1426 | trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("start")); |
| 1427 | raw_spin_unlock_irq(&rnp->lock); | 1427 | raw_spin_unlock_irq(&rnp->lock); |
| 1428 | 1428 | ||
| 1429 | /* Exclude any concurrent CPU-hotplug operations. */ | 1429 | /* Exclude any concurrent CPU-hotplug operations. */ |
| 1430 | mutex_lock(&rsp->onoff_mutex); | 1430 | mutex_lock(&rsp->onoff_mutex); |
| 1431 | smp_mb__after_unlock_lock(); /* ->gpnum increment before GP! */ | ||
| 1431 | 1432 | ||
| 1432 | /* | 1433 | /* |
| 1433 | * Set the quiescent-state-needed bits in all the rcu_node | 1434 | * Set the quiescent-state-needed bits in all the rcu_node |
| @@ -1555,10 +1556,11 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) | |||
| 1555 | } | 1556 | } |
| 1556 | rnp = rcu_get_root(rsp); | 1557 | rnp = rcu_get_root(rsp); |
| 1557 | raw_spin_lock_irq(&rnp->lock); | 1558 | raw_spin_lock_irq(&rnp->lock); |
| 1558 | smp_mb__after_unlock_lock(); | 1559 | smp_mb__after_unlock_lock(); /* Order GP before ->completed update. */ |
| 1559 | rcu_nocb_gp_set(rnp, nocb); | 1560 | rcu_nocb_gp_set(rnp, nocb); |
| 1560 | 1561 | ||
| 1561 | rsp->completed = rsp->gpnum; /* Declare grace period done. */ | 1562 | /* Declare grace period done. */ |
| 1563 | ACCESS_ONCE(rsp->completed) = rsp->gpnum; | ||
| 1562 | trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end")); | 1564 | trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end")); |
| 1563 | rsp->fqs_state = RCU_GP_IDLE; | 1565 | rsp->fqs_state = RCU_GP_IDLE; |
| 1564 | rdp = this_cpu_ptr(rsp->rda); | 1566 | rdp = this_cpu_ptr(rsp->rda); |
| @@ -2637,6 +2639,58 @@ void synchronize_rcu_bh(void) | |||
| 2637 | } | 2639 | } |
| 2638 | EXPORT_SYMBOL_GPL(synchronize_rcu_bh); | 2640 | EXPORT_SYMBOL_GPL(synchronize_rcu_bh); |
| 2639 | 2641 | ||
| 2642 | /** | ||
| 2643 | * get_state_synchronize_rcu - Snapshot current RCU state | ||
| 2644 | * | ||
| 2645 | * Returns a cookie that is used by a later call to cond_synchronize_rcu() | ||
| 2646 | * to determine whether or not a full grace period has elapsed in the | ||
| 2647 | * meantime. | ||
| 2648 | */ | ||
| 2649 | unsigned long get_state_synchronize_rcu(void) | ||
| 2650 | { | ||
| 2651 | /* | ||
| 2652 | * Any prior manipulation of RCU-protected data must happen | ||
| 2653 | * before the load from ->gpnum. | ||
| 2654 | */ | ||
| 2655 | smp_mb(); /* ^^^ */ | ||
| 2656 | |||
| 2657 | /* | ||
| 2658 | * Make sure this load happens before the purportedly | ||
| 2659 | * time-consuming work between get_state_synchronize_rcu() | ||
| 2660 | * and cond_synchronize_rcu(). | ||
| 2661 | */ | ||
| 2662 | return smp_load_acquire(&rcu_state->gpnum); | ||
| 2663 | } | ||
| 2664 | EXPORT_SYMBOL_GPL(get_state_synchronize_rcu); | ||
| 2665 | |||
| 2666 | /** | ||
| 2667 | * cond_synchronize_rcu - Conditionally wait for an RCU grace period | ||
| 2668 | * | ||
| 2669 | * @oldstate: return value from earlier call to get_state_synchronize_rcu() | ||
| 2670 | * | ||
| 2671 | * If a full RCU grace period has elapsed since the earlier call to | ||
| 2672 | * get_state_synchronize_rcu(), just return. Otherwise, invoke | ||
| 2673 | * synchronize_rcu() to wait for a full grace period. | ||
| 2674 | * | ||
| 2675 | * Yes, this function does not take counter wrap into account. But | ||
| 2676 | * counter wrap is harmless. If the counter wraps, we have waited for | ||
| 2677 | * more than 2 billion grace periods (and way more on a 64-bit system!), | ||
| 2678 | * so waiting for one additional grace period should be just fine. | ||
| 2679 | */ | ||
| 2680 | void cond_synchronize_rcu(unsigned long oldstate) | ||
| 2681 | { | ||
| 2682 | unsigned long newstate; | ||
| 2683 | |||
| 2684 | /* | ||
| 2685 | * Ensure that this load happens before any RCU-destructive | ||
| 2686 | * actions the caller might carry out after we return. | ||
| 2687 | */ | ||
| 2688 | newstate = smp_load_acquire(&rcu_state->completed); | ||
| 2689 | if (ULONG_CMP_GE(oldstate, newstate)) | ||
| 2690 | synchronize_rcu(); | ||
| 2691 | } | ||
| 2692 | EXPORT_SYMBOL_GPL(cond_synchronize_rcu); | ||
| 2693 | |||
| 2640 | static int synchronize_sched_expedited_cpu_stop(void *data) | 2694 | static int synchronize_sched_expedited_cpu_stop(void *data) |
| 2641 | { | 2695 | { |
| 2642 | /* | 2696 | /* |
