path: root/kernel/rcu
author		Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2017-04-25 14:34:40 -0400
committer	Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2017-04-26 19:32:16 -0400
commit		2da4b2a7fd8de52c44fa83c59f00d38a0d90caae (patch)
tree		12ae8f7939181af0c8722f81232dbd23b1fffe76 /kernel/rcu
parent		1e9a038b7fe9a8c10ef1238f4e695d5fbe0dd594 (diff)
srcu: Expedite first synchronize_srcu() when idle
Classic SRCU in effect expedites the first synchronize_srcu() when SRCU is idle, and Mike Galbraith demonstrated that some use cases do in fact rely on this behavior. In particular, Mike showed that Steven Rostedt's hotplug stress script takes 55 seconds with Classic SRCU and more than 16 -minutes- when running Tree SRCU. Assuming that each Tree SRCU call to synchronize_srcu() takes four milliseconds, this implies that Steven's test invokes synchronize_srcu() in isolation, but more than once per 200 microseconds. Mike used ftrace to demonstrate that the time between successive calls to synchronize_srcu() ranged from 118 to 342 microseconds, with one outlier at 80 milliseconds. This data clearly indicates that Tree SRCU needs to expedite the first invocation of synchronize_srcu() during an SRCU idle period.

This commit therefore introduces an srcu_might_be_idle() function that probabilistically checks whether or not SRCU is idle. This function is used by synchronize_srcu() as an additional criterion in deciding whether or not to expedite.

(Hat tip to Peter Zijlstra for his earlier suggestion that this might in fact be a problem, which for all I know might have motivated Mike to look into it.)

Reported-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Tested-by: Mike Galbraith <efault@gmx.de>
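A rough back-of-envelope check of the numbers above (assuming the calls are serialized and roughly evenly spaced): one call per ~200 microseconds over the 55-second Classic SRCU run is about 55 s / 200 us, or roughly 275,000 calls to synchronize_srcu(); at ~4 ms per non-expedited Tree SRCU grace period, those calls alone account for 275,000 * 4 ms, roughly 1100 s, i.e. over 18 minutes, in line with the observed 16-plus minutes.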
Diffstat (limited to 'kernel/rcu')
-rw-r--r--	kernel/rcu/srcutree.c	59
1 file changed, 58 insertions(+), 1 deletion(-)
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 4b98e6f45166..2286e06fd159 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -402,6 +402,7 @@ static void srcu_gp_start(struct srcu_struct *sp)
 			      rcu_seq_current(&sp->srcu_gp_seq));
 	(void)rcu_segcblist_accelerate(&sdp->srcu_cblist,
 				       rcu_seq_snap(&sp->srcu_gp_seq));
+	smp_mb(); /* Order prior store to ->srcu_gp_seq_needed vs. GP start. */
 	rcu_seq_start(&sp->srcu_gp_seq);
 	state = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq));
 	WARN_ON_ONCE(state != SRCU_STATE_SCAN1);
@@ -677,6 +678,57 @@ static void srcu_flip(struct srcu_struct *sp)
 }
 
 /*
+ * If SRCU is likely idle, return true, otherwise return false.
+ *
+ * Note that it is OK for several current from-idle requests for a new
+ * grace period to specify expediting because they will all end up
+ * requesting the same grace period anyhow.  So no loss.
+ *
+ * Note also that if any CPU (including the current one) is still invoking
+ * callbacks, this function will nevertheless say "idle".  This is not
+ * ideal, but the overhead of checking all CPUs' callback lists is even
+ * less ideal, especially on large systems.  Furthermore, the wakeup
+ * can happen before the callback is fully removed, so we have no choice
+ * but to accept this type of error.
+ *
+ * This function is also subject to counter-wrap errors, but let's face
+ * it, if this function was preempted for enough time for the counters
+ * to wrap, it really doesn't matter whether or not we expedite the grace
+ * period.  The extra overhead of a needlessly expedited grace period is
+ * negligible when amortized over that time period, and the extra latency
+ * of a needlessly non-expedited grace period is similarly negligible.
+ */
+static bool srcu_might_be_idle(struct srcu_struct *sp)
+{
+	unsigned long flags;
+	struct srcu_data *sdp;
+	unsigned long curseq;
+
+	/* If the local srcu_data structure has callbacks, not idle. */
+	local_irq_save(flags);
+	sdp = this_cpu_ptr(sp->sda);
+	if (rcu_segcblist_pend_cbs(&sdp->srcu_cblist)) {
+		local_irq_restore(flags);
+		return false; /* Callbacks already present, so not idle. */
+	}
+	local_irq_restore(flags);
+
+	/*
+	 * No local callbacks, so probabilistically probe global state.
+	 * Exact information would require acquiring locks, which would
+	 * kill scalability, hence the probabilistic nature of the probe.
+	 */
+	curseq = rcu_seq_current(&sp->srcu_gp_seq);
+	smp_mb(); /* Order ->srcu_gp_seq with ->srcu_gp_seq_needed. */
+	if (ULONG_CMP_LT(curseq, READ_ONCE(sp->srcu_gp_seq_needed)))
+		return false; /* Grace period in progress, so not idle. */
+	smp_mb(); /* Order ->srcu_gp_seq with prior access. */
+	if (curseq != rcu_seq_current(&sp->srcu_gp_seq))
+		return false; /* GP # changed, so not idle. */
+	return true; /* With reasonable probability, idle! */
+}
+
+/*
  * Enqueue an SRCU callback on the srcu_data structure associated with
  * the current CPU and the specified srcu_struct structure, initiating
  * grace-period processing if it is not already running.
@@ -823,10 +875,15 @@ EXPORT_SYMBOL_GPL(synchronize_srcu_expedited);
  * Of course, these memory-ordering guarantees apply only when
  * synchronize_srcu(), srcu_read_lock(), and srcu_read_unlock() are
  * passed the same srcu_struct structure.
+ *
+ * If SRCU is likely idle, expedite the first request.  This semantic
+ * was provided by Classic SRCU, and is relied upon by its users, so TREE
+ * SRCU must also provide it.  Note that detecting idleness is heuristic
+ * and subject to both false positives and negatives.
  */
 void synchronize_srcu(struct srcu_struct *sp)
 {
-	if (rcu_gp_is_expedited())
+	if (srcu_might_be_idle(sp) || rcu_gp_is_expedited())
 		synchronize_srcu_expedited(sp);
 	else
 		__synchronize_srcu(sp, true);
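For context, a minimal usage sketch of the path this patch affects; the srcu_struct, the shared variable, and both functions below are hypothetical names chosen for illustration and are not part of the patch. An updater that calls synchronize_srcu() while its srcu_struct is otherwise idle now takes the expedited path because srcu_might_be_idle() likely returns true.

#include <linux/srcu.h>

static int shared_val;			/* hypothetical data protected by SRCU */
DEFINE_SRCU(example_srcu);		/* hypothetical srcu_struct */

/* Reader: accesses the shared data under SRCU protection. */
static int example_reader(void)
{
	int idx, val;

	idx = srcu_read_lock(&example_srcu);
	val = READ_ONCE(shared_val);
	srcu_read_unlock(&example_srcu, idx);
	return val;
}

/*
 * Updater: an isolated call like this one, made while example_srcu is
 * otherwise idle, is now expedited rather than waiting for a full
 * non-expedited grace period.
 */
static void example_update(int new_val)
{
	WRITE_ONCE(shared_val, new_val);
	synchronize_srcu(&example_srcu);	/* wait for pre-existing readers */
}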