commit 8b355e3bc1408be238ae4695fb6318ae502cae8e
tree   f923d1cb005f3102e76cd0b2f64567ed80f67c7a
parent f7b8eb847e35b18d3ec333774691a905bf16017f
author    Paul E. McKenney <paulmck@linux.vnet.ibm.com>  2016-06-29 16:46:25 -0400
committer Paul E. McKenney <paulmck@linux.vnet.ibm.com>  2016-08-22 12:30:25 -0400
rcu: Drive expedited grace periods from workqueue
The current implementation of expedited grace periods has the user task drive the grace period.  This works, but has downsides: (1) The user task must awaken tasks piggybacking on this grace period, which can result in latencies rivaling that of the grace period itself, and (2) User tasks can receive signals, which interfere with RCU CPU stall warnings.

This commit therefore uses workqueues to drive the grace periods, so that the user task need not do the awakening.  A subsequent commit will remove the now-unnecessary code allowing for signals.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
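[Editor's note: the heart of the patch below is the classic on-stack work item pattern: marshal the arguments into a structure on the caller's stack, hand it to a workqueue via schedule_work(), and block until the handler signals completion.  A minimal standalone sketch of that pattern follows; all names here are hypothetical, and it uses a struct completion for the final wakeup, whereas the patch instead waits on RCU's per-node exp_wq[] wait queues so that piggybacking waiters share the same wakeup.]

	#include <linux/workqueue.h>
	#include <linux/completion.h>

	/* Hypothetical payload, analogous to the patch's struct rcu_exp_work. */
	struct demo_work {
		int dw_arg;			/* argument marshalled for the handler */
		struct completion dw_done;	/* signalled when the handler finishes */
		struct work_struct dw_work;	/* embedded work item */
	};

	/* Runs in workqueue context, where signals cannot be received. */
	static void demo_handler(struct work_struct *wp)
	{
		struct demo_work *dwp = container_of(wp, struct demo_work, dw_work);

		pr_info("workqueue handler processing arg=%d\n", dwp->dw_arg);
		complete(&dwp->dw_done);	/* wake the task waiting below */
	}

	static void demo_run(int arg)
	{
		struct demo_work dw;		/* on-stack, like "rew" in the patch */

		dw.dw_arg = arg;
		init_completion(&dw.dw_done);
		INIT_WORK_ONSTACK(&dw.dw_work, demo_handler);
		schedule_work(&dw.dw_work);

		/* The stack frame stays live until the handler completes. */
		wait_for_completion(&dw.dw_done);
		destroy_work_on_stack(&dw.dw_work);
	}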
-rw-r--r--  kernel/rcu/tree.h        1
-rw-r--r--  kernel/rcu/tree_exp.h   48
-rw-r--r--  kernel/rcu/tree_trace.c  7
3 files changed, 47 insertions, 9 deletions
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index f714f873bf9d..e99a5234d9ed 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -400,6 +400,7 @@ struct rcu_data {
 #ifdef CONFIG_RCU_FAST_NO_HZ
 	struct rcu_head oom_head;
 #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
+	atomic_long_t exp_workdone0;	/* # done by workqueue. */
 	atomic_long_t exp_workdone1;	/* # done by others #1. */
 	atomic_long_t exp_workdone2;	/* # done by others #2. */
 	atomic_long_t exp_workdone3;	/* # done by others #3. */
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 1549f456fb7b..97f5ffe42b58 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -500,7 +500,6 @@ static void rcu_exp_wait_wake(struct rcu_state *rsp, unsigned long s)
 	 * next GP, to proceed.
 	 */
 	mutex_lock(&rsp->exp_wake_mutex);
-	mutex_unlock(&rsp->exp_mutex);
 
 	rcu_for_each_node_breadth_first(rsp, rnp) {
 		if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s)) {
@@ -516,6 +515,29 @@ static void rcu_exp_wait_wake(struct rcu_state *rsp, unsigned long s)
 	mutex_unlock(&rsp->exp_wake_mutex);
 }
 
+/* Let the workqueue handler know what it is supposed to do. */
+struct rcu_exp_work {
+	smp_call_func_t rew_func;
+	struct rcu_state *rew_rsp;
+	unsigned long rew_s;
+	struct work_struct rew_work;
+};
+
+/*
+ * Work-queue handler to drive an expedited grace period forward.
+ */
+static void wait_rcu_exp_gp(struct work_struct *wp)
+{
+	struct rcu_exp_work *rewp;
+
+	/* Initialize the rcu_node tree in preparation for the wait. */
+	rewp = container_of(wp, struct rcu_exp_work, rew_work);
+	sync_rcu_exp_select_cpus(rewp->rew_rsp, rewp->rew_func);
+
+	/* Wait and clean up, including waking everyone. */
+	rcu_exp_wait_wake(rewp->rew_rsp, rewp->rew_s);
+}
+
 /*
  * Given an rcu_state pointer and a smp_call_function() handler, kick
  * off the specified flavor of expedited grace period.
@@ -523,6 +545,9 @@ static void rcu_exp_wait_wake(struct rcu_state *rsp, unsigned long s)
 static void _synchronize_rcu_expedited(struct rcu_state *rsp,
 				       smp_call_func_t func)
 {
+	struct rcu_data *rdp;
+	struct rcu_exp_work rew;
+	struct rcu_node *rnp;
 	unsigned long s;
 
 	/* If expedited grace periods are prohibited, fall back to normal. */
@@ -536,11 +561,22 @@ static void _synchronize_rcu_expedited(struct rcu_state *rsp,
 	if (exp_funnel_lock(rsp, s))
 		return;  /* Someone else did our work for us. */
 
-	/* Initialize the rcu_node tree in preparation for the wait. */
-	sync_rcu_exp_select_cpus(rsp, func);
-
-	/* Wait and clean up, including waking everyone. */
-	rcu_exp_wait_wake(rsp, s);
+	/* Marshall arguments and schedule the expedited grace period. */
+	rew.rew_func = func;
+	rew.rew_rsp = rsp;
+	rew.rew_s = s;
+	INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp);
+	schedule_work(&rew.rew_work);
+
+	/* Wait for expedited grace period to complete. */
+	rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
+	rnp = rcu_get_root(rsp);
+	wait_event(rnp->exp_wq[(s >> 1) & 0x3],
+		   sync_exp_work_done(rsp,
+				      &rdp->exp_workdone0, s));
+
+	/* Let the next expedited grace period start. */
+	mutex_unlock(&rsp->exp_mutex);
 }
 
 /**
diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
index 86782f9a4604..b1f28972872c 100644
--- a/kernel/rcu/tree_trace.c
+++ b/kernel/rcu/tree_trace.c
@@ -185,16 +185,17 @@ static int show_rcuexp(struct seq_file *m, void *v)
 	int cpu;
 	struct rcu_state *rsp = (struct rcu_state *)m->private;
 	struct rcu_data *rdp;
-	unsigned long s1 = 0, s2 = 0, s3 = 0;
+	unsigned long s0 = 0, s1 = 0, s2 = 0, s3 = 0;
 
 	for_each_possible_cpu(cpu) {
 		rdp = per_cpu_ptr(rsp->rda, cpu);
+		s0 += atomic_long_read(&rdp->exp_workdone0);
 		s1 += atomic_long_read(&rdp->exp_workdone1);
 		s2 += atomic_long_read(&rdp->exp_workdone2);
 		s3 += atomic_long_read(&rdp->exp_workdone3);
 	}
-	seq_printf(m, "s=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
-		   rsp->expedited_sequence, s1, s2, s3,
+	seq_printf(m, "s=%lu wd0=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
+		   rsp->expedited_sequence, s0, s1, s2, s3,
 		   atomic_long_read(&rsp->expedited_normal),
 		   atomic_read(&rsp->expedited_need_qs),
 		   rsp->expedited_sequence / 2);