-rw-r--r--  kernel/rcu/rcu.h        1
-rw-r--r--  kernel/rcu/tree.c       3
-rw-r--r--  kernel/rcu/tree.h      10
-rw-r--r--  kernel/rcu/tree_exp.h 184
4 files changed, 120 insertions, 78 deletions
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 7a693e31184a..976019d6fa06 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -486,6 +486,7 @@ void rcu_force_quiescent_state(void);
 void rcu_bh_force_quiescent_state(void);
 void rcu_sched_force_quiescent_state(void);
 extern struct workqueue_struct *rcu_gp_wq;
+extern struct workqueue_struct *rcu_par_gp_wq;
 #endif /* #else #ifdef CONFIG_TINY_RCU */
 
 #ifdef CONFIG_RCU_NOCB_CPU
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 2a734692a581..23781fc90830 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -4168,6 +4168,7 @@ static void __init rcu_dump_rcu_node_tree(struct rcu_state *rsp)
 }
 
 struct workqueue_struct *rcu_gp_wq;
+struct workqueue_struct *rcu_par_gp_wq;
 
 void __init rcu_init(void)
 {
@@ -4199,6 +4200,8 @@ void __init rcu_init(void)
 	/* Create workqueue for expedited GPs and for Tree SRCU. */
 	rcu_gp_wq = alloc_workqueue("rcu_gp", WQ_MEM_RECLAIM, 0);
 	WARN_ON(!rcu_gp_wq);
+	rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0);
+	WARN_ON(!rcu_par_gp_wq);
 }
 
 #include "tree_exp.h"
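
[Note: the following is an illustrative sketch, not part of the patch. It shows the basic lifecycle of a WQ_MEM_RECLAIM workqueue such as the rcu_par_gp workqueue allocated above: allocate it once, queue work items to it, wait for them with flush_work(), and tear it down. All demo_* names are hypothetical.]

	/* Illustrative only -- hypothetical minimal module, not part of this patch. */
	#include <linux/module.h>
	#include <linux/workqueue.h>

	static struct workqueue_struct *demo_wq;	/* plays the role of rcu_par_gp_wq */
	static struct work_struct demo_work;

	static void demo_handler(struct work_struct *work)
	{
		pr_info("demo work item ran\n");
	}

	static int __init demo_init(void)
	{
		/* WQ_MEM_RECLAIM: the workqueue keeps a rescuer thread so queued
		 * work can make progress even under memory pressure. */
		demo_wq = alloc_workqueue("demo_par_gp", WQ_MEM_RECLAIM, 0);
		if (!demo_wq)
			return -ENOMEM;
		INIT_WORK(&demo_work, demo_handler);
		queue_work(demo_wq, &demo_work);
		flush_work(&demo_work);		/* wait for demo_handler() to finish */
		return 0;
	}

	static void __exit demo_exit(void)
	{
		destroy_workqueue(demo_wq);
	}

	module_init(demo_init);
	module_exit(demo_exit);
	MODULE_LICENSE("GPL");
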
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index f491ab4f2e8e..98d33902b65c 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -58,6 +58,14 @@ struct rcu_dynticks {
 #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
 };
 
+/* Communicate arguments to a workqueue handler. */
+struct rcu_exp_work {
+	smp_call_func_t rew_func;
+	struct rcu_state *rew_rsp;
+	unsigned long rew_s;
+	struct work_struct rew_work;
+};
+
 /* RCU's kthread states for tracing. */
 #define RCU_KTHREAD_STOPPED 0
 #define RCU_KTHREAD_RUNNING 1
@@ -157,6 +165,8 @@ struct rcu_node {
 	spinlock_t exp_lock ____cacheline_internodealigned_in_smp;
 	unsigned long exp_seq_rq;
 	wait_queue_head_t exp_wq[4];
+	struct rcu_exp_work rew;
+	bool exp_need_flush;	/* Need to flush workitem? */
 } ____cacheline_internodealigned_in_smp;
 
 /*
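
[Note: the struct rcu_exp_work instance is embedded directly in each rcu_node, so a work handler given only the work_struct pointer can recover both the wrapper and the enclosing node with container_of(). A userspace analogue of that two-step recovery, illustrative only and with all names hypothetical, might look like this.]

	/* Userspace analogue (not kernel code) of the container_of() recovery. */
	#include <stddef.h>
	#include <stdio.h>

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	struct work_struct { int pending; };	/* stand-in for the kernel type */

	struct exp_work {			/* like struct rcu_exp_work */
		unsigned long s;
		struct work_struct work;	/* embedded work item */
	};

	struct node {				/* like struct rcu_node */
		int grplo;
		struct exp_work rew;		/* embedded, as in the patch */
	};

	static void handler(struct work_struct *wp)
	{
		/* work_struct -> wrapper -> enclosing node, exactly two steps */
		struct exp_work *ewp = container_of(wp, struct exp_work, work);
		struct node *np = container_of(ewp, struct node, rew);

		printf("handler ran for node with grplo=%d\n", np->grplo);
	}

	int main(void)
	{
		struct node n = { .grplo = 4 };

		handler(&n.rew.work);		/* simulate the workqueue callback */
		return 0;
	}
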
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index f72eefab8543..73e1d3dca5b1 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -362,93 +362,129 @@ static void sync_sched_exp_online_cleanup(int cpu)
 }
 
 /*
- * Select the nodes that the upcoming expedited grace period needs
- * to wait for.
+ * Select the CPUs within the specified rcu_node that the upcoming
+ * expedited grace period needs to wait for.
  */
-static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
-				     smp_call_func_t func)
+static void sync_rcu_exp_select_node_cpus(struct work_struct *wp)
 {
 	int cpu;
 	unsigned long flags;
+	smp_call_func_t func;
 	unsigned long mask_ofl_test;
 	unsigned long mask_ofl_ipi;
 	int ret;
-	struct rcu_node *rnp;
+	struct rcu_exp_work *rewp =
+		container_of(wp, struct rcu_exp_work, rew_work);
+	struct rcu_node *rnp = container_of(rewp, struct rcu_node, rew);
+	struct rcu_state *rsp = rewp->rew_rsp;
 
-	trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("reset"));
-	sync_exp_reset_tree(rsp);
-	trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("select"));
-	rcu_for_each_leaf_node(rsp, rnp) {
-		raw_spin_lock_irqsave_rcu_node(rnp, flags);
+	func = rewp->rew_func;
+	raw_spin_lock_irqsave_rcu_node(rnp, flags);
 
-		/* Each pass checks a CPU for identity, offline, and idle. */
-		mask_ofl_test = 0;
-		for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
-			unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
-			struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
-			struct rcu_dynticks *rdtp = per_cpu_ptr(&rcu_dynticks, cpu);
-			int snap;
+	/* Each pass checks a CPU for identity, offline, and idle. */
+	mask_ofl_test = 0;
+	for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
+		unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
+		struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
+		struct rcu_dynticks *rdtp = per_cpu_ptr(&rcu_dynticks, cpu);
+		int snap;
 
-			if (raw_smp_processor_id() == cpu ||
-			    !(rnp->qsmaskinitnext & mask)) {
-				mask_ofl_test |= mask;
-			} else {
-				snap = rcu_dynticks_snap(rdtp);
-				if (rcu_dynticks_in_eqs(snap))
-					mask_ofl_test |= mask;
-				else
-					rdp->exp_dynticks_snap = snap;
-			}
+		if (raw_smp_processor_id() == cpu ||
+		    !(rnp->qsmaskinitnext & mask)) {
+			mask_ofl_test |= mask;
+		} else {
+			snap = rcu_dynticks_snap(rdtp);
+			if (rcu_dynticks_in_eqs(snap))
+				mask_ofl_test |= mask;
+			else
+				rdp->exp_dynticks_snap = snap;
 		}
-		mask_ofl_ipi = rnp->expmask & ~mask_ofl_test;
+	}
+	mask_ofl_ipi = rnp->expmask & ~mask_ofl_test;
 
-		/*
-		 * Need to wait for any blocked tasks as well. Note that
-		 * additional blocking tasks will also block the expedited
-		 * GP until such time as the ->expmask bits are cleared.
-		 */
-		if (rcu_preempt_has_tasks(rnp))
-			rnp->exp_tasks = rnp->blkd_tasks.next;
-		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+	/*
+	 * Need to wait for any blocked tasks as well. Note that
+	 * additional blocking tasks will also block the expedited GP
+	 * until such time as the ->expmask bits are cleared.
+	 */
+	if (rcu_preempt_has_tasks(rnp))
+		rnp->exp_tasks = rnp->blkd_tasks.next;
+	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 
-		/* IPI the remaining CPUs for expedited quiescent state. */
-		for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
-			unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
-			struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
+	/* IPI the remaining CPUs for expedited quiescent state. */
+	for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
+		unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
+		struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
 
-			if (!(mask_ofl_ipi & mask))
-				continue;
+		if (!(mask_ofl_ipi & mask))
+			continue;
 retry_ipi:
-			if (rcu_dynticks_in_eqs_since(rdp->dynticks,
-						      rdp->exp_dynticks_snap)) {
-				mask_ofl_test |= mask;
-				continue;
-			}
-			ret = smp_call_function_single(cpu, func, rsp, 0);
-			if (!ret) {
-				mask_ofl_ipi &= ~mask;
-				continue;
-			}
-			/* Failed, raced with CPU hotplug operation. */
-			raw_spin_lock_irqsave_rcu_node(rnp, flags);
-			if ((rnp->qsmaskinitnext & mask) &&
-			    (rnp->expmask & mask)) {
-				/* Online, so delay for a bit and try again. */
-				raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
-				trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("selectofl"));
-				schedule_timeout_uninterruptible(1);
-				goto retry_ipi;
-			}
-			/* CPU really is offline, so we can ignore it. */
-			if (!(rnp->expmask & mask))
-				mask_ofl_ipi &= ~mask;
-			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+		if (rcu_dynticks_in_eqs_since(rdp->dynticks,
+					      rdp->exp_dynticks_snap)) {
+			mask_ofl_test |= mask;
+			continue;
+		}
+		ret = smp_call_function_single(cpu, func, rsp, 0);
+		if (!ret) {
+			mask_ofl_ipi &= ~mask;
+			continue;
+		}
+		/* Failed, raced with CPU hotplug operation. */
+		raw_spin_lock_irqsave_rcu_node(rnp, flags);
+		if ((rnp->qsmaskinitnext & mask) &&
+		    (rnp->expmask & mask)) {
+			/* Online, so delay for a bit and try again. */
+			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+			trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("selectofl"));
+			schedule_timeout_uninterruptible(1);
+			goto retry_ipi;
+		}
+		/* CPU really is offline, so we can ignore it. */
+		if (!(rnp->expmask & mask))
+			mask_ofl_ipi &= ~mask;
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+	}
+	/* Report quiescent states for those that went offline. */
+	mask_ofl_test |= mask_ofl_ipi;
+	if (mask_ofl_test)
+		rcu_report_exp_cpu_mult(rsp, rnp, mask_ofl_test, false);
+}
+
+/*
+ * Select the nodes that the upcoming expedited grace period needs
+ * to wait for.
+ */
+static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
+				     smp_call_func_t func)
+{
+	struct rcu_node *rnp;
+
+	trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("reset"));
+	sync_exp_reset_tree(rsp);
+	trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("select"));
+
+	/* Schedule work for each leaf rcu_node structure. */
+	rcu_for_each_leaf_node(rsp, rnp) {
+		rnp->exp_need_flush = false;
+		if (!READ_ONCE(rnp->expmask))
+			continue; /* Avoid early boot non-existent wq. */
+		rnp->rew.rew_func = func;
+		rnp->rew.rew_rsp = rsp;
+		if (!READ_ONCE(rcu_par_gp_wq) ||
+		    rcu_scheduler_active != RCU_SCHEDULER_RUNNING) {
+			/* No workqueues yet. */
+			sync_rcu_exp_select_node_cpus(&rnp->rew.rew_work);
+			continue;
 		}
-		/* Report quiescent states for those that went offline. */
-		mask_ofl_test |= mask_ofl_ipi;
-		if (mask_ofl_test)
-			rcu_report_exp_cpu_mult(rsp, rnp, mask_ofl_test, false);
+		INIT_WORK(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus);
+		queue_work_on(rnp->grplo, rcu_par_gp_wq, &rnp->rew.rew_work);
+		rnp->exp_need_flush = true;
 	}
+
+	/* Wait for workqueue jobs (if any) to complete. */
+	rcu_for_each_leaf_node(rsp, rnp)
+		if (rnp->exp_need_flush)
+			flush_work(&rnp->rew.rew_work);
 }
 
 static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
@@ -560,14 +596,6 @@ static void rcu_exp_wait_wake(struct rcu_state *rsp, unsigned long s)
 	mutex_unlock(&rsp->exp_wake_mutex);
 }
 
-/* Let the workqueue handler know what it is supposed to do. */
-struct rcu_exp_work {
-	smp_call_func_t rew_func;
-	struct rcu_state *rew_rsp;
-	unsigned long rew_s;
-	struct work_struct rew_work;
-};
-
 /*
  * Common code to drive an expedited grace period forward, used by
  * workqueues and mid-boot-time tasks.
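
[Note: the reworked sync_rcu_exp_select_cpus() above follows a fan-out/flush pattern: queue one work item per leaf rcu_node on that node's first CPU, fall back to calling the handler directly while workqueues are not yet available, and then flush only the items that were actually queued. The following is a hedged kernel-style sketch of that pattern over a flat array of hypothetical nodes; the demo_* names are not in the patch, and demo_par_wq is assumed to have been allocated elsewhere, as rcu_par_gp_wq is in rcu_init().]

	/* Illustrative only -- not part of this patch. */
	#include <linux/types.h>
	#include <linux/workqueue.h>

	#define NR_DEMO_NODES 4

	struct demo_node {
		int first_cpu;			/* like rnp->grplo */
		bool need_flush;		/* like rnp->exp_need_flush */
		struct work_struct work;	/* like rnp->rew.rew_work */
	};

	static struct demo_node demo_nodes[NR_DEMO_NODES];
	static struct workqueue_struct *demo_par_wq;	/* like rcu_par_gp_wq */

	static void demo_node_handler(struct work_struct *wp)
	{
		/* The per-node CPU scan would go here. */
	}

	static void demo_fan_out_and_wait(void)
	{
		int i;

		for (i = 0; i < NR_DEMO_NODES; i++) {
			demo_nodes[i].need_flush = false;
			if (!demo_par_wq) {
				/* Too early for workqueues: run the handler inline. */
				demo_node_handler(&demo_nodes[i].work);
				continue;
			}
			INIT_WORK(&demo_nodes[i].work, demo_node_handler);
			/* Bind the work item near the node's CPUs, as the patch
			 * does with queue_work_on(rnp->grplo, ...). */
			queue_work_on(demo_nodes[i].first_cpu, demo_par_wq,
				      &demo_nodes[i].work);
			demo_nodes[i].need_flush = true;
		}

		/* Second pass: wait only for the items actually queued. */
		for (i = 0; i < NR_DEMO_NODES; i++)
			if (demo_nodes[i].need_flush)
				flush_work(&demo_nodes[i].work);
	}
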