 kernel/rcu/rcu.h      |   1
 kernel/rcu/tree.c     |   3
 kernel/rcu/tree.h     |  10
 kernel/rcu/tree_exp.h | 184
 4 files changed, 120 insertions(+), 78 deletions(-)
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 7a693e31184a..976019d6fa06 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -486,6 +486,7 @@ void rcu_force_quiescent_state(void);
 void rcu_bh_force_quiescent_state(void);
 void rcu_sched_force_quiescent_state(void);
 extern struct workqueue_struct *rcu_gp_wq;
+extern struct workqueue_struct *rcu_par_gp_wq;
 #endif /* #else #ifdef CONFIG_TINY_RCU */
 
 #ifdef CONFIG_RCU_NOCB_CPU
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 2a734692a581..23781fc90830 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -4168,6 +4168,7 @@ static void __init rcu_dump_rcu_node_tree(struct rcu_state *rsp)
 }
 
 struct workqueue_struct *rcu_gp_wq;
+struct workqueue_struct *rcu_par_gp_wq;
 
 void __init rcu_init(void)
 {
@@ -4199,6 +4200,8 @@ void __init rcu_init(void)
 	/* Create workqueue for expedited GPs and for Tree SRCU. */
 	rcu_gp_wq = alloc_workqueue("rcu_gp", WQ_MEM_RECLAIM, 0);
 	WARN_ON(!rcu_gp_wq);
+	rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0);
+	WARN_ON(!rcu_par_gp_wq);
 }
 
 #include "tree_exp.h"
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index f491ab4f2e8e..98d33902b65c 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -58,6 +58,14 @@ struct rcu_dynticks {
 #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
 };
 
+/* Communicate arguments to a workqueue handler. */
+struct rcu_exp_work {
+	smp_call_func_t rew_func;
+	struct rcu_state *rew_rsp;
+	unsigned long rew_s;
+	struct work_struct rew_work;
+};
+
 /* RCU's kthread states for tracing. */
 #define RCU_KTHREAD_STOPPED 0
 #define RCU_KTHREAD_RUNNING 1
@@ -157,6 +165,8 @@ struct rcu_node {
 	spinlock_t exp_lock ____cacheline_internodealigned_in_smp;
 	unsigned long exp_seq_rq;
 	wait_queue_head_t exp_wq[4];
+	struct rcu_exp_work rew;
+	bool exp_need_flush;	/* Need to flush workitem? */
 } ____cacheline_internodealigned_in_smp;
 
 /*
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index f72eefab8543..73e1d3dca5b1 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -362,93 +362,129 @@ static void sync_sched_exp_online_cleanup(int cpu)
 }
 
 /*
- * Select the nodes that the upcoming expedited grace period needs
- * to wait for.
+ * Select the CPUs within the specified rcu_node that the upcoming
+ * expedited grace period needs to wait for.
  */
-static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
-				     smp_call_func_t func)
+static void sync_rcu_exp_select_node_cpus(struct work_struct *wp)
 {
 	int cpu;
 	unsigned long flags;
+	smp_call_func_t func;
 	unsigned long mask_ofl_test;
 	unsigned long mask_ofl_ipi;
 	int ret;
-	struct rcu_node *rnp;
+	struct rcu_exp_work *rewp =
+		container_of(wp, struct rcu_exp_work, rew_work);
+	struct rcu_node *rnp = container_of(rewp, struct rcu_node, rew);
+	struct rcu_state *rsp = rewp->rew_rsp;
 
-	trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("reset"));
-	sync_exp_reset_tree(rsp);
-	trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("select"));
-	rcu_for_each_leaf_node(rsp, rnp) {
-		raw_spin_lock_irqsave_rcu_node(rnp, flags);
-
-		/* Each pass checks a CPU for identity, offline, and idle. */
-		mask_ofl_test = 0;
-		for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
-			unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
-			struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
-			struct rcu_dynticks *rdtp = per_cpu_ptr(&rcu_dynticks, cpu);
-			int snap;
+	func = rewp->rew_func;
+	raw_spin_lock_irqsave_rcu_node(rnp, flags);
 
-			if (raw_smp_processor_id() == cpu ||
-			    !(rnp->qsmaskinitnext & mask)) {
+	/* Each pass checks a CPU for identity, offline, and idle. */
+	mask_ofl_test = 0;
+	for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
+		unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
+		struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
+		struct rcu_dynticks *rdtp = per_cpu_ptr(&rcu_dynticks, cpu);
+		int snap;
+
+		if (raw_smp_processor_id() == cpu ||
+		    !(rnp->qsmaskinitnext & mask)) {
+			mask_ofl_test |= mask;
+		} else {
+			snap = rcu_dynticks_snap(rdtp);
+			if (rcu_dynticks_in_eqs(snap))
 				mask_ofl_test |= mask;
-			} else {
-				snap = rcu_dynticks_snap(rdtp);
-				if (rcu_dynticks_in_eqs(snap))
-					mask_ofl_test |= mask;
-				else
-					rdp->exp_dynticks_snap = snap;
-			}
+			else
+				rdp->exp_dynticks_snap = snap;
 		}
-		mask_ofl_ipi = rnp->expmask & ~mask_ofl_test;
-
-		/*
-		 * Need to wait for any blocked tasks as well. Note that
-		 * additional blocking tasks will also block the expedited
-		 * GP until such time as the ->expmask bits are cleared.
-		 */
-		if (rcu_preempt_has_tasks(rnp))
-			rnp->exp_tasks = rnp->blkd_tasks.next;
-		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+	}
+	mask_ofl_ipi = rnp->expmask & ~mask_ofl_test;
 
-		/* IPI the remaining CPUs for expedited quiescent state. */
-		for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
-			unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
-			struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
+	/*
+	 * Need to wait for any blocked tasks as well. Note that
+	 * additional blocking tasks will also block the expedited GP
+	 * until such time as the ->expmask bits are cleared.
+	 */
+	if (rcu_preempt_has_tasks(rnp))
+		rnp->exp_tasks = rnp->blkd_tasks.next;
+	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+
+	/* IPI the remaining CPUs for expedited quiescent state. */
+	for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
+		unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
+		struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
 
-			if (!(mask_ofl_ipi & mask))
-				continue;
+		if (!(mask_ofl_ipi & mask))
+			continue;
 retry_ipi:
-			if (rcu_dynticks_in_eqs_since(rdp->dynticks,
-						      rdp->exp_dynticks_snap)) {
-				mask_ofl_test |= mask;
-				continue;
-			}
-			ret = smp_call_function_single(cpu, func, rsp, 0);
-			if (!ret) {
-				mask_ofl_ipi &= ~mask;
-				continue;
-			}
-			/* Failed, raced with CPU hotplug operation. */
-			raw_spin_lock_irqsave_rcu_node(rnp, flags);
-			if ((rnp->qsmaskinitnext & mask) &&
-			    (rnp->expmask & mask)) {
-				/* Online, so delay for a bit and try again. */
-				raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
-				trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("selectofl"));
-				schedule_timeout_uninterruptible(1);
-				goto retry_ipi;
-			}
-			/* CPU really is offline, so we can ignore it. */
-			if (!(rnp->expmask & mask))
-				mask_ofl_ipi &= ~mask;
+		if (rcu_dynticks_in_eqs_since(rdp->dynticks,
+					      rdp->exp_dynticks_snap)) {
+			mask_ofl_test |= mask;
+			continue;
+		}
+		ret = smp_call_function_single(cpu, func, rsp, 0);
+		if (!ret) {
+			mask_ofl_ipi &= ~mask;
+			continue;
+		}
+		/* Failed, raced with CPU hotplug operation. */
+		raw_spin_lock_irqsave_rcu_node(rnp, flags);
+		if ((rnp->qsmaskinitnext & mask) &&
+		    (rnp->expmask & mask)) {
+			/* Online, so delay for a bit and try again. */
 			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+			trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("selectofl"));
+			schedule_timeout_uninterruptible(1);
+			goto retry_ipi;
+		}
+		/* CPU really is offline, so we can ignore it. */
+		if (!(rnp->expmask & mask))
+			mask_ofl_ipi &= ~mask;
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+	}
+	/* Report quiescent states for those that went offline. */
+	mask_ofl_test |= mask_ofl_ipi;
+	if (mask_ofl_test)
+		rcu_report_exp_cpu_mult(rsp, rnp, mask_ofl_test, false);
+}
+
+/*
+ * Select the nodes that the upcoming expedited grace period needs
+ * to wait for.
+ */
+static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
+				     smp_call_func_t func)
+{
+	struct rcu_node *rnp;
+
+	trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("reset"));
+	sync_exp_reset_tree(rsp);
+	trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("select"));
+
+	/* Schedule work for each leaf rcu_node structure. */
+	rcu_for_each_leaf_node(rsp, rnp) {
+		rnp->exp_need_flush = false;
+		if (!READ_ONCE(rnp->expmask))
+			continue; /* Avoid early boot non-existent wq. */
+		rnp->rew.rew_func = func;
+		rnp->rew.rew_rsp = rsp;
+		if (!READ_ONCE(rcu_par_gp_wq) ||
+		    rcu_scheduler_active != RCU_SCHEDULER_RUNNING) {
+			/* No workqueues yet. */
+			sync_rcu_exp_select_node_cpus(&rnp->rew.rew_work);
+			continue;
 		}
-		/* Report quiescent states for those that went offline. */
-		mask_ofl_test |= mask_ofl_ipi;
-		if (mask_ofl_test)
-			rcu_report_exp_cpu_mult(rsp, rnp, mask_ofl_test, false);
+		INIT_WORK(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus);
+		queue_work_on(rnp->grplo, rcu_par_gp_wq, &rnp->rew.rew_work);
+		rnp->exp_need_flush = true;
 	}
+
+	/* Wait for workqueue jobs (if any) to complete. */
+	rcu_for_each_leaf_node(rsp, rnp)
+		if (rnp->exp_need_flush)
+			flush_work(&rnp->rew.rew_work);
 }
 
 static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
| @@ -560,14 +596,6 @@ static void rcu_exp_wait_wake(struct rcu_state *rsp, unsigned long s) | |||
| 560 | mutex_unlock(&rsp->exp_wake_mutex); | 596 | mutex_unlock(&rsp->exp_wake_mutex); |
| 561 | } | 597 | } |
| 562 | 598 | ||
| 563 | /* Let the workqueue handler know what it is supposed to do. */ | ||
| 564 | struct rcu_exp_work { | ||
| 565 | smp_call_func_t rew_func; | ||
| 566 | struct rcu_state *rew_rsp; | ||
| 567 | unsigned long rew_s; | ||
| 568 | struct work_struct rew_work; | ||
| 569 | }; | ||
| 570 | |||
| 571 | /* | 599 | /* |
| 572 | * Common code to drive an expedited grace period forward, used by | 600 | * Common code to drive an expedited grace period forward, used by |
| 573 | * workqueues and mid-boot-time tasks. | 601 | * workqueues and mid-boot-time tasks. |
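
For readers following the tree_exp.h hunk: the old single loop is replaced by a fan-out/flush pattern in which sync_rcu_exp_select_cpus() queues one work item per leaf rcu_node on rcu_par_gp_wq and then flushes each queued item. The sketch below is a minimal user-space illustration of that pattern only; it is not kernel code, the names (leaf_work, select_node_cpus, NR_LEAF_NODES) are invented for the demo, and pthread_create()/pthread_join() merely stand in for queue_work_on()/flush_work().

#include <pthread.h>
#include <stdio.h>

#define NR_LEAF_NODES 4

/* Loosely analogous to the rcu_exp_work embedded in each rcu_node. */
struct leaf_work {
	int node_id;		/* which leaf this handler covers */
	int need_flush;		/* was a worker actually started? */
	pthread_t worker;
};

/* Per-node handler, standing in for sync_rcu_exp_select_node_cpus(). */
static void *select_node_cpus(void *arg)
{
	struct leaf_work *lw = arg;

	printf("scanning CPUs of leaf node %d\n", lw->node_id);
	return NULL;
}

int main(void)
{
	struct leaf_work nodes[NR_LEAF_NODES] = { { 0 } };
	int i;

	/* Fan out: one handler per leaf node (queue_work_on() in the patch). */
	for (i = 0; i < NR_LEAF_NODES; i++) {
		nodes[i].node_id = i;
		if (pthread_create(&nodes[i].worker, NULL,
				   select_node_cpus, &nodes[i]) == 0)
			nodes[i].need_flush = 1;
	}

	/* Wait for all started handlers (flush_work() in the patch). */
	for (i = 0; i < NR_LEAF_NODES; i++)
		if (nodes[i].need_flush)
			pthread_join(nodes[i].worker, NULL);

	return 0;
}

Build with "cc -pthread". As in the patch, the need_flush flag ensures the flush pass only waits on handlers that were actually dispatched.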
