author     Paul E. McKenney <paulmck@linux.vnet.ibm.com>  2016-04-15 19:35:29 -0400
committer  Paul E. McKenney <paulmck@linux.vnet.ibm.com>  2016-06-14 19:01:41 -0400
commit     3549c2bc2c4ea8ecfeb9d21cb81cb00c6002b011 (patch)
tree       78d1e1e5af82cead573006d62a59cc2a0bfc1912 /kernel/rcu
parent     d3acab65f274800dd0901f0816f8bca9f2a8c8ec (diff)
rcu: Move expedited code from tree.c to tree_exp.h
People have been having some difficulty finding their way around the
RCU code. This commit therefore pulls some of the expedited grace-period
code from tree.c to a new tree_exp.h file. This commit is strictly code
movement, with the exception of a forward declaration that was added
for the sync_sched_exp_online_cleanup() function.
A subsequent commit will move the remaining expedited grace-period code
from tree_plugin.h to tree_exp.h.
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
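[Editorial note] tree.c keeps the moved functions in the same translation unit by textually including tree_exp.h near the bottom of the file (note the new #include "tree_exp.h" line just above #include "tree_plugin.h" in the diff below); the one forward declaration added for sync_sched_exp_online_cleanup() lets earlier code in tree.c call a function whose body now arrives only at that include point. A minimal stand-alone sketch of this layout, using the hypothetical files counter.c and counter_exp.h rather than anything from this commit:

    /* counter_exp.h -- the "moved" code; compiled only via the #include below. */
    static void counter_report(void)
    {
            printf("counter = %d\n", counter);  /* uses static state from counter.c */
    }

    /* counter.c -- plays the role of tree.c. */
    #include <stdio.h>

    static int counter;                  /* file-scope state shared with the .h */
    static void counter_report(void);    /* forward declaration, as added here for
                                            sync_sched_exp_online_cleanup() */

    static void counter_bump(void)
    {
            counter++;
            counter_report();            /* called before the include point */
    }

    #include "counter_exp.h"             /* same translation unit, so the moved
                                            functions can stay static */

    int main(void)
    {
            counter_bump();
            return 0;
    }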
Diffstat (limited to 'kernel/rcu')
 -rw-r--r--  kernel/rcu/tree.c      545
 -rw-r--r--  kernel/rcu/tree_exp.h  564
 2 files changed, 566 insertions(+), 543 deletions(-)
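[Editorial note] The code being moved is driven by a sequence counter: the kernel-doc for synchronize_sched_expedited() in the diff below describes the scheme as sequence locking used to detect that someone else has already done the needed grace period. A simplified, single-threaded model of that counter discipline follows; it is illustrative only, and the real helpers (rcu_seq_start(), rcu_seq_end(), rcu_seq_snap(), rcu_seq_done() in tree.c) add memory barriers, tracing, and wraparound-safe macros:

    #include <stdio.h>

    /* Toy model: even value = no expedited GP in progress, odd = in progress. */
    static unsigned long exp_seq;

    static void exp_seq_start(void) { exp_seq++; }   /* counter becomes odd  */
    static void exp_seq_end(void)   { exp_seq++; }   /* counter becomes even */

    /* Counter value that proves a full GP began and ended after this call;
     * rounding up means a GP already in flight at snapshot time does not count. */
    static unsigned long exp_seq_snap(void)
    {
            return (exp_seq + 3) & ~0x1UL;
    }

    /* Wraparound-tolerant "has exp_seq reached s?", analogous to ULONG_CMP_GE(). */
    static int exp_seq_done(unsigned long s)
    {
            return (long)(exp_seq - s) >= 0;
    }

    int main(void)
    {
            unsigned long s;

            exp_seq_start();                  /* a GP is already in flight    */
            s = exp_seq_snap();               /* snapshot taken mid-GP: s == 4 */
            exp_seq_end();                    /* that GP ends: exp_seq == 2    */
            printf("piggyback on in-flight GP? %d\n", exp_seq_done(s));  /* 0 */

            exp_seq_start();                  /* a GP that began after the snap */
            exp_seq_end();                    /* exp_seq == 4                   */
            printf("done after a later GP?     %d\n", exp_seq_done(s));  /* 1 */
            return 0;
    }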
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 4aefeafb9a95..c844b6142a86 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -159,6 +159,7 @@ static void invoke_rcu_core(void);
159 | static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); | 159 | static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); |
160 | static void rcu_report_exp_rdp(struct rcu_state *rsp, | 160 | static void rcu_report_exp_rdp(struct rcu_state *rsp, |
161 | struct rcu_data *rdp, bool wake); | 161 | struct rcu_data *rdp, bool wake); |
162 | static void sync_sched_exp_online_cleanup(int cpu); | ||
162 | 163 | ||
163 | /* rcuc/rcub kthread realtime priority */ | 164 | /* rcuc/rcub kthread realtime priority */ |
164 | #ifdef CONFIG_RCU_KTHREAD_PRIO | 165 | #ifdef CONFIG_RCU_KTHREAD_PRIO |
@@ -3447,549 +3448,6 @@ static bool rcu_seq_done(unsigned long *sp, unsigned long s)
3447 | return ULONG_CMP_GE(READ_ONCE(*sp), s); | 3448 | return ULONG_CMP_GE(READ_ONCE(*sp), s); |
3448 | } | 3449 | } |
3449 | 3450 | ||
3450 | /* Wrapper functions for expedited grace periods. */ | ||
3451 | static void rcu_exp_gp_seq_start(struct rcu_state *rsp) | ||
3452 | { | ||
3453 | rcu_seq_start(&rsp->expedited_sequence); | ||
3454 | } | ||
3455 | static void rcu_exp_gp_seq_end(struct rcu_state *rsp) | ||
3456 | { | ||
3457 | rcu_seq_end(&rsp->expedited_sequence); | ||
3458 | smp_mb(); /* Ensure that consecutive grace periods serialize. */ | ||
3459 | } | ||
3460 | static unsigned long rcu_exp_gp_seq_snap(struct rcu_state *rsp) | ||
3461 | { | ||
3462 | unsigned long s; | ||
3463 | |||
3464 | smp_mb(); /* Caller's modifications seen first by other CPUs. */ | ||
3465 | s = rcu_seq_snap(&rsp->expedited_sequence); | ||
3466 | trace_rcu_exp_grace_period(rsp->name, s, TPS("snap")); | ||
3467 | return s; | ||
3468 | } | ||
3469 | static bool rcu_exp_gp_seq_done(struct rcu_state *rsp, unsigned long s) | ||
3470 | { | ||
3471 | return rcu_seq_done(&rsp->expedited_sequence, s); | ||
3472 | } | ||
3473 | |||
3474 | /* | ||
3475 | * Reset the ->expmaskinit values in the rcu_node tree to reflect any | ||
3476 | * recent CPU-online activity. Note that these masks are not cleared | ||
3477 | * when CPUs go offline, so they reflect the union of all CPUs that have | ||
3478 | * ever been online. This means that this function normally takes its | ||
3479 | * no-work-to-do fastpath. | ||
3480 | */ | ||
3481 | static void sync_exp_reset_tree_hotplug(struct rcu_state *rsp) | ||
3482 | { | ||
3483 | bool done; | ||
3484 | unsigned long flags; | ||
3485 | unsigned long mask; | ||
3486 | unsigned long oldmask; | ||
3487 | int ncpus = READ_ONCE(rsp->ncpus); | ||
3488 | struct rcu_node *rnp; | ||
3489 | struct rcu_node *rnp_up; | ||
3490 | |||
3491 | /* If no new CPUs onlined since last time, nothing to do. */ | ||
3492 | if (likely(ncpus == rsp->ncpus_snap)) | ||
3493 | return; | ||
3494 | rsp->ncpus_snap = ncpus; | ||
3495 | |||
3496 | /* | ||
3497 | * Each pass through the following loop propagates newly onlined | ||
3498 | * CPUs for the current rcu_node structure up the rcu_node tree. | ||
3499 | */ | ||
3500 | rcu_for_each_leaf_node(rsp, rnp) { | ||
3501 | raw_spin_lock_irqsave_rcu_node(rnp, flags); | ||
3502 | if (rnp->expmaskinit == rnp->expmaskinitnext) { | ||
3503 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | ||
3504 | continue; /* No new CPUs, nothing to do. */ | ||
3505 | } | ||
3506 | |||
3507 | /* Update this node's mask, track old value for propagation. */ | ||
3508 | oldmask = rnp->expmaskinit; | ||
3509 | rnp->expmaskinit = rnp->expmaskinitnext; | ||
3510 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | ||
3511 | |||
3512 | /* If was already nonzero, nothing to propagate. */ | ||
3513 | if (oldmask) | ||
3514 | continue; | ||
3515 | |||
3516 | /* Propagate the new CPU up the tree. */ | ||
3517 | mask = rnp->grpmask; | ||
3518 | rnp_up = rnp->parent; | ||
3519 | done = false; | ||
3520 | while (rnp_up) { | ||
3521 | raw_spin_lock_irqsave_rcu_node(rnp_up, flags); | ||
3522 | if (rnp_up->expmaskinit) | ||
3523 | done = true; | ||
3524 | rnp_up->expmaskinit |= mask; | ||
3525 | raw_spin_unlock_irqrestore_rcu_node(rnp_up, flags); | ||
3526 | if (done) | ||
3527 | break; | ||
3528 | mask = rnp_up->grpmask; | ||
3529 | rnp_up = rnp_up->parent; | ||
3530 | } | ||
3531 | } | ||
3532 | } | ||
3533 | |||
3534 | /* | ||
3535 | * Reset the ->expmask values in the rcu_node tree in preparation for | ||
3536 | * a new expedited grace period. | ||
3537 | */ | ||
3538 | static void __maybe_unused sync_exp_reset_tree(struct rcu_state *rsp) | ||
3539 | { | ||
3540 | unsigned long flags; | ||
3541 | struct rcu_node *rnp; | ||
3542 | |||
3543 | sync_exp_reset_tree_hotplug(rsp); | ||
3544 | rcu_for_each_node_breadth_first(rsp, rnp) { | ||
3545 | raw_spin_lock_irqsave_rcu_node(rnp, flags); | ||
3546 | WARN_ON_ONCE(rnp->expmask); | ||
3547 | rnp->expmask = rnp->expmaskinit; | ||
3548 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | ||
3549 | } | ||
3550 | } | ||
3551 | |||
3552 | /* | ||
3553 | * Return non-zero if there is no RCU expedited grace period in progress | ||
3554 | * for the specified rcu_node structure, in other words, if all CPUs and | ||
3555 | * tasks covered by the specified rcu_node structure have done their bit | ||
3556 | * for the current expedited grace period. Works only for preemptible | ||
3557 | * RCU -- other RCU implementations use other means. | ||
3558 | * | ||
3559 | * Caller must hold the rcu_state's exp_mutex. | ||
3560 | */ | ||
3561 | static int sync_rcu_preempt_exp_done(struct rcu_node *rnp) | ||
3562 | { | ||
3563 | return rnp->exp_tasks == NULL && | ||
3564 | READ_ONCE(rnp->expmask) == 0; | ||
3565 | } | ||
3566 | |||
3567 | /* | ||
3568 | * Report the exit from RCU read-side critical section for the last task | ||
3569 | * that queued itself during or before the current expedited preemptible-RCU | ||
3570 | * grace period. This event is reported either to the rcu_node structure on | ||
3571 | * which the task was queued or to one of that rcu_node structure's ancestors, | ||
3572 | * recursively up the tree. (Calm down, calm down, we do the recursion | ||
3573 | * iteratively!) | ||
3574 | * | ||
3575 | * Caller must hold the rcu_state's exp_mutex and the specified rcu_node | ||
3576 | * structure's ->lock. | ||
3577 | */ | ||
3578 | static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, | ||
3579 | bool wake, unsigned long flags) | ||
3580 | __releases(rnp->lock) | ||
3581 | { | ||
3582 | unsigned long mask; | ||
3583 | |||
3584 | for (;;) { | ||
3585 | if (!sync_rcu_preempt_exp_done(rnp)) { | ||
3586 | if (!rnp->expmask) | ||
3587 | rcu_initiate_boost(rnp, flags); | ||
3588 | else | ||
3589 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | ||
3590 | break; | ||
3591 | } | ||
3592 | if (rnp->parent == NULL) { | ||
3593 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | ||
3594 | if (wake) { | ||
3595 | smp_mb(); /* EGP done before wake_up(). */ | ||
3596 | swake_up(&rsp->expedited_wq); | ||
3597 | } | ||
3598 | break; | ||
3599 | } | ||
3600 | mask = rnp->grpmask; | ||
3601 | raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled */ | ||
3602 | rnp = rnp->parent; | ||
3603 | raw_spin_lock_rcu_node(rnp); /* irqs already disabled */ | ||
3604 | WARN_ON_ONCE(!(rnp->expmask & mask)); | ||
3605 | rnp->expmask &= ~mask; | ||
3606 | } | ||
3607 | } | ||
3608 | |||
3609 | /* | ||
3610 | * Report expedited quiescent state for specified node. This is a | ||
3611 | * lock-acquisition wrapper function for __rcu_report_exp_rnp(). | ||
3612 | * | ||
3613 | * Caller must hold the rcu_state's exp_mutex. | ||
3614 | */ | ||
3615 | static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp, | ||
3616 | struct rcu_node *rnp, bool wake) | ||
3617 | { | ||
3618 | unsigned long flags; | ||
3619 | |||
3620 | raw_spin_lock_irqsave_rcu_node(rnp, flags); | ||
3621 | __rcu_report_exp_rnp(rsp, rnp, wake, flags); | ||
3622 | } | ||
3623 | |||
3624 | /* | ||
3625 | * Report expedited quiescent state for multiple CPUs, all covered by the | ||
3626 | * specified leaf rcu_node structure. Caller must hold the rcu_state's | ||
3627 | * exp_mutex. | ||
3628 | */ | ||
3629 | static void rcu_report_exp_cpu_mult(struct rcu_state *rsp, struct rcu_node *rnp, | ||
3630 | unsigned long mask, bool wake) | ||
3631 | { | ||
3632 | unsigned long flags; | ||
3633 | |||
3634 | raw_spin_lock_irqsave_rcu_node(rnp, flags); | ||
3635 | if (!(rnp->expmask & mask)) { | ||
3636 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | ||
3637 | return; | ||
3638 | } | ||
3639 | rnp->expmask &= ~mask; | ||
3640 | __rcu_report_exp_rnp(rsp, rnp, wake, flags); /* Releases rnp->lock. */ | ||
3641 | } | ||
3642 | |||
3643 | /* | ||
3644 | * Report expedited quiescent state for specified rcu_data (CPU). | ||
3645 | */ | ||
3646 | static void rcu_report_exp_rdp(struct rcu_state *rsp, struct rcu_data *rdp, | ||
3647 | bool wake) | ||
3648 | { | ||
3649 | rcu_report_exp_cpu_mult(rsp, rdp->mynode, rdp->grpmask, wake); | ||
3650 | } | ||
3651 | |||
3652 | /* Common code for synchronize_{rcu,sched}_expedited() work-done checking. */ | ||
3653 | static bool sync_exp_work_done(struct rcu_state *rsp, atomic_long_t *stat, | ||
3654 | unsigned long s) | ||
3655 | { | ||
3656 | if (rcu_exp_gp_seq_done(rsp, s)) { | ||
3657 | trace_rcu_exp_grace_period(rsp->name, s, TPS("done")); | ||
3658 | /* Ensure test happens before caller kfree(). */ | ||
3659 | smp_mb__before_atomic(); /* ^^^ */ | ||
3660 | atomic_long_inc(stat); | ||
3661 | return true; | ||
3662 | } | ||
3663 | return false; | ||
3664 | } | ||
3665 | |||
3666 | /* | ||
3667 | * Funnel-lock acquisition for expedited grace periods. Returns true | ||
3668 | * if some other task completed an expedited grace period that this task | ||
3669 | * can piggy-back on, and with no mutex held. Otherwise, returns false | ||
3670 | * with the mutex held, indicating that the caller must actually do the | ||
3671 | * expedited grace period. | ||
3672 | */ | ||
3673 | static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s) | ||
3674 | { | ||
3675 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id()); | ||
3676 | struct rcu_node *rnp = rdp->mynode; | ||
3677 | struct rcu_node *rnp_root = rcu_get_root(rsp); | ||
3678 | |||
3679 | /* Low-contention fastpath. */ | ||
3680 | if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s) && | ||
3681 | (rnp == rnp_root || | ||
3682 | ULONG_CMP_LT(READ_ONCE(rnp_root->exp_seq_rq), s)) && | ||
3683 | !mutex_is_locked(&rsp->exp_mutex) && | ||
3684 | mutex_trylock(&rsp->exp_mutex)) | ||
3685 | goto fastpath; | ||
3686 | |||
3687 | /* | ||
3688 | * Each pass through the following loop works its way up | ||
3689 | * the rcu_node tree, returning if others have done the work or | ||
3690 | * otherwise falls through to acquire rsp->exp_mutex. The mapping | ||
3691 | * from CPU to rcu_node structure can be inexact, as it is just | ||
3692 | * promoting locality and is not strictly needed for correctness. | ||
3693 | */ | ||
3694 | for (; rnp != NULL; rnp = rnp->parent) { | ||
3695 | if (sync_exp_work_done(rsp, &rdp->exp_workdone1, s)) | ||
3696 | return true; | ||
3697 | |||
3698 | /* Work not done, either wait here or go up. */ | ||
3699 | spin_lock(&rnp->exp_lock); | ||
3700 | if (ULONG_CMP_GE(rnp->exp_seq_rq, s)) { | ||
3701 | |||
3702 | /* Someone else doing GP, so wait for them. */ | ||
3703 | spin_unlock(&rnp->exp_lock); | ||
3704 | trace_rcu_exp_funnel_lock(rsp->name, rnp->level, | ||
3705 | rnp->grplo, rnp->grphi, | ||
3706 | TPS("wait")); | ||
3707 | wait_event(rnp->exp_wq[(s >> 1) & 0x3], | ||
3708 | sync_exp_work_done(rsp, | ||
3709 | &rdp->exp_workdone2, s)); | ||
3710 | return true; | ||
3711 | } | ||
3712 | rnp->exp_seq_rq = s; /* Followers can wait on us. */ | ||
3713 | spin_unlock(&rnp->exp_lock); | ||
3714 | trace_rcu_exp_funnel_lock(rsp->name, rnp->level, rnp->grplo, | ||
3715 | rnp->grphi, TPS("nxtlvl")); | ||
3716 | } | ||
3717 | mutex_lock(&rsp->exp_mutex); | ||
3718 | fastpath: | ||
3719 | if (sync_exp_work_done(rsp, &rdp->exp_workdone3, s)) { | ||
3720 | mutex_unlock(&rsp->exp_mutex); | ||
3721 | return true; | ||
3722 | } | ||
3723 | rcu_exp_gp_seq_start(rsp); | ||
3724 | trace_rcu_exp_grace_period(rsp->name, s, TPS("start")); | ||
3725 | return false; | ||
3726 | } | ||
3727 | |||
3728 | /* Invoked on each online non-idle CPU for expedited quiescent state. */ | ||
3729 | static void sync_sched_exp_handler(void *data) | ||
3730 | { | ||
3731 | struct rcu_data *rdp; | ||
3732 | struct rcu_node *rnp; | ||
3733 | struct rcu_state *rsp = data; | ||
3734 | |||
3735 | rdp = this_cpu_ptr(rsp->rda); | ||
3736 | rnp = rdp->mynode; | ||
3737 | if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) || | ||
3738 | __this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp)) | ||
3739 | return; | ||
3740 | if (rcu_is_cpu_rrupt_from_idle()) { | ||
3741 | rcu_report_exp_rdp(&rcu_sched_state, | ||
3742 | this_cpu_ptr(&rcu_sched_data), true); | ||
3743 | return; | ||
3744 | } | ||
3745 | __this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, true); | ||
3746 | resched_cpu(smp_processor_id()); | ||
3747 | } | ||
3748 | |||
3749 | /* Send IPI for expedited cleanup if needed at end of CPU-hotplug operation. */ | ||
3750 | static void sync_sched_exp_online_cleanup(int cpu) | ||
3751 | { | ||
3752 | struct rcu_data *rdp; | ||
3753 | int ret; | ||
3754 | struct rcu_node *rnp; | ||
3755 | struct rcu_state *rsp = &rcu_sched_state; | ||
3756 | |||
3757 | rdp = per_cpu_ptr(rsp->rda, cpu); | ||
3758 | rnp = rdp->mynode; | ||
3759 | if (!(READ_ONCE(rnp->expmask) & rdp->grpmask)) | ||
3760 | return; | ||
3761 | ret = smp_call_function_single(cpu, sync_sched_exp_handler, rsp, 0); | ||
3762 | WARN_ON_ONCE(ret); | ||
3763 | } | ||
3764 | |||
3765 | /* | ||
3766 | * Select the nodes that the upcoming expedited grace period needs | ||
3767 | * to wait for. | ||
3768 | */ | ||
3769 | static void sync_rcu_exp_select_cpus(struct rcu_state *rsp, | ||
3770 | smp_call_func_t func) | ||
3771 | { | ||
3772 | int cpu; | ||
3773 | unsigned long flags; | ||
3774 | unsigned long mask; | ||
3775 | unsigned long mask_ofl_test; | ||
3776 | unsigned long mask_ofl_ipi; | ||
3777 | int ret; | ||
3778 | struct rcu_node *rnp; | ||
3779 | |||
3780 | sync_exp_reset_tree(rsp); | ||
3781 | rcu_for_each_leaf_node(rsp, rnp) { | ||
3782 | raw_spin_lock_irqsave_rcu_node(rnp, flags); | ||
3783 | |||
3784 | /* Each pass checks a CPU for identity, offline, and idle. */ | ||
3785 | mask_ofl_test = 0; | ||
3786 | for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) { | ||
3787 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); | ||
3788 | struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); | ||
3789 | |||
3790 | if (raw_smp_processor_id() == cpu || | ||
3791 | !(atomic_add_return(0, &rdtp->dynticks) & 0x1)) | ||
3792 | mask_ofl_test |= rdp->grpmask; | ||
3793 | } | ||
3794 | mask_ofl_ipi = rnp->expmask & ~mask_ofl_test; | ||
3795 | |||
3796 | /* | ||
3797 | * Need to wait for any blocked tasks as well. Note that | ||
3798 | * additional blocking tasks will also block the expedited | ||
3799 | * GP until such time as the ->expmask bits are cleared. | ||
3800 | */ | ||
3801 | if (rcu_preempt_has_tasks(rnp)) | ||
3802 | rnp->exp_tasks = rnp->blkd_tasks.next; | ||
3803 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | ||
3804 | |||
3805 | /* IPI the remaining CPUs for expedited quiescent state. */ | ||
3806 | mask = 1; | ||
3807 | for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask <<= 1) { | ||
3808 | if (!(mask_ofl_ipi & mask)) | ||
3809 | continue; | ||
3810 | retry_ipi: | ||
3811 | ret = smp_call_function_single(cpu, func, rsp, 0); | ||
3812 | if (!ret) { | ||
3813 | mask_ofl_ipi &= ~mask; | ||
3814 | continue; | ||
3815 | } | ||
3816 | /* Failed, raced with offline. */ | ||
3817 | raw_spin_lock_irqsave_rcu_node(rnp, flags); | ||
3818 | if (cpu_online(cpu) && | ||
3819 | (rnp->expmask & mask)) { | ||
3820 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | ||
3821 | schedule_timeout_uninterruptible(1); | ||
3822 | if (cpu_online(cpu) && | ||
3823 | (rnp->expmask & mask)) | ||
3824 | goto retry_ipi; | ||
3825 | raw_spin_lock_irqsave_rcu_node(rnp, flags); | ||
3826 | } | ||
3827 | if (!(rnp->expmask & mask)) | ||
3828 | mask_ofl_ipi &= ~mask; | ||
3829 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | ||
3830 | } | ||
3831 | /* Report quiescent states for those that went offline. */ | ||
3832 | mask_ofl_test |= mask_ofl_ipi; | ||
3833 | if (mask_ofl_test) | ||
3834 | rcu_report_exp_cpu_mult(rsp, rnp, mask_ofl_test, false); | ||
3835 | } | ||
3836 | } | ||
3837 | |||
3838 | static void synchronize_sched_expedited_wait(struct rcu_state *rsp) | ||
3839 | { | ||
3840 | int cpu; | ||
3841 | unsigned long jiffies_stall; | ||
3842 | unsigned long jiffies_start; | ||
3843 | unsigned long mask; | ||
3844 | int ndetected; | ||
3845 | struct rcu_node *rnp; | ||
3846 | struct rcu_node *rnp_root = rcu_get_root(rsp); | ||
3847 | int ret; | ||
3848 | |||
3849 | jiffies_stall = rcu_jiffies_till_stall_check(); | ||
3850 | jiffies_start = jiffies; | ||
3851 | |||
3852 | for (;;) { | ||
3853 | ret = swait_event_timeout( | ||
3854 | rsp->expedited_wq, | ||
3855 | sync_rcu_preempt_exp_done(rnp_root), | ||
3856 | jiffies_stall); | ||
3857 | if (ret > 0 || sync_rcu_preempt_exp_done(rnp_root)) | ||
3858 | return; | ||
3859 | if (ret < 0) { | ||
3860 | /* Hit a signal, disable CPU stall warnings. */ | ||
3861 | swait_event(rsp->expedited_wq, | ||
3862 | sync_rcu_preempt_exp_done(rnp_root)); | ||
3863 | return; | ||
3864 | } | ||
3865 | pr_err("INFO: %s detected expedited stalls on CPUs/tasks: {", | ||
3866 | rsp->name); | ||
3867 | ndetected = 0; | ||
3868 | rcu_for_each_leaf_node(rsp, rnp) { | ||
3869 | ndetected += rcu_print_task_exp_stall(rnp); | ||
3870 | mask = 1; | ||
3871 | for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask <<= 1) { | ||
3872 | struct rcu_data *rdp; | ||
3873 | |||
3874 | if (!(rnp->expmask & mask)) | ||
3875 | continue; | ||
3876 | ndetected++; | ||
3877 | rdp = per_cpu_ptr(rsp->rda, cpu); | ||
3878 | pr_cont(" %d-%c%c%c", cpu, | ||
3879 | "O."[!!cpu_online(cpu)], | ||
3880 | "o."[!!(rdp->grpmask & rnp->expmaskinit)], | ||
3881 | "N."[!!(rdp->grpmask & rnp->expmaskinitnext)]); | ||
3882 | } | ||
3883 | mask <<= 1; | ||
3884 | } | ||
3885 | pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n", | ||
3886 | jiffies - jiffies_start, rsp->expedited_sequence, | ||
3887 | rnp_root->expmask, ".T"[!!rnp_root->exp_tasks]); | ||
3888 | if (ndetected) { | ||
3889 | pr_err("blocking rcu_node structures:"); | ||
3890 | rcu_for_each_node_breadth_first(rsp, rnp) { | ||
3891 | if (rnp == rnp_root) | ||
3892 | continue; /* printed unconditionally */ | ||
3893 | if (sync_rcu_preempt_exp_done(rnp)) | ||
3894 | continue; | ||
3895 | pr_cont(" l=%u:%d-%d:%#lx/%c", | ||
3896 | rnp->level, rnp->grplo, rnp->grphi, | ||
3897 | rnp->expmask, | ||
3898 | ".T"[!!rnp->exp_tasks]); | ||
3899 | } | ||
3900 | pr_cont("\n"); | ||
3901 | } | ||
3902 | rcu_for_each_leaf_node(rsp, rnp) { | ||
3903 | mask = 1; | ||
3904 | for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask <<= 1) { | ||
3905 | if (!(rnp->expmask & mask)) | ||
3906 | continue; | ||
3907 | dump_cpu_task(cpu); | ||
3908 | } | ||
3909 | } | ||
3910 | jiffies_stall = 3 * rcu_jiffies_till_stall_check() + 3; | ||
3911 | } | ||
3912 | } | ||
3913 | |||
3914 | /* | ||
3915 | * Wait for the current expedited grace period to complete, and then | ||
3916 | * wake up everyone who piggybacked on the just-completed expedited | ||
3917 | * grace period. Also update all the ->exp_seq_rq counters as needed | ||
3918 | * in order to avoid counter-wrap problems. | ||
3919 | */ | ||
3920 | static void rcu_exp_wait_wake(struct rcu_state *rsp, unsigned long s) | ||
3921 | { | ||
3922 | struct rcu_node *rnp; | ||
3923 | |||
3924 | synchronize_sched_expedited_wait(rsp); | ||
3925 | rcu_exp_gp_seq_end(rsp); | ||
3926 | trace_rcu_exp_grace_period(rsp->name, s, TPS("end")); | ||
3927 | |||
3928 | /* | ||
3929 | * Switch over to wakeup mode, allowing the next GP, but -only- the | ||
3930 | * next GP, to proceed. | ||
3931 | */ | ||
3932 | mutex_lock(&rsp->exp_wake_mutex); | ||
3933 | mutex_unlock(&rsp->exp_mutex); | ||
3934 | |||
3935 | rcu_for_each_node_breadth_first(rsp, rnp) { | ||
3936 | if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s)) { | ||
3937 | spin_lock(&rnp->exp_lock); | ||
3938 | /* Recheck, avoid hang in case someone just arrived. */ | ||
3939 | if (ULONG_CMP_LT(rnp->exp_seq_rq, s)) | ||
3940 | rnp->exp_seq_rq = s; | ||
3941 | spin_unlock(&rnp->exp_lock); | ||
3942 | } | ||
3943 | wake_up_all(&rnp->exp_wq[(rsp->expedited_sequence >> 1) & 0x3]); | ||
3944 | } | ||
3945 | trace_rcu_exp_grace_period(rsp->name, s, TPS("endwake")); | ||
3946 | mutex_unlock(&rsp->exp_wake_mutex); | ||
3947 | } | ||
3948 | |||
3949 | /** | ||
3950 | * synchronize_sched_expedited - Brute-force RCU-sched grace period | ||
3951 | * | ||
3952 | * Wait for an RCU-sched grace period to elapse, but use a "big hammer" | ||
3953 | * approach to force the grace period to end quickly. This consumes | ||
3954 | * significant time on all CPUs and is unfriendly to real-time workloads, | ||
3955 | * so is thus not recommended for any sort of common-case code. In fact, | ||
3956 | * if you are using synchronize_sched_expedited() in a loop, please | ||
3957 | * restructure your code to batch your updates, and then use a single | ||
3958 | * synchronize_sched() instead. | ||
3959 | * | ||
3960 | * This implementation can be thought of as an application of sequence | ||
3961 | * locking to expedited grace periods, but using the sequence counter to | ||
3962 | * determine when someone else has already done the work instead of for | ||
3963 | * retrying readers. | ||
3964 | */ | ||
3965 | void synchronize_sched_expedited(void) | ||
3966 | { | ||
3967 | unsigned long s; | ||
3968 | struct rcu_state *rsp = &rcu_sched_state; | ||
3969 | |||
3970 | /* If only one CPU, this is automatically a grace period. */ | ||
3971 | if (rcu_blocking_is_gp()) | ||
3972 | return; | ||
3973 | |||
3974 | /* If expedited grace periods are prohibited, fall back to normal. */ | ||
3975 | if (rcu_gp_is_normal()) { | ||
3976 | wait_rcu_gp(call_rcu_sched); | ||
3977 | return; | ||
3978 | } | ||
3979 | |||
3980 | /* Take a snapshot of the sequence number. */ | ||
3981 | s = rcu_exp_gp_seq_snap(rsp); | ||
3982 | if (exp_funnel_lock(rsp, s)) | ||
3983 | return; /* Someone else did our work for us. */ | ||
3984 | |||
3985 | /* Initialize the rcu_node tree in preparation for the wait. */ | ||
3986 | sync_rcu_exp_select_cpus(rsp, sync_sched_exp_handler); | ||
3987 | |||
3988 | /* Wait and clean up, including waking everyone. */ | ||
3989 | rcu_exp_wait_wake(rsp, s); | ||
3990 | } | ||
3991 | EXPORT_SYMBOL_GPL(synchronize_sched_expedited); | ||
3992 | |||
3993 | /* | 3451 | /* |
3994 | * Check to see if there is any immediate RCU-related work to be done | 3452 | * Check to see if there is any immediate RCU-related work to be done |
3995 | * by the current CPU, for the specified type of RCU, returning 1 if so. | 3453 | * by the current CPU, for the specified type of RCU, returning 1 if so. |
@@ -4747,4 +4205,5 @@ void __init rcu_init(void)
4747 | rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu); | 4205 | rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu); |
4748 | } | 4206 | } |
4749 | 4207 | ||
4208 | #include "tree_exp.h" | ||
4750 | #include "tree_plugin.h" | 4209 | #include "tree_plugin.h" |
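[Editorial note] One detail of the code above that trips readers up is the wait-queue index (s >> 1) & 0x3 used by exp_funnel_lock() and rcu_exp_wait_wake(). The low bit of the sequence is the in-progress flag, so s >> 1 counts grace periods and & 0x3 spreads consecutive grace periods across the four exp_wq[] entries, keeping waiters for different expedited GPs on different queues. A throwaway demonstration of the mapping (not kernel code):

    #include <stdio.h>

    /* Print which of the four per-node wait queues each (even) sequence
     * snapshot would select; purely illustrative.
     */
    int main(void)
    {
            unsigned long s;

            for (s = 2; s <= 16; s += 2)
                    printf("snapshot %2lu -> exp_wq[%lu]\n", s, (s >> 1) & 0x3);
            return 0;
    }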
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
new file mode 100644
index 000000000000..db0909cf7fe1
--- /dev/null
+++ b/kernel/rcu/tree_exp.h
@@ -0,0 +1,564 @@
1 | /* | ||
2 | * RCU expedited grace periods | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, you can access it online at | ||
16 | * http://www.gnu.org/licenses/gpl-2.0.html. | ||
17 | * | ||
18 | * Copyright IBM Corporation, 2016 | ||
19 | * | ||
20 | * Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com> | ||
21 | */ | ||
22 | |||
23 | /* Wrapper functions for expedited grace periods. */ | ||
24 | static void rcu_exp_gp_seq_start(struct rcu_state *rsp) | ||
25 | { | ||
26 | rcu_seq_start(&rsp->expedited_sequence); | ||
27 | } | ||
28 | static void rcu_exp_gp_seq_end(struct rcu_state *rsp) | ||
29 | { | ||
30 | rcu_seq_end(&rsp->expedited_sequence); | ||
31 | smp_mb(); /* Ensure that consecutive grace periods serialize. */ | ||
32 | } | ||
33 | static unsigned long rcu_exp_gp_seq_snap(struct rcu_state *rsp) | ||
34 | { | ||
35 | unsigned long s; | ||
36 | |||
37 | smp_mb(); /* Caller's modifications seen first by other CPUs. */ | ||
38 | s = rcu_seq_snap(&rsp->expedited_sequence); | ||
39 | trace_rcu_exp_grace_period(rsp->name, s, TPS("snap")); | ||
40 | return s; | ||
41 | } | ||
42 | static bool rcu_exp_gp_seq_done(struct rcu_state *rsp, unsigned long s) | ||
43 | { | ||
44 | return rcu_seq_done(&rsp->expedited_sequence, s); | ||
45 | } | ||
46 | |||
47 | /* | ||
48 | * Reset the ->expmaskinit values in the rcu_node tree to reflect any | ||
49 | * recent CPU-online activity. Note that these masks are not cleared | ||
50 | * when CPUs go offline, so they reflect the union of all CPUs that have | ||
51 | * ever been online. This means that this function normally takes its | ||
52 | * no-work-to-do fastpath. | ||
53 | */ | ||
54 | static void sync_exp_reset_tree_hotplug(struct rcu_state *rsp) | ||
55 | { | ||
56 | bool done; | ||
57 | unsigned long flags; | ||
58 | unsigned long mask; | ||
59 | unsigned long oldmask; | ||
60 | int ncpus = READ_ONCE(rsp->ncpus); | ||
61 | struct rcu_node *rnp; | ||
62 | struct rcu_node *rnp_up; | ||
63 | |||
64 | /* If no new CPUs onlined since last time, nothing to do. */ | ||
65 | if (likely(ncpus == rsp->ncpus_snap)) | ||
66 | return; | ||
67 | rsp->ncpus_snap = ncpus; | ||
68 | |||
69 | /* | ||
70 | * Each pass through the following loop propagates newly onlined | ||
71 | * CPUs for the current rcu_node structure up the rcu_node tree. | ||
72 | */ | ||
73 | rcu_for_each_leaf_node(rsp, rnp) { | ||
74 | raw_spin_lock_irqsave_rcu_node(rnp, flags); | ||
75 | if (rnp->expmaskinit == rnp->expmaskinitnext) { | ||
76 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | ||
77 | continue; /* No new CPUs, nothing to do. */ | ||
78 | } | ||
79 | |||
80 | /* Update this node's mask, track old value for propagation. */ | ||
81 | oldmask = rnp->expmaskinit; | ||
82 | rnp->expmaskinit = rnp->expmaskinitnext; | ||
83 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | ||
84 | |||
85 | /* If was already nonzero, nothing to propagate. */ | ||
86 | if (oldmask) | ||
87 | continue; | ||
88 | |||
89 | /* Propagate the new CPU up the tree. */ | ||
90 | mask = rnp->grpmask; | ||
91 | rnp_up = rnp->parent; | ||
92 | done = false; | ||
93 | while (rnp_up) { | ||
94 | raw_spin_lock_irqsave_rcu_node(rnp_up, flags); | ||
95 | if (rnp_up->expmaskinit) | ||
96 | done = true; | ||
97 | rnp_up->expmaskinit |= mask; | ||
98 | raw_spin_unlock_irqrestore_rcu_node(rnp_up, flags); | ||
99 | if (done) | ||
100 | break; | ||
101 | mask = rnp_up->grpmask; | ||
102 | rnp_up = rnp_up->parent; | ||
103 | } | ||
104 | } | ||
105 | } | ||
106 | |||
107 | /* | ||
108 | * Reset the ->expmask values in the rcu_node tree in preparation for | ||
109 | * a new expedited grace period. | ||
110 | */ | ||
111 | static void __maybe_unused sync_exp_reset_tree(struct rcu_state *rsp) | ||
112 | { | ||
113 | unsigned long flags; | ||
114 | struct rcu_node *rnp; | ||
115 | |||
116 | sync_exp_reset_tree_hotplug(rsp); | ||
117 | rcu_for_each_node_breadth_first(rsp, rnp) { | ||
118 | raw_spin_lock_irqsave_rcu_node(rnp, flags); | ||
119 | WARN_ON_ONCE(rnp->expmask); | ||
120 | rnp->expmask = rnp->expmaskinit; | ||
121 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | ||
122 | } | ||
123 | } | ||
124 | |||
125 | /* | ||
126 | * Return non-zero if there is no RCU expedited grace period in progress | ||
127 | * for the specified rcu_node structure, in other words, if all CPUs and | ||
128 | * tasks covered by the specified rcu_node structure have done their bit | ||
129 | * for the current expedited grace period. Works only for preemptible | ||
130 | * RCU -- other RCU implementations use other means. | ||
131 | * | ||
132 | * Caller must hold the rcu_state's exp_mutex. | ||
133 | */ | ||
134 | static int sync_rcu_preempt_exp_done(struct rcu_node *rnp) | ||
135 | { | ||
136 | return rnp->exp_tasks == NULL && | ||
137 | READ_ONCE(rnp->expmask) == 0; | ||
138 | } | ||
139 | |||
140 | /* | ||
141 | * Report the exit from RCU read-side critical section for the last task | ||
142 | * that queued itself during or before the current expedited preemptible-RCU | ||
143 | * grace period. This event is reported either to the rcu_node structure on | ||
144 | * which the task was queued or to one of that rcu_node structure's ancestors, | ||
145 | * recursively up the tree. (Calm down, calm down, we do the recursion | ||
146 | * iteratively!) | ||
147 | * | ||
148 | * Caller must hold the rcu_state's exp_mutex and the specified rcu_node | ||
149 | * structure's ->lock. | ||
150 | */ | ||
151 | static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, | ||
152 | bool wake, unsigned long flags) | ||
153 | __releases(rnp->lock) | ||
154 | { | ||
155 | unsigned long mask; | ||
156 | |||
157 | for (;;) { | ||
158 | if (!sync_rcu_preempt_exp_done(rnp)) { | ||
159 | if (!rnp->expmask) | ||
160 | rcu_initiate_boost(rnp, flags); | ||
161 | else | ||
162 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | ||
163 | break; | ||
164 | } | ||
165 | if (rnp->parent == NULL) { | ||
166 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | ||
167 | if (wake) { | ||
168 | smp_mb(); /* EGP done before wake_up(). */ | ||
169 | swake_up(&rsp->expedited_wq); | ||
170 | } | ||
171 | break; | ||
172 | } | ||
173 | mask = rnp->grpmask; | ||
174 | raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled */ | ||
175 | rnp = rnp->parent; | ||
176 | raw_spin_lock_rcu_node(rnp); /* irqs already disabled */ | ||
177 | WARN_ON_ONCE(!(rnp->expmask & mask)); | ||
178 | rnp->expmask &= ~mask; | ||
179 | } | ||
180 | } | ||
181 | |||
182 | /* | ||
183 | * Report expedited quiescent state for specified node. This is a | ||
184 | * lock-acquisition wrapper function for __rcu_report_exp_rnp(). | ||
185 | * | ||
186 | * Caller must hold the rcu_state's exp_mutex. | ||
187 | */ | ||
188 | static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp, | ||
189 | struct rcu_node *rnp, bool wake) | ||
190 | { | ||
191 | unsigned long flags; | ||
192 | |||
193 | raw_spin_lock_irqsave_rcu_node(rnp, flags); | ||
194 | __rcu_report_exp_rnp(rsp, rnp, wake, flags); | ||
195 | } | ||
196 | |||
197 | /* | ||
198 | * Report expedited quiescent state for multiple CPUs, all covered by the | ||
199 | * specified leaf rcu_node structure. Caller must hold the rcu_state's | ||
200 | * exp_mutex. | ||
201 | */ | ||
202 | static void rcu_report_exp_cpu_mult(struct rcu_state *rsp, struct rcu_node *rnp, | ||
203 | unsigned long mask, bool wake) | ||
204 | { | ||
205 | unsigned long flags; | ||
206 | |||
207 | raw_spin_lock_irqsave_rcu_node(rnp, flags); | ||
208 | if (!(rnp->expmask & mask)) { | ||
209 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | ||
210 | return; | ||
211 | } | ||
212 | rnp->expmask &= ~mask; | ||
213 | __rcu_report_exp_rnp(rsp, rnp, wake, flags); /* Releases rnp->lock. */ | ||
214 | } | ||
215 | |||
216 | /* | ||
217 | * Report expedited quiescent state for specified rcu_data (CPU). | ||
218 | */ | ||
219 | static void rcu_report_exp_rdp(struct rcu_state *rsp, struct rcu_data *rdp, | ||
220 | bool wake) | ||
221 | { | ||
222 | rcu_report_exp_cpu_mult(rsp, rdp->mynode, rdp->grpmask, wake); | ||
223 | } | ||
224 | |||
225 | /* Common code for synchronize_{rcu,sched}_expedited() work-done checking. */ | ||
226 | static bool sync_exp_work_done(struct rcu_state *rsp, atomic_long_t *stat, | ||
227 | unsigned long s) | ||
228 | { | ||
229 | if (rcu_exp_gp_seq_done(rsp, s)) { | ||
230 | trace_rcu_exp_grace_period(rsp->name, s, TPS("done")); | ||
231 | /* Ensure test happens before caller kfree(). */ | ||
232 | smp_mb__before_atomic(); /* ^^^ */ | ||
233 | atomic_long_inc(stat); | ||
234 | return true; | ||
235 | } | ||
236 | return false; | ||
237 | } | ||
238 | |||
239 | /* | ||
240 | * Funnel-lock acquisition for expedited grace periods. Returns true | ||
241 | * if some other task completed an expedited grace period that this task | ||
242 | * can piggy-back on, and with no mutex held. Otherwise, returns false | ||
243 | * with the mutex held, indicating that the caller must actually do the | ||
244 | * expedited grace period. | ||
245 | */ | ||
246 | static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s) | ||
247 | { | ||
248 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id()); | ||
249 | struct rcu_node *rnp = rdp->mynode; | ||
250 | struct rcu_node *rnp_root = rcu_get_root(rsp); | ||
251 | |||
252 | /* Low-contention fastpath. */ | ||
253 | if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s) && | ||
254 | (rnp == rnp_root || | ||
255 | ULONG_CMP_LT(READ_ONCE(rnp_root->exp_seq_rq), s)) && | ||
256 | !mutex_is_locked(&rsp->exp_mutex) && | ||
257 | mutex_trylock(&rsp->exp_mutex)) | ||
258 | goto fastpath; | ||
259 | |||
260 | /* | ||
261 | * Each pass through the following loop works its way up | ||
262 | * the rcu_node tree, returning if others have done the work or | ||
263 | * otherwise falls through to acquire rsp->exp_mutex. The mapping | ||
264 | * from CPU to rcu_node structure can be inexact, as it is just | ||
265 | * promoting locality and is not strictly needed for correctness. | ||
266 | */ | ||
267 | for (; rnp != NULL; rnp = rnp->parent) { | ||
268 | if (sync_exp_work_done(rsp, &rdp->exp_workdone1, s)) | ||
269 | return true; | ||
270 | |||
271 | /* Work not done, either wait here or go up. */ | ||
272 | spin_lock(&rnp->exp_lock); | ||
273 | if (ULONG_CMP_GE(rnp->exp_seq_rq, s)) { | ||
274 | |||
275 | /* Someone else doing GP, so wait for them. */ | ||
276 | spin_unlock(&rnp->exp_lock); | ||
277 | trace_rcu_exp_funnel_lock(rsp->name, rnp->level, | ||
278 | rnp->grplo, rnp->grphi, | ||
279 | TPS("wait")); | ||
280 | wait_event(rnp->exp_wq[(s >> 1) & 0x3], | ||
281 | sync_exp_work_done(rsp, | ||
282 | &rdp->exp_workdone2, s)); | ||
283 | return true; | ||
284 | } | ||
285 | rnp->exp_seq_rq = s; /* Followers can wait on us. */ | ||
286 | spin_unlock(&rnp->exp_lock); | ||
287 | trace_rcu_exp_funnel_lock(rsp->name, rnp->level, rnp->grplo, | ||
288 | rnp->grphi, TPS("nxtlvl")); | ||
289 | } | ||
290 | mutex_lock(&rsp->exp_mutex); | ||
291 | fastpath: | ||
292 | if (sync_exp_work_done(rsp, &rdp->exp_workdone3, s)) { | ||
293 | mutex_unlock(&rsp->exp_mutex); | ||
294 | return true; | ||
295 | } | ||
296 | rcu_exp_gp_seq_start(rsp); | ||
297 | trace_rcu_exp_grace_period(rsp->name, s, TPS("start")); | ||
298 | return false; | ||
299 | } | ||
300 | |||
301 | /* Invoked on each online non-idle CPU for expedited quiescent state. */ | ||
302 | static void sync_sched_exp_handler(void *data) | ||
303 | { | ||
304 | struct rcu_data *rdp; | ||
305 | struct rcu_node *rnp; | ||
306 | struct rcu_state *rsp = data; | ||
307 | |||
308 | rdp = this_cpu_ptr(rsp->rda); | ||
309 | rnp = rdp->mynode; | ||
310 | if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) || | ||
311 | __this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp)) | ||
312 | return; | ||
313 | if (rcu_is_cpu_rrupt_from_idle()) { | ||
314 | rcu_report_exp_rdp(&rcu_sched_state, | ||
315 | this_cpu_ptr(&rcu_sched_data), true); | ||
316 | return; | ||
317 | } | ||
318 | __this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, true); | ||
319 | resched_cpu(smp_processor_id()); | ||
320 | } | ||
321 | |||
322 | /* Send IPI for expedited cleanup if needed at end of CPU-hotplug operation. */ | ||
323 | static void sync_sched_exp_online_cleanup(int cpu) | ||
324 | { | ||
325 | struct rcu_data *rdp; | ||
326 | int ret; | ||
327 | struct rcu_node *rnp; | ||
328 | struct rcu_state *rsp = &rcu_sched_state; | ||
329 | |||
330 | rdp = per_cpu_ptr(rsp->rda, cpu); | ||
331 | rnp = rdp->mynode; | ||
332 | if (!(READ_ONCE(rnp->expmask) & rdp->grpmask)) | ||
333 | return; | ||
334 | ret = smp_call_function_single(cpu, sync_sched_exp_handler, rsp, 0); | ||
335 | WARN_ON_ONCE(ret); | ||
336 | } | ||
337 | |||
338 | /* | ||
339 | * Select the nodes that the upcoming expedited grace period needs | ||
340 | * to wait for. | ||
341 | */ | ||
342 | static void sync_rcu_exp_select_cpus(struct rcu_state *rsp, | ||
343 | smp_call_func_t func) | ||
344 | { | ||
345 | int cpu; | ||
346 | unsigned long flags; | ||
347 | unsigned long mask; | ||
348 | unsigned long mask_ofl_test; | ||
349 | unsigned long mask_ofl_ipi; | ||
350 | int ret; | ||
351 | struct rcu_node *rnp; | ||
352 | |||
353 | sync_exp_reset_tree(rsp); | ||
354 | rcu_for_each_leaf_node(rsp, rnp) { | ||
355 | raw_spin_lock_irqsave_rcu_node(rnp, flags); | ||
356 | |||
357 | /* Each pass checks a CPU for identity, offline, and idle. */ | ||
358 | mask_ofl_test = 0; | ||
359 | for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) { | ||
360 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); | ||
361 | struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); | ||
362 | |||
363 | if (raw_smp_processor_id() == cpu || | ||
364 | !(atomic_add_return(0, &rdtp->dynticks) & 0x1)) | ||
365 | mask_ofl_test |= rdp->grpmask; | ||
366 | } | ||
367 | mask_ofl_ipi = rnp->expmask & ~mask_ofl_test; | ||
368 | |||
369 | /* | ||
370 | * Need to wait for any blocked tasks as well. Note that | ||
371 | * additional blocking tasks will also block the expedited | ||
372 | * GP until such time as the ->expmask bits are cleared. | ||
373 | */ | ||
374 | if (rcu_preempt_has_tasks(rnp)) | ||
375 | rnp->exp_tasks = rnp->blkd_tasks.next; | ||
376 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | ||
377 | |||
378 | /* IPI the remaining CPUs for expedited quiescent state. */ | ||
379 | mask = 1; | ||
380 | for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask <<= 1) { | ||
381 | if (!(mask_ofl_ipi & mask)) | ||
382 | continue; | ||
383 | retry_ipi: | ||
384 | ret = smp_call_function_single(cpu, func, rsp, 0); | ||
385 | if (!ret) { | ||
386 | mask_ofl_ipi &= ~mask; | ||
387 | continue; | ||
388 | } | ||
389 | /* Failed, raced with offline. */ | ||
390 | raw_spin_lock_irqsave_rcu_node(rnp, flags); | ||
391 | if (cpu_online(cpu) && | ||
392 | (rnp->expmask & mask)) { | ||
393 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | ||
394 | schedule_timeout_uninterruptible(1); | ||
395 | if (cpu_online(cpu) && | ||
396 | (rnp->expmask & mask)) | ||
397 | goto retry_ipi; | ||
398 | raw_spin_lock_irqsave_rcu_node(rnp, flags); | ||
399 | } | ||
400 | if (!(rnp->expmask & mask)) | ||
401 | mask_ofl_ipi &= ~mask; | ||
402 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | ||
403 | } | ||
404 | /* Report quiescent states for those that went offline. */ | ||
405 | mask_ofl_test |= mask_ofl_ipi; | ||
406 | if (mask_ofl_test) | ||
407 | rcu_report_exp_cpu_mult(rsp, rnp, mask_ofl_test, false); | ||
408 | } | ||
409 | } | ||
410 | |||
411 | static void synchronize_sched_expedited_wait(struct rcu_state *rsp) | ||
412 | { | ||
413 | int cpu; | ||
414 | unsigned long jiffies_stall; | ||
415 | unsigned long jiffies_start; | ||
416 | unsigned long mask; | ||
417 | int ndetected; | ||
418 | struct rcu_node *rnp; | ||
419 | struct rcu_node *rnp_root = rcu_get_root(rsp); | ||
420 | int ret; | ||
421 | |||
422 | jiffies_stall = rcu_jiffies_till_stall_check(); | ||
423 | jiffies_start = jiffies; | ||
424 | |||
425 | for (;;) { | ||
426 | ret = swait_event_timeout( | ||
427 | rsp->expedited_wq, | ||
428 | sync_rcu_preempt_exp_done(rnp_root), | ||
429 | jiffies_stall); | ||
430 | if (ret > 0 || sync_rcu_preempt_exp_done(rnp_root)) | ||
431 | return; | ||
432 | if (ret < 0) { | ||
433 | /* Hit a signal, disable CPU stall warnings. */ | ||
434 | swait_event(rsp->expedited_wq, | ||
435 | sync_rcu_preempt_exp_done(rnp_root)); | ||
436 | return; | ||
437 | } | ||
438 | pr_err("INFO: %s detected expedited stalls on CPUs/tasks: {", | ||
439 | rsp->name); | ||
440 | ndetected = 0; | ||
441 | rcu_for_each_leaf_node(rsp, rnp) { | ||
442 | ndetected += rcu_print_task_exp_stall(rnp); | ||
443 | mask = 1; | ||
444 | for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask <<= 1) { | ||
445 | struct rcu_data *rdp; | ||
446 | |||
447 | if (!(rnp->expmask & mask)) | ||
448 | continue; | ||
449 | ndetected++; | ||
450 | rdp = per_cpu_ptr(rsp->rda, cpu); | ||
451 | pr_cont(" %d-%c%c%c", cpu, | ||
452 | "O."[!!cpu_online(cpu)], | ||
453 | "o."[!!(rdp->grpmask & rnp->expmaskinit)], | ||
454 | "N."[!!(rdp->grpmask & rnp->expmaskinitnext)]); | ||
455 | } | ||
456 | mask <<= 1; | ||
457 | } | ||
458 | pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n", | ||
459 | jiffies - jiffies_start, rsp->expedited_sequence, | ||
460 | rnp_root->expmask, ".T"[!!rnp_root->exp_tasks]); | ||
461 | if (ndetected) { | ||
462 | pr_err("blocking rcu_node structures:"); | ||
463 | rcu_for_each_node_breadth_first(rsp, rnp) { | ||
464 | if (rnp == rnp_root) | ||
465 | continue; /* printed unconditionally */ | ||
466 | if (sync_rcu_preempt_exp_done(rnp)) | ||
467 | continue; | ||
468 | pr_cont(" l=%u:%d-%d:%#lx/%c", | ||
469 | rnp->level, rnp->grplo, rnp->grphi, | ||
470 | rnp->expmask, | ||
471 | ".T"[!!rnp->exp_tasks]); | ||
472 | } | ||
473 | pr_cont("\n"); | ||
474 | } | ||
475 | rcu_for_each_leaf_node(rsp, rnp) { | ||
476 | mask = 1; | ||
477 | for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask <<= 1) { | ||
478 | if (!(rnp->expmask & mask)) | ||
479 | continue; | ||
480 | dump_cpu_task(cpu); | ||
481 | } | ||
482 | } | ||
483 | jiffies_stall = 3 * rcu_jiffies_till_stall_check() + 3; | ||
484 | } | ||
485 | } | ||
486 | |||
487 | /* | ||
488 | * Wait for the current expedited grace period to complete, and then | ||
489 | * wake up everyone who piggybacked on the just-completed expedited | ||
490 | * grace period. Also update all the ->exp_seq_rq counters as needed | ||
491 | * in order to avoid counter-wrap problems. | ||
492 | */ | ||
493 | static void rcu_exp_wait_wake(struct rcu_state *rsp, unsigned long s) | ||
494 | { | ||
495 | struct rcu_node *rnp; | ||
496 | |||
497 | synchronize_sched_expedited_wait(rsp); | ||
498 | rcu_exp_gp_seq_end(rsp); | ||
499 | trace_rcu_exp_grace_period(rsp->name, s, TPS("end")); | ||
500 | |||
501 | /* | ||
502 | * Switch over to wakeup mode, allowing the next GP, but -only- the | ||
503 | * next GP, to proceed. | ||
504 | */ | ||
505 | mutex_lock(&rsp->exp_wake_mutex); | ||
506 | mutex_unlock(&rsp->exp_mutex); | ||
507 | |||
508 | rcu_for_each_node_breadth_first(rsp, rnp) { | ||
509 | if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s)) { | ||
510 | spin_lock(&rnp->exp_lock); | ||
511 | /* Recheck, avoid hang in case someone just arrived. */ | ||
512 | if (ULONG_CMP_LT(rnp->exp_seq_rq, s)) | ||
513 | rnp->exp_seq_rq = s; | ||
514 | spin_unlock(&rnp->exp_lock); | ||
515 | } | ||
516 | wake_up_all(&rnp->exp_wq[(rsp->expedited_sequence >> 1) & 0x3]); | ||
517 | } | ||
518 | trace_rcu_exp_grace_period(rsp->name, s, TPS("endwake")); | ||
519 | mutex_unlock(&rsp->exp_wake_mutex); | ||
520 | } | ||
521 | |||
522 | /** | ||
523 | * synchronize_sched_expedited - Brute-force RCU-sched grace period | ||
524 | * | ||
525 | * Wait for an RCU-sched grace period to elapse, but use a "big hammer" | ||
526 | * approach to force the grace period to end quickly. This consumes | ||
527 | * significant time on all CPUs and is unfriendly to real-time workloads, | ||
528 | * so is thus not recommended for any sort of common-case code. In fact, | ||
529 | * if you are using synchronize_sched_expedited() in a loop, please | ||
530 | * restructure your code to batch your updates, and then use a single | ||
531 | * synchronize_sched() instead. | ||
532 | * | ||
533 | * This implementation can be thought of as an application of sequence | ||
534 | * locking to expedited grace periods, but using the sequence counter to | ||
535 | * determine when someone else has already done the work instead of for | ||
536 | * retrying readers. | ||
537 | */ | ||
538 | void synchronize_sched_expedited(void) | ||
539 | { | ||
540 | unsigned long s; | ||
541 | struct rcu_state *rsp = &rcu_sched_state; | ||
542 | |||
543 | /* If only one CPU, this is automatically a grace period. */ | ||
544 | if (rcu_blocking_is_gp()) | ||
545 | return; | ||
546 | |||
547 | /* If expedited grace periods are prohibited, fall back to normal. */ | ||
548 | if (rcu_gp_is_normal()) { | ||
549 | wait_rcu_gp(call_rcu_sched); | ||
550 | return; | ||
551 | } | ||
552 | |||
553 | /* Take a snapshot of the sequence number. */ | ||
554 | s = rcu_exp_gp_seq_snap(rsp); | ||
555 | if (exp_funnel_lock(rsp, s)) | ||
556 | return; /* Someone else did our work for us. */ | ||
557 | |||
558 | /* Initialize the rcu_node tree in preparation for the wait. */ | ||
559 | sync_rcu_exp_select_cpus(rsp, sync_sched_exp_handler); | ||
560 | |||
561 | /* Wait and clean up, including waking everyone. */ | ||
562 | rcu_exp_wait_wake(rsp, s); | ||
563 | } | ||
564 | EXPORT_SYMBOL_GPL(synchronize_sched_expedited); | ||
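
[Editorial note] Finally, a caller-side sketch of the advice in the synchronize_sched_expedited() kernel-doc above: batch the updates and pay for a single grace period instead of expediting each removal. Everything here (foo_retire_all() and its parameters) is hypothetical and not part of this commit:

    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    /* Hypothetical: retire every element of an RCU-sched-protected pointer
     * array.  The caller holds the update-side lock protecting @slot, and the
     * old values are only freed once the grace period has elapsed.
     */
    static void foo_retire_all(void __rcu **slot, void **old, int n)
    {
            int i;

            for (i = 0; i < n; i++) {
                    old[i] = rcu_access_pointer(slot[i]);   /* update side  */
                    rcu_assign_pointer(slot[i], NULL);      /* unpublish    */
            }

            /* One grace period covers all n removals; prefer this to calling
             * synchronize_sched_expedited() once per element in a loop.
             */
            synchronize_sched();

            for (i = 0; i < n; i++)
                    kfree(old[i]);
    }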