author     Paul E. McKenney <paulmck@linux.vnet.ibm.com>   2015-10-07 19:05:21 -0400
committer  Paul E. McKenney <paulmck@linux.vnet.ibm.com>   2015-10-07 19:05:21 -0400
commit     d2856b046d2ce2bfb664727cb8671ad0e371bd6c (patch)
tree       cb9056e8fb6a3038db6629781dfefbac8387d0c2 /kernel/rcu/tree.c
parent     7f5f873c6a0772970d5fee1f364231207051ecd8 (diff)
parent     338b0f760e84676130c6e4d8268cb8c923b38c8c (diff)
Merge branches 'fixes.2015.10.06a' and 'exp.2015.10.07a' into HEAD
exp.2015.10.07a: Reduce OS jitter of RCU-sched expedited grace periods.
fixes.2015.10.06a: Miscellaneous fixes.
Diffstat (limited to 'kernel/rcu/tree.c')

-rw-r--r--  kernel/rcu/tree.c  457
1 file changed, 364 insertions, 93 deletions
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 4d296b0fb987..f07343b54fe5 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -71,7 +71,6 @@ MODULE_ALIAS("rcutree");
 static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
 static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
 static struct lock_class_key rcu_exp_class[RCU_NUM_LVLS];
-static struct lock_class_key rcu_exp_sched_class[RCU_NUM_LVLS];
 
 /*
  * In order to export the rcu_state name to the tracing tools, it
@@ -161,6 +160,8 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf);
 static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
 static void invoke_rcu_core(void);
 static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp);
+static void rcu_report_exp_rdp(struct rcu_state *rsp,
+                               struct rcu_data *rdp, bool wake);
 
 /* rcuc/rcub kthread realtime priority */
 #ifdef CONFIG_RCU_KTHREAD_PRIO
@@ -245,21 +246,33 @@ static int rcu_gp_in_progress(struct rcu_state *rsp)
  */
 void rcu_sched_qs(void)
 {
-        if (!__this_cpu_read(rcu_sched_data.passed_quiesce)) {
+        unsigned long flags;
+
+        if (__this_cpu_read(rcu_sched_data.cpu_no_qs.s)) {
                 trace_rcu_grace_period(TPS("rcu_sched"),
                                        __this_cpu_read(rcu_sched_data.gpnum),
                                        TPS("cpuqs"));
-                __this_cpu_write(rcu_sched_data.passed_quiesce, 1);
+                __this_cpu_write(rcu_sched_data.cpu_no_qs.b.norm, false);
+                if (!__this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))
+                        return;
+                local_irq_save(flags);
+                if (__this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp)) {
+                        __this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, false);
+                        rcu_report_exp_rdp(&rcu_sched_state,
+                                           this_cpu_ptr(&rcu_sched_data),
+                                           true);
+                }
+                local_irq_restore(flags);
         }
 }
 
 void rcu_bh_qs(void)
 {
-        if (!__this_cpu_read(rcu_bh_data.passed_quiesce)) {
+        if (__this_cpu_read(rcu_bh_data.cpu_no_qs.s)) {
                 trace_rcu_grace_period(TPS("rcu_bh"),
                                        __this_cpu_read(rcu_bh_data.gpnum),
                                        TPS("cpuqs"));
-                __this_cpu_write(rcu_bh_data.passed_quiesce, 1);
+                __this_cpu_write(rcu_bh_data.cpu_no_qs.b.norm, false);
         }
 }
 
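
[Note] The hunk above replaces the old per-CPU passed_quiesce flag with a cpu_no_qs union that carries separate "still owes a normal QS" and "still owes an expedited QS" bits, so a single read of cpu_no_qs.s tells whether any quiescent state is still owed. The union itself is defined in kernel/rcu/tree.h and is not shown in this diff; the standalone sketch below assumes a plausible layout purely to illustrate the access pattern used in rcu_sched_qs() above.

/*
 * Standalone sketch of the cpu_no_qs idea: two per-CPU "still owes a
 * quiescent state" bits (normal and expedited) overlaid by one word so
 * both can be tested in a single read.  The field types here are an
 * assumption; the real union lives in kernel/rcu/tree.h.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

union rcu_noqs_model {
        struct {
                uint8_t norm;   /* Still need a normal QS? */
                uint8_t exp;    /* Still need an expedited QS? */
        } b;                    /* Individual bits. */
        uint16_t s;             /* Both bits at once. */
};

int main(void)
{
        union rcu_noqs_model cpu_no_qs = { .b = { .norm = 1, .exp = 1 } };

        /* Mirrors rcu_sched_qs(): only do work if some QS is still owed. */
        if (cpu_no_qs.s) {
                cpu_no_qs.b.norm = false;               /* Normal QS reported. */
                if (cpu_no_qs.b.exp) {
                        cpu_no_qs.b.exp = false;        /* Expedited QS reported. */
                        printf("reported expedited quiescent state\n");
                }
        }
        printf("any QS still owed? %s\n", cpu_no_qs.s ? "yes" : "no");
        return 0;
}
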
@@ -1753,9 +1766,9 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
                  */
                 rdp->gpnum = rnp->gpnum;
                 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart"));
-                rdp->passed_quiesce = 0;
+                rdp->cpu_no_qs.b.norm = true;
                 rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr);
-                rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask);
+                rdp->core_needs_qs = !!(rnp->qsmask & rdp->grpmask);
                 zero_cpu_stall_ticks(rdp);
                 WRITE_ONCE(rdp->gpwrap, false);
         }
@@ -2344,7 +2357,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
         rnp = rdp->mynode;
         raw_spin_lock_irqsave(&rnp->lock, flags);
         smp_mb__after_unlock_lock();
-        if ((rdp->passed_quiesce == 0 &&
+        if ((rdp->cpu_no_qs.b.norm &&
              rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) ||
             rdp->gpnum != rnp->gpnum || rnp->completed == rnp->gpnum ||
             rdp->gpwrap) {
@@ -2355,7 +2368,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
                  * We will instead need a new quiescent state that lies
                  * within the current grace period.
                  */
-                rdp->passed_quiesce = 0;        /* need qs for new gp. */
+                rdp->cpu_no_qs.b.norm = true;   /* need qs for new gp. */
                 rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr);
                 raw_spin_unlock_irqrestore(&rnp->lock, flags);
                 return;
@@ -2364,7 +2377,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
         if ((rnp->qsmask & mask) == 0) {
                 raw_spin_unlock_irqrestore(&rnp->lock, flags);
         } else {
-                rdp->qs_pending = 0;
+                rdp->core_needs_qs = 0;
 
                 /*
                  * This GP can't end until cpu checks in, so all of our
@@ -2395,14 +2408,14 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
          * Does this CPU still need to do its part for current grace period?
          * If no, return and let the other CPUs do their part as well.
          */
-        if (!rdp->qs_pending)
+        if (!rdp->core_needs_qs)
                 return;
 
         /*
          * Was there a quiescent state since the beginning of the grace
          * period? If no, then exit and wait for the next call.
          */
-        if (!rdp->passed_quiesce &&
+        if (rdp->cpu_no_qs.b.norm &&
             rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr))
                 return;
 
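
[Note] There is a polarity flip running through these hunks: passed_quiesce and qs_pending recorded progress already made, while the replacement fields cpu_no_qs.b.norm and core_needs_qs record work still owed, so every test is inverted. A tiny self-checking sketch of that mapping (toy struct fields, not the kernel's rcu_data):

/*
 * Demonstrates the inverted sense of the renamed fields: old code asked
 * "has a QS been passed / is one pending?", new code asks "is a QS still
 * owed / does the core still need one?".  Hypothetical field names.
 */
#include <assert.h>
#include <stdbool.h>

struct old_style { bool passed_quiesce, qs_pending; };
struct new_style { bool cpu_no_qs_norm, core_needs_qs; };

int main(void)
{
        for (int i = 0; i < 4; i++) {
                struct old_style o = { .passed_quiesce = i & 1,
                                       .qs_pending = !!(i & 2) };
                struct new_style n = { .cpu_no_qs_norm = !o.passed_quiesce,
                                       .core_needs_qs = o.qs_pending };

                /* Old gate: "no QS seen yet"; new gate: "QS still owed". */
                assert((!o.passed_quiesce) == n.cpu_no_qs_norm);
                /* Old gate: "QS pending"; new gate: "core needs a QS". */
                assert(o.qs_pending == n.core_needs_qs);
        }
        return 0;
}
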
@@ -3386,6 +3399,191 @@ static bool rcu_exp_gp_seq_done(struct rcu_state *rsp, unsigned long s)
         return rcu_seq_done(&rsp->expedited_sequence, s);
 }
 
+/*
+ * Reset the ->expmaskinit values in the rcu_node tree to reflect any
+ * recent CPU-online activity.  Note that these masks are not cleared
+ * when CPUs go offline, so they reflect the union of all CPUs that have
+ * ever been online.  This means that this function normally takes its
+ * no-work-to-do fastpath.
+ */
+static void sync_exp_reset_tree_hotplug(struct rcu_state *rsp)
+{
+        bool done;
+        unsigned long flags;
+        unsigned long mask;
+        unsigned long oldmask;
+        int ncpus = READ_ONCE(rsp->ncpus);
+        struct rcu_node *rnp;
+        struct rcu_node *rnp_up;
+
+        /* If no new CPUs onlined since last time, nothing to do. */
+        if (likely(ncpus == rsp->ncpus_snap))
+                return;
+        rsp->ncpus_snap = ncpus;
+
+        /*
+         * Each pass through the following loop propagates newly onlined
+         * CPUs for the current rcu_node structure up the rcu_node tree.
+         */
+        rcu_for_each_leaf_node(rsp, rnp) {
+                raw_spin_lock_irqsave(&rnp->lock, flags);
+                smp_mb__after_unlock_lock();
+                if (rnp->expmaskinit == rnp->expmaskinitnext) {
+                        raw_spin_unlock_irqrestore(&rnp->lock, flags);
+                        continue;  /* No new CPUs, nothing to do. */
+                }
+
+                /* Update this node's mask, track old value for propagation. */
+                oldmask = rnp->expmaskinit;
+                rnp->expmaskinit = rnp->expmaskinitnext;
+                raw_spin_unlock_irqrestore(&rnp->lock, flags);
+
+                /* If was already nonzero, nothing to propagate. */
+                if (oldmask)
+                        continue;
+
+                /* Propagate the new CPU up the tree. */
+                mask = rnp->grpmask;
+                rnp_up = rnp->parent;
+                done = false;
+                while (rnp_up) {
+                        raw_spin_lock_irqsave(&rnp_up->lock, flags);
+                        smp_mb__after_unlock_lock();
+                        if (rnp_up->expmaskinit)
+                                done = true;
+                        rnp_up->expmaskinit |= mask;
+                        raw_spin_unlock_irqrestore(&rnp_up->lock, flags);
+                        if (done)
+                                break;
+                        mask = rnp_up->grpmask;
+                        rnp_up = rnp_up->parent;
+                }
+        }
+}
+
+/*
+ * Reset the ->expmask values in the rcu_node tree in preparation for
+ * a new expedited grace period.
+ */
+static void __maybe_unused sync_exp_reset_tree(struct rcu_state *rsp)
+{
+        unsigned long flags;
+        struct rcu_node *rnp;
+
+        sync_exp_reset_tree_hotplug(rsp);
+        rcu_for_each_node_breadth_first(rsp, rnp) {
+                raw_spin_lock_irqsave(&rnp->lock, flags);
+                smp_mb__after_unlock_lock();
+                WARN_ON_ONCE(rnp->expmask);
+                rnp->expmask = rnp->expmaskinit;
+                raw_spin_unlock_irqrestore(&rnp->lock, flags);
+        }
+}
+
+/*
+ * Return non-zero if there is no RCU expedited grace period in progress
+ * for the specified rcu_node structure, in other words, if all CPUs and
+ * tasks covered by the specified rcu_node structure have done their bit
+ * for the current expedited grace period.  Works only for preemptible
+ * RCU -- other RCU implementation use other means.
+ *
+ * Caller must hold the root rcu_node's exp_funnel_mutex.
+ */
+static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
+{
+        return rnp->exp_tasks == NULL &&
+               READ_ONCE(rnp->expmask) == 0;
+}
+
+/*
+ * Report the exit from RCU read-side critical section for the last task
+ * that queued itself during or before the current expedited preemptible-RCU
+ * grace period.  This event is reported either to the rcu_node structure on
+ * which the task was queued or to one of that rcu_node structure's ancestors,
+ * recursively up the tree.  (Calm down, calm down, we do the recursion
+ * iteratively!)
+ *
+ * Caller must hold the root rcu_node's exp_funnel_mutex and the
+ * specified rcu_node structure's ->lock.
+ */
+static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
+                                 bool wake, unsigned long flags)
+        __releases(rnp->lock)
+{
+        unsigned long mask;
+
+        for (;;) {
+                if (!sync_rcu_preempt_exp_done(rnp)) {
+                        if (!rnp->expmask)
+                                rcu_initiate_boost(rnp, flags);
+                        else
+                                raw_spin_unlock_irqrestore(&rnp->lock, flags);
+                        break;
+                }
+                if (rnp->parent == NULL) {
+                        raw_spin_unlock_irqrestore(&rnp->lock, flags);
+                        if (wake) {
+                                smp_mb(); /* EGP done before wake_up(). */
+                                wake_up(&rsp->expedited_wq);
+                        }
+                        break;
+                }
+                mask = rnp->grpmask;
+                raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
+                rnp = rnp->parent;
+                raw_spin_lock(&rnp->lock); /* irqs already disabled */
+                smp_mb__after_unlock_lock();
+                WARN_ON_ONCE(!(rnp->expmask & mask));
+                rnp->expmask &= ~mask;
+        }
+}
+
+/*
+ * Report expedited quiescent state for specified node.  This is a
+ * lock-acquisition wrapper function for __rcu_report_exp_rnp().
+ *
+ * Caller must hold the root rcu_node's exp_funnel_mutex.
+ */
+static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp,
+                                              struct rcu_node *rnp, bool wake)
+{
+        unsigned long flags;
+
+        raw_spin_lock_irqsave(&rnp->lock, flags);
+        smp_mb__after_unlock_lock();
+        __rcu_report_exp_rnp(rsp, rnp, wake, flags);
+}
+
+/*
+ * Report expedited quiescent state for multiple CPUs, all covered by the
+ * specified leaf rcu_node structure.  Caller must hold the root
+ * rcu_node's exp_funnel_mutex.
+ */
+static void rcu_report_exp_cpu_mult(struct rcu_state *rsp, struct rcu_node *rnp,
+                                    unsigned long mask, bool wake)
+{
+        unsigned long flags;
+
+        raw_spin_lock_irqsave(&rnp->lock, flags);
+        smp_mb__after_unlock_lock();
+        if (!(rnp->expmask & mask)) {
+                raw_spin_unlock_irqrestore(&rnp->lock, flags);
+                return;
+        }
+        rnp->expmask &= ~mask;
+        __rcu_report_exp_rnp(rsp, rnp, wake, flags); /* Releases rnp->lock. */
+}
+
+/*
+ * Report expedited quiescent state for specified rcu_data (CPU).
+ * Caller must hold the root rcu_node's exp_funnel_mutex.
+ */
+static void rcu_report_exp_rdp(struct rcu_state *rsp, struct rcu_data *rdp,
+                               bool wake)
+{
+        rcu_report_exp_cpu_mult(rsp, rdp->mynode, rdp->grpmask, wake);
+}
+
 /* Common code for synchronize_{rcu,sched}_expedited() work-done checking. */
 static bool sync_exp_work_done(struct rcu_state *rsp, struct rcu_node *rnp,
                                struct rcu_data *rdp,
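
[Note] The functions added above all funnel into __rcu_report_exp_rnp(), which clears a CPU's (or subtree's) bit in ->expmask and walks toward the root only when a node's mask empties; the expedited grace period ends when the root's mask reaches zero. A minimal userspace model of that bit-propagation, leaving out the locking, blocked-task lists, and wakeup that the real code handles:

/*
 * Userspace model of expedited-QS reporting: each leaf holds a bitmask
 * of CPUs that still owe a quiescent state, the root holds one bit per
 * leaf, and clearing the last bit in a leaf propagates upward.  A
 * sketch of the idea, not kernel code.
 */
#include <stdio.h>

#define NLEAVES 2
#define NCPUS   8       /* 4 CPUs per leaf. */

struct node {
        unsigned long expmask;  /* Children that still owe a QS. */
        struct node *parent;
        unsigned long grpmask;  /* This node's bit in parent->expmask. */
};

static struct node root;
static struct node leaf[NLEAVES];

/* Clear @mask in @np; if @np empties, keep clearing bits upward. */
static void report_exp(struct node *np, unsigned long mask)
{
        while (np) {
                np->expmask &= ~mask;
                if (np->expmask)        /* Others still owe a QS here. */
                        return;
                mask = np->grpmask;     /* This subtree is done; go up. */
                np = np->parent;
        }
        printf("expedited grace period complete\n");
}

int main(void)
{
        for (int i = 0; i < NLEAVES; i++) {
                leaf[i].expmask = 0xful;        /* CPUs 4*i .. 4*i+3. */
                leaf[i].parent = &root;
                leaf[i].grpmask = 1ul << i;
                root.expmask |= leaf[i].grpmask;
        }
        for (int cpu = 0; cpu < NCPUS; cpu++)
                report_exp(&leaf[cpu / 4], 1ul << (cpu % 4));
        return 0;
}
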
@@ -3462,16 +3660,111 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
 }
 
 /* Invoked on each online non-idle CPU for expedited quiescent state. */
-static int synchronize_sched_expedited_cpu_stop(void *data)
+static void sync_sched_exp_handler(void *data)
 {
-        struct rcu_data *rdp = data;
-        struct rcu_state *rsp = rdp->rsp;
+        struct rcu_data *rdp;
+        struct rcu_node *rnp;
+        struct rcu_state *rsp = data;
 
-        /* We are here: If we are last, do the wakeup. */
-        rdp->exp_done = true;
-        if (atomic_dec_and_test(&rsp->expedited_need_qs))
-                wake_up(&rsp->expedited_wq);
-        return 0;
+        rdp = this_cpu_ptr(rsp->rda);
+        rnp = rdp->mynode;
+        if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) ||
+            __this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))
+                return;
+        __this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, true);
+        resched_cpu(smp_processor_id());
+}
+
+/* Send IPI for expedited cleanup if needed at end of CPU-hotplug operation. */
+static void sync_sched_exp_online_cleanup(int cpu)
+{
+        struct rcu_data *rdp;
+        int ret;
+        struct rcu_node *rnp;
+        struct rcu_state *rsp = &rcu_sched_state;
+
+        rdp = per_cpu_ptr(rsp->rda, cpu);
+        rnp = rdp->mynode;
+        if (!(READ_ONCE(rnp->expmask) & rdp->grpmask))
+                return;
+        ret = smp_call_function_single(cpu, sync_sched_exp_handler, rsp, 0);
+        WARN_ON_ONCE(ret);
+}
+
+/*
+ * Select the nodes that the upcoming expedited grace period needs
+ * to wait for.
+ */
+static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
+                                     smp_call_func_t func)
+{
+        int cpu;
+        unsigned long flags;
+        unsigned long mask;
+        unsigned long mask_ofl_test;
+        unsigned long mask_ofl_ipi;
+        int ret;
+        struct rcu_node *rnp;
+
+        sync_exp_reset_tree(rsp);
+        rcu_for_each_leaf_node(rsp, rnp) {
+                raw_spin_lock_irqsave(&rnp->lock, flags);
+                smp_mb__after_unlock_lock();
+
+                /* Each pass checks a CPU for identity, offline, and idle. */
+                mask_ofl_test = 0;
+                for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) {
+                        struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
+                        struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
+
+                        if (raw_smp_processor_id() == cpu ||
+                            !(atomic_add_return(0, &rdtp->dynticks) & 0x1))
+                                mask_ofl_test |= rdp->grpmask;
+                }
+                mask_ofl_ipi = rnp->expmask & ~mask_ofl_test;
+
+                /*
+                 * Need to wait for any blocked tasks as well.  Note that
+                 * additional blocking tasks will also block the expedited
+                 * GP until such time as the ->expmask bits are cleared.
+                 */
+                if (rcu_preempt_has_tasks(rnp))
+                        rnp->exp_tasks = rnp->blkd_tasks.next;
+                raw_spin_unlock_irqrestore(&rnp->lock, flags);
+
+                /* IPI the remaining CPUs for expedited quiescent state. */
+                mask = 1;
+                for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask <<= 1) {
+                        if (!(mask_ofl_ipi & mask))
+                                continue;
+retry_ipi:
+                        ret = smp_call_function_single(cpu, func, rsp, 0);
+                        if (!ret) {
+                                mask_ofl_ipi &= ~mask;
+                        } else {
+                                /* Failed, raced with offline. */
+                                raw_spin_lock_irqsave(&rnp->lock, flags);
+                                if (cpu_online(cpu) &&
+                                    (rnp->expmask & mask)) {
+                                        raw_spin_unlock_irqrestore(&rnp->lock,
+                                                                   flags);
+                                        schedule_timeout_uninterruptible(1);
+                                        if (cpu_online(cpu) &&
+                                            (rnp->expmask & mask))
+                                                goto retry_ipi;
+                                        raw_spin_lock_irqsave(&rnp->lock,
+                                                              flags);
+                                }
+                                if (!(rnp->expmask & mask))
+                                        mask_ofl_ipi &= ~mask;
+                                raw_spin_unlock_irqrestore(&rnp->lock, flags);
+                        }
+                }
+                /* Report quiescent states for those that went offline. */
+                mask_ofl_test |= mask_ofl_ipi;
+                if (mask_ofl_test)
+                        rcu_report_exp_cpu_mult(rsp, rnp, mask_ofl_test, false);
+        }
 }
 
 static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
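
[Note] sync_rcu_exp_select_cpus() above decides which CPUs actually need an IPI: the caller's own CPU and any CPU whose dynticks counter is even (idle, per the kernel's even-idle/odd-busy convention) are treated as already quiescent, and only the rest are interrupted, which is where the reduction in OS jitter comes from relative to the old stop-CPU approach. A toy sketch of that selection step with made-up counter values:

/*
 * Sketch of the CPU-selection test: an even dynticks value means idle,
 * so idle CPUs and the requesting CPU itself need no IPI.  Toy data;
 * no real atomics or IPIs involved.
 */
#include <stdio.h>

#define NCPUS 4

int main(void)
{
        /* Odd = busy, even = idle (mirrors the kernel's convention). */
        unsigned int dynticks[NCPUS] = { 7, 4, 9, 2 };
        int self = 0;                           /* CPU driving the expedited GP. */
        unsigned long mask_ofl_test = 0;        /* Quiesce without an IPI. */
        unsigned long mask_ofl_ipi;
        unsigned long online = (1ul << NCPUS) - 1;

        for (int cpu = 0; cpu < NCPUS; cpu++)
                if (cpu == self || !(dynticks[cpu] & 0x1))
                        mask_ofl_test |= 1ul << cpu;
        mask_ofl_ipi = online & ~mask_ofl_test;

        printf("quiesce without IPI: %#lx, send IPI to: %#lx\n",
               mask_ofl_test, mask_ofl_ipi);    /* 0xb and 0x4 here. */
        return 0;
}
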
@@ -3479,7 +3772,9 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
         int cpu;
         unsigned long jiffies_stall;
         unsigned long jiffies_start;
-        struct rcu_data *rdp;
+        unsigned long mask;
+        struct rcu_node *rnp;
+        struct rcu_node *rnp_root = rcu_get_root(rsp);
         int ret;
 
         jiffies_stall = rcu_jiffies_till_stall_check();
@@ -3488,33 +3783,43 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
         for (;;) {
                 ret = wait_event_interruptible_timeout(
                                 rsp->expedited_wq,
-                                !atomic_read(&rsp->expedited_need_qs),
+                                sync_rcu_preempt_exp_done(rnp_root),
                                 jiffies_stall);
                 if (ret > 0)
                         return;
                 if (ret < 0) {
                         /* Hit a signal, disable CPU stall warnings. */
                         wait_event(rsp->expedited_wq,
-                                   !atomic_read(&rsp->expedited_need_qs));
+                                   sync_rcu_preempt_exp_done(rnp_root));
                         return;
                 }
-                pr_err("INFO: %s detected expedited stalls on CPUs: {",
+                pr_err("INFO: %s detected expedited stalls on CPUs/tasks: {",
                        rsp->name);
-                for_each_online_cpu(cpu) {
-                        rdp = per_cpu_ptr(rsp->rda, cpu);
-
-                        if (rdp->exp_done)
-                                continue;
-                        pr_cont(" %d", cpu);
+                rcu_for_each_leaf_node(rsp, rnp) {
+                        (void)rcu_print_task_exp_stall(rnp);
+                        mask = 1;
+                        for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask <<= 1) {
+                                struct rcu_data *rdp;
+
+                                if (!(rnp->expmask & mask))
+                                        continue;
+                                rdp = per_cpu_ptr(rsp->rda, cpu);
+                                pr_cont(" %d-%c%c%c", cpu,
+                                        "O."[cpu_online(cpu)],
+                                        "o."[!!(rdp->grpmask & rnp->expmaskinit)],
+                                        "N."[!!(rdp->grpmask & rnp->expmaskinitnext)]);
+                        }
+                        mask <<= 1;
                 }
                 pr_cont(" } %lu jiffies s: %lu\n",
                         jiffies - jiffies_start, rsp->expedited_sequence);
-                for_each_online_cpu(cpu) {
-                        rdp = per_cpu_ptr(rsp->rda, cpu);
-
-                        if (rdp->exp_done)
-                                continue;
+                rcu_for_each_leaf_node(rsp, rnp) {
+                        mask = 1;
+                        for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask <<= 1) {
+                                if (!(rnp->expmask & mask))
+                                        continue;
                                 dump_cpu_task(cpu);
+                        }
                 }
                 jiffies_stall = 3 * rcu_jiffies_till_stall_check() + 3;
         }
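
[Note] The new stall message above prints per-CPU flag characters with the "X."[cond] idiom: indexing a two-character string literal with a 0/1 condition yields the letter when the condition is 0 and '.' when it is 1. A minimal demo with made-up values:

/*
 * Demonstrates string-literal indexing as used in the stall printout:
 * "O."[x] is 'O' when x == 0 and '.' when x == 1.
 */
#include <stdio.h>

int main(void)
{
        int cpu = 3;
        int online = 1;                 /* 1 -> '.', 0 -> 'O' */
        int in_expmaskinit = 0;         /* 0 -> 'o', 1 -> '.' */
        int in_expmaskinitnext = 1;     /* 1 -> '.', 0 -> 'N' */

        /* Prints " 3-.o." for the values above. */
        printf(" %d-%c%c%c\n", cpu,
               "O."[online],
               "o."[in_expmaskinit],
               "N."[in_expmaskinitnext]);
        return 0;
}
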
@@ -3538,7 +3843,6 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
  */
 void synchronize_sched_expedited(void)
 {
-        int cpu;
         unsigned long s;
         struct rcu_node *rnp;
         struct rcu_state *rsp = &rcu_sched_state;
@@ -3546,48 +3850,16 @@ void synchronize_sched_expedited(void)
         /* Take a snapshot of the sequence number. */
         s = rcu_exp_gp_seq_snap(rsp);
 
-        if (!try_get_online_cpus()) {
-                /* CPU hotplug operation in flight, fall back to normal GP. */
-                wait_rcu_gp(call_rcu_sched);
-                atomic_long_inc(&rsp->expedited_normal);
-                return;
-        }
-        WARN_ON_ONCE(cpu_is_offline(raw_smp_processor_id()));
-
         rnp = exp_funnel_lock(rsp, s);
-        if (rnp == NULL) {
-                put_online_cpus();
+        if (rnp == NULL)
                 return;  /* Someone else did our work for us. */
-        }
 
         rcu_exp_gp_seq_start(rsp);
-
-        /* Stop each CPU that is online, non-idle, and not us. */
-        init_waitqueue_head(&rsp->expedited_wq);
-        atomic_set(&rsp->expedited_need_qs, 1); /* Extra count avoids race. */
-        for_each_online_cpu(cpu) {
-                struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
-                struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
-
-                rdp->exp_done = false;
-
-                /* Skip our CPU and any idle CPUs. */
-                if (raw_smp_processor_id() == cpu ||
-                    !(atomic_add_return(0, &rdtp->dynticks) & 0x1))
-                        continue;
-                atomic_inc(&rsp->expedited_need_qs);
-                stop_one_cpu_nowait(cpu, synchronize_sched_expedited_cpu_stop,
-                                    rdp, &rdp->exp_stop_work);
-        }
-
-        /* Remove extra count and, if necessary, wait for CPUs to stop. */
-        if (!atomic_dec_and_test(&rsp->expedited_need_qs))
-                synchronize_sched_expedited_wait(rsp);
+        sync_rcu_exp_select_cpus(rsp, sync_sched_exp_handler);
+        synchronize_sched_expedited_wait(rsp);
 
         rcu_exp_gp_seq_end(rsp);
         mutex_unlock(&rnp->exp_funnel_mutex);
-
-        put_online_cpus();
 }
 EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
 
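
[Note] With CPU selection and waiting factored out above, synchronize_sched_expedited() reduces to: snapshot the expedited sequence counter, take the funnel lock (under which a concurrent expedited GP may already have satisfied the snapshot), then start, drive, and end the grace period. The sketch below models the sequence-counter protocol as I understand it, with the low bit marking a grace period in progress; the real helpers (rcu_exp_gp_seq_snap(), rcu_exp_gp_seq_done(), and friends) live elsewhere in tree.c and may differ in detail:

/*
 * Userspace model of the expedited sequence counter: even means idle,
 * odd means a grace period is in progress, and a snapshot tells a
 * caller which counter value satisfies its request.
 */
#include <assert.h>
#include <stdio.h>

static unsigned long exp_seq;   /* Even: idle.  Odd: GP in progress. */

/* Value exp_seq must reach before this caller's request is satisfied. */
static unsigned long seq_snap(void)
{
        return (exp_seq + 3) & ~0x1ul;
}

static int seq_done(unsigned long s)
{
        return exp_seq >= s;    /* Ignoring counter wrap in this sketch. */
}

static void seq_start(void) { exp_seq++; assert(exp_seq & 1); }
static void seq_end(void)   { exp_seq++; assert(!(exp_seq & 1)); }

int main(void)
{
        unsigned long s = seq_snap();   /* What this caller needs. */

        assert(!seq_done(s));           /* No GP has covered us yet. */
        seq_start();                    /* Some CPU runs the expedited GP... */
        seq_end();                      /* ...and completes it. */
        assert(seq_done(s));            /* Our snapshot is now satisfied. */
        printf("expedited request satisfied at seq %lu\n", exp_seq);
        return 0;
}
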
@@ -3613,11 +3885,11 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
 
         /* Is the RCU core waiting for a quiescent state from this CPU? */
         if (rcu_scheduler_fully_active &&
-            rdp->qs_pending && !rdp->passed_quiesce &&
+            rdp->core_needs_qs && rdp->cpu_no_qs.b.norm &&
             rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) {
-                rdp->n_rp_qs_pending++;
-        } else if (rdp->qs_pending &&
-                   (rdp->passed_quiesce ||
+                rdp->n_rp_core_needs_qs++;
+        } else if (rdp->core_needs_qs &&
+                   (!rdp->cpu_no_qs.b.norm ||
                     rdp->rcu_qs_ctr_snap != __this_cpu_read(rcu_qs_ctr))) {
                 rdp->n_rp_report_qs++;
                 return 1;
@@ -3875,7 +4147,6 @@ static void rcu_init_new_rnp(struct rcu_node *rnp_leaf)
 static void __init
 rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
 {
-        static struct lock_class_key rcu_exp_sched_rdp_class;
         unsigned long flags;
         struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
         struct rcu_node *rnp = rcu_get_root(rsp);
@@ -3891,10 +4162,6 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
         mutex_init(&rdp->exp_funnel_mutex);
         rcu_boot_init_nocb_percpu_data(rdp);
         raw_spin_unlock_irqrestore(&rnp->lock, flags);
-        if (rsp == &rcu_sched_state)
-                lockdep_set_class_and_name(&rdp->exp_funnel_mutex,
-                                           &rcu_exp_sched_rdp_class,
-                                           "rcu_data_exp_sched");
 }
 
 /*
@@ -3913,7 +4180,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
 
         /* Set up local state, ensuring consistent view of global state. */
         raw_spin_lock_irqsave(&rnp->lock, flags);
-        rdp->beenonline = 1;     /* We have now been online. */
         rdp->qlen_last_fqs_check = 0;
         rdp->n_force_qs_snap = rsp->n_force_qs;
         rdp->blimit = blimit;
@@ -3935,11 +4201,15 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
         raw_spin_lock(&rnp->lock);              /* irqs already disabled. */
         smp_mb__after_unlock_lock();
         rnp->qsmaskinitnext |= mask;
+        rnp->expmaskinitnext |= mask;
+        if (!rdp->beenonline)
+                WRITE_ONCE(rsp->ncpus, READ_ONCE(rsp->ncpus) + 1);
+        rdp->beenonline = true;  /* We have now been online. */
         rdp->gpnum = rnp->completed; /* Make CPU later note any new GP. */
         rdp->completed = rnp->completed;
-        rdp->passed_quiesce = false;
+        rdp->cpu_no_qs.b.norm = true;
         rdp->rcu_qs_ctr_snap = per_cpu(rcu_qs_ctr, cpu);
-        rdp->qs_pending = false;
+        rdp->core_needs_qs = false;
         trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl"));
         raw_spin_unlock_irqrestore(&rnp->lock, flags);
 }
@@ -3972,6 +4242,7 @@ int rcu_cpu_notify(struct notifier_block *self,
                 break;
         case CPU_ONLINE:
         case CPU_DOWN_FAILED:
+                sync_sched_exp_online_cleanup(cpu);
                 rcu_boost_kthread_setaffinity(rnp, -1);
                 break;
         case CPU_DOWN_PREPARE:
@@ -3983,6 +4254,12 @@ int rcu_cpu_notify(struct notifier_block *self,
                         rcu_cleanup_dying_cpu(rsp);
                 break;
         case CPU_DYING_IDLE:
+                /* QS for any half-done expedited RCU-sched GP. */
+                preempt_disable();
+                rcu_report_exp_rdp(&rcu_sched_state,
+                                   this_cpu_ptr(rcu_sched_state.rda), true);
+                preempt_enable();
+
                 for_each_rcu_flavor(rsp) {
                         rcu_cleanup_dying_idle_cpu(cpu, rsp);
                 }
@@ -4114,7 +4391,6 @@ static void __init rcu_init_one(struct rcu_state *rsp,
         static const char * const buf[] = RCU_NODE_NAME_INIT;
         static const char * const fqs[] = RCU_FQS_NAME_INIT;
         static const char * const exp[] = RCU_EXP_NAME_INIT;
-        static const char * const exp_sched[] = RCU_EXP_SCHED_NAME_INIT;
         static u8 fl_mask = 0x1;
 
         int levelcnt[RCU_NUM_LVLS];             /* # nodes in each level. */
@@ -4174,18 +4450,13 @@ static void __init rcu_init_one(struct rcu_state *rsp,
                         INIT_LIST_HEAD(&rnp->blkd_tasks);
                         rcu_init_one_nocb(rnp);
                         mutex_init(&rnp->exp_funnel_mutex);
-                        if (rsp == &rcu_sched_state)
-                                lockdep_set_class_and_name(
-                                        &rnp->exp_funnel_mutex,
-                                        &rcu_exp_sched_class[i], exp_sched[i]);
-                        else
-                                lockdep_set_class_and_name(
-                                        &rnp->exp_funnel_mutex,
-                                        &rcu_exp_class[i], exp[i]);
+                        lockdep_set_class_and_name(&rnp->exp_funnel_mutex,
+                                                   &rcu_exp_class[i], exp[i]);
                 }
         }
 
         init_waitqueue_head(&rsp->gp_wq);
+        init_waitqueue_head(&rsp->expedited_wq);
         rnp = rsp->level[rcu_num_lvls - 1];
         for_each_possible_cpu(i) {
                 while (i > rnp->grphi)