diff options
Diffstat (limited to 'kernel/rcutree.c')
| -rw-r--r-- | kernel/rcutree.c | 507 |
1 files changed, 371 insertions, 136 deletions
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 6c4a6722abfd..1050d6d3922c 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
| @@ -50,6 +50,8 @@ | |||
| 50 | #include <linux/wait.h> | 50 | #include <linux/wait.h> |
| 51 | #include <linux/kthread.h> | 51 | #include <linux/kthread.h> |
| 52 | #include <linux/prefetch.h> | 52 | #include <linux/prefetch.h> |
| 53 | #include <linux/delay.h> | ||
| 54 | #include <linux/stop_machine.h> | ||
| 53 | 55 | ||
| 54 | #include "rcutree.h" | 56 | #include "rcutree.h" |
| 55 | #include <trace/events/rcu.h> | 57 | #include <trace/events/rcu.h> |
| @@ -196,7 +198,7 @@ void rcu_note_context_switch(int cpu) | |||
| 196 | EXPORT_SYMBOL_GPL(rcu_note_context_switch); | 198 | EXPORT_SYMBOL_GPL(rcu_note_context_switch); |
| 197 | 199 | ||
| 198 | DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { | 200 | DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { |
| 199 | .dynticks_nesting = DYNTICK_TASK_NESTING, | 201 | .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE, |
| 200 | .dynticks = ATOMIC_INIT(1), | 202 | .dynticks = ATOMIC_INIT(1), |
| 201 | }; | 203 | }; |
| 202 | 204 | ||
| @@ -208,8 +210,11 @@ module_param(blimit, int, 0); | |||
| 208 | module_param(qhimark, int, 0); | 210 | module_param(qhimark, int, 0); |
| 209 | module_param(qlowmark, int, 0); | 211 | module_param(qlowmark, int, 0); |
| 210 | 212 | ||
| 211 | int rcu_cpu_stall_suppress __read_mostly; | 213 | int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */ |
| 214 | int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT; | ||
| 215 | |||
| 212 | module_param(rcu_cpu_stall_suppress, int, 0644); | 216 | module_param(rcu_cpu_stall_suppress, int, 0644); |
| 217 | module_param(rcu_cpu_stall_timeout, int, 0644); | ||
| 213 | 218 | ||
| 214 | static void force_quiescent_state(struct rcu_state *rsp, int relaxed); | 219 | static void force_quiescent_state(struct rcu_state *rsp, int relaxed); |
| 215 | static int rcu_pending(int cpu); | 220 | static int rcu_pending(int cpu); |
| @@ -301,8 +306,6 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp) | |||
| 301 | return &rsp->node[0]; | 306 | return &rsp->node[0]; |
| 302 | } | 307 | } |
| 303 | 308 | ||
| 304 | #ifdef CONFIG_SMP | ||
| 305 | |||
| 306 | /* | 309 | /* |
| 307 | * If the specified CPU is offline, tell the caller that it is in | 310 | * If the specified CPU is offline, tell the caller that it is in |
| 308 | * a quiescent state. Otherwise, whack it with a reschedule IPI. | 311 | * a quiescent state. Otherwise, whack it with a reschedule IPI. |
| @@ -317,30 +320,21 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp) | |||
| 317 | static int rcu_implicit_offline_qs(struct rcu_data *rdp) | 320 | static int rcu_implicit_offline_qs(struct rcu_data *rdp) |
| 318 | { | 321 | { |
| 319 | /* | 322 | /* |
| 320 | * If the CPU is offline, it is in a quiescent state. We can | 323 | * If the CPU is offline for more than a jiffy, it is in a quiescent |
| 321 | * trust its state not to change because interrupts are disabled. | 324 | * state. We can trust its state not to change because interrupts |
| 325 | * are disabled. The reason for the jiffy's worth of slack is to | ||
| 326 | * handle CPUs initializing on the way up and finding their way | ||
| 327 | * to the idle loop on the way down. | ||
| 322 | */ | 328 | */ |
| 323 | if (cpu_is_offline(rdp->cpu)) { | 329 | if (cpu_is_offline(rdp->cpu) && |
| 330 | ULONG_CMP_LT(rdp->rsp->gp_start + 2, jiffies)) { | ||
| 324 | trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl"); | 331 | trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl"); |
| 325 | rdp->offline_fqs++; | 332 | rdp->offline_fqs++; |
| 326 | return 1; | 333 | return 1; |
| 327 | } | 334 | } |
| 328 | |||
| 329 | /* | ||
| 330 | * The CPU is online, so send it a reschedule IPI. This forces | ||
| 331 | * it through the scheduler, and (inefficiently) also handles cases | ||
| 332 | * where idle loops fail to inform RCU about the CPU being idle. | ||
| 333 | */ | ||
| 334 | if (rdp->cpu != smp_processor_id()) | ||
| 335 | smp_send_reschedule(rdp->cpu); | ||
| 336 | else | ||
| 337 | set_need_resched(); | ||
| 338 | rdp->resched_ipi++; | ||
| 339 | return 0; | 335 | return 0; |
| 340 | } | 336 | } |
| 341 | 337 | ||
| 342 | #endif /* #ifdef CONFIG_SMP */ | ||
| 343 | |||
| 344 | /* | 338 | /* |
| 345 | * rcu_idle_enter_common - inform RCU that current CPU is moving towards idle | 339 | * rcu_idle_enter_common - inform RCU that current CPU is moving towards idle |
| 346 | * | 340 | * |
| @@ -366,6 +360,17 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval) | |||
| 366 | atomic_inc(&rdtp->dynticks); | 360 | atomic_inc(&rdtp->dynticks); |
| 367 | smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */ | 361 | smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */ |
| 368 | WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); | 362 | WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); |
| 363 | |||
| 364 | /* | ||
| 365 | * The idle task is not permitted to enter the idle loop while | ||
| 366 | * in an RCU read-side critical section. | ||
| 367 | */ | ||
| 368 | rcu_lockdep_assert(!lock_is_held(&rcu_lock_map), | ||
| 369 | "Illegal idle entry in RCU read-side critical section."); | ||
| 370 | rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map), | ||
| 371 | "Illegal idle entry in RCU-bh read-side critical section."); | ||
| 372 | rcu_lockdep_assert(!lock_is_held(&rcu_sched_lock_map), | ||
| 373 | "Illegal idle entry in RCU-sched read-side critical section."); | ||
| 369 | } | 374 | } |
| 370 | 375 | ||
| 371 | /** | 376 | /** |
| @@ -389,10 +394,15 @@ void rcu_idle_enter(void) | |||
| 389 | local_irq_save(flags); | 394 | local_irq_save(flags); |
| 390 | rdtp = &__get_cpu_var(rcu_dynticks); | 395 | rdtp = &__get_cpu_var(rcu_dynticks); |
| 391 | oldval = rdtp->dynticks_nesting; | 396 | oldval = rdtp->dynticks_nesting; |
| 392 | rdtp->dynticks_nesting = 0; | 397 | WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0); |
| 398 | if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE) | ||
| 399 | rdtp->dynticks_nesting = 0; | ||
| 400 | else | ||
| 401 | rdtp->dynticks_nesting -= DYNTICK_TASK_NEST_VALUE; | ||
| 393 | rcu_idle_enter_common(rdtp, oldval); | 402 | rcu_idle_enter_common(rdtp, oldval); |
| 394 | local_irq_restore(flags); | 403 | local_irq_restore(flags); |
| 395 | } | 404 | } |
| 405 | EXPORT_SYMBOL_GPL(rcu_idle_enter); | ||
| 396 | 406 | ||
| 397 | /** | 407 | /** |
| 398 | * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle | 408 | * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle |
| @@ -462,7 +472,7 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval) | |||
| 462 | * Exit idle mode, in other words, -enter- the mode in which RCU | 472 | * Exit idle mode, in other words, -enter- the mode in which RCU |
| 463 | * read-side critical sections can occur. | 473 | * read-side critical sections can occur. |
| 464 | * | 474 | * |
| 465 | * We crowbar the ->dynticks_nesting field to DYNTICK_TASK_NESTING to | 475 | * We crowbar the ->dynticks_nesting field to DYNTICK_TASK_NEST to |
| 466 | * allow for the possibility of usermode upcalls messing up our count | 476 | * allow for the possibility of usermode upcalls messing up our count |
| 467 | * of interrupt nesting level during the busy period that is just | 477 | * of interrupt nesting level during the busy period that is just |
| 468 | * now starting. | 478 | * now starting. |
| @@ -476,11 +486,15 @@ void rcu_idle_exit(void) | |||
| 476 | local_irq_save(flags); | 486 | local_irq_save(flags); |
| 477 | rdtp = &__get_cpu_var(rcu_dynticks); | 487 | rdtp = &__get_cpu_var(rcu_dynticks); |
| 478 | oldval = rdtp->dynticks_nesting; | 488 | oldval = rdtp->dynticks_nesting; |
| 479 | WARN_ON_ONCE(oldval != 0); | 489 | WARN_ON_ONCE(oldval < 0); |
| 480 | rdtp->dynticks_nesting = DYNTICK_TASK_NESTING; | 490 | if (oldval & DYNTICK_TASK_NEST_MASK) |
| 491 | rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE; | ||
| 492 | else | ||
| 493 | rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; | ||
| 481 | rcu_idle_exit_common(rdtp, oldval); | 494 | rcu_idle_exit_common(rdtp, oldval); |
| 482 | local_irq_restore(flags); | 495 | local_irq_restore(flags); |
| 483 | } | 496 | } |
| 497 | EXPORT_SYMBOL_GPL(rcu_idle_exit); | ||
| 484 | 498 | ||
| 485 | /** | 499 | /** |
| 486 | * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle | 500 | * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle |
| @@ -581,6 +595,49 @@ int rcu_is_cpu_idle(void) | |||
| 581 | } | 595 | } |
| 582 | EXPORT_SYMBOL(rcu_is_cpu_idle); | 596 | EXPORT_SYMBOL(rcu_is_cpu_idle); |
| 583 | 597 | ||
| 598 | #ifdef CONFIG_HOTPLUG_CPU | ||
| 599 | |||
| 600 | /* | ||
| 601 | * Is the current CPU online? Disable preemption to avoid false positives | ||
| 602 | * that could otherwise happen due to the current CPU number being sampled, | ||
| 603 | * this task being preempted, its old CPU being taken offline, resuming | ||
| 604 | * on some other CPU, then determining that its old CPU is now offline. | ||
| 605 | * It is OK to use RCU on an offline processor during initial boot, hence | ||
| 606 | * the check for rcu_scheduler_fully_active. Note also that it is OK | ||
| 607 | * for a CPU coming online to use RCU for one jiffy prior to marking itself | ||
| 608 | * online in the cpu_online_mask. Similarly, it is OK for a CPU going | ||
| 609 | * offline to continue to use RCU for one jiffy after marking itself | ||
| 610 | * offline in the cpu_online_mask. This leniency is necessary given the | ||
| 611 | * non-atomic nature of the online and offline processing, for example, | ||
| 612 | * the fact that a CPU enters the scheduler after completing the CPU_DYING | ||
| 613 | * notifiers. | ||
| 614 | * | ||
| 615 | * This is also why RCU internally marks CPUs online during the | ||
| 616 | * CPU_UP_PREPARE phase and offline during the CPU_DEAD phase. | ||
| 617 | * | ||
| 618 | * Disable checking if in an NMI handler because we cannot safely report | ||
| 619 | * errors from NMI handlers anyway. | ||
| 620 | */ | ||
| 621 | bool rcu_lockdep_current_cpu_online(void) | ||
| 622 | { | ||
| 623 | struct rcu_data *rdp; | ||
| 624 | struct rcu_node *rnp; | ||
| 625 | bool ret; | ||
| 626 | |||
| 627 | if (in_nmi()) | ||
| 628 | return 1; | ||
| 629 | preempt_disable(); | ||
| 630 | rdp = &__get_cpu_var(rcu_sched_data); | ||
| 631 | rnp = rdp->mynode; | ||
| 632 | ret = (rdp->grpmask & rnp->qsmaskinit) || | ||
| 633 | !rcu_scheduler_fully_active; | ||
| 634 | preempt_enable(); | ||
| 635 | return ret; | ||
| 636 | } | ||
| 637 | EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online); | ||
| 638 | |||
| 639 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | ||
| 640 | |||
| 584 | #endif /* #ifdef CONFIG_PROVE_RCU */ | 641 | #endif /* #ifdef CONFIG_PROVE_RCU */ |
| 585 | 642 | ||
| 586 | /** | 643 | /** |
| @@ -595,8 +652,6 @@ int rcu_is_cpu_rrupt_from_idle(void) | |||
| 595 | return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1; | 652 | return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1; |
| 596 | } | 653 | } |
| 597 | 654 | ||
| 598 | #ifdef CONFIG_SMP | ||
| 599 | |||
| 600 | /* | 655 | /* |
| 601 | * Snapshot the specified CPU's dynticks counter so that we can later | 656 | * Snapshot the specified CPU's dynticks counter so that we can later |
| 602 | * credit them with an implicit quiescent state. Return 1 if this CPU | 657 | * credit them with an implicit quiescent state. Return 1 if this CPU |
| @@ -640,12 +695,28 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) | |||
| 640 | return rcu_implicit_offline_qs(rdp); | 695 | return rcu_implicit_offline_qs(rdp); |
| 641 | } | 696 | } |
| 642 | 697 | ||
| 643 | #endif /* #ifdef CONFIG_SMP */ | 698 | static int jiffies_till_stall_check(void) |
| 699 | { | ||
| 700 | int till_stall_check = ACCESS_ONCE(rcu_cpu_stall_timeout); | ||
| 701 | |||
| 702 | /* | ||
| 703 | * Limit check must be consistent with the Kconfig limits | ||
| 704 | * for CONFIG_RCU_CPU_STALL_TIMEOUT. | ||
| 705 | */ | ||
| 706 | if (till_stall_check < 3) { | ||
| 707 | ACCESS_ONCE(rcu_cpu_stall_timeout) = 3; | ||
| 708 | till_stall_check = 3; | ||
| 709 | } else if (till_stall_check > 300) { | ||
| 710 | ACCESS_ONCE(rcu_cpu_stall_timeout) = 300; | ||
| 711 | till_stall_check = 300; | ||
| 712 | } | ||
| 713 | return till_stall_check * HZ + RCU_STALL_DELAY_DELTA; | ||
| 714 | } | ||
| 644 | 715 | ||
| 645 | static void record_gp_stall_check_time(struct rcu_state *rsp) | 716 | static void record_gp_stall_check_time(struct rcu_state *rsp) |
| 646 | { | 717 | { |
| 647 | rsp->gp_start = jiffies; | 718 | rsp->gp_start = jiffies; |
| 648 | rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK; | 719 | rsp->jiffies_stall = jiffies + jiffies_till_stall_check(); |
| 649 | } | 720 | } |
| 650 | 721 | ||
| 651 | static void print_other_cpu_stall(struct rcu_state *rsp) | 722 | static void print_other_cpu_stall(struct rcu_state *rsp) |
| @@ -664,13 +735,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp) | |||
| 664 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 735 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
| 665 | return; | 736 | return; |
| 666 | } | 737 | } |
| 667 | rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK; | 738 | rsp->jiffies_stall = jiffies + 3 * jiffies_till_stall_check() + 3; |
| 668 | |||
| 669 | /* | ||
| 670 | * Now rat on any tasks that got kicked up to the root rcu_node | ||
| 671 | * due to CPU offlining. | ||
| 672 | */ | ||
| 673 | ndetected = rcu_print_task_stall(rnp); | ||
| 674 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 739 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
| 675 | 740 | ||
| 676 | /* | 741 | /* |
| @@ -678,8 +743,9 @@ static void print_other_cpu_stall(struct rcu_state *rsp) | |||
| 678 | * See Documentation/RCU/stallwarn.txt for info on how to debug | 743 | * See Documentation/RCU/stallwarn.txt for info on how to debug |
| 679 | * RCU CPU stall warnings. | 744 | * RCU CPU stall warnings. |
| 680 | */ | 745 | */ |
| 681 | printk(KERN_ERR "INFO: %s detected stalls on CPUs/tasks: {", | 746 | printk(KERN_ERR "INFO: %s detected stalls on CPUs/tasks:", |
| 682 | rsp->name); | 747 | rsp->name); |
| 748 | print_cpu_stall_info_begin(); | ||
| 683 | rcu_for_each_leaf_node(rsp, rnp) { | 749 | rcu_for_each_leaf_node(rsp, rnp) { |
| 684 | raw_spin_lock_irqsave(&rnp->lock, flags); | 750 | raw_spin_lock_irqsave(&rnp->lock, flags); |
| 685 | ndetected += rcu_print_task_stall(rnp); | 751 | ndetected += rcu_print_task_stall(rnp); |
| @@ -688,11 +754,22 @@ static void print_other_cpu_stall(struct rcu_state *rsp) | |||
| 688 | continue; | 754 | continue; |
| 689 | for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++) | 755 | for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++) |
| 690 | if (rnp->qsmask & (1UL << cpu)) { | 756 | if (rnp->qsmask & (1UL << cpu)) { |
| 691 | printk(" %d", rnp->grplo + cpu); | 757 | print_cpu_stall_info(rsp, rnp->grplo + cpu); |
| 692 | ndetected++; | 758 | ndetected++; |
| 693 | } | 759 | } |
| 694 | } | 760 | } |
| 695 | printk("} (detected by %d, t=%ld jiffies)\n", | 761 | |
| 762 | /* | ||
| 763 | * Now rat on any tasks that got kicked up to the root rcu_node | ||
| 764 | * due to CPU offlining. | ||
| 765 | */ | ||
| 766 | rnp = rcu_get_root(rsp); | ||
| 767 | raw_spin_lock_irqsave(&rnp->lock, flags); | ||
| 768 | ndetected = rcu_print_task_stall(rnp); | ||
| 769 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||
| 770 | |||
| 771 | print_cpu_stall_info_end(); | ||
| 772 | printk(KERN_CONT "(detected by %d, t=%ld jiffies)\n", | ||
| 696 | smp_processor_id(), (long)(jiffies - rsp->gp_start)); | 773 | smp_processor_id(), (long)(jiffies - rsp->gp_start)); |
| 697 | if (ndetected == 0) | 774 | if (ndetected == 0) |
| 698 | printk(KERN_ERR "INFO: Stall ended before state dump start\n"); | 775 | printk(KERN_ERR "INFO: Stall ended before state dump start\n"); |
| @@ -716,15 +793,18 @@ static void print_cpu_stall(struct rcu_state *rsp) | |||
| 716 | * See Documentation/RCU/stallwarn.txt for info on how to debug | 793 | * See Documentation/RCU/stallwarn.txt for info on how to debug |
| 717 | * RCU CPU stall warnings. | 794 | * RCU CPU stall warnings. |
| 718 | */ | 795 | */ |
| 719 | printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n", | 796 | printk(KERN_ERR "INFO: %s self-detected stall on CPU", rsp->name); |
| 720 | rsp->name, smp_processor_id(), jiffies - rsp->gp_start); | 797 | print_cpu_stall_info_begin(); |
| 798 | print_cpu_stall_info(rsp, smp_processor_id()); | ||
| 799 | print_cpu_stall_info_end(); | ||
| 800 | printk(KERN_CONT " (t=%lu jiffies)\n", jiffies - rsp->gp_start); | ||
| 721 | if (!trigger_all_cpu_backtrace()) | 801 | if (!trigger_all_cpu_backtrace()) |
| 722 | dump_stack(); | 802 | dump_stack(); |
| 723 | 803 | ||
| 724 | raw_spin_lock_irqsave(&rnp->lock, flags); | 804 | raw_spin_lock_irqsave(&rnp->lock, flags); |
| 725 | if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall)) | 805 | if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall)) |
| 726 | rsp->jiffies_stall = | 806 | rsp->jiffies_stall = jiffies + |
| 727 | jiffies + RCU_SECONDS_TILL_STALL_RECHECK; | 807 | 3 * jiffies_till_stall_check() + 3; |
| 728 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 808 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
| 729 | 809 | ||
| 730 | set_need_resched(); /* kick ourselves to get things going. */ | 810 | set_need_resched(); /* kick ourselves to get things going. */ |
| @@ -807,6 +887,7 @@ static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct | |||
| 807 | rdp->passed_quiesce = 0; | 887 | rdp->passed_quiesce = 0; |
| 808 | } else | 888 | } else |
| 809 | rdp->qs_pending = 0; | 889 | rdp->qs_pending = 0; |
| 890 | zero_cpu_stall_ticks(rdp); | ||
| 810 | } | 891 | } |
| 811 | } | 892 | } |
| 812 | 893 | ||
| @@ -943,6 +1024,10 @@ rcu_start_gp_per_cpu(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_dat | |||
| 943 | * in preparation for detecting the next grace period. The caller must hold | 1024 | * in preparation for detecting the next grace period. The caller must hold |
| 944 | * the root node's ->lock, which is released before return. Hard irqs must | 1025 | * the root node's ->lock, which is released before return. Hard irqs must |
| 945 | * be disabled. | 1026 | * be disabled. |
| 1027 | * | ||
| 1028 | * Note that it is legal for a dying CPU (which is marked as offline) to | ||
| 1029 | * invoke this function. This can happen when the dying CPU reports its | ||
| 1030 | * quiescent state. | ||
| 946 | */ | 1031 | */ |
| 947 | static void | 1032 | static void |
| 948 | rcu_start_gp(struct rcu_state *rsp, unsigned long flags) | 1033 | rcu_start_gp(struct rcu_state *rsp, unsigned long flags) |
| @@ -980,26 +1065,8 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) | |||
| 980 | rsp->fqs_state = RCU_GP_INIT; /* Hold off force_quiescent_state. */ | 1065 | rsp->fqs_state = RCU_GP_INIT; /* Hold off force_quiescent_state. */ |
| 981 | rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; | 1066 | rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; |
| 982 | record_gp_stall_check_time(rsp); | 1067 | record_gp_stall_check_time(rsp); |
| 983 | |||
| 984 | /* Special-case the common single-level case. */ | ||
| 985 | if (NUM_RCU_NODES == 1) { | ||
| 986 | rcu_preempt_check_blocked_tasks(rnp); | ||
| 987 | rnp->qsmask = rnp->qsmaskinit; | ||
| 988 | rnp->gpnum = rsp->gpnum; | ||
| 989 | rnp->completed = rsp->completed; | ||
| 990 | rsp->fqs_state = RCU_SIGNAL_INIT; /* force_quiescent_state OK */ | ||
| 991 | rcu_start_gp_per_cpu(rsp, rnp, rdp); | ||
| 992 | rcu_preempt_boost_start_gp(rnp); | ||
| 993 | trace_rcu_grace_period_init(rsp->name, rnp->gpnum, | ||
| 994 | rnp->level, rnp->grplo, | ||
| 995 | rnp->grphi, rnp->qsmask); | ||
| 996 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||
| 997 | return; | ||
| 998 | } | ||
| 999 | |||
| 1000 | raw_spin_unlock(&rnp->lock); /* leave irqs disabled. */ | 1068 | raw_spin_unlock(&rnp->lock); /* leave irqs disabled. */ |
| 1001 | 1069 | ||
| 1002 | |||
| 1003 | /* Exclude any concurrent CPU-hotplug operations. */ | 1070 | /* Exclude any concurrent CPU-hotplug operations. */ |
| 1004 | raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */ | 1071 | raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */ |
| 1005 | 1072 | ||
| @@ -1245,53 +1312,115 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 1245 | 1312 | ||
| 1246 | /* | 1313 | /* |
| 1247 | * Move a dying CPU's RCU callbacks to online CPU's callback list. | 1314 | * Move a dying CPU's RCU callbacks to online CPU's callback list. |
| 1248 | * Synchronization is not required because this function executes | 1315 | * Also record a quiescent state for this CPU for the current grace period. |
| 1249 | * in stop_machine() context. | 1316 | * Synchronization and interrupt disabling are not required because |
| 1317 | * this function executes in stop_machine() context. Therefore, cleanup | ||
| 1318 | * operations that might block must be done later from the CPU_DEAD | ||
| 1319 | * notifier. | ||
| 1320 | * | ||
| 1321 | * Note that the outgoing CPU's bit has already been cleared in the | ||
| 1322 | * cpu_online_mask. This allows us to randomly pick a callback | ||
| 1323 | * destination from the bits set in that mask. | ||
| 1250 | */ | 1324 | */ |
| 1251 | static void rcu_send_cbs_to_online(struct rcu_state *rsp) | 1325 | static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) |
| 1252 | { | 1326 | { |
| 1253 | int i; | 1327 | int i; |
| 1254 | /* current DYING CPU is cleared in the cpu_online_mask */ | 1328 | unsigned long mask; |
| 1255 | int receive_cpu = cpumask_any(cpu_online_mask); | 1329 | int receive_cpu = cpumask_any(cpu_online_mask); |
| 1256 | struct rcu_data *rdp = this_cpu_ptr(rsp->rda); | 1330 | struct rcu_data *rdp = this_cpu_ptr(rsp->rda); |
| 1257 | struct rcu_data *receive_rdp = per_cpu_ptr(rsp->rda, receive_cpu); | 1331 | struct rcu_data *receive_rdp = per_cpu_ptr(rsp->rda, receive_cpu); |
| 1332 | RCU_TRACE(struct rcu_node *rnp = rdp->mynode); /* For dying CPU. */ | ||
| 1333 | |||
| 1334 | /* First, adjust the counts. */ | ||
| 1335 | if (rdp->nxtlist != NULL) { | ||
| 1336 | receive_rdp->qlen_lazy += rdp->qlen_lazy; | ||
| 1337 | receive_rdp->qlen += rdp->qlen; | ||
| 1338 | rdp->qlen_lazy = 0; | ||
| 1339 | rdp->qlen = 0; | ||
| 1340 | } | ||
| 1258 | 1341 | ||
| 1259 | if (rdp->nxtlist == NULL) | 1342 | /* |
| 1260 | return; /* irqs disabled, so comparison is stable. */ | 1343 | * Next, move ready-to-invoke callbacks to be invoked on some |
| 1344 | * other CPU. These will not be required to pass through another | ||
| 1345 | * grace period: They are done, regardless of CPU. | ||
| 1346 | */ | ||
| 1347 | if (rdp->nxtlist != NULL && | ||
| 1348 | rdp->nxttail[RCU_DONE_TAIL] != &rdp->nxtlist) { | ||
| 1349 | struct rcu_head *oldhead; | ||
| 1350 | struct rcu_head **oldtail; | ||
| 1351 | struct rcu_head **newtail; | ||
| 1352 | |||
| 1353 | oldhead = rdp->nxtlist; | ||
| 1354 | oldtail = receive_rdp->nxttail[RCU_DONE_TAIL]; | ||
| 1355 | rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; | ||
| 1356 | *rdp->nxttail[RCU_DONE_TAIL] = *oldtail; | ||
| 1357 | *receive_rdp->nxttail[RCU_DONE_TAIL] = oldhead; | ||
| 1358 | newtail = rdp->nxttail[RCU_DONE_TAIL]; | ||
| 1359 | for (i = RCU_DONE_TAIL; i < RCU_NEXT_SIZE; i++) { | ||
| 1360 | if (receive_rdp->nxttail[i] == oldtail) | ||
| 1361 | receive_rdp->nxttail[i] = newtail; | ||
| 1362 | if (rdp->nxttail[i] == newtail) | ||
| 1363 | rdp->nxttail[i] = &rdp->nxtlist; | ||
| 1364 | } | ||
| 1365 | } | ||
| 1261 | 1366 | ||
| 1262 | *receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist; | 1367 | /* |
| 1263 | receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; | 1368 | * Finally, put the rest of the callbacks at the end of the list. |
| 1264 | receive_rdp->qlen += rdp->qlen; | 1369 | * The ones that made it partway through get to start over: We |
| 1265 | receive_rdp->n_cbs_adopted += rdp->qlen; | 1370 | * cannot assume that grace periods are synchronized across CPUs. |
| 1266 | rdp->n_cbs_orphaned += rdp->qlen; | 1371 | * (We could splice RCU_WAIT_TAIL into RCU_NEXT_READY_TAIL, but |
| 1372 | * this does not seem compelling. Not yet, anyway.) | ||
| 1373 | */ | ||
| 1374 | if (rdp->nxtlist != NULL) { | ||
| 1375 | *receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist; | ||
| 1376 | receive_rdp->nxttail[RCU_NEXT_TAIL] = | ||
| 1377 | rdp->nxttail[RCU_NEXT_TAIL]; | ||
| 1378 | receive_rdp->n_cbs_adopted += rdp->qlen; | ||
| 1379 | rdp->n_cbs_orphaned += rdp->qlen; | ||
| 1380 | |||
| 1381 | rdp->nxtlist = NULL; | ||
| 1382 | for (i = 0; i < RCU_NEXT_SIZE; i++) | ||
| 1383 | rdp->nxttail[i] = &rdp->nxtlist; | ||
| 1384 | } | ||
| 1267 | 1385 | ||
| 1268 | rdp->nxtlist = NULL; | 1386 | /* |
| 1269 | for (i = 0; i < RCU_NEXT_SIZE; i++) | 1387 | * Record a quiescent state for the dying CPU. This is safe |
| 1270 | rdp->nxttail[i] = &rdp->nxtlist; | 1388 | * only because we have already cleared out the callbacks. |
| 1271 | rdp->qlen = 0; | 1389 | * (Otherwise, the RCU core might try to schedule the invocation |
| 1390 | * of callbacks on this now-offline CPU, which would be bad.) | ||
| 1391 | */ | ||
| 1392 | mask = rdp->grpmask; /* rnp->grplo is constant. */ | ||
| 1393 | trace_rcu_grace_period(rsp->name, | ||
| 1394 | rnp->gpnum + 1 - !!(rnp->qsmask & mask), | ||
| 1395 | "cpuofl"); | ||
| 1396 | rcu_report_qs_rdp(smp_processor_id(), rsp, rdp, rsp->gpnum); | ||
| 1397 | /* Note that rcu_report_qs_rdp() might call trace_rcu_grace_period(). */ | ||
| 1272 | } | 1398 | } |
| 1273 | 1399 | ||
| 1274 | /* | 1400 | /* |
| 1275 | * Remove the outgoing CPU from the bitmasks in the rcu_node hierarchy | 1401 | * The CPU has been completely removed, and some other CPU is reporting |
| 1276 | * and move all callbacks from the outgoing CPU to the current one. | 1402 | * this fact from process context. Do the remainder of the cleanup. |
| 1277 | * There can only be one CPU hotplug operation at a time, so no other | 1403 | * There can only be one CPU hotplug operation at a time, so no other |
| 1278 | * CPU can be attempting to update rcu_cpu_kthread_task. | 1404 | * CPU can be attempting to update rcu_cpu_kthread_task. |
| 1279 | */ | 1405 | */ |
| 1280 | static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) | 1406 | static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) |
| 1281 | { | 1407 | { |
| 1282 | unsigned long flags; | 1408 | unsigned long flags; |
| 1283 | unsigned long mask; | 1409 | unsigned long mask; |
| 1284 | int need_report = 0; | 1410 | int need_report = 0; |
| 1285 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); | 1411 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); |
| 1286 | struct rcu_node *rnp; | 1412 | struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rnp. */ |
| 1287 | 1413 | ||
| 1414 | /* Adjust any no-longer-needed kthreads. */ | ||
| 1288 | rcu_stop_cpu_kthread(cpu); | 1415 | rcu_stop_cpu_kthread(cpu); |
| 1416 | rcu_node_kthread_setaffinity(rnp, -1); | ||
| 1417 | |||
| 1418 | /* Remove the dying CPU from the bitmasks in the rcu_node hierarchy. */ | ||
| 1289 | 1419 | ||
| 1290 | /* Exclude any attempts to start a new grace period. */ | 1420 | /* Exclude any attempts to start a new grace period. */ |
| 1291 | raw_spin_lock_irqsave(&rsp->onofflock, flags); | 1421 | raw_spin_lock_irqsave(&rsp->onofflock, flags); |
| 1292 | 1422 | ||
| 1293 | /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */ | 1423 | /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */ |
| 1294 | rnp = rdp->mynode; /* this is the outgoing CPU's rnp. */ | ||
| 1295 | mask = rdp->grpmask; /* rnp->grplo is constant. */ | 1424 | mask = rdp->grpmask; /* rnp->grplo is constant. */ |
| 1296 | do { | 1425 | do { |
| 1297 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ | 1426 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ |
| @@ -1299,20 +1428,11 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) | |||
| 1299 | if (rnp->qsmaskinit != 0) { | 1428 | if (rnp->qsmaskinit != 0) { |
| 1300 | if (rnp != rdp->mynode) | 1429 | if (rnp != rdp->mynode) |
| 1301 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 1430 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
| 1302 | else | ||
| 1303 | trace_rcu_grace_period(rsp->name, | ||
| 1304 | rnp->gpnum + 1 - | ||
| 1305 | !!(rnp->qsmask & mask), | ||
| 1306 | "cpuofl"); | ||
| 1307 | break; | 1431 | break; |
| 1308 | } | 1432 | } |
| 1309 | if (rnp == rdp->mynode) { | 1433 | if (rnp == rdp->mynode) |
| 1310 | trace_rcu_grace_period(rsp->name, | ||
| 1311 | rnp->gpnum + 1 - | ||
| 1312 | !!(rnp->qsmask & mask), | ||
| 1313 | "cpuofl"); | ||
| 1314 | need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp); | 1434 | need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp); |
| 1315 | } else | 1435 | else |
| 1316 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 1436 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
| 1317 | mask = rnp->grpmask; | 1437 | mask = rnp->grpmask; |
| 1318 | rnp = rnp->parent; | 1438 | rnp = rnp->parent; |
| @@ -1332,29 +1452,15 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) | |||
| 1332 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1452 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
| 1333 | if (need_report & RCU_OFL_TASKS_EXP_GP) | 1453 | if (need_report & RCU_OFL_TASKS_EXP_GP) |
| 1334 | rcu_report_exp_rnp(rsp, rnp, true); | 1454 | rcu_report_exp_rnp(rsp, rnp, true); |
| 1335 | rcu_node_kthread_setaffinity(rnp, -1); | ||
| 1336 | } | ||
| 1337 | |||
| 1338 | /* | ||
| 1339 | * Remove the specified CPU from the RCU hierarchy and move any pending | ||
| 1340 | * callbacks that it might have to the current CPU. This code assumes | ||
| 1341 | * that at least one CPU in the system will remain running at all times. | ||
| 1342 | * Any attempt to offline -all- CPUs is likely to strand RCU callbacks. | ||
| 1343 | */ | ||
| 1344 | static void rcu_offline_cpu(int cpu) | ||
| 1345 | { | ||
| 1346 | __rcu_offline_cpu(cpu, &rcu_sched_state); | ||
| 1347 | __rcu_offline_cpu(cpu, &rcu_bh_state); | ||
| 1348 | rcu_preempt_offline_cpu(cpu); | ||
| 1349 | } | 1455 | } |
| 1350 | 1456 | ||
| 1351 | #else /* #ifdef CONFIG_HOTPLUG_CPU */ | 1457 | #else /* #ifdef CONFIG_HOTPLUG_CPU */ |
| 1352 | 1458 | ||
| 1353 | static void rcu_send_cbs_to_online(struct rcu_state *rsp) | 1459 | static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) |
| 1354 | { | 1460 | { |
| 1355 | } | 1461 | } |
| 1356 | 1462 | ||
| 1357 | static void rcu_offline_cpu(int cpu) | 1463 | static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) |
| 1358 | { | 1464 | { |
| 1359 | } | 1465 | } |
| 1360 | 1466 | ||
| @@ -1368,11 +1474,11 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 1368 | { | 1474 | { |
| 1369 | unsigned long flags; | 1475 | unsigned long flags; |
| 1370 | struct rcu_head *next, *list, **tail; | 1476 | struct rcu_head *next, *list, **tail; |
| 1371 | int bl, count; | 1477 | int bl, count, count_lazy; |
| 1372 | 1478 | ||
| 1373 | /* If no callbacks are ready, just return.*/ | 1479 | /* If no callbacks are ready, just return.*/ |
| 1374 | if (!cpu_has_callbacks_ready_to_invoke(rdp)) { | 1480 | if (!cpu_has_callbacks_ready_to_invoke(rdp)) { |
| 1375 | trace_rcu_batch_start(rsp->name, 0, 0); | 1481 | trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, 0); |
| 1376 | trace_rcu_batch_end(rsp->name, 0, !!ACCESS_ONCE(rdp->nxtlist), | 1482 | trace_rcu_batch_end(rsp->name, 0, !!ACCESS_ONCE(rdp->nxtlist), |
| 1377 | need_resched(), is_idle_task(current), | 1483 | need_resched(), is_idle_task(current), |
| 1378 | rcu_is_callbacks_kthread()); | 1484 | rcu_is_callbacks_kthread()); |
| @@ -1384,8 +1490,9 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 1384 | * races with call_rcu() from interrupt handlers. | 1490 | * races with call_rcu() from interrupt handlers. |
| 1385 | */ | 1491 | */ |
| 1386 | local_irq_save(flags); | 1492 | local_irq_save(flags); |
| 1493 | WARN_ON_ONCE(cpu_is_offline(smp_processor_id())); | ||
| 1387 | bl = rdp->blimit; | 1494 | bl = rdp->blimit; |
| 1388 | trace_rcu_batch_start(rsp->name, rdp->qlen, bl); | 1495 | trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, bl); |
| 1389 | list = rdp->nxtlist; | 1496 | list = rdp->nxtlist; |
| 1390 | rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; | 1497 | rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; |
| 1391 | *rdp->nxttail[RCU_DONE_TAIL] = NULL; | 1498 | *rdp->nxttail[RCU_DONE_TAIL] = NULL; |
| @@ -1396,12 +1503,13 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 1396 | local_irq_restore(flags); | 1503 | local_irq_restore(flags); |
| 1397 | 1504 | ||
| 1398 | /* Invoke callbacks. */ | 1505 | /* Invoke callbacks. */ |
| 1399 | count = 0; | 1506 | count = count_lazy = 0; |
| 1400 | while (list) { | 1507 | while (list) { |
| 1401 | next = list->next; | 1508 | next = list->next; |
| 1402 | prefetch(next); | 1509 | prefetch(next); |
| 1403 | debug_rcu_head_unqueue(list); | 1510 | debug_rcu_head_unqueue(list); |
| 1404 | __rcu_reclaim(rsp->name, list); | 1511 | if (__rcu_reclaim(rsp->name, list)) |
| 1512 | count_lazy++; | ||
| 1405 | list = next; | 1513 | list = next; |
| 1406 | /* Stop only if limit reached and CPU has something to do. */ | 1514 | /* Stop only if limit reached and CPU has something to do. */ |
| 1407 | if (++count >= bl && | 1515 | if (++count >= bl && |
| @@ -1416,6 +1524,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 1416 | rcu_is_callbacks_kthread()); | 1524 | rcu_is_callbacks_kthread()); |
| 1417 | 1525 | ||
| 1418 | /* Update count, and requeue any remaining callbacks. */ | 1526 | /* Update count, and requeue any remaining callbacks. */ |
| 1527 | rdp->qlen_lazy -= count_lazy; | ||
| 1419 | rdp->qlen -= count; | 1528 | rdp->qlen -= count; |
| 1420 | rdp->n_cbs_invoked += count; | 1529 | rdp->n_cbs_invoked += count; |
| 1421 | if (list != NULL) { | 1530 | if (list != NULL) { |
| @@ -1458,6 +1567,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 1458 | void rcu_check_callbacks(int cpu, int user) | 1567 | void rcu_check_callbacks(int cpu, int user) |
| 1459 | { | 1568 | { |
| 1460 | trace_rcu_utilization("Start scheduler-tick"); | 1569 | trace_rcu_utilization("Start scheduler-tick"); |
| 1570 | increment_cpu_stall_ticks(); | ||
| 1461 | if (user || rcu_is_cpu_rrupt_from_idle()) { | 1571 | if (user || rcu_is_cpu_rrupt_from_idle()) { |
| 1462 | 1572 | ||
| 1463 | /* | 1573 | /* |
| @@ -1492,8 +1602,6 @@ void rcu_check_callbacks(int cpu, int user) | |||
| 1492 | trace_rcu_utilization("End scheduler-tick"); | 1602 | trace_rcu_utilization("End scheduler-tick"); |
| 1493 | } | 1603 | } |
| 1494 | 1604 | ||
| 1495 | #ifdef CONFIG_SMP | ||
| 1496 | |||
| 1497 | /* | 1605 | /* |
| 1498 | * Scan the leaf rcu_node structures, processing dyntick state for any that | 1606 | * Scan the leaf rcu_node structures, processing dyntick state for any that |
| 1499 | * have not yet encountered a quiescent state, using the function specified. | 1607 | * have not yet encountered a quiescent state, using the function specified. |
| @@ -1616,15 +1724,6 @@ unlock_fqs_ret: | |||
| 1616 | trace_rcu_utilization("End fqs"); | 1724 | trace_rcu_utilization("End fqs"); |
| 1617 | } | 1725 | } |
| 1618 | 1726 | ||
| 1619 | #else /* #ifdef CONFIG_SMP */ | ||
| 1620 | |||
| 1621 | static void force_quiescent_state(struct rcu_state *rsp, int relaxed) | ||
| 1622 | { | ||
| 1623 | set_need_resched(); | ||
| 1624 | } | ||
| 1625 | |||
| 1626 | #endif /* #else #ifdef CONFIG_SMP */ | ||
| 1627 | |||
| 1628 | /* | 1727 | /* |
| 1629 | * This does the RCU core processing work for the specified rcu_state | 1728 | * This does the RCU core processing work for the specified rcu_state |
| 1630 | * and rcu_data structures. This may be called only from the CPU to | 1729 | * and rcu_data structures. This may be called only from the CPU to |
| @@ -1702,11 +1801,12 @@ static void invoke_rcu_core(void) | |||
| 1702 | 1801 | ||
| 1703 | static void | 1802 | static void |
| 1704 | __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), | 1803 | __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), |
| 1705 | struct rcu_state *rsp) | 1804 | struct rcu_state *rsp, bool lazy) |
| 1706 | { | 1805 | { |
| 1707 | unsigned long flags; | 1806 | unsigned long flags; |
| 1708 | struct rcu_data *rdp; | 1807 | struct rcu_data *rdp; |
| 1709 | 1808 | ||
| 1809 | WARN_ON_ONCE((unsigned long)head & 0x3); /* Misaligned rcu_head! */ | ||
| 1710 | debug_rcu_head_queue(head); | 1810 | debug_rcu_head_queue(head); |
| 1711 | head->func = func; | 1811 | head->func = func; |
| 1712 | head->next = NULL; | 1812 | head->next = NULL; |
| @@ -1720,18 +1820,21 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), | |||
| 1720 | * a quiescent state betweentimes. | 1820 | * a quiescent state betweentimes. |
| 1721 | */ | 1821 | */ |
| 1722 | local_irq_save(flags); | 1822 | local_irq_save(flags); |
| 1823 | WARN_ON_ONCE(cpu_is_offline(smp_processor_id())); | ||
| 1723 | rdp = this_cpu_ptr(rsp->rda); | 1824 | rdp = this_cpu_ptr(rsp->rda); |
| 1724 | 1825 | ||
| 1725 | /* Add the callback to our list. */ | 1826 | /* Add the callback to our list. */ |
| 1726 | *rdp->nxttail[RCU_NEXT_TAIL] = head; | 1827 | *rdp->nxttail[RCU_NEXT_TAIL] = head; |
| 1727 | rdp->nxttail[RCU_NEXT_TAIL] = &head->next; | 1828 | rdp->nxttail[RCU_NEXT_TAIL] = &head->next; |
| 1728 | rdp->qlen++; | 1829 | rdp->qlen++; |
| 1830 | if (lazy) | ||
| 1831 | rdp->qlen_lazy++; | ||
| 1729 | 1832 | ||
| 1730 | if (__is_kfree_rcu_offset((unsigned long)func)) | 1833 | if (__is_kfree_rcu_offset((unsigned long)func)) |
| 1731 | trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func, | 1834 | trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func, |
| 1732 | rdp->qlen); | 1835 | rdp->qlen_lazy, rdp->qlen); |
| 1733 | else | 1836 | else |
| 1734 | trace_rcu_callback(rsp->name, head, rdp->qlen); | 1837 | trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen); |
| 1735 | 1838 | ||
| 1736 | /* If interrupts were disabled, don't dive into RCU core. */ | 1839 | /* If interrupts were disabled, don't dive into RCU core. */ |
| 1737 | if (irqs_disabled_flags(flags)) { | 1840 | if (irqs_disabled_flags(flags)) { |
| @@ -1778,16 +1881,16 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), | |||
| 1778 | */ | 1881 | */ |
| 1779 | void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | 1882 | void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) |
| 1780 | { | 1883 | { |
| 1781 | __call_rcu(head, func, &rcu_sched_state); | 1884 | __call_rcu(head, func, &rcu_sched_state, 0); |
| 1782 | } | 1885 | } |
| 1783 | EXPORT_SYMBOL_GPL(call_rcu_sched); | 1886 | EXPORT_SYMBOL_GPL(call_rcu_sched); |
| 1784 | 1887 | ||
| 1785 | /* | 1888 | /* |
| 1786 | * Queue an RCU for invocation after a quicker grace period. | 1889 | * Queue an RCU callback for invocation after a quicker grace period. |
| 1787 | */ | 1890 | */ |
| 1788 | void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | 1891 | void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) |
| 1789 | { | 1892 | { |
| 1790 | __call_rcu(head, func, &rcu_bh_state); | 1893 | __call_rcu(head, func, &rcu_bh_state, 0); |
| 1791 | } | 1894 | } |
| 1792 | EXPORT_SYMBOL_GPL(call_rcu_bh); | 1895 | EXPORT_SYMBOL_GPL(call_rcu_bh); |
| 1793 | 1896 | ||
| @@ -1816,6 +1919,10 @@ EXPORT_SYMBOL_GPL(call_rcu_bh); | |||
| 1816 | */ | 1919 | */ |
| 1817 | void synchronize_sched(void) | 1920 | void synchronize_sched(void) |
| 1818 | { | 1921 | { |
| 1922 | rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) && | ||
| 1923 | !lock_is_held(&rcu_lock_map) && | ||
| 1924 | !lock_is_held(&rcu_sched_lock_map), | ||
| 1925 | "Illegal synchronize_sched() in RCU-sched read-side critical section"); | ||
| 1819 | if (rcu_blocking_is_gp()) | 1926 | if (rcu_blocking_is_gp()) |
| 1820 | return; | 1927 | return; |
| 1821 | wait_rcu_gp(call_rcu_sched); | 1928 | wait_rcu_gp(call_rcu_sched); |
| @@ -1833,12 +1940,137 @@ EXPORT_SYMBOL_GPL(synchronize_sched); | |||
| 1833 | */ | 1940 | */ |
| 1834 | void synchronize_rcu_bh(void) | 1941 | void synchronize_rcu_bh(void) |
| 1835 | { | 1942 | { |
| 1943 | rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) && | ||
| 1944 | !lock_is_held(&rcu_lock_map) && | ||
| 1945 | !lock_is_held(&rcu_sched_lock_map), | ||
| 1946 | "Illegal synchronize_rcu_bh() in RCU-bh read-side critical section"); | ||
| 1836 | if (rcu_blocking_is_gp()) | 1947 | if (rcu_blocking_is_gp()) |
| 1837 | return; | 1948 | return; |
| 1838 | wait_rcu_gp(call_rcu_bh); | 1949 | wait_rcu_gp(call_rcu_bh); |
| 1839 | } | 1950 | } |
| 1840 | EXPORT_SYMBOL_GPL(synchronize_rcu_bh); | 1951 | EXPORT_SYMBOL_GPL(synchronize_rcu_bh); |
| 1841 | 1952 | ||
| 1953 | static atomic_t sync_sched_expedited_started = ATOMIC_INIT(0); | ||
| 1954 | static atomic_t sync_sched_expedited_done = ATOMIC_INIT(0); | ||
| 1955 | |||
| 1956 | static int synchronize_sched_expedited_cpu_stop(void *data) | ||
| 1957 | { | ||
| 1958 | /* | ||
| 1959 | * There must be a full memory barrier on each affected CPU | ||
| 1960 | * between the time that try_stop_cpus() is called and the | ||
| 1961 | * time that it returns. | ||
| 1962 | * | ||
| 1963 | * In the current initial implementation of cpu_stop, the | ||
| 1964 | * above condition is already met when the control reaches | ||
| 1965 | * this point and the following smp_mb() is not strictly | ||
| 1966 | * necessary. Do smp_mb() anyway for documentation and | ||
| 1967 | * robustness against future implementation changes. | ||
| 1968 | */ | ||
| 1969 | smp_mb(); /* See above comment block. */ | ||
| 1970 | return 0; | ||
| 1971 | } | ||
| 1972 | |||
| 1973 | /** | ||
| 1974 | * synchronize_sched_expedited - Brute-force RCU-sched grace period | ||
| 1975 | * | ||
| 1976 | * Wait for an RCU-sched grace period to elapse, but use a "big hammer" | ||
| 1977 | * approach to force the grace period to end quickly. This consumes | ||
| 1978 | * significant time on all CPUs and is unfriendly to real-time workloads, | ||
| 1979 | * and is thus not recommended for any sort of common-case code. In fact, | ||
| 1980 | * if you are using synchronize_sched_expedited() in a loop, please | ||
| 1981 | * restructure your code to batch your updates, and then use a single | ||
| 1982 | * synchronize_sched() instead. | ||
| 1983 | * | ||
| 1984 | * Note that it is illegal to call this function while holding any lock | ||
| 1985 | * that is acquired by a CPU-hotplug notifier. And yes, it is also illegal | ||
| 1986 | * to call this function from a CPU-hotplug notifier. Failing to observe | ||
| 1987 | * these restrictions will result in deadlock. | ||
| 1988 | * | ||
| 1989 | * This implementation can be thought of as an application of ticket | ||
| 1990 | * locking to RCU, with sync_sched_expedited_started and | ||
| 1991 | * sync_sched_expedited_done taking on the roles of the halves | ||
| 1992 | * of the ticket-lock word. Each task atomically increments | ||
| 1993 | * sync_sched_expedited_started upon entry, snapshotting the old value, | ||
| 1994 | * then attempts to stop all the CPUs. If this succeeds, then each | ||
| 1995 | * CPU will have executed a context switch, resulting in an RCU-sched | ||
| 1996 | * grace period. We are then done, so we use atomic_cmpxchg() to | ||
| 1997 | * update sync_sched_expedited_done to match our snapshot -- but | ||
| 1998 | * only if someone else has not already advanced past our snapshot. | ||
| 1999 | * | ||
| 2000 | * On the other hand, if try_stop_cpus() fails, we check the value | ||
| 2001 | * of sync_sched_expedited_done. If it has advanced past our | ||
| 2002 | * initial snapshot, then someone else must have forced a grace period | ||
| 2003 | * some time after we took our snapshot. In this case, our work is | ||
| 2004 | * done for us, and we can simply return. Otherwise, we try again, | ||
| 2005 | * but keep our initial snapshot for purposes of checking for someone | ||
| 2006 | * doing our work for us. | ||
| 2007 | * | ||
| 2008 | * If we fail too many times in a row, we fall back to synchronize_sched(). | ||
| 2009 | */ | ||
| 2010 | void synchronize_sched_expedited(void) | ||
| 2011 | { | ||
| 2012 | int firstsnap, s, snap, trycount = 0; | ||
| 2013 | |||
| 2014 | /* Note that atomic_inc_return() implies full memory barrier. */ | ||
| 2015 | firstsnap = snap = atomic_inc_return(&sync_sched_expedited_started); | ||
| 2016 | get_online_cpus(); | ||
| 2017 | WARN_ON_ONCE(cpu_is_offline(raw_smp_processor_id())); | ||
| 2018 | |||
| 2019 | /* | ||
| 2020 | * Each pass through the following loop attempts to force a | ||
| 2021 | * context switch on each CPU. | ||
| 2022 | */ | ||
| 2023 | while (try_stop_cpus(cpu_online_mask, | ||
| 2024 | synchronize_sched_expedited_cpu_stop, | ||
| 2025 | NULL) == -EAGAIN) { | ||
| 2026 | put_online_cpus(); | ||
| 2027 | |||
| 2028 | /* No joy, try again later. Or just synchronize_sched(). */ | ||
| 2029 | if (trycount++ < 10) | ||
| 2030 | udelay(trycount * num_online_cpus()); | ||
| 2031 | else { | ||
| 2032 | synchronize_sched(); | ||
| 2033 | return; | ||
| 2034 | } | ||
| 2035 | |||
| 2036 | /* Check to see if someone else did our work for us. */ | ||
| 2037 | s = atomic_read(&sync_sched_expedited_done); | ||
| 2038 | if (UINT_CMP_GE((unsigned)s, (unsigned)firstsnap)) { | ||
| 2039 | smp_mb(); /* ensure test happens before caller kfree */ | ||
| 2040 | return; | ||
| 2041 | } | ||
| 2042 | |||
| 2043 | /* | ||
| 2044 | * Refetching sync_sched_expedited_started allows later | ||
| 2045 | * callers to piggyback on our grace period. We subtract | ||
| 2046 | * 1 to get the same token that the last incrementer got. | ||
| 2047 | * We retry after they started, so our grace period works | ||
| 2048 | * for them, and they started after our first try, so their | ||
| 2049 | * grace period works for us. | ||
| 2050 | */ | ||
| 2051 | get_online_cpus(); | ||
| 2052 | snap = atomic_read(&sync_sched_expedited_started); | ||
| 2053 | smp_mb(); /* ensure read is before try_stop_cpus(). */ | ||
| 2054 | } | ||
| 2055 | |||
| 2056 | /* | ||
| 2057 | * Everyone up to our most recent fetch is covered by our grace | ||
| 2058 | * period. Update the counter, but only if our work is still | ||
| 2059 | * relevant -- which it won't be if someone who started later | ||
| 2060 | * than we did beat us to the punch. | ||
| 2061 | */ | ||
| 2062 | do { | ||
| 2063 | s = atomic_read(&sync_sched_expedited_done); | ||
| 2064 | if (UINT_CMP_GE((unsigned)s, (unsigned)snap)) { | ||
| 2065 | smp_mb(); /* ensure test happens before caller kfree */ | ||
| 2066 | break; | ||
| 2067 | } | ||
| 2068 | } while (atomic_cmpxchg(&sync_sched_expedited_done, s, snap) != s); | ||
| 2069 | |||
| 2070 | put_online_cpus(); | ||
| 2071 | } | ||
| 2072 | EXPORT_SYMBOL_GPL(synchronize_sched_expedited); | ||
| 2073 | |||
| 1842 | /* | 2074 | /* |
| 1843 | * Check to see if there is any immediate RCU-related work to be done | 2075 | * Check to see if there is any immediate RCU-related work to be done |
| 1844 | * by the current CPU, for the specified type of RCU, returning 1 if so. | 2076 | * by the current CPU, for the specified type of RCU, returning 1 if so. |
| @@ -1932,7 +2164,7 @@ static int rcu_cpu_has_callbacks(int cpu) | |||
| 1932 | /* RCU callbacks either ready or pending? */ | 2164 | /* RCU callbacks either ready or pending? */ |
| 1933 | return per_cpu(rcu_sched_data, cpu).nxtlist || | 2165 | return per_cpu(rcu_sched_data, cpu).nxtlist || |
| 1934 | per_cpu(rcu_bh_data, cpu).nxtlist || | 2166 | per_cpu(rcu_bh_data, cpu).nxtlist || |
| 1935 | rcu_preempt_needs_cpu(cpu); | 2167 | rcu_preempt_cpu_has_callbacks(cpu); |
| 1936 | } | 2168 | } |
| 1937 | 2169 | ||
| 1938 | static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; | 2170 | static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; |
| @@ -2027,9 +2259,10 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) | |||
| 2027 | rdp->nxtlist = NULL; | 2259 | rdp->nxtlist = NULL; |
| 2028 | for (i = 0; i < RCU_NEXT_SIZE; i++) | 2260 | for (i = 0; i < RCU_NEXT_SIZE; i++) |
| 2029 | rdp->nxttail[i] = &rdp->nxtlist; | 2261 | rdp->nxttail[i] = &rdp->nxtlist; |
| 2262 | rdp->qlen_lazy = 0; | ||
| 2030 | rdp->qlen = 0; | 2263 | rdp->qlen = 0; |
| 2031 | rdp->dynticks = &per_cpu(rcu_dynticks, cpu); | 2264 | rdp->dynticks = &per_cpu(rcu_dynticks, cpu); |
| 2032 | WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_NESTING); | 2265 | WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE); |
| 2033 | WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1); | 2266 | WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1); |
| 2034 | rdp->cpu = cpu; | 2267 | rdp->cpu = cpu; |
| 2035 | rdp->rsp = rsp; | 2268 | rdp->rsp = rsp; |
| @@ -2057,7 +2290,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) | |||
| 2057 | rdp->qlen_last_fqs_check = 0; | 2290 | rdp->qlen_last_fqs_check = 0; |
| 2058 | rdp->n_force_qs_snap = rsp->n_force_qs; | 2291 | rdp->n_force_qs_snap = rsp->n_force_qs; |
| 2059 | rdp->blimit = blimit; | 2292 | rdp->blimit = blimit; |
| 2060 | rdp->dynticks->dynticks_nesting = DYNTICK_TASK_NESTING; | 2293 | rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; |
| 2061 | atomic_set(&rdp->dynticks->dynticks, | 2294 | atomic_set(&rdp->dynticks->dynticks, |
| 2062 | (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1); | 2295 | (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1); |
| 2063 | rcu_prepare_for_idle_init(cpu); | 2296 | rcu_prepare_for_idle_init(cpu); |
| @@ -2139,16 +2372,18 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, | |||
| 2139 | * touch any data without introducing corruption. We send the | 2372 | * touch any data without introducing corruption. We send the |
| 2140 | * dying CPU's callbacks to an arbitrarily chosen online CPU. | 2373 | * dying CPU's callbacks to an arbitrarily chosen online CPU. |
| 2141 | */ | 2374 | */ |
| 2142 | rcu_send_cbs_to_online(&rcu_bh_state); | 2375 | rcu_cleanup_dying_cpu(&rcu_bh_state); |
| 2143 | rcu_send_cbs_to_online(&rcu_sched_state); | 2376 | rcu_cleanup_dying_cpu(&rcu_sched_state); |
| 2144 | rcu_preempt_send_cbs_to_online(); | 2377 | rcu_preempt_cleanup_dying_cpu(); |
| 2145 | rcu_cleanup_after_idle(cpu); | 2378 | rcu_cleanup_after_idle(cpu); |
| 2146 | break; | 2379 | break; |
| 2147 | case CPU_DEAD: | 2380 | case CPU_DEAD: |
| 2148 | case CPU_DEAD_FROZEN: | 2381 | case CPU_DEAD_FROZEN: |
| 2149 | case CPU_UP_CANCELED: | 2382 | case CPU_UP_CANCELED: |
| 2150 | case CPU_UP_CANCELED_FROZEN: | 2383 | case CPU_UP_CANCELED_FROZEN: |
| 2151 | rcu_offline_cpu(cpu); | 2384 | rcu_cleanup_dead_cpu(cpu, &rcu_bh_state); |
| 2385 | rcu_cleanup_dead_cpu(cpu, &rcu_sched_state); | ||
| 2386 | rcu_preempt_cleanup_dead_cpu(cpu); | ||
| 2152 | break; | 2387 | break; |
| 2153 | default: | 2388 | default: |
| 2154 | break; | 2389 | break; |
