Diffstat (limited to 'kernel/rcutree.c')
 -rw-r--r--  kernel/rcutree.c | 507
 1 file changed, 371 insertions, 136 deletions
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 6c4a6722abfd..1050d6d3922c 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -50,6 +50,8 @@ | |||
50 | #include <linux/wait.h> | 50 | #include <linux/wait.h> |
51 | #include <linux/kthread.h> | 51 | #include <linux/kthread.h> |
52 | #include <linux/prefetch.h> | 52 | #include <linux/prefetch.h> |
53 | #include <linux/delay.h> | ||
54 | #include <linux/stop_machine.h> | ||
53 | 55 | ||
54 | #include "rcutree.h" | 56 | #include "rcutree.h" |
55 | #include <trace/events/rcu.h> | 57 | #include <trace/events/rcu.h> |
@@ -196,7 +198,7 @@ void rcu_note_context_switch(int cpu) | |||
196 | EXPORT_SYMBOL_GPL(rcu_note_context_switch); | 198 | EXPORT_SYMBOL_GPL(rcu_note_context_switch); |
197 | 199 | ||
198 | DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { | 200 | DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { |
199 | .dynticks_nesting = DYNTICK_TASK_NESTING, | 201 | .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE, |
200 | .dynticks = ATOMIC_INIT(1), | 202 | .dynticks = ATOMIC_INIT(1), |
201 | }; | 203 | }; |
202 | 204 | ||
@@ -208,8 +210,11 @@ module_param(blimit, int, 0); | |||
208 | module_param(qhimark, int, 0); | 210 | module_param(qhimark, int, 0); |
209 | module_param(qlowmark, int, 0); | 211 | module_param(qlowmark, int, 0); |
210 | 212 | ||
211 | int rcu_cpu_stall_suppress __read_mostly; | 213 | int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */ |
214 | int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT; | ||
215 | |||
212 | module_param(rcu_cpu_stall_suppress, int, 0644); | 216 | module_param(rcu_cpu_stall_suppress, int, 0644); |
217 | module_param(rcu_cpu_stall_timeout, int, 0644); | ||
213 | 218 | ||
214 | static void force_quiescent_state(struct rcu_state *rsp, int relaxed); | 219 | static void force_quiescent_state(struct rcu_state *rsp, int relaxed); |
215 | static int rcu_pending(int cpu); | 220 | static int rcu_pending(int cpu); |
@@ -301,8 +306,6 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp) | |||
301 | return &rsp->node[0]; | 306 | return &rsp->node[0]; |
302 | } | 307 | } |
303 | 308 | ||
304 | #ifdef CONFIG_SMP | ||
305 | |||
306 | /* | 309 | /* |
307 | * If the specified CPU is offline, tell the caller that it is in | 310 | * If the specified CPU is offline, tell the caller that it is in |
308 | * a quiescent state. Otherwise, whack it with a reschedule IPI. | 311 | * a quiescent state. Otherwise, whack it with a reschedule IPI. |
@@ -317,30 +320,21 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp) | |||
317 | static int rcu_implicit_offline_qs(struct rcu_data *rdp) | 320 | static int rcu_implicit_offline_qs(struct rcu_data *rdp) |
318 | { | 321 | { |
319 | /* | 322 | /* |
320 | * If the CPU is offline, it is in a quiescent state. We can | 323 | * If the CPU is offline for more than a jiffy, it is in a quiescent |
321 | * trust its state not to change because interrupts are disabled. | 324 | * state. We can trust its state not to change because interrupts |
325 | * are disabled. The reason for the jiffy's worth of slack is to | ||
326 | * handle CPUs initializing on the way up and finding their way | ||
327 | * to the idle loop on the way down. | ||
322 | */ | 328 | */ |
323 | if (cpu_is_offline(rdp->cpu)) { | 329 | if (cpu_is_offline(rdp->cpu) && |
330 | ULONG_CMP_LT(rdp->rsp->gp_start + 2, jiffies)) { | ||
324 | trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl"); | 331 | trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl"); |
325 | rdp->offline_fqs++; | 332 | rdp->offline_fqs++; |
326 | return 1; | 333 | return 1; |
327 | } | 334 | } |
328 | |||
329 | /* | ||
330 | * The CPU is online, so send it a reschedule IPI. This forces | ||
331 | * it through the scheduler, and (inefficiently) also handles cases | ||
332 | * where idle loops fail to inform RCU about the CPU being idle. | ||
333 | */ | ||
334 | if (rdp->cpu != smp_processor_id()) | ||
335 | smp_send_reschedule(rdp->cpu); | ||
336 | else | ||
337 | set_need_resched(); | ||
338 | rdp->resched_ipi++; | ||
339 | return 0; | 335 | return 0; |
340 | } | 336 | } |
341 | 337 | ||
342 | #endif /* #ifdef CONFIG_SMP */ | ||
343 | |||
344 | /* | 338 | /* |
345 | * rcu_idle_enter_common - inform RCU that current CPU is moving towards idle | 339 | * rcu_idle_enter_common - inform RCU that current CPU is moving towards idle |
346 | * | 340 | * |
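The offline check in the hunk above tolerates a CPU that has been offline for less than roughly a jiffy, and it uses ULONG_CMP_LT() so the comparison keeps working when the jiffies counter wraps. The following stand-alone C sketch models that wraparound-safe comparison as a signed-difference test (equivalent in effect to the kernel macro); the numbers are made up for the demonstration:

    #include <limits.h>
    #include <stdio.h>

    /* Wrap-safe "a < b" for free-running unsigned counters such as jiffies,
     * modelled as a signed-difference test (what ULONG_CMP_LT() amounts to). */
    #define ULONG_CMP_LT(a, b)  ((long)((a) - (b)) < 0)

    int main(void)
    {
        unsigned long gp_start = ULONG_MAX - 10; /* grace period began just before wrap */
        unsigned long jiffies  = 5;              /* the counter has since wrapped */

        /* A naive "<" says no time has passed; the wrap-safe form gets it right. */
        printf("naive        : %d\n", gp_start + 2 < jiffies);
        printf("ULONG_CMP_LT : %d\n", ULONG_CMP_LT(gp_start + 2, jiffies));
        return 0;
    }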
@@ -366,6 +360,17 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval) | |||
366 | atomic_inc(&rdtp->dynticks); | 360 | atomic_inc(&rdtp->dynticks); |
367 | smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */ | 361 | smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */ |
368 | WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); | 362 | WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); |
363 | |||
364 | /* | ||
365 | * The idle task is not permitted to enter the idle loop while | ||
366 | * in an RCU read-side critical section. | ||
367 | */ | ||
368 | rcu_lockdep_assert(!lock_is_held(&rcu_lock_map), | ||
369 | "Illegal idle entry in RCU read-side critical section."); | ||
370 | rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map), | ||
371 | "Illegal idle entry in RCU-bh read-side critical section."); | ||
372 | rcu_lockdep_assert(!lock_is_held(&rcu_sched_lock_map), | ||
373 | "Illegal idle entry in RCU-sched read-side critical section."); | ||
369 | } | 374 | } |
370 | 375 | ||
371 | /** | 376 | /** |
@@ -389,10 +394,15 @@ void rcu_idle_enter(void) | |||
389 | local_irq_save(flags); | 394 | local_irq_save(flags); |
390 | rdtp = &__get_cpu_var(rcu_dynticks); | 395 | rdtp = &__get_cpu_var(rcu_dynticks); |
391 | oldval = rdtp->dynticks_nesting; | 396 | oldval = rdtp->dynticks_nesting; |
392 | rdtp->dynticks_nesting = 0; | 397 | WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0); |
398 | if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE) | ||
399 | rdtp->dynticks_nesting = 0; | ||
400 | else | ||
401 | rdtp->dynticks_nesting -= DYNTICK_TASK_NEST_VALUE; | ||
393 | rcu_idle_enter_common(rdtp, oldval); | 402 | rcu_idle_enter_common(rdtp, oldval); |
394 | local_irq_restore(flags); | 403 | local_irq_restore(flags); |
395 | } | 404 | } |
405 | EXPORT_SYMBOL_GPL(rcu_idle_enter); | ||
396 | 406 | ||
397 | /** | 407 | /** |
398 | * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle | 408 | * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle |
@@ -462,7 +472,7 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval) | |||
462 | * Exit idle mode, in other words, -enter- the mode in which RCU | 472 | * Exit idle mode, in other words, -enter- the mode in which RCU |
463 | * read-side critical sections can occur. | 473 | * read-side critical sections can occur. |
464 | * | 474 | * |
465 | * We crowbar the ->dynticks_nesting field to DYNTICK_TASK_NESTING to | 475 | * We crowbar the ->dynticks_nesting field to DYNTICK_TASK_NEST to |
466 | * allow for the possibility of usermode upcalls messing up our count | 476 | * allow for the possibility of usermode upcalls messing up our count |
467 | * of interrupt nesting level during the busy period that is just | 477 | * of interrupt nesting level during the busy period that is just |
468 | * now starting. | 478 | * now starting. |
@@ -476,11 +486,15 @@ void rcu_idle_exit(void) | |||
476 | local_irq_save(flags); | 486 | local_irq_save(flags); |
477 | rdtp = &__get_cpu_var(rcu_dynticks); | 487 | rdtp = &__get_cpu_var(rcu_dynticks); |
478 | oldval = rdtp->dynticks_nesting; | 488 | oldval = rdtp->dynticks_nesting; |
479 | WARN_ON_ONCE(oldval != 0); | 489 | WARN_ON_ONCE(oldval < 0); |
480 | rdtp->dynticks_nesting = DYNTICK_TASK_NESTING; | 490 | if (oldval & DYNTICK_TASK_NEST_MASK) |
491 | rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE; | ||
492 | else | ||
493 | rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; | ||
481 | rcu_idle_exit_common(rdtp, oldval); | 494 | rcu_idle_exit_common(rdtp, oldval); |
482 | local_irq_restore(flags); | 495 | local_irq_restore(flags); |
483 | } | 496 | } |
497 | EXPORT_SYMBOL_GPL(rcu_idle_exit); | ||
484 | 498 | ||
485 | /** | 499 | /** |
486 | * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle | 500 | * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle |
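The new enter/exit arithmetic above keeps task-level idle nesting in a high-order field of ->dynticks_nesting, so that nested rcu_idle_exit()/rcu_idle_enter() pairs (for example around usermode upcalls) balance correctly. The sketch below is a stand-alone model of just that bookkeeping; the constants are invented stand-ins, not the real DYNTICK_TASK_* values from rcu.h, and the dynticks counter manipulation is omitted:

    #include <assert.h>
    #include <stdio.h>

    /* Toy stand-ins for the DYNTICK_TASK_* constants (real values differ). */
    #define NEST_VALUE 0x100LL      /* one unit of task-level idle nesting */
    #define NEST_MASK  0xf00LL      /* bits that hold the task-level count */
    #define EXIT_IDLE  NEST_VALUE   /* ->dynticks_nesting while running    */

    static long long nesting = EXIT_IDLE;

    static void idle_enter(void)    /* mirrors the rcu_idle_enter() hunk */
    {
        assert((nesting & NEST_MASK) != 0);
        if ((nesting & NEST_MASK) == NEST_VALUE)
            nesting = 0;            /* outermost entry: really go idle */
        else
            nesting -= NEST_VALUE;  /* nested entry: pop one level */
    }

    static void idle_exit(void)     /* mirrors the rcu_idle_exit() hunk */
    {
        assert(nesting >= 0);
        if (nesting & NEST_MASK)
            nesting += NEST_VALUE;  /* nested exit: push one level */
        else
            nesting = EXIT_IDLE;    /* leaving real idle */
    }

    int main(void)
    {
        printf("running      : %#llx\n", (unsigned long long)nesting);
        idle_exit();                /* nested exit, e.g. usermode upcall */
        printf("nested exit  : %#llx\n", (unsigned long long)nesting);
        idle_enter();               /* matching enter undoes it          */
        printf("matched enter: %#llx\n", (unsigned long long)nesting);
        idle_enter();               /* outermost enter: now truly idle   */
        printf("idle         : %#llx\n", (unsigned long long)nesting);
        return 0;
    }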
@@ -581,6 +595,49 @@ int rcu_is_cpu_idle(void) | |||
581 | } | 595 | } |
582 | EXPORT_SYMBOL(rcu_is_cpu_idle); | 596 | EXPORT_SYMBOL(rcu_is_cpu_idle); |
583 | 597 | ||
598 | #ifdef CONFIG_HOTPLUG_CPU | ||
599 | |||
600 | /* | ||
601 | * Is the current CPU online? Disable preemption to avoid false positives | ||
602 | * that could otherwise happen due to the current CPU number being sampled, | ||
603 | * this task being preempted, its old CPU being taken offline, resuming | ||
604 | * on some other CPU, then determining that its old CPU is now offline. | ||
605 | * It is OK to use RCU on an offline processor during initial boot, hence | ||
606 | * the check for rcu_scheduler_fully_active. Note also that it is OK | ||
607 | * for a CPU coming online to use RCU for one jiffy prior to marking itself | ||
608 | * online in the cpu_online_mask. Similarly, it is OK for a CPU going | ||
609 | * offline to continue to use RCU for one jiffy after marking itself | ||
610 | * offline in the cpu_online_mask. This leniency is necessary given the | ||
611 | * non-atomic nature of the online and offline processing, for example, | ||
612 | * the fact that a CPU enters the scheduler after completing the CPU_DYING | ||
613 | * notifiers. | ||
614 | * | ||
615 | * This is also why RCU internally marks CPUs online during the | ||
616 | * CPU_UP_PREPARE phase and offline during the CPU_DEAD phase. | ||
617 | * | ||
618 | * Disable checking if in an NMI handler because we cannot safely report | ||
619 | * errors from NMI handlers anyway. | ||
620 | */ | ||
621 | bool rcu_lockdep_current_cpu_online(void) | ||
622 | { | ||
623 | struct rcu_data *rdp; | ||
624 | struct rcu_node *rnp; | ||
625 | bool ret; | ||
626 | |||
627 | if (in_nmi()) | ||
628 | return 1; | ||
629 | preempt_disable(); | ||
630 | rdp = &__get_cpu_var(rcu_sched_data); | ||
631 | rnp = rdp->mynode; | ||
632 | ret = (rdp->grpmask & rnp->qsmaskinit) || | ||
633 | !rcu_scheduler_fully_active; | ||
634 | preempt_enable(); | ||
635 | return ret; | ||
636 | } | ||
637 | EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online); | ||
638 | |||
639 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | ||
640 | |||
584 | #endif /* #ifdef CONFIG_PROVE_RCU */ | 641 | #endif /* #ifdef CONFIG_PROVE_RCU */ |
585 | 642 | ||
586 | /** | 643 | /** |
@@ -595,8 +652,6 @@ int rcu_is_cpu_rrupt_from_idle(void) | |||
595 | return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1; | 652 | return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1; |
596 | } | 653 | } |
597 | 654 | ||
598 | #ifdef CONFIG_SMP | ||
599 | |||
600 | /* | 655 | /* |
601 | * Snapshot the specified CPU's dynticks counter so that we can later | 656 | * Snapshot the specified CPU's dynticks counter so that we can later |
602 | * credit them with an implicit quiescent state. Return 1 if this CPU | 657 | * credit them with an implicit quiescent state. Return 1 if this CPU |
@@ -640,12 +695,28 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) | |||
640 | return rcu_implicit_offline_qs(rdp); | 695 | return rcu_implicit_offline_qs(rdp); |
641 | } | 696 | } |
642 | 697 | ||
643 | #endif /* #ifdef CONFIG_SMP */ | 698 | static int jiffies_till_stall_check(void) |
699 | { | ||
700 | int till_stall_check = ACCESS_ONCE(rcu_cpu_stall_timeout); | ||
701 | |||
702 | /* | ||
703 | * Limit check must be consistent with the Kconfig limits | ||
704 | * for CONFIG_RCU_CPU_STALL_TIMEOUT. | ||
705 | */ | ||
706 | if (till_stall_check < 3) { | ||
707 | ACCESS_ONCE(rcu_cpu_stall_timeout) = 3; | ||
708 | till_stall_check = 3; | ||
709 | } else if (till_stall_check > 300) { | ||
710 | ACCESS_ONCE(rcu_cpu_stall_timeout) = 300; | ||
711 | till_stall_check = 300; | ||
712 | } | ||
713 | return till_stall_check * HZ + RCU_STALL_DELAY_DELTA; | ||
714 | } | ||
644 | 715 | ||
645 | static void record_gp_stall_check_time(struct rcu_state *rsp) | 716 | static void record_gp_stall_check_time(struct rcu_state *rsp) |
646 | { | 717 | { |
647 | rsp->gp_start = jiffies; | 718 | rsp->gp_start = jiffies; |
648 | rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK; | 719 | rsp->jiffies_stall = jiffies + jiffies_till_stall_check(); |
649 | } | 720 | } |
650 | 721 | ||
651 | static void print_other_cpu_stall(struct rcu_state *rsp) | 722 | static void print_other_cpu_stall(struct rcu_state *rsp) |
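jiffies_till_stall_check() above replaces the old fixed RCU_SECONDS_TILL_STALL_CHECK: the run-time rcu_cpu_stall_timeout value is clamped into the same 3..300 second range the Kconfig option allows and then converted to jiffies (the real function also writes the clamped value back to the module parameter and adds RCU_STALL_DELAY_DELTA). A minimal user-space rendering of the clamp, with an illustrative HZ:

    #include <stdio.h>

    #define HZ 100                          /* illustrative tick rate */

    /* Clamp the requested stall timeout (seconds) to 3..300 and convert to
     * jiffies, mirroring jiffies_till_stall_check() in the hunk above. */
    static int stall_check_jiffies(int timeout_seconds)
    {
        if (timeout_seconds < 3)
            timeout_seconds = 3;
        else if (timeout_seconds > 300)
            timeout_seconds = 300;
        return timeout_seconds * HZ;
    }

    int main(void)
    {
        int secs[] = { 0, 21, 60, 100000 };

        for (int i = 0; i < 4; i++)
            printf("timeout=%6d s -> %5d jiffies\n",
                   secs[i], stall_check_jiffies(secs[i]));
        return 0;
    }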
@@ -664,13 +735,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp) | |||
664 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 735 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
665 | return; | 736 | return; |
666 | } | 737 | } |
667 | rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK; | 738 | rsp->jiffies_stall = jiffies + 3 * jiffies_till_stall_check() + 3; |
668 | |||
669 | /* | ||
670 | * Now rat on any tasks that got kicked up to the root rcu_node | ||
671 | * due to CPU offlining. | ||
672 | */ | ||
673 | ndetected = rcu_print_task_stall(rnp); | ||
674 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 739 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
675 | 740 | ||
676 | /* | 741 | /* |
@@ -678,8 +743,9 @@ static void print_other_cpu_stall(struct rcu_state *rsp) | |||
678 | * See Documentation/RCU/stallwarn.txt for info on how to debug | 743 | * See Documentation/RCU/stallwarn.txt for info on how to debug |
679 | * RCU CPU stall warnings. | 744 | * RCU CPU stall warnings. |
680 | */ | 745 | */ |
681 | printk(KERN_ERR "INFO: %s detected stalls on CPUs/tasks: {", | 746 | printk(KERN_ERR "INFO: %s detected stalls on CPUs/tasks:", |
682 | rsp->name); | 747 | rsp->name); |
748 | print_cpu_stall_info_begin(); | ||
683 | rcu_for_each_leaf_node(rsp, rnp) { | 749 | rcu_for_each_leaf_node(rsp, rnp) { |
684 | raw_spin_lock_irqsave(&rnp->lock, flags); | 750 | raw_spin_lock_irqsave(&rnp->lock, flags); |
685 | ndetected += rcu_print_task_stall(rnp); | 751 | ndetected += rcu_print_task_stall(rnp); |
@@ -688,11 +754,22 @@ static void print_other_cpu_stall(struct rcu_state *rsp) | |||
688 | continue; | 754 | continue; |
689 | for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++) | 755 | for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++) |
690 | if (rnp->qsmask & (1UL << cpu)) { | 756 | if (rnp->qsmask & (1UL << cpu)) { |
691 | printk(" %d", rnp->grplo + cpu); | 757 | print_cpu_stall_info(rsp, rnp->grplo + cpu); |
692 | ndetected++; | 758 | ndetected++; |
693 | } | 759 | } |
694 | } | 760 | } |
695 | printk("} (detected by %d, t=%ld jiffies)\n", | 761 | |
762 | /* | ||
763 | * Now rat on any tasks that got kicked up to the root rcu_node | ||
764 | * due to CPU offlining. | ||
765 | */ | ||
766 | rnp = rcu_get_root(rsp); | ||
767 | raw_spin_lock_irqsave(&rnp->lock, flags); | ||
768 | ndetected = rcu_print_task_stall(rnp); | ||
769 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||
770 | |||
771 | print_cpu_stall_info_end(); | ||
772 | printk(KERN_CONT "(detected by %d, t=%ld jiffies)\n", | ||
696 | smp_processor_id(), (long)(jiffies - rsp->gp_start)); | 773 | smp_processor_id(), (long)(jiffies - rsp->gp_start)); |
697 | if (ndetected == 0) | 774 | if (ndetected == 0) |
698 | printk(KERN_ERR "INFO: Stall ended before state dump start\n"); | 775 | printk(KERN_ERR "INFO: Stall ended before state dump start\n"); |
@@ -716,15 +793,18 @@ static void print_cpu_stall(struct rcu_state *rsp) | |||
716 | * See Documentation/RCU/stallwarn.txt for info on how to debug | 793 | * See Documentation/RCU/stallwarn.txt for info on how to debug |
717 | * RCU CPU stall warnings. | 794 | * RCU CPU stall warnings. |
718 | */ | 795 | */ |
719 | printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n", | 796 | printk(KERN_ERR "INFO: %s self-detected stall on CPU", rsp->name); |
720 | rsp->name, smp_processor_id(), jiffies - rsp->gp_start); | 797 | print_cpu_stall_info_begin(); |
798 | print_cpu_stall_info(rsp, smp_processor_id()); | ||
799 | print_cpu_stall_info_end(); | ||
800 | printk(KERN_CONT " (t=%lu jiffies)\n", jiffies - rsp->gp_start); | ||
721 | if (!trigger_all_cpu_backtrace()) | 801 | if (!trigger_all_cpu_backtrace()) |
722 | dump_stack(); | 802 | dump_stack(); |
723 | 803 | ||
724 | raw_spin_lock_irqsave(&rnp->lock, flags); | 804 | raw_spin_lock_irqsave(&rnp->lock, flags); |
725 | if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall)) | 805 | if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall)) |
726 | rsp->jiffies_stall = | 806 | rsp->jiffies_stall = jiffies + |
727 | jiffies + RCU_SECONDS_TILL_STALL_RECHECK; | 807 | 3 * jiffies_till_stall_check() + 3; |
728 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 808 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
729 | 809 | ||
730 | set_need_resched(); /* kick ourselves to get things going. */ | 810 | set_need_resched(); /* kick ourselves to get things going. */ |
@@ -807,6 +887,7 @@ static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct | |||
807 | rdp->passed_quiesce = 0; | 887 | rdp->passed_quiesce = 0; |
808 | } else | 888 | } else |
809 | rdp->qs_pending = 0; | 889 | rdp->qs_pending = 0; |
890 | zero_cpu_stall_ticks(rdp); | ||
810 | } | 891 | } |
811 | } | 892 | } |
812 | 893 | ||
@@ -943,6 +1024,10 @@ rcu_start_gp_per_cpu(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_dat | |||
943 | * in preparation for detecting the next grace period. The caller must hold | 1024 | * in preparation for detecting the next grace period. The caller must hold |
944 | * the root node's ->lock, which is released before return. Hard irqs must | 1025 | * the root node's ->lock, which is released before return. Hard irqs must |
945 | * be disabled. | 1026 | * be disabled. |
1027 | * | ||
1028 | * Note that it is legal for a dying CPU (which is marked as offline) to | ||
1029 | * invoke this function. This can happen when the dying CPU reports its | ||
1030 | * quiescent state. | ||
946 | */ | 1031 | */ |
947 | static void | 1032 | static void |
948 | rcu_start_gp(struct rcu_state *rsp, unsigned long flags) | 1033 | rcu_start_gp(struct rcu_state *rsp, unsigned long flags) |
@@ -980,26 +1065,8 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) | |||
980 | rsp->fqs_state = RCU_GP_INIT; /* Hold off force_quiescent_state. */ | 1065 | rsp->fqs_state = RCU_GP_INIT; /* Hold off force_quiescent_state. */ |
981 | rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; | 1066 | rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; |
982 | record_gp_stall_check_time(rsp); | 1067 | record_gp_stall_check_time(rsp); |
983 | |||
984 | /* Special-case the common single-level case. */ | ||
985 | if (NUM_RCU_NODES == 1) { | ||
986 | rcu_preempt_check_blocked_tasks(rnp); | ||
987 | rnp->qsmask = rnp->qsmaskinit; | ||
988 | rnp->gpnum = rsp->gpnum; | ||
989 | rnp->completed = rsp->completed; | ||
990 | rsp->fqs_state = RCU_SIGNAL_INIT; /* force_quiescent_state OK */ | ||
991 | rcu_start_gp_per_cpu(rsp, rnp, rdp); | ||
992 | rcu_preempt_boost_start_gp(rnp); | ||
993 | trace_rcu_grace_period_init(rsp->name, rnp->gpnum, | ||
994 | rnp->level, rnp->grplo, | ||
995 | rnp->grphi, rnp->qsmask); | ||
996 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||
997 | return; | ||
998 | } | ||
999 | |||
1000 | raw_spin_unlock(&rnp->lock); /* leave irqs disabled. */ | 1068 | raw_spin_unlock(&rnp->lock); /* leave irqs disabled. */ |
1001 | 1069 | ||
1002 | |||
1003 | /* Exclude any concurrent CPU-hotplug operations. */ | 1070 | /* Exclude any concurrent CPU-hotplug operations. */ |
1004 | raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */ | 1071 | raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */ |
1005 | 1072 | ||
@@ -1245,53 +1312,115 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1245 | 1312 | ||
1246 | /* | 1313 | /* |
1247 | * Move a dying CPU's RCU callbacks to online CPU's callback list. | 1314 | * Move a dying CPU's RCU callbacks to online CPU's callback list. |
1248 | * Synchronization is not required because this function executes | 1315 | * Also record a quiescent state for this CPU for the current grace period. |
1249 | * in stop_machine() context. | 1316 | * Synchronization and interrupt disabling are not required because |
1317 | * this function executes in stop_machine() context. Therefore, cleanup | ||
1318 | * operations that might block must be done later from the CPU_DEAD | ||
1319 | * notifier. | ||
1320 | * | ||
1321 | * Note that the outgoing CPU's bit has already been cleared in the | ||
1322 | * cpu_online_mask. This allows us to randomly pick a callback | ||
1323 | * destination from the bits set in that mask. | ||
1250 | */ | 1324 | */ |
1251 | static void rcu_send_cbs_to_online(struct rcu_state *rsp) | 1325 | static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) |
1252 | { | 1326 | { |
1253 | int i; | 1327 | int i; |
1254 | /* current DYING CPU is cleared in the cpu_online_mask */ | 1328 | unsigned long mask; |
1255 | int receive_cpu = cpumask_any(cpu_online_mask); | 1329 | int receive_cpu = cpumask_any(cpu_online_mask); |
1256 | struct rcu_data *rdp = this_cpu_ptr(rsp->rda); | 1330 | struct rcu_data *rdp = this_cpu_ptr(rsp->rda); |
1257 | struct rcu_data *receive_rdp = per_cpu_ptr(rsp->rda, receive_cpu); | 1331 | struct rcu_data *receive_rdp = per_cpu_ptr(rsp->rda, receive_cpu); |
1332 | RCU_TRACE(struct rcu_node *rnp = rdp->mynode); /* For dying CPU. */ | ||
1333 | |||
1334 | /* First, adjust the counts. */ | ||
1335 | if (rdp->nxtlist != NULL) { | ||
1336 | receive_rdp->qlen_lazy += rdp->qlen_lazy; | ||
1337 | receive_rdp->qlen += rdp->qlen; | ||
1338 | rdp->qlen_lazy = 0; | ||
1339 | rdp->qlen = 0; | ||
1340 | } | ||
1258 | 1341 | ||
1259 | if (rdp->nxtlist == NULL) | 1342 | /* |
1260 | return; /* irqs disabled, so comparison is stable. */ | 1343 | * Next, move ready-to-invoke callbacks to be invoked on some |
1344 | * other CPU. These will not be required to pass through another | ||
1345 | * grace period: They are done, regardless of CPU. | ||
1346 | */ | ||
1347 | if (rdp->nxtlist != NULL && | ||
1348 | rdp->nxttail[RCU_DONE_TAIL] != &rdp->nxtlist) { | ||
1349 | struct rcu_head *oldhead; | ||
1350 | struct rcu_head **oldtail; | ||
1351 | struct rcu_head **newtail; | ||
1352 | |||
1353 | oldhead = rdp->nxtlist; | ||
1354 | oldtail = receive_rdp->nxttail[RCU_DONE_TAIL]; | ||
1355 | rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; | ||
1356 | *rdp->nxttail[RCU_DONE_TAIL] = *oldtail; | ||
1357 | *receive_rdp->nxttail[RCU_DONE_TAIL] = oldhead; | ||
1358 | newtail = rdp->nxttail[RCU_DONE_TAIL]; | ||
1359 | for (i = RCU_DONE_TAIL; i < RCU_NEXT_SIZE; i++) { | ||
1360 | if (receive_rdp->nxttail[i] == oldtail) | ||
1361 | receive_rdp->nxttail[i] = newtail; | ||
1362 | if (rdp->nxttail[i] == newtail) | ||
1363 | rdp->nxttail[i] = &rdp->nxtlist; | ||
1364 | } | ||
1365 | } | ||
1261 | 1366 | ||
1262 | *receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist; | 1367 | /* |
1263 | receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; | 1368 | * Finally, put the rest of the callbacks at the end of the list. |
1264 | receive_rdp->qlen += rdp->qlen; | 1369 | * The ones that made it partway through get to start over: We |
1265 | receive_rdp->n_cbs_adopted += rdp->qlen; | 1370 | * cannot assume that grace periods are synchronized across CPUs. |
1266 | rdp->n_cbs_orphaned += rdp->qlen; | 1371 | * (We could splice RCU_WAIT_TAIL into RCU_NEXT_READY_TAIL, but |
1372 | * this does not seem compelling. Not yet, anyway.) | ||
1373 | */ | ||
1374 | if (rdp->nxtlist != NULL) { | ||
1375 | *receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist; | ||
1376 | receive_rdp->nxttail[RCU_NEXT_TAIL] = | ||
1377 | rdp->nxttail[RCU_NEXT_TAIL]; | ||
1378 | receive_rdp->n_cbs_adopted += rdp->qlen; | ||
1379 | rdp->n_cbs_orphaned += rdp->qlen; | ||
1380 | |||
1381 | rdp->nxtlist = NULL; | ||
1382 | for (i = 0; i < RCU_NEXT_SIZE; i++) | ||
1383 | rdp->nxttail[i] = &rdp->nxtlist; | ||
1384 | } | ||
1267 | 1385 | ||
1268 | rdp->nxtlist = NULL; | 1386 | /* |
1269 | for (i = 0; i < RCU_NEXT_SIZE; i++) | 1387 | * Record a quiescent state for the dying CPU. This is safe |
1270 | rdp->nxttail[i] = &rdp->nxtlist; | 1388 | * only because we have already cleared out the callbacks. |
1271 | rdp->qlen = 0; | 1389 | * (Otherwise, the RCU core might try to schedule the invocation |
1390 | * of callbacks on this now-offline CPU, which would be bad.) | ||
1391 | */ | ||
1392 | mask = rdp->grpmask; /* rnp->grplo is constant. */ | ||
1393 | trace_rcu_grace_period(rsp->name, | ||
1394 | rnp->gpnum + 1 - !!(rnp->qsmask & mask), | ||
1395 | "cpuofl"); | ||
1396 | rcu_report_qs_rdp(smp_processor_id(), rsp, rdp, rsp->gpnum); | ||
1397 | /* Note that rcu_report_qs_rdp() might call trace_rcu_grace_period(). */ | ||
1272 | } | 1398 | } |
1273 | 1399 | ||
1274 | /* | 1400 | /* |
1275 | * Remove the outgoing CPU from the bitmasks in the rcu_node hierarchy | 1401 | * The CPU has been completely removed, and some other CPU is reporting |
1276 | * and move all callbacks from the outgoing CPU to the current one. | 1402 | * this fact from process context. Do the remainder of the cleanup. |
1277 | * There can only be one CPU hotplug operation at a time, so no other | 1403 | * There can only be one CPU hotplug operation at a time, so no other |
1278 | * CPU can be attempting to update rcu_cpu_kthread_task. | 1404 | * CPU can be attempting to update rcu_cpu_kthread_task. |
1279 | */ | 1405 | */ |
1280 | static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) | 1406 | static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) |
1281 | { | 1407 | { |
1282 | unsigned long flags; | 1408 | unsigned long flags; |
1283 | unsigned long mask; | 1409 | unsigned long mask; |
1284 | int need_report = 0; | 1410 | int need_report = 0; |
1285 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); | 1411 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); |
1286 | struct rcu_node *rnp; | 1412 | struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rnp. */ |
1287 | 1413 | ||
1414 | /* Adjust any no-longer-needed kthreads. */ | ||
1288 | rcu_stop_cpu_kthread(cpu); | 1415 | rcu_stop_cpu_kthread(cpu); |
1416 | rcu_node_kthread_setaffinity(rnp, -1); | ||
1417 | |||
1418 | /* Remove the dying CPU from the bitmasks in the rcu_node hierarchy. */ | ||
1289 | 1419 | ||
1290 | /* Exclude any attempts to start a new grace period. */ | 1420 | /* Exclude any attempts to start a new grace period. */ |
1291 | raw_spin_lock_irqsave(&rsp->onofflock, flags); | 1421 | raw_spin_lock_irqsave(&rsp->onofflock, flags); |
1292 | 1422 | ||
1293 | /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */ | 1423 | /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */ |
1294 | rnp = rdp->mynode; /* this is the outgoing CPU's rnp. */ | ||
1295 | mask = rdp->grpmask; /* rnp->grplo is constant. */ | 1424 | mask = rdp->grpmask; /* rnp->grplo is constant. */ |
1296 | do { | 1425 | do { |
1297 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ | 1426 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ |
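The callback handoff in rcu_cleanup_dying_cpu() above splices singly linked lists that are kept together with a pointer to their final ->next field (the ->nxtlist/->nxttail[] arrangement), so adopting another CPU's callbacks is O(1). The sketch below shows that list shape and the tail-pointer splice in isolation; it is a simplified model with a single segment rather than the kernel's RCU_DONE_TAIL/RCU_NEXT_TAIL segments:

    #include <stdio.h>

    struct cb {
        struct cb *next;
        int id;
    };

    /* A callback list kept the way rcu_data keeps ->nxtlist/->nxttail:
     * the list head plus a pointer to the final ->next pointer, so that
     * appending one element or splicing a whole donor list is O(1). */
    struct cblist {
        struct cb *head;
        struct cb **tail;   /* points at 'head' when the list is empty */
    };

    static void cblist_init(struct cblist *l)
    {
        l->head = NULL;
        l->tail = &l->head;
    }

    static void cblist_enqueue(struct cblist *l, struct cb *c)
    {
        c->next = NULL;
        *l->tail = c;
        l->tail = &c->next;
    }

    /* Move every element of 'from' onto the end of 'to', as the dying-CPU
     * path does when it hands its remaining callbacks to an online CPU. */
    static void cblist_splice_tail(struct cblist *to, struct cblist *from)
    {
        if (from->head == NULL)
            return;
        *to->tail = from->head;
        to->tail = from->tail;
        cblist_init(from);
    }

    int main(void)
    {
        struct cblist online, dying;
        struct cb cbs[6];
        int i;

        cblist_init(&online);
        cblist_init(&dying);
        for (i = 0; i < 3; i++) { cbs[i].id = i; cblist_enqueue(&online, &cbs[i]); }
        for (; i < 6; i++)      { cbs[i].id = i; cblist_enqueue(&dying,  &cbs[i]); }

        cblist_splice_tail(&online, &dying);

        for (struct cb *c = online.head; c; c = c->next)
            printf("%d ", c->id);       /* prints: 0 1 2 3 4 5 */
        printf("\n");
        return 0;
    }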
@@ -1299,20 +1428,11 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) | |||
1299 | if (rnp->qsmaskinit != 0) { | 1428 | if (rnp->qsmaskinit != 0) { |
1300 | if (rnp != rdp->mynode) | 1429 | if (rnp != rdp->mynode) |
1301 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 1430 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
1302 | else | ||
1303 | trace_rcu_grace_period(rsp->name, | ||
1304 | rnp->gpnum + 1 - | ||
1305 | !!(rnp->qsmask & mask), | ||
1306 | "cpuofl"); | ||
1307 | break; | 1431 | break; |
1308 | } | 1432 | } |
1309 | if (rnp == rdp->mynode) { | 1433 | if (rnp == rdp->mynode) |
1310 | trace_rcu_grace_period(rsp->name, | ||
1311 | rnp->gpnum + 1 - | ||
1312 | !!(rnp->qsmask & mask), | ||
1313 | "cpuofl"); | ||
1314 | need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp); | 1434 | need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp); |
1315 | } else | 1435 | else |
1316 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 1436 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
1317 | mask = rnp->grpmask; | 1437 | mask = rnp->grpmask; |
1318 | rnp = rnp->parent; | 1438 | rnp = rnp->parent; |
@@ -1332,29 +1452,15 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) | |||
1332 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1452 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1333 | if (need_report & RCU_OFL_TASKS_EXP_GP) | 1453 | if (need_report & RCU_OFL_TASKS_EXP_GP) |
1334 | rcu_report_exp_rnp(rsp, rnp, true); | 1454 | rcu_report_exp_rnp(rsp, rnp, true); |
1335 | rcu_node_kthread_setaffinity(rnp, -1); | ||
1336 | } | ||
1337 | |||
1338 | /* | ||
1339 | * Remove the specified CPU from the RCU hierarchy and move any pending | ||
1340 | * callbacks that it might have to the current CPU. This code assumes | ||
1341 | * that at least one CPU in the system will remain running at all times. | ||
1342 | * Any attempt to offline -all- CPUs is likely to strand RCU callbacks. | ||
1343 | */ | ||
1344 | static void rcu_offline_cpu(int cpu) | ||
1345 | { | ||
1346 | __rcu_offline_cpu(cpu, &rcu_sched_state); | ||
1347 | __rcu_offline_cpu(cpu, &rcu_bh_state); | ||
1348 | rcu_preempt_offline_cpu(cpu); | ||
1349 | } | 1455 | } |
1350 | 1456 | ||
1351 | #else /* #ifdef CONFIG_HOTPLUG_CPU */ | 1457 | #else /* #ifdef CONFIG_HOTPLUG_CPU */ |
1352 | 1458 | ||
1353 | static void rcu_send_cbs_to_online(struct rcu_state *rsp) | 1459 | static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) |
1354 | { | 1460 | { |
1355 | } | 1461 | } |
1356 | 1462 | ||
1357 | static void rcu_offline_cpu(int cpu) | 1463 | static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) |
1358 | { | 1464 | { |
1359 | } | 1465 | } |
1360 | 1466 | ||
@@ -1368,11 +1474,11 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1368 | { | 1474 | { |
1369 | unsigned long flags; | 1475 | unsigned long flags; |
1370 | struct rcu_head *next, *list, **tail; | 1476 | struct rcu_head *next, *list, **tail; |
1371 | int bl, count; | 1477 | int bl, count, count_lazy; |
1372 | 1478 | ||
1373 | /* If no callbacks are ready, just return.*/ | 1479 | /* If no callbacks are ready, just return.*/ |
1374 | if (!cpu_has_callbacks_ready_to_invoke(rdp)) { | 1480 | if (!cpu_has_callbacks_ready_to_invoke(rdp)) { |
1375 | trace_rcu_batch_start(rsp->name, 0, 0); | 1481 | trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, 0); |
1376 | trace_rcu_batch_end(rsp->name, 0, !!ACCESS_ONCE(rdp->nxtlist), | 1482 | trace_rcu_batch_end(rsp->name, 0, !!ACCESS_ONCE(rdp->nxtlist), |
1377 | need_resched(), is_idle_task(current), | 1483 | need_resched(), is_idle_task(current), |
1378 | rcu_is_callbacks_kthread()); | 1484 | rcu_is_callbacks_kthread()); |
@@ -1384,8 +1490,9 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1384 | * races with call_rcu() from interrupt handlers. | 1490 | * races with call_rcu() from interrupt handlers. |
1385 | */ | 1491 | */ |
1386 | local_irq_save(flags); | 1492 | local_irq_save(flags); |
1493 | WARN_ON_ONCE(cpu_is_offline(smp_processor_id())); | ||
1387 | bl = rdp->blimit; | 1494 | bl = rdp->blimit; |
1388 | trace_rcu_batch_start(rsp->name, rdp->qlen, bl); | 1495 | trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, bl); |
1389 | list = rdp->nxtlist; | 1496 | list = rdp->nxtlist; |
1390 | rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; | 1497 | rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; |
1391 | *rdp->nxttail[RCU_DONE_TAIL] = NULL; | 1498 | *rdp->nxttail[RCU_DONE_TAIL] = NULL; |
@@ -1396,12 +1503,13 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1396 | local_irq_restore(flags); | 1503 | local_irq_restore(flags); |
1397 | 1504 | ||
1398 | /* Invoke callbacks. */ | 1505 | /* Invoke callbacks. */ |
1399 | count = 0; | 1506 | count = count_lazy = 0; |
1400 | while (list) { | 1507 | while (list) { |
1401 | next = list->next; | 1508 | next = list->next; |
1402 | prefetch(next); | 1509 | prefetch(next); |
1403 | debug_rcu_head_unqueue(list); | 1510 | debug_rcu_head_unqueue(list); |
1404 | __rcu_reclaim(rsp->name, list); | 1511 | if (__rcu_reclaim(rsp->name, list)) |
1512 | count_lazy++; | ||
1405 | list = next; | 1513 | list = next; |
1406 | /* Stop only if limit reached and CPU has something to do. */ | 1514 | /* Stop only if limit reached and CPU has something to do. */ |
1407 | if (++count >= bl && | 1515 | if (++count >= bl && |
@@ -1416,6 +1524,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1416 | rcu_is_callbacks_kthread()); | 1524 | rcu_is_callbacks_kthread()); |
1417 | 1525 | ||
1418 | /* Update count, and requeue any remaining callbacks. */ | 1526 | /* Update count, and requeue any remaining callbacks. */ |
1527 | rdp->qlen_lazy -= count_lazy; | ||
1419 | rdp->qlen -= count; | 1528 | rdp->qlen -= count; |
1420 | rdp->n_cbs_invoked += count; | 1529 | rdp->n_cbs_invoked += count; |
1421 | if (list != NULL) { | 1530 | if (list != NULL) { |
@@ -1458,6 +1567,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1458 | void rcu_check_callbacks(int cpu, int user) | 1567 | void rcu_check_callbacks(int cpu, int user) |
1459 | { | 1568 | { |
1460 | trace_rcu_utilization("Start scheduler-tick"); | 1569 | trace_rcu_utilization("Start scheduler-tick"); |
1570 | increment_cpu_stall_ticks(); | ||
1461 | if (user || rcu_is_cpu_rrupt_from_idle()) { | 1571 | if (user || rcu_is_cpu_rrupt_from_idle()) { |
1462 | 1572 | ||
1463 | /* | 1573 | /* |
@@ -1492,8 +1602,6 @@ void rcu_check_callbacks(int cpu, int user) | |||
1492 | trace_rcu_utilization("End scheduler-tick"); | 1602 | trace_rcu_utilization("End scheduler-tick"); |
1493 | } | 1603 | } |
1494 | 1604 | ||
1495 | #ifdef CONFIG_SMP | ||
1496 | |||
1497 | /* | 1605 | /* |
1498 | * Scan the leaf rcu_node structures, processing dyntick state for any that | 1606 | * Scan the leaf rcu_node structures, processing dyntick state for any that |
1499 | * have not yet encountered a quiescent state, using the function specified. | 1607 | * have not yet encountered a quiescent state, using the function specified. |
@@ -1616,15 +1724,6 @@ unlock_fqs_ret: | |||
1616 | trace_rcu_utilization("End fqs"); | 1724 | trace_rcu_utilization("End fqs"); |
1617 | } | 1725 | } |
1618 | 1726 | ||
1619 | #else /* #ifdef CONFIG_SMP */ | ||
1620 | |||
1621 | static void force_quiescent_state(struct rcu_state *rsp, int relaxed) | ||
1622 | { | ||
1623 | set_need_resched(); | ||
1624 | } | ||
1625 | |||
1626 | #endif /* #else #ifdef CONFIG_SMP */ | ||
1627 | |||
1628 | /* | 1727 | /* |
1629 | * This does the RCU core processing work for the specified rcu_state | 1728 | * This does the RCU core processing work for the specified rcu_state |
1630 | * and rcu_data structures. This may be called only from the CPU to | 1729 | * and rcu_data structures. This may be called only from the CPU to |
@@ -1702,11 +1801,12 @@ static void invoke_rcu_core(void) | |||
1702 | 1801 | ||
1703 | static void | 1802 | static void |
1704 | __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), | 1803 | __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), |
1705 | struct rcu_state *rsp) | 1804 | struct rcu_state *rsp, bool lazy) |
1706 | { | 1805 | { |
1707 | unsigned long flags; | 1806 | unsigned long flags; |
1708 | struct rcu_data *rdp; | 1807 | struct rcu_data *rdp; |
1709 | 1808 | ||
1809 | WARN_ON_ONCE((unsigned long)head & 0x3); /* Misaligned rcu_head! */ | ||
1710 | debug_rcu_head_queue(head); | 1810 | debug_rcu_head_queue(head); |
1711 | head->func = func; | 1811 | head->func = func; |
1712 | head->next = NULL; | 1812 | head->next = NULL; |
@@ -1720,18 +1820,21 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), | |||
1720 | * a quiescent state betweentimes. | 1820 | * a quiescent state betweentimes. |
1721 | */ | 1821 | */ |
1722 | local_irq_save(flags); | 1822 | local_irq_save(flags); |
1823 | WARN_ON_ONCE(cpu_is_offline(smp_processor_id())); | ||
1723 | rdp = this_cpu_ptr(rsp->rda); | 1824 | rdp = this_cpu_ptr(rsp->rda); |
1724 | 1825 | ||
1725 | /* Add the callback to our list. */ | 1826 | /* Add the callback to our list. */ |
1726 | *rdp->nxttail[RCU_NEXT_TAIL] = head; | 1827 | *rdp->nxttail[RCU_NEXT_TAIL] = head; |
1727 | rdp->nxttail[RCU_NEXT_TAIL] = &head->next; | 1828 | rdp->nxttail[RCU_NEXT_TAIL] = &head->next; |
1728 | rdp->qlen++; | 1829 | rdp->qlen++; |
1830 | if (lazy) | ||
1831 | rdp->qlen_lazy++; | ||
1729 | 1832 | ||
1730 | if (__is_kfree_rcu_offset((unsigned long)func)) | 1833 | if (__is_kfree_rcu_offset((unsigned long)func)) |
1731 | trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func, | 1834 | trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func, |
1732 | rdp->qlen); | 1835 | rdp->qlen_lazy, rdp->qlen); |
1733 | else | 1836 | else |
1734 | trace_rcu_callback(rsp->name, head, rdp->qlen); | 1837 | trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen); |
1735 | 1838 | ||
1736 | /* If interrupts were disabled, don't dive into RCU core. */ | 1839 | /* If interrupts were disabled, don't dive into RCU core. */ |
1737 | if (irqs_disabled_flags(flags)) { | 1840 | if (irqs_disabled_flags(flags)) { |
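__call_rcu() now takes a 'lazy' flag and maintains ->qlen_lazy alongside ->qlen: lazy callbacks are those known only to free memory, and counting them separately lets code that decides how urgently a grace period is needed discount them. A toy model of that two-counter bookkeeping (the helper names here are invented for illustration):

    #include <stdbool.h>
    #include <stdio.h>

    /* qlen counts all queued callbacks; qlen_lazy only the ones that merely
     * free memory.  The difference is what actually needs a prompt grace
     * period, which is the point of the new 'lazy' argument above. */
    struct cb_counts {
        long qlen;
        long qlen_lazy;
    };

    static void enqueue(struct cb_counts *c, bool lazy)
    {
        c->qlen++;
        if (lazy)
            c->qlen_lazy++;
    }

    static bool has_nonlazy_callbacks(const struct cb_counts *c)
    {
        return c->qlen != c->qlen_lazy;   /* some callback does real work */
    }

    int main(void)
    {
        struct cb_counts c = { 0, 0 };

        enqueue(&c, true);    /* e.g. a kfree_rcu()-style callback */
        printf("non-lazy work pending: %d\n", has_nonlazy_callbacks(&c));
        enqueue(&c, false);   /* e.g. a call_rcu() callback with side effects */
        printf("non-lazy work pending: %d\n", has_nonlazy_callbacks(&c));
        return 0;
    }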
@@ -1778,16 +1881,16 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), | |||
1778 | */ | 1881 | */ |
1779 | void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | 1882 | void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) |
1780 | { | 1883 | { |
1781 | __call_rcu(head, func, &rcu_sched_state); | 1884 | __call_rcu(head, func, &rcu_sched_state, 0); |
1782 | } | 1885 | } |
1783 | EXPORT_SYMBOL_GPL(call_rcu_sched); | 1886 | EXPORT_SYMBOL_GPL(call_rcu_sched); |
1784 | 1887 | ||
1785 | /* | 1888 | /* |
1786 | * Queue an RCU for invocation after a quicker grace period. | 1889 | * Queue an RCU callback for invocation after a quicker grace period. |
1787 | */ | 1890 | */ |
1788 | void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | 1891 | void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) |
1789 | { | 1892 | { |
1790 | __call_rcu(head, func, &rcu_bh_state); | 1893 | __call_rcu(head, func, &rcu_bh_state, 0); |
1791 | } | 1894 | } |
1792 | EXPORT_SYMBOL_GPL(call_rcu_bh); | 1895 | EXPORT_SYMBOL_GPL(call_rcu_bh); |
1793 | 1896 | ||
@@ -1816,6 +1919,10 @@ EXPORT_SYMBOL_GPL(call_rcu_bh); | |||
1816 | */ | 1919 | */ |
1817 | void synchronize_sched(void) | 1920 | void synchronize_sched(void) |
1818 | { | 1921 | { |
1922 | rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) && | ||
1923 | !lock_is_held(&rcu_lock_map) && | ||
1924 | !lock_is_held(&rcu_sched_lock_map), | ||
1925 | "Illegal synchronize_sched() in RCU-sched read-side critical section"); | ||
1819 | if (rcu_blocking_is_gp()) | 1926 | if (rcu_blocking_is_gp()) |
1820 | return; | 1927 | return; |
1821 | wait_rcu_gp(call_rcu_sched); | 1928 | wait_rcu_gp(call_rcu_sched); |
@@ -1833,12 +1940,137 @@ EXPORT_SYMBOL_GPL(synchronize_sched); | |||
1833 | */ | 1940 | */ |
1834 | void synchronize_rcu_bh(void) | 1941 | void synchronize_rcu_bh(void) |
1835 | { | 1942 | { |
1943 | rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) && | ||
1944 | !lock_is_held(&rcu_lock_map) && | ||
1945 | !lock_is_held(&rcu_sched_lock_map), | ||
1946 | "Illegal synchronize_rcu_bh() in RCU-bh read-side critical section"); | ||
1836 | if (rcu_blocking_is_gp()) | 1947 | if (rcu_blocking_is_gp()) |
1837 | return; | 1948 | return; |
1838 | wait_rcu_gp(call_rcu_bh); | 1949 | wait_rcu_gp(call_rcu_bh); |
1839 | } | 1950 | } |
1840 | EXPORT_SYMBOL_GPL(synchronize_rcu_bh); | 1951 | EXPORT_SYMBOL_GPL(synchronize_rcu_bh); |
1841 | 1952 | ||
1953 | static atomic_t sync_sched_expedited_started = ATOMIC_INIT(0); | ||
1954 | static atomic_t sync_sched_expedited_done = ATOMIC_INIT(0); | ||
1955 | |||
1956 | static int synchronize_sched_expedited_cpu_stop(void *data) | ||
1957 | { | ||
1958 | /* | ||
1959 | * There must be a full memory barrier on each affected CPU | ||
1960 | * between the time that try_stop_cpus() is called and the | ||
1961 | * time that it returns. | ||
1962 | * | ||
1963 | * In the current initial implementation of cpu_stop, the | ||
1964 | * above condition is already met when the control reaches | ||
1965 | * this point and the following smp_mb() is not strictly | ||
1966 | * necessary. Do smp_mb() anyway for documentation and | ||
1967 | * robustness against future implementation changes. | ||
1968 | */ | ||
1969 | smp_mb(); /* See above comment block. */ | ||
1970 | return 0; | ||
1971 | } | ||
1972 | |||
1973 | /** | ||
1974 | * synchronize_sched_expedited - Brute-force RCU-sched grace period | ||
1975 | * | ||
1976 | * Wait for an RCU-sched grace period to elapse, but use a "big hammer" | ||
1977 | * approach to force the grace period to end quickly. This consumes | ||
1978 | * significant time on all CPUs and is unfriendly to real-time workloads, | ||
1979 | * so is thus not recommended for any sort of common-case code. In fact, | ||
1980 | * if you are using synchronize_sched_expedited() in a loop, please | ||
1981 | * restructure your code to batch your updates, and then use a single | ||
1982 | * synchronize_sched() instead. | ||
1983 | * | ||
1984 | * Note that it is illegal to call this function while holding any lock | ||
1985 | * that is acquired by a CPU-hotplug notifier. And yes, it is also illegal | ||
1986 | * to call this function from a CPU-hotplug notifier. Failing to observe | ||
1987 | * these restriction will result in deadlock. | ||
1988 | * | ||
1989 | * This implementation can be thought of as an application of ticket | ||
1990 | * locking to RCU, with sync_sched_expedited_started and | ||
1991 | * sync_sched_expedited_done taking on the roles of the halves | ||
1992 | * of the ticket-lock word. Each task atomically increments | ||
1993 | * sync_sched_expedited_started upon entry, snapshotting the old value, | ||
1994 | * then attempts to stop all the CPUs. If this succeeds, then each | ||
1995 | * CPU will have executed a context switch, resulting in an RCU-sched | ||
1996 | * grace period. We are then done, so we use atomic_cmpxchg() to | ||
1997 | * update sync_sched_expedited_done to match our snapshot -- but | ||
1998 | * only if someone else has not already advanced past our snapshot. | ||
1999 | * | ||
2000 | * On the other hand, if try_stop_cpus() fails, we check the value | ||
2001 | * of sync_sched_expedited_done. If it has advanced past our | ||
2002 | * initial snapshot, then someone else must have forced a grace period | ||
2003 | * some time after we took our snapshot. In this case, our work is | ||
2004 | * done for us, and we can simply return. Otherwise, we try again, | ||
2005 | * but keep our initial snapshot for purposes of checking for someone | ||
2006 | * doing our work for us. | ||
2007 | * | ||
2008 | * If we fail too many times in a row, we fall back to synchronize_sched(). | ||
2009 | */ | ||
2010 | void synchronize_sched_expedited(void) | ||
2011 | { | ||
2012 | int firstsnap, s, snap, trycount = 0; | ||
2013 | |||
2014 | /* Note that atomic_inc_return() implies full memory barrier. */ | ||
2015 | firstsnap = snap = atomic_inc_return(&sync_sched_expedited_started); | ||
2016 | get_online_cpus(); | ||
2017 | WARN_ON_ONCE(cpu_is_offline(raw_smp_processor_id())); | ||
2018 | |||
2019 | /* | ||
2020 | * Each pass through the following loop attempts to force a | ||
2021 | * context switch on each CPU. | ||
2022 | */ | ||
2023 | while (try_stop_cpus(cpu_online_mask, | ||
2024 | synchronize_sched_expedited_cpu_stop, | ||
2025 | NULL) == -EAGAIN) { | ||
2026 | put_online_cpus(); | ||
2027 | |||
2028 | /* No joy, try again later. Or just synchronize_sched(). */ | ||
2029 | if (trycount++ < 10) | ||
2030 | udelay(trycount * num_online_cpus()); | ||
2031 | else { | ||
2032 | synchronize_sched(); | ||
2033 | return; | ||
2034 | } | ||
2035 | |||
2036 | /* Check to see if someone else did our work for us. */ | ||
2037 | s = atomic_read(&sync_sched_expedited_done); | ||
2038 | if (UINT_CMP_GE((unsigned)s, (unsigned)firstsnap)) { | ||
2039 | smp_mb(); /* ensure test happens before caller kfree */ | ||
2040 | return; | ||
2041 | } | ||
2042 | |||
2043 | /* | ||
2044 | * Refetching sync_sched_expedited_started allows later | ||
2045 | * callers to piggyback on our grace period. We subtract | ||
2046 | * 1 to get the same token that the last incrementer got. | ||
2047 | * We retry after they started, so our grace period works | ||
2048 | * for them, and they started after our first try, so their | ||
2049 | * grace period works for us. | ||
2050 | */ | ||
2051 | get_online_cpus(); | ||
2052 | snap = atomic_read(&sync_sched_expedited_started); | ||
2053 | smp_mb(); /* ensure read is before try_stop_cpus(). */ | ||
2054 | } | ||
2055 | |||
2056 | /* | ||
2057 | * Everyone up to our most recent fetch is covered by our grace | ||
2058 | * period. Update the counter, but only if our work is still | ||
2059 | * relevant -- which it won't be if someone who started later | ||
2060 | * than we did beat us to the punch. | ||
2061 | */ | ||
2062 | do { | ||
2063 | s = atomic_read(&sync_sched_expedited_done); | ||
2064 | if (UINT_CMP_GE((unsigned)s, (unsigned)snap)) { | ||
2065 | smp_mb(); /* ensure test happens before caller kfree */ | ||
2066 | break; | ||
2067 | } | ||
2068 | } while (atomic_cmpxchg(&sync_sched_expedited_done, s, snap) != s); | ||
2069 | |||
2070 | put_online_cpus(); | ||
2071 | } | ||
2072 | EXPORT_SYMBOL_GPL(synchronize_sched_expedited); | ||
2073 | |||
1842 | /* | 2074 | /* |
1843 | * Check to see if there is any immediate RCU-related work to be done | 2075 | * Check to see if there is any immediate RCU-related work to be done |
1844 | * by the current CPU, for the specified type of RCU, returning 1 if so. | 2076 | * by the current CPU, for the specified type of RCU, returning 1 if so. |
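The block comment above describes synchronize_sched_expedited()'s started/done counters as a ticket-lock-like protocol. The user-space sketch below models just that counter protocol with C11 atomics: take a ticket, do the heavy work (try_stop_cpus() in the kernel, elided here), then advance 'done' to the ticket unless a later caller has already published a newer value. The failure/retry path and the synchronize_sched() fallback are omitted:

    #include <stdatomic.h>
    #include <stdio.h>

    /* Minimal model of the sync_sched_expedited_started/_done protocol. */
    static atomic_uint started;
    static atomic_uint done;

    /* Wrap-safe "a >= b" for free-running counters (cf. UINT_CMP_GE()). */
    static int cmp_ge(unsigned a, unsigned b)
    {
        return (int)(a - b) >= 0;
    }

    static void expedite(void)
    {
        unsigned snap = atomic_fetch_add(&started, 1) + 1;  /* take a ticket */
        unsigned s;

        /* ... heavy work (stop all CPUs) would happen here ... */

        /* Record that every ticket up to 'snap' is now covered, unless a
         * caller with a later ticket has already published a newer value. */
        s = atomic_load(&done);
        while (!cmp_ge(s, snap)) {
            if (atomic_compare_exchange_weak(&done, &s, snap))
                break;
            /* On failure 's' was reloaded; the loop re-checks it. */
        }
    }

    int main(void)
    {
        expedite();
        expedite();
        printf("started=%u done=%u\n",
               atomic_load(&started), atomic_load(&done));
        return 0;
    }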
@@ -1932,7 +2164,7 @@ static int rcu_cpu_has_callbacks(int cpu) | |||
1932 | /* RCU callbacks either ready or pending? */ | 2164 | /* RCU callbacks either ready or pending? */ |
1933 | return per_cpu(rcu_sched_data, cpu).nxtlist || | 2165 | return per_cpu(rcu_sched_data, cpu).nxtlist || |
1934 | per_cpu(rcu_bh_data, cpu).nxtlist || | 2166 | per_cpu(rcu_bh_data, cpu).nxtlist || |
1935 | rcu_preempt_needs_cpu(cpu); | 2167 | rcu_preempt_cpu_has_callbacks(cpu); |
1936 | } | 2168 | } |
1937 | 2169 | ||
1938 | static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; | 2170 | static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; |
@@ -2027,9 +2259,10 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) | |||
2027 | rdp->nxtlist = NULL; | 2259 | rdp->nxtlist = NULL; |
2028 | for (i = 0; i < RCU_NEXT_SIZE; i++) | 2260 | for (i = 0; i < RCU_NEXT_SIZE; i++) |
2029 | rdp->nxttail[i] = &rdp->nxtlist; | 2261 | rdp->nxttail[i] = &rdp->nxtlist; |
2262 | rdp->qlen_lazy = 0; | ||
2030 | rdp->qlen = 0; | 2263 | rdp->qlen = 0; |
2031 | rdp->dynticks = &per_cpu(rcu_dynticks, cpu); | 2264 | rdp->dynticks = &per_cpu(rcu_dynticks, cpu); |
2032 | WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_NESTING); | 2265 | WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE); |
2033 | WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1); | 2266 | WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1); |
2034 | rdp->cpu = cpu; | 2267 | rdp->cpu = cpu; |
2035 | rdp->rsp = rsp; | 2268 | rdp->rsp = rsp; |
@@ -2057,7 +2290,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) | |||
2057 | rdp->qlen_last_fqs_check = 0; | 2290 | rdp->qlen_last_fqs_check = 0; |
2058 | rdp->n_force_qs_snap = rsp->n_force_qs; | 2291 | rdp->n_force_qs_snap = rsp->n_force_qs; |
2059 | rdp->blimit = blimit; | 2292 | rdp->blimit = blimit; |
2060 | rdp->dynticks->dynticks_nesting = DYNTICK_TASK_NESTING; | 2293 | rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; |
2061 | atomic_set(&rdp->dynticks->dynticks, | 2294 | atomic_set(&rdp->dynticks->dynticks, |
2062 | (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1); | 2295 | (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1); |
2063 | rcu_prepare_for_idle_init(cpu); | 2296 | rcu_prepare_for_idle_init(cpu); |
@@ -2139,16 +2372,18 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, | |||
2139 | * touch any data without introducing corruption. We send the | 2372 | * touch any data without introducing corruption. We send the |
2140 | * dying CPU's callbacks to an arbitrarily chosen online CPU. | 2373 | * dying CPU's callbacks to an arbitrarily chosen online CPU. |
2141 | */ | 2374 | */ |
2142 | rcu_send_cbs_to_online(&rcu_bh_state); | 2375 | rcu_cleanup_dying_cpu(&rcu_bh_state); |
2143 | rcu_send_cbs_to_online(&rcu_sched_state); | 2376 | rcu_cleanup_dying_cpu(&rcu_sched_state); |
2144 | rcu_preempt_send_cbs_to_online(); | 2377 | rcu_preempt_cleanup_dying_cpu(); |
2145 | rcu_cleanup_after_idle(cpu); | 2378 | rcu_cleanup_after_idle(cpu); |
2146 | break; | 2379 | break; |
2147 | case CPU_DEAD: | 2380 | case CPU_DEAD: |
2148 | case CPU_DEAD_FROZEN: | 2381 | case CPU_DEAD_FROZEN: |
2149 | case CPU_UP_CANCELED: | 2382 | case CPU_UP_CANCELED: |
2150 | case CPU_UP_CANCELED_FROZEN: | 2383 | case CPU_UP_CANCELED_FROZEN: |
2151 | rcu_offline_cpu(cpu); | 2384 | rcu_cleanup_dead_cpu(cpu, &rcu_bh_state); |
2385 | rcu_cleanup_dead_cpu(cpu, &rcu_sched_state); | ||
2386 | rcu_preempt_cleanup_dead_cpu(cpu); | ||
2152 | break; | 2387 | break; |
2153 | default: | 2388 | default: |
2154 | break; | 2389 | break; |