Diffstat (limited to 'kernel/rcutree_plugin.h')
-rw-r--r-- | kernel/rcutree_plugin.h | 450 |
1 file changed, 276 insertions, 174 deletions
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 8bb35d73e1f9..c023464816be 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -25,7 +25,6 @@ | |||
25 | */ | 25 | */ |
26 | 26 | ||
27 | #include <linux/delay.h> | 27 | #include <linux/delay.h> |
28 | #include <linux/stop_machine.h> | ||
29 | 28 | ||
30 | #define RCU_KTHREAD_PRIO 1 | 29 | #define RCU_KTHREAD_PRIO 1 |
31 | 30 | ||
@@ -63,7 +62,10 @@ static void __init rcu_bootup_announce_oddness(void) | |||
63 | printk(KERN_INFO "\tRCU torture testing starts during boot.\n"); | 62 | printk(KERN_INFO "\tRCU torture testing starts during boot.\n"); |
64 | #endif | 63 | #endif |
65 | #if defined(CONFIG_TREE_PREEMPT_RCU) && !defined(CONFIG_RCU_CPU_STALL_VERBOSE) | 64 | #if defined(CONFIG_TREE_PREEMPT_RCU) && !defined(CONFIG_RCU_CPU_STALL_VERBOSE) |
66 | printk(KERN_INFO "\tVerbose stalled-CPUs detection is disabled.\n"); | 65 | printk(KERN_INFO "\tDump stacks of tasks blocking RCU-preempt GP.\n"); |
66 | #endif | ||
67 | #if defined(CONFIG_RCU_CPU_STALL_INFO) | ||
68 | printk(KERN_INFO "\tAdditional per-CPU info printed with stalls.\n"); | ||
67 | #endif | 69 | #endif |
68 | #if NUM_RCU_LVL_4 != 0 | 70 | #if NUM_RCU_LVL_4 != 0 |
69 | printk(KERN_INFO "\tExperimental four-level hierarchy is enabled.\n"); | 71 | printk(KERN_INFO "\tExperimental four-level hierarchy is enabled.\n"); |
@@ -490,6 +492,31 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp) | |||
490 | 492 | ||
491 | #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */ | 493 | #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */ |
492 | 494 | ||
495 | #ifdef CONFIG_RCU_CPU_STALL_INFO | ||
496 | |||
497 | static void rcu_print_task_stall_begin(struct rcu_node *rnp) | ||
498 | { | ||
499 | printk(KERN_ERR "\tTasks blocked on level-%d rcu_node (CPUs %d-%d):", | ||
500 | rnp->level, rnp->grplo, rnp->grphi); | ||
501 | } | ||
502 | |||
503 | static void rcu_print_task_stall_end(void) | ||
504 | { | ||
505 | printk(KERN_CONT "\n"); | ||
506 | } | ||
507 | |||
508 | #else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */ | ||
509 | |||
510 | static void rcu_print_task_stall_begin(struct rcu_node *rnp) | ||
511 | { | ||
512 | } | ||
513 | |||
514 | static void rcu_print_task_stall_end(void) | ||
515 | { | ||
516 | } | ||
517 | |||
518 | #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_INFO */ | ||
519 | |||
493 | /* | 520 | /* |
494 | * Scan the current list of tasks blocked within RCU read-side critical | 521 | * Scan the current list of tasks blocked within RCU read-side critical |
495 | * sections, printing out the tid of each. | 522 | * sections, printing out the tid of each. |
@@ -501,12 +528,14 @@ static int rcu_print_task_stall(struct rcu_node *rnp) | |||
501 | 528 | ||
502 | if (!rcu_preempt_blocked_readers_cgp(rnp)) | 529 | if (!rcu_preempt_blocked_readers_cgp(rnp)) |
503 | return 0; | 530 | return 0; |
531 | rcu_print_task_stall_begin(rnp); | ||
504 | t = list_entry(rnp->gp_tasks, | 532 | t = list_entry(rnp->gp_tasks, |
505 | struct task_struct, rcu_node_entry); | 533 | struct task_struct, rcu_node_entry); |
506 | list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { | 534 | list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { |
507 | printk(" P%d", t->pid); | 535 | printk(KERN_CONT " P%d", t->pid); |
508 | ndetected++; | 536 | ndetected++; |
509 | } | 537 | } |
538 | rcu_print_task_stall_end(); | ||
510 | return ndetected; | 539 | return ndetected; |
511 | } | 540 | } |
512 | 541 | ||
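With CONFIG_RCU_CPU_STALL_INFO=y, rcu_print_task_stall_begin(), the " P%d" loop, and rcu_print_task_stall_end() above combine into a single console line per rcu_node structure. A hypothetical example of the result, using the format strings from the hunks above but made-up node geometry and PIDs:

	Tasks blocked on level-0 rcu_node (CPUs 0-3): P1234 P1267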
@@ -581,7 +610,7 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, | |||
581 | * absolutely necessary, but this is a good performance/complexity | 610 | * absolutely necessary, but this is a good performance/complexity |
582 | * tradeoff. | 611 | * tradeoff. |
583 | */ | 612 | */ |
584 | if (rcu_preempt_blocked_readers_cgp(rnp)) | 613 | if (rcu_preempt_blocked_readers_cgp(rnp) && rnp->qsmask == 0) |
585 | retval |= RCU_OFL_TASKS_NORM_GP; | 614 | retval |= RCU_OFL_TASKS_NORM_GP; |
586 | if (rcu_preempted_readers_exp(rnp)) | 615 | if (rcu_preempted_readers_exp(rnp)) |
587 | retval |= RCU_OFL_TASKS_EXP_GP; | 616 | retval |= RCU_OFL_TASKS_EXP_GP; |
@@ -618,16 +647,16 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, | |||
618 | return retval; | 647 | return retval; |
619 | } | 648 | } |
620 | 649 | ||
650 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | ||
651 | |||
621 | /* | 652 | /* |
622 | * Do CPU-offline processing for preemptible RCU. | 653 | * Do CPU-offline processing for preemptible RCU. |
623 | */ | 654 | */ |
624 | static void rcu_preempt_offline_cpu(int cpu) | 655 | static void rcu_preempt_cleanup_dead_cpu(int cpu) |
625 | { | 656 | { |
626 | __rcu_offline_cpu(cpu, &rcu_preempt_state); | 657 | rcu_cleanup_dead_cpu(cpu, &rcu_preempt_state); |
627 | } | 658 | } |
628 | 659 | ||
629 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | ||
630 | |||
631 | /* | 660 | /* |
632 | * Check for a quiescent state from the current CPU. When a task blocks, | 661 | * Check for a quiescent state from the current CPU. When a task blocks, |
633 | * the task is recorded in the corresponding CPU's rcu_node structure, | 662 | * the task is recorded in the corresponding CPU's rcu_node structure, |
@@ -671,10 +700,24 @@ static void rcu_preempt_do_callbacks(void) | |||
671 | */ | 700 | */ |
672 | void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | 701 | void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) |
673 | { | 702 | { |
674 | __call_rcu(head, func, &rcu_preempt_state); | 703 | __call_rcu(head, func, &rcu_preempt_state, 0); |
675 | } | 704 | } |
676 | EXPORT_SYMBOL_GPL(call_rcu); | 705 | EXPORT_SYMBOL_GPL(call_rcu); |
677 | 706 | ||
707 | /* | ||
708 | * Queue an RCU callback for lazy invocation after a grace period. | ||
709 | * This will likely be later named something like "call_rcu_lazy()", | ||
710 | * but this change will require some way of tagging the lazy RCU | ||
711 | * callbacks in the list of pending callbacks. Until then, this | ||
712 | * function may only be called from __kfree_rcu(). | ||
713 | */ | ||
714 | void kfree_call_rcu(struct rcu_head *head, | ||
715 | void (*func)(struct rcu_head *rcu)) | ||
716 | { | ||
717 | __call_rcu(head, func, &rcu_preempt_state, 1); | ||
718 | } | ||
719 | EXPORT_SYMBOL_GPL(kfree_call_rcu); | ||
720 | |||
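As the header comment notes, kfree_call_rcu() is intended to be reached only through __kfree_rcu(), which is what the kfree_rcu() macro expands to. A minimal usage sketch follows; struct foo and foo_release() are hypothetical and only illustrate the call path, they are not part of this patch:

	struct foo {
		int data;
		struct rcu_head rcu;
	};

	static void foo_release(struct foo *fp)
	{
		/*
		 * Roughly expands to __kfree_rcu(&fp->rcu, offsetof(struct foo, rcu)),
		 * which queues the callback through kfree_call_rcu() as a lazy
		 * callback: the memory is freed only after a grace period, so
		 * concurrent readers under rcu_read_lock() remain safe.
		 */
		kfree_rcu(fp, rcu);
	}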
678 | /** | 721 | /** |
679 | * synchronize_rcu - wait until a grace period has elapsed. | 722 | * synchronize_rcu - wait until a grace period has elapsed. |
680 | * | 723 | * |
@@ -688,6 +731,10 @@ EXPORT_SYMBOL_GPL(call_rcu); | |||
688 | */ | 731 | */ |
689 | void synchronize_rcu(void) | 732 | void synchronize_rcu(void) |
690 | { | 733 | { |
734 | rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) && | ||
735 | !lock_is_held(&rcu_lock_map) && | ||
736 | !lock_is_held(&rcu_sched_lock_map), | ||
737 | "Illegal synchronize_rcu() in RCU read-side critical section"); | ||
691 | if (!rcu_scheduler_active) | 738 | if (!rcu_scheduler_active) |
692 | return; | 739 | return; |
693 | wait_rcu_gp(call_rcu); | 740 | wait_rcu_gp(call_rcu); |
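The rcu_lockdep_assert() added above catches a classic self-deadlock at runtime when lockdep is enabled. A short illustration of the pattern it now flags (example code, not part of the patch):

	rcu_read_lock();
	synchronize_rcu();	/* Illegal: the grace period cannot end until this
				 * reader exits, but this reader is blocked here. */
	rcu_read_unlock();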
@@ -788,10 +835,22 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp) | |||
788 | rcu_report_exp_rnp(rsp, rnp, false); /* Don't wake self. */ | 835 | rcu_report_exp_rnp(rsp, rnp, false); /* Don't wake self. */ |
789 | } | 836 | } |
790 | 837 | ||
791 | /* | 838 | /** |
792 | * Wait for an rcu-preempt grace period, but expedite it. The basic idea | 839 | * synchronize_rcu_expedited - Brute-force RCU grace period |
793 | * is to invoke synchronize_sched_expedited() to push all the tasks to | 840 | * |
794 | * the ->blkd_tasks lists and wait for this list to drain. | 841 | * Wait for an RCU-preempt grace period, but expedite it. The basic |
842 | * idea is to invoke synchronize_sched_expedited() to push all the tasks to | ||
843 | * the ->blkd_tasks lists and wait for this list to drain. This consumes | ||
844 | * significant time on all CPUs and is unfriendly to real-time workloads, | ||
845 | * and is thus not recommended for any sort of common-case code. | ||
846 | * In fact, if you are using synchronize_rcu_expedited() in a loop, | ||
847 | * please restructure your code to batch your updates, and then use a | ||
848 | * single synchronize_rcu() instead. | ||
849 | * | ||
850 | * Note that it is illegal to call this function while holding any lock | ||
851 | * that is acquired by a CPU-hotplug notifier. And yes, it is also illegal | ||
852 | * to call this function from a CPU-hotplug notifier. Failing to observe | ||
853 | * these restrictions will result in deadlock. | ||
795 | */ | 854 | */ |
796 | void synchronize_rcu_expedited(void) | 855 | void synchronize_rcu_expedited(void) |
797 | { | 856 | { |
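The batching advice in the new header comment amounts to the restructuring sketched below; update_one() is a hypothetical per-item update used only for illustration:

	/* Costly: one expedited grace period per update, disturbing all CPUs. */
	for (i = 0; i < n; i++) {
		update_one(i);
		synchronize_rcu_expedited();
	}

	/* Preferred: batch the updates, then wait for a single grace period. */
	for (i = 0; i < n; i++)
		update_one(i);
	synchronize_rcu();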
@@ -869,9 +928,9 @@ static int rcu_preempt_pending(int cpu) | |||
869 | } | 928 | } |
870 | 929 | ||
871 | /* | 930 | /* |
872 | * Does preemptible RCU need the CPU to stay out of dynticks mode? | 931 | * Does preemptible RCU have callbacks on this CPU? |
873 | */ | 932 | */ |
874 | static int rcu_preempt_needs_cpu(int cpu) | 933 | static int rcu_preempt_cpu_has_callbacks(int cpu) |
875 | { | 934 | { |
876 | return !!per_cpu(rcu_preempt_data, cpu).nxtlist; | 935 | return !!per_cpu(rcu_preempt_data, cpu).nxtlist; |
877 | } | 936 | } |
@@ -894,11 +953,12 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu) | |||
894 | } | 953 | } |
895 | 954 | ||
896 | /* | 955 | /* |
897 | * Move preemptible RCU's callbacks from dying CPU to other online CPU. | 956 | * Move preemptible RCU's callbacks from dying CPU to other online CPU |
957 | * and record a quiescent state. | ||
898 | */ | 958 | */ |
899 | static void rcu_preempt_send_cbs_to_online(void) | 959 | static void rcu_preempt_cleanup_dying_cpu(void) |
900 | { | 960 | { |
901 | rcu_send_cbs_to_online(&rcu_preempt_state); | 961 | rcu_cleanup_dying_cpu(&rcu_preempt_state); |
902 | } | 962 | } |
903 | 963 | ||
904 | /* | 964 | /* |
@@ -1034,16 +1094,16 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, | |||
1034 | return 0; | 1094 | return 0; |
1035 | } | 1095 | } |
1036 | 1096 | ||
1097 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | ||
1098 | |||
1037 | /* | 1099 | /* |
1038 | * Because preemptible RCU does not exist, it never needs CPU-offline | 1100 | * Because preemptible RCU does not exist, it never needs CPU-offline |
1039 | * processing. | 1101 | * processing. |
1040 | */ | 1102 | */ |
1041 | static void rcu_preempt_offline_cpu(int cpu) | 1103 | static void rcu_preempt_cleanup_dead_cpu(int cpu) |
1042 | { | 1104 | { |
1043 | } | 1105 | } |
1044 | 1106 | ||
1045 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | ||
1046 | |||
1047 | /* | 1107 | /* |
1048 | * Because preemptible RCU does not exist, it never has any callbacks | 1108 | * Because preemptible RCU does not exist, it never has any callbacks |
1049 | * to check. | 1109 | * to check. |
@@ -1061,6 +1121,22 @@ static void rcu_preempt_process_callbacks(void) | |||
1061 | } | 1121 | } |
1062 | 1122 | ||
1063 | /* | 1123 | /* |
1124 | * Queue an RCU callback for lazy invocation after a grace period. | ||
1125 | * This will likely be later named something like "call_rcu_lazy()", | ||
1126 | * but this change will require some way of tagging the lazy RCU | ||
1127 | * callbacks in the list of pending callbacks. Until then, this | ||
1128 | * function may only be called from __kfree_rcu(). | ||
1129 | * | ||
1130 | * Because there is no preemptible RCU, we use RCU-sched instead. | ||
1131 | */ | ||
1132 | void kfree_call_rcu(struct rcu_head *head, | ||
1133 | void (*func)(struct rcu_head *rcu)) | ||
1134 | { | ||
1135 | __call_rcu(head, func, &rcu_sched_state, 1); | ||
1136 | } | ||
1137 | EXPORT_SYMBOL_GPL(kfree_call_rcu); | ||
1138 | |||
1139 | /* | ||
1064 | * Wait for an rcu-preempt grace period, but make it happen quickly. | 1140 | * Wait for an rcu-preempt grace period, but make it happen quickly. |
1065 | * But because preemptible RCU does not exist, map to rcu-sched. | 1141 | * But because preemptible RCU does not exist, map to rcu-sched. |
1066 | */ | 1142 | */ |
@@ -1093,9 +1169,9 @@ static int rcu_preempt_pending(int cpu) | |||
1093 | } | 1169 | } |
1094 | 1170 | ||
1095 | /* | 1171 | /* |
1096 | * Because preemptible RCU does not exist, it never needs any CPU. | 1172 | * Because preemptible RCU does not exist, it never has callbacks. |
1097 | */ | 1173 | */ |
1098 | static int rcu_preempt_needs_cpu(int cpu) | 1174 | static int rcu_preempt_cpu_has_callbacks(int cpu) |
1099 | { | 1175 | { |
1100 | return 0; | 1176 | return 0; |
1101 | } | 1177 | } |
@@ -1119,9 +1195,9 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu) | |||
1119 | } | 1195 | } |
1120 | 1196 | ||
1121 | /* | 1197 | /* |
1122 | * Because there is no preemptible RCU, there are no callbacks to move. | 1198 | * Because there is no preemptible RCU, there is no cleanup to do. |
1123 | */ | 1199 | */ |
1124 | static void rcu_preempt_send_cbs_to_online(void) | 1200 | static void rcu_preempt_cleanup_dying_cpu(void) |
1125 | { | 1201 | { |
1126 | } | 1202 | } |
1127 | 1203 | ||
@@ -1823,132 +1899,6 @@ static void __cpuinit rcu_prepare_kthreads(int cpu) | |||
1823 | 1899 | ||
1824 | #endif /* #else #ifdef CONFIG_RCU_BOOST */ | 1900 | #endif /* #else #ifdef CONFIG_RCU_BOOST */ |
1825 | 1901 | ||
1826 | #ifndef CONFIG_SMP | ||
1827 | |||
1828 | void synchronize_sched_expedited(void) | ||
1829 | { | ||
1830 | cond_resched(); | ||
1831 | } | ||
1832 | EXPORT_SYMBOL_GPL(synchronize_sched_expedited); | ||
1833 | |||
1834 | #else /* #ifndef CONFIG_SMP */ | ||
1835 | |||
1836 | static atomic_t sync_sched_expedited_started = ATOMIC_INIT(0); | ||
1837 | static atomic_t sync_sched_expedited_done = ATOMIC_INIT(0); | ||
1838 | |||
1839 | static int synchronize_sched_expedited_cpu_stop(void *data) | ||
1840 | { | ||
1841 | /* | ||
1842 | * There must be a full memory barrier on each affected CPU | ||
1843 | * between the time that try_stop_cpus() is called and the | ||
1844 | * time that it returns. | ||
1845 | * | ||
1846 | * In the current initial implementation of cpu_stop, the | ||
1847 | * above condition is already met when the control reaches | ||
1848 | * this point and the following smp_mb() is not strictly | ||
1849 | * necessary. Do smp_mb() anyway for documentation and | ||
1850 | * robustness against future implementation changes. | ||
1851 | */ | ||
1852 | smp_mb(); /* See above comment block. */ | ||
1853 | return 0; | ||
1854 | } | ||
1855 | |||
1856 | /* | ||
1857 | * Wait for an rcu-sched grace period to elapse, but use "big hammer" | ||
1858 | * approach to force grace period to end quickly. This consumes | ||
1859 | * significant time on all CPUs, and is thus not recommended for | ||
1860 | * any sort of common-case code. | ||
1861 | * | ||
1862 | * Note that it is illegal to call this function while holding any | ||
1863 | * lock that is acquired by a CPU-hotplug notifier. Failing to | ||
1864 | * observe this restriction will result in deadlock. | ||
1865 | * | ||
1866 | * This implementation can be thought of as an application of ticket | ||
1867 | * locking to RCU, with sync_sched_expedited_started and | ||
1868 | * sync_sched_expedited_done taking on the roles of the halves | ||
1869 | * of the ticket-lock word. Each task atomically increments | ||
1870 | * sync_sched_expedited_started upon entry, snapshotting the old value, | ||
1871 | * then attempts to stop all the CPUs. If this succeeds, then each | ||
1872 | * CPU will have executed a context switch, resulting in an RCU-sched | ||
1873 | * grace period. We are then done, so we use atomic_cmpxchg() to | ||
1874 | * update sync_sched_expedited_done to match our snapshot -- but | ||
1875 | * only if someone else has not already advanced past our snapshot. | ||
1876 | * | ||
1877 | * On the other hand, if try_stop_cpus() fails, we check the value | ||
1878 | * of sync_sched_expedited_done. If it has advanced past our | ||
1879 | * initial snapshot, then someone else must have forced a grace period | ||
1880 | * some time after we took our snapshot. In this case, our work is | ||
1881 | * done for us, and we can simply return. Otherwise, we try again, | ||
1882 | * but keep our initial snapshot for purposes of checking for someone | ||
1883 | * doing our work for us. | ||
1884 | * | ||
1885 | * If we fail too many times in a row, we fall back to synchronize_sched(). | ||
1886 | */ | ||
1887 | void synchronize_sched_expedited(void) | ||
1888 | { | ||
1889 | int firstsnap, s, snap, trycount = 0; | ||
1890 | |||
1891 | /* Note that atomic_inc_return() implies full memory barrier. */ | ||
1892 | firstsnap = snap = atomic_inc_return(&sync_sched_expedited_started); | ||
1893 | get_online_cpus(); | ||
1894 | |||
1895 | /* | ||
1896 | * Each pass through the following loop attempts to force a | ||
1897 | * context switch on each CPU. | ||
1898 | */ | ||
1899 | while (try_stop_cpus(cpu_online_mask, | ||
1900 | synchronize_sched_expedited_cpu_stop, | ||
1901 | NULL) == -EAGAIN) { | ||
1902 | put_online_cpus(); | ||
1903 | |||
1904 | /* No joy, try again later. Or just synchronize_sched(). */ | ||
1905 | if (trycount++ < 10) | ||
1906 | udelay(trycount * num_online_cpus()); | ||
1907 | else { | ||
1908 | synchronize_sched(); | ||
1909 | return; | ||
1910 | } | ||
1911 | |||
1912 | /* Check to see if someone else did our work for us. */ | ||
1913 | s = atomic_read(&sync_sched_expedited_done); | ||
1914 | if (UINT_CMP_GE((unsigned)s, (unsigned)firstsnap)) { | ||
1915 | smp_mb(); /* ensure test happens before caller kfree */ | ||
1916 | return; | ||
1917 | } | ||
1918 | |||
1919 | /* | ||
1920 | * Refetching sync_sched_expedited_started allows later | ||
1921 | * callers to piggyback on our grace period. We subtract | ||
1922 | * 1 to get the same token that the last incrementer got. | ||
1923 | * We retry after they started, so our grace period works | ||
1924 | * for them, and they started after our first try, so their | ||
1925 | * grace period works for us. | ||
1926 | */ | ||
1927 | get_online_cpus(); | ||
1928 | snap = atomic_read(&sync_sched_expedited_started); | ||
1929 | smp_mb(); /* ensure read is before try_stop_cpus(). */ | ||
1930 | } | ||
1931 | |||
1932 | /* | ||
1933 | * Everyone up to our most recent fetch is covered by our grace | ||
1934 | * period. Update the counter, but only if our work is still | ||
1935 | * relevant -- which it won't be if someone who started later | ||
1936 | * than we did beat us to the punch. | ||
1937 | */ | ||
1938 | do { | ||
1939 | s = atomic_read(&sync_sched_expedited_done); | ||
1940 | if (UINT_CMP_GE((unsigned)s, (unsigned)snap)) { | ||
1941 | smp_mb(); /* ensure test happens before caller kfree */ | ||
1942 | break; | ||
1943 | } | ||
1944 | } while (atomic_cmpxchg(&sync_sched_expedited_done, s, snap) != s); | ||
1945 | |||
1946 | put_online_cpus(); | ||
1947 | } | ||
1948 | EXPORT_SYMBOL_GPL(synchronize_sched_expedited); | ||
1949 | |||
1950 | #endif /* #else #ifndef CONFIG_SMP */ | ||
1951 | |||
1952 | #if !defined(CONFIG_RCU_FAST_NO_HZ) | 1902 | #if !defined(CONFIG_RCU_FAST_NO_HZ) |
1953 | 1903 | ||
1954 | /* | 1904 | /* |
@@ -1981,7 +1931,7 @@ static void rcu_cleanup_after_idle(int cpu) | |||
1981 | } | 1931 | } |
1982 | 1932 | ||
1983 | /* | 1933 | /* |
1984 | * Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=y, | 1934 | * Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=n, |
1985 | * is nothing. | 1935 | * is nothing. |
1986 | */ | 1936 | */ |
1987 | static void rcu_prepare_for_idle(int cpu) | 1937 | static void rcu_prepare_for_idle(int cpu) |
@@ -2015,6 +1965,9 @@ static void rcu_prepare_for_idle(int cpu) | |||
2015 | * number, be warned: Setting RCU_IDLE_GP_DELAY too high can hang your | 1965 | * number, be warned: Setting RCU_IDLE_GP_DELAY too high can hang your |
2016 | * system. And if you are -that- concerned about energy efficiency, | 1966 | * system. And if you are -that- concerned about energy efficiency, |
2017 | * just power the system down and be done with it! | 1967 | * just power the system down and be done with it! |
1968 | * RCU_IDLE_LAZY_GP_DELAY gives the number of jiffies that a CPU is | ||
1969 | * permitted to sleep in dyntick-idle mode with only lazy RCU | ||
1970 | * callbacks pending. Setting this too high can OOM your system. | ||
2018 | * | 1971 | * |
2019 | * The values below work well in practice. If future workloads require | 1972 | * The values below work well in practice. If future workloads require |
2020 | * adjustment, they can be converted into kernel config parameters, though | 1973 | * adjustment, they can be converted into kernel config parameters, though |
@@ -2023,11 +1976,13 @@ static void rcu_prepare_for_idle(int cpu) | |||
2023 | #define RCU_IDLE_FLUSHES 5 /* Number of dyntick-idle tries. */ | 1976 | #define RCU_IDLE_FLUSHES 5 /* Number of dyntick-idle tries. */ |
2024 | #define RCU_IDLE_OPT_FLUSHES 3 /* Optional dyntick-idle tries. */ | 1977 | #define RCU_IDLE_OPT_FLUSHES 3 /* Optional dyntick-idle tries. */ |
2025 | #define RCU_IDLE_GP_DELAY 6 /* Roughly one grace period. */ | 1978 | #define RCU_IDLE_GP_DELAY 6 /* Roughly one grace period. */ |
1979 | #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */ | ||
2026 | 1980 | ||
2027 | static DEFINE_PER_CPU(int, rcu_dyntick_drain); | 1981 | static DEFINE_PER_CPU(int, rcu_dyntick_drain); |
2028 | static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff); | 1982 | static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff); |
2029 | static DEFINE_PER_CPU(struct hrtimer, rcu_idle_gp_timer); | 1983 | static DEFINE_PER_CPU(struct hrtimer, rcu_idle_gp_timer); |
2030 | static ktime_t rcu_idle_gp_wait; | 1984 | static ktime_t rcu_idle_gp_wait; /* If some non-lazy callbacks. */ |
1985 | static ktime_t rcu_idle_lazy_gp_wait; /* If only lazy callbacks. */ | ||
2031 | 1986 | ||
2032 | /* | 1987 | /* |
2033 | * Allow the CPU to enter dyntick-idle mode if either: (1) There are no | 1988 | * Allow the CPU to enter dyntick-idle mode if either: (1) There are no |
@@ -2048,6 +2003,48 @@ int rcu_needs_cpu(int cpu) | |||
2048 | } | 2003 | } |
2049 | 2004 | ||
2050 | /* | 2005 | /* |
2006 | * Does the specified flavor of RCU have non-lazy callbacks pending on | ||
2007 | * the specified CPU? Both RCU flavor and CPU are specified by the | ||
2008 | * rcu_data structure. | ||
2009 | */ | ||
2010 | static bool __rcu_cpu_has_nonlazy_callbacks(struct rcu_data *rdp) | ||
2011 | { | ||
2012 | return rdp->qlen != rdp->qlen_lazy; | ||
2013 | } | ||
2014 | |||
2015 | #ifdef CONFIG_TREE_PREEMPT_RCU | ||
2016 | |||
2017 | /* | ||
2018 | * Are there non-lazy RCU-preempt callbacks? (There cannot be if there | ||
2019 | * is no RCU-preempt in the kernel.) | ||
2020 | */ | ||
2021 | static bool rcu_preempt_cpu_has_nonlazy_callbacks(int cpu) | ||
2022 | { | ||
2023 | struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); | ||
2024 | |||
2025 | return __rcu_cpu_has_nonlazy_callbacks(rdp); | ||
2026 | } | ||
2027 | |||
2028 | #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ | ||
2029 | |||
2030 | static bool rcu_preempt_cpu_has_nonlazy_callbacks(int cpu) | ||
2031 | { | ||
2032 | return 0; | ||
2033 | } | ||
2034 | |||
2035 | #endif /* else #ifdef CONFIG_TREE_PREEMPT_RCU */ | ||
2036 | |||
2037 | /* | ||
2038 | * Does any flavor of RCU have non-lazy callbacks on the specified CPU? | ||
2039 | */ | ||
2040 | static bool rcu_cpu_has_nonlazy_callbacks(int cpu) | ||
2041 | { | ||
2042 | return __rcu_cpu_has_nonlazy_callbacks(&per_cpu(rcu_sched_data, cpu)) || | ||
2043 | __rcu_cpu_has_nonlazy_callbacks(&per_cpu(rcu_bh_data, cpu)) || | ||
2044 | rcu_preempt_cpu_has_nonlazy_callbacks(cpu); | ||
2045 | } | ||
2046 | |||
2047 | /* | ||
2051 | * Timer handler used to force CPU to start pushing its remaining RCU | 2048 | * Timer handler used to force CPU to start pushing its remaining RCU |
2052 | * callbacks in the case where it entered dyntick-idle mode with callbacks | 2049 | * callbacks in the case where it entered dyntick-idle mode with callbacks |
2053 | * pending. The handler doesn't really need to do anything because the | 2050 | * pending. The handler doesn't really need to do anything because the |
@@ -2074,6 +2071,8 @@ static void rcu_prepare_for_idle_init(int cpu) | |||
2074 | unsigned int upj = jiffies_to_usecs(RCU_IDLE_GP_DELAY); | 2071 | unsigned int upj = jiffies_to_usecs(RCU_IDLE_GP_DELAY); |
2075 | 2072 | ||
2076 | rcu_idle_gp_wait = ns_to_ktime(upj * (u64)1000); | 2073 | rcu_idle_gp_wait = ns_to_ktime(upj * (u64)1000); |
2074 | upj = jiffies_to_usecs(RCU_IDLE_LAZY_GP_DELAY); | ||
2075 | rcu_idle_lazy_gp_wait = ns_to_ktime(upj * (u64)1000); | ||
2077 | firsttime = 0; | 2076 | firsttime = 0; |
2078 | } | 2077 | } |
2079 | } | 2078 | } |
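To make the units concrete, here is the same initialization annotated with a worked example assuming HZ=1000 (the values follow directly from the defines and conversions above):

	upj = jiffies_to_usecs(RCU_IDLE_GP_DELAY);		/* 6 jiffies -> 6000 us */
	rcu_idle_gp_wait = ns_to_ktime(upj * (u64)1000);	/* 6,000,000 ns, i.e. ~6 ms */
	upj = jiffies_to_usecs(RCU_IDLE_LAZY_GP_DELAY);		/* 6 * HZ = 6000 jiffies -> 6,000,000 us */
	rcu_idle_lazy_gp_wait = ns_to_ktime(upj * (u64)1000);	/* 6,000,000,000 ns = 6 s for lazy-only CPUs */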
@@ -2109,10 +2108,6 @@ static void rcu_cleanup_after_idle(int cpu) | |||
2109 | */ | 2108 | */ |
2110 | static void rcu_prepare_for_idle(int cpu) | 2109 | static void rcu_prepare_for_idle(int cpu) |
2111 | { | 2110 | { |
2112 | unsigned long flags; | ||
2113 | |||
2114 | local_irq_save(flags); | ||
2115 | |||
2116 | /* | 2111 | /* |
2117 | * If there are no callbacks on this CPU, enter dyntick-idle mode. | 2112 | * If there are no callbacks on this CPU, enter dyntick-idle mode. |
2118 | * Also reset state to avoid prejudicing later attempts. | 2113 | * Also reset state to avoid prejudicing later attempts. |
@@ -2120,7 +2115,6 @@ static void rcu_prepare_for_idle(int cpu) | |||
2120 | if (!rcu_cpu_has_callbacks(cpu)) { | 2115 | if (!rcu_cpu_has_callbacks(cpu)) { |
2121 | per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; | 2116 | per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; |
2122 | per_cpu(rcu_dyntick_drain, cpu) = 0; | 2117 | per_cpu(rcu_dyntick_drain, cpu) = 0; |
2123 | local_irq_restore(flags); | ||
2124 | trace_rcu_prep_idle("No callbacks"); | 2118 | trace_rcu_prep_idle("No callbacks"); |
2125 | return; | 2119 | return; |
2126 | } | 2120 | } |
@@ -2130,7 +2124,6 @@ static void rcu_prepare_for_idle(int cpu) | |||
2130 | * refrained from disabling the scheduling-clock tick. | 2124 | * refrained from disabling the scheduling-clock tick. |
2131 | */ | 2125 | */ |
2132 | if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies) { | 2126 | if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies) { |
2133 | local_irq_restore(flags); | ||
2134 | trace_rcu_prep_idle("In holdoff"); | 2127 | trace_rcu_prep_idle("In holdoff"); |
2135 | return; | 2128 | return; |
2136 | } | 2129 | } |
@@ -2140,18 +2133,22 @@ static void rcu_prepare_for_idle(int cpu) | |||
2140 | /* First time through, initialize the counter. */ | 2133 | /* First time through, initialize the counter. */ |
2141 | per_cpu(rcu_dyntick_drain, cpu) = RCU_IDLE_FLUSHES; | 2134 | per_cpu(rcu_dyntick_drain, cpu) = RCU_IDLE_FLUSHES; |
2142 | } else if (per_cpu(rcu_dyntick_drain, cpu) <= RCU_IDLE_OPT_FLUSHES && | 2135 | } else if (per_cpu(rcu_dyntick_drain, cpu) <= RCU_IDLE_OPT_FLUSHES && |
2143 | !rcu_pending(cpu)) { | 2136 | !rcu_pending(cpu) && |
2137 | !local_softirq_pending()) { | ||
2144 | /* Can we go dyntick-idle despite still having callbacks? */ | 2138 | /* Can we go dyntick-idle despite still having callbacks? */ |
2145 | trace_rcu_prep_idle("Dyntick with callbacks"); | 2139 | trace_rcu_prep_idle("Dyntick with callbacks"); |
2146 | per_cpu(rcu_dyntick_drain, cpu) = 0; | 2140 | per_cpu(rcu_dyntick_drain, cpu) = 0; |
2147 | per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; | 2141 | per_cpu(rcu_dyntick_holdoff, cpu) = jiffies; |
2148 | hrtimer_start(&per_cpu(rcu_idle_gp_timer, cpu), | 2142 | if (rcu_cpu_has_nonlazy_callbacks(cpu)) |
2149 | rcu_idle_gp_wait, HRTIMER_MODE_REL); | 2143 | hrtimer_start(&per_cpu(rcu_idle_gp_timer, cpu), |
2144 | rcu_idle_gp_wait, HRTIMER_MODE_REL); | ||
2145 | else | ||
2146 | hrtimer_start(&per_cpu(rcu_idle_gp_timer, cpu), | ||
2147 | rcu_idle_lazy_gp_wait, HRTIMER_MODE_REL); | ||
2150 | return; /* Nothing more to do immediately. */ | 2148 | return; /* Nothing more to do immediately. */ |
2151 | } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) { | 2149 | } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) { |
2152 | /* We have hit the limit, so time to give up. */ | 2150 | /* We have hit the limit, so time to give up. */ |
2153 | per_cpu(rcu_dyntick_holdoff, cpu) = jiffies; | 2151 | per_cpu(rcu_dyntick_holdoff, cpu) = jiffies; |
2154 | local_irq_restore(flags); | ||
2155 | trace_rcu_prep_idle("Begin holdoff"); | 2152 | trace_rcu_prep_idle("Begin holdoff"); |
2156 | invoke_rcu_core(); /* Force the CPU out of dyntick-idle. */ | 2153 | invoke_rcu_core(); /* Force the CPU out of dyntick-idle. */ |
2157 | return; | 2154 | return; |
@@ -2163,23 +2160,17 @@ static void rcu_prepare_for_idle(int cpu) | |||
2163 | */ | 2160 | */ |
2164 | #ifdef CONFIG_TREE_PREEMPT_RCU | 2161 | #ifdef CONFIG_TREE_PREEMPT_RCU |
2165 | if (per_cpu(rcu_preempt_data, cpu).nxtlist) { | 2162 | if (per_cpu(rcu_preempt_data, cpu).nxtlist) { |
2166 | local_irq_restore(flags); | ||
2167 | rcu_preempt_qs(cpu); | 2163 | rcu_preempt_qs(cpu); |
2168 | force_quiescent_state(&rcu_preempt_state, 0); | 2164 | force_quiescent_state(&rcu_preempt_state, 0); |
2169 | local_irq_save(flags); | ||
2170 | } | 2165 | } |
2171 | #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ | 2166 | #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ |
2172 | if (per_cpu(rcu_sched_data, cpu).nxtlist) { | 2167 | if (per_cpu(rcu_sched_data, cpu).nxtlist) { |
2173 | local_irq_restore(flags); | ||
2174 | rcu_sched_qs(cpu); | 2168 | rcu_sched_qs(cpu); |
2175 | force_quiescent_state(&rcu_sched_state, 0); | 2169 | force_quiescent_state(&rcu_sched_state, 0); |
2176 | local_irq_save(flags); | ||
2177 | } | 2170 | } |
2178 | if (per_cpu(rcu_bh_data, cpu).nxtlist) { | 2171 | if (per_cpu(rcu_bh_data, cpu).nxtlist) { |
2179 | local_irq_restore(flags); | ||
2180 | rcu_bh_qs(cpu); | 2172 | rcu_bh_qs(cpu); |
2181 | force_quiescent_state(&rcu_bh_state, 0); | 2173 | force_quiescent_state(&rcu_bh_state, 0); |
2182 | local_irq_save(flags); | ||
2183 | } | 2174 | } |
2184 | 2175 | ||
2185 | /* | 2176 | /* |
@@ -2187,13 +2178,124 @@ static void rcu_prepare_for_idle(int cpu) | |||
2187 | * So try forcing the callbacks through the grace period. | 2178 | * So try forcing the callbacks through the grace period. |
2188 | */ | 2179 | */ |
2189 | if (rcu_cpu_has_callbacks(cpu)) { | 2180 | if (rcu_cpu_has_callbacks(cpu)) { |
2190 | local_irq_restore(flags); | ||
2191 | trace_rcu_prep_idle("More callbacks"); | 2181 | trace_rcu_prep_idle("More callbacks"); |
2192 | invoke_rcu_core(); | 2182 | invoke_rcu_core(); |
2193 | } else { | 2183 | } else |
2194 | local_irq_restore(flags); | ||
2195 | trace_rcu_prep_idle("Callbacks drained"); | 2184 | trace_rcu_prep_idle("Callbacks drained"); |
2196 | } | ||
2197 | } | 2185 | } |
2198 | 2186 | ||
2199 | #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ | 2187 | #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ |
2188 | |||
2189 | #ifdef CONFIG_RCU_CPU_STALL_INFO | ||
2190 | |||
2191 | #ifdef CONFIG_RCU_FAST_NO_HZ | ||
2192 | |||
2193 | static void print_cpu_stall_fast_no_hz(char *cp, int cpu) | ||
2194 | { | ||
2195 | struct hrtimer *hrtp = &per_cpu(rcu_idle_gp_timer, cpu); | ||
2196 | |||
2197 | sprintf(cp, "drain=%d %c timer=%lld", | ||
2198 | per_cpu(rcu_dyntick_drain, cpu), | ||
2199 | per_cpu(rcu_dyntick_holdoff, cpu) == jiffies ? 'H' : '.', | ||
2200 | hrtimer_active(hrtp) | ||
2201 | ? ktime_to_us(hrtimer_get_remaining(hrtp)) | ||
2202 | : -1); | ||
2203 | } | ||
2204 | |||
2205 | #else /* #ifdef CONFIG_RCU_FAST_NO_HZ */ | ||
2206 | |||
2207 | static void print_cpu_stall_fast_no_hz(char *cp, int cpu) | ||
2208 | { | ||
2209 | } | ||
2210 | |||
2211 | #endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */ | ||
2212 | |||
2213 | /* Initiate the stall-info list. */ | ||
2214 | static void print_cpu_stall_info_begin(void) | ||
2215 | { | ||
2216 | printk(KERN_CONT "\n"); | ||
2217 | } | ||
2218 | |||
2219 | /* | ||
2220 | * Print out diagnostic information for the specified stalled CPU. | ||
2221 | * | ||
2222 | * If the specified CPU is aware of the current RCU grace period | ||
2223 | * (flavor specified by rsp), then print the number of scheduling | ||
2224 | * clock interrupts the CPU has taken during the time that it has | ||
2225 | * been aware. Otherwise, print the number of RCU grace periods | ||
2226 | * that this CPU is ignorant of, for example, "1" if the CPU was | ||
2227 | * aware of the previous grace period. | ||
2228 | * | ||
2229 | * Also print out idle and (if CONFIG_RCU_FAST_NO_HZ) idle-entry info. | ||
2230 | */ | ||
2231 | static void print_cpu_stall_info(struct rcu_state *rsp, int cpu) | ||
2232 | { | ||
2233 | char fast_no_hz[72]; | ||
2234 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); | ||
2235 | struct rcu_dynticks *rdtp = rdp->dynticks; | ||
2236 | char *ticks_title; | ||
2237 | unsigned long ticks_value; | ||
2238 | |||
2239 | if (rsp->gpnum == rdp->gpnum) { | ||
2240 | ticks_title = "ticks this GP"; | ||
2241 | ticks_value = rdp->ticks_this_gp; | ||
2242 | } else { | ||
2243 | ticks_title = "GPs behind"; | ||
2244 | ticks_value = rsp->gpnum - rdp->gpnum; | ||
2245 | } | ||
2246 | print_cpu_stall_fast_no_hz(fast_no_hz, cpu); | ||
2247 | printk(KERN_ERR "\t%d: (%lu %s) idle=%03x/%llx/%d %s\n", | ||
2248 | cpu, ticks_value, ticks_title, | ||
2249 | atomic_read(&rdtp->dynticks) & 0xfff, | ||
2250 | rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting, | ||
2251 | fast_no_hz); | ||
2252 | } | ||
2253 | |||
2254 | /* Terminate the stall-info list. */ | ||
2255 | static void print_cpu_stall_info_end(void) | ||
2256 | { | ||
2257 | printk(KERN_ERR "\t"); | ||
2258 | } | ||
2259 | |||
2260 | /* Zero ->ticks_this_gp for all flavors of RCU. */ | ||
2261 | static void zero_cpu_stall_ticks(struct rcu_data *rdp) | ||
2262 | { | ||
2263 | rdp->ticks_this_gp = 0; | ||
2264 | } | ||
2265 | |||
2266 | /* Increment ->ticks_this_gp for all flavors of RCU. */ | ||
2267 | static void increment_cpu_stall_ticks(void) | ||
2268 | { | ||
2269 | __get_cpu_var(rcu_sched_data).ticks_this_gp++; | ||
2270 | __get_cpu_var(rcu_bh_data).ticks_this_gp++; | ||
2271 | #ifdef CONFIG_TREE_PREEMPT_RCU | ||
2272 | __get_cpu_var(rcu_preempt_data).ticks_this_gp++; | ||
2273 | #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ | ||
2274 | } | ||
2275 | |||
2276 | #else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */ | ||
2277 | |||
2278 | static void print_cpu_stall_info_begin(void) | ||
2279 | { | ||
2280 | printk(KERN_CONT " {"); | ||
2281 | } | ||
2282 | |||
2283 | static void print_cpu_stall_info(struct rcu_state *rsp, int cpu) | ||
2284 | { | ||
2285 | printk(KERN_CONT " %d", cpu); | ||
2286 | } | ||
2287 | |||
2288 | static void print_cpu_stall_info_end(void) | ||
2289 | { | ||
2290 | printk(KERN_CONT "} "); | ||
2291 | } | ||
2292 | |||
2293 | static void zero_cpu_stall_ticks(struct rcu_data *rdp) | ||
2294 | { | ||
2295 | } | ||
2296 | |||
2297 | static void increment_cpu_stall_ticks(void) | ||
2298 | { | ||
2299 | } | ||
2300 | |||
2301 | #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_INFO */ | ||