diff options
Diffstat (limited to 'kernel/rcutree_plugin.h')
| -rw-r--r-- | kernel/rcutree_plugin.h | 450 |
1 files changed, 276 insertions, 174 deletions
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 8bb35d73e1f9..c023464816be 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h | |||
| @@ -25,7 +25,6 @@ | |||
| 25 | */ | 25 | */ |
| 26 | 26 | ||
| 27 | #include <linux/delay.h> | 27 | #include <linux/delay.h> |
| 28 | #include <linux/stop_machine.h> | ||
| 29 | 28 | ||
| 30 | #define RCU_KTHREAD_PRIO 1 | 29 | #define RCU_KTHREAD_PRIO 1 |
| 31 | 30 | ||
| @@ -63,7 +62,10 @@ static void __init rcu_bootup_announce_oddness(void) | |||
| 63 | printk(KERN_INFO "\tRCU torture testing starts during boot.\n"); | 62 | printk(KERN_INFO "\tRCU torture testing starts during boot.\n"); |
| 64 | #endif | 63 | #endif |
| 65 | #if defined(CONFIG_TREE_PREEMPT_RCU) && !defined(CONFIG_RCU_CPU_STALL_VERBOSE) | 64 | #if defined(CONFIG_TREE_PREEMPT_RCU) && !defined(CONFIG_RCU_CPU_STALL_VERBOSE) |
| 66 | printk(KERN_INFO "\tVerbose stalled-CPUs detection is disabled.\n"); | 65 | printk(KERN_INFO "\tDump stacks of tasks blocking RCU-preempt GP.\n"); |
| 66 | #endif | ||
| 67 | #if defined(CONFIG_RCU_CPU_STALL_INFO) | ||
| 68 | printk(KERN_INFO "\tAdditional per-CPU info printed with stalls.\n"); | ||
| 67 | #endif | 69 | #endif |
| 68 | #if NUM_RCU_LVL_4 != 0 | 70 | #if NUM_RCU_LVL_4 != 0 |
| 69 | printk(KERN_INFO "\tExperimental four-level hierarchy is enabled.\n"); | 71 | printk(KERN_INFO "\tExperimental four-level hierarchy is enabled.\n"); |
| @@ -490,6 +492,31 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp) | |||
| 490 | 492 | ||
| 491 | #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */ | 493 | #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */ |
| 492 | 494 | ||
| 495 | #ifdef CONFIG_RCU_CPU_STALL_INFO | ||
| 496 | |||
| 497 | static void rcu_print_task_stall_begin(struct rcu_node *rnp) | ||
| 498 | { | ||
| 499 | printk(KERN_ERR "\tTasks blocked on level-%d rcu_node (CPUs %d-%d):", | ||
| 500 | rnp->level, rnp->grplo, rnp->grphi); | ||
| 501 | } | ||
| 502 | |||
| 503 | static void rcu_print_task_stall_end(void) | ||
| 504 | { | ||
| 505 | printk(KERN_CONT "\n"); | ||
| 506 | } | ||
| 507 | |||
| 508 | #else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */ | ||
| 509 | |||
| 510 | static void rcu_print_task_stall_begin(struct rcu_node *rnp) | ||
| 511 | { | ||
| 512 | } | ||
| 513 | |||
| 514 | static void rcu_print_task_stall_end(void) | ||
| 515 | { | ||
| 516 | } | ||
| 517 | |||
| 518 | #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_INFO */ | ||
| 519 | |||
| 493 | /* | 520 | /* |
| 494 | * Scan the current list of tasks blocked within RCU read-side critical | 521 | * Scan the current list of tasks blocked within RCU read-side critical |
| 495 | * sections, printing out the tid of each. | 522 | * sections, printing out the tid of each. |
| @@ -501,12 +528,14 @@ static int rcu_print_task_stall(struct rcu_node *rnp) | |||
| 501 | 528 | ||
| 502 | if (!rcu_preempt_blocked_readers_cgp(rnp)) | 529 | if (!rcu_preempt_blocked_readers_cgp(rnp)) |
| 503 | return 0; | 530 | return 0; |
| 531 | rcu_print_task_stall_begin(rnp); | ||
| 504 | t = list_entry(rnp->gp_tasks, | 532 | t = list_entry(rnp->gp_tasks, |
| 505 | struct task_struct, rcu_node_entry); | 533 | struct task_struct, rcu_node_entry); |
| 506 | list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { | 534 | list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { |
| 507 | printk(" P%d", t->pid); | 535 | printk(KERN_CONT " P%d", t->pid); |
| 508 | ndetected++; | 536 | ndetected++; |
| 509 | } | 537 | } |
| 538 | rcu_print_task_stall_end(); | ||
| 510 | return ndetected; | 539 | return ndetected; |
| 511 | } | 540 | } |
| 512 | 541 | ||
| @@ -581,7 +610,7 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, | |||
| 581 | * absolutely necessary, but this is a good performance/complexity | 610 | * absolutely necessary, but this is a good performance/complexity |
| 582 | * tradeoff. | 611 | * tradeoff. |
| 583 | */ | 612 | */ |
| 584 | if (rcu_preempt_blocked_readers_cgp(rnp)) | 613 | if (rcu_preempt_blocked_readers_cgp(rnp) && rnp->qsmask == 0) |
| 585 | retval |= RCU_OFL_TASKS_NORM_GP; | 614 | retval |= RCU_OFL_TASKS_NORM_GP; |
| 586 | if (rcu_preempted_readers_exp(rnp)) | 615 | if (rcu_preempted_readers_exp(rnp)) |
| 587 | retval |= RCU_OFL_TASKS_EXP_GP; | 616 | retval |= RCU_OFL_TASKS_EXP_GP; |
| @@ -618,16 +647,16 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, | |||
| 618 | return retval; | 647 | return retval; |
| 619 | } | 648 | } |
| 620 | 649 | ||
| 650 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | ||
| 651 | |||
| 621 | /* | 652 | /* |
| 622 | * Do CPU-offline processing for preemptible RCU. | 653 | * Do CPU-offline processing for preemptible RCU. |
| 623 | */ | 654 | */ |
| 624 | static void rcu_preempt_offline_cpu(int cpu) | 655 | static void rcu_preempt_cleanup_dead_cpu(int cpu) |
| 625 | { | 656 | { |
| 626 | __rcu_offline_cpu(cpu, &rcu_preempt_state); | 657 | rcu_cleanup_dead_cpu(cpu, &rcu_preempt_state); |
| 627 | } | 658 | } |
| 628 | 659 | ||
| 629 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | ||
| 630 | |||
| 631 | /* | 660 | /* |
| 632 | * Check for a quiescent state from the current CPU. When a task blocks, | 661 | * Check for a quiescent state from the current CPU. When a task blocks, |
| 633 | * the task is recorded in the corresponding CPU's rcu_node structure, | 662 | * the task is recorded in the corresponding CPU's rcu_node structure, |
| @@ -671,10 +700,24 @@ static void rcu_preempt_do_callbacks(void) | |||
| 671 | */ | 700 | */ |
| 672 | void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | 701 | void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) |
| 673 | { | 702 | { |
| 674 | __call_rcu(head, func, &rcu_preempt_state); | 703 | __call_rcu(head, func, &rcu_preempt_state, 0); |
| 675 | } | 704 | } |
| 676 | EXPORT_SYMBOL_GPL(call_rcu); | 705 | EXPORT_SYMBOL_GPL(call_rcu); |
| 677 | 706 | ||
| 707 | /* | ||
| 708 | * Queue an RCU callback for lazy invocation after a grace period. | ||
| 709 | * This will likely be later named something like "call_rcu_lazy()", | ||
| 710 | * but this change will require some way of tagging the lazy RCU | ||
| 711 | * callbacks in the list of pending callbacks. Until then, this | ||
| 712 | * function may only be called from __kfree_rcu(). | ||
| 713 | */ | ||
| 714 | void kfree_call_rcu(struct rcu_head *head, | ||
| 715 | void (*func)(struct rcu_head *rcu)) | ||
| 716 | { | ||
| 717 | __call_rcu(head, func, &rcu_preempt_state, 1); | ||
| 718 | } | ||
| 719 | EXPORT_SYMBOL_GPL(kfree_call_rcu); | ||
| 720 | |||
| 678 | /** | 721 | /** |
| 679 | * synchronize_rcu - wait until a grace period has elapsed. | 722 | * synchronize_rcu - wait until a grace period has elapsed. |
| 680 | * | 723 | * |
| @@ -688,6 +731,10 @@ EXPORT_SYMBOL_GPL(call_rcu); | |||
| 688 | */ | 731 | */ |
| 689 | void synchronize_rcu(void) | 732 | void synchronize_rcu(void) |
| 690 | { | 733 | { |
| 734 | rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) && | ||
| 735 | !lock_is_held(&rcu_lock_map) && | ||
| 736 | !lock_is_held(&rcu_sched_lock_map), | ||
| 737 | "Illegal synchronize_rcu() in RCU read-side critical section"); | ||
| 691 | if (!rcu_scheduler_active) | 738 | if (!rcu_scheduler_active) |
| 692 | return; | 739 | return; |
| 693 | wait_rcu_gp(call_rcu); | 740 | wait_rcu_gp(call_rcu); |
| @@ -788,10 +835,22 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp) | |||
| 788 | rcu_report_exp_rnp(rsp, rnp, false); /* Don't wake self. */ | 835 | rcu_report_exp_rnp(rsp, rnp, false); /* Don't wake self. */ |
| 789 | } | 836 | } |
| 790 | 837 | ||
| 791 | /* | 838 | /** |
| 792 | * Wait for an rcu-preempt grace period, but expedite it. The basic idea | 839 | * synchronize_rcu_expedited - Brute-force RCU grace period |
| 793 | * is to invoke synchronize_sched_expedited() to push all the tasks to | 840 | * |
| 794 | * the ->blkd_tasks lists and wait for this list to drain. | 841 | * Wait for an RCU-preempt grace period, but expedite it. The basic |
| 842 | * idea is to invoke synchronize_sched_expedited() to push all the tasks to | ||
| 843 | * the ->blkd_tasks lists and wait for this list to drain. This consumes | ||
| 844 | * significant time on all CPUs and is unfriendly to real-time workloads, | ||
| 846 | * and is thus not recommended for any sort of common-case code. | ||
| 846 | * In fact, if you are using synchronize_rcu_expedited() in a loop, | ||
| 848 | * please restructure your code to batch your updates, and then use a | ||
| 848 | * single synchronize_rcu() instead. | ||
| 849 | * | ||
| 850 | * Note that it is illegal to call this function while holding any lock | ||
| 851 | * that is acquired by a CPU-hotplug notifier. And yes, it is also illegal | ||
| 852 | * to call this function from a CPU-hotplug notifier. Failing to observe | ||
| 853 | * these restrictions will result in deadlock. | ||
| 795 | */ | 854 | */ |
| 796 | void synchronize_rcu_expedited(void) | 855 | void synchronize_rcu_expedited(void) |
| 797 | { | 856 | { |
| @@ -869,9 +928,9 @@ static int rcu_preempt_pending(int cpu) | |||
| 869 | } | 928 | } |
| 870 | 929 | ||
| 871 | /* | 930 | /* |
| 872 | * Does preemptible RCU need the CPU to stay out of dynticks mode? | 931 | * Does preemptible RCU have callbacks on this CPU? |
| 873 | */ | 932 | */ |
| 874 | static int rcu_preempt_needs_cpu(int cpu) | 933 | static int rcu_preempt_cpu_has_callbacks(int cpu) |
| 875 | { | 934 | { |
| 876 | return !!per_cpu(rcu_preempt_data, cpu).nxtlist; | 935 | return !!per_cpu(rcu_preempt_data, cpu).nxtlist; |
| 877 | } | 936 | } |
| @@ -894,11 +953,12 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu) | |||
| 894 | } | 953 | } |
| 895 | 954 | ||
| 896 | /* | 955 | /* |
| 897 | * Move preemptible RCU's callbacks from dying CPU to other online CPU. | 956 | * Move preemptible RCU's callbacks from dying CPU to other online CPU |
| 957 | * and record a quiescent state. | ||
| 898 | */ | 958 | */ |
| 899 | static void rcu_preempt_send_cbs_to_online(void) | 959 | static void rcu_preempt_cleanup_dying_cpu(void) |
| 900 | { | 960 | { |
| 901 | rcu_send_cbs_to_online(&rcu_preempt_state); | 961 | rcu_cleanup_dying_cpu(&rcu_preempt_state); |
| 902 | } | 962 | } |
| 903 | 963 | ||
| 904 | /* | 964 | /* |
| @@ -1034,16 +1094,16 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, | |||
| 1034 | return 0; | 1094 | return 0; |
| 1035 | } | 1095 | } |
| 1036 | 1096 | ||
| 1097 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | ||
| 1098 | |||
| 1037 | /* | 1099 | /* |
| 1038 | * Because preemptible RCU does not exist, it never needs CPU-offline | 1100 | * Because preemptible RCU does not exist, it never needs CPU-offline |
| 1039 | * processing. | 1101 | * processing. |
| 1040 | */ | 1102 | */ |
| 1041 | static void rcu_preempt_offline_cpu(int cpu) | 1103 | static void rcu_preempt_cleanup_dead_cpu(int cpu) |
| 1042 | { | 1104 | { |
| 1043 | } | 1105 | } |
| 1044 | 1106 | ||
| 1045 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | ||
| 1046 | |||
| 1047 | /* | 1107 | /* |
| 1048 | * Because preemptible RCU does not exist, it never has any callbacks | 1108 | * Because preemptible RCU does not exist, it never has any callbacks |
| 1049 | * to check. | 1109 | * to check. |
| @@ -1061,6 +1121,22 @@ static void rcu_preempt_process_callbacks(void) | |||
| 1061 | } | 1121 | } |
| 1062 | 1122 | ||
| 1063 | /* | 1123 | /* |
| 1124 | * Queue an RCU callback for lazy invocation after a grace period. | ||
| 1125 | * This will likely be later named something like "call_rcu_lazy()", | ||
| 1126 | * but this change will require some way of tagging the lazy RCU | ||
| 1127 | * callbacks in the list of pending callbacks. Until then, this | ||
| 1128 | * function may only be called from __kfree_rcu(). | ||
| 1129 | * | ||
| 1130 | * Because there is no preemptible RCU, we use RCU-sched instead. | ||
| 1131 | */ | ||
| 1132 | void kfree_call_rcu(struct rcu_head *head, | ||
| 1133 | void (*func)(struct rcu_head *rcu)) | ||
| 1134 | { | ||
| 1135 | __call_rcu(head, func, &rcu_sched_state, 1); | ||
| 1136 | } | ||
| 1137 | EXPORT_SYMBOL_GPL(kfree_call_rcu); | ||
| 1138 | |||
| 1139 | /* | ||
| 1064 | * Wait for an rcu-preempt grace period, but make it happen quickly. | 1140 | * Wait for an rcu-preempt grace period, but make it happen quickly. |
| 1065 | * But because preemptible RCU does not exist, map to rcu-sched. | 1141 | * But because preemptible RCU does not exist, map to rcu-sched. |
| 1066 | */ | 1142 | */ |
| @@ -1093,9 +1169,9 @@ static int rcu_preempt_pending(int cpu) | |||
| 1093 | } | 1169 | } |
| 1094 | 1170 | ||
| 1095 | /* | 1171 | /* |
| 1096 | * Because preemptible RCU does not exist, it never needs any CPU. | 1172 | * Because preemptible RCU does not exist, it never has callbacks. |
| 1097 | */ | 1173 | */ |
| 1098 | static int rcu_preempt_needs_cpu(int cpu) | 1174 | static int rcu_preempt_cpu_has_callbacks(int cpu) |
| 1099 | { | 1175 | { |
| 1100 | return 0; | 1176 | return 0; |
| 1101 | } | 1177 | } |
| @@ -1119,9 +1195,9 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu) | |||
| 1119 | } | 1195 | } |
| 1120 | 1196 | ||
| 1121 | /* | 1197 | /* |
| 1122 | * Because there is no preemptible RCU, there are no callbacks to move. | 1198 | * Because there is no preemptible RCU, there is no cleanup to do. |
| 1123 | */ | 1199 | */ |
| 1124 | static void rcu_preempt_send_cbs_to_online(void) | 1200 | static void rcu_preempt_cleanup_dying_cpu(void) |
| 1125 | { | 1201 | { |
| 1126 | } | 1202 | } |
| 1127 | 1203 | ||
| @@ -1823,132 +1899,6 @@ static void __cpuinit rcu_prepare_kthreads(int cpu) | |||
| 1823 | 1899 | ||
| 1824 | #endif /* #else #ifdef CONFIG_RCU_BOOST */ | 1900 | #endif /* #else #ifdef CONFIG_RCU_BOOST */ |
| 1825 | 1901 | ||
| 1826 | #ifndef CONFIG_SMP | ||
| 1827 | |||
| 1828 | void synchronize_sched_expedited(void) | ||
| 1829 | { | ||
| 1830 | cond_resched(); | ||
| 1831 | } | ||
| 1832 | EXPORT_SYMBOL_GPL(synchronize_sched_expedited); | ||
| 1833 | |||
| 1834 | #else /* #ifndef CONFIG_SMP */ | ||
| 1835 | |||
| 1836 | static atomic_t sync_sched_expedited_started = ATOMIC_INIT(0); | ||
| 1837 | static atomic_t sync_sched_expedited_done = ATOMIC_INIT(0); | ||
| 1838 | |||
| 1839 | static int synchronize_sched_expedited_cpu_stop(void *data) | ||
| 1840 | { | ||
| 1841 | /* | ||
| 1842 | * There must be a full memory barrier on each affected CPU | ||
| 1843 | * between the time that try_stop_cpus() is called and the | ||
| 1844 | * time that it returns. | ||
| 1845 | * | ||
| 1846 | * In the current initial implementation of cpu_stop, the | ||
| 1847 | * above condition is already met when the control reaches | ||
| 1848 | * this point and the following smp_mb() is not strictly | ||
| 1849 | * necessary. Do smp_mb() anyway for documentation and | ||
| 1850 | * robustness against future implementation changes. | ||
| 1851 | */ | ||
| 1852 | smp_mb(); /* See above comment block. */ | ||
| 1853 | return 0; | ||
| 1854 | } | ||
| 1855 | |||
| 1856 | /* | ||
| 1857 | * Wait for an rcu-sched grace period to elapse, but use "big hammer" | ||
| 1858 | * approach to force grace period to end quickly. This consumes | ||
| 1859 | * significant time on all CPUs, and is thus not recommended for | ||
| 1860 | * any sort of common-case code. | ||
| 1861 | * | ||
| 1862 | * Note that it is illegal to call this function while holding any | ||
| 1863 | * lock that is acquired by a CPU-hotplug notifier. Failing to | ||
| 1864 | * observe this restriction will result in deadlock. | ||
| 1865 | * | ||
| 1866 | * This implementation can be thought of as an application of ticket | ||
| 1867 | * locking to RCU, with sync_sched_expedited_started and | ||
| 1868 | * sync_sched_expedited_done taking on the roles of the halves | ||
| 1869 | * of the ticket-lock word. Each task atomically increments | ||
| 1870 | * sync_sched_expedited_started upon entry, snapshotting the old value, | ||
| 1871 | * then attempts to stop all the CPUs. If this succeeds, then each | ||
| 1872 | * CPU will have executed a context switch, resulting in an RCU-sched | ||
| 1873 | * grace period. We are then done, so we use atomic_cmpxchg() to | ||
| 1874 | * update sync_sched_expedited_done to match our snapshot -- but | ||
| 1875 | * only if someone else has not already advanced past our snapshot. | ||
| 1876 | * | ||
| 1877 | * On the other hand, if try_stop_cpus() fails, we check the value | ||
| 1878 | * of sync_sched_expedited_done. If it has advanced past our | ||
| 1879 | * initial snapshot, then someone else must have forced a grace period | ||
| 1880 | * some time after we took our snapshot. In this case, our work is | ||
| 1881 | * done for us, and we can simply return. Otherwise, we try again, | ||
| 1882 | * but keep our initial snapshot for purposes of checking for someone | ||
| 1883 | * doing our work for us. | ||
| 1884 | * | ||
| 1885 | * If we fail too many times in a row, we fall back to synchronize_sched(). | ||
| 1886 | */ | ||
| 1887 | void synchronize_sched_expedited(void) | ||
| 1888 | { | ||
| 1889 | int firstsnap, s, snap, trycount = 0; | ||
| 1890 | |||
| 1891 | /* Note that atomic_inc_return() implies full memory barrier. */ | ||
| 1892 | firstsnap = snap = atomic_inc_return(&sync_sched_expedited_started); | ||
| 1893 | get_online_cpus(); | ||
| 1894 | |||
| 1895 | /* | ||
| 1896 | * Each pass through the following loop attempts to force a | ||
| 1897 | * context switch on each CPU. | ||
| 1898 | */ | ||
| 1899 | while (try_stop_cpus(cpu_online_mask, | ||
| 1900 | synchronize_sched_expedited_cpu_stop, | ||
| 1901 | NULL) == -EAGAIN) { | ||
| 1902 | put_online_cpus(); | ||
| 1903 | |||
| 1904 | /* No joy, try again later. Or just synchronize_sched(). */ | ||
| 1905 | if (trycount++ < 10) | ||
| 1906 | udelay(trycount * num_online_cpus()); | ||
| 1907 | else { | ||
| 1908 | synchronize_sched(); | ||
| 1909 | return; | ||
| 1910 | } | ||
| 1911 | |||
| 1912 | /* Check to see if someone else did our work for us. */ | ||
| 1913 | s = atomic_read(&sync_sched_expedited_done); | ||
| 1914 | if (UINT_CMP_GE((unsigned)s, (unsigned)firstsnap)) { | ||
| 1915 | smp_mb(); /* ensure test happens before caller kfree */ | ||
| 1916 | return; | ||
| 1917 | } | ||
| 1918 | |||
| 1919 | /* | ||
| 1920 | * Refetching sync_sched_expedited_started allows later | ||
| 1921 | * callers to piggyback on our grace period. We subtract | ||
| 1922 | * 1 to get the same token that the last incrementer got. | ||
| 1923 | * We retry after they started, so our grace period works | ||
| 1924 | * for them, and they started after our first try, so their | ||
| 1925 | * grace period works for us. | ||
| 1926 | */ | ||
| 1927 | get_online_cpus(); | ||
| 1928 | snap = atomic_read(&sync_sched_expedited_started); | ||
| 1929 | smp_mb(); /* ensure read is before try_stop_cpus(). */ | ||
| 1930 | } | ||
| 1931 | |||
| 1932 | /* | ||
| 1933 | * Everyone up to our most recent fetch is covered by our grace | ||
| 1934 | * period. Update the counter, but only if our work is still | ||
| 1935 | * relevant -- which it won't be if someone who started later | ||
| 1936 | * than we did beat us to the punch. | ||
| 1937 | */ | ||
| 1938 | do { | ||
| 1939 | s = atomic_read(&sync_sched_expedited_done); | ||
| 1940 | if (UINT_CMP_GE((unsigned)s, (unsigned)snap)) { | ||
| 1941 | smp_mb(); /* ensure test happens before caller kfree */ | ||
| 1942 | break; | ||
| 1943 | } | ||
| 1944 | } while (atomic_cmpxchg(&sync_sched_expedited_done, s, snap) != s); | ||
| 1945 | |||
| 1946 | put_online_cpus(); | ||
| 1947 | } | ||
| 1948 | EXPORT_SYMBOL_GPL(synchronize_sched_expedited); | ||
| 1949 | |||
| 1950 | #endif /* #else #ifndef CONFIG_SMP */ | ||
| 1951 | |||
| 1952 | #if !defined(CONFIG_RCU_FAST_NO_HZ) | 1902 | #if !defined(CONFIG_RCU_FAST_NO_HZ) |
| 1953 | 1903 | ||
| 1954 | /* | 1904 | /* |
| @@ -1981,7 +1931,7 @@ static void rcu_cleanup_after_idle(int cpu) | |||
| 1981 | } | 1931 | } |
| 1982 | 1932 | ||
| 1983 | /* | 1933 | /* |
| 1984 | * Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=y, | 1934 | * Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=n, |
| 1985 | * is nothing. | 1935 | * is nothing. |
| 1986 | */ | 1936 | */ |
| 1987 | static void rcu_prepare_for_idle(int cpu) | 1937 | static void rcu_prepare_for_idle(int cpu) |
| @@ -2015,6 +1965,9 @@ static void rcu_prepare_for_idle(int cpu) | |||
| 2015 | * number, be warned: Setting RCU_IDLE_GP_DELAY too high can hang your | 1965 | * number, be warned: Setting RCU_IDLE_GP_DELAY too high can hang your |
| 2016 | * system. And if you are -that- concerned about energy efficiency, | 1966 | * system. And if you are -that- concerned about energy efficiency, |
| 2017 | * just power the system down and be done with it! | 1967 | * just power the system down and be done with it! |
| 1968 | * RCU_IDLE_LAZY_GP_DELAY gives the number of jiffies that a CPU is | ||
| 1969 | * permitted to sleep in dyntick-idle mode with only lazy RCU | ||
| 1970 | * callbacks pending. Setting this too high can OOM your system. | ||
| 2018 | * | 1971 | * |
| 2019 | * The values below work well in practice. If future workloads require | 1972 | * The values below work well in practice. If future workloads require |
| 2020 | * adjustment, they can be converted into kernel config parameters, though | 1973 | * adjustment, they can be converted into kernel config parameters, though |
| @@ -2023,11 +1976,13 @@ static void rcu_prepare_for_idle(int cpu) | |||
| 2023 | #define RCU_IDLE_FLUSHES 5 /* Number of dyntick-idle tries. */ | 1976 | #define RCU_IDLE_FLUSHES 5 /* Number of dyntick-idle tries. */ |
| 2024 | #define RCU_IDLE_OPT_FLUSHES 3 /* Optional dyntick-idle tries. */ | 1977 | #define RCU_IDLE_OPT_FLUSHES 3 /* Optional dyntick-idle tries. */ |
| 2025 | #define RCU_IDLE_GP_DELAY 6 /* Roughly one grace period. */ | 1978 | #define RCU_IDLE_GP_DELAY 6 /* Roughly one grace period. */ |
| 1979 | #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */ | ||
| 2026 | 1980 | ||
| 2027 | static DEFINE_PER_CPU(int, rcu_dyntick_drain); | 1981 | static DEFINE_PER_CPU(int, rcu_dyntick_drain); |
| 2028 | static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff); | 1982 | static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff); |
| 2029 | static DEFINE_PER_CPU(struct hrtimer, rcu_idle_gp_timer); | 1983 | static DEFINE_PER_CPU(struct hrtimer, rcu_idle_gp_timer); |
| 2030 | static ktime_t rcu_idle_gp_wait; | 1984 | static ktime_t rcu_idle_gp_wait; /* If some non-lazy callbacks. */ |
| 1985 | static ktime_t rcu_idle_lazy_gp_wait; /* If only lazy callbacks. */ | ||
| 2031 | 1986 | ||
| 2032 | /* | 1987 | /* |
| 2033 | * Allow the CPU to enter dyntick-idle mode if either: (1) There are no | 1988 | * Allow the CPU to enter dyntick-idle mode if either: (1) There are no |
| @@ -2048,6 +2003,48 @@ int rcu_needs_cpu(int cpu) | |||
| 2048 | } | 2003 | } |
| 2049 | 2004 | ||
| 2050 | /* | 2005 | /* |
| 2006 | * Does the specified flavor of RCU have non-lazy callbacks pending on | ||
| 2007 | * the specified CPU? Both RCU flavor and CPU are specified by the | ||
| 2008 | * rcu_data structure. | ||
| 2009 | */ | ||
| 2010 | static bool __rcu_cpu_has_nonlazy_callbacks(struct rcu_data *rdp) | ||
| 2011 | { | ||
| 2012 | return rdp->qlen != rdp->qlen_lazy; | ||
| 2013 | } | ||
| 2014 | |||
| 2015 | #ifdef CONFIG_TREE_PREEMPT_RCU | ||
| 2016 | |||
| 2017 | /* | ||
| 2018 | * Are there non-lazy RCU-preempt callbacks? (There cannot be if there | ||
| 2019 | * is no RCU-preempt in the kernel.) | ||
| 2020 | */ | ||
| 2021 | static bool rcu_preempt_cpu_has_nonlazy_callbacks(int cpu) | ||
| 2022 | { | ||
| 2023 | struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); | ||
| 2024 | |||
| 2025 | return __rcu_cpu_has_nonlazy_callbacks(rdp); | ||
| 2026 | } | ||
| 2027 | |||
| 2028 | #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ | ||
| 2029 | |||
| 2030 | static bool rcu_preempt_cpu_has_nonlazy_callbacks(int cpu) | ||
| 2031 | { | ||
| 2032 | return 0; | ||
| 2033 | } | ||
| 2034 | |||
| 2035 | #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ | ||
| 2036 | |||
| 2037 | /* | ||
| 2038 | * Does any flavor of RCU have non-lazy callbacks on the specified CPU? | ||
| 2039 | */ | ||
| 2040 | static bool rcu_cpu_has_nonlazy_callbacks(int cpu) | ||
| 2041 | { | ||
| 2042 | return __rcu_cpu_has_nonlazy_callbacks(&per_cpu(rcu_sched_data, cpu)) || | ||
| 2043 | __rcu_cpu_has_nonlazy_callbacks(&per_cpu(rcu_bh_data, cpu)) || | ||
| 2044 | rcu_preempt_cpu_has_nonlazy_callbacks(cpu); | ||
| 2045 | } | ||
| 2046 | |||
| 2047 | /* | ||
| 2051 | * Timer handler used to force CPU to start pushing its remaining RCU | 2048 | * Timer handler used to force CPU to start pushing its remaining RCU |
| 2052 | * callbacks in the case where it entered dyntick-idle mode with callbacks | 2049 | * callbacks in the case where it entered dyntick-idle mode with callbacks |
| 2053 | * pending. The handler doesn't really need to do anything because the | 2050 | * pending. The handler doesn't really need to do anything because the |
| @@ -2074,6 +2071,8 @@ static void rcu_prepare_for_idle_init(int cpu) | |||
| 2074 | unsigned int upj = jiffies_to_usecs(RCU_IDLE_GP_DELAY); | 2071 | unsigned int upj = jiffies_to_usecs(RCU_IDLE_GP_DELAY); |
| 2075 | 2072 | ||
| 2076 | rcu_idle_gp_wait = ns_to_ktime(upj * (u64)1000); | 2073 | rcu_idle_gp_wait = ns_to_ktime(upj * (u64)1000); |
| 2074 | upj = jiffies_to_usecs(RCU_IDLE_LAZY_GP_DELAY); | ||
| 2075 | rcu_idle_lazy_gp_wait = ns_to_ktime(upj * (u64)1000); | ||
| 2077 | firsttime = 0; | 2076 | firsttime = 0; |
| 2078 | } | 2077 | } |
| 2079 | } | 2078 | } |
| @@ -2109,10 +2108,6 @@ static void rcu_cleanup_after_idle(int cpu) | |||
| 2109 | */ | 2108 | */ |
| 2110 | static void rcu_prepare_for_idle(int cpu) | 2109 | static void rcu_prepare_for_idle(int cpu) |
| 2111 | { | 2110 | { |
| 2112 | unsigned long flags; | ||
| 2113 | |||
| 2114 | local_irq_save(flags); | ||
| 2115 | |||
| 2116 | /* | 2111 | /* |
| 2117 | * If there are no callbacks on this CPU, enter dyntick-idle mode. | 2112 | * If there are no callbacks on this CPU, enter dyntick-idle mode. |
| 2118 | * Also reset state to avoid prejudicing later attempts. | 2113 | * Also reset state to avoid prejudicing later attempts. |
| @@ -2120,7 +2115,6 @@ static void rcu_prepare_for_idle(int cpu) | |||
| 2120 | if (!rcu_cpu_has_callbacks(cpu)) { | 2115 | if (!rcu_cpu_has_callbacks(cpu)) { |
| 2121 | per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; | 2116 | per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; |
| 2122 | per_cpu(rcu_dyntick_drain, cpu) = 0; | 2117 | per_cpu(rcu_dyntick_drain, cpu) = 0; |
| 2123 | local_irq_restore(flags); | ||
| 2124 | trace_rcu_prep_idle("No callbacks"); | 2118 | trace_rcu_prep_idle("No callbacks"); |
| 2125 | return; | 2119 | return; |
| 2126 | } | 2120 | } |
| @@ -2130,7 +2124,6 @@ static void rcu_prepare_for_idle(int cpu) | |||
| 2130 | * refrained from disabling the scheduling-clock tick. | 2124 | * refrained from disabling the scheduling-clock tick. |
| 2131 | */ | 2125 | */ |
| 2132 | if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies) { | 2126 | if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies) { |
| 2133 | local_irq_restore(flags); | ||
| 2134 | trace_rcu_prep_idle("In holdoff"); | 2127 | trace_rcu_prep_idle("In holdoff"); |
| 2135 | return; | 2128 | return; |
| 2136 | } | 2129 | } |
| @@ -2140,18 +2133,22 @@ static void rcu_prepare_for_idle(int cpu) | |||
| 2140 | /* First time through, initialize the counter. */ | 2133 | /* First time through, initialize the counter. */ |
| 2141 | per_cpu(rcu_dyntick_drain, cpu) = RCU_IDLE_FLUSHES; | 2134 | per_cpu(rcu_dyntick_drain, cpu) = RCU_IDLE_FLUSHES; |
| 2142 | } else if (per_cpu(rcu_dyntick_drain, cpu) <= RCU_IDLE_OPT_FLUSHES && | 2135 | } else if (per_cpu(rcu_dyntick_drain, cpu) <= RCU_IDLE_OPT_FLUSHES && |
| 2143 | !rcu_pending(cpu)) { | 2136 | !rcu_pending(cpu) && |
| 2137 | !local_softirq_pending()) { | ||
| 2144 | /* Can we go dyntick-idle despite still having callbacks? */ | 2138 | /* Can we go dyntick-idle despite still having callbacks? */ |
| 2145 | trace_rcu_prep_idle("Dyntick with callbacks"); | 2139 | trace_rcu_prep_idle("Dyntick with callbacks"); |
| 2146 | per_cpu(rcu_dyntick_drain, cpu) = 0; | 2140 | per_cpu(rcu_dyntick_drain, cpu) = 0; |
| 2147 | per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; | 2141 | per_cpu(rcu_dyntick_holdoff, cpu) = jiffies; |
| 2148 | hrtimer_start(&per_cpu(rcu_idle_gp_timer, cpu), | 2142 | if (rcu_cpu_has_nonlazy_callbacks(cpu)) |
| 2149 | rcu_idle_gp_wait, HRTIMER_MODE_REL); | 2143 | hrtimer_start(&per_cpu(rcu_idle_gp_timer, cpu), |
| 2144 | rcu_idle_gp_wait, HRTIMER_MODE_REL); | ||
| 2145 | else | ||
| 2146 | hrtimer_start(&per_cpu(rcu_idle_gp_timer, cpu), | ||
| 2147 | rcu_idle_lazy_gp_wait, HRTIMER_MODE_REL); | ||
| 2150 | return; /* Nothing more to do immediately. */ | 2148 | return; /* Nothing more to do immediately. */ |
| 2151 | } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) { | 2149 | } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) { |
| 2152 | /* We have hit the limit, so time to give up. */ | 2150 | /* We have hit the limit, so time to give up. */ |
| 2153 | per_cpu(rcu_dyntick_holdoff, cpu) = jiffies; | 2151 | per_cpu(rcu_dyntick_holdoff, cpu) = jiffies; |
| 2154 | local_irq_restore(flags); | ||
| 2155 | trace_rcu_prep_idle("Begin holdoff"); | 2152 | trace_rcu_prep_idle("Begin holdoff"); |
| 2156 | invoke_rcu_core(); /* Force the CPU out of dyntick-idle. */ | 2153 | invoke_rcu_core(); /* Force the CPU out of dyntick-idle. */ |
| 2157 | return; | 2154 | return; |
| @@ -2163,23 +2160,17 @@ static void rcu_prepare_for_idle(int cpu) | |||
| 2163 | */ | 2160 | */ |
| 2164 | #ifdef CONFIG_TREE_PREEMPT_RCU | 2161 | #ifdef CONFIG_TREE_PREEMPT_RCU |
| 2165 | if (per_cpu(rcu_preempt_data, cpu).nxtlist) { | 2162 | if (per_cpu(rcu_preempt_data, cpu).nxtlist) { |
| 2166 | local_irq_restore(flags); | ||
| 2167 | rcu_preempt_qs(cpu); | 2163 | rcu_preempt_qs(cpu); |
| 2168 | force_quiescent_state(&rcu_preempt_state, 0); | 2164 | force_quiescent_state(&rcu_preempt_state, 0); |
| 2169 | local_irq_save(flags); | ||
| 2170 | } | 2165 | } |
| 2171 | #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ | 2166 | #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ |
| 2172 | if (per_cpu(rcu_sched_data, cpu).nxtlist) { | 2167 | if (per_cpu(rcu_sched_data, cpu).nxtlist) { |
| 2173 | local_irq_restore(flags); | ||
| 2174 | rcu_sched_qs(cpu); | 2168 | rcu_sched_qs(cpu); |
| 2175 | force_quiescent_state(&rcu_sched_state, 0); | 2169 | force_quiescent_state(&rcu_sched_state, 0); |
| 2176 | local_irq_save(flags); | ||
| 2177 | } | 2170 | } |
| 2178 | if (per_cpu(rcu_bh_data, cpu).nxtlist) { | 2171 | if (per_cpu(rcu_bh_data, cpu).nxtlist) { |
| 2179 | local_irq_restore(flags); | ||
| 2180 | rcu_bh_qs(cpu); | 2172 | rcu_bh_qs(cpu); |
| 2181 | force_quiescent_state(&rcu_bh_state, 0); | 2173 | force_quiescent_state(&rcu_bh_state, 0); |
| 2182 | local_irq_save(flags); | ||
| 2183 | } | 2174 | } |
| 2184 | 2175 | ||
| 2185 | /* | 2176 | /* |
| @@ -2187,13 +2178,124 @@ static void rcu_prepare_for_idle(int cpu) | |||
| 2187 | * So try forcing the callbacks through the grace period. | 2178 | * So try forcing the callbacks through the grace period. |
| 2188 | */ | 2179 | */ |
| 2189 | if (rcu_cpu_has_callbacks(cpu)) { | 2180 | if (rcu_cpu_has_callbacks(cpu)) { |
| 2190 | local_irq_restore(flags); | ||
| 2191 | trace_rcu_prep_idle("More callbacks"); | 2181 | trace_rcu_prep_idle("More callbacks"); |
| 2192 | invoke_rcu_core(); | 2182 | invoke_rcu_core(); |
| 2193 | } else { | 2183 | } else |
| 2194 | local_irq_restore(flags); | ||
| 2195 | trace_rcu_prep_idle("Callbacks drained"); | 2184 | trace_rcu_prep_idle("Callbacks drained"); |
| 2196 | } | ||
| 2197 | } | 2185 | } |
| 2198 | 2186 | ||
| 2199 | #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ | 2187 | #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ |
| 2188 | |||
| 2189 | #ifdef CONFIG_RCU_CPU_STALL_INFO | ||
| 2190 | |||
| 2191 | #ifdef CONFIG_RCU_FAST_NO_HZ | ||
| 2192 | |||
| 2193 | static void print_cpu_stall_fast_no_hz(char *cp, int cpu) | ||
| 2194 | { | ||
| 2195 | struct hrtimer *hrtp = &per_cpu(rcu_idle_gp_timer, cpu); | ||
| 2196 | |||
| 2197 | sprintf(cp, "drain=%d %c timer=%lld", | ||
| 2198 | per_cpu(rcu_dyntick_drain, cpu), | ||
| 2199 | per_cpu(rcu_dyntick_holdoff, cpu) == jiffies ? 'H' : '.', | ||
| 2200 | hrtimer_active(hrtp) | ||
| 2201 | ? ktime_to_us(hrtimer_get_remaining(hrtp)) | ||
| 2202 | : -1); | ||
| 2203 | } | ||
| 2204 | |||
| 2205 | #else /* #ifdef CONFIG_RCU_FAST_NO_HZ */ | ||
| 2206 | |||
/*
 * There is no fast-no-HZ state to report in this configuration, but the
 * caller still prints the buffer with "%s".  Hand back an empty string so
 * that it never reads uninitialized stack contents.
 */
static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
{
	(void)cpu;	/* Unused in this configuration. */
	*cp = '\0';
}
| 2210 | |||
| 2211 | #endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */ | ||
| 2212 | |||
| 2213 | /* Initiate the stall-info list. */ | ||
| 2214 | static void print_cpu_stall_info_begin(void) | ||
| 2215 | { | ||
| 2216 | printk(KERN_CONT "\n"); | ||
| 2217 | } | ||
| 2218 | |||
| 2219 | /* | ||
| 2220 | * Print out diagnostic information for the specified stalled CPU. | ||
| 2221 | * | ||
| 2222 | * If the specified CPU is aware of the current RCU grace period | ||
| 2223 | * (flavor specified by rsp), then print the number of scheduling | ||
| 2224 | * clock interrupts the CPU has taken during the time that it has | ||
| 2225 | * been aware. Otherwise, print the number of RCU grace periods | ||
| 2226 | * that this CPU is ignorant of, for example, "1" if the CPU was | ||
| 2227 | * aware of the previous grace period. | ||
| 2228 | * | ||
| 2229 | * Also print out idle and (if CONFIG_RCU_FAST_NO_HZ) idle-entry info. | ||
| 2230 | */ | ||
| 2231 | static void print_cpu_stall_info(struct rcu_state *rsp, int cpu) | ||
| 2232 | { | ||
| 2233 | char fast_no_hz[72]; | ||
| 2234 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); | ||
| 2235 | struct rcu_dynticks *rdtp = rdp->dynticks; | ||
| 2236 | char *ticks_title; | ||
| 2237 | unsigned long ticks_value; | ||
| 2238 | |||
| 2239 | if (rsp->gpnum == rdp->gpnum) { | ||
| 2240 | ticks_title = "ticks this GP"; | ||
| 2241 | ticks_value = rdp->ticks_this_gp; | ||
| 2242 | } else { | ||
| 2243 | ticks_title = "GPs behind"; | ||
| 2244 | ticks_value = rsp->gpnum - rdp->gpnum; | ||
| 2245 | } | ||
| 2246 | print_cpu_stall_fast_no_hz(fast_no_hz, cpu); | ||
| 2247 | printk(KERN_ERR "\t%d: (%lu %s) idle=%03x/%llx/%d %s\n", | ||
| 2248 | cpu, ticks_value, ticks_title, | ||
| 2249 | atomic_read(&rdtp->dynticks) & 0xfff, | ||
| 2250 | rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting, | ||
| 2251 | fast_no_hz); | ||
| 2252 | } | ||
| 2253 | |||
| 2254 | /* Terminate the stall-info list. */ | ||
| 2255 | static void print_cpu_stall_info_end(void) | ||
| 2256 | { | ||
| 2257 | printk(KERN_ERR "\t"); | ||
| 2258 | } | ||
| 2259 | |||
| 2260 | /* Zero ->ticks_this_gp for all flavors of RCU. */ | ||
| 2261 | static void zero_cpu_stall_ticks(struct rcu_data *rdp) | ||
| 2262 | { | ||
| 2263 | rdp->ticks_this_gp = 0; | ||
| 2264 | } | ||
| 2265 | |||
| 2266 | /* Increment ->ticks_this_gp for all flavors of RCU. */ | ||
| 2267 | static void increment_cpu_stall_ticks(void) | ||
| 2268 | { | ||
| 2269 | __get_cpu_var(rcu_sched_data).ticks_this_gp++; | ||
| 2270 | __get_cpu_var(rcu_bh_data).ticks_this_gp++; | ||
| 2271 | #ifdef CONFIG_TREE_PREEMPT_RCU | ||
| 2272 | __get_cpu_var(rcu_preempt_data).ticks_this_gp++; | ||
| 2273 | #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ | ||
| 2274 | } | ||
| 2275 | |||
| 2276 | #else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */ | ||
| 2277 | |||
| 2278 | static void print_cpu_stall_info_begin(void) | ||
| 2279 | { | ||
| 2280 | printk(KERN_CONT " {"); | ||
| 2281 | } | ||
| 2282 | |||
| 2283 | static void print_cpu_stall_info(struct rcu_state *rsp, int cpu) | ||
| 2284 | { | ||
| 2285 | printk(KERN_CONT " %d", cpu); | ||
| 2286 | } | ||
| 2287 | |||
| 2288 | static void print_cpu_stall_info_end(void) | ||
| 2289 | { | ||
| 2290 | printk(KERN_CONT "} "); | ||
| 2291 | } | ||
| 2292 | |||
/* !CONFIG_RCU_CPU_STALL_INFO: no tick count is reset here, so do nothing. */
static void zero_cpu_stall_ticks(struct rcu_data *rdp)
{
}
| 2296 | |||
/* !CONFIG_RCU_CPU_STALL_INFO: no tick count is updated here, so do nothing. */
static void increment_cpu_stall_ticks(void)
{
}
| 2300 | |||
| 2301 | #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_INFO */ | ||
