diff options
| author | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2010-02-22 20:04:59 -0500 |
|---|---|---|
| committer | Ingo Molnar <mingo@elte.hu> | 2010-02-25 04:34:55 -0500 |
| commit | 8bd93a2c5d4cab2ae17d06350daa7dbf546a4634 (patch) | |
| tree | 3facbdbfbcc1b169fad20f456b0a2521adadfb25 | |
| parent | 998f2ac3fea93bfa8b55c279fff68f7c5b9ab93d (diff) | |
rcu: Accelerate grace period if last non-dynticked CPU
Currently, rcu_needs_cpu() simply checks whether the current CPU
has an outstanding RCU callback, which means that the last CPU
to go into dyntick-idle mode might wait a few ticks for the
relevant grace periods to complete. However, if all the other
CPUs are in dyntick-idle mode, and if this CPU is in a quiescent
state (which it is for RCU-bh and RCU-sched any time that we are
considering going into dyntick-idle mode), then the grace period
is instantly complete.
This patch therefore repeatedly invokes the RCU grace-period
machinery in order to force any needed grace periods to complete
quickly. It does so a limited number of times in order to
prevent starvation by an RCU callback function that might pass
itself to call_rcu().
However, if any CPU other than the current one is not in
dyntick-idle mode, fall back to simply checking whether the
current CPU has an outstanding RCU callback (with a fix for a
bug noted by Lai Jiangshan). Also, take advantage of the last
grace-period forcing, an opportunity noted by Steve Rostedt.
Finally, apply the simplified #ifdef condition suggested by
Frederic Weisbecker.
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
LKML-Reference: <1266887105-1528-15-git-send-email-paulmck@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
| -rw-r--r-- | include/linux/cpumask.h | 14 | ||||
| -rw-r--r-- | init/Kconfig | 16 | ||||
| -rw-r--r-- | kernel/rcutree.c | 5 | ||||
| -rw-r--r-- | kernel/rcutree_plugin.h | 69 |
4 files changed, 101 insertions, 3 deletions
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index d77b54733c5b..dbcee7647d9a 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h | |||
| @@ -143,6 +143,8 @@ static inline unsigned int cpumask_any_but(const struct cpumask *mask, | |||
| 143 | 143 | ||
| 144 | #define for_each_cpu(cpu, mask) \ | 144 | #define for_each_cpu(cpu, mask) \ |
| 145 | for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) | 145 | for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) |
| 146 | #define for_each_cpu_not(cpu, mask) \ | ||
| 147 | for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) | ||
| 146 | #define for_each_cpu_and(cpu, mask, and) \ | 148 | #define for_each_cpu_and(cpu, mask, and) \ |
| 147 | for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)and) | 149 | for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)and) |
| 148 | #else | 150 | #else |
| @@ -203,6 +205,18 @@ int cpumask_any_but(const struct cpumask *mask, unsigned int cpu); | |||
| 203 | (cpu) < nr_cpu_ids;) | 205 | (cpu) < nr_cpu_ids;) |
| 204 | 206 | ||
| 205 | /** | 207 | /** |
| 208 | * for_each_cpu_not - iterate over every cpu in a complemented mask | ||
| 209 | * @cpu: the (optionally unsigned) integer iterator | ||
| 210 | * @mask: the cpumask pointer | ||
| 211 | * | ||
| 212 | * After the loop, cpu is >= nr_cpu_ids. | ||
| 213 | */ | ||
| 214 | #define for_each_cpu_not(cpu, mask) \ | ||
| 215 | for ((cpu) = -1; \ | ||
| 216 | (cpu) = cpumask_next_zero((cpu), (mask)), \ | ||
| 217 | (cpu) < nr_cpu_ids;) | ||
| 218 | |||
| 219 | /** | ||
| 206 | * for_each_cpu_and - iterate over every cpu in both masks | 220 | * for_each_cpu_and - iterate over every cpu in both masks |
| 207 | * @cpu: the (optionally unsigned) integer iterator | 221 | * @cpu: the (optionally unsigned) integer iterator |
| 208 | * @mask: the first cpumask pointer | 222 | * @mask: the first cpumask pointer |
diff --git a/init/Kconfig b/init/Kconfig index d95ca7cd5d45..42bf914b325a 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
| @@ -396,6 +396,22 @@ config RCU_FANOUT_EXACT | |||
| 396 | 396 | ||
| 397 | Say N if unsure. | 397 | Say N if unsure. |
| 398 | 398 | ||
| 399 | config RCU_FAST_NO_HZ | ||
| 400 | bool "Accelerate last non-dyntick-idle CPU's grace periods" | ||
| 401 | depends on TREE_RCU && NO_HZ && SMP | ||
| 402 | default n | ||
| 403 | help | ||
| 404 | This option causes RCU to attempt to accelerate grace periods | ||
| 405 | in order to allow the final CPU to enter dynticks-idle state | ||
| 406 | more quickly. On the other hand, this option increases the | ||
| 407 | overhead of the dynticks-idle checking, particularly on systems | ||
| 408 | with large numbers of CPUs. | ||
| 409 | |||
| 410 | Say Y if energy efficiency is critically important, particularly | ||
| 411 | if you have relatively few CPUs. | ||
| 412 | |||
| 413 | Say N if you are unsure. | ||
| 414 | |||
| 399 | config TREE_RCU_TRACE | 415 | config TREE_RCU_TRACE |
| 400 | def_bool RCU_TRACE && ( TREE_RCU || TREE_PREEMPT_RCU ) | 416 | def_bool RCU_TRACE && ( TREE_RCU || TREE_PREEMPT_RCU ) |
| 401 | select DEBUG_FS | 417 | select DEBUG_FS |
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 099a255ede4c..29d88c08d875 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
| @@ -1550,10 +1550,9 @@ static int rcu_pending(int cpu) | |||
| 1550 | /* | 1550 | /* |
| 1551 | * Check to see if any future RCU-related work will need to be done | 1551 | * Check to see if any future RCU-related work will need to be done |
| 1552 | * by the current CPU, even if none need be done immediately, returning | 1552 | * by the current CPU, even if none need be done immediately, returning |
| 1553 | * 1 if so. This function is part of the RCU implementation; it is -not- | 1553 | * 1 if so. |
| 1554 | * an exported member of the RCU API. | ||
| 1555 | */ | 1554 | */ |
| 1556 | int rcu_needs_cpu(int cpu) | 1555 | static int rcu_needs_cpu_quick_check(int cpu) |
| 1557 | { | 1556 | { |
| 1558 | /* RCU callbacks either ready or pending? */ | 1557 | /* RCU callbacks either ready or pending? */ |
| 1559 | return per_cpu(rcu_sched_data, cpu).nxtlist || | 1558 | return per_cpu(rcu_sched_data, cpu).nxtlist || |
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index e77cdf321e13..a82566696b0b 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h | |||
| @@ -906,3 +906,72 @@ static void __init __rcu_init_preempt(void) | |||
| 906 | } | 906 | } |
| 907 | 907 | ||
| 908 | #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ | 908 | #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ |
| 909 | |||
| 910 | #if !defined(CONFIG_RCU_FAST_NO_HZ) | ||
| 911 | |||
| 912 | /* | ||
| 913 | * Check to see if any future RCU-related work will need to be done | ||
| 914 | * by the current CPU, even if none need be done immediately, returning | ||
| 915 | * 1 if so. This function is part of the RCU implementation; it is -not- | ||
| 916 | * an exported member of the RCU API. | ||
| 917 | * | ||
| 918 | * Because we have preemptible RCU, just check whether this CPU needs | ||
| 919 | * any flavor of RCU. Do not chew up lots of CPU cycles with preemption | ||
| 920 | * disabled in a most-likely vain attempt to cause RCU not to need this CPU. | ||
| 921 | */ | ||
| 922 | int rcu_needs_cpu(int cpu) | ||
| 923 | { | ||
| 924 | return rcu_needs_cpu_quick_check(cpu); | ||
| 925 | } | ||
| 926 | |||
| 927 | #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */ | ||
| 928 | |||
| 929 | #define RCU_NEEDS_CPU_FLUSHES 5 | ||
| 930 | |||
| 931 | /* | ||
| 932 | * Check to see if any future RCU-related work will need to be done | ||
| 933 | * by the current CPU, even if none need be done immediately, returning | ||
| 934 | * 1 if so. This function is part of the RCU implementation; it is -not- | ||
| 935 | * an exported member of the RCU API. | ||
| 936 | * | ||
| 937 | * Because we are not supporting preemptible RCU, attempt to accelerate | ||
| 938 | * any current grace periods so that RCU no longer needs this CPU, but | ||
| 939 | * only if all other CPUs are already in dynticks-idle mode. This will | ||
| 940 | * allow the CPU cores to be powered down immediately, as opposed to after | ||
| 941 | * waiting many milliseconds for grace periods to elapse. | ||
| 942 | */ | ||
| 943 | int rcu_needs_cpu(int cpu) | ||
| 944 | { | ||
| 945 | int c = 1; | ||
| 946 | int i; | ||
| 947 | int thatcpu; | ||
| 948 | |||
| 949 | /* Don't bother unless we are the last non-dyntick-idle CPU. */ | ||
| 950 | for_each_cpu_not(thatcpu, nohz_cpu_mask) | ||
| 951 | if (thatcpu != cpu) | ||
| 952 | return rcu_needs_cpu_quick_check(cpu); | ||
| 953 | |||
| 954 | /* Try to push remaining RCU-sched and RCU-bh callbacks through. */ | ||
| 955 | for (i = 0; i < RCU_NEEDS_CPU_FLUSHES && c; i++) { | ||
| 956 | c = 0; | ||
| 957 | if (per_cpu(rcu_sched_data, cpu).nxtlist) { | ||
| 958 | rcu_sched_qs(cpu); | ||
| 959 | force_quiescent_state(&rcu_sched_state, 0); | ||
| 960 | __rcu_process_callbacks(&rcu_sched_state, | ||
| 961 | &per_cpu(rcu_sched_data, cpu)); | ||
| 962 | c = !!per_cpu(rcu_sched_data, cpu).nxtlist; | ||
| 963 | } | ||
| 964 | if (per_cpu(rcu_bh_data, cpu).nxtlist) { | ||
| 965 | rcu_bh_qs(cpu); | ||
| 966 | force_quiescent_state(&rcu_bh_state, 0); | ||
| 967 | __rcu_process_callbacks(&rcu_bh_state, | ||
| 968 | &per_cpu(rcu_bh_data, cpu)); | ||
| 969 | c = !!per_cpu(rcu_bh_data, cpu).nxtlist; | ||
| 970 | } | ||
| 971 | } | ||
| 972 | |||
| 973 | /* If RCU callbacks are still pending, RCU still needs this CPU. */ | ||
| 974 | return c; | ||
| 975 | } | ||
| 976 | |||
| 977 | #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ | ||
