diff options
author | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2010-02-22 20:04:59 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2010-02-25 04:34:55 -0500 |
commit | 8bd93a2c5d4cab2ae17d06350daa7dbf546a4634 (patch) | |
tree | 3facbdbfbcc1b169fad20f456b0a2521adadfb25 | |
parent | 998f2ac3fea93bfa8b55c279fff68f7c5b9ab93d (diff) |
rcu: Accelerate grace period if last non-dynticked CPU
Currently, rcu_needs_cpu() simply checks whether the current CPU
has an outstanding RCU callback, which means that the last CPU
to go into dyntick-idle mode might wait a few ticks for the
relevant grace periods to complete. However, if all the other
CPUs are in dyntick-idle mode, and if this CPU is in a quiescent
state (which it is for RCU-bh and RCU-sched any time that we are
considering going into dyntick-idle mode), then the grace period
is instantly complete.
This patch therefore repeatedly invokes the RCU grace-period
machinery in order to force any needed grace periods to complete
quickly. It does so a limited number of times in order to
prevent starvation by an RCU callback function that might pass
itself to call_rcu().
However, if any CPU other than the current one is not in
dyntick-idle mode, fall back to simply checking whether this CPU
has RCU callbacks queued (with a fix for a bug noted by Lai
Jiangshan). Also, take advantage of last grace-period forcing, an
opportunity noted by Steve Rostedt, and apply the simplified
#ifdef condition suggested by Frederic Weisbecker.
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
LKML-Reference: <1266887105-1528-15-git-send-email-paulmck@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | include/linux/cpumask.h | 14 | ||||
-rw-r--r-- | init/Kconfig | 16 | ||||
-rw-r--r-- | kernel/rcutree.c | 5 | ||||
-rw-r--r-- | kernel/rcutree_plugin.h | 69 |
4 files changed, 101 insertions, 3 deletions
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index d77b54733c5..dbcee7647d9 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h | |||
@@ -143,6 +143,8 @@ static inline unsigned int cpumask_any_but(const struct cpumask *mask, | |||
143 | 143 | ||
144 | #define for_each_cpu(cpu, mask) \ | 144 | #define for_each_cpu(cpu, mask) \ |
145 | for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) | 145 | for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) |
146 | #define for_each_cpu_not(cpu, mask) \ | ||
147 | for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) /* single pass with cpu == 0; mask is never tested */ | ||
146 | #define for_each_cpu_and(cpu, mask, and) \ | 148 | #define for_each_cpu_and(cpu, mask, and) \ |
147 | for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)and) | 149 | for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)and) |
148 | #else | 150 | #else |
@@ -203,6 +205,18 @@ int cpumask_any_but(const struct cpumask *mask, unsigned int cpu); | |||
203 | (cpu) < nr_cpu_ids;) | 205 | (cpu) < nr_cpu_ids;) |
204 | 206 | ||
205 | /** | 207 | /** |
208 | * for_each_cpu_not - iterate over every cpu in a complemented mask | ||
209 | * @cpu: the (optionally unsigned) integer iterator | ||
210 | * @mask: the cpumask pointer; the loop visits the cpus -not- set in it | ||
211 | * | ||
212 | * After the loop, cpu is >= nr_cpu_ids. | ||
213 | */ | ||
214 | #define for_each_cpu_not(cpu, mask) \ | ||
215 | for ((cpu) = -1; \ | ||
216 | (cpu) = cpumask_next_zero((cpu), (mask)), \ | ||
217 | (cpu) < nr_cpu_ids;) | ||
218 | |||
219 | /** | ||
206 | * for_each_cpu_and - iterate over every cpu in both masks | 220 | * for_each_cpu_and - iterate over every cpu in both masks |
207 | * @cpu: the (optionally unsigned) integer iterator | 221 | * @cpu: the (optionally unsigned) integer iterator |
208 | * @mask: the first cpumask pointer | 222 | * @mask: the first cpumask pointer |
diff --git a/init/Kconfig b/init/Kconfig index d95ca7cd5d4..42bf914b325 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
@@ -396,6 +396,22 @@ config RCU_FANOUT_EXACT | |||
396 | 396 | ||
397 | Say N if unsure. | 397 | Say N if unsure. |
398 | 398 | ||
399 | config RCU_FAST_NO_HZ | ||
400 | bool "Accelerate last non-dyntick-idle CPU's grace periods" | ||
401 | depends on TREE_RCU && NO_HZ && SMP | ||
402 | default n | ||
403 | help | ||
404 | This option causes RCU to attempt to accelerate grace periods | ||
405 | in order to allow the final CPU to enter dynticks-idle state | ||
406 | more quickly. On the other hand, this option increases the | ||
407 | overhead of the dynticks-idle checking, particularly on systems | ||
408 | with large numbers of CPUs. | ||
409 | |||
410 | Say Y if energy efficiency is critically important, particularly | ||
411 | if you have relatively few CPUs. | ||
412 | |||
413 | Say N if you are unsure. | ||
414 | |||
399 | config TREE_RCU_TRACE | 415 | config TREE_RCU_TRACE |
400 | def_bool RCU_TRACE && ( TREE_RCU || TREE_PREEMPT_RCU ) | 416 | def_bool RCU_TRACE && ( TREE_RCU || TREE_PREEMPT_RCU ) |
401 | select DEBUG_FS | 417 | select DEBUG_FS |
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 099a255ede4..29d88c08d87 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
@@ -1550,10 +1550,9 @@ static int rcu_pending(int cpu) | |||
1550 | /* | 1550 | /* |
1551 | * Check to see if any future RCU-related work will need to be done | 1551 | * Check to see if any future RCU-related work will need to be done |
1552 | * by the current CPU, even if none need be done immediately, returning | 1552 | * by the current CPU, even if none need be done immediately, returning |
1553 | * 1 if so. This function is part of the RCU implementation; it is -not- | 1553 | * 1 if so. |
1554 | * an exported member of the RCU API. | ||
1555 | */ | 1554 | */ |
1556 | int rcu_needs_cpu(int cpu) | 1555 | static int rcu_needs_cpu_quick_check(int cpu) |
1557 | { | 1556 | { |
1558 | /* RCU callbacks either ready or pending? */ | 1557 | /* RCU callbacks either ready or pending? */ |
1559 | return per_cpu(rcu_sched_data, cpu).nxtlist || | 1558 | return per_cpu(rcu_sched_data, cpu).nxtlist || |
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index e77cdf321e1..a82566696b0 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h | |||
@@ -906,3 +906,72 @@ static void __init __rcu_init_preempt(void) | |||
906 | } | 906 | } |
907 | 907 | ||
908 | #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ | 908 | #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ |
909 | |||
910 | #if !defined(CONFIG_RCU_FAST_NO_HZ) | ||
911 | |||
912 | /* | ||
913 | * Check to see if any future RCU-related work will need to be done | ||
914 | * by the current CPU, even if none need be done immediately, returning | ||
915 | * 1 if so. This function is part of the RCU implementation; it is -not- | ||
916 | * an exported member of the RCU API. | ||
917 | * | ||
918 | * Because CONFIG_RCU_FAST_NO_HZ is not set, just check whether this CPU needs | ||
919 | * any flavor of RCU. Do not chew up lots of CPU cycles with preemption disabled | ||
920 | * in a most-likely vain attempt to cause RCU not to need this CPU. | ||
921 | */ | ||
922 | int rcu_needs_cpu(int cpu) | ||
923 | { | ||
924 | return rcu_needs_cpu_quick_check(cpu); | ||
925 | } | ||
926 | |||
927 | #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */ | ||
928 | |||
929 | #define RCU_NEEDS_CPU_FLUSHES 5 /* max passes through the callback-flushing loop below */ | ||
930 | | ||
931 | /* | ||
932 | * Check to see if any future RCU-related work will need to be done | ||
933 | * by the current CPU, even if none need be done immediately, returning | ||
934 | * 1 if so. This function is part of the RCU implementation; it is -not- | ||
935 | * an exported member of the RCU API. | ||
936 | * | ||
937 | * Because we are not supporting preemptible RCU, attempt to accelerate | ||
938 | * any current grace periods so that RCU no longer needs this CPU, but | ||
939 | * only if all other CPUs are already in dynticks-idle mode. This will | ||
940 | * allow the CPU cores to be powered down immediately, as opposed to after | ||
941 | * waiting many milliseconds for grace periods to elapse. | ||
942 | */ | ||
943 | int rcu_needs_cpu(int cpu) | ||
944 | { | ||
945 | int c = 1; | ||
946 | int i; | ||
947 | int thatcpu; | ||
948 | | ||
949 | /* Don't bother unless we are the last non-dyntick-idle CPU. */ | ||
950 | for_each_cpu_not(thatcpu, nohz_cpu_mask) | ||
951 | if (thatcpu != cpu) | ||
952 | return rcu_needs_cpu_quick_check(cpu); | ||
953 | | ||
954 | /* Try to push remaining RCU-sched and RCU-bh callbacks through. */ | ||
955 | for (i = 0; i < RCU_NEEDS_CPU_FLUSHES && c; i++) { | ||
956 | c = 0; | ||
957 | if (per_cpu(rcu_sched_data, cpu).nxtlist) { | ||
958 | rcu_sched_qs(cpu); | ||
959 | force_quiescent_state(&rcu_sched_state, 0); | ||
960 | __rcu_process_callbacks(&rcu_sched_state, | ||
961 | &per_cpu(rcu_sched_data, cpu)); | ||
962 | c = !!per_cpu(rcu_sched_data, cpu).nxtlist; | ||
963 | } | ||
964 | if (per_cpu(rcu_bh_data, cpu).nxtlist) { | ||
965 | rcu_bh_qs(cpu); | ||
966 | force_quiescent_state(&rcu_bh_state, 0); | ||
967 | __rcu_process_callbacks(&rcu_bh_state, | ||
968 | &per_cpu(rcu_bh_data, cpu)); | ||
969 | c = c || per_cpu(rcu_bh_data, cpu).nxtlist; /* OR in: do not discard RCU-sched's result */ | ||
970 | } | ||
971 | } | ||
972 | | ||
973 | /* If callbacks of either flavor are still pending, RCU still needs this CPU. */ | ||
974 | return c; | ||
975 | } | ||
976 | |||
977 | #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ | ||