author		Suresh Siddha <suresh.b.siddha@intel.com>	2011-12-01 20:07:33 -0500
committer	Ingo Molnar <mingo@elte.hu>	2011-12-06 03:06:32 -0500
commit		69e1e811dcc436a6b129dbef273ad9ec22d095ce (patch)
tree		f07179af4989178733727683796927948db9059a
parent		1c792db7f7957e2e34b9a164f08200e36a25dfd0 (diff)
sched, nohz: Track nr_busy_cpus in the sched_group_power
Introduce nr_busy_cpus in the struct sched_group_power [Not in sched_group
because sched groups are duplicated for the SD_OVERLAP scheduler domain]
and for each cpu that enters and exits idle, this parameter will be updated
in each scheduler group of the scheduler domain that this cpu belongs to.

To avoid the frequent update of this state as the cpu enters and exits idle,
the update of the stat during idle exit is delayed to the first timer tick
that happens after the cpu becomes busy. This is done using NOHZ_IDLE flag
in the struct rq's nohz_flags.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20111202010832.555984323@sbsiddha-desk.sc.intel.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--	include/linux/sched.h		6
-rw-r--r--	kernel/sched/core.c		1
-rw-r--r--	kernel/sched/fair.c		31
-rw-r--r--	kernel/sched/sched.h		1
-rw-r--r--	kernel/time/tick-sched.c	9
5 files changed, 48 insertions, 0 deletions
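Before the diff itself, a minimal standalone sketch of the accounting scheme the changelog describes: entering idle updates the shared busy counter immediately, while the return to busy is deferred to the CPU's first timer tick, so rapid idle/busy flips between ticks never touch the counter. This is a toy model, not kernel code: it flattens the per-sched_group counters into a single one, and all toy_* names are hypothetical.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define TOY_NR_CPUS 4

/* All CPUs are accounted busy at start, matching the group_weight init. */
static atomic_int toy_nr_busy_cpus = TOY_NR_CPUS;

/* Per-CPU flag modeling the NOHZ_IDLE bit in rq->nohz_flags. */
static bool toy_nohz_idle[TOY_NR_CPUS];

/* Idle entry: account the CPU idle right away (cf. set_cpu_sd_state_idle). */
static void toy_enter_idle(int cpu)
{
        if (toy_nohz_idle[cpu])
                return;                 /* already accounted idle */
        toy_nohz_idle[cpu] = true;
        atomic_fetch_sub(&toy_nr_busy_cpus, 1);
}

/*
 * First timer tick after the CPU becomes busy: only now is the counter
 * restored (cf. set_cpu_sd_state_busy), so a CPU that wakes briefly and
 * goes back to idle before its next tick causes no counter traffic.
 */
static void toy_busy_tick(int cpu)
{
        if (!toy_nohz_idle[cpu])
                return;                 /* still accounted busy */
        toy_nohz_idle[cpu] = false;
        atomic_fetch_add(&toy_nr_busy_cpus, 1);
}

int main(void)
{
        toy_enter_idle(1);
        toy_enter_idle(2);
        printf("busy: %d\n", atomic_load(&toy_nr_busy_cpus)); /* prints 2 */

        toy_busy_tick(1); /* cpu 1 woke up; its first tick re-accounts it */
        printf("busy: %d\n", atomic_load(&toy_nr_busy_cpus)); /* prints 3 */
        return 0;
}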
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8db17b7622ec..295666cb5b86 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -273,9 +273,11 @@ extern int runqueue_is_locked(int cpu);
 
 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
 extern void select_nohz_load_balancer(int stop_tick);
+extern void set_cpu_sd_state_idle(void);
 extern int get_nohz_timer_target(void);
 #else
 static inline void select_nohz_load_balancer(int stop_tick) { }
+static inline void set_cpu_sd_state_idle(void) { }
 #endif
 
 /*
@@ -901,6 +903,10 @@ struct sched_group_power {
901 * single CPU. 903 * single CPU.
902 */ 904 */
903 unsigned int power, power_orig; 905 unsigned int power, power_orig;
906 /*
907 * Number of busy cpus in this group.
908 */
909 atomic_t nr_busy_cpus;
904}; 910};
905 911
906struct sched_group { 912struct sched_group {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7f1da77b83f3..699ff1499a8a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6024,6 +6024,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 		return;
 
 	update_group_power(sd, cpu);
+	atomic_set(&sg->sgp->nr_busy_cpus, sg->group_weight);
 }
 
 int __weak arch_sd_sibling_asym_packing(void)
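A note on the initialization above: atomic_set() seeds nr_busy_cpus to the group's weight, i.e. the number of CPUs in the group. When the domain hierarchy is (re)built, every CPU is accounted busy, and set_cpu_sd_state_idle() then decrements the counters as CPUs actually enter idle.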
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 50c06b0e9fab..e050563e97a4 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4901,6 +4901,36 @@ static void nohz_balancer_kick(int cpu)
 	return;
 }
 
+static inline void set_cpu_sd_state_busy(void)
+{
+	struct sched_domain *sd;
+	int cpu = smp_processor_id();
+
+	if (!test_bit(NOHZ_IDLE, nohz_flags(cpu)))
+		return;
+	clear_bit(NOHZ_IDLE, nohz_flags(cpu));
+
+	rcu_read_lock();
+	for_each_domain(cpu, sd)
+		atomic_inc(&sd->groups->sgp->nr_busy_cpus);
+	rcu_read_unlock();
+}
+
+void set_cpu_sd_state_idle(void)
+{
+	struct sched_domain *sd;
+	int cpu = smp_processor_id();
+
+	if (test_bit(NOHZ_IDLE, nohz_flags(cpu)))
+		return;
+	set_bit(NOHZ_IDLE, nohz_flags(cpu));
+
+	rcu_read_lock();
+	for_each_domain(cpu, sd)
+		atomic_dec(&sd->groups->sgp->nr_busy_cpus);
+	rcu_read_unlock();
+}
+
 /*
  * This routine will try to nominate the ilb (idle load balancing)
  * owner among the cpus whose ticks are stopped. ilb owner will do the idle
@@ -5135,6 +5165,7 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu)
 	 * We may be recently in ticked or tickless idle mode. At the first
 	 * busy tick after returning from idle, we will update the busy stats.
 	 */
+	set_cpu_sd_state_busy();
 	if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu))))
 		clear_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
 
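Two design points in the fair.c changes above: the rcu_read_lock()/rcu_read_unlock() pairs around for_each_domain() are needed because the scheduler-domain hierarchy is RCU-protected and may be rebuilt concurrently (e.g. on CPU hotplug), so the read-side section keeps the sd and sched_group_power structures alive for the duration of the walk. And set_cpu_sd_state_busy() is called before the NOHZ_TICK_STOPPED check, so the busy-state update happens on the first busy tick regardless of whether the tick had been stopped.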
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index cf7d02662bc2..91810f0ee3af 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1069,6 +1069,7 @@ extern void account_cfs_bandwidth_used(int enabled, int was_enabled);
 enum rq_nohz_flag_bits {
 	NOHZ_TICK_STOPPED,
 	NOHZ_BALANCE_KICK,
+	NOHZ_IDLE,
 };
 
 #define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags)
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 40420644d0ba..31cc06163ed5 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -297,6 +297,15 @@ void tick_nohz_stop_sched_tick(int inidle)
 	ts = &per_cpu(tick_cpu_sched, cpu);
 
 	/*
+	 * Update the idle state in the scheduler domain hierarchy
+	 * when tick_nohz_stop_sched_tick() is called from the idle loop.
+	 * State will be updated to busy during the first busy tick after
+	 * exiting idle.
+	 */
+	if (inidle)
+		set_cpu_sd_state_idle();
+
+	/*
 	 * Call to tick_nohz_start_idle stops the last_update_time from being
 	 * updated. Thus, it must not be called in the event we are called from
 	 * irq_exit() with the prior state different than idle.