aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Suresh Siddha <suresh.b.siddha@intel.com> 2011-12-01 20:07:32 -0500
committer Ingo Molnar <mingo@elte.hu> 2011-12-06 03:06:30 -0500
commit 1c792db7f7957e2e34b9a164f08200e36a25dfd0 (patch)
tree d1ab8d9e860e12b31f8553309b79cc65ff380fa5
parent 5b680fd61388abb9059fbc8e7a2f60f602bfce15 (diff)
sched, nohz: Introduce nohz_flags in 'struct rq'
Introduce nohz_flags in 'struct rq', which will track the following two flags for now.

NOHZ_TICK_STOPPED keeps track of the tick-stopped status; it gets set when the tick is stopped. It will be used to update the nohz idle load balancer data structures during the first busy tick after the tick is restarted. At this first busy tick after tickless idle, the NOHZ_TICK_STOPPED flag will be reset. This will minimize the nohz idle load balancer status updates that currently happen for every tickless exit, making it more scalable when there are many logical CPUs that enter and exit idle often.

NOHZ_BALANCE_KICK will track the need for nohz idle load balance on this rq. This will replace the nohz_balance_kick in the rq, which was not being updated atomically.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20111202010832.499438999@sbsiddha-desk.sc.intel.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- kernel/sched/core.c 5
-rw-r--r-- kernel/sched/fair.c 48
-rw-r--r-- kernel/sched/sched.h 11
3 files changed, 40 insertions, 24 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 07f1e9935f21..7f1da77b83f3 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -575,7 +575,8 @@ void wake_up_idle_cpu(int cpu)
575 575
576static inline bool got_nohz_idle_kick(void) 576static inline bool got_nohz_idle_kick(void)
577{ 577{
578 return idle_cpu(smp_processor_id()) && this_rq()->nohz_balance_kick; 578 int cpu = smp_processor_id();
579 return idle_cpu(cpu) && test_bit(NOHZ_BALANCE_KICK, nohz_flags(cpu));
579} 580}
580 581
581#else /* CONFIG_NO_HZ */ 582#else /* CONFIG_NO_HZ */
@@ -6840,7 +6841,7 @@ void __init sched_init(void)
6840 rq->avg_idle = 2*sysctl_sched_migration_cost; 6841 rq->avg_idle = 2*sysctl_sched_migration_cost;
6841 rq_attach_root(rq, &def_root_domain); 6842 rq_attach_root(rq, &def_root_domain);
6842#ifdef CONFIG_NO_HZ 6843#ifdef CONFIG_NO_HZ
6843 rq->nohz_balance_kick = 0; 6844 rq->nohz_flags = 0;
6844#endif 6845#endif
6845#endif 6846#endif
6846 init_rq_hrtick(rq); 6847 init_rq_hrtick(rq);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 81ccb811afb4..50c06b0e9fab 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4889,18 +4889,15 @@ static void nohz_balancer_kick(int cpu)
4889 return; 4889 return;
4890 } 4890 }
4891 4891
4892 if (!cpu_rq(ilb_cpu)->nohz_balance_kick) { 4892 if (test_and_set_bit(NOHZ_BALANCE_KICK, nohz_flags(cpu)))
4893 cpu_rq(ilb_cpu)->nohz_balance_kick = 1; 4893 return;
4894 4894 /*
4895 smp_mb(); 4895 * Use smp_send_reschedule() instead of resched_cpu().
4896 /* 4896 * This way we generate a sched IPI on the target cpu which
4897 * Use smp_send_reschedule() instead of resched_cpu(). 4897 * is idle. And the softirq performing nohz idle load balance
4898 * This way we generate a sched IPI on the target cpu which 4898 * will be run before returning from the IPI.
4899 * is idle. And the softirq performing nohz idle load balance 4899 */
4900 * will be run before returning from the IPI. 4900 smp_send_reschedule(ilb_cpu);
4901 */
4902 smp_send_reschedule(ilb_cpu);
4903 }
4904 return; 4901 return;
4905} 4902}
4906 4903
@@ -4964,6 +4961,8 @@ void select_nohz_load_balancer(int stop_tick)
4964 } 4961 }
4965 return; 4962 return;
4966 } 4963 }
4964
4965 set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
4967 } else { 4966 } else {
4968 if (!cpumask_test_cpu(cpu, nohz.idle_cpus_mask)) 4967 if (!cpumask_test_cpu(cpu, nohz.idle_cpus_mask))
4969 return; 4968 return;
@@ -5079,8 +5078,9 @@ static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle)
5079 struct rq *rq; 5078 struct rq *rq;
5080 int balance_cpu; 5079 int balance_cpu;
5081 5080
5082 if (idle != CPU_IDLE || !this_rq->nohz_balance_kick) 5081 if (idle != CPU_IDLE ||
5083 return; 5082 !test_bit(NOHZ_BALANCE_KICK, nohz_flags(this_cpu)))
5083 goto end;
5084 5084
5085 for_each_cpu(balance_cpu, nohz.idle_cpus_mask) { 5085 for_each_cpu(balance_cpu, nohz.idle_cpus_mask) {
5086 if (balance_cpu == this_cpu) 5086 if (balance_cpu == this_cpu)
@@ -5091,10 +5091,8 @@ static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle)
5091 * work being done for other cpus. Next load 5091 * work being done for other cpus. Next load
5092 * balancing owner will pick it up. 5092 * balancing owner will pick it up.
5093 */ 5093 */
5094 if (need_resched()) { 5094 if (need_resched())
5095 this_rq->nohz_balance_kick = 0;
5096 break; 5095 break;
5097 }
5098 5096
5099 raw_spin_lock_irq(&this_rq->lock); 5097 raw_spin_lock_irq(&this_rq->lock);
5100 update_rq_clock(this_rq); 5098 update_rq_clock(this_rq);
@@ -5108,7 +5106,8 @@ static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle)
5108 this_rq->next_balance = rq->next_balance; 5106 this_rq->next_balance = rq->next_balance;
5109 } 5107 }
5110 nohz.next_balance = this_rq->next_balance; 5108 nohz.next_balance = this_rq->next_balance;
5111 this_rq->nohz_balance_kick = 0; 5109end:
5110 clear_bit(NOHZ_BALANCE_KICK, nohz_flags(this_cpu));
5112} 5111}
5113 5112
5114/* 5113/*
@@ -5129,10 +5128,17 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu)
5129 int ret; 5128 int ret;
5130 int first_pick_cpu, second_pick_cpu; 5129 int first_pick_cpu, second_pick_cpu;
5131 5130
5132 if (time_before(now, nohz.next_balance)) 5131 if (unlikely(idle_cpu(cpu)))
5133 return 0; 5132 return 0;
5134 5133
5135 if (idle_cpu(cpu)) 5134 /*
5135 * We may be recently in ticked or tickless idle mode. At the first
5136 * busy tick after returning from idle, we will update the busy stats.
5137 */
5138 if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu))))
5139 clear_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
5140
5141 if (time_before(now, nohz.next_balance))
5136 return 0; 5142 return 0;
5137 5143
5138 first_pick_cpu = atomic_read(&nohz.first_pick_cpu); 5144 first_pick_cpu = atomic_read(&nohz.first_pick_cpu);
@@ -5196,7 +5202,7 @@ void trigger_load_balance(struct rq *rq, int cpu)
5196 likely(!on_null_domain(cpu))) 5202 likely(!on_null_domain(cpu)))
5197 raise_softirq(SCHED_SOFTIRQ); 5203 raise_softirq(SCHED_SOFTIRQ);
5198#ifdef CONFIG_NO_HZ 5204#ifdef CONFIG_NO_HZ
5199 else if (nohz_kick_needed(rq, cpu) && likely(!on_null_domain(cpu))) 5205 if (nohz_kick_needed(rq, cpu) && likely(!on_null_domain(cpu)))
5200 nohz_balancer_kick(cpu); 5206 nohz_balancer_kick(cpu);
5201#endif 5207#endif
5202} 5208}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 8715055979d1..cf7d02662bc2 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -371,7 +371,7 @@ struct rq {
371 unsigned long last_load_update_tick; 371 unsigned long last_load_update_tick;
372#ifdef CONFIG_NO_HZ 372#ifdef CONFIG_NO_HZ
373 u64 nohz_stamp; 373 u64 nohz_stamp;
374 unsigned char nohz_balance_kick; 374 unsigned long nohz_flags;
375#endif 375#endif
376 int skip_clock_update; 376 int skip_clock_update;
377 377
@@ -1064,3 +1064,12 @@ extern void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq);
1064extern void unthrottle_offline_cfs_rqs(struct rq *rq); 1064extern void unthrottle_offline_cfs_rqs(struct rq *rq);
1065 1065
1066extern void account_cfs_bandwidth_used(int enabled, int was_enabled); 1066extern void account_cfs_bandwidth_used(int enabled, int was_enabled);
1067
1068#ifdef CONFIG_NO_HZ
1069enum rq_nohz_flag_bits {
1070 NOHZ_TICK_STOPPED,
1071 NOHZ_BALANCE_KICK,
1072};
1073
1074#define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags)
1075#endif