-rw-r--r--   kernel/sched/fair.c   160
1 files changed, 47 insertions, 113 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e050563e97a4..821af14335f3 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4727,28 +4727,17 @@ out_unlock:
 #ifdef CONFIG_NO_HZ
 /*
  * idle load balancing details
- * - One of the idle CPUs nominates itself as idle load_balancer, while
- *   entering idle.
- * - This idle load balancer CPU will also go into tickless mode when
- *   it is idle, just like all other idle CPUs
  * - When one of the busy CPUs notice that there may be an idle rebalancing
  *   needed, they will kick the idle load balancer, which then does idle
  *   load balancing for all the idle CPUs.
  */
 static struct {
-        atomic_t load_balancer;
-        atomic_t first_pick_cpu;
-        atomic_t second_pick_cpu;
         cpumask_var_t idle_cpus_mask;
         cpumask_var_t grp_idle_mask;
+        atomic_t nr_cpus;
         unsigned long next_balance;     /* in jiffy units */
 } nohz ____cacheline_aligned;
 
-int get_nohz_load_balancer(void)
-{
-        return atomic_read(&nohz.load_balancer);
-}
-
 #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
 /**
  * lowest_flag_domain - Return lowest sched_domain containing flag.
@@ -4825,9 +4814,9 @@ static inline int is_semi_idle_group(struct sched_group *ilb_group)
  */
 static int find_new_ilb(int cpu)
 {
+        int ilb = cpumask_first(nohz.idle_cpus_mask);
         struct sched_domain *sd;
         struct sched_group *ilb_group;
-        int ilb = nr_cpu_ids;
 
         /*
          * Have idle load balancer selection from semi-idle packages only
@@ -4881,13 +4870,10 @@ static void nohz_balancer_kick(int cpu)
 
         nohz.next_balance++;
 
-        ilb_cpu = get_nohz_load_balancer();
+        ilb_cpu = find_new_ilb(cpu);
 
-        if (ilb_cpu >= nr_cpu_ids) {
-                ilb_cpu = cpumask_first(nohz.idle_cpus_mask);
-                if (ilb_cpu >= nr_cpu_ids)
-                        return;
-        }
+        if (ilb_cpu >= nr_cpu_ids)
+                return;
 
         if (test_and_set_bit(NOHZ_BALANCE_KICK, nohz_flags(cpu)))
                 return;
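
With no standing ilb owner left, a busy CPU that wants idle load balancing now picks a target at kick time: find_new_ilb() falls back to the first CPU in nohz.idle_cpus_mask, and nohz_balancer_kick() simply bails out when nothing is idle. A minimal user-space sketch of that flow, in plain C with a fixed NR_CPUS, a bool array standing in for the cpumask, and a printf standing in for the resched IPI (these stand-ins are illustrative, not kernel APIs):

#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 8

static bool cpu_is_idle[NR_CPUS];        /* stands in for nohz.idle_cpus_mask */

/* stand-in for find_new_ilb(): first tick-stopped idle cpu, or NR_CPUS if none */
static int find_new_ilb(void)
{
        for (int cpu = 0; cpu < NR_CPUS; cpu++)
                if (cpu_is_idle[cpu])
                        return cpu;
        return NR_CPUS;
}

/* stand-in for nohz_balancer_kick(): choose a target and "kick" it */
static void nohz_balancer_kick(void)
{
        int ilb_cpu = find_new_ilb();

        if (ilb_cpu >= NR_CPUS)
                return;                  /* nothing is idle, nobody to kick */

        printf("kick cpu %d to run idle load balancing\n", ilb_cpu);
}

int main(void)
{
        cpu_is_idle[3] = cpu_is_idle[5] = true;
        nohz_balancer_kick();            /* kicks cpu 3, the first idle one */
        return 0;
}
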
@@ -4932,77 +4918,20 @@ void set_cpu_sd_state_idle(void)
 }
 
 /*
- * This routine will try to nominate the ilb (idle load balancing)
- * owner among the cpus whose ticks are stopped. ilb owner will do the idle
- * load balancing on behalf of all those cpus.
- *
- * When the ilb owner becomes busy, we will not have new ilb owner until some
- * idle CPU wakes up and goes back to idle or some busy CPU tries to kick
- * idle load balancing by kicking one of the idle CPUs.
- *
- * Ticks are stopped for the ilb owner as well, with busy CPU kicking this
- * ilb owner CPU in future (when there is a need for idle load balancing on
- * behalf of all idle CPUs).
+ * This routine will record that this cpu is going idle with tick stopped.
+ * This info will be used in performing idle load balancing in the future.
  */
 void select_nohz_load_balancer(int stop_tick)
 {
         int cpu = smp_processor_id();
 
         if (stop_tick) {
-                if (!cpu_active(cpu)) {
-                        if (atomic_read(&nohz.load_balancer) != cpu)
-                                return;
-
-                        /*
-                         * If we are going offline and still the leader,
-                         * give up!
-                         */
-                        if (atomic_cmpxchg(&nohz.load_balancer, cpu,
-                                           nr_cpu_ids) != cpu)
-                                BUG();
-
+                if (test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))
                         return;
-                }
 
                 cpumask_set_cpu(cpu, nohz.idle_cpus_mask);
-
-                if (atomic_read(&nohz.first_pick_cpu) == cpu)
-                        atomic_cmpxchg(&nohz.first_pick_cpu, cpu, nr_cpu_ids);
-                if (atomic_read(&nohz.second_pick_cpu) == cpu)
-                        atomic_cmpxchg(&nohz.second_pick_cpu, cpu, nr_cpu_ids);
-
-                if (atomic_read(&nohz.load_balancer) >= nr_cpu_ids) {
-                        int new_ilb;
-
-                        /* make me the ilb owner */
-                        if (atomic_cmpxchg(&nohz.load_balancer, nr_cpu_ids,
-                                           cpu) != nr_cpu_ids)
-                                return;
-
-                        /*
-                         * Check to see if there is a more power-efficient
-                         * ilb.
-                         */
-                        new_ilb = find_new_ilb(cpu);
-                        if (new_ilb < nr_cpu_ids && new_ilb != cpu) {
-                                atomic_set(&nohz.load_balancer, nr_cpu_ids);
-                                resched_cpu(new_ilb);
-                                return;
-                        }
-                        return;
-                }
-
+                atomic_inc(&nohz.nr_cpus);
                 set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
-        } else {
-                if (!cpumask_test_cpu(cpu, nohz.idle_cpus_mask))
-                        return;
-
-                cpumask_clear_cpu(cpu, nohz.idle_cpus_mask);
-
-                if (atomic_read(&nohz.load_balancer) == cpu)
-                        if (atomic_cmpxchg(&nohz.load_balancer, cpu,
-                                           nr_cpu_ids) != cpu)
-                                BUG();
         }
         return;
 }
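
Entering tickless idle is now pure bookkeeping: mark the CPU in nohz.idle_cpus_mask, bump nohz.nr_cpus, and set NOHZ_TICK_STOPPED, with the flag test making the path idempotent; the matching teardown happens on the next busy tick in nohz_kick_needed(). A small user-space model of that pairing, assuming C11 atomics and illustrative names (enter_tickless_idle/exit_tickless_idle are not kernel functions):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 8

static bool idle_cpus_mask[NR_CPUS];    /* stands in for nohz.idle_cpus_mask */
static atomic_int nr_idle_cpus;         /* stands in for nohz.nr_cpus */
static bool tick_stopped[NR_CPUS];      /* stands in for the NOHZ_TICK_STOPPED bit */

/* models select_nohz_load_balancer() with stop_tick = 1 */
static void enter_tickless_idle(int cpu)
{
        if (tick_stopped[cpu])          /* already recorded: nothing to do */
                return;

        idle_cpus_mask[cpu] = true;
        atomic_fetch_add(&nr_idle_cpus, 1);
        tick_stopped[cpu] = true;
}

/* models the busy-tick cleanup now done in nohz_kick_needed() */
static void exit_tickless_idle(int cpu)
{
        if (!tick_stopped[cpu])
                return;

        tick_stopped[cpu] = false;
        idle_cpus_mask[cpu] = false;
        atomic_fetch_sub(&nr_idle_cpus, 1);
}

int main(void)
{
        enter_tickless_idle(2);
        enter_tickless_idle(2);         /* idempotent: only counted once */
        printf("tickless idle cpus: %d\n", atomic_load(&nr_idle_cpus)); /* 1 */
        exit_tickless_idle(2);
        printf("tickless idle cpus: %d\n", atomic_load(&nr_idle_cpus)); /* 0 */
        return 0;
}
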
@@ -5113,7 +5042,7 @@ static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle)
                 goto end;
 
         for_each_cpu(balance_cpu, nohz.idle_cpus_mask) {
-                if (balance_cpu == this_cpu)
+                if (balance_cpu == this_cpu || !idle_cpu(this_cpu))
                         continue;
 
                 /*
@@ -5141,22 +5070,18 @@ end:
 }
 
 /*
- * Current heuristic for kicking the idle load balancer
- * - first_pick_cpu is the one of the busy CPUs. It will kick
- *   idle load balancer when it has more than one process active. This
- *   eliminates the need for idle load balancing altogether when we have
- *   only one running process in the system (common case).
- * - If there are more than one busy CPU, idle load balancer may have
- *   to run for active_load_balance to happen (i.e., two busy CPUs are
- *   SMT or core siblings and can run better if they move to different
- *   physical CPUs). So, second_pick_cpu is the second of the busy CPUs
- *   which will kick idle load balancer as soon as it has any load.
+ * Current heuristic for kicking the idle load balancer in the presence
+ * of an idle cpu is the system.
+ *   - This rq has more than one task.
+ *   - At any scheduler domain level, this cpu's scheduler group has multiple
+ *     busy cpu's exceeding the group's power.
+ *   - For SD_ASYM_PACKING, if the lower numbered cpu's in the scheduler
+ *     domain span are idle.
  */
 static inline int nohz_kick_needed(struct rq *rq, int cpu)
 {
         unsigned long now = jiffies;
-        int ret;
-        int first_pick_cpu, second_pick_cpu;
+        struct sched_domain *sd;
 
         if (unlikely(idle_cpu(cpu)))
                 return 0;
@@ -5166,32 +5091,44 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu)
          * busy tick after returning from idle, we will update the busy stats.
          */
         set_cpu_sd_state_busy();
-        if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu))))
+        if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))) {
                 clear_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
+                cpumask_clear_cpu(cpu, nohz.idle_cpus_mask);
+                atomic_dec(&nohz.nr_cpus);
+        }
+
+        /*
+         * None are in tickless mode and hence no need for NOHZ idle load
+         * balancing.
+         */
+        if (likely(!atomic_read(&nohz.nr_cpus)))
+                return 0;
 
         if (time_before(now, nohz.next_balance))
                 return 0;
 
-        first_pick_cpu = atomic_read(&nohz.first_pick_cpu);
-        second_pick_cpu = atomic_read(&nohz.second_pick_cpu);
+        if (rq->nr_running >= 2)
+                goto need_kick;
 
-        if (first_pick_cpu < nr_cpu_ids && first_pick_cpu != cpu &&
-            second_pick_cpu < nr_cpu_ids && second_pick_cpu != cpu)
-                return 0;
+        for_each_domain(cpu, sd) {
+                struct sched_group *sg = sd->groups;
+                struct sched_group_power *sgp = sg->sgp;
+                int nr_busy = atomic_read(&sgp->nr_busy_cpus);
 
-        ret = atomic_cmpxchg(&nohz.first_pick_cpu, nr_cpu_ids, cpu);
-        if (ret == nr_cpu_ids || ret == cpu) {
-                atomic_cmpxchg(&nohz.second_pick_cpu, cpu, nr_cpu_ids);
-                if (rq->nr_running > 1)
-                        return 1;
-        } else {
-                ret = atomic_cmpxchg(&nohz.second_pick_cpu, nr_cpu_ids, cpu);
-                if (ret == nr_cpu_ids || ret == cpu) {
-                        if (rq->nr_running)
-                                return 1;
-                }
-        }
+                if (sd->flags & SD_SHARE_PKG_RESOURCES && nr_busy > 1)
+                        goto need_kick;
+
+                if (sd->flags & SD_ASYM_PACKING && nr_busy != sg->group_weight
+                    && (cpumask_first_and(nohz.idle_cpus_mask,
+                                          sched_domain_span(sd)) < cpu))
+                        goto need_kick;
+
+                if (!(sd->flags & (SD_SHARE_PKG_RESOURCES | SD_ASYM_PACKING)))
+                        break;
+        }
         return 0;
+need_kick:
+        return 1;
 }
 #else
 static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle) { }
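
Read as a whole, the rewritten nohz_kick_needed() has three independent triggers: this rq has at least two runnable tasks, a resource-sharing (SMT/LLC) group has more than one busy CPU, or an SD_ASYM_PACKING domain has a lower-numbered CPU sitting idle. A condensed user-space sketch of that decision, with the domain walk flattened into a single group description (struct and field names are illustrative, and the patch's group-not-fully-busy check is approximated by first_idle_cpu):

#include <stdbool.h>
#include <stdio.h>

/* illustrative, flattened view of what the domain walk inspects */
struct group_state {
        int  nr_running;         /* runnable tasks on this cpu's rq */
        int  nr_busy;            /* busy cpus in this cpu's sched group */
        bool shares_resources;   /* SD_SHARE_PKG_RESOURCES level (SMT/LLC) */
        bool asym_packing;       /* SD_ASYM_PACKING level */
        int  first_idle_cpu;     /* lowest idle cpu in the domain span, -1 if none */
        int  this_cpu;
};

/* models the kick decision made by nohz_kick_needed() after this patch */
static bool kick_needed(const struct group_state *s)
{
        if (s->nr_running >= 2)
                return true;    /* this rq alone justifies a kick */

        if (s->shares_resources && s->nr_busy > 1)
                return true;    /* siblings could spread out over idle cpus */

        if (s->asym_packing && s->first_idle_cpu >= 0 &&
            s->first_idle_cpu < s->this_cpu)
                return true;    /* pack work onto lower-numbered cpus */

        return false;
}

int main(void)
{
        struct group_state s = {
                .nr_running = 1, .nr_busy = 2, .shares_resources = true,
                .asym_packing = false, .first_idle_cpu = -1, .this_cpu = 3,
        };
        printf("kick needed: %s\n", kick_needed(&s) ? "yes" : "no");  /* yes */
        return 0;
}
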
@@ -5652,9 +5589,6 @@ __init void init_sched_fair_class(void)
 #ifdef CONFIG_NO_HZ
         zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT);
         alloc_cpumask_var(&nohz.grp_idle_mask, GFP_NOWAIT);
-        atomic_set(&nohz.load_balancer, nr_cpu_ids);
-        atomic_set(&nohz.first_pick_cpu, nr_cpu_ids);
-        atomic_set(&nohz.second_pick_cpu, nr_cpu_ids);
 #endif
 #endif /* SMP */
 
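
The three atomic_set() calls can go because the replacement state wants to start at zero anyway: nohz has static storage duration, so nr_cpus begins at 0 and only the cpumasks need allocating. A trivial C illustration of that property (the struct below is a user-space stand-in, not the kernel's nohz):

#include <stdatomic.h>
#include <stdio.h>

/* static storage duration: every member is zero-initialized, so a
 * "number of tickless idle cpus" counter needs no explicit seeding,
 * unlike the removed nr_cpu_ids sentinels. */
static struct {
        atomic_int nr_cpus;
        unsigned long next_balance;
} nohz;

int main(void)
{
        printf("nr_cpus starts at %d\n", atomic_load(&nohz.nr_cpus));  /* 0 */
        return 0;
}
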