 kernel/sched/fair.c | 160 +++++++++-----------------------
 1 file changed, 47 insertions(+), 113 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e050563e97a4..821af14335f3 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4727,28 +4727,17 @@ out_unlock:
 #ifdef CONFIG_NO_HZ
 /*
  * idle load balancing details
- * - One of the idle CPUs nominates itself as idle load_balancer, while
- *   entering idle.
- * - This idle load balancer CPU will also go into tickless mode when
- *   it is idle, just like all other idle CPUs
  * - When one of the busy CPUs notice that there may be an idle rebalancing
  *   needed, they will kick the idle load balancer, which then does idle
  *   load balancing for all the idle CPUs.
  */
 static struct {
-	atomic_t load_balancer;
-	atomic_t first_pick_cpu;
-	atomic_t second_pick_cpu;
 	cpumask_var_t idle_cpus_mask;
 	cpumask_var_t grp_idle_mask;
+	atomic_t nr_cpus;
 	unsigned long next_balance; /* in jiffy units */
 } nohz ____cacheline_aligned;
 
-int get_nohz_load_balancer(void)
-{
-	return atomic_read(&nohz.load_balancer);
-}
-
 #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
 /**
  * lowest_flag_domain - Return lowest sched_domain containing flag.
@@ -4825,9 +4814,9 @@ static inline int is_semi_idle_group(struct sched_group *ilb_group)
  */
 static int find_new_ilb(int cpu)
 {
+	int ilb = cpumask_first(nohz.idle_cpus_mask);
 	struct sched_domain *sd;
 	struct sched_group *ilb_group;
-	int ilb = nr_cpu_ids;
 
 	/*
 	 * Have idle load balancer selection from semi-idle packages only
@@ -4881,13 +4870,10 @@ static void nohz_balancer_kick(int cpu)
 
 	nohz.next_balance++;
 
-	ilb_cpu = get_nohz_load_balancer();
+	ilb_cpu = find_new_ilb(cpu);
 
-	if (ilb_cpu >= nr_cpu_ids) {
-		ilb_cpu = cpumask_first(nohz.idle_cpus_mask);
-		if (ilb_cpu >= nr_cpu_ids)
-			return;
-	}
+	if (ilb_cpu >= nr_cpu_ids)
+		return;
 
 	if (test_and_set_bit(NOHZ_BALANCE_KICK, nohz_flags(cpu)))
 		return;
@@ -4932,77 +4918,20 @@ void set_cpu_sd_state_idle(void)
 }
 
 /*
- * This routine will try to nominate the ilb (idle load balancing)
- * owner among the cpus whose ticks are stopped. ilb owner will do the idle
- * load balancing on behalf of all those cpus.
- *
- * When the ilb owner becomes busy, we will not have new ilb owner until some
- * idle CPU wakes up and goes back to idle or some busy CPU tries to kick
- * idle load balancing by kicking one of the idle CPUs.
- *
- * Ticks are stopped for the ilb owner as well, with busy CPU kicking this
- * ilb owner CPU in future (when there is a need for idle load balancing on
- * behalf of all idle CPUs).
+ * This routine will record that this cpu is going idle with tick stopped.
+ * This info will be used in performing idle load balancing in the future.
  */
 void select_nohz_load_balancer(int stop_tick)
 {
 	int cpu = smp_processor_id();
 
 	if (stop_tick) {
-		if (!cpu_active(cpu)) {
-			if (atomic_read(&nohz.load_balancer) != cpu)
-				return;
-
-			/*
-			 * If we are going offline and still the leader,
-			 * give up!
-			 */
-			if (atomic_cmpxchg(&nohz.load_balancer, cpu,
-					   nr_cpu_ids) != cpu)
-				BUG();
-
+		if (test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))
 			return;
-		}
 
 		cpumask_set_cpu(cpu, nohz.idle_cpus_mask);
-
-		if (atomic_read(&nohz.first_pick_cpu) == cpu)
-			atomic_cmpxchg(&nohz.first_pick_cpu, cpu, nr_cpu_ids);
-		if (atomic_read(&nohz.second_pick_cpu) == cpu)
-			atomic_cmpxchg(&nohz.second_pick_cpu, cpu, nr_cpu_ids);
-
-		if (atomic_read(&nohz.load_balancer) >= nr_cpu_ids) {
-			int new_ilb;
-
-			/* make me the ilb owner */
-			if (atomic_cmpxchg(&nohz.load_balancer, nr_cpu_ids,
-					   cpu) != nr_cpu_ids)
-				return;
-
-			/*
-			 * Check to see if there is a more power-efficient
-			 * ilb.
-			 */
-			new_ilb = find_new_ilb(cpu);
-			if (new_ilb < nr_cpu_ids && new_ilb != cpu) {
-				atomic_set(&nohz.load_balancer, nr_cpu_ids);
-				resched_cpu(new_ilb);
-				return;
-			}
-			return;
-		}
-
+		atomic_inc(&nohz.nr_cpus);
 		set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
-	} else {
-		if (!cpumask_test_cpu(cpu, nohz.idle_cpus_mask))
-			return;
-
-		cpumask_clear_cpu(cpu, nohz.idle_cpus_mask);
-
-		if (atomic_read(&nohz.load_balancer) == cpu)
-			if (atomic_cmpxchg(&nohz.load_balancer, cpu,
-					   nr_cpu_ids) != cpu)
-				BUG();
 	}
 	return;
 }
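The hunk above strips select_nohz_load_balancer() down to bookkeeping: on the tick-stop path it only marks the cpu in nohz.idle_cpus_mask, increments nohz.nr_cpus and sets NOHZ_TICK_STOPPED; the matching cleanup now happens on the first busy tick (see the nohz_kick_needed() hunk further down). A minimal user-space sketch of that accounting follows; idle_state, enter_tickless_idle() and busy_tick() are made-up stand-ins, and plain variables replace the kernel's cpumask, atomic_t and per-cpu flag bits.

/*
 * Standalone sketch of the bookkeeping the patch leaves behind: entering
 * tickless idle only records the cpu, leaving it idle balancing takes a
 * kick from a busy cpu. Single-threaded demo, no real atomics.
 */
#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 8

struct idle_state {
	unsigned long idle_cpus_mask;	/* bit per cpu, like nohz.idle_cpus_mask */
	int nr_cpus;			/* like nohz.nr_cpus */
	bool tick_stopped[NR_CPUS];	/* like the NOHZ_TICK_STOPPED flag */
};

static struct idle_state nohz;

/* Mirrors the stop_tick path: record the cpu, nothing else. */
static void enter_tickless_idle(int cpu)
{
	if (nohz.tick_stopped[cpu])
		return;			/* already accounted */

	nohz.idle_cpus_mask |= 1UL << cpu;
	nohz.nr_cpus++;
	nohz.tick_stopped[cpu] = true;
}

/* Mirrors the first busy tick after idle: undo the accounting. */
static void busy_tick(int cpu)
{
	if (!nohz.tick_stopped[cpu])
		return;

	nohz.tick_stopped[cpu] = false;
	nohz.idle_cpus_mask &= ~(1UL << cpu);
	nohz.nr_cpus--;
}

int main(void)
{
	enter_tickless_idle(2);
	enter_tickless_idle(2);	/* second call is a no-op */
	enter_tickless_idle(5);
	printf("idle mask %#lx, nr_cpus %d\n", nohz.idle_cpus_mask, nohz.nr_cpus);
	busy_tick(2);
	printf("idle mask %#lx, nr_cpus %d\n", nohz.idle_cpus_mask, nohz.nr_cpus);
	return 0;
}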
@@ -5113,7 +5042,7 @@ static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle)
 		goto end;
 
 	for_each_cpu(balance_cpu, nohz.idle_cpus_mask) {
-		if (balance_cpu == this_cpu)
+		if (balance_cpu == this_cpu || !idle_cpu(this_cpu))
 			continue;
 
 		/*
@@ -5141,22 +5070,18 @@ end:
 }
 
 /*
- * Current heuristic for kicking the idle load balancer
- * - first_pick_cpu is the one of the busy CPUs. It will kick
- *   idle load balancer when it has more than one process active. This
- *   eliminates the need for idle load balancing altogether when we have
- *   only one running process in the system (common case).
- * - If there are more than one busy CPU, idle load balancer may have
- *   to run for active_load_balance to happen (i.e., two busy CPUs are
- *   SMT or core siblings and can run better if they move to different
- *   physical CPUs). So, second_pick_cpu is the second of the busy CPUs
- *   which will kick idle load balancer as soon as it has any load.
+ * Current heuristic for kicking the idle load balancer in the presence
+ * of an idle cpu is the system.
+ * - This rq has more than one task.
+ * - At any scheduler domain level, this cpu's scheduler group has multiple
+ *   busy cpu's exceeding the group's power.
+ * - For SD_ASYM_PACKING, if the lower numbered cpu's in the scheduler
+ *   domain span are idle.
  */
 static inline int nohz_kick_needed(struct rq *rq, int cpu)
 {
 	unsigned long now = jiffies;
-	int ret;
-	int first_pick_cpu, second_pick_cpu;
+	struct sched_domain *sd;
 
 	if (unlikely(idle_cpu(cpu)))
 		return 0;
@@ -5166,32 +5091,44 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu)
 	 * busy tick after returning from idle, we will update the busy stats.
 	 */
 	set_cpu_sd_state_busy();
-	if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu))))
+	if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))) {
 		clear_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
+		cpumask_clear_cpu(cpu, nohz.idle_cpus_mask);
+		atomic_dec(&nohz.nr_cpus);
+	}
+
+	/*
+	 * None are in tickless mode and hence no need for NOHZ idle load
+	 * balancing.
+	 */
+	if (likely(!atomic_read(&nohz.nr_cpus)))
+		return 0;
 
 	if (time_before(now, nohz.next_balance))
 		return 0;
 
-	first_pick_cpu = atomic_read(&nohz.first_pick_cpu);
-	second_pick_cpu = atomic_read(&nohz.second_pick_cpu);
+	if (rq->nr_running >= 2)
+		goto need_kick;
 
-	if (first_pick_cpu < nr_cpu_ids && first_pick_cpu != cpu &&
-	    second_pick_cpu < nr_cpu_ids && second_pick_cpu != cpu)
-		return 0;
+	for_each_domain(cpu, sd) {
+		struct sched_group *sg = sd->groups;
+		struct sched_group_power *sgp = sg->sgp;
+		int nr_busy = atomic_read(&sgp->nr_busy_cpus);
 
-	ret = atomic_cmpxchg(&nohz.first_pick_cpu, nr_cpu_ids, cpu);
-	if (ret == nr_cpu_ids || ret == cpu) {
-		atomic_cmpxchg(&nohz.second_pick_cpu, cpu, nr_cpu_ids);
-		if (rq->nr_running > 1)
-			return 1;
-	} else {
-		ret = atomic_cmpxchg(&nohz.second_pick_cpu, nr_cpu_ids, cpu);
-		if (ret == nr_cpu_ids || ret == cpu) {
-			if (rq->nr_running)
-				return 1;
-		}
-	}
+		if (sd->flags & SD_SHARE_PKG_RESOURCES && nr_busy > 1)
+			goto need_kick;
+
+		if (sd->flags & SD_ASYM_PACKING && nr_busy != sg->group_weight
+		    && (cpumask_first_and(nohz.idle_cpus_mask,
+					  sched_domain_span(sd)) < cpu))
+			goto need_kick;
+
+		if (!(sd->flags & (SD_SHARE_PKG_RESOURCES | SD_ASYM_PACKING)))
+			break;
+	}
 	return 0;
+need_kick:
+	return 1;
 }
 #else
 static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle) { }
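The rewritten nohz_kick_needed() above drops the first_pick_cpu/second_pick_cpu handshake and decides to kick the idle load balancer from local state: more than one runnable task on this rq, more than one busy cpu at a cache-sharing (SD_SHARE_PKG_RESOURCES) level, or, with SD_ASYM_PACKING, a lower-numbered idle cpu in the domain span. Below is a compact user-space sketch of that decision under simplifying assumptions: struct domain and kick_needed() are hypothetical stand-ins, a flat array replaces the for_each_domain() walk, and an integer stands in for cpumask_first_and() over the domain span.

/*
 * User-space sketch of the new kick heuristic. The inputs model
 * rq->nr_running, sgp->nr_busy_cpus and the SD_* flags from the hunk above.
 */
#include <stdbool.h>
#include <stdio.h>

#define SD_SHARE_PKG_RESOURCES	0x1
#define SD_ASYM_PACKING		0x2

struct domain {			/* one level of a simplified domain hierarchy */
	int flags;
	int nr_busy;		/* busy cpus in this cpu's group */
	int group_weight;	/* cpus in this cpu's group */
	int first_idle_cpu;	/* lowest idle cpu in the domain span, or -1 */
};

static bool kick_needed(int cpu, int rq_nr_running,
			const struct domain *sd, int levels)
{
	if (rq_nr_running >= 2)
		return true;	/* this cpu alone justifies idle balancing */

	for (int i = 0; i < levels; i++, sd++) {
		/* shared-cache level with more than one busy cpu */
		if ((sd->flags & SD_SHARE_PKG_RESOURCES) && sd->nr_busy > 1)
			return true;

		/* asymmetric packing: a lower-numbered cpu sits idle */
		if ((sd->flags & SD_ASYM_PACKING) &&
		    sd->nr_busy != sd->group_weight &&
		    sd->first_idle_cpu >= 0 && sd->first_idle_cpu < cpu)
			return true;

		if (!(sd->flags & (SD_SHARE_PKG_RESOURCES | SD_ASYM_PACKING)))
			break;	/* nothing interesting further up */
	}
	return false;
}

int main(void)
{
	struct domain levels[] = {
		{ SD_SHARE_PKG_RESOURCES, 2, 2, -1 },	/* SMT-like level */
	};

	printf("kick? %d\n", kick_needed(3, 1, levels, 1));	/* 1: sibling busy */
	printf("kick? %d\n", kick_needed(3, 2, levels, 0));	/* 1: two tasks here */
	return 0;
}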
@@ -5652,9 +5589,6 @@ __init void init_sched_fair_class(void)
 #ifdef CONFIG_NO_HZ
 	zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT);
 	alloc_cpumask_var(&nohz.grp_idle_mask, GFP_NOWAIT);
-	atomic_set(&nohz.load_balancer, nr_cpu_ids);
-	atomic_set(&nohz.first_pick_cpu, nr_cpu_ids);
-	atomic_set(&nohz.second_pick_cpu, nr_cpu_ids);
 #endif
 #endif /* SMP */
 