author     Peter Zijlstra <peterz@infradead.org>      2018-02-20 04:58:39 -0500
committer  Ingo Molnar <mingo@kernel.org>             2018-03-09 01:59:23 -0500
commit     af3fe03c562055bc3c116eabe73f141ae31bf234
tree       4c80794275cf19bada57efd51d2dec291a08b71b
parent     63928384faefba1b31c3bb77361965715a9fc71c
sched/fair: Move rebalance_domains()
This pure code movement results in two #ifdef CONFIG_NO_HZ_COMMON
sections landing next to each other.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
 kernel/sched/fair.c | 236
 1 file changed, 118 insertions(+), 118 deletions(-)
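As the commit message notes, moving rebalance_domains() up next to update_max_interval() leaves the two CONFIG_NO_HZ_COMMON regions around its old location back to back. A rough sketch of the resulting layout (illustrative only; the elided bodies are paraphrased, not the literal file contents):

#ifdef CONFIG_NO_HZ_COMMON
/* ... nohz balancer kick machinery (nohz_balancer_kick() etc.) ... */
#else
static inline void nohz_balancer_kick(struct rq *rq) { }
#endif

/*
 * rebalance_domains() used to sit here; after the move, the next
 * CONFIG_NO_HZ_COMMON section follows immediately:
 */
#ifdef CONFIG_NO_HZ_COMMON
/* ... nohz idle balancing (nohz_idle_balance() and friends) ... */
#endif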
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 5c357561db5d..0da79d8a6a2c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9121,6 +9121,124 @@ out_unlock:
         return 0;
 }
 
+static DEFINE_SPINLOCK(balancing);
+
+/*
+ * Scale the max load_balance interval with the number of CPUs in the system.
+ * This trades load-balance latency on larger machines for less cross talk.
+ */
+void update_max_interval(void)
+{
+        max_load_balance_interval = HZ*num_online_cpus()/10;
+}
+
+/*
+ * It checks each scheduling domain to see if it is due to be balanced,
+ * and initiates a balancing operation if so.
+ *
+ * Balancing parameters are set up in init_sched_domains.
+ */
+static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
+{
+        int continue_balancing = 1;
+        int cpu = rq->cpu;
+        unsigned long interval;
+        struct sched_domain *sd;
+        /* Earliest time when we have to do rebalance again */
+        unsigned long next_balance = jiffies + 60*HZ;
+        int update_next_balance = 0;
+        int need_serialize, need_decay = 0;
+        u64 max_cost = 0;
+
+        rcu_read_lock();
+        for_each_domain(cpu, sd) {
+                /*
+                 * Decay the newidle max times here because this is a regular
+                 * visit to all the domains. Decay ~1% per second.
+                 */
+                if (time_after(jiffies, sd->next_decay_max_lb_cost)) {
+                        sd->max_newidle_lb_cost =
+                                (sd->max_newidle_lb_cost * 253) / 256;
+                        sd->next_decay_max_lb_cost = jiffies + HZ;
+                        need_decay = 1;
+                }
+                max_cost += sd->max_newidle_lb_cost;
+
+                if (!(sd->flags & SD_LOAD_BALANCE))
+                        continue;
+
+                /*
+                 * Stop the load balance at this level. There is another
+                 * CPU in our sched group which is doing load balancing more
+                 * actively.
+                 */
+                if (!continue_balancing) {
+                        if (need_decay)
+                                continue;
+                        break;
+                }
+
+                interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
+
+                need_serialize = sd->flags & SD_SERIALIZE;
+                if (need_serialize) {
+                        if (!spin_trylock(&balancing))
+                                goto out;
+                }
+
+                if (time_after_eq(jiffies, sd->last_balance + interval)) {
+                        if (load_balance(cpu, rq, sd, idle, &continue_balancing)) {
+                                /*
+                                 * The LBF_DST_PINNED logic could have changed
+                                 * env->dst_cpu, so we can't know our idle
+                                 * state even if we migrated tasks. Update it.
+                                 */
+                                idle = idle_cpu(cpu) ? CPU_IDLE : CPU_NOT_IDLE;
+                        }
+                        sd->last_balance = jiffies;
+                        interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
+                }
+                if (need_serialize)
+                        spin_unlock(&balancing);
+out:
+                if (time_after(next_balance, sd->last_balance + interval)) {
+                        next_balance = sd->last_balance + interval;
+                        update_next_balance = 1;
+                }
+        }
+        if (need_decay) {
+                /*
+                 * Ensure the rq-wide value also decays but keep it at a
+                 * reasonable floor to avoid funnies with rq->avg_idle.
+                 */
+                rq->max_idle_balance_cost =
+                        max((u64)sysctl_sched_migration_cost, max_cost);
+        }
+        rcu_read_unlock();
+
+        /*
+         * next_balance will be updated only when there is a need.
+         * When the CPU is attached to null domain for ex, it will not be
+         * updated.
+         */
+        if (likely(update_next_balance)) {
+                rq->next_balance = next_balance;
+
+#ifdef CONFIG_NO_HZ_COMMON
+                /*
+                 * If this CPU has been elected to perform the nohz idle
+                 * balance. Other idle CPUs have already rebalanced with
+                 * nohz_idle_balance() and nohz.next_balance has been
+                 * updated accordingly. This CPU is now running the idle load
+                 * balance for itself and we need to update the
+                 * nohz.next_balance accordingly.
+                 */
+                if ((idle == CPU_IDLE) && time_after(nohz.next_balance, rq->next_balance))
+                        nohz.next_balance = rq->next_balance;
+#endif
+        }
+}
+
 static inline int on_null_domain(struct rq *rq)
 {
         return unlikely(!rcu_dereference_sched(rq->sd));
@@ -9373,124 +9491,6 @@ out:
 static inline void nohz_balancer_kick(struct rq *rq) { }
 #endif
 
-static DEFINE_SPINLOCK(balancing);
-
-/*
- * Scale the max load_balance interval with the number of CPUs in the system.
- * This trades load-balance latency on larger machines for less cross talk.
- */
-void update_max_interval(void)
-{
-        max_load_balance_interval = HZ*num_online_cpus()/10;
-}
-
-/*
- * It checks each scheduling domain to see if it is due to be balanced,
- * and initiates a balancing operation if so.
- *
- * Balancing parameters are set up in init_sched_domains.
- */
-static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
-{
-        int continue_balancing = 1;
-        int cpu = rq->cpu;
-        unsigned long interval;
-        struct sched_domain *sd;
-        /* Earliest time when we have to do rebalance again */
-        unsigned long next_balance = jiffies + 60*HZ;
-        int update_next_balance = 0;
-        int need_serialize, need_decay = 0;
-        u64 max_cost = 0;
-
-        rcu_read_lock();
-        for_each_domain(cpu, sd) {
-                /*
-                 * Decay the newidle max times here because this is a regular
-                 * visit to all the domains. Decay ~1% per second.
-                 */
-                if (time_after(jiffies, sd->next_decay_max_lb_cost)) {
-                        sd->max_newidle_lb_cost =
-                                (sd->max_newidle_lb_cost * 253) / 256;
-                        sd->next_decay_max_lb_cost = jiffies + HZ;
-                        need_decay = 1;
-                }
-                max_cost += sd->max_newidle_lb_cost;
-
-                if (!(sd->flags & SD_LOAD_BALANCE))
-                        continue;
-
-                /*
-                 * Stop the load balance at this level. There is another
-                 * CPU in our sched group which is doing load balancing more
-                 * actively.
-                 */
-                if (!continue_balancing) {
-                        if (need_decay)
-                                continue;
-                        break;
-                }
-
-                interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
-
-                need_serialize = sd->flags & SD_SERIALIZE;
-                if (need_serialize) {
-                        if (!spin_trylock(&balancing))
-                                goto out;
-                }
-
-                if (time_after_eq(jiffies, sd->last_balance + interval)) {
-                        if (load_balance(cpu, rq, sd, idle, &continue_balancing)) {
-                                /*
-                                 * The LBF_DST_PINNED logic could have changed
-                                 * env->dst_cpu, so we can't know our idle
-                                 * state even if we migrated tasks. Update it.
-                                 */
-                                idle = idle_cpu(cpu) ? CPU_IDLE : CPU_NOT_IDLE;
-                        }
-                        sd->last_balance = jiffies;
-                        interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
-                }
-                if (need_serialize)
-                        spin_unlock(&balancing);
-out:
-                if (time_after(next_balance, sd->last_balance + interval)) {
-                        next_balance = sd->last_balance + interval;
-                        update_next_balance = 1;
-                }
-        }
-        if (need_decay) {
-                /*
-                 * Ensure the rq-wide value also decays but keep it at a
-                 * reasonable floor to avoid funnies with rq->avg_idle.
-                 */
-                rq->max_idle_balance_cost =
-                        max((u64)sysctl_sched_migration_cost, max_cost);
-        }
-        rcu_read_unlock();
-
-        /*
-         * next_balance will be updated only when there is a need.
-         * When the CPU is attached to null domain for ex, it will not be
-         * updated.
-         */
-        if (likely(update_next_balance)) {
-                rq->next_balance = next_balance;
-
-#ifdef CONFIG_NO_HZ_COMMON
-                /*
-                 * If this CPU has been elected to perform the nohz idle
-                 * balance. Other idle CPUs have already rebalanced with
-                 * nohz_idle_balance() and nohz.next_balance has been
-                 * updated accordingly. This CPU is now running the idle load
-                 * balance for itself and we need to update the
-                 * nohz.next_balance accordingly.
-                 */
-                if ((idle == CPU_IDLE) && time_after(nohz.next_balance, rq->next_balance))
-                        nohz.next_balance = rq->next_balance;
-#endif
-        }
-}
-
 #ifdef CONFIG_NO_HZ_COMMON
 /*
  * In CONFIG_NO_HZ_COMMON case, the idle balance kickee will do the