diff options
-rw-r--r-- | include/linux/sched.h | 9 | ||||
-rw-r--r-- | kernel/hrtimer.c | 8 | ||||
-rw-r--r-- | kernel/sched.c | 34 | ||||
-rw-r--r-- | kernel/sched_fair.c | 329 | ||||
-rw-r--r-- | kernel/time/tick-sched.c | 8 | ||||
-rw-r--r-- | kernel/timer.c | 8 |
6 files changed, 237 insertions, 159 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h index c2d4316a04bb..a3e5b1cd0438 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -271,14 +271,11 @@ extern int runqueue_is_locked(int cpu); | |||
271 | 271 | ||
272 | extern cpumask_var_t nohz_cpu_mask; | 272 | extern cpumask_var_t nohz_cpu_mask; |
273 | #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) | 273 | #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) |
274 | extern int select_nohz_load_balancer(int cpu); | 274 | extern void select_nohz_load_balancer(int stop_tick); |
275 | extern int get_nohz_load_balancer(void); | 275 | extern int get_nohz_timer_target(void); |
276 | extern int nohz_ratelimit(int cpu); | 276 | extern int nohz_ratelimit(int cpu); |
277 | #else | 277 | #else |
278 | static inline int select_nohz_load_balancer(int cpu) | 278 | static inline void select_nohz_load_balancer(int stop_tick) { } |
279 | { | ||
280 | return 0; | ||
281 | } | ||
282 | 279 | ||
283 | static inline int nohz_ratelimit(int cpu) | 280 | static inline int nohz_ratelimit(int cpu) |
284 | { | 281 | { |
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 5c69e996bd0f..e934339fbbef 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c | |||
@@ -144,12 +144,8 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, | |||
144 | static int hrtimer_get_target(int this_cpu, int pinned) | 144 | static int hrtimer_get_target(int this_cpu, int pinned) |
145 | { | 145 | { |
146 | #ifdef CONFIG_NO_HZ | 146 | #ifdef CONFIG_NO_HZ |
147 | if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu)) { | 147 | if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu)) |
148 | int preferred_cpu = get_nohz_load_balancer(); | 148 | return get_nohz_timer_target(); |
149 | |||
150 | if (preferred_cpu >= 0) | ||
151 | return preferred_cpu; | ||
152 | } | ||
153 | #endif | 149 | #endif |
154 | return this_cpu; | 150 | return this_cpu; |
155 | } | 151 | } |
diff --git a/kernel/sched.c b/kernel/sched.c index a757f6b11cbd..132950b33dde 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -460,7 +460,7 @@ struct rq { | |||
460 | unsigned long last_load_update_tick; | 460 | unsigned long last_load_update_tick; |
461 | #ifdef CONFIG_NO_HZ | 461 | #ifdef CONFIG_NO_HZ |
462 | u64 nohz_stamp; | 462 | u64 nohz_stamp; |
463 | unsigned char in_nohz_recently; | 463 | unsigned char nohz_balance_kick; |
464 | #endif | 464 | #endif |
465 | unsigned int skip_clock_update; | 465 | unsigned int skip_clock_update; |
466 | 466 | ||
@@ -1195,6 +1195,27 @@ static void resched_cpu(int cpu) | |||
1195 | 1195 | ||
1196 | #ifdef CONFIG_NO_HZ | 1196 | #ifdef CONFIG_NO_HZ |
1197 | /* | 1197 | /* |
1198 | * In the semi idle case, use the nearest busy cpu for migrating timers | ||
1199 | * from an idle cpu. This is good for power-savings. | ||
1200 | * | ||
1201 | * We don't do similar optimization for completely idle system, as | ||
1202 | * selecting an idle cpu will add more delays to the timers than intended | ||
1203 | * (as that cpu's timer base may not be up to date wrt jiffies etc). | ||
1204 | */ | ||
1205 | int get_nohz_timer_target(void) | ||
1206 | { | ||
1207 | int cpu = smp_processor_id(); | ||
1208 | int i; | ||
1209 | struct sched_domain *sd; | ||
1210 | |||
1211 | for_each_domain(cpu, sd) { | ||
1212 | for_each_cpu(i, sched_domain_span(sd)) | ||
1213 | if (!idle_cpu(i)) | ||
1214 | return i; | ||
1215 | } | ||
1216 | return cpu; | ||
1217 | } | ||
1218 | /* | ||
1198 | * When add_timer_on() enqueues a timer into the timer wheel of an | 1219 | * When add_timer_on() enqueues a timer into the timer wheel of an |
1199 | * idle CPU then this timer might expire before the next timer event | 1220 | * idle CPU then this timer might expire before the next timer event |
1200 | * which is scheduled to wake up that CPU. In case of a completely | 1221 | * which is scheduled to wake up that CPU. In case of a completely |
@@ -7791,6 +7812,10 @@ void __init sched_init(void) | |||
7791 | rq->idle_stamp = 0; | 7812 | rq->idle_stamp = 0; |
7792 | rq->avg_idle = 2*sysctl_sched_migration_cost; | 7813 | rq->avg_idle = 2*sysctl_sched_migration_cost; |
7793 | rq_attach_root(rq, &def_root_domain); | 7814 | rq_attach_root(rq, &def_root_domain); |
7815 | #ifdef CONFIG_NO_HZ | ||
7816 | rq->nohz_balance_kick = 0; | ||
7817 | init_sched_softirq_csd(&per_cpu(remote_sched_softirq_cb, i)); | ||
7818 | #endif | ||
7794 | #endif | 7819 | #endif |
7795 | init_rq_hrtick(rq); | 7820 | init_rq_hrtick(rq); |
7796 | atomic_set(&rq->nr_iowait, 0); | 7821 | atomic_set(&rq->nr_iowait, 0); |
@@ -7835,8 +7860,11 @@ void __init sched_init(void) | |||
7835 | zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT); | 7860 | zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT); |
7836 | #ifdef CONFIG_SMP | 7861 | #ifdef CONFIG_SMP |
7837 | #ifdef CONFIG_NO_HZ | 7862 | #ifdef CONFIG_NO_HZ |
7838 | zalloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT); | 7863 | zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT); |
7839 | alloc_cpumask_var(&nohz.ilb_grp_nohz_mask, GFP_NOWAIT); | 7864 | alloc_cpumask_var(&nohz.grp_idle_mask, GFP_NOWAIT); |
7865 | atomic_set(&nohz.load_balancer, nr_cpu_ids); | ||
7866 | atomic_set(&nohz.first_pick_cpu, nr_cpu_ids); | ||
7867 | atomic_set(&nohz.second_pick_cpu, nr_cpu_ids); | ||
7840 | #endif | 7868 | #endif |
7841 | /* May be allocated at isolcpus cmdline parse time */ | 7869 | /* May be allocated at isolcpus cmdline parse time */ |
7842 | if (cpu_isolated_map == NULL) | 7870 | if (cpu_isolated_map == NULL) |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 22b8b4f2b616..6ee2e0af665b 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -3091,13 +3091,40 @@ out_unlock: | |||
3091 | } | 3091 | } |
3092 | 3092 | ||
3093 | #ifdef CONFIG_NO_HZ | 3093 | #ifdef CONFIG_NO_HZ |
3094 | |||
3095 | static DEFINE_PER_CPU(struct call_single_data, remote_sched_softirq_cb); | ||
3096 | |||
3097 | static void trigger_sched_softirq(void *data) | ||
3098 | { | ||
3099 | raise_softirq_irqoff(SCHED_SOFTIRQ); | ||
3100 | } | ||
3101 | |||
3102 | static inline void init_sched_softirq_csd(struct call_single_data *csd) | ||
3103 | { | ||
3104 | csd->func = trigger_sched_softirq; | ||
3105 | csd->info = NULL; | ||
3106 | csd->flags = 0; | ||
3107 | csd->priv = 0; | ||
3108 | } | ||
3109 | |||
3110 | /* | ||
3111 | * idle load balancing details | ||
3112 | * - One of the idle CPUs nominates itself as idle load_balancer, while | ||
3113 | * entering idle. | ||
3114 | * - This idle load balancer CPU will also go into tickless mode when | ||
3115 | * it is idle, just like all other idle CPUs | ||
3116 | * - When one of the busy CPUs notices that there may be an idle rebalancing | ||
3117 | * needed, it will kick the idle load balancer, which then does idle | ||
3118 | * load balancing for all the idle CPUs. | ||
3119 | */ | ||
3094 | static struct { | 3120 | static struct { |
3095 | atomic_t load_balancer; | 3121 | atomic_t load_balancer; |
3096 | cpumask_var_t cpu_mask; | 3122 | atomic_t first_pick_cpu; |
3097 | cpumask_var_t ilb_grp_nohz_mask; | 3123 | atomic_t second_pick_cpu; |
3098 | } nohz ____cacheline_aligned = { | 3124 | cpumask_var_t idle_cpus_mask; |
3099 | .load_balancer = ATOMIC_INIT(-1), | 3125 | cpumask_var_t grp_idle_mask; |
3100 | }; | 3126 | unsigned long next_balance; /* in jiffy units */ |
3127 | } nohz ____cacheline_aligned; | ||
3101 | 3128 | ||
3102 | int get_nohz_load_balancer(void) | 3129 | int get_nohz_load_balancer(void) |
3103 | { | 3130 | { |
@@ -3151,17 +3178,17 @@ static inline struct sched_domain *lowest_flag_domain(int cpu, int flag) | |||
3151 | */ | 3178 | */ |
3152 | static inline int is_semi_idle_group(struct sched_group *ilb_group) | 3179 | static inline int is_semi_idle_group(struct sched_group *ilb_group) |
3153 | { | 3180 | { |
3154 | cpumask_and(nohz.ilb_grp_nohz_mask, nohz.cpu_mask, | 3181 | cpumask_and(nohz.grp_idle_mask, nohz.idle_cpus_mask, |
3155 | sched_group_cpus(ilb_group)); | 3182 | sched_group_cpus(ilb_group)); |
3156 | 3183 | ||
3157 | /* | 3184 | /* |
3158 | * A sched_group is semi-idle when it has at least one busy cpu | 3185 | * A sched_group is semi-idle when it has at least one busy cpu |
3159 | * and at least one idle cpu. | 3186 | * and at least one idle cpu. |
3160 | */ | 3187 | */ |
3161 | if (cpumask_empty(nohz.ilb_grp_nohz_mask)) | 3188 | if (cpumask_empty(nohz.grp_idle_mask)) |
3162 | return 0; | 3189 | return 0; |
3163 | 3190 | ||
3164 | if (cpumask_equal(nohz.ilb_grp_nohz_mask, sched_group_cpus(ilb_group))) | 3191 | if (cpumask_equal(nohz.grp_idle_mask, sched_group_cpus(ilb_group))) |
3165 | return 0; | 3192 | return 0; |
3166 | 3193 | ||
3167 | return 1; | 3194 | return 1; |
@@ -3194,7 +3221,7 @@ static int find_new_ilb(int cpu) | |||
3194 | * Optimize for the case when we have no idle CPUs or only one | 3221 | * Optimize for the case when we have no idle CPUs or only one |
3195 | * idle CPU. Don't walk the sched_domain hierarchy in such cases | 3222 | * idle CPU. Don't walk the sched_domain hierarchy in such cases |
3196 | */ | 3223 | */ |
3197 | if (cpumask_weight(nohz.cpu_mask) < 2) | 3224 | if (cpumask_weight(nohz.idle_cpus_mask) < 2) |
3198 | goto out_done; | 3225 | goto out_done; |
3199 | 3226 | ||
3200 | for_each_flag_domain(cpu, sd, SD_POWERSAVINGS_BALANCE) { | 3227 | for_each_flag_domain(cpu, sd, SD_POWERSAVINGS_BALANCE) { |
@@ -3202,7 +3229,7 @@ static int find_new_ilb(int cpu) | |||
3202 | 3229 | ||
3203 | do { | 3230 | do { |
3204 | if (is_semi_idle_group(ilb_group)) | 3231 | if (is_semi_idle_group(ilb_group)) |
3205 | return cpumask_first(nohz.ilb_grp_nohz_mask); | 3232 | return cpumask_first(nohz.grp_idle_mask); |
3206 | 3233 | ||
3207 | ilb_group = ilb_group->next; | 3234 | ilb_group = ilb_group->next; |
3208 | 3235 | ||
@@ -3210,98 +3237,116 @@ static int find_new_ilb(int cpu) | |||
3210 | } | 3237 | } |
3211 | 3238 | ||
3212 | out_done: | 3239 | out_done: |
3213 | return cpumask_first(nohz.cpu_mask); | 3240 | return nr_cpu_ids; |
3214 | } | 3241 | } |
3215 | #else /* (CONFIG_SCHED_MC || CONFIG_SCHED_SMT) */ | 3242 | #else /* (CONFIG_SCHED_MC || CONFIG_SCHED_SMT) */ |
3216 | static inline int find_new_ilb(int call_cpu) | 3243 | static inline int find_new_ilb(int call_cpu) |
3217 | { | 3244 | { |
3218 | return cpumask_first(nohz.cpu_mask); | 3245 | return nr_cpu_ids; |
3219 | } | 3246 | } |
3220 | #endif | 3247 | #endif |
3221 | 3248 | ||
3222 | /* | 3249 | /* |
3250 | * Kick a CPU to do the nohz balancing, if it is time for it. We pick the | ||
3251 | * nohz_load_balancer CPU (if there is one) otherwise fallback to any idle | ||
3252 | * CPU (if there is one). | ||
3253 | */ | ||
3254 | static void nohz_balancer_kick(int cpu) | ||
3255 | { | ||
3256 | int ilb_cpu; | ||
3257 | |||
3258 | nohz.next_balance++; | ||
3259 | |||
3260 | ilb_cpu = get_nohz_load_balancer(); | ||
3261 | |||
3262 | if (ilb_cpu >= nr_cpu_ids) { | ||
3263 | ilb_cpu = cpumask_first(nohz.idle_cpus_mask); | ||
3264 | if (ilb_cpu >= nr_cpu_ids) | ||
3265 | return; | ||
3266 | } | ||
3267 | |||
3268 | if (!cpu_rq(ilb_cpu)->nohz_balance_kick) { | ||
3269 | struct call_single_data *cp; | ||
3270 | |||
3271 | cpu_rq(ilb_cpu)->nohz_balance_kick = 1; | ||
3272 | cp = &per_cpu(remote_sched_softirq_cb, cpu); | ||
3273 | __smp_call_function_single(ilb_cpu, cp, 0); | ||
3274 | } | ||
3275 | return; | ||
3276 | } | ||
3277 | |||
3278 | /* | ||
3223 | * This routine will try to nominate the ilb (idle load balancing) | 3279 | * This routine will try to nominate the ilb (idle load balancing) |
3224 | * owner among the cpus whose ticks are stopped. ilb owner will do the idle | 3280 | * owner among the cpus whose ticks are stopped. ilb owner will do the idle |
3225 | * load balancing on behalf of all those cpus. If all the cpus in the system | 3281 | * load balancing on behalf of all those cpus. |
3226 | * go into this tickless mode, then there will be no ilb owner (as there is | ||
3227 | * no need for one) and all the cpus will sleep till the next wakeup event | ||
3228 | * arrives... | ||
3229 | * | ||
3230 | * For the ilb owner, tick is not stopped. And this tick will be used | ||
3231 | * for idle load balancing. ilb owner will still be part of | ||
3232 | * nohz.cpu_mask.. | ||
3233 | * | 3282 | * |
3234 | * While stopping the tick, this cpu will become the ilb owner if there | 3283 | * When the ilb owner becomes busy, we will not have new ilb owner until some |
3235 | * is no other owner. And will be the owner till that cpu becomes busy | 3284 | * idle CPU wakes up and goes back to idle or some busy CPU tries to kick |
3236 | * or if all cpus in the system stop their ticks at which point | 3285 | * idle load balancing by kicking one of the idle CPUs. |
3237 | * there is no need for ilb owner. | ||
3238 | * | 3286 | * |
3239 | * When the ilb owner becomes busy, it nominates another owner, during the | 3287 | * Ticks are stopped for the ilb owner as well, with busy CPU kicking this |
3240 | * next busy scheduler_tick() | 3288 | * ilb owner CPU in future (when there is a need for idle load balancing on |
3289 | * behalf of all idle CPUs). | ||
3241 | */ | 3290 | */ |
3242 | int select_nohz_load_balancer(int stop_tick) | 3291 | void select_nohz_load_balancer(int stop_tick) |
3243 | { | 3292 | { |
3244 | int cpu = smp_processor_id(); | 3293 | int cpu = smp_processor_id(); |
3245 | 3294 | ||
3246 | if (stop_tick) { | 3295 | if (stop_tick) { |
3247 | cpu_rq(cpu)->in_nohz_recently = 1; | ||
3248 | |||
3249 | if (!cpu_active(cpu)) { | 3296 | if (!cpu_active(cpu)) { |
3250 | if (atomic_read(&nohz.load_balancer) != cpu) | 3297 | if (atomic_read(&nohz.load_balancer) != cpu) |
3251 | return 0; | 3298 | return; |
3252 | 3299 | ||
3253 | /* | 3300 | /* |
3254 | * If we are going offline and still the leader, | 3301 | * If we are going offline and still the leader, |
3255 | * give up! | 3302 | * give up! |
3256 | */ | 3303 | */ |
3257 | if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu) | 3304 | if (atomic_cmpxchg(&nohz.load_balancer, cpu, |
3305 | nr_cpu_ids) != cpu) | ||
3258 | BUG(); | 3306 | BUG(); |
3259 | 3307 | ||
3260 | return 0; | 3308 | return; |
3261 | } | 3309 | } |
3262 | 3310 | ||
3263 | cpumask_set_cpu(cpu, nohz.cpu_mask); | 3311 | cpumask_set_cpu(cpu, nohz.idle_cpus_mask); |
3264 | 3312 | ||
3265 | /* time for ilb owner also to sleep */ | 3313 | if (atomic_read(&nohz.first_pick_cpu) == cpu) |
3266 | if (cpumask_weight(nohz.cpu_mask) == num_active_cpus()) { | 3314 | atomic_cmpxchg(&nohz.first_pick_cpu, cpu, nr_cpu_ids); |
3267 | if (atomic_read(&nohz.load_balancer) == cpu) | 3315 | if (atomic_read(&nohz.second_pick_cpu) == cpu) |
3268 | atomic_set(&nohz.load_balancer, -1); | 3316 | atomic_cmpxchg(&nohz.second_pick_cpu, cpu, nr_cpu_ids); |
3269 | return 0; | ||
3270 | } | ||
3271 | 3317 | ||
3272 | if (atomic_read(&nohz.load_balancer) == -1) { | 3318 | if (atomic_read(&nohz.load_balancer) >= nr_cpu_ids) { |
3273 | /* make me the ilb owner */ | ||
3274 | if (atomic_cmpxchg(&nohz.load_balancer, -1, cpu) == -1) | ||
3275 | return 1; | ||
3276 | } else if (atomic_read(&nohz.load_balancer) == cpu) { | ||
3277 | int new_ilb; | 3319 | int new_ilb; |
3278 | 3320 | ||
3279 | if (!(sched_smt_power_savings || | 3321 | /* make me the ilb owner */ |
3280 | sched_mc_power_savings)) | 3322 | if (atomic_cmpxchg(&nohz.load_balancer, nr_cpu_ids, |
3281 | return 1; | 3323 | cpu) != nr_cpu_ids) |
3324 | return; | ||
3325 | |||
3282 | /* | 3326 | /* |
3283 | * Check to see if there is a more power-efficient | 3327 | * Check to see if there is a more power-efficient |
3284 | * ilb. | 3328 | * ilb. |
3285 | */ | 3329 | */ |
3286 | new_ilb = find_new_ilb(cpu); | 3330 | new_ilb = find_new_ilb(cpu); |
3287 | if (new_ilb < nr_cpu_ids && new_ilb != cpu) { | 3331 | if (new_ilb < nr_cpu_ids && new_ilb != cpu) { |
3288 | atomic_set(&nohz.load_balancer, -1); | 3332 | atomic_set(&nohz.load_balancer, nr_cpu_ids); |
3289 | resched_cpu(new_ilb); | 3333 | resched_cpu(new_ilb); |
3290 | return 0; | 3334 | return; |
3291 | } | 3335 | } |
3292 | return 1; | 3336 | return; |
3293 | } | 3337 | } |
3294 | } else { | 3338 | } else { |
3295 | if (!cpumask_test_cpu(cpu, nohz.cpu_mask)) | 3339 | if (!cpumask_test_cpu(cpu, nohz.idle_cpus_mask)) |
3296 | return 0; | 3340 | return; |
3297 | 3341 | ||
3298 | cpumask_clear_cpu(cpu, nohz.cpu_mask); | 3342 | cpumask_clear_cpu(cpu, nohz.idle_cpus_mask); |
3299 | 3343 | ||
3300 | if (atomic_read(&nohz.load_balancer) == cpu) | 3344 | if (atomic_read(&nohz.load_balancer) == cpu) |
3301 | if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu) | 3345 | if (atomic_cmpxchg(&nohz.load_balancer, cpu, |
3346 | nr_cpu_ids) != cpu) | ||
3302 | BUG(); | 3347 | BUG(); |
3303 | } | 3348 | } |
3304 | return 0; | 3349 | return; |
3305 | } | 3350 | } |
3306 | #endif | 3351 | #endif |
3307 | 3352 | ||
@@ -3383,11 +3428,101 @@ out: | |||
3383 | rq->next_balance = next_balance; | 3428 | rq->next_balance = next_balance; |
3384 | } | 3429 | } |
3385 | 3430 | ||
3431 | #ifdef CONFIG_NO_HZ | ||
3386 | /* | 3432 | /* |
3387 | * run_rebalance_domains is triggered when needed from the scheduler tick. | 3433 | * In CONFIG_NO_HZ case, the idle balance kickee will do the |
3388 | * In CONFIG_NO_HZ case, the idle load balance owner will do the | ||
3389 | * rebalancing for all the cpus for whom scheduler ticks are stopped. | 3434 | * rebalancing for all the cpus for whom scheduler ticks are stopped. |
3390 | */ | 3435 | */ |
3436 | static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle) | ||
3437 | { | ||
3438 | struct rq *this_rq = cpu_rq(this_cpu); | ||
3439 | struct rq *rq; | ||
3440 | int balance_cpu; | ||
3441 | |||
3442 | if (idle != CPU_IDLE || !this_rq->nohz_balance_kick) | ||
3443 | return; | ||
3444 | |||
3445 | for_each_cpu(balance_cpu, nohz.idle_cpus_mask) { | ||
3446 | if (balance_cpu == this_cpu) | ||
3447 | continue; | ||
3448 | |||
3449 | /* | ||
3450 | * If this cpu gets work to do, stop the load balancing | ||
3451 | * work being done for other cpus. Next load | ||
3452 | * balancing owner will pick it up. | ||
3453 | */ | ||
3454 | if (need_resched()) { | ||
3455 | this_rq->nohz_balance_kick = 0; | ||
3456 | break; | ||
3457 | } | ||
3458 | |||
3459 | raw_spin_lock_irq(&this_rq->lock); | ||
3460 | update_cpu_load(this_rq); | ||
3461 | raw_spin_unlock_irq(&this_rq->lock); | ||
3462 | |||
3463 | rebalance_domains(balance_cpu, CPU_IDLE); | ||
3464 | |||
3465 | rq = cpu_rq(balance_cpu); | ||
3466 | if (time_after(this_rq->next_balance, rq->next_balance)) | ||
3467 | this_rq->next_balance = rq->next_balance; | ||
3468 | } | ||
3469 | nohz.next_balance = this_rq->next_balance; | ||
3470 | this_rq->nohz_balance_kick = 0; | ||
3471 | } | ||
3472 | |||
3473 | /* | ||
3474 | * Current heuristic for kicking the idle load balancer | ||
3475 | * - first_pick_cpu is one of the busy CPUs. It will kick | ||
3476 | * idle load balancer when it has more than one process active. This | ||
3477 | * eliminates the need for idle load balancing altogether when we have | ||
3478 | * only one running process in the system (common case). | ||
3479 | * - If there is more than one busy CPU, idle load balancer may have | ||
3480 | * to run for active_load_balance to happen (i.e., two busy CPUs are | ||
3481 | * SMT or core siblings and can run better if they move to different | ||
3482 | * physical CPUs). So, second_pick_cpu is the second of the busy CPUs | ||
3483 | * which will kick idle load balancer as soon as it has any load. | ||
3484 | */ | ||
3485 | static inline int nohz_kick_needed(struct rq *rq, int cpu) | ||
3486 | { | ||
3487 | unsigned long now = jiffies; | ||
3488 | int ret; | ||
3489 | int first_pick_cpu, second_pick_cpu; | ||
3490 | |||
3491 | if (time_before(now, nohz.next_balance)) | ||
3492 | return 0; | ||
3493 | |||
3494 | if (!rq->nr_running) | ||
3495 | return 0; | ||
3496 | |||
3497 | first_pick_cpu = atomic_read(&nohz.first_pick_cpu); | ||
3498 | second_pick_cpu = atomic_read(&nohz.second_pick_cpu); | ||
3499 | |||
3500 | if (first_pick_cpu < nr_cpu_ids && first_pick_cpu != cpu && | ||
3501 | second_pick_cpu < nr_cpu_ids && second_pick_cpu != cpu) | ||
3502 | return 0; | ||
3503 | |||
3504 | ret = atomic_cmpxchg(&nohz.first_pick_cpu, nr_cpu_ids, cpu); | ||
3505 | if (ret == nr_cpu_ids || ret == cpu) { | ||
3506 | atomic_cmpxchg(&nohz.second_pick_cpu, cpu, nr_cpu_ids); | ||
3507 | if (rq->nr_running > 1) | ||
3508 | return 1; | ||
3509 | } else { | ||
3510 | ret = atomic_cmpxchg(&nohz.second_pick_cpu, nr_cpu_ids, cpu); | ||
3511 | if (ret == nr_cpu_ids || ret == cpu) { | ||
3512 | if (rq->nr_running) | ||
3513 | return 1; | ||
3514 | } | ||
3515 | } | ||
3516 | return 0; | ||
3517 | } | ||
3518 | #else | ||
3519 | static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle) { } | ||
3520 | #endif | ||
3521 | |||
3522 | /* | ||
3523 | * run_rebalance_domains is triggered when needed from the scheduler tick. | ||
3524 | * Also triggered for nohz idle balancing (with nohz_balance_kick set). | ||
3525 | */ | ||
3391 | static void run_rebalance_domains(struct softirq_action *h) | 3526 | static void run_rebalance_domains(struct softirq_action *h) |
3392 | { | 3527 | { |
3393 | int this_cpu = smp_processor_id(); | 3528 | int this_cpu = smp_processor_id(); |
@@ -3397,40 +3532,12 @@ static void run_rebalance_domains(struct softirq_action *h) | |||
3397 | 3532 | ||
3398 | rebalance_domains(this_cpu, idle); | 3533 | rebalance_domains(this_cpu, idle); |
3399 | 3534 | ||
3400 | #ifdef CONFIG_NO_HZ | ||
3401 | /* | 3535 | /* |
3402 | * If this cpu is the owner for idle load balancing, then do the | 3536 | * If this cpu has a pending nohz_balance_kick, then do the |
3403 | * balancing on behalf of the other idle cpus whose ticks are | 3537 | * balancing on behalf of the other idle cpus whose ticks are |
3404 | * stopped. | 3538 | * stopped. |
3405 | */ | 3539 | */ |
3406 | if (this_rq->idle_at_tick && | 3540 | nohz_idle_balance(this_cpu, idle); |
3407 | atomic_read(&nohz.load_balancer) == this_cpu) { | ||
3408 | struct rq *rq; | ||
3409 | int balance_cpu; | ||
3410 | |||
3411 | for_each_cpu(balance_cpu, nohz.cpu_mask) { | ||
3412 | if (balance_cpu == this_cpu) | ||
3413 | continue; | ||
3414 | |||
3415 | /* | ||
3416 | * If this cpu gets work to do, stop the load balancing | ||
3417 | * work being done for other cpus. Next load | ||
3418 | * balancing owner will pick it up. | ||
3419 | */ | ||
3420 | if (need_resched()) | ||
3421 | break; | ||
3422 | |||
3423 | rq = cpu_rq(balance_cpu); | ||
3424 | raw_spin_lock_irq(&rq->lock); | ||
3425 | update_cpu_load(rq); | ||
3426 | raw_spin_unlock_irq(&rq->lock); | ||
3427 | rebalance_domains(balance_cpu, CPU_IDLE); | ||
3428 | |||
3429 | if (time_after(this_rq->next_balance, rq->next_balance)) | ||
3430 | this_rq->next_balance = rq->next_balance; | ||
3431 | } | ||
3432 | } | ||
3433 | #endif | ||
3434 | } | 3541 | } |
3435 | 3542 | ||
3436 | static inline int on_null_domain(int cpu) | 3543 | static inline int on_null_domain(int cpu) |
@@ -3440,57 +3547,17 @@ static inline int on_null_domain(int cpu) | |||
3440 | 3547 | ||
3441 | /* | 3548 | /* |
3442 | * Trigger the SCHED_SOFTIRQ if it is time to do periodic load balancing. | 3549 | * Trigger the SCHED_SOFTIRQ if it is time to do periodic load balancing. |
3443 | * | ||
3444 | * In case of CONFIG_NO_HZ, this is the place where we nominate a new | ||
3445 | * idle load balancing owner or decide to stop the periodic load balancing, | ||
3446 | * if the whole system is idle. | ||
3447 | */ | 3550 | */ |
3448 | static inline void trigger_load_balance(struct rq *rq, int cpu) | 3551 | static inline void trigger_load_balance(struct rq *rq, int cpu) |
3449 | { | 3552 | { |
3450 | #ifdef CONFIG_NO_HZ | ||
3451 | /* | ||
3452 | * If we were in the nohz mode recently and busy at the current | ||
3453 | * scheduler tick, then check if we need to nominate new idle | ||
3454 | * load balancer. | ||
3455 | */ | ||
3456 | if (rq->in_nohz_recently && !rq->idle_at_tick) { | ||
3457 | rq->in_nohz_recently = 0; | ||
3458 | |||
3459 | if (atomic_read(&nohz.load_balancer) == cpu) { | ||
3460 | cpumask_clear_cpu(cpu, nohz.cpu_mask); | ||
3461 | atomic_set(&nohz.load_balancer, -1); | ||
3462 | } | ||
3463 | |||
3464 | if (atomic_read(&nohz.load_balancer) == -1) { | ||
3465 | int ilb = find_new_ilb(cpu); | ||
3466 | |||
3467 | if (ilb < nr_cpu_ids) | ||
3468 | resched_cpu(ilb); | ||
3469 | } | ||
3470 | } | ||
3471 | |||
3472 | /* | ||
3473 | * If this cpu is idle and doing idle load balancing for all the | ||
3474 | * cpus with ticks stopped, is it time for that to stop? | ||
3475 | */ | ||
3476 | if (rq->idle_at_tick && atomic_read(&nohz.load_balancer) == cpu && | ||
3477 | cpumask_weight(nohz.cpu_mask) == num_online_cpus()) { | ||
3478 | resched_cpu(cpu); | ||
3479 | return; | ||
3480 | } | ||
3481 | |||
3482 | /* | ||
3483 | * If this cpu is idle and the idle load balancing is done by | ||
3484 | * someone else, then no need raise the SCHED_SOFTIRQ | ||
3485 | */ | ||
3486 | if (rq->idle_at_tick && atomic_read(&nohz.load_balancer) != cpu && | ||
3487 | cpumask_test_cpu(cpu, nohz.cpu_mask)) | ||
3488 | return; | ||
3489 | #endif | ||
3490 | /* Don't need to rebalance while attached to NULL domain */ | 3553 | /* Don't need to rebalance while attached to NULL domain */ |
3491 | if (time_after_eq(jiffies, rq->next_balance) && | 3554 | if (time_after_eq(jiffies, rq->next_balance) && |
3492 | likely(!on_null_domain(cpu))) | 3555 | likely(!on_null_domain(cpu))) |
3493 | raise_softirq(SCHED_SOFTIRQ); | 3556 | raise_softirq(SCHED_SOFTIRQ); |
3557 | #ifdef CONFIG_NO_HZ | ||
3558 | else if (nohz_kick_needed(rq, cpu) && likely(!on_null_domain(cpu))) | ||
3559 | nohz_balancer_kick(cpu); | ||
3560 | #endif | ||
3494 | } | 3561 | } |
3495 | 3562 | ||
3496 | static void rq_online_fair(struct rq *rq) | 3563 | static void rq_online_fair(struct rq *rq) |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 1d7b9bc1c034..5f171f04ab00 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
@@ -408,13 +408,7 @@ void tick_nohz_stop_sched_tick(int inidle) | |||
408 | * the scheduler tick in nohz_restart_sched_tick. | 408 | * the scheduler tick in nohz_restart_sched_tick. |
409 | */ | 409 | */ |
410 | if (!ts->tick_stopped) { | 410 | if (!ts->tick_stopped) { |
411 | if (select_nohz_load_balancer(1)) { | 411 | select_nohz_load_balancer(1); |
412 | /* | ||
413 | * sched tick not stopped! | ||
414 | */ | ||
415 | cpumask_clear_cpu(cpu, nohz_cpu_mask); | ||
416 | goto out; | ||
417 | } | ||
418 | 412 | ||
419 | ts->idle_tick = hrtimer_get_expires(&ts->sched_timer); | 413 | ts->idle_tick = hrtimer_get_expires(&ts->sched_timer); |
420 | ts->tick_stopped = 1; | 414 | ts->tick_stopped = 1; |
diff --git a/kernel/timer.c b/kernel/timer.c index ee305c8d4e18..48d6aec0789c 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -679,12 +679,8 @@ __mod_timer(struct timer_list *timer, unsigned long expires, | |||
679 | cpu = smp_processor_id(); | 679 | cpu = smp_processor_id(); |
680 | 680 | ||
681 | #if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) | 681 | #if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) |
682 | if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) { | 682 | if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) |
683 | int preferred_cpu = get_nohz_load_balancer(); | 683 | cpu = get_nohz_timer_target(); |
684 | |||
685 | if (preferred_cpu >= 0) | ||
686 | cpu = preferred_cpu; | ||
687 | } | ||
688 | #endif | 684 | #endif |
689 | new_base = per_cpu(tvec_bases, cpu); | 685 | new_base = per_cpu(tvec_bases, cpu); |
690 | 686 | ||