-rw-r--r--   include/linux/sched.h |  5
-rw-r--r--   kernel/sched.c        | 73
-rw-r--r--   kernel/sched_fair.c   | 15
3 files changed, 63 insertions, 30 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b1b8d84c5805..0eef87b58ea5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -870,7 +870,10 @@ static inline int sd_balance_for_mc_power(void)
 	if (sched_smt_power_savings)
 		return SD_POWERSAVINGS_BALANCE;
 
-	return SD_PREFER_SIBLING;
+	if (!sched_mc_power_savings)
+		return SD_PREFER_SIBLING;
+
+	return 0;
 }
 
 static inline int sd_balance_for_package_power(void)
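For reference, the sched.h hunk makes sd_balance_for_mc_power() request SD_PREFER_SIBLING only when neither SMT nor MC power savings is enabled, and return no extra flags otherwise. A stand-alone sketch of that decision table (not part of the patch; the flag values and the two knobs below are illustrative stand-ins for the kernel definitions) is:

/* User-space model of the sd_balance_for_mc_power() decision above.
 * The flag values and the two power-savings knobs are placeholders,
 * not the real kernel definitions.
 */
#include <stdio.h>

#define SD_POWERSAVINGS_BALANCE 0x0100	/* placeholder value */
#define SD_PREFER_SIBLING       0x0200	/* placeholder value */

static int sched_smt_power_savings;	/* models the sysfs knob */
static int sched_mc_power_savings;	/* models the sysfs knob */

static int sd_balance_for_mc_power(void)
{
	if (sched_smt_power_savings)
		return SD_POWERSAVINGS_BALANCE;

	/* New behaviour: prefer siblings only when MC power savings is off. */
	if (!sched_mc_power_savings)
		return SD_PREFER_SIBLING;

	return 0;
}

int main(void)
{
	for (sched_smt_power_savings = 0; sched_smt_power_savings <= 1; sched_smt_power_savings++)
		for (sched_mc_power_savings = 0; sched_mc_power_savings <= 1; sched_mc_power_savings++)
			printf("smt=%d mc=%d -> flags=0x%x\n",
			       sched_smt_power_savings, sched_mc_power_savings,
			       (unsigned)sd_balance_for_mc_power());
	return 0;
}

Running it prints the flags chosen for each combination of the two power-savings knobs.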
diff --git a/kernel/sched.c b/kernel/sched.c
index bae6fcfe6d75..af5fa239804d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -898,16 +898,33 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
 #endif /* __ARCH_WANT_UNLOCKED_CTXSW */
 
 /*
+ * Check whether the task is waking, we use this to synchronize against
+ * ttwu() so that task_cpu() reports a stable number.
+ *
+ * We need to make an exception for PF_STARTING tasks because the fork
+ * path might require task_rq_lock() to work, eg. it can call
+ * set_cpus_allowed_ptr() from the cpuset clone_ns code.
+ */
+static inline int task_is_waking(struct task_struct *p)
+{
+	return unlikely((p->state == TASK_WAKING) && !(p->flags & PF_STARTING));
+}
+
+/*
  * __task_rq_lock - lock the runqueue a given task resides on.
  * Must be called interrupts disabled.
  */
 static inline struct rq *__task_rq_lock(struct task_struct *p)
 	__acquires(rq->lock)
 {
+	struct rq *rq;
+
 	for (;;) {
-		struct rq *rq = task_rq(p);
+		while (task_is_waking(p))
+			cpu_relax();
+		rq = task_rq(p);
 		raw_spin_lock(&rq->lock);
-		if (likely(rq == task_rq(p)))
+		if (likely(rq == task_rq(p) && !task_is_waking(p)))
 			return rq;
 		raw_spin_unlock(&rq->lock);
 	}
@@ -924,10 +941,12 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
 	struct rq *rq;
 
 	for (;;) {
+		while (task_is_waking(p))
+			cpu_relax();
 		local_irq_save(*flags);
 		rq = task_rq(p);
 		raw_spin_lock(&rq->lock);
-		if (likely(rq == task_rq(p)))
+		if (likely(rq == task_rq(p) && !task_is_waking(p)))
 			return rq;
 		raw_spin_unlock_irqrestore(&rq->lock, *flags);
 	}
@@ -2374,14 +2393,27 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
 	__task_rq_unlock(rq);
 
 	cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
-	if (cpu != orig_cpu)
+	if (cpu != orig_cpu) {
+		/*
+		 * Since we migrate the task without holding any rq->lock,
+		 * we need to be careful with task_rq_lock(), since that
+		 * might end up locking an invalid rq.
+		 */
 		set_task_cpu(p, cpu);
+	}
 
-	rq = __task_rq_lock(p);
+	rq = cpu_rq(cpu);
+	raw_spin_lock(&rq->lock);
 	update_rq_clock(rq);
 
+	/*
+	 * We migrated the task without holding either rq->lock, however
+	 * since the task is not on the task list itself, nobody else
+	 * will try and migrate the task, hence the rq should match the
+	 * cpu we just moved it to.
+	 */
+	WARN_ON(task_cpu(p) != cpu);
 	WARN_ON(p->state != TASK_WAKING);
-	cpu = task_cpu(p);
 
 #ifdef CONFIG_SCHEDSTATS
 	schedstat_inc(rq, ttwu_count);
@@ -2613,7 +2645,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 {
 	unsigned long flags;
 	struct rq *rq;
-	int cpu __maybe_unused = get_cpu();
+	int cpu = get_cpu();
 
 #ifdef CONFIG_SMP
 	/*
@@ -2629,7 +2661,13 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 	set_task_cpu(p, cpu);
 #endif
 
-	rq = task_rq_lock(p, &flags);
+	/*
+	 * Since the task is not on the rq and we still have TASK_WAKING set
+	 * nobody else will migrate this task.
+	 */
+	rq = cpu_rq(cpu);
+	raw_spin_lock_irqsave(&rq->lock, flags);
+
 	BUG_ON(p->state != TASK_WAKING);
 	p->state = TASK_RUNNING;
 	update_rq_clock(rq);
@@ -5302,27 +5340,8 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
 	struct rq *rq;
 	int ret = 0;
 
-	/*
-	 * Since we rely on wake-ups to migrate sleeping tasks, don't change
-	 * the ->cpus_allowed mask from under waking tasks, which would be
-	 * possible when we change rq->lock in ttwu(), so synchronize against
-	 * TASK_WAKING to avoid that.
-	 *
-	 * Make an exception for freshly cloned tasks, since cpuset namespaces
-	 * might move the task about, we have to validate the target in
-	 * wake_up_new_task() anyway since the cpu might have gone away.
-	 */
-again:
-	while (p->state == TASK_WAKING && !(p->flags & PF_STARTING))
-		cpu_relax();
-
 	rq = task_rq_lock(p, &flags);
 
-	if (p->state == TASK_WAKING && !(p->flags & PF_STARTING)) {
-		task_rq_unlock(rq, &flags);
-		goto again;
-	}
-
 	if (!cpumask_intersects(new_mask, cpu_active_mask)) {
 		ret = -EINVAL;
 		goto out;
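The kernel/sched.c changes above all follow one synchronization rule: while a task is TASK_WAKING (and not PF_STARTING) its task_cpu() may still change, so task_rq_lock()/__task_rq_lock() first spin until the wakeup is done and then re-check after taking rq->lock, while try_to_wake_up() and wake_up_new_task() lock cpu_rq(cpu) directly because nothing else can migrate the task at that point. A user-space sketch of the same "wait until stable, lock, re-validate" pattern (all names here are illustrative stand-ins, not kernel APIs) is:

/* Sketch of the lock-and-revalidate loop used by task_rq_lock() above,
 * modelled with pthreads and C11 atomics. struct task, struct runqueue
 * and task_is_waking() are hypothetical stand-ins.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <sched.h>

struct runqueue {
	pthread_mutex_t lock;
};

struct task {
	_Atomic int waking;			/* models TASK_WAKING && !PF_STARTING */
	_Atomic(struct runqueue *) rq;		/* models task_rq(p); may move while waking */
};

static int task_is_waking(struct task *p)
{
	return atomic_load(&p->waking);
}

/* Lock the runqueue the task currently resides on. */
static struct runqueue *task_rq_lock(struct task *p)
{
	struct runqueue *rq;

	for (;;) {
		/* Don't even try while a wakeup may still be migrating the task. */
		while (task_is_waking(p))
			sched_yield();		/* stands in for cpu_relax() */

		rq = atomic_load(&p->rq);
		pthread_mutex_lock(&rq->lock);

		/* Re-validate: the task must not have moved or started waking
		 * between the load above and taking the lock. */
		if (rq == atomic_load(&p->rq) && !task_is_waking(p))
			return rq;

		pthread_mutex_unlock(&rq->lock);
	}
}

The re-check after taking the mutex is what lets the waking side change the task's runqueue without holding the old lock, mirroring how ttwu() now locks the destination runqueue directly.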
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index b45abbe55067..ff7692ccda89 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -2792,12 +2792,23 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
 			continue;
 
 		rq = cpu_rq(i);
-		wl = weighted_cpuload(i) * SCHED_LOAD_SCALE;
-		wl /= power;
+		wl = weighted_cpuload(i);
 
+		/*
+		 * When comparing with imbalance, use weighted_cpuload()
+		 * which is not scaled with the cpu power.
+		 */
 		if (capacity && rq->nr_running == 1 && wl > imbalance)
 			continue;
 
+		/*
+		 * For the load comparisons with the other cpu's, consider
+		 * the weighted_cpuload() scaled with the cpu power, so that
+		 * the load can be moved away from the cpu that is potentially
+		 * running at a lower capacity.
+		 */
+		wl = (wl * SCHED_LOAD_SCALE) / power;
+
 		if (wl > max_load) {
 			max_load = wl;
 			busiest = rq;
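The sched_fair.c hunk separates the two uses of wl in find_busiest_queue(): the one-task/imbalance check now uses the raw weighted_cpuload(), and only the busiest-queue comparison uses the value scaled by cpu power. A small stand-alone example (the sample numbers and the SCHED_LOAD_SCALE value are purely illustrative) shows how the order of scaling changes the outcome of the imbalance check:

/* Worked example of the two load comparisons in find_busiest_queue() above.
 * The numbers are made up; only the order of scaling relative to the
 * imbalance check mirrors the patch.
 */
#include <stdio.h>

#define SCHED_LOAD_SCALE 1024UL

int main(void)
{
	unsigned long imbalance = 1024;	/* load to move, in unscaled units */
	unsigned long power     = 589;	/* cpu power below SCHED_LOAD_SCALE */
	unsigned long wl        = 922;	/* weighted_cpuload() of a one-task cpu */

	/* Old code scaled first: the single task on a low-power cpu looks
	 * heavier than the imbalance and the cpu is skipped. */
	unsigned long scaled = wl * SCHED_LOAD_SCALE / power;
	printf("scaled load %lu vs imbalance %lu -> %s\n",
	       scaled, imbalance, scaled > imbalance ? "skipped" : "considered");

	/* New code compares the unscaled load against the imbalance first... */
	printf("raw load %lu vs imbalance %lu -> %s\n",
	       wl, imbalance, wl > imbalance ? "skipped" : "considered");

	/* ...and only uses the power-scaled load to pick the busiest queue. */
	printf("load used for max_load comparison: %lu\n", scaled);
	return 0;
}

With these numbers the scaled load (1602) overshoots the imbalance and the cpu would have been skipped, while the raw load (922) stays below it and the cpu remains a candidate.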