diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-02-28 13:23:41 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-02-28 13:23:41 -0500 |
commit | 2531216f236cb2a1f39ffa12a4a9339541e52191 (patch) | |
tree | 74ca94a99a00ebca463ca67458b8099728cece15 | |
parent | 6556a6743549defc32e5f90ee2cb1ecd833a44c3 (diff) | |
parent | 0970d2992dfd7d5ec2c787417cf464f01eeaf42a (diff) |
Merge branch 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
sched: Fix race between ttwu() and task_rq_lock()
sched: Fix SMT scheduler regression in find_busiest_queue()
sched: Fix sched_mc_power_savings for !SMT
kernel/sched.c: Suppress unused var warning
-rw-r--r-- | include/linux/sched.h | 5 | ||||
-rw-r--r-- | kernel/sched.c | 86 |
2 files changed, 62 insertions, 29 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 78efe7c485ac..1f5fa53b46b1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -878,7 +878,10 @@ static inline int sd_balance_for_mc_power(void) | |||
878 | if (sched_smt_power_savings) | 878 | if (sched_smt_power_savings) |
879 | return SD_POWERSAVINGS_BALANCE; | 879 | return SD_POWERSAVINGS_BALANCE; |
880 | 880 | ||
881 | return SD_PREFER_SIBLING; | 881 | if (!sched_mc_power_savings) |
882 | return SD_PREFER_SIBLING; | ||
883 | |||
884 | return 0; | ||
882 | } | 885 | } |
883 | 886 | ||
884 | static inline int sd_balance_for_package_power(void) | 887 | static inline int sd_balance_for_package_power(void) |
diff --git a/kernel/sched.c b/kernel/sched.c
index 9d163f83e5c3..caf54e1eef6e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -946,16 +946,33 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) | |||
946 | #endif /* __ARCH_WANT_UNLOCKED_CTXSW */ | 946 | #endif /* __ARCH_WANT_UNLOCKED_CTXSW */ |
947 | 947 | ||
948 | /* | 948 | /* |
949 | * Check whether the task is waking, we use this to synchronize against | ||
950 | * ttwu() so that task_cpu() reports a stable number. | ||
951 | * | ||
952 | * We need to make an exception for PF_STARTING tasks because the fork | ||
953 | * path might require task_rq_lock() to work, eg. it can call | ||
954 | * set_cpus_allowed_ptr() from the cpuset clone_ns code. | ||
955 | */ | ||
956 | static inline int task_is_waking(struct task_struct *p) | ||
957 | { | ||
958 | return unlikely((p->state == TASK_WAKING) && !(p->flags & PF_STARTING)); | ||
959 | } | ||
960 | |||
961 | /* | ||
949 | * __task_rq_lock - lock the runqueue a given task resides on. | 962 | * __task_rq_lock - lock the runqueue a given task resides on. |
950 | * Must be called interrupts disabled. | 963 | * Must be called interrupts disabled. |
951 | */ | 964 | */ |
952 | static inline struct rq *__task_rq_lock(struct task_struct *p) | 965 | static inline struct rq *__task_rq_lock(struct task_struct *p) |
953 | __acquires(rq->lock) | 966 | __acquires(rq->lock) |
954 | { | 967 | { |
968 | struct rq *rq; | ||
969 | |||
955 | for (;;) { | 970 | for (;;) { |
956 | struct rq *rq = task_rq(p); | 971 | while (task_is_waking(p)) |
972 | cpu_relax(); | ||
973 | rq = task_rq(p); | ||
957 | raw_spin_lock(&rq->lock); | 974 | raw_spin_lock(&rq->lock); |
958 | if (likely(rq == task_rq(p))) | 975 | if (likely(rq == task_rq(p) && !task_is_waking(p))) |
959 | return rq; | 976 | return rq; |
960 | raw_spin_unlock(&rq->lock); | 977 | raw_spin_unlock(&rq->lock); |
961 | } | 978 | } |
@@ -972,10 +989,12 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags) | |||
972 | struct rq *rq; | 989 | struct rq *rq; |
973 | 990 | ||
974 | for (;;) { | 991 | for (;;) { |
992 | while (task_is_waking(p)) | ||
993 | cpu_relax(); | ||
975 | local_irq_save(*flags); | 994 | local_irq_save(*flags); |
976 | rq = task_rq(p); | 995 | rq = task_rq(p); |
977 | raw_spin_lock(&rq->lock); | 996 | raw_spin_lock(&rq->lock); |
978 | if (likely(rq == task_rq(p))) | 997 | if (likely(rq == task_rq(p) && !task_is_waking(p))) |
979 | return rq; | 998 | return rq; |
980 | raw_spin_unlock_irqrestore(&rq->lock, *flags); | 999 | raw_spin_unlock_irqrestore(&rq->lock, *flags); |
981 | } | 1000 | } |
@@ -2413,14 +2432,27 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, | |||
2413 | __task_rq_unlock(rq); | 2432 | __task_rq_unlock(rq); |
2414 | 2433 | ||
2415 | cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags); | 2434 | cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags); |
2416 | if (cpu != orig_cpu) | 2435 | if (cpu != orig_cpu) { |
2436 | /* | ||
2437 | * Since we migrate the task without holding any rq->lock, | ||
2438 | * we need to be careful with task_rq_lock(), since that | ||
2439 | * might end up locking an invalid rq. | ||
2440 | */ | ||
2417 | set_task_cpu(p, cpu); | 2441 | set_task_cpu(p, cpu); |
2442 | } | ||
2418 | 2443 | ||
2419 | rq = __task_rq_lock(p); | 2444 | rq = cpu_rq(cpu); |
2445 | raw_spin_lock(&rq->lock); | ||
2420 | update_rq_clock(rq); | 2446 | update_rq_clock(rq); |
2421 | 2447 | ||
2448 | /* | ||
2449 | * We migrated the task without holding either rq->lock, however | ||
2450 | * since the task is not on the task list itself, nobody else | ||
2451 | * will try and migrate the task, hence the rq should match the | ||
2452 | * cpu we just moved it to. | ||
2453 | */ | ||
2454 | WARN_ON(task_cpu(p) != cpu); | ||
2422 | WARN_ON(p->state != TASK_WAKING); | 2455 | WARN_ON(p->state != TASK_WAKING); |
2423 | cpu = task_cpu(p); | ||
2424 | 2456 | ||
2425 | #ifdef CONFIG_SCHEDSTATS | 2457 | #ifdef CONFIG_SCHEDSTATS |
2426 | schedstat_inc(rq, ttwu_count); | 2458 | schedstat_inc(rq, ttwu_count); |
@@ -2668,7 +2700,13 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) | |||
2668 | set_task_cpu(p, cpu); | 2700 | set_task_cpu(p, cpu); |
2669 | #endif | 2701 | #endif |
2670 | 2702 | ||
2671 | rq = task_rq_lock(p, &flags); | 2703 | /* |
2704 | * Since the task is not on the rq and we still have TASK_WAKING set | ||
2705 | * nobody else will migrate this task. | ||
2706 | */ | ||
2707 | rq = cpu_rq(cpu); | ||
2708 | raw_spin_lock_irqsave(&rq->lock, flags); | ||
2709 | |||
2672 | BUG_ON(p->state != TASK_WAKING); | 2710 | BUG_ON(p->state != TASK_WAKING); |
2673 | p->state = TASK_RUNNING; | 2711 | p->state = TASK_RUNNING; |
2674 | update_rq_clock(rq); | 2712 | update_rq_clock(rq); |
@@ -4130,12 +4168,23 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle, | |||
4130 | continue; | 4168 | continue; |
4131 | 4169 | ||
4132 | rq = cpu_rq(i); | 4170 | rq = cpu_rq(i); |
4133 | wl = weighted_cpuload(i) * SCHED_LOAD_SCALE; | 4171 | wl = weighted_cpuload(i); |
4134 | wl /= power; | ||
4135 | 4172 | ||
4173 | /* | ||
4174 | * When comparing with imbalance, use weighted_cpuload() | ||
4175 | * which is not scaled with the cpu power. | ||
4176 | */ | ||
4136 | if (capacity && rq->nr_running == 1 && wl > imbalance) | 4177 | if (capacity && rq->nr_running == 1 && wl > imbalance) |
4137 | continue; | 4178 | continue; |
4138 | 4179 | ||
4180 | /* | ||
4181 | * For the load comparisons with the other cpu's, consider | ||
4182 | * the weighted_cpuload() scaled with the cpu power, so that | ||
4183 | * the load can be moved away from the cpu that is potentially | ||
4184 | * running at a lower capacity. | ||
4185 | */ | ||
4186 | wl = (wl * SCHED_LOAD_SCALE) / power; | ||
4187 | |||
4139 | if (wl > max_load) { | 4188 | if (wl > max_load) { |
4140 | max_load = wl; | 4189 | max_load = wl; |
4141 | busiest = rq; | 4190 | busiest = rq; |
@@ -7156,27 +7205,8 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) | |||
7156 | struct rq *rq; | 7205 | struct rq *rq; |
7157 | int ret = 0; | 7206 | int ret = 0; |
7158 | 7207 | ||
7159 | /* | ||
7160 | * Since we rely on wake-ups to migrate sleeping tasks, don't change | ||
7161 | * the ->cpus_allowed mask from under waking tasks, which would be | ||
7162 | * possible when we change rq->lock in ttwu(), so synchronize against | ||
7163 | * TASK_WAKING to avoid that. | ||
7164 | * | ||
7165 | * Make an exception for freshly cloned tasks, since cpuset namespaces | ||
7166 | * might move the task about, we have to validate the target in | ||
7167 | * wake_up_new_task() anyway since the cpu might have gone away. | ||
7168 | */ | ||
7169 | again: | ||
7170 | while (p->state == TASK_WAKING && !(p->flags & PF_STARTING)) | ||
7171 | cpu_relax(); | ||
7172 | |||
7173 | rq = task_rq_lock(p, &flags); | 7208 | rq = task_rq_lock(p, &flags); |
7174 | 7209 | ||
7175 | if (p->state == TASK_WAKING && !(p->flags & PF_STARTING)) { | ||
7176 | task_rq_unlock(rq, &flags); | ||
7177 | goto again; | ||
7178 | } | ||
7179 | |||
7180 | if (!cpumask_intersects(new_mask, cpu_active_mask)) { | 7210 | if (!cpumask_intersects(new_mask, cpu_active_mask)) { |
7181 | ret = -EINVAL; | 7211 | ret = -EINVAL; |
7182 | goto out; | 7212 | goto out; |