aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2010-02-28 13:23:41 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2010-02-28 13:23:41 -0500
commit 2531216f236cb2a1f39ffa12a4a9339541e52191 (patch)
tree 74ca94a99a00ebca463ca67458b8099728cece15
parent 6556a6743549defc32e5f90ee2cb1ecd833a44c3 (diff)
parent 0970d2992dfd7d5ec2c787417cf464f01eeaf42a (diff)
Merge branch 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  sched: Fix race between ttwu() and task_rq_lock()
  sched: Fix SMT scheduler regression in find_busiest_queue()
  sched: Fix sched_mv_power_savings for !SMT
  kernel/sched.c: Suppress unused var warning
-rw-r--r-- include/linux/sched.h 5
-rw-r--r-- kernel/sched.c 86
2 files changed, 62 insertions, 29 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 78efe7c485ac..1f5fa53b46b1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -878,7 +878,10 @@ static inline int sd_balance_for_mc_power(void)
878 if (sched_smt_power_savings) 878 if (sched_smt_power_savings)
879 return SD_POWERSAVINGS_BALANCE; 879 return SD_POWERSAVINGS_BALANCE;
880 880
881 return SD_PREFER_SIBLING; 881 if (!sched_mc_power_savings)
882 return SD_PREFER_SIBLING;
883
884 return 0;
882} 885}
883 886
884static inline int sd_balance_for_package_power(void) 887static inline int sd_balance_for_package_power(void)
diff --git a/kernel/sched.c b/kernel/sched.c
index 9d163f83e5c3..caf54e1eef6e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -946,16 +946,33 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
946#endif /* __ARCH_WANT_UNLOCKED_CTXSW */ 946#endif /* __ARCH_WANT_UNLOCKED_CTXSW */
947 947
948/* 948/*
949 * Check whether the task is waking, we use this to synchronize against
950 * ttwu() so that task_cpu() reports a stable number.
951 *
952 * We need to make an exception for PF_STARTING tasks because the fork
953 * path might require task_rq_lock() to work, eg. it can call
954 * set_cpus_allowed_ptr() from the cpuset clone_ns code.
955 */
956static inline int task_is_waking(struct task_struct *p)
957{
958 return unlikely((p->state == TASK_WAKING) && !(p->flags & PF_STARTING));
959}
960
961/*
949 * __task_rq_lock - lock the runqueue a given task resides on. 962 * __task_rq_lock - lock the runqueue a given task resides on.
950 * Must be called interrupts disabled. 963 * Must be called interrupts disabled.
951 */ 964 */
952static inline struct rq *__task_rq_lock(struct task_struct *p) 965static inline struct rq *__task_rq_lock(struct task_struct *p)
953 __acquires(rq->lock) 966 __acquires(rq->lock)
954{ 967{
968 struct rq *rq;
969
955 for (;;) { 970 for (;;) {
956 struct rq *rq = task_rq(p); 971 while (task_is_waking(p))
972 cpu_relax();
973 rq = task_rq(p);
957 raw_spin_lock(&rq->lock); 974 raw_spin_lock(&rq->lock);
958 if (likely(rq == task_rq(p))) 975 if (likely(rq == task_rq(p) && !task_is_waking(p)))
959 return rq; 976 return rq;
960 raw_spin_unlock(&rq->lock); 977 raw_spin_unlock(&rq->lock);
961 } 978 }
@@ -972,10 +989,12 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
972 struct rq *rq; 989 struct rq *rq;
973 990
974 for (;;) { 991 for (;;) {
992 while (task_is_waking(p))
993 cpu_relax();
975 local_irq_save(*flags); 994 local_irq_save(*flags);
976 rq = task_rq(p); 995 rq = task_rq(p);
977 raw_spin_lock(&rq->lock); 996 raw_spin_lock(&rq->lock);
978 if (likely(rq == task_rq(p))) 997 if (likely(rq == task_rq(p) && !task_is_waking(p)))
979 return rq; 998 return rq;
980 raw_spin_unlock_irqrestore(&rq->lock, *flags); 999 raw_spin_unlock_irqrestore(&rq->lock, *flags);
981 } 1000 }
@@ -2413,14 +2432,27 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
2413 __task_rq_unlock(rq); 2432 __task_rq_unlock(rq);
2414 2433
2415 cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags); 2434 cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
2416 if (cpu != orig_cpu) 2435 if (cpu != orig_cpu) {
2436 /*
2437 * Since we migrate the task without holding any rq->lock,
2438 * we need to be careful with task_rq_lock(), since that
2439 * might end up locking an invalid rq.
2440 */
2417 set_task_cpu(p, cpu); 2441 set_task_cpu(p, cpu);
2442 }
2418 2443
2419 rq = __task_rq_lock(p); 2444 rq = cpu_rq(cpu);
2445 raw_spin_lock(&rq->lock);
2420 update_rq_clock(rq); 2446 update_rq_clock(rq);
2421 2447
2448 /*
2449 * We migrated the task without holding either rq->lock, however
2450 * since the task is not on the task list itself, nobody else
2451 * will try and migrate the task, hence the rq should match the
2452 * cpu we just moved it to.
2453 */
2454 WARN_ON(task_cpu(p) != cpu);
2422 WARN_ON(p->state != TASK_WAKING); 2455 WARN_ON(p->state != TASK_WAKING);
2423 cpu = task_cpu(p);
2424 2456
2425#ifdef CONFIG_SCHEDSTATS 2457#ifdef CONFIG_SCHEDSTATS
2426 schedstat_inc(rq, ttwu_count); 2458 schedstat_inc(rq, ttwu_count);
@@ -2668,7 +2700,13 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
2668 set_task_cpu(p, cpu); 2700 set_task_cpu(p, cpu);
2669#endif 2701#endif
2670 2702
2671 rq = task_rq_lock(p, &flags); 2703 /*
2704 * Since the task is not on the rq and we still have TASK_WAKING set
2705 * nobody else will migrate this task.
2706 */
2707 rq = cpu_rq(cpu);
2708 raw_spin_lock_irqsave(&rq->lock, flags);
2709
2672 BUG_ON(p->state != TASK_WAKING); 2710 BUG_ON(p->state != TASK_WAKING);
2673 p->state = TASK_RUNNING; 2711 p->state = TASK_RUNNING;
2674 update_rq_clock(rq); 2712 update_rq_clock(rq);
@@ -4130,12 +4168,23 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
4130 continue; 4168 continue;
4131 4169
4132 rq = cpu_rq(i); 4170 rq = cpu_rq(i);
4133 wl = weighted_cpuload(i) * SCHED_LOAD_SCALE; 4171 wl = weighted_cpuload(i);
4134 wl /= power;
4135 4172
4173 /*
4174 * When comparing with imbalance, use weighted_cpuload()
4175 * which is not scaled with the cpu power.
4176 */
4136 if (capacity && rq->nr_running == 1 && wl > imbalance) 4177 if (capacity && rq->nr_running == 1 && wl > imbalance)
4137 continue; 4178 continue;
4138 4179
4180 /*
4181 * For the load comparisons with the other cpu's, consider
4182 * the weighted_cpuload() scaled with the cpu power, so that
4183 * the load can be moved away from the cpu that is potentially
4184 * running at a lower capacity.
4185 */
4186 wl = (wl * SCHED_LOAD_SCALE) / power;
4187
4139 if (wl > max_load) { 4188 if (wl > max_load) {
4140 max_load = wl; 4189 max_load = wl;
4141 busiest = rq; 4190 busiest = rq;
@@ -7156,27 +7205,8 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
7156 struct rq *rq; 7205 struct rq *rq;
7157 int ret = 0; 7206 int ret = 0;
7158 7207
7159 /*
7160 * Since we rely on wake-ups to migrate sleeping tasks, don't change
7161 * the ->cpus_allowed mask from under waking tasks, which would be
7162 * possible when we change rq->lock in ttwu(), so synchronize against
7163 * TASK_WAKING to avoid that.
7164 *
7165 * Make an exception for freshly cloned tasks, since cpuset namespaces
7166 * might move the task about, we have to validate the target in
7167 * wake_up_new_task() anyway since the cpu might have gone away.
7168 */
7169again:
7170 while (p->state == TASK_WAKING && !(p->flags & PF_STARTING))
7171 cpu_relax();
7172
7173 rq = task_rq_lock(p, &flags); 7208 rq = task_rq_lock(p, &flags);
7174 7209
7175 if (p->state == TASK_WAKING && !(p->flags & PF_STARTING)) {
7176 task_rq_unlock(rq, &flags);
7177 goto again;
7178 }
7179
7180 if (!cpumask_intersects(new_mask, cpu_active_mask)) { 7210 if (!cpumask_intersects(new_mask, cpu_active_mask)) {
7181 ret = -EINVAL; 7211 ret = -EINVAL;
7182 goto out; 7212 goto out;