about summary refs log tree commit diff stats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/sched.c73
-rw-r--r--kernel/sched_fair.c15
2 files changed, 59 insertions, 29 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index bae6fcfe6d75..af5fa239804d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -898,16 +898,33 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
898#endif /* __ARCH_WANT_UNLOCKED_CTXSW */ 898#endif /* __ARCH_WANT_UNLOCKED_CTXSW */
899 899
900/* 900/*
901 * Check whether the task is waking, we use this to synchronize against
902 * ttwu() so that task_cpu() reports a stable number.
903 *
904 * We need to make an exception for PF_STARTING tasks because the fork
905 * path might require task_rq_lock() to work, eg. it can call
906 * set_cpus_allowed_ptr() from the cpuset clone_ns code.
907 */
908static inline int task_is_waking(struct task_struct *p)
909{
910 return unlikely((p->state == TASK_WAKING) && !(p->flags & PF_STARTING));
911}
912
913/*
901 * __task_rq_lock - lock the runqueue a given task resides on. 914 * __task_rq_lock - lock the runqueue a given task resides on.
902 * Must be called interrupts disabled. 915 * Must be called interrupts disabled.
903 */ 916 */
904static inline struct rq *__task_rq_lock(struct task_struct *p) 917static inline struct rq *__task_rq_lock(struct task_struct *p)
905 __acquires(rq->lock) 918 __acquires(rq->lock)
906{ 919{
920 struct rq *rq;
921
907 for (;;) { 922 for (;;) {
908 struct rq *rq = task_rq(p); 923 while (task_is_waking(p))
924 cpu_relax();
925 rq = task_rq(p);
909 raw_spin_lock(&rq->lock); 926 raw_spin_lock(&rq->lock);
910 if (likely(rq == task_rq(p))) 927 if (likely(rq == task_rq(p) && !task_is_waking(p)))
911 return rq; 928 return rq;
912 raw_spin_unlock(&rq->lock); 929 raw_spin_unlock(&rq->lock);
913 } 930 }
@@ -924,10 +941,12 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
924 struct rq *rq; 941 struct rq *rq;
925 942
926 for (;;) { 943 for (;;) {
944 while (task_is_waking(p))
945 cpu_relax();
927 local_irq_save(*flags); 946 local_irq_save(*flags);
928 rq = task_rq(p); 947 rq = task_rq(p);
929 raw_spin_lock(&rq->lock); 948 raw_spin_lock(&rq->lock);
930 if (likely(rq == task_rq(p))) 949 if (likely(rq == task_rq(p) && !task_is_waking(p)))
931 return rq; 950 return rq;
932 raw_spin_unlock_irqrestore(&rq->lock, *flags); 951 raw_spin_unlock_irqrestore(&rq->lock, *flags);
933 } 952 }
@@ -2374,14 +2393,27 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
2374 __task_rq_unlock(rq); 2393 __task_rq_unlock(rq);
2375 2394
2376 cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags); 2395 cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
2377 if (cpu != orig_cpu) 2396 if (cpu != orig_cpu) {
2397 /*
2398 * Since we migrate the task without holding any rq->lock,
2399 * we need to be careful with task_rq_lock(), since that
2400 * might end up locking an invalid rq.
2401 */
2378 set_task_cpu(p, cpu); 2402 set_task_cpu(p, cpu);
2403 }
2379 2404
2380 rq = __task_rq_lock(p); 2405 rq = cpu_rq(cpu);
2406 raw_spin_lock(&rq->lock);
2381 update_rq_clock(rq); 2407 update_rq_clock(rq);
2382 2408
2409 /*
2410 * We migrated the task without holding either rq->lock, however
2411 * since the task is not on the task list itself, nobody else
2412 * will try and migrate the task, hence the rq should match the
2413 * cpu we just moved it to.
2414 */
2415 WARN_ON(task_cpu(p) != cpu);
2383 WARN_ON(p->state != TASK_WAKING); 2416 WARN_ON(p->state != TASK_WAKING);
2384 cpu = task_cpu(p);
2385 2417
2386#ifdef CONFIG_SCHEDSTATS 2418#ifdef CONFIG_SCHEDSTATS
2387 schedstat_inc(rq, ttwu_count); 2419 schedstat_inc(rq, ttwu_count);
@@ -2613,7 +2645,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
2613{ 2645{
2614 unsigned long flags; 2646 unsigned long flags;
2615 struct rq *rq; 2647 struct rq *rq;
2616 int cpu __maybe_unused = get_cpu(); 2648 int cpu = get_cpu();
2617 2649
2618#ifdef CONFIG_SMP 2650#ifdef CONFIG_SMP
2619 /* 2651 /*
@@ -2629,7 +2661,13 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
2629 set_task_cpu(p, cpu); 2661 set_task_cpu(p, cpu);
2630#endif 2662#endif
2631 2663
2632 rq = task_rq_lock(p, &flags); 2664 /*
2665 * Since the task is not on the rq and we still have TASK_WAKING set
2666 * nobody else will migrate this task.
2667 */
2668 rq = cpu_rq(cpu);
2669 raw_spin_lock_irqsave(&rq->lock, flags);
2670
2633 BUG_ON(p->state != TASK_WAKING); 2671 BUG_ON(p->state != TASK_WAKING);
2634 p->state = TASK_RUNNING; 2672 p->state = TASK_RUNNING;
2635 update_rq_clock(rq); 2673 update_rq_clock(rq);
@@ -5302,27 +5340,8 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
5302 struct rq *rq; 5340 struct rq *rq;
5303 int ret = 0; 5341 int ret = 0;
5304 5342
5305 /*
5306 * Since we rely on wake-ups to migrate sleeping tasks, don't change
5307 * the ->cpus_allowed mask from under waking tasks, which would be
5308 * possible when we change rq->lock in ttwu(), so synchronize against
5309 * TASK_WAKING to avoid that.
5310 *
5311 * Make an exception for freshly cloned tasks, since cpuset namespaces
5312 * might move the task about, we have to validate the target in
5313 * wake_up_new_task() anyway since the cpu might have gone away.
5314 */
5315again:
5316 while (p->state == TASK_WAKING && !(p->flags & PF_STARTING))
5317 cpu_relax();
5318
5319 rq = task_rq_lock(p, &flags); 5343 rq = task_rq_lock(p, &flags);
5320 5344
5321 if (p->state == TASK_WAKING && !(p->flags & PF_STARTING)) {
5322 task_rq_unlock(rq, &flags);
5323 goto again;
5324 }
5325
5326 if (!cpumask_intersects(new_mask, cpu_active_mask)) { 5345 if (!cpumask_intersects(new_mask, cpu_active_mask)) {
5327 ret = -EINVAL; 5346 ret = -EINVAL;
5328 goto out; 5347 goto out;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index b45abbe55067..ff7692ccda89 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -2792,12 +2792,23 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
2792 continue; 2792 continue;
2793 2793
2794 rq = cpu_rq(i); 2794 rq = cpu_rq(i);
2795 wl = weighted_cpuload(i) * SCHED_LOAD_SCALE; 2795 wl = weighted_cpuload(i);
2796 wl /= power;
2797 2796
2797 /*
2798 * When comparing with imbalance, use weighted_cpuload()
2799 * which is not scaled with the cpu power.
2800 */
2798 if (capacity && rq->nr_running == 1 && wl > imbalance) 2801 if (capacity && rq->nr_running == 1 && wl > imbalance)
2799 continue; 2802 continue;
2800 2803
2804 /*
2805 * For the load comparisons with the other cpu's, consider
2806 * the weighted_cpuload() scaled with the cpu power, so that
2807 * the load can be moved away from the cpu that is potentially
2808 * running at a lower capacity.
2809 */
2810 wl = (wl * SCHED_LOAD_SCALE) / power;
2811
2801 if (wl > max_load) { 2812 if (wl > max_load) {
2802 max_load = wl; 2813 max_load = wl;
2803 busiest = rq; 2814 busiest = rq;