about summary refs log tree commit diff stats
path: root/kernel/sched/core.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r-- kernel/sched/core.c 377
1 files changed, 255 insertions, 122 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1211575a2208..240157c13ddc 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -90,22 +90,6 @@
90#define CREATE_TRACE_POINTS 90#define CREATE_TRACE_POINTS
91#include <trace/events/sched.h> 91#include <trace/events/sched.h>
92 92
/*
 * Function form of smp_mb__before_atomic(): the body does nothing but
 * invoke the identically named barrier, and the resulting symbol is
 * exported via EXPORT_SYMBOL(). The whole definition is compiled only
 * when smp_mb__before_atomic is defined as a preprocessor macro.
 */
93#ifdef smp_mb__before_atomic
94void __smp_mb__before_atomic(void)
95{
96 smp_mb__before_atomic();
97}
98EXPORT_SYMBOL(__smp_mb__before_atomic);
99#endif
100
/*
 * Function form of smp_mb__after_atomic(): the body does nothing but
 * invoke the identically named barrier, and the resulting symbol is
 * exported via EXPORT_SYMBOL(). The whole definition is compiled only
 * when smp_mb__after_atomic is defined as a preprocessor macro.
 */
101#ifdef smp_mb__after_atomic
102void __smp_mb__after_atomic(void)
103{
104 smp_mb__after_atomic();
105}
106EXPORT_SYMBOL(__smp_mb__after_atomic);
107#endif
108
109void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period) 93void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period)
110{ 94{
111 unsigned long delta; 95 unsigned long delta;
@@ -333,9 +317,12 @@ static inline struct rq *__task_rq_lock(struct task_struct *p)
333 for (;;) { 317 for (;;) {
334 rq = task_rq(p); 318 rq = task_rq(p);
335 raw_spin_lock(&rq->lock); 319 raw_spin_lock(&rq->lock);
336 if (likely(rq == task_rq(p))) 320 if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
337 return rq; 321 return rq;
338 raw_spin_unlock(&rq->lock); 322 raw_spin_unlock(&rq->lock);
323
324 while (unlikely(task_on_rq_migrating(p)))
325 cpu_relax();
339 } 326 }
340} 327}
341 328
@@ -352,10 +339,13 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
352 raw_spin_lock_irqsave(&p->pi_lock, *flags); 339 raw_spin_lock_irqsave(&p->pi_lock, *flags);
353 rq = task_rq(p); 340 rq = task_rq(p);
354 raw_spin_lock(&rq->lock); 341 raw_spin_lock(&rq->lock);
355 if (likely(rq == task_rq(p))) 342 if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
356 return rq; 343 return rq;
357 raw_spin_unlock(&rq->lock); 344 raw_spin_unlock(&rq->lock);
358 raw_spin_unlock_irqrestore(&p->pi_lock, *flags); 345 raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
346
347 while (unlikely(task_on_rq_migrating(p)))
348 cpu_relax();
359 } 349 }
360} 350}
361 351
@@ -449,7 +439,15 @@ static void __hrtick_start(void *arg)
449void hrtick_start(struct rq *rq, u64 delay) 439void hrtick_start(struct rq *rq, u64 delay)
450{ 440{
451 struct hrtimer *timer = &rq->hrtick_timer; 441 struct hrtimer *timer = &rq->hrtick_timer;
452 ktime_t time = ktime_add_ns(timer->base->get_time(), delay); 442 ktime_t time;
443 s64 delta;
444
445 /*
446 * Don't schedule slices shorter than 10000ns, that just
447 * doesn't make sense and can cause timer DoS.
448 */
449 delta = max_t(s64, delay, 10000LL);
450 time = ktime_add_ns(timer->base->get_time(), delta);
453 451
454 hrtimer_set_expires(timer, time); 452 hrtimer_set_expires(timer, time);
455 453
@@ -1043,7 +1041,7 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
1043 * A queue event has occurred, and we're going to schedule. In 1041 * A queue event has occurred, and we're going to schedule. In
1044 * this case, we can save a useless back to back clock update. 1042 * this case, we can save a useless back to back clock update.
1045 */ 1043 */
1046 if (rq->curr->on_rq && test_tsk_need_resched(rq->curr)) 1044 if (task_on_rq_queued(rq->curr) && test_tsk_need_resched(rq->curr))
1047 rq->skip_clock_update = 1; 1045 rq->skip_clock_update = 1;
1048} 1046}
1049 1047
@@ -1088,7 +1086,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
1088 1086
1089static void __migrate_swap_task(struct task_struct *p, int cpu) 1087static void __migrate_swap_task(struct task_struct *p, int cpu)
1090{ 1088{
1091 if (p->on_rq) { 1089 if (task_on_rq_queued(p)) {
1092 struct rq *src_rq, *dst_rq; 1090 struct rq *src_rq, *dst_rq;
1093 1091
1094 src_rq = task_rq(p); 1092 src_rq = task_rq(p);
@@ -1214,7 +1212,7 @@ static int migration_cpu_stop(void *data);
1214unsigned long wait_task_inactive(struct task_struct *p, long match_state) 1212unsigned long wait_task_inactive(struct task_struct *p, long match_state)
1215{ 1213{
1216 unsigned long flags; 1214 unsigned long flags;
1217 int running, on_rq; 1215 int running, queued;
1218 unsigned long ncsw; 1216 unsigned long ncsw;
1219 struct rq *rq; 1217 struct rq *rq;
1220 1218
@@ -1252,7 +1250,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
1252 rq = task_rq_lock(p, &flags); 1250 rq = task_rq_lock(p, &flags);
1253 trace_sched_wait_task(p); 1251 trace_sched_wait_task(p);
1254 running = task_running(rq, p); 1252 running = task_running(rq, p);
1255 on_rq = p->on_rq; 1253 queued = task_on_rq_queued(p);
1256 ncsw = 0; 1254 ncsw = 0;
1257 if (!match_state || p->state == match_state) 1255 if (!match_state || p->state == match_state)
1258 ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ 1256 ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
@@ -1284,7 +1282,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
1284 * running right now), it's preempted, and we should 1282 * running right now), it's preempted, and we should
1285 * yield - it could be a while. 1283 * yield - it could be a while.
1286 */ 1284 */
1287 if (unlikely(on_rq)) { 1285 if (unlikely(queued)) {
1288 ktime_t to = ktime_set(0, NSEC_PER_SEC/HZ); 1286 ktime_t to = ktime_set(0, NSEC_PER_SEC/HZ);
1289 1287
1290 set_current_state(TASK_UNINTERRUPTIBLE); 1288 set_current_state(TASK_UNINTERRUPTIBLE);
@@ -1478,7 +1476,7 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
1478static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags) 1476static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
1479{ 1477{
1480 activate_task(rq, p, en_flags); 1478 activate_task(rq, p, en_flags);
1481 p->on_rq = 1; 1479 p->on_rq = TASK_ON_RQ_QUEUED;
1482 1480
1483 /* if a worker is waking up, notify workqueue */ 1481 /* if a worker is waking up, notify workqueue */
1484 if (p->flags & PF_WQ_WORKER) 1482 if (p->flags & PF_WQ_WORKER)
@@ -1537,7 +1535,7 @@ static int ttwu_remote(struct task_struct *p, int wake_flags)
1537 int ret = 0; 1535 int ret = 0;
1538 1536
1539 rq = __task_rq_lock(p); 1537 rq = __task_rq_lock(p);
1540 if (p->on_rq) { 1538 if (task_on_rq_queued(p)) {
1541 /* check_preempt_curr() may use rq clock */ 1539 /* check_preempt_curr() may use rq clock */
1542 update_rq_clock(rq); 1540 update_rq_clock(rq);
1543 ttwu_do_wakeup(rq, p, wake_flags); 1541 ttwu_do_wakeup(rq, p, wake_flags);
@@ -1620,6 +1618,25 @@ static void ttwu_queue_remote(struct task_struct *p, int cpu)
1620 } 1618 }
1621} 1619}
1622 1620
/*
 * wake_up_if_idle - prod @cpu, but only while its runqueue is running
 * the idle task.
 *
 * Returns immediately when rq->curr is not an idle task. Otherwise
 * set_nr_if_polling() is tried on the runqueue's idle task first; when
 * it returns true only a tracepoint is emitted and no reschedule request
 * is sent from here. When it returns false, the idle check is repeated
 * with rq->lock held (IRQs saved/disabled) and smp_send_reschedule() is
 * called if the CPU is still running the idle task.
 */
1621void wake_up_if_idle(int cpu)
1622{
1623 struct rq *rq = cpu_rq(cpu);
1624 unsigned long flags;
1625
 /* Unlocked early-out; the same test is repeated under rq->lock below. */
1626 if (!is_idle_task(rq->curr))
1627 return;
1628
1629 if (set_nr_if_polling(rq->idle)) {
1630 trace_sched_wake_idle_without_ipi(cpu);
1631 } else {
1632 raw_spin_lock_irqsave(&rq->lock, flags);
 /* Re-check now that rq->lock is held before sending the reschedule. */
1633 if (is_idle_task(rq->curr))
1634 smp_send_reschedule(cpu);
1635 /* Else cpu is not in idle, do nothing here */
1636 raw_spin_unlock_irqrestore(&rq->lock, flags);
1637 }
1638}
1639
1623bool cpus_share_cache(int this_cpu, int that_cpu) 1640bool cpus_share_cache(int this_cpu, int that_cpu)
1624{ 1641{
1625 return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); 1642 return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu);
@@ -1742,7 +1759,7 @@ static void try_to_wake_up_local(struct task_struct *p)
1742 if (!(p->state & TASK_NORMAL)) 1759 if (!(p->state & TASK_NORMAL))
1743 goto out; 1760 goto out;
1744 1761
1745 if (!p->on_rq) 1762 if (!task_on_rq_queued(p))
1746 ttwu_activate(rq, p, ENQUEUE_WAKEUP); 1763 ttwu_activate(rq, p, ENQUEUE_WAKEUP);
1747 1764
1748 ttwu_do_wakeup(rq, p, 0); 1765 ttwu_do_wakeup(rq, p, 0);
@@ -1776,6 +1793,20 @@ int wake_up_state(struct task_struct *p, unsigned int state)
1776} 1793}
1777 1794
1778/* 1795/*
1796 * This function clears the sched_dl_entity static params.
1797 */
1798void __dl_clear_params(struct task_struct *p)
1799{
1800 struct sched_dl_entity *dl_se = &p->dl;
1801
 /*
  * Exactly five fields of p->dl are zeroed: dl_runtime, dl_deadline,
  * dl_period, flags and dl_bw. Nothing else in the entity is written.
  *
  * NOTE(review): dl_se->runtime and dl_se->deadline are NOT reset
  * here, whereas the open-coded sequence in __sched_fork() that this
  * helper stands in for also zeroed them - confirm that is intended.
  */
1802 dl_se->dl_runtime = 0;
1803 dl_se->dl_deadline = 0;
1804 dl_se->dl_period = 0;
1805 dl_se->flags = 0;
1806 dl_se->dl_bw = 0;
1807}
1808
1809/*
1779 * Perform scheduler related setup for a newly forked process p. 1810 * Perform scheduler related setup for a newly forked process p.
1780 * p is forked by current. 1811 * p is forked by current.
1781 * 1812 *
@@ -1799,10 +1830,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
1799 1830
1800 RB_CLEAR_NODE(&p->dl.rb_node); 1831 RB_CLEAR_NODE(&p->dl.rb_node);
1801 hrtimer_init(&p->dl.dl_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 1832 hrtimer_init(&p->dl.dl_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1802 p->dl.dl_runtime = p->dl.runtime = 0; 1833 __dl_clear_params(p);
1803 p->dl.dl_deadline = p->dl.deadline = 0;
1804 p->dl.dl_period = 0;
1805 p->dl.flags = 0;
1806 1834
1807 INIT_LIST_HEAD(&p->rt.run_list); 1835 INIT_LIST_HEAD(&p->rt.run_list);
1808 1836
@@ -1977,6 +2005,8 @@ unsigned long to_ratio(u64 period, u64 runtime)
1977#ifdef CONFIG_SMP 2005#ifdef CONFIG_SMP
/*
 * dl_bw_of - return the address of the dl_bw member reached through
 * cpu_rq(i)->rd for CPU @i.
 *
 * The rcu_lockdep_assert() lines exist only on the new side; they
 * complain with "sched RCU must be held" when
 * rcu_read_lock_sched_held() is false.
 */
1978inline struct dl_bw *dl_bw_of(int i) 2006inline struct dl_bw *dl_bw_of(int i)
1979{ 2007{
2008 rcu_lockdep_assert(rcu_read_lock_sched_held(),
2009 "sched RCU must be held");
1980 return &cpu_rq(i)->rd->dl_bw; 2010 return &cpu_rq(i)->rd->dl_bw;
1981} 2011}
1982 2012
@@ -1985,6 +2015,8 @@ static inline int dl_bw_cpus(int i)
1985 struct root_domain *rd = cpu_rq(i)->rd; 2015 struct root_domain *rd = cpu_rq(i)->rd;
1986 int cpus = 0; 2016 int cpus = 0;
1987 2017
2018 rcu_lockdep_assert(rcu_read_lock_sched_held(),
2019 "sched RCU must be held");
1988 for_each_cpu_and(i, rd->span, cpu_active_mask) 2020 for_each_cpu_and(i, rd->span, cpu_active_mask)
1989 cpus++; 2021 cpus++;
1990 2022
@@ -2095,7 +2127,7 @@ void wake_up_new_task(struct task_struct *p)
2095 init_task_runnable_average(p); 2127 init_task_runnable_average(p);
2096 rq = __task_rq_lock(p); 2128 rq = __task_rq_lock(p);
2097 activate_task(rq, p, 0); 2129 activate_task(rq, p, 0);
2098 p->on_rq = 1; 2130 p->on_rq = TASK_ON_RQ_QUEUED;
2099 trace_sched_wakeup_new(p, true); 2131 trace_sched_wakeup_new(p, true);
2100 check_preempt_curr(rq, p, WF_FORK); 2132 check_preempt_curr(rq, p, WF_FORK);
2101#ifdef CONFIG_SMP 2133#ifdef CONFIG_SMP
@@ -2287,10 +2319,6 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev)
2287 */ 2319 */
2288 post_schedule(rq); 2320 post_schedule(rq);
2289 2321
2290#ifdef __ARCH_WANT_UNLOCKED_CTXSW
2291 /* In this case, finish_task_switch does not reenable preemption */
2292 preempt_enable();
2293#endif
2294 if (current->set_child_tid) 2322 if (current->set_child_tid)
2295 put_user(task_pid_vnr(current), current->set_child_tid); 2323 put_user(task_pid_vnr(current), current->set_child_tid);
2296} 2324}
@@ -2333,9 +2361,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
2333 * of the scheduler it's an obvious special-case), so we 2361 * of the scheduler it's an obvious special-case), so we
2334 * do an early lockdep release here: 2362 * do an early lockdep release here:
2335 */ 2363 */
2336#ifndef __ARCH_WANT_UNLOCKED_CTXSW
2337 spin_release(&rq->lock.dep_map, 1, _THIS_IP_); 2364 spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
2338#endif
2339 2365
2340 context_tracking_task_switch(prev, next); 2366 context_tracking_task_switch(prev, next);
2341 /* Here we just switch the register state and the stack. */ 2367 /* Here we just switch the register state and the stack. */
@@ -2366,6 +2392,18 @@ unsigned long nr_running(void)
2366 return sum; 2392 return sum;
2367} 2393}
2368 2394
2395/*
2396 * Check if only the current task is running on the cpu.
2397 */
2398bool single_task_running(void)
2399{
 /*
  * Returns true exactly when nr_running of the runqueue belonging to
  * the CPU reported by smp_processor_id() equals 1, false otherwise.
  * No lock is taken in this function, so the result is a snapshot.
  * NOTE(review): confirm callers tolerate the value going stale (or
  * the task moving to another CPU) right after the check.
  */
2400 if (cpu_rq(smp_processor_id())->nr_running == 1)
2401 return true;
2402 else
2403 return false;
2404}
2405EXPORT_SYMBOL(single_task_running);
2406
2369unsigned long long nr_context_switches(void) 2407unsigned long long nr_context_switches(void)
2370{ 2408{
2371 int i; 2409 int i;
@@ -2393,6 +2431,13 @@ unsigned long nr_iowait_cpu(int cpu)
2393 return atomic_read(&this->nr_iowait); 2431 return atomic_read(&this->nr_iowait);
2394} 2432}
2395 2433
/*
 * get_iowait_load - report two values from the runqueue returned by
 * this_rq().
 * @nr_waiters: output; receives atomic_read() of that runqueue's nr_iowait
 * @load: output; receives that runqueue's cpu_load[0]
 *
 * Both pointers are dereferenced unconditionally, so neither may be NULL.
 */
2434void get_iowait_load(unsigned long *nr_waiters, unsigned long *load)
2435{
2436 struct rq *this = this_rq();
2437 *nr_waiters = atomic_read(&this->nr_iowait);
2438 *load = this->cpu_load[0];
2439}
2440
2396#ifdef CONFIG_SMP 2441#ifdef CONFIG_SMP
2397 2442
2398/* 2443/*
@@ -2444,7 +2489,7 @@ static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq)
2444 * project cycles that may never be accounted to this 2489 * project cycles that may never be accounted to this
2445 * thread, breaking clock_gettime(). 2490 * thread, breaking clock_gettime().
2446 */ 2491 */
2447 if (task_current(rq, p) && p->on_rq) { 2492 if (task_current(rq, p) && task_on_rq_queued(p)) {
2448 update_rq_clock(rq); 2493 update_rq_clock(rq);
2449 ns = rq_clock_task(rq) - p->se.exec_start; 2494 ns = rq_clock_task(rq) - p->se.exec_start;
2450 if ((s64)ns < 0) 2495 if ((s64)ns < 0)
@@ -2490,7 +2535,7 @@ unsigned long long task_sched_runtime(struct task_struct *p)
2490 * If we see ->on_cpu without ->on_rq, the task is leaving, and has 2535 * If we see ->on_cpu without ->on_rq, the task is leaving, and has
2491 * been accounted, so we're correct here as well. 2536 * been accounted, so we're correct here as well.
2492 */ 2537 */
2493 if (!p->on_cpu || !p->on_rq) 2538 if (!p->on_cpu || !task_on_rq_queued(p))
2494 return p->se.sum_exec_runtime; 2539 return p->se.sum_exec_runtime;
2495#endif 2540#endif
2496 2541
@@ -2653,6 +2698,9 @@ static noinline void __schedule_bug(struct task_struct *prev)
2653 */ 2698 */
2654static inline void schedule_debug(struct task_struct *prev) 2699static inline void schedule_debug(struct task_struct *prev)
2655{ 2700{
2701#ifdef CONFIG_SCHED_STACK_END_CHECK
2702 BUG_ON(unlikely(task_stack_end_corrupted(prev)));
2703#endif
2656 /* 2704 /*
2657 * Test if we are atomic. Since do_exit() needs to call into 2705 * Test if we are atomic. Since do_exit() needs to call into
2658 * schedule() atomically, we ignore that path. Otherwise whine 2706 * schedule() atomically, we ignore that path. Otherwise whine
@@ -2794,7 +2842,7 @@ need_resched:
2794 switch_count = &prev->nvcsw; 2842 switch_count = &prev->nvcsw;
2795 } 2843 }
2796 2844
2797 if (prev->on_rq || rq->skip_clock_update < 0) 2845 if (task_on_rq_queued(prev) || rq->skip_clock_update < 0)
2798 update_rq_clock(rq); 2846 update_rq_clock(rq);
2799 2847
2800 next = pick_next_task(rq, prev); 2848 next = pick_next_task(rq, prev);
@@ -2903,6 +2951,47 @@ asmlinkage __visible void __sched notrace preempt_schedule(void)
2903} 2951}
2904NOKPROBE_SYMBOL(preempt_schedule); 2952NOKPROBE_SYMBOL(preempt_schedule);
2905EXPORT_SYMBOL(preempt_schedule); 2953EXPORT_SYMBOL(preempt_schedule);
2954
2955#ifdef CONFIG_CONTEXT_TRACKING
2956/**
2957 * preempt_schedule_context - preempt_schedule called by tracing
2958 *
2959 * The tracing infrastructure uses preempt_enable_notrace to prevent
2960 * recursion and tracing preempt enabling caused by the tracing
2961 * infrastructure itself. But as tracing can happen in areas coming
2962 * from userspace or just about to enter userspace, a preempt enable
2963 * can occur before user_exit() is called. This will cause the scheduler
2964 * to be called when the system is still in usermode.
2965 *
2966 * To prevent this, the preempt_enable_notrace will use this function
2967 * instead of preempt_schedule() to exit user context if needed before
2968 * calling the scheduler.
2969 */
2970asmlinkage __visible void __sched notrace preempt_schedule_context(void)
2971{
 /* Context state returned by exception_enter(), handed back to exception_exit(). */
2972 enum ctx_state prev_ctx;
2973
 /* Nothing to do unless preemptible() reports that preemption is possible. */
2974 if (likely(!preemptible()))
2975 return;
2976
2977 do {
 /* PREEMPT_ACTIVE stays added for the whole __schedule() call and is subtracted again below. */
2978 __preempt_count_add(PREEMPT_ACTIVE);
2979 /*
2980 * Needs preempt disabled in case user_exit() is traced
2981 * and the tracer calls preempt_enable_notrace() causing
2982 * an infinite recursion.
2983 */
2984 prev_ctx = exception_enter();
2985 __schedule();
2986 exception_exit(prev_ctx);
2987
2988 __preempt_count_sub(PREEMPT_ACTIVE);
 /* Compiler barrier between dropping PREEMPT_ACTIVE and re-testing need_resched(). */
2989 barrier();
 /* Go around again for as long as need_resched() is still set. */
2990 } while (need_resched());
2991}
2992EXPORT_SYMBOL_GPL(preempt_schedule_context);
2993#endif /* CONFIG_CONTEXT_TRACKING */
2994
2906#endif /* CONFIG_PREEMPT */ 2995#endif /* CONFIG_PREEMPT */
2907 2996
2908/* 2997/*
@@ -2959,7 +3048,7 @@ EXPORT_SYMBOL(default_wake_function);
2959 */ 3048 */
2960void rt_mutex_setprio(struct task_struct *p, int prio) 3049void rt_mutex_setprio(struct task_struct *p, int prio)
2961{ 3050{
2962 int oldprio, on_rq, running, enqueue_flag = 0; 3051 int oldprio, queued, running, enqueue_flag = 0;
2963 struct rq *rq; 3052 struct rq *rq;
2964 const struct sched_class *prev_class; 3053 const struct sched_class *prev_class;
2965 3054
@@ -2988,12 +3077,12 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
2988 trace_sched_pi_setprio(p, prio); 3077 trace_sched_pi_setprio(p, prio);
2989 oldprio = p->prio; 3078 oldprio = p->prio;
2990 prev_class = p->sched_class; 3079 prev_class = p->sched_class;
2991 on_rq = p->on_rq; 3080 queued = task_on_rq_queued(p);
2992 running = task_current(rq, p); 3081 running = task_current(rq, p);
2993 if (on_rq) 3082 if (queued)
2994 dequeue_task(rq, p, 0); 3083 dequeue_task(rq, p, 0);
2995 if (running) 3084 if (running)
2996 p->sched_class->put_prev_task(rq, p); 3085 put_prev_task(rq, p);
2997 3086
2998 /* 3087 /*
2999 * Boosting condition are: 3088 * Boosting condition are:
@@ -3030,7 +3119,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
3030 3119
3031 if (running) 3120 if (running)
3032 p->sched_class->set_curr_task(rq); 3121 p->sched_class->set_curr_task(rq);
3033 if (on_rq) 3122 if (queued)
3034 enqueue_task(rq, p, enqueue_flag); 3123 enqueue_task(rq, p, enqueue_flag);
3035 3124
3036 check_class_changed(rq, p, prev_class, oldprio); 3125 check_class_changed(rq, p, prev_class, oldprio);
@@ -3041,7 +3130,7 @@ out_unlock:
3041 3130
3042void set_user_nice(struct task_struct *p, long nice) 3131void set_user_nice(struct task_struct *p, long nice)
3043{ 3132{
3044 int old_prio, delta, on_rq; 3133 int old_prio, delta, queued;
3045 unsigned long flags; 3134 unsigned long flags;
3046 struct rq *rq; 3135 struct rq *rq;
3047 3136
@@ -3062,8 +3151,8 @@ void set_user_nice(struct task_struct *p, long nice)
3062 p->static_prio = NICE_TO_PRIO(nice); 3151 p->static_prio = NICE_TO_PRIO(nice);
3063 goto out_unlock; 3152 goto out_unlock;
3064 } 3153 }
3065 on_rq = p->on_rq; 3154 queued = task_on_rq_queued(p);
3066 if (on_rq) 3155 if (queued)
3067 dequeue_task(rq, p, 0); 3156 dequeue_task(rq, p, 0);
3068 3157
3069 p->static_prio = NICE_TO_PRIO(nice); 3158 p->static_prio = NICE_TO_PRIO(nice);
@@ -3072,7 +3161,7 @@ void set_user_nice(struct task_struct *p, long nice)
3072 p->prio = effective_prio(p); 3161 p->prio = effective_prio(p);
3073 delta = p->prio - old_prio; 3162 delta = p->prio - old_prio;
3074 3163
3075 if (on_rq) { 3164 if (queued) {
3076 enqueue_task(rq, p, 0); 3165 enqueue_task(rq, p, 0);
3077 /* 3166 /*
3078 * If the task increased its priority or is running and 3167 * If the task increased its priority or is running and
@@ -3344,7 +3433,7 @@ static int __sched_setscheduler(struct task_struct *p,
3344{ 3433{
3345 int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 : 3434 int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 :
3346 MAX_RT_PRIO - 1 - attr->sched_priority; 3435 MAX_RT_PRIO - 1 - attr->sched_priority;
3347 int retval, oldprio, oldpolicy = -1, on_rq, running; 3436 int retval, oldprio, oldpolicy = -1, queued, running;
3348 int policy = attr->sched_policy; 3437 int policy = attr->sched_policy;
3349 unsigned long flags; 3438 unsigned long flags;
3350 const struct sched_class *prev_class; 3439 const struct sched_class *prev_class;
@@ -3541,19 +3630,19 @@ change:
3541 return 0; 3630 return 0;
3542 } 3631 }
3543 3632
3544 on_rq = p->on_rq; 3633 queued = task_on_rq_queued(p);
3545 running = task_current(rq, p); 3634 running = task_current(rq, p);
3546 if (on_rq) 3635 if (queued)
3547 dequeue_task(rq, p, 0); 3636 dequeue_task(rq, p, 0);
3548 if (running) 3637 if (running)
3549 p->sched_class->put_prev_task(rq, p); 3638 put_prev_task(rq, p);
3550 3639
3551 prev_class = p->sched_class; 3640 prev_class = p->sched_class;
3552 __setscheduler(rq, p, attr); 3641 __setscheduler(rq, p, attr);
3553 3642
3554 if (running) 3643 if (running)
3555 p->sched_class->set_curr_task(rq); 3644 p->sched_class->set_curr_task(rq);
3556 if (on_rq) { 3645 if (queued) {
3557 /* 3646 /*
3558 * We enqueue to tail when the priority of a task is 3647 * We enqueue to tail when the priority of a task is
3559 * increased (user space view). 3648 * increased (user space view).
@@ -3977,14 +4066,14 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
3977 rcu_read_lock(); 4066 rcu_read_lock();
3978 if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) { 4067 if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
3979 rcu_read_unlock(); 4068 rcu_read_unlock();
3980 goto out_unlock; 4069 goto out_free_new_mask;
3981 } 4070 }
3982 rcu_read_unlock(); 4071 rcu_read_unlock();
3983 } 4072 }
3984 4073
3985 retval = security_task_setscheduler(p); 4074 retval = security_task_setscheduler(p);
3986 if (retval) 4075 if (retval)
3987 goto out_unlock; 4076 goto out_free_new_mask;
3988 4077
3989 4078
3990 cpuset_cpus_allowed(p, cpus_allowed); 4079 cpuset_cpus_allowed(p, cpus_allowed);
@@ -3997,13 +4086,14 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
3997 * root_domain. 4086 * root_domain.
3998 */ 4087 */
3999#ifdef CONFIG_SMP 4088#ifdef CONFIG_SMP
4000 if (task_has_dl_policy(p)) { 4089 if (task_has_dl_policy(p) && dl_bandwidth_enabled()) {
4001 const struct cpumask *span = task_rq(p)->rd->span; 4090 rcu_read_lock();
4002 4091 if (!cpumask_subset(task_rq(p)->rd->span, new_mask)) {
4003 if (dl_bandwidth_enabled() && !cpumask_subset(span, new_mask)) {
4004 retval = -EBUSY; 4092 retval = -EBUSY;
4005 goto out_unlock; 4093 rcu_read_unlock();
4094 goto out_free_new_mask;
4006 } 4095 }
4096 rcu_read_unlock();
4007 } 4097 }
4008#endif 4098#endif
4009again: 4099again:
@@ -4021,7 +4111,7 @@ again:
4021 goto again; 4111 goto again;
4022 } 4112 }
4023 } 4113 }
4024out_unlock: 4114out_free_new_mask:
4025 free_cpumask_var(new_mask); 4115 free_cpumask_var(new_mask);
4026out_free_cpus_allowed: 4116out_free_cpus_allowed:
4027 free_cpumask_var(cpus_allowed); 4117 free_cpumask_var(cpus_allowed);
@@ -4505,7 +4595,7 @@ void show_state_filter(unsigned long state_filter)
4505 " task PC stack pid father\n"); 4595 " task PC stack pid father\n");
4506#endif 4596#endif
4507 rcu_read_lock(); 4597 rcu_read_lock();
4508 do_each_thread(g, p) { 4598 for_each_process_thread(g, p) {
4509 /* 4599 /*
4510 * reset the NMI-timeout, listing all files on a slow 4600 * reset the NMI-timeout, listing all files on a slow
4511 * console might take a lot of time: 4601 * console might take a lot of time:
@@ -4513,7 +4603,7 @@ void show_state_filter(unsigned long state_filter)
4513 touch_nmi_watchdog(); 4603 touch_nmi_watchdog();
4514 if (!state_filter || (p->state & state_filter)) 4604 if (!state_filter || (p->state & state_filter))
4515 sched_show_task(p); 4605 sched_show_task(p);
4516 } while_each_thread(g, p); 4606 }
4517 4607
4518 touch_all_softlockup_watchdogs(); 4608 touch_all_softlockup_watchdogs();
4519 4609
@@ -4568,7 +4658,7 @@ void init_idle(struct task_struct *idle, int cpu)
4568 rcu_read_unlock(); 4658 rcu_read_unlock();
4569 4659
4570 rq->curr = rq->idle = idle; 4660 rq->curr = rq->idle = idle;
4571 idle->on_rq = 1; 4661 idle->on_rq = TASK_ON_RQ_QUEUED;
4572#if defined(CONFIG_SMP) 4662#if defined(CONFIG_SMP)
4573 idle->on_cpu = 1; 4663 idle->on_cpu = 1;
4574#endif 4664#endif
@@ -4589,6 +4679,33 @@ void init_idle(struct task_struct *idle, int cpu)
4589} 4679}
4590 4680
4591#ifdef CONFIG_SMP 4681#ifdef CONFIG_SMP
4682/*
4683 * move_queued_task - move a queued task to new rq.
4684 *
4685 * Returns (locked) new rq. Old rq's lock is released.
4686 */
4687static struct rq *move_queued_task(struct task_struct *p, int new_cpu)
4688{
4689 struct rq *rq = task_rq(p);
4690
 /* The caller must already hold the lock of the rq that p is currently on. */
4691 lockdep_assert_held(&rq->lock);
4692
 /*
  * Take p off the old rq and mark it TASK_ON_RQ_MIGRATING before the
  * old lock is dropped. Only one rq lock is held at any time: the old
  * one is released before the new one is acquired.
  * NOTE(review): presumably the MIGRATING marker is what the
  * task_on_rq_migrating() checks in __task_rq_lock()/task_rq_lock()
  * wait on while p is between runqueues - confirm.
  */
4693 dequeue_task(rq, p, 0);
4694 p->on_rq = TASK_ON_RQ_MIGRATING;
4695 set_task_cpu(p, new_cpu);
4696 raw_spin_unlock(&rq->lock);
4697
4698 rq = cpu_rq(new_cpu);
4699
4700 raw_spin_lock(&rq->lock);
 /* p's CPU must still be new_cpu once the destination lock is held. */
4701 BUG_ON(task_cpu(p) != new_cpu);
 /* Back to QUEUED before the enqueue on the destination rq. */
4702 p->on_rq = TASK_ON_RQ_QUEUED;
4703 enqueue_task(rq, p, 0);
4704 check_preempt_curr(rq, p, 0);
4705
 /* Returned with the destination rq's lock still held; the caller unlocks it. */
4706 return rq;
4707}
4708
4592void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) 4709void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
4593{ 4710{
4594 if (p->sched_class && p->sched_class->set_cpus_allowed) 4711 if (p->sched_class && p->sched_class->set_cpus_allowed)
@@ -4645,14 +4762,15 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
4645 goto out; 4762 goto out;
4646 4763
4647 dest_cpu = cpumask_any_and(cpu_active_mask, new_mask); 4764 dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
4648 if (p->on_rq) { 4765 if (task_running(rq, p) || p->state == TASK_WAKING) {
4649 struct migration_arg arg = { p, dest_cpu }; 4766 struct migration_arg arg = { p, dest_cpu };
4650 /* Need help from migration thread: drop lock and wait. */ 4767 /* Need help from migration thread: drop lock and wait. */
4651 task_rq_unlock(rq, p, &flags); 4768 task_rq_unlock(rq, p, &flags);
4652 stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); 4769 stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
4653 tlb_migrate_finish(p->mm); 4770 tlb_migrate_finish(p->mm);
4654 return 0; 4771 return 0;
4655 } 4772 } else if (task_on_rq_queued(p))
4773 rq = move_queued_task(p, dest_cpu);
4656out: 4774out:
4657 task_rq_unlock(rq, p, &flags); 4775 task_rq_unlock(rq, p, &flags);
4658 4776
@@ -4673,20 +4791,20 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
4673 */ 4791 */
4674static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) 4792static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
4675{ 4793{
4676 struct rq *rq_dest, *rq_src; 4794 struct rq *rq;
4677 int ret = 0; 4795 int ret = 0;
4678 4796
4679 if (unlikely(!cpu_active(dest_cpu))) 4797 if (unlikely(!cpu_active(dest_cpu)))
4680 return ret; 4798 return ret;
4681 4799
4682 rq_src = cpu_rq(src_cpu); 4800 rq = cpu_rq(src_cpu);
4683 rq_dest = cpu_rq(dest_cpu);
4684 4801
4685 raw_spin_lock(&p->pi_lock); 4802 raw_spin_lock(&p->pi_lock);
4686 double_rq_lock(rq_src, rq_dest); 4803 raw_spin_lock(&rq->lock);
4687 /* Already moved. */ 4804 /* Already moved. */
4688 if (task_cpu(p) != src_cpu) 4805 if (task_cpu(p) != src_cpu)
4689 goto done; 4806 goto done;
4807
4690 /* Affinity changed (again). */ 4808 /* Affinity changed (again). */
4691 if (!cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p))) 4809 if (!cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
4692 goto fail; 4810 goto fail;
@@ -4695,16 +4813,12 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
4695 * If we're not on a rq, the next wake-up will ensure we're 4813 * If we're not on a rq, the next wake-up will ensure we're
4696 * placed properly. 4814 * placed properly.
4697 */ 4815 */
4698 if (p->on_rq) { 4816 if (task_on_rq_queued(p))
4699 dequeue_task(rq_src, p, 0); 4817 rq = move_queued_task(p, dest_cpu);
4700 set_task_cpu(p, dest_cpu);
4701 enqueue_task(rq_dest, p, 0);
4702 check_preempt_curr(rq_dest, p, 0);
4703 }
4704done: 4818done:
4705 ret = 1; 4819 ret = 1;
4706fail: 4820fail:
4707 double_rq_unlock(rq_src, rq_dest); 4821 raw_spin_unlock(&rq->lock);
4708 raw_spin_unlock(&p->pi_lock); 4822 raw_spin_unlock(&p->pi_lock);
4709 return ret; 4823 return ret;
4710} 4824}
@@ -4736,22 +4850,22 @@ void sched_setnuma(struct task_struct *p, int nid)
4736{ 4850{
4737 struct rq *rq; 4851 struct rq *rq;
4738 unsigned long flags; 4852 unsigned long flags;
4739 bool on_rq, running; 4853 bool queued, running;
4740 4854
4741 rq = task_rq_lock(p, &flags); 4855 rq = task_rq_lock(p, &flags);
4742 on_rq = p->on_rq; 4856 queued = task_on_rq_queued(p);
4743 running = task_current(rq, p); 4857 running = task_current(rq, p);
4744 4858
4745 if (on_rq) 4859 if (queued)
4746 dequeue_task(rq, p, 0); 4860 dequeue_task(rq, p, 0);
4747 if (running) 4861 if (running)
4748 p->sched_class->put_prev_task(rq, p); 4862 put_prev_task(rq, p);
4749 4863
4750 p->numa_preferred_nid = nid; 4864 p->numa_preferred_nid = nid;
4751 4865
4752 if (running) 4866 if (running)
4753 p->sched_class->set_curr_task(rq); 4867 p->sched_class->set_curr_task(rq);
4754 if (on_rq) 4868 if (queued)
4755 enqueue_task(rq, p, 0); 4869 enqueue_task(rq, p, 0);
4756 task_rq_unlock(rq, p, &flags); 4870 task_rq_unlock(rq, p, &flags);
4757} 4871}
@@ -4771,6 +4885,12 @@ static int migration_cpu_stop(void *data)
4771 * be on another cpu but it doesn't matter. 4885 * be on another cpu but it doesn't matter.
4772 */ 4886 */
4773 local_irq_disable(); 4887 local_irq_disable();
4888 /*
4889 * We need to explicitly wake pending tasks before running
4890 * __migrate_task() such that we will not miss enforcing cpus_allowed
4891 * during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test.
4892 */
4893 sched_ttwu_pending();
4774 __migrate_task(arg->task, raw_smp_processor_id(), arg->dest_cpu); 4894 __migrate_task(arg->task, raw_smp_processor_id(), arg->dest_cpu);
4775 local_irq_enable(); 4895 local_irq_enable();
4776 return 0; 4896 return 0;
@@ -5181,6 +5301,7 @@ static int sched_cpu_inactive(struct notifier_block *nfb,
5181{ 5301{
5182 unsigned long flags; 5302 unsigned long flags;
5183 long cpu = (long)hcpu; 5303 long cpu = (long)hcpu;
5304 struct dl_bw *dl_b;
5184 5305
5185 switch (action & ~CPU_TASKS_FROZEN) { 5306 switch (action & ~CPU_TASKS_FROZEN) {
5186 case CPU_DOWN_PREPARE: 5307 case CPU_DOWN_PREPARE:
@@ -5188,15 +5309,19 @@ static int sched_cpu_inactive(struct notifier_block *nfb,
5188 5309
5189 /* explicitly allow suspend */ 5310 /* explicitly allow suspend */
5190 if (!(action & CPU_TASKS_FROZEN)) { 5311 if (!(action & CPU_TASKS_FROZEN)) {
5191 struct dl_bw *dl_b = dl_bw_of(cpu);
5192 bool overflow; 5312 bool overflow;
5193 int cpus; 5313 int cpus;
5194 5314
5315 rcu_read_lock_sched();
5316 dl_b = dl_bw_of(cpu);
5317
5195 raw_spin_lock_irqsave(&dl_b->lock, flags); 5318 raw_spin_lock_irqsave(&dl_b->lock, flags);
5196 cpus = dl_bw_cpus(cpu); 5319 cpus = dl_bw_cpus(cpu);
5197 overflow = __dl_overflow(dl_b, cpus, 0, 0); 5320 overflow = __dl_overflow(dl_b, cpus, 0, 0);
5198 raw_spin_unlock_irqrestore(&dl_b->lock, flags); 5321 raw_spin_unlock_irqrestore(&dl_b->lock, flags);
5199 5322
5323 rcu_read_unlock_sched();
5324
5200 if (overflow) 5325 if (overflow)
5201 return notifier_from_errno(-EBUSY); 5326 return notifier_from_errno(-EBUSY);
5202 } 5327 }
@@ -5739,7 +5864,7 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
5739 const struct cpumask *span = sched_domain_span(sd); 5864 const struct cpumask *span = sched_domain_span(sd);
5740 struct cpumask *covered = sched_domains_tmpmask; 5865 struct cpumask *covered = sched_domains_tmpmask;
5741 struct sd_data *sdd = sd->private; 5866 struct sd_data *sdd = sd->private;
5742 struct sched_domain *child; 5867 struct sched_domain *sibling;
5743 int i; 5868 int i;
5744 5869
5745 cpumask_clear(covered); 5870 cpumask_clear(covered);
@@ -5750,10 +5875,10 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
5750 if (cpumask_test_cpu(i, covered)) 5875 if (cpumask_test_cpu(i, covered))
5751 continue; 5876 continue;
5752 5877
5753 child = *per_cpu_ptr(sdd->sd, i); 5878 sibling = *per_cpu_ptr(sdd->sd, i);
5754 5879
5755 /* See the comment near build_group_mask(). */ 5880 /* See the comment near build_group_mask(). */
5756 if (!cpumask_test_cpu(i, sched_domain_span(child))) 5881 if (!cpumask_test_cpu(i, sched_domain_span(sibling)))
5757 continue; 5882 continue;
5758 5883
5759 sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(), 5884 sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
@@ -5763,10 +5888,9 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
5763 goto fail; 5888 goto fail;
5764 5889
5765 sg_span = sched_group_cpus(sg); 5890 sg_span = sched_group_cpus(sg);
5766 if (child->child) { 5891 if (sibling->child)
5767 child = child->child; 5892 cpumask_copy(sg_span, sched_domain_span(sibling->child));
5768 cpumask_copy(sg_span, sched_domain_span(child)); 5893 else
5769 } else
5770 cpumask_set_cpu(i, sg_span); 5894 cpumask_set_cpu(i, sg_span);
5771 5895
5772 cpumask_or(covered, covered, sg_span); 5896 cpumask_or(covered, covered, sg_span);
@@ -7117,13 +7241,13 @@ static void normalize_task(struct rq *rq, struct task_struct *p)
7117 .sched_policy = SCHED_NORMAL, 7241 .sched_policy = SCHED_NORMAL,
7118 }; 7242 };
7119 int old_prio = p->prio; 7243 int old_prio = p->prio;
7120 int on_rq; 7244 int queued;
7121 7245
7122 on_rq = p->on_rq; 7246 queued = task_on_rq_queued(p);
7123 if (on_rq) 7247 if (queued)
7124 dequeue_task(rq, p, 0); 7248 dequeue_task(rq, p, 0);
7125 __setscheduler(rq, p, &attr); 7249 __setscheduler(rq, p, &attr);
7126 if (on_rq) { 7250 if (queued) {
7127 enqueue_task(rq, p, 0); 7251 enqueue_task(rq, p, 0);
7128 resched_curr(rq); 7252 resched_curr(rq);
7129 } 7253 }
@@ -7137,12 +7261,12 @@ void normalize_rt_tasks(void)
7137 unsigned long flags; 7261 unsigned long flags;
7138 struct rq *rq; 7262 struct rq *rq;
7139 7263
7140 read_lock_irqsave(&tasklist_lock, flags); 7264 read_lock(&tasklist_lock);
7141 do_each_thread(g, p) { 7265 for_each_process_thread(g, p) {
7142 /* 7266 /*
7143 * Only normalize user tasks: 7267 * Only normalize user tasks:
7144 */ 7268 */
7145 if (!p->mm) 7269 if (p->flags & PF_KTHREAD)
7146 continue; 7270 continue;
7147 7271
7148 p->se.exec_start = 0; 7272 p->se.exec_start = 0;
@@ -7157,21 +7281,16 @@ void normalize_rt_tasks(void)
7157 * Renice negative nice level userspace 7281 * Renice negative nice level userspace
7158 * tasks back to 0: 7282 * tasks back to 0:
7159 */ 7283 */
7160 if (task_nice(p) < 0 && p->mm) 7284 if (task_nice(p) < 0)
7161 set_user_nice(p, 0); 7285 set_user_nice(p, 0);
7162 continue; 7286 continue;
7163 } 7287 }
7164 7288
7165 raw_spin_lock(&p->pi_lock); 7289 rq = task_rq_lock(p, &flags);
7166 rq = __task_rq_lock(p);
7167
7168 normalize_task(rq, p); 7290 normalize_task(rq, p);
7169 7291 task_rq_unlock(rq, p, &flags);
7170 __task_rq_unlock(rq); 7292 }
7171 raw_spin_unlock(&p->pi_lock); 7293 read_unlock(&tasklist_lock);
7172 } while_each_thread(g, p);
7173
7174 read_unlock_irqrestore(&tasklist_lock, flags);
7175} 7294}
7176 7295
7177#endif /* CONFIG_MAGIC_SYSRQ */ 7296#endif /* CONFIG_MAGIC_SYSRQ */
@@ -7311,19 +7430,19 @@ void sched_offline_group(struct task_group *tg)
7311void sched_move_task(struct task_struct *tsk) 7430void sched_move_task(struct task_struct *tsk)
7312{ 7431{
7313 struct task_group *tg; 7432 struct task_group *tg;
7314 int on_rq, running; 7433 int queued, running;
7315 unsigned long flags; 7434 unsigned long flags;
7316 struct rq *rq; 7435 struct rq *rq;
7317 7436
7318 rq = task_rq_lock(tsk, &flags); 7437 rq = task_rq_lock(tsk, &flags);
7319 7438
7320 running = task_current(rq, tsk); 7439 running = task_current(rq, tsk);
7321 on_rq = tsk->on_rq; 7440 queued = task_on_rq_queued(tsk);
7322 7441
7323 if (on_rq) 7442 if (queued)
7324 dequeue_task(rq, tsk, 0); 7443 dequeue_task(rq, tsk, 0);
7325 if (unlikely(running)) 7444 if (unlikely(running))
7326 tsk->sched_class->put_prev_task(rq, tsk); 7445 put_prev_task(rq, tsk);
7327 7446
7328 tg = container_of(task_css_check(tsk, cpu_cgrp_id, 7447 tg = container_of(task_css_check(tsk, cpu_cgrp_id,
7329 lockdep_is_held(&tsk->sighand->siglock)), 7448 lockdep_is_held(&tsk->sighand->siglock)),
@@ -7333,14 +7452,14 @@ void sched_move_task(struct task_struct *tsk)
7333 7452
7334#ifdef CONFIG_FAIR_GROUP_SCHED 7453#ifdef CONFIG_FAIR_GROUP_SCHED
7335 if (tsk->sched_class->task_move_group) 7454 if (tsk->sched_class->task_move_group)
7336 tsk->sched_class->task_move_group(tsk, on_rq); 7455 tsk->sched_class->task_move_group(tsk, queued);
7337 else 7456 else
7338#endif 7457#endif
7339 set_task_rq(tsk, task_cpu(tsk)); 7458 set_task_rq(tsk, task_cpu(tsk));
7340 7459
7341 if (unlikely(running)) 7460 if (unlikely(running))
7342 tsk->sched_class->set_curr_task(rq); 7461 tsk->sched_class->set_curr_task(rq);
7343 if (on_rq) 7462 if (queued)
7344 enqueue_task(rq, tsk, 0); 7463 enqueue_task(rq, tsk, 0);
7345 7464
7346 task_rq_unlock(rq, tsk, &flags); 7465 task_rq_unlock(rq, tsk, &flags);
@@ -7358,10 +7477,10 @@ static inline int tg_has_rt_tasks(struct task_group *tg)
7358{ 7477{
7359 struct task_struct *g, *p; 7478 struct task_struct *g, *p;
7360 7479
7361 do_each_thread(g, p) { 7480 for_each_process_thread(g, p) {
7362 if (rt_task(p) && task_rq(p)->rt.tg == tg) 7481 if (rt_task(p) && task_group(p) == tg)
7363 return 1; 7482 return 1;
7364 } while_each_thread(g, p); 7483 }
7365 7484
7366 return 0; 7485 return 0;
7367} 7486}
@@ -7570,6 +7689,7 @@ static int sched_dl_global_constraints(void)
7570 u64 runtime = global_rt_runtime(); 7689 u64 runtime = global_rt_runtime();
7571 u64 period = global_rt_period(); 7690 u64 period = global_rt_period();
7572 u64 new_bw = to_ratio(period, runtime); 7691 u64 new_bw = to_ratio(period, runtime);
7692 struct dl_bw *dl_b;
7573 int cpu, ret = 0; 7693 int cpu, ret = 0;
7574 unsigned long flags; 7694 unsigned long flags;
7575 7695
@@ -7583,13 +7703,16 @@ static int sched_dl_global_constraints(void)
7583 * solutions is welcome! 7703 * solutions is welcome!
7584 */ 7704 */
7585 for_each_possible_cpu(cpu) { 7705 for_each_possible_cpu(cpu) {
7586 struct dl_bw *dl_b = dl_bw_of(cpu); 7706 rcu_read_lock_sched();
7707 dl_b = dl_bw_of(cpu);
7587 7708
7588 raw_spin_lock_irqsave(&dl_b->lock, flags); 7709 raw_spin_lock_irqsave(&dl_b->lock, flags);
7589 if (new_bw < dl_b->total_bw) 7710 if (new_bw < dl_b->total_bw)
7590 ret = -EBUSY; 7711 ret = -EBUSY;
7591 raw_spin_unlock_irqrestore(&dl_b->lock, flags); 7712 raw_spin_unlock_irqrestore(&dl_b->lock, flags);
7592 7713
7714 rcu_read_unlock_sched();
7715
7593 if (ret) 7716 if (ret)
7594 break; 7717 break;
7595 } 7718 }
@@ -7600,6 +7723,7 @@ static int sched_dl_global_constraints(void)
7600static void sched_dl_do_global(void) 7723static void sched_dl_do_global(void)
7601{ 7724{
7602 u64 new_bw = -1; 7725 u64 new_bw = -1;
7726 struct dl_bw *dl_b;
7603 int cpu; 7727 int cpu;
7604 unsigned long flags; 7728 unsigned long flags;
7605 7729
@@ -7613,11 +7737,14 @@ static void sched_dl_do_global(void)
7613 * FIXME: As above... 7737 * FIXME: As above...
7614 */ 7738 */
7615 for_each_possible_cpu(cpu) { 7739 for_each_possible_cpu(cpu) {
7616 struct dl_bw *dl_b = dl_bw_of(cpu); 7740 rcu_read_lock_sched();
7741 dl_b = dl_bw_of(cpu);
7617 7742
7618 raw_spin_lock_irqsave(&dl_b->lock, flags); 7743 raw_spin_lock_irqsave(&dl_b->lock, flags);
7619 dl_b->bw = new_bw; 7744 dl_b->bw = new_bw;
7620 raw_spin_unlock_irqrestore(&dl_b->lock, flags); 7745 raw_spin_unlock_irqrestore(&dl_b->lock, flags);
7746
7747 rcu_read_unlock_sched();
7621 } 7748 }
7622} 7749}
7623 7750
@@ -7747,6 +7874,11 @@ static void cpu_cgroup_css_offline(struct cgroup_subsys_state *css)
7747 sched_offline_group(tg); 7874 sched_offline_group(tg);
7748} 7875}
7749 7876
7877static void cpu_cgroup_fork(struct task_struct *task)
7878{
7879 sched_move_task(task);
7880}
7881
7750static int cpu_cgroup_can_attach(struct cgroup_subsys_state *css, 7882static int cpu_cgroup_can_attach(struct cgroup_subsys_state *css,
7751 struct cgroup_taskset *tset) 7883 struct cgroup_taskset *tset)
7752{ 7884{
@@ -7998,7 +8130,7 @@ static int tg_cfs_schedulable_down(struct task_group *tg, void *data)
7998 struct cfs_bandwidth *parent_b = &tg->parent->cfs_bandwidth; 8130 struct cfs_bandwidth *parent_b = &tg->parent->cfs_bandwidth;
7999 8131
8000 quota = normalize_cfs_quota(tg, d); 8132 quota = normalize_cfs_quota(tg, d);
8001 parent_quota = parent_b->hierarchal_quota; 8133 parent_quota = parent_b->hierarchical_quota;
8002 8134
8003 /* 8135 /*
8004 * ensure max(child_quota) <= parent_quota, inherit when no 8136 * ensure max(child_quota) <= parent_quota, inherit when no
@@ -8009,7 +8141,7 @@ static int tg_cfs_schedulable_down(struct task_group *tg, void *data)
8009 else if (parent_quota != RUNTIME_INF && quota > parent_quota) 8141 else if (parent_quota != RUNTIME_INF && quota > parent_quota)
8010 return -EINVAL; 8142 return -EINVAL;
8011 } 8143 }
8012 cfs_b->hierarchal_quota = quota; 8144 cfs_b->hierarchical_quota = quota;
8013 8145
8014 return 0; 8146 return 0;
8015} 8147}
@@ -8119,6 +8251,7 @@ struct cgroup_subsys cpu_cgrp_subsys = {
8119 .css_free = cpu_cgroup_css_free, 8251 .css_free = cpu_cgroup_css_free,
8120 .css_online = cpu_cgroup_css_online, 8252 .css_online = cpu_cgroup_css_online,
8121 .css_offline = cpu_cgroup_css_offline, 8253 .css_offline = cpu_cgroup_css_offline,
8254 .fork = cpu_cgroup_fork,
8122 .can_attach = cpu_cgroup_can_attach, 8255 .can_attach = cpu_cgroup_can_attach,
8123 .attach = cpu_cgroup_attach, 8256 .attach = cpu_cgroup_attach,
8124 .exit = cpu_cgroup_exit, 8257 .exit = cpu_cgroup_exit,