diff options
Diffstat (limited to 'kernel/sched/core.c')
| -rw-r--r-- | kernel/sched/core.c | 377 |
1 file changed, 255 insertions, 122 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 1211575a2208..240157c13ddc 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
| @@ -90,22 +90,6 @@ | |||
| 90 | #define CREATE_TRACE_POINTS | 90 | #define CREATE_TRACE_POINTS |
| 91 | #include <trace/events/sched.h> | 91 | #include <trace/events/sched.h> |
| 92 | 92 | ||
| 93 | #ifdef smp_mb__before_atomic | ||
| 94 | void __smp_mb__before_atomic(void) | ||
| 95 | { | ||
| 96 | smp_mb__before_atomic(); | ||
| 97 | } | ||
| 98 | EXPORT_SYMBOL(__smp_mb__before_atomic); | ||
| 99 | #endif | ||
| 100 | |||
| 101 | #ifdef smp_mb__after_atomic | ||
| 102 | void __smp_mb__after_atomic(void) | ||
| 103 | { | ||
| 104 | smp_mb__after_atomic(); | ||
| 105 | } | ||
| 106 | EXPORT_SYMBOL(__smp_mb__after_atomic); | ||
| 107 | #endif | ||
| 108 | |||
| 109 | void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period) | 93 | void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period) |
| 110 | { | 94 | { |
| 111 | unsigned long delta; | 95 | unsigned long delta; |
| @@ -333,9 +317,12 @@ static inline struct rq *__task_rq_lock(struct task_struct *p) | |||
| 333 | for (;;) { | 317 | for (;;) { |
| 334 | rq = task_rq(p); | 318 | rq = task_rq(p); |
| 335 | raw_spin_lock(&rq->lock); | 319 | raw_spin_lock(&rq->lock); |
| 336 | if (likely(rq == task_rq(p))) | 320 | if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) |
| 337 | return rq; | 321 | return rq; |
| 338 | raw_spin_unlock(&rq->lock); | 322 | raw_spin_unlock(&rq->lock); |
| 323 | |||
| 324 | while (unlikely(task_on_rq_migrating(p))) | ||
| 325 | cpu_relax(); | ||
| 339 | } | 326 | } |
| 340 | } | 327 | } |
| 341 | 328 | ||
| @@ -352,10 +339,13 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags) | |||
| 352 | raw_spin_lock_irqsave(&p->pi_lock, *flags); | 339 | raw_spin_lock_irqsave(&p->pi_lock, *flags); |
| 353 | rq = task_rq(p); | 340 | rq = task_rq(p); |
| 354 | raw_spin_lock(&rq->lock); | 341 | raw_spin_lock(&rq->lock); |
| 355 | if (likely(rq == task_rq(p))) | 342 | if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) |
| 356 | return rq; | 343 | return rq; |
| 357 | raw_spin_unlock(&rq->lock); | 344 | raw_spin_unlock(&rq->lock); |
| 358 | raw_spin_unlock_irqrestore(&p->pi_lock, *flags); | 345 | raw_spin_unlock_irqrestore(&p->pi_lock, *flags); |
| 346 | |||
| 347 | while (unlikely(task_on_rq_migrating(p))) | ||
| 348 | cpu_relax(); | ||
| 359 | } | 349 | } |
| 360 | } | 350 | } |
| 361 | 351 | ||
| @@ -449,7 +439,15 @@ static void __hrtick_start(void *arg) | |||
| 449 | void hrtick_start(struct rq *rq, u64 delay) | 439 | void hrtick_start(struct rq *rq, u64 delay) |
| 450 | { | 440 | { |
| 451 | struct hrtimer *timer = &rq->hrtick_timer; | 441 | struct hrtimer *timer = &rq->hrtick_timer; |
| 452 | ktime_t time = ktime_add_ns(timer->base->get_time(), delay); | 442 | ktime_t time; |
| 443 | s64 delta; | ||
| 444 | |||
| 445 | /* | ||
| 446 | * Don't schedule slices shorter than 10000ns, that just | ||
| 447 | * doesn't make sense and can cause timer DoS. | ||
| 448 | */ | ||
| 449 | delta = max_t(s64, delay, 10000LL); | ||
| 450 | time = ktime_add_ns(timer->base->get_time(), delta); | ||
| 453 | 451 | ||
| 454 | hrtimer_set_expires(timer, time); | 452 | hrtimer_set_expires(timer, time); |
| 455 | 453 | ||
| @@ -1043,7 +1041,7 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) | |||
| 1043 | * A queue event has occurred, and we're going to schedule. In | 1041 | * A queue event has occurred, and we're going to schedule. In |
| 1044 | * this case, we can save a useless back to back clock update. | 1042 | * this case, we can save a useless back to back clock update. |
| 1045 | */ | 1043 | */ |
| 1046 | if (rq->curr->on_rq && test_tsk_need_resched(rq->curr)) | 1044 | if (task_on_rq_queued(rq->curr) && test_tsk_need_resched(rq->curr)) |
| 1047 | rq->skip_clock_update = 1; | 1045 | rq->skip_clock_update = 1; |
| 1048 | } | 1046 | } |
| 1049 | 1047 | ||
| @@ -1088,7 +1086,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) | |||
| 1088 | 1086 | ||
| 1089 | static void __migrate_swap_task(struct task_struct *p, int cpu) | 1087 | static void __migrate_swap_task(struct task_struct *p, int cpu) |
| 1090 | { | 1088 | { |
| 1091 | if (p->on_rq) { | 1089 | if (task_on_rq_queued(p)) { |
| 1092 | struct rq *src_rq, *dst_rq; | 1090 | struct rq *src_rq, *dst_rq; |
| 1093 | 1091 | ||
| 1094 | src_rq = task_rq(p); | 1092 | src_rq = task_rq(p); |
| @@ -1214,7 +1212,7 @@ static int migration_cpu_stop(void *data); | |||
| 1214 | unsigned long wait_task_inactive(struct task_struct *p, long match_state) | 1212 | unsigned long wait_task_inactive(struct task_struct *p, long match_state) |
| 1215 | { | 1213 | { |
| 1216 | unsigned long flags; | 1214 | unsigned long flags; |
| 1217 | int running, on_rq; | 1215 | int running, queued; |
| 1218 | unsigned long ncsw; | 1216 | unsigned long ncsw; |
| 1219 | struct rq *rq; | 1217 | struct rq *rq; |
| 1220 | 1218 | ||
| @@ -1252,7 +1250,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) | |||
| 1252 | rq = task_rq_lock(p, &flags); | 1250 | rq = task_rq_lock(p, &flags); |
| 1253 | trace_sched_wait_task(p); | 1251 | trace_sched_wait_task(p); |
| 1254 | running = task_running(rq, p); | 1252 | running = task_running(rq, p); |
| 1255 | on_rq = p->on_rq; | 1253 | queued = task_on_rq_queued(p); |
| 1256 | ncsw = 0; | 1254 | ncsw = 0; |
| 1257 | if (!match_state || p->state == match_state) | 1255 | if (!match_state || p->state == match_state) |
| 1258 | ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ | 1256 | ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ |
| @@ -1284,7 +1282,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) | |||
| 1284 | * running right now), it's preempted, and we should | 1282 | * running right now), it's preempted, and we should |
| 1285 | * yield - it could be a while. | 1283 | * yield - it could be a while. |
| 1286 | */ | 1284 | */ |
| 1287 | if (unlikely(on_rq)) { | 1285 | if (unlikely(queued)) { |
| 1288 | ktime_t to = ktime_set(0, NSEC_PER_SEC/HZ); | 1286 | ktime_t to = ktime_set(0, NSEC_PER_SEC/HZ); |
| 1289 | 1287 | ||
| 1290 | set_current_state(TASK_UNINTERRUPTIBLE); | 1288 | set_current_state(TASK_UNINTERRUPTIBLE); |
| @@ -1478,7 +1476,7 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags) | |||
| 1478 | static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags) | 1476 | static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags) |
| 1479 | { | 1477 | { |
| 1480 | activate_task(rq, p, en_flags); | 1478 | activate_task(rq, p, en_flags); |
| 1481 | p->on_rq = 1; | 1479 | p->on_rq = TASK_ON_RQ_QUEUED; |
| 1482 | 1480 | ||
| 1483 | /* if a worker is waking up, notify workqueue */ | 1481 | /* if a worker is waking up, notify workqueue */ |
| 1484 | if (p->flags & PF_WQ_WORKER) | 1482 | if (p->flags & PF_WQ_WORKER) |
| @@ -1537,7 +1535,7 @@ static int ttwu_remote(struct task_struct *p, int wake_flags) | |||
| 1537 | int ret = 0; | 1535 | int ret = 0; |
| 1538 | 1536 | ||
| 1539 | rq = __task_rq_lock(p); | 1537 | rq = __task_rq_lock(p); |
| 1540 | if (p->on_rq) { | 1538 | if (task_on_rq_queued(p)) { |
| 1541 | /* check_preempt_curr() may use rq clock */ | 1539 | /* check_preempt_curr() may use rq clock */ |
| 1542 | update_rq_clock(rq); | 1540 | update_rq_clock(rq); |
| 1543 | ttwu_do_wakeup(rq, p, wake_flags); | 1541 | ttwu_do_wakeup(rq, p, wake_flags); |
| @@ -1620,6 +1618,25 @@ static void ttwu_queue_remote(struct task_struct *p, int cpu) | |||
| 1620 | } | 1618 | } |
| 1621 | } | 1619 | } |
| 1622 | 1620 | ||
| 1621 | void wake_up_if_idle(int cpu) | ||
| 1622 | { | ||
| 1623 | struct rq *rq = cpu_rq(cpu); | ||
| 1624 | unsigned long flags; | ||
| 1625 | |||
| 1626 | if (!is_idle_task(rq->curr)) | ||
| 1627 | return; | ||
| 1628 | |||
| 1629 | if (set_nr_if_polling(rq->idle)) { | ||
| 1630 | trace_sched_wake_idle_without_ipi(cpu); | ||
| 1631 | } else { | ||
| 1632 | raw_spin_lock_irqsave(&rq->lock, flags); | ||
| 1633 | if (is_idle_task(rq->curr)) | ||
| 1634 | smp_send_reschedule(cpu); | ||
| 1635 | /* Else the cpu is not idle, do nothing here */ | ||
| 1636 | raw_spin_unlock_irqrestore(&rq->lock, flags); | ||
| 1637 | } | ||
| 1638 | } | ||
| 1639 | |||
| 1623 | bool cpus_share_cache(int this_cpu, int that_cpu) | 1640 | bool cpus_share_cache(int this_cpu, int that_cpu) |
| 1624 | { | 1641 | { |
| 1625 | return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); | 1642 | return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); |
| @@ -1742,7 +1759,7 @@ static void try_to_wake_up_local(struct task_struct *p) | |||
| 1742 | if (!(p->state & TASK_NORMAL)) | 1759 | if (!(p->state & TASK_NORMAL)) |
| 1743 | goto out; | 1760 | goto out; |
| 1744 | 1761 | ||
| 1745 | if (!p->on_rq) | 1762 | if (!task_on_rq_queued(p)) |
| 1746 | ttwu_activate(rq, p, ENQUEUE_WAKEUP); | 1763 | ttwu_activate(rq, p, ENQUEUE_WAKEUP); |
| 1747 | 1764 | ||
| 1748 | ttwu_do_wakeup(rq, p, 0); | 1765 | ttwu_do_wakeup(rq, p, 0); |
| @@ -1776,6 +1793,20 @@ int wake_up_state(struct task_struct *p, unsigned int state) | |||
| 1776 | } | 1793 | } |
| 1777 | 1794 | ||
| 1778 | /* | 1795 | /* |
| 1796 | * This function clears the sched_dl_entity static params. | ||
| 1797 | */ | ||
| 1798 | void __dl_clear_params(struct task_struct *p) | ||
| 1799 | { | ||
| 1800 | struct sched_dl_entity *dl_se = &p->dl; | ||
| 1801 | |||
| 1802 | dl_se->dl_runtime = 0; | ||
| 1803 | dl_se->dl_deadline = 0; | ||
| 1804 | dl_se->dl_period = 0; | ||
| 1805 | dl_se->flags = 0; | ||
| 1806 | dl_se->dl_bw = 0; | ||
| 1807 | } | ||
| 1808 | |||
| 1809 | /* | ||
| 1779 | * Perform scheduler related setup for a newly forked process p. | 1810 | * Perform scheduler related setup for a newly forked process p. |
| 1780 | * p is forked by current. | 1811 | * p is forked by current. |
| 1781 | * | 1812 | * |
| @@ -1799,10 +1830,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) | |||
| 1799 | 1830 | ||
| 1800 | RB_CLEAR_NODE(&p->dl.rb_node); | 1831 | RB_CLEAR_NODE(&p->dl.rb_node); |
| 1801 | hrtimer_init(&p->dl.dl_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | 1832 | hrtimer_init(&p->dl.dl_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
| 1802 | p->dl.dl_runtime = p->dl.runtime = 0; | 1833 | __dl_clear_params(p); |
| 1803 | p->dl.dl_deadline = p->dl.deadline = 0; | ||
| 1804 | p->dl.dl_period = 0; | ||
| 1805 | p->dl.flags = 0; | ||
| 1806 | 1834 | ||
| 1807 | INIT_LIST_HEAD(&p->rt.run_list); | 1835 | INIT_LIST_HEAD(&p->rt.run_list); |
| 1808 | 1836 | ||
| @@ -1977,6 +2005,8 @@ unsigned long to_ratio(u64 period, u64 runtime) | |||
| 1977 | #ifdef CONFIG_SMP | 2005 | #ifdef CONFIG_SMP |
| 1978 | inline struct dl_bw *dl_bw_of(int i) | 2006 | inline struct dl_bw *dl_bw_of(int i) |
| 1979 | { | 2007 | { |
| 2008 | rcu_lockdep_assert(rcu_read_lock_sched_held(), | ||
| 2009 | "sched RCU must be held"); | ||
| 1980 | return &cpu_rq(i)->rd->dl_bw; | 2010 | return &cpu_rq(i)->rd->dl_bw; |
| 1981 | } | 2011 | } |
| 1982 | 2012 | ||
| @@ -1985,6 +2015,8 @@ static inline int dl_bw_cpus(int i) | |||
| 1985 | struct root_domain *rd = cpu_rq(i)->rd; | 2015 | struct root_domain *rd = cpu_rq(i)->rd; |
| 1986 | int cpus = 0; | 2016 | int cpus = 0; |
| 1987 | 2017 | ||
| 2018 | rcu_lockdep_assert(rcu_read_lock_sched_held(), | ||
| 2019 | "sched RCU must be held"); | ||
| 1988 | for_each_cpu_and(i, rd->span, cpu_active_mask) | 2020 | for_each_cpu_and(i, rd->span, cpu_active_mask) |
| 1989 | cpus++; | 2021 | cpus++; |
| 1990 | 2022 | ||
| @@ -2095,7 +2127,7 @@ void wake_up_new_task(struct task_struct *p) | |||
| 2095 | init_task_runnable_average(p); | 2127 | init_task_runnable_average(p); |
| 2096 | rq = __task_rq_lock(p); | 2128 | rq = __task_rq_lock(p); |
| 2097 | activate_task(rq, p, 0); | 2129 | activate_task(rq, p, 0); |
| 2098 | p->on_rq = 1; | 2130 | p->on_rq = TASK_ON_RQ_QUEUED; |
| 2099 | trace_sched_wakeup_new(p, true); | 2131 | trace_sched_wakeup_new(p, true); |
| 2100 | check_preempt_curr(rq, p, WF_FORK); | 2132 | check_preempt_curr(rq, p, WF_FORK); |
| 2101 | #ifdef CONFIG_SMP | 2133 | #ifdef CONFIG_SMP |
| @@ -2287,10 +2319,6 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev) | |||
| 2287 | */ | 2319 | */ |
| 2288 | post_schedule(rq); | 2320 | post_schedule(rq); |
| 2289 | 2321 | ||
| 2290 | #ifdef __ARCH_WANT_UNLOCKED_CTXSW | ||
| 2291 | /* In this case, finish_task_switch does not reenable preemption */ | ||
| 2292 | preempt_enable(); | ||
| 2293 | #endif | ||
| 2294 | if (current->set_child_tid) | 2322 | if (current->set_child_tid) |
| 2295 | put_user(task_pid_vnr(current), current->set_child_tid); | 2323 | put_user(task_pid_vnr(current), current->set_child_tid); |
| 2296 | } | 2324 | } |
| @@ -2333,9 +2361,7 @@ context_switch(struct rq *rq, struct task_struct *prev, | |||
| 2333 | * of the scheduler it's an obvious special-case), so we | 2361 | * of the scheduler it's an obvious special-case), so we |
| 2334 | * do an early lockdep release here: | 2362 | * do an early lockdep release here: |
| 2335 | */ | 2363 | */ |
| 2336 | #ifndef __ARCH_WANT_UNLOCKED_CTXSW | ||
| 2337 | spin_release(&rq->lock.dep_map, 1, _THIS_IP_); | 2364 | spin_release(&rq->lock.dep_map, 1, _THIS_IP_); |
| 2338 | #endif | ||
| 2339 | 2365 | ||
| 2340 | context_tracking_task_switch(prev, next); | 2366 | context_tracking_task_switch(prev, next); |
| 2341 | /* Here we just switch the register state and the stack. */ | 2367 | /* Here we just switch the register state and the stack. */ |
| @@ -2366,6 +2392,18 @@ unsigned long nr_running(void) | |||
| 2366 | return sum; | 2392 | return sum; |
| 2367 | } | 2393 | } |
| 2368 | 2394 | ||
| 2395 | /* | ||
| 2396 | * Check if only the current task is running on the cpu. | ||
| 2397 | */ | ||
| 2398 | bool single_task_running(void) | ||
| 2399 | { | ||
| 2400 | if (cpu_rq(smp_processor_id())->nr_running == 1) | ||
| 2401 | return true; | ||
| 2402 | else | ||
| 2403 | return false; | ||
| 2404 | } | ||
| 2405 | EXPORT_SYMBOL(single_task_running); | ||
| 2406 | |||
| 2369 | unsigned long long nr_context_switches(void) | 2407 | unsigned long long nr_context_switches(void) |
| 2370 | { | 2408 | { |
| 2371 | int i; | 2409 | int i; |
| @@ -2393,6 +2431,13 @@ unsigned long nr_iowait_cpu(int cpu) | |||
| 2393 | return atomic_read(&this->nr_iowait); | 2431 | return atomic_read(&this->nr_iowait); |
| 2394 | } | 2432 | } |
| 2395 | 2433 | ||
| 2434 | void get_iowait_load(unsigned long *nr_waiters, unsigned long *load) | ||
| 2435 | { | ||
| 2436 | struct rq *this = this_rq(); | ||
| 2437 | *nr_waiters = atomic_read(&this->nr_iowait); | ||
| 2438 | *load = this->cpu_load[0]; | ||
| 2439 | } | ||
| 2440 | |||
| 2396 | #ifdef CONFIG_SMP | 2441 | #ifdef CONFIG_SMP |
| 2397 | 2442 | ||
| 2398 | /* | 2443 | /* |
| @@ -2444,7 +2489,7 @@ static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq) | |||
| 2444 | * project cycles that may never be accounted to this | 2489 | * project cycles that may never be accounted to this |
| 2445 | * thread, breaking clock_gettime(). | 2490 | * thread, breaking clock_gettime(). |
| 2446 | */ | 2491 | */ |
| 2447 | if (task_current(rq, p) && p->on_rq) { | 2492 | if (task_current(rq, p) && task_on_rq_queued(p)) { |
| 2448 | update_rq_clock(rq); | 2493 | update_rq_clock(rq); |
| 2449 | ns = rq_clock_task(rq) - p->se.exec_start; | 2494 | ns = rq_clock_task(rq) - p->se.exec_start; |
| 2450 | if ((s64)ns < 0) | 2495 | if ((s64)ns < 0) |
| @@ -2490,7 +2535,7 @@ unsigned long long task_sched_runtime(struct task_struct *p) | |||
| 2490 | * If we see ->on_cpu without ->on_rq, the task is leaving, and has | 2535 | * If we see ->on_cpu without ->on_rq, the task is leaving, and has |
| 2491 | * been accounted, so we're correct here as well. | 2536 | * been accounted, so we're correct here as well. |
| 2492 | */ | 2537 | */ |
| 2493 | if (!p->on_cpu || !p->on_rq) | 2538 | if (!p->on_cpu || !task_on_rq_queued(p)) |
| 2494 | return p->se.sum_exec_runtime; | 2539 | return p->se.sum_exec_runtime; |
| 2495 | #endif | 2540 | #endif |
| 2496 | 2541 | ||
| @@ -2653,6 +2698,9 @@ static noinline void __schedule_bug(struct task_struct *prev) | |||
| 2653 | */ | 2698 | */ |
| 2654 | static inline void schedule_debug(struct task_struct *prev) | 2699 | static inline void schedule_debug(struct task_struct *prev) |
| 2655 | { | 2700 | { |
| 2701 | #ifdef CONFIG_SCHED_STACK_END_CHECK | ||
| 2702 | BUG_ON(unlikely(task_stack_end_corrupted(prev))); | ||
| 2703 | #endif | ||
| 2656 | /* | 2704 | /* |
| 2657 | * Test if we are atomic. Since do_exit() needs to call into | 2705 | * Test if we are atomic. Since do_exit() needs to call into |
| 2658 | * schedule() atomically, we ignore that path. Otherwise whine | 2706 | * schedule() atomically, we ignore that path. Otherwise whine |
| @@ -2794,7 +2842,7 @@ need_resched: | |||
| 2794 | switch_count = &prev->nvcsw; | 2842 | switch_count = &prev->nvcsw; |
| 2795 | } | 2843 | } |
| 2796 | 2844 | ||
| 2797 | if (prev->on_rq || rq->skip_clock_update < 0) | 2845 | if (task_on_rq_queued(prev) || rq->skip_clock_update < 0) |
| 2798 | update_rq_clock(rq); | 2846 | update_rq_clock(rq); |
| 2799 | 2847 | ||
| 2800 | next = pick_next_task(rq, prev); | 2848 | next = pick_next_task(rq, prev); |
| @@ -2903,6 +2951,47 @@ asmlinkage __visible void __sched notrace preempt_schedule(void) | |||
| 2903 | } | 2951 | } |
| 2904 | NOKPROBE_SYMBOL(preempt_schedule); | 2952 | NOKPROBE_SYMBOL(preempt_schedule); |
| 2905 | EXPORT_SYMBOL(preempt_schedule); | 2953 | EXPORT_SYMBOL(preempt_schedule); |
| 2954 | |||
| 2955 | #ifdef CONFIG_CONTEXT_TRACKING | ||
| 2956 | /** | ||
| 2957 | * preempt_schedule_context - preempt_schedule called by tracing | ||
| 2958 | * | ||
| 2959 | * The tracing infrastructure uses preempt_enable_notrace to prevent | ||
| 2960 | * recursion and tracing preempt enabling caused by the tracing | ||
| 2961 | * infrastructure itself. But as tracing can happen in areas coming | ||
| 2962 | * from userspace or just about to enter userspace, a preempt enable | ||
| 2963 | * can occur before user_exit() is called. This will cause the scheduler | ||
| 2964 | * to be called when the system is still in usermode. | ||
| 2965 | * | ||
| 2966 | * To prevent this, the preempt_enable_notrace will use this function | ||
| 2967 | * instead of preempt_schedule() to exit user context if needed before | ||
| 2968 | * calling the scheduler. | ||
| 2969 | */ | ||
| 2970 | asmlinkage __visible void __sched notrace preempt_schedule_context(void) | ||
| 2971 | { | ||
| 2972 | enum ctx_state prev_ctx; | ||
| 2973 | |||
| 2974 | if (likely(!preemptible())) | ||
| 2975 | return; | ||
| 2976 | |||
| 2977 | do { | ||
| 2978 | __preempt_count_add(PREEMPT_ACTIVE); | ||
| 2979 | /* | ||
| 2980 | * Needs preempt disabled in case user_exit() is traced | ||
| 2981 | * and the tracer calls preempt_enable_notrace() causing | ||
| 2982 | * an infinite recursion. | ||
| 2983 | */ | ||
| 2984 | prev_ctx = exception_enter(); | ||
| 2985 | __schedule(); | ||
| 2986 | exception_exit(prev_ctx); | ||
| 2987 | |||
| 2988 | __preempt_count_sub(PREEMPT_ACTIVE); | ||
| 2989 | barrier(); | ||
| 2990 | } while (need_resched()); | ||
| 2991 | } | ||
| 2992 | EXPORT_SYMBOL_GPL(preempt_schedule_context); | ||
| 2993 | #endif /* CONFIG_CONTEXT_TRACKING */ | ||
| 2994 | |||
| 2906 | #endif /* CONFIG_PREEMPT */ | 2995 | #endif /* CONFIG_PREEMPT */ |
| 2907 | 2996 | ||
| 2908 | /* | 2997 | /* |
| @@ -2959,7 +3048,7 @@ EXPORT_SYMBOL(default_wake_function); | |||
| 2959 | */ | 3048 | */ |
| 2960 | void rt_mutex_setprio(struct task_struct *p, int prio) | 3049 | void rt_mutex_setprio(struct task_struct *p, int prio) |
| 2961 | { | 3050 | { |
| 2962 | int oldprio, on_rq, running, enqueue_flag = 0; | 3051 | int oldprio, queued, running, enqueue_flag = 0; |
| 2963 | struct rq *rq; | 3052 | struct rq *rq; |
| 2964 | const struct sched_class *prev_class; | 3053 | const struct sched_class *prev_class; |
| 2965 | 3054 | ||
| @@ -2988,12 +3077,12 @@ void rt_mutex_setprio(struct task_struct *p, int prio) | |||
| 2988 | trace_sched_pi_setprio(p, prio); | 3077 | trace_sched_pi_setprio(p, prio); |
| 2989 | oldprio = p->prio; | 3078 | oldprio = p->prio; |
| 2990 | prev_class = p->sched_class; | 3079 | prev_class = p->sched_class; |
| 2991 | on_rq = p->on_rq; | 3080 | queued = task_on_rq_queued(p); |
| 2992 | running = task_current(rq, p); | 3081 | running = task_current(rq, p); |
| 2993 | if (on_rq) | 3082 | if (queued) |
| 2994 | dequeue_task(rq, p, 0); | 3083 | dequeue_task(rq, p, 0); |
| 2995 | if (running) | 3084 | if (running) |
| 2996 | p->sched_class->put_prev_task(rq, p); | 3085 | put_prev_task(rq, p); |
| 2997 | 3086 | ||
| 2998 | /* | 3087 | /* |
| 2999 | * Boosting conditions are: | 3088 | * Boosting conditions are: |
| @@ -3030,7 +3119,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio) | |||
| 3030 | 3119 | ||
| 3031 | if (running) | 3120 | if (running) |
| 3032 | p->sched_class->set_curr_task(rq); | 3121 | p->sched_class->set_curr_task(rq); |
| 3033 | if (on_rq) | 3122 | if (queued) |
| 3034 | enqueue_task(rq, p, enqueue_flag); | 3123 | enqueue_task(rq, p, enqueue_flag); |
| 3035 | 3124 | ||
| 3036 | check_class_changed(rq, p, prev_class, oldprio); | 3125 | check_class_changed(rq, p, prev_class, oldprio); |
| @@ -3041,7 +3130,7 @@ out_unlock: | |||
| 3041 | 3130 | ||
| 3042 | void set_user_nice(struct task_struct *p, long nice) | 3131 | void set_user_nice(struct task_struct *p, long nice) |
| 3043 | { | 3132 | { |
| 3044 | int old_prio, delta, on_rq; | 3133 | int old_prio, delta, queued; |
| 3045 | unsigned long flags; | 3134 | unsigned long flags; |
| 3046 | struct rq *rq; | 3135 | struct rq *rq; |
| 3047 | 3136 | ||
| @@ -3062,8 +3151,8 @@ void set_user_nice(struct task_struct *p, long nice) | |||
| 3062 | p->static_prio = NICE_TO_PRIO(nice); | 3151 | p->static_prio = NICE_TO_PRIO(nice); |
| 3063 | goto out_unlock; | 3152 | goto out_unlock; |
| 3064 | } | 3153 | } |
| 3065 | on_rq = p->on_rq; | 3154 | queued = task_on_rq_queued(p); |
| 3066 | if (on_rq) | 3155 | if (queued) |
| 3067 | dequeue_task(rq, p, 0); | 3156 | dequeue_task(rq, p, 0); |
| 3068 | 3157 | ||
| 3069 | p->static_prio = NICE_TO_PRIO(nice); | 3158 | p->static_prio = NICE_TO_PRIO(nice); |
| @@ -3072,7 +3161,7 @@ void set_user_nice(struct task_struct *p, long nice) | |||
| 3072 | p->prio = effective_prio(p); | 3161 | p->prio = effective_prio(p); |
| 3073 | delta = p->prio - old_prio; | 3162 | delta = p->prio - old_prio; |
| 3074 | 3163 | ||
| 3075 | if (on_rq) { | 3164 | if (queued) { |
| 3076 | enqueue_task(rq, p, 0); | 3165 | enqueue_task(rq, p, 0); |
| 3077 | /* | 3166 | /* |
| 3078 | * If the task increased its priority or is running and | 3167 | * If the task increased its priority or is running and |
| @@ -3344,7 +3433,7 @@ static int __sched_setscheduler(struct task_struct *p, | |||
| 3344 | { | 3433 | { |
| 3345 | int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 : | 3434 | int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 : |
| 3346 | MAX_RT_PRIO - 1 - attr->sched_priority; | 3435 | MAX_RT_PRIO - 1 - attr->sched_priority; |
| 3347 | int retval, oldprio, oldpolicy = -1, on_rq, running; | 3436 | int retval, oldprio, oldpolicy = -1, queued, running; |
| 3348 | int policy = attr->sched_policy; | 3437 | int policy = attr->sched_policy; |
| 3349 | unsigned long flags; | 3438 | unsigned long flags; |
| 3350 | const struct sched_class *prev_class; | 3439 | const struct sched_class *prev_class; |
| @@ -3541,19 +3630,19 @@ change: | |||
| 3541 | return 0; | 3630 | return 0; |
| 3542 | } | 3631 | } |
| 3543 | 3632 | ||
| 3544 | on_rq = p->on_rq; | 3633 | queued = task_on_rq_queued(p); |
| 3545 | running = task_current(rq, p); | 3634 | running = task_current(rq, p); |
| 3546 | if (on_rq) | 3635 | if (queued) |
| 3547 | dequeue_task(rq, p, 0); | 3636 | dequeue_task(rq, p, 0); |
| 3548 | if (running) | 3637 | if (running) |
| 3549 | p->sched_class->put_prev_task(rq, p); | 3638 | put_prev_task(rq, p); |
| 3550 | 3639 | ||
| 3551 | prev_class = p->sched_class; | 3640 | prev_class = p->sched_class; |
| 3552 | __setscheduler(rq, p, attr); | 3641 | __setscheduler(rq, p, attr); |
| 3553 | 3642 | ||
| 3554 | if (running) | 3643 | if (running) |
| 3555 | p->sched_class->set_curr_task(rq); | 3644 | p->sched_class->set_curr_task(rq); |
| 3556 | if (on_rq) { | 3645 | if (queued) { |
| 3557 | /* | 3646 | /* |
| 3558 | * We enqueue to tail when the priority of a task is | 3647 | * We enqueue to tail when the priority of a task is |
| 3559 | * increased (user space view). | 3648 | * increased (user space view). |
| @@ -3977,14 +4066,14 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) | |||
| 3977 | rcu_read_lock(); | 4066 | rcu_read_lock(); |
| 3978 | if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) { | 4067 | if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) { |
| 3979 | rcu_read_unlock(); | 4068 | rcu_read_unlock(); |
| 3980 | goto out_unlock; | 4069 | goto out_free_new_mask; |
| 3981 | } | 4070 | } |
| 3982 | rcu_read_unlock(); | 4071 | rcu_read_unlock(); |
| 3983 | } | 4072 | } |
| 3984 | 4073 | ||
| 3985 | retval = security_task_setscheduler(p); | 4074 | retval = security_task_setscheduler(p); |
| 3986 | if (retval) | 4075 | if (retval) |
| 3987 | goto out_unlock; | 4076 | goto out_free_new_mask; |
| 3988 | 4077 | ||
| 3989 | 4078 | ||
| 3990 | cpuset_cpus_allowed(p, cpus_allowed); | 4079 | cpuset_cpus_allowed(p, cpus_allowed); |
| @@ -3997,13 +4086,14 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) | |||
| 3997 | * root_domain. | 4086 | * root_domain. |
| 3998 | */ | 4087 | */ |
| 3999 | #ifdef CONFIG_SMP | 4088 | #ifdef CONFIG_SMP |
| 4000 | if (task_has_dl_policy(p)) { | 4089 | if (task_has_dl_policy(p) && dl_bandwidth_enabled()) { |
| 4001 | const struct cpumask *span = task_rq(p)->rd->span; | 4090 | rcu_read_lock(); |
| 4002 | 4091 | if (!cpumask_subset(task_rq(p)->rd->span, new_mask)) { | |
| 4003 | if (dl_bandwidth_enabled() && !cpumask_subset(span, new_mask)) { | ||
| 4004 | retval = -EBUSY; | 4092 | retval = -EBUSY; |
| 4005 | goto out_unlock; | 4093 | rcu_read_unlock(); |
| 4094 | goto out_free_new_mask; | ||
| 4006 | } | 4095 | } |
| 4096 | rcu_read_unlock(); | ||
| 4007 | } | 4097 | } |
| 4008 | #endif | 4098 | #endif |
| 4009 | again: | 4099 | again: |
| @@ -4021,7 +4111,7 @@ again: | |||
| 4021 | goto again; | 4111 | goto again; |
| 4022 | } | 4112 | } |
| 4023 | } | 4113 | } |
| 4024 | out_unlock: | 4114 | out_free_new_mask: |
| 4025 | free_cpumask_var(new_mask); | 4115 | free_cpumask_var(new_mask); |
| 4026 | out_free_cpus_allowed: | 4116 | out_free_cpus_allowed: |
| 4027 | free_cpumask_var(cpus_allowed); | 4117 | free_cpumask_var(cpus_allowed); |
| @@ -4505,7 +4595,7 @@ void show_state_filter(unsigned long state_filter) | |||
| 4505 | " task PC stack pid father\n"); | 4595 | " task PC stack pid father\n"); |
| 4506 | #endif | 4596 | #endif |
| 4507 | rcu_read_lock(); | 4597 | rcu_read_lock(); |
| 4508 | do_each_thread(g, p) { | 4598 | for_each_process_thread(g, p) { |
| 4509 | /* | 4599 | /* |
| 4510 | * reset the NMI-timeout, listing all files on a slow | 4600 | * reset the NMI-timeout, listing all files on a slow |
| 4511 | * console might take a lot of time: | 4601 | * console might take a lot of time: |
| @@ -4513,7 +4603,7 @@ void show_state_filter(unsigned long state_filter) | |||
| 4513 | touch_nmi_watchdog(); | 4603 | touch_nmi_watchdog(); |
| 4514 | if (!state_filter || (p->state & state_filter)) | 4604 | if (!state_filter || (p->state & state_filter)) |
| 4515 | sched_show_task(p); | 4605 | sched_show_task(p); |
| 4516 | } while_each_thread(g, p); | 4606 | } |
| 4517 | 4607 | ||
| 4518 | touch_all_softlockup_watchdogs(); | 4608 | touch_all_softlockup_watchdogs(); |
| 4519 | 4609 | ||
| @@ -4568,7 +4658,7 @@ void init_idle(struct task_struct *idle, int cpu) | |||
| 4568 | rcu_read_unlock(); | 4658 | rcu_read_unlock(); |
| 4569 | 4659 | ||
| 4570 | rq->curr = rq->idle = idle; | 4660 | rq->curr = rq->idle = idle; |
| 4571 | idle->on_rq = 1; | 4661 | idle->on_rq = TASK_ON_RQ_QUEUED; |
| 4572 | #if defined(CONFIG_SMP) | 4662 | #if defined(CONFIG_SMP) |
| 4573 | idle->on_cpu = 1; | 4663 | idle->on_cpu = 1; |
| 4574 | #endif | 4664 | #endif |
| @@ -4589,6 +4679,33 @@ void init_idle(struct task_struct *idle, int cpu) | |||
| 4589 | } | 4679 | } |
| 4590 | 4680 | ||
| 4591 | #ifdef CONFIG_SMP | 4681 | #ifdef CONFIG_SMP |
| 4682 | /* | ||
| 4683 | * move_queued_task - move a queued task to new rq. | ||
| 4684 | * | ||
| 4685 | * Returns (locked) new rq. Old rq's lock is released. | ||
| 4686 | */ | ||
| 4687 | static struct rq *move_queued_task(struct task_struct *p, int new_cpu) | ||
| 4688 | { | ||
| 4689 | struct rq *rq = task_rq(p); | ||
| 4690 | |||
| 4691 | lockdep_assert_held(&rq->lock); | ||
| 4692 | |||
| 4693 | dequeue_task(rq, p, 0); | ||
| 4694 | p->on_rq = TASK_ON_RQ_MIGRATING; | ||
| 4695 | set_task_cpu(p, new_cpu); | ||
| 4696 | raw_spin_unlock(&rq->lock); | ||
| 4697 | |||
| 4698 | rq = cpu_rq(new_cpu); | ||
| 4699 | |||
| 4700 | raw_spin_lock(&rq->lock); | ||
| 4701 | BUG_ON(task_cpu(p) != new_cpu); | ||
| 4702 | p->on_rq = TASK_ON_RQ_QUEUED; | ||
| 4703 | enqueue_task(rq, p, 0); | ||
| 4704 | check_preempt_curr(rq, p, 0); | ||
| 4705 | |||
| 4706 | return rq; | ||
| 4707 | } | ||
| 4708 | |||
| 4592 | void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) | 4709 | void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) |
| 4593 | { | 4710 | { |
| 4594 | if (p->sched_class && p->sched_class->set_cpus_allowed) | 4711 | if (p->sched_class && p->sched_class->set_cpus_allowed) |
| @@ -4645,14 +4762,15 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) | |||
| 4645 | goto out; | 4762 | goto out; |
| 4646 | 4763 | ||
| 4647 | dest_cpu = cpumask_any_and(cpu_active_mask, new_mask); | 4764 | dest_cpu = cpumask_any_and(cpu_active_mask, new_mask); |
| 4648 | if (p->on_rq) { | 4765 | if (task_running(rq, p) || p->state == TASK_WAKING) { |
| 4649 | struct migration_arg arg = { p, dest_cpu }; | 4766 | struct migration_arg arg = { p, dest_cpu }; |
| 4650 | /* Need help from migration thread: drop lock and wait. */ | 4767 | /* Need help from migration thread: drop lock and wait. */ |
| 4651 | task_rq_unlock(rq, p, &flags); | 4768 | task_rq_unlock(rq, p, &flags); |
| 4652 | stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); | 4769 | stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); |
| 4653 | tlb_migrate_finish(p->mm); | 4770 | tlb_migrate_finish(p->mm); |
| 4654 | return 0; | 4771 | return 0; |
| 4655 | } | 4772 | } else if (task_on_rq_queued(p)) |
| 4773 | rq = move_queued_task(p, dest_cpu); | ||
| 4656 | out: | 4774 | out: |
| 4657 | task_rq_unlock(rq, p, &flags); | 4775 | task_rq_unlock(rq, p, &flags); |
| 4658 | 4776 | ||
| @@ -4673,20 +4791,20 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); | |||
| 4673 | */ | 4791 | */ |
| 4674 | static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) | 4792 | static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) |
| 4675 | { | 4793 | { |
| 4676 | struct rq *rq_dest, *rq_src; | 4794 | struct rq *rq; |
| 4677 | int ret = 0; | 4795 | int ret = 0; |
| 4678 | 4796 | ||
| 4679 | if (unlikely(!cpu_active(dest_cpu))) | 4797 | if (unlikely(!cpu_active(dest_cpu))) |
| 4680 | return ret; | 4798 | return ret; |
| 4681 | 4799 | ||
| 4682 | rq_src = cpu_rq(src_cpu); | 4800 | rq = cpu_rq(src_cpu); |
| 4683 | rq_dest = cpu_rq(dest_cpu); | ||
| 4684 | 4801 | ||
| 4685 | raw_spin_lock(&p->pi_lock); | 4802 | raw_spin_lock(&p->pi_lock); |
| 4686 | double_rq_lock(rq_src, rq_dest); | 4803 | raw_spin_lock(&rq->lock); |
| 4687 | /* Already moved. */ | 4804 | /* Already moved. */ |
| 4688 | if (task_cpu(p) != src_cpu) | 4805 | if (task_cpu(p) != src_cpu) |
| 4689 | goto done; | 4806 | goto done; |
| 4807 | |||
| 4690 | /* Affinity changed (again). */ | 4808 | /* Affinity changed (again). */ |
| 4691 | if (!cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p))) | 4809 | if (!cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p))) |
| 4692 | goto fail; | 4810 | goto fail; |
| @@ -4695,16 +4813,12 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) | |||
| 4695 | * If we're not on a rq, the next wake-up will ensure we're | 4813 | * If we're not on a rq, the next wake-up will ensure we're |
| 4696 | * placed properly. | 4814 | * placed properly. |
| 4697 | */ | 4815 | */ |
| 4698 | if (p->on_rq) { | 4816 | if (task_on_rq_queued(p)) |
| 4699 | dequeue_task(rq_src, p, 0); | 4817 | rq = move_queued_task(p, dest_cpu); |
| 4700 | set_task_cpu(p, dest_cpu); | ||
| 4701 | enqueue_task(rq_dest, p, 0); | ||
| 4702 | check_preempt_curr(rq_dest, p, 0); | ||
| 4703 | } | ||
| 4704 | done: | 4818 | done: |
| 4705 | ret = 1; | 4819 | ret = 1; |
| 4706 | fail: | 4820 | fail: |
| 4707 | double_rq_unlock(rq_src, rq_dest); | 4821 | raw_spin_unlock(&rq->lock); |
| 4708 | raw_spin_unlock(&p->pi_lock); | 4822 | raw_spin_unlock(&p->pi_lock); |
| 4709 | return ret; | 4823 | return ret; |
| 4710 | } | 4824 | } |
| @@ -4736,22 +4850,22 @@ void sched_setnuma(struct task_struct *p, int nid) | |||
| 4736 | { | 4850 | { |
| 4737 | struct rq *rq; | 4851 | struct rq *rq; |
| 4738 | unsigned long flags; | 4852 | unsigned long flags; |
| 4739 | bool on_rq, running; | 4853 | bool queued, running; |
| 4740 | 4854 | ||
| 4741 | rq = task_rq_lock(p, &flags); | 4855 | rq = task_rq_lock(p, &flags); |
| 4742 | on_rq = p->on_rq; | 4856 | queued = task_on_rq_queued(p); |
| 4743 | running = task_current(rq, p); | 4857 | running = task_current(rq, p); |
| 4744 | 4858 | ||
| 4745 | if (on_rq) | 4859 | if (queued) |
| 4746 | dequeue_task(rq, p, 0); | 4860 | dequeue_task(rq, p, 0); |
| 4747 | if (running) | 4861 | if (running) |
| 4748 | p->sched_class->put_prev_task(rq, p); | 4862 | put_prev_task(rq, p); |
| 4749 | 4863 | ||
| 4750 | p->numa_preferred_nid = nid; | 4864 | p->numa_preferred_nid = nid; |
| 4751 | 4865 | ||
| 4752 | if (running) | 4866 | if (running) |
| 4753 | p->sched_class->set_curr_task(rq); | 4867 | p->sched_class->set_curr_task(rq); |
| 4754 | if (on_rq) | 4868 | if (queued) |
| 4755 | enqueue_task(rq, p, 0); | 4869 | enqueue_task(rq, p, 0); |
| 4756 | task_rq_unlock(rq, p, &flags); | 4870 | task_rq_unlock(rq, p, &flags); |
| 4757 | } | 4871 | } |
| @@ -4771,6 +4885,12 @@ static int migration_cpu_stop(void *data) | |||
| 4771 | * be on another cpu but it doesn't matter. | 4885 | * be on another cpu but it doesn't matter. |
| 4772 | */ | 4886 | */ |
| 4773 | local_irq_disable(); | 4887 | local_irq_disable(); |
| 4888 | /* | ||
| 4889 | * We need to explicitly wake pending tasks before running | ||
| 4890 | * __migrate_task() such that we will not miss enforcing cpus_allowed | ||
| 4891 | * during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test. | ||
| 4892 | */ | ||
| 4893 | sched_ttwu_pending(); | ||
| 4774 | __migrate_task(arg->task, raw_smp_processor_id(), arg->dest_cpu); | 4894 | __migrate_task(arg->task, raw_smp_processor_id(), arg->dest_cpu); |
| 4775 | local_irq_enable(); | 4895 | local_irq_enable(); |
| 4776 | return 0; | 4896 | return 0; |
| @@ -5181,6 +5301,7 @@ static int sched_cpu_inactive(struct notifier_block *nfb, | |||
| 5181 | { | 5301 | { |
| 5182 | unsigned long flags; | 5302 | unsigned long flags; |
| 5183 | long cpu = (long)hcpu; | 5303 | long cpu = (long)hcpu; |
| 5304 | struct dl_bw *dl_b; | ||
| 5184 | 5305 | ||
| 5185 | switch (action & ~CPU_TASKS_FROZEN) { | 5306 | switch (action & ~CPU_TASKS_FROZEN) { |
| 5186 | case CPU_DOWN_PREPARE: | 5307 | case CPU_DOWN_PREPARE: |
| @@ -5188,15 +5309,19 @@ static int sched_cpu_inactive(struct notifier_block *nfb, | |||
| 5188 | 5309 | ||
| 5189 | /* explicitly allow suspend */ | 5310 | /* explicitly allow suspend */ |
| 5190 | if (!(action & CPU_TASKS_FROZEN)) { | 5311 | if (!(action & CPU_TASKS_FROZEN)) { |
| 5191 | struct dl_bw *dl_b = dl_bw_of(cpu); | ||
| 5192 | bool overflow; | 5312 | bool overflow; |
| 5193 | int cpus; | 5313 | int cpus; |
| 5194 | 5314 | ||
| 5315 | rcu_read_lock_sched(); | ||
| 5316 | dl_b = dl_bw_of(cpu); | ||
| 5317 | |||
| 5195 | raw_spin_lock_irqsave(&dl_b->lock, flags); | 5318 | raw_spin_lock_irqsave(&dl_b->lock, flags); |
| 5196 | cpus = dl_bw_cpus(cpu); | 5319 | cpus = dl_bw_cpus(cpu); |
| 5197 | overflow = __dl_overflow(dl_b, cpus, 0, 0); | 5320 | overflow = __dl_overflow(dl_b, cpus, 0, 0); |
| 5198 | raw_spin_unlock_irqrestore(&dl_b->lock, flags); | 5321 | raw_spin_unlock_irqrestore(&dl_b->lock, flags); |
| 5199 | 5322 | ||
| 5323 | rcu_read_unlock_sched(); | ||
| 5324 | |||
| 5200 | if (overflow) | 5325 | if (overflow) |
| 5201 | return notifier_from_errno(-EBUSY); | 5326 | return notifier_from_errno(-EBUSY); |
| 5202 | } | 5327 | } |
| @@ -5739,7 +5864,7 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu) | |||
| 5739 | const struct cpumask *span = sched_domain_span(sd); | 5864 | const struct cpumask *span = sched_domain_span(sd); |
| 5740 | struct cpumask *covered = sched_domains_tmpmask; | 5865 | struct cpumask *covered = sched_domains_tmpmask; |
| 5741 | struct sd_data *sdd = sd->private; | 5866 | struct sd_data *sdd = sd->private; |
| 5742 | struct sched_domain *child; | 5867 | struct sched_domain *sibling; |
| 5743 | int i; | 5868 | int i; |
| 5744 | 5869 | ||
| 5745 | cpumask_clear(covered); | 5870 | cpumask_clear(covered); |
| @@ -5750,10 +5875,10 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu) | |||
| 5750 | if (cpumask_test_cpu(i, covered)) | 5875 | if (cpumask_test_cpu(i, covered)) |
| 5751 | continue; | 5876 | continue; |
| 5752 | 5877 | ||
| 5753 | child = *per_cpu_ptr(sdd->sd, i); | 5878 | sibling = *per_cpu_ptr(sdd->sd, i); |
| 5754 | 5879 | ||
| 5755 | /* See the comment near build_group_mask(). */ | 5880 | /* See the comment near build_group_mask(). */ |
| 5756 | if (!cpumask_test_cpu(i, sched_domain_span(child))) | 5881 | if (!cpumask_test_cpu(i, sched_domain_span(sibling))) |
| 5757 | continue; | 5882 | continue; |
| 5758 | 5883 | ||
| 5759 | sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(), | 5884 | sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(), |
| @@ -5763,10 +5888,9 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu) | |||
| 5763 | goto fail; | 5888 | goto fail; |
| 5764 | 5889 | ||
| 5765 | sg_span = sched_group_cpus(sg); | 5890 | sg_span = sched_group_cpus(sg); |
| 5766 | if (child->child) { | 5891 | if (sibling->child) |
| 5767 | child = child->child; | 5892 | cpumask_copy(sg_span, sched_domain_span(sibling->child)); |
| 5768 | cpumask_copy(sg_span, sched_domain_span(child)); | 5893 | else |
| 5769 | } else | ||
| 5770 | cpumask_set_cpu(i, sg_span); | 5894 | cpumask_set_cpu(i, sg_span); |
| 5771 | 5895 | ||
| 5772 | cpumask_or(covered, covered, sg_span); | 5896 | cpumask_or(covered, covered, sg_span); |
| @@ -7117,13 +7241,13 @@ static void normalize_task(struct rq *rq, struct task_struct *p) | |||
| 7117 | .sched_policy = SCHED_NORMAL, | 7241 | .sched_policy = SCHED_NORMAL, |
| 7118 | }; | 7242 | }; |
| 7119 | int old_prio = p->prio; | 7243 | int old_prio = p->prio; |
| 7120 | int on_rq; | 7244 | int queued; |
| 7121 | 7245 | ||
| 7122 | on_rq = p->on_rq; | 7246 | queued = task_on_rq_queued(p); |
| 7123 | if (on_rq) | 7247 | if (queued) |
| 7124 | dequeue_task(rq, p, 0); | 7248 | dequeue_task(rq, p, 0); |
| 7125 | __setscheduler(rq, p, &attr); | 7249 | __setscheduler(rq, p, &attr); |
| 7126 | if (on_rq) { | 7250 | if (queued) { |
| 7127 | enqueue_task(rq, p, 0); | 7251 | enqueue_task(rq, p, 0); |
| 7128 | resched_curr(rq); | 7252 | resched_curr(rq); |
| 7129 | } | 7253 | } |
| @@ -7137,12 +7261,12 @@ void normalize_rt_tasks(void) | |||
| 7137 | unsigned long flags; | 7261 | unsigned long flags; |
| 7138 | struct rq *rq; | 7262 | struct rq *rq; |
| 7139 | 7263 | ||
| 7140 | read_lock_irqsave(&tasklist_lock, flags); | 7264 | read_lock(&tasklist_lock); |
| 7141 | do_each_thread(g, p) { | 7265 | for_each_process_thread(g, p) { |
| 7142 | /* | 7266 | /* |
| 7143 | * Only normalize user tasks: | 7267 | * Only normalize user tasks: |
| 7144 | */ | 7268 | */ |
| 7145 | if (!p->mm) | 7269 | if (p->flags & PF_KTHREAD) |
| 7146 | continue; | 7270 | continue; |
| 7147 | 7271 | ||
| 7148 | p->se.exec_start = 0; | 7272 | p->se.exec_start = 0; |
| @@ -7157,21 +7281,16 @@ void normalize_rt_tasks(void) | |||
| 7157 | * Renice negative nice level userspace | 7281 | * Renice negative nice level userspace |
| 7158 | * tasks back to 0: | 7282 | * tasks back to 0: |
| 7159 | */ | 7283 | */ |
| 7160 | if (task_nice(p) < 0 && p->mm) | 7284 | if (task_nice(p) < 0) |
| 7161 | set_user_nice(p, 0); | 7285 | set_user_nice(p, 0); |
| 7162 | continue; | 7286 | continue; |
| 7163 | } | 7287 | } |
| 7164 | 7288 | ||
| 7165 | raw_spin_lock(&p->pi_lock); | 7289 | rq = task_rq_lock(p, &flags); |
| 7166 | rq = __task_rq_lock(p); | ||
| 7167 | |||
| 7168 | normalize_task(rq, p); | 7290 | normalize_task(rq, p); |
| 7169 | 7291 | task_rq_unlock(rq, p, &flags); | |
| 7170 | __task_rq_unlock(rq); | 7292 | } |
| 7171 | raw_spin_unlock(&p->pi_lock); | 7293 | read_unlock(&tasklist_lock); |
| 7172 | } while_each_thread(g, p); | ||
| 7173 | |||
| 7174 | read_unlock_irqrestore(&tasklist_lock, flags); | ||
| 7175 | } | 7294 | } |
| 7176 | 7295 | ||
| 7177 | #endif /* CONFIG_MAGIC_SYSRQ */ | 7296 | #endif /* CONFIG_MAGIC_SYSRQ */ |
| @@ -7311,19 +7430,19 @@ void sched_offline_group(struct task_group *tg) | |||
| 7311 | void sched_move_task(struct task_struct *tsk) | 7430 | void sched_move_task(struct task_struct *tsk) |
| 7312 | { | 7431 | { |
| 7313 | struct task_group *tg; | 7432 | struct task_group *tg; |
| 7314 | int on_rq, running; | 7433 | int queued, running; |
| 7315 | unsigned long flags; | 7434 | unsigned long flags; |
| 7316 | struct rq *rq; | 7435 | struct rq *rq; |
| 7317 | 7436 | ||
| 7318 | rq = task_rq_lock(tsk, &flags); | 7437 | rq = task_rq_lock(tsk, &flags); |
| 7319 | 7438 | ||
| 7320 | running = task_current(rq, tsk); | 7439 | running = task_current(rq, tsk); |
| 7321 | on_rq = tsk->on_rq; | 7440 | queued = task_on_rq_queued(tsk); |
| 7322 | 7441 | ||
| 7323 | if (on_rq) | 7442 | if (queued) |
| 7324 | dequeue_task(rq, tsk, 0); | 7443 | dequeue_task(rq, tsk, 0); |
| 7325 | if (unlikely(running)) | 7444 | if (unlikely(running)) |
| 7326 | tsk->sched_class->put_prev_task(rq, tsk); | 7445 | put_prev_task(rq, tsk); |
| 7327 | 7446 | ||
| 7328 | tg = container_of(task_css_check(tsk, cpu_cgrp_id, | 7447 | tg = container_of(task_css_check(tsk, cpu_cgrp_id, |
| 7329 | lockdep_is_held(&tsk->sighand->siglock)), | 7448 | lockdep_is_held(&tsk->sighand->siglock)), |
| @@ -7333,14 +7452,14 @@ void sched_move_task(struct task_struct *tsk) | |||
| 7333 | 7452 | ||
| 7334 | #ifdef CONFIG_FAIR_GROUP_SCHED | 7453 | #ifdef CONFIG_FAIR_GROUP_SCHED |
| 7335 | if (tsk->sched_class->task_move_group) | 7454 | if (tsk->sched_class->task_move_group) |
| 7336 | tsk->sched_class->task_move_group(tsk, on_rq); | 7455 | tsk->sched_class->task_move_group(tsk, queued); |
| 7337 | else | 7456 | else |
| 7338 | #endif | 7457 | #endif |
| 7339 | set_task_rq(tsk, task_cpu(tsk)); | 7458 | set_task_rq(tsk, task_cpu(tsk)); |
| 7340 | 7459 | ||
| 7341 | if (unlikely(running)) | 7460 | if (unlikely(running)) |
| 7342 | tsk->sched_class->set_curr_task(rq); | 7461 | tsk->sched_class->set_curr_task(rq); |
| 7343 | if (on_rq) | 7462 | if (queued) |
| 7344 | enqueue_task(rq, tsk, 0); | 7463 | enqueue_task(rq, tsk, 0); |
| 7345 | 7464 | ||
| 7346 | task_rq_unlock(rq, tsk, &flags); | 7465 | task_rq_unlock(rq, tsk, &flags); |
| @@ -7358,10 +7477,10 @@ static inline int tg_has_rt_tasks(struct task_group *tg) | |||
| 7358 | { | 7477 | { |
| 7359 | struct task_struct *g, *p; | 7478 | struct task_struct *g, *p; |
| 7360 | 7479 | ||
| 7361 | do_each_thread(g, p) { | 7480 | for_each_process_thread(g, p) { |
| 7362 | if (rt_task(p) && task_rq(p)->rt.tg == tg) | 7481 | if (rt_task(p) && task_group(p) == tg) |
| 7363 | return 1; | 7482 | return 1; |
| 7364 | } while_each_thread(g, p); | 7483 | } |
| 7365 | 7484 | ||
| 7366 | return 0; | 7485 | return 0; |
| 7367 | } | 7486 | } |
| @@ -7570,6 +7689,7 @@ static int sched_dl_global_constraints(void) | |||
| 7570 | u64 runtime = global_rt_runtime(); | 7689 | u64 runtime = global_rt_runtime(); |
| 7571 | u64 period = global_rt_period(); | 7690 | u64 period = global_rt_period(); |
| 7572 | u64 new_bw = to_ratio(period, runtime); | 7691 | u64 new_bw = to_ratio(period, runtime); |
| 7692 | struct dl_bw *dl_b; | ||
| 7573 | int cpu, ret = 0; | 7693 | int cpu, ret = 0; |
| 7574 | unsigned long flags; | 7694 | unsigned long flags; |
| 7575 | 7695 | ||
| @@ -7583,13 +7703,16 @@ static int sched_dl_global_constraints(void) | |||
| 7583 | * solutions is welcome! | 7703 | * solutions is welcome! |
| 7584 | */ | 7704 | */ |
| 7585 | for_each_possible_cpu(cpu) { | 7705 | for_each_possible_cpu(cpu) { |
| 7586 | struct dl_bw *dl_b = dl_bw_of(cpu); | 7706 | rcu_read_lock_sched(); |
| 7707 | dl_b = dl_bw_of(cpu); | ||
| 7587 | 7708 | ||
| 7588 | raw_spin_lock_irqsave(&dl_b->lock, flags); | 7709 | raw_spin_lock_irqsave(&dl_b->lock, flags); |
| 7589 | if (new_bw < dl_b->total_bw) | 7710 | if (new_bw < dl_b->total_bw) |
| 7590 | ret = -EBUSY; | 7711 | ret = -EBUSY; |
| 7591 | raw_spin_unlock_irqrestore(&dl_b->lock, flags); | 7712 | raw_spin_unlock_irqrestore(&dl_b->lock, flags); |
| 7592 | 7713 | ||
| 7714 | rcu_read_unlock_sched(); | ||
| 7715 | |||
| 7593 | if (ret) | 7716 | if (ret) |
| 7594 | break; | 7717 | break; |
| 7595 | } | 7718 | } |
| @@ -7600,6 +7723,7 @@ static int sched_dl_global_constraints(void) | |||
| 7600 | static void sched_dl_do_global(void) | 7723 | static void sched_dl_do_global(void) |
| 7601 | { | 7724 | { |
| 7602 | u64 new_bw = -1; | 7725 | u64 new_bw = -1; |
| 7726 | struct dl_bw *dl_b; | ||
| 7603 | int cpu; | 7727 | int cpu; |
| 7604 | unsigned long flags; | 7728 | unsigned long flags; |
| 7605 | 7729 | ||
| @@ -7613,11 +7737,14 @@ static void sched_dl_do_global(void) | |||
| 7613 | * FIXME: As above... | 7737 | * FIXME: As above... |
| 7614 | */ | 7738 | */ |
| 7615 | for_each_possible_cpu(cpu) { | 7739 | for_each_possible_cpu(cpu) { |
| 7616 | struct dl_bw *dl_b = dl_bw_of(cpu); | 7740 | rcu_read_lock_sched(); |
| 7741 | dl_b = dl_bw_of(cpu); | ||
| 7617 | 7742 | ||
| 7618 | raw_spin_lock_irqsave(&dl_b->lock, flags); | 7743 | raw_spin_lock_irqsave(&dl_b->lock, flags); |
| 7619 | dl_b->bw = new_bw; | 7744 | dl_b->bw = new_bw; |
| 7620 | raw_spin_unlock_irqrestore(&dl_b->lock, flags); | 7745 | raw_spin_unlock_irqrestore(&dl_b->lock, flags); |
| 7746 | |||
| 7747 | rcu_read_unlock_sched(); | ||
| 7621 | } | 7748 | } |
| 7622 | } | 7749 | } |
| 7623 | 7750 | ||
| @@ -7747,6 +7874,11 @@ static void cpu_cgroup_css_offline(struct cgroup_subsys_state *css) | |||
| 7747 | sched_offline_group(tg); | 7874 | sched_offline_group(tg); |
| 7748 | } | 7875 | } |
| 7749 | 7876 | ||
| 7877 | static void cpu_cgroup_fork(struct task_struct *task) | ||
| 7878 | { | ||
| 7879 | sched_move_task(task); | ||
| 7880 | } | ||
| 7881 | |||
| 7750 | static int cpu_cgroup_can_attach(struct cgroup_subsys_state *css, | 7882 | static int cpu_cgroup_can_attach(struct cgroup_subsys_state *css, |
| 7751 | struct cgroup_taskset *tset) | 7883 | struct cgroup_taskset *tset) |
| 7752 | { | 7884 | { |
| @@ -7998,7 +8130,7 @@ static int tg_cfs_schedulable_down(struct task_group *tg, void *data) | |||
| 7998 | struct cfs_bandwidth *parent_b = &tg->parent->cfs_bandwidth; | 8130 | struct cfs_bandwidth *parent_b = &tg->parent->cfs_bandwidth; |
| 7999 | 8131 | ||
| 8000 | quota = normalize_cfs_quota(tg, d); | 8132 | quota = normalize_cfs_quota(tg, d); |
| 8001 | parent_quota = parent_b->hierarchal_quota; | 8133 | parent_quota = parent_b->hierarchical_quota; |
| 8002 | 8134 | ||
| 8003 | /* | 8135 | /* |
| 8004 | * ensure max(child_quota) <= parent_quota, inherit when no | 8136 | * ensure max(child_quota) <= parent_quota, inherit when no |
| @@ -8009,7 +8141,7 @@ static int tg_cfs_schedulable_down(struct task_group *tg, void *data) | |||
| 8009 | else if (parent_quota != RUNTIME_INF && quota > parent_quota) | 8141 | else if (parent_quota != RUNTIME_INF && quota > parent_quota) |
| 8010 | return -EINVAL; | 8142 | return -EINVAL; |
| 8011 | } | 8143 | } |
| 8012 | cfs_b->hierarchal_quota = quota; | 8144 | cfs_b->hierarchical_quota = quota; |
| 8013 | 8145 | ||
| 8014 | return 0; | 8146 | return 0; |
| 8015 | } | 8147 | } |
| @@ -8119,6 +8251,7 @@ struct cgroup_subsys cpu_cgrp_subsys = { | |||
| 8119 | .css_free = cpu_cgroup_css_free, | 8251 | .css_free = cpu_cgroup_css_free, |
| 8120 | .css_online = cpu_cgroup_css_online, | 8252 | .css_online = cpu_cgroup_css_online, |
| 8121 | .css_offline = cpu_cgroup_css_offline, | 8253 | .css_offline = cpu_cgroup_css_offline, |
| 8254 | .fork = cpu_cgroup_fork, | ||
| 8122 | .can_attach = cpu_cgroup_can_attach, | 8255 | .can_attach = cpu_cgroup_can_attach, |
| 8123 | .attach = cpu_cgroup_attach, | 8256 | .attach = cpu_cgroup_attach, |
| 8124 | .exit = cpu_cgroup_exit, | 8257 | .exit = cpu_cgroup_exit, |
