Diffstat (limited to 'kernel/sched/core.c')
 kernel/sched/core.c | 231
 1 file changed, 142 insertions(+), 89 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 6edbef296ece..3c4d096544ce 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -555,12 +555,15 @@ void resched_cpu(int cpu)
  * selecting an idle cpu will add more delays to the timers than intended
  * (as that cpu's timer base may not be uptodate wrt jiffies etc).
  */
-int get_nohz_timer_target(void)
+int get_nohz_timer_target(int pinned)
 {
 	int cpu = smp_processor_id();
 	int i;
 	struct sched_domain *sd;
 
+	if (pinned || !get_sysctl_timer_migration() || !idle_cpu(cpu))
+		return cpu;
+
 	rcu_read_lock();
 	for_each_domain(cpu, sd) {
 		for_each_cpu(i, sched_domain_span(sd)) {
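
The new pinned argument lets a caller that must keep its timer on the current CPU skip the idle-CPU search entirely; the search is also skipped when timer migration is disabled via sysctl or the local CPU is busy anyway. Below is a minimal userspace sketch of that decision order; the stubbed predicates and pick_timer_cpu() are illustrative stand-ins, not kernel code.

#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for the kernel predicates consulted by get_nohz_timer_target(). */
static bool timer_migration_enabled = true;	/* get_sysctl_timer_migration() */
static bool this_cpu_is_idle = true;		/* idle_cpu(cpu) */

/* Mirrors the early-return order added above: pinned timers, disabled
 * migration, or a busy local CPU all keep the timer where it is. */
static int pick_timer_cpu(int this_cpu, int pinned)
{
	if (pinned || !timer_migration_enabled || !this_cpu_is_idle)
		return this_cpu;
	/* The kernel would now walk the sched domains for a busy CPU;
	 * a fixed neighbour stands in for that search here. */
	return this_cpu + 1;
}

int main(void)
{
	printf("pinned timer   -> CPU %d\n", pick_timer_cpu(0, 1));
	printf("unpinned timer -> CPU %d\n", pick_timer_cpu(0, 0));
	return 0;
}
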
@@ -823,19 +826,13 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
 #endif
 #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
 	if (static_key_false((&paravirt_steal_rq_enabled))) {
-		u64 st;
-
 		steal = paravirt_steal_clock(cpu_of(rq));
 		steal -= rq->prev_steal_time_rq;
 
 		if (unlikely(steal > delta))
 			steal = delta;
 
-		st = steal_ticks(steal);
-		steal = st * TICK_NSEC;
-
 		rq->prev_steal_time_rq += steal;
-
 		delta -= steal;
 	}
 #endif
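
The dropped steal_ticks() step used to round the accumulated steal time down to whole scheduler ticks before charging it, so the rq clock now absorbs steal time at nanosecond resolution. As a rough worked example (assuming HZ=1000, so TICK_NSEC is 1,000,000): a 1,500,000 ns steal sample was previously charged as 1,000,000 ns, with the remaining 500,000 ns only picked up once later samples pushed it past the next tick boundary; after this change the full 1,500,000 ns is subtracted from delta immediately.
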
@@ -1745,8 +1742,10 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 	p->numa_scan_seq = p->mm ? p->mm->numa_scan_seq : 0;
 	p->numa_scan_period = sysctl_numa_balancing_scan_delay;
 	p->numa_work.next = &p->numa_work;
-	p->numa_faults = NULL;
-	p->numa_faults_buffer = NULL;
+	p->numa_faults_memory = NULL;
+	p->numa_faults_buffer_memory = NULL;
+	p->last_task_numa_placement = 0;
+	p->last_sum_exec_runtime = 0;
 
 	INIT_LIST_HEAD(&p->numa_entry);
 	p->numa_group = NULL;
@@ -2149,8 +2148,6 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	if (mm)
 		mmdrop(mm);
 	if (unlikely(prev_state == TASK_DEAD)) {
-		task_numa_free(prev);
-
 		if (prev->sched_class->task_dead)
 			prev->sched_class->task_dead(prev);
 
@@ -2167,13 +2164,6 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 
 #ifdef CONFIG_SMP
 
-/* assumes rq->lock is held */
-static inline void pre_schedule(struct rq *rq, struct task_struct *prev)
-{
-	if (prev->sched_class->pre_schedule)
-		prev->sched_class->pre_schedule(rq, prev);
-}
-
 /* rq->lock is NOT held, but preemption is disabled */
 static inline void post_schedule(struct rq *rq)
 {
@@ -2191,10 +2181,6 @@ static inline void post_schedule(struct rq *rq)
 
 #else
 
-static inline void pre_schedule(struct rq *rq, struct task_struct *p)
-{
-}
-
 static inline void post_schedule(struct rq *rq)
 {
 }
@@ -2510,8 +2496,13 @@ void __kprobes preempt_count_add(int val)
 	DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >=
 				PREEMPT_MASK - 10);
 #endif
-	if (preempt_count() == val)
-		trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
+	if (preempt_count() == val) {
+		unsigned long ip = get_parent_ip(CALLER_ADDR1);
+#ifdef CONFIG_DEBUG_PREEMPT
+		current->preempt_disable_ip = ip;
+#endif
+		trace_preempt_off(CALLER_ADDR0, ip);
+	}
 }
 EXPORT_SYMBOL(preempt_count_add);
 
@@ -2554,6 +2545,13 @@ static noinline void __schedule_bug(struct task_struct *prev)
 	print_modules();
 	if (irqs_disabled())
 		print_irqtrace_events(prev);
+#ifdef CONFIG_DEBUG_PREEMPT
+	if (in_atomic_preempt_off()) {
+		pr_err("Preemption disabled at:");
+		print_ip_sym(current->preempt_disable_ip);
+		pr_cont("\n");
+	}
+#endif
 	dump_stack();
 	add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
 }
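
These two hunks cooperate: with CONFIG_DEBUG_PREEMPT, preempt_count_add() now records the instruction pointer of the outermost preempt-disable site, and __schedule_bug() prints it when a task schedules while atomic. A toy userspace model of that record-then-report pattern; my_preempt_disable(), report_if_atomic() and the use of __builtin_return_address() are invented for illustration and are not the kernel helpers.

#include <stdio.h>

/* Toy model: remember the call site that first took the "preempt" count
 * from zero, and report it if a later check finds the count still held. */
static int preempt_count;
static void *preempt_disable_ip;

#define my_preempt_disable() do {					\
	if (preempt_count++ == 0)					\
		preempt_disable_ip = __builtin_return_address(0);	\
} while (0)

static void report_if_atomic(void)
{
	if (preempt_count)
		fprintf(stderr, "Preemption disabled at: %p\n",
			preempt_disable_ip);
}

static void outer(void)
{
	my_preempt_disable();	/* records the return address into main() */
	report_if_atomic();
}

int main(void)
{
	outer();
	return 0;
}
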
@@ -2577,36 +2575,34 @@ static inline void schedule_debug(struct task_struct *prev)
 	schedstat_inc(this_rq(), sched_count);
 }
 
-static void put_prev_task(struct rq *rq, struct task_struct *prev)
-{
-	if (prev->on_rq || rq->skip_clock_update < 0)
-		update_rq_clock(rq);
-	prev->sched_class->put_prev_task(rq, prev);
-}
-
 /*
  * Pick up the highest-prio task:
  */
 static inline struct task_struct *
-pick_next_task(struct rq *rq)
+pick_next_task(struct rq *rq, struct task_struct *prev)
 {
-	const struct sched_class *class;
+	const struct sched_class *class = &fair_sched_class;
 	struct task_struct *p;
 
 	/*
 	 * Optimization: we know that if all tasks are in
 	 * the fair class we can call that function directly:
 	 */
-	if (likely(rq->nr_running == rq->cfs.h_nr_running)) {
-		p = fair_sched_class.pick_next_task(rq);
-		if (likely(p))
+	if (likely(prev->sched_class == class &&
+		   rq->nr_running == rq->cfs.h_nr_running)) {
+		p = fair_sched_class.pick_next_task(rq, prev);
+		if (likely(p && p != RETRY_TASK))
 			return p;
 	}
 
+again:
 	for_each_class(class) {
-		p = class->pick_next_task(rq);
-		if (p)
+		p = class->pick_next_task(rq, prev);
+		if (p) {
+			if (unlikely(p == RETRY_TASK))
+				goto again;
 			return p;
+		}
 	}
 
 	BUG(); /* the idle class will always have a runnable task */
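
pick_next_task() now receives prev and restarts the class walk whenever a class hands back the RETRY_TASK sentinel, which a class may do after dropping rq->lock, since a higher-priority class could have gained runnable tasks in the meantime. A compact stand-alone sketch of that retry protocol, with the scheduling classes reduced to plain function pointers and all names invented for illustration:

#include <stddef.h>
#include <stdio.h>

struct task { const char *name; };

/* Sentinel meaning "state changed under me, restart from the top class". */
#define RETRY_TASK ((struct task *)-1)

typedef struct task *(*pick_fn)(void);

static int rt_retry_once = 1;

static struct task *pick_rt(void)
{
	static struct task rt_task = { "rt" };

	/* Model a class that dropped the lock once and asks for a re-walk. */
	if (rt_retry_once--)
		return RETRY_TASK;
	return &rt_task;
}

static struct task *pick_idle(void)
{
	static struct task idle_task = { "idle" };
	return &idle_task;
}

static pick_fn classes[] = { pick_rt, pick_idle };

static struct task *pick_next_task(void)
{
	size_t i;
again:
	for (i = 0; i < sizeof(classes) / sizeof(classes[0]); i++) {
		struct task *p = classes[i]();
		if (p) {
			if (p == RETRY_TASK)
				goto again;
			return p;
		}
	}
	return NULL;	/* the kernel BUG()s here: idle always has a task */
}

int main(void)
{
	printf("picked: %s\n", pick_next_task()->name);
	return 0;
}
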
@@ -2700,13 +2696,10 @@ need_resched:
 		switch_count = &prev->nvcsw;
 	}
 
-	pre_schedule(rq, prev);
-
-	if (unlikely(!rq->nr_running))
-		idle_balance(cpu, rq);
+	if (prev->on_rq || rq->skip_clock_update < 0)
+		update_rq_clock(rq);
 
-	put_prev_task(rq, prev);
-	next = pick_next_task(rq);
+	next = pick_next_task(rq, prev);
 	clear_tsk_need_resched(prev);
 	clear_preempt_need_resched();
 	rq->skip_clock_update = 0;
@@ -2908,7 +2901,8 @@ EXPORT_SYMBOL(sleep_on_timeout);
  * This function changes the 'effective' priority of a task. It does
  * not touch ->normal_prio like __setscheduler().
  *
- * Used by the rt_mutex code to implement priority inheritance logic.
+ * Used by the rt_mutex code to implement priority inheritance
+ * logic. Call site only calls if the priority of the task changed.
  */
 void rt_mutex_setprio(struct task_struct *p, int prio)
 {
@@ -2998,7 +2992,7 @@ void set_user_nice(struct task_struct *p, long nice)
 	unsigned long flags;
 	struct rq *rq;
 
-	if (TASK_NICE(p) == nice || nice < -20 || nice > 19)
+	if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE)
 		return;
 	/*
 	 * We have to be careful, if called from sys_setpriority(),
@@ -3076,11 +3070,11 @@ SYSCALL_DEFINE1(nice, int, increment)
 	if (increment > 40)
 		increment = 40;
 
-	nice = TASK_NICE(current) + increment;
-	if (nice < -20)
-		nice = -20;
-	if (nice > 19)
-		nice = 19;
+	nice = task_nice(current) + increment;
+	if (nice < MIN_NICE)
+		nice = MIN_NICE;
+	if (nice > MAX_NICE)
+		nice = MAX_NICE;
 
 	if (increment < 0 && !can_nice(current, nice))
 		return -EPERM;
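
MIN_NICE and MAX_NICE are the named equivalents of the old -20 and 19 literals, so the arithmetic is unchanged. For example, a task at nice 10 calling nice(-70) first has the increment clamped (the symmetric -40 clamp sits just above this hunk), giving 10 - 40 = -30, which is then clamped to MIN_NICE (-20); because the increment is negative, can_nice() must still approve the result before set_user_nice() is called.
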
@@ -3109,18 +3103,6 @@ int task_prio(const struct task_struct *p)
 }
 
 /**
- * task_nice - return the nice value of a given task.
- * @p: the task in question.
- *
- * Return: The nice value [ -20 ... 0 ... 19 ].
- */
-int task_nice(const struct task_struct *p)
-{
-	return TASK_NICE(p);
-}
-EXPORT_SYMBOL(task_nice);
-
-/**
  * idle_cpu - is a given cpu idle currently?
  * @cpu: the processor in question.
  *
@@ -3189,9 +3171,8 @@ __setparam_dl(struct task_struct *p, const struct sched_attr *attr)
 	dl_se->dl_new = 1;
 }
 
-/* Actually do priority change: must hold pi & rq lock. */
-static void __setscheduler(struct rq *rq, struct task_struct *p,
-			   const struct sched_attr *attr)
+static void __setscheduler_params(struct task_struct *p,
+		const struct sched_attr *attr)
 {
 	int policy = attr->sched_policy;
 
@@ -3211,9 +3192,21 @@ static void __setscheduler(struct rq *rq, struct task_struct *p,
 	 * getparam()/getattr() don't report silly values for !rt tasks.
 	 */
 	p->rt_priority = attr->sched_priority;
-
 	p->normal_prio = normal_prio(p);
-	p->prio = rt_mutex_getprio(p);
+	set_load_weight(p);
+}
+
+/* Actually do priority change: must hold pi & rq lock. */
+static void __setscheduler(struct rq *rq, struct task_struct *p,
+			   const struct sched_attr *attr)
+{
+	__setscheduler_params(p, attr);
+
+	/*
+	 * If we get here, there was no pi waiters boosting the
+	 * task. It is safe to use the normal prio.
+	 */
+	p->prio = normal_prio(p);
 
 	if (dl_prio(p->prio))
 		p->sched_class = &dl_sched_class;
@@ -3221,8 +3214,6 @@ static void __setscheduler(struct rq *rq, struct task_struct *p,
 		p->sched_class = &rt_sched_class;
 	else
 		p->sched_class = &fair_sched_class;
-
-	set_load_weight(p);
 }
 
 static void
@@ -3275,6 +3266,8 @@ static int __sched_setscheduler(struct task_struct *p,
 				const struct sched_attr *attr,
 				bool user)
 {
+	int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 :
+		      MAX_RT_PRIO - 1 - attr->sched_priority;
 	int retval, oldprio, oldpolicy = -1, on_rq, running;
 	int policy = attr->sched_policy;
 	unsigned long flags;
@@ -3319,7 +3312,7 @@ recheck:
 	 */
 	if (user && !capable(CAP_SYS_NICE)) {
 		if (fair_policy(policy)) {
-			if (attr->sched_nice < TASK_NICE(p) &&
+			if (attr->sched_nice < task_nice(p) &&
 			    !can_nice(p, attr->sched_nice))
 				return -EPERM;
 		}
@@ -3338,12 +3331,21 @@ recheck:
 			return -EPERM;
 		}
 
+		/*
+		 * Can't set/change SCHED_DEADLINE policy at all for now
+		 * (safest behavior); in the future we would like to allow
+		 * unprivileged DL tasks to increase their relative deadline
+		 * or reduce their runtime (both ways reducing utilization)
+		 */
+		if (dl_policy(policy))
+			return -EPERM;
+
 		/*
 		 * Treat SCHED_IDLE as nice 20. Only allow a switch to
 		 * SCHED_NORMAL if the RLIMIT_NICE would normally permit it.
 		 */
 		if (p->policy == SCHED_IDLE && policy != SCHED_IDLE) {
-			if (!can_nice(p, TASK_NICE(p)))
+			if (!can_nice(p, task_nice(p)))
 				return -EPERM;
 		}
 
@@ -3380,16 +3382,18 @@ recheck:
 	}
 
 	/*
-	 * If not changing anything there's no need to proceed further:
+	 * If not changing anything there's no need to proceed further,
+	 * but store a possible modification of reset_on_fork.
 	 */
 	if (unlikely(policy == p->policy)) {
-		if (fair_policy(policy) && attr->sched_nice != TASK_NICE(p))
+		if (fair_policy(policy) && attr->sched_nice != task_nice(p))
 			goto change;
 		if (rt_policy(policy) && attr->sched_priority != p->rt_priority)
 			goto change;
 		if (dl_policy(policy))
 			goto change;
 
+		p->sched_reset_on_fork = reset_on_fork;
 		task_rq_unlock(rq, p, &flags);
 		return 0;
 	}
@@ -3443,6 +3447,24 @@ change:
 		return -EBUSY;
 	}
 
+	p->sched_reset_on_fork = reset_on_fork;
+	oldprio = p->prio;
+
+	/*
+	 * Special case for priority boosted tasks.
+	 *
+	 * If the new priority is lower or equal (user space view)
+	 * than the current (boosted) priority, we just store the new
+	 * normal parameters and do not touch the scheduler class and
+	 * the runqueue. This will be done when the task deboost
+	 * itself.
+	 */
+	if (rt_mutex_check_prio(p, newprio)) {
+		__setscheduler_params(p, attr);
+		task_rq_unlock(rq, p, &flags);
+		return 0;
+	}
+
 	on_rq = p->on_rq;
 	running = task_current(rq, p);
 	if (on_rq)
@@ -3450,16 +3472,18 @@ change:
 	if (running)
 		p->sched_class->put_prev_task(rq, p);
 
-	p->sched_reset_on_fork = reset_on_fork;
-
-	oldprio = p->prio;
 	prev_class = p->sched_class;
 	__setscheduler(rq, p, attr);
 
 	if (running)
 		p->sched_class->set_curr_task(rq);
-	if (on_rq)
-		enqueue_task(rq, p, 0);
+	if (on_rq) {
+		/*
+		 * We enqueue to tail when the priority of a task is
+		 * increased (user space view).
+		 */
+		enqueue_task(rq, p, oldprio <= p->prio ? ENQUEUE_HEAD : 0);
+	}
 
 	check_class_changed(rq, p, prev_class, oldprio);
 	task_rq_unlock(rq, p, &flags);
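
One detail in the enqueue flag above is easy to miss: kernel prio values are inverted relative to the user-space view (a lower number means higher priority), so oldprio <= p->prio means the task did not gain priority and it is requeued at the head of its (same or lower) priority list, while a task whose priority rose is queued at the tail and waits behind existing peers. As a rough example, an RT task moved from rt_priority 10 to 50 goes from prio 89 to prio 49; oldprio > p->prio, so the flag is 0 and it is enqueued at the tail.
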
@@ -3615,7 +3639,7 @@ static int sched_copy_attr(struct sched_attr __user *uattr,
 	 * XXX: do we want to be lenient like existing syscalls; or do we want
 	 * to be strict and return an error on out-of-bounds values?
 	 */
-	attr->sched_nice = clamp(attr->sched_nice, -20, 19);
+	attr->sched_nice = clamp(attr->sched_nice, MIN_NICE, MAX_NICE);
 
 out:
 	return ret;
@@ -3836,7 +3860,7 @@ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
 	else if (task_has_rt_policy(p))
 		attr.sched_priority = p->rt_priority;
 	else
-		attr.sched_nice = TASK_NICE(p);
+		attr.sched_nice = task_nice(p);
 
 	rcu_read_unlock();
 
@@ -4474,6 +4498,7 @@ void init_idle(struct task_struct *idle, int cpu)
 	rcu_read_unlock();
 
 	rq->curr = rq->idle = idle;
+	idle->on_rq = 1;
 #if defined(CONFIG_SMP)
 	idle->on_cpu = 1;
 #endif
@@ -4693,8 +4718,10 @@ void idle_task_exit(void)
 
 	BUG_ON(cpu_online(smp_processor_id()));
 
-	if (mm != &init_mm)
+	if (mm != &init_mm) {
 		switch_mm(mm, &init_mm, current);
+		finish_arch_post_lock_switch();
+	}
 	mmdrop(mm);
 }
 
@@ -4712,6 +4739,22 @@ static void calc_load_migrate(struct rq *rq)
 	atomic_long_add(delta, &calc_load_tasks);
 }
 
+static void put_prev_task_fake(struct rq *rq, struct task_struct *prev)
+{
+}
+
+static const struct sched_class fake_sched_class = {
+	.put_prev_task = put_prev_task_fake,
+};
+
+static struct task_struct fake_task = {
+	/*
+	 * Avoid pull_{rt,dl}_task()
+	 */
+	.prio = MAX_PRIO + 1,
+	.sched_class = &fake_sched_class,
+};
+
 /*
  * Migrate all tasks from the rq, sleeping tasks will be migrated by
  * try_to_wake_up()->select_task_rq().
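
migrate_tasks() has no meaningful prev to hand to the new pick_next_task(rq, prev) signature, so it passes this fake task, whose class supplies a callable but empty put_prev_task() and whose prio is set past MAX_PRIO so that, as the comment notes, the rt/dl pull logic leaves it alone. That is the classic null-object pattern; a stand-alone sketch with invented names, not kernel code:

#include <stdio.h>

struct rq;
struct task;

/* The callback a "scheduling class" must provide to the core picker. */
struct sched_class_ops {
	void (*put_prev_task)(struct rq *rq, struct task *prev);
};

struct task {
	int prio;
	const struct sched_class_ops *ops;
};

/* Null object: callable, but deliberately does nothing. */
static void put_prev_task_fake(struct rq *rq, struct task *prev)
{
	(void)rq;
	(void)prev;
}

static const struct sched_class_ops fake_ops = {
	.put_prev_task = put_prev_task_fake,
};

/* Numerically high prio, i.e. lower than any real task's priority. */
static struct task fake_task = {
	.prio = 141,		/* stands in for MAX_PRIO + 1 */
	.ops = &fake_ops,
};

static void core_picker(struct rq *rq, struct task *prev)
{
	/* The core can call back unconditionally, no NULL checks needed. */
	prev->ops->put_prev_task(rq, prev);
	printf("picked next after prev with prio %d\n", prev->prio);
}

int main(void)
{
	core_picker(NULL, &fake_task);
	return 0;
}
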
@@ -4752,7 +4795,7 @@ static void migrate_tasks(unsigned int dead_cpu)
 		if (rq->nr_running == 1)
 			break;
 
-		next = pick_next_task(rq);
+		next = pick_next_task(rq, &fake_task);
 		BUG_ON(!next);
 		next->sched_class->put_prev_task(rq, next);
 
@@ -4842,7 +4885,7 @@ set_table_entry(struct ctl_table *entry,
 static struct ctl_table *
 sd_alloc_ctl_domain_table(struct sched_domain *sd)
 {
-	struct ctl_table *table = sd_alloc_ctl_entry(13);
+	struct ctl_table *table = sd_alloc_ctl_entry(14);
 
 	if (table == NULL)
 		return NULL;
@@ -4870,9 +4913,12 @@ sd_alloc_ctl_domain_table(struct sched_domain *sd)
 		sizeof(int), 0644, proc_dointvec_minmax, false);
 	set_table_entry(&table[10], "flags", &sd->flags,
 		sizeof(int), 0644, proc_dointvec_minmax, false);
-	set_table_entry(&table[11], "name", sd->name,
+	set_table_entry(&table[11], "max_newidle_lb_cost",
+		&sd->max_newidle_lb_cost,
+		sizeof(long), 0644, proc_doulongvec_minmax, false);
+	set_table_entry(&table[12], "name", sd->name,
 		CORENAME_MAX_SIZE, 0444, proc_dostring, false);
-	/* &table[12] is terminator */
+	/* &table[13] is terminator */
 
 	return table;
 }
@@ -6849,7 +6895,6 @@ void __init sched_init(void)
 
 		rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime;
 #ifdef CONFIG_RT_GROUP_SCHED
-		INIT_LIST_HEAD(&rq->leaf_rt_rq_list);
 		init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL);
 #endif
 
@@ -6938,7 +6983,8 @@ void __might_sleep(const char *file, int line, int preempt_offset)
 	static unsigned long prev_jiffy; /* ratelimiting */
 
 	rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */
-	if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) ||
+	if ((preempt_count_equals(preempt_offset) && !irqs_disabled() &&
+	     !is_idle_task(current)) ||
 	    system_state != SYSTEM_RUNNING || oops_in_progress)
 		return;
 	if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
@@ -6956,6 +7002,13 @@ void __might_sleep(const char *file, int line, int preempt_offset)
 	debug_show_held_locks(current);
 	if (irqs_disabled())
 		print_irqtrace_events(current);
+#ifdef CONFIG_DEBUG_PREEMPT
+	if (!preempt_count_equals(preempt_offset)) {
+		pr_err("Preemption disabled at:");
+		print_ip_sym(current->preempt_disable_ip);
+		pr_cont("\n");
+	}
+#endif
 	dump_stack();
 }
 EXPORT_SYMBOL(__might_sleep);
@@ -7009,7 +7062,7 @@ void normalize_rt_tasks(void)
 			 * Renice negative nice level userspace
 			 * tasks back to 0:
 			 */
-			if (TASK_NICE(p) < 0 && p->mm)
+			if (task_nice(p) < 0 && p->mm)
 				set_user_nice(p, 0);
 			continue;
 		}