Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r--	kernel/sched/core.c	231
1 file changed, 142 insertions, 89 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 6edbef296ece..3c4d096544ce 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -555,12 +555,15 @@ void resched_cpu(int cpu)
  * selecting an idle cpu will add more delays to the timers than intended
  * (as that cpu's timer base may not be uptodate wrt jiffies etc).
  */
-int get_nohz_timer_target(void)
+int get_nohz_timer_target(int pinned)
 {
 	int cpu = smp_processor_id();
 	int i;
 	struct sched_domain *sd;
 
+	if (pinned || !get_sysctl_timer_migration() || !idle_cpu(cpu))
+		return cpu;
+
 	rcu_read_lock();
 	for_each_domain(cpu, sd) {
 		for_each_cpu(i, sched_domain_span(sd)) {
@@ -823,19 +826,13 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
 #endif
 #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
 	if (static_key_false((&paravirt_steal_rq_enabled))) {
-		u64 st;
-
 		steal = paravirt_steal_clock(cpu_of(rq));
 		steal -= rq->prev_steal_time_rq;
 
 		if (unlikely(steal > delta))
 			steal = delta;
 
-		st = steal_ticks(steal);
-		steal = st * TICK_NSEC;
-
 		rq->prev_steal_time_rq += steal;
-
 		delta -= steal;
 	}
 #endif
@@ -1745,8 +1742,10 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 	p->numa_scan_seq = p->mm ? p->mm->numa_scan_seq : 0;
 	p->numa_scan_period = sysctl_numa_balancing_scan_delay;
 	p->numa_work.next = &p->numa_work;
-	p->numa_faults = NULL;
-	p->numa_faults_buffer = NULL;
+	p->numa_faults_memory = NULL;
+	p->numa_faults_buffer_memory = NULL;
+	p->last_task_numa_placement = 0;
+	p->last_sum_exec_runtime = 0;
 
 	INIT_LIST_HEAD(&p->numa_entry);
 	p->numa_group = NULL;
@@ -2149,8 +2148,6 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	if (mm)
 		mmdrop(mm);
 	if (unlikely(prev_state == TASK_DEAD)) {
-		task_numa_free(prev);
-
 		if (prev->sched_class->task_dead)
 			prev->sched_class->task_dead(prev);
 
@@ -2167,13 +2164,6 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 
 #ifdef CONFIG_SMP
 
-/* assumes rq->lock is held */
-static inline void pre_schedule(struct rq *rq, struct task_struct *prev)
-{
-	if (prev->sched_class->pre_schedule)
-		prev->sched_class->pre_schedule(rq, prev);
-}
-
 /* rq->lock is NOT held, but preemption is disabled */
 static inline void post_schedule(struct rq *rq)
 {
@@ -2191,10 +2181,6 @@ static inline void post_schedule(struct rq *rq)
 
 #else
 
-static inline void pre_schedule(struct rq *rq, struct task_struct *p)
-{
-}
-
 static inline void post_schedule(struct rq *rq)
 {
 }
@@ -2510,8 +2496,13 @@ void __kprobes preempt_count_add(int val)
 	DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >=
 				PREEMPT_MASK - 10);
 #endif
-	if (preempt_count() == val)
-		trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
+	if (preempt_count() == val) {
+		unsigned long ip = get_parent_ip(CALLER_ADDR1);
+#ifdef CONFIG_DEBUG_PREEMPT
+		current->preempt_disable_ip = ip;
+#endif
+		trace_preempt_off(CALLER_ADDR0, ip);
+	}
 }
 EXPORT_SYMBOL(preempt_count_add);
 
@@ -2554,6 +2545,13 @@ static noinline void __schedule_bug(struct task_struct *prev)
 	print_modules();
 	if (irqs_disabled())
 		print_irqtrace_events(prev);
+#ifdef CONFIG_DEBUG_PREEMPT
+	if (in_atomic_preempt_off()) {
+		pr_err("Preemption disabled at:");
+		print_ip_sym(current->preempt_disable_ip);
+		pr_cont("\n");
+	}
+#endif
 	dump_stack();
 	add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
 }
@@ -2577,36 +2575,34 @@ static inline void schedule_debug(struct task_struct *prev)
 	schedstat_inc(this_rq(), sched_count);
 }
 
-static void put_prev_task(struct rq *rq, struct task_struct *prev)
-{
-	if (prev->on_rq || rq->skip_clock_update < 0)
-		update_rq_clock(rq);
-	prev->sched_class->put_prev_task(rq, prev);
-}
-
 /*
  * Pick up the highest-prio task:
  */
 static inline struct task_struct *
-pick_next_task(struct rq *rq)
+pick_next_task(struct rq *rq, struct task_struct *prev)
 {
-	const struct sched_class *class;
+	const struct sched_class *class = &fair_sched_class;
 	struct task_struct *p;
 
 	/*
 	 * Optimization: we know that if all tasks are in
 	 * the fair class we can call that function directly:
 	 */
-	if (likely(rq->nr_running == rq->cfs.h_nr_running)) {
-		p = fair_sched_class.pick_next_task(rq);
-		if (likely(p))
+	if (likely(prev->sched_class == class &&
+		   rq->nr_running == rq->cfs.h_nr_running)) {
+		p = fair_sched_class.pick_next_task(rq, prev);
+		if (likely(p && p != RETRY_TASK))
 			return p;
 	}
 
+again:
 	for_each_class(class) {
-		p = class->pick_next_task(rq);
-		if (p)
+		p = class->pick_next_task(rq, prev);
+		if (p) {
+			if (unlikely(p == RETRY_TASK))
+				goto again;
 			return p;
+		}
 	}
 
 	BUG(); /* the idle class will always have a runnable task */
@@ -2700,13 +2696,10 @@ need_resched:
 		switch_count = &prev->nvcsw;
 	}
 
-	pre_schedule(rq, prev);
-
-	if (unlikely(!rq->nr_running))
-		idle_balance(cpu, rq);
+	if (prev->on_rq || rq->skip_clock_update < 0)
+		update_rq_clock(rq);
 
-	put_prev_task(rq, prev);
-	next = pick_next_task(rq);
+	next = pick_next_task(rq, prev);
 	clear_tsk_need_resched(prev);
 	clear_preempt_need_resched();
 	rq->skip_clock_update = 0;
@@ -2908,7 +2901,8 @@ EXPORT_SYMBOL(sleep_on_timeout);
  * This function changes the 'effective' priority of a task. It does
  * not touch ->normal_prio like __setscheduler().
  *
- * Used by the rt_mutex code to implement priority inheritance logic.
+ * Used by the rt_mutex code to implement priority inheritance
+ * logic. Call site only calls if the priority of the task changed.
  */
 void rt_mutex_setprio(struct task_struct *p, int prio)
 {
@@ -2998,7 +2992,7 @@ void set_user_nice(struct task_struct *p, long nice)
 	unsigned long flags;
 	struct rq *rq;
 
-	if (TASK_NICE(p) == nice || nice < -20 || nice > 19)
+	if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE)
 		return;
 	/*
 	 * We have to be careful, if called from sys_setpriority(),
@@ -3076,11 +3070,11 @@ SYSCALL_DEFINE1(nice, int, increment)
 	if (increment > 40)
 		increment = 40;
 
-	nice = TASK_NICE(current) + increment;
-	if (nice < -20)
-		nice = -20;
-	if (nice > 19)
-		nice = 19;
+	nice = task_nice(current) + increment;
+	if (nice < MIN_NICE)
+		nice = MIN_NICE;
+	if (nice > MAX_NICE)
+		nice = MAX_NICE;
 
 	if (increment < 0 && !can_nice(current, nice))
 		return -EPERM;
@@ -3109,18 +3103,6 @@ int task_prio(const struct task_struct *p)
 }
 
 /**
- * task_nice - return the nice value of a given task.
- * @p: the task in question.
- *
- * Return: The nice value [ -20 ... 0 ... 19 ].
- */
-int task_nice(const struct task_struct *p)
-{
-	return TASK_NICE(p);
-}
-EXPORT_SYMBOL(task_nice);
-
-/**
  * idle_cpu - is a given cpu idle currently?
  * @cpu: the processor in question.
  *
@@ -3189,9 +3171,8 @@ __setparam_dl(struct task_struct *p, const struct sched_attr *attr)
 	dl_se->dl_new = 1;
 }
 
-/* Actually do priority change: must hold pi & rq lock. */
-static void __setscheduler(struct rq *rq, struct task_struct *p,
-			   const struct sched_attr *attr)
+static void __setscheduler_params(struct task_struct *p,
+		const struct sched_attr *attr)
 {
 	int policy = attr->sched_policy;
 
@@ -3211,9 +3192,21 @@ static void __setscheduler(struct rq *rq, struct task_struct *p,
 	 * getparam()/getattr() don't report silly values for !rt tasks.
 	 */
 	p->rt_priority = attr->sched_priority;
-
 	p->normal_prio = normal_prio(p);
-	p->prio = rt_mutex_getprio(p);
+	set_load_weight(p);
+}
+
+/* Actually do priority change: must hold pi & rq lock. */
+static void __setscheduler(struct rq *rq, struct task_struct *p,
+			   const struct sched_attr *attr)
+{
+	__setscheduler_params(p, attr);
+
+	/*
+	 * If we get here, there was no pi waiters boosting the
+	 * task. It is safe to use the normal prio.
+	 */
+	p->prio = normal_prio(p);
 
 	if (dl_prio(p->prio))
 		p->sched_class = &dl_sched_class;
@@ -3221,8 +3214,6 @@ static void __setscheduler(struct rq *rq, struct task_struct *p,
 		p->sched_class = &rt_sched_class;
 	else
 		p->sched_class = &fair_sched_class;
-
-	set_load_weight(p);
 }
 
 static void
@@ -3275,6 +3266,8 @@ static int __sched_setscheduler(struct task_struct *p,
 				const struct sched_attr *attr,
 				bool user)
 {
+	int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 :
+		      MAX_RT_PRIO - 1 - attr->sched_priority;
 	int retval, oldprio, oldpolicy = -1, on_rq, running;
 	int policy = attr->sched_policy;
 	unsigned long flags;
@@ -3319,7 +3312,7 @@ recheck:
 	 */
 	if (user && !capable(CAP_SYS_NICE)) {
 		if (fair_policy(policy)) {
-			if (attr->sched_nice < TASK_NICE(p) &&
+			if (attr->sched_nice < task_nice(p) &&
 			    !can_nice(p, attr->sched_nice))
 				return -EPERM;
 		}
@@ -3338,12 +3331,21 @@ recheck:
 			return -EPERM;
 		}
 
+		/*
+		 * Can't set/change SCHED_DEADLINE policy at all for now
+		 * (safest behavior); in the future we would like to allow
+		 * unprivileged DL tasks to increase their relative deadline
+		 * or reduce their runtime (both ways reducing utilization)
+		 */
+		if (dl_policy(policy))
+			return -EPERM;
+
 		/*
 		 * Treat SCHED_IDLE as nice 20. Only allow a switch to
 		 * SCHED_NORMAL if the RLIMIT_NICE would normally permit it.
 		 */
 		if (p->policy == SCHED_IDLE && policy != SCHED_IDLE) {
-			if (!can_nice(p, TASK_NICE(p)))
+			if (!can_nice(p, task_nice(p)))
 				return -EPERM;
 		}
 
@@ -3380,16 +3382,18 @@ recheck:
 	}
 
 	/*
-	 * If not changing anything there's no need to proceed further:
+	 * If not changing anything there's no need to proceed further,
+	 * but store a possible modification of reset_on_fork.
 	 */
 	if (unlikely(policy == p->policy)) {
-		if (fair_policy(policy) && attr->sched_nice != TASK_NICE(p))
+		if (fair_policy(policy) && attr->sched_nice != task_nice(p))
 			goto change;
 		if (rt_policy(policy) && attr->sched_priority != p->rt_priority)
 			goto change;
 		if (dl_policy(policy))
 			goto change;
 
+		p->sched_reset_on_fork = reset_on_fork;
 		task_rq_unlock(rq, p, &flags);
 		return 0;
 	}
@@ -3443,6 +3447,24 @@ change:
 		return -EBUSY;
 	}
 
+	p->sched_reset_on_fork = reset_on_fork;
+	oldprio = p->prio;
+
+	/*
+	 * Special case for priority boosted tasks.
+	 *
+	 * If the new priority is lower or equal (user space view)
+	 * than the current (boosted) priority, we just store the new
+	 * normal parameters and do not touch the scheduler class and
+	 * the runqueue. This will be done when the task deboost
+	 * itself.
+	 */
+	if (rt_mutex_check_prio(p, newprio)) {
+		__setscheduler_params(p, attr);
+		task_rq_unlock(rq, p, &flags);
+		return 0;
+	}
+
 	on_rq = p->on_rq;
 	running = task_current(rq, p);
 	if (on_rq)
@@ -3450,16 +3472,18 @@ change:
 	if (running)
 		p->sched_class->put_prev_task(rq, p);
 
-	p->sched_reset_on_fork = reset_on_fork;
-
-	oldprio = p->prio;
 	prev_class = p->sched_class;
 	__setscheduler(rq, p, attr);
 
 	if (running)
 		p->sched_class->set_curr_task(rq);
-	if (on_rq)
-		enqueue_task(rq, p, 0);
+	if (on_rq) {
+		/*
+		 * We enqueue to tail when the priority of a task is
+		 * increased (user space view).
+		 */
+		enqueue_task(rq, p, oldprio <= p->prio ? ENQUEUE_HEAD : 0);
+	}
 
 	check_class_changed(rq, p, prev_class, oldprio);
 	task_rq_unlock(rq, p, &flags);
@@ -3615,7 +3639,7 @@ static int sched_copy_attr(struct sched_attr __user *uattr,
 	 * XXX: do we want to be lenient like existing syscalls; or do we want
 	 * to be strict and return an error on out-of-bounds values?
 	 */
-	attr->sched_nice = clamp(attr->sched_nice, -20, 19);
+	attr->sched_nice = clamp(attr->sched_nice, MIN_NICE, MAX_NICE);
 
 out:
 	return ret;
@@ -3836,7 +3860,7 @@ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
 	else if (task_has_rt_policy(p))
 		attr.sched_priority = p->rt_priority;
 	else
-		attr.sched_nice = TASK_NICE(p);
+		attr.sched_nice = task_nice(p);
 
 	rcu_read_unlock();
 
@@ -4474,6 +4498,7 @@ void init_idle(struct task_struct *idle, int cpu)
 	rcu_read_unlock();
 
 	rq->curr = rq->idle = idle;
+	idle->on_rq = 1;
#if defined(CONFIG_SMP)
 	idle->on_cpu = 1;
 #endif
@@ -4693,8 +4718,10 @@ void idle_task_exit(void)
 
 	BUG_ON(cpu_online(smp_processor_id()));
 
-	if (mm != &init_mm)
+	if (mm != &init_mm) {
 		switch_mm(mm, &init_mm, current);
+		finish_arch_post_lock_switch();
+	}
 	mmdrop(mm);
 }
 
@@ -4712,6 +4739,22 @@ static void calc_load_migrate(struct rq *rq)
 		atomic_long_add(delta, &calc_load_tasks);
 }
 
+static void put_prev_task_fake(struct rq *rq, struct task_struct *prev)
+{
+}
+
+static const struct sched_class fake_sched_class = {
+	.put_prev_task = put_prev_task_fake,
+};
+
+static struct task_struct fake_task = {
+	/*
+	 * Avoid pull_{rt,dl}_task()
+	 */
+	.prio = MAX_PRIO + 1,
+	.sched_class = &fake_sched_class,
+};
+
 /*
  * Migrate all tasks from the rq, sleeping tasks will be migrated by
  * try_to_wake_up()->select_task_rq().
@@ -4752,7 +4795,7 @@ static void migrate_tasks(unsigned int dead_cpu)
 		if (rq->nr_running == 1)
 			break;
 
-		next = pick_next_task(rq);
+		next = pick_next_task(rq, &fake_task);
 		BUG_ON(!next);
 		next->sched_class->put_prev_task(rq, next);
 
@@ -4842,7 +4885,7 @@ set_table_entry(struct ctl_table *entry,
 static struct ctl_table *
 sd_alloc_ctl_domain_table(struct sched_domain *sd)
 {
-	struct ctl_table *table = sd_alloc_ctl_entry(13);
+	struct ctl_table *table = sd_alloc_ctl_entry(14);
 
 	if (table == NULL)
 		return NULL;
@@ -4870,9 +4913,12 @@ sd_alloc_ctl_domain_table(struct sched_domain *sd)
 		sizeof(int), 0644, proc_dointvec_minmax, false);
 	set_table_entry(&table[10], "flags", &sd->flags,
 		sizeof(int), 0644, proc_dointvec_minmax, false);
-	set_table_entry(&table[11], "name", sd->name,
+	set_table_entry(&table[11], "max_newidle_lb_cost",
+		&sd->max_newidle_lb_cost,
+		sizeof(long), 0644, proc_doulongvec_minmax, false);
+	set_table_entry(&table[12], "name", sd->name,
 		CORENAME_MAX_SIZE, 0444, proc_dostring, false);
-	/* &table[12] is terminator */
+	/* &table[13] is terminator */
 
 	return table;
 }
@@ -6849,7 +6895,6 @@ void __init sched_init(void)
 
 		rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime;
 #ifdef CONFIG_RT_GROUP_SCHED
-		INIT_LIST_HEAD(&rq->leaf_rt_rq_list);
 		init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL);
 #endif
 
@@ -6938,7 +6983,8 @@ void __might_sleep(const char *file, int line, int preempt_offset)
 	static unsigned long prev_jiffy; /* ratelimiting */
 
 	rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */
-	if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) ||
+	if ((preempt_count_equals(preempt_offset) && !irqs_disabled() &&
+	     !is_idle_task(current)) ||
 	    system_state != SYSTEM_RUNNING || oops_in_progress)
 		return;
 	if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
@@ -6956,6 +7002,13 @@ void __might_sleep(const char *file, int line, int preempt_offset)
 	debug_show_held_locks(current);
 	if (irqs_disabled())
 		print_irqtrace_events(current);
+#ifdef CONFIG_DEBUG_PREEMPT
+	if (!preempt_count_equals(preempt_offset)) {
+		pr_err("Preemption disabled at:");
+		print_ip_sym(current->preempt_disable_ip);
+		pr_cont("\n");
+	}
+#endif
 	dump_stack();
 }
 EXPORT_SYMBOL(__might_sleep);
@@ -7009,7 +7062,7 @@ void normalize_rt_tasks(void)
 		 * Renice negative nice level userspace
 		 * tasks back to 0:
 		 */
-		if (TASK_NICE(p) < 0 && p->mm)
+		if (task_nice(p) < 0 && p->mm)
 			set_user_nice(p, 0);
 		continue;
 	}