Diffstat (limited to 'kernel/sched/core.c')
| -rw-r--r-- | kernel/sched/core.c | 292 |
1 file changed, 149 insertions, 143 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 6edbef296ece..268a45ea238c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
| @@ -73,6 +73,7 @@ | |||
| 73 | #include <linux/init_task.h> | 73 | #include <linux/init_task.h> |
| 74 | #include <linux/binfmts.h> | 74 | #include <linux/binfmts.h> |
| 75 | #include <linux/context_tracking.h> | 75 | #include <linux/context_tracking.h> |
| 76 | #include <linux/compiler.h> | ||
| 76 | 77 | ||
| 77 | #include <asm/switch_to.h> | 78 | #include <asm/switch_to.h> |
| 78 | #include <asm/tlb.h> | 79 | #include <asm/tlb.h> |
| @@ -432,7 +433,7 @@ void hrtick_start(struct rq *rq, u64 delay) | |||
| 432 | if (rq == this_rq()) { | 433 | if (rq == this_rq()) { |
| 433 | __hrtick_restart(rq); | 434 | __hrtick_restart(rq); |
| 434 | } else if (!rq->hrtick_csd_pending) { | 435 | } else if (!rq->hrtick_csd_pending) { |
| 435 | __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd, 0); | 436 | smp_call_function_single_async(cpu_of(rq), &rq->hrtick_csd); |
| 436 | rq->hrtick_csd_pending = 1; | 437 | rq->hrtick_csd_pending = 1; |
| 437 | } | 438 | } |
| 438 | } | 439 | } |
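The only functional content of the hunk above is the rename of __smp_call_function_single() to smp_call_function_single_async(); the scheduler still fires the run-queue's pre-initialised hrtick_csd on the remote CPU without waiting for it. A minimal sketch of that asynchronous-CSD pattern, assuming the struct call_single_data layout of this kernel generation (the demo_* names are illustrative, not from the patch):

```c
#include <linux/smp.h>

/* runs in IPI context on the target cpu */
static void demo_ipi_func(void *info)
{
}

/*
 * A caller-owned csd must stay valid, and must not be resubmitted,
 * until its callback has run on the target cpu -- which is what the
 * hrtick_csd_pending flag above guarantees for the scheduler's csd.
 */
static struct call_single_data demo_csd = {
	.func	= demo_ipi_func,
};

static void demo_kick_cpu(int cpu)
{
	/* queue demo_csd on @cpu and return without waiting */
	smp_call_function_single_async(cpu, &demo_csd);
}
```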
| @@ -555,12 +556,15 @@ void resched_cpu(int cpu) | |||
| 555 | * selecting an idle cpu will add more delays to the timers than intended | 556 | * selecting an idle cpu will add more delays to the timers than intended |
| 556 | * (as that cpu's timer base may not be uptodate wrt jiffies etc). | 557 | * (as that cpu's timer base may not be uptodate wrt jiffies etc). |
| 557 | */ | 558 | */ |
| 558 | int get_nohz_timer_target(void) | 559 | int get_nohz_timer_target(int pinned) |
| 559 | { | 560 | { |
| 560 | int cpu = smp_processor_id(); | 561 | int cpu = smp_processor_id(); |
| 561 | int i; | 562 | int i; |
| 562 | struct sched_domain *sd; | 563 | struct sched_domain *sd; |
| 563 | 564 | ||
| 565 | if (pinned || !get_sysctl_timer_migration() || !idle_cpu(cpu)) | ||
| 566 | return cpu; | ||
| 567 | |||
| 564 | rcu_read_lock(); | 568 | rcu_read_lock(); |
| 565 | for_each_domain(cpu, sd) { | 569 | for_each_domain(cpu, sd) { |
| 566 | for_each_cpu(i, sched_domain_span(sd)) { | 570 | for_each_cpu(i, sched_domain_span(sd)) { |
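get_nohz_timer_target() now takes a pinned argument and applies the "should this timer migrate at all?" policy itself (a pinned timer, the timer-migration sysctl being off, or the current CPU not being idle all mean: stay put), so callers no longer open-code that guard. A sketch of the caller side under that assumption; demo_pick_timer_cpu() is illustrative, not the verbatim timer code:

```c
#include <linux/sched.h>

/*
 * Callers used to open-code:
 *     if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu))
 *             cpu = get_nohz_timer_target();
 * That guard is the negation of the early return added above, so the
 * call site collapses to:
 */
static int demo_pick_timer_cpu(int pinned)
{
	return get_nohz_timer_target(pinned);
}
```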
| @@ -823,19 +827,13 @@ static void update_rq_clock_task(struct rq *rq, s64 delta) | |||
| 823 | #endif | 827 | #endif |
| 824 | #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING | 828 | #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING |
| 825 | if (static_key_false((&paravirt_steal_rq_enabled))) { | 829 | if (static_key_false((&paravirt_steal_rq_enabled))) { |
| 826 | u64 st; | ||
| 827 | |||
| 828 | steal = paravirt_steal_clock(cpu_of(rq)); | 830 | steal = paravirt_steal_clock(cpu_of(rq)); |
| 829 | steal -= rq->prev_steal_time_rq; | 831 | steal -= rq->prev_steal_time_rq; |
| 830 | 832 | ||
| 831 | if (unlikely(steal > delta)) | 833 | if (unlikely(steal > delta)) |
| 832 | steal = delta; | 834 | steal = delta; |
| 833 | 835 | ||
| 834 | st = steal_ticks(steal); | ||
| 835 | steal = st * TICK_NSEC; | ||
| 836 | |||
| 837 | rq->prev_steal_time_rq += steal; | 836 | rq->prev_steal_time_rq += steal; |
| 838 | |||
| 839 | delta -= steal; | 837 | delta -= steal; |
| 840 | } | 838 | } |
| 841 | #endif | 839 | #endif |
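Dropping steal_ticks() means stolen time is now charged at nanosecond resolution instead of being rounded down to whole ticks before it is subtracted from delta and added to prev_steal_time_rq. A small stand-alone illustration of what the old rounding discarded, assuming TICK_NSEC for an HZ=1000 build:

```c
#include <stdio.h>

int main(void)
{
	const unsigned long long TICK_NSEC = 1000000ULL;  /* assumes HZ=1000 */
	unsigned long long steal = 1500000ULL;            /* 1.5 ms stolen   */

	/* old: steal_ticks() rounded down to whole ticks before accounting */
	unsigned long long old_charged = (steal / TICK_NSEC) * TICK_NSEC;
	/* new: the raw nanoseconds are charged and subtracted from delta   */
	unsigned long long new_charged = steal;

	printf("old %llu ns, new %llu ns, previously dropped %llu ns\n",
	       old_charged, new_charged, new_charged - old_charged);
	return 0;
}
```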
| @@ -1745,8 +1743,10 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) | |||
| 1745 | p->numa_scan_seq = p->mm ? p->mm->numa_scan_seq : 0; | 1743 | p->numa_scan_seq = p->mm ? p->mm->numa_scan_seq : 0; |
| 1746 | p->numa_scan_period = sysctl_numa_balancing_scan_delay; | 1744 | p->numa_scan_period = sysctl_numa_balancing_scan_delay; |
| 1747 | p->numa_work.next = &p->numa_work; | 1745 | p->numa_work.next = &p->numa_work; |
| 1748 | p->numa_faults = NULL; | 1746 | p->numa_faults_memory = NULL; |
| 1749 | p->numa_faults_buffer = NULL; | 1747 | p->numa_faults_buffer_memory = NULL; |
| 1748 | p->last_task_numa_placement = 0; | ||
| 1749 | p->last_sum_exec_runtime = 0; | ||
| 1750 | 1750 | ||
| 1751 | INIT_LIST_HEAD(&p->numa_entry); | 1751 | INIT_LIST_HEAD(&p->numa_entry); |
| 1752 | p->numa_group = NULL; | 1752 | p->numa_group = NULL; |
| @@ -2149,8 +2149,6 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) | |||
| 2149 | if (mm) | 2149 | if (mm) |
| 2150 | mmdrop(mm); | 2150 | mmdrop(mm); |
| 2151 | if (unlikely(prev_state == TASK_DEAD)) { | 2151 | if (unlikely(prev_state == TASK_DEAD)) { |
| 2152 | task_numa_free(prev); | ||
| 2153 | |||
| 2154 | if (prev->sched_class->task_dead) | 2152 | if (prev->sched_class->task_dead) |
| 2155 | prev->sched_class->task_dead(prev); | 2153 | prev->sched_class->task_dead(prev); |
| 2156 | 2154 | ||
| @@ -2167,13 +2165,6 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) | |||
| 2167 | 2165 | ||
| 2168 | #ifdef CONFIG_SMP | 2166 | #ifdef CONFIG_SMP |
| 2169 | 2167 | ||
| 2170 | /* assumes rq->lock is held */ | ||
| 2171 | static inline void pre_schedule(struct rq *rq, struct task_struct *prev) | ||
| 2172 | { | ||
| 2173 | if (prev->sched_class->pre_schedule) | ||
| 2174 | prev->sched_class->pre_schedule(rq, prev); | ||
| 2175 | } | ||
| 2176 | |||
| 2177 | /* rq->lock is NOT held, but preemption is disabled */ | 2168 | /* rq->lock is NOT held, but preemption is disabled */ |
| 2178 | static inline void post_schedule(struct rq *rq) | 2169 | static inline void post_schedule(struct rq *rq) |
| 2179 | { | 2170 | { |
| @@ -2191,10 +2182,6 @@ static inline void post_schedule(struct rq *rq) | |||
| 2191 | 2182 | ||
| 2192 | #else | 2183 | #else |
| 2193 | 2184 | ||
| 2194 | static inline void pre_schedule(struct rq *rq, struct task_struct *p) | ||
| 2195 | { | ||
| 2196 | } | ||
| 2197 | |||
| 2198 | static inline void post_schedule(struct rq *rq) | 2185 | static inline void post_schedule(struct rq *rq) |
| 2199 | { | 2186 | { |
| 2200 | } | 2187 | } |
| @@ -2510,8 +2497,13 @@ void __kprobes preempt_count_add(int val) | |||
| 2510 | DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= | 2497 | DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= |
| 2511 | PREEMPT_MASK - 10); | 2498 | PREEMPT_MASK - 10); |
| 2512 | #endif | 2499 | #endif |
| 2513 | if (preempt_count() == val) | 2500 | if (preempt_count() == val) { |
| 2514 | trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1)); | 2501 | unsigned long ip = get_parent_ip(CALLER_ADDR1); |
| 2502 | #ifdef CONFIG_DEBUG_PREEMPT | ||
| 2503 | current->preempt_disable_ip = ip; | ||
| 2504 | #endif | ||
| 2505 | trace_preempt_off(CALLER_ADDR0, ip); | ||
| 2506 | } | ||
| 2515 | } | 2507 | } |
| 2516 | EXPORT_SYMBOL(preempt_count_add); | 2508 | EXPORT_SYMBOL(preempt_count_add); |
| 2517 | 2509 | ||
| @@ -2554,6 +2546,13 @@ static noinline void __schedule_bug(struct task_struct *prev) | |||
| 2554 | print_modules(); | 2546 | print_modules(); |
| 2555 | if (irqs_disabled()) | 2547 | if (irqs_disabled()) |
| 2556 | print_irqtrace_events(prev); | 2548 | print_irqtrace_events(prev); |
| 2549 | #ifdef CONFIG_DEBUG_PREEMPT | ||
| 2550 | if (in_atomic_preempt_off()) { | ||
| 2551 | pr_err("Preemption disabled at:"); | ||
| 2552 | print_ip_sym(current->preempt_disable_ip); | ||
| 2553 | pr_cont("\n"); | ||
| 2554 | } | ||
| 2555 | #endif | ||
| 2557 | dump_stack(); | 2556 | dump_stack(); |
| 2558 | add_taint(TAINT_WARN, LOCKDEP_STILL_OK); | 2557 | add_taint(TAINT_WARN, LOCKDEP_STILL_OK); |
| 2559 | } | 2558 | } |
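The preempt_count_add() hunk above and this __schedule_bug() hunk work as a pair under CONFIG_DEBUG_PREEMPT: the address of the outermost preempt-disabling caller is saved in current->preempt_disable_ip, and a later scheduling-while-atomic report prints it, so the splat points at the lock or preempt_disable() site rather than only the schedule() backtrace. An intentionally broken sketch of the kind of code this helps diagnose (illustrative, do not copy):

```c
#include <linux/spinlock.h>
#include <linux/mutex.h>

static DEFINE_SPINLOCK(demo_lock);
static DEFINE_MUTEX(demo_mutex);

/*
 * Broken on purpose: sleeping with preemption disabled.  With
 * CONFIG_DEBUG_PREEMPT the resulting splat now also prints
 * "Preemption disabled at:" followed by the address of the
 * spin_lock() below, recorded by preempt_count_add().
 */
static void demo_broken(void)
{
	spin_lock(&demo_lock);		/* disables preemption, IP recorded */
	mutex_lock(&demo_mutex);	/* may sleep -> might_sleep()/__schedule_bug() */
	mutex_unlock(&demo_mutex);
	spin_unlock(&demo_lock);
}
```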
| @@ -2577,36 +2576,34 @@ static inline void schedule_debug(struct task_struct *prev) | |||
| 2577 | schedstat_inc(this_rq(), sched_count); | 2576 | schedstat_inc(this_rq(), sched_count); |
| 2578 | } | 2577 | } |
| 2579 | 2578 | ||
| 2580 | static void put_prev_task(struct rq *rq, struct task_struct *prev) | ||
| 2581 | { | ||
| 2582 | if (prev->on_rq || rq->skip_clock_update < 0) | ||
| 2583 | update_rq_clock(rq); | ||
| 2584 | prev->sched_class->put_prev_task(rq, prev); | ||
| 2585 | } | ||
| 2586 | |||
| 2587 | /* | 2579 | /* |
| 2588 | * Pick up the highest-prio task: | 2580 | * Pick up the highest-prio task: |
| 2589 | */ | 2581 | */ |
| 2590 | static inline struct task_struct * | 2582 | static inline struct task_struct * |
| 2591 | pick_next_task(struct rq *rq) | 2583 | pick_next_task(struct rq *rq, struct task_struct *prev) |
| 2592 | { | 2584 | { |
| 2593 | const struct sched_class *class; | 2585 | const struct sched_class *class = &fair_sched_class; |
| 2594 | struct task_struct *p; | 2586 | struct task_struct *p; |
| 2595 | 2587 | ||
| 2596 | /* | 2588 | /* |
| 2597 | * Optimization: we know that if all tasks are in | 2589 | * Optimization: we know that if all tasks are in |
| 2598 | * the fair class we can call that function directly: | 2590 | * the fair class we can call that function directly: |
| 2599 | */ | 2591 | */ |
| 2600 | if (likely(rq->nr_running == rq->cfs.h_nr_running)) { | 2592 | if (likely(prev->sched_class == class && |
| 2601 | p = fair_sched_class.pick_next_task(rq); | 2593 | rq->nr_running == rq->cfs.h_nr_running)) { |
| 2602 | if (likely(p)) | 2594 | p = fair_sched_class.pick_next_task(rq, prev); |
| 2595 | if (likely(p && p != RETRY_TASK)) | ||
| 2603 | return p; | 2596 | return p; |
| 2604 | } | 2597 | } |
| 2605 | 2598 | ||
| 2599 | again: | ||
| 2606 | for_each_class(class) { | 2600 | for_each_class(class) { |
| 2607 | p = class->pick_next_task(rq); | 2601 | p = class->pick_next_task(rq, prev); |
| 2608 | if (p) | 2602 | if (p) { |
| 2603 | if (unlikely(p == RETRY_TASK)) | ||
| 2604 | goto again; | ||
| 2609 | return p; | 2605 | return p; |
| 2606 | } | ||
| 2610 | } | 2607 | } |
| 2611 | 2608 | ||
| 2612 | BUG(); /* the idle class will always have a runnable task */ | 2609 | BUG(); /* the idle class will always have a runnable task */ |
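pick_next_task() now passes prev down to the class and expects the class to put the previous task itself; it may also get RETRY_TASK back when the class had to drop rq->lock (typically to pull work) and the pick must restart from the highest class. That is why the explicit pre_schedule()/put_prev_task() calls disappear from __schedule() in the next hunk. A sketch of the contract from a class's point of view; the demo_* helpers are hypothetical, and put_prev_task() stands in for the wrapper removed from core.c above, which called prev->sched_class->put_prev_task() after an update_rq_clock():

```c
/* Sketch against kernel/sched internals; demo_* helpers are hypothetical. */
static bool demo_nr_runnable(struct rq *rq);
static bool demo_lock_was_dropped(struct rq *rq);
static struct task_struct *demo_first_runnable(struct rq *rq);

static struct task_struct *
demo_pick_next_task(struct rq *rq, struct task_struct *prev)
{
	if (!demo_nr_runnable(rq))
		return NULL;		/* let the next lower class try */

	/*
	 * If rq->lock had to be dropped (e.g. to pull tasks), a higher
	 * priority task may have appeared; RETRY_TASK asks the core
	 * pick_next_task() to restart the class loop from the top.
	 */
	if (demo_lock_was_dropped(rq))
		return RETRY_TASK;

	/* Classes now put the previous task themselves. */
	put_prev_task(rq, prev);
	return demo_first_runnable(rq);
}
```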
| @@ -2700,13 +2697,10 @@ need_resched: | |||
| 2700 | switch_count = &prev->nvcsw; | 2697 | switch_count = &prev->nvcsw; |
| 2701 | } | 2698 | } |
| 2702 | 2699 | ||
| 2703 | pre_schedule(rq, prev); | 2700 | if (prev->on_rq || rq->skip_clock_update < 0) |
| 2704 | 2701 | update_rq_clock(rq); | |
| 2705 | if (unlikely(!rq->nr_running)) | ||
| 2706 | idle_balance(cpu, rq); | ||
| 2707 | 2702 | ||
| 2708 | put_prev_task(rq, prev); | 2703 | next = pick_next_task(rq, prev); |
| 2709 | next = pick_next_task(rq); | ||
| 2710 | clear_tsk_need_resched(prev); | 2704 | clear_tsk_need_resched(prev); |
| 2711 | clear_preempt_need_resched(); | 2705 | clear_preempt_need_resched(); |
| 2712 | rq->skip_clock_update = 0; | 2706 | rq->skip_clock_update = 0; |
| @@ -2852,52 +2846,6 @@ int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags, | |||
| 2852 | } | 2846 | } |
| 2853 | EXPORT_SYMBOL(default_wake_function); | 2847 | EXPORT_SYMBOL(default_wake_function); |
| 2854 | 2848 | ||
| 2855 | static long __sched | ||
| 2856 | sleep_on_common(wait_queue_head_t *q, int state, long timeout) | ||
| 2857 | { | ||
| 2858 | unsigned long flags; | ||
| 2859 | wait_queue_t wait; | ||
| 2860 | |||
| 2861 | init_waitqueue_entry(&wait, current); | ||
| 2862 | |||
| 2863 | __set_current_state(state); | ||
| 2864 | |||
| 2865 | spin_lock_irqsave(&q->lock, flags); | ||
| 2866 | __add_wait_queue(q, &wait); | ||
| 2867 | spin_unlock(&q->lock); | ||
| 2868 | timeout = schedule_timeout(timeout); | ||
| 2869 | spin_lock_irq(&q->lock); | ||
| 2870 | __remove_wait_queue(q, &wait); | ||
| 2871 | spin_unlock_irqrestore(&q->lock, flags); | ||
| 2872 | |||
| 2873 | return timeout; | ||
| 2874 | } | ||
| 2875 | |||
| 2876 | void __sched interruptible_sleep_on(wait_queue_head_t *q) | ||
| 2877 | { | ||
| 2878 | sleep_on_common(q, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); | ||
| 2879 | } | ||
| 2880 | EXPORT_SYMBOL(interruptible_sleep_on); | ||
| 2881 | |||
| 2882 | long __sched | ||
| 2883 | interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout) | ||
| 2884 | { | ||
| 2885 | return sleep_on_common(q, TASK_INTERRUPTIBLE, timeout); | ||
| 2886 | } | ||
| 2887 | EXPORT_SYMBOL(interruptible_sleep_on_timeout); | ||
| 2888 | |||
| 2889 | void __sched sleep_on(wait_queue_head_t *q) | ||
| 2890 | { | ||
| 2891 | sleep_on_common(q, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); | ||
| 2892 | } | ||
| 2893 | EXPORT_SYMBOL(sleep_on); | ||
| 2894 | |||
| 2895 | long __sched sleep_on_timeout(wait_queue_head_t *q, long timeout) | ||
| 2896 | { | ||
| 2897 | return sleep_on_common(q, TASK_UNINTERRUPTIBLE, timeout); | ||
| 2898 | } | ||
| 2899 | EXPORT_SYMBOL(sleep_on_timeout); | ||
| 2900 | |||
| 2901 | #ifdef CONFIG_RT_MUTEXES | 2849 | #ifdef CONFIG_RT_MUTEXES |
| 2902 | 2850 | ||
| 2903 | /* | 2851 | /* |
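The hunk above deletes the sleep_on() family outright; these helpers were inherently racy because they sleep without re-checking any condition, so a wakeup issued just before the sleep is lost. The established replacement is an explicit condition plus the wait_event*() helpers, sketched below with illustrative demo_* names:

```c
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(demo_wq);
static int demo_ready;		/* the condition the waiter checks */

static int demo_wait(void)
{
	/*
	 * Unlike interruptible_sleep_on(), this re-tests demo_ready
	 * under the waitqueue machinery, so a wake_up() racing with
	 * entry here is not lost.  Returns -ERESTARTSYS if interrupted.
	 */
	return wait_event_interruptible(demo_wq, demo_ready);
}

static void demo_signal(void)
{
	demo_ready = 1;
	wake_up(&demo_wq);
}
```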
| @@ -2908,7 +2856,8 @@ EXPORT_SYMBOL(sleep_on_timeout); | |||
| 2908 | * This function changes the 'effective' priority of a task. It does | 2856 | * This function changes the 'effective' priority of a task. It does |
| 2909 | * not touch ->normal_prio like __setscheduler(). | 2857 | * not touch ->normal_prio like __setscheduler(). |
| 2910 | * | 2858 | * |
| 2911 | * Used by the rt_mutex code to implement priority inheritance logic. | 2859 | * Used by the rt_mutex code to implement priority inheritance |
| 2860 | * logic. Call site only calls if the priority of the task changed. | ||
| 2912 | */ | 2861 | */ |
| 2913 | void rt_mutex_setprio(struct task_struct *p, int prio) | 2862 | void rt_mutex_setprio(struct task_struct *p, int prio) |
| 2914 | { | 2863 | { |
| @@ -2998,7 +2947,7 @@ void set_user_nice(struct task_struct *p, long nice) | |||
| 2998 | unsigned long flags; | 2947 | unsigned long flags; |
| 2999 | struct rq *rq; | 2948 | struct rq *rq; |
| 3000 | 2949 | ||
| 3001 | if (TASK_NICE(p) == nice || nice < -20 || nice > 19) | 2950 | if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE) |
| 3002 | return; | 2951 | return; |
| 3003 | /* | 2952 | /* |
| 3004 | * We have to be careful, if called from sys_setpriority(), | 2953 | * We have to be careful, if called from sys_setpriority(), |
| @@ -3076,11 +3025,11 @@ SYSCALL_DEFINE1(nice, int, increment) | |||
| 3076 | if (increment > 40) | 3025 | if (increment > 40) |
| 3077 | increment = 40; | 3026 | increment = 40; |
| 3078 | 3027 | ||
| 3079 | nice = TASK_NICE(current) + increment; | 3028 | nice = task_nice(current) + increment; |
| 3080 | if (nice < -20) | 3029 | if (nice < MIN_NICE) |
| 3081 | nice = -20; | 3030 | nice = MIN_NICE; |
| 3082 | if (nice > 19) | 3031 | if (nice > MAX_NICE) |
| 3083 | nice = 19; | 3032 | nice = MAX_NICE; |
| 3084 | 3033 | ||
| 3085 | if (increment < 0 && !can_nice(current, nice)) | 3034 | if (increment < 0 && !can_nice(current, nice)) |
| 3086 | return -EPERM; | 3035 | return -EPERM; |
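The hard-coded -20/19 bounds become MIN_NICE/MAX_NICE and TASK_NICE() becomes the task_nice() helper throughout this file (the old task_nice() definition is removed in the next hunk). With MIN_NICE = -20 and MAX_NICE = 19, the bounds checks in sys_nice() are equivalent to a single clamp, as the small stand-alone demo below shows:

```c
#include <stdio.h>

#define MIN_NICE	(-20)
#define MAX_NICE	19
/* simplified stand-in for the kernel's type-checked clamp() */
#define clamp(val, lo, hi) ((val) < (lo) ? (lo) : ((val) > (hi) ? (hi) : (val)))

int main(void)
{
	int cur_nice = -15, increment = 40;	/* increment already capped at 40 */
	int nice = clamp(cur_nice + increment, MIN_NICE, MAX_NICE);

	printf("resulting nice: %d\n", nice);	/* prints 19 */
	return 0;
}
```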
| @@ -3109,18 +3058,6 @@ int task_prio(const struct task_struct *p) | |||
| 3109 | } | 3058 | } |
| 3110 | 3059 | ||
| 3111 | /** | 3060 | /** |
| 3112 | * task_nice - return the nice value of a given task. | ||
| 3113 | * @p: the task in question. | ||
| 3114 | * | ||
| 3115 | * Return: The nice value [ -20 ... 0 ... 19 ]. | ||
| 3116 | */ | ||
| 3117 | int task_nice(const struct task_struct *p) | ||
| 3118 | { | ||
| 3119 | return TASK_NICE(p); | ||
| 3120 | } | ||
| 3121 | EXPORT_SYMBOL(task_nice); | ||
| 3122 | |||
| 3123 | /** | ||
| 3124 | * idle_cpu - is a given cpu idle currently? | 3061 | * idle_cpu - is a given cpu idle currently? |
| 3125 | * @cpu: the processor in question. | 3062 | * @cpu: the processor in question. |
| 3126 | * | 3063 | * |
| @@ -3189,9 +3126,8 @@ __setparam_dl(struct task_struct *p, const struct sched_attr *attr) | |||
| 3189 | dl_se->dl_new = 1; | 3126 | dl_se->dl_new = 1; |
| 3190 | } | 3127 | } |
| 3191 | 3128 | ||
| 3192 | /* Actually do priority change: must hold pi & rq lock. */ | 3129 | static void __setscheduler_params(struct task_struct *p, |
| 3193 | static void __setscheduler(struct rq *rq, struct task_struct *p, | 3130 | const struct sched_attr *attr) |
| 3194 | const struct sched_attr *attr) | ||
| 3195 | { | 3131 | { |
| 3196 | int policy = attr->sched_policy; | 3132 | int policy = attr->sched_policy; |
| 3197 | 3133 | ||
| @@ -3211,9 +3147,21 @@ static void __setscheduler(struct rq *rq, struct task_struct *p, | |||
| 3211 | * getparam()/getattr() don't report silly values for !rt tasks. | 3147 | * getparam()/getattr() don't report silly values for !rt tasks. |
| 3212 | */ | 3148 | */ |
| 3213 | p->rt_priority = attr->sched_priority; | 3149 | p->rt_priority = attr->sched_priority; |
| 3214 | |||
| 3215 | p->normal_prio = normal_prio(p); | 3150 | p->normal_prio = normal_prio(p); |
| 3216 | p->prio = rt_mutex_getprio(p); | 3151 | set_load_weight(p); |
| 3152 | } | ||
| 3153 | |||
| 3154 | /* Actually do priority change: must hold pi & rq lock. */ | ||
| 3155 | static void __setscheduler(struct rq *rq, struct task_struct *p, | ||
| 3156 | const struct sched_attr *attr) | ||
| 3157 | { | ||
| 3158 | __setscheduler_params(p, attr); | ||
| 3159 | |||
| 3160 | /* | ||
| 3161 | * If we get here, there was no pi waiters boosting the | ||
| 3162 | * task. It is safe to use the normal prio. | ||
| 3163 | */ | ||
| 3164 | p->prio = normal_prio(p); | ||
| 3217 | 3165 | ||
| 3218 | if (dl_prio(p->prio)) | 3166 | if (dl_prio(p->prio)) |
| 3219 | p->sched_class = &dl_sched_class; | 3167 | p->sched_class = &dl_sched_class; |
| @@ -3221,8 +3169,6 @@ static void __setscheduler(struct rq *rq, struct task_struct *p, | |||
| 3221 | p->sched_class = &rt_sched_class; | 3169 | p->sched_class = &rt_sched_class; |
| 3222 | else | 3170 | else |
| 3223 | p->sched_class = &fair_sched_class; | 3171 | p->sched_class = &fair_sched_class; |
| 3224 | |||
| 3225 | set_load_weight(p); | ||
| 3226 | } | 3172 | } |
| 3227 | 3173 | ||
| 3228 | static void | 3174 | static void |
| @@ -3275,6 +3221,8 @@ static int __sched_setscheduler(struct task_struct *p, | |||
| 3275 | const struct sched_attr *attr, | 3221 | const struct sched_attr *attr, |
| 3276 | bool user) | 3222 | bool user) |
| 3277 | { | 3223 | { |
| 3224 | int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 : | ||
| 3225 | MAX_RT_PRIO - 1 - attr->sched_priority; | ||
| 3278 | int retval, oldprio, oldpolicy = -1, on_rq, running; | 3226 | int retval, oldprio, oldpolicy = -1, on_rq, running; |
| 3279 | int policy = attr->sched_policy; | 3227 | int policy = attr->sched_policy; |
| 3280 | unsigned long flags; | 3228 | unsigned long flags; |
| @@ -3319,7 +3267,7 @@ recheck: | |||
| 3319 | */ | 3267 | */ |
| 3320 | if (user && !capable(CAP_SYS_NICE)) { | 3268 | if (user && !capable(CAP_SYS_NICE)) { |
| 3321 | if (fair_policy(policy)) { | 3269 | if (fair_policy(policy)) { |
| 3322 | if (attr->sched_nice < TASK_NICE(p) && | 3270 | if (attr->sched_nice < task_nice(p) && |
| 3323 | !can_nice(p, attr->sched_nice)) | 3271 | !can_nice(p, attr->sched_nice)) |
| 3324 | return -EPERM; | 3272 | return -EPERM; |
| 3325 | } | 3273 | } |
| @@ -3338,12 +3286,21 @@ recheck: | |||
| 3338 | return -EPERM; | 3286 | return -EPERM; |
| 3339 | } | 3287 | } |
| 3340 | 3288 | ||
| 3289 | /* | ||
| 3290 | * Can't set/change SCHED_DEADLINE policy at all for now | ||
| 3291 | * (safest behavior); in the future we would like to allow | ||
| 3292 | * unprivileged DL tasks to increase their relative deadline | ||
| 3293 | * or reduce their runtime (both ways reducing utilization) | ||
| 3294 | */ | ||
| 3295 | if (dl_policy(policy)) | ||
| 3296 | return -EPERM; | ||
| 3297 | |||
| 3341 | /* | 3298 | /* |
| 3342 | * Treat SCHED_IDLE as nice 20. Only allow a switch to | 3299 | * Treat SCHED_IDLE as nice 20. Only allow a switch to |
| 3343 | * SCHED_NORMAL if the RLIMIT_NICE would normally permit it. | 3300 | * SCHED_NORMAL if the RLIMIT_NICE would normally permit it. |
| 3344 | */ | 3301 | */ |
| 3345 | if (p->policy == SCHED_IDLE && policy != SCHED_IDLE) { | 3302 | if (p->policy == SCHED_IDLE && policy != SCHED_IDLE) { |
| 3346 | if (!can_nice(p, TASK_NICE(p))) | 3303 | if (!can_nice(p, task_nice(p))) |
| 3347 | return -EPERM; | 3304 | return -EPERM; |
| 3348 | } | 3305 | } |
| 3349 | 3306 | ||
| @@ -3380,16 +3337,18 @@ recheck: | |||
| 3380 | } | 3337 | } |
| 3381 | 3338 | ||
| 3382 | /* | 3339 | /* |
| 3383 | * If not changing anything there's no need to proceed further: | 3340 | * If not changing anything there's no need to proceed further, |
| 3341 | * but store a possible modification of reset_on_fork. | ||
| 3384 | */ | 3342 | */ |
| 3385 | if (unlikely(policy == p->policy)) { | 3343 | if (unlikely(policy == p->policy)) { |
| 3386 | if (fair_policy(policy) && attr->sched_nice != TASK_NICE(p)) | 3344 | if (fair_policy(policy) && attr->sched_nice != task_nice(p)) |
| 3387 | goto change; | 3345 | goto change; |
| 3388 | if (rt_policy(policy) && attr->sched_priority != p->rt_priority) | 3346 | if (rt_policy(policy) && attr->sched_priority != p->rt_priority) |
| 3389 | goto change; | 3347 | goto change; |
| 3390 | if (dl_policy(policy)) | 3348 | if (dl_policy(policy)) |
| 3391 | goto change; | 3349 | goto change; |
| 3392 | 3350 | ||
| 3351 | p->sched_reset_on_fork = reset_on_fork; | ||
| 3393 | task_rq_unlock(rq, p, &flags); | 3352 | task_rq_unlock(rq, p, &flags); |
| 3394 | return 0; | 3353 | return 0; |
| 3395 | } | 3354 | } |
| @@ -3443,6 +3402,24 @@ change: | |||
| 3443 | return -EBUSY; | 3402 | return -EBUSY; |
| 3444 | } | 3403 | } |
| 3445 | 3404 | ||
| 3405 | p->sched_reset_on_fork = reset_on_fork; | ||
| 3406 | oldprio = p->prio; | ||
| 3407 | |||
| 3408 | /* | ||
| 3409 | * Special case for priority boosted tasks. | ||
| 3410 | * | ||
| 3411 | * If the new priority is lower or equal (user space view) | ||
| 3412 | * than the current (boosted) priority, we just store the new | ||
| 3413 | * normal parameters and do not touch the scheduler class and | ||
| 3414 | * the runqueue. This will be done when the task deboost | ||
| 3415 | * itself. | ||
| 3416 | */ | ||
| 3417 | if (rt_mutex_check_prio(p, newprio)) { | ||
| 3418 | __setscheduler_params(p, attr); | ||
| 3419 | task_rq_unlock(rq, p, &flags); | ||
| 3420 | return 0; | ||
| 3421 | } | ||
| 3422 | |||
| 3446 | on_rq = p->on_rq; | 3423 | on_rq = p->on_rq; |
| 3447 | running = task_current(rq, p); | 3424 | running = task_current(rq, p); |
| 3448 | if (on_rq) | 3425 | if (on_rq) |
| @@ -3450,16 +3427,18 @@ change: | |||
| 3450 | if (running) | 3427 | if (running) |
| 3451 | p->sched_class->put_prev_task(rq, p); | 3428 | p->sched_class->put_prev_task(rq, p); |
| 3452 | 3429 | ||
| 3453 | p->sched_reset_on_fork = reset_on_fork; | ||
| 3454 | |||
| 3455 | oldprio = p->prio; | ||
| 3456 | prev_class = p->sched_class; | 3430 | prev_class = p->sched_class; |
| 3457 | __setscheduler(rq, p, attr); | 3431 | __setscheduler(rq, p, attr); |
| 3458 | 3432 | ||
| 3459 | if (running) | 3433 | if (running) |
| 3460 | p->sched_class->set_curr_task(rq); | 3434 | p->sched_class->set_curr_task(rq); |
| 3461 | if (on_rq) | 3435 | if (on_rq) { |
| 3462 | enqueue_task(rq, p, 0); | 3436 | /* |
| 3437 | * We enqueue to tail when the priority of a task is | ||
| 3438 | * increased (user space view). | ||
| 3439 | */ | ||
| 3440 | enqueue_task(rq, p, oldprio <= p->prio ? ENQUEUE_HEAD : 0); | ||
| 3441 | } | ||
| 3463 | 3442 | ||
| 3464 | check_class_changed(rq, p, prev_class, oldprio); | 3443 | check_class_changed(rq, p, prev_class, oldprio); |
| 3465 | task_rq_unlock(rq, p, &flags); | 3444 | task_rq_unlock(rq, p, &flags); |
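Together, the __sched_setscheduler() hunks above make the syscall path aware of priority inheritance: newprio is precomputed from the requested policy and priority, and when rt_mutex_check_prio() reports that a PI boost currently outranks that request, only the new parameters are stored via __setscheduler_params() and the requeue is left to the eventual deboost in rt_mutex_setprio(); otherwise the task is requeued, going to the head of its list unless its priority was actually raised from the user-space point of view. For reference, newprio uses the kernel-internal scale where smaller numbers are more important; a stand-alone illustration (constant values assumed from the kernel headers of this era):

```c
#include <stdio.h>

/* values as assumed from the kernel headers of this era */
#define MAX_RT_PRIO	100
#define MAX_DL_PRIO	0

int main(void)
{
	int sched_priority = 50;	/* e.g. SCHED_FIFO priority 50 */
	int newprio = MAX_RT_PRIO - 1 - sched_priority;

	/* smaller kernel prio == more important; RT spans 0..99 */
	printf("SCHED_FIFO %d  -> kernel prio %d\n", sched_priority, newprio);
	printf("SCHED_DEADLINE -> kernel prio %d\n", MAX_DL_PRIO - 1);
	return 0;
}
```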
| @@ -3615,7 +3594,7 @@ static int sched_copy_attr(struct sched_attr __user *uattr, | |||
| 3615 | * XXX: do we want to be lenient like existing syscalls; or do we want | 3594 | * XXX: do we want to be lenient like existing syscalls; or do we want |
| 3616 | * to be strict and return an error on out-of-bounds values? | 3595 | * to be strict and return an error on out-of-bounds values? |
| 3617 | */ | 3596 | */ |
| 3618 | attr->sched_nice = clamp(attr->sched_nice, -20, 19); | 3597 | attr->sched_nice = clamp(attr->sched_nice, MIN_NICE, MAX_NICE); |
| 3619 | 3598 | ||
| 3620 | out: | 3599 | out: |
| 3621 | return ret; | 3600 | return ret; |
| @@ -3836,7 +3815,7 @@ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, | |||
| 3836 | else if (task_has_rt_policy(p)) | 3815 | else if (task_has_rt_policy(p)) |
| 3837 | attr.sched_priority = p->rt_priority; | 3816 | attr.sched_priority = p->rt_priority; |
| 3838 | else | 3817 | else |
| 3839 | attr.sched_nice = TASK_NICE(p); | 3818 | attr.sched_nice = task_nice(p); |
| 3840 | 3819 | ||
| 3841 | rcu_read_unlock(); | 3820 | rcu_read_unlock(); |
| 3842 | 3821 | ||
| @@ -4474,6 +4453,7 @@ void init_idle(struct task_struct *idle, int cpu) | |||
| 4474 | rcu_read_unlock(); | 4453 | rcu_read_unlock(); |
| 4475 | 4454 | ||
| 4476 | rq->curr = rq->idle = idle; | 4455 | rq->curr = rq->idle = idle; |
| 4456 | idle->on_rq = 1; | ||
| 4477 | #if defined(CONFIG_SMP) | 4457 | #if defined(CONFIG_SMP) |
| 4478 | idle->on_cpu = 1; | 4458 | idle->on_cpu = 1; |
| 4479 | #endif | 4459 | #endif |
| @@ -4693,8 +4673,10 @@ void idle_task_exit(void) | |||
| 4693 | 4673 | ||
| 4694 | BUG_ON(cpu_online(smp_processor_id())); | 4674 | BUG_ON(cpu_online(smp_processor_id())); |
| 4695 | 4675 | ||
| 4696 | if (mm != &init_mm) | 4676 | if (mm != &init_mm) { |
| 4697 | switch_mm(mm, &init_mm, current); | 4677 | switch_mm(mm, &init_mm, current); |
| 4678 | finish_arch_post_lock_switch(); | ||
| 4679 | } | ||
| 4698 | mmdrop(mm); | 4680 | mmdrop(mm); |
| 4699 | } | 4681 | } |
| 4700 | 4682 | ||
| @@ -4712,6 +4694,22 @@ static void calc_load_migrate(struct rq *rq) | |||
| 4712 | atomic_long_add(delta, &calc_load_tasks); | 4694 | atomic_long_add(delta, &calc_load_tasks); |
| 4713 | } | 4695 | } |
| 4714 | 4696 | ||
| 4697 | static void put_prev_task_fake(struct rq *rq, struct task_struct *prev) | ||
| 4698 | { | ||
| 4699 | } | ||
| 4700 | |||
| 4701 | static const struct sched_class fake_sched_class = { | ||
| 4702 | .put_prev_task = put_prev_task_fake, | ||
| 4703 | }; | ||
| 4704 | |||
| 4705 | static struct task_struct fake_task = { | ||
| 4706 | /* | ||
| 4707 | * Avoid pull_{rt,dl}_task() | ||
| 4708 | */ | ||
| 4709 | .prio = MAX_PRIO + 1, | ||
| 4710 | .sched_class = &fake_sched_class, | ||
| 4711 | }; | ||
| 4712 | |||
| 4715 | /* | 4713 | /* |
| 4716 | * Migrate all tasks from the rq, sleeping tasks will be migrated by | 4714 | * Migrate all tasks from the rq, sleeping tasks will be migrated by |
| 4717 | * try_to_wake_up()->select_task_rq(). | 4715 | * try_to_wake_up()->select_task_rq(). |
| @@ -4752,7 +4750,7 @@ static void migrate_tasks(unsigned int dead_cpu) | |||
| 4752 | if (rq->nr_running == 1) | 4750 | if (rq->nr_running == 1) |
| 4753 | break; | 4751 | break; |
| 4754 | 4752 | ||
| 4755 | next = pick_next_task(rq); | 4753 | next = pick_next_task(rq, &fake_task); |
| 4756 | BUG_ON(!next); | 4754 | BUG_ON(!next); |
| 4757 | next->sched_class->put_prev_task(rq, next); | 4755 | next->sched_class->put_prev_task(rq, next); |
| 4758 | 4756 | ||
| @@ -4842,7 +4840,7 @@ set_table_entry(struct ctl_table *entry, | |||
| 4842 | static struct ctl_table * | 4840 | static struct ctl_table * |
| 4843 | sd_alloc_ctl_domain_table(struct sched_domain *sd) | 4841 | sd_alloc_ctl_domain_table(struct sched_domain *sd) |
| 4844 | { | 4842 | { |
| 4845 | struct ctl_table *table = sd_alloc_ctl_entry(13); | 4843 | struct ctl_table *table = sd_alloc_ctl_entry(14); |
| 4846 | 4844 | ||
| 4847 | if (table == NULL) | 4845 | if (table == NULL) |
| 4848 | return NULL; | 4846 | return NULL; |
| @@ -4870,9 +4868,12 @@ sd_alloc_ctl_domain_table(struct sched_domain *sd) | |||
| 4870 | sizeof(int), 0644, proc_dointvec_minmax, false); | 4868 | sizeof(int), 0644, proc_dointvec_minmax, false); |
| 4871 | set_table_entry(&table[10], "flags", &sd->flags, | 4869 | set_table_entry(&table[10], "flags", &sd->flags, |
| 4872 | sizeof(int), 0644, proc_dointvec_minmax, false); | 4870 | sizeof(int), 0644, proc_dointvec_minmax, false); |
| 4873 | set_table_entry(&table[11], "name", sd->name, | 4871 | set_table_entry(&table[11], "max_newidle_lb_cost", |
| 4872 | &sd->max_newidle_lb_cost, | ||
| 4873 | sizeof(long), 0644, proc_doulongvec_minmax, false); | ||
| 4874 | set_table_entry(&table[12], "name", sd->name, | ||
| 4874 | CORENAME_MAX_SIZE, 0444, proc_dostring, false); | 4875 | CORENAME_MAX_SIZE, 0444, proc_dostring, false); |
| 4875 | /* &table[12] is terminator */ | 4876 | /* &table[13] is terminator */ |
| 4876 | 4877 | ||
| 4877 | return table; | 4878 | return table; |
| 4878 | } | 4879 | } |
| @@ -6452,7 +6453,7 @@ static cpumask_var_t fallback_doms; | |||
| 6452 | * cpu core maps. It is supposed to return 1 if the topology changed | 6453 | * cpu core maps. It is supposed to return 1 if the topology changed |
| 6453 | * or 0 if it stayed the same. | 6454 | * or 0 if it stayed the same. |
| 6454 | */ | 6455 | */ |
| 6455 | int __attribute__((weak)) arch_update_cpu_topology(void) | 6456 | int __weak arch_update_cpu_topology(void) |
| 6456 | { | 6457 | { |
| 6457 | return 0; | 6458 | return 0; |
| 6458 | } | 6459 | } |
| @@ -6849,7 +6850,6 @@ void __init sched_init(void) | |||
| 6849 | 6850 | ||
| 6850 | rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime; | 6851 | rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime; |
| 6851 | #ifdef CONFIG_RT_GROUP_SCHED | 6852 | #ifdef CONFIG_RT_GROUP_SCHED |
| 6852 | INIT_LIST_HEAD(&rq->leaf_rt_rq_list); | ||
| 6853 | init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL); | 6853 | init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL); |
| 6854 | #endif | 6854 | #endif |
| 6855 | 6855 | ||
| @@ -6938,7 +6938,8 @@ void __might_sleep(const char *file, int line, int preempt_offset) | |||
| 6938 | static unsigned long prev_jiffy; /* ratelimiting */ | 6938 | static unsigned long prev_jiffy; /* ratelimiting */ |
| 6939 | 6939 | ||
| 6940 | rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */ | 6940 | rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */ |
| 6941 | if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) || | 6941 | if ((preempt_count_equals(preempt_offset) && !irqs_disabled() && |
| 6942 | !is_idle_task(current)) || | ||
| 6942 | system_state != SYSTEM_RUNNING || oops_in_progress) | 6943 | system_state != SYSTEM_RUNNING || oops_in_progress) |
| 6943 | return; | 6944 | return; |
| 6944 | if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) | 6945 | if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) |
| @@ -6956,6 +6957,13 @@ void __might_sleep(const char *file, int line, int preempt_offset) | |||
| 6956 | debug_show_held_locks(current); | 6957 | debug_show_held_locks(current); |
| 6957 | if (irqs_disabled()) | 6958 | if (irqs_disabled()) |
| 6958 | print_irqtrace_events(current); | 6959 | print_irqtrace_events(current); |
| 6960 | #ifdef CONFIG_DEBUG_PREEMPT | ||
| 6961 | if (!preempt_count_equals(preempt_offset)) { | ||
| 6962 | pr_err("Preemption disabled at:"); | ||
| 6963 | print_ip_sym(current->preempt_disable_ip); | ||
| 6964 | pr_cont("\n"); | ||
| 6965 | } | ||
| 6966 | #endif | ||
| 6959 | dump_stack(); | 6967 | dump_stack(); |
| 6960 | } | 6968 | } |
| 6961 | EXPORT_SYMBOL(__might_sleep); | 6969 | EXPORT_SYMBOL(__might_sleep); |
| @@ -7009,7 +7017,7 @@ void normalize_rt_tasks(void) | |||
| 7009 | * Renice negative nice level userspace | 7017 | * Renice negative nice level userspace |
| 7010 | * tasks back to 0: | 7018 | * tasks back to 0: |
| 7011 | */ | 7019 | */ |
| 7012 | if (TASK_NICE(p) < 0 && p->mm) | 7020 | if (task_nice(p) < 0 && p->mm) |
| 7013 | set_user_nice(p, 0); | 7021 | set_user_nice(p, 0); |
| 7014 | continue; | 7022 | continue; |
| 7015 | } | 7023 | } |
| @@ -7177,7 +7185,7 @@ void sched_move_task(struct task_struct *tsk) | |||
| 7177 | if (unlikely(running)) | 7185 | if (unlikely(running)) |
| 7178 | tsk->sched_class->put_prev_task(rq, tsk); | 7186 | tsk->sched_class->put_prev_task(rq, tsk); |
| 7179 | 7187 | ||
| 7180 | tg = container_of(task_css_check(tsk, cpu_cgroup_subsys_id, | 7188 | tg = container_of(task_css_check(tsk, cpu_cgrp_id, |
| 7181 | lockdep_is_held(&tsk->sighand->siglock)), | 7189 | lockdep_is_held(&tsk->sighand->siglock)), |
| 7182 | struct task_group, css); | 7190 | struct task_group, css); |
| 7183 | tg = autogroup_task_group(tsk, tg); | 7191 | tg = autogroup_task_group(tsk, tg); |
| @@ -7604,7 +7612,7 @@ static int cpu_cgroup_can_attach(struct cgroup_subsys_state *css, | |||
| 7604 | { | 7612 | { |
| 7605 | struct task_struct *task; | 7613 | struct task_struct *task; |
| 7606 | 7614 | ||
| 7607 | cgroup_taskset_for_each(task, css, tset) { | 7615 | cgroup_taskset_for_each(task, tset) { |
| 7608 | #ifdef CONFIG_RT_GROUP_SCHED | 7616 | #ifdef CONFIG_RT_GROUP_SCHED |
| 7609 | if (!sched_rt_can_attach(css_tg(css), task)) | 7617 | if (!sched_rt_can_attach(css_tg(css), task)) |
| 7610 | return -EINVAL; | 7618 | return -EINVAL; |
| @@ -7622,7 +7630,7 @@ static void cpu_cgroup_attach(struct cgroup_subsys_state *css, | |||
| 7622 | { | 7630 | { |
| 7623 | struct task_struct *task; | 7631 | struct task_struct *task; |
| 7624 | 7632 | ||
| 7625 | cgroup_taskset_for_each(task, css, tset) | 7633 | cgroup_taskset_for_each(task, tset) |
| 7626 | sched_move_task(task); | 7634 | sched_move_task(task); |
| 7627 | } | 7635 | } |
| 7628 | 7636 | ||
| @@ -7961,8 +7969,7 @@ static struct cftype cpu_files[] = { | |||
| 7961 | { } /* terminate */ | 7969 | { } /* terminate */ |
| 7962 | }; | 7970 | }; |
| 7963 | 7971 | ||
| 7964 | struct cgroup_subsys cpu_cgroup_subsys = { | 7972 | struct cgroup_subsys cpu_cgrp_subsys = { |
| 7965 | .name = "cpu", | ||
| 7966 | .css_alloc = cpu_cgroup_css_alloc, | 7973 | .css_alloc = cpu_cgroup_css_alloc, |
| 7967 | .css_free = cpu_cgroup_css_free, | 7974 | .css_free = cpu_cgroup_css_free, |
| 7968 | .css_online = cpu_cgroup_css_online, | 7975 | .css_online = cpu_cgroup_css_online, |
| @@ -7970,7 +7977,6 @@ struct cgroup_subsys cpu_cgroup_subsys = { | |||
| 7970 | .can_attach = cpu_cgroup_can_attach, | 7977 | .can_attach = cpu_cgroup_can_attach, |
| 7971 | .attach = cpu_cgroup_attach, | 7978 | .attach = cpu_cgroup_attach, |
| 7972 | .exit = cpu_cgroup_exit, | 7979 | .exit = cpu_cgroup_exit, |
| 7973 | .subsys_id = cpu_cgroup_subsys_id, | ||
| 7974 | .base_cftypes = cpu_files, | 7980 | .base_cftypes = cpu_files, |
| 7975 | .early_init = 1, | 7981 | .early_init = 1, |
| 7976 | }; | 7982 | }; |
