Diffstat (limited to 'kernel/sched')
 kernel/sched/core.c        | 80
 kernel/sched/cpudeadline.c | 37
 kernel/sched/cpudeadline.h |  6
 kernel/sched/cpupri.c      | 10
 kernel/sched/cpupri.h      |  2
 kernel/sched/cputime.c     | 32
 kernel/sched/deadline.c    | 16
 kernel/sched/fair.c        | 32
 kernel/sched/rt.c          |  7
 kernel/sched/sched.h       |  9
 10 files changed, 153 insertions(+), 78 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 268a45ea238c..0a7251678982 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2192,7 +2192,7 @@ static inline void post_schedule(struct rq *rq)
  * schedule_tail - first thing a freshly forked thread must call.
  * @prev: the thread we just switched away from.
  */
-asmlinkage void schedule_tail(struct task_struct *prev)
+asmlinkage __visible void schedule_tail(struct task_struct *prev)
 	__releases(rq->lock)
 {
 	struct rq *rq = this_rq();
@@ -2592,8 +2592,14 @@ pick_next_task(struct rq *rq, struct task_struct *prev)
 	if (likely(prev->sched_class == class &&
 		   rq->nr_running == rq->cfs.h_nr_running)) {
 		p = fair_sched_class.pick_next_task(rq, prev);
-		if (likely(p && p != RETRY_TASK))
-			return p;
+		if (unlikely(p == RETRY_TASK))
+			goto again;
+
+		/* assumes fair_sched_class->next == idle_sched_class */
+		if (unlikely(!p))
+			p = idle_sched_class.pick_next_task(rq, prev);
+
+		return p;
 	}
 
 again:
@@ -2741,7 +2747,7 @@ static inline void sched_submit_work(struct task_struct *tsk)
 	blk_schedule_flush_plug(tsk);
 }
 
-asmlinkage void __sched schedule(void)
+asmlinkage __visible void __sched schedule(void)
 {
 	struct task_struct *tsk = current;
 
@@ -2751,7 +2757,7 @@ asmlinkage void __sched schedule(void)
 EXPORT_SYMBOL(schedule);
 
 #ifdef CONFIG_CONTEXT_TRACKING
-asmlinkage void __sched schedule_user(void)
+asmlinkage __visible void __sched schedule_user(void)
 {
 	/*
 	 * If we come here after a random call to set_need_resched(),
@@ -2783,7 +2789,7 @@ void __sched schedule_preempt_disabled(void)
  * off of preempt_enable. Kernel preemptions off return from interrupt
  * occur there and call schedule directly.
  */
-asmlinkage void __sched notrace preempt_schedule(void)
+asmlinkage __visible void __sched notrace preempt_schedule(void)
 {
 	/*
 	 * If there is a non-zero preempt_count or interrupts are disabled,
@@ -2813,7 +2819,7 @@ EXPORT_SYMBOL(preempt_schedule);
  * Note, that this is called and return with irqs disabled. This will
  * protect us against recursive calling from irq.
  */
-asmlinkage void __sched preempt_schedule_irq(void)
+asmlinkage __visible void __sched preempt_schedule_irq(void)
 {
 	enum ctx_state prev_state;
 
@@ -3124,6 +3130,7 @@ __setparam_dl(struct task_struct *p, const struct sched_attr *attr)
 	dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);
 	dl_se->dl_throttled = 0;
 	dl_se->dl_new = 1;
+	dl_se->dl_yielded = 0;
 }
 
 static void __setscheduler_params(struct task_struct *p,
@@ -3188,17 +3195,40 @@ __getparam_dl(struct task_struct *p, struct sched_attr *attr)
  * We ask for the deadline not being zero, and greater or equal
  * than the runtime, as well as the period of being zero or
  * greater than deadline. Furthermore, we have to be sure that
- * user parameters are above the internal resolution (1us); we
- * check sched_runtime only since it is always the smaller one.
+ * user parameters are above the internal resolution of 1us (we
+ * check sched_runtime only since it is always the smaller one) and
+ * below 2^63 ns (we have to check both sched_deadline and
+ * sched_period, as the latter can be zero).
  */
 static bool
 __checkparam_dl(const struct sched_attr *attr)
 {
-	return attr && attr->sched_deadline != 0 &&
-		(attr->sched_period == 0 ||
-		 (s64)(attr->sched_period - attr->sched_deadline) >= 0) &&
-		(s64)(attr->sched_deadline - attr->sched_runtime ) >= 0 &&
-		attr->sched_runtime >= (2 << (DL_SCALE - 1));
+	/* deadline != 0 */
+	if (attr->sched_deadline == 0)
+		return false;
+
+	/*
+	 * Since we truncate DL_SCALE bits, make sure we're at least
+	 * that big.
+	 */
+	if (attr->sched_runtime < (1ULL << DL_SCALE))
+		return false;
+
+	/*
+	 * Since we use the MSB for wrap-around and sign issues, make
+	 * sure it's not set (mind that period can be equal to zero).
+	 */
+	if (attr->sched_deadline & (1ULL << 63) ||
+	    attr->sched_period & (1ULL << 63))
+		return false;
+
+	/* runtime <= deadline <= period (if period != 0) */
+	if ((attr->sched_period != 0 &&
+	     attr->sched_period < attr->sched_deadline) ||
+	    attr->sched_deadline < attr->sched_runtime)
+		return false;
+
+	return true;
 }
 
 /*
@@ -3639,6 +3669,7 @@ SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param)
  * sys_sched_setattr - same as above, but with extended sched_attr
  * @pid: the pid in question.
  * @uattr: structure containing the extended parameters.
+ * @flags: for future extension.
  */
 SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr,
 			       unsigned int, flags)
@@ -3650,8 +3681,12 @@ SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr,
 	if (!uattr || pid < 0 || flags)
 		return -EINVAL;
 
-	if (sched_copy_attr(uattr, &attr))
-		return -EFAULT;
+	retval = sched_copy_attr(uattr, &attr);
+	if (retval)
+		return retval;
+
+	if (attr.sched_policy < 0)
+		return -EINVAL;
 
 	rcu_read_lock();
 	retval = -ESRCH;
@@ -3701,7 +3736,7 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
  */
 SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
 {
-	struct sched_param lp;
+	struct sched_param lp = { .sched_priority = 0 };
 	struct task_struct *p;
 	int retval;
 
@@ -3718,11 +3753,8 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
 	if (retval)
 		goto out_unlock;
 
-	if (task_has_dl_policy(p)) {
-		retval = -EINVAL;
-		goto out_unlock;
-	}
-	lp.sched_priority = p->rt_priority;
+	if (task_has_rt_policy(p))
+		lp.sched_priority = p->rt_priority;
 	rcu_read_unlock();
 
 	/*
@@ -3783,6 +3815,7 @@ err_size:
  * @pid: the pid in question.
  * @uattr: structure containing the extended parameters.
  * @size: sizeof(attr) for fwd/bwd comp.
+ * @flags: for future extension.
  */
 SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
 		unsigned int, size, unsigned int, flags)
@@ -5043,7 +5076,6 @@ static int sched_cpu_active(struct notifier_block *nfb,
 				      unsigned long action, void *hcpu)
 {
 	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_STARTING:
 	case CPU_DOWN_FAILED:
 		set_cpu_active((long)hcpu, true);
 		return NOTIFY_OK;
@@ -6017,6 +6049,8 @@ sd_numa_init(struct sched_domain_topology_level *tl, int cpu)
 					,
 		.last_balance		= jiffies,
 		.balance_interval	= sd_weight,
+		.max_newidle_lb_cost	= 0,
+		.next_decay_max_lb_cost	= jiffies,
 	};
 	SD_INIT_NAME(sd, NUMA);
 	sd->private = &tl->data;
diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
index 5b9bb42b2d47..bd95963dae80 100644
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -13,6 +13,7 @@
 
 #include <linux/gfp.h>
 #include <linux/kernel.h>
+#include <linux/slab.h>
 #include "cpudeadline.h"
 
 static inline int parent(int i)
@@ -39,8 +40,10 @@ static void cpudl_exchange(struct cpudl *cp, int a, int b)
 {
 	int cpu_a = cp->elements[a].cpu, cpu_b = cp->elements[b].cpu;
 
-	swap(cp->elements[a], cp->elements[b]);
-	swap(cp->cpu_to_idx[cpu_a], cp->cpu_to_idx[cpu_b]);
+	swap(cp->elements[a].cpu, cp->elements[b].cpu);
+	swap(cp->elements[a].dl , cp->elements[b].dl );
+
+	swap(cp->elements[cpu_a].idx, cp->elements[cpu_b].idx);
 }
 
 static void cpudl_heapify(struct cpudl *cp, int idx)
@@ -140,7 +143,7 @@ void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid)
 	WARN_ON(!cpu_present(cpu));
 
 	raw_spin_lock_irqsave(&cp->lock, flags);
-	old_idx = cp->cpu_to_idx[cpu];
+	old_idx = cp->elements[cpu].idx;
 	if (!is_valid) {
 		/* remove item */
 		if (old_idx == IDX_INVALID) {
@@ -155,8 +158,8 @@ void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid)
 		cp->elements[old_idx].dl = cp->elements[cp->size - 1].dl;
 		cp->elements[old_idx].cpu = new_cpu;
 		cp->size--;
-		cp->cpu_to_idx[new_cpu] = old_idx;
-		cp->cpu_to_idx[cpu] = IDX_INVALID;
+		cp->elements[new_cpu].idx = old_idx;
+		cp->elements[cpu].idx = IDX_INVALID;
 		while (old_idx > 0 && dl_time_before(
 				cp->elements[parent(old_idx)].dl,
 				cp->elements[old_idx].dl)) {
@@ -173,7 +176,7 @@ void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid)
 		cp->size++;
 		cp->elements[cp->size - 1].dl = 0;
 		cp->elements[cp->size - 1].cpu = cpu;
-		cp->cpu_to_idx[cpu] = cp->size - 1;
+		cp->elements[cpu].idx = cp->size - 1;
 		cpudl_change_key(cp, cp->size - 1, dl);
 		cpumask_clear_cpu(cpu, cp->free_cpus);
 	} else {
@@ -195,10 +198,21 @@ int cpudl_init(struct cpudl *cp)
 	memset(cp, 0, sizeof(*cp));
 	raw_spin_lock_init(&cp->lock);
 	cp->size = 0;
-	for (i = 0; i < NR_CPUS; i++)
-		cp->cpu_to_idx[i] = IDX_INVALID;
-	if (!alloc_cpumask_var(&cp->free_cpus, GFP_KERNEL))
+
+	cp->elements = kcalloc(nr_cpu_ids,
+			       sizeof(struct cpudl_item),
+			       GFP_KERNEL);
+	if (!cp->elements)
+		return -ENOMEM;
+
+	if (!alloc_cpumask_var(&cp->free_cpus, GFP_KERNEL)) {
+		kfree(cp->elements);
 		return -ENOMEM;
+	}
+
+	for_each_possible_cpu(i)
+		cp->elements[i].idx = IDX_INVALID;
+
 	cpumask_setall(cp->free_cpus);
 
 	return 0;
@@ -210,7 +224,6 @@ int cpudl_init(struct cpudl *cp)
  */
 void cpudl_cleanup(struct cpudl *cp)
 {
-	/*
-	 * nothing to do for the moment
-	 */
+	free_cpumask_var(cp->free_cpus);
+	kfree(cp->elements);
 }
diff --git a/kernel/sched/cpudeadline.h b/kernel/sched/cpudeadline.h
index a202789a412c..538c9796ad4a 100644
--- a/kernel/sched/cpudeadline.h
+++ b/kernel/sched/cpudeadline.h
@@ -5,17 +5,17 @@
 
 #define IDX_INVALID -1
 
-struct array_item {
+struct cpudl_item {
 	u64 dl;
 	int cpu;
+	int idx;
 };
 
 struct cpudl {
 	raw_spinlock_t lock;
 	int size;
-	int cpu_to_idx[NR_CPUS];
-	struct array_item elements[NR_CPUS];
 	cpumask_var_t free_cpus;
+	struct cpudl_item *elements;
 };
 
 
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index 8b836b376d91..8834243abee2 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -30,6 +30,7 @@
 #include <linux/gfp.h>
 #include <linux/sched.h>
 #include <linux/sched/rt.h>
+#include <linux/slab.h>
 #include "cpupri.h"
 
 /* Convert between a 140 based task->prio, and our 102 based cpupri */
@@ -70,8 +71,7 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
 	int idx = 0;
 	int task_pri = convert_prio(p->prio);
 
-	if (task_pri >= MAX_RT_PRIO)
-		return 0;
+	BUG_ON(task_pri >= CPUPRI_NR_PRIORITIES);
 
 	for (idx = 0; idx < task_pri; idx++) {
 		struct cpupri_vec *vec = &cp->pri_to_cpu[idx];
@@ -219,8 +219,13 @@ int cpupri_init(struct cpupri *cp)
 			goto cleanup;
 	}
 
+	cp->cpu_to_pri = kcalloc(nr_cpu_ids, sizeof(int), GFP_KERNEL);
+	if (!cp->cpu_to_pri)
+		goto cleanup;
+
 	for_each_possible_cpu(i)
 		cp->cpu_to_pri[i] = CPUPRI_INVALID;
+
 	return 0;
 
 cleanup:
@@ -237,6 +242,7 @@ void cpupri_cleanup(struct cpupri *cp)
 {
 	int i;
 
+	kfree(cp->cpu_to_pri);
 	for (i = 0; i < CPUPRI_NR_PRIORITIES; i++)
 		free_cpumask_var(cp->pri_to_cpu[i].mask);
 }
diff --git a/kernel/sched/cpupri.h b/kernel/sched/cpupri.h
index f6d756173491..6b033347fdfd 100644
--- a/kernel/sched/cpupri.h
+++ b/kernel/sched/cpupri.h
@@ -17,7 +17,7 @@ struct cpupri_vec {
 
 struct cpupri {
 	struct cpupri_vec pri_to_cpu[CPUPRI_NR_PRIORITIES];
-	int cpu_to_pri[NR_CPUS];
+	int *cpu_to_pri;
 };
 
 #ifdef CONFIG_SMP
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index a95097cb4591..72fdf06ef865 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -332,50 +332,50 @@ out:
  * softirq as those do not count in task exec_runtime any more.
  */
 static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
-					 struct rq *rq)
+					 struct rq *rq, int ticks)
 {
-	cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
+	cputime_t scaled = cputime_to_scaled(cputime_one_jiffy);
+	u64 cputime = (__force u64) cputime_one_jiffy;
 	u64 *cpustat = kcpustat_this_cpu->cpustat;
 
 	if (steal_account_process_tick())
 		return;
 
+	cputime *= ticks;
+	scaled *= ticks;
+
 	if (irqtime_account_hi_update()) {
-		cpustat[CPUTIME_IRQ] += (__force u64) cputime_one_jiffy;
+		cpustat[CPUTIME_IRQ] += cputime;
 	} else if (irqtime_account_si_update()) {
-		cpustat[CPUTIME_SOFTIRQ] += (__force u64) cputime_one_jiffy;
+		cpustat[CPUTIME_SOFTIRQ] += cputime;
 	} else if (this_cpu_ksoftirqd() == p) {
 		/*
 		 * ksoftirqd time do not get accounted in cpu_softirq_time.
 		 * So, we have to handle it separately here.
 		 * Also, p->stime needs to be updated for ksoftirqd.
 		 */
-		__account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
-					CPUTIME_SOFTIRQ);
+		__account_system_time(p, cputime, scaled, CPUTIME_SOFTIRQ);
 	} else if (user_tick) {
-		account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
+		account_user_time(p, cputime, scaled);
 	} else if (p == rq->idle) {
-		account_idle_time(cputime_one_jiffy);
+		account_idle_time(cputime);
 	} else if (p->flags & PF_VCPU) { /* System time or guest time */
-		account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled);
+		account_guest_time(p, cputime, scaled);
 	} else {
-		__account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
-					CPUTIME_SYSTEM);
+		__account_system_time(p, cputime, scaled, CPUTIME_SYSTEM);
 	}
 }
 
 static void irqtime_account_idle_ticks(int ticks)
 {
-	int i;
 	struct rq *rq = this_rq();
 
-	for (i = 0; i < ticks; i++)
-		irqtime_account_process_tick(current, 0, rq);
+	irqtime_account_process_tick(current, 0, rq, ticks);
 }
 #else /* CONFIG_IRQ_TIME_ACCOUNTING */
 static inline void irqtime_account_idle_ticks(int ticks) {}
 static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick,
-						struct rq *rq) {}
+						struct rq *rq, int nr_ticks) {}
 #endif /* CONFIG_IRQ_TIME_ACCOUNTING */
 
 /*
@@ -464,7 +464,7 @@ void account_process_tick(struct task_struct *p, int user_tick)
 		return;
 
 	if (sched_clock_irqtime) {
-		irqtime_account_process_tick(p, user_tick, rq);
+		irqtime_account_process_tick(p, user_tick, rq, 1);
 		return;
 	}
 
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 27ef40925525..800e99b99075 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -528,6 +528,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
 	sched_clock_tick();
 	update_rq_clock(rq);
 	dl_se->dl_throttled = 0;
+	dl_se->dl_yielded = 0;
 	if (p->on_rq) {
 		enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
 		if (task_has_dl_policy(rq->curr))
@@ -893,10 +894,10 @@ static void yield_task_dl(struct rq *rq)
 	 * We make the task go to sleep until its current deadline by
 	 * forcing its runtime to zero. This way, update_curr_dl() stops
 	 * it and the bandwidth timer will wake it up and will give it
-	 * new scheduling parameters (thanks to dl_new=1).
+	 * new scheduling parameters (thanks to dl_yielded=1).
 	 */
 	if (p->dl.runtime > 0) {
-		rq->curr->dl.dl_new = 1;
+		rq->curr->dl.dl_yielded = 1;
 		p->dl.runtime = 0;
 	}
 	update_curr_dl(rq);
@@ -1021,8 +1022,17 @@ struct task_struct *pick_next_task_dl(struct rq *rq, struct task_struct *prev)
 
 	dl_rq = &rq->dl;
 
-	if (need_pull_dl_task(rq, prev))
+	if (need_pull_dl_task(rq, prev)) {
 		pull_dl_task(rq);
+		/*
+		 * pull_rt_task() can drop (and re-acquire) rq->lock; this
+		 * means a stop task can slip in, in which case we need to
+		 * re-start task selection.
+		 */
+		if (rq->stop && rq->stop->on_rq)
+			return RETRY_TASK;
+	}
+
 	/*
 	 * When prev is DL, we may throttle it in put_prev_task().
 	 * So, we update time before we check for dl_nr_running.
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7e9bd0b1fa9e..0fdb96de81a5 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1497,7 +1497,7 @@ static void task_numa_placement(struct task_struct *p)
 	/* If the task is part of a group prevent parallel updates to group stats */
 	if (p->numa_group) {
 		group_lock = &p->numa_group->lock;
-		spin_lock(group_lock);
+		spin_lock_irq(group_lock);
 	}
 
 	/* Find the node with the highest number of faults */
@@ -1572,7 +1572,7 @@
 			}
 		}
 
-		spin_unlock(group_lock);
+		spin_unlock_irq(group_lock);
 	}
 
 	/* Preferred node as the node with the most faults */
@@ -1677,7 +1677,8 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags,
 	if (!join)
 		return;
 
-	double_lock(&my_grp->lock, &grp->lock);
+	BUG_ON(irqs_disabled());
+	double_lock_irq(&my_grp->lock, &grp->lock);
 
 	for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) {
 		my_grp->faults[i] -= p->numa_faults_memory[i];
@@ -1691,7 +1692,7 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags,
 	grp->nr_tasks++;
 
 	spin_unlock(&my_grp->lock);
-	spin_unlock(&grp->lock);
+	spin_unlock_irq(&grp->lock);
 
 	rcu_assign_pointer(p->numa_group, grp);
 
@@ -1710,14 +1711,14 @@ void task_numa_free(struct task_struct *p)
 	void *numa_faults = p->numa_faults_memory;
 
 	if (grp) {
-		spin_lock(&grp->lock);
+		spin_lock_irq(&grp->lock);
 		for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++)
 			grp->faults[i] -= p->numa_faults_memory[i];
 		grp->total_faults -= p->total_numa_faults;
 
 		list_del(&p->numa_entry);
 		grp->nr_tasks--;
-		spin_unlock(&grp->lock);
+		spin_unlock_irq(&grp->lock);
 		rcu_assign_pointer(p->numa_group, NULL);
 		put_numa_group(grp);
 	}
@@ -6652,6 +6653,7 @@ static int idle_balance(struct rq *this_rq)
 	int this_cpu = this_rq->cpu;
 
 	idle_enter_fair(this_rq);
+
 	/*
 	 * We must set idle_stamp _before_ calling idle_balance(), such that we
 	 * measure the duration of idle_balance() as idle time.
@@ -6704,14 +6706,16 @@ static int idle_balance(struct rq *this_rq)
 
 	raw_spin_lock(&this_rq->lock);
 
+	if (curr_cost > this_rq->max_idle_balance_cost)
+		this_rq->max_idle_balance_cost = curr_cost;
+
 	/*
-	 * While browsing the domains, we released the rq lock.
-	 * A task could have be enqueued in the meantime
+	 * While browsing the domains, we released the rq lock, a task could
+	 * have been enqueued in the meantime. Since we're not going idle,
+	 * pretend we pulled a task.
 	 */
-	if (this_rq->cfs.h_nr_running && !pulled_task) {
+	if (this_rq->cfs.h_nr_running && !pulled_task)
 		pulled_task = 1;
-		goto out;
-	}
 
 	if (pulled_task || time_after(jiffies, this_rq->next_balance)) {
 		/*
@@ -6721,13 +6725,11 @@ static int idle_balance(struct rq *this_rq)
 		this_rq->next_balance = next_balance;
 	}
 
-	if (curr_cost > this_rq->max_idle_balance_cost)
-		this_rq->max_idle_balance_cost = curr_cost;
-
 out:
 	/* Is there a task of a high priority class? */
 	if (this_rq->nr_running != this_rq->cfs.h_nr_running &&
-	    (this_rq->dl.dl_nr_running ||
+	    ((this_rq->stop && this_rq->stop->on_rq) ||
+	     this_rq->dl.dl_nr_running ||
 	     (this_rq->rt.rt_nr_running && !rt_rq_throttled(&this_rq->rt))))
 		pulled_task = -1;
 
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index d8cdf1618551..bd2267ad404f 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1362,10 +1362,11 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev)
 		pull_rt_task(rq);
 		/*
 		 * pull_rt_task() can drop (and re-acquire) rq->lock; this
-		 * means a dl task can slip in, in which case we need to
-		 * re-start task selection.
+		 * means a dl or stop task can slip in, in which case we need
+		 * to re-start task selection.
 		 */
-		if (unlikely(rq->dl.dl_nr_running))
+		if (unlikely((rq->stop && rq->stop->on_rq) ||
+			     rq->dl.dl_nr_running))
 			return RETRY_TASK;
 	}
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c9007f28d3a2..456e492a3dca 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1385,6 +1385,15 @@ static inline void double_lock(spinlock_t *l1, spinlock_t *l2)
 	spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
 }
 
+static inline void double_lock_irq(spinlock_t *l1, spinlock_t *l2)
+{
+	if (l1 > l2)
+		swap(l1, l2);
+
+	spin_lock_irq(l1);
+	spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
+}
+
 static inline void double_raw_lock(raw_spinlock_t *l1, raw_spinlock_t *l2)
 {
 	if (l1 > l2)