aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/sched
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched')
-rw-r--r--kernel/sched/core.c80
-rw-r--r--kernel/sched/cpudeadline.c37
-rw-r--r--kernel/sched/cpudeadline.h6
-rw-r--r--kernel/sched/cpupri.c10
-rw-r--r--kernel/sched/cpupri.h2
-rw-r--r--kernel/sched/cputime.c32
-rw-r--r--kernel/sched/deadline.c16
-rw-r--r--kernel/sched/fair.c32
-rw-r--r--kernel/sched/rt.c7
-rw-r--r--kernel/sched/sched.h9
10 files changed, 153 insertions, 78 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 268a45ea238c..0a7251678982 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2192,7 +2192,7 @@ static inline void post_schedule(struct rq *rq)
2192 * schedule_tail - first thing a freshly forked thread must call. 2192 * schedule_tail - first thing a freshly forked thread must call.
2193 * @prev: the thread we just switched away from. 2193 * @prev: the thread we just switched away from.
2194 */ 2194 */
2195asmlinkage void schedule_tail(struct task_struct *prev) 2195asmlinkage __visible void schedule_tail(struct task_struct *prev)
2196 __releases(rq->lock) 2196 __releases(rq->lock)
2197{ 2197{
2198 struct rq *rq = this_rq(); 2198 struct rq *rq = this_rq();
@@ -2592,8 +2592,14 @@ pick_next_task(struct rq *rq, struct task_struct *prev)
2592 if (likely(prev->sched_class == class && 2592 if (likely(prev->sched_class == class &&
2593 rq->nr_running == rq->cfs.h_nr_running)) { 2593 rq->nr_running == rq->cfs.h_nr_running)) {
2594 p = fair_sched_class.pick_next_task(rq, prev); 2594 p = fair_sched_class.pick_next_task(rq, prev);
2595 if (likely(p && p != RETRY_TASK)) 2595 if (unlikely(p == RETRY_TASK))
2596 return p; 2596 goto again;
2597
2598 /* assumes fair_sched_class->next == idle_sched_class */
2599 if (unlikely(!p))
2600 p = idle_sched_class.pick_next_task(rq, prev);
2601
2602 return p;
2597 } 2603 }
2598 2604
2599again: 2605again:
@@ -2741,7 +2747,7 @@ static inline void sched_submit_work(struct task_struct *tsk)
2741 blk_schedule_flush_plug(tsk); 2747 blk_schedule_flush_plug(tsk);
2742} 2748}
2743 2749
2744asmlinkage void __sched schedule(void) 2750asmlinkage __visible void __sched schedule(void)
2745{ 2751{
2746 struct task_struct *tsk = current; 2752 struct task_struct *tsk = current;
2747 2753
@@ -2751,7 +2757,7 @@ asmlinkage void __sched schedule(void)
2751EXPORT_SYMBOL(schedule); 2757EXPORT_SYMBOL(schedule);
2752 2758
2753#ifdef CONFIG_CONTEXT_TRACKING 2759#ifdef CONFIG_CONTEXT_TRACKING
2754asmlinkage void __sched schedule_user(void) 2760asmlinkage __visible void __sched schedule_user(void)
2755{ 2761{
2756 /* 2762 /*
2757 * If we come here after a random call to set_need_resched(), 2763 * If we come here after a random call to set_need_resched(),
@@ -2783,7 +2789,7 @@ void __sched schedule_preempt_disabled(void)
2783 * off of preempt_enable. Kernel preemptions off return from interrupt 2789 * off of preempt_enable. Kernel preemptions off return from interrupt
2784 * occur there and call schedule directly. 2790 * occur there and call schedule directly.
2785 */ 2791 */
2786asmlinkage void __sched notrace preempt_schedule(void) 2792asmlinkage __visible void __sched notrace preempt_schedule(void)
2787{ 2793{
2788 /* 2794 /*
2789 * If there is a non-zero preempt_count or interrupts are disabled, 2795 * If there is a non-zero preempt_count or interrupts are disabled,
@@ -2813,7 +2819,7 @@ EXPORT_SYMBOL(preempt_schedule);
2813 * Note that this is called and returns with irqs disabled. This will 2819 * Note that this is called and returns with irqs disabled. This will
2814 * protect us against recursive calling from irq. 2820 * protect us against recursive calling from irq.
2815 */ 2821 */
2816asmlinkage void __sched preempt_schedule_irq(void) 2822asmlinkage __visible void __sched preempt_schedule_irq(void)
2817{ 2823{
2818 enum ctx_state prev_state; 2824 enum ctx_state prev_state;
2819 2825
@@ -3124,6 +3130,7 @@ __setparam_dl(struct task_struct *p, const struct sched_attr *attr)
3124 dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime); 3130 dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);
3125 dl_se->dl_throttled = 0; 3131 dl_se->dl_throttled = 0;
3126 dl_se->dl_new = 1; 3132 dl_se->dl_new = 1;
3133 dl_se->dl_yielded = 0;
3127} 3134}
3128 3135
3129static void __setscheduler_params(struct task_struct *p, 3136static void __setscheduler_params(struct task_struct *p,
@@ -3188,17 +3195,40 @@ __getparam_dl(struct task_struct *p, struct sched_attr *attr)
3188 * We ask for the deadline not being zero, and greater or equal 3195 * We ask for the deadline not being zero, and greater or equal
3189 * than the runtime, as well as the period of being zero or 3196 * than the runtime, as well as the period of being zero or
3190 * greater than deadline. Furthermore, we have to be sure that 3197 * greater than deadline. Furthermore, we have to be sure that
3191 * user parameters are above the internal resolution (1us); we 3198 * user parameters are above the internal resolution of 1us (we
3192 * check sched_runtime only since it is always the smaller one. 3199 * check sched_runtime only since it is always the smaller one) and
3200 * below 2^63 ns (we have to check both sched_deadline and
3201 * sched_period, as the latter can be zero).
3193 */ 3202 */
3194static bool 3203static bool
3195__checkparam_dl(const struct sched_attr *attr) 3204__checkparam_dl(const struct sched_attr *attr)
3196{ 3205{
3197 return attr && attr->sched_deadline != 0 && 3206 /* deadline != 0 */
3198 (attr->sched_period == 0 || 3207 if (attr->sched_deadline == 0)
3199 (s64)(attr->sched_period - attr->sched_deadline) >= 0) && 3208 return false;
3200 (s64)(attr->sched_deadline - attr->sched_runtime ) >= 0 && 3209
3201 attr->sched_runtime >= (2 << (DL_SCALE - 1)); 3210 /*
3211 * Since we truncate DL_SCALE bits, make sure we're at least
3212 * that big.
3213 */
3214 if (attr->sched_runtime < (1ULL << DL_SCALE))
3215 return false;
3216
3217 /*
3218 * Since we use the MSB for wrap-around and sign issues, make
3219 * sure it's not set (mind that period can be equal to zero).
3220 */
3221 if (attr->sched_deadline & (1ULL << 63) ||
3222 attr->sched_period & (1ULL << 63))
3223 return false;
3224
3225 /* runtime <= deadline <= period (if period != 0) */
3226 if ((attr->sched_period != 0 &&
3227 attr->sched_period < attr->sched_deadline) ||
3228 attr->sched_deadline < attr->sched_runtime)
3229 return false;
3230
3231 return true;
3202} 3232}
3203 3233
3204/* 3234/*
@@ -3639,6 +3669,7 @@ SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param)
3639 * sys_sched_setattr - same as above, but with extended sched_attr 3669 * sys_sched_setattr - same as above, but with extended sched_attr
3640 * @pid: the pid in question. 3670 * @pid: the pid in question.
3641 * @uattr: structure containing the extended parameters. 3671 * @uattr: structure containing the extended parameters.
3672 * @flags: for future extension.
3642 */ 3673 */
3643SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr, 3674SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr,
3644 unsigned int, flags) 3675 unsigned int, flags)
@@ -3650,8 +3681,12 @@ SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr,
3650 if (!uattr || pid < 0 || flags) 3681 if (!uattr || pid < 0 || flags)
3651 return -EINVAL; 3682 return -EINVAL;
3652 3683
3653 if (sched_copy_attr(uattr, &attr)) 3684 retval = sched_copy_attr(uattr, &attr);
3654 return -EFAULT; 3685 if (retval)
3686 return retval;
3687
3688 if (attr.sched_policy < 0)
3689 return -EINVAL;
3655 3690
3656 rcu_read_lock(); 3691 rcu_read_lock();
3657 retval = -ESRCH; 3692 retval = -ESRCH;
@@ -3701,7 +3736,7 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
3701 */ 3736 */
3702SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) 3737SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
3703{ 3738{
3704 struct sched_param lp; 3739 struct sched_param lp = { .sched_priority = 0 };
3705 struct task_struct *p; 3740 struct task_struct *p;
3706 int retval; 3741 int retval;
3707 3742
@@ -3718,11 +3753,8 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
3718 if (retval) 3753 if (retval)
3719 goto out_unlock; 3754 goto out_unlock;
3720 3755
3721 if (task_has_dl_policy(p)) { 3756 if (task_has_rt_policy(p))
3722 retval = -EINVAL; 3757 lp.sched_priority = p->rt_priority;
3723 goto out_unlock;
3724 }
3725 lp.sched_priority = p->rt_priority;
3726 rcu_read_unlock(); 3758 rcu_read_unlock();
3727 3759
3728 /* 3760 /*
@@ -3783,6 +3815,7 @@ err_size:
3783 * @pid: the pid in question. 3815 * @pid: the pid in question.
3784 * @uattr: structure containing the extended parameters. 3816 * @uattr: structure containing the extended parameters.
3785 * @size: sizeof(attr) for fwd/bwd comp. 3817 * @size: sizeof(attr) for fwd/bwd comp.
3818 * @flags: for future extension.
3786 */ 3819 */
3787SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, 3820SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
3788 unsigned int, size, unsigned int, flags) 3821 unsigned int, size, unsigned int, flags)
@@ -5043,7 +5076,6 @@ static int sched_cpu_active(struct notifier_block *nfb,
5043 unsigned long action, void *hcpu) 5076 unsigned long action, void *hcpu)
5044{ 5077{
5045 switch (action & ~CPU_TASKS_FROZEN) { 5078 switch (action & ~CPU_TASKS_FROZEN) {
5046 case CPU_STARTING:
5047 case CPU_DOWN_FAILED: 5079 case CPU_DOWN_FAILED:
5048 set_cpu_active((long)hcpu, true); 5080 set_cpu_active((long)hcpu, true);
5049 return NOTIFY_OK; 5081 return NOTIFY_OK;
@@ -6017,6 +6049,8 @@ sd_numa_init(struct sched_domain_topology_level *tl, int cpu)
6017 , 6049 ,
6018 .last_balance = jiffies, 6050 .last_balance = jiffies,
6019 .balance_interval = sd_weight, 6051 .balance_interval = sd_weight,
6052 .max_newidle_lb_cost = 0,
6053 .next_decay_max_lb_cost = jiffies,
6020 }; 6054 };
6021 SD_INIT_NAME(sd, NUMA); 6055 SD_INIT_NAME(sd, NUMA);
6022 sd->private = &tl->data; 6056 sd->private = &tl->data;
diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
index 5b9bb42b2d47..bd95963dae80 100644
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -13,6 +13,7 @@
13 13
14#include <linux/gfp.h> 14#include <linux/gfp.h>
15#include <linux/kernel.h> 15#include <linux/kernel.h>
16#include <linux/slab.h>
16#include "cpudeadline.h" 17#include "cpudeadline.h"
17 18
18static inline int parent(int i) 19static inline int parent(int i)
@@ -39,8 +40,10 @@ static void cpudl_exchange(struct cpudl *cp, int a, int b)
39{ 40{
40 int cpu_a = cp->elements[a].cpu, cpu_b = cp->elements[b].cpu; 41 int cpu_a = cp->elements[a].cpu, cpu_b = cp->elements[b].cpu;
41 42
42 swap(cp->elements[a], cp->elements[b]); 43 swap(cp->elements[a].cpu, cp->elements[b].cpu);
43 swap(cp->cpu_to_idx[cpu_a], cp->cpu_to_idx[cpu_b]); 44 swap(cp->elements[a].dl , cp->elements[b].dl );
45
46 swap(cp->elements[cpu_a].idx, cp->elements[cpu_b].idx);
44} 47}
45 48
46static void cpudl_heapify(struct cpudl *cp, int idx) 49static void cpudl_heapify(struct cpudl *cp, int idx)
@@ -140,7 +143,7 @@ void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid)
140 WARN_ON(!cpu_present(cpu)); 143 WARN_ON(!cpu_present(cpu));
141 144
142 raw_spin_lock_irqsave(&cp->lock, flags); 145 raw_spin_lock_irqsave(&cp->lock, flags);
143 old_idx = cp->cpu_to_idx[cpu]; 146 old_idx = cp->elements[cpu].idx;
144 if (!is_valid) { 147 if (!is_valid) {
145 /* remove item */ 148 /* remove item */
146 if (old_idx == IDX_INVALID) { 149 if (old_idx == IDX_INVALID) {
@@ -155,8 +158,8 @@ void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid)
155 cp->elements[old_idx].dl = cp->elements[cp->size - 1].dl; 158 cp->elements[old_idx].dl = cp->elements[cp->size - 1].dl;
156 cp->elements[old_idx].cpu = new_cpu; 159 cp->elements[old_idx].cpu = new_cpu;
157 cp->size--; 160 cp->size--;
158 cp->cpu_to_idx[new_cpu] = old_idx; 161 cp->elements[new_cpu].idx = old_idx;
159 cp->cpu_to_idx[cpu] = IDX_INVALID; 162 cp->elements[cpu].idx = IDX_INVALID;
160 while (old_idx > 0 && dl_time_before( 163 while (old_idx > 0 && dl_time_before(
161 cp->elements[parent(old_idx)].dl, 164 cp->elements[parent(old_idx)].dl,
162 cp->elements[old_idx].dl)) { 165 cp->elements[old_idx].dl)) {
@@ -173,7 +176,7 @@ void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid)
173 cp->size++; 176 cp->size++;
174 cp->elements[cp->size - 1].dl = 0; 177 cp->elements[cp->size - 1].dl = 0;
175 cp->elements[cp->size - 1].cpu = cpu; 178 cp->elements[cp->size - 1].cpu = cpu;
176 cp->cpu_to_idx[cpu] = cp->size - 1; 179 cp->elements[cpu].idx = cp->size - 1;
177 cpudl_change_key(cp, cp->size - 1, dl); 180 cpudl_change_key(cp, cp->size - 1, dl);
178 cpumask_clear_cpu(cpu, cp->free_cpus); 181 cpumask_clear_cpu(cpu, cp->free_cpus);
179 } else { 182 } else {
@@ -195,10 +198,21 @@ int cpudl_init(struct cpudl *cp)
195 memset(cp, 0, sizeof(*cp)); 198 memset(cp, 0, sizeof(*cp));
196 raw_spin_lock_init(&cp->lock); 199 raw_spin_lock_init(&cp->lock);
197 cp->size = 0; 200 cp->size = 0;
198 for (i = 0; i < NR_CPUS; i++) 201
199 cp->cpu_to_idx[i] = IDX_INVALID; 202 cp->elements = kcalloc(nr_cpu_ids,
200 if (!alloc_cpumask_var(&cp->free_cpus, GFP_KERNEL)) 203 sizeof(struct cpudl_item),
204 GFP_KERNEL);
205 if (!cp->elements)
206 return -ENOMEM;
207
208 if (!alloc_cpumask_var(&cp->free_cpus, GFP_KERNEL)) {
209 kfree(cp->elements);
201 return -ENOMEM; 210 return -ENOMEM;
211 }
212
213 for_each_possible_cpu(i)
214 cp->elements[i].idx = IDX_INVALID;
215
202 cpumask_setall(cp->free_cpus); 216 cpumask_setall(cp->free_cpus);
203 217
204 return 0; 218 return 0;
@@ -210,7 +224,6 @@ int cpudl_init(struct cpudl *cp)
210 */ 224 */
211void cpudl_cleanup(struct cpudl *cp) 225void cpudl_cleanup(struct cpudl *cp)
212{ 226{
213 /* 227 free_cpumask_var(cp->free_cpus);
214 * nothing to do for the moment 228 kfree(cp->elements);
215 */
216} 229}
diff --git a/kernel/sched/cpudeadline.h b/kernel/sched/cpudeadline.h
index a202789a412c..538c9796ad4a 100644
--- a/kernel/sched/cpudeadline.h
+++ b/kernel/sched/cpudeadline.h
@@ -5,17 +5,17 @@
5 5
6#define IDX_INVALID -1 6#define IDX_INVALID -1
7 7
8struct array_item { 8struct cpudl_item {
9 u64 dl; 9 u64 dl;
10 int cpu; 10 int cpu;
11 int idx;
11}; 12};
12 13
13struct cpudl { 14struct cpudl {
14 raw_spinlock_t lock; 15 raw_spinlock_t lock;
15 int size; 16 int size;
16 int cpu_to_idx[NR_CPUS];
17 struct array_item elements[NR_CPUS];
18 cpumask_var_t free_cpus; 17 cpumask_var_t free_cpus;
18 struct cpudl_item *elements;
19}; 19};
20 20
21 21
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index 8b836b376d91..8834243abee2 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -30,6 +30,7 @@
30#include <linux/gfp.h> 30#include <linux/gfp.h>
31#include <linux/sched.h> 31#include <linux/sched.h>
32#include <linux/sched/rt.h> 32#include <linux/sched/rt.h>
33#include <linux/slab.h>
33#include "cpupri.h" 34#include "cpupri.h"
34 35
35/* Convert between a 140 based task->prio, and our 102 based cpupri */ 36/* Convert between a 140 based task->prio, and our 102 based cpupri */
@@ -70,8 +71,7 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
70 int idx = 0; 71 int idx = 0;
71 int task_pri = convert_prio(p->prio); 72 int task_pri = convert_prio(p->prio);
72 73
73 if (task_pri >= MAX_RT_PRIO) 74 BUG_ON(task_pri >= CPUPRI_NR_PRIORITIES);
74 return 0;
75 75
76 for (idx = 0; idx < task_pri; idx++) { 76 for (idx = 0; idx < task_pri; idx++) {
77 struct cpupri_vec *vec = &cp->pri_to_cpu[idx]; 77 struct cpupri_vec *vec = &cp->pri_to_cpu[idx];
@@ -219,8 +219,13 @@ int cpupri_init(struct cpupri *cp)
219 goto cleanup; 219 goto cleanup;
220 } 220 }
221 221
222 cp->cpu_to_pri = kcalloc(nr_cpu_ids, sizeof(int), GFP_KERNEL);
223 if (!cp->cpu_to_pri)
224 goto cleanup;
225
222 for_each_possible_cpu(i) 226 for_each_possible_cpu(i)
223 cp->cpu_to_pri[i] = CPUPRI_INVALID; 227 cp->cpu_to_pri[i] = CPUPRI_INVALID;
228
224 return 0; 229 return 0;
225 230
226cleanup: 231cleanup:
@@ -237,6 +242,7 @@ void cpupri_cleanup(struct cpupri *cp)
237{ 242{
238 int i; 243 int i;
239 244
245 kfree(cp->cpu_to_pri);
240 for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) 246 for (i = 0; i < CPUPRI_NR_PRIORITIES; i++)
241 free_cpumask_var(cp->pri_to_cpu[i].mask); 247 free_cpumask_var(cp->pri_to_cpu[i].mask);
242} 248}
diff --git a/kernel/sched/cpupri.h b/kernel/sched/cpupri.h
index f6d756173491..6b033347fdfd 100644
--- a/kernel/sched/cpupri.h
+++ b/kernel/sched/cpupri.h
@@ -17,7 +17,7 @@ struct cpupri_vec {
17 17
18struct cpupri { 18struct cpupri {
19 struct cpupri_vec pri_to_cpu[CPUPRI_NR_PRIORITIES]; 19 struct cpupri_vec pri_to_cpu[CPUPRI_NR_PRIORITIES];
20 int cpu_to_pri[NR_CPUS]; 20 int *cpu_to_pri;
21}; 21};
22 22
23#ifdef CONFIG_SMP 23#ifdef CONFIG_SMP
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index a95097cb4591..72fdf06ef865 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -332,50 +332,50 @@ out:
332 * softirq as those do not count in task exec_runtime any more. 332 * softirq as those do not count in task exec_runtime any more.
333 */ 333 */
334static void irqtime_account_process_tick(struct task_struct *p, int user_tick, 334static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
335 struct rq *rq) 335 struct rq *rq, int ticks)
336{ 336{
337 cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); 337 cputime_t scaled = cputime_to_scaled(cputime_one_jiffy);
338 u64 cputime = (__force u64) cputime_one_jiffy;
338 u64 *cpustat = kcpustat_this_cpu->cpustat; 339 u64 *cpustat = kcpustat_this_cpu->cpustat;
339 340
340 if (steal_account_process_tick()) 341 if (steal_account_process_tick())
341 return; 342 return;
342 343
344 cputime *= ticks;
345 scaled *= ticks;
346
343 if (irqtime_account_hi_update()) { 347 if (irqtime_account_hi_update()) {
344 cpustat[CPUTIME_IRQ] += (__force u64) cputime_one_jiffy; 348 cpustat[CPUTIME_IRQ] += cputime;
345 } else if (irqtime_account_si_update()) { 349 } else if (irqtime_account_si_update()) {
346 cpustat[CPUTIME_SOFTIRQ] += (__force u64) cputime_one_jiffy; 350 cpustat[CPUTIME_SOFTIRQ] += cputime;
347 } else if (this_cpu_ksoftirqd() == p) { 351 } else if (this_cpu_ksoftirqd() == p) {
348 /* 352 /*
349 * ksoftirqd time does not get accounted in cpu_softirq_time. 353 * ksoftirqd time does not get accounted in cpu_softirq_time.
350 * So, we have to handle it separately here. 354 * So, we have to handle it separately here.
351 * Also, p->stime needs to be updated for ksoftirqd. 355 * Also, p->stime needs to be updated for ksoftirqd.
352 */ 356 */
353 __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, 357 __account_system_time(p, cputime, scaled, CPUTIME_SOFTIRQ);
354 CPUTIME_SOFTIRQ);
355 } else if (user_tick) { 358 } else if (user_tick) {
356 account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); 359 account_user_time(p, cputime, scaled);
357 } else if (p == rq->idle) { 360 } else if (p == rq->idle) {
358 account_idle_time(cputime_one_jiffy); 361 account_idle_time(cputime);
359 } else if (p->flags & PF_VCPU) { /* System time or guest time */ 362 } else if (p->flags & PF_VCPU) { /* System time or guest time */
360 account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled); 363 account_guest_time(p, cputime, scaled);
361 } else { 364 } else {
362 __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, 365 __account_system_time(p, cputime, scaled, CPUTIME_SYSTEM);
363 CPUTIME_SYSTEM);
364 } 366 }
365} 367}
366 368
367static void irqtime_account_idle_ticks(int ticks) 369static void irqtime_account_idle_ticks(int ticks)
368{ 370{
369 int i;
370 struct rq *rq = this_rq(); 371 struct rq *rq = this_rq();
371 372
372 for (i = 0; i < ticks; i++) 373 irqtime_account_process_tick(current, 0, rq, ticks);
373 irqtime_account_process_tick(current, 0, rq);
374} 374}
375#else /* CONFIG_IRQ_TIME_ACCOUNTING */ 375#else /* CONFIG_IRQ_TIME_ACCOUNTING */
376static inline void irqtime_account_idle_ticks(int ticks) {} 376static inline void irqtime_account_idle_ticks(int ticks) {}
377static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick, 377static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick,
378 struct rq *rq) {} 378 struct rq *rq, int nr_ticks) {}
379#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ 379#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
380 380
381/* 381/*
@@ -464,7 +464,7 @@ void account_process_tick(struct task_struct *p, int user_tick)
464 return; 464 return;
465 465
466 if (sched_clock_irqtime) { 466 if (sched_clock_irqtime) {
467 irqtime_account_process_tick(p, user_tick, rq); 467 irqtime_account_process_tick(p, user_tick, rq, 1);
468 return; 468 return;
469 } 469 }
470 470
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 27ef40925525..800e99b99075 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -528,6 +528,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
528 sched_clock_tick(); 528 sched_clock_tick();
529 update_rq_clock(rq); 529 update_rq_clock(rq);
530 dl_se->dl_throttled = 0; 530 dl_se->dl_throttled = 0;
531 dl_se->dl_yielded = 0;
531 if (p->on_rq) { 532 if (p->on_rq) {
532 enqueue_task_dl(rq, p, ENQUEUE_REPLENISH); 533 enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
533 if (task_has_dl_policy(rq->curr)) 534 if (task_has_dl_policy(rq->curr))
@@ -893,10 +894,10 @@ static void yield_task_dl(struct rq *rq)
893 * We make the task go to sleep until its current deadline by 894 * We make the task go to sleep until its current deadline by
894 * forcing its runtime to zero. This way, update_curr_dl() stops 895 * forcing its runtime to zero. This way, update_curr_dl() stops
895 * it and the bandwidth timer will wake it up and will give it 896 * it and the bandwidth timer will wake it up and will give it
896 * new scheduling parameters (thanks to dl_new=1). 897 * new scheduling parameters (thanks to dl_yielded=1).
897 */ 898 */
898 if (p->dl.runtime > 0) { 899 if (p->dl.runtime > 0) {
899 rq->curr->dl.dl_new = 1; 900 rq->curr->dl.dl_yielded = 1;
900 p->dl.runtime = 0; 901 p->dl.runtime = 0;
901 } 902 }
902 update_curr_dl(rq); 903 update_curr_dl(rq);
@@ -1021,8 +1022,17 @@ struct task_struct *pick_next_task_dl(struct rq *rq, struct task_struct *prev)
1021 1022
1022 dl_rq = &rq->dl; 1023 dl_rq = &rq->dl;
1023 1024
1024 if (need_pull_dl_task(rq, prev)) 1025 if (need_pull_dl_task(rq, prev)) {
1025 pull_dl_task(rq); 1026 pull_dl_task(rq);
1027 /*
1028 * pull_dl_task() can drop (and re-acquire) rq->lock; this
1029 * means a stop task can slip in, in which case we need to
1030 * re-start task selection.
1031 */
1032 if (rq->stop && rq->stop->on_rq)
1033 return RETRY_TASK;
1034 }
1035
1026 /* 1036 /*
1027 * When prev is DL, we may throttle it in put_prev_task(). 1037 * When prev is DL, we may throttle it in put_prev_task().
1028 * So, we update time before we check for dl_nr_running. 1038 * So, we update time before we check for dl_nr_running.
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7e9bd0b1fa9e..0fdb96de81a5 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1497,7 +1497,7 @@ static void task_numa_placement(struct task_struct *p)
1497 /* If the task is part of a group prevent parallel updates to group stats */ 1497 /* If the task is part of a group prevent parallel updates to group stats */
1498 if (p->numa_group) { 1498 if (p->numa_group) {
1499 group_lock = &p->numa_group->lock; 1499 group_lock = &p->numa_group->lock;
1500 spin_lock(group_lock); 1500 spin_lock_irq(group_lock);
1501 } 1501 }
1502 1502
1503 /* Find the node with the highest number of faults */ 1503 /* Find the node with the highest number of faults */
@@ -1572,7 +1572,7 @@ static void task_numa_placement(struct task_struct *p)
1572 } 1572 }
1573 } 1573 }
1574 1574
1575 spin_unlock(group_lock); 1575 spin_unlock_irq(group_lock);
1576 } 1576 }
1577 1577
1578 /* Preferred node as the node with the most faults */ 1578 /* Preferred node as the node with the most faults */
@@ -1677,7 +1677,8 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags,
1677 if (!join) 1677 if (!join)
1678 return; 1678 return;
1679 1679
1680 double_lock(&my_grp->lock, &grp->lock); 1680 BUG_ON(irqs_disabled());
1681 double_lock_irq(&my_grp->lock, &grp->lock);
1681 1682
1682 for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) { 1683 for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) {
1683 my_grp->faults[i] -= p->numa_faults_memory[i]; 1684 my_grp->faults[i] -= p->numa_faults_memory[i];
@@ -1691,7 +1692,7 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags,
1691 grp->nr_tasks++; 1692 grp->nr_tasks++;
1692 1693
1693 spin_unlock(&my_grp->lock); 1694 spin_unlock(&my_grp->lock);
1694 spin_unlock(&grp->lock); 1695 spin_unlock_irq(&grp->lock);
1695 1696
1696 rcu_assign_pointer(p->numa_group, grp); 1697 rcu_assign_pointer(p->numa_group, grp);
1697 1698
@@ -1710,14 +1711,14 @@ void task_numa_free(struct task_struct *p)
1710 void *numa_faults = p->numa_faults_memory; 1711 void *numa_faults = p->numa_faults_memory;
1711 1712
1712 if (grp) { 1713 if (grp) {
1713 spin_lock(&grp->lock); 1714 spin_lock_irq(&grp->lock);
1714 for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) 1715 for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++)
1715 grp->faults[i] -= p->numa_faults_memory[i]; 1716 grp->faults[i] -= p->numa_faults_memory[i];
1716 grp->total_faults -= p->total_numa_faults; 1717 grp->total_faults -= p->total_numa_faults;
1717 1718
1718 list_del(&p->numa_entry); 1719 list_del(&p->numa_entry);
1719 grp->nr_tasks--; 1720 grp->nr_tasks--;
1720 spin_unlock(&grp->lock); 1721 spin_unlock_irq(&grp->lock);
1721 rcu_assign_pointer(p->numa_group, NULL); 1722 rcu_assign_pointer(p->numa_group, NULL);
1722 put_numa_group(grp); 1723 put_numa_group(grp);
1723 } 1724 }
@@ -6652,6 +6653,7 @@ static int idle_balance(struct rq *this_rq)
6652 int this_cpu = this_rq->cpu; 6653 int this_cpu = this_rq->cpu;
6653 6654
6654 idle_enter_fair(this_rq); 6655 idle_enter_fair(this_rq);
6656
6655 /* 6657 /*
6656 * We must set idle_stamp _before_ calling idle_balance(), such that we 6658 * We must set idle_stamp _before_ calling idle_balance(), such that we
6657 * measure the duration of idle_balance() as idle time. 6659 * measure the duration of idle_balance() as idle time.
@@ -6704,14 +6706,16 @@ static int idle_balance(struct rq *this_rq)
6704 6706
6705 raw_spin_lock(&this_rq->lock); 6707 raw_spin_lock(&this_rq->lock);
6706 6708
6709 if (curr_cost > this_rq->max_idle_balance_cost)
6710 this_rq->max_idle_balance_cost = curr_cost;
6711
6707 /* 6712 /*
6708 * While browsing the domains, we released the rq lock. 6713 * While browsing the domains, we released the rq lock, a task could
6709 * A task could have be enqueued in the meantime 6714 * have been enqueued in the meantime. Since we're not going idle,
6715 * pretend we pulled a task.
6710 */ 6716 */
6711 if (this_rq->cfs.h_nr_running && !pulled_task) { 6717 if (this_rq->cfs.h_nr_running && !pulled_task)
6712 pulled_task = 1; 6718 pulled_task = 1;
6713 goto out;
6714 }
6715 6719
6716 if (pulled_task || time_after(jiffies, this_rq->next_balance)) { 6720 if (pulled_task || time_after(jiffies, this_rq->next_balance)) {
6717 /* 6721 /*
@@ -6721,13 +6725,11 @@ static int idle_balance(struct rq *this_rq)
6721 this_rq->next_balance = next_balance; 6725 this_rq->next_balance = next_balance;
6722 } 6726 }
6723 6727
6724 if (curr_cost > this_rq->max_idle_balance_cost)
6725 this_rq->max_idle_balance_cost = curr_cost;
6726
6727out: 6728out:
6728 /* Is there a task of a high priority class? */ 6729 /* Is there a task of a high priority class? */
6729 if (this_rq->nr_running != this_rq->cfs.h_nr_running && 6730 if (this_rq->nr_running != this_rq->cfs.h_nr_running &&
6730 (this_rq->dl.dl_nr_running || 6731 ((this_rq->stop && this_rq->stop->on_rq) ||
6732 this_rq->dl.dl_nr_running ||
6731 (this_rq->rt.rt_nr_running && !rt_rq_throttled(&this_rq->rt)))) 6733 (this_rq->rt.rt_nr_running && !rt_rq_throttled(&this_rq->rt))))
6732 pulled_task = -1; 6734 pulled_task = -1;
6733 6735
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index d8cdf1618551..bd2267ad404f 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1362,10 +1362,11 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev)
1362 pull_rt_task(rq); 1362 pull_rt_task(rq);
1363 /* 1363 /*
1364 * pull_rt_task() can drop (and re-acquire) rq->lock; this 1364 * pull_rt_task() can drop (and re-acquire) rq->lock; this
1365 * means a dl task can slip in, in which case we need to 1365 * means a dl or stop task can slip in, in which case we need
1366 * re-start task selection. 1366 * to re-start task selection.
1367 */ 1367 */
1368 if (unlikely(rq->dl.dl_nr_running)) 1368 if (unlikely((rq->stop && rq->stop->on_rq) ||
1369 rq->dl.dl_nr_running))
1369 return RETRY_TASK; 1370 return RETRY_TASK;
1370 } 1371 }
1371 1372
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c9007f28d3a2..456e492a3dca 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1385,6 +1385,15 @@ static inline void double_lock(spinlock_t *l1, spinlock_t *l2)
1385 spin_lock_nested(l2, SINGLE_DEPTH_NESTING); 1385 spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
1386} 1386}
1387 1387
1388static inline void double_lock_irq(spinlock_t *l1, spinlock_t *l2)
1389{
1390 if (l1 > l2)
1391 swap(l1, l2);
1392
1393 spin_lock_irq(l1);
1394 spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
1395}
1396
1388static inline void double_raw_lock(raw_spinlock_t *l1, raw_spinlock_t *l2) 1397static inline void double_raw_lock(raw_spinlock_t *l1, raw_spinlock_t *l2)
1389{ 1398{
1390 if (l1 > l2) 1399 if (l1 > l2)