Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r--  kernel/sched/core.c  216
1 file changed, 158 insertions(+), 58 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 257002c13bb0..7f12624a393c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -83,7 +83,7 @@
 #endif
 
 #include "sched.h"
-#include "../workqueue_sched.h"
+#include "../workqueue_internal.h"
 #include "../smpboot.h"
 
 #define CREATE_TRACE_POINTS
@@ -1132,18 +1132,28 @@ EXPORT_SYMBOL_GPL(kick_process);
  */
 static int select_fallback_rq(int cpu, struct task_struct *p)
 {
-	const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu));
+	int nid = cpu_to_node(cpu);
+	const struct cpumask *nodemask = NULL;
 	enum { cpuset, possible, fail } state = cpuset;
 	int dest_cpu;
 
-	/* Look for allowed, online CPU in same node. */
-	for_each_cpu(dest_cpu, nodemask) {
-		if (!cpu_online(dest_cpu))
-			continue;
-		if (!cpu_active(dest_cpu))
-			continue;
-		if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
-			return dest_cpu;
+	/*
+	 * If the node that the cpu is on has been offlined, cpu_to_node()
+	 * will return -1. There is no cpu on the node, and we should
+	 * select the cpu on the other node.
+	 */
+	if (nid != -1) {
+		nodemask = cpumask_of_node(nid);
+
+		/* Look for allowed, online CPU in same node. */
+		for_each_cpu(dest_cpu, nodemask) {
+			if (!cpu_online(dest_cpu))
+				continue;
+			if (!cpu_active(dest_cpu))
+				continue;
+			if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
+				return dest_cpu;
+		}
 	}
 
 	for (;;) {
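[Illustrative sketch, not part of the patch: the hunk above guards cpumask_of_node() against a cpu whose node has gone offline, in which case cpu_to_node() returns -1. The same guard pattern as a standalone helper; the helper name is hypothetical, the cpumask/topology calls are the real kernel APIs.]

#include <linux/cpumask.h>
#include <linux/topology.h>

/* Hypothetical helper mirroring the fixed lookup order. */
static int pick_allowed_cpu_on_local_node(int cpu, const struct cpumask *allowed)
{
	int nid = cpu_to_node(cpu);	/* -1 when the node has been offlined */
	int dest_cpu;

	if (nid == -1)
		return -1;		/* no local node: caller must fall back */

	for_each_cpu(dest_cpu, cpumask_of_node(nid)) {
		if (!cpu_online(dest_cpu) || !cpu_active(dest_cpu))
			continue;
		if (cpumask_test_cpu(dest_cpu, allowed))
			return dest_cpu;
	}
	return -1;
}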
@@ -1523,7 +1533,8 @@ out:
  */
 int wake_up_process(struct task_struct *p)
 {
-	return try_to_wake_up(p, TASK_ALL, 0);
+	WARN_ON(task_is_stopped_or_traced(p));
+	return try_to_wake_up(p, TASK_NORMAL, 0);
 }
 EXPORT_SYMBOL(wake_up_process);
 
@@ -1741,9 +1752,8 @@ EXPORT_SYMBOL_GPL(preempt_notifier_unregister);
 static void fire_sched_in_preempt_notifiers(struct task_struct *curr)
 {
 	struct preempt_notifier *notifier;
-	struct hlist_node *node;
 
-	hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link)
+	hlist_for_each_entry(notifier, &curr->preempt_notifiers, link)
 		notifier->ops->sched_in(notifier, raw_smp_processor_id());
 }
 
@@ -1752,9 +1762,8 @@ fire_sched_out_preempt_notifiers(struct task_struct *curr,
 				 struct task_struct *next)
 {
 	struct preempt_notifier *notifier;
-	struct hlist_node *node;
 
-	hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link)
+	hlist_for_each_entry(notifier, &curr->preempt_notifiers, link)
 		notifier->ops->sched_out(notifier, next);
 }
 
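[Illustrative sketch, not part of the patch: the two hunks above adapt to the hlist_for_each_entry() form that drops the separate struct hlist_node * cursor. A minimal, hypothetical user of the new signature.]

#include <linux/list.h>
#include <linux/printk.h>

struct my_notifier {			/* hypothetical list element */
	struct hlist_node link;
	int id;
};

static void walk_notifiers(struct hlist_head *head)
{
	struct my_notifier *n;

	/* old form was: hlist_for_each_entry(n, node, head, link) */
	hlist_for_each_entry(n, head, link)
		pr_info("notifier %d\n", n->id);
}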
@@ -1968,11 +1977,10 @@ context_switch(struct rq *rq, struct task_struct *prev,
 }
 
 /*
- * nr_running, nr_uninterruptible and nr_context_switches:
+ * nr_running and nr_context_switches:
  *
  * externally visible scheduler statistics: current number of runnable
- * threads, current number of uninterruptible-sleeping threads, total
- * number of context switches performed since bootup.
+ * threads, total number of context switches performed since bootup.
  */
 unsigned long nr_running(void)
 {
@@ -1984,23 +1992,6 @@ unsigned long nr_running(void)
 	return sum;
 }
 
-unsigned long nr_uninterruptible(void)
-{
-	unsigned long i, sum = 0;
-
-	for_each_possible_cpu(i)
-		sum += cpu_rq(i)->nr_uninterruptible;
-
-	/*
-	 * Since we read the counters lockless, it might be slightly
-	 * inaccurate. Do not allow it to go below zero though:
-	 */
-	if (unlikely((long)sum < 0))
-		sum = 0;
-
-	return sum;
-}
-
 unsigned long long nr_context_switches(void)
 {
 	int i;
@@ -2785,7 +2776,7 @@ static noinline void __schedule_bug(struct task_struct *prev)
 	if (irqs_disabled())
 		print_irqtrace_events(prev);
 	dump_stack();
-	add_taint(TAINT_WARN);
+	add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
 }
 
 /*
@@ -3267,7 +3258,8 @@ void complete_all(struct completion *x)
 EXPORT_SYMBOL(complete_all);
 
 static inline long __sched
-do_wait_for_common(struct completion *x, long timeout, int state)
+do_wait_for_common(struct completion *x,
+		   long (*action)(long), long timeout, int state)
 {
 	if (!x->done) {
 		DECLARE_WAITQUEUE(wait, current);
@@ -3280,7 +3272,7 @@ do_wait_for_common(struct completion *x, long timeout, int state)
 		}
 		__set_current_state(state);
 		spin_unlock_irq(&x->wait.lock);
-		timeout = schedule_timeout(timeout);
+		timeout = action(timeout);
 		spin_lock_irq(&x->wait.lock);
 	} while (!x->done && timeout);
 	__remove_wait_queue(&x->wait, &wait);
@@ -3291,17 +3283,30 @@ do_wait_for_common(struct completion *x, long timeout, int state)
 	return timeout ?: 1;
 }
 
-static long __sched
-wait_for_common(struct completion *x, long timeout, int state)
+static inline long __sched
+__wait_for_common(struct completion *x,
+		  long (*action)(long), long timeout, int state)
 {
 	might_sleep();
 
 	spin_lock_irq(&x->wait.lock);
-	timeout = do_wait_for_common(x, timeout, state);
+	timeout = do_wait_for_common(x, action, timeout, state);
 	spin_unlock_irq(&x->wait.lock);
 	return timeout;
 }
 
+static long __sched
+wait_for_common(struct completion *x, long timeout, int state)
+{
+	return __wait_for_common(x, schedule_timeout, timeout, state);
+}
+
+static long __sched
+wait_for_common_io(struct completion *x, long timeout, int state)
+{
+	return __wait_for_common(x, io_schedule_timeout, timeout, state);
+}
+
 /**
  * wait_for_completion: - waits for completion of a task
  * @x: holds the state of this particular completion
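[Illustrative sketch, not part of the patch: the refactor above threads the sleeping primitive through the common wait loop as a long (*action)(long) callback, so the same code can block via schedule_timeout() or io_schedule_timeout(). A stripped-down, hypothetical version of the pattern.]

#include <linux/sched.h>

/* Hypothetical: one sleep step, parameterized by how it sleeps. */
static long timed_sleep(long timeout, int state, long (*action)(long))
{
	__set_current_state(state);	/* e.g. TASK_UNINTERRUPTIBLE */
	timeout = action(timeout);	/* schedule_timeout() or io_schedule_timeout() */
	__set_current_state(TASK_RUNNING);
	return timeout;
}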
@@ -3338,6 +3343,39 @@ wait_for_completion_timeout(struct completion *x, unsigned long timeout)
 EXPORT_SYMBOL(wait_for_completion_timeout);
 
 /**
+ * wait_for_completion_io: - waits for completion of a task
+ * @x:  holds the state of this particular completion
+ *
+ * This waits to be signaled for completion of a specific task. It is NOT
+ * interruptible and there is no timeout. The caller is accounted as waiting
+ * for IO.
+ */
+void __sched wait_for_completion_io(struct completion *x)
+{
+	wait_for_common_io(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
+}
+EXPORT_SYMBOL(wait_for_completion_io);
+
+/**
+ * wait_for_completion_io_timeout: - waits for completion of a task (w/timeout)
+ * @x:  holds the state of this particular completion
+ * @timeout:  timeout value in jiffies
+ *
+ * This waits for either a completion of a specific task to be signaled or for a
+ * specified timeout to expire. The timeout is in jiffies. It is not
+ * interruptible. The caller is accounted as waiting for IO.
+ *
+ * The return value is 0 if timed out, and positive (at least 1, or number of
+ * jiffies left till timeout) if completed.
+ */
+unsigned long __sched
+wait_for_completion_io_timeout(struct completion *x, unsigned long timeout)
+{
+	return wait_for_common_io(x, timeout, TASK_UNINTERRUPTIBLE);
+}
+EXPORT_SYMBOL(wait_for_completion_io_timeout);
+
+/**
  * wait_for_completion_interruptible: - waits for completion of a task (w/intr)
  * @x:  holds the state of this particular completion
  *
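[Illustrative usage, not part of the patch: a caller that wants its sleep charged as iowait rather than plain uninterruptible sleep. The request structure and function names are hypothetical; the completion API is the real one.]

#include <linux/completion.h>

struct my_request {			/* hypothetical request wrapper */
	struct completion done;
};

static void my_request_finished(struct my_request *req)
{
	complete(&req->done);		/* e.g. from an IRQ or bio end_io path */
}

static void my_submit_and_wait(struct my_request *req)
{
	init_completion(&req->done);
	/* ... kick off the hardware / submit the request here ... */
	wait_for_completion_io(&req->done);	/* accounted as iowait */
}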
@@ -4363,20 +4401,32 @@ EXPORT_SYMBOL(yield);
  * It's the caller's job to ensure that the target task struct
  * can't go away on us before we can do any checks.
  *
- * Returns true if we indeed boosted the target task.
+ * Returns:
+ *	true (>0) if we indeed boosted the target task.
+ *	false (0) if we failed to boost the target.
+ *	-ESRCH if there's no task to yield to.
  */
 bool __sched yield_to(struct task_struct *p, bool preempt)
 {
 	struct task_struct *curr = current;
 	struct rq *rq, *p_rq;
 	unsigned long flags;
-	bool yielded = 0;
+	int yielded = 0;
 
 	local_irq_save(flags);
 	rq = this_rq();
 
 again:
 	p_rq = task_rq(p);
+	/*
+	 * If we're the only runnable task on the rq and target rq also
+	 * has only one task, there's absolutely no point in yielding.
+	 */
+	if (rq->nr_running == 1 && p_rq->nr_running == 1) {
+		yielded = -ESRCH;
+		goto out_irq;
+	}
+
 	double_rq_lock(rq, p_rq);
 	while (task_rq(p) != p_rq) {
 		double_rq_unlock(rq, p_rq);
@@ -4384,13 +4434,13 @@ again:
 	}
 
 	if (!curr->sched_class->yield_to_task)
-		goto out;
+		goto out_unlock;
 
 	if (curr->sched_class != p->sched_class)
-		goto out;
+		goto out_unlock;
 
 	if (task_running(p_rq, p) || p->state)
-		goto out;
+		goto out_unlock;
 
 	yielded = curr->sched_class->yield_to_task(rq, p, preempt);
 	if (yielded) {
@@ -4403,11 +4453,12 @@ again:
 		resched_task(p_rq->curr);
 	}
 
-out:
+out_unlock:
 	double_rq_unlock(rq, p_rq);
+out_irq:
 	local_irq_restore(flags);
 
-	if (yielded)
+	if (yielded > 0)
 		schedule();
 
 	return yielded;
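[Illustrative sketch, not part of the patch: the relabelled exit paths above follow the usual staged-unwind idiom, where each label undoes only what has been taken so far; that is what lets the new single-task bail-out skip the runqueue locks entirely. A generic, hypothetical example of the same shape, with stub predicates standing in for the real checks.]

#include <linux/errno.h>
#include <linux/spinlock.h>
#include <linux/types.h>

static bool nothing_to_yield_to(void) { return false; }	/* hypothetical stub */
static bool do_the_boost(void)        { return true;  }	/* hypothetical stub */

static int try_boost(spinlock_t *lock)
{
	unsigned long flags;
	int ret;

	local_irq_save(flags);
	if (nothing_to_yield_to()) {
		ret = -ESRCH;
		goto out_irq;		/* nothing locked yet: skip the unlock */
	}

	spin_lock(lock);
	ret = do_the_boost() ? 1 : 0;
	spin_unlock(lock);
out_irq:
	local_irq_restore(flags);
	return ret;
}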
@@ -4666,6 +4717,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
 	 */
 	idle->sched_class = &idle_sched_class;
 	ftrace_graph_init_idle_task(idle, cpu);
+	vtime_init_idle(idle);
 #if defined(CONFIG_SMP)
 	sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu);
 #endif
@@ -7159,7 +7211,6 @@ static void free_sched_group(struct task_group *tg)
 struct task_group *sched_create_group(struct task_group *parent)
 {
 	struct task_group *tg;
-	unsigned long flags;
 
 	tg = kzalloc(sizeof(*tg), GFP_KERNEL);
 	if (!tg)
@@ -7171,6 +7222,17 @@ struct task_group *sched_create_group(struct task_group *parent)
 	if (!alloc_rt_sched_group(tg, parent))
 		goto err;
 
+	return tg;
+
+err:
+	free_sched_group(tg);
+	return ERR_PTR(-ENOMEM);
+}
+
+void sched_online_group(struct task_group *tg, struct task_group *parent)
+{
+	unsigned long flags;
+
 	spin_lock_irqsave(&task_group_lock, flags);
 	list_add_rcu(&tg->list, &task_groups);
 
@@ -7180,12 +7242,6 @@ struct task_group *sched_create_group(struct task_group *parent)
 	INIT_LIST_HEAD(&tg->children);
 	list_add_rcu(&tg->siblings, &parent->children);
 	spin_unlock_irqrestore(&task_group_lock, flags);
-
-	return tg;
-
-err:
-	free_sched_group(tg);
-	return ERR_PTR(-ENOMEM);
 }
 
 /* rcu callback to free various structures associated with a task group */
@@ -7198,6 +7254,12 @@ static void free_sched_group_rcu(struct rcu_head *rhp)
 /* Destroy runqueue etc associated with a task group */
 void sched_destroy_group(struct task_group *tg)
 {
+	/* wait for possible concurrent references to cfs_rqs complete */
+	call_rcu(&tg->rcu, free_sched_group_rcu);
+}
+
+void sched_offline_group(struct task_group *tg)
+{
 	unsigned long flags;
 	int i;
 
@@ -7209,9 +7271,6 @@ void sched_destroy_group(struct task_group *tg)
 	list_del_rcu(&tg->list);
 	list_del_rcu(&tg->siblings);
 	spin_unlock_irqrestore(&task_group_lock, flags);
-
-	/* wait for possible concurrent references to cfs_rqs complete */
-	call_rcu(&tg->rcu, free_sched_group_rcu);
 }
 
 /* change task's runqueue when it moves between groups.
@@ -7507,6 +7566,25 @@ static int sched_rt_global_constraints(void)
 }
 #endif /* CONFIG_RT_GROUP_SCHED */
 
+int sched_rr_handler(struct ctl_table *table, int write,
+		void __user *buffer, size_t *lenp,
+		loff_t *ppos)
+{
+	int ret;
+	static DEFINE_MUTEX(mutex);
+
+	mutex_lock(&mutex);
+	ret = proc_dointvec(table, write, buffer, lenp, ppos);
+	/* make sure that internally we keep jiffies */
+	/* also, writing zero resets timeslice to default */
+	if (!ret && write) {
+		sched_rr_timeslice = sched_rr_timeslice <= 0 ?
+			RR_TIMESLICE : msecs_to_jiffies(sched_rr_timeslice);
+	}
+	mutex_unlock(&mutex);
+	return ret;
+}
+
 int sched_rt_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp,
 		loff_t *ppos)
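[Illustrative sketch, not part of the patch: the normalization the new handler applies after a successful write. The value arrives in milliseconds from userspace, is kept internally in jiffies, and zero or a negative write falls back to the RR_TIMESLICE default. The helper name and header location are assumptions for this sketch.]

#include <linux/jiffies.h>
#include <linux/sched/rt.h>	/* RR_TIMESLICE; header location assumed */

static int normalize_rr_timeslice(int written_ms)
{
	/* mirrors the conversion in sched_rr_handler() above */
	return written_ms <= 0 ? RR_TIMESLICE : msecs_to_jiffies(written_ms);
}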
@@ -7563,6 +7641,19 @@ static struct cgroup_subsys_state *cpu_cgroup_css_alloc(struct cgroup *cgrp)
 	return &tg->css;
 }
 
+static int cpu_cgroup_css_online(struct cgroup *cgrp)
+{
+	struct task_group *tg = cgroup_tg(cgrp);
+	struct task_group *parent;
+
+	if (!cgrp->parent)
+		return 0;
+
+	parent = cgroup_tg(cgrp->parent);
+	sched_online_group(tg, parent);
+	return 0;
+}
+
 static void cpu_cgroup_css_free(struct cgroup *cgrp)
 {
 	struct task_group *tg = cgroup_tg(cgrp);
@@ -7570,6 +7661,13 @@ static void cpu_cgroup_css_free(struct cgroup *cgrp)
 	sched_destroy_group(tg);
 }
 
+static void cpu_cgroup_css_offline(struct cgroup *cgrp)
+{
+	struct task_group *tg = cgroup_tg(cgrp);
+
+	sched_offline_group(tg);
+}
+
 static int cpu_cgroup_can_attach(struct cgroup *cgrp,
 				 struct cgroup_taskset *tset)
 {
@@ -7925,6 +8023,8 @@ struct cgroup_subsys cpu_cgroup_subsys = {
 	.name		= "cpu",
 	.css_alloc	= cpu_cgroup_css_alloc,
 	.css_free	= cpu_cgroup_css_free,
+	.css_online	= cpu_cgroup_css_online,
+	.css_offline	= cpu_cgroup_css_offline,
 	.can_attach	= cpu_cgroup_can_attach,
 	.attach		= cpu_cgroup_attach,
 	.exit		= cpu_cgroup_exit,
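[Illustrative sketch, not part of the patch: the split lifecycle these callbacks implement. A task group is now allocated and made visible in two steps, and torn down in two steps, matching the cgroup core's alloc/online/offline/free states. The driver function below is hypothetical; the four sched calls are the ones introduced above, with prototypes assumed to live in kernel/sched/sched.h.]

#include <linux/err.h>
#include "sched.h"		/* struct task_group and the prototypes (assumed) */

static int example_group_lifecycle(struct task_group *parent)
{
	struct task_group *tg;

	tg = sched_create_group(parent);	/* css_alloc:   allocate only         */
	if (IS_ERR(tg))
		return PTR_ERR(tg);

	sched_online_group(tg, parent);		/* css_online:  link into the tree    */
	/* ... the group is live; tasks may be attached to it ... */
	sched_offline_group(tg);		/* css_offline: unlink from the tree  */
	sched_destroy_group(tg);		/* css_free:    free via RCU callback */
	return 0;
}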