Diffstat (limited to 'kernel/sched.c')
-rw-r--r--  kernel/sched.c | 186
1 file changed, 99 insertions(+), 87 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 054a6012de99..f52a8801b7a2 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -306,52 +306,6 @@ static int init_task_group_load = INIT_TASK_GROUP_LOAD;
  */
 struct task_group init_task_group;
 
-/* return group to which a task belongs */
-static inline struct task_group *task_group(struct task_struct *p)
-{
-        struct task_group *tg;
-
-#ifdef CONFIG_CGROUP_SCHED
-        tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id),
-                          struct task_group, css);
-#else
-        tg = &init_task_group;
-#endif
-        return tg;
-}
-
-/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
-static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
-{
-        /*
-         * Strictly speaking this rcu_read_lock() is not needed since the
-         * task_group is tied to the cgroup, which in turn can never go away
-         * as long as there are tasks attached to it.
-         *
-         * However since task_group() uses task_subsys_state() which is an
-         * rcu_dereference() user, this quiets CONFIG_PROVE_RCU.
-         */
-        rcu_read_lock();
-#ifdef CONFIG_FAIR_GROUP_SCHED
-        p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
-        p->se.parent = task_group(p)->se[cpu];
-#endif
-
-#ifdef CONFIG_RT_GROUP_SCHED
-        p->rt.rt_rq  = task_group(p)->rt_rq[cpu];
-        p->rt.parent = task_group(p)->rt_se[cpu];
-#endif
-        rcu_read_unlock();
-}
-
-#else
-
-static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
-static inline struct task_group *task_group(struct task_struct *p)
-{
-        return NULL;
-}
-
-#endif /* CONFIG_CGROUP_SCHED */
-
 #endif /* CONFIG_CGROUP_SCHED */
 
 /* CFS-related fields in a runqueue */
@@ -544,6 +498,8 @@ struct rq {
         struct root_domain *rd;
         struct sched_domain *sd;
 
+        unsigned long cpu_power;
+
         unsigned char idle_at_tick;
         /* For active balancing */
         int post_schedule;
@@ -642,6 +598,49 @@ static inline int cpu_of(struct rq *rq)
 #define cpu_curr(cpu)           (cpu_rq(cpu)->curr)
 #define raw_rq()                (&__raw_get_cpu_var(runqueues))
 
+#ifdef CONFIG_CGROUP_SCHED
+
+/*
+ * Return the group to which this task belongs.
+ *
+ * We use task_subsys_state_check() and extend the RCU verification
+ * with lockdep_is_held(&task_rq(p)->lock) because cpu_cgroup_attach()
+ * holds that lock for each task it moves into the cgroup. Therefore
+ * by holding that lock, we pin the task to the current cgroup.
+ */
+static inline struct task_group *task_group(struct task_struct *p)
+{
+        struct cgroup_subsys_state *css;
+
+        css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
+                        lockdep_is_held(&task_rq(p)->lock));
+        return container_of(css, struct task_group, css);
+}
+
+/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
+static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
+{
+#ifdef CONFIG_FAIR_GROUP_SCHED
+        p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
+        p->se.parent = task_group(p)->se[cpu];
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
+        p->rt.rt_rq  = task_group(p)->rt_rq[cpu];
+        p->rt.parent = task_group(p)->rt_se[cpu];
+#endif
+}
+
+#else /* CONFIG_CGROUP_SCHED */
+
+static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
+static inline struct task_group *task_group(struct task_struct *p)
+{
+        return NULL;
+}
+
+#endif /* CONFIG_CGROUP_SCHED */
+
 inline void update_rq_clock(struct rq *rq)
 {
         if (!rq->skip_clock_update)
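The lockdep-augmented check in the new task_group() is an instance of the generic rcu_dereference_check() pattern: a dereference is accepted either inside an RCU read-side critical section or while a lock named in the extra condition is held. A minimal sketch of that pattern, using hypothetical foo/bar types that are not part of this patch:

#include <linux/rcupdate.h>
#include <linux/spinlock.h>

struct bar;

struct foo {
        struct bar __rcu *bar;
        spinlock_t lock;                /* updaters of ->bar hold this */
};

static inline struct bar *foo_bar(struct foo *f)
{
        /*
         * PROVE_RCU is satisfied if either rcu_read_lock() is held or
         * f->lock is held; since updaters take f->lock, holding it pins
         * the pointer, just as task_rq(p)->lock pins p's task_group.
         */
        return rcu_dereference_check(f->bar, lockdep_is_held(&f->lock));
}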
@@ -969,14 +968,6 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
         }
 }
 
-void task_rq_unlock_wait(struct task_struct *p)
-{
-        struct rq *rq = task_rq(p);
-
-        smp_mb(); /* spin-unlock-wait is not a full memory barrier */
-        raw_spin_unlock_wait(&rq->lock);
-}
-
 static void __task_rq_unlock(struct rq *rq)
         __releases(rq->lock)
 {
@@ -1263,6 +1254,12 @@ static void sched_avg_update(struct rq *rq)
         s64 period = sched_avg_period();
 
         while ((s64)(rq->clock - rq->age_stamp) > period) {
+                /*
+                 * Inline assembly required to prevent the compiler
+                 * optimising this loop into a divmod call.
+                 * See __iter_div_u64_rem() for another example of this.
+                 */
+                asm("" : "+rm" (rq->age_stamp));
                 rq->age_stamp += period;
                 rq->rt_avg /= 2;
         }
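The empty asm() is the same trick used by __iter_div_u64_rem(): routing the variable through an inline-asm operand makes its value opaque to the optimiser, so the compiler cannot collapse the repeated subtraction into a 64-bit divide/modulo. A standalone sketch of the idea (illustrative only, not taken from the patch):

#include <stdint.h>

/* Reduce 'value' modulo 'period' by repeated subtraction. */
static uint64_t rem_by_loop(uint64_t value, uint64_t period)
{
        while (value >= period) {
                /* Keep the loop: the asm hides 'value' from the optimiser. */
                asm("" : "+rm" (value));
                value -= period;
        }
        return value;
}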
@@ -1507,24 +1504,9 @@ static unsigned long target_load(int cpu, int type)
         return max(rq->cpu_load[type-1], total);
 }
 
-static struct sched_group *group_of(int cpu)
-{
-        struct sched_domain *sd = rcu_dereference_sched(cpu_rq(cpu)->sd);
-
-        if (!sd)
-                return NULL;
-
-        return sd->groups;
-}
-
 static unsigned long power_of(int cpu)
 {
-        struct sched_group *group = group_of(cpu);
-
-        if (!group)
-                return SCHED_LOAD_SCALE;
-
-        return group->cpu_power;
+        return cpu_rq(cpu)->cpu_power;
 }
 
 static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
@@ -1681,9 +1663,6 @@ static void update_shares(struct sched_domain *sd)
 
 static void update_h_load(long cpu)
 {
-        if (root_task_group_empty())
-                return;
-
         walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
 }
 
@@ -1862,8 +1841,8 @@ static void dec_nr_running(struct rq *rq)
 static void set_load_weight(struct task_struct *p)
 {
         if (task_has_rt_policy(p)) {
-                p->se.load.weight = prio_to_weight[0] * 2;
-                p->se.load.inv_weight = prio_to_wmult[0] >> 1;
+                p->se.load.weight = 0;
+                p->se.load.inv_weight = WMULT_CONST;
                 return;
         }
 
@@ -2515,7 +2494,16 @@ void sched_fork(struct task_struct *p, int clone_flags)
         if (p->sched_class->task_fork)
                 p->sched_class->task_fork(p);
 
+        /*
+         * The child is not yet in the pid-hash so no cgroup attach races,
+         * and the cgroup is pinned to this child because cgroup_fork()
+         * runs before sched_fork().
+         *
+         * Silence PROVE_RCU.
+         */
+        rcu_read_lock();
         set_task_cpu(p, cpu);
+        rcu_read_unlock();
 
 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
         if (likely(sched_info_on()))
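The rcu_read_lock()/rcu_read_unlock() pair adds no real protection here; the child's cgroup is already pinned because the task is not yet visible for attach. The section exists only so the RCU-checked accessors reached from set_task_cpu() see a legitimate read-side context under CONFIG_PROVE_RCU. A rough sketch of the same "pinned by construction, lock only for lockdep" pattern, with hypothetical types not taken from the patch:

#include <linux/rcupdate.h>

struct dep { int id; };

struct obj {
        struct dep __rcu *published_dep;        /* shared, RCU-managed */
        struct dep *dep;                        /* private copy */
};

static struct dep *lookup_dep_rcu(struct obj *o)
{
        return rcu_dereference(o->published_dep);       /* wants an RCU read side */
}

static void example_init_private(struct obj *new_obj)
{
        /*
         * new_obj is still private to this thread, so nothing it references
         * can disappear; the read-side section below exists purely to keep
         * CONFIG_PROVE_RCU quiet about the rcu_dereference() in the helper.
         */
        rcu_read_lock();
        new_obj->dep = lookup_dep_rcu(new_obj);
        rcu_read_unlock();
}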
@@ -2885,9 +2873,9 @@ unsigned long nr_iowait(void)
         return sum;
 }
 
-unsigned long nr_iowait_cpu(void)
+unsigned long nr_iowait_cpu(int cpu)
 {
-        struct rq *this = this_rq();
+        struct rq *this = cpu_rq(cpu);
         return atomic_read(&this->nr_iowait);
 }
 
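nr_iowait_cpu() now takes an explicit CPU argument instead of implicitly reading this_rq(), so the caller chooses which runqueue to sample. A hedged usage sketch (hypothetical caller, not part of the patch):

/* Sample the number of tasks blocked on I/O on the local CPU. */
static unsigned long example_local_iowait(void)
{
        int cpu = get_cpu();            /* pin to a CPU while we read it */
        unsigned long n = nr_iowait_cpu(cpu);

        put_cpu();
        return n;
}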
@@ -4062,6 +4050,23 @@ int __sched wait_for_completion_killable(struct completion *x)
 EXPORT_SYMBOL(wait_for_completion_killable);
 
 /**
+ * wait_for_completion_killable_timeout: - waits for completion of a task (w/(to,killable))
+ * @x:  holds the state of this particular completion
+ * @timeout:  timeout value in jiffies
+ *
+ * This waits for either a completion of a specific task to be
+ * signaled or for a specified timeout to expire. It can be
+ * interrupted by a kill signal. The timeout is in jiffies.
+ */
+unsigned long __sched
+wait_for_completion_killable_timeout(struct completion *x,
+                                     unsigned long timeout)
+{
+        return wait_for_common(x, timeout, TASK_KILLABLE);
+}
+EXPORT_SYMBOL(wait_for_completion_killable_timeout);
+
+/**
  * try_wait_for_completion - try to decrement a completion without blocking
  * @x:  completion structure
  *
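A hedged usage sketch for the new helper (hypothetical caller; the return convention is assumed to follow the other *_timeout completion variants: negative if a kill signal arrived, 0 on timeout, otherwise the remaining jiffies):

static int example_wait_for_hw(struct completion *done)
{
        long left = wait_for_completion_killable_timeout(done,
                                                msecs_to_jiffies(500));

        if (left < 0)
                return left;            /* killed: propagate -ERESTARTSYS */
        if (left == 0)
                return -ETIMEDOUT;      /* never signalled within 500 ms */
        return 0;                       /* completed with 'left' jiffies to spare */
}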
@@ -4469,16 +4474,6 @@ recheck:
         }
 
         if (user) {
-#ifdef CONFIG_RT_GROUP_SCHED
-                /*
-                 * Do not allow realtime tasks into groups that have no runtime
-                 * assigned.
-                 */
-                if (rt_bandwidth_enabled() && rt_policy(policy) &&
-                                task_group(p)->rt_bandwidth.rt_runtime == 0)
-                        return -EPERM;
-#endif
-
                 retval = security_task_setscheduler(p, policy, param);
                 if (retval)
                         return retval;
@@ -4494,6 +4489,22 @@ recheck:
          * runqueue lock must be held.
          */
         rq = __task_rq_lock(p);
+
+#ifdef CONFIG_RT_GROUP_SCHED
+        if (user) {
+                /*
+                 * Do not allow realtime tasks into groups that have no runtime
+                 * assigned.
+                 */
+                if (rt_bandwidth_enabled() && rt_policy(policy) &&
+                                task_group(p)->rt_bandwidth.rt_runtime == 0) {
+                        __task_rq_unlock(rq);
+                        raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+                        return -EPERM;
+                }
+        }
+#endif
+
         /* recheck policy now with rq lock held */
         if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
                 policy = oldpolicy = -1;
@@ -7596,6 +7607,7 @@ void __init sched_init(void)
 #ifdef CONFIG_SMP
                 rq->sd = NULL;
                 rq->rd = NULL;
+                rq->cpu_power = SCHED_LOAD_SCALE;
                 rq->post_schedule = 0;
                 rq->active_balance = 0;
                 rq->next_balance = jiffies;