Diffstat (limited to 'kernel/sched.c')
-rw-r--r--	kernel/sched.c	161
1 file changed, 82 insertions(+), 79 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index d48408142503..f52a8801b7a2 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -306,52 +306,6 @@ static int init_task_group_load = INIT_TASK_GROUP_LOAD;
  */
 struct task_group init_task_group;
 
-/* return group to which a task belongs */
-static inline struct task_group *task_group(struct task_struct *p)
-{
-	struct task_group *tg;
-
-#ifdef CONFIG_CGROUP_SCHED
-	tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id),
-				struct task_group, css);
-#else
-	tg = &init_task_group;
-#endif
-	return tg;
-}
-
-/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
-static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
-{
-	/*
-	 * Strictly speaking this rcu_read_lock() is not needed since the
-	 * task_group is tied to the cgroup, which in turn can never go away
-	 * as long as there are tasks attached to it.
-	 *
-	 * However since task_group() uses task_subsys_state() which is an
-	 * rcu_dereference() user, this quiets CONFIG_PROVE_RCU.
-	 */
-	rcu_read_lock();
-#ifdef CONFIG_FAIR_GROUP_SCHED
-	p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
-	p->se.parent = task_group(p)->se[cpu];
-#endif
-
-#ifdef CONFIG_RT_GROUP_SCHED
-	p->rt.rt_rq = task_group(p)->rt_rq[cpu];
-	p->rt.parent = task_group(p)->rt_se[cpu];
-#endif
-	rcu_read_unlock();
-}
-
-#else
-
-static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
-static inline struct task_group *task_group(struct task_struct *p)
-{
-	return NULL;
-}
-
 #endif /* CONFIG_CGROUP_SCHED */
 
 /* CFS-related fields in a runqueue */
@@ -544,6 +498,8 @@ struct rq {
 	struct root_domain *rd;
 	struct sched_domain *sd;
 
+	unsigned long cpu_power;
+
 	unsigned char idle_at_tick;
 	/* For active balancing */
 	int post_schedule;
@@ -642,6 +598,49 @@ static inline int cpu_of(struct rq *rq)
 #define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
 #define raw_rq()		(&__raw_get_cpu_var(runqueues))
 
+#ifdef CONFIG_CGROUP_SCHED
+
+/*
+ * Return the group to which this task belongs.
+ *
+ * We use task_subsys_state_check() and extend the RCU verification
+ * with lockdep_is_held(&task_rq(p)->lock) because cpu_cgroup_attach()
+ * holds that lock for each task it moves into the cgroup. Therefore
+ * by holding that lock, we pin the task to the current cgroup.
+ */
+static inline struct task_group *task_group(struct task_struct *p)
+{
+	struct cgroup_subsys_state *css;
+
+	css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
+			lockdep_is_held(&task_rq(p)->lock));
+	return container_of(css, struct task_group, css);
+}
+
+/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
+static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
+{
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
+	p->se.parent = task_group(p)->se[cpu];
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
+	p->rt.rt_rq = task_group(p)->rt_rq[cpu];
+	p->rt.parent = task_group(p)->rt_se[cpu];
+#endif
+}
+
+#else /* CONFIG_CGROUP_SCHED */
+
+static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
+static inline struct task_group *task_group(struct task_struct *p)
+{
+	return NULL;
+}
+
+#endif /* CONFIG_CGROUP_SCHED */
+
 inline void update_rq_clock(struct rq *rq)
 {
 	if (!rq->skip_clock_update)
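
For context, a minimal standalone sketch of the rcu_dereference_check()/lockdep_is_held() pattern that the task_subsys_state_check() call above builds on. The names foo, foo_ptr, foo_lock and read_foo_val() are hypothetical illustration only, not part of this patch:

#include <linux/rcupdate.h>
#include <linux/spinlock.h>
#include <linux/lockdep.h>

struct foo {
	int val;
};

static DEFINE_SPINLOCK(foo_lock);	/* hypothetical lock that pins foo_ptr */
static struct foo __rcu *foo_ptr;	/* hypothetical RCU-managed pointer */

static int read_foo_val(void)
{
	struct foo *f;

	/*
	 * Legal either inside rcu_read_lock() or while holding foo_lock;
	 * the lockdep expression tells PROVE_RCU about the second case,
	 * just as lockdep_is_held(&task_rq(p)->lock) does in task_group().
	 */
	f = rcu_dereference_check(foo_ptr, lockdep_is_held(&foo_lock));
	return f ? f->val : 0;
}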
@@ -1255,6 +1254,12 @@ static void sched_avg_update(struct rq *rq)
 	s64 period = sched_avg_period();
 
 	while ((s64)(rq->clock - rq->age_stamp) > period) {
+		/*
+		 * Inline assembly required to prevent the compiler
+		 * optimising this loop into a divmod call.
+		 * See __iter_div_u64_rem() for another example of this.
+		 */
+		asm("" : "+rm" (rq->age_stamp));
 		rq->age_stamp += period;
 		rq->rt_avg /= 2;
 	}
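
For context, a minimal standalone sketch of the empty-asm optimisation barrier used above; consume_periods() is a hypothetical helper, not kernel code. Without the asm, a compiler may collapse the subtract-in-a-loop into a single division, which is exactly what the comment in __iter_div_u64_rem() also guards against:

/* Assumes now >= stamp; mirrors the age_stamp catch-up loop above. */
static unsigned long consume_periods(unsigned long stamp,
				     unsigned long now,
				     unsigned long period)
{
	while (now - stamp > period) {
		/*
		 * "+rm" marks stamp as both read and written by the (empty)
		 * asm, so the optimizer cannot prove the loop is equivalent
		 * to a divmod and must keep the iteration.
		 */
		asm("" : "+rm" (stamp));
		stamp += period;
	}
	return stamp;
}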
@@ -1499,24 +1504,9 @@ static unsigned long target_load(int cpu, int type)
 	return max(rq->cpu_load[type-1], total);
 }
 
-static struct sched_group *group_of(int cpu)
-{
-	struct sched_domain *sd = rcu_dereference_sched(cpu_rq(cpu)->sd);
-
-	if (!sd)
-		return NULL;
-
-	return sd->groups;
-}
-
 static unsigned long power_of(int cpu)
 {
-	struct sched_group *group = group_of(cpu);
-
-	if (!group)
-		return SCHED_LOAD_SCALE;
-
-	return group->cpu_power;
+	return cpu_rq(cpu)->cpu_power;
 }
 
 static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
@@ -1673,9 +1663,6 @@ static void update_shares(struct sched_domain *sd)
 
 static void update_h_load(long cpu)
 {
-	if (root_task_group_empty())
-		return;
-
 	walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
 }
 
@@ -1854,8 +1841,8 @@ static void dec_nr_running(struct rq *rq)
 static void set_load_weight(struct task_struct *p)
 {
 	if (task_has_rt_policy(p)) {
-		p->se.load.weight = prio_to_weight[0] * 2;
-		p->se.load.inv_weight = prio_to_wmult[0] >> 1;
+		p->se.load.weight = 0;
+		p->se.load.inv_weight = WMULT_CONST;
 		return;
 	}
 
@@ -2507,7 +2494,16 @@ void sched_fork(struct task_struct *p, int clone_flags)
 	if (p->sched_class->task_fork)
 		p->sched_class->task_fork(p);
 
+	/*
+	 * The child is not yet in the pid-hash so no cgroup attach races,
+	 * and the cgroup is pinned to this child because cgroup_fork()
+	 * runs before sched_fork().
+	 *
+	 * Silence PROVE_RCU.
+	 */
+	rcu_read_lock();
 	set_task_cpu(p, cpu);
+	rcu_read_unlock();
 
 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
 	if (likely(sched_info_on()))
@@ -2877,9 +2873,9 @@ unsigned long nr_iowait(void)
 	return sum;
 }
 
-unsigned long nr_iowait_cpu(void)
+unsigned long nr_iowait_cpu(int cpu)
 {
-	struct rq *this = this_rq();
+	struct rq *this = cpu_rq(cpu);
 	return atomic_read(&this->nr_iowait);
 }
 
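A hedged usage sketch for the new signature: a caller that previously relied on nr_iowait_cpu() implicitly reading this_rq() now names the CPU explicitly. local_iowait_tasks() is a hypothetical wrapper for illustration, not part of this patch:

/*
 * Assumes it is called with preemption disabled, so that
 * smp_processor_id() stays stable for the duration of the read.
 */
static unsigned long local_iowait_tasks(void)
{
	return nr_iowait_cpu(smp_processor_id());
}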
@@ -4478,16 +4474,6 @@ recheck:
 	}
 
 	if (user) {
-#ifdef CONFIG_RT_GROUP_SCHED
-		/*
-		 * Do not allow realtime tasks into groups that have no runtime
-		 * assigned.
-		 */
-		if (rt_bandwidth_enabled() && rt_policy(policy) &&
-				task_group(p)->rt_bandwidth.rt_runtime == 0)
-			return -EPERM;
-#endif
-
 		retval = security_task_setscheduler(p, policy, param);
 		if (retval)
 			return retval;
@@ -4503,6 +4489,22 @@ recheck:
 	 * runqueue lock must be held.
 	 */
 	rq = __task_rq_lock(p);
+
+#ifdef CONFIG_RT_GROUP_SCHED
+	if (user) {
+		/*
+		 * Do not allow realtime tasks into groups that have no runtime
+		 * assigned.
+		 */
+		if (rt_bandwidth_enabled() && rt_policy(policy) &&
+				task_group(p)->rt_bandwidth.rt_runtime == 0) {
+			__task_rq_unlock(rq);
+			raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+			return -EPERM;
+		}
+	}
+#endif
+
 	/* recheck policy now with rq lock held */
 	if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
 		policy = oldpolicy = -1;
@@ -7605,6 +7607,7 @@ void __init sched_init(void)
 #ifdef CONFIG_SMP
 		rq->sd = NULL;
 		rq->rd = NULL;
+		rq->cpu_power = SCHED_LOAD_SCALE;
 		rq->post_schedule = 0;
 		rq->active_balance = 0;
 		rq->next_balance = jiffies;