Diffstat (limited to 'kernel/sched.c')

 -rw-r--r--  kernel/sched.c | 161
 1 file changed, 82 insertions(+), 79 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index d48408142503..f52a8801b7a2 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -306,52 +306,6 @@ static int init_task_group_load = INIT_TASK_GROUP_LOAD;
  */
 struct task_group init_task_group;
 
-/* return group to which a task belongs */
-static inline struct task_group *task_group(struct task_struct *p)
-{
-        struct task_group *tg;
-
-#ifdef CONFIG_CGROUP_SCHED
-        tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id),
-                        struct task_group, css);
-#else
-        tg = &init_task_group;
-#endif
-        return tg;
-}
-
-/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
-static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
-{
-        /*
-         * Strictly speaking this rcu_read_lock() is not needed since the
-         * task_group is tied to the cgroup, which in turn can never go away
-         * as long as there are tasks attached to it.
-         *
-         * However since task_group() uses task_subsys_state() which is an
-         * rcu_dereference() user, this quiets CONFIG_PROVE_RCU.
-         */
-        rcu_read_lock();
-#ifdef CONFIG_FAIR_GROUP_SCHED
-        p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
-        p->se.parent = task_group(p)->se[cpu];
-#endif
-
-#ifdef CONFIG_RT_GROUP_SCHED
-        p->rt.rt_rq = task_group(p)->rt_rq[cpu];
-        p->rt.parent = task_group(p)->rt_se[cpu];
-#endif
-        rcu_read_unlock();
-}
-
-#else
-
-static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
-static inline struct task_group *task_group(struct task_struct *p)
-{
-        return NULL;
-}
-
 #endif /* CONFIG_CGROUP_SCHED */
 
 /* CFS-related fields in a runqueue */
@@ -544,6 +498,8 @@ struct rq {
         struct root_domain *rd;
         struct sched_domain *sd;
 
+        unsigned long cpu_power;
+
         unsigned char idle_at_tick;
         /* For active balancing */
         int post_schedule;
@@ -642,6 +598,49 @@ static inline int cpu_of(struct rq *rq)
 #define cpu_curr(cpu)          (cpu_rq(cpu)->curr)
 #define raw_rq()               (&__raw_get_cpu_var(runqueues))
 
+#ifdef CONFIG_CGROUP_SCHED
+
+/*
+ * Return the group to which this tasks belongs.
+ *
+ * We use task_subsys_state_check() and extend the RCU verification
+ * with lockdep_is_held(&task_rq(p)->lock) because cpu_cgroup_attach()
+ * holds that lock for each task it moves into the cgroup. Therefore
+ * by holding that lock, we pin the task to the current cgroup.
+ */
+static inline struct task_group *task_group(struct task_struct *p)
+{
+        struct cgroup_subsys_state *css;
+
+        css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
+                        lockdep_is_held(&task_rq(p)->lock));
+        return container_of(css, struct task_group, css);
+}
+
+/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
+static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
+{
+#ifdef CONFIG_FAIR_GROUP_SCHED
+        p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
+        p->se.parent = task_group(p)->se[cpu];
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
+        p->rt.rt_rq = task_group(p)->rt_rq[cpu];
+        p->rt.parent = task_group(p)->rt_se[cpu];
+#endif
+}
+
+#else /* CONFIG_CGROUP_SCHED */
+
+static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
+static inline struct task_group *task_group(struct task_struct *p)
+{
+        return NULL;
+}
+
+#endif /* CONFIG_CGROUP_SCHED */
+
 inline void update_rq_clock(struct rq *rq)
 {
         if (!rq->skip_clock_update)
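The key point of the new task_group() is that PROVE_RCU accepts the dereference if either an RCU read-side critical section or task_rq(p)->lock is held, since cpu_cgroup_attach() takes that runqueue lock for every task it moves between groups, which pins the task's cgroup. A toy user-space model of that either/or check (every name below is invented for illustration, not a kernel API):

    #include <assert.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct task_group { const char *name; };

    struct task {
        struct task_group *group;   /* protected by RCU or the rq lock */
        bool rq_lock_held;          /* stand-in for lockdep_is_held()  */
    };

    static bool rcu_read_side;      /* stand-in for rcu_read_lock_held() */

    static struct task_group *task_group_checked(struct task *p)
    {
        /* the PROVE_RCU-style condition: either protection is enough */
        assert(rcu_read_side || p->rq_lock_held);
        return p->group;
    }

    int main(void)
    {
        struct task_group tg = { "init_task_group" };
        struct task p = { &tg, true };  /* pretend task_rq(p)->lock is held */

        printf("%s\n", task_group_checked(&p)->name);
        return 0;
    }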
@@ -1255,6 +1254,12 @@ static void sched_avg_update(struct rq *rq)
         s64 period = sched_avg_period();
 
         while ((s64)(rq->clock - rq->age_stamp) > period) {
+                /*
+                 * Inline assembly required to prevent the compiler
+                 * optimising this loop into a divmod call.
+                 * See __iter_div_u64_rem() for another example of this.
+                 */
+                asm("" : "+rm" (rq->age_stamp));
                 rq->age_stamp += period;
                 rq->rt_avg /= 2;
         }
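The empty asm with a "+rm" constraint simply makes rq->age_stamp opaque to the optimiser, so the subtract-in-a-loop is not strength-reduced into a division/modulo; __iter_div_u64_rem() uses the same trick. A minimal stand-alone sketch of the pattern (user-space, hypothetical names, builds with GCC or Clang):

    #include <stdint.h>
    #include <stdio.h>

    /* Decay 'stamp' forward by whole periods, like sched_avg_update() does. */
    static uint64_t advance(uint64_t stamp, uint64_t now, uint64_t period)
    {
        while ((int64_t)(now - stamp) > (int64_t)period) {
            /* Hide 'stamp' from the optimiser so the loop stays a loop. */
            asm("" : "+rm" (stamp));
            stamp += period;
        }
        return stamp;
    }

    int main(void)
    {
        /* ten periods plus a bit: the loop iterates ten times, no divmod */
        printf("%llu\n", (unsigned long long)advance(0, 10000003, 1000000));
        return 0;
    }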
@@ -1499,24 +1504,9 @@ static unsigned long target_load(int cpu, int type)
         return max(rq->cpu_load[type-1], total);
 }
 
-static struct sched_group *group_of(int cpu)
-{
-        struct sched_domain *sd = rcu_dereference_sched(cpu_rq(cpu)->sd);
-
-        if (!sd)
-                return NULL;
-
-        return sd->groups;
-}
-
 static unsigned long power_of(int cpu)
 {
-        struct sched_group *group = group_of(cpu);
-
-        if (!group)
-                return SCHED_LOAD_SCALE;
-
-        return group->cpu_power;
+        return cpu_rq(cpu)->cpu_power;
 }
 
 static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
@@ -1673,9 +1663,6 @@ static void update_shares(struct sched_domain *sd)
 
 static void update_h_load(long cpu)
 {
-        if (root_task_group_empty())
-                return;
-
         walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
 }
 
@@ -1854,8 +1841,8 @@ static void dec_nr_running(struct rq *rq)
 static void set_load_weight(struct task_struct *p)
 {
         if (task_has_rt_policy(p)) {
-                p->se.load.weight = prio_to_weight[0] * 2;
-                p->se.load.inv_weight = prio_to_wmult[0] >> 1;
+                p->se.load.weight = 0;
+                p->se.load.inv_weight = WMULT_CONST;
                 return;
         }
 
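For context on the numbers being dropped: CFS keeps inv_weight as a fixed-point inverse, roughly 2^32 / weight, and the old realtime values followed that relation using the nice -20 table entries (prio_to_weight[0] = 88761, prio_to_wmult[0] = 48388 in kernel/sched.c). After the change, realtime tasks simply contribute no weight to CFS load. A quick user-space check of the old arithmetic:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* nice -20 entries copied from prio_to_weight[] / prio_to_wmult[] */
        const uint64_t weight_m20 = 88761;
        const uint64_t wmult_m20  = 48388;

        /* the values the old code assigned to realtime tasks */
        uint64_t old_rt_weight = weight_m20 * 2;   /* 177522 */
        uint64_t old_rt_inv    = wmult_m20 >> 1;   /*  24194 */

        /* inv_weight is meant to approximate 2^32 / weight */
        printf("old RT weight %llu, inv_weight %llu, 2^32/weight = %llu\n",
               (unsigned long long)old_rt_weight,
               (unsigned long long)old_rt_inv,
               (unsigned long long)((1ULL << 32) / old_rt_weight));
        return 0;
    }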
@@ -2507,7 +2494,16 @@ void sched_fork(struct task_struct *p, int clone_flags)
         if (p->sched_class->task_fork)
                 p->sched_class->task_fork(p);
 
+        /*
+         * The child is not yet in the pid-hash so no cgroup attach races,
+         * and the cgroup is pinned to this child due to cgroup_fork()
+         * is ran before sched_fork().
+         *
+         * Silence PROVE_RCU.
+         */
+        rcu_read_lock();
         set_task_cpu(p, cpu);
+        rcu_read_unlock();
 
 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
         if (likely(sched_info_on()))
@@ -2877,9 +2873,9 @@ unsigned long nr_iowait(void)
         return sum;
 }
 
-unsigned long nr_iowait_cpu(void)
+unsigned long nr_iowait_cpu(int cpu)
 {
-        struct rq *this = this_rq();
+        struct rq *this = cpu_rq(cpu);
         return atomic_read(&this->nr_iowait);
 }
 
@@ -4478,16 +4474,6 @@ recheck:
         }
 
         if (user) {
-#ifdef CONFIG_RT_GROUP_SCHED
-                /*
-                 * Do not allow realtime tasks into groups that have no runtime
-                 * assigned.
-                 */
-                if (rt_bandwidth_enabled() && rt_policy(policy) &&
-                                task_group(p)->rt_bandwidth.rt_runtime == 0)
-                        return -EPERM;
-#endif
-
                 retval = security_task_setscheduler(p, policy, param);
                 if (retval)
                         return retval;
@@ -4503,6 +4489,22 @@ recheck:
          * runqueue lock must be held.
          */
         rq = __task_rq_lock(p);
+
+#ifdef CONFIG_RT_GROUP_SCHED
+        if (user) {
+                /*
+                 * Do not allow realtime tasks into groups that have no runtime
+                 * assigned.
+                 */
+                if (rt_bandwidth_enabled() && rt_policy(policy) &&
+                                task_group(p)->rt_bandwidth.rt_runtime == 0) {
+                        __task_rq_unlock(rq);
+                        raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+                        return -EPERM;
+                }
+        }
+#endif
+
         /* recheck policy now with rq lock held */
         if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
                 policy = oldpolicy = -1;
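Moving the RT_GROUP_SCHED bandwidth check below __task_rq_lock() matters because task_group(p) is only stable while the runqueue lock is held (see the new task_group() above); the cost is that the error path now has to unwind the locks before returning -EPERM. The shape of that check-under-lock pattern, as a small hedged user-space sketch with pthreads (names invented for illustration):

    #include <errno.h>
    #include <pthread.h>
    #include <stdio.h>

    struct group { long rt_runtime; };
    struct task  { pthread_mutex_t rq_lock; struct group *group; };

    /* Validate under the lock that pins p->group; unlock on the error path. */
    static int set_rt_policy(struct task *p)
    {
        pthread_mutex_lock(&p->rq_lock);

        if (p->group->rt_runtime == 0) {
            pthread_mutex_unlock(&p->rq_lock);   /* unwind before returning */
            return -EPERM;
        }

        /* ... apply the policy change while the group is still pinned ... */
        pthread_mutex_unlock(&p->rq_lock);
        return 0;
    }

    int main(void)
    {
        struct group g = { .rt_runtime = 0 };
        struct task  t = { PTHREAD_MUTEX_INITIALIZER, &g };

        printf("set_rt_policy: %d\n", set_rt_policy(&t));   /* prints -1 */
        return 0;
    }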
@@ -7605,6 +7607,7 @@ void __init sched_init(void)
 #ifdef CONFIG_SMP
                 rq->sd = NULL;
                 rq->rd = NULL;
+                rq->cpu_power = SCHED_LOAD_SCALE;
                 rq->post_schedule = 0;
                 rq->active_balance = 0;
                 rq->next_balance = jiffies;