diff options
Diffstat (limited to 'kernel/sched.c')
| -rw-r--r-- | kernel/sched.c | 161 |
1 files changed, 82 insertions, 79 deletions
diff --git a/kernel/sched.c b/kernel/sched.c index d48408142503..f52a8801b7a2 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
| @@ -306,52 +306,6 @@ static int init_task_group_load = INIT_TASK_GROUP_LOAD; | |||
| 306 | */ | 306 | */ |
| 307 | struct task_group init_task_group; | 307 | struct task_group init_task_group; |
| 308 | 308 | ||
| 309 | /* return group to which a task belongs */ | ||
| 310 | static inline struct task_group *task_group(struct task_struct *p) | ||
| 311 | { | ||
| 312 | struct task_group *tg; | ||
| 313 | |||
| 314 | #ifdef CONFIG_CGROUP_SCHED | ||
| 315 | tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id), | ||
| 316 | struct task_group, css); | ||
| 317 | #else | ||
| 318 | tg = &init_task_group; | ||
| 319 | #endif | ||
| 320 | return tg; | ||
| 321 | } | ||
| 322 | |||
| 323 | /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ | ||
| 324 | static inline void set_task_rq(struct task_struct *p, unsigned int cpu) | ||
| 325 | { | ||
| 326 | /* | ||
| 327 | * Strictly speaking this rcu_read_lock() is not needed since the | ||
| 328 | * task_group is tied to the cgroup, which in turn can never go away | ||
| 329 | * as long as there are tasks attached to it. | ||
| 330 | * | ||
| 331 | * However since task_group() uses task_subsys_state() which is an | ||
| 332 | * rcu_dereference() user, this quiets CONFIG_PROVE_RCU. | ||
| 333 | */ | ||
| 334 | rcu_read_lock(); | ||
| 335 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
| 336 | p->se.cfs_rq = task_group(p)->cfs_rq[cpu]; | ||
| 337 | p->se.parent = task_group(p)->se[cpu]; | ||
| 338 | #endif | ||
| 339 | |||
| 340 | #ifdef CONFIG_RT_GROUP_SCHED | ||
| 341 | p->rt.rt_rq = task_group(p)->rt_rq[cpu]; | ||
| 342 | p->rt.parent = task_group(p)->rt_se[cpu]; | ||
| 343 | #endif | ||
| 344 | rcu_read_unlock(); | ||
| 345 | } | ||
| 346 | |||
| 347 | #else | ||
| 348 | |||
| 349 | static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { } | ||
| 350 | static inline struct task_group *task_group(struct task_struct *p) | ||
| 351 | { | ||
| 352 | return NULL; | ||
| 353 | } | ||
| 354 | |||
| 355 | #endif /* CONFIG_CGROUP_SCHED */ | 309 | #endif /* CONFIG_CGROUP_SCHED */ |
| 356 | 310 | ||
| 357 | /* CFS-related fields in a runqueue */ | 311 | /* CFS-related fields in a runqueue */ |
| @@ -544,6 +498,8 @@ struct rq { | |||
| 544 | struct root_domain *rd; | 498 | struct root_domain *rd; |
| 545 | struct sched_domain *sd; | 499 | struct sched_domain *sd; |
| 546 | 500 | ||
| 501 | unsigned long cpu_power; | ||
| 502 | |||
| 547 | unsigned char idle_at_tick; | 503 | unsigned char idle_at_tick; |
| 548 | /* For active balancing */ | 504 | /* For active balancing */ |
| 549 | int post_schedule; | 505 | int post_schedule; |
| @@ -642,6 +598,49 @@ static inline int cpu_of(struct rq *rq) | |||
| 642 | #define cpu_curr(cpu) (cpu_rq(cpu)->curr) | 598 | #define cpu_curr(cpu) (cpu_rq(cpu)->curr) |
| 643 | #define raw_rq() (&__raw_get_cpu_var(runqueues)) | 599 | #define raw_rq() (&__raw_get_cpu_var(runqueues)) |
| 644 | 600 | ||
| 601 | #ifdef CONFIG_CGROUP_SCHED | ||
| 602 | |||
| 603 | /* | ||
| 604 | * Return the group to which this task belongs. | ||
| 605 | * | ||
| 606 | * We use task_subsys_state_check() and extend the RCU verification | ||
| 607 | * with lockdep_is_held(&task_rq(p)->lock) because cpu_cgroup_attach() | ||
| 608 | * holds that lock for each task it moves into the cgroup. Therefore | ||
| 609 | * by holding that lock, we pin the task to the current cgroup. | ||
| 610 | */ | ||
| 611 | static inline struct task_group *task_group(struct task_struct *p) | ||
| 612 | { | ||
| 613 | struct cgroup_subsys_state *css; | ||
| 614 | |||
| 615 | css = task_subsys_state_check(p, cpu_cgroup_subsys_id, | ||
| 616 | lockdep_is_held(&task_rq(p)->lock)); | ||
| 617 | return container_of(css, struct task_group, css); | ||
| 618 | } | ||
| 619 | |||
| 620 | /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ | ||
| 621 | static inline void set_task_rq(struct task_struct *p, unsigned int cpu) | ||
| 622 | { | ||
| 623 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
| 624 | p->se.cfs_rq = task_group(p)->cfs_rq[cpu]; | ||
| 625 | p->se.parent = task_group(p)->se[cpu]; | ||
| 626 | #endif | ||
| 627 | |||
| 628 | #ifdef CONFIG_RT_GROUP_SCHED | ||
| 629 | p->rt.rt_rq = task_group(p)->rt_rq[cpu]; | ||
| 630 | p->rt.parent = task_group(p)->rt_se[cpu]; | ||
| 631 | #endif | ||
| 632 | } | ||
| 633 | |||
| 634 | #else /* CONFIG_CGROUP_SCHED */ | ||
| 635 | |||
| 636 | static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { } | ||
| 637 | static inline struct task_group *task_group(struct task_struct *p) | ||
| 638 | { | ||
| 639 | return NULL; | ||
| 640 | } | ||
| 641 | |||
| 642 | #endif /* CONFIG_CGROUP_SCHED */ | ||
| 643 | |||
| 645 | inline void update_rq_clock(struct rq *rq) | 644 | inline void update_rq_clock(struct rq *rq) |
| 646 | { | 645 | { |
| 647 | if (!rq->skip_clock_update) | 646 | if (!rq->skip_clock_update) |
| @@ -1255,6 +1254,12 @@ static void sched_avg_update(struct rq *rq) | |||
| 1255 | s64 period = sched_avg_period(); | 1254 | s64 period = sched_avg_period(); |
| 1256 | 1255 | ||
| 1257 | while ((s64)(rq->clock - rq->age_stamp) > period) { | 1256 | while ((s64)(rq->clock - rq->age_stamp) > period) { |
| 1257 | /* | ||
| 1258 | * Inline assembly required to prevent the compiler | ||
| 1259 | * optimising this loop into a divmod call. | ||
| 1260 | * See __iter_div_u64_rem() for another example of this. | ||
| 1261 | */ | ||
| 1262 | asm("" : "+rm" (rq->age_stamp)); | ||
| 1258 | rq->age_stamp += period; | 1263 | rq->age_stamp += period; |
| 1259 | rq->rt_avg /= 2; | 1264 | rq->rt_avg /= 2; |
| 1260 | } | 1265 | } |
| @@ -1499,24 +1504,9 @@ static unsigned long target_load(int cpu, int type) | |||
| 1499 | return max(rq->cpu_load[type-1], total); | 1504 | return max(rq->cpu_load[type-1], total); |
| 1500 | } | 1505 | } |
| 1501 | 1506 | ||
| 1502 | static struct sched_group *group_of(int cpu) | ||
| 1503 | { | ||
| 1504 | struct sched_domain *sd = rcu_dereference_sched(cpu_rq(cpu)->sd); | ||
| 1505 | |||
| 1506 | if (!sd) | ||
| 1507 | return NULL; | ||
| 1508 | |||
| 1509 | return sd->groups; | ||
| 1510 | } | ||
| 1511 | |||
| 1512 | static unsigned long power_of(int cpu) | 1507 | static unsigned long power_of(int cpu) |
| 1513 | { | 1508 | { |
| 1514 | struct sched_group *group = group_of(cpu); | 1509 | return cpu_rq(cpu)->cpu_power; |
| 1515 | |||
| 1516 | if (!group) | ||
| 1517 | return SCHED_LOAD_SCALE; | ||
| 1518 | |||
| 1519 | return group->cpu_power; | ||
| 1520 | } | 1510 | } |
| 1521 | 1511 | ||
| 1522 | static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); | 1512 | static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); |
| @@ -1673,9 +1663,6 @@ static void update_shares(struct sched_domain *sd) | |||
| 1673 | 1663 | ||
| 1674 | static void update_h_load(long cpu) | 1664 | static void update_h_load(long cpu) |
| 1675 | { | 1665 | { |
| 1676 | if (root_task_group_empty()) | ||
| 1677 | return; | ||
| 1678 | |||
| 1679 | walk_tg_tree(tg_load_down, tg_nop, (void *)cpu); | 1666 | walk_tg_tree(tg_load_down, tg_nop, (void *)cpu); |
| 1680 | } | 1667 | } |
| 1681 | 1668 | ||
| @@ -1854,8 +1841,8 @@ static void dec_nr_running(struct rq *rq) | |||
| 1854 | static void set_load_weight(struct task_struct *p) | 1841 | static void set_load_weight(struct task_struct *p) |
| 1855 | { | 1842 | { |
| 1856 | if (task_has_rt_policy(p)) { | 1843 | if (task_has_rt_policy(p)) { |
| 1857 | p->se.load.weight = prio_to_weight[0] * 2; | 1844 | p->se.load.weight = 0; |
| 1858 | p->se.load.inv_weight = prio_to_wmult[0] >> 1; | 1845 | p->se.load.inv_weight = WMULT_CONST; |
| 1859 | return; | 1846 | return; |
| 1860 | } | 1847 | } |
| 1861 | 1848 | ||
| @@ -2507,7 +2494,16 @@ void sched_fork(struct task_struct *p, int clone_flags) | |||
| 2507 | if (p->sched_class->task_fork) | 2494 | if (p->sched_class->task_fork) |
| 2508 | p->sched_class->task_fork(p); | 2495 | p->sched_class->task_fork(p); |
| 2509 | 2496 | ||
| 2497 | /* | ||
| 2498 | * The child is not yet in the pid-hash so no cgroup attach races, | ||
| 2499 | * and the cgroup is pinned to this child because cgroup_fork() | ||
| 2500 | * runs before sched_fork(). | ||
| 2501 | * | ||
| 2502 | * Silence PROVE_RCU. | ||
| 2503 | */ | ||
| 2504 | rcu_read_lock(); | ||
| 2510 | set_task_cpu(p, cpu); | 2505 | set_task_cpu(p, cpu); |
| 2506 | rcu_read_unlock(); | ||
| 2511 | 2507 | ||
| 2512 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) | 2508 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) |
| 2513 | if (likely(sched_info_on())) | 2509 | if (likely(sched_info_on())) |
| @@ -2877,9 +2873,9 @@ unsigned long nr_iowait(void) | |||
| 2877 | return sum; | 2873 | return sum; |
| 2878 | } | 2874 | } |
| 2879 | 2875 | ||
| 2880 | unsigned long nr_iowait_cpu(void) | 2876 | unsigned long nr_iowait_cpu(int cpu) |
| 2881 | { | 2877 | { |
| 2882 | struct rq *this = this_rq(); | 2878 | struct rq *this = cpu_rq(cpu); |
| 2883 | return atomic_read(&this->nr_iowait); | 2879 | return atomic_read(&this->nr_iowait); |
| 2884 | } | 2880 | } |
| 2885 | 2881 | ||
| @@ -4478,16 +4474,6 @@ recheck: | |||
| 4478 | } | 4474 | } |
| 4479 | 4475 | ||
| 4480 | if (user) { | 4476 | if (user) { |
| 4481 | #ifdef CONFIG_RT_GROUP_SCHED | ||
| 4482 | /* | ||
| 4483 | * Do not allow realtime tasks into groups that have no runtime | ||
| 4484 | * assigned. | ||
| 4485 | */ | ||
| 4486 | if (rt_bandwidth_enabled() && rt_policy(policy) && | ||
| 4487 | task_group(p)->rt_bandwidth.rt_runtime == 0) | ||
| 4488 | return -EPERM; | ||
| 4489 | #endif | ||
| 4490 | |||
| 4491 | retval = security_task_setscheduler(p, policy, param); | 4477 | retval = security_task_setscheduler(p, policy, param); |
| 4492 | if (retval) | 4478 | if (retval) |
| 4493 | return retval; | 4479 | return retval; |
| @@ -4503,6 +4489,22 @@ recheck: | |||
| 4503 | * runqueue lock must be held. | 4489 | * runqueue lock must be held. |
| 4504 | */ | 4490 | */ |
| 4505 | rq = __task_rq_lock(p); | 4491 | rq = __task_rq_lock(p); |
| 4492 | |||
| 4493 | #ifdef CONFIG_RT_GROUP_SCHED | ||
| 4494 | if (user) { | ||
| 4495 | /* | ||
| 4496 | * Do not allow realtime tasks into groups that have no runtime | ||
| 4497 | * assigned. | ||
| 4498 | */ | ||
| 4499 | if (rt_bandwidth_enabled() && rt_policy(policy) && | ||
| 4500 | task_group(p)->rt_bandwidth.rt_runtime == 0) { | ||
| 4501 | __task_rq_unlock(rq); | ||
| 4502 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); | ||
| 4503 | return -EPERM; | ||
| 4504 | } | ||
| 4505 | } | ||
| 4506 | #endif | ||
| 4507 | |||
| 4506 | /* recheck policy now with rq lock held */ | 4508 | /* recheck policy now with rq lock held */ |
| 4507 | if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { | 4509 | if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { |
| 4508 | policy = oldpolicy = -1; | 4510 | policy = oldpolicy = -1; |
| @@ -7605,6 +7607,7 @@ void __init sched_init(void) | |||
| 7605 | #ifdef CONFIG_SMP | 7607 | #ifdef CONFIG_SMP |
| 7606 | rq->sd = NULL; | 7608 | rq->sd = NULL; |
| 7607 | rq->rd = NULL; | 7609 | rq->rd = NULL; |
| 7610 | rq->cpu_power = SCHED_LOAD_SCALE; | ||
| 7608 | rq->post_schedule = 0; | 7611 | rq->post_schedule = 0; |
| 7609 | rq->active_balance = 0; | 7612 | rq->active_balance = 0; |
| 7610 | rq->next_balance = jiffies; | 7613 | rq->next_balance = jiffies; |
