Diffstat (limited to 'kernel/sched.c')
-rw-r--r-- | kernel/sched.c | 709
1 files changed, 180 insertions, 529 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 297d1a0eedb0..18d38e4ec7ba 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -75,9 +75,11 @@
 
 #include <asm/tlb.h>
 #include <asm/irq_regs.h>
+#include <asm/mutex.h>
 
 #include "sched_cpupri.h"
 #include "workqueue_sched.h"
+#include "sched_autogroup.h"
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/sched.h>
@@ -253,6 +255,8 @@ struct task_group {
 	/* runqueue "owned" by this group on each cpu */
 	struct cfs_rq **cfs_rq;
 	unsigned long shares;
+
+	atomic_t load_weight;
 #endif
 
 #ifdef CONFIG_RT_GROUP_SCHED
@@ -268,25 +272,18 @@ struct task_group {
 	struct task_group *parent;
 	struct list_head siblings;
 	struct list_head children;
-};
 
-#define root_task_group init_task_group
+#ifdef CONFIG_SCHED_AUTOGROUP
+	struct autogroup *autogroup;
+#endif
+};
 
-/* task_group_lock serializes add/remove of task groups and also changes to
- * a task group's cpu shares.
- */
+/* task_group_lock serializes the addition/removal of task groups */
 static DEFINE_SPINLOCK(task_group_lock);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
-#ifdef CONFIG_SMP
-static int root_task_group_empty(void)
-{
-	return list_empty(&root_task_group.children);
-}
-#endif
-
-# define INIT_TASK_GROUP_LOAD	NICE_0_LOAD
+# define ROOT_TASK_GROUP_LOAD	NICE_0_LOAD
 
 /*
  * A weight of 0 or 1 can cause arithmetics problems.
@@ -299,13 +296,13 @@ static int root_task_group_empty(void)
 #define MIN_SHARES	2
 #define MAX_SHARES	(1UL << 18)
 
-static int init_task_group_load = INIT_TASK_GROUP_LOAD;
+static int root_task_group_load = ROOT_TASK_GROUP_LOAD;
 #endif
 
 /* Default task group.
  *	Every task in system belong to this group at bootup.
  */
-struct task_group init_task_group;
+struct task_group root_task_group;
 
 #endif	/* CONFIG_CGROUP_SCHED */
 
@@ -342,6 +339,7 @@ struct cfs_rq {
 	 * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a cpu. This
 	 * list is used during load balance.
 	 */
+	int on_list;
 	struct list_head leaf_cfs_rq_list;
 	struct task_group *tg;	/* group that "owns" this runqueue */
 
@@ -360,14 +358,17 @@ struct cfs_rq {
 	unsigned long h_load;
 
 	/*
-	 * this cpu's part of tg->shares
+	 * Maintaining per-cpu shares distribution for group scheduling
+	 *
+	 * load_stamp is the last time we updated the load average
+	 * load_last is the last time we updated the load average and saw load
+	 * load_unacc_exec_time is currently unaccounted execution time
 	 */
-	unsigned long shares;
+	u64 load_avg;
+	u64 load_period;
+	u64 load_stamp, load_last, load_unacc_exec_time;
 
-	/*
-	 * load.weight at the time we set shares
-	 */
-	unsigned long rq_weight;
+	unsigned long load_contribution;
 #endif
 #endif
 };
@@ -552,9 +553,6 @@ struct rq {
 	/* try_to_wake_up() stats */
 	unsigned int ttwu_count;
 	unsigned int ttwu_local;
-
-	/* BKL stats */
-	unsigned int bkl_count;
 #endif
 };
 
@@ -605,11 +603,17 @@ static inline int cpu_of(struct rq *rq)
  */
 static inline struct task_group *task_group(struct task_struct *p)
 {
+	struct task_group *tg;
 	struct cgroup_subsys_state *css;
 
+	if (p->flags & PF_EXITING)
+		return &root_task_group;
+
 	css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
 			lockdep_is_held(&task_rq(p)->lock));
-	return container_of(css, struct task_group, css);
+	tg = container_of(css, struct task_group, css);
+
+	return autogroup_task_group(p, tg);
 }
 
 /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
@@ -737,7 +741,7 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
 	buf[cnt] = 0;
 	cmp = strstrip(buf);
 
-	if (strncmp(buf, "NO_", 3) == 0) {
+	if (strncmp(cmp, "NO_", 3) == 0) {
 		neg = 1;
 		cmp += 3;
 	}
@@ -793,20 +797,6 @@ late_initcall(sched_init_debug);
 const_debug unsigned int sysctl_sched_nr_migrate = 32;
 
 /*
- * ratelimit for updating the group shares.
- * default: 0.25ms
- */
-unsigned int sysctl_sched_shares_ratelimit = 250000;
-unsigned int normalized_sysctl_sched_shares_ratelimit = 250000;
-
-/*
- * Inject some fuzzyness into changing the per-cpu group shares
- * this avoids remote rq-locks at the expense of fairness.
- * default: 4
- */
-unsigned int sysctl_sched_shares_thresh = 4;
-
-/*
  * period over which we average the RT time consumption, measured
  * in ms.
  *
@@ -1355,6 +1345,12 @@ static inline void update_load_sub(struct load_weight *lw, unsigned long dec)
 	lw->inv_weight = 0;
 }
 
+static inline void update_load_set(struct load_weight *lw, unsigned long w)
+{
+	lw->weight = w;
+	lw->inv_weight = 0;
+}
+
 /*
  * To aid in avoiding the subversion of "niceness" due to uneven distribution
  * of tasks with abnormal "nice" values across CPUs the contribution that
@@ -1543,101 +1539,6 @@ static unsigned long cpu_avg_load_per_task(int cpu)
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
-static __read_mostly unsigned long __percpu *update_shares_data;
-
-static void __set_se_shares(struct sched_entity *se, unsigned long shares);
-
-/*
- * Calculate and set the cpu's group shares.
- */
-static void update_group_shares_cpu(struct task_group *tg, int cpu,
-				    unsigned long sd_shares,
-				    unsigned long sd_rq_weight,
-				    unsigned long *usd_rq_weight)
-{
-	unsigned long shares, rq_weight;
-	int boost = 0;
-
-	rq_weight = usd_rq_weight[cpu];
-	if (!rq_weight) {
-		boost = 1;
-		rq_weight = NICE_0_LOAD;
-	}
-
-	/*
-	 *             \Sum_j shares_j * rq_weight_i
-	 * shares_i =  -----------------------------
-	 *                  \Sum_j rq_weight_j
-	 */
-	shares = (sd_shares * rq_weight) / sd_rq_weight;
-	shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
-
-	if (abs(shares - tg->se[cpu]->load.weight) >
-			sysctl_sched_shares_thresh) {
-		struct rq *rq = cpu_rq(cpu);
-		unsigned long flags;
-
-		raw_spin_lock_irqsave(&rq->lock, flags);
-		tg->cfs_rq[cpu]->rq_weight = boost ? 0 : rq_weight;
-		tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
-		__set_se_shares(tg->se[cpu], shares);
-		raw_spin_unlock_irqrestore(&rq->lock, flags);
-	}
-}
-
-/*
- * Re-compute the task group their per cpu shares over the given domain.
- * This needs to be done in a bottom-up fashion because the rq weight of a
- * parent group depends on the shares of its child groups.
- */
-static int tg_shares_up(struct task_group *tg, void *data)
-{
-	unsigned long weight, rq_weight = 0, sum_weight = 0, shares = 0;
-	unsigned long *usd_rq_weight;
-	struct sched_domain *sd = data;
-	unsigned long flags;
-	int i;
-
-	if (!tg->se[0])
-		return 0;
-
-	local_irq_save(flags);
-	usd_rq_weight = per_cpu_ptr(update_shares_data, smp_processor_id());
-
-	for_each_cpu(i, sched_domain_span(sd)) {
-		weight = tg->cfs_rq[i]->load.weight;
-		usd_rq_weight[i] = weight;
-
-		rq_weight += weight;
-		/*
-		 * If there are currently no tasks on the cpu pretend there
-		 * is one of average load so that when a new task gets to
-		 * run here it will not get delayed by group starvation.
-		 */
-		if (!weight)
-			weight = NICE_0_LOAD;
-
-		sum_weight += weight;
-		shares += tg->cfs_rq[i]->shares;
-	}
-
-	if (!rq_weight)
-		rq_weight = sum_weight;
-
-	if ((!shares && rq_weight) || shares > tg->shares)
-		shares = tg->shares;
-
-	if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE))
-		shares = tg->shares;
-
-	for_each_cpu(i, sched_domain_span(sd))
-		update_group_shares_cpu(tg, i, shares, rq_weight, usd_rq_weight);
-
-	local_irq_restore(flags);
-
-	return 0;
-}
-
 /*
  * Compute the cpu's hierarchical load factor for each task group.
  * This needs to be done in a top-down fashion because the load of a child
@@ -1652,7 +1553,7 @@ static int tg_load_down(struct task_group *tg, void *data)
 		load = cpu_rq(cpu)->load.weight;
 	} else {
 		load = tg->parent->cfs_rq[cpu]->h_load;
-		load *= tg->cfs_rq[cpu]->shares;
+		load *= tg->se[cpu]->load.weight;
 		load /= tg->parent->cfs_rq[cpu]->load.weight + 1;
 	}
 
@@ -1661,34 +1562,11 @@ static int tg_load_down(struct task_group *tg, void *data)
 	return 0;
 }
 
-static void update_shares(struct sched_domain *sd)
-{
-	s64 elapsed;
-	u64 now;
-
-	if (root_task_group_empty())
-		return;
-
-	now = local_clock();
-	elapsed = now - sd->last_update;
-
-	if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) {
-		sd->last_update = now;
-		walk_tg_tree(tg_nop, tg_shares_up, sd);
-	}
-}
-
 static void update_h_load(long cpu)
 {
 	walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
 }
 
-#else
-
-static inline void update_shares(struct sched_domain *sd)
-{
-}
-
 #endif
 
 #ifdef CONFIG_PREEMPT
@@ -1810,15 +1688,6 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
 
 #endif
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
-static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
-{
-#ifdef CONFIG_SMP
-	cfs_rq->shares = shares;
-#endif
-}
-#endif
-
 static void calc_load_account_idle(struct rq *this_rq);
 static void update_sysctl(void);
 static int get_update_sysctl_factor(void);
@@ -2063,6 +1932,7 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
 #include "sched_idletask.c"
 #include "sched_fair.c"
 #include "sched_rt.c"
+#include "sched_autogroup.c"
 #include "sched_stoptask.c"
 #ifdef CONFIG_SCHED_DEBUG
 # include "sched_debug.c"
@@ -2255,10 +2125,8 @@ static int migration_cpu_stop(void *data);
  * The task's runqueue lock must be held.
  * Returns true if you have to wait for migration thread.
  */
-static bool migrate_task(struct task_struct *p, int dest_cpu)
+static bool migrate_task(struct task_struct *p, struct rq *rq)
 {
-	struct rq *rq = task_rq(p);
-
 	/*
 	 * If the task is not on a runqueue (and not running), then
 	 * the next wake-up will properly place the task.
@@ -2438,18 +2306,15 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
 		return dest_cpu;
 
 	/* No more Mr. Nice Guy. */
-	if (unlikely(dest_cpu >= nr_cpu_ids)) {
-		dest_cpu = cpuset_cpus_allowed_fallback(p);
-		/*
-		 * Don't tell them about moving exiting tasks or
-		 * kernel threads (both mm NULL), since they never
-		 * leave kernel.
-		 */
-		if (p->mm && printk_ratelimit()) {
-			printk(KERN_INFO "process %d (%s) no "
-					"longer affine to cpu%d\n",
-					task_pid_nr(p), p->comm, cpu);
-		}
+	dest_cpu = cpuset_cpus_allowed_fallback(p);
+	/*
+	 * Don't tell them about moving exiting tasks or
+	 * kernel threads (both mm NULL), since they never
+	 * leave kernel.
+	 */
+	if (p->mm && printk_ratelimit()) {
+		printk(KERN_INFO "process %d (%s) no longer affine to cpu%d\n",
+			task_pid_nr(p), p->comm, cpu);
 	}
 
 	return dest_cpu;
@@ -2640,7 +2505,7 @@ out:
  * try_to_wake_up_local - try to wake up a local task with rq lock held
  * @p: the thread to be awakened
  *
- * Put @p on the run-queue if it's not alredy there. The caller must
+ * Put @p on the run-queue if it's not already there. The caller must
  * ensure that this_rq() is locked, @p is bound to this_rq() and not
  * the current task. this_rq() stays locked over invocation.
  */
@@ -2785,7 +2650,9 @@ void sched_fork(struct task_struct *p, int clone_flags)
 	/* Want to start with kernel preemption disabled. */
 	task_thread_info(p)->preempt_count = 1;
 #endif
+#ifdef CONFIG_SMP
 	plist_node_init(&p->pushable_tasks, MAX_PRIO);
+#endif
 
 	put_cpu();
 }
@@ -3549,7 +3416,7 @@ void sched_exec(void)
 	 * select_task_rq() can race against ->cpus_allowed
 	 */
 	if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) &&
-	    likely(cpu_active(dest_cpu)) && migrate_task(p, dest_cpu)) {
+	    likely(cpu_active(dest_cpu)) && migrate_task(p, rq)) {
 		struct migration_arg arg = { p, dest_cpu };
 
 		task_rq_unlock(rq, &flags);
@@ -4020,7 +3887,7 @@ static inline void schedule_debug(struct task_struct *prev)
 	schedstat_inc(this_rq(), sched_count);
 #ifdef CONFIG_SCHEDSTATS
 	if (unlikely(prev->lock_depth >= 0)) {
-		schedstat_inc(this_rq(), bkl_count);
+		schedstat_inc(this_rq(), rq_sched_info.bkl_count);
 		schedstat_inc(prev, sched_info.bkl_count);
 	}
 #endif
@@ -4214,7 +4081,7 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
 		if (task_thread_info(rq->curr) != owner || need_resched())
 			return 0;
 
-		cpu_relax();
+		arch_mutex_cpu_relax();
 	}
 
 	return 1;
@@ -4526,7 +4393,7 @@ EXPORT_SYMBOL(wait_for_completion_interruptible);
 * This waits for either a completion of a specific task to be signaled or for a
 * specified timeout to expire. It is interruptible. The timeout is in jiffies.
 */
-unsigned long __sched
+long __sched
 wait_for_completion_interruptible_timeout(struct completion *x,
 					  unsigned long timeout)
 {
@@ -4559,7 +4426,7 @@ EXPORT_SYMBOL(wait_for_completion_killable);
 * signaled or for a specified timeout to expire. It can be
 * interrupted by a kill signal. The timeout is in jiffies.
 */
-unsigned long __sched
+long __sched
 wait_for_completion_killable_timeout(struct completion *x,
 				     unsigned long timeout)
 {
@@ -4901,7 +4768,7 @@ static bool check_same_owner(struct task_struct *p)
 }
 
 static int __sched_setscheduler(struct task_struct *p, int policy,
-				struct sched_param *param, bool user)
+				const struct sched_param *param, bool user)
 {
 	int retval, oldprio, oldpolicy = -1, on_rq, running;
 	unsigned long flags;
@@ -5004,7 +4871,8 @@ recheck:
 	 * assigned.
 	 */
 	if (rt_bandwidth_enabled() && rt_policy(policy) &&
-			task_group(p)->rt_bandwidth.rt_runtime == 0) {
+			task_group(p)->rt_bandwidth.rt_runtime == 0 &&
+			!task_group_is_autogroup(task_group(p))) {
 		__task_rq_unlock(rq);
 		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 		return -EPERM;
@@ -5056,7 +4924,7 @@ recheck:
 * NOTE that the task may be already dead.
 */
 int sched_setscheduler(struct task_struct *p, int policy,
-		       struct sched_param *param)
+		       const struct sched_param *param)
 {
 	return __sched_setscheduler(p, policy, param, true);
 }
@@ -5074,7 +4942,7 @@ EXPORT_SYMBOL_GPL(sched_setscheduler);
 * but our caller might not have that capability.
 */
 int sched_setscheduler_nocheck(struct task_struct *p, int policy,
-			       struct sched_param *param)
+			       const struct sched_param *param)
 {
 	return __sched_setscheduler(p, policy, param, false);
 }
@@ -5590,7 +5458,7 @@ void sched_show_task(struct task_struct *p)
 	unsigned state;
 
 	state = p->state ? __ffs(p->state) + 1 : 0;
-	printk(KERN_INFO "%-13.13s %c", p->comm,
+	printk(KERN_INFO "%-15.15s %c", p->comm,
 		state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?');
 #if BITS_PER_LONG == 32
 	if (state == TASK_RUNNING)
@@ -5754,7 +5622,6 @@ static void update_sysctl(void)
 	SET_SYSCTL(sched_min_granularity);
 	SET_SYSCTL(sched_latency);
 	SET_SYSCTL(sched_wakeup_granularity);
-	SET_SYSCTL(sched_shares_ratelimit);
 #undef SET_SYSCTL
 }
 
@@ -5830,7 +5697,7 @@ again:
 		goto out;
 
 	dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
-	if (migrate_task(p, dest_cpu)) {
+	if (migrate_task(p, rq)) {
 		struct migration_arg arg = { p, dest_cpu };
 		/* Need help from migration thread: drop lock and wait. */
 		task_rq_unlock(rq, &flags);
@@ -5912,29 +5779,20 @@ static int migration_cpu_stop(void *data)
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
+
 /*
- * Figure out where task on dead CPU should go, use force if necessary.
+ * Ensures that the idle task is using init_mm right before its cpu goes
+ * offline.
  */
-void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
+void idle_task_exit(void)
 {
-	struct rq *rq = cpu_rq(dead_cpu);
-	int needs_cpu, uninitialized_var(dest_cpu);
-	unsigned long flags;
+	struct mm_struct *mm = current->active_mm;
 
-	local_irq_save(flags);
+	BUG_ON(cpu_online(smp_processor_id()));
 
-	raw_spin_lock(&rq->lock);
-	needs_cpu = (task_cpu(p) == dead_cpu) && (p->state != TASK_WAKING);
-	if (needs_cpu)
-		dest_cpu = select_fallback_rq(dead_cpu, p);
-	raw_spin_unlock(&rq->lock);
-	/*
-	 * It can only fail if we race with set_cpus_allowed(),
-	 * in the racer should migrate the task anyway.
-	 */
-	if (needs_cpu)
-		__migrate_task(p, dead_cpu, dest_cpu);
-	local_irq_restore(flags);
+	if (mm != &init_mm)
+		switch_mm(mm, &init_mm, current);
+	mmdrop(mm);
 }
 
 /*
@@ -5947,128 +5805,69 @@ void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
 static void migrate_nr_uninterruptible(struct rq *rq_src)
 {
 	struct rq *rq_dest = cpu_rq(cpumask_any(cpu_active_mask));
-	unsigned long flags;
 
-	local_irq_save(flags);
-	double_rq_lock(rq_src, rq_dest);
 	rq_dest->nr_uninterruptible += rq_src->nr_uninterruptible;
 	rq_src->nr_uninterruptible = 0;
-	double_rq_unlock(rq_src, rq_dest);
-	local_irq_restore(flags);
-}
-
-/* Run through task list and migrate tasks from the dead cpu. */
-static void migrate_live_tasks(int src_cpu)
-{
-	struct task_struct *p, *t;
-
-	read_lock(&tasklist_lock);
-
-	do_each_thread(t, p) {
-		if (p == current)
-			continue;
-
-		if (task_cpu(p) == src_cpu)
-			move_task_off_dead_cpu(src_cpu, p);
-	} while_each_thread(t, p);
-
-	read_unlock(&tasklist_lock);
 }
 
 /*
- * Schedules idle task to be the next runnable task on current CPU.
- * It does so by boosting its priority to highest possible.
- * Used by CPU offline code.
+ * remove the tasks which were accounted by rq from calc_load_tasks.
  */
-void sched_idle_next(void)
+static void calc_global_load_remove(struct rq *rq)
 {
-	int this_cpu = smp_processor_id();
-	struct rq *rq = cpu_rq(this_cpu);
-	struct task_struct *p = rq->idle;
-	unsigned long flags;
-
-	/* cpu has to be offline */
-	BUG_ON(cpu_online(this_cpu));
-
-	/*
-	 * Strictly not necessary since rest of the CPUs are stopped by now
-	 * and interrupts disabled on the current cpu.
-	 */
-	raw_spin_lock_irqsave(&rq->lock, flags);
-
-	__setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1);
-
-	activate_task(rq, p, 0);
-
-	raw_spin_unlock_irqrestore(&rq->lock, flags);
+	atomic_long_sub(rq->calc_load_active, &calc_load_tasks);
+	rq->calc_load_active = 0;
 }
 
 /*
- * Ensures that the idle task is using init_mm right before its cpu goes
- * offline.
+ * Migrate all tasks from the rq, sleeping tasks will be migrated by
+ * try_to_wake_up()->select_task_rq().
+ *
+ * Called with rq->lock held even though we'er in stop_machine() and
+ * there's no concurrency possible, we hold the required locks anyway
+ * because of lock validation efforts.
  */
-void idle_task_exit(void)
-{
-	struct mm_struct *mm = current->active_mm;
-
-	BUG_ON(cpu_online(smp_processor_id()));
-
-	if (mm != &init_mm)
-		switch_mm(mm, &init_mm, current);
-	mmdrop(mm);
-}
-
-/* called under rq->lock with disabled interrupts */
-static void migrate_dead(unsigned int dead_cpu, struct task_struct *p)
+static void migrate_tasks(unsigned int dead_cpu)
 {
 	struct rq *rq = cpu_rq(dead_cpu);
-
-	/* Must be exiting, otherwise would be on tasklist. */
-	BUG_ON(!p->exit_state);
-
-	/* Cannot have done final schedule yet: would have vanished. */
-	BUG_ON(p->state == TASK_DEAD);
-
-	get_task_struct(p);
+	struct task_struct *next, *stop = rq->stop;
+	int dest_cpu;
 
 	/*
-	 * Drop lock around migration; if someone else moves it,
-	 * that's OK. No task can be added to this CPU, so iteration is
-	 * fine.
+	 * Fudge the rq selection such that the below task selection loop
+	 * doesn't get stuck on the currently eligible stop task.
+	 *
+	 * We're currently inside stop_machine() and the rq is either stuck
+	 * in the stop_machine_cpu_stop() loop, or we're executing this code,
+	 * either way we should never end up calling schedule() until we're
+	 * done here.
 	 */
-	raw_spin_unlock_irq(&rq->lock);
-	move_task_off_dead_cpu(dead_cpu, p);
-	raw_spin_lock_irq(&rq->lock);
-
-	put_task_struct(p);
-}
-
-/* release_task() removes task from tasklist, so we won't find dead tasks. */
-static void migrate_dead_tasks(unsigned int dead_cpu)
-{
-	struct rq *rq = cpu_rq(dead_cpu);
-	struct task_struct *next;
+	rq->stop = NULL;
 
 	for ( ; ; ) {
-		if (!rq->nr_running)
+		/*
+		 * There's this thread running, bail when that's the only
+		 * remaining thread.
+		 */
+		if (rq->nr_running == 1)
 			break;
+
 		next = pick_next_task(rq);
-		if (!next)
-			break;
+		BUG_ON(!next);
 		next->sched_class->put_prev_task(rq, next);
-		migrate_dead(dead_cpu, next);
 
+		/* Find suitable destination for @next, with force if needed. */
+		dest_cpu = select_fallback_rq(dead_cpu, next);
+		raw_spin_unlock(&rq->lock);
+
+		__migrate_task(next, dead_cpu, dest_cpu);
+
+		raw_spin_lock(&rq->lock);
 	}
-}
 
-/*
- * remove the tasks which were accounted by rq from calc_load_tasks.
- */
-static void calc_global_load_remove(struct rq *rq)
-{
-	atomic_long_sub(rq->calc_load_active, &calc_load_tasks);
-	rq->calc_load_active = 0;
+	rq->stop = stop;
 }
+
 #endif /* CONFIG_HOTPLUG_CPU */
 
 #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
@@ -6278,15 +6077,13 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 	unsigned long flags;
 	struct rq *rq = cpu_rq(cpu);
 
-	switch (action) {
+	switch (action & ~CPU_TASKS_FROZEN) {
 
 	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
 		rq->calc_load_update = calc_load_update;
 		break;
 
 	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
 		/* Update our root-domain */
 		raw_spin_lock_irqsave(&rq->lock, flags);
 		if (rq->rd) {
@@ -6298,30 +6095,19 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		break;
 
 #ifdef CONFIG_HOTPLUG_CPU
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		migrate_live_tasks(cpu);
-		/* Idle task back to normal (off runqueue, low prio) */
-		raw_spin_lock_irq(&rq->lock);
-		deactivate_task(rq, rq->idle, 0);
-		__setscheduler(rq, rq->idle, SCHED_NORMAL, 0);
-		rq->idle->sched_class = &idle_sched_class;
-		migrate_dead_tasks(cpu);
-		raw_spin_unlock_irq(&rq->lock);
-		migrate_nr_uninterruptible(rq);
-		BUG_ON(rq->nr_running != 0);
-		calc_global_load_remove(rq);
-		break;
-
 	case CPU_DYING:
-	case CPU_DYING_FROZEN:
 		/* Update our root-domain */
 		raw_spin_lock_irqsave(&rq->lock, flags);
 		if (rq->rd) {
 			BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
 			set_rq_offline(rq);
 		}
+		migrate_tasks(cpu);
+		BUG_ON(rq->nr_running != 1); /* the migration thread */
 		raw_spin_unlock_irqrestore(&rq->lock, flags);
+
+		migrate_nr_uninterruptible(rq);
+		calc_global_load_remove(rq);
 		break;
 #endif
 	}
@@ -8052,18 +7838,16 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
-				struct sched_entity *se, int cpu, int add,
+				struct sched_entity *se, int cpu,
 				struct sched_entity *parent)
 {
 	struct rq *rq = cpu_rq(cpu);
 	tg->cfs_rq[cpu] = cfs_rq;
 	init_cfs_rq(cfs_rq, rq);
 	cfs_rq->tg = tg;
-	if (add)
-		list_add(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
 
 	tg->se[cpu] = se;
-	/* se could be NULL for init_task_group */
+	/* se could be NULL for root_task_group */
 	if (!se)
 		return;
 
@@ -8073,15 +7857,14 @@ static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
 		se->cfs_rq = parent->my_q;
 
 	se->my_q = cfs_rq;
-	se->load.weight = tg->shares;
-	se->load.inv_weight = 0;
+	update_load_set(&se->load, 0);
 	se->parent = parent;
 }
 #endif
 
 #ifdef CONFIG_RT_GROUP_SCHED
 static void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
-		struct sched_rt_entity *rt_se, int cpu, int add,
+		struct sched_rt_entity *rt_se, int cpu,
 		struct sched_rt_entity *parent)
 {
 	struct rq *rq = cpu_rq(cpu);
@@ -8090,8 +7873,6 @@ static void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
 	init_rt_rq(rt_rq, rq);
 	rt_rq->tg = tg;
 	rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
-	if (add)
-		list_add(&rt_rq->leaf_rt_rq_list, &rq->leaf_rt_rq_list);
 
 	tg->rt_se[cpu] = rt_se;
 	if (!rt_se)
@@ -8126,18 +7907,18 @@ void __init sched_init(void)
 		ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-		init_task_group.se = (struct sched_entity **)ptr;
+		root_task_group.se = (struct sched_entity **)ptr;
 		ptr += nr_cpu_ids * sizeof(void **);
 
-		init_task_group.cfs_rq = (struct cfs_rq **)ptr;
+		root_task_group.cfs_rq = (struct cfs_rq **)ptr;
 		ptr += nr_cpu_ids * sizeof(void **);
 
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 #ifdef CONFIG_RT_GROUP_SCHED
-		init_task_group.rt_se = (struct sched_rt_entity **)ptr;
+		root_task_group.rt_se = (struct sched_rt_entity **)ptr;
 		ptr += nr_cpu_ids * sizeof(void **);
 
-		init_task_group.rt_rq = (struct rt_rq **)ptr;
+		root_task_group.rt_rq = (struct rt_rq **)ptr;
 		ptr += nr_cpu_ids * sizeof(void **);
 
 #endif /* CONFIG_RT_GROUP_SCHED */
@@ -8157,20 +7938,16 @@ void __init sched_init(void)
 			global_rt_period(), global_rt_runtime());
 
 #ifdef CONFIG_RT_GROUP_SCHED
-	init_rt_bandwidth(&init_task_group.rt_bandwidth,
+	init_rt_bandwidth(&root_task_group.rt_bandwidth,
 			global_rt_period(), global_rt_runtime());
 #endif /* CONFIG_RT_GROUP_SCHED */
 
 #ifdef CONFIG_CGROUP_SCHED
-	list_add(&init_task_group.list, &task_groups);
-	INIT_LIST_HEAD(&init_task_group.children);
-
+	list_add(&root_task_group.list, &task_groups);
+	INIT_LIST_HEAD(&root_task_group.children);
+	autogroup_init(&init_task);
 #endif /* CONFIG_CGROUP_SCHED */
 
-#if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP
-	update_shares_data = __alloc_percpu(nr_cpu_ids * sizeof(unsigned long),
-					    __alignof__(unsigned long));
-#endif
 	for_each_possible_cpu(i) {
 		struct rq *rq;
 
@@ -8182,38 +7959,34 @@ void __init sched_init(void)
 		init_cfs_rq(&rq->cfs, rq);
 		init_rt_rq(&rq->rt, rq);
 #ifdef CONFIG_FAIR_GROUP_SCHED
-		init_task_group.shares = init_task_group_load;
+		root_task_group.shares = root_task_group_load;
 		INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
-#ifdef CONFIG_CGROUP_SCHED
 		/*
-		 * How much cpu bandwidth does init_task_group get?
+		 * How much cpu bandwidth does root_task_group get?
 		 *
 		 * In case of task-groups formed thr' the cgroup filesystem, it
 		 * gets 100% of the cpu resources in the system. This overall
 		 * system cpu resource is divided among the tasks of
-		 * init_task_group and its child task-groups in a fair manner,
+		 * root_task_group and its child task-groups in a fair manner,
 		 * based on each entity's (task or task-group's) weight
 		 * (se->load.weight).
 		 *
-		 * In other words, if init_task_group has 10 tasks of weight
+		 * In other words, if root_task_group has 10 tasks of weight
 		 * 1024) and two child groups A0 and A1 (of weight 1024 each),
 		 * then A0's share of the cpu resource is:
 		 *
 		 *	A0's bandwidth = 1024 / (10*1024 + 1024 + 1024) = 8.33%
 		 *
-		 * We achieve this by letting init_task_group's tasks sit
-		 * directly in rq->cfs (i.e init_task_group->se[] = NULL).
+		 * We achieve this by letting root_task_group's tasks sit
+		 * directly in rq->cfs (i.e root_task_group->se[] = NULL).
 		 */
-		init_tg_cfs_entry(&init_task_group, &rq->cfs, NULL, i, 1, NULL);
-#endif
+		init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL);
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
 		rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime;
 #ifdef CONFIG_RT_GROUP_SCHED
 		INIT_LIST_HEAD(&rq->leaf_rt_rq_list);
-#ifdef CONFIG_CGROUP_SCHED
-		init_tg_rt_entry(&init_task_group, &rq->rt, NULL, i, 1, NULL);
-#endif
+		init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL);
 #endif
 
 		for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
@@ -8293,8 +8066,6 @@ void __init sched_init(void)
 	zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
 #endif /* SMP */
 
-	perf_event_init();
-
 	scheduler_running = 1;
 }
 
@@ -8488,7 +8259,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 		if (!se)
 			goto err_free_rq;
 
-		init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]);
+		init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]);
 	}
 
 	return 1;
@@ -8499,15 +8270,21 @@ err:
 	return 0;
 }
 
-static inline void register_fair_sched_group(struct task_group *tg, int cpu)
-{
-	list_add_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list,
-			&cpu_rq(cpu)->leaf_cfs_rq_list);
-}
-
 static inline void unregister_fair_sched_group(struct task_group *tg, int cpu)
 {
-	list_del_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list);
+	struct rq *rq = cpu_rq(cpu);
+	unsigned long flags;
+
+	/*
+	 * Only empty task groups can be destroyed; so we can speculatively
+	 * check on_list without danger of it being re-added.
+	 */
+	if (!tg->cfs_rq[cpu]->on_list)
+		return;
+
+	raw_spin_lock_irqsave(&rq->lock, flags);
+	list_del_leaf_cfs_rq(tg->cfs_rq[cpu]);
+	raw_spin_unlock_irqrestore(&rq->lock, flags);
 }
 #else /* !CONFG_FAIR_GROUP_SCHED */
 static inline void free_fair_sched_group(struct task_group *tg)
@@ -8520,10 +8297,6 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 	return 1;
 }
 
-static inline void register_fair_sched_group(struct task_group *tg, int cpu)
-{
-}
-
 static inline void unregister_fair_sched_group(struct task_group *tg, int cpu)
 {
 }
@@ -8578,7 +8351,7 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
 		if (!rt_se)
 			goto err_free_rq;
 
-		init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]);
+		init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]);
 	}
 
 	return 1;
@@ -8588,17 +8361,6 @@ err_free_rq:
 err:
 	return 0;
 }
-
-static inline void register_rt_sched_group(struct task_group *tg, int cpu)
-{
-	list_add_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list,
-			&cpu_rq(cpu)->leaf_rt_rq_list);
-}
-
-static inline void unregister_rt_sched_group(struct task_group *tg, int cpu)
-{
-	list_del_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list);
-}
 #else /* !CONFIG_RT_GROUP_SCHED */
 static inline void free_rt_sched_group(struct task_group *tg)
 {
@@ -8609,14 +8371,6 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
 {
 	return 1;
 }
-
-static inline void register_rt_sched_group(struct task_group *tg, int cpu)
-{
-}
-
-static inline void unregister_rt_sched_group(struct task_group *tg, int cpu)
-{
-}
 #endif /* CONFIG_RT_GROUP_SCHED */
 
 #ifdef CONFIG_CGROUP_SCHED
@@ -8624,6 +8378,7 @@ static void free_sched_group(struct task_group *tg)
 {
 	free_fair_sched_group(tg);
 	free_rt_sched_group(tg);
+	autogroup_free(tg);
 	kfree(tg);
 }
 
@@ -8632,7 +8387,6 @@ struct task_group *sched_create_group(struct task_group *parent)
 {
 	struct task_group *tg;
 	unsigned long flags;
-	int i;
 
 	tg = kzalloc(sizeof(*tg), GFP_KERNEL);
 	if (!tg)
@@ -8645,10 +8399,6 @@ struct task_group *sched_create_group(struct task_group *parent)
 		goto err;
 
 	spin_lock_irqsave(&task_group_lock, flags);
-	for_each_possible_cpu(i) {
-		register_fair_sched_group(tg, i);
-		register_rt_sched_group(tg, i);
-	}
 	list_add_rcu(&tg->list, &task_groups);
 
 	WARN_ON(!parent);	/* root should already exist */
@@ -8678,11 +8428,11 @@ void sched_destroy_group(struct task_group *tg)
 	unsigned long flags;
 	int i;
 
-	spin_lock_irqsave(&task_group_lock, flags);
-	for_each_possible_cpu(i) {
+	/* end participation in shares distribution */
+	for_each_possible_cpu(i)
 		unregister_fair_sched_group(tg, i);
-		unregister_rt_sched_group(tg, i);
-	}
+
+	spin_lock_irqsave(&task_group_lock, flags);
 	list_del_rcu(&tg->list);
 	list_del_rcu(&tg->siblings);
 	spin_unlock_irqrestore(&task_group_lock, flags);
@@ -8729,33 +8479,6 @@ void sched_move_task(struct task_struct *tsk)
 #endif /* CONFIG_CGROUP_SCHED */
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-static void __set_se_shares(struct sched_entity *se, unsigned long shares)
-{
-	struct cfs_rq *cfs_rq = se->cfs_rq;
-	int on_rq;
-
-	on_rq = se->on_rq;
-	if (on_rq)
-		dequeue_entity(cfs_rq, se, 0);
-
-	se->load.weight = shares;
-	se->load.inv_weight = 0;
-
-	if (on_rq)
-		enqueue_entity(cfs_rq, se, 0);
-}
-
-static void set_se_shares(struct sched_entity *se, unsigned long shares)
-{
-	struct cfs_rq *cfs_rq = se->cfs_rq;
-	struct rq *rq = cfs_rq->rq;
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&rq->lock, flags);
-	__set_se_shares(se, shares);
-	raw_spin_unlock_irqrestore(&rq->lock, flags);
-}
-
 static DEFINE_MUTEX(shares_mutex);
 
 int sched_group_set_shares(struct task_group *tg, unsigned long shares)
@@ -8778,37 +8501,19 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
 	if (tg->shares == shares)
 		goto done;
 
-	spin_lock_irqsave(&task_group_lock, flags);
-	for_each_possible_cpu(i)
-		unregister_fair_sched_group(tg, i);
-	list_del_rcu(&tg->siblings);
-	spin_unlock_irqrestore(&task_group_lock, flags);
-
-	/* wait for any ongoing reference to this group to finish */
-	synchronize_sched();
-
-	/*
-	 * Now we are free to modify the group's share on each cpu
-	 * w/o tripping rebalance_share or load_balance_fair.
-	 */
 	tg->shares = shares;
 	for_each_possible_cpu(i) {
-		/*
-		 * force a rebalance
-		 */
-		cfs_rq_set_shares(tg->cfs_rq[i], 0);
-		set_se_shares(tg->se[i], shares);
+		struct rq *rq = cpu_rq(i);
+		struct sched_entity *se;
+
+		se = tg->se[i];
+		/* Propagate contribution to hierarchy */
+		raw_spin_lock_irqsave(&rq->lock, flags);
+		for_each_sched_entity(se)
+			update_cfs_shares(group_cfs_rq(se), 0);
+		raw_spin_unlock_irqrestore(&rq->lock, flags);
 	}
 
-	/*
-	 * Enable load balance activity on this group, by inserting it back on
-	 * each cpu's rq->leaf_cfs_rq_list.
-	 */
-	spin_lock_irqsave(&task_group_lock, flags);
-	for_each_possible_cpu(i)
-		register_fair_sched_group(tg, i);
-	list_add_rcu(&tg->siblings, &tg->parent->children);
-	spin_unlock_irqrestore(&task_group_lock, flags);
 done:
 	mutex_unlock(&shares_mutex);
 	return 0;
@@ -9107,7 +8812,7 @@ cpu_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cgrp)
 
 	if (!cgrp->parent) {
 		/* This is early initialization for the top cgroup */
-		return &init_task_group.css;
+		return &root_task_group.css;
 	}
 
 	parent = cgroup_tg(cgrp->parent);
@@ -9178,6 +8883,20 @@ cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
 	}
 }
 
+static void
+cpu_cgroup_exit(struct cgroup_subsys *ss, struct task_struct *task)
+{
+	/*
+	 * cgroup_exit() is called in the copy_process() failure path.
+	 * Ignore this case since the task hasn't ran yet, this avoids
+	 * trying to poke a half freed task state from generic code.
+	 */
+	if (!(task->flags & PF_EXITING))
+		return;
+
+	sched_move_task(task);
+}
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 static int cpu_shares_write_u64(struct cgroup *cgrp, struct cftype *cftype,
 				u64 shareval)
@@ -9250,6 +8969,7 @@ struct cgroup_subsys cpu_cgroup_subsys = {
 	.destroy	= cpu_cgroup_destroy,
 	.can_attach	= cpu_cgroup_can_attach,
 	.attach		= cpu_cgroup_attach,
+	.exit		= cpu_cgroup_exit,
 	.populate	= cpu_cgroup_populate,
 	.subsys_id	= cpu_cgroup_subsys_id,
 	.early_init	= 1,
@@ -9534,72 +9254,3 @@ struct cgroup_subsys cpuacct_subsys = {
 };
 #endif	/* CONFIG_CGROUP_CPUACCT */
 
-#ifndef CONFIG_SMP
-
-void synchronize_sched_expedited(void)
-{
-	barrier();
-}
-EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
-
-#else /* #ifndef CONFIG_SMP */
-
-static atomic_t synchronize_sched_expedited_count = ATOMIC_INIT(0);
-
-static int synchronize_sched_expedited_cpu_stop(void *data)
-{
-	/*
-	 * There must be a full memory barrier on each affected CPU
-	 * between the time that try_stop_cpus() is called and the
-	 * time that it returns.
-	 *
-	 * In the current initial implementation of cpu_stop, the
-	 * above condition is already met when the control reaches
-	 * this point and the following smp_mb() is not strictly
-	 * necessary. Do smp_mb() anyway for documentation and
-	 * robustness against future implementation changes.
-	 */
-	smp_mb(); /* See above comment block. */
-	return 0;
-}
-
-/*
- * Wait for an rcu-sched grace period to elapse, but use "big hammer"
- * approach to force grace period to end quickly. This consumes
- * significant time on all CPUs, and is thus not recommended for
- * any sort of common-case code.
- *
- * Note that it is illegal to call this function while holding any
- * lock that is acquired by a CPU-hotplug notifier. Failing to
- * observe this restriction will result in deadlock.
- */
-void synchronize_sched_expedited(void)
-{
-	int snap, trycount = 0;
-
-	smp_mb();  /* ensure prior mod happens before capturing snap. */
-	snap = atomic_read(&synchronize_sched_expedited_count) + 1;
-	get_online_cpus();
-	while (try_stop_cpus(cpu_online_mask,
-			     synchronize_sched_expedited_cpu_stop,
-			     NULL) == -EAGAIN) {
-		put_online_cpus();
-		if (trycount++ < 10)
-			udelay(trycount * num_online_cpus());
-		else {
-			synchronize_sched();
-			return;
-		}
-		if (atomic_read(&synchronize_sched_expedited_count) - snap > 0) {
-			smp_mb(); /* ensure test happens before caller kfree */
-			return;
-		}
-		get_online_cpus();
-	}
-	atomic_inc(&synchronize_sched_expedited_count);
-	smp_mb__after_atomic_inc(); /* ensure post-GP actions seen after GP. */
-	put_online_cpus();
-}
-EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
-
-#endif /* #else #ifndef CONFIG_SMP */