Diffstat (limited to 'kernel/sched.c')
-rw-r--r--	kernel/sched.c	414
1 file changed, 230 insertions, 184 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 9b1e79371c20..748ff924a290 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
| @@ -118,6 +118,12 @@ | |||
| 118 | */ | 118 | */ | 
| 119 | #define RUNTIME_INF ((u64)~0ULL) | 119 | #define RUNTIME_INF ((u64)~0ULL) | 
| 120 | 120 | ||
| 121 | DEFINE_TRACE(sched_wait_task); | ||
| 122 | DEFINE_TRACE(sched_wakeup); | ||
| 123 | DEFINE_TRACE(sched_wakeup_new); | ||
| 124 | DEFINE_TRACE(sched_switch); | ||
| 125 | DEFINE_TRACE(sched_migrate_task); | ||
| 126 | |||
| 121 | #ifdef CONFIG_SMP | 127 | #ifdef CONFIG_SMP | 
| 122 | /* | 128 | /* | 
| 123 | * Divide a load by a sched group cpu_power : (load / sg->__cpu_power) | 129 | * Divide a load by a sched group cpu_power : (load / sg->__cpu_power) | 
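The five DEFINE_TRACE() lines added above go with the tracepoint rework in this merge window: a tracepoint is declared once in a header and must be instantiated by exactly one DEFINE_TRACE() in a .c file. A minimal sketch of the pattern, assuming the TPPROTO/TPARGS macro names of this era and a made-up hook called demo_event:

	#include <linux/tracepoint.h>

	/* in a shared header */
	DECLARE_TRACE(demo_event,
		TPPROTO(struct task_struct *p, int value),
		TPARGS(p, value));

	/* in exactly one .c file */
	DEFINE_TRACE(demo_event);

	/* at the instrumentation site */
	trace_demo_event(p, 42);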
| @@ -261,6 +267,10 @@ struct task_group { | |||
| 261 | struct cgroup_subsys_state css; | 267 | struct cgroup_subsys_state css; | 
| 262 | #endif | 268 | #endif | 
| 263 | 269 | ||
| 270 | #ifdef CONFIG_USER_SCHED | ||
| 271 | uid_t uid; | ||
| 272 | #endif | ||
| 273 | |||
| 264 | #ifdef CONFIG_FAIR_GROUP_SCHED | 274 | #ifdef CONFIG_FAIR_GROUP_SCHED | 
| 265 | /* schedulable entities of this group on each cpu */ | 275 | /* schedulable entities of this group on each cpu */ | 
| 266 | struct sched_entity **se; | 276 | struct sched_entity **se; | 
| @@ -286,6 +296,12 @@ struct task_group { | |||
| 286 | 296 | ||
| 287 | #ifdef CONFIG_USER_SCHED | 297 | #ifdef CONFIG_USER_SCHED | 
| 288 | 298 | ||
| 299 | /* Helper function to pass uid information to create_sched_user() */ | ||
| 300 | void set_tg_uid(struct user_struct *user) | ||
| 301 | { | ||
| 302 | user->tg->uid = user->uid; | ||
| 303 | } | ||
| 304 | |||
| 289 | /* | 305 | /* | 
| 290 | * Root task group. | 306 | * Root task group. | 
| 291 | * Every UID task group (including init_task_group aka UID-0) will | 307 | * Every UID task group (including init_task_group aka UID-0) will | 
| @@ -345,7 +361,9 @@ static inline struct task_group *task_group(struct task_struct *p) | |||
| 345 | struct task_group *tg; | 361 | struct task_group *tg; | 
| 346 | 362 | ||
| 347 | #ifdef CONFIG_USER_SCHED | 363 | #ifdef CONFIG_USER_SCHED | 
| 348 | tg = p->user->tg; | 364 | rcu_read_lock(); | 
| 365 | tg = __task_cred(p)->user->tg; | ||
| 366 | rcu_read_unlock(); | ||
| 349 | #elif defined(CONFIG_CGROUP_SCHED) | 367 | #elif defined(CONFIG_CGROUP_SCHED) | 
| 350 | tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id), | 368 | tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id), | 
| 351 | struct task_group, css); | 369 | struct task_group, css); | 
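The task_group() hunk follows from the credentials rework: the per-task user_struct is now reached through the RCU-protected cred structure, so the dereference must sit inside an RCU read-side section and the pointer must not be used after rcu_read_unlock(). A minimal sketch of that access pattern, using a hypothetical helper name:

	#include <linux/cred.h>
	#include <linux/rcupdate.h>
	#include <linux/sched.h>

	/* hypothetical helper: fetch a task's uid safely */
	static inline uid_t demo_task_uid(struct task_struct *p)
	{
		uid_t uid;

		rcu_read_lock();
		uid = __task_cred(p)->uid;	/* cred only valid inside the RCU section */
		rcu_read_unlock();
		return uid;
	}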
| @@ -586,6 +604,8 @@ struct rq { | |||
| 586 | #ifdef CONFIG_SCHEDSTATS | 604 | #ifdef CONFIG_SCHEDSTATS | 
| 587 | /* latency stats */ | 605 | /* latency stats */ | 
| 588 | struct sched_info rq_sched_info; | 606 | struct sched_info rq_sched_info; | 
| 607 | unsigned long long rq_cpu_time; | ||
| 608 | /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */ | ||
| 589 | 609 | ||
| 590 | /* sys_sched_yield() stats */ | 610 | /* sys_sched_yield() stats */ | 
| 591 | unsigned int yld_exp_empty; | 611 | unsigned int yld_exp_empty; | 
| @@ -703,45 +723,18 @@ static __read_mostly char *sched_feat_names[] = { | |||
| 703 | 723 | ||
| 704 | #undef SCHED_FEAT | 724 | #undef SCHED_FEAT | 
| 705 | 725 | ||
| 706 | static int sched_feat_open(struct inode *inode, struct file *filp) | 726 | static int sched_feat_show(struct seq_file *m, void *v) | 
| 707 | { | ||
| 708 | filp->private_data = inode->i_private; | ||
| 709 | return 0; | ||
| 710 | } | ||
| 711 | |||
| 712 | static ssize_t | ||
| 713 | sched_feat_read(struct file *filp, char __user *ubuf, | ||
| 714 | size_t cnt, loff_t *ppos) | ||
| 715 | { | 727 | { | 
| 716 | char *buf; | ||
| 717 | int r = 0; | ||
| 718 | int len = 0; | ||
| 719 | int i; | 728 | int i; | 
| 720 | 729 | ||
| 721 | for (i = 0; sched_feat_names[i]; i++) { | 730 | for (i = 0; sched_feat_names[i]; i++) { | 
| 722 | len += strlen(sched_feat_names[i]); | 731 | if (!(sysctl_sched_features & (1UL << i))) | 
| 723 | len += 4; | 732 | seq_puts(m, "NO_"); | 
| 724 | } | 733 | seq_printf(m, "%s ", sched_feat_names[i]); | 
| 725 | |||
| 726 | buf = kmalloc(len + 2, GFP_KERNEL); | ||
| 727 | if (!buf) | ||
| 728 | return -ENOMEM; | ||
| 729 | |||
| 730 | for (i = 0; sched_feat_names[i]; i++) { | ||
| 731 | if (sysctl_sched_features & (1UL << i)) | ||
| 732 | r += sprintf(buf + r, "%s ", sched_feat_names[i]); | ||
| 733 | else | ||
| 734 | r += sprintf(buf + r, "NO_%s ", sched_feat_names[i]); | ||
| 735 | } | 734 | } | 
| 735 | seq_puts(m, "\n"); | ||
| 736 | 736 | ||
| 737 | r += sprintf(buf + r, "\n"); | 737 | return 0; | 
| 738 | WARN_ON(r >= len + 2); | ||
| 739 | |||
| 740 | r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); | ||
| 741 | |||
| 742 | kfree(buf); | ||
| 743 | |||
| 744 | return r; | ||
| 745 | } | 738 | } | 
| 746 | 739 | ||
| 747 | static ssize_t | 740 | static ssize_t | 
| @@ -786,10 +779,17 @@ sched_feat_write(struct file *filp, const char __user *ubuf, | |||
| 786 | return cnt; | 779 | return cnt; | 
| 787 | } | 780 | } | 
| 788 | 781 | ||
| 782 | static int sched_feat_open(struct inode *inode, struct file *filp) | ||
| 783 | { | ||
| 784 | return single_open(filp, sched_feat_show, NULL); | ||
| 785 | } | ||
| 786 | |||
| 789 | static struct file_operations sched_feat_fops = { | 787 | static struct file_operations sched_feat_fops = { | 
| 790 | .open = sched_feat_open, | 788 | .open = sched_feat_open, | 
| 791 | .read = sched_feat_read, | 789 | .write = sched_feat_write, | 
| 792 | .write = sched_feat_write, | 790 | .read = seq_read, | 
| 791 | .llseek = seq_lseek, | ||
| 792 | .release = single_release, | ||
| 793 | }; | 793 | }; | 
| 794 | 794 | ||
| 795 | static __init int sched_init_debug(void) | 795 | static __init int sched_init_debug(void) | 
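The hand-rolled kmalloc()/sprintf() read path for the sched_features file is replaced by the single_open() flavour of seq_file, which handles buffering, partial reads and llseek for a file whose entire contents come from one show() callback. A minimal sketch of that idiom with hypothetical demo_* names:

	#include <linux/fs.h>
	#include <linux/seq_file.h>

	static int demo_show(struct seq_file *m, void *v)
	{
		seq_printf(m, "value %d\n", 42);	/* emit the whole file */
		return 0;
	}

	static int demo_open(struct inode *inode, struct file *filp)
	{
		return single_open(filp, demo_show, NULL);
	}

	static const struct file_operations demo_fops = {
		.open		= demo_open,
		.read		= seq_read,
		.llseek		= seq_lseek,
		.release	= single_release,
	};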
| @@ -1453,9 +1453,10 @@ static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); | |||
| 1453 | static unsigned long cpu_avg_load_per_task(int cpu) | 1453 | static unsigned long cpu_avg_load_per_task(int cpu) | 
| 1454 | { | 1454 | { | 
| 1455 | struct rq *rq = cpu_rq(cpu); | 1455 | struct rq *rq = cpu_rq(cpu); | 
| 1456 | unsigned long nr_running = ACCESS_ONCE(rq->nr_running); | ||
| 1456 | 1457 | ||
| 1457 | if (rq->nr_running) | 1458 | if (nr_running) | 
| 1458 | rq->avg_load_per_task = rq->load.weight / rq->nr_running; | 1459 | rq->avg_load_per_task = rq->load.weight / nr_running; | 
| 1459 | else | 1460 | else | 
| 1460 | rq->avg_load_per_task = 0; | 1461 | rq->avg_load_per_task = 0; | 
| 1461 | 1462 | ||
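The cpu_avg_load_per_task() change closes a divide-by-zero window: rq->nr_running can drop to zero between the test and the division when another CPU dequeues the last task, and ACCESS_ONCE() also keeps the compiler from reloading the field. A sketch of the before/after shape (struct rq fields as used in this file):

	/* racy: two separate loads of the shared counter */
	static unsigned long demo_racy_avg(struct rq *rq)
	{
		if (rq->nr_running)					/* load #1 */
			return rq->load.weight / rq->nr_running;	/* load #2 may now be 0 */
		return 0;
	}

	/* fixed: one snapshot, then test and divide the snapshot */
	static unsigned long demo_safe_avg(struct rq *rq)
	{
		unsigned long nr = ACCESS_ONCE(rq->nr_running);

		return nr ? rq->load.weight / nr : 0;
	}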
| @@ -1473,27 +1474,13 @@ static void | |||
| 1473 | update_group_shares_cpu(struct task_group *tg, int cpu, | 1474 | update_group_shares_cpu(struct task_group *tg, int cpu, | 
| 1474 | unsigned long sd_shares, unsigned long sd_rq_weight) | 1475 | unsigned long sd_shares, unsigned long sd_rq_weight) | 
| 1475 | { | 1476 | { | 
| 1476 | int boost = 0; | ||
| 1477 | unsigned long shares; | 1477 | unsigned long shares; | 
| 1478 | unsigned long rq_weight; | 1478 | unsigned long rq_weight; | 
| 1479 | 1479 | ||
| 1480 | if (!tg->se[cpu]) | 1480 | if (!tg->se[cpu]) | 
| 1481 | return; | 1481 | return; | 
| 1482 | 1482 | ||
| 1483 | rq_weight = tg->cfs_rq[cpu]->load.weight; | 1483 | rq_weight = tg->cfs_rq[cpu]->rq_weight; | 
| 1484 | |||
| 1485 | /* | ||
| 1486 | * If there are currently no tasks on the cpu pretend there is one of | ||
| 1487 | * average load so that when a new task gets to run here it will not | ||
| 1488 | * get delayed by group starvation. | ||
| 1489 | */ | ||
| 1490 | if (!rq_weight) { | ||
| 1491 | boost = 1; | ||
| 1492 | rq_weight = NICE_0_LOAD; | ||
| 1493 | } | ||
| 1494 | |||
| 1495 | if (unlikely(rq_weight > sd_rq_weight)) | ||
| 1496 | rq_weight = sd_rq_weight; | ||
| 1497 | 1484 | ||
| 1498 | /* | 1485 | /* | 
| 1499 | * \Sum shares * rq_weight | 1486 | * \Sum shares * rq_weight | 
| @@ -1501,7 +1488,7 @@ update_group_shares_cpu(struct task_group *tg, int cpu, | |||
| 1501 | * \Sum rq_weight | 1488 | * \Sum rq_weight | 
| 1502 | * | 1489 | * | 
| 1503 | */ | 1490 | */ | 
| 1504 | shares = (sd_shares * rq_weight) / (sd_rq_weight + 1); | 1491 | shares = (sd_shares * rq_weight) / sd_rq_weight; | 
| 1505 | shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES); | 1492 | shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES); | 
| 1506 | 1493 | ||
| 1507 | if (abs(shares - tg->se[cpu]->load.weight) > | 1494 | if (abs(shares - tg->se[cpu]->load.weight) > | 
| @@ -1510,11 +1497,7 @@ update_group_shares_cpu(struct task_group *tg, int cpu, | |||
| 1510 | unsigned long flags; | 1497 | unsigned long flags; | 
| 1511 | 1498 | ||
| 1512 | spin_lock_irqsave(&rq->lock, flags); | 1499 | spin_lock_irqsave(&rq->lock, flags); | 
| 1513 | /* | 1500 | tg->cfs_rq[cpu]->shares = shares; | 
| 1514 | * record the actual number of shares, not the boosted amount. | ||
| 1515 | */ | ||
| 1516 | tg->cfs_rq[cpu]->shares = boost ? 0 : shares; | ||
| 1517 | tg->cfs_rq[cpu]->rq_weight = rq_weight; | ||
| 1518 | 1501 | ||
| 1519 | __set_se_shares(tg->se[cpu], shares); | 1502 | __set_se_shares(tg->se[cpu], shares); | 
| 1520 | spin_unlock_irqrestore(&rq->lock, flags); | 1503 | spin_unlock_irqrestore(&rq->lock, flags); | 
| @@ -1528,13 +1511,23 @@ update_group_shares_cpu(struct task_group *tg, int cpu, | |||
| 1528 | */ | 1511 | */ | 
| 1529 | static int tg_shares_up(struct task_group *tg, void *data) | 1512 | static int tg_shares_up(struct task_group *tg, void *data) | 
| 1530 | { | 1513 | { | 
| 1531 | unsigned long rq_weight = 0; | 1514 | unsigned long weight, rq_weight = 0; | 
| 1532 | unsigned long shares = 0; | 1515 | unsigned long shares = 0; | 
| 1533 | struct sched_domain *sd = data; | 1516 | struct sched_domain *sd = data; | 
| 1534 | int i; | 1517 | int i; | 
| 1535 | 1518 | ||
| 1536 | for_each_cpu_mask(i, sd->span) { | 1519 | for_each_cpu_mask(i, sd->span) { | 
| 1537 | rq_weight += tg->cfs_rq[i]->load.weight; | 1520 | /* | 
| 1521 | * If there are currently no tasks on the cpu pretend there | ||
| 1522 | * is one of average load so that when a new task gets to | ||
| 1523 | * run here it will not get delayed by group starvation. | ||
| 1524 | */ | ||
| 1525 | weight = tg->cfs_rq[i]->load.weight; | ||
| 1526 | if (!weight) | ||
| 1527 | weight = NICE_0_LOAD; | ||
| 1528 | |||
| 1529 | tg->cfs_rq[i]->rq_weight = weight; | ||
| 1530 | rq_weight += weight; | ||
| 1538 | shares += tg->cfs_rq[i]->shares; | 1531 | shares += tg->cfs_rq[i]->shares; | 
| 1539 | } | 1532 | } | 
| 1540 | 1533 | ||
| @@ -1544,9 +1537,6 @@ static int tg_shares_up(struct task_group *tg, void *data) | |||
| 1544 | if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE)) | 1537 | if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE)) | 
| 1545 | shares = tg->shares; | 1538 | shares = tg->shares; | 
| 1546 | 1539 | ||
| 1547 | if (!rq_weight) | ||
| 1548 | rq_weight = cpus_weight(sd->span) * NICE_0_LOAD; | ||
| 1549 | |||
| 1550 | for_each_cpu_mask(i, sd->span) | 1540 | for_each_cpu_mask(i, sd->span) | 
| 1551 | update_group_shares_cpu(tg, i, shares, rq_weight); | 1541 | update_group_shares_cpu(tg, i, shares, rq_weight); | 
| 1552 | 1542 | ||
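Net effect of the three hunks above: the "pretend one average task" boost moves from update_group_shares_cpu() into the summation in tg_shares_up(), so every CPU in the domain contributes at least NICE_0_LOAD to the total and the per-cpu share can be computed directly as shares_i = sd_shares * rq_weight_i / sum(rq_weight), then clamped to [MIN_SHARES, MAX_SHARES]. Worked example: with a group share of 1024 and two CPUs whose rq_weight is 3072 and 1024, the split is 1024 * 3072 / 4096 = 768 and 1024 * 1024 / 4096 = 256.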
| @@ -1611,6 +1601,39 @@ static inline void update_shares_locked(struct rq *rq, struct sched_domain *sd) | |||
| 1611 | 1601 | ||
| 1612 | #endif | 1602 | #endif | 
| 1613 | 1603 | ||
| 1604 | /* | ||
| 1605 | * double_lock_balance - lock the busiest runqueue, this_rq is locked already. | ||
| 1606 | */ | ||
| 1607 | static int double_lock_balance(struct rq *this_rq, struct rq *busiest) | ||
| 1608 | __releases(this_rq->lock) | ||
| 1609 | __acquires(busiest->lock) | ||
| 1610 | __acquires(this_rq->lock) | ||
| 1611 | { | ||
| 1612 | int ret = 0; | ||
| 1613 | |||
| 1614 | if (unlikely(!irqs_disabled())) { | ||
| 1615 | /* printk() doesn't work good under rq->lock */ | ||
| 1616 | spin_unlock(&this_rq->lock); | ||
| 1617 | BUG_ON(1); | ||
| 1618 | } | ||
| 1619 | if (unlikely(!spin_trylock(&busiest->lock))) { | ||
| 1620 | if (busiest < this_rq) { | ||
| 1621 | spin_unlock(&this_rq->lock); | ||
| 1622 | spin_lock(&busiest->lock); | ||
| 1623 | spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING); | ||
| 1624 | ret = 1; | ||
| 1625 | } else | ||
| 1626 | spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING); | ||
| 1627 | } | ||
| 1628 | return ret; | ||
| 1629 | } | ||
| 1630 | |||
| 1631 | static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest) | ||
| 1632 | __releases(busiest->lock) | ||
| 1633 | { | ||
| 1634 | spin_unlock(&busiest->lock); | ||
| 1635 | lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_); | ||
| 1636 | } | ||
| 1614 | #endif | 1637 | #endif | 
| 1615 | 1638 | ||
| 1616 | #ifdef CONFIG_FAIR_GROUP_SCHED | 1639 | #ifdef CONFIG_FAIR_GROUP_SCHED | 
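double_lock_balance()/double_unlock_balance() are only moved here (the identical block is deleted from its old spot after double_rq_unlock() further down), but the idiom deserves a note: when two runqueue locks are needed, the lower-addressed one is always taken first, giving every CPU the same global order and ruling out ABBA deadlock, and the return value tells the caller whether this_rq->lock was dropped and retaken (so its rq state must be revalidated). A generic sketch of the same trick with two arbitrary spinlocks:

	#include <linux/spinlock.h>

	/* acquire 'b' while already holding 'a'; keep a global order by address.
	 * Returns 1 if 'a' had to be dropped and retaken. */
	static int demo_double_lock(spinlock_t *a, spinlock_t *b)
	{
		int dropped = 0;

		if (!spin_trylock(b)) {
			if (b < a) {		/* taking b second would invert the order */
				spin_unlock(a);
				spin_lock(b);
				spin_lock_nested(a, SINGLE_DEPTH_NESTING);
				dropped = 1;
			} else {
				spin_lock_nested(b, SINGLE_DEPTH_NESTING);
			}
		}
		return dropped;
	}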
| @@ -1844,6 +1867,8 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) | |||
| 1844 | 1867 | ||
| 1845 | clock_offset = old_rq->clock - new_rq->clock; | 1868 | clock_offset = old_rq->clock - new_rq->clock; | 
| 1846 | 1869 | ||
| 1870 | trace_sched_migrate_task(p, task_cpu(p), new_cpu); | ||
| 1871 | |||
| 1847 | #ifdef CONFIG_SCHEDSTATS | 1872 | #ifdef CONFIG_SCHEDSTATS | 
| 1848 | if (p->se.wait_start) | 1873 | if (p->se.wait_start) | 
| 1849 | p->se.wait_start -= clock_offset; | 1874 | p->se.wait_start -= clock_offset; | 
| @@ -2253,6 +2278,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) | |||
| 2253 | 2278 | ||
| 2254 | smp_wmb(); | 2279 | smp_wmb(); | 
| 2255 | rq = task_rq_lock(p, &flags); | 2280 | rq = task_rq_lock(p, &flags); | 
| 2281 | update_rq_clock(rq); | ||
| 2256 | old_state = p->state; | 2282 | old_state = p->state; | 
| 2257 | if (!(old_state & state)) | 2283 | if (!(old_state & state)) | 
| 2258 | goto out; | 2284 | goto out; | 
| @@ -2310,12 +2336,11 @@ out_activate: | |||
| 2310 | schedstat_inc(p, se.nr_wakeups_local); | 2336 | schedstat_inc(p, se.nr_wakeups_local); | 
| 2311 | else | 2337 | else | 
| 2312 | schedstat_inc(p, se.nr_wakeups_remote); | 2338 | schedstat_inc(p, se.nr_wakeups_remote); | 
| 2313 | update_rq_clock(rq); | ||
| 2314 | activate_task(rq, p, 1); | 2339 | activate_task(rq, p, 1); | 
| 2315 | success = 1; | 2340 | success = 1; | 
| 2316 | 2341 | ||
| 2317 | out_running: | 2342 | out_running: | 
| 2318 | trace_sched_wakeup(rq, p); | 2343 | trace_sched_wakeup(rq, p, success); | 
| 2319 | check_preempt_curr(rq, p, sync); | 2344 | check_preempt_curr(rq, p, sync); | 
| 2320 | 2345 | ||
| 2321 | p->state = TASK_RUNNING; | 2346 | p->state = TASK_RUNNING; | 
| @@ -2448,7 +2473,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) | |||
| 2448 | p->sched_class->task_new(rq, p); | 2473 | p->sched_class->task_new(rq, p); | 
| 2449 | inc_nr_running(rq); | 2474 | inc_nr_running(rq); | 
| 2450 | } | 2475 | } | 
| 2451 | trace_sched_wakeup_new(rq, p); | 2476 | trace_sched_wakeup_new(rq, p, 1); | 
| 2452 | check_preempt_curr(rq, p, 0); | 2477 | check_preempt_curr(rq, p, 0); | 
| 2453 | #ifdef CONFIG_SMP | 2478 | #ifdef CONFIG_SMP | 
| 2454 | if (p->sched_class->task_wake_up) | 2479 | if (p->sched_class->task_wake_up) | 
| @@ -2811,40 +2836,6 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2) | |||
| 2811 | } | 2836 | } | 
| 2812 | 2837 | ||
| 2813 | /* | 2838 | /* | 
| 2814 | * double_lock_balance - lock the busiest runqueue, this_rq is locked already. | ||
| 2815 | */ | ||
| 2816 | static int double_lock_balance(struct rq *this_rq, struct rq *busiest) | ||
| 2817 | __releases(this_rq->lock) | ||
| 2818 | __acquires(busiest->lock) | ||
| 2819 | __acquires(this_rq->lock) | ||
| 2820 | { | ||
| 2821 | int ret = 0; | ||
| 2822 | |||
| 2823 | if (unlikely(!irqs_disabled())) { | ||
| 2824 | /* printk() doesn't work good under rq->lock */ | ||
| 2825 | spin_unlock(&this_rq->lock); | ||
| 2826 | BUG_ON(1); | ||
| 2827 | } | ||
| 2828 | if (unlikely(!spin_trylock(&busiest->lock))) { | ||
| 2829 | if (busiest < this_rq) { | ||
| 2830 | spin_unlock(&this_rq->lock); | ||
| 2831 | spin_lock(&busiest->lock); | ||
| 2832 | spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING); | ||
| 2833 | ret = 1; | ||
| 2834 | } else | ||
| 2835 | spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING); | ||
| 2836 | } | ||
| 2837 | return ret; | ||
| 2838 | } | ||
| 2839 | |||
| 2840 | static void double_unlock_balance(struct rq *this_rq, struct rq *busiest) | ||
| 2841 | __releases(busiest->lock) | ||
| 2842 | { | ||
| 2843 | spin_unlock(&busiest->lock); | ||
| 2844 | lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_); | ||
| 2845 | } | ||
| 2846 | |||
| 2847 | /* | ||
| 2848 | * If dest_cpu is allowed for this process, migrate the task to it. | 2839 | * If dest_cpu is allowed for this process, migrate the task to it. | 
| 2849 | * This is accomplished by forcing the cpu_allowed mask to only | 2840 | * This is accomplished by forcing the cpu_allowed mask to only | 
| 2850 | * allow dest_cpu, which will force the cpu onto dest_cpu. Then | 2841 | * allow dest_cpu, which will force the cpu onto dest_cpu. Then | 
| @@ -2861,7 +2852,6 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu) | |||
| 2861 | || unlikely(!cpu_active(dest_cpu))) | 2852 | || unlikely(!cpu_active(dest_cpu))) | 
| 2862 | goto out; | 2853 | goto out; | 
| 2863 | 2854 | ||
| 2864 | trace_sched_migrate_task(rq, p, dest_cpu); | ||
| 2865 | /* force the process onto the specified CPU */ | 2855 | /* force the process onto the specified CPU */ | 
| 2866 | if (migrate_task(p, dest_cpu, &req)) { | 2856 | if (migrate_task(p, dest_cpu, &req)) { | 
| 2867 | /* Need to wait for migration thread (might exit: take ref). */ | 2857 | /* Need to wait for migration thread (might exit: take ref). */ | 
| @@ -3706,7 +3696,7 @@ out_balanced: | |||
| 3706 | static void idle_balance(int this_cpu, struct rq *this_rq) | 3696 | static void idle_balance(int this_cpu, struct rq *this_rq) | 
| 3707 | { | 3697 | { | 
| 3708 | struct sched_domain *sd; | 3698 | struct sched_domain *sd; | 
| 3709 | int pulled_task = -1; | 3699 | int pulled_task = 0; | 
| 3710 | unsigned long next_balance = jiffies + HZ; | 3700 | unsigned long next_balance = jiffies + HZ; | 
| 3711 | cpumask_t tmpmask; | 3701 | cpumask_t tmpmask; | 
| 3712 | 3702 | ||
| @@ -5133,6 +5123,22 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio) | |||
| 5133 | set_load_weight(p); | 5123 | set_load_weight(p); | 
| 5134 | } | 5124 | } | 
| 5135 | 5125 | ||
| 5126 | /* | ||
| 5127 | * check the target process has a UID that matches the current process's | ||
| 5128 | */ | ||
| 5129 | static bool check_same_owner(struct task_struct *p) | ||
| 5130 | { | ||
| 5131 | const struct cred *cred = current_cred(), *pcred; | ||
| 5132 | bool match; | ||
| 5133 | |||
| 5134 | rcu_read_lock(); | ||
| 5135 | pcred = __task_cred(p); | ||
| 5136 | match = (cred->euid == pcred->euid || | ||
| 5137 | cred->euid == pcred->uid); | ||
| 5138 | rcu_read_unlock(); | ||
| 5139 | return match; | ||
| 5140 | } | ||
| 5141 | |||
| 5136 | static int __sched_setscheduler(struct task_struct *p, int policy, | 5142 | static int __sched_setscheduler(struct task_struct *p, int policy, | 
| 5137 | struct sched_param *param, bool user) | 5143 | struct sched_param *param, bool user) | 
| 5138 | { | 5144 | { | 
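check_same_owner() centralises the open-coded euid/uid comparison and, like task_group() above, reads the target's credentials under rcu_read_lock(), since another thread may change them concurrently. The two call sites below then collapse to one readable test; at the sched_setaffinity() site the shape is:

	retval = -EPERM;
	if (!check_same_owner(p) && !capable(CAP_SYS_NICE))
		goto out_unlock;	/* not our task and no CAP_SYS_NICE: refuse */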
| @@ -5192,8 +5198,7 @@ recheck: | |||
| 5192 | return -EPERM; | 5198 | return -EPERM; | 
| 5193 | 5199 | ||
| 5194 | /* can't change other user's priorities */ | 5200 | /* can't change other user's priorities */ | 
| 5195 | if ((current->euid != p->euid) && | 5201 | if (!check_same_owner(p)) | 
| 5196 | (current->euid != p->uid)) | ||
| 5197 | return -EPERM; | 5202 | return -EPERM; | 
| 5198 | } | 5203 | } | 
| 5199 | 5204 | ||
| @@ -5425,8 +5430,7 @@ long sched_setaffinity(pid_t pid, const cpumask_t *in_mask) | |||
| 5425 | read_unlock(&tasklist_lock); | 5430 | read_unlock(&tasklist_lock); | 
| 5426 | 5431 | ||
| 5427 | retval = -EPERM; | 5432 | retval = -EPERM; | 
| 5428 | if ((current->euid != p->euid) && (current->euid != p->uid) && | 5433 | if (!check_same_owner(p) && !capable(CAP_SYS_NICE)) | 
| 5429 | !capable(CAP_SYS_NICE)) | ||
| 5430 | goto out_unlock; | 5434 | goto out_unlock; | 
| 5431 | 5435 | ||
| 5432 | retval = security_task_setscheduler(p, 0, NULL); | 5436 | retval = security_task_setscheduler(p, 0, NULL); | 
| @@ -5895,6 +5899,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) | |||
| 5895 | * The idle tasks have their own, simple scheduling class: | 5899 | * The idle tasks have their own, simple scheduling class: | 
| 5896 | */ | 5900 | */ | 
| 5897 | idle->sched_class = &idle_sched_class; | 5901 | idle->sched_class = &idle_sched_class; | 
| 5902 | ftrace_graph_init_task(idle); | ||
| 5898 | } | 5903 | } | 
| 5899 | 5904 | ||
| 5900 | /* | 5905 | /* | 
| @@ -6125,7 +6130,6 @@ static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu) | |||
| 6125 | 6130 | ||
| 6126 | /* | 6131 | /* | 
| 6127 | * Figure out where task on dead CPU should go, use force if necessary. | 6132 | * Figure out where task on dead CPU should go, use force if necessary. | 
| 6128 | * NOTE: interrupts should be disabled by the caller | ||
| 6129 | */ | 6133 | */ | 
| 6130 | static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) | 6134 | static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) | 
| 6131 | { | 6135 | { | 
| @@ -6586,7 +6590,9 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
| 6586 | req = list_entry(rq->migration_queue.next, | 6590 | req = list_entry(rq->migration_queue.next, | 
| 6587 | struct migration_req, list); | 6591 | struct migration_req, list); | 
| 6588 | list_del_init(&req->list); | 6592 | list_del_init(&req->list); | 
| 6593 | spin_unlock_irq(&rq->lock); | ||
| 6589 | complete(&req->done); | 6594 | complete(&req->done); | 
| 6595 | spin_lock_irq(&rq->lock); | ||
| 6590 | } | 6596 | } | 
| 6591 | spin_unlock_irq(&rq->lock); | 6597 | spin_unlock_irq(&rq->lock); | 
| 6592 | break; | 6598 | break; | 
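The assumed rationale for dropping rq->lock around complete() (it is not spelled out in the hunk): complete() wakes the thread sleeping in wait_for_completion(), and try_to_wake_up() takes the woken task's runqueue lock, which can be this very rq, so completing with rq->lock held risks deadlock. As call-site comments it would read:

	spin_unlock_irq(&rq->lock);	/* complete() may wake a task onto this rq */
	complete(&req->done);
	spin_lock_irq(&rq->lock);	/* retake before draining the rest of the queue */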
| @@ -6635,28 +6641,6 @@ early_initcall(migration_init); | |||
| 6635 | 6641 | ||
| 6636 | #ifdef CONFIG_SCHED_DEBUG | 6642 | #ifdef CONFIG_SCHED_DEBUG | 
| 6637 | 6643 | ||
| 6638 | static inline const char *sd_level_to_string(enum sched_domain_level lvl) | ||
| 6639 | { | ||
| 6640 | switch (lvl) { | ||
| 6641 | case SD_LV_NONE: | ||
| 6642 | return "NONE"; | ||
| 6643 | case SD_LV_SIBLING: | ||
| 6644 | return "SIBLING"; | ||
| 6645 | case SD_LV_MC: | ||
| 6646 | return "MC"; | ||
| 6647 | case SD_LV_CPU: | ||
| 6648 | return "CPU"; | ||
| 6649 | case SD_LV_NODE: | ||
| 6650 | return "NODE"; | ||
| 6651 | case SD_LV_ALLNODES: | ||
| 6652 | return "ALLNODES"; | ||
| 6653 | case SD_LV_MAX: | ||
| 6654 | return "MAX"; | ||
| 6655 | |||
| 6656 | } | ||
| 6657 | return "MAX"; | ||
| 6658 | } | ||
| 6659 | |||
| 6660 | static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, | 6644 | static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, | 
| 6661 | cpumask_t *groupmask) | 6645 | cpumask_t *groupmask) | 
| 6662 | { | 6646 | { | 
| @@ -6676,8 +6660,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, | |||
| 6676 | return -1; | 6660 | return -1; | 
| 6677 | } | 6661 | } | 
| 6678 | 6662 | ||
| 6679 | printk(KERN_CONT "span %s level %s\n", | 6663 | printk(KERN_CONT "span %s level %s\n", str, sd->name); | 
| 6680 | str, sd_level_to_string(sd->level)); | ||
| 6681 | 6664 | ||
| 6682 | if (!cpu_isset(cpu, sd->span)) { | 6665 | if (!cpu_isset(cpu, sd->span)) { | 
| 6683 | printk(KERN_ERR "ERROR: domain->span does not contain " | 6666 | printk(KERN_ERR "ERROR: domain->span does not contain " | 
| @@ -6813,6 +6796,8 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) | |||
| 6813 | SD_BALANCE_EXEC | | 6796 | SD_BALANCE_EXEC | | 
| 6814 | SD_SHARE_CPUPOWER | | 6797 | SD_SHARE_CPUPOWER | | 
| 6815 | SD_SHARE_PKG_RESOURCES); | 6798 | SD_SHARE_PKG_RESOURCES); | 
| 6799 | if (nr_node_ids == 1) | ||
| 6800 | pflags &= ~SD_SERIALIZE; | ||
| 6816 | } | 6801 | } | 
| 6817 | if (~cflags & pflags) | 6802 | if (~cflags & pflags) | 
| 6818 | return 0; | 6803 | return 0; | 
| @@ -7333,13 +7318,21 @@ struct allmasks { | |||
| 7333 | }; | 7318 | }; | 
| 7334 | 7319 | ||
| 7335 | #if NR_CPUS > 128 | 7320 | #if NR_CPUS > 128 | 
| 7336 | #define SCHED_CPUMASK_ALLOC 1 | 7321 | #define SCHED_CPUMASK_DECLARE(v) struct allmasks *v | 
| 7337 | #define SCHED_CPUMASK_FREE(v) kfree(v) | 7322 | static inline void sched_cpumask_alloc(struct allmasks **masks) | 
| 7338 | #define SCHED_CPUMASK_DECLARE(v) struct allmasks *v | 7323 | { | 
| 7324 | *masks = kmalloc(sizeof(**masks), GFP_KERNEL); | ||
| 7325 | } | ||
| 7326 | static inline void sched_cpumask_free(struct allmasks *masks) | ||
| 7327 | { | ||
| 7328 | kfree(masks); | ||
| 7329 | } | ||
| 7339 | #else | 7330 | #else | 
| 7340 | #define SCHED_CPUMASK_ALLOC 0 | 7331 | #define SCHED_CPUMASK_DECLARE(v) struct allmasks _v, *v = &_v | 
| 7341 | #define SCHED_CPUMASK_FREE(v) | 7332 | static inline void sched_cpumask_alloc(struct allmasks **masks) | 
| 7342 | #define SCHED_CPUMASK_DECLARE(v) struct allmasks _v, *v = &_v | 7333 | { } | 
| 7334 | static inline void sched_cpumask_free(struct allmasks *masks) | ||
| 7335 | { } | ||
| 7343 | #endif | 7336 | #endif | 
| 7344 | 7337 | ||
| 7345 | #define SCHED_CPUMASK_VAR(v, a) cpumask_t *v = (cpumask_t *) \ | 7338 | #define SCHED_CPUMASK_VAR(v, a) cpumask_t *v = (cpumask_t *) \ | 
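Replacing the SCHED_CPUMASK_ALLOC / SCHED_CPUMASK_FREE macros with sched_cpumask_alloc() / sched_cpumask_free() lets the caller drop its #if block (see the __build_sched_domains() hunks below) and keeps both variants type-checked even when NR_CPUS <= 128, since an empty static inline compiles away just like an empty macro. The same idiom with a hypothetical config switch:

	#include <linux/slab.h>

	#ifdef CONFIG_DEMO_DYNAMIC_BUF
	static inline void demo_buf_alloc(void **buf, size_t size)
	{
		*buf = kmalloc(size, GFP_KERNEL);
	}
	static inline void demo_buf_free(void *buf)
	{
		kfree(buf);
	}
	#else
	static inline void demo_buf_alloc(void **buf, size_t size) { }
	static inline void demo_buf_free(void *buf) { }
	#endif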
| @@ -7415,9 +7408,8 @@ static int __build_sched_domains(const cpumask_t *cpu_map, | |||
| 7415 | return -ENOMEM; | 7408 | return -ENOMEM; | 
| 7416 | } | 7409 | } | 
| 7417 | 7410 | ||
| 7418 | #if SCHED_CPUMASK_ALLOC | ||
| 7419 | /* get space for all scratch cpumask variables */ | 7411 | /* get space for all scratch cpumask variables */ | 
| 7420 | allmasks = kmalloc(sizeof(*allmasks), GFP_KERNEL); | 7412 | sched_cpumask_alloc(&allmasks); | 
| 7421 | if (!allmasks) { | 7413 | if (!allmasks) { | 
| 7422 | printk(KERN_WARNING "Cannot alloc cpumask array\n"); | 7414 | printk(KERN_WARNING "Cannot alloc cpumask array\n"); | 
| 7423 | kfree(rd); | 7415 | kfree(rd); | 
| @@ -7426,7 +7418,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map, | |||
| 7426 | #endif | 7418 | #endif | 
| 7427 | return -ENOMEM; | 7419 | return -ENOMEM; | 
| 7428 | } | 7420 | } | 
| 7429 | #endif | 7421 | |
| 7430 | tmpmask = (cpumask_t *)allmasks; | 7422 | tmpmask = (cpumask_t *)allmasks; | 
| 7431 | 7423 | ||
| 7432 | 7424 | ||
| @@ -7680,13 +7672,13 @@ static int __build_sched_domains(const cpumask_t *cpu_map, | |||
| 7680 | cpu_attach_domain(sd, rd, i); | 7672 | cpu_attach_domain(sd, rd, i); | 
| 7681 | } | 7673 | } | 
| 7682 | 7674 | ||
| 7683 | SCHED_CPUMASK_FREE((void *)allmasks); | 7675 | sched_cpumask_free(allmasks); | 
| 7684 | return 0; | 7676 | return 0; | 
| 7685 | 7677 | ||
| 7686 | #ifdef CONFIG_NUMA | 7678 | #ifdef CONFIG_NUMA | 
| 7687 | error: | 7679 | error: | 
| 7688 | free_sched_groups(cpu_map, tmpmask); | 7680 | free_sched_groups(cpu_map, tmpmask); | 
| 7689 | SCHED_CPUMASK_FREE((void *)allmasks); | 7681 | sched_cpumask_free(allmasks); | 
| 7690 | kfree(rd); | 7682 | kfree(rd); | 
| 7691 | return -ENOMEM; | 7683 | return -ENOMEM; | 
| 7692 | #endif | 7684 | #endif | 
| @@ -7709,8 +7701,14 @@ static struct sched_domain_attr *dattr_cur; | |||
| 7709 | */ | 7701 | */ | 
| 7710 | static cpumask_t fallback_doms; | 7702 | static cpumask_t fallback_doms; | 
| 7711 | 7703 | ||
| 7712 | void __attribute__((weak)) arch_update_cpu_topology(void) | 7704 | /* | 
| 7705 | * arch_update_cpu_topology lets virtualized architectures update the | ||
| 7706 | * cpu core maps. It is supposed to return 1 if the topology changed | ||
| 7707 | * or 0 if it stayed the same. | ||
| 7708 | */ | ||
| 7709 | int __attribute__((weak)) arch_update_cpu_topology(void) | ||
| 7713 | { | 7710 | { | 
| 7711 | return 0; | ||
| 7714 | } | 7712 | } | 
| 7715 | 7713 | ||
| 7716 | /* | 7714 | /* | 
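arch_update_cpu_topology() keeps its weak no-op default but now reports whether the cpu core maps changed; partition_sched_domains() below uses that to skip the old-vs-new domain comparison and rebuild everything after a topology change. An architecture override would look roughly like this (sketch; demo_topology_changed() is a made-up helper):

	/* in arch code, overriding the weak default */
	int arch_update_cpu_topology(void)
	{
		return demo_topology_changed() ? 1 : 0;
	}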
| @@ -7750,8 +7748,6 @@ static void detach_destroy_domains(const cpumask_t *cpu_map) | |||
| 7750 | cpumask_t tmpmask; | 7748 | cpumask_t tmpmask; | 
| 7751 | int i; | 7749 | int i; | 
| 7752 | 7750 | ||
| 7753 | unregister_sched_domain_sysctl(); | ||
| 7754 | |||
| 7755 | for_each_cpu_mask_nr(i, *cpu_map) | 7751 | for_each_cpu_mask_nr(i, *cpu_map) | 
| 7756 | cpu_attach_domain(NULL, &def_root_domain, i); | 7752 | cpu_attach_domain(NULL, &def_root_domain, i); | 
| 7757 | synchronize_sched(); | 7753 | synchronize_sched(); | 
| @@ -7804,17 +7800,21 @@ void partition_sched_domains(int ndoms_new, cpumask_t *doms_new, | |||
| 7804 | struct sched_domain_attr *dattr_new) | 7800 | struct sched_domain_attr *dattr_new) | 
| 7805 | { | 7801 | { | 
| 7806 | int i, j, n; | 7802 | int i, j, n; | 
| 7803 | int new_topology; | ||
| 7807 | 7804 | ||
| 7808 | mutex_lock(&sched_domains_mutex); | 7805 | mutex_lock(&sched_domains_mutex); | 
| 7809 | 7806 | ||
| 7810 | /* always unregister in case we don't destroy any domains */ | 7807 | /* always unregister in case we don't destroy any domains */ | 
| 7811 | unregister_sched_domain_sysctl(); | 7808 | unregister_sched_domain_sysctl(); | 
| 7812 | 7809 | ||
| 7810 | /* Let architecture update cpu core mappings. */ | ||
| 7811 | new_topology = arch_update_cpu_topology(); | ||
| 7812 | |||
| 7813 | n = doms_new ? ndoms_new : 0; | 7813 | n = doms_new ? ndoms_new : 0; | 
| 7814 | 7814 | ||
| 7815 | /* Destroy deleted domains */ | 7815 | /* Destroy deleted domains */ | 
| 7816 | for (i = 0; i < ndoms_cur; i++) { | 7816 | for (i = 0; i < ndoms_cur; i++) { | 
| 7817 | for (j = 0; j < n; j++) { | 7817 | for (j = 0; j < n && !new_topology; j++) { | 
| 7818 | if (cpus_equal(doms_cur[i], doms_new[j]) | 7818 | if (cpus_equal(doms_cur[i], doms_new[j]) | 
| 7819 | && dattrs_equal(dattr_cur, i, dattr_new, j)) | 7819 | && dattrs_equal(dattr_cur, i, dattr_new, j)) | 
| 7820 | goto match1; | 7820 | goto match1; | 
| @@ -7829,12 +7829,12 @@ match1: | |||
| 7829 | ndoms_cur = 0; | 7829 | ndoms_cur = 0; | 
| 7830 | doms_new = &fallback_doms; | 7830 | doms_new = &fallback_doms; | 
| 7831 | cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map); | 7831 | cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map); | 
| 7832 | dattr_new = NULL; | 7832 | WARN_ON_ONCE(dattr_new); | 
| 7833 | } | 7833 | } | 
| 7834 | 7834 | ||
| 7835 | /* Build new domains */ | 7835 | /* Build new domains */ | 
| 7836 | for (i = 0; i < ndoms_new; i++) { | 7836 | for (i = 0; i < ndoms_new; i++) { | 
| 7837 | for (j = 0; j < ndoms_cur; j++) { | 7837 | for (j = 0; j < ndoms_cur && !new_topology; j++) { | 
| 7838 | if (cpus_equal(doms_new[i], doms_cur[j]) | 7838 | if (cpus_equal(doms_new[i], doms_cur[j]) | 
| 7839 | && dattrs_equal(dattr_new, i, dattr_cur, j)) | 7839 | && dattrs_equal(dattr_new, i, dattr_cur, j)) | 
| 7840 | goto match2; | 7840 | goto match2; | 
| @@ -8489,7 +8489,7 @@ static | |||
| 8489 | int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) | 8489 | int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) | 
| 8490 | { | 8490 | { | 
| 8491 | struct cfs_rq *cfs_rq; | 8491 | struct cfs_rq *cfs_rq; | 
| 8492 | struct sched_entity *se, *parent_se; | 8492 | struct sched_entity *se; | 
| 8493 | struct rq *rq; | 8493 | struct rq *rq; | 
| 8494 | int i; | 8494 | int i; | 
| 8495 | 8495 | ||
| @@ -8505,18 +8505,17 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) | |||
| 8505 | for_each_possible_cpu(i) { | 8505 | for_each_possible_cpu(i) { | 
| 8506 | rq = cpu_rq(i); | 8506 | rq = cpu_rq(i); | 
| 8507 | 8507 | ||
| 8508 | cfs_rq = kmalloc_node(sizeof(struct cfs_rq), | 8508 | cfs_rq = kzalloc_node(sizeof(struct cfs_rq), | 
| 8509 | GFP_KERNEL|__GFP_ZERO, cpu_to_node(i)); | 8509 | GFP_KERNEL, cpu_to_node(i)); | 
| 8510 | if (!cfs_rq) | 8510 | if (!cfs_rq) | 
| 8511 | goto err; | 8511 | goto err; | 
| 8512 | 8512 | ||
| 8513 | se = kmalloc_node(sizeof(struct sched_entity), | 8513 | se = kzalloc_node(sizeof(struct sched_entity), | 
| 8514 | GFP_KERNEL|__GFP_ZERO, cpu_to_node(i)); | 8514 | GFP_KERNEL, cpu_to_node(i)); | 
| 8515 | if (!se) | 8515 | if (!se) | 
| 8516 | goto err; | 8516 | goto err; | 
| 8517 | 8517 | ||
| 8518 | parent_se = parent ? parent->se[i] : NULL; | 8518 | init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]); | 
| 8519 | init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent_se); | ||
| 8520 | } | 8519 | } | 
| 8521 | 8520 | ||
| 8522 | return 1; | 8521 | return 1; | 
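The allocator change here (and in the matching alloc_rt_sched_group() hunk below) is a spelling cleanup: kzalloc_node(size, GFP_KERNEL, node) is the helper form of kmalloc_node(size, GFP_KERNEL | __GFP_ZERO, node), so behaviour is unchanged:

	/* equivalent allocations; the second is the preferred helper */
	cfs_rq = kmalloc_node(sizeof(struct cfs_rq), GFP_KERNEL | __GFP_ZERO, cpu_to_node(i));
	cfs_rq = kzalloc_node(sizeof(struct cfs_rq), GFP_KERNEL, cpu_to_node(i));

Dropping the parent_se fallback also means these paths now assume a non-NULL parent group; the root group's per-cpu entries are set up separately in sched_init().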
| @@ -8577,7 +8576,7 @@ static | |||
| 8577 | int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent) | 8576 | int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent) | 
| 8578 | { | 8577 | { | 
| 8579 | struct rt_rq *rt_rq; | 8578 | struct rt_rq *rt_rq; | 
| 8580 | struct sched_rt_entity *rt_se, *parent_se; | 8579 | struct sched_rt_entity *rt_se; | 
| 8581 | struct rq *rq; | 8580 | struct rq *rq; | 
| 8582 | int i; | 8581 | int i; | 
| 8583 | 8582 | ||
| @@ -8594,18 +8593,17 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent) | |||
| 8594 | for_each_possible_cpu(i) { | 8593 | for_each_possible_cpu(i) { | 
| 8595 | rq = cpu_rq(i); | 8594 | rq = cpu_rq(i); | 
| 8596 | 8595 | ||
| 8597 | rt_rq = kmalloc_node(sizeof(struct rt_rq), | 8596 | rt_rq = kzalloc_node(sizeof(struct rt_rq), | 
| 8598 | GFP_KERNEL|__GFP_ZERO, cpu_to_node(i)); | 8597 | GFP_KERNEL, cpu_to_node(i)); | 
| 8599 | if (!rt_rq) | 8598 | if (!rt_rq) | 
| 8600 | goto err; | 8599 | goto err; | 
| 8601 | 8600 | ||
| 8602 | rt_se = kmalloc_node(sizeof(struct sched_rt_entity), | 8601 | rt_se = kzalloc_node(sizeof(struct sched_rt_entity), | 
| 8603 | GFP_KERNEL|__GFP_ZERO, cpu_to_node(i)); | 8602 | GFP_KERNEL, cpu_to_node(i)); | 
| 8604 | if (!rt_se) | 8603 | if (!rt_se) | 
| 8605 | goto err; | 8604 | goto err; | 
| 8606 | 8605 | ||
| 8607 | parent_se = parent ? parent->rt_se[i] : NULL; | 8606 | init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]); | 
| 8608 | init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent_se); | ||
| 8609 | } | 8607 | } | 
| 8610 | 8608 | ||
| 8611 | return 1; | 8609 | return 1; | 
| @@ -9248,11 +9246,12 @@ struct cgroup_subsys cpu_cgroup_subsys = { | |||
| 9248 | * (balbir@in.ibm.com). | 9246 | * (balbir@in.ibm.com). | 
| 9249 | */ | 9247 | */ | 
| 9250 | 9248 | ||
| 9251 | /* track cpu usage of a group of tasks */ | 9249 | /* track cpu usage of a group of tasks and its child groups */ | 
| 9252 | struct cpuacct { | 9250 | struct cpuacct { | 
| 9253 | struct cgroup_subsys_state css; | 9251 | struct cgroup_subsys_state css; | 
| 9254 | /* cpuusage holds pointer to a u64-type object on every cpu */ | 9252 | /* cpuusage holds pointer to a u64-type object on every cpu */ | 
| 9255 | u64 *cpuusage; | 9253 | u64 *cpuusage; | 
| 9254 | struct cpuacct *parent; | ||
| 9256 | }; | 9255 | }; | 
| 9257 | 9256 | ||
| 9258 | struct cgroup_subsys cpuacct_subsys; | 9257 | struct cgroup_subsys cpuacct_subsys; | 
| @@ -9286,6 +9285,9 @@ static struct cgroup_subsys_state *cpuacct_create( | |||
| 9286 | return ERR_PTR(-ENOMEM); | 9285 | return ERR_PTR(-ENOMEM); | 
| 9287 | } | 9286 | } | 
| 9288 | 9287 | ||
| 9288 | if (cgrp->parent) | ||
| 9289 | ca->parent = cgroup_ca(cgrp->parent); | ||
| 9290 | |||
| 9289 | return &ca->css; | 9291 | return &ca->css; | 
| 9290 | } | 9292 | } | 
| 9291 | 9293 | ||
| @@ -9299,6 +9301,41 @@ cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) | |||
| 9299 | kfree(ca); | 9301 | kfree(ca); | 
| 9300 | } | 9302 | } | 
| 9301 | 9303 | ||
| 9304 | static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu) | ||
| 9305 | { | ||
| 9306 | u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu); | ||
| 9307 | u64 data; | ||
| 9308 | |||
| 9309 | #ifndef CONFIG_64BIT | ||
| 9310 | /* | ||
| 9311 | * Take rq->lock to make 64-bit read safe on 32-bit platforms. | ||
| 9312 | */ | ||
| 9313 | spin_lock_irq(&cpu_rq(cpu)->lock); | ||
| 9314 | data = *cpuusage; | ||
| 9315 | spin_unlock_irq(&cpu_rq(cpu)->lock); | ||
| 9316 | #else | ||
| 9317 | data = *cpuusage; | ||
| 9318 | #endif | ||
| 9319 | |||
| 9320 | return data; | ||
| 9321 | } | ||
| 9322 | |||
| 9323 | static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val) | ||
| 9324 | { | ||
| 9325 | u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu); | ||
| 9326 | |||
| 9327 | #ifndef CONFIG_64BIT | ||
| 9328 | /* | ||
| 9329 | * Take rq->lock to make 64-bit write safe on 32-bit platforms. | ||
| 9330 | */ | ||
| 9331 | spin_lock_irq(&cpu_rq(cpu)->lock); | ||
| 9332 | *cpuusage = val; | ||
| 9333 | spin_unlock_irq(&cpu_rq(cpu)->lock); | ||
| 9334 | #else | ||
| 9335 | *cpuusage = val; | ||
| 9336 | #endif | ||
| 9337 | } | ||
| 9338 | |||
| 9302 | /* return total cpu usage (in nanoseconds) of a group */ | 9339 | /* return total cpu usage (in nanoseconds) of a group */ | 
| 9303 | static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft) | 9340 | static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft) | 
| 9304 | { | 9341 | { | 
| @@ -9306,17 +9343,8 @@ static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft) | |||
| 9306 | u64 totalcpuusage = 0; | 9343 | u64 totalcpuusage = 0; | 
| 9307 | int i; | 9344 | int i; | 
| 9308 | 9345 | ||
| 9309 | for_each_possible_cpu(i) { | 9346 | for_each_present_cpu(i) | 
| 9310 | u64 *cpuusage = percpu_ptr(ca->cpuusage, i); | 9347 | totalcpuusage += cpuacct_cpuusage_read(ca, i); | 
| 9311 | |||
| 9312 | /* | ||
| 9313 | * Take rq->lock to make 64-bit addition safe on 32-bit | ||
| 9314 | * platforms. | ||
| 9315 | */ | ||
| 9316 | spin_lock_irq(&cpu_rq(i)->lock); | ||
| 9317 | totalcpuusage += *cpuusage; | ||
| 9318 | spin_unlock_irq(&cpu_rq(i)->lock); | ||
| 9319 | } | ||
| 9320 | 9348 | ||
| 9321 | return totalcpuusage; | 9349 | return totalcpuusage; | 
| 9322 | } | 9350 | } | 
| @@ -9333,23 +9361,39 @@ static int cpuusage_write(struct cgroup *cgrp, struct cftype *cftype, | |||
| 9333 | goto out; | 9361 | goto out; | 
| 9334 | } | 9362 | } | 
| 9335 | 9363 | ||
| 9336 | for_each_possible_cpu(i) { | 9364 | for_each_present_cpu(i) | 
| 9337 | u64 *cpuusage = percpu_ptr(ca->cpuusage, i); | 9365 | cpuacct_cpuusage_write(ca, i, 0); | 
| 9338 | 9366 | ||
| 9339 | spin_lock_irq(&cpu_rq(i)->lock); | ||
| 9340 | *cpuusage = 0; | ||
| 9341 | spin_unlock_irq(&cpu_rq(i)->lock); | ||
| 9342 | } | ||
| 9343 | out: | 9367 | out: | 
| 9344 | return err; | 9368 | return err; | 
| 9345 | } | 9369 | } | 
| 9346 | 9370 | ||
| 9371 | static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft, | ||
| 9372 | struct seq_file *m) | ||
| 9373 | { | ||
| 9374 | struct cpuacct *ca = cgroup_ca(cgroup); | ||
| 9375 | u64 percpu; | ||
| 9376 | int i; | ||
| 9377 | |||
| 9378 | for_each_present_cpu(i) { | ||
| 9379 | percpu = cpuacct_cpuusage_read(ca, i); | ||
| 9380 | seq_printf(m, "%llu ", (unsigned long long) percpu); | ||
| 9381 | } | ||
| 9382 | seq_printf(m, "\n"); | ||
| 9383 | return 0; | ||
| 9384 | } | ||
| 9385 | |||
| 9347 | static struct cftype files[] = { | 9386 | static struct cftype files[] = { | 
| 9348 | { | 9387 | { | 
| 9349 | .name = "usage", | 9388 | .name = "usage", | 
| 9350 | .read_u64 = cpuusage_read, | 9389 | .read_u64 = cpuusage_read, | 
| 9351 | .write_u64 = cpuusage_write, | 9390 | .write_u64 = cpuusage_write, | 
| 9352 | }, | 9391 | }, | 
| 9392 | { | ||
| 9393 | .name = "usage_percpu", | ||
| 9394 | .read_seq_string = cpuacct_percpu_seq_read, | ||
| 9395 | }, | ||
| 9396 | |||
| 9353 | }; | 9397 | }; | 
| 9354 | 9398 | ||
| 9355 | static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) | 9399 | static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) | 
| @@ -9365,14 +9409,16 @@ static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) | |||
| 9365 | static void cpuacct_charge(struct task_struct *tsk, u64 cputime) | 9409 | static void cpuacct_charge(struct task_struct *tsk, u64 cputime) | 
| 9366 | { | 9410 | { | 
| 9367 | struct cpuacct *ca; | 9411 | struct cpuacct *ca; | 
| 9412 | int cpu; | ||
| 9368 | 9413 | ||
| 9369 | if (!cpuacct_subsys.active) | 9414 | if (!cpuacct_subsys.active) | 
| 9370 | return; | 9415 | return; | 
| 9371 | 9416 | ||
| 9417 | cpu = task_cpu(tsk); | ||
| 9372 | ca = task_ca(tsk); | 9418 | ca = task_ca(tsk); | 
| 9373 | if (ca) { | ||
| 9374 | u64 *cpuusage = percpu_ptr(ca->cpuusage, task_cpu(tsk)); | ||
| 9375 | 9419 | ||
| 9420 | for (; ca; ca = ca->parent) { | ||
| 9421 | u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu); | ||
| 9376 | *cpuusage += cputime; | 9422 | *cpuusage += cputime; | 
| 9377 | } | 9423 | } | 
| 9378 | } | 9424 | } | 
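With the parent pointer in place, cpuacct_charge() now walks from the task's group up to the root, so a parent cgroup's usage (and the new usage_percpu file) includes all of its descendants. The charge path runs from the scheduler with that CPU's rq->lock held, which is why cpuacct_cpuusage_read()/write() take the same lock on 32-bit: it makes the 64-bit counter access appear atomic against the updater. Sketch of the aggregation, assuming a task in a hierarchy /A/B:

	/* cpuacct_charge(task_in_B, delta) ends up doing:
	 *   B->cpuusage[cpu]    += delta;   (task's own group)
	 *   A->cpuusage[cpu]    += delta;   (B->parent)
	 *   root->cpuusage[cpu] += delta;   (A->parent)
	 * so reading A's usage already includes B's cpu time. */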
