Diffstat (limited to 'kernel/sched.c')
 kernel/sched.c |  419
 1 file changed, 231 insertions, 188 deletions

diff --git a/kernel/sched.c b/kernel/sched.c
index 9b1e79371c20..fff1c4a20b65 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -118,6 +118,12 @@
  */
 #define RUNTIME_INF	((u64)~0ULL)
 
+DEFINE_TRACE(sched_wait_task);
+DEFINE_TRACE(sched_wakeup);
+DEFINE_TRACE(sched_wakeup_new);
+DEFINE_TRACE(sched_switch);
+DEFINE_TRACE(sched_migrate_task);
+
 #ifdef CONFIG_SMP
 /*
  * Divide a load by a sched group cpu_power : (load / sg->__cpu_power)
@@ -203,7 +209,6 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
 	hrtimer_init(&rt_b->rt_period_timer,
 			CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	rt_b->rt_period_timer.function = sched_rt_period_timer;
-	rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED;
 }
 
 static inline int rt_bandwidth_enabled(void)
@@ -261,6 +266,10 @@ struct task_group {
 	struct cgroup_subsys_state css;
 #endif
 
+#ifdef CONFIG_USER_SCHED
+	uid_t uid;
+#endif
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	/* schedulable entities of this group on each cpu */
 	struct sched_entity **se;
@@ -286,6 +295,12 @@ struct task_group {
 
 #ifdef CONFIG_USER_SCHED
 
+/* Helper function to pass uid information to create_sched_user() */
+void set_tg_uid(struct user_struct *user)
+{
+	user->tg->uid = user->uid;
+}
+
 /*
  * Root task group.
  *	Every UID task group (including init_task_group aka UID-0) will
@@ -345,7 +360,9 @@ static inline struct task_group *task_group(struct task_struct *p)
 	struct task_group *tg;
 
 #ifdef CONFIG_USER_SCHED
-	tg = p->user->tg;
+	rcu_read_lock();
+	tg = __task_cred(p)->user->tg;
+	rcu_read_unlock();
 #elif defined(CONFIG_CGROUP_SCHED)
 	tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id),
 			  struct task_group, css);
@@ -586,6 +603,8 @@ struct rq {
 #ifdef CONFIG_SCHEDSTATS
 	/* latency stats */
 	struct sched_info rq_sched_info;
+	unsigned long long rq_cpu_time;
+	/* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */
 
 	/* sys_sched_yield() stats */
 	unsigned int yld_exp_empty;
@@ -703,45 +722,18 @@ static __read_mostly char *sched_feat_names[] = {
 
 #undef SCHED_FEAT
 
-static int sched_feat_open(struct inode *inode, struct file *filp)
-{
-	filp->private_data = inode->i_private;
-	return 0;
-}
-
-static ssize_t
-sched_feat_read(struct file *filp, char __user *ubuf,
-		size_t cnt, loff_t *ppos)
+static int sched_feat_show(struct seq_file *m, void *v)
 {
-	char *buf;
-	int r = 0;
-	int len = 0;
 	int i;
 
 	for (i = 0; sched_feat_names[i]; i++) {
-		len += strlen(sched_feat_names[i]);
-		len += 4;
-	}
-
-	buf = kmalloc(len + 2, GFP_KERNEL);
-	if (!buf)
-		return -ENOMEM;
-
-	for (i = 0; sched_feat_names[i]; i++) {
-		if (sysctl_sched_features & (1UL << i))
-			r += sprintf(buf + r, "%s ", sched_feat_names[i]);
-		else
-			r += sprintf(buf + r, "NO_%s ", sched_feat_names[i]);
+		if (!(sysctl_sched_features & (1UL << i)))
+			seq_puts(m, "NO_");
+		seq_printf(m, "%s ", sched_feat_names[i]);
 	}
+	seq_puts(m, "\n");
 
-	r += sprintf(buf + r, "\n");
-	WARN_ON(r >= len + 2);
-
-	r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
-
-	kfree(buf);
-
-	return r;
+	return 0;
 }
 
 static ssize_t
@@ -786,10 +778,17 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
 	return cnt;
 }
 
+static int sched_feat_open(struct inode *inode, struct file *filp)
+{
+	return single_open(filp, sched_feat_show, NULL);
+}
+
 static struct file_operations sched_feat_fops = {
 	.open		= sched_feat_open,
-	.read		= sched_feat_read,
-	.write		= sched_feat_write,
+	.write		= sched_feat_write,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
 };
 
 static __init int sched_init_debug(void)
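
For reference, a minimal sketch (illustrative names, not taken from the patch) of the seq_file single_open() pattern the hunk above converts sched_feat_fops to; the show() callback emits everything and seq_file does the buffering and partial-read handling that sched_feat_read() used to do by hand:

#include <linux/seq_file.h>
#include <linux/fs.h>

static int example_show(struct seq_file *m, void *v)
{
	/* emit the whole content in one go */
	seq_printf(m, "value: %d\n", 42);
	return 0;
}

static int example_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, example_show, inode->i_private);
}

static const struct file_operations example_fops = {
	.open		= example_open,
	.read		= seq_read,		/* generic seq_file read */
	.llseek		= seq_lseek,
	.release	= single_release,
};
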
@@ -1139,7 +1138,6 @@ static void init_rq_hrtick(struct rq *rq)
 
 	hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	rq->hrtick_timer.function = hrtick;
-	rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
 }
 #else	/* CONFIG_SCHED_HRTICK */
 static inline void hrtick_clear(struct rq *rq)
@@ -1453,9 +1451,10 @@ static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
 static unsigned long cpu_avg_load_per_task(int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
+	unsigned long nr_running = ACCESS_ONCE(rq->nr_running);
 
-	if (rq->nr_running)
-		rq->avg_load_per_task = rq->load.weight / rq->nr_running;
+	if (nr_running)
+		rq->avg_load_per_task = rq->load.weight / nr_running;
 	else
 		rq->avg_load_per_task = 0;
 
@@ -1473,27 +1472,13 @@ static void
 update_group_shares_cpu(struct task_group *tg, int cpu,
 			unsigned long sd_shares, unsigned long sd_rq_weight)
 {
-	int boost = 0;
 	unsigned long shares;
 	unsigned long rq_weight;
 
 	if (!tg->se[cpu])
 		return;
 
-	rq_weight = tg->cfs_rq[cpu]->load.weight;
-
-	/*
-	 * If there are currently no tasks on the cpu pretend there is one of
-	 * average load so that when a new task gets to run here it will not
-	 * get delayed by group starvation.
-	 */
-	if (!rq_weight) {
-		boost = 1;
-		rq_weight = NICE_0_LOAD;
-	}
-
-	if (unlikely(rq_weight > sd_rq_weight))
-		rq_weight = sd_rq_weight;
+	rq_weight = tg->cfs_rq[cpu]->rq_weight;
 
 	/*
	 *		\Sum shares * rq_weight
@@ -1501,7 +1486,7 @@ update_group_shares_cpu(struct task_group *tg, int cpu,
	 *		\Sum rq_weight
	 *
	 */
-	shares = (sd_shares * rq_weight) / (sd_rq_weight + 1);
+	shares = (sd_shares * rq_weight) / sd_rq_weight;
 	shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
 
 	if (abs(shares - tg->se[cpu]->load.weight) >
@@ -1510,11 +1495,7 @@ update_group_shares_cpu(struct task_group *tg, int cpu,
 		unsigned long flags;
 
 		spin_lock_irqsave(&rq->lock, flags);
-		/*
-		 * record the actual number of shares, not the boosted amount.
-		 */
-		tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
-		tg->cfs_rq[cpu]->rq_weight = rq_weight;
+		tg->cfs_rq[cpu]->shares = shares;
 
 		__set_se_shares(tg->se[cpu], shares);
 		spin_unlock_irqrestore(&rq->lock, flags);
@@ -1528,13 +1509,23 @@ update_group_shares_cpu(struct task_group *tg, int cpu,
  */
 static int tg_shares_up(struct task_group *tg, void *data)
 {
-	unsigned long rq_weight = 0;
+	unsigned long weight, rq_weight = 0;
 	unsigned long shares = 0;
 	struct sched_domain *sd = data;
 	int i;
 
 	for_each_cpu_mask(i, sd->span) {
-		rq_weight += tg->cfs_rq[i]->load.weight;
+		/*
+		 * If there are currently no tasks on the cpu pretend there
+		 * is one of average load so that when a new task gets to
+		 * run here it will not get delayed by group starvation.
+		 */
+		weight = tg->cfs_rq[i]->load.weight;
+		if (!weight)
+			weight = NICE_0_LOAD;
+
+		tg->cfs_rq[i]->rq_weight = weight;
+		rq_weight += weight;
 		shares += tg->cfs_rq[i]->shares;
 	}
 
@@ -1544,9 +1535,6 @@ static int tg_shares_up(struct task_group *tg, void *data)
 	if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE))
 		shares = tg->shares;
 
-	if (!rq_weight)
-		rq_weight = cpus_weight(sd->span) * NICE_0_LOAD;
-
 	for_each_cpu_mask(i, sd->span)
 		update_group_shares_cpu(tg, i, shares, rq_weight);
 
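
For reference, the reworked code above distributes the group's sd_shares across cpus in proportion to each cpu's rq_weight, with idle cpus pretending to carry NICE_0_LOAD so new tasks are not starved. A worked example with made-up numbers (illustrative, not from the patch): with sd_shares = 1024 and two cpus whose rq_weight is 2048 and 1024, the summed rq_weight is 3072, so the per-cpu shares come out as 1024 * 2048 / 3072 = 682 and 1024 * 1024 / 3072 = 341, each then clamped to [MIN_SHARES, MAX_SHARES].
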
@@ -1611,6 +1599,39 @@ static inline void update_shares_locked(struct rq *rq, struct sched_domain *sd)
 
 #endif
 
+/*
+ * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
+ */
+static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
+	__releases(this_rq->lock)
+	__acquires(busiest->lock)
+	__acquires(this_rq->lock)
+{
+	int ret = 0;
+
+	if (unlikely(!irqs_disabled())) {
+		/* printk() doesn't work good under rq->lock */
+		spin_unlock(&this_rq->lock);
+		BUG_ON(1);
+	}
+	if (unlikely(!spin_trylock(&busiest->lock))) {
+		if (busiest < this_rq) {
+			spin_unlock(&this_rq->lock);
+			spin_lock(&busiest->lock);
+			spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING);
+			ret = 1;
+		} else
+			spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING);
+	}
+	return ret;
+}
+
+static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
+	__releases(busiest->lock)
+{
+	spin_unlock(&busiest->lock);
+	lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
+}
 #endif
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
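
The double_lock_balance() added above (moved up from later in the file, see the removal below) takes a second runqueue lock without deadlocking: if the trylock on busiest fails and busiest has the lower address, this_rq->lock is dropped and both locks are retaken in address order, with the return value telling the caller its lock was dropped. A stripped-down sketch of the same address-ordering idiom (illustrative only, assumes two distinct runqueues, not code from the patch):

static void lock_two_rqs(struct rq *a, struct rq *b)
{
	/* take the lower-addressed lock first so concurrent callers agree */
	if (a < b) {
		spin_lock(&a->lock);
		spin_lock_nested(&b->lock, SINGLE_DEPTH_NESTING);
	} else {
		spin_lock(&b->lock);
		spin_lock_nested(&a->lock, SINGLE_DEPTH_NESTING);
	}
}
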
@@ -1844,6 +1865,8 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 
 	clock_offset = old_rq->clock - new_rq->clock;
 
+	trace_sched_migrate_task(p, task_cpu(p), new_cpu);
+
 #ifdef CONFIG_SCHEDSTATS
 	if (p->se.wait_start)
 		p->se.wait_start -= clock_offset;
@@ -2253,6 +2276,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
 
 	smp_wmb();
 	rq = task_rq_lock(p, &flags);
+	update_rq_clock(rq);
 	old_state = p->state;
 	if (!(old_state & state))
 		goto out;
@@ -2310,12 +2334,11 @@ out_activate:
 		schedstat_inc(p, se.nr_wakeups_local);
 	else
 		schedstat_inc(p, se.nr_wakeups_remote);
-	update_rq_clock(rq);
 	activate_task(rq, p, 1);
 	success = 1;
 
 out_running:
-	trace_sched_wakeup(rq, p);
+	trace_sched_wakeup(rq, p, success);
 	check_preempt_curr(rq, p, sync);
 
 	p->state = TASK_RUNNING;
@@ -2448,7 +2471,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 		p->sched_class->task_new(rq, p);
 		inc_nr_running(rq);
 	}
-	trace_sched_wakeup_new(rq, p);
+	trace_sched_wakeup_new(rq, p, 1);
 	check_preempt_curr(rq, p, 0);
 #ifdef CONFIG_SMP
 	if (p->sched_class->task_wake_up)
@@ -2811,40 +2834,6 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
 }
 
 /*
- * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
- */
-static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
-	__releases(this_rq->lock)
-	__acquires(busiest->lock)
-	__acquires(this_rq->lock)
-{
-	int ret = 0;
-
-	if (unlikely(!irqs_disabled())) {
-		/* printk() doesn't work good under rq->lock */
-		spin_unlock(&this_rq->lock);
-		BUG_ON(1);
-	}
-	if (unlikely(!spin_trylock(&busiest->lock))) {
-		if (busiest < this_rq) {
-			spin_unlock(&this_rq->lock);
-			spin_lock(&busiest->lock);
-			spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING);
-			ret = 1;
-		} else
-			spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING);
-	}
-	return ret;
-}
-
-static void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
-	__releases(busiest->lock)
-{
-	spin_unlock(&busiest->lock);
-	lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
-}
-
-/*
  * If dest_cpu is allowed for this process, migrate the task to it.
  * This is accomplished by forcing the cpu_allowed mask to only
  * allow dest_cpu, which will force the cpu onto dest_cpu. Then
@@ -2861,7 +2850,6 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu)
 	    || unlikely(!cpu_active(dest_cpu)))
 		goto out;
 
-	trace_sched_migrate_task(rq, p, dest_cpu);
 	/* force the process onto the specified CPU */
 	if (migrate_task(p, dest_cpu, &req)) {
 		/* Need to wait for migration thread (might exit: take ref). */
@@ -3706,7 +3694,7 @@ out_balanced:
 static void idle_balance(int this_cpu, struct rq *this_rq)
 {
 	struct sched_domain *sd;
-	int pulled_task = -1;
+	int pulled_task = 0;
 	unsigned long next_balance = jiffies + HZ;
 	cpumask_t tmpmask;
 
@@ -4202,7 +4190,6 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
 
 	if (p == rq->idle) {
 		p->stime = cputime_add(p->stime, steal);
-		account_group_system_time(p, steal);
 		if (atomic_read(&rq->nr_iowait) > 0)
 			cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
 		else
@@ -4338,7 +4325,7 @@ void __kprobes sub_preempt_count(int val)
 	/*
 	 * Underflow?
 	 */
-	if (DEBUG_LOCKS_WARN_ON(val > preempt_count()))
+	if (DEBUG_LOCKS_WARN_ON(val > preempt_count() - (!!kernel_locked())))
 		return;
 	/*
 	 * Is the spinlock portion underflowing?
@@ -5133,6 +5120,22 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
 	set_load_weight(p);
 }
 
+/*
+ * check the target process has a UID that matches the current process's
+ */
+static bool check_same_owner(struct task_struct *p)
+{
+	const struct cred *cred = current_cred(), *pcred;
+	bool match;
+
+	rcu_read_lock();
+	pcred = __task_cred(p);
+	match = (cred->euid == pcred->euid ||
+		 cred->euid == pcred->uid);
+	rcu_read_unlock();
+	return match;
+}
+
 static int __sched_setscheduler(struct task_struct *p, int policy,
 				struct sched_param *param, bool user)
 {
@@ -5192,8 +5195,7 @@ recheck:
 			return -EPERM;
 
 		/* can't change other user's priorities */
-		if ((current->euid != p->euid) &&
-		    (current->euid != p->uid))
+		if (!check_same_owner(p))
 			return -EPERM;
 	}
 
@@ -5425,8 +5427,7 @@ long sched_setaffinity(pid_t pid, const cpumask_t *in_mask)
 	read_unlock(&tasklist_lock);
 
 	retval = -EPERM;
-	if ((current->euid != p->euid) && (current->euid != p->uid) &&
-			!capable(CAP_SYS_NICE))
+	if (!check_same_owner(p) && !capable(CAP_SYS_NICE))
 		goto out_unlock;
 
 	retval = security_task_setscheduler(p, 0, NULL);
@@ -5895,6 +5896,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
 	 * The idle tasks have their own, simple scheduling class:
 	 */
 	idle->sched_class = &idle_sched_class;
+	ftrace_graph_init_task(idle);
 }
 
 /*
@@ -6125,7 +6127,6 @@ static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu)
 
 /*
  * Figure out where task on dead CPU should go, use force if necessary.
- * NOTE: interrupts should be disabled by the caller
  */
 static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
 {
@@ -6586,7 +6587,9 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 			req = list_entry(rq->migration_queue.next,
 					 struct migration_req, list);
 			list_del_init(&req->list);
+			spin_unlock_irq(&rq->lock);
 			complete(&req->done);
+			spin_lock_irq(&rq->lock);
 		}
 		spin_unlock_irq(&rq->lock);
 		break;
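
The two lines added above release rq->lock across complete(), presumably so the waiter woken by the completion can take rq->lock itself without deadlocking against this path; the lock is reacquired before the loop examines the migration queue again.
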
@@ -6635,28 +6638,6 @@ early_initcall(migration_init);
 
 #ifdef CONFIG_SCHED_DEBUG
 
-static inline const char *sd_level_to_string(enum sched_domain_level lvl)
-{
-	switch (lvl) {
-	case SD_LV_NONE:
-		return "NONE";
-	case SD_LV_SIBLING:
-		return "SIBLING";
-	case SD_LV_MC:
-		return "MC";
-	case SD_LV_CPU:
-		return "CPU";
-	case SD_LV_NODE:
-		return "NODE";
-	case SD_LV_ALLNODES:
-		return "ALLNODES";
-	case SD_LV_MAX:
-		return "MAX";
-
-	}
-	return "MAX";
-}
-
 static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 				  cpumask_t *groupmask)
 {
@@ -6676,8 +6657,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 		return -1;
 	}
 
-	printk(KERN_CONT "span %s level %s\n",
-		str, sd_level_to_string(sd->level));
+	printk(KERN_CONT "span %s level %s\n", str, sd->name);
 
 	if (!cpu_isset(cpu, sd->span)) {
 		printk(KERN_ERR "ERROR: domain->span does not contain "
@@ -6813,6 +6793,8 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
 				SD_BALANCE_EXEC |
 				SD_SHARE_CPUPOWER |
 				SD_SHARE_PKG_RESOURCES);
+		if (nr_node_ids == 1)
+			pflags &= ~SD_SERIALIZE;
 	}
 	if (~cflags & pflags)
 		return 0;
@@ -7333,13 +7315,21 @@ struct allmasks {
 };
 
 #if	NR_CPUS > 128
-#define	SCHED_CPUMASK_ALLOC		1
-#define	SCHED_CPUMASK_FREE(v)		kfree(v)
-#define	SCHED_CPUMASK_DECLARE(v)	struct allmasks *v
+#define SCHED_CPUMASK_DECLARE(v)	struct allmasks *v
+static inline void sched_cpumask_alloc(struct allmasks **masks)
+{
+	*masks = kmalloc(sizeof(**masks), GFP_KERNEL);
+}
+static inline void sched_cpumask_free(struct allmasks *masks)
+{
+	kfree(masks);
+}
 #else
-#define	SCHED_CPUMASK_ALLOC		0
-#define	SCHED_CPUMASK_FREE(v)
-#define	SCHED_CPUMASK_DECLARE(v)	struct allmasks _v, *v = &_v
+#define SCHED_CPUMASK_DECLARE(v)	struct allmasks _v, *v = &_v
+static inline void sched_cpumask_alloc(struct allmasks **masks)
+{ }
+static inline void sched_cpumask_free(struct allmasks *masks)
+{ }
 #endif
 
 #define	SCHED_CPUMASK_VAR(v, a)	cpumask_t *v = (cpumask_t *) \
@@ -7415,9 +7405,8 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 		return -ENOMEM;
 	}
 
-#if SCHED_CPUMASK_ALLOC
 	/* get space for all scratch cpumask variables */
-	allmasks = kmalloc(sizeof(*allmasks), GFP_KERNEL);
+	sched_cpumask_alloc(&allmasks);
 	if (!allmasks) {
 		printk(KERN_WARNING "Cannot alloc cpumask array\n");
 		kfree(rd);
@@ -7426,7 +7415,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 #endif
 		return -ENOMEM;
 	}
-#endif
+
 	tmpmask = (cpumask_t *)allmasks;
 
 
@@ -7680,13 +7669,13 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 		cpu_attach_domain(sd, rd, i);
 	}
 
-	SCHED_CPUMASK_FREE((void *)allmasks);
+	sched_cpumask_free(allmasks);
 	return 0;
 
 #ifdef CONFIG_NUMA
 error:
 	free_sched_groups(cpu_map, tmpmask);
-	SCHED_CPUMASK_FREE((void *)allmasks);
+	sched_cpumask_free(allmasks);
 	kfree(rd);
 	return -ENOMEM;
 #endif
@@ -7709,8 +7698,14 @@ static struct sched_domain_attr *dattr_cur;
  */
 static cpumask_t fallback_doms;
 
-void __attribute__((weak)) arch_update_cpu_topology(void)
+/*
+ * arch_update_cpu_topology lets virtualized architectures update the
+ * cpu core maps. It is supposed to return 1 if the topology changed
+ * or 0 if it stayed the same.
+ */
+int __attribute__((weak)) arch_update_cpu_topology(void)
 {
+	return 0;
 }
 
 /*
@@ -7750,8 +7745,6 @@ static void detach_destroy_domains(const cpumask_t *cpu_map)
 	cpumask_t tmpmask;
 	int i;
 
-	unregister_sched_domain_sysctl();
-
 	for_each_cpu_mask_nr(i, *cpu_map)
 		cpu_attach_domain(NULL, &def_root_domain, i);
 	synchronize_sched();
@@ -7804,17 +7797,21 @@ void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
 			     struct sched_domain_attr *dattr_new)
 {
 	int i, j, n;
+	int new_topology;
 
 	mutex_lock(&sched_domains_mutex);
 
 	/* always unregister in case we don't destroy any domains */
 	unregister_sched_domain_sysctl();
 
+	/* Let architecture update cpu core mappings. */
+	new_topology = arch_update_cpu_topology();
+
 	n = doms_new ? ndoms_new : 0;
 
 	/* Destroy deleted domains */
 	for (i = 0; i < ndoms_cur; i++) {
-		for (j = 0; j < n; j++) {
+		for (j = 0; j < n && !new_topology; j++) {
 			if (cpus_equal(doms_cur[i], doms_new[j])
 			    && dattrs_equal(dattr_cur, i, dattr_new, j))
 				goto match1;
@@ -7829,12 +7826,12 @@ match1:
 		ndoms_cur = 0;
 		doms_new = &fallback_doms;
 		cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
-		dattr_new = NULL;
+		WARN_ON_ONCE(dattr_new);
 	}
 
 	/* Build new domains */
 	for (i = 0; i < ndoms_new; i++) {
-		for (j = 0; j < ndoms_cur; j++) {
+		for (j = 0; j < ndoms_cur && !new_topology; j++) {
 			if (cpus_equal(doms_new[i], doms_cur[j])
 			    && dattrs_equal(dattr_new, i, dattr_cur, j))
 				goto match2;
@@ -8489,7 +8486,7 @@ static
 int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 {
 	struct cfs_rq *cfs_rq;
-	struct sched_entity *se, *parent_se;
+	struct sched_entity *se;
 	struct rq *rq;
 	int i;
 
@@ -8505,18 +8502,17 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 	for_each_possible_cpu(i) {
 		rq = cpu_rq(i);
 
-		cfs_rq = kmalloc_node(sizeof(struct cfs_rq),
-				      GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+		cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
+				      GFP_KERNEL, cpu_to_node(i));
 		if (!cfs_rq)
 			goto err;
 
-		se = kmalloc_node(sizeof(struct sched_entity),
-				  GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+		se = kzalloc_node(sizeof(struct sched_entity),
+				  GFP_KERNEL, cpu_to_node(i));
 		if (!se)
 			goto err;
 
-		parent_se = parent ? parent->se[i] : NULL;
-		init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent_se);
+		init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]);
 	}
 
 	return 1;
@@ -8577,7 +8573,7 @@ static
 int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
 {
 	struct rt_rq *rt_rq;
-	struct sched_rt_entity *rt_se, *parent_se;
+	struct sched_rt_entity *rt_se;
 	struct rq *rq;
 	int i;
 
@@ -8594,18 +8590,17 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
 	for_each_possible_cpu(i) {
 		rq = cpu_rq(i);
 
-		rt_rq = kmalloc_node(sizeof(struct rt_rq),
-				GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+		rt_rq = kzalloc_node(sizeof(struct rt_rq),
+				     GFP_KERNEL, cpu_to_node(i));
 		if (!rt_rq)
 			goto err;
 
-		rt_se = kmalloc_node(sizeof(struct sched_rt_entity),
-				GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+		rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
+				     GFP_KERNEL, cpu_to_node(i));
 		if (!rt_se)
 			goto err;
 
-		parent_se = parent ? parent->rt_se[i] : NULL;
-		init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent_se);
+		init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]);
 	}
 
 	return 1;
@@ -9248,11 +9243,12 @@ struct cgroup_subsys cpu_cgroup_subsys = {
 * (balbir@in.ibm.com).
 */
 
-/* track cpu usage of a group of tasks */
+/* track cpu usage of a group of tasks and its child groups */
 struct cpuacct {
	struct cgroup_subsys_state css;
	/* cpuusage holds pointer to a u64-type object on every cpu */
	u64 *cpuusage;
+	struct cpuacct *parent;
 };
 
 struct cgroup_subsys cpuacct_subsys;
@@ -9286,6 +9282,9 @@ static struct cgroup_subsys_state *cpuacct_create(
 		return ERR_PTR(-ENOMEM);
 	}
 
+	if (cgrp->parent)
+		ca->parent = cgroup_ca(cgrp->parent);
+
 	return &ca->css;
 }
 
@@ -9299,6 +9298,41 @@ cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
 	kfree(ca);
 }
 
+static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu)
+{
+	u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
+	u64 data;
+
+#ifndef CONFIG_64BIT
+	/*
+	 * Take rq->lock to make 64-bit read safe on 32-bit platforms.
+	 */
+	spin_lock_irq(&cpu_rq(cpu)->lock);
+	data = *cpuusage;
+	spin_unlock_irq(&cpu_rq(cpu)->lock);
+#else
+	data = *cpuusage;
+#endif
+
+	return data;
+}
+
+static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
+{
+	u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
+
+#ifndef CONFIG_64BIT
+	/*
+	 * Take rq->lock to make 64-bit write safe on 32-bit platforms.
+	 */
+	spin_lock_irq(&cpu_rq(cpu)->lock);
+	*cpuusage = val;
+	spin_unlock_irq(&cpu_rq(cpu)->lock);
+#else
+	*cpuusage = val;
+#endif
+}
+
 /* return total cpu usage (in nanoseconds) of a group */
 static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft)
 {
@@ -9306,17 +9340,8 @@ static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft)
 	u64 totalcpuusage = 0;
 	int i;
 
-	for_each_possible_cpu(i) {
-		u64 *cpuusage = percpu_ptr(ca->cpuusage, i);
-
-		/*
-		 * Take rq->lock to make 64-bit addition safe on 32-bit
-		 * platforms.
-		 */
-		spin_lock_irq(&cpu_rq(i)->lock);
-		totalcpuusage += *cpuusage;
-		spin_unlock_irq(&cpu_rq(i)->lock);
-	}
+	for_each_present_cpu(i)
+		totalcpuusage += cpuacct_cpuusage_read(ca, i);
 
 	return totalcpuusage;
 }
@@ -9333,23 +9358,39 @@ static int cpuusage_write(struct cgroup *cgrp, struct cftype *cftype,
 		goto out;
 	}
 
-	for_each_possible_cpu(i) {
-		u64 *cpuusage = percpu_ptr(ca->cpuusage, i);
+	for_each_present_cpu(i)
+		cpuacct_cpuusage_write(ca, i, 0);
 
-		spin_lock_irq(&cpu_rq(i)->lock);
-		*cpuusage = 0;
-		spin_unlock_irq(&cpu_rq(i)->lock);
-	}
 out:
 	return err;
 }
 
+static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft,
+				   struct seq_file *m)
+{
+	struct cpuacct *ca = cgroup_ca(cgroup);
+	u64 percpu;
+	int i;
+
+	for_each_present_cpu(i) {
+		percpu = cpuacct_cpuusage_read(ca, i);
+		seq_printf(m, "%llu ", (unsigned long long) percpu);
+	}
+	seq_printf(m, "\n");
+	return 0;
+}
+
 static struct cftype files[] = {
 	{
 		.name = "usage",
 		.read_u64 = cpuusage_read,
 		.write_u64 = cpuusage_write,
 	},
+	{
+		.name = "usage_percpu",
+		.read_seq_string = cpuacct_percpu_seq_read,
+	},
+
 };
 
 static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
@@ -9365,14 +9406,16 @@ static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
 static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
 {
 	struct cpuacct *ca;
+	int cpu;
 
 	if (!cpuacct_subsys.active)
 		return;
 
+	cpu = task_cpu(tsk);
 	ca = task_ca(tsk);
-	if (ca) {
-		u64 *cpuusage = percpu_ptr(ca->cpuusage, task_cpu(tsk));
 
+	for (; ca; ca = ca->parent) {
+		u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
 		*cpuusage += cputime;
 	}
 }
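
The final hunk makes cpuacct charging hierarchical: walking ca->parent means a tick charged to a task in a nested group is added to that group and to every ancestor, so a parent group's usage file reports the sum over its subtree. For instance (illustrative hierarchy, not from the patch), with groups /cpuacct/A/B and a task running in B, a 1 ms charge shows up in both B's and A's usage.
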