path: root/kernel/sched.c
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--    kernel/sched.c    231
1 file changed, 187 insertions(+), 44 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 73513f4e19df..26efa475bdc1 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -231,13 +231,20 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
 
         spin_lock(&rt_b->rt_runtime_lock);
         for (;;) {
+                unsigned long delta;
+                ktime_t soft, hard;
+
                 if (hrtimer_active(&rt_b->rt_period_timer))
                         break;
 
                 now = hrtimer_cb_get_time(&rt_b->rt_period_timer);
                 hrtimer_forward(&rt_b->rt_period_timer, now, rt_b->rt_period);
-                hrtimer_start_expires(&rt_b->rt_period_timer,
-                                HRTIMER_MODE_ABS);
+
+                soft = hrtimer_get_softexpires(&rt_b->rt_period_timer);
+                hard = hrtimer_get_expires(&rt_b->rt_period_timer);
+                delta = ktime_to_ns(ktime_sub(hard, soft));
+                __hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta,
+                                HRTIMER_MODE_ABS, 0);
         }
         spin_unlock(&rt_b->rt_runtime_lock);
 }
@@ -1110,7 +1117,7 @@ static void hrtick_start(struct rq *rq, u64 delay)
         if (rq == this_rq()) {
                 hrtimer_restart(timer);
         } else if (!rq->hrtick_csd_pending) {
-                __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd);
+                __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd, 0);
                 rq->hrtick_csd_pending = 1;
         }
 }
@@ -1146,7 +1153,8 @@ static __init void init_hrtick(void)
  */
 static void hrtick_start(struct rq *rq, u64 delay)
 {
-        hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), HRTIMER_MODE_REL);
+        __hrtimer_start_range_ns(&rq->hrtick_timer, ns_to_ktime(delay), 0,
+                        HRTIMER_MODE_REL, 0);
 }
 
 static inline void init_hrtick(void)
@@ -1410,10 +1418,22 @@ iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
                    struct rq_iterator *iterator);
 #endif
 
+/* Time spent by the tasks of the cpu accounting group executing in ... */
+enum cpuacct_stat_index {
+        CPUACCT_STAT_USER,      /* ... user mode */
+        CPUACCT_STAT_SYSTEM,    /* ... kernel mode */
+
+        CPUACCT_STAT_NSTATS,
+};
+
 #ifdef CONFIG_CGROUP_CPUACCT
 static void cpuacct_charge(struct task_struct *tsk, u64 cputime);
+static void cpuacct_update_stats(struct task_struct *tsk,
+                enum cpuacct_stat_index idx, cputime_t val);
 #else
 static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
+static inline void cpuacct_update_stats(struct task_struct *tsk,
+                enum cpuacct_stat_index idx, cputime_t val) {}
 #endif
 
 static inline void inc_cpu_load(struct rq *rq, unsigned long load)
@@ -3818,19 +3838,23 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
  */
 #define MAX_PINNED_INTERVAL     512
 
+/* Working cpumask for load_balance and load_balance_newidle. */
+static DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask);
+
 /*
  * Check this_cpu to ensure it is balanced within domain. Attempt to move
  * tasks if there is an imbalance.
  */
 static int load_balance(int this_cpu, struct rq *this_rq,
                         struct sched_domain *sd, enum cpu_idle_type idle,
-                        int *balance, struct cpumask *cpus)
+                        int *balance)
 {
         int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
         struct sched_group *group;
         unsigned long imbalance;
         struct rq *busiest;
         unsigned long flags;
+        struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
 
         cpumask_setall(cpus);
 
@@ -3985,8 +4009,7 @@ out:
  * this_rq is locked.
  */
 static int
-load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd,
-                        struct cpumask *cpus)
+load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
 {
         struct sched_group *group;
         struct rq *busiest = NULL;
@@ -3994,6 +4017,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd,
         int ld_moved = 0;
         int sd_idle = 0;
         int all_pinned = 0;
+        struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
 
         cpumask_setall(cpus);
 
@@ -4134,10 +4158,6 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
         struct sched_domain *sd;
         int pulled_task = 0;
         unsigned long next_balance = jiffies + HZ;
-        cpumask_var_t tmpmask;
-
-        if (!alloc_cpumask_var(&tmpmask, GFP_ATOMIC))
-                return;
 
         for_each_domain(this_cpu, sd) {
                 unsigned long interval;
@@ -4148,7 +4168,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
                 if (sd->flags & SD_BALANCE_NEWIDLE)
                         /* If we've pulled tasks over stop searching: */
                         pulled_task = load_balance_newidle(this_cpu, this_rq,
-                                                           sd, tmpmask);
+                                                           sd);
 
                 interval = msecs_to_jiffies(sd->balance_interval);
                 if (time_after(next_balance, sd->last_balance + interval))
@@ -4163,7 +4183,6 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
                  */
                 this_rq->next_balance = next_balance;
         }
-        free_cpumask_var(tmpmask);
 }
 
 /*
@@ -4313,11 +4332,6 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
         unsigned long next_balance = jiffies + 60*HZ;
         int update_next_balance = 0;
         int need_serialize;
-        cpumask_var_t tmp;
-
-        /* Fails alloc? Rebalancing probably not a priority right now. */
-        if (!alloc_cpumask_var(&tmp, GFP_ATOMIC))
-                return;
 
         for_each_domain(cpu, sd) {
                 if (!(sd->flags & SD_LOAD_BALANCE))
@@ -4342,7 +4356,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
                 }
 
                 if (time_after_eq(jiffies, sd->last_balance + interval)) {
-                        if (load_balance(cpu, rq, sd, idle, &balance, tmp)) {
+                        if (load_balance(cpu, rq, sd, idle, &balance)) {
                                 /*
                                  * We've pulled tasks over so either we're no
                                  * longer idle, or one of our SMT siblings is
@@ -4376,8 +4390,6 @@ out:
          */
         if (likely(update_next_balance))
                 rq->next_balance = next_balance;
-
-        free_cpumask_var(tmp);
 }
 
 /*
@@ -4511,9 +4523,25 @@ DEFINE_PER_CPU(struct kernel_stat, kstat);
 EXPORT_PER_CPU_SYMBOL(kstat);
 
 /*
- * Return any ns on the sched_clock that have not yet been banked in
+ * Return any ns on the sched_clock that have not yet been accounted in
  * @p in case that task is currently running.
+ *
+ * Called with task_rq_lock() held on @rq.
  */
+static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq)
+{
+        u64 ns = 0;
+
+        if (task_current(rq, p)) {
+                update_rq_clock(rq);
+                ns = rq->clock - p->se.exec_start;
+                if ((s64)ns < 0)
+                        ns = 0;
+        }
+
+        return ns;
+}
+
 unsigned long long task_delta_exec(struct task_struct *p)
 {
         unsigned long flags;
@@ -4521,16 +4549,49 @@ unsigned long long task_delta_exec(struct task_struct *p)
         u64 ns = 0;
 
         rq = task_rq_lock(p, &flags);
+        ns = do_task_delta_exec(p, rq);
+        task_rq_unlock(rq, &flags);
 
-        if (task_current(rq, p)) {
-                u64 delta_exec;
-
-                update_rq_clock(rq);
-                delta_exec = rq->clock - p->se.exec_start;
-                if ((s64)delta_exec > 0)
-                        ns = delta_exec;
-        }
+        return ns;
+}
+
+/*
+ * Return accounted runtime for the task.
+ * In case the task is currently running, return the runtime plus current's
+ * pending runtime that have not been accounted yet.
+ */
+unsigned long long task_sched_runtime(struct task_struct *p)
+{
+        unsigned long flags;
+        struct rq *rq;
+        u64 ns = 0;
+
+        rq = task_rq_lock(p, &flags);
+        ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq);
+        task_rq_unlock(rq, &flags);
+
+        return ns;
+}
 
+/*
+ * Return sum_exec_runtime for the thread group.
+ * In case the task is currently running, return the sum plus current's
+ * pending runtime that have not been accounted yet.
+ *
+ * Note that the thread group might have other running tasks as well,
+ * so the return value not includes other pending runtime that other
+ * running tasks might have.
+ */
+unsigned long long thread_group_sched_runtime(struct task_struct *p)
+{
+        struct task_cputime totals;
+        unsigned long flags;
+        struct rq *rq;
+        u64 ns;
+
+        rq = task_rq_lock(p, &flags);
+        thread_group_cputime(p, &totals);
+        ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq);
         task_rq_unlock(rq, &flags);
 
         return ns;
@@ -4559,6 +4620,8 @@ void account_user_time(struct task_struct *p, cputime_t cputime,
                 cpustat->nice = cputime64_add(cpustat->nice, tmp);
         else
                 cpustat->user = cputime64_add(cpustat->user, tmp);
+
+        cpuacct_update_stats(p, CPUACCT_STAT_USER, cputime);
         /* Account for user time used */
         acct_update_integrals(p);
 }
@@ -4620,6 +4683,8 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
         else
                 cpustat->system = cputime64_add(cpustat->system, tmp);
 
+        cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime);
+
         /* Account for system time used */
         acct_update_integrals(p);
 }
@@ -4667,7 +4732,7 @@ void account_process_tick(struct task_struct *p, int user_tick)
 
         if (user_tick)
                 account_user_time(p, one_jiffy, one_jiffy_scaled);
-        else if (p != rq->idle)
+        else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
                 account_system_time(p, HARDIRQ_OFFSET, one_jiffy,
                                     one_jiffy_scaled);
         else
@@ -4781,10 +4846,7 @@ void scheduler_tick(void)
 #endif
 }
 
-#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
-                                defined(CONFIG_PREEMPT_TRACER))
-
-static inline unsigned long get_parent_ip(unsigned long addr)
+notrace unsigned long get_parent_ip(unsigned long addr)
 {
         if (in_lock_functions(addr)) {
                 addr = CALLER_ADDR2;
@@ -4794,6 +4856,9 @@ static inline unsigned long get_parent_ip(unsigned long addr)
         return addr;
 }
 
+#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
+                                defined(CONFIG_PREEMPT_TRACER))
+
 void __kprobes add_preempt_count(int val)
 {
 #ifdef CONFIG_DEBUG_PREEMPT
@@ -7302,7 +7367,12 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
                 cpumask_or(groupmask, groupmask, sched_group_cpus(group));
 
                 cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
+
                 printk(KERN_CONT " %s", str);
+                if (group->__cpu_power != SCHED_LOAD_SCALE) {
+                        printk(KERN_CONT " (__cpu_power = %d)",
+                                group->__cpu_power);
+                }
 
                 group = group->next;
         } while (group != sd->groups);
@@ -7728,7 +7798,7 @@ cpu_to_core_group(int cpu, const struct cpumask *cpu_map,
 {
         int group;
 
-        cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map);
+        cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
         group = cpumask_first(mask);
         if (sg)
                 *sg = &per_cpu(sched_group_core, group).sg;
@@ -7757,7 +7827,7 @@ cpu_to_phys_group(int cpu, const struct cpumask *cpu_map,
         cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map);
         group = cpumask_first(mask);
 #elif defined(CONFIG_SCHED_SMT)
-        cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map);
+        cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
         group = cpumask_first(mask);
 #else
         group = cpu;
@@ -8100,7 +8170,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
                 SD_INIT(sd, SIBLING);
                 set_domain_attribute(sd, attr);
                 cpumask_and(sched_domain_span(sd),
-                            &per_cpu(cpu_sibling_map, i), cpu_map);
+                            topology_thread_cpumask(i), cpu_map);
                 sd->parent = p;
                 p->child = sd;
                 cpu_to_cpu_group(i, cpu_map, &sd->groups, tmpmask);
@@ -8111,7 +8181,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
         /* Set up CPU (sibling) groups */
         for_each_cpu(i, cpu_map) {
                 cpumask_and(this_sibling_map,
-                            &per_cpu(cpu_sibling_map, i), cpu_map);
+                            topology_thread_cpumask(i), cpu_map);
                 if (i != cpumask_first(this_sibling_map))
                         continue;
 
@@ -8787,6 +8857,9 @@ void __init sched_init(void)
 #ifdef CONFIG_USER_SCHED
         alloc_size *= 2;
 #endif
+#ifdef CONFIG_CPUMASK_OFFSTACK
+        alloc_size += num_possible_cpus() * cpumask_size();
+#endif
         /*
          * As sched_init() is called before page_alloc is setup,
          * we use alloc_bootmem().
@@ -8824,6 +8897,12 @@ void __init sched_init(void)
                 ptr += nr_cpu_ids * sizeof(void **);
 #endif /* CONFIG_USER_SCHED */
 #endif /* CONFIG_RT_GROUP_SCHED */
+#ifdef CONFIG_CPUMASK_OFFSTACK
+                for_each_possible_cpu(i) {
+                        per_cpu(load_balance_tmpmask, i) = (void *)ptr;
+                        ptr += cpumask_size();
+                }
+#endif /* CONFIG_CPUMASK_OFFSTACK */
         }
 
 #ifdef CONFIG_SMP
@@ -9916,6 +9995,7 @@ struct cpuacct {
         struct cgroup_subsys_state css;
         /* cpuusage holds pointer to a u64-type object on every cpu */
         u64 *cpuusage;
+        struct percpu_counter cpustat[CPUACCT_STAT_NSTATS];
         struct cpuacct *parent;
 };
 
@@ -9940,20 +10020,32 @@ static struct cgroup_subsys_state *cpuacct_create(
         struct cgroup_subsys *ss, struct cgroup *cgrp)
 {
         struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
+        int i;
 
         if (!ca)
-                return ERR_PTR(-ENOMEM);
+                goto out;
 
         ca->cpuusage = alloc_percpu(u64);
-        if (!ca->cpuusage) {
-                kfree(ca);
-                return ERR_PTR(-ENOMEM);
-        }
+        if (!ca->cpuusage)
+                goto out_free_ca;
+
+        for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
+                if (percpu_counter_init(&ca->cpustat[i], 0))
+                        goto out_free_counters;
 
         if (cgrp->parent)
                 ca->parent = cgroup_ca(cgrp->parent);
 
         return &ca->css;
+
+out_free_counters:
+        while (--i >= 0)
+                percpu_counter_destroy(&ca->cpustat[i]);
+        free_percpu(ca->cpuusage);
+out_free_ca:
+        kfree(ca);
+out:
+        return ERR_PTR(-ENOMEM);
 }
 
 /* destroy an existing cpu accounting group */
@@ -9961,7 +10053,10 @@ static void
 cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
 {
         struct cpuacct *ca = cgroup_ca(cgrp);
+        int i;
 
+        for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
+                percpu_counter_destroy(&ca->cpustat[i]);
         free_percpu(ca->cpuusage);
         kfree(ca);
 }
@@ -10048,6 +10143,25 @@ static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft,
         return 0;
 }
 
+static const char *cpuacct_stat_desc[] = {
+        [CPUACCT_STAT_USER] = "user",
+        [CPUACCT_STAT_SYSTEM] = "system",
+};
+
+static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft,
+                struct cgroup_map_cb *cb)
+{
+        struct cpuacct *ca = cgroup_ca(cgrp);
+        int i;
+
+        for (i = 0; i < CPUACCT_STAT_NSTATS; i++) {
+                s64 val = percpu_counter_read(&ca->cpustat[i]);
+                val = cputime64_to_clock_t(val);
+                cb->fill(cb, cpuacct_stat_desc[i], val);
+        }
+        return 0;
+}
+
 static struct cftype files[] = {
         {
                 .name = "usage",
@@ -10058,7 +10172,10 @@ static struct cftype files[] = {
                 .name = "usage_percpu",
                 .read_seq_string = cpuacct_percpu_seq_read,
         },
-
+        {
+                .name = "stat",
+                .read_map = cpuacct_stats_show,
+        },
 };
 
 static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
@@ -10080,12 +10197,38 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
                 return;
 
         cpu = task_cpu(tsk);
+
+        rcu_read_lock();
+
         ca = task_ca(tsk);
 
         for (; ca; ca = ca->parent) {
                 u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
                 *cpuusage += cputime;
         }
+
+        rcu_read_unlock();
+}
+
+/*
+ * Charge the system/user time to the task's accounting group.
+ */
+static void cpuacct_update_stats(struct task_struct *tsk,
+                enum cpuacct_stat_index idx, cputime_t val)
+{
+        struct cpuacct *ca;
+
+        if (unlikely(!cpuacct_subsys.active))
+                return;
+
+        rcu_read_lock();
+        ca = task_ca(tsk);
+
+        do {
+                percpu_counter_add(&ca->cpustat[idx], val);
+                ca = ca->parent;
+        } while (ca);
+        rcu_read_unlock();
 }
 
 struct cgroup_subsys cpuacct_subsys = {
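
For reference (not part of the diff itself): the cpuacct_stats_show() and cftype hunks above expose a new per-cgroup cpuacct.stat file, written as one "user <value>" line and one "system <value>" line, with values converted to USER_HZ ticks by cputime64_to_clock_t(). Below is a minimal userspace sketch that parses that format; the mount point /sys/fs/cgroup/cpuacct is an assumption and depends on where the cpuacct controller is mounted on a given system.

/*
 * Minimal sketch: read and print the cpuacct.stat format added by this diff.
 * The cgroup mount point below is an assumption; adjust it for your setup.
 */
#include <stdio.h>

int main(void)
{
        char key[32];
        long long ticks;
        FILE *f = fopen("/sys/fs/cgroup/cpuacct/cpuacct.stat", "r");

        if (!f) {
                perror("fopen cpuacct.stat");
                return 1;
        }
        /* Each line is "<name> <value>", e.g. "user 1234" / "system 567". */
        while (fscanf(f, "%31s %lld", key, &ticks) == 2)
                printf("%s: %lld USER_HZ ticks\n", key, ticks);
        fclose(f);
        return 0;
}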