diff options
Diffstat (limited to 'kernel/sched.c')
-rw-r--r-- | kernel/sched.c | 164 |
1 files changed, 149 insertions, 15 deletions
diff --git a/kernel/sched.c b/kernel/sched.c index a69278eef425..2f600e30dcf0 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -1419,10 +1419,22 @@ iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
1419 | struct rq_iterator *iterator); | 1419 | struct rq_iterator *iterator); |
1420 | #endif | 1420 | #endif |
1421 | 1421 | ||
1422 | /* Time spent by the tasks of the cpu accounting group executing in ... */ | ||
1423 | enum cpuacct_stat_index { | ||
1424 | CPUACCT_STAT_USER, /* ... user mode */ | ||
1425 | CPUACCT_STAT_SYSTEM, /* ... kernel mode */ | ||
1426 | |||
1427 | CPUACCT_STAT_NSTATS, | ||
1428 | }; | ||
1429 | |||
1422 | #ifdef CONFIG_CGROUP_CPUACCT | 1430 | #ifdef CONFIG_CGROUP_CPUACCT |
1423 | static void cpuacct_charge(struct task_struct *tsk, u64 cputime); | 1431 | static void cpuacct_charge(struct task_struct *tsk, u64 cputime); |
1432 | static void cpuacct_update_stats(struct task_struct *tsk, | ||
1433 | enum cpuacct_stat_index idx, cputime_t val); | ||
1424 | #else | 1434 | #else |
1425 | static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {} | 1435 | static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {} |
1436 | static inline void cpuacct_update_stats(struct task_struct *tsk, | ||
1437 | enum cpuacct_stat_index idx, cputime_t val) {} | ||
1426 | #endif | 1438 | #endif |
1427 | 1439 | ||
1428 | static inline void inc_cpu_load(struct rq *rq, unsigned long load) | 1440 | static inline void inc_cpu_load(struct rq *rq, unsigned long load) |
@@ -4547,9 +4559,25 @@ DEFINE_PER_CPU(struct kernel_stat, kstat); | |||
4547 | EXPORT_PER_CPU_SYMBOL(kstat); | 4559 | EXPORT_PER_CPU_SYMBOL(kstat); |
4548 | 4560 | ||
4549 | /* | 4561 | /* |
4550 | * Return any ns on the sched_clock that have not yet been banked in | 4562 | * Return any ns on the sched_clock that have not yet been accounted in |
4551 | * @p in case that task is currently running. | 4563 | * @p in case that task is currently running. |
4564 | * | ||
4565 | * Called with task_rq_lock() held on @rq. | ||
4552 | */ | 4566 | */ |
4567 | static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq) | ||
4568 | { | ||
4569 | u64 ns = 0; | ||
4570 | |||
4571 | if (task_current(rq, p)) { | ||
4572 | update_rq_clock(rq); | ||
4573 | ns = rq->clock - p->se.exec_start; | ||
4574 | if ((s64)ns < 0) | ||
4575 | ns = 0; | ||
4576 | } | ||
4577 | |||
4578 | return ns; | ||
4579 | } | ||
4580 | |||
4553 | unsigned long long task_delta_exec(struct task_struct *p) | 4581 | unsigned long long task_delta_exec(struct task_struct *p) |
4554 | { | 4582 | { |
4555 | unsigned long flags; | 4583 | unsigned long flags; |
@@ -4557,16 +4585,49 @@ unsigned long long task_delta_exec(struct task_struct *p) | |||
4557 | u64 ns = 0; | 4585 | u64 ns = 0; |
4558 | 4586 | ||
4559 | rq = task_rq_lock(p, &flags); | 4587 | rq = task_rq_lock(p, &flags); |
4588 | ns = do_task_delta_exec(p, rq); | ||
4589 | task_rq_unlock(rq, &flags); | ||
4560 | 4590 | ||
4561 | if (task_current(rq, p)) { | 4591 | return ns; |
4562 | u64 delta_exec; | 4592 | } |
4563 | 4593 | ||
4564 | update_rq_clock(rq); | 4594 | /* |
4565 | delta_exec = rq->clock - p->se.exec_start; | 4595 | * Return accounted runtime for the task. |
4566 | if ((s64)delta_exec > 0) | 4596 | * In case the task is currently running, return the runtime plus current's |
4567 | ns = delta_exec; | 4597 | * pending runtime that has not been accounted yet. |
4568 | } | 4598 | */ |
4599 | unsigned long long task_sched_runtime(struct task_struct *p) | ||
4600 | { | ||
4601 | unsigned long flags; | ||
4602 | struct rq *rq; | ||
4603 | u64 ns = 0; | ||
4604 | |||
4605 | rq = task_rq_lock(p, &flags); | ||
4606 | ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq); | ||
4607 | task_rq_unlock(rq, &flags); | ||
4608 | |||
4609 | return ns; | ||
4610 | } | ||
4611 | |||
4612 | /* | ||
4613 | * Return sum_exec_runtime for the thread group. | ||
4614 | * In case the task is currently running, return the sum plus current's | ||
4615 | * pending runtime that has not been accounted yet. | ||
4616 | * | ||
4617 | * Note that the thread group might have other running tasks as well, | ||
4618 | * so the return value does not include the pending runtime that other | ||
4619 | * running tasks might have. | ||
4620 | */ | ||
4621 | unsigned long long thread_group_sched_runtime(struct task_struct *p) | ||
4622 | { | ||
4623 | struct task_cputime totals; | ||
4624 | unsigned long flags; | ||
4625 | struct rq *rq; | ||
4626 | u64 ns; | ||
4569 | 4627 | ||
4628 | rq = task_rq_lock(p, &flags); | ||
4629 | thread_group_cputime(p, &totals); | ||
4630 | ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq); | ||
4570 | task_rq_unlock(rq, &flags); | 4631 | task_rq_unlock(rq, &flags); |
4571 | 4632 | ||
4572 | return ns; | 4633 | return ns; |
@@ -4595,6 +4656,8 @@ void account_user_time(struct task_struct *p, cputime_t cputime, | |||
4595 | cpustat->nice = cputime64_add(cpustat->nice, tmp); | 4656 | cpustat->nice = cputime64_add(cpustat->nice, tmp); |
4596 | else | 4657 | else |
4597 | cpustat->user = cputime64_add(cpustat->user, tmp); | 4658 | cpustat->user = cputime64_add(cpustat->user, tmp); |
4659 | |||
4660 | cpuacct_update_stats(p, CPUACCT_STAT_USER, cputime); | ||
4598 | /* Account for user time used */ | 4661 | /* Account for user time used */ |
4599 | acct_update_integrals(p); | 4662 | acct_update_integrals(p); |
4600 | } | 4663 | } |
@@ -4656,6 +4719,8 @@ void account_system_time(struct task_struct *p, int hardirq_offset, | |||
4656 | else | 4719 | else |
4657 | cpustat->system = cputime64_add(cpustat->system, tmp); | 4720 | cpustat->system = cputime64_add(cpustat->system, tmp); |
4658 | 4721 | ||
4722 | cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime); | ||
4723 | |||
4659 | /* Account for system time used */ | 4724 | /* Account for system time used */ |
4660 | acct_update_integrals(p); | 4725 | acct_update_integrals(p); |
4661 | } | 4726 | } |
@@ -4818,7 +4883,7 @@ void scheduler_tick(void) | |||
4818 | #endif | 4883 | #endif |
4819 | } | 4884 | } |
4820 | 4885 | ||
4821 | unsigned long get_parent_ip(unsigned long addr) | 4886 | notrace unsigned long get_parent_ip(unsigned long addr) |
4822 | { | 4887 | { |
4823 | if (in_lock_functions(addr)) { | 4888 | if (in_lock_functions(addr)) { |
4824 | addr = CALLER_ADDR2; | 4889 | addr = CALLER_ADDR2; |
@@ -7340,7 +7405,12 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, | |||
7340 | cpumask_or(groupmask, groupmask, sched_group_cpus(group)); | 7405 | cpumask_or(groupmask, groupmask, sched_group_cpus(group)); |
7341 | 7406 | ||
7342 | cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); | 7407 | cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); |
7408 | |||
7343 | printk(KERN_CONT " %s", str); | 7409 | printk(KERN_CONT " %s", str); |
7410 | if (group->__cpu_power != SCHED_LOAD_SCALE) { | ||
7411 | printk(KERN_CONT " (__cpu_power = %d)", | ||
7412 | group->__cpu_power); | ||
7413 | } | ||
7344 | 7414 | ||
7345 | group = group->next; | 7415 | group = group->next; |
7346 | } while (group != sd->groups); | 7416 | } while (group != sd->groups); |
@@ -9963,6 +10033,7 @@ struct cpuacct { | |||
9963 | struct cgroup_subsys_state css; | 10033 | struct cgroup_subsys_state css; |
9964 | /* cpuusage holds pointer to a u64-type object on every cpu */ | 10034 | /* cpuusage holds pointer to a u64-type object on every cpu */ |
9965 | u64 *cpuusage; | 10035 | u64 *cpuusage; |
10036 | struct percpu_counter cpustat[CPUACCT_STAT_NSTATS]; | ||
9966 | struct cpuacct *parent; | 10037 | struct cpuacct *parent; |
9967 | }; | 10038 | }; |
9968 | 10039 | ||
@@ -9987,20 +10058,32 @@ static struct cgroup_subsys_state *cpuacct_create( | |||
9987 | struct cgroup_subsys *ss, struct cgroup *cgrp) | 10058 | struct cgroup_subsys *ss, struct cgroup *cgrp) |
9988 | { | 10059 | { |
9989 | struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL); | 10060 | struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL); |
10061 | int i; | ||
9990 | 10062 | ||
9991 | if (!ca) | 10063 | if (!ca) |
9992 | return ERR_PTR(-ENOMEM); | 10064 | goto out; |
9993 | 10065 | ||
9994 | ca->cpuusage = alloc_percpu(u64); | 10066 | ca->cpuusage = alloc_percpu(u64); |
9995 | if (!ca->cpuusage) { | 10067 | if (!ca->cpuusage) |
9996 | kfree(ca); | 10068 | goto out_free_ca; |
9997 | return ERR_PTR(-ENOMEM); | 10069 | |
9998 | } | 10070 | for (i = 0; i < CPUACCT_STAT_NSTATS; i++) |
10071 | if (percpu_counter_init(&ca->cpustat[i], 0)) | ||
10072 | goto out_free_counters; | ||
9999 | 10073 | ||
10000 | if (cgrp->parent) | 10074 | if (cgrp->parent) |
10001 | ca->parent = cgroup_ca(cgrp->parent); | 10075 | ca->parent = cgroup_ca(cgrp->parent); |
10002 | 10076 | ||
10003 | return &ca->css; | 10077 | return &ca->css; |
10078 | |||
10079 | out_free_counters: | ||
10080 | while (--i >= 0) | ||
10081 | percpu_counter_destroy(&ca->cpustat[i]); | ||
10082 | free_percpu(ca->cpuusage); | ||
10083 | out_free_ca: | ||
10084 | kfree(ca); | ||
10085 | out: | ||
10086 | return ERR_PTR(-ENOMEM); | ||
10004 | } | 10087 | } |
10005 | 10088 | ||
10006 | /* destroy an existing cpu accounting group */ | 10089 | /* destroy an existing cpu accounting group */ |
@@ -10008,7 +10091,10 @@ static void | |||
10008 | cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) | 10091 | cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) |
10009 | { | 10092 | { |
10010 | struct cpuacct *ca = cgroup_ca(cgrp); | 10093 | struct cpuacct *ca = cgroup_ca(cgrp); |
10094 | int i; | ||
10011 | 10095 | ||
10096 | for (i = 0; i < CPUACCT_STAT_NSTATS; i++) | ||
10097 | percpu_counter_destroy(&ca->cpustat[i]); | ||
10012 | free_percpu(ca->cpuusage); | 10098 | free_percpu(ca->cpuusage); |
10013 | kfree(ca); | 10099 | kfree(ca); |
10014 | } | 10100 | } |
@@ -10095,6 +10181,25 @@ static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft, | |||
10095 | return 0; | 10181 | return 0; |
10096 | } | 10182 | } |
10097 | 10183 | ||
10184 | static const char *cpuacct_stat_desc[] = { | ||
10185 | [CPUACCT_STAT_USER] = "user", | ||
10186 | [CPUACCT_STAT_SYSTEM] = "system", | ||
10187 | }; | ||
10188 | |||
10189 | static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft, | ||
10190 | struct cgroup_map_cb *cb) | ||
10191 | { | ||
10192 | struct cpuacct *ca = cgroup_ca(cgrp); | ||
10193 | int i; | ||
10194 | |||
10195 | for (i = 0; i < CPUACCT_STAT_NSTATS; i++) { | ||
10196 | s64 val = percpu_counter_read(&ca->cpustat[i]); | ||
10197 | val = cputime64_to_clock_t(val); | ||
10198 | cb->fill(cb, cpuacct_stat_desc[i], val); | ||
10199 | } | ||
10200 | return 0; | ||
10201 | } | ||
10202 | |||
10098 | static struct cftype files[] = { | 10203 | static struct cftype files[] = { |
10099 | { | 10204 | { |
10100 | .name = "usage", | 10205 | .name = "usage", |
@@ -10105,7 +10210,10 @@ static struct cftype files[] = { | |||
10105 | .name = "usage_percpu", | 10210 | .name = "usage_percpu", |
10106 | .read_seq_string = cpuacct_percpu_seq_read, | 10211 | .read_seq_string = cpuacct_percpu_seq_read, |
10107 | }, | 10212 | }, |
10108 | 10213 | { | |
10214 | .name = "stat", | ||
10215 | .read_map = cpuacct_stats_show, | ||
10216 | }, | ||
10109 | }; | 10217 | }; |
10110 | 10218 | ||
10111 | static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) | 10219 | static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) |
@@ -10127,12 +10235,38 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime) | |||
10127 | return; | 10235 | return; |
10128 | 10236 | ||
10129 | cpu = task_cpu(tsk); | 10237 | cpu = task_cpu(tsk); |
10238 | |||
10239 | rcu_read_lock(); | ||
10240 | |||
10130 | ca = task_ca(tsk); | 10241 | ca = task_ca(tsk); |
10131 | 10242 | ||
10132 | for (; ca; ca = ca->parent) { | 10243 | for (; ca; ca = ca->parent) { |
10133 | u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); | 10244 | u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); |
10134 | *cpuusage += cputime; | 10245 | *cpuusage += cputime; |
10135 | } | 10246 | } |
10247 | |||
10248 | rcu_read_unlock(); | ||
10249 | } | ||
10250 | |||
10251 | /* | ||
10252 | * Charge the system/user time to the task's accounting group. | ||
10253 | */ | ||
10254 | static void cpuacct_update_stats(struct task_struct *tsk, | ||
10255 | enum cpuacct_stat_index idx, cputime_t val) | ||
10256 | { | ||
10257 | struct cpuacct *ca; | ||
10258 | |||
10259 | if (unlikely(!cpuacct_subsys.active)) | ||
10260 | return; | ||
10261 | |||
10262 | rcu_read_lock(); | ||
10263 | ca = task_ca(tsk); | ||
10264 | |||
10265 | do { | ||
10266 | percpu_counter_add(&ca->cpustat[idx], val); | ||
10267 | ca = ca->parent; | ||
10268 | } while (ca); | ||
10269 | rcu_read_unlock(); | ||
10136 | } | 10270 | } |
10137 | 10271 | ||
10138 | struct cgroup_subsys cpuacct_subsys = { | 10272 | struct cgroup_subsys cpuacct_subsys = { |