diff options
Diffstat (limited to 'kernel/sched.c')
| -rw-r--r-- | kernel/sched.c | 166 |
1 files changed, 150 insertions, 16 deletions
diff --git a/kernel/sched.c b/kernel/sched.c index 6cc1fd5d5072..26efa475bdc1 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
| @@ -1418,10 +1418,22 @@ iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
| 1418 | struct rq_iterator *iterator); | 1418 | struct rq_iterator *iterator); |
| 1419 | #endif | 1419 | #endif |
| 1420 | 1420 | ||
| 1421 | /* Time spent by the tasks of the cpu accounting group executing in ... */ | ||
| 1422 | enum cpuacct_stat_index { | ||
| 1423 | CPUACCT_STAT_USER, /* ... user mode */ | ||
| 1424 | CPUACCT_STAT_SYSTEM, /* ... kernel mode */ | ||
| 1425 | |||
| 1426 | CPUACCT_STAT_NSTATS, | ||
| 1427 | }; | ||
| 1428 | |||
| 1421 | #ifdef CONFIG_CGROUP_CPUACCT | 1429 | #ifdef CONFIG_CGROUP_CPUACCT |
| 1422 | static void cpuacct_charge(struct task_struct *tsk, u64 cputime); | 1430 | static void cpuacct_charge(struct task_struct *tsk, u64 cputime); |
| 1431 | static void cpuacct_update_stats(struct task_struct *tsk, | ||
| 1432 | enum cpuacct_stat_index idx, cputime_t val); | ||
| 1423 | #else | 1433 | #else |
| 1424 | static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {} | 1434 | static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {} |
| 1435 | static inline void cpuacct_update_stats(struct task_struct *tsk, | ||
| 1436 | enum cpuacct_stat_index idx, cputime_t val) {} | ||
| 1425 | #endif | 1437 | #endif |
| 1426 | 1438 | ||
| 1427 | static inline void inc_cpu_load(struct rq *rq, unsigned long load) | 1439 | static inline void inc_cpu_load(struct rq *rq, unsigned long load) |
| @@ -4511,9 +4523,25 @@ DEFINE_PER_CPU(struct kernel_stat, kstat); | |||
| 4511 | EXPORT_PER_CPU_SYMBOL(kstat); | 4523 | EXPORT_PER_CPU_SYMBOL(kstat); |
| 4512 | 4524 | ||
| 4513 | /* | 4525 | /* |
| 4514 | * Return any ns on the sched_clock that have not yet been banked in | 4526 | * Return any ns on the sched_clock that have not yet been accounted in |
| 4515 | * @p in case that task is currently running. | 4527 | * @p in case that task is currently running. |
| 4528 | * | ||
| 4529 | * Called with task_rq_lock() held on @rq. | ||
| 4516 | */ | 4530 | */ |
| 4531 | static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq) | ||
| 4532 | { | ||
| 4533 | u64 ns = 0; | ||
| 4534 | |||
| 4535 | if (task_current(rq, p)) { | ||
| 4536 | update_rq_clock(rq); | ||
| 4537 | ns = rq->clock - p->se.exec_start; | ||
| 4538 | if ((s64)ns < 0) | ||
| 4539 | ns = 0; | ||
| 4540 | } | ||
| 4541 | |||
| 4542 | return ns; | ||
| 4543 | } | ||
| 4544 | |||
| 4517 | unsigned long long task_delta_exec(struct task_struct *p) | 4545 | unsigned long long task_delta_exec(struct task_struct *p) |
| 4518 | { | 4546 | { |
| 4519 | unsigned long flags; | 4547 | unsigned long flags; |
| @@ -4521,16 +4549,49 @@ unsigned long long task_delta_exec(struct task_struct *p) | |||
| 4521 | u64 ns = 0; | 4549 | u64 ns = 0; |
| 4522 | 4550 | ||
| 4523 | rq = task_rq_lock(p, &flags); | 4551 | rq = task_rq_lock(p, &flags); |
| 4552 | ns = do_task_delta_exec(p, rq); | ||
| 4553 | task_rq_unlock(rq, &flags); | ||
| 4524 | 4554 | ||
| 4525 | if (task_current(rq, p)) { | 4555 | return ns; |
| 4526 | u64 delta_exec; | 4556 | } |
| 4527 | 4557 | ||
| 4528 | update_rq_clock(rq); | 4558 | /* |
| 4529 | delta_exec = rq->clock - p->se.exec_start; | 4559 | * Return accounted runtime for the task. |
| 4530 | if ((s64)delta_exec > 0) | 4560 | * In case the task is currently running, return the runtime plus current's |
| 4531 | ns = delta_exec; | 4561 | * pending runtime that has not been accounted yet. |
| 4532 | } | 4562 | */ |
| 4563 | unsigned long long task_sched_runtime(struct task_struct *p) | ||
| 4564 | { | ||
| 4565 | unsigned long flags; | ||
| 4566 | struct rq *rq; | ||
| 4567 | u64 ns = 0; | ||
| 4568 | |||
| 4569 | rq = task_rq_lock(p, &flags); | ||
| 4570 | ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq); | ||
| 4571 | task_rq_unlock(rq, &flags); | ||
| 4572 | |||
| 4573 | return ns; | ||
| 4574 | } | ||
| 4575 | |||
| 4576 | /* | ||
| 4577 | * Return sum_exec_runtime for the thread group. | ||
| 4578 | * In case the task is currently running, return the sum plus current's | ||
| 4579 | * pending runtime that has not been accounted yet. | ||
| 4580 | * | ||
| 4581 | * Note that the thread group might have other running tasks as well, | ||
| 4582 | * so the return value does not include the pending runtime that other | ||
| 4583 | * running tasks might have. | ||
| 4584 | */ | ||
| 4585 | unsigned long long thread_group_sched_runtime(struct task_struct *p) | ||
| 4586 | { | ||
| 4587 | struct task_cputime totals; | ||
| 4588 | unsigned long flags; | ||
| 4589 | struct rq *rq; | ||
| 4590 | u64 ns; | ||
| 4533 | 4591 | ||
| 4592 | rq = task_rq_lock(p, &flags); | ||
| 4593 | thread_group_cputime(p, &totals); | ||
| 4594 | ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq); | ||
| 4534 | task_rq_unlock(rq, &flags); | 4595 | task_rq_unlock(rq, &flags); |
| 4535 | 4596 | ||
| 4536 | return ns; | 4597 | return ns; |
| @@ -4559,6 +4620,8 @@ void account_user_time(struct task_struct *p, cputime_t cputime, | |||
| 4559 | cpustat->nice = cputime64_add(cpustat->nice, tmp); | 4620 | cpustat->nice = cputime64_add(cpustat->nice, tmp); |
| 4560 | else | 4621 | else |
| 4561 | cpustat->user = cputime64_add(cpustat->user, tmp); | 4622 | cpustat->user = cputime64_add(cpustat->user, tmp); |
| 4623 | |||
| 4624 | cpuacct_update_stats(p, CPUACCT_STAT_USER, cputime); | ||
| 4562 | /* Account for user time used */ | 4625 | /* Account for user time used */ |
| 4563 | acct_update_integrals(p); | 4626 | acct_update_integrals(p); |
| 4564 | } | 4627 | } |
| @@ -4620,6 +4683,8 @@ void account_system_time(struct task_struct *p, int hardirq_offset, | |||
| 4620 | else | 4683 | else |
| 4621 | cpustat->system = cputime64_add(cpustat->system, tmp); | 4684 | cpustat->system = cputime64_add(cpustat->system, tmp); |
| 4622 | 4685 | ||
| 4686 | cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime); | ||
| 4687 | |||
| 4623 | /* Account for system time used */ | 4688 | /* Account for system time used */ |
| 4624 | acct_update_integrals(p); | 4689 | acct_update_integrals(p); |
| 4625 | } | 4690 | } |
| @@ -4667,7 +4732,7 @@ void account_process_tick(struct task_struct *p, int user_tick) | |||
| 4667 | 4732 | ||
| 4668 | if (user_tick) | 4733 | if (user_tick) |
| 4669 | account_user_time(p, one_jiffy, one_jiffy_scaled); | 4734 | account_user_time(p, one_jiffy, one_jiffy_scaled); |
| 4670 | else if (p != rq->idle) | 4735 | else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET)) |
| 4671 | account_system_time(p, HARDIRQ_OFFSET, one_jiffy, | 4736 | account_system_time(p, HARDIRQ_OFFSET, one_jiffy, |
| 4672 | one_jiffy_scaled); | 4737 | one_jiffy_scaled); |
| 4673 | else | 4738 | else |
| @@ -4781,7 +4846,7 @@ void scheduler_tick(void) | |||
| 4781 | #endif | 4846 | #endif |
| 4782 | } | 4847 | } |
| 4783 | 4848 | ||
| 4784 | unsigned long get_parent_ip(unsigned long addr) | 4849 | notrace unsigned long get_parent_ip(unsigned long addr) |
| 4785 | { | 4850 | { |
| 4786 | if (in_lock_functions(addr)) { | 4851 | if (in_lock_functions(addr)) { |
| 4787 | addr = CALLER_ADDR2; | 4852 | addr = CALLER_ADDR2; |
| @@ -7302,7 +7367,12 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, | |||
| 7302 | cpumask_or(groupmask, groupmask, sched_group_cpus(group)); | 7367 | cpumask_or(groupmask, groupmask, sched_group_cpus(group)); |
| 7303 | 7368 | ||
| 7304 | cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); | 7369 | cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); |
| 7370 | |||
| 7305 | printk(KERN_CONT " %s", str); | 7371 | printk(KERN_CONT " %s", str); |
| 7372 | if (group->__cpu_power != SCHED_LOAD_SCALE) { | ||
| 7373 | printk(KERN_CONT " (__cpu_power = %d)", | ||
| 7374 | group->__cpu_power); | ||
| 7375 | } | ||
| 7306 | 7376 | ||
| 7307 | group = group->next; | 7377 | group = group->next; |
| 7308 | } while (group != sd->groups); | 7378 | } while (group != sd->groups); |
| @@ -9925,6 +9995,7 @@ struct cpuacct { | |||
| 9925 | struct cgroup_subsys_state css; | 9995 | struct cgroup_subsys_state css; |
| 9926 | /* cpuusage holds pointer to a u64-type object on every cpu */ | 9996 | /* cpuusage holds pointer to a u64-type object on every cpu */ |
| 9927 | u64 *cpuusage; | 9997 | u64 *cpuusage; |
| 9998 | struct percpu_counter cpustat[CPUACCT_STAT_NSTATS]; | ||
| 9928 | struct cpuacct *parent; | 9999 | struct cpuacct *parent; |
| 9929 | }; | 10000 | }; |
| 9930 | 10001 | ||
| @@ -9949,20 +10020,32 @@ static struct cgroup_subsys_state *cpuacct_create( | |||
| 9949 | struct cgroup_subsys *ss, struct cgroup *cgrp) | 10020 | struct cgroup_subsys *ss, struct cgroup *cgrp) |
| 9950 | { | 10021 | { |
| 9951 | struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL); | 10022 | struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL); |
| 10023 | int i; | ||
| 9952 | 10024 | ||
| 9953 | if (!ca) | 10025 | if (!ca) |
| 9954 | return ERR_PTR(-ENOMEM); | 10026 | goto out; |
| 9955 | 10027 | ||
| 9956 | ca->cpuusage = alloc_percpu(u64); | 10028 | ca->cpuusage = alloc_percpu(u64); |
| 9957 | if (!ca->cpuusage) { | 10029 | if (!ca->cpuusage) |
| 9958 | kfree(ca); | 10030 | goto out_free_ca; |
| 9959 | return ERR_PTR(-ENOMEM); | 10031 | |
| 9960 | } | 10032 | for (i = 0; i < CPUACCT_STAT_NSTATS; i++) |
| 10033 | if (percpu_counter_init(&ca->cpustat[i], 0)) | ||
| 10034 | goto out_free_counters; | ||
| 9961 | 10035 | ||
| 9962 | if (cgrp->parent) | 10036 | if (cgrp->parent) |
| 9963 | ca->parent = cgroup_ca(cgrp->parent); | 10037 | ca->parent = cgroup_ca(cgrp->parent); |
| 9964 | 10038 | ||
| 9965 | return &ca->css; | 10039 | return &ca->css; |
| 10040 | |||
| 10041 | out_free_counters: | ||
| 10042 | while (--i >= 0) | ||
| 10043 | percpu_counter_destroy(&ca->cpustat[i]); | ||
| 10044 | free_percpu(ca->cpuusage); | ||
| 10045 | out_free_ca: | ||
| 10046 | kfree(ca); | ||
| 10047 | out: | ||
| 10048 | return ERR_PTR(-ENOMEM); | ||
| 9966 | } | 10049 | } |
| 9967 | 10050 | ||
| 9968 | /* destroy an existing cpu accounting group */ | 10051 | /* destroy an existing cpu accounting group */ |
| @@ -9970,7 +10053,10 @@ static void | |||
| 9970 | cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) | 10053 | cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) |
| 9971 | { | 10054 | { |
| 9972 | struct cpuacct *ca = cgroup_ca(cgrp); | 10055 | struct cpuacct *ca = cgroup_ca(cgrp); |
| 10056 | int i; | ||
| 9973 | 10057 | ||
| 10058 | for (i = 0; i < CPUACCT_STAT_NSTATS; i++) | ||
| 10059 | percpu_counter_destroy(&ca->cpustat[i]); | ||
| 9974 | free_percpu(ca->cpuusage); | 10060 | free_percpu(ca->cpuusage); |
| 9975 | kfree(ca); | 10061 | kfree(ca); |
| 9976 | } | 10062 | } |
| @@ -10057,6 +10143,25 @@ static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft, | |||
| 10057 | return 0; | 10143 | return 0; |
| 10058 | } | 10144 | } |
| 10059 | 10145 | ||
| 10146 | static const char *cpuacct_stat_desc[] = { | ||
| 10147 | [CPUACCT_STAT_USER] = "user", | ||
| 10148 | [CPUACCT_STAT_SYSTEM] = "system", | ||
| 10149 | }; | ||
| 10150 | |||
| 10151 | static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft, | ||
| 10152 | struct cgroup_map_cb *cb) | ||
| 10153 | { | ||
| 10154 | struct cpuacct *ca = cgroup_ca(cgrp); | ||
| 10155 | int i; | ||
| 10156 | |||
| 10157 | for (i = 0; i < CPUACCT_STAT_NSTATS; i++) { | ||
| 10158 | s64 val = percpu_counter_read(&ca->cpustat[i]); | ||
| 10159 | val = cputime64_to_clock_t(val); | ||
| 10160 | cb->fill(cb, cpuacct_stat_desc[i], val); | ||
| 10161 | } | ||
| 10162 | return 0; | ||
| 10163 | } | ||
| 10164 | |||
| 10060 | static struct cftype files[] = { | 10165 | static struct cftype files[] = { |
| 10061 | { | 10166 | { |
| 10062 | .name = "usage", | 10167 | .name = "usage", |
| @@ -10067,7 +10172,10 @@ static struct cftype files[] = { | |||
| 10067 | .name = "usage_percpu", | 10172 | .name = "usage_percpu", |
| 10068 | .read_seq_string = cpuacct_percpu_seq_read, | 10173 | .read_seq_string = cpuacct_percpu_seq_read, |
| 10069 | }, | 10174 | }, |
| 10070 | 10175 | { | |
| 10176 | .name = "stat", | ||
| 10177 | .read_map = cpuacct_stats_show, | ||
| 10178 | }, | ||
| 10071 | }; | 10179 | }; |
| 10072 | 10180 | ||
| 10073 | static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) | 10181 | static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) |
| @@ -10089,12 +10197,38 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime) | |||
| 10089 | return; | 10197 | return; |
| 10090 | 10198 | ||
| 10091 | cpu = task_cpu(tsk); | 10199 | cpu = task_cpu(tsk); |
| 10200 | |||
| 10201 | rcu_read_lock(); | ||
| 10202 | |||
| 10092 | ca = task_ca(tsk); | 10203 | ca = task_ca(tsk); |
| 10093 | 10204 | ||
| 10094 | for (; ca; ca = ca->parent) { | 10205 | for (; ca; ca = ca->parent) { |
| 10095 | u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); | 10206 | u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); |
| 10096 | *cpuusage += cputime; | 10207 | *cpuusage += cputime; |
| 10097 | } | 10208 | } |
| 10209 | |||
| 10210 | rcu_read_unlock(); | ||
| 10211 | } | ||
| 10212 | |||
| 10213 | /* | ||
| 10214 | * Charge the system/user time to the task's accounting group. | ||
| 10215 | */ | ||
| 10216 | static void cpuacct_update_stats(struct task_struct *tsk, | ||
| 10217 | enum cpuacct_stat_index idx, cputime_t val) | ||
| 10218 | { | ||
| 10219 | struct cpuacct *ca; | ||
| 10220 | |||
| 10221 | if (unlikely(!cpuacct_subsys.active)) | ||
| 10222 | return; | ||
| 10223 | |||
| 10224 | rcu_read_lock(); | ||
| 10225 | ca = task_ca(tsk); | ||
| 10226 | |||
| 10227 | do { | ||
| 10228 | percpu_counter_add(&ca->cpustat[idx], val); | ||
| 10229 | ca = ca->parent; | ||
| 10230 | } while (ca); | ||
| 10231 | rcu_read_unlock(); | ||
| 10098 | } | 10232 | } |
| 10099 | 10233 | ||
| 10100 | struct cgroup_subsys cpuacct_subsys = { | 10234 | struct cgroup_subsys cpuacct_subsys = { |
