path: root/kernel
author	Linus Torvalds <torvalds@linux-foundation.org>	2009-04-09 13:37:28 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-04-09 13:37:28 -0400
commit	17b2e9bf27d417bc186cc922b4d6d5eaa048f9d8 (patch)
tree	7ae99be289ec2ffe68aa38926d9e9a13e4387ee0 /kernel
parent	422a253483aa5de71a2bcdc27b0aa023053f97f8 (diff)
parent	e3c8ca8336707062f3f7cb1cd7e6b3c753baccdd (diff)
Merge branch 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  sched: do not count frozen tasks toward load
  sched: refresh MAINTAINERS entry
  sched: Print sched_group::__cpu_power in sched_domain_debug
  cpuacct: add per-cgroup utime/stime statistics
  posixtimers, sched: Fix posix clock monotonicity
  sched_rt: don't allocate cpumask in fastpath
  cpuacct: make cpuacct hierarchy walk in cpuacct_charge() safe when rcupreempt is used -v2
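For orientation, the most user-visible piece of this merge is the cpuacct one: each cpuacct cgroup grows a cpuacct.stat control file reporting "user" and "system" time in USER_HZ clock ticks (see cpuacct_stats_show() in the kernel/sched.c hunks below). A minimal sketch of a reader follows; the mount point /cgroups/cpuacct is an assumption for illustration, not part of this merge.

/* Hypothetical reader for the new cpuacct.stat file; the mount point
 * /cgroups/cpuacct is an assumption, not dictated by the kernel. */
#include <stdio.h>

int main(void)
{
        char key[16];
        long long val;
        FILE *f = fopen("/cgroups/cpuacct/cpuacct.stat", "r");

        if (!f) {
                perror("fopen");
                return 1;
        }
        /* cpuacct_stats_show() emits "user <ticks>" and "system <ticks>",
         * where ticks are USER_HZ units (cputime64_to_clock_t()). */
        while (fscanf(f, "%15s %lld", key, &val) == 2)
                printf("%s = %lld ticks\n", key, val);
        fclose(f);
        return 0;
}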
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/posix-cpu-timers.c	7
-rw-r--r--	kernel/sched.c	160
-rw-r--r--	kernel/sched_cpupri.c	5
-rw-r--r--	kernel/sched_rt.c	15
4 files changed, 156 insertions(+), 31 deletions(-)
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index bb53185d8c78..c9dcf98b4463 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -224,7 +224,7 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
         cpu->cpu = virt_ticks(p);
         break;
     case CPUCLOCK_SCHED:
-        cpu->sched = p->se.sum_exec_runtime + task_delta_exec(p);
+        cpu->sched = task_sched_runtime(p);
         break;
     }
     return 0;
@@ -305,18 +305,19 @@ static int cpu_clock_sample_group(const clockid_t which_clock,
 {
     struct task_cputime cputime;
 
-    thread_group_cputime(p, &cputime);
     switch (CPUCLOCK_WHICH(which_clock)) {
     default:
         return -EINVAL;
     case CPUCLOCK_PROF:
+        thread_group_cputime(p, &cputime);
         cpu->cpu = cputime_add(cputime.utime, cputime.stime);
         break;
     case CPUCLOCK_VIRT:
+        thread_group_cputime(p, &cputime);
         cpu->cpu = cputime.utime;
         break;
     case CPUCLOCK_SCHED:
-        cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p);
+        cpu->sched = thread_group_sched_runtime(p);
         break;
     }
     return 0;
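The two hunks above are the posix clock monotonicity fix: instead of adding task_delta_exec() on top of a separately sampled runtime total, the samples now come from task_sched_runtime() and thread_group_sched_runtime(), which read the accumulated runtime and the pending delta under one task_rq_lock(). A hypothetical user-space watchdog (illustrative only, not from the patch) that could trip before this fix, when successive CLOCK_PROCESS_CPUTIME_ID reads occasionally went backwards:

/* Hypothetical monotonicity check, not part of the patch: the posix
 * CPU clocks must never step backwards between consecutive reads. */
#include <assert.h>
#include <time.h>

int main(void)
{
        struct timespec prev = { 0, 0 }, now;
        int i;

        for (i = 0; i < 1000000; i++) {
                clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &now);
                assert(now.tv_sec > prev.tv_sec ||
                       (now.tv_sec == prev.tv_sec &&
                        now.tv_nsec >= prev.tv_nsec));
                prev = now;
        }
        return 0;
}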
diff --git a/kernel/sched.c b/kernel/sched.c
index 6cc1fd5d5072..5724508c3b66 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1418,10 +1418,22 @@ iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
               struct rq_iterator *iterator);
 #endif
 
+/* Time spent by the tasks of the cpu accounting group executing in ... */
+enum cpuacct_stat_index {
+    CPUACCT_STAT_USER,    /* ... user mode */
+    CPUACCT_STAT_SYSTEM,  /* ... kernel mode */
+
+    CPUACCT_STAT_NSTATS,
+};
+
 #ifdef CONFIG_CGROUP_CPUACCT
 static void cpuacct_charge(struct task_struct *tsk, u64 cputime);
+static void cpuacct_update_stats(struct task_struct *tsk,
+        enum cpuacct_stat_index idx, cputime_t val);
 #else
 static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
+static inline void cpuacct_update_stats(struct task_struct *tsk,
+        enum cpuacct_stat_index idx, cputime_t val) {}
 #endif
 
 static inline void inc_cpu_load(struct rq *rq, unsigned long load)
@@ -4511,9 +4523,25 @@ DEFINE_PER_CPU(struct kernel_stat, kstat);
 EXPORT_PER_CPU_SYMBOL(kstat);
 
 /*
- * Return any ns on the sched_clock that have not yet been banked in
+ * Return any ns on the sched_clock that have not yet been accounted in
  * @p in case that task is currently running.
+ *
+ * Called with task_rq_lock() held on @rq.
  */
+static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq)
+{
+    u64 ns = 0;
+
+    if (task_current(rq, p)) {
+        update_rq_clock(rq);
+        ns = rq->clock - p->se.exec_start;
+        if ((s64)ns < 0)
+            ns = 0;
+    }
+
+    return ns;
+}
+
 unsigned long long task_delta_exec(struct task_struct *p)
 {
     unsigned long flags;
@@ -4521,16 +4549,49 @@ unsigned long long task_delta_exec(struct task_struct *p)
     u64 ns = 0;
 
     rq = task_rq_lock(p, &flags);
+    ns = do_task_delta_exec(p, rq);
+    task_rq_unlock(rq, &flags);
 
-    if (task_current(rq, p)) {
-        u64 delta_exec;
-
-        update_rq_clock(rq);
-        delta_exec = rq->clock - p->se.exec_start;
-        if ((s64)delta_exec > 0)
-            ns = delta_exec;
-    }
+    return ns;
+}
 
+/*
+ * Return accounted runtime for the task.
+ * In case the task is currently running, return the runtime plus current's
+ * pending runtime that have not been accounted yet.
+ */
+unsigned long long task_sched_runtime(struct task_struct *p)
+{
+    unsigned long flags;
+    struct rq *rq;
+    u64 ns = 0;
+
+    rq = task_rq_lock(p, &flags);
+    ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq);
+    task_rq_unlock(rq, &flags);
+
+    return ns;
+}
+
+/*
+ * Return sum_exec_runtime for the thread group.
+ * In case the task is currently running, return the sum plus current's
+ * pending runtime that have not been accounted yet.
+ *
+ * Note that the thread group might have other running tasks as well,
+ * so the return value not includes other pending runtime that other
+ * running tasks might have.
+ */
+unsigned long long thread_group_sched_runtime(struct task_struct *p)
+{
+    struct task_cputime totals;
+    unsigned long flags;
+    struct rq *rq;
+    u64 ns;
 
+    rq = task_rq_lock(p, &flags);
+    thread_group_cputime(p, &totals);
+    ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq);
     task_rq_unlock(rq, &flags);
 
     return ns;
@@ -4559,6 +4620,8 @@ void account_user_time(struct task_struct *p, cputime_t cputime,
         cpustat->nice = cputime64_add(cpustat->nice, tmp);
     else
         cpustat->user = cputime64_add(cpustat->user, tmp);
+
+    cpuacct_update_stats(p, CPUACCT_STAT_USER, cputime);
     /* Account for user time used */
     acct_update_integrals(p);
 }
@@ -4620,6 +4683,8 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
     else
         cpustat->system = cputime64_add(cpustat->system, tmp);
 
+    cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime);
+
     /* Account for system time used */
     acct_update_integrals(p);
 }
@@ -7302,7 +7367,8 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
         cpumask_or(groupmask, groupmask, sched_group_cpus(group));
 
         cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
-        printk(KERN_CONT " %s", str);
+        printk(KERN_CONT " %s (__cpu_power = %d)", str,
+                        group->__cpu_power);
 
         group = group->next;
     } while (group != sd->groups);
@@ -9925,6 +9991,7 @@ struct cpuacct {
     struct cgroup_subsys_state css;
     /* cpuusage holds pointer to a u64-type object on every cpu */
     u64 *cpuusage;
+    struct percpu_counter cpustat[CPUACCT_STAT_NSTATS];
     struct cpuacct *parent;
 };
 
@@ -9949,20 +10016,32 @@ static struct cgroup_subsys_state *cpuacct_create(
     struct cgroup_subsys *ss, struct cgroup *cgrp)
 {
     struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
+    int i;
 
     if (!ca)
-        return ERR_PTR(-ENOMEM);
+        goto out;
 
     ca->cpuusage = alloc_percpu(u64);
-    if (!ca->cpuusage) {
-        kfree(ca);
-        return ERR_PTR(-ENOMEM);
-    }
+    if (!ca->cpuusage)
+        goto out_free_ca;
+
+    for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
+        if (percpu_counter_init(&ca->cpustat[i], 0))
+            goto out_free_counters;
 
     if (cgrp->parent)
         ca->parent = cgroup_ca(cgrp->parent);
 
     return &ca->css;
+
+out_free_counters:
+    while (--i >= 0)
+        percpu_counter_destroy(&ca->cpustat[i]);
+    free_percpu(ca->cpuusage);
+out_free_ca:
+    kfree(ca);
+out:
+    return ERR_PTR(-ENOMEM);
 }
 
 /* destroy an existing cpu accounting group */
@@ -9970,7 +10049,10 @@ static void
 cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
 {
     struct cpuacct *ca = cgroup_ca(cgrp);
+    int i;
 
+    for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
+        percpu_counter_destroy(&ca->cpustat[i]);
     free_percpu(ca->cpuusage);
     kfree(ca);
 }
@@ -10057,6 +10139,25 @@ static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft,
     return 0;
 }
 
+static const char *cpuacct_stat_desc[] = {
+    [CPUACCT_STAT_USER] = "user",
+    [CPUACCT_STAT_SYSTEM] = "system",
+};
+
+static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft,
+        struct cgroup_map_cb *cb)
+{
+    struct cpuacct *ca = cgroup_ca(cgrp);
+    int i;
+
+    for (i = 0; i < CPUACCT_STAT_NSTATS; i++) {
+        s64 val = percpu_counter_read(&ca->cpustat[i]);
+        val = cputime64_to_clock_t(val);
+        cb->fill(cb, cpuacct_stat_desc[i], val);
+    }
+    return 0;
+}
+
 static struct cftype files[] = {
     {
         .name = "usage",
@@ -10067,7 +10168,10 @@ static struct cftype files[] = {
         .name = "usage_percpu",
         .read_seq_string = cpuacct_percpu_seq_read,
     },
-
+    {
+        .name = "stat",
+        .read_map = cpuacct_stats_show,
+    },
 };
 
 static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
@@ -10089,12 +10193,38 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
         return;
 
     cpu = task_cpu(tsk);
+
+    rcu_read_lock();
+
     ca = task_ca(tsk);
 
     for (; ca; ca = ca->parent) {
         u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
         *cpuusage += cputime;
     }
+
+    rcu_read_unlock();
+}
+
+/*
+ * Charge the system/user time to the task's accounting group.
+ */
+static void cpuacct_update_stats(struct task_struct *tsk,
+        enum cpuacct_stat_index idx, cputime_t val)
+{
+    struct cpuacct *ca;
+
+    if (unlikely(!cpuacct_subsys.active))
+        return;
+
+    rcu_read_lock();
+    ca = task_ca(tsk);
+
+    do {
+        percpu_counter_add(&ca->cpustat[idx], val);
+        ca = ca->parent;
+    } while (ca);
+    rcu_read_unlock();
 }
 
 struct cgroup_subsys cpuacct_subsys = {
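Both cpuacct walks above charge the task's group and every ancestor up to the root, and both now run under rcu_read_lock() so that, with rcupreempt, a concurrently removed cgroup cannot be freed mid-walk. A stripped-down user-space analogue of the hierarchy walk itself; the names and types here are illustrative, not kernel API:

#include <stdio.h>

/* Illustrative stand-in for struct cpuacct: a stat array plus a parent
 * pointer forming the hierarchy. */
struct group {
        const char *name;
        long long stat[2];              /* [0] = user, [1] = system */
        struct group *parent;
};

/* Same shape as cpuacct_update_stats(): charge val at every level. */
static void charge(struct group *g, int idx, long long val)
{
        do {
                g->stat[idx] += val;    /* percpu_counter_add() in the kernel */
                g = g->parent;
        } while (g);
}

int main(void)
{
        struct group root  = { "root",  { 0, 0 }, NULL };
        struct group child = { "child", { 0, 0 }, &root };

        charge(&child, 0, 10);          /* 10 ticks of user time */
        printf("child user=%lld root user=%lld\n",
               child.stat[0], root.stat[0]);
        return 0;
}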
diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c
index 1e00bfacf9b8..cdd3c89574cd 100644
--- a/kernel/sched_cpupri.c
+++ b/kernel/sched_cpupri.c
@@ -55,7 +55,7 @@ static int convert_prio(int prio)
  * cpupri_find - find the best (lowest-pri) CPU in the system
  * @cp: The cpupri context
  * @p: The task
- * @lowest_mask: A mask to fill in with selected CPUs
+ * @lowest_mask: A mask to fill in with selected CPUs (or NULL)
  *
  * Note: This function returns the recommended CPUs as calculated during the
  * current invokation. By the time the call returns, the CPUs may have in
@@ -81,7 +81,8 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
         if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
             continue;
 
-        cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
+        if (lowest_mask)
+            cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
         return 1;
     }
 
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 299d012b4394..f2c66f8f9712 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -948,20 +948,15 @@ static int select_task_rq_rt(struct task_struct *p, int sync)
 
 static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
 {
-    cpumask_var_t mask;
-
     if (rq->curr->rt.nr_cpus_allowed == 1)
         return;
 
-    if (!alloc_cpumask_var(&mask, GFP_ATOMIC))
-        return;
-
     if (p->rt.nr_cpus_allowed != 1
-        && cpupri_find(&rq->rd->cpupri, p, mask))
-        goto free;
+        && cpupri_find(&rq->rd->cpupri, p, NULL))
+        return;
 
-    if (!cpupri_find(&rq->rd->cpupri, rq->curr, mask))
-        goto free;
+    if (!cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
+        return;
 
     /*
      * There appears to be other cpus that can accept
@@ -970,8 +965,6 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
      */
     requeue_task_rt(rq, p, 1);
     resched_task(rq->curr);
-free:
-    free_cpumask_var(mask);
 }
 
 #endif /* CONFIG_SMP */
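The sched_cpupri.c and sched_rt.c hunks are two halves of the "don't allocate cpumask in fastpath" fix: check_preempt_equal_prio() runs on the wakeup fastpath, where a GFP_ATOMIC cpumask allocation is both costly and can fail spuriously. Since that caller only needs a yes/no answer, cpupri_find() now accepts a NULL lowest_mask and skips filling it. A simplified sketch of the convention; the types and function here are illustrative, not the kernel implementation:

#include <stdbool.h>
#include <stdio.h>

typedef unsigned long mask_t;   /* toy stand-in for cpumask_var_t */

/* Returns true if 'allowed' intersects 'vec'; copies the intersection
 * out only when the caller actually passed somewhere to put it. */
static bool find_fitting(mask_t allowed, mask_t vec, mask_t *lowest)
{
        if (!(allowed & vec))
                return false;
        if (lowest)                     /* the new NULL check */
                *lowest = allowed & vec;
        return true;
}

int main(void)
{
        mask_t mask;

        /* fastpath caller: boolean answer only, no mask, no allocation */
        printf("fits: %d\n", find_fitting(0x3, 0x6, NULL));

        /* slowpath caller: wants the actual CPU set */
        if (find_fitting(0x3, 0x6, &mask))
                printf("mask: 0x%lx\n", mask);
        return 0;
}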