aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMartin Schwidefsky <schwidefsky@de.ibm.com>2008-12-31 09:11:38 -0500
committerMartin Schwidefsky <schwidefsky@de.ibm.com>2008-12-31 09:11:46 -0500
commit79741dd35713ff4f6fd0eafd59fa94e8a4ba922d (patch)
tree73c6b503fbd274cb3fcca7a0a68c6f636e3a53ad
parent457533a7d3402d1d91fbc125c8bd1bd16dcd3cd4 (diff)
[PATCH] idle cputime accounting
The cpu time spent by the idle process actually doing something is currently accounted as idle time. This is plain wrong, the architectures that support VIRT_CPU_ACCOUNTING=y can do better: distinguish between the time spent doing nothing and the time spent by idle doing work. The first is accounted with account_idle_time and the second with account_system_time. The architectures that use the account_xxx_time interface directly and not the account_xxx_ticks interface now need to do the check for the idle process in their arch code. In particular to improve the system vs true idle time accounting the arch code needs to measure the true idle time instead of just testing for the idle process. To improve the tick based accounting as well we would need an architecture primitive that can tell us if the pt_regs of the interrupted context points to the magic instruction that halts the cpu.

In addition idle time is no more added to the stime of the idle process. This field now contains the system time of the idle process as it should be. On systems without VIRT_CPU_ACCOUNTING this will always be zero as every tick that occurs while idle is running will be accounted as idle time.

This patch contains the necessary common code changes to be able to distinguish idle system time and true idle time. The architectures with support for VIRT_CPU_ACCOUNTING need some changes to exploit this.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
-rw-r--r--arch/ia64/kernel/time.c10
-rw-r--r--arch/powerpc/kernel/process.c1
-rw-r--r--arch/powerpc/kernel/time.c13
-rw-r--r--arch/s390/kernel/vtime.c20
-rw-r--r--arch/x86/xen/time.c10
-rw-r--r--include/linux/kernel_stat.h7
-rw-r--r--include/linux/sched.h1
-rw-r--r--kernel/sched.c80
-rw-r--r--kernel/time/tick-sched.c13
-rw-r--r--kernel/timer.c13
10 files changed, 114 insertions, 54 deletions
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 4ee367817049..f0ebb342409d 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -93,7 +93,10 @@ void ia64_account_on_switch(struct task_struct *prev, struct task_struct *next)
93 now = ia64_get_itc(); 93 now = ia64_get_itc();
94 94
95 delta_stime = cycle_to_cputime(pi->ac_stime + (now - pi->ac_stamp)); 95 delta_stime = cycle_to_cputime(pi->ac_stime + (now - pi->ac_stamp));
96 account_system_time(prev, 0, delta_stime, delta_stime); 96 if (idle_task(smp_processor_id()) != prev)
97 account_system_time(prev, 0, delta_stime, delta_stime);
98 else
99 account_idle_time(delta_stime);
97 100
98 if (pi->ac_utime) { 101 if (pi->ac_utime) {
99 delta_utime = cycle_to_cputime(pi->ac_utime); 102 delta_utime = cycle_to_cputime(pi->ac_utime);
@@ -120,7 +123,10 @@ void account_system_vtime(struct task_struct *tsk)
120 now = ia64_get_itc(); 123 now = ia64_get_itc();
121 124
122 delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp)); 125 delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp));
123 account_system_time(tsk, 0, delta_stime, delta_stime); 126 if (irq_count() || idle_task(smp_processor_id()) != tsk)
127 account_system_time(tsk, 0, delta_stime, delta_stime);
128 else
129 account_idle_time(delta_stime);
124 ti->ac_stime = 0; 130 ti->ac_stime = 0;
125 131
126 ti->ac_stamp = now; 132 ti->ac_stamp = now;
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 51b201ddf9a1..fb7049c054c0 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -33,6 +33,7 @@
33#include <linux/mqueue.h> 33#include <linux/mqueue.h>
34#include <linux/hardirq.h> 34#include <linux/hardirq.h>
35#include <linux/utsname.h> 35#include <linux/utsname.h>
36#include <linux/kernel_stat.h>
36 37
37#include <asm/pgtable.h> 38#include <asm/pgtable.h>
38#include <asm/uaccess.h> 39#include <asm/uaccess.h>
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 92650ccad2e1..3be355c1cfa7 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -256,7 +256,10 @@ void account_system_vtime(struct task_struct *tsk)
256 delta += sys_time; 256 delta += sys_time;
257 get_paca()->system_time = 0; 257 get_paca()->system_time = 0;
258 } 258 }
259 account_system_time(tsk, 0, delta, deltascaled); 259 if (in_irq() || idle_task(smp_processor_id()) != tsk)
260 account_system_time(tsk, 0, delta, deltascaled);
261 else
262 account_idle_time(delta);
260 per_cpu(cputime_last_delta, smp_processor_id()) = delta; 263 per_cpu(cputime_last_delta, smp_processor_id()) = delta;
261 per_cpu(cputime_scaled_last_delta, smp_processor_id()) = deltascaled; 264 per_cpu(cputime_scaled_last_delta, smp_processor_id()) = deltascaled;
262 local_irq_restore(flags); 265 local_irq_restore(flags);
@@ -335,8 +338,12 @@ void calculate_steal_time(void)
335 tb = mftb(); 338 tb = mftb();
336 purr = mfspr(SPRN_PURR); 339 purr = mfspr(SPRN_PURR);
337 stolen = (tb - pme->tb) - (purr - pme->purr); 340 stolen = (tb - pme->tb) - (purr - pme->purr);
338 if (stolen > 0) 341 if (stolen > 0) {
339 account_steal_time(current, stolen); 342 if (idle_task(smp_processor_id()) != current)
343 account_steal_time(stolen);
344 else
345 account_idle_time(stolen);
346 }
340 pme->tb = tb; 347 pme->tb = tb;
341 pme->purr = purr; 348 pme->purr = purr;
342} 349}
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 07283aea2e56..4a4a34caec55 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -55,13 +55,19 @@ void account_process_tick(struct task_struct *tsk, int user_tick)
55 cputime = S390_lowcore.system_timer >> 12; 55 cputime = S390_lowcore.system_timer >> 12;
56 S390_lowcore.system_timer -= cputime << 12; 56 S390_lowcore.system_timer -= cputime << 12;
57 S390_lowcore.steal_clock -= cputime << 12; 57 S390_lowcore.steal_clock -= cputime << 12;
58 account_system_time(tsk, HARDIRQ_OFFSET, cputime, cputime); 58 if (idle_task(smp_processor_id()) != current)
59 account_system_time(tsk, HARDIRQ_OFFSET, cputime, cputime);
60 else
61 account_idle_time(cputime);
59 62
60 cputime = S390_lowcore.steal_clock; 63 cputime = S390_lowcore.steal_clock;
61 if ((__s64) cputime > 0) { 64 if ((__s64) cputime > 0) {
62 cputime >>= 12; 65 cputime >>= 12;
63 S390_lowcore.steal_clock -= cputime << 12; 66 S390_lowcore.steal_clock -= cputime << 12;
64 account_steal_time(tsk, cputime); 67 if (idle_task(smp_processor_id()) != current)
68 account_steal_time(cputime);
69 else
70 account_idle_time(cputime);
65 } 71 }
66} 72}
67 73
@@ -87,7 +93,10 @@ void account_vtime(struct task_struct *tsk)
87 cputime = S390_lowcore.system_timer >> 12; 93 cputime = S390_lowcore.system_timer >> 12;
88 S390_lowcore.system_timer -= cputime << 12; 94 S390_lowcore.system_timer -= cputime << 12;
89 S390_lowcore.steal_clock -= cputime << 12; 95 S390_lowcore.steal_clock -= cputime << 12;
90 account_system_time(tsk, 0, cputime, cputime); 96 if (idle_task(smp_processor_id()) != current)
97 account_system_time(tsk, 0, cputime, cputime);
98 else
99 account_idle_time(cputime);
91} 100}
92 101
93/* 102/*
@@ -107,7 +116,10 @@ void account_system_vtime(struct task_struct *tsk)
107 cputime = S390_lowcore.system_timer >> 12; 116 cputime = S390_lowcore.system_timer >> 12;
108 S390_lowcore.system_timer -= cputime << 12; 117 S390_lowcore.system_timer -= cputime << 12;
109 S390_lowcore.steal_clock -= cputime << 12; 118 S390_lowcore.steal_clock -= cputime << 12;
110 account_system_time(tsk, 0, cputime, cputime); 119 if (in_irq() || idle_task(smp_processor_id()) != current)
120 account_system_time(tsk, 0, cputime, cputime);
121 else
122 account_idle_time(cputime);
111} 123}
112EXPORT_SYMBOL_GPL(account_system_vtime); 124EXPORT_SYMBOL_GPL(account_system_vtime);
113 125
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index c9f7cda48ed7..732e52dc991a 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -132,8 +132,7 @@ static void do_stolen_accounting(void)
132 *snap = state; 132 *snap = state;
133 133
134 /* Add the appropriate number of ticks of stolen time, 134 /* Add the appropriate number of ticks of stolen time,
135 including any left-overs from last time. Passing NULL to 135 including any left-overs from last time. */
136 account_steal_time accounts the time as stolen. */
137 stolen = runnable + offline + __get_cpu_var(residual_stolen); 136 stolen = runnable + offline + __get_cpu_var(residual_stolen);
138 137
139 if (stolen < 0) 138 if (stolen < 0)
@@ -141,11 +140,10 @@ static void do_stolen_accounting(void)
141 140
142 ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen); 141 ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
143 __get_cpu_var(residual_stolen) = stolen; 142 __get_cpu_var(residual_stolen) = stolen;
144 account_steal_time(NULL, ticks); 143 account_steal_ticks(ticks);
145 144
146 /* Add the appropriate number of ticks of blocked time, 145 /* Add the appropriate number of ticks of blocked time,
147 including any left-overs from last time. Passing idle to 146 including any left-overs from last time. */
148 account_steal_time accounts the time as idle/wait. */
149 blocked += __get_cpu_var(residual_blocked); 147 blocked += __get_cpu_var(residual_blocked);
150 148
151 if (blocked < 0) 149 if (blocked < 0)
@@ -153,7 +151,7 @@ static void do_stolen_accounting(void)
153 151
154 ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked); 152 ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked);
155 __get_cpu_var(residual_blocked) = blocked; 153 __get_cpu_var(residual_blocked) = blocked;
156 account_steal_time(idle_task(smp_processor_id()), ticks); 154 account_idle_ticks(ticks);
157} 155}
158 156
159/* 157/*
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index c78a459662a6..570d20413119 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -81,6 +81,11 @@ static inline unsigned int kstat_irqs(unsigned int irq)
81extern unsigned long long task_delta_exec(struct task_struct *); 81extern unsigned long long task_delta_exec(struct task_struct *);
82extern void account_user_time(struct task_struct *, cputime_t, cputime_t); 82extern void account_user_time(struct task_struct *, cputime_t, cputime_t);
83extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t); 83extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t);
84extern void account_steal_time(struct task_struct *, cputime_t); 84extern void account_steal_time(cputime_t);
85extern void account_idle_time(cputime_t);
86
87extern void account_process_tick(struct task_struct *, int user);
88extern void account_steal_ticks(unsigned long ticks);
89extern void account_idle_ticks(unsigned long ticks);
85 90
86#endif /* _LINUX_KERNEL_STAT_H */ 91#endif /* _LINUX_KERNEL_STAT_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8395e715809d..b475d4db8053 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -284,7 +284,6 @@ long io_schedule_timeout(long timeout);
284 284
285extern void cpu_init (void); 285extern void cpu_init (void);
286extern void trap_init(void); 286extern void trap_init(void);
287extern void account_process_tick(struct task_struct *task, int user);
288extern void update_process_times(int user); 287extern void update_process_times(int user);
289extern void scheduler_tick(void); 288extern void scheduler_tick(void);
290 289
diff --git a/kernel/sched.c b/kernel/sched.c
index 5b03679ff712..635eaffe1e4c 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4139,7 +4139,6 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
4139 cputime_t cputime, cputime_t cputime_scaled) 4139 cputime_t cputime, cputime_t cputime_scaled)
4140{ 4140{
4141 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; 4141 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
4142 struct rq *rq = this_rq();
4143 cputime64_t tmp; 4142 cputime64_t tmp;
4144 4143
4145 if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) { 4144 if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
@@ -4158,37 +4157,84 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
4158 cpustat->irq = cputime64_add(cpustat->irq, tmp); 4157 cpustat->irq = cputime64_add(cpustat->irq, tmp);
4159 else if (softirq_count()) 4158 else if (softirq_count())
4160 cpustat->softirq = cputime64_add(cpustat->softirq, tmp); 4159 cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
4161 else if (p != rq->idle)
4162 cpustat->system = cputime64_add(cpustat->system, tmp);
4163 else if (atomic_read(&rq->nr_iowait) > 0)
4164 cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
4165 else 4160 else
4166 cpustat->idle = cputime64_add(cpustat->idle, tmp); 4161 cpustat->system = cputime64_add(cpustat->system, tmp);
4162
4167 /* Account for system time used */ 4163 /* Account for system time used */
4168 acct_update_integrals(p); 4164 acct_update_integrals(p);
4169} 4165}
4170 4166
4171/* 4167/*
4172 * Account for involuntary wait time. 4168 * Account for involuntary wait time.
4173 * @p: the process from which the cpu time has been stolen
4174 * @steal: the cpu time spent in involuntary wait 4169 * @steal: the cpu time spent in involuntary wait
4175 */ 4170 */
4176void account_steal_time(struct task_struct *p, cputime_t steal) 4171void account_steal_time(cputime_t cputime)
4172{
4173 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
4174 cputime64_t cputime64 = cputime_to_cputime64(cputime);
4175
4176 cpustat->steal = cputime64_add(cpustat->steal, cputime64);
4177}
4178
4179/*
4180 * Account for idle time.
4181 * @cputime: the cpu time spent in idle wait
4182 */
4183void account_idle_time(cputime_t cputime)
4177{ 4184{
4178 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; 4185 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
4179 cputime64_t tmp = cputime_to_cputime64(steal); 4186 cputime64_t cputime64 = cputime_to_cputime64(cputime);
4180 struct rq *rq = this_rq(); 4187 struct rq *rq = this_rq();
4181 4188
4182 if (p == rq->idle) { 4189 if (atomic_read(&rq->nr_iowait) > 0)
4183 p->stime = cputime_add(p->stime, steal); 4190 cpustat->iowait = cputime64_add(cpustat->iowait, cputime64);
4184 if (atomic_read(&rq->nr_iowait) > 0) 4191 else
4185 cpustat->iowait = cputime64_add(cpustat->iowait, tmp); 4192 cpustat->idle = cputime64_add(cpustat->idle, cputime64);
4186 else 4193}
4187 cpustat->idle = cputime64_add(cpustat->idle, tmp); 4194
4188 } else 4195#ifndef CONFIG_VIRT_CPU_ACCOUNTING
4189 cpustat->steal = cputime64_add(cpustat->steal, tmp); 4196
4197/*
4198 * Account a single tick of cpu time.
4199 * @p: the process that the cpu time gets accounted to
4200 * @user_tick: indicates if the tick is a user or a system tick
4201 */
4202void account_process_tick(struct task_struct *p, int user_tick)
4203{
4204 cputime_t one_jiffy = jiffies_to_cputime(1);
4205 cputime_t one_jiffy_scaled = cputime_to_scaled(one_jiffy);
4206 struct rq *rq = this_rq();
4207
4208 if (user_tick)
4209 account_user_time(p, one_jiffy, one_jiffy_scaled);
4210 else if (p != rq->idle)
4211 account_system_time(p, HARDIRQ_OFFSET, one_jiffy,
4212 one_jiffy_scaled);
4213 else
4214 account_idle_time(one_jiffy);
4215}
4216
4217/*
4218 * Account multiple ticks of steal time.
4219 * @p: the process from which the cpu time has been stolen
4220 * @ticks: number of stolen ticks
4221 */
4222void account_steal_ticks(unsigned long ticks)
4223{
4224 account_steal_time(jiffies_to_cputime(ticks));
4225}
4226
4227/*
4228 * Account multiple ticks of idle time.
 4229 * @ticks: number of idle ticks
4230 */
4231void account_idle_ticks(unsigned long ticks)
4232{
4233 account_idle_time(jiffies_to_cputime(ticks));
4190} 4234}
4191 4235
4236#endif
4237
4192/* 4238/*
4193 * Use precise platform statistics if available: 4239 * Use precise platform statistics if available:
4194 */ 4240 */
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 1f2fce2479fe..611fa4c0baab 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -419,8 +419,9 @@ void tick_nohz_restart_sched_tick(void)
419{ 419{
420 int cpu = smp_processor_id(); 420 int cpu = smp_processor_id();
421 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); 421 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
422#ifndef CONFIG_VIRT_CPU_ACCOUNTING
422 unsigned long ticks; 423 unsigned long ticks;
423 cputime_t cputime; 424#endif
424 ktime_t now; 425 ktime_t now;
425 426
426 local_irq_disable(); 427 local_irq_disable();
@@ -442,6 +443,7 @@ void tick_nohz_restart_sched_tick(void)
442 tick_do_update_jiffies64(now); 443 tick_do_update_jiffies64(now);
443 cpu_clear(cpu, nohz_cpu_mask); 444 cpu_clear(cpu, nohz_cpu_mask);
444 445
446#ifndef CONFIG_VIRT_CPU_ACCOUNTING
445 /* 447 /*
446 * We stopped the tick in idle. Update process times would miss the 448 * We stopped the tick in idle. Update process times would miss the
447 * time we slept as update_process_times does only a 1 tick 449 * time we slept as update_process_times does only a 1 tick
@@ -451,12 +453,9 @@ void tick_nohz_restart_sched_tick(void)
451 /* 453 /*
452 * We might be one off. Do not randomly account a huge number of ticks! 454 * We might be one off. Do not randomly account a huge number of ticks!
453 */ 455 */
454 if (ticks && ticks < LONG_MAX) { 456 if (ticks && ticks < LONG_MAX)
455 add_preempt_count(HARDIRQ_OFFSET); 457 account_idle_ticks(ticks);
456 cputime = jiffies_to_cputime(ticks); 458#endif
457 account_system_time(current, HARDIRQ_OFFSET, cputime, cputime);
458 sub_preempt_count(HARDIRQ_OFFSET);
459 }
460 459
461 touch_softlockup_watchdog(); 460 touch_softlockup_watchdog();
462 /* 461 /*
diff --git a/kernel/timer.c b/kernel/timer.c
index b5efb528aa1d..dee3f641a7a7 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1018,19 +1018,6 @@ unsigned long get_next_timer_interrupt(unsigned long now)
1018} 1018}
1019#endif 1019#endif
1020 1020
1021#ifndef CONFIG_VIRT_CPU_ACCOUNTING
1022void account_process_tick(struct task_struct *p, int user_tick)
1023{
1024 cputime_t one_jiffy = jiffies_to_cputime(1);
1025
1026 if (user_tick)
1027 account_user_time(p, one_jiffy, cputime_to_scaled(one_jiffy));
1028 else
1029 account_system_time(p, HARDIRQ_OFFSET, one_jiffy,
1030 cputime_to_scaled(one_jiffy));
1031}
1032#endif
1033
1034/* 1021/*
1035 * Called from the timer interrupt handler to charge one tick to the current 1022 * Called from the timer interrupt handler to charge one tick to the current
1036 * process. user_tick is 1 if the tick is user time, 0 for system. 1023 * process. user_tick is 1 if the tick is user time, 0 for system.