author	Rik van Riel <riel@redhat.com>	2016-07-13 10:50:01 -0400
committer	Ingo Molnar <mingo@kernel.org>	2016-07-14 04:42:34 -0400
commit	57430218317e5b280a80582a139b26029c25de6c (patch)
tree	41ae977e5963cb147c67a4acf2eab6319cdfcbe7
parent	cefef3a76287c9b51e946d312aea1637bb3a57ff (diff)
sched/cputime: Count actually elapsed irq & softirq time
Currently, if there was any irq or softirq time during 'ticks' jiffies,
the entire period will be accounted as irq or softirq time.

This is inaccurate if only a subset of the time was actually spent
handling irqs, and could conceivably mis-count all of the ticks during
a period as irq time, when there was some irq and some softirq time.

This can actually happen when irqtime_account_process_tick is called
from account_idle_ticks, which can pass a larger number of ticks down
all at once.

Fix this by changing irqtime_account_hi_update(),
irqtime_account_si_update(), and steal_account_process_tick() to work
with cputime_t time units, and return the amount of time spent in each
mode.

Rename steal_account_process_tick() to steal_account_process_time(),
to reflect that time is now accounted in cputime_t, instead of ticks.

Additionally, have irqtime_account_process_tick() take into account
how much time was spent in each of steal, irq, and softirq time. The
latter could help improve the accuracy of cputime accounting when
returning from idle on a NO_HZ_IDLE CPU.

Properly accounting how much time was spent in hardirq and softirq
time will also allow the NO_HZ_FULL code to re-use these same
functions for hardirq and softirq accounting.

Signed-off-by: Rik van Riel <riel@redhat.com>
[ Make nsecs_to_cputime64() actually return cputime64_t. ]
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Radim Krcmar <rkrcmar@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Link: http://lkml.kernel.org/r/1468421405-20056-2-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
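To make the scheme concrete, here is a minimal userspace C sketch of the capping logic, assuming plain u64 nanoseconds in place of cputime_t and pre-tracked pending counters; every name in it (account_other_time_demo, pending_steal, pending_hi, pending_si) is illustrative, not the kernel's:

#include <stdint.h>
#include <stdio.h>

typedef uint64_t u64;

/* Unaccounted steal/hardirq/softirq time since the last tick, in ns. */
static u64 pending_steal, pending_hi, pending_si;

static u64 min_u64(u64 a, u64 b) { return a < b ? a : b; }

/*
 * Consume up to @max ns of "other" time: steal first, then hardirq,
 * then softirq. The return value never exceeds @max -- this capping
 * replaces the old all-or-nothing 0/1 flags returned by
 * irqtime_account_hi_update()/irqtime_account_si_update().
 */
static u64 account_other_time_demo(u64 max)
{
	u64 accounted = 0, t;

	t = min_u64(pending_steal, max - accounted);
	pending_steal -= t;
	accounted += t;

	t = min_u64(pending_hi, max - accounted);
	pending_hi -= t;
	accounted += t;

	t = min_u64(pending_si, max - accounted);
	pending_si -= t;
	accounted += t;

	return accounted;
}

int main(void)
{
	/* Four 1 ms ticks elapsed while the CPU was "idle". */
	u64 elapsed = 4 * 1000000ull;

	pending_steal = 500000;		/* 0.5 tick stolen by the hypervisor */
	pending_hi    = 1200000;	/* 1.2 ticks of hardirq time */
	pending_si    = 300000;		/* 0.3 tick of softirq time */

	u64 other = account_other_time_demo(elapsed);

	/* Pre-patch logic could charge all 4 ticks to irq time; here only
	 * the 2.0 ticks actually spent elsewhere are subtracted. */
	printf("other = %llu ns, left for user/system/idle = %llu ns\n",
	       (unsigned long long) other,
	       (unsigned long long) (elapsed - other));
	return 0;
}

In the sketch, any unconsumed remainder stays in the pending counters and is picked up on a later call, mirroring how the patch carries sub-maxtime remainders forward via prev_steal_time and the cpustat deltas.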
-rw-r--r--	include/asm-generic/cputime_nsecs.h	2
-rw-r--r--	kernel/sched/cputime.c	124
2 files changed, 79 insertions, 47 deletions
diff --git a/include/asm-generic/cputime_nsecs.h b/include/asm-generic/cputime_nsecs.h
index 0f1c6f315cdc..a84e28e0c634 100644
--- a/include/asm-generic/cputime_nsecs.h
+++ b/include/asm-generic/cputime_nsecs.h
@@ -50,6 +50,8 @@ typedef u64 __nocast cputime64_t;
 	(__force u64)(__ct)
 #define nsecs_to_cputime(__nsecs)	\
 	(__force cputime_t)(__nsecs)
+#define nsecs_to_cputime64(__nsecs)	\
+	(__force cputime64_t)(__nsecs)
 
 
 /*
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 3d60e5d76fdb..db82ae12cf01 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -79,40 +79,50 @@ void irqtime_account_irq(struct task_struct *curr)
 }
 EXPORT_SYMBOL_GPL(irqtime_account_irq);
 
-static int irqtime_account_hi_update(void)
+static cputime_t irqtime_account_hi_update(cputime_t maxtime)
 {
 	u64 *cpustat = kcpustat_this_cpu->cpustat;
 	unsigned long flags;
-	u64 latest_ns;
-	int ret = 0;
+	cputime_t irq_cputime;
 
 	local_irq_save(flags);
-	latest_ns = this_cpu_read(cpu_hardirq_time);
-	if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_IRQ])
-		ret = 1;
+	irq_cputime = nsecs_to_cputime64(this_cpu_read(cpu_hardirq_time)) -
+		      cpustat[CPUTIME_IRQ];
+	irq_cputime = min(irq_cputime, maxtime);
+	cpustat[CPUTIME_IRQ] += irq_cputime;
 	local_irq_restore(flags);
-	return ret;
+	return irq_cputime;
 }
 
-static int irqtime_account_si_update(void)
+static cputime_t irqtime_account_si_update(cputime_t maxtime)
 {
 	u64 *cpustat = kcpustat_this_cpu->cpustat;
 	unsigned long flags;
-	u64 latest_ns;
-	int ret = 0;
+	cputime_t softirq_cputime;
 
 	local_irq_save(flags);
-	latest_ns = this_cpu_read(cpu_softirq_time);
-	if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_SOFTIRQ])
-		ret = 1;
+	softirq_cputime = nsecs_to_cputime64(this_cpu_read(cpu_softirq_time)) -
+			  cpustat[CPUTIME_SOFTIRQ];
+	softirq_cputime = min(softirq_cputime, maxtime);
+	cpustat[CPUTIME_SOFTIRQ] += softirq_cputime;
 	local_irq_restore(flags);
-	return ret;
+	return softirq_cputime;
 }
 
 #else /* CONFIG_IRQ_TIME_ACCOUNTING */
 
 #define sched_clock_irqtime	(0)
 
+static cputime_t irqtime_account_hi_update(cputime_t dummy)
+{
+	return 0;
+}
+
+static cputime_t irqtime_account_si_update(cputime_t dummy)
+{
+	return 0;
+}
+
 #endif /* !CONFIG_IRQ_TIME_ACCOUNTING */
 
 static inline void task_group_account_field(struct task_struct *p, int index,
@@ -257,32 +267,45 @@ void account_idle_time(cputime_t cputime)
 	cpustat[CPUTIME_IDLE] += (__force u64) cputime;
 }
 
-static __always_inline unsigned long steal_account_process_tick(unsigned long max_jiffies)
+static __always_inline cputime_t steal_account_process_time(cputime_t maxtime)
 {
 #ifdef CONFIG_PARAVIRT
 	if (static_key_false(&paravirt_steal_enabled)) {
+		cputime_t steal_cputime;
 		u64 steal;
-		unsigned long steal_jiffies;
 
 		steal = paravirt_steal_clock(smp_processor_id());
 		steal -= this_rq()->prev_steal_time;
 
-		/*
-		 * steal is in nsecs but our caller is expecting steal
-		 * time in jiffies. Lets cast the result to jiffies
-		 * granularity and account the rest on the next rounds.
-		 */
-		steal_jiffies = min(nsecs_to_jiffies(steal), max_jiffies);
-		this_rq()->prev_steal_time += jiffies_to_nsecs(steal_jiffies);
+		steal_cputime = min(nsecs_to_cputime(steal), maxtime);
+		account_steal_time(steal_cputime);
+		this_rq()->prev_steal_time += cputime_to_nsecs(steal_cputime);
 
-		account_steal_time(jiffies_to_cputime(steal_jiffies));
-		return steal_jiffies;
+		return steal_cputime;
 	}
 #endif
 	return 0;
 }
 
 /*
+ * Account how much elapsed time was spent in steal, irq, or softirq time.
+ */
+static inline cputime_t account_other_time(cputime_t max)
+{
+	cputime_t accounted;
+
+	accounted = steal_account_process_time(max);
+
+	if (accounted < max)
+		accounted += irqtime_account_hi_update(max - accounted);
+
+	if (accounted < max)
+		accounted += irqtime_account_si_update(max - accounted);
+
+	return accounted;
+}
+
+/*
  * Accumulate raw cputime values of dead tasks (sig->[us]time) and live
  * tasks (sum on group iteration) belonging to @tsk's group.
  */
@@ -342,21 +365,23 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
 static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
 					 struct rq *rq, int ticks)
 {
-	cputime_t scaled = cputime_to_scaled(cputime_one_jiffy);
-	u64 cputime = (__force u64) cputime_one_jiffy;
-	u64 *cpustat = kcpustat_this_cpu->cpustat;
+	u64 cputime = (__force u64) cputime_one_jiffy * ticks;
+	cputime_t scaled, other;
 
-	if (steal_account_process_tick(ULONG_MAX))
+	/*
+	 * When returning from idle, many ticks can get accounted at
+	 * once, including some ticks of steal, irq, and softirq time.
+	 * Subtract those ticks from the amount of time accounted to
+	 * idle, or potentially user or system time. Due to rounding,
+	 * other time can exceed ticks occasionally.
+	 */
+	other = account_other_time(cputime);
+	if (other >= cputime)
 		return;
+	cputime -= other;
+	scaled = cputime_to_scaled(cputime);
 
-	cputime *= ticks;
-	scaled *= ticks;
-
-	if (irqtime_account_hi_update()) {
-		cpustat[CPUTIME_IRQ] += cputime;
-	} else if (irqtime_account_si_update()) {
-		cpustat[CPUTIME_SOFTIRQ] += cputime;
-	} else if (this_cpu_ksoftirqd() == p) {
+	if (this_cpu_ksoftirqd() == p) {
 		/*
 		 * ksoftirqd time do not get accounted in cpu_softirq_time.
 		 * So, we have to handle it separately here.
@@ -466,7 +491,7 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime
  */
 void account_process_tick(struct task_struct *p, int user_tick)
 {
-	cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
+	cputime_t cputime, scaled, steal;
 	struct rq *rq = this_rq();
 
 	if (vtime_accounting_cpu_enabled())
@@ -477,16 +502,21 @@ void account_process_tick(struct task_struct *p, int user_tick)
 		return;
 	}
 
-	if (steal_account_process_tick(ULONG_MAX))
+	cputime = cputime_one_jiffy;
+	steal = steal_account_process_time(cputime);
+
+	if (steal >= cputime)
 		return;
 
+	cputime -= steal;
+	scaled = cputime_to_scaled(cputime);
+
 	if (user_tick)
-		account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
+		account_user_time(p, cputime, scaled);
 	else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
-		account_system_time(p, HARDIRQ_OFFSET, cputime_one_jiffy,
-				    one_jiffy_scaled);
+		account_system_time(p, HARDIRQ_OFFSET, cputime, scaled);
 	else
-		account_idle_time(cputime_one_jiffy);
+		account_idle_time(cputime);
 }
 
 /*
@@ -681,14 +711,14 @@ static cputime_t vtime_delta(struct task_struct *tsk)
 static cputime_t get_vtime_delta(struct task_struct *tsk)
 {
 	unsigned long now = READ_ONCE(jiffies);
-	unsigned long delta_jiffies, steal_jiffies;
+	cputime_t delta, steal;
 
-	delta_jiffies = now - tsk->vtime_snap;
-	steal_jiffies = steal_account_process_tick(delta_jiffies);
+	delta = jiffies_to_cputime(now - tsk->vtime_snap);
+	steal = steal_account_process_time(delta);
 	WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE);
 	tsk->vtime_snap = now;
 
-	return jiffies_to_cputime(delta_jiffies - steal_jiffies);
+	return delta - steal;
 }
 
 static void __vtime_account_system(struct task_struct *tsk)