author     Ingo Molnar <mingo@elte.hu>    2008-12-17 08:10:57 -0500
committer  Ingo Molnar <mingo@elte.hu>    2008-12-23 06:45:14 -0500
commit     aa9c4c0f967fdb482ea95e8473ec3d201e6e0781
tree       8223d34630b7d3130825e8a2197e9bb51c34b7fa
parent     7671581f1666ef4b54a1c1e598c51ac44c060a9b
perfcounters: fix task clock counter
Impact: fix per task clock counter precision

Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--  include/linux/kernel_stat.h |  8
-rw-r--r--  kernel/exit.c               | 17
-rw-r--r--  kernel/perf_counter.c       | 70
-rw-r--r--  kernel/sched.c              | 49
4 files changed, 120 insertions(+), 24 deletions(-)
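
In short, the per task clock counter is now derived from the task's banked runtime (se.sum_exec_runtime) plus the exec-time delta that has not yet been banked, read via the new __task_delta_exec() helper while holding the current runqueue lock with IRQs disabled. The fragment below is a minimal caller-side sketch of that pattern, assuming kernel context and the helpers added by this patch; precise_task_clock() itself is a hypothetical illustration, not part of the change (the patch open-codes this in task_clock_perf_counter_val(), see the hunks below).

/*
 * Sketch only (kernel context; the helpers are declared in
 * <linux/kernel_stat.h> by this patch). Assumes @curr is the task
 * currently running on this CPU, as __task_delta_exec() requires.
 */
static u64 precise_task_clock(struct task_struct *curr)
{
	unsigned long flags;
	u64 now;

	curr_rq_lock_irq_save(&flags);
	/* update=1 forces update_rq_clock() so the delta is current: */
	now = curr->se.sum_exec_runtime + __task_delta_exec(curr, 1);
	curr_rq_unlock_irq_restore(&flags);

	return now;
}

Passing update=0 skips the runqueue clock refresh; the enable/disable paths below use that form because they only need a consistent baseline, while the read path passes 1 to get the freshest value.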
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 4a145caeee07..1b2e3242497c 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -66,7 +66,15 @@ static inline unsigned int kstat_irqs(unsigned int irq)
 	return sum;
 }
 
+
+/*
+ * Lock/unlock the current runqueue - to extract task statistics:
+ */
+extern void curr_rq_lock_irq_save(unsigned long *flags);
+extern void curr_rq_unlock_irq_restore(unsigned long *flags);
+extern unsigned long long __task_delta_exec(struct task_struct *tsk, int update);
 extern unsigned long long task_delta_exec(struct task_struct *);
+
 extern void account_user_time(struct task_struct *, cputime_t);
 extern void account_user_time_scaled(struct task_struct *, cputime_t);
 extern void account_system_time(struct task_struct *, int, cputime_t);
diff --git a/kernel/exit.c b/kernel/exit.c
index d336c90a5f13..244edfd96865 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -922,6 +922,12 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
 	forget_original_parent(tsk);
 	exit_task_namespaces(tsk);
 
+	/*
+	 * Flush inherited counters to the parent - before the parent
+	 * gets woken up by child-exit notifications.
+	 */
+	perf_counter_exit_task(tsk);
+
 	write_lock_irq(&tasklist_lock);
 	if (group_dead)
 		kill_orphaned_pgrp(tsk->group_leader, NULL);
@@ -1093,11 +1099,6 @@ NORET_TYPE void do_exit(long code)
 	mpol_put(tsk->mempolicy);
 	tsk->mempolicy = NULL;
 #endif
-	/*
-	 * These must happen late, after the PID is not
-	 * hashed anymore, but still at a point that may sleep:
-	 */
-	perf_counter_exit_task(tsk);
 #ifdef CONFIG_FUTEX
 	if (unlikely(!list_empty(&tsk->pi_state_list)))
 		exit_pi_state_list(tsk);
@@ -1121,6 +1122,12 @@ NORET_TYPE void do_exit(long code)
 	if (tsk->splice_pipe)
 		__free_pipe_info(tsk->splice_pipe);
 
+	/*
+	 * These must happen late, after the PID is not
+	 * hashed anymore, but still at a point that may sleep:
+	 */
+	perf_counter_exit_task(tsk);
+
 	preempt_disable();
 	/* causes final put_task_struct in finish_task_switch(). */
 	tsk->state = TASK_DEAD;
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 961d651aa574..f1110ac1267b 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -18,6 +18,7 @@
 #include <linux/uaccess.h>
 #include <linux/syscalls.h>
 #include <linux/anon_inodes.h>
+#include <linux/kernel_stat.h>
 #include <linux/perf_counter.h>
 
 /*
@@ -106,7 +107,8 @@ static void __perf_counter_remove_from_context(void *info)
 	if (ctx->task && cpuctx->task_ctx != ctx)
 		return;
 
-	spin_lock_irqsave(&ctx->lock, flags);
+	curr_rq_lock_irq_save(&flags);
+	spin_lock(&ctx->lock);
 
 	if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
 		counter->hw_ops->disable(counter);
@@ -135,7 +137,8 @@ static void __perf_counter_remove_from_context(void *info)
 			perf_max_counters - perf_reserved_percpu);
 	}
 
-	spin_unlock_irqrestore(&ctx->lock, flags);
+	spin_unlock(&ctx->lock);
+	curr_rq_unlock_irq_restore(&flags);
 }
 
 
@@ -209,7 +212,8 @@ static void __perf_install_in_context(void *info)
 	if (ctx->task && cpuctx->task_ctx != ctx)
 		return;
 
-	spin_lock_irqsave(&ctx->lock, flags);
+	curr_rq_lock_irq_save(&flags);
+	spin_lock(&ctx->lock);
 
 	/*
 	 * Protect the list operation against NMI by disabling the
@@ -232,7 +236,8 @@ static void __perf_install_in_context(void *info)
 	if (!ctx->task && cpuctx->max_pertask)
 		cpuctx->max_pertask--;
 
-	spin_unlock_irqrestore(&ctx->lock, flags);
+	spin_unlock(&ctx->lock);
+	curr_rq_unlock_irq_restore(&flags);
 }
 
 /*
@@ -438,15 +443,19 @@ int perf_counter_task_disable(void)
 	struct task_struct *curr = current;
 	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
 	struct perf_counter *counter;
+	unsigned long flags;
 	u64 perf_flags;
 	int cpu;
 
 	if (likely(!ctx->nr_counters))
 		return 0;
 
-	local_irq_disable();
+	curr_rq_lock_irq_save(&flags);
 	cpu = smp_processor_id();
 
+	/* force the update of the task clock: */
+	__task_delta_exec(curr, 1);
+
 	perf_counter_task_sched_out(curr, cpu);
 
 	spin_lock(&ctx->lock);
@@ -463,7 +472,7 @@ int perf_counter_task_disable(void)
 
 	spin_unlock(&ctx->lock);
 
-	local_irq_enable();
+	curr_rq_unlock_irq_restore(&flags);
 
 	return 0;
 }
@@ -473,15 +482,19 @@ int perf_counter_task_enable(void)
 	struct task_struct *curr = current;
 	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
 	struct perf_counter *counter;
+	unsigned long flags;
 	u64 perf_flags;
 	int cpu;
 
 	if (likely(!ctx->nr_counters))
 		return 0;
 
-	local_irq_disable();
+	curr_rq_lock_irq_save(&flags);
 	cpu = smp_processor_id();
 
+	/* force the update of the task clock: */
+	__task_delta_exec(curr, 1);
+
 	spin_lock(&ctx->lock);
 
 	/*
@@ -493,6 +506,7 @@ int perf_counter_task_enable(void)
 		if (counter->state != PERF_COUNTER_STATE_OFF)
 			continue;
 		counter->state = PERF_COUNTER_STATE_INACTIVE;
+		counter->hw_event.disabled = 0;
 	}
 	hw_perf_restore(perf_flags);
 
@@ -500,7 +514,7 @@ int perf_counter_task_enable(void)
 
 	perf_counter_task_sched_in(curr, cpu);
 
-	local_irq_enable();
+	curr_rq_unlock_irq_restore(&flags);
 
 	return 0;
 }
@@ -540,8 +554,11 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 static void __read(void *info)
 {
 	struct perf_counter *counter = info;
+	unsigned long flags;
 
+	curr_rq_lock_irq_save(&flags);
 	counter->hw_ops->read(counter);
+	curr_rq_unlock_irq_restore(&flags);
 }
 
 static u64 perf_counter_read(struct perf_counter *counter)
@@ -860,13 +877,27 @@ static const struct hw_perf_counter_ops perf_ops_cpu_clock = {
 	.read		= cpu_clock_perf_counter_read,
 };
 
-static void task_clock_perf_counter_update(struct perf_counter *counter)
+/*
+ * Called from within the scheduler:
+ */
+static u64 task_clock_perf_counter_val(struct perf_counter *counter, int update)
 {
-	u64 prev, now;
+	struct task_struct *curr = counter->task;
+	u64 delta;
+
+	WARN_ON_ONCE(counter->task != current);
+
+	delta = __task_delta_exec(curr, update);
+
+	return curr->se.sum_exec_runtime + delta;
+}
+
+static void task_clock_perf_counter_update(struct perf_counter *counter, u64 now)
+{
+	u64 prev;
 	s64 delta;
 
 	prev = atomic64_read(&counter->hw.prev_count);
-	now = current->se.sum_exec_runtime;
 
 	atomic64_set(&counter->hw.prev_count, now);
 
@@ -877,17 +908,23 @@ static void task_clock_perf_counter_update(struct perf_counter *counter)
 
 static void task_clock_perf_counter_read(struct perf_counter *counter)
 {
-	task_clock_perf_counter_update(counter);
+	u64 now = task_clock_perf_counter_val(counter, 1);
+
+	task_clock_perf_counter_update(counter, now);
 }
 
 static void task_clock_perf_counter_enable(struct perf_counter *counter)
 {
-	atomic64_set(&counter->hw.prev_count, current->se.sum_exec_runtime);
+	u64 now = task_clock_perf_counter_val(counter, 0);
+
+	atomic64_set(&counter->hw.prev_count, now);
 }
 
 static void task_clock_perf_counter_disable(struct perf_counter *counter)
 {
-	task_clock_perf_counter_update(counter);
+	u64 now = task_clock_perf_counter_val(counter, 0);
+
+	task_clock_perf_counter_update(counter, now);
 }
 
 static const struct hw_perf_counter_ops perf_ops_task_clock = {
@@ -1267,6 +1304,7 @@ __perf_counter_exit_task(struct task_struct *child,
 {
 	struct perf_counter *parent_counter;
 	u64 parent_val, child_val;
+	unsigned long flags;
 	u64 perf_flags;
 
 	/*
@@ -1275,7 +1313,7 @@ __perf_counter_exit_task(struct task_struct *child,
 	 * Be careful about zapping the list - IRQ/NMI context
 	 * could still be processing it:
 	 */
-	local_irq_disable();
+	curr_rq_lock_irq_save(&flags);
 	perf_flags = hw_perf_save_disable();
 
 	if (child_counter->state == PERF_COUNTER_STATE_ACTIVE) {
@@ -1294,7 +1332,7 @@ __perf_counter_exit_task(struct task_struct *child,
 	list_del_init(&child_counter->list_entry);
 
 	hw_perf_restore(perf_flags);
-	local_irq_enable();
+	curr_rq_unlock_irq_restore(&flags);
 
 	parent_counter = child_counter->parent;
 	/*
diff --git a/kernel/sched.c b/kernel/sched.c
index 382cfdb5e38d..4d84ff4c8774 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -638,7 +638,7 @@ static inline int cpu_of(struct rq *rq)
 #define task_rq(p)		cpu_rq(task_cpu(p))
 #define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
 
-static inline void update_rq_clock(struct rq *rq)
+inline void update_rq_clock(struct rq *rq)
 {
 	rq->clock = sched_clock_cpu(cpu_of(rq));
 }
@@ -969,6 +969,26 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
 	}
 }
 
+void curr_rq_lock_irq_save(unsigned long *flags)
+	__acquires(rq->lock)
+{
+	struct rq *rq;
+
+	local_irq_save(*flags);
+	rq = cpu_rq(smp_processor_id());
+	spin_lock(&rq->lock);
+}
+
+void curr_rq_unlock_irq_restore(unsigned long *flags)
+	__releases(rq->lock)
+{
+	struct rq *rq;
+
+	rq = cpu_rq(smp_processor_id());
+	spin_unlock(&rq->lock);
+	local_irq_restore(*flags);
+}
+
 void task_rq_unlock_wait(struct task_struct *p)
 {
 	struct rq *rq = task_rq(p);
@@ -2558,7 +2578,6 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev,
 		    struct task_struct *next)
 {
 	fire_sched_out_preempt_notifiers(prev, next);
-	perf_counter_task_sched_out(prev, cpu_of(rq));
 	prepare_lock_switch(rq, next);
 	prepare_arch_switch(next);
 }
@@ -4093,6 +4112,29 @@ EXPORT_PER_CPU_SYMBOL(kstat);
  * Return any ns on the sched_clock that have not yet been banked in
  * @p in case that task is currently running.
  */
+unsigned long long __task_delta_exec(struct task_struct *p, int update)
+{
+	s64 delta_exec;
+	struct rq *rq;
+
+	rq = task_rq(p);
+	WARN_ON_ONCE(!runqueue_is_locked());
+	WARN_ON_ONCE(!task_current(rq, p));
+
+	if (update)
+		update_rq_clock(rq);
+
+	delta_exec = rq->clock - p->se.exec_start;
+
+	WARN_ON_ONCE(delta_exec < 0);
+
+	return delta_exec;
+}
+
+/*
+ * Return any ns on the sched_clock that have not yet been banked in
+ * @p in case that task is currently running.
+ */
 unsigned long long task_delta_exec(struct task_struct *p)
 {
 	unsigned long flags;
@@ -4316,13 +4358,13 @@ void scheduler_tick(void)
 	update_rq_clock(rq);
 	update_cpu_load(rq);
 	curr->sched_class->task_tick(rq, curr, 0);
+	perf_counter_task_tick(curr, cpu);
 	spin_unlock(&rq->lock);
 
 #ifdef CONFIG_SMP
 	rq->idle_at_tick = idle_cpu(cpu);
 	trigger_load_balance(rq, cpu);
 #endif
-	perf_counter_task_tick(curr, cpu);
 }
 
 #if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
@@ -4512,6 +4554,7 @@ need_resched_nonpreemptible:
 
 	if (likely(prev != next)) {
 		sched_info_switch(prev, next);
+		perf_counter_task_sched_out(prev, cpu);
 
 		rq->nr_switches++;
 		rq->curr = next;