author		Ingo Molnar <mingo@elte.hu>	2008-12-17 08:10:57 -0500
committer	Ingo Molnar <mingo@elte.hu>	2008-12-23 06:45:14 -0500
commit		aa9c4c0f967fdb482ea95e8473ec3d201e6e0781 (patch)
tree		8223d34630b7d3130825e8a2197e9bb51c34b7fa
parent		7671581f1666ef4b54a1c1e598c51ac44c060a9b (diff)
perfcounters: fix task clock counter
Impact: fix per task clock counter precision
Signed-off-by: Ingo Molnar <mingo@elte.hu>
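
In brief, and as an illustrative sketch rather than the literal kernel code (the helper names below are the ones this patch introduces): before this change the task clock counter read current->se.sum_exec_runtime directly, which only advances when the scheduler banks runtime at a tick or context switch, so a read in the middle of a timeslice returned a stale value. After the change the counter value is the banked runtime plus the not-yet-banked delta, sampled under the current runqueue lock.

/*
 * Illustrative sketch of the fixed task clock value - not the literal
 * kernel code. It mirrors the task_clock_perf_counter_val() /
 * __task_delta_exec() pair added in the diff below; the caller is
 * assumed to hold the current runqueue lock.
 */
static u64 task_clock_sketch_val(struct task_struct *p, int update)
{
	/* runtime already banked by the scheduler */
	u64 banked = p->se.sum_exec_runtime;

	/* plus whatever has run since the last banking point */
	u64 unbanked = __task_delta_exec(p, update);

	return banked + unbanked;
}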
-rw-r--r--	include/linux/kernel_stat.h	|  8
-rw-r--r--	kernel/exit.c			| 17
-rw-r--r--	kernel/perf_counter.c		| 70
-rw-r--r--	kernel/sched.c			| 49
4 files changed, 120 insertions, 24 deletions
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 4a145caeee07..1b2e3242497c 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -66,7 +66,15 @@ static inline unsigned int kstat_irqs(unsigned int irq)
 	return sum;
 }
 
+
+/*
+ * Lock/unlock the current runqueue - to extract task statistics:
+ */
+extern void curr_rq_lock_irq_save(unsigned long *flags);
+extern void curr_rq_unlock_irq_restore(unsigned long *flags);
+extern unsigned long long __task_delta_exec(struct task_struct *tsk, int update);
 extern unsigned long long task_delta_exec(struct task_struct *);
+
 extern void account_user_time(struct task_struct *, cputime_t);
 extern void account_user_time_scaled(struct task_struct *, cputime_t);
 extern void account_system_time(struct task_struct *, int, cputime_t);
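
The three new declarations form a small API: lock the current CPU's runqueue, sample the task's not-yet-banked runtime, then unlock. A minimal usage sketch follows; sample_unbanked_ns() is a hypothetical helper, not part of the patch, and it assumes the task being sampled is the one running on this CPU.

/* Hypothetical caller of the helpers declared above. */
static u64 sample_unbanked_ns(struct task_struct *tsk)
{
	unsigned long flags;
	u64 delta;

	curr_rq_lock_irq_save(&flags);
	/* update=1 asks __task_delta_exec() to refresh the rq clock first */
	delta = __task_delta_exec(tsk, 1);
	curr_rq_unlock_irq_restore(&flags);

	return delta;
}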
diff --git a/kernel/exit.c b/kernel/exit.c
index d336c90a5f13..244edfd96865 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -922,6 +922,12 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
 	forget_original_parent(tsk);
 	exit_task_namespaces(tsk);
 
+	/*
+	 * Flush inherited counters to the parent - before the parent
+	 * gets woken up by child-exit notifications.
+	 */
+	perf_counter_exit_task(tsk);
+
 	write_lock_irq(&tasklist_lock);
 	if (group_dead)
 		kill_orphaned_pgrp(tsk->group_leader, NULL);
@@ -1093,11 +1099,6 @@ NORET_TYPE void do_exit(long code)
 	mpol_put(tsk->mempolicy);
 	tsk->mempolicy = NULL;
 #endif
-	/*
-	 * These must happen late, after the PID is not
-	 * hashed anymore, but still at a point that may sleep:
-	 */
-	perf_counter_exit_task(tsk);
 #ifdef CONFIG_FUTEX
 	if (unlikely(!list_empty(&tsk->pi_state_list)))
 		exit_pi_state_list(tsk);
@@ -1121,6 +1122,12 @@ NORET_TYPE void do_exit(long code)
 	if (tsk->splice_pipe)
 		__free_pipe_info(tsk->splice_pipe);
 
+	/*
+	 * These must happen late, after the PID is not
+	 * hashed anymore, but still at a point that may sleep:
+	 */
+	perf_counter_exit_task(tsk);
+
 	preempt_disable();
 	/* causes final put_task_struct in finish_task_switch(). */
 	tsk->state = TASK_DEAD;
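
The exit.c changes do two things: perf_counter_exit_task() is now also called from exit_notify(), so inherited counters are flushed to the parent before the parent can be woken by the child-exit notification, and the late call in do_exit() moves after the splice-pipe cleanup. The flush itself boils down to folding the child's accumulated value into the parent counter; the following is a condensed sketch of that step, not the literal __perf_counter_exit_task() body, and it assumes the counter's atomic count field used elsewhere in this subsystem.

/*
 * Condensed sketch of the "flush to parent" step: the child's
 * accumulated value is added into the parent counter before the
 * child counter goes away.
 */
static void sketch_flush_child_to_parent(struct perf_counter *parent_counter,
					 struct perf_counter *child_counter)
{
	u64 child_val = atomic64_read(&child_counter->count);

	atomic64_add(child_val, &parent_counter->count);
}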
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 961d651aa574..f1110ac1267b 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -18,6 +18,7 @@
 #include <linux/uaccess.h>
 #include <linux/syscalls.h>
 #include <linux/anon_inodes.h>
+#include <linux/kernel_stat.h>
 #include <linux/perf_counter.h>
 
 /*
@@ -106,7 +107,8 @@ static void __perf_counter_remove_from_context(void *info)
 	if (ctx->task && cpuctx->task_ctx != ctx)
 		return;
 
-	spin_lock_irqsave(&ctx->lock, flags);
+	curr_rq_lock_irq_save(&flags);
+	spin_lock(&ctx->lock);
 
 	if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
 		counter->hw_ops->disable(counter);
@@ -135,7 +137,8 @@ static void __perf_counter_remove_from_context(void *info)
 			perf_max_counters - perf_reserved_percpu);
 	}
 
-	spin_unlock_irqrestore(&ctx->lock, flags);
+	spin_unlock(&ctx->lock);
+	curr_rq_unlock_irq_restore(&flags);
 }
 
 
@@ -209,7 +212,8 @@ static void __perf_install_in_context(void *info)
 	if (ctx->task && cpuctx->task_ctx != ctx)
 		return;
 
-	spin_lock_irqsave(&ctx->lock, flags);
+	curr_rq_lock_irq_save(&flags);
+	spin_lock(&ctx->lock);
 
 	/*
 	 * Protect the list operation against NMI by disabling the
@@ -232,7 +236,8 @@ static void __perf_install_in_context(void *info)
 	if (!ctx->task && cpuctx->max_pertask)
 		cpuctx->max_pertask--;
 
-	spin_unlock_irqrestore(&ctx->lock, flags);
+	spin_unlock(&ctx->lock);
+	curr_rq_unlock_irq_restore(&flags);
 }
 
 /*
@@ -438,15 +443,19 @@ int perf_counter_task_disable(void)
 	struct task_struct *curr = current;
 	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
 	struct perf_counter *counter;
+	unsigned long flags;
 	u64 perf_flags;
 	int cpu;
 
 	if (likely(!ctx->nr_counters))
 		return 0;
 
-	local_irq_disable();
+	curr_rq_lock_irq_save(&flags);
 	cpu = smp_processor_id();
 
+	/* force the update of the task clock: */
+	__task_delta_exec(curr, 1);
+
 	perf_counter_task_sched_out(curr, cpu);
 
 	spin_lock(&ctx->lock);
@@ -463,7 +472,7 @@ int perf_counter_task_disable(void)
 
 	spin_unlock(&ctx->lock);
 
-	local_irq_enable();
+	curr_rq_unlock_irq_restore(&flags);
 
 	return 0;
 }
@@ -473,15 +482,19 @@ int perf_counter_task_enable(void)
 	struct task_struct *curr = current;
 	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
 	struct perf_counter *counter;
+	unsigned long flags;
 	u64 perf_flags;
 	int cpu;
 
 	if (likely(!ctx->nr_counters))
 		return 0;
 
-	local_irq_disable();
+	curr_rq_lock_irq_save(&flags);
 	cpu = smp_processor_id();
 
+	/* force the update of the task clock: */
+	__task_delta_exec(curr, 1);
+
 	spin_lock(&ctx->lock);
 
 	/*
@@ -493,6 +506,7 @@ int perf_counter_task_enable(void)
 		if (counter->state != PERF_COUNTER_STATE_OFF)
 			continue;
 		counter->state = PERF_COUNTER_STATE_INACTIVE;
+		counter->hw_event.disabled = 0;
 	}
 	hw_perf_restore(perf_flags);
 
@@ -500,7 +514,7 @@ int perf_counter_task_enable(void)
 
 	perf_counter_task_sched_in(curr, cpu);
 
-	local_irq_enable();
+	curr_rq_unlock_irq_restore(&flags);
 
 	return 0;
 }
@@ -540,8 +554,11 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 static void __read(void *info)
 {
 	struct perf_counter *counter = info;
+	unsigned long flags;
 
+	curr_rq_lock_irq_save(&flags);
 	counter->hw_ops->read(counter);
+	curr_rq_unlock_irq_restore(&flags);
 }
 
 static u64 perf_counter_read(struct perf_counter *counter)
@@ -860,13 +877,27 @@ static const struct hw_perf_counter_ops perf_ops_cpu_clock = {
 	.read		= cpu_clock_perf_counter_read,
 };
 
-static void task_clock_perf_counter_update(struct perf_counter *counter)
+/*
+ * Called from within the scheduler:
+ */
+static u64 task_clock_perf_counter_val(struct perf_counter *counter, int update)
 {
-	u64 prev, now;
+	struct task_struct *curr = counter->task;
+	u64 delta;
+
+	WARN_ON_ONCE(counter->task != current);
+
+	delta = __task_delta_exec(curr, update);
+
+	return curr->se.sum_exec_runtime + delta;
+}
+
+static void task_clock_perf_counter_update(struct perf_counter *counter, u64 now)
+{
+	u64 prev;
 	s64 delta;
 
 	prev = atomic64_read(&counter->hw.prev_count);
-	now = current->se.sum_exec_runtime;
 
 	atomic64_set(&counter->hw.prev_count, now);
 
@@ -877,17 +908,23 @@ static void task_clock_perf_counter_update(struct perf_counter *counter)
 
 static void task_clock_perf_counter_read(struct perf_counter *counter)
 {
-	task_clock_perf_counter_update(counter);
+	u64 now = task_clock_perf_counter_val(counter, 1);
+
+	task_clock_perf_counter_update(counter, now);
 }
 
 static void task_clock_perf_counter_enable(struct perf_counter *counter)
 {
-	atomic64_set(&counter->hw.prev_count, current->se.sum_exec_runtime);
+	u64 now = task_clock_perf_counter_val(counter, 0);
+
+	atomic64_set(&counter->hw.prev_count, now);
 }
 
 static void task_clock_perf_counter_disable(struct perf_counter *counter)
 {
-	task_clock_perf_counter_update(counter);
+	u64 now = task_clock_perf_counter_val(counter, 0);
+
+	task_clock_perf_counter_update(counter, now);
 }
 
 static const struct hw_perf_counter_ops perf_ops_task_clock = {
@@ -1267,6 +1304,7 @@ __perf_counter_exit_task(struct task_struct *child,
 {
 	struct perf_counter *parent_counter;
 	u64 parent_val, child_val;
+	unsigned long flags;
 	u64 perf_flags;
 
 	/*
@@ -1275,7 +1313,7 @@ __perf_counter_exit_task(struct task_struct *child,
 	 * Be careful about zapping the list - IRQ/NMI context
 	 * could still be processing it:
 	 */
-	local_irq_disable();
+	curr_rq_lock_irq_save(&flags);
 	perf_flags = hw_perf_save_disable();
 
 	if (child_counter->state == PERF_COUNTER_STATE_ACTIVE) {
@@ -1294,7 +1332,7 @@ __perf_counter_exit_task(struct task_struct *child,
 	list_del_init(&child_counter->list_entry);
 
 	hw_perf_restore(perf_flags);
-	local_irq_enable();
+	curr_rq_unlock_irq_restore(&flags);
 
 	parent_counter = child_counter->parent;
 	/*
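
The read path after this change reduces to one idea: compute the current task clock via task_clock_perf_counter_val(), then account the difference since the previous reading. Below is a compressed model of that sequence; the in-tree code reads and sets hw.prev_count with separate atomic64_read()/atomic64_set() calls, and an atomic exchange is used here only to keep the sketch short.

/*
 * Compressed model of task_clock_perf_counter_read() after this patch -
 * not the literal kernel code.
 */
static void model_task_clock_read(struct perf_counter *counter)
{
	u64 now  = task_clock_perf_counter_val(counter, 1);	/* refresh rq clock */
	u64 prev = atomic64_xchg(&counter->hw.prev_count, now);

	atomic64_add(now - prev, &counter->count);
}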
diff --git a/kernel/sched.c b/kernel/sched.c
index 382cfdb5e38d..4d84ff4c8774 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -638,7 +638,7 @@ static inline int cpu_of(struct rq *rq)
 #define task_rq(p)		cpu_rq(task_cpu(p))
 #define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
 
-static inline void update_rq_clock(struct rq *rq)
+inline void update_rq_clock(struct rq *rq)
 {
 	rq->clock = sched_clock_cpu(cpu_of(rq));
 }
@@ -969,6 +969,26 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
 	}
 }
 
+void curr_rq_lock_irq_save(unsigned long *flags)
+	__acquires(rq->lock)
+{
+	struct rq *rq;
+
+	local_irq_save(*flags);
+	rq = cpu_rq(smp_processor_id());
+	spin_lock(&rq->lock);
+}
+
+void curr_rq_unlock_irq_restore(unsigned long *flags)
+	__releases(rq->lock)
+{
+	struct rq *rq;
+
+	rq = cpu_rq(smp_processor_id());
+	spin_unlock(&rq->lock);
+	local_irq_restore(*flags);
+}
+
 void task_rq_unlock_wait(struct task_struct *p)
 {
 	struct rq *rq = task_rq(p);
@@ -2558,7 +2578,6 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev,
 		    struct task_struct *next)
 {
 	fire_sched_out_preempt_notifiers(prev, next);
-	perf_counter_task_sched_out(prev, cpu_of(rq));
 	prepare_lock_switch(rq, next);
 	prepare_arch_switch(next);
 }
@@ -4093,6 +4112,29 @@ EXPORT_PER_CPU_SYMBOL(kstat);
  * Return any ns on the sched_clock that have not yet been banked in
  * @p in case that task is currently running.
  */
+unsigned long long __task_delta_exec(struct task_struct *p, int update)
+{
+	s64 delta_exec;
+	struct rq *rq;
+
+	rq = task_rq(p);
+	WARN_ON_ONCE(!runqueue_is_locked());
+	WARN_ON_ONCE(!task_current(rq, p));
+
+	if (update)
+		update_rq_clock(rq);
+
+	delta_exec = rq->clock - p->se.exec_start;
+
+	WARN_ON_ONCE(delta_exec < 0);
+
+	return delta_exec;
+}
+
+/*
+ * Return any ns on the sched_clock that have not yet been banked in
+ * @p in case that task is currently running.
+ */
 unsigned long long task_delta_exec(struct task_struct *p)
 {
 	unsigned long flags;
@@ -4316,13 +4358,13 @@ void scheduler_tick(void)
 	update_rq_clock(rq);
 	update_cpu_load(rq);
 	curr->sched_class->task_tick(rq, curr, 0);
+	perf_counter_task_tick(curr, cpu);
 	spin_unlock(&rq->lock);
 
 #ifdef CONFIG_SMP
 	rq->idle_at_tick = idle_cpu(cpu);
 	trigger_load_balance(rq, cpu);
 #endif
-	perf_counter_task_tick(curr, cpu);
 }
 
 #if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
@@ -4512,6 +4554,7 @@ need_resched_nonpreemptible:
 
 	if (likely(prev != next)) {
 		sched_info_switch(prev, next);
+		perf_counter_task_sched_out(prev, cpu);
 
 		rq->nr_switches++;
 		rq->curr = next;
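
__task_delta_exec() asserts that the runqueue is locked and that @p is the task currently running on it, which is why the perf counter hooks move in this patch: perf_counter_task_tick() now runs inside scheduler_tick()'s rq->lock section, and perf_counter_task_sched_out() moves from prepare_task_switch() into schedule(), where rq->lock is already held. The following is a condensed shape of the reordered tick, not the literal function.

/*
 * Condensed shape of scheduler_tick() after this patch. The point is
 * the ordering: the perf counter tick runs while rq->lock is held,
 * satisfying the runqueue-locked precondition of __task_delta_exec().
 */
static void scheduler_tick_shape(struct rq *rq, struct task_struct *curr, int cpu)
{
	spin_lock(&rq->lock);
	update_rq_clock(rq);
	update_cpu_load(rq);
	curr->sched_class->task_tick(rq, curr, 0);
	perf_counter_task_tick(curr, cpu);	/* moved inside the lock */
	spin_unlock(&rq->lock);
}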