diff options
author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2009-06-24 15:11:59 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-06-25 15:39:07 -0400 |
commit | bfbd3381e63aa2a14c6706afb50ce4630aa0d9a2 (patch) | |
tree | 9b165a3068b8a7976ea34e633013165855bb5f51 | |
parent | 38b200d67636a30cb8dc1508137908e7a649b5c9 (diff) |
perf_counter: Implement more accurate per task statistics
With the introduction of PERF_EVENT_READ we have the
possibility to provide accurate counter values for
individual tasks in a task hierarchy.
However, due to the lazy context switching used for similar
counter contexts, our current per-task counts are way off.
In order to maintain some of the lazy switch benefits we
don't disable it outright, but simply iterate the active
counters and flip the values between the contexts.
This only reads the counters but does not need to reprogram
the full PMU.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | include/linux/perf_counter.h | 4 | ||||
-rw-r--r-- | kernel/perf_counter.c | 83 |
2 files changed, 83 insertions, 4 deletions
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 6a384f04755a..de70a10b5ec8 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h | |||
@@ -178,8 +178,9 @@ struct perf_counter_attr { | |||
178 | mmap : 1, /* include mmap data */ | 178 | mmap : 1, /* include mmap data */ |
179 | comm : 1, /* include comm data */ | 179 | comm : 1, /* include comm data */ |
180 | freq : 1, /* use freq, not period */ | 180 | freq : 1, /* use freq, not period */ |
181 | inherit_stat : 1, /* per task counts */ | ||
181 | 182 | ||
182 | __reserved_1 : 53; | 183 | __reserved_1 : 52; |
183 | 184 | ||
184 | __u32 wakeup_events; /* wakeup every n events */ | 185 | __u32 wakeup_events; /* wakeup every n events */ |
185 | __u32 __reserved_2; | 186 | __u32 __reserved_2; |
@@ -602,6 +603,7 @@ struct perf_counter_context { | |||
602 | int nr_counters; | 603 | int nr_counters; |
603 | int nr_active; | 604 | int nr_active; |
604 | int is_active; | 605 | int is_active; |
606 | int nr_stat; | ||
605 | atomic_t refcount; | 607 | atomic_t refcount; |
606 | struct task_struct *task; | 608 | struct task_struct *task; |
607 | 609 | ||
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index a72c20e91953..385ca51c6e60 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c | |||
@@ -236,6 +236,8 @@ list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx) | |||
236 | 236 | ||
237 | list_add_rcu(&counter->event_entry, &ctx->event_list); | 237 | list_add_rcu(&counter->event_entry, &ctx->event_list); |
238 | ctx->nr_counters++; | 238 | ctx->nr_counters++; |
239 | if (counter->attr.inherit_stat) | ||
240 | ctx->nr_stat++; | ||
239 | } | 241 | } |
240 | 242 | ||
241 | /* | 243 | /* |
@@ -250,6 +252,8 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx) | |||
250 | if (list_empty(&counter->list_entry)) | 252 | if (list_empty(&counter->list_entry)) |
251 | return; | 253 | return; |
252 | ctx->nr_counters--; | 254 | ctx->nr_counters--; |
255 | if (counter->attr.inherit_stat) | ||
256 | ctx->nr_stat--; | ||
253 | 257 | ||
254 | list_del_init(&counter->list_entry); | 258 | list_del_init(&counter->list_entry); |
255 | list_del_rcu(&counter->event_entry); | 259 | list_del_rcu(&counter->event_entry); |
@@ -1006,6 +1010,76 @@ static int context_equiv(struct perf_counter_context *ctx1, | |||
1006 | && !ctx1->pin_count && !ctx2->pin_count; | 1010 | && !ctx1->pin_count && !ctx2->pin_count; |
1007 | } | 1011 | } |
1008 | 1012 | ||
1013 | static void __perf_counter_read(void *counter); | ||
1014 | |||
1015 | static void __perf_counter_sync_stat(struct perf_counter *counter, | ||
1016 | struct perf_counter *next_counter) | ||
1017 | { | ||
1018 | u64 value; | ||
1019 | |||
1020 | if (!counter->attr.inherit_stat) | ||
1021 | return; | ||
1022 | |||
1023 | /* | ||
1024 | * Update the counter value, we cannot use perf_counter_read() | ||
1025 | * because we're in the middle of a context switch and have IRQs | ||
1026 | * disabled, which upsets smp_call_function_single(), however | ||
1027 | * we know the counter must be on the current CPU, therefore we | ||
1028 | * don't need to use it. | ||
1029 | */ | ||
1030 | switch (counter->state) { | ||
1031 | case PERF_COUNTER_STATE_ACTIVE: | ||
1032 | __perf_counter_read(counter); | ||
1033 | break; | ||
1034 | |||
1035 | case PERF_COUNTER_STATE_INACTIVE: | ||
1036 | update_counter_times(counter); | ||
1037 | break; | ||
1038 | |||
1039 | default: | ||
1040 | break; | ||
1041 | } | ||
1042 | |||
1043 | /* | ||
1044 | * In order to keep per-task stats reliable we need to flip the counter | ||
1045 | * values when we flip the contexts. | ||
1046 | */ | ||
1047 | value = atomic64_read(&next_counter->count); | ||
1048 | value = atomic64_xchg(&counter->count, value); | ||
1049 | atomic64_set(&next_counter->count, value); | ||
1050 | |||
1051 | /* | ||
1052 | * XXX also sync time_enabled and time_running ? | ||
1053 | */ | ||
1054 | } | ||
1055 | |||
/*
 * list_next_entry - get the entry that follows @pos in its list
 * @pos:	pointer to the current entry
 * @member:	name of the struct list_head field inside the entry
 *
 * @pos is parenthesized in the expansion so that an arbitrary pointer
 * expression (e.g. "*pp" or "a ? b : c") can be passed safely.
 *
 * No end-of-list check is done here; callers compare the returned
 * entry's @member address against the list head to detect the end.
 */
#define list_next_entry(pos, member) \
	list_entry((pos)->member.next, typeof(*(pos)), member)
1058 | |||
1059 | static void perf_counter_sync_stat(struct perf_counter_context *ctx, | ||
1060 | struct perf_counter_context *next_ctx) | ||
1061 | { | ||
1062 | struct perf_counter *counter, *next_counter; | ||
1063 | |||
1064 | if (!ctx->nr_stat) | ||
1065 | return; | ||
1066 | |||
1067 | counter = list_first_entry(&ctx->event_list, | ||
1068 | struct perf_counter, event_entry); | ||
1069 | |||
1070 | next_counter = list_first_entry(&next_ctx->event_list, | ||
1071 | struct perf_counter, event_entry); | ||
1072 | |||
1073 | while (&counter->event_entry != &ctx->event_list && | ||
1074 | &next_counter->event_entry != &next_ctx->event_list) { | ||
1075 | |||
1076 | __perf_counter_sync_stat(counter, next_counter); | ||
1077 | |||
1078 | counter = list_next_entry(counter, event_entry); | ||
1079 | next_counter = list_next_entry(counter, event_entry); | ||
1080 | } | ||
1081 | } | ||
1082 | |||
1009 | /* | 1083 | /* |
1010 | * Called from scheduler to remove the counters of the current task, | 1084 | * Called from scheduler to remove the counters of the current task, |
1011 | * with interrupts disabled. | 1085 | * with interrupts disabled. |
@@ -1061,6 +1135,8 @@ void perf_counter_task_sched_out(struct task_struct *task, | |||
1061 | ctx->task = next; | 1135 | ctx->task = next; |
1062 | next_ctx->task = task; | 1136 | next_ctx->task = task; |
1063 | do_switch = 0; | 1137 | do_switch = 0; |
1138 | |||
1139 | perf_counter_sync_stat(ctx, next_ctx); | ||
1064 | } | 1140 | } |
1065 | spin_unlock(&next_ctx->lock); | 1141 | spin_unlock(&next_ctx->lock); |
1066 | spin_unlock(&ctx->lock); | 1142 | spin_unlock(&ctx->lock); |
@@ -1350,7 +1426,7 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu) | |||
1350 | /* | 1426 | /* |
1351 | * Cross CPU call to read the hardware counter | 1427 | * Cross CPU call to read the hardware counter |
1352 | */ | 1428 | */ |
1353 | static void __read(void *info) | 1429 | static void __perf_counter_read(void *info) |
1354 | { | 1430 | { |
1355 | struct perf_counter *counter = info; | 1431 | struct perf_counter *counter = info; |
1356 | struct perf_counter_context *ctx = counter->ctx; | 1432 | struct perf_counter_context *ctx = counter->ctx; |
@@ -1372,7 +1448,7 @@ static u64 perf_counter_read(struct perf_counter *counter) | |||
1372 | */ | 1448 | */ |
1373 | if (counter->state == PERF_COUNTER_STATE_ACTIVE) { | 1449 | if (counter->state == PERF_COUNTER_STATE_ACTIVE) { |
1374 | smp_call_function_single(counter->oncpu, | 1450 | smp_call_function_single(counter->oncpu, |
1375 | __read, counter, 1); | 1451 | __perf_counter_read, counter, 1); |
1376 | } else if (counter->state == PERF_COUNTER_STATE_INACTIVE) { | 1452 | } else if (counter->state == PERF_COUNTER_STATE_INACTIVE) { |
1377 | update_counter_times(counter); | 1453 | update_counter_times(counter); |
1378 | } | 1454 | } |
@@ -4050,7 +4126,8 @@ static void sync_child_counter(struct perf_counter *child_counter, | |||
4050 | struct perf_counter *parent_counter = child_counter->parent; | 4126 | struct perf_counter *parent_counter = child_counter->parent; |
4051 | u64 child_val; | 4127 | u64 child_val; |
4052 | 4128 | ||
4053 | perf_counter_read_event(child_counter, child); | 4129 | if (child_counter->attr.inherit_stat) |
4130 | perf_counter_read_event(child_counter, child); | ||
4054 | 4131 | ||
4055 | child_val = atomic64_read(&child_counter->count); | 4132 | child_val = atomic64_read(&child_counter->count); |
4056 | 4133 | ||