aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Peter Zijlstra <a.p.zijlstra@chello.nl> 2009-06-24 15:11:59 -0400
committer Ingo Molnar <mingo@elte.hu> 2009-06-25 15:39:07 -0400
commit bfbd3381e63aa2a14c6706afb50ce4630aa0d9a2 (patch)
tree 9b165a3068b8a7976ea34e633013165855bb5f51
parent 38b200d67636a30cb8dc1508137908e7a649b5c9 (diff)
perf_counter: Implement more accurate per task statistics
With the introduction of PERF_EVENT_READ we have the possibility to provide accurate counter values for individual tasks in a task hierarchy. However, due to the lazy context switching used for similar counter contexts, our current per-task counts are way off. In order to maintain some of the lazy-switch benefits we don't disable it outright, but simply iterate the active counters and flip the values between the contexts. This only reads the counters but does not need to reprogram the full PMU. Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- include/linux/perf_counter.h 4
-rw-r--r-- kernel/perf_counter.c 83
2 files changed, 83 insertions, 4 deletions
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 6a384f04755a..de70a10b5ec8 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -178,8 +178,9 @@ struct perf_counter_attr {
178 mmap : 1, /* include mmap data */ 178 mmap : 1, /* include mmap data */
179 comm : 1, /* include comm data */ 179 comm : 1, /* include comm data */
180 freq : 1, /* use freq, not period */ 180 freq : 1, /* use freq, not period */
181 inherit_stat : 1, /* per task counts */
181 182
182 __reserved_1 : 53; 183 __reserved_1 : 52;
183 184
184 __u32 wakeup_events; /* wakeup every n events */ 185 __u32 wakeup_events; /* wakeup every n events */
185 __u32 __reserved_2; 186 __u32 __reserved_2;
@@ -602,6 +603,7 @@ struct perf_counter_context {
602 int nr_counters; 603 int nr_counters;
603 int nr_active; 604 int nr_active;
604 int is_active; 605 int is_active;
606 int nr_stat;
605 atomic_t refcount; 607 atomic_t refcount;
606 struct task_struct *task; 608 struct task_struct *task;
607 609
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index a72c20e91953..385ca51c6e60 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -236,6 +236,8 @@ list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
236 236
237 list_add_rcu(&counter->event_entry, &ctx->event_list); 237 list_add_rcu(&counter->event_entry, &ctx->event_list);
238 ctx->nr_counters++; 238 ctx->nr_counters++;
239 if (counter->attr.inherit_stat)
240 ctx->nr_stat++;
239} 241}
240 242
241/* 243/*
@@ -250,6 +252,8 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
250 if (list_empty(&counter->list_entry)) 252 if (list_empty(&counter->list_entry))
251 return; 253 return;
252 ctx->nr_counters--; 254 ctx->nr_counters--;
255 if (counter->attr.inherit_stat)
256 ctx->nr_stat--;
253 257
254 list_del_init(&counter->list_entry); 258 list_del_init(&counter->list_entry);
255 list_del_rcu(&counter->event_entry); 259 list_del_rcu(&counter->event_entry);
@@ -1006,6 +1010,76 @@ static int context_equiv(struct perf_counter_context *ctx1,
1006 && !ctx1->pin_count && !ctx2->pin_count; 1010 && !ctx1->pin_count && !ctx2->pin_count;
1007} 1011}
1008 1012
1013static void __perf_counter_read(void *counter);
1014
1015static void __perf_counter_sync_stat(struct perf_counter *counter,
1016 struct perf_counter *next_counter)
1017{
1018 u64 value;
1019
1020 if (!counter->attr.inherit_stat)
1021 return;
1022
1023 /*
1024 * Update the counter value, we cannot use perf_counter_read()
1025 * because we're in the middle of a context switch and have IRQs
1026 * disabled, which upsets smp_call_function_single(), however
1027 * we know the counter must be on the current CPU, therefore we
1028 * don't need to use it.
1029 */
1030 switch (counter->state) {
1031 case PERF_COUNTER_STATE_ACTIVE:
1032 __perf_counter_read(counter);
1033 break;
1034
1035 case PERF_COUNTER_STATE_INACTIVE:
1036 update_counter_times(counter);
1037 break;
1038
1039 default:
1040 break;
1041 }
1042
1043 /*
1044 * In order to keep per-task stats reliable we need to flip the counter
1045 * values when we flip the contexts.
1046 */
1047 value = atomic64_read(&next_counter->count);
1048 value = atomic64_xchg(&counter->count, value);
1049 atomic64_set(&next_counter->count, value);
1050
1051 /*
1052 * XXX also sync time_enabled and time_running ?
1053 */
1054}
1055
/*
 * list_next_entry - get the element that follows @pos in its list
 * @pos:	pointer to the current element (any pointer-valued expression)
 * @member:	name of the list_head field inside the element's struct
 *
 * @pos is parenthesized at every use so that expressions such as "p + 1"
 * expand with the intended precedence. Note that @pos is evaluated more
 * than once, so it must not have side effects.
 */
#define list_next_entry(pos, member) \
	list_entry((pos)->member.next, typeof(*(pos)), member)
1058
1059static void perf_counter_sync_stat(struct perf_counter_context *ctx,
1060 struct perf_counter_context *next_ctx)
1061{
1062 struct perf_counter *counter, *next_counter;
1063
1064 if (!ctx->nr_stat)
1065 return;
1066
1067 counter = list_first_entry(&ctx->event_list,
1068 struct perf_counter, event_entry);
1069
1070 next_counter = list_first_entry(&next_ctx->event_list,
1071 struct perf_counter, event_entry);
1072
1073 while (&counter->event_entry != &ctx->event_list &&
1074 &next_counter->event_entry != &next_ctx->event_list) {
1075
1076 __perf_counter_sync_stat(counter, next_counter);
1077
1078 counter = list_next_entry(counter, event_entry);
1079 next_counter = list_next_entry(counter, event_entry);
1080 }
1081}
1082
1009/* 1083/*
1010 * Called from scheduler to remove the counters of the current task, 1084 * Called from scheduler to remove the counters of the current task,
1011 * with interrupts disabled. 1085 * with interrupts disabled.
@@ -1061,6 +1135,8 @@ void perf_counter_task_sched_out(struct task_struct *task,
1061 ctx->task = next; 1135 ctx->task = next;
1062 next_ctx->task = task; 1136 next_ctx->task = task;
1063 do_switch = 0; 1137 do_switch = 0;
1138
1139 perf_counter_sync_stat(ctx, next_ctx);
1064 } 1140 }
1065 spin_unlock(&next_ctx->lock); 1141 spin_unlock(&next_ctx->lock);
1066 spin_unlock(&ctx->lock); 1142 spin_unlock(&ctx->lock);
@@ -1350,7 +1426,7 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
1350/* 1426/*
1351 * Cross CPU call to read the hardware counter 1427 * Cross CPU call to read the hardware counter
1352 */ 1428 */
1353static void __read(void *info) 1429static void __perf_counter_read(void *info)
1354{ 1430{
1355 struct perf_counter *counter = info; 1431 struct perf_counter *counter = info;
1356 struct perf_counter_context *ctx = counter->ctx; 1432 struct perf_counter_context *ctx = counter->ctx;
@@ -1372,7 +1448,7 @@ static u64 perf_counter_read(struct perf_counter *counter)
1372 */ 1448 */
1373 if (counter->state == PERF_COUNTER_STATE_ACTIVE) { 1449 if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
1374 smp_call_function_single(counter->oncpu, 1450 smp_call_function_single(counter->oncpu,
1375 __read, counter, 1); 1451 __perf_counter_read, counter, 1);
1376 } else if (counter->state == PERF_COUNTER_STATE_INACTIVE) { 1452 } else if (counter->state == PERF_COUNTER_STATE_INACTIVE) {
1377 update_counter_times(counter); 1453 update_counter_times(counter);
1378 } 1454 }
@@ -4050,7 +4126,8 @@ static void sync_child_counter(struct perf_counter *child_counter,
4050 struct perf_counter *parent_counter = child_counter->parent; 4126 struct perf_counter *parent_counter = child_counter->parent;
4051 u64 child_val; 4127 u64 child_val;
4052 4128
4053 perf_counter_read_event(child_counter, child); 4129 if (child_counter->attr.inherit_stat)
4130 perf_counter_read_event(child_counter, child);
4054 4131
4055 child_val = atomic64_read(&child_counter->count); 4132 child_val = atomic64_read(&child_counter->count);
4056 4133