aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/perf_counter.h4
-rw-r--r--kernel/perf_counter.c83
2 files changed, 83 insertions, 4 deletions
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 6a384f04755..de70a10b5ec 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -178,8 +178,9 @@ struct perf_counter_attr {
178 mmap : 1, /* include mmap data */ 178 mmap : 1, /* include mmap data */
179 comm : 1, /* include comm data */ 179 comm : 1, /* include comm data */
180 freq : 1, /* use freq, not period */ 180 freq : 1, /* use freq, not period */
181 inherit_stat : 1, /* per task counts */
181 182
182 __reserved_1 : 53; 183 __reserved_1 : 52;
183 184
184 __u32 wakeup_events; /* wakeup every n events */ 185 __u32 wakeup_events; /* wakeup every n events */
185 __u32 __reserved_2; 186 __u32 __reserved_2;
@@ -602,6 +603,7 @@ struct perf_counter_context {
602 int nr_counters; 603 int nr_counters;
603 int nr_active; 604 int nr_active;
604 int is_active; 605 int is_active;
606 int nr_stat;
605 atomic_t refcount; 607 atomic_t refcount;
606 struct task_struct *task; 608 struct task_struct *task;
607 609
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index a72c20e9195..385ca51c6e6 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -236,6 +236,8 @@ list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
236 236
237 list_add_rcu(&counter->event_entry, &ctx->event_list); 237 list_add_rcu(&counter->event_entry, &ctx->event_list);
238 ctx->nr_counters++; 238 ctx->nr_counters++;
239 if (counter->attr.inherit_stat)
240 ctx->nr_stat++;
239} 241}
240 242
241/* 243/*
@@ -250,6 +252,8 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
250 if (list_empty(&counter->list_entry)) 252 if (list_empty(&counter->list_entry))
251 return; 253 return;
252 ctx->nr_counters--; 254 ctx->nr_counters--;
255 if (counter->attr.inherit_stat)
256 ctx->nr_stat--;
253 257
254 list_del_init(&counter->list_entry); 258 list_del_init(&counter->list_entry);
255 list_del_rcu(&counter->event_entry); 259 list_del_rcu(&counter->event_entry);
@@ -1006,6 +1010,76 @@ static int context_equiv(struct perf_counter_context *ctx1,
1006 && !ctx1->pin_count && !ctx2->pin_count; 1010 && !ctx1->pin_count && !ctx2->pin_count;
1007} 1011}
1008 1012
1013static void __perf_counter_read(void *counter);
1014
1015static void __perf_counter_sync_stat(struct perf_counter *counter,
1016 struct perf_counter *next_counter)
1017{
1018 u64 value;
1019
1020 if (!counter->attr.inherit_stat)
1021 return;
1022
1023 /*
1024 * Update the counter value, we cannot use perf_counter_read()
1025 * because we're in the middle of a context switch and have IRQs
1026 * disabled, which upsets smp_call_function_single(), however
1027 * we know the counter must be on the current CPU, therefore we
1028 * don't need to use it.
1029 */
1030 switch (counter->state) {
1031 case PERF_COUNTER_STATE_ACTIVE:
1032 __perf_counter_read(counter);
1033 break;
1034
1035 case PERF_COUNTER_STATE_INACTIVE:
1036 update_counter_times(counter);
1037 break;
1038
1039 default:
1040 break;
1041 }
1042
1043 /*
1044 * In order to keep per-task stats reliable we need to flip the counter
1045 * values when we flip the contexts.
1046 */
1047 value = atomic64_read(&next_counter->count);
1048 value = atomic64_xchg(&counter->count, value);
1049 atomic64_set(&next_counter->count, value);
1050
1051 /*
1052 * XXX also sync time_enabled and time_running ?
1053 */
1054}
1055
/*
 * list_next_entry - get the entry that follows @pos in its list
 * @pos:	pointer to the current entry
 * @member:	name of the list link field within the entry
 *
 * Both uses of @pos are parenthesized so that any pointer expression
 * (e.g. &obj or arr[i]), not just a plain identifier, expands correctly.
 */
#define list_next_entry(pos, member) \
	list_entry((pos)->member.next, typeof(*(pos)), member)
1058
1059static void perf_counter_sync_stat(struct perf_counter_context *ctx,
1060 struct perf_counter_context *next_ctx)
1061{
1062 struct perf_counter *counter, *next_counter;
1063
1064 if (!ctx->nr_stat)
1065 return;
1066
1067 counter = list_first_entry(&ctx->event_list,
1068 struct perf_counter, event_entry);
1069
1070 next_counter = list_first_entry(&next_ctx->event_list,
1071 struct perf_counter, event_entry);
1072
1073 while (&counter->event_entry != &ctx->event_list &&
1074 &next_counter->event_entry != &next_ctx->event_list) {
1075
1076 __perf_counter_sync_stat(counter, next_counter);
1077
1078 counter = list_next_entry(counter, event_entry);
1079 next_counter = list_next_entry(counter, event_entry);
1080 }
1081}
1082
1009/* 1083/*
1010 * Called from scheduler to remove the counters of the current task, 1084 * Called from scheduler to remove the counters of the current task,
1011 * with interrupts disabled. 1085 * with interrupts disabled.
@@ -1061,6 +1135,8 @@ void perf_counter_task_sched_out(struct task_struct *task,
1061 ctx->task = next; 1135 ctx->task = next;
1062 next_ctx->task = task; 1136 next_ctx->task = task;
1063 do_switch = 0; 1137 do_switch = 0;
1138
1139 perf_counter_sync_stat(ctx, next_ctx);
1064 } 1140 }
1065 spin_unlock(&next_ctx->lock); 1141 spin_unlock(&next_ctx->lock);
1066 spin_unlock(&ctx->lock); 1142 spin_unlock(&ctx->lock);
@@ -1350,7 +1426,7 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
1350/* 1426/*
1351 * Cross CPU call to read the hardware counter 1427 * Cross CPU call to read the hardware counter
1352 */ 1428 */
1353static void __read(void *info) 1429static void __perf_counter_read(void *info)
1354{ 1430{
1355 struct perf_counter *counter = info; 1431 struct perf_counter *counter = info;
1356 struct perf_counter_context *ctx = counter->ctx; 1432 struct perf_counter_context *ctx = counter->ctx;
@@ -1372,7 +1448,7 @@ static u64 perf_counter_read(struct perf_counter *counter)
1372 */ 1448 */
1373 if (counter->state == PERF_COUNTER_STATE_ACTIVE) { 1449 if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
1374 smp_call_function_single(counter->oncpu, 1450 smp_call_function_single(counter->oncpu,
1375 __read, counter, 1); 1451 __perf_counter_read, counter, 1);
1376 } else if (counter->state == PERF_COUNTER_STATE_INACTIVE) { 1452 } else if (counter->state == PERF_COUNTER_STATE_INACTIVE) {
1377 update_counter_times(counter); 1453 update_counter_times(counter);
1378 } 1454 }
@@ -4050,7 +4126,8 @@ static void sync_child_counter(struct perf_counter *child_counter,
4050 struct perf_counter *parent_counter = child_counter->parent; 4126 struct perf_counter *parent_counter = child_counter->parent;
4051 u64 child_val; 4127 u64 child_val;
4052 4128
4053 perf_counter_read_event(child_counter, child); 4129 if (child_counter->attr.inherit_stat)
4130 perf_counter_read_event(child_counter, child);
4054 4131
4055 child_val = atomic64_read(&child_counter->count); 4132 child_val = atomic64_read(&child_counter->count);
4056 4133