-rw-r--r--  arch/powerpc/kernel/perf_counter.c |   2
-rw-r--r--  include/linux/perf_counter.h       |  53
-rw-r--r--  kernel/perf_counter.c              | 157
3 files changed, 191 insertions, 21 deletions
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index d48596ab6557..df007fe0cc0b 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -455,6 +455,8 @@ static void counter_sched_in(struct perf_counter *counter, int cpu)
 {
 	counter->state = PERF_COUNTER_STATE_ACTIVE;
 	counter->oncpu = cpu;
+	counter->tstamp_running += counter->ctx->time_now -
+		counter->tstamp_stopped;
 	if (is_software_counter(counter))
 		counter->hw_ops->enable(counter);
 }
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 7fdbdf8be775..6bf67ce17625 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -103,6 +103,16 @@ enum perf_counter_record_type {
 #define PERF_COUNTER_EVENT_MASK		__PERF_COUNTER_MASK(EVENT)
 
 /*
+ * Bits that can be set in hw_event.read_format to request that
+ * reads on the counter should return the indicated quantities,
+ * in increasing order of bit value, after the counter value.
+ */
+enum perf_counter_read_format {
+	PERF_FORMAT_TOTAL_TIME_ENABLED	= 1,
+	PERF_FORMAT_TOTAL_TIME_RUNNING	= 2,
+};
+
+/*
  * Hardware event to monitor via a performance monitoring counter:
  */
 struct perf_counter_hw_event {
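
To make the new layout concrete: with both bits set in hw_event.read_format, a read() on the counter file descriptor returns the counter value first, followed by the enabled time and then the running time, in increasing bit-value order. A minimal userspace sketch, assuming an fd opened with both bits requested via the perf counter syscall (nothing below is part of the patch itself):

#include <stdint.h>
#include <unistd.h>

/* The two format bits above, repeated so the example stands alone. */
#define FMT_TOTAL_TIME_ENABLED	1
#define FMT_TOTAL_TIME_RUNNING	2

/* Read a counter whose read_format requested both times. */
static int read_counter(int fd, uint64_t *count,
			uint64_t *time_enabled, uint64_t *time_running)
{
	uint64_t values[3];	/* [0] count, [1] time enabled, [2] time running */

	if (read(fd, values, sizeof(values)) != (ssize_t)sizeof(values))
		return -1;
	*count = values[0];
	*time_enabled = values[1];
	*time_running = values[2];
	return 0;
}

If only one of the bits were set, the buffer would shrink to two u64 values, with the requested time immediately after the count.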
@@ -281,6 +291,32 @@ struct perf_counter {
 	enum perf_counter_active_state	prev_state;
 	atomic64_t			count;
 
+	/*
+	 * These are the total time in nanoseconds that the counter
+	 * has been enabled (i.e. eligible to run, and the task has
+	 * been scheduled in, if this is a per-task counter)
+	 * and running (scheduled onto the CPU), respectively.
+	 *
+	 * They are computed from tstamp_enabled, tstamp_running and
+	 * tstamp_stopped when the counter is in INACTIVE or ACTIVE state.
+	 */
+	u64				total_time_enabled;
+	u64				total_time_running;
+
+	/*
+	 * These are timestamps used for computing total_time_enabled
+	 * and total_time_running when the counter is in INACTIVE or
+	 * ACTIVE state, measured in nanoseconds from an arbitrary point
+	 * in time.
+	 * tstamp_enabled: the notional time when the counter was enabled
+	 * tstamp_running: the notional time when the counter was scheduled on
+	 * tstamp_stopped: in INACTIVE state, the notional time when the
+	 *	counter was scheduled off.
+	 */
+	u64				tstamp_enabled;
+	u64				tstamp_running;
+	u64				tstamp_stopped;
+
 	struct perf_counter_hw_event	hw_event;
 	struct hw_perf_counter		hw;
 
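
A concrete reading of these fields (all times in ns): a counter enabled at context time 100, first scheduled onto the PMU at 120 and scheduled off at 150 would, when read at time 170 while still INACTIVE, report total_time_enabled = 170 - 100 = 70 and total_time_running = 150 - 120 = 30; had it remained ACTIVE since 120, the running interval would instead extend to the read, giving 170 - 120 = 50.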
@@ -292,6 +328,13 @@ struct perf_counter {
 	struct list_head		child_list;
 
 	/*
+	 * These accumulate total time (in nanoseconds) that children
+	 * counters have been enabled and running, respectively.
+	 */
+	atomic64_t			child_total_time_enabled;
+	atomic64_t			child_total_time_running;
+
+	/*
 	 * Protect attach/detach and child_list:
 	 */
 	struct mutex			mutex;
@@ -339,6 +382,16 @@ struct perf_counter_context {
 	int			nr_active;
 	int			is_active;
 	struct task_struct	*task;
+
+	/*
+	 * time_now is the current time in nanoseconds since an arbitrary
+	 * point in the past.  For per-task counters, this is based on the
+	 * task clock, and for per-cpu counters it is based on the cpu clock.
+	 * time_lost is an offset from the task/cpu clock, used to make it
+	 * appear that time only passes while the context is scheduled in.
+	 */
+	u64			time_now;
+	u64			time_lost;
 #endif
 };
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 95e02575546b..3b862a7988cd 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -116,6 +116,7 @@ counter_sched_out(struct perf_counter *counter,
 		return;
 
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
+	counter->tstamp_stopped = ctx->time_now;
 	counter->hw_ops->disable(counter);
 	counter->oncpu = -1;
 
@@ -252,6 +253,60 @@ retry:
 }
 
 /*
+ * Get the current time for this context.
+ * If this is a task context, we use the task's task clock,
+ * or for a per-cpu context, we use the cpu clock.
+ */
+static u64 get_context_time(struct perf_counter_context *ctx, int update)
+{
+	struct task_struct *curr = ctx->task;
+
+	if (!curr)
+		return cpu_clock(smp_processor_id());
+
+	return __task_delta_exec(curr, update) + curr->se.sum_exec_runtime;
+}
+
+/*
+ * Update the record of the current time in a context.
+ */
+static void update_context_time(struct perf_counter_context *ctx, int update)
+{
+	ctx->time_now = get_context_time(ctx, update) - ctx->time_lost;
+}
+
+/*
+ * Update the total_time_enabled and total_time_running fields for a counter.
+ */
+static void update_counter_times(struct perf_counter *counter)
+{
+	struct perf_counter_context *ctx = counter->ctx;
+	u64 run_end;
+
+	if (counter->state >= PERF_COUNTER_STATE_INACTIVE) {
+		counter->total_time_enabled = ctx->time_now -
+			counter->tstamp_enabled;
+		if (counter->state == PERF_COUNTER_STATE_INACTIVE)
+			run_end = counter->tstamp_stopped;
+		else
+			run_end = ctx->time_now;
+		counter->total_time_running = run_end - counter->tstamp_running;
+	}
+}
+
+/*
+ * Update total_time_enabled and total_time_running for all counters in a group.
+ */
+static void update_group_times(struct perf_counter *leader)
+{
+	struct perf_counter *counter;
+
+	update_counter_times(leader);
+	list_for_each_entry(counter, &leader->sibling_list, list_entry)
+		update_counter_times(counter);
+}
+
+/*
  * Cross CPU call to disable a performance counter
  */
 static void __perf_counter_disable(void *info)
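
The interplay of time_now and time_lost is easiest to see with numbers. Context time advances with the underlying task or cpu clock only while the context is scheduled in; each sched-in folds the gap since the last sched-out into time_lost. A small standalone sketch of that bookkeeping (illustrative struct and function names, not code from the patch):

#include <stdint.h>

/* Illustrative mirror of the context time bookkeeping. */
struct ctx_time {
	uint64_t time_now;	/* frozen while the context is scheduled out */
	uint64_t time_lost;	/* raw clock time spent scheduled out */
};

/* Called with the raw task/cpu clock while the context is scheduled in. */
static void ctx_update(struct ctx_time *c, uint64_t raw_clock)
{
	c->time_now = raw_clock - c->time_lost;	/* as update_context_time() */
}

/* Called once at sched-in, with the raw clock at that moment. */
static void ctx_sched_in(struct ctx_time *c, uint64_t raw_clock)
{
	c->time_lost = raw_clock - c->time_now;	/* as __perf_counter_sched_in() */
}

For example, suppose the underlying clock reads 1000 at sched-out (time_now = 1000, time_lost = 0) and 1400 at the next sched-in: time_lost becomes 400, so an update at raw clock 1500 yields time_now = 1100, and the 400 spent scheduled out never enters any counter's enabled or running time.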
@@ -276,6 +331,8 @@ static void __perf_counter_disable(void *info)
 	 * If it is in error state, leave it in error state.
 	 */
 	if (counter->state >= PERF_COUNTER_STATE_INACTIVE) {
+		update_context_time(ctx, 1);
+		update_counter_times(counter);
 		if (counter == counter->group_leader)
 			group_sched_out(counter, cpuctx, ctx);
 		else
@@ -320,8 +377,10 @@ static void perf_counter_disable(struct perf_counter *counter)
 	 * Since we have the lock this context can't be scheduled
 	 * in, so we can change the state safely.
 	 */
-	if (counter->state == PERF_COUNTER_STATE_INACTIVE)
+	if (counter->state == PERF_COUNTER_STATE_INACTIVE) {
+		update_counter_times(counter);
 		counter->state = PERF_COUNTER_STATE_OFF;
+	}
 
 	spin_unlock_irq(&ctx->lock);
 }
@@ -366,6 +425,8 @@ counter_sched_in(struct perf_counter *counter,
 		return -EAGAIN;
 	}
 
+	counter->tstamp_running += ctx->time_now - counter->tstamp_stopped;
+
 	if (!is_software_counter(counter))
 		cpuctx->active_oncpu++;
 	ctx->nr_active++;
@@ -425,6 +486,17 @@ static int group_can_go_on(struct perf_counter *counter,
 	return can_add_hw;
 }
 
+static void add_counter_to_ctx(struct perf_counter *counter,
+			       struct perf_counter_context *ctx)
+{
+	list_add_counter(counter, ctx);
+	ctx->nr_counters++;
+	counter->prev_state = PERF_COUNTER_STATE_OFF;
+	counter->tstamp_enabled = ctx->time_now;
+	counter->tstamp_running = ctx->time_now;
+	counter->tstamp_stopped = ctx->time_now;
+}
+
 /*
  * Cross CPU call to install and enable a performance counter
  */
@@ -449,6 +521,7 @@ static void __perf_install_in_context(void *info)
 
 	curr_rq_lock_irq_save(&flags);
 	spin_lock(&ctx->lock);
+	update_context_time(ctx, 1);
 
 	/*
 	 * Protect the list operation against NMI by disabling the
@@ -456,9 +529,7 @@ static void __perf_install_in_context(void *info)
 	 */
 	perf_flags = hw_perf_save_disable();
 
-	list_add_counter(counter, ctx);
-	ctx->nr_counters++;
-	counter->prev_state = PERF_COUNTER_STATE_OFF;
+	add_counter_to_ctx(counter, ctx);
 
 	/*
 	 * Don't put the counter on if it is disabled or if
@@ -486,8 +557,10 @@ static void __perf_install_in_context(void *info)
 		 */
 		if (leader != counter)
 			group_sched_out(leader, cpuctx, ctx);
-		if (leader->hw_event.pinned)
+		if (leader->hw_event.pinned) {
+			update_group_times(leader);
 			leader->state = PERF_COUNTER_STATE_ERROR;
+		}
 	}
 
 	if (!err && !ctx->task && cpuctx->max_pertask)
@@ -548,10 +621,8 @@ retry:
 	 * can add the counter safely, if it the call above did not
 	 * succeed.
 	 */
-	if (list_empty(&counter->list_entry)) {
-		list_add_counter(counter, ctx);
-		ctx->nr_counters++;
-	}
+	if (list_empty(&counter->list_entry))
+		add_counter_to_ctx(counter, ctx);
 	spin_unlock_irq(&ctx->lock);
 }
 
@@ -576,11 +647,13 @@ static void __perf_counter_enable(void *info)
 
 	curr_rq_lock_irq_save(&flags);
 	spin_lock(&ctx->lock);
+	update_context_time(ctx, 1);
 
 	counter->prev_state = counter->state;
 	if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
 		goto unlock;
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
+	counter->tstamp_enabled = ctx->time_now - counter->total_time_enabled;
 
 	/*
 	 * If the counter is in a group and isn't the group leader,
@@ -602,8 +675,10 @@ static void __perf_counter_enable(void *info)
 		 */
 		if (leader != counter)
 			group_sched_out(leader, cpuctx, ctx);
-		if (leader->hw_event.pinned)
+		if (leader->hw_event.pinned) {
+			update_group_times(leader);
 			leader->state = PERF_COUNTER_STATE_ERROR;
+		}
 	}
 
  unlock:
@@ -659,8 +734,11 @@ static void perf_counter_enable(struct perf_counter *counter)
 	 * Since we have the lock this context can't be scheduled
 	 * in, so we can change the state safely.
 	 */
-	if (counter->state == PERF_COUNTER_STATE_OFF)
+	if (counter->state == PERF_COUNTER_STATE_OFF) {
 		counter->state = PERF_COUNTER_STATE_INACTIVE;
+		counter->tstamp_enabled = ctx->time_now -
+			counter->total_time_enabled;
+	}
  out:
 	spin_unlock_irq(&ctx->lock);
 }
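
The assignment tstamp_enabled = ctx->time_now - counter->total_time_enabled used on the enable paths is worth spelling out: it back-dates the enable timestamp so that previously accumulated enabled time is preserved across a disable/enable cycle. For example, a counter that had already accumulated 70 ns of enabled time and is re-enabled at context time 500 gets tstamp_enabled = 430, so an update at time 520 reports 520 - 430 = 90 ns, i.e. the old 70 plus the 20 that have elapsed since re-enabling.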
@@ -693,6 +771,7 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx,
 	ctx->is_active = 0;
 	if (likely(!ctx->nr_counters))
 		goto out;
+	update_context_time(ctx, 0);
 
 	flags = hw_perf_save_disable();
 	if (ctx->nr_active) {
@@ -797,6 +876,13 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
 	if (likely(!ctx->nr_counters))
 		goto out;
 
+	/*
+	 * Add any time since the last sched_out to the lost time
+	 * so it doesn't get included in the total_time_enabled and
+	 * total_time_running measures for counters in the context.
+	 */
+	ctx->time_lost = get_context_time(ctx, 0) - ctx->time_now;
+
 	flags = hw_perf_save_disable();
 
 	/*
@@ -817,8 +903,10 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
 		 * If this pinned group hasn't been scheduled,
 		 * put it in error state.
 		 */
-		if (counter->state == PERF_COUNTER_STATE_INACTIVE)
+		if (counter->state == PERF_COUNTER_STATE_INACTIVE) {
+			update_group_times(counter);
 			counter->state = PERF_COUNTER_STATE_ERROR;
+		}
 	}
 
 	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
@@ -902,8 +990,10 @@ int perf_counter_task_disable(void)
 	perf_flags = hw_perf_save_disable();
 
 	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
-		if (counter->state != PERF_COUNTER_STATE_ERROR)
+		if (counter->state != PERF_COUNTER_STATE_ERROR) {
+			update_group_times(counter);
 			counter->state = PERF_COUNTER_STATE_OFF;
+		}
 	}
 
 	hw_perf_restore(perf_flags);
@@ -946,6 +1036,8 @@ int perf_counter_task_enable(void)
 		if (counter->state > PERF_COUNTER_STATE_OFF)
 			continue;
 		counter->state = PERF_COUNTER_STATE_INACTIVE;
+		counter->tstamp_enabled = ctx->time_now -
+			counter->total_time_enabled;
 		counter->hw_event.disabled = 0;
 	}
 	hw_perf_restore(perf_flags);
@@ -1009,10 +1101,14 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 static void __read(void *info)
 {
 	struct perf_counter *counter = info;
+	struct perf_counter_context *ctx = counter->ctx;
 	unsigned long flags;
 
 	curr_rq_lock_irq_save(&flags);
+	if (ctx->is_active)
+		update_context_time(ctx, 1);
 	counter->hw_ops->read(counter);
+	update_counter_times(counter);
 	curr_rq_unlock_irq_restore(&flags);
 }
 
@@ -1025,6 +1121,8 @@ static u64 perf_counter_read(struct perf_counter *counter)
 	if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
 		smp_call_function_single(counter->oncpu,
 					 __read, counter, 1);
+	} else if (counter->state == PERF_COUNTER_STATE_INACTIVE) {
+		update_counter_times(counter);
 	}
 
 	return atomic64_read(&counter->count);
@@ -1137,10 +1235,8 @@ static int perf_release(struct inode *inode, struct file *file)
 static ssize_t
 perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
 {
-	u64 cntval;
-
-	if (count < sizeof(cntval))
-		return -EINVAL;
+	u64 values[3];
+	int n;
 
 	/*
 	 * Return end-of-file for a read on a counter that is in
@@ -1151,10 +1247,24 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
 		return 0;
 
 	mutex_lock(&counter->mutex);
-	cntval = perf_counter_read(counter);
+	values[0] = perf_counter_read(counter);
+	n = 1;
+	if (counter->hw_event.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+		values[n++] = counter->total_time_enabled +
+			atomic64_read(&counter->child_total_time_enabled);
+	if (counter->hw_event.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+		values[n++] = counter->total_time_running +
+			atomic64_read(&counter->child_total_time_running);
 	mutex_unlock(&counter->mutex);
 
-	return put_user(cntval, (u64 __user *) buf) ? -EFAULT : sizeof(cntval);
+	if (count < n * sizeof(u64))
+		return -EINVAL;
+	count = n * sizeof(u64);
+
+	if (copy_to_user(buf, values, count))
+		return -EFAULT;
+
+	return count;
 }
 
 static ssize_t
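
A common use of the two times returned alongside the count (not spelled out in this patch) is to scale the count up when the counter could not stay on the PMU for the whole time it was enabled, for instance because more counters were requested than the hardware provides. A hedged userspace sketch, reusing the illustrative read_counter() helper from the read_format example above:

#include <stdint.h>

/*
 * Estimate what the count would have been had the counter run for the
 * whole time it was enabled.  Returns the raw count unchanged when the
 * counter ran the entire time or never ran at all.
 */
static uint64_t scale_count(uint64_t count,
			    uint64_t time_enabled, uint64_t time_running)
{
	if (time_running == 0 || time_running >= time_enabled)
		return count;
	/* A production version may want 128-bit math to avoid overflow. */
	return count * time_enabled / time_running;
}

Note that the times reported by perf_read_hw() already include the accumulated child totals, so such an estimate covers inherited child counters as well.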
@@ -2290,8 +2400,7 @@ inherit_counter(struct perf_counter *parent_counter,
 	 * Link it up in the child's context:
 	 */
 	child_counter->task = child;
-	list_add_counter(child_counter, child_ctx);
-	child_ctx->nr_counters++;
+	add_counter_to_ctx(child_counter, child_ctx);
 
 	child_counter->parent = parent_counter;
 	/*
@@ -2361,6 +2470,10 @@ static void sync_child_counter(struct perf_counter *child_counter,
 	 * Add back the child's count to the parent's count:
 	 */
 	atomic64_add(child_val, &parent_counter->count);
+	atomic64_add(child_counter->total_time_enabled,
+		     &parent_counter->child_total_time_enabled);
+	atomic64_add(child_counter->total_time_running,
+		     &parent_counter->child_total_time_running);
 
 	/*
 	 * Remove this counter from the parent's list
@@ -2395,6 +2508,7 @@ __perf_counter_exit_task(struct task_struct *child,
 	if (child != current) {
 		wait_task_inactive(child, 0);
 		list_del_init(&child_counter->list_entry);
+		update_counter_times(child_counter);
 	} else {
 		struct perf_cpu_context *cpuctx;
 		unsigned long flags;
@@ -2412,6 +2526,7 @@ __perf_counter_exit_task(struct task_struct *child,
 		cpuctx = &__get_cpu_var(perf_cpu_context);
 
 		group_sched_out(child_counter, cpuctx, child_ctx);
+		update_counter_times(child_counter);
 
 		list_del_init(&child_counter->list_entry);
 