author     Stephane Eranian <eranian@google.com>    2010-10-15 10:54:01 -0400
committer  Ingo Molnar <mingo@elte.hu>              2010-10-18 13:58:49 -0400
commit     8e5fc1a7320baf6076391607515dceb61319b36a
tree       24a5e7ac4bbd8a960fa90ce2a668b9475a3f0b16   /kernel/perf_event.c
parent     ba0cef3d149ce4db293c572bf36ed352b11ce7b9
perf_events: Fix transaction recovery in group_sched_in()
The group_sched_in() function uses a transactional approach to schedule a group of events. In a group, either all events can be scheduled or none are. To schedule each event in, the function calls event_sched_in(). In case of error, event_sched_out() is called on each event in the group.

The problem is that event_sched_out() does not completely cancel the effects of event_sched_in(). Furthermore, event_sched_out() changes the state of the event as if it had run, which is not true in this particular case.

Those inconsistencies impact the time tracking fields and may lead to events in a group not all reporting the same time_enabled and time_running values. This is demonstrated with the example below:

$ task -eunhalted_core_cycles,baclears,baclears -e unhalted_core_cycles,baclears,baclears sleep 5
1946101 unhalted_core_cycles (32.85% scaling, ena=829181, run=556827)
  11423 baclears (32.85% scaling, ena=829181, run=556827)
   7671 baclears (0.00% scaling, ena=556827, run=556827)
2250443 unhalted_core_cycles (57.83% scaling, ena=962822, run=405995)
  11705 baclears (57.83% scaling, ena=962822, run=405995)
  11705 baclears (57.83% scaling, ena=962822, run=405995)

Notice that in the first group, the last baclears event does not report the same timings as its siblings.

This issue comes from the fact that tstamp_stopped is updated by event_sched_out() as if the event had actually run.

To solve the issue, we must ensure that, in case of error, there is no change in the event state whatsoever. That means the timings must remain as they were when entering group_sched_in().

To do this, we defer updating tstamp_running until we know the transaction succeeded. Therefore, we split event_sched_in() into two parts, separating out the update to tstamp_running. Similarly, in case of error, we do not want to update tstamp_stopped, so we split event_sched_out() into two parts as well, separating out the update to tstamp_stopped.

With this patch, we now get the following output:

$ task -eunhalted_core_cycles,baclears,baclears -e unhalted_core_cycles,baclears,baclears sleep 5
2492050 unhalted_core_cycles (71.75% scaling, ena=1093330, run=308841)
  11243 baclears (71.75% scaling, ena=1093330, run=308841)
  11243 baclears (71.75% scaling, ena=1093330, run=308841)
1852746 unhalted_core_cycles (0.00% scaling, ena=784489, run=784489)
   9253 baclears (0.00% scaling, ena=784489, run=784489)
   9253 baclears (0.00% scaling, ena=784489, run=784489)

Note that the uneven timing between groups is a side effect of the process spending most of its time sleeping, i.e., not enough event rotations (but that's a separate issue).

Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4cb86b4c.41e9d80a.44e9.3e19@mx.google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
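To make the recovery requirement concrete, here is a minimal user-space sketch of the same deferred-commit pattern: each member of the group is scheduled without touching its timestamps, and the current time is only folded into tstamp_running once the whole transaction has committed. The toy_* types and helpers below are invented for illustration only; this is not the kernel implementation, just a sketch of the idea.

/*
 * Minimal user-space sketch (not kernel code) of the pattern described
 * above.  All toy_* names are invented for illustration.
 */
#include <stdbool.h>
#include <stdio.h>

struct toy_event {
	bool counter_free;              /* stands in for a free PMU counter */
	bool scheduled;
	unsigned long long tstamp_stopped;
	unsigned long long tstamp_running;
};

/* like __event_sched_in(): try to schedule, but touch no timestamps */
static int toy_sched_in(struct toy_event *e)
{
	if (!e->counter_free)
		return -1;
	e->scheduled = true;
	return 0;
}

/* like __event_sched_out(): undo scheduling, leave tstamp_stopped alone */
static void toy_sched_out(struct toy_event *e)
{
	e->scheduled = false;
}

/* like group_commit_event_sched_in(): commit timings only on success */
static void toy_group_commit(struct toy_event *g, int n, unsigned long long now)
{
	for (int i = 0; i < n; i++)
		g[i].tstamp_running += now - g[i].tstamp_stopped;
}

/* all-or-nothing scheduling of a group, mirroring group_sched_in() */
static int toy_group_sched_in(struct toy_event *g, int n, unsigned long long now)
{
	for (int i = 0; i < n; i++) {
		if (toy_sched_in(&g[i])) {
			while (i-- > 0)         /* undo the partial group */
				toy_sched_out(&g[i]);
			return -1;              /* timings left untouched */
		}
	}
	toy_group_commit(g, n, now);            /* transaction committed */
	return 0;
}

int main(void)
{
	struct toy_event group[2] = {
		{ .counter_free = true,  .tstamp_stopped = 100 },
		{ .counter_free = false, .tstamp_stopped = 100 },
	};

	if (toy_group_sched_in(group, 2, 250))
		printf("group failed, tstamp_running still %llu and %llu\n",
		       group[0].tstamp_running, group[1].tstamp_running);
	return 0;
}

On the failing path the sketch, like the patch, undoes any partially scheduled events without ever writing the timing fields, so a later successful attempt starts from unmodified timings.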
Diffstat (limited to 'kernel/perf_event.c')
-rw-r--r--   kernel/perf_event.c   76
1 file changed, 63 insertions(+), 13 deletions(-)
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index e7eeba1794fd..634f86a4b2f9 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -412,8 +412,8 @@ event_filter_match(struct perf_event *event)
 	return event->cpu == -1 || event->cpu == smp_processor_id();
 }
 
-static void
-event_sched_out(struct perf_event *event,
+static int
+__event_sched_out(struct perf_event *event,
 		  struct perf_cpu_context *cpuctx,
 		  struct perf_event_context *ctx)
 {
@@ -432,14 +432,13 @@ event_sched_out(struct perf_event *event,
 	}
 
 	if (event->state != PERF_EVENT_STATE_ACTIVE)
-		return;
+		return 0;
 
 	event->state = PERF_EVENT_STATE_INACTIVE;
 	if (event->pending_disable) {
 		event->pending_disable = 0;
 		event->state = PERF_EVENT_STATE_OFF;
 	}
-	event->tstamp_stopped = ctx->time;
 	event->pmu->del(event, 0);
 	event->oncpu = -1;
 
@@ -448,6 +447,19 @@ event_sched_out(struct perf_event *event,
 	ctx->nr_active--;
 	if (event->attr.exclusive || !cpuctx->active_oncpu)
 		cpuctx->exclusive = 0;
+	return 1;
+}
+
+static void
+event_sched_out(struct perf_event *event,
+		struct perf_cpu_context *cpuctx,
+		struct perf_event_context *ctx)
+{
+	int ret;
+
+	ret = __event_sched_out(event, cpuctx, ctx);
+	if (ret)
+		event->tstamp_stopped = ctx->time;
 }
 
 static void
@@ -647,7 +659,7 @@ retry:
 }
 
 static int
-event_sched_in(struct perf_event *event,
+__event_sched_in(struct perf_event *event,
 		 struct perf_cpu_context *cpuctx,
 		 struct perf_event_context *ctx)
 {
@@ -667,8 +679,6 @@ event_sched_in(struct perf_event *event,
 		return -EAGAIN;
 	}
 
-	event->tstamp_running += ctx->time - event->tstamp_stopped;
-
 	if (!is_software_event(event))
 		cpuctx->active_oncpu++;
 	ctx->nr_active++;
@@ -679,6 +689,35 @@ event_sched_in(struct perf_event *event,
 	return 0;
 }
 
+static inline int
+event_sched_in(struct perf_event *event,
+		struct perf_cpu_context *cpuctx,
+		struct perf_event_context *ctx)
+{
+	int ret = __event_sched_in(event, cpuctx, ctx);
+	if (ret)
+		return ret;
+	event->tstamp_running += ctx->time - event->tstamp_stopped;
+	return 0;
+}
+
+static void
+group_commit_event_sched_in(struct perf_event *group_event,
+		struct perf_cpu_context *cpuctx,
+		struct perf_event_context *ctx)
+{
+	struct perf_event *event;
+	u64 now = ctx->time;
+
+	group_event->tstamp_running += now - group_event->tstamp_stopped;
+	/*
+	 * Schedule in siblings as one group (if any):
+	 */
+	list_for_each_entry(event, &group_event->sibling_list, group_entry) {
+		event->tstamp_running += now - event->tstamp_stopped;
+	}
+}
+
 static int
 group_sched_in(struct perf_event *group_event,
 	       struct perf_cpu_context *cpuctx,
@@ -692,7 +731,13 @@ group_sched_in(struct perf_event *group_event,
 
 	pmu->start_txn(pmu);
 
-	if (event_sched_in(group_event, cpuctx, ctx)) {
+	/*
+	 * use __event_sched_in() to delay updating tstamp_running
+	 * until the transaction is committed. In case of failure
+	 * we will keep an unmodified tstamp_running which is a
+	 * requirement to get correct timing information
+	 */
+	if (__event_sched_in(group_event, cpuctx, ctx)) {
 		pmu->cancel_txn(pmu);
 		return -EAGAIN;
 	}
@@ -701,26 +746,31 @@ group_sched_in(struct perf_event *group_event,
 	 * Schedule in siblings as one group (if any):
 	 */
 	list_for_each_entry(event, &group_event->sibling_list, group_entry) {
-		if (event_sched_in(event, cpuctx, ctx)) {
+		if (__event_sched_in(event, cpuctx, ctx)) {
 			partial_group = event;
 			goto group_error;
 		}
 	}
 
-	if (!pmu->commit_txn(pmu))
+	if (!pmu->commit_txn(pmu)) {
+		/* commit tstamp_running */
+		group_commit_event_sched_in(group_event, cpuctx, ctx);
 		return 0;
-
+	}
 group_error:
 	/*
 	 * Groups can be scheduled in as one unit only, so undo any
 	 * partial group before returning:
+	 *
+	 * use __event_sched_out() to avoid updating tstamp_stopped
+	 * because the event never actually ran
 	 */
 	list_for_each_entry(event, &group_event->sibling_list, group_entry) {
 		if (event == partial_group)
 			break;
-		event_sched_out(event, cpuctx, ctx);
+		__event_sched_out(event, cpuctx, ctx);
 	}
-	event_sched_out(group_event, cpuctx, ctx);
+	__event_sched_out(group_event, cpuctx, ctx);
 
 	pmu->cancel_txn(pmu);
 