path: root/kernel/perf_counter.c
author	Paul Mackerras <paulus@samba.org>	2009-02-13 06:10:34 -0500
committer	Ingo Molnar <mingo@elte.hu>	2009-02-13 06:20:38 -0500
commit	c07c99b67233ccaad38a961c17405dc1e1542aa4 (patch)
tree	29682173de8f81b030a0d68006b56b115eea0ce9 /kernel/perf_counter.c
parent	b1864e9a1afef41709886072c6e6248def0386f4 (diff)
perfcounters: make context switch and migration software counters work again
Jaswinder Singh Rajput reported that commit 23a185ca8abbeef caused the
context switch and migration software counters to report zero always.
With that commit, the software counters only count events that occur
between sched-in and sched-out for a task. This is necessary for the
counter enable/disable prctls and ioctls to work. However, the context
switch and migration counts are incremented after sched-out for one
task and before sched-in for the next. Since the increment doesn't
occur while a task is scheduled in (as far as the software counters
are concerned), it doesn't count towards any counter.

Thus the context switch and migration counters need to count events
that occur at any time, provided the counter is enabled, not just
those that occur while the task is scheduled in (from the perf_counter
subsystem's point of view). The problem though is that the software
counter code can't tell the difference between being enabled and being
scheduled in, or between being disabled and being scheduled out, since
we use the one pair of enable/disable entry points for both. That is,
the high-level disable operation simply arranges for the counter not
to be scheduled in any more, and the high-level enable operation
arranges for it to be scheduled in again.

One way to solve this would be to have sched_in/out operations in the
hw_perf_counter_ops struct as well as enable/disable. However, this
takes a simpler approach: it adds a 'prev_state' field to the
perf_counter struct that allows a counter's enable method to know
whether the counter was previously disabled or just inactive
(scheduled out), and therefore whether the enable method is being
called as a result of a high-level enable or a schedule-in operation.

This then allows the context switch, migration and page fault counters
to reset their hw.prev_count value in their enable functions only if
they are called as a result of a high-level enable operation. Although
page faults would normally only occur while the counter is scheduled
in, this changes the page fault counter code too, in case there are
ever circumstances where page faults get counted against a task while
its counters are not scheduled in.

Reported-by: Jaswinder Singh Rajput <jaswinder@kernel.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
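For readers skimming the patch, the following is a minimal user-space sketch of the prev_state idea: the same enable callback runs for both a high-level enable and a sched-in, and the baseline (hw.prev_count) is reset only when the counter was previously OFF. The enum values and the prev_state/prev_count fields mirror kernel/perf_counter.c of this era; the sw_counter type, fake_event_total, and the simulation in main() are hypothetical stand-ins for illustration, not kernel code.

/*
 * Sketch only: a user-space model of the prev_state mechanism.
 * fake_event_total stands in for a kernel source such as
 * get_context_switches(); sw_counter is an invented type.
 */
#include <stdio.h>
#include <stdint.h>

enum perf_counter_state {
	PERF_COUNTER_STATE_OFF		= -1,
	PERF_COUNTER_STATE_INACTIVE	=  0,
	PERF_COUNTER_STATE_ACTIVE	=  1,
};

struct sw_counter {
	enum perf_counter_state state;
	enum perf_counter_state prev_state;	/* state before this enable */
	uint64_t prev_count;			/* baseline for delta counting */
	uint64_t count;				/* accumulated event count */
};

static uint64_t fake_event_total;		/* global event tally */

/* Called both for a high-level enable and for a sched-in. */
static void sw_counter_enable(struct sw_counter *c)
{
	c->prev_state = c->state;
	c->state = PERF_COUNTER_STATE_ACTIVE;

	/*
	 * Reset the baseline only when coming from OFF (a real enable).
	 * On a plain sched-in, keep the old baseline so events that
	 * happened between sched-out and sched-in are still counted.
	 */
	if (c->prev_state <= PERF_COUNTER_STATE_OFF)
		c->prev_count = fake_event_total;
}

/* Models sched-out: fold the delta since the baseline into the count. */
static void sw_counter_disable(struct sw_counter *c)
{
	c->count += fake_event_total - c->prev_count;
	c->prev_count = fake_event_total;
	c->state = PERF_COUNTER_STATE_INACTIVE;
}

int main(void)
{
	struct sw_counter c = { .state = PERF_COUNTER_STATE_OFF };

	sw_counter_enable(&c);		/* high-level enable: resets baseline */
	fake_event_total += 3;		/* events while scheduled in */
	sw_counter_disable(&c);		/* sched-out */
	fake_event_total += 2;		/* e.g. the context switch itself */
	sw_counter_enable(&c);		/* sched-in: baseline NOT reset */
	fake_event_total += 1;
	sw_counter_disable(&c);

	printf("counted %llu events (expect 6)\n",
	       (unsigned long long)c.count);
	return 0;
}

With the pre-patch behaviour (baseline reset on every enable), the two events raised between sched-out and sched-in would be lost, which is exactly the zero-count symptom reported for the context switch and migration counters.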
Diffstat (limited to 'kernel/perf_counter.c')
-rw-r--r--	kernel/perf_counter.c	21
1 file changed, 15 insertions(+), 6 deletions(-)
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index fcefb0a726f3..ad62965828d3 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -444,6 +444,7 @@ static void __perf_install_in_context(void *info)
 
 	list_add_counter(counter, ctx);
 	ctx->nr_counters++;
+	counter->prev_state = PERF_COUNTER_STATE_OFF;
 
 	/*
 	 * Don't put the counter on if it is disabled or if
@@ -562,6 +563,7 @@ static void __perf_counter_enable(void *info)
 	curr_rq_lock_irq_save(&flags);
 	spin_lock(&ctx->lock);
 
+	counter->prev_state = counter->state;
 	if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
 		goto unlock;
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
@@ -733,6 +735,7 @@ group_sched_in(struct perf_counter *group_counter,
 	if (ret)
 		return ret < 0 ? ret : 0;
 
+	group_counter->prev_state = group_counter->state;
 	if (counter_sched_in(group_counter, cpuctx, ctx, cpu))
 		return -EAGAIN;
 
@@ -740,6 +743,7 @@ group_sched_in(struct perf_counter *group_counter,
 	 * Schedule in siblings as one group (if any):
 	 */
 	list_for_each_entry(counter, &group_counter->sibling_list, list_entry) {
+		counter->prev_state = counter->state;
 		if (counter_sched_in(counter, cpuctx, ctx, cpu)) {
 			partial_group = counter;
 			goto group_error;
@@ -1398,9 +1402,9 @@ static void task_clock_perf_counter_read(struct perf_counter *counter)
 
 static int task_clock_perf_counter_enable(struct perf_counter *counter)
 {
-	u64 now = task_clock_perf_counter_val(counter, 0);
-
-	atomic64_set(&counter->hw.prev_count, now);
+	if (counter->prev_state <= PERF_COUNTER_STATE_OFF)
+		atomic64_set(&counter->hw.prev_count,
+			     task_clock_perf_counter_val(counter, 0));
 
 	return 0;
 }
@@ -1455,7 +1459,8 @@ static void page_faults_perf_counter_read(struct perf_counter *counter)
 
 static int page_faults_perf_counter_enable(struct perf_counter *counter)
 {
-	atomic64_set(&counter->hw.prev_count, get_page_faults(counter));
+	if (counter->prev_state <= PERF_COUNTER_STATE_OFF)
+		atomic64_set(&counter->hw.prev_count, get_page_faults(counter));
 	return 0;
 }
 
@@ -1501,7 +1506,9 @@ static void context_switches_perf_counter_read(struct perf_counter *counter)
 
 static int context_switches_perf_counter_enable(struct perf_counter *counter)
 {
-	atomic64_set(&counter->hw.prev_count, get_context_switches(counter));
+	if (counter->prev_state <= PERF_COUNTER_STATE_OFF)
+		atomic64_set(&counter->hw.prev_count,
+			     get_context_switches(counter));
 	return 0;
 }
 
@@ -1547,7 +1554,9 @@ static void cpu_migrations_perf_counter_read(struct perf_counter *counter)
 
 static int cpu_migrations_perf_counter_enable(struct perf_counter *counter)
 {
-	atomic64_set(&counter->hw.prev_count, get_cpu_migrations(counter));
+	if (counter->prev_state <= PERF_COUNTER_STATE_OFF)
+		atomic64_set(&counter->hw.prev_count,
+			     get_cpu_migrations(counter));
 	return 0;
 }
 