path: root/kernel/perf_counter.c
author	Paul Mackerras <paulus@samba.org>	2009-02-13 06:10:34 -0500
committer	Ingo Molnar <mingo@elte.hu>	2009-02-13 06:20:38 -0500
commit	c07c99b67233ccaad38a961c17405dc1e1542aa4 (patch)
tree	29682173de8f81b030a0d68006b56b115eea0ce9 /kernel/perf_counter.c
parent	b1864e9a1afef41709886072c6e6248def0386f4 (diff)
perfcounters: make context switch and migration software counters work again
Jaswinder Singh Rajput reported that commit 23a185ca8abbeef caused the
context switch and migration software counters to report zero always.
With that commit, the software counters only count events that occur
between sched-in and sched-out for a task. This is necessary for the
counter enable/disable prctls and ioctls to work. However, the context
switch and migration counts are incremented after sched-out for one
task and before sched-in for the next. Since the increment doesn't
occur while a task is scheduled in (as far as the software counters
are concerned), it doesn't count towards any counter.

Thus the context switch and migration counters need to count events
that occur at any time, provided the counter is enabled, not just
those that occur while the task is scheduled in (from the perf_counter
subsystem's point of view). The problem though is that the software
counter code can't tell the difference between being enabled and being
scheduled in, or between being disabled and being scheduled out, since
we use the one pair of enable/disable entry points for both. That is,
the high-level disable operation simply arranges for the counter not
to be scheduled in any more, and the high-level enable operation
arranges for it to be scheduled in again.

One way to solve this would be to have sched_in/out operations in the
hw_perf_counter_ops struct as well as enable/disable. However, this
takes a simpler approach: it adds a 'prev_state' field to the
perf_counter struct that allows a counter's enable method to know
whether the counter was previously disabled or just inactive
(scheduled out), and therefore whether the enable method is being
called as a result of a high-level enable or a schedule-in operation.

This then allows the context switch, migration and page fault counters
to reset their hw.prev_count value in their enable functions only if
they are called as a result of a high-level enable operation. Although
page faults would normally only occur while the counter is scheduled
in, this changes the page fault counter code too, in case there are
ever circumstances where page faults get counted against a task while
its counters are not scheduled in.

Reported-by: Jaswinder Singh Rajput <jaswinder@kernel.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
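For readers skimming the patch, the following is a minimal user-space sketch of the prev_state idea: the same enable callback runs for both a high-level enable and a sched-in, and the baseline (hw.prev_count) is reset only when the counter was previously OFF. The enum values and the prev_state/prev_count fields mirror kernel/perf_counter.c of this era; the sw_counter type, fake_event_total, and the simulation in main() are hypothetical stand-ins for illustration, not kernel code.

/*
 * Sketch only: a user-space model of the prev_state mechanism.
 * fake_event_total stands in for a kernel source such as
 * get_context_switches(); sw_counter is an invented type.
 */
#include <stdio.h>
#include <stdint.h>

enum perf_counter_state {
	PERF_COUNTER_STATE_OFF		= -1,
	PERF_COUNTER_STATE_INACTIVE	=  0,
	PERF_COUNTER_STATE_ACTIVE	=  1,
};

struct sw_counter {
	enum perf_counter_state state;
	enum perf_counter_state prev_state;	/* state before this enable */
	uint64_t prev_count;			/* baseline for delta counting */
	uint64_t count;				/* accumulated event count */
};

static uint64_t fake_event_total;		/* global event tally */

/* Called both for a high-level enable and for a sched-in. */
static void sw_counter_enable(struct sw_counter *c)
{
	c->prev_state = c->state;
	c->state = PERF_COUNTER_STATE_ACTIVE;

	/*
	 * Reset the baseline only when coming from OFF (a real enable).
	 * On a plain sched-in, keep the old baseline so events that
	 * happened between sched-out and sched-in are still counted.
	 */
	if (c->prev_state <= PERF_COUNTER_STATE_OFF)
		c->prev_count = fake_event_total;
}

/* Models sched-out: fold the delta since the baseline into the count. */
static void sw_counter_disable(struct sw_counter *c)
{
	c->count += fake_event_total - c->prev_count;
	c->prev_count = fake_event_total;
	c->state = PERF_COUNTER_STATE_INACTIVE;
}

int main(void)
{
	struct sw_counter c = { .state = PERF_COUNTER_STATE_OFF };

	sw_counter_enable(&c);		/* high-level enable: resets baseline */
	fake_event_total += 3;		/* events while scheduled in */
	sw_counter_disable(&c);		/* sched-out */
	fake_event_total += 2;		/* e.g. the context switch itself */
	sw_counter_enable(&c);		/* sched-in: baseline NOT reset */
	fake_event_total += 1;
	sw_counter_disable(&c);

	printf("counted %llu events (expect 6)\n",
	       (unsigned long long)c.count);
	return 0;
}

With the pre-patch behaviour (baseline reset on every enable), the two events raised between sched-out and sched-in would be lost, which is exactly the zero-count symptom reported for the context switch and migration counters.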
Diffstat (limited to 'kernel/perf_counter.c')
-rw-r--r--	kernel/perf_counter.c	21
1 file changed, 15 insertions(+), 6 deletions(-)
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index fcefb0a726f3..ad62965828d3 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -444,6 +444,7 @@ static void __perf_install_in_context(void *info)
 
 	list_add_counter(counter, ctx);
 	ctx->nr_counters++;
+	counter->prev_state = PERF_COUNTER_STATE_OFF;
 
 	/*
 	 * Don't put the counter on if it is disabled or if
@@ -562,6 +563,7 @@ static void __perf_counter_enable(void *info)
 	curr_rq_lock_irq_save(&flags);
 	spin_lock(&ctx->lock);
 
+	counter->prev_state = counter->state;
 	if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
 		goto unlock;
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
@@ -733,6 +735,7 @@ group_sched_in(struct perf_counter *group_counter,
 	if (ret)
 		return ret < 0 ? ret : 0;
 
+	group_counter->prev_state = group_counter->state;
 	if (counter_sched_in(group_counter, cpuctx, ctx, cpu))
 		return -EAGAIN;
 
@@ -740,6 +743,7 @@ group_sched_in(struct perf_counter *group_counter,
 	 * Schedule in siblings as one group (if any):
 	 */
 	list_for_each_entry(counter, &group_counter->sibling_list, list_entry) {
+		counter->prev_state = counter->state;
 		if (counter_sched_in(counter, cpuctx, ctx, cpu)) {
 			partial_group = counter;
 			goto group_error;
@@ -1398,9 +1402,9 @@ static void task_clock_perf_counter_read(struct perf_counter *counter)
 
 static int task_clock_perf_counter_enable(struct perf_counter *counter)
 {
-	u64 now = task_clock_perf_counter_val(counter, 0);
-
-	atomic64_set(&counter->hw.prev_count, now);
+	if (counter->prev_state <= PERF_COUNTER_STATE_OFF)
+		atomic64_set(&counter->hw.prev_count,
+			     task_clock_perf_counter_val(counter, 0));
 
 	return 0;
 }
@@ -1455,7 +1459,8 @@ static void page_faults_perf_counter_read(struct perf_counter *counter)
 
 static int page_faults_perf_counter_enable(struct perf_counter *counter)
 {
-	atomic64_set(&counter->hw.prev_count, get_page_faults(counter));
+	if (counter->prev_state <= PERF_COUNTER_STATE_OFF)
+		atomic64_set(&counter->hw.prev_count, get_page_faults(counter));
 	return 0;
 }
 
@@ -1501,7 +1506,9 @@ static void context_switches_perf_counter_read(struct perf_counter *counter)
 
 static int context_switches_perf_counter_enable(struct perf_counter *counter)
 {
-	atomic64_set(&counter->hw.prev_count, get_context_switches(counter));
+	if (counter->prev_state <= PERF_COUNTER_STATE_OFF)
+		atomic64_set(&counter->hw.prev_count,
+			     get_context_switches(counter));
 	return 0;
 }
 
@@ -1547,7 +1554,9 @@ static void cpu_migrations_perf_counter_read(struct perf_counter *counter)
 
 static int cpu_migrations_perf_counter_enable(struct perf_counter *counter)
 {
-	atomic64_set(&counter->hw.prev_count, get_cpu_migrations(counter));
+	if (counter->prev_state <= PERF_COUNTER_STATE_OFF)
+		atomic64_set(&counter->hw.prev_count,
+			     get_cpu_migrations(counter));
 	return 0;
 }
 