aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorStephane Eranian <eranian@google.com>2010-05-25 10:23:10 -0400
committerIngo Molnar <mingo@elte.hu>2010-05-31 02:46:10 -0400
commit90151c35b19633e0cab5a6c80f1ba4a51e7c913b (patch)
tree448c86520eef5b9dc0f06c59a8a96abfd4096fab /arch
parent2e97942fe57864588774f173cf4cd7bb68968b76 (diff)
perf_events: Fix event scheduling issues introduced by transactional API
The transactional API patch between the generic and model-specific code introduced several important bugs with event scheduling, at least on X86. If you had pinned events, e.g., watchdog, and were over-committing the PMU, you would get bogus counts. The bug was showing up on Intel CPU because events would move around more often that on AMD. But the problem also existed on AMD, though harder to expose. The issues were: - group_sched_in() was missing a cancel_txn() in the error path - cpuc->n_added was not properly maintained, leading to missing actions in hw_perf_enable(), i.e., n_running being 0. You cannot update n_added until you know the transaction has succeeded. In case of failed transaction n_added was not adjusted back. - in case of failed transactions, event_sched_out() was called and eventually invoked x86_disable_event() to touch the HW reg. But with transactions, on X86, event_sched_in() does not touch HW registers, it simply collects events into a list. Thus, you could end up calling x86_disable_event() on a counter which did not correspond to the current event when idx != -1. The patch modifies the generic and X86 code to avoid all those problems. First, we keep track of the number of events added last. In case the transaction fails, we substract them from n_added. This approach is necessary (as opposed to delaying updates to n_added) because not all event updates use the transaction API, e.g., single events. Second, we encapsulate the event_sched_in() and event_sched_out() in group_sched_in() inside the transaction. That makes the operations symmetrical and you can also detect that you are inside a transaction and skip the HW reg access by checking cpuc->group_flag. With this patch, you can now overcommit the PMU even with pinned system-wide events present and still get valid counts. Signed-off-by: Stephane Eranian <eranian@google.com> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> LKML-Reference: <1274796225.5882.1389.camel@twins> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch')
-rw-r--r--arch/x86/kernel/cpu/perf_event.c22
1 files changed, 22 insertions, 0 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index c77586061bcb..5db5b7d65a18 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -106,6 +106,7 @@ struct cpu_hw_events {
106 106
107 int n_events; 107 int n_events;
108 int n_added; 108 int n_added;
109 int n_txn;
109 int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ 110 int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
110 u64 tags[X86_PMC_IDX_MAX]; 111 u64 tags[X86_PMC_IDX_MAX];
111 struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ 112 struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
@@ -983,6 +984,7 @@ static int x86_pmu_enable(struct perf_event *event)
983out: 984out:
984 cpuc->n_events = n; 985 cpuc->n_events = n;
985 cpuc->n_added += n - n0; 986 cpuc->n_added += n - n0;
987 cpuc->n_txn += n - n0;
986 988
987 return 0; 989 return 0;
988} 990}
@@ -1089,6 +1091,14 @@ static void x86_pmu_disable(struct perf_event *event)
1089 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1091 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1090 int i; 1092 int i;
1091 1093
1094 /*
1095 * If we're called during a txn, we don't need to do anything.
1096 * The events never got scheduled and ->cancel_txn will truncate
1097 * the event_list.
1098 */
1099 if (cpuc->group_flag & PERF_EVENT_TXN_STARTED)
1100 return;
1101
1092 x86_pmu_stop(event); 1102 x86_pmu_stop(event);
1093 1103
1094 for (i = 0; i < cpuc->n_events; i++) { 1104 for (i = 0; i < cpuc->n_events; i++) {
@@ -1379,6 +1389,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu)
1379 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1389 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1380 1390
1381 cpuc->group_flag |= PERF_EVENT_TXN_STARTED; 1391 cpuc->group_flag |= PERF_EVENT_TXN_STARTED;
1392 cpuc->n_txn = 0;
1382} 1393}
1383 1394
1384/* 1395/*
@@ -1391,6 +1402,11 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)
1391 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1402 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1392 1403
1393 cpuc->group_flag &= ~PERF_EVENT_TXN_STARTED; 1404 cpuc->group_flag &= ~PERF_EVENT_TXN_STARTED;
1405 /*
1406 * Truncate the collected events.
1407 */
1408 cpuc->n_added -= cpuc->n_txn;
1409 cpuc->n_events -= cpuc->n_txn;
1394} 1410}
1395 1411
1396/* 1412/*
@@ -1419,6 +1435,12 @@ static int x86_pmu_commit_txn(const struct pmu *pmu)
1419 */ 1435 */
1420 memcpy(cpuc->assign, assign, n*sizeof(int)); 1436 memcpy(cpuc->assign, assign, n*sizeof(int));
1421 1437
1438 /*
1439 * Clear out the txn count so that ->cancel_txn() which gets
1440 * run after ->commit_txn() doesn't undo things.
1441 */
1442 cpuc->n_txn = 0;
1443
1422 return 0; 1444 return 0;
1423} 1445}
1424 1446