author     Will Deacon <will.deacon@arm.com>  2012-03-06 11:33:17 -0500
committer  Russell King <rmk+kernel@arm.linux.org.uk>  2012-03-07 04:40:48 -0500
commit     5727347180ebc6b4a866fcbe00dcb39cc03acb37 (patch)
tree       ed27e4b37407917d2f9813a4684cceb64f0d6f93 /arch/arm/kernel/perf_event.c
parent     81caaf2503be8fbb738ea4f124063dcc24958397 (diff)
ARM: 7354/1: perf: limit sample_period to half max_period in non-sampling mode
On ARM, the PMU does not stop counting after an overflow and therefore
IRQ latency affects the new counter value read by the kernel. This is
significant for non-sampling runs where it is possible for the new
value to overtake the previous one, causing the delta to be out by up
to max_period events.

Commit a737823d ("ARM: 6835/1: perf: ensure overflows aren't missed due
to IRQ latency") attempted to fix this problem by allowing interrupt
handlers to pass an overflow flag to the event update function, causing
the overflow calculation to assume that the counter passed through zero
when going from prev to new. Unfortunately, this doesn't work when
overflow occurs on the perf_task_tick path because we have the flag
cleared and end up computing a large negative delta.

This patch removes the overflow flag from armpmu_event_update and
instead limits the sample_period to half of the max_period for
non-sampling profiling runs.

Cc: <stable@vger.kernel.org>
Signed-off-by: Ming Lei <ming.lei@canonical.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
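The fix relies on plain modular arithmetic: as long as fewer than
max_period + 1 events elapse between two reads of the counter,
(new - prev) & max_period recovers the true count even when the counter
wraps through zero. Below is a minimal userspace sketch of that
arithmetic, assuming a hypothetical 32-bit counter mask in place of
armpmu->max_period; event_delta and MAX_PERIOD are illustrative names,
not kernel code, and plain uint64_t stands in for the kernel's
u64/local64_t types.

#include <stdint.h>
#include <stdio.h>

#define MAX_PERIOD 0xffffffffULL	/* hypothetical 32-bit PMU counter mask */

static uint64_t event_delta(uint64_t prev, uint64_t new)
{
	/*
	 * Modular subtraction: correct even when the counter wrapped
	 * past zero between the two reads, provided fewer than
	 * MAX_PERIOD + 1 events elapsed in between.
	 */
	return (new - prev) & MAX_PERIOD;
}

int main(void)
{
	uint64_t prev = 0xffffff00;	/* read 0x100 events before the wrap */
	uint64_t new  = 0x00000200;	/* read again after the wrap         */

	/*
	 * Prints delta = 0x300: 0x100 events up to the wrap plus
	 * 0x200 events after it.
	 */
	printf("delta = 0x%llx\n",
	       (unsigned long long)event_delta(prev, new));
	return 0;
}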
Diffstat (limited to 'arch/arm/kernel/perf_event.c')
-rw-r--r--  arch/arm/kernel/perf_event.c | 22 +++++++++++-----------
1 file changed, 11 insertions(+), 11 deletions(-)
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 5bb91bf3d47f..56173ae16448 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -180,7 +180,7 @@ armpmu_event_set_period(struct perf_event *event,
 u64
 armpmu_event_update(struct perf_event *event,
 		    struct hw_perf_event *hwc,
-		    int idx, int overflow)
+		    int idx)
 {
 	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	u64 delta, prev_raw_count, new_raw_count;
@@ -193,13 +193,7 @@ again:
 			     new_raw_count) != prev_raw_count)
 		goto again;
 
-	new_raw_count &= armpmu->max_period;
-	prev_raw_count &= armpmu->max_period;
-
-	if (overflow)
-		delta = armpmu->max_period - prev_raw_count + new_raw_count + 1;
-	else
-		delta = new_raw_count - prev_raw_count;
+	delta = (new_raw_count - prev_raw_count) & armpmu->max_period;
 
 	local64_add(delta, &event->count);
 	local64_sub(delta, &hwc->period_left);
@@ -216,7 +210,7 @@ armpmu_read(struct perf_event *event)
 	if (hwc->idx < 0)
 		return;
 
-	armpmu_event_update(event, hwc, hwc->idx, 0);
+	armpmu_event_update(event, hwc, hwc->idx);
 }
 
 static void
@@ -232,7 +226,7 @@ armpmu_stop(struct perf_event *event, int flags)
 	if (!(hwc->state & PERF_HES_STOPPED)) {
 		armpmu->disable(hwc, hwc->idx);
 		barrier(); /* why? */
-		armpmu_event_update(event, hwc, hwc->idx, 0);
+		armpmu_event_update(event, hwc, hwc->idx);
 		hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
 	}
 }
@@ -518,7 +512,13 @@ __hw_perf_event_init(struct perf_event *event)
 	hwc->config_base	|= (unsigned long)mapping;
 
 	if (!hwc->sample_period) {
-		hwc->sample_period  = armpmu->max_period;
+		/*
+		 * For non-sampling runs, limit the sample_period to half
+		 * of the counter width. That way, the new counter value
+		 * is far less likely to overtake the previous one unless
+		 * you have some serious IRQ latency issues.
+		 */
+		hwc->sample_period  = armpmu->max_period >> 1;
 		hwc->last_period    = hwc->sample_period;
 		local64_set(&hwc->period_left, hwc->sample_period);
 	}
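The >> 1 in the final hunk is what keeps the masked delta unambiguous.
With the period halved, the counter is programmed to start roughly
half-way through its range, so after an overflow it has to absorb on
the order of 2^31 further events of IRQ latency before new_raw_count
could overtake prev_raw_count. A rough sketch of the start-value
arithmetic follows, again assuming a hypothetical 32-bit counter mask;
it mirrors the calculation done by armpmu_event_set_period() but is not
a drop-in piece of it.

#include <stdint.h>
#include <stdio.h>

#define MAX_PERIOD 0xffffffffULL	/* hypothetical 32-bit PMU counter mask */

int main(void)
{
	uint64_t period = MAX_PERIOD >> 1;	/* 0x7fffffff */

	/*
	 * The counter is programmed to -period within the counter
	 * width, so it overflows after exactly "period" events.
	 */
	uint64_t start = ((uint64_t)0 - period) & MAX_PERIOD; /* 0x80000001 */

	/*
	 * After the overflow the counter wraps to zero and keeps
	 * counting; it would need "start" (~2^31) more events before
	 * reaching the old value again and corrupting the delta.
	 */
	printf("start = 0x%llx, headroom = 0x%llx events\n",
	       (unsigned long long)start, (unsigned long long)start);
	return 0;
}

By contrast, with the old period of max_period itself, the same
arithmetic gives a start value of 1, so the counter sits next to zero
and almost any IRQ latency after an overflow lets the new value
overtake the old one. Halving the period trades half the counting range
for that safety margin.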