aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPeter Zijlstra <peterz@infradead.org>2013-10-17 09:32:10 -0400
committerIngo Molnar <mingo@kernel.org>2013-10-29 07:01:20 -0400
commite8a923cc1fff6e627f906655ad52ee694ef2f6d7 (patch)
tree10e56e0bd4463cfbc91a82132f75ecfebef2d474
parentbf378d341e4873ed928dc3c636252e6895a21f50 (diff)
perf/x86: Fix NMI measurements
OK, so what I'm actually seeing on my WSM is that sched/clock.c is 'broken' for the purpose we're using it for. What triggered it is that my WSM-EP is broken :-( [ 0.001000] tsc: Fast TSC calibration using PIT [ 0.002000] tsc: Detected 2533.715 MHz processor [ 0.500180] TSC synchronization [CPU#0 -> CPU#6]: [ 0.505197] Measured 3 cycles TSC warp between CPUs, turning off TSC clock. [ 0.004000] tsc: Marking TSC unstable due to check_tsc_sync_source failed For some reason it consistently detects TSC skew, even though NHM+ should have a single clock domain for 'reasonable' systems. This marks sched_clock_stable=0, which means that we do fancy stuff to try and get a 'sane' clock. Part of this fancy stuff relies on the tick, clearly that's gone when NOHZ=y. So for idle cpus time gets stuck, until it either wakes up or gets kicked by another cpu. While this is perfectly fine for the scheduler -- it only cares about actually running stuff, and when we're running stuff we're obviously not idle. This does somewhat break down for perf which can trigger events just fine on an otherwise idle cpu. So I've got NMIs get get 'measured' as taking ~1ms, which actually don't last nearly that long: <idle>-0 [013] d.h. 886.311970: rcu_nmi_enter <-do_nmi ... <idle>-0 [013] d.h. 886.311997: perf_sample_event_took: HERE!!! : 1040990 So ftrace (which uses sched_clock(), not the fancy bits) only sees ~27us, but we measure ~1ms !! Now since all this measurement stuff lives in x86 code, we can actually fix it. Signed-off-by: Peter Zijlstra <peterz@infradead.org> Cc: mingo@kernel.org Cc: dave.hansen@linux.intel.com Cc: eranian@google.com Cc: Don Zickus <dzickus@redhat.com> Cc: jmario@redhat.com Cc: acme@infradead.org Link: http://lkml.kernel.org/r/20131017133350.GG3364@laptop.programming.kicks-ass.net Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--arch/x86/kernel/cpu/perf_event.c6
-rw-r--r--arch/x86/kernel/nmi.c4
2 files changed, 5 insertions, 5 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 9d8449158cf9..8a87a3224121 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1276,16 +1276,16 @@ void perf_events_lapic_init(void)
1276static int __kprobes 1276static int __kprobes
1277perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs) 1277perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
1278{ 1278{
1279 int ret;
1280 u64 start_clock; 1279 u64 start_clock;
1281 u64 finish_clock; 1280 u64 finish_clock;
1281 int ret;
1282 1282
1283 if (!atomic_read(&active_events)) 1283 if (!atomic_read(&active_events))
1284 return NMI_DONE; 1284 return NMI_DONE;
1285 1285
1286 start_clock = local_clock(); 1286 start_clock = sched_clock();
1287 ret = x86_pmu.handle_irq(regs); 1287 ret = x86_pmu.handle_irq(regs);
1288 finish_clock = local_clock(); 1288 finish_clock = sched_clock();
1289 1289
1290 perf_sample_event_took(finish_clock - start_clock); 1290 perf_sample_event_took(finish_clock - start_clock);
1291 1291
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index ba77ebc2c353..6fcb49ce50a1 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -113,10 +113,10 @@ static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2
113 u64 before, delta, whole_msecs; 113 u64 before, delta, whole_msecs;
114 int remainder_ns, decimal_msecs, thishandled; 114 int remainder_ns, decimal_msecs, thishandled;
115 115
116 before = local_clock(); 116 before = sched_clock();
117 thishandled = a->handler(type, regs); 117 thishandled = a->handler(type, regs);
118 handled += thishandled; 118 handled += thishandled;
119 delta = local_clock() - before; 119 delta = sched_clock() - before;
120 trace_nmi_handler(a->handler, (int)delta, thishandled); 120 trace_nmi_handler(a->handler, (int)delta, thishandled);
121 121
122 if (delta < nmi_longest_ns) 122 if (delta < nmi_longest_ns)