aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAdrian Hunter <adrian.hunter@intel.com>2015-10-16 09:24:05 -0400
committerIngo Molnar <mingo@kernel.org>2015-10-20 04:30:52 -0400
commitb9511cd761faafca7a1acc059e792c1399f9d7c6 (patch)
treed051932d0f8e0b7c8c81c2159f3a9028d1353fb4
parent40d4f23f42b0e6997db5bbfba9dc6bf349e8ad81 (diff)
perf/x86: Fix time_shift in perf_event_mmap_page
Commit: b20112edeadf ("perf/x86: Improve accuracy of perf/sched clock")
allowed the time_shift value in perf_event_mmap_page to be as much as 32.

Unfortunately the documented algorithms for using time_shift have it
shifting an integer, whereas to work correctly with the value 32, the
type must be u64.

In the case of perf tools, Intel PT decodes correctly but the timestamps
that are output (for example by perf script) have lost 32-bits of
granularity so they look like they are not changing at all.

Fix by limiting the shift to 31 and adjusting the multiplier accordingly.

Also update the documentation of perf_event_mmap_page so that new code
based on it will be more future-proof.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Fixes: b20112edeadf ("perf/x86: Improve accuracy of perf/sched clock")
Link: http://lkml.kernel.org/r/1445001845-13688-2-git-send-email-adrian.hunter@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--arch/x86/kernel/tsc.c11
-rw-r--r--include/uapi/linux/perf_event.h4
2 files changed, 13 insertions, 2 deletions
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 69b84a26ea17..c7c4d9c51e99 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -259,6 +259,17 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
 	clocks_calc_mult_shift(&data->cyc2ns_mul, &data->cyc2ns_shift, cpu_khz,
 			       NSEC_PER_MSEC, 0);
 
+	/*
+	 * cyc2ns_shift is exported via arch_perf_update_userpage() where it is
+	 * not expected to be greater than 31 due to the original published
+	 * conversion algorithm shifting a 32-bit value (now specifies a 64-bit
+	 * value) - refer perf_event_mmap_page documentation in perf_event.h.
+	 */
+	if (data->cyc2ns_shift == 32) {
+		data->cyc2ns_shift = 31;
+		data->cyc2ns_mul >>= 1;
+	}
+
 	data->cyc2ns_offset = ns_now -
 		mul_u64_u32_shr(tsc_now, data->cyc2ns_mul, data->cyc2ns_shift);
 
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 2881145cda86..6c72e72e975c 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -476,7 +476,7 @@ struct perf_event_mmap_page {
 	 *               u64 delta;
 	 *
 	 *               quot = (cyc >> time_shift);
-	 *               rem = cyc & ((1 << time_shift) - 1);
+	 *               rem = cyc & (((u64)1 << time_shift) - 1);
 	 *               delta = time_offset + quot * time_mult +
 	 *                       ((rem * time_mult) >> time_shift);
 	 *
@@ -507,7 +507,7 @@ struct perf_event_mmap_page {
 	 * And vice versa:
 	 *
 	 *               quot = cyc >> time_shift;
-	 *               rem = cyc & ((1 << time_shift) - 1);
+	 *               rem = cyc & (((u64)1 << time_shift) - 1);
 	 *               timestamp = time_zero + quot * time_mult +
 	 *                           ((rem * time_mult) >> time_shift);
 	 */