aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/x86/kernel/cpu/perf_event.c10
-rw-r--r--include/linux/perf_event.h83
-rw-r--r--kernel/events/core.c4
3 files changed, 84 insertions, 13 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 453ac9497574..4ef8104958ee 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1622,6 +1622,9 @@ static int x86_pmu_event_idx(struct perf_event *event)
1622{ 1622{
1623 int idx = event->hw.idx; 1623 int idx = event->hw.idx;
1624 1624
1625 if (!x86_pmu.attr_rdpmc)
1626 return 0;
1627
1625 if (x86_pmu.num_counters_fixed && idx >= X86_PMC_IDX_FIXED) { 1628 if (x86_pmu.num_counters_fixed && idx >= X86_PMC_IDX_FIXED) {
1626 idx -= X86_PMC_IDX_FIXED; 1629 idx -= X86_PMC_IDX_FIXED;
1627 idx |= 1 << 30; 1630 idx |= 1 << 30;
@@ -1706,14 +1709,19 @@ static struct pmu pmu = {
1706 .flush_branch_stack = x86_pmu_flush_branch_stack, 1709 .flush_branch_stack = x86_pmu_flush_branch_stack,
1707}; 1710};
1708 1711
1709void perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now) 1712void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
1710{ 1713{
1714 userpg->cap_usr_time = 0;
1715 userpg->cap_usr_rdpmc = x86_pmu.attr_rdpmc;
1716 userpg->pmc_width = x86_pmu.cntval_bits;
1717
1711 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) 1718 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
1712 return; 1719 return;
1713 1720
1714 if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) 1721 if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
1715 return; 1722 return;
1716 1723
1724 userpg->cap_usr_time = 1;
1717 userpg->time_mult = this_cpu_read(cyc2ns); 1725 userpg->time_mult = this_cpu_read(cyc2ns);
1718 userpg->time_shift = CYC2NS_SCALE_FACTOR; 1726 userpg->time_shift = CYC2NS_SCALE_FACTOR;
1719 userpg->time_offset = this_cpu_read(cyc2ns_offset) - now; 1727 userpg->time_offset = this_cpu_read(cyc2ns_offset) - now;
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 57ae485e80fc..ca9ed4e6a286 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -299,18 +299,31 @@ struct perf_event_mmap_page {
299 /* 299 /*
300 * Bits needed to read the hw events in user-space. 300 * Bits needed to read the hw events in user-space.
301 * 301 *
302 * u32 seq; 302 * u32 seq, time_mult, time_shift, idx, width;
303 * s64 count; 303 * u64 count, enabled, running;
304 * u64 cyc, time_offset;
305 * s64 pmc = 0;
304 * 306 *
305 * do { 307 * do {
306 * seq = pc->lock; 308 * seq = pc->lock;
307 *
308 * barrier() 309 * barrier()
309 * if (pc->index) { 310 *
310 * count = pmc_read(pc->index - 1); 311 * enabled = pc->time_enabled;
311 * count += pc->offset; 312 * running = pc->time_running;
312 * } else 313 *
313 * goto regular_read; 314 * if (pc->cap_usr_time && enabled != running) {
315 * cyc = rdtsc();
316 * time_offset = pc->time_offset;
317 * time_mult = pc->time_mult;
318 * time_shift = pc->time_shift;
319 * }
320 *
321 * idx = pc->index;
322 * count = pc->offset;
323 * if (pc->cap_usr_rdpmc && idx) {
324 * width = pc->pmc_width;
325 * pmc = rdpmc(idx - 1);
326 * }
314 * 327 *
315 * barrier(); 328 * barrier();
316 * } while (pc->lock != seq); 329 * } while (pc->lock != seq);
@@ -323,14 +336,57 @@ struct perf_event_mmap_page {
323 __s64 offset; /* add to hardware event value */ 336 __s64 offset; /* add to hardware event value */
324 __u64 time_enabled; /* time event active */ 337 __u64 time_enabled; /* time event active */
325 __u64 time_running; /* time event on cpu */ 338 __u64 time_running; /* time event on cpu */
326 __u32 time_mult, time_shift; 339 union {
340 __u64 capabilities;
341 __u64 cap_usr_time : 1,
342 cap_usr_rdpmc : 1,
343 cap_____res : 62;
344 };
345
346 /*
347 * If cap_usr_rdpmc this field provides the bit-width of the value
348 * read using the rdpmc() or equivalent instruction. This can be used
349 * to sign extend the result like:
350 *
351 * pmc <<= 64 - width;
352 * pmc >>= 64 - width; // signed shift right
353 * count += pmc;
354 */
355 __u16 pmc_width;
356
357 /*
358 * If cap_usr_time the below fields can be used to compute the time
359 * delta since time_enabled (in ns) using rdtsc or similar.
360 *
361 * u64 quot, rem;
362 * u64 delta;
363 *
364 * quot = (cyc >> time_shift);
365 * rem = cyc & (((u64)1 << time_shift) - 1);
366 * delta = time_offset + quot * time_mult +
367 * ((rem * time_mult) >> time_shift);
368 *
369 * Where time_offset, time_mult, time_shift and cyc are read in the
370 * seqcount loop described above. This delta can then be added to
371 * enabled and possibly running (if idx), improving the scaling:
372 *
373 * enabled += delta;
374 * if (idx)
375 * running += delta;
376 *
377 * quot = count / running;
378 * rem = count % running;
379 * count = quot * enabled + (rem * enabled) / running;
380 */
381 __u16 time_shift;
382 __u32 time_mult;
327 __u64 time_offset; 383 __u64 time_offset;
328 384
329 /* 385 /*
330 * Hole for extension of the self monitor capabilities 386 * Hole for extension of the self monitor capabilities
331 */ 387 */
332 388
333 __u64 __reserved[121]; /* align to 1k */ 389 __u64 __reserved[120]; /* align to 1k */
334 390
335 /* 391 /*
336 * Control data for the mmap() data buffer. 392 * Control data for the mmap() data buffer.
@@ -347,6 +403,13 @@ struct perf_event_mmap_page {
347 __u64 data_tail; /* user-space written tail */ 403 __u64 data_tail; /* user-space written tail */
348}; 404};
349 405
406/*
407 * Build time assertion that we keep the data_head at the intended location.
408 * IOW, validate that we got the __reserved[] size right.
409 */
410extern char __assert_mmap_data_head_offset
411 [1 - 2*!!(offsetof(struct perf_event_mmap_page, data_head) != 1024)];
412
350#define PERF_RECORD_MISC_CPUMODE_MASK (7 << 0) 413#define PERF_RECORD_MISC_CPUMODE_MASK (7 << 0)
351#define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0) 414#define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0)
352#define PERF_RECORD_MISC_KERNEL (1 << 0) 415#define PERF_RECORD_MISC_KERNEL (1 << 0)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index c61234b1a988..dc3b05272511 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3348,7 +3348,7 @@ static void calc_timer_values(struct perf_event *event,
3348 *running = ctx_time - event->tstamp_running; 3348 *running = ctx_time - event->tstamp_running;
3349} 3349}
3350 3350
3351void __weak perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now) 3351void __weak arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
3352{ 3352{
3353} 3353}
3354 3354
@@ -3398,7 +3398,7 @@ void perf_event_update_userpage(struct perf_event *event)
3398 userpg->time_running = running + 3398 userpg->time_running = running +
3399 atomic64_read(&event->child_total_time_running); 3399 atomic64_read(&event->child_total_time_running);
3400 3400
3401 perf_update_user_clock(userpg, now); 3401 arch_perf_update_userpage(userpg, now);
3402 3402
3403 barrier(); 3403 barrier();
3404 ++userpg->lock; 3404 ++userpg->lock;