diff options
-rw-r--r-- | arch/x86/kernel/cpu/perf_event.c | 10 | ||||
-rw-r--r-- | include/linux/perf_event.h | 83 | ||||
-rw-r--r-- | kernel/events/core.c | 4 |
3 files changed, 84 insertions, 13 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 453ac9497574..4ef8104958ee 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -1622,6 +1622,9 @@ static int x86_pmu_event_idx(struct perf_event *event) | |||
1622 | { | 1622 | { |
1623 | int idx = event->hw.idx; | 1623 | int idx = event->hw.idx; |
1624 | 1624 | ||
1625 | if (!x86_pmu.attr_rdpmc) | ||
1626 | return 0; | ||
1627 | |||
1625 | if (x86_pmu.num_counters_fixed && idx >= X86_PMC_IDX_FIXED) { | 1628 | if (x86_pmu.num_counters_fixed && idx >= X86_PMC_IDX_FIXED) { |
1626 | idx -= X86_PMC_IDX_FIXED; | 1629 | idx -= X86_PMC_IDX_FIXED; |
1627 | idx |= 1 << 30; | 1630 | idx |= 1 << 30; |
@@ -1706,14 +1709,19 @@ static struct pmu pmu = { | |||
1706 | .flush_branch_stack = x86_pmu_flush_branch_stack, | 1709 | .flush_branch_stack = x86_pmu_flush_branch_stack, |
1707 | }; | 1710 | }; |
1708 | 1711 | ||
1709 | void perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now) | 1712 | void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) |
1710 | { | 1713 | { |
1714 | userpg->cap_usr_time = 0; | ||
1715 | userpg->cap_usr_rdpmc = x86_pmu.attr_rdpmc; | ||
1716 | userpg->pmc_width = x86_pmu.cntval_bits; | ||
1717 | |||
1711 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) | 1718 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) |
1712 | return; | 1719 | return; |
1713 | 1720 | ||
1714 | if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) | 1721 | if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) |
1715 | return; | 1722 | return; |
1716 | 1723 | ||
1724 | userpg->cap_usr_time = 1; | ||
1717 | userpg->time_mult = this_cpu_read(cyc2ns); | 1725 | userpg->time_mult = this_cpu_read(cyc2ns); |
1718 | userpg->time_shift = CYC2NS_SCALE_FACTOR; | 1726 | userpg->time_shift = CYC2NS_SCALE_FACTOR; |
1719 | userpg->time_offset = this_cpu_read(cyc2ns_offset) - now; | 1727 | userpg->time_offset = this_cpu_read(cyc2ns_offset) - now; |
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 57ae485e80fc..ca9ed4e6a286 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h | |||
@@ -299,18 +299,31 @@ struct perf_event_mmap_page { | |||
299 | /* | 299 | /* |
300 | * Bits needed to read the hw events in user-space. | 300 | * Bits needed to read the hw events in user-space. |
301 | * | 301 | * |
302 | * u32 seq; | 302 | * u32 seq, time_mult, time_shift, idx, width; |
303 | * s64 count; | 303 | * u64 count, enabled, running; |
304 | * u64 cyc, time_offset; | ||
305 | * s64 pmc = 0; | ||
304 | * | 306 | * |
305 | * do { | 307 | * do { |
306 | * seq = pc->lock; | 308 | * seq = pc->lock; |
307 | * | ||
308 | * barrier() | 309 | * barrier() |
309 | * if (pc->index) { | 310 | * |
310 | * count = pmc_read(pc->index - 1); | 311 | * enabled = pc->time_enabled; |
311 | * count += pc->offset; | 312 | * running = pc->time_running; |
312 | * } else | 313 | * |
313 | * goto regular_read; | 314 | * if (pc->cap_usr_time && enabled != running) { |
315 | * cyc = rdtsc(); | ||
316 | * time_offset = pc->time_offset; | ||
317 | * time_mult = pc->time_mult; | ||
318 | * time_shift = pc->time_shift; | ||
319 | * } | ||
320 | * | ||
321 | * idx = pc->index; | ||
322 | * count = pc->offset; | ||
323 | * if (pc->cap_usr_rdpmc && idx) { | ||
324 | * width = pc->pmc_width; | ||
325 | * pmc = rdpmc(idx - 1); | ||
326 | * } | ||
314 | * | 327 | * |
315 | * barrier(); | 328 | * barrier(); |
316 | * } while (pc->lock != seq); | 329 | * } while (pc->lock != seq); |
@@ -323,14 +336,57 @@ struct perf_event_mmap_page { | |||
323 | __s64 offset; /* add to hardware event value */ | 336 | __s64 offset; /* add to hardware event value */ |
324 | __u64 time_enabled; /* time event active */ | 337 | __u64 time_enabled; /* time event active */ |
325 | __u64 time_running; /* time event on cpu */ | 338 | __u64 time_running; /* time event on cpu */ |
326 | __u32 time_mult, time_shift; | 339 | union { |
340 | __u64 capabilities; | ||
341 | __u64 cap_usr_time : 1, | ||
342 | cap_usr_rdpmc : 1, | ||
343 | cap_____res : 62; | ||
344 | }; | ||
345 | |||
346 | /* | ||
347 | * If cap_usr_rdpmc this field provides the bit-width of the value | ||
348 | * read using the rdpmc() or equivalent instruction. This can be used | ||
349 | * to sign extend the result like: | ||
350 | * | ||
351 | * pmc <<= 64 - width; | ||
352 | * pmc >>= 64 - width; // signed shift right | ||
353 | * count += pmc; | ||
354 | */ | ||
355 | __u16 pmc_width; | ||
356 | |||
357 | /* | ||
358 | * If cap_usr_time the below fields can be used to compute the time | ||
359 | * delta since time_enabled (in ns) using rdtsc or similar. | ||
360 | * | ||
361 | * u64 quot, rem; | ||
362 | * u64 delta; | ||
363 | * | ||
364 | * quot = (cyc >> time_shift); | ||
365 | * rem = cyc & (((u64)1 << time_shift) - 1); | ||
366 | * delta = time_offset + quot * time_mult + | ||
367 | * ((rem * time_mult) >> time_shift); | ||
368 | * | ||
369 | * Where time_offset, time_mult, time_shift and cyc are read in the | ||
370 | * seqcount loop described above. This delta can then be added to | ||
371 | * enabled and possibly running (if idx), improving the scaling: | ||
372 | * | ||
373 | * enabled += delta; | ||
374 | * if (idx) | ||
375 | * running += delta; | ||
376 | * | ||
377 | * quot = count / running; | ||
378 | * rem = count % running; | ||
379 | * count = quot * enabled + (rem * enabled) / running; | ||
380 | */ | ||
381 | __u16 time_shift; | ||
382 | __u32 time_mult; | ||
327 | __u64 time_offset; | 383 | __u64 time_offset; |
328 | 384 | ||
329 | /* | 385 | /* |
330 | * Hole for extension of the self monitor capabilities | 386 | * Hole for extension of the self monitor capabilities |
331 | */ | 387 | */ |
332 | 388 | ||
333 | __u64 __reserved[121]; /* align to 1k */ | 389 | __u64 __reserved[120]; /* align to 1k */ |
334 | 390 | ||
335 | /* | 391 | /* |
336 | * Control data for the mmap() data buffer. | 392 | * Control data for the mmap() data buffer. |
@@ -347,6 +403,13 @@ struct perf_event_mmap_page { | |||
347 | __u64 data_tail; /* user-space written tail */ | 403 | __u64 data_tail; /* user-space written tail */ |
348 | }; | 404 | }; |
349 | 405 | ||
406 | /* | ||
407 | * Build time assertion that we keep the data_head at the intended location. | ||
408 | * IOW, validate that we got the __reserved[] size right. | ||
409 | */ | ||
410 | extern char __assert_mmap_data_head_offset | ||
411 | [1 - 2*!!(offsetof(struct perf_event_mmap_page, data_head) != 1024)]; | ||
412 | |||
350 | #define PERF_RECORD_MISC_CPUMODE_MASK (7 << 0) | 413 | #define PERF_RECORD_MISC_CPUMODE_MASK (7 << 0) |
351 | #define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0) | 414 | #define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0) |
352 | #define PERF_RECORD_MISC_KERNEL (1 << 0) | 415 | #define PERF_RECORD_MISC_KERNEL (1 << 0) |
diff --git a/kernel/events/core.c b/kernel/events/core.c index c61234b1a988..dc3b05272511 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
@@ -3348,7 +3348,7 @@ static void calc_timer_values(struct perf_event *event, | |||
3348 | *running = ctx_time - event->tstamp_running; | 3348 | *running = ctx_time - event->tstamp_running; |
3349 | } | 3349 | } |
3350 | 3350 | ||
3351 | void __weak perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now) | 3351 | void __weak arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) |
3352 | { | 3352 | { |
3353 | } | 3353 | } |
3354 | 3354 | ||
@@ -3398,7 +3398,7 @@ void perf_event_update_userpage(struct perf_event *event) | |||
3398 | userpg->time_running = running + | 3398 | userpg->time_running = running + |
3399 | atomic64_read(&event->child_total_time_running); | 3399 | atomic64_read(&event->child_total_time_running); |
3400 | 3400 | ||
3401 | perf_update_user_clock(userpg, now); | 3401 | arch_perf_update_userpage(userpg, now); |
3402 | 3402 | ||
3403 | barrier(); | 3403 | barrier(); |
3404 | ++userpg->lock; | 3404 | ++userpg->lock; |