author		Peter Zijlstra <peterz@infradead.org>	2015-02-20 08:05:38 -0500
committer	Ingo Molnar <mingo@kernel.org>	2015-03-27 05:13:22 -0400
commit		34f439278cef7b1177f8ce24f9fc81dfc6221d3b (patch)
tree		8bd86bf3d73aff36e8bee13c0102c7ae7e44e40c /kernel
parent		b381e63b48a0b6befc7b4e55408c39012a0dcf8c (diff)
perf: Add per event clockid support
While thinking on the whole clock discussion it occurred to me we have
two distinct uses of time:

 1) the tracking of event/ctx/cgroup enabled/running/stopped times,
    which includes the self-monitoring support in struct
    perf_event_mmap_page.

 2) the actual timestamps visible in the data records.

And we've been conflating them.

The first is all about tracking time deltas; nobody should really care
in what time base that happens, it's all relative information, and as
long as it's internally consistent it works.

The second however is what people are worried about when having to
merge their data with external sources. And here we have the
discussion on MONOTONIC vs MONOTONIC_RAW etc., where MONOTONIC is good
for correlating between machines (static offset), while MONOTONIC_RAW
is required for correlating against a fixed rate hardware clock.

This means configurability; now 1) makes that hard because it needs to
be internally consistent across groups of unrelated events, which is
why we had to have a global perf_clock().

However, for 2) it doesn't really matter; perf itself doesn't care
what it writes into the buffer.

The below patch makes the distinction between these two cases by
adding perf_event_clock(), which is used for the second case. It
further makes this configurable on a per-event basis, but adds a few
sanity checks such that we cannot combine events with different clocks
in confusing ways.

And since we then have per-event configurability, we might as well
retain the 'legacy' behaviour as a default.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
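[Editorial note on the resulting interface: the patch also extends the uapi struct perf_event_attr with a use_clockid bit and a clockid field; that hunk lies outside the 'kernel' path filter of this page. Below is a minimal, hedged userspace sketch of opening an event with a per-event clock; everything apart from the two new attr fields is ordinary perf_event_open() boilerplate.]

#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <time.h>
#include <unistd.h>

/* Sketch: open a software event whose record timestamps come from
 * CLOCK_MONOTONIC_RAW instead of the legacy perf_clock(). */
static int open_raw_clock_event(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_CPU_CLOCK;
	attr.sample_type = PERF_SAMPLE_TIME;

	attr.use_clockid = 1;			/* opt in to per-event clocks */
	attr.clockid = CLOCK_MONOTONIC_RAW;	/* NMI-safe, valid for any PMU */

	/* pid 0 = this task, cpu -1 = any CPU, no group, no flags */
	return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}

[CLOCK_REALTIME, CLOCK_BOOTTIME and CLOCK_TAI are accepted as well, but only for PMUs that advertise PERF_PMU_CAP_NO_NMI, since their ktime accessors are not NMI-safe; see perf_event_set_clock() in the diff below.]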
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/events/core.c	77
1 file changed, 74 insertions(+), 3 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index bb1a7c36e794..c40c2cac2d8e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -327,6 +327,11 @@ static inline u64 perf_clock(void)
 	return local_clock();
 }
 
+static inline u64 perf_event_clock(struct perf_event *event)
+{
+	return event->clock();
+}
+
 static inline struct perf_cpu_context *
 __get_cpu_context(struct perf_event_context *ctx)
 {
@@ -4762,7 +4767,7 @@ static void __perf_event_header__init_id(struct perf_event_header *header,
 	}
 
 	if (sample_type & PERF_SAMPLE_TIME)
-		data->time = perf_clock();
+		data->time = perf_event_clock(event);
 
 	if (sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER))
 		data->id = primary_event_id(event);
@@ -5340,6 +5345,8 @@ static void perf_event_task_output(struct perf_event *event,
 	task_event->event_id.tid = perf_event_tid(event, task);
 	task_event->event_id.ptid = perf_event_tid(event, current);
 
+	task_event->event_id.time = perf_event_clock(event);
+
 	perf_output_put(&handle, task_event->event_id);
 
 	perf_event__output_id_sample(event, &handle, &sample);
@@ -5373,7 +5380,7 @@ static void perf_event_task(struct task_struct *task,
 			/* .ppid */
 			/* .tid */
 			/* .ptid */
-			.time = perf_clock(),
+			/* .time */
 		},
 	};
 
@@ -5749,7 +5756,7 @@ static void perf_log_throttle(struct perf_event *event, int enable)
 			.misc = 0,
 			.size = sizeof(throttle_event),
 		},
-		.time = perf_clock(),
+		.time = perf_event_clock(event),
 		.id = primary_event_id(event),
 		.stream_id = event->id,
 	};
@@ -6293,6 +6300,8 @@ static int perf_swevent_init(struct perf_event *event)
 static struct pmu perf_swevent = {
 	.task_ctx_nr = perf_sw_context,
 
+	.capabilities = PERF_PMU_CAP_NO_NMI,
+
 	.event_init = perf_swevent_init,
 	.add = perf_swevent_add,
 	.del = perf_swevent_del,
@@ -6636,6 +6645,8 @@ static int cpu_clock_event_init(struct perf_event *event)
 static struct pmu perf_cpu_clock = {
 	.task_ctx_nr = perf_sw_context,
 
+	.capabilities = PERF_PMU_CAP_NO_NMI,
+
 	.event_init = cpu_clock_event_init,
 	.add = cpu_clock_event_add,
 	.del = cpu_clock_event_del,
@@ -6715,6 +6726,8 @@ static int task_clock_event_init(struct perf_event *event)
 static struct pmu perf_task_clock = {
 	.task_ctx_nr = perf_sw_context,
 
+	.capabilities = PERF_PMU_CAP_NO_NMI,
+
 	.event_init = task_clock_event_init,
 	.add = task_clock_event_add,
 	.del = task_clock_event_del,
@@ -7200,6 +7213,10 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 		event->hw.target = task;
 	}
 
+	event->clock = &local_clock;
+	if (parent_event)
+		event->clock = parent_event->clock;
+
 	if (!overflow_handler && parent_event) {
 		overflow_handler = parent_event->overflow_handler;
 		context = parent_event->overflow_handler_context;
@@ -7422,6 +7439,12 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
 	if (output_event->cpu == -1 && output_event->ctx != event->ctx)
 		goto out;
 
+	/*
+	 * Mixing clocks in the same buffer is trouble you don't need.
+	 */
+	if (output_event->clock != event->clock)
+		goto out;
+
 set:
 	mutex_lock(&event->mmap_mutex);
 	/* Can't redirect output if we've got an active mmap() */
@@ -7454,6 +7477,43 @@ static void mutex_lock_double(struct mutex *a, struct mutex *b)
 	mutex_lock_nested(b, SINGLE_DEPTH_NESTING);
 }
 
+static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id)
+{
+	bool nmi_safe = false;
+
+	switch (clk_id) {
+	case CLOCK_MONOTONIC:
+		event->clock = &ktime_get_mono_fast_ns;
+		nmi_safe = true;
+		break;
+
+	case CLOCK_MONOTONIC_RAW:
+		event->clock = &ktime_get_raw_fast_ns;
+		nmi_safe = true;
+		break;
+
+	case CLOCK_REALTIME:
+		event->clock = &ktime_get_real_ns;
+		break;
+
+	case CLOCK_BOOTTIME:
+		event->clock = &ktime_get_boot_ns;
+		break;
+
+	case CLOCK_TAI:
+		event->clock = &ktime_get_tai_ns;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	if (!nmi_safe && !(event->pmu->capabilities & PERF_PMU_CAP_NO_NMI))
+		return -EINVAL;
+
+	return 0;
+}
+
 /**
  * sys_perf_event_open - open a performance event, associate it to a task/cpu
  *
@@ -7569,6 +7629,12 @@ SYSCALL_DEFINE5(perf_event_open,
 	 */
 	pmu = event->pmu;
 
+	if (attr.use_clockid) {
+		err = perf_event_set_clock(event, attr.clockid);
+		if (err)
+			goto err_alloc;
+	}
+
 	if (group_leader &&
 	    (is_software_event(event) != is_software_event(group_leader))) {
 		if (is_software_event(event)) {
@@ -7618,6 +7684,11 @@ SYSCALL_DEFINE5(perf_event_open,
 		 */
 		if (group_leader->group_leader != group_leader)
 			goto err_context;
+
+		/* All events in a group should have the same clock */
+		if (group_leader->clock != event->clock)
+			goto err_context;
+
 		/*
 		 * Do not allow to attach to a group in a different
 		 * task or CPU context:
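[Editorial note: to illustrate the sanity checks, here is a hedged sketch of the group rule in the last hunk: a group member whose clock differs from its leader's is rejected at open time. The open_clock_event() helper is hypothetical shorthand, not part of the patch.]

#include <errno.h>
#include <linux/perf_event.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <time.h>
#include <unistd.h>

/* Hypothetical helper: open a software event on the given clock,
 * optionally as a member of group_fd's event group. */
static int open_clock_event(clockid_t clk, int group_fd)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_CPU_CLOCK;
	attr.use_clockid = 1;
	attr.clockid = clk;

	return syscall(__NR_perf_event_open, &attr, 0, -1, group_fd, 0);
}

int main(void)
{
	int leader = open_clock_event(CLOCK_MONOTONIC, -1);
	int member = open_clock_event(CLOCK_MONOTONIC_RAW, leader);

	/* The "same clock" group check fires here: the second
	 * open fails with EINVAL. */
	if (member < 0)
		printf("mixed-clock group rejected: %s\n", strerror(errno));
	return 0;
}

[The same clock comparison guards buffer redirection in perf_event_set_output(), so events with different clocks cannot share a ring buffer either.]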