Diffstat (limited to 'kernel/perf_event.c')
-rw-r--r--  kernel/perf_event.c  200
1 file changed, 116 insertions(+), 84 deletions(-)
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index eac7e3364335..aede71245e9f 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -312,9 +312,75 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 	ctx->nr_stat++;
 }
 
+/*
+ * Called at perf_event creation and when events are attached/detached from a
+ * group.
+ */
+static void perf_event__read_size(struct perf_event *event)
+{
+	int entry = sizeof(u64); /* value */
+	int size = 0;
+	int nr = 1;
+
+	if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+		size += sizeof(u64);
+
+	if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+		size += sizeof(u64);
+
+	if (event->attr.read_format & PERF_FORMAT_ID)
+		entry += sizeof(u64);
+
+	if (event->attr.read_format & PERF_FORMAT_GROUP) {
+		nr += event->group_leader->nr_siblings;
+		size += sizeof(u64);
+	}
+
+	size += entry * nr;
+	event->read_size = size;
+}
+
+static void perf_event__header_size(struct perf_event *event)
+{
+	struct perf_sample_data *data;
+	u64 sample_type = event->attr.sample_type;
+	u16 size = 0;
+
+	perf_event__read_size(event);
+
+	if (sample_type & PERF_SAMPLE_IP)
+		size += sizeof(data->ip);
+
+	if (sample_type & PERF_SAMPLE_TID)
+		size += sizeof(data->tid_entry);
+
+	if (sample_type & PERF_SAMPLE_TIME)
+		size += sizeof(data->time);
+
+	if (sample_type & PERF_SAMPLE_ADDR)
+		size += sizeof(data->addr);
+
+	if (sample_type & PERF_SAMPLE_ID)
+		size += sizeof(data->id);
+
+	if (sample_type & PERF_SAMPLE_STREAM_ID)
+		size += sizeof(data->stream_id);
+
+	if (sample_type & PERF_SAMPLE_CPU)
+		size += sizeof(data->cpu_entry);
+
+	if (sample_type & PERF_SAMPLE_PERIOD)
+		size += sizeof(data->period);
+
+	if (sample_type & PERF_SAMPLE_READ)
+		size += event->read_size;
+
+	event->header_size = size;
+}
+
 static void perf_group_attach(struct perf_event *event)
 {
-	struct perf_event *group_leader = event->group_leader;
+	struct perf_event *group_leader = event->group_leader, *pos;
 
 	/*
 	 * We can have double attach due to group movement in perf_event_open.
@@ -333,6 +399,11 @@ static void perf_group_attach(struct perf_event *event)
 
 	list_add_tail(&event->group_entry, &group_leader->sibling_list);
 	group_leader->nr_siblings++;
+
+	perf_event__header_size(group_leader);
+
+	list_for_each_entry(pos, &group_leader->sibling_list, group_entry)
+		perf_event__header_size(pos);
 }
 
 /*
@@ -391,7 +462,7 @@ static void perf_group_detach(struct perf_event *event)
 	if (event->group_leader != event) {
 		list_del_init(&event->group_entry);
 		event->group_leader->nr_siblings--;
-		return;
+		goto out;
 	}
 
 	if (!list_empty(&event->group_entry))
@@ -410,6 +481,12 @@ static void perf_group_detach(struct perf_event *event)
 		/* Inherit group flags from the previous leader */
 		sibling->group_flags = event->group_flags;
 	}
+
+out:
+	perf_event__header_size(event->group_leader);
+
+	list_for_each_entry(tmp, &event->group_leader->sibling_list, group_entry)
+		perf_event__header_size(tmp);
 }
 
 static inline int
@@ -1073,7 +1150,7 @@ static int perf_event_refresh(struct perf_event *event, int refresh)
 	/*
 	 * not supported on inherited events
 	 */
-	if (event->attr.inherit)
+	if (event->attr.inherit || !is_sampling_event(event))
 		return -EINVAL;
 
 	atomic_add(refresh, &event->event_limit);
@@ -2289,31 +2366,6 @@ static int perf_release(struct inode *inode, struct file *file)
 	return perf_event_release_kernel(event);
 }
 
-static int perf_event_read_size(struct perf_event *event)
-{
-	int entry = sizeof(u64); /* value */
-	int size = 0;
-	int nr = 1;
-
-	if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
-		size += sizeof(u64);
-
-	if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
-		size += sizeof(u64);
-
-	if (event->attr.read_format & PERF_FORMAT_ID)
-		entry += sizeof(u64);
-
-	if (event->attr.read_format & PERF_FORMAT_GROUP) {
-		nr += event->group_leader->nr_siblings;
-		size += sizeof(u64);
-	}
-
-	size += entry * nr;
-
-	return size;
-}
-
 u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
 {
 	struct perf_event *child;
@@ -2428,7 +2480,7 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count)
 	if (event->state == PERF_EVENT_STATE_ERROR)
 		return 0;
 
-	if (count < perf_event_read_size(event))
+	if (count < event->read_size)
 		return -ENOSPC;
 
 	WARN_ON_ONCE(event->ctx->parent_ctx);
@@ -2514,7 +2566,7 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
 	int ret = 0;
 	u64 value;
 
-	if (!event->attr.sample_period)
+	if (!is_sampling_event(event))
 		return -EINVAL;
 
 	if (copy_from_user(&value, arg, sizeof(value)))
@@ -3606,59 +3658,34 @@ void perf_prepare_sample(struct perf_event_header *header,
 	data->type = sample_type;
 
 	header->type = PERF_RECORD_SAMPLE;
-	header->size = sizeof(*header);
+	header->size = sizeof(*header) + event->header_size;
 
 	header->misc = 0;
 	header->misc |= perf_misc_flags(regs);
 
-	if (sample_type & PERF_SAMPLE_IP) {
+	if (sample_type & PERF_SAMPLE_IP)
 		data->ip = perf_instruction_pointer(regs);
 
-		header->size += sizeof(data->ip);
-	}
-
 	if (sample_type & PERF_SAMPLE_TID) {
 		/* namespace issues */
 		data->tid_entry.pid = perf_event_pid(event, current);
 		data->tid_entry.tid = perf_event_tid(event, current);
-
-		header->size += sizeof(data->tid_entry);
 	}
 
-	if (sample_type & PERF_SAMPLE_TIME) {
+	if (sample_type & PERF_SAMPLE_TIME)
 		data->time = perf_clock();
 
-		header->size += sizeof(data->time);
-	}
-
-	if (sample_type & PERF_SAMPLE_ADDR)
-		header->size += sizeof(data->addr);
-
-	if (sample_type & PERF_SAMPLE_ID) {
+	if (sample_type & PERF_SAMPLE_ID)
 		data->id = primary_event_id(event);
 
-		header->size += sizeof(data->id);
-	}
-
-	if (sample_type & PERF_SAMPLE_STREAM_ID) {
+	if (sample_type & PERF_SAMPLE_STREAM_ID)
 		data->stream_id = event->id;
 
-		header->size += sizeof(data->stream_id);
-	}
-
 	if (sample_type & PERF_SAMPLE_CPU) {
 		data->cpu_entry.cpu = raw_smp_processor_id();
 		data->cpu_entry.reserved = 0;
-
-		header->size += sizeof(data->cpu_entry);
 	}
 
-	if (sample_type & PERF_SAMPLE_PERIOD)
-		header->size += sizeof(data->period);
-
-	if (sample_type & PERF_SAMPLE_READ)
-		header->size += perf_event_read_size(event);
-
 	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
 		int size = 1;
 
@@ -3726,7 +3753,7 @@ perf_event_read_event(struct perf_event *event,
 		.header = {
 			.type = PERF_RECORD_READ,
 			.misc = 0,
-			.size = sizeof(read_event) + perf_event_read_size(event),
+			.size = sizeof(read_event) + event->read_size,
 		},
 		.pid = perf_event_pid(event, task),
 		.tid = perf_event_tid(event, task),
@@ -4240,6 +4267,13 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
 	struct hw_perf_event *hwc = &event->hw;
 	int ret = 0;
 
+	/*
+	 * Non-sampling counters might still use the PMI to fold short
+	 * hardware counters, ignore those.
+	 */
+	if (unlikely(!is_sampling_event(event)))
+		return 0;
+
 	if (!throttle) {
 		hwc->interrupts++;
 	} else {
@@ -4385,7 +4419,7 @@ static void perf_swevent_event(struct perf_event *event, u64 nr,
 	if (!regs)
 		return;
 
-	if (!hwc->sample_period)
+	if (!is_sampling_event(event))
 		return;
 
 	if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq)
@@ -4548,7 +4582,7 @@ static int perf_swevent_add(struct perf_event *event, int flags)
 	struct hw_perf_event *hwc = &event->hw;
 	struct hlist_head *head;
 
-	if (hwc->sample_period) {
+	if (is_sampling_event(event)) {
 		hwc->last_period = hwc->sample_period;
 		perf_swevent_set_period(event);
 	}
@@ -4805,15 +4839,6 @@ static int perf_tp_event_init(struct perf_event *event)
 	if (event->attr.type != PERF_TYPE_TRACEPOINT)
 		return -ENOENT;
 
-	/*
-	 * Raw tracepoint data is a severe data leak, only allow root to
-	 * have these.
-	 */
-	if ((event->attr.sample_type & PERF_SAMPLE_RAW) &&
-	    perf_paranoid_tracepoint_raw() &&
-	    !capable(CAP_SYS_ADMIN))
-		return -EPERM;
-
 	err = perf_trace_init(event);
 	if (err)
 		return err;
@@ -4926,31 +4951,33 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
 static void perf_swevent_start_hrtimer(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
+	s64 period;
+
+	if (!is_sampling_event(event))
+		return;
 
 	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	hwc->hrtimer.function = perf_swevent_hrtimer;
-	if (hwc->sample_period) {
-		s64 period = local64_read(&hwc->period_left);
 
-		if (period) {
-			if (period < 0)
-				period = 10000;
+	period = local64_read(&hwc->period_left);
+	if (period) {
+		if (period < 0)
+			period = 10000;
 
-			local64_set(&hwc->period_left, 0);
-		} else {
-			period = max_t(u64, 10000, hwc->sample_period);
-		}
-		__hrtimer_start_range_ns(&hwc->hrtimer,
-				ns_to_ktime(period), 0,
-				HRTIMER_MODE_REL_PINNED, 0);
-	}
+		local64_set(&hwc->period_left, 0);
+	} else {
+		period = max_t(u64, 10000, hwc->sample_period);
+	}
+	__hrtimer_start_range_ns(&hwc->hrtimer,
+			ns_to_ktime(period), 0,
+			HRTIMER_MODE_REL_PINNED, 0);
 }
 
 static void perf_swevent_cancel_hrtimer(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
 
-	if (hwc->sample_period) {
+	if (is_sampling_event(event)) {
 		ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer);
 		local64_set(&hwc->period_left, ktime_to_ns(remaining));
 
@@ -5715,6 +5742,11 @@ SYSCALL_DEFINE5(perf_event_open,
 	mutex_unlock(&current->perf_event_mutex);
 
 	/*
+	 * Precalculate sample_data sizes
+	 */
+	perf_event__header_size(event);
+
+	/*
 	 * Drop the reference on the group_event after placing the
 	 * new event on the sibling_list. This ensures destruction
 	 * of the group leader will find the pointer to itself in
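
For reference, the read_format arithmetic that the new perf_event__read_size() caches is the same sizing a user of the perf_event_open() read() interface has to do when allocating its buffer. Below is a minimal userspace sketch of that arithmetic; it is illustrative only and not part of the patch, and the helper name and example read_format/sibling values are hypothetical.

/* Illustrative only: mirrors the sizing logic of perf_event__read_size(). */
#include <stdio.h>
#include <stdint.h>
#include <linux/perf_event.h>

static size_t read_buffer_size(uint64_t read_format, unsigned int nr_siblings)
{
	size_t entry = sizeof(uint64_t);	/* value */
	size_t size = 0;
	unsigned int nr = 1;

	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		size += sizeof(uint64_t);

	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		size += sizeof(uint64_t);

	if (read_format & PERF_FORMAT_ID)
		entry += sizeof(uint64_t);	/* each entry also carries an id */

	if (read_format & PERF_FORMAT_GROUP) {
		nr += nr_siblings;		/* one entry per group member */
		size += sizeof(uint64_t);	/* leading 'nr' field */
	}

	return size + entry * nr;
}

int main(void)
{
	/* hypothetical example: a group leader with 3 siblings, ids and both times */
	uint64_t fmt = PERF_FORMAT_GROUP | PERF_FORMAT_ID |
		       PERF_FORMAT_TOTAL_TIME_ENABLED |
		       PERF_FORMAT_TOTAL_TIME_RUNNING;

	printf("read() buffer: %zu bytes\n", read_buffer_size(fmt, 3));	/* 88 */
	return 0;
}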