Diffstat (limited to 'kernel/perf_event.c')
 -rw-r--r--   kernel/perf_event.c   200
 1 file changed, 116 insertions, 84 deletions
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index eac7e3364335..aede71245e9f 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -312,9 +312,75 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 		ctx->nr_stat++;
 }
 
+/*
+ * Called at perf_event creation and when events are attached/detached from a
+ * group.
+ */
+static void perf_event__read_size(struct perf_event *event)
+{
+	int entry = sizeof(u64); /* value */
+	int size = 0;
+	int nr = 1;
+
+	if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+		size += sizeof(u64);
+
+	if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+		size += sizeof(u64);
+
+	if (event->attr.read_format & PERF_FORMAT_ID)
+		entry += sizeof(u64);
+
+	if (event->attr.read_format & PERF_FORMAT_GROUP) {
+		nr += event->group_leader->nr_siblings;
+		size += sizeof(u64);
+	}
+
+	size += entry * nr;
+	event->read_size = size;
+}
+
+static void perf_event__header_size(struct perf_event *event)
+{
+	struct perf_sample_data *data;
+	u64 sample_type = event->attr.sample_type;
+	u16 size = 0;
+
+	perf_event__read_size(event);
+
+	if (sample_type & PERF_SAMPLE_IP)
+		size += sizeof(data->ip);
+
+	if (sample_type & PERF_SAMPLE_TID)
+		size += sizeof(data->tid_entry);
+
+	if (sample_type & PERF_SAMPLE_TIME)
+		size += sizeof(data->time);
+
+	if (sample_type & PERF_SAMPLE_ADDR)
+		size += sizeof(data->addr);
+
+	if (sample_type & PERF_SAMPLE_ID)
+		size += sizeof(data->id);
+
+	if (sample_type & PERF_SAMPLE_STREAM_ID)
+		size += sizeof(data->stream_id);
+
+	if (sample_type & PERF_SAMPLE_CPU)
+		size += sizeof(data->cpu_entry);
+
+	if (sample_type & PERF_SAMPLE_PERIOD)
+		size += sizeof(data->period);
+
+	if (sample_type & PERF_SAMPLE_READ)
+		size += event->read_size;
+
+	event->header_size = size;
+}
+
 static void perf_group_attach(struct perf_event *event)
 {
-	struct perf_event *group_leader = event->group_leader;
+	struct perf_event *group_leader = event->group_leader, *pos;
 
 	/*
 	 * We can have double attach due to group movement in perf_event_open.
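For orientation, a worked example of what the new perf_event__read_size() caches, assuming a hypothetical group leader with two siblings and read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID | PERF_FORMAT_TOTAL_TIME_ENABLED. The struct below is an illustrative sketch of the matching read() buffer layout, not code from the patch:

	/*
	 * entry = 8 (value) + 8 (PERF_FORMAT_ID)          = 16
	 * size  = 8 (time_enabled) + 8 (nr, FORMAT_GROUP) = 16
	 * nr    = 1 + 2 siblings                          =  3
	 * read_size = 16 + 16 * 3                         = 64 bytes
	 */
	struct read_format_group_example {	/* hypothetical name */
		u64 nr;				/* 3 */
		u64 time_enabled;
		struct {
			u64 value;
			u64 id;
		} values[3];
	};					/* sizeof == 64 */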
@@ -333,6 +399,11 @@ static void perf_group_attach(struct perf_event *event)
 
 	list_add_tail(&event->group_entry, &group_leader->sibling_list);
 	group_leader->nr_siblings++;
+
+	perf_event__header_size(group_leader);
+
+	list_for_each_entry(pos, &group_leader->sibling_list, group_entry)
+		perf_event__header_size(pos);
 }
 
 /*
@@ -391,7 +462,7 @@ static void perf_group_detach(struct perf_event *event)
 	if (event->group_leader != event) {
 		list_del_init(&event->group_entry);
 		event->group_leader->nr_siblings--;
-		return;
+		goto out;
 	}
 
 	if (!list_empty(&event->group_entry))
@@ -410,6 +481,12 @@ static void perf_group_detach(struct perf_event *event)
 		/* Inherit group flags from the previous leader */
 		sibling->group_flags = event->group_flags;
 	}
+
+out:
+	perf_event__header_size(event->group_leader);
+
+	list_for_each_entry(tmp, &event->group_leader->sibling_list, group_entry)
+		perf_event__header_size(tmp);
 }
 
 static inline int
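The attach and detach paths end with the same recompute-the-whole-group pattern because, with PERF_FORMAT_GROUP, every member's read_size depends on the leader's nr_siblings; a stale cached size would let a later PERF_SAMPLE_READ record disagree with its header->size. The duplicated tail could be factored into a helper along these lines (a hypothetical refactoring, not part of the patch):

	static void perf_group__recalc_sizes(struct perf_event *leader)
	{
		struct perf_event *pos;

		/* the leader first, then every sibling on its list */
		perf_event__header_size(leader);
		list_for_each_entry(pos, &leader->sibling_list, group_entry)
			perf_event__header_size(pos);
	}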
@@ -1073,7 +1150,7 @@ static int perf_event_refresh(struct perf_event *event, int refresh)
 	/*
 	 * not supported on inherited events
 	 */
-	if (event->attr.inherit)
+	if (event->attr.inherit || !is_sampling_event(event))
 		return -EINVAL;
 
 	atomic_add(refresh, &event->event_limit);
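Because the diffstat is limited to kernel/perf_event.c, the is_sampling_event() helper used here and in the hunks below does not appear in the diff; it lives in include/linux/perf_event.h and is essentially a one-line predicate:

	static inline int is_sampling_event(struct perf_event *event)
	{
		return event->attr.sample_period != 0;
	}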
@@ -2289,31 +2366,6 @@ static int perf_release(struct inode *inode, struct file *file)
 	return perf_event_release_kernel(event);
 }
 
-static int perf_event_read_size(struct perf_event *event)
-{
-	int entry = sizeof(u64); /* value */
-	int size = 0;
-	int nr = 1;
-
-	if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
-		size += sizeof(u64);
-
-	if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
-		size += sizeof(u64);
-
-	if (event->attr.read_format & PERF_FORMAT_ID)
-		entry += sizeof(u64);
-
-	if (event->attr.read_format & PERF_FORMAT_GROUP) {
-		nr += event->group_leader->nr_siblings;
-		size += sizeof(u64);
-	}
-
-	size += entry * nr;
-
-	return size;
-}
-
 u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
 {
 	struct perf_event *child;
@@ -2428,7 +2480,7 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count)
 	if (event->state == PERF_EVENT_STATE_ERROR)
 		return 0;
 
-	if (count < perf_event_read_size(event))
+	if (count < event->read_size)
 		return -ENOSPC;
 
 	WARN_ON_ONCE(event->ctx->parent_ctx);
@@ -2514,7 +2566,7 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
 	int ret = 0;
 	u64 value;
 
-	if (!event->attr.sample_period)
+	if (!is_sampling_event(event))
 		return -EINVAL;
 
 	if (copy_from_user(&value, arg, sizeof(value)))
@@ -3606,59 +3658,34 @@ void perf_prepare_sample(struct perf_event_header *header,
 	data->type = sample_type;
 
 	header->type = PERF_RECORD_SAMPLE;
-	header->size = sizeof(*header);
+	header->size = sizeof(*header) + event->header_size;
 
 	header->misc = 0;
 	header->misc |= perf_misc_flags(regs);
 
-	if (sample_type & PERF_SAMPLE_IP) {
+	if (sample_type & PERF_SAMPLE_IP)
 		data->ip = perf_instruction_pointer(regs);
 
-		header->size += sizeof(data->ip);
-	}
-
 	if (sample_type & PERF_SAMPLE_TID) {
 		/* namespace issues */
 		data->tid_entry.pid = perf_event_pid(event, current);
 		data->tid_entry.tid = perf_event_tid(event, current);
-
-		header->size += sizeof(data->tid_entry);
 	}
 
-	if (sample_type & PERF_SAMPLE_TIME) {
+	if (sample_type & PERF_SAMPLE_TIME)
 		data->time = perf_clock();
 
-		header->size += sizeof(data->time);
-	}
-
-	if (sample_type & PERF_SAMPLE_ADDR)
-		header->size += sizeof(data->addr);
-
-	if (sample_type & PERF_SAMPLE_ID) {
+	if (sample_type & PERF_SAMPLE_ID)
 		data->id = primary_event_id(event);
 
-		header->size += sizeof(data->id);
-	}
-
-	if (sample_type & PERF_SAMPLE_STREAM_ID) {
+	if (sample_type & PERF_SAMPLE_STREAM_ID)
 		data->stream_id = event->id;
 
-		header->size += sizeof(data->stream_id);
-	}
-
 	if (sample_type & PERF_SAMPLE_CPU) {
 		data->cpu_entry.cpu = raw_smp_processor_id();
 		data->cpu_entry.reserved = 0;
-
-		header->size += sizeof(data->cpu_entry);
 	}
 
-	if (sample_type & PERF_SAMPLE_PERIOD)
-		header->size += sizeof(data->period);
-
-	if (sample_type & PERF_SAMPLE_READ)
-		header->size += perf_event_read_size(event);
-
 	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
 		int size = 1;
 
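With the fixed-size PERF_SAMPLE_* bookkeeping hoisted into event->header_size, only the variable-length parts (PERF_SAMPLE_CALLCHAIN and PERF_SAMPLE_RAW) still grow header->size at sample time. A hypothetical example, assuming sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_PERIOD:

	/*
	 * event->header_size = sizeof(data->ip)         8
	 *                    + sizeof(data->tid_entry)  8  (u32 pid + u32 tid)
	 *                    + sizeof(data->period)     8
	 *                    = 24
	 *
	 * so perf_prepare_sample() now starts from
	 * header->size = sizeof(*header) + 24 = 8 + 24 = 32 bytes.
	 */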
@@ -3726,7 +3753,7 @@ perf_event_read_event(struct perf_event *event,
 		.header = {
 			.type = PERF_RECORD_READ,
 			.misc = 0,
-			.size = sizeof(read_event) + perf_event_read_size(event),
+			.size = sizeof(read_event) + event->read_size,
 		},
 		.pid = perf_event_pid(event, task),
 		.tid = perf_event_tid(event, task),
@@ -4240,6 +4267,13 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
 	struct hw_perf_event *hwc = &event->hw;
 	int ret = 0;
 
+	/*
+	 * Non-sampling counters might still use the PMI to fold short
+	 * hardware counters, ignore those.
+	 */
+	if (unlikely(!is_sampling_event(event)))
+		return 0;
+
 	if (!throttle) {
 		hwc->interrupts++;
 	} else {
@@ -4385,7 +4419,7 @@ static void perf_swevent_event(struct perf_event *event, u64 nr,
 	if (!regs)
 		return;
 
-	if (!hwc->sample_period)
+	if (!is_sampling_event(event))
 		return;
 
 	if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq)
@@ -4548,7 +4582,7 @@ static int perf_swevent_add(struct perf_event *event, int flags)
 	struct hw_perf_event *hwc = &event->hw;
 	struct hlist_head *head;
 
-	if (hwc->sample_period) {
+	if (is_sampling_event(event)) {
 		hwc->last_period = hwc->sample_period;
 		perf_swevent_set_period(event);
 	}
@@ -4805,15 +4839,6 @@ static int perf_tp_event_init(struct perf_event *event)
 	if (event->attr.type != PERF_TYPE_TRACEPOINT)
 		return -ENOENT;
 
-	/*
-	 * Raw tracepoint data is a severe data leak, only allow root to
-	 * have these.
-	 */
-	if ((event->attr.sample_type & PERF_SAMPLE_RAW) &&
-	    perf_paranoid_tracepoint_raw() &&
-	    !capable(CAP_SYS_ADMIN))
-		return -EPERM;
-
 	err = perf_trace_init(event);
 	if (err)
 		return err;
@@ -4926,31 +4951,33 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
 static void perf_swevent_start_hrtimer(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
+	s64 period;
+
+	if (!is_sampling_event(event))
+		return;
 
 	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	hwc->hrtimer.function = perf_swevent_hrtimer;
-	if (hwc->sample_period) {
-		s64 period = local64_read(&hwc->period_left);
 
-		if (period) {
-			if (period < 0)
-				period = 10000;
+	period = local64_read(&hwc->period_left);
+	if (period) {
+		if (period < 0)
+			period = 10000;
 
-			local64_set(&hwc->period_left, 0);
-		} else {
-			period = max_t(u64, 10000, hwc->sample_period);
-		}
-		__hrtimer_start_range_ns(&hwc->hrtimer,
+		local64_set(&hwc->period_left, 0);
+	} else {
+		period = max_t(u64, 10000, hwc->sample_period);
+	}
+	__hrtimer_start_range_ns(&hwc->hrtimer,
 				ns_to_ktime(period), 0,
 				HRTIMER_MODE_REL_PINNED, 0);
-	}
 }
 
 static void perf_swevent_cancel_hrtimer(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
 
-	if (hwc->sample_period) {
+	if (is_sampling_event(event)) {
 		ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer);
 		local64_set(&hwc->period_left, ktime_to_ns(remaining));
@@ -5715,6 +5742,11 @@ SYSCALL_DEFINE5(perf_event_open,
 	mutex_unlock(&current->perf_event_mutex);
 
 	/*
+	 * Precalculate sample_data sizes
+	 */
+	perf_event__header_size(event);
+
+	/*
 	 * Drop the reference on the group_event after placing the
 	 * new event on the sibling_list. This ensures destruction
 	 * of the group leader will find the pointer to itself in