Diffstat (limited to 'kernel/perf_event.c')
-rw-r--r-- | kernel/perf_event.c | 699
1 file changed, 533 insertions, 166 deletions
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index cb6c0d2af68..11847bf1e8c 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/mm.h> | 13 | #include <linux/mm.h> |
14 | #include <linux/cpu.h> | 14 | #include <linux/cpu.h> |
15 | #include <linux/smp.h> | 15 | #include <linux/smp.h> |
16 | #include <linux/idr.h> | ||
16 | #include <linux/file.h> | 17 | #include <linux/file.h> |
17 | #include <linux/poll.h> | 18 | #include <linux/poll.h> |
18 | #include <linux/slab.h> | 19 | #include <linux/slab.h> |
@@ -21,7 +22,9 @@ | |||
21 | #include <linux/dcache.h> | 22 | #include <linux/dcache.h> |
22 | #include <linux/percpu.h> | 23 | #include <linux/percpu.h> |
23 | #include <linux/ptrace.h> | 24 | #include <linux/ptrace.h> |
25 | #include <linux/reboot.h> | ||
24 | #include <linux/vmstat.h> | 26 | #include <linux/vmstat.h> |
27 | #include <linux/device.h> | ||
25 | #include <linux/vmalloc.h> | 28 | #include <linux/vmalloc.h> |
26 | #include <linux/hardirq.h> | 29 | #include <linux/hardirq.h> |
27 | #include <linux/rculist.h> | 30 | #include <linux/rculist.h> |
@@ -31,6 +34,7 @@ | |||
31 | #include <linux/kernel_stat.h> | 34 | #include <linux/kernel_stat.h> |
32 | #include <linux/perf_event.h> | 35 | #include <linux/perf_event.h> |
33 | #include <linux/ftrace_event.h> | 36 | #include <linux/ftrace_event.h> |
37 | #include <linux/hw_breakpoint.h> | ||
34 | 38 | ||
35 | #include <asm/irq_regs.h> | 39 | #include <asm/irq_regs.h> |
36 | 40 | ||
@@ -132,6 +136,28 @@ static void unclone_ctx(struct perf_event_context *ctx) | |||
132 | } | 136 | } |
133 | } | 137 | } |
134 | 138 | ||
139 | static u32 perf_event_pid(struct perf_event *event, struct task_struct *p) | ||
140 | { | ||
141 | /* | ||
142 | * only top level events have the pid namespace they were created in | ||
143 | */ | ||
144 | if (event->parent) | ||
145 | event = event->parent; | ||
146 | |||
147 | return task_tgid_nr_ns(p, event->ns); | ||
148 | } | ||
149 | |||
150 | static u32 perf_event_tid(struct perf_event *event, struct task_struct *p) | ||
151 | { | ||
152 | /* | ||
153 | * only top level events have the pid namespace they were created in | ||
154 | */ | ||
155 | if (event->parent) | ||
156 | event = event->parent; | ||
157 | |||
158 | return task_pid_nr_ns(p, event->ns); | ||
159 | } | ||
160 | |||
135 | /* | 161 | /* |
136 | * If we inherit events we want to return the parent event id | 162 | * If we inherit events we want to return the parent event id |
137 | * to userspace. | 163 | * to userspace. |
@@ -311,9 +337,84 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) | |||
311 | ctx->nr_stat++; | 337 | ctx->nr_stat++; |
312 | } | 338 | } |
313 | 339 | ||
340 | /* | ||
341 | * Called at perf_event creation and when events are attached/detached from a | ||
342 | * group. | ||
343 | */ | ||
344 | static void perf_event__read_size(struct perf_event *event) | ||
345 | { | ||
346 | int entry = sizeof(u64); /* value */ | ||
347 | int size = 0; | ||
348 | int nr = 1; | ||
349 | |||
350 | if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) | ||
351 | size += sizeof(u64); | ||
352 | |||
353 | if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) | ||
354 | size += sizeof(u64); | ||
355 | |||
356 | if (event->attr.read_format & PERF_FORMAT_ID) | ||
357 | entry += sizeof(u64); | ||
358 | |||
359 | if (event->attr.read_format & PERF_FORMAT_GROUP) { | ||
360 | nr += event->group_leader->nr_siblings; | ||
361 | size += sizeof(u64); | ||
362 | } | ||
363 | |||
364 | size += entry * nr; | ||
365 | event->read_size = size; | ||
366 | } | ||
367 | |||
368 | static void perf_event__header_size(struct perf_event *event) | ||
369 | { | ||
370 | struct perf_sample_data *data; | ||
371 | u64 sample_type = event->attr.sample_type; | ||
372 | u16 size = 0; | ||
373 | |||
374 | perf_event__read_size(event); | ||
375 | |||
376 | if (sample_type & PERF_SAMPLE_IP) | ||
377 | size += sizeof(data->ip); | ||
378 | |||
379 | if (sample_type & PERF_SAMPLE_ADDR) | ||
380 | size += sizeof(data->addr); | ||
381 | |||
382 | if (sample_type & PERF_SAMPLE_PERIOD) | ||
383 | size += sizeof(data->period); | ||
384 | |||
385 | if (sample_type & PERF_SAMPLE_READ) | ||
386 | size += event->read_size; | ||
387 | |||
388 | event->header_size = size; | ||
389 | } | ||
390 | |||
391 | static void perf_event__id_header_size(struct perf_event *event) | ||
392 | { | ||
393 | struct perf_sample_data *data; | ||
394 | u64 sample_type = event->attr.sample_type; | ||
395 | u16 size = 0; | ||
396 | |||
397 | if (sample_type & PERF_SAMPLE_TID) | ||
398 | size += sizeof(data->tid_entry); | ||
399 | |||
400 | if (sample_type & PERF_SAMPLE_TIME) | ||
401 | size += sizeof(data->time); | ||
402 | |||
403 | if (sample_type & PERF_SAMPLE_ID) | ||
404 | size += sizeof(data->id); | ||
405 | |||
406 | if (sample_type & PERF_SAMPLE_STREAM_ID) | ||
407 | size += sizeof(data->stream_id); | ||
408 | |||
409 | if (sample_type & PERF_SAMPLE_CPU) | ||
410 | size += sizeof(data->cpu_entry); | ||
411 | |||
412 | event->id_header_size = size; | ||
413 | } | ||
414 | |||
314 | static void perf_group_attach(struct perf_event *event) | 415 | static void perf_group_attach(struct perf_event *event) |
315 | { | 416 | { |
316 | struct perf_event *group_leader = event->group_leader; | 417 | struct perf_event *group_leader = event->group_leader, *pos; |
317 | 418 | ||
318 | /* | 419 | /* |
319 | * We can have double attach due to group movement in perf_event_open. | 420 | * We can have double attach due to group movement in perf_event_open. |
@@ -332,6 +433,11 @@ static void perf_group_attach(struct perf_event *event) | |||
332 | 433 | ||
333 | list_add_tail(&event->group_entry, &group_leader->sibling_list); | 434 | list_add_tail(&event->group_entry, &group_leader->sibling_list); |
334 | group_leader->nr_siblings++; | 435 | group_leader->nr_siblings++; |
436 | |||
437 | perf_event__header_size(group_leader); | ||
438 | |||
439 | list_for_each_entry(pos, &group_leader->sibling_list, group_entry) | ||
440 | perf_event__header_size(pos); | ||
335 | } | 441 | } |
336 | 442 | ||
337 | /* | 443 | /* |
@@ -390,7 +496,7 @@ static void perf_group_detach(struct perf_event *event) | |||
390 | if (event->group_leader != event) { | 496 | if (event->group_leader != event) { |
391 | list_del_init(&event->group_entry); | 497 | list_del_init(&event->group_entry); |
392 | event->group_leader->nr_siblings--; | 498 | event->group_leader->nr_siblings--; |
393 | return; | 499 | goto out; |
394 | } | 500 | } |
395 | 501 | ||
396 | if (!list_empty(&event->group_entry)) | 502 | if (!list_empty(&event->group_entry)) |
@@ -409,6 +515,12 @@ static void perf_group_detach(struct perf_event *event) | |||
409 | /* Inherit group flags from the previous leader */ | 515 | /* Inherit group flags from the previous leader */ |
410 | sibling->group_flags = event->group_flags; | 516 | sibling->group_flags = event->group_flags; |
411 | } | 517 | } |
518 | |||
519 | out: | ||
520 | perf_event__header_size(event->group_leader); | ||
521 | |||
522 | list_for_each_entry(tmp, &event->group_leader->sibling_list, group_entry) | ||
523 | perf_event__header_size(tmp); | ||
412 | } | 524 | } |
413 | 525 | ||
414 | static inline int | 526 | static inline int |
@@ -1072,7 +1184,7 @@ static int perf_event_refresh(struct perf_event *event, int refresh) | |||
1072 | /* | 1184 | /* |
1073 | * not supported on inherited events | 1185 | * not supported on inherited events |
1074 | */ | 1186 | */ |
1075 | if (event->attr.inherit) | 1187 | if (event->attr.inherit || !is_sampling_event(event)) |
1076 | return -EINVAL; | 1188 | return -EINVAL; |
1077 | 1189 | ||
1078 | atomic_add(refresh, &event->event_limit); | 1190 | atomic_add(refresh, &event->event_limit); |
@@ -1286,8 +1398,6 @@ void __perf_event_task_sched_out(struct task_struct *task, | |||
1286 | { | 1398 | { |
1287 | int ctxn; | 1399 | int ctxn; |
1288 | 1400 | ||
1289 | perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); | ||
1290 | |||
1291 | for_each_task_context_nr(ctxn) | 1401 | for_each_task_context_nr(ctxn) |
1292 | perf_event_context_sched_out(task, ctxn, next); | 1402 | perf_event_context_sched_out(task, ctxn, next); |
1293 | } | 1403 | } |
@@ -1621,8 +1731,12 @@ static void rotate_ctx(struct perf_event_context *ctx) | |||
1621 | { | 1731 | { |
1622 | raw_spin_lock(&ctx->lock); | 1732 | raw_spin_lock(&ctx->lock); |
1623 | 1733 | ||
1624 | /* Rotate the first entry last of non-pinned groups */ | 1734 | /* |
1625 | list_rotate_left(&ctx->flexible_groups); | 1735 | * Rotate the first entry last of non-pinned groups. Rotation might be |
1736 | * disabled by the inheritance code. | ||
1737 | */ | ||
1738 | if (!ctx->rotate_disable) | ||
1739 | list_rotate_left(&ctx->flexible_groups); | ||
1626 | 1740 | ||
1627 | raw_spin_unlock(&ctx->lock); | 1741 | raw_spin_unlock(&ctx->lock); |
1628 | } | 1742 | } |
@@ -2234,11 +2348,6 @@ int perf_event_release_kernel(struct perf_event *event) | |||
2234 | raw_spin_unlock_irq(&ctx->lock); | 2348 | raw_spin_unlock_irq(&ctx->lock); |
2235 | mutex_unlock(&ctx->mutex); | 2349 | mutex_unlock(&ctx->mutex); |
2236 | 2350 | ||
2237 | mutex_lock(&event->owner->perf_event_mutex); | ||
2238 | list_del_init(&event->owner_entry); | ||
2239 | mutex_unlock(&event->owner->perf_event_mutex); | ||
2240 | put_task_struct(event->owner); | ||
2241 | |||
2242 | free_event(event); | 2351 | free_event(event); |
2243 | 2352 | ||
2244 | return 0; | 2353 | return 0; |
@@ -2251,35 +2360,44 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel); | |||
2251 | static int perf_release(struct inode *inode, struct file *file) | 2360 | static int perf_release(struct inode *inode, struct file *file) |
2252 | { | 2361 | { |
2253 | struct perf_event *event = file->private_data; | 2362 | struct perf_event *event = file->private_data; |
2363 | struct task_struct *owner; | ||
2254 | 2364 | ||
2255 | file->private_data = NULL; | 2365 | file->private_data = NULL; |
2256 | 2366 | ||
2257 | return perf_event_release_kernel(event); | 2367 | rcu_read_lock(); |
2258 | } | 2368 | owner = ACCESS_ONCE(event->owner); |
2259 | 2369 | /* | |
2260 | static int perf_event_read_size(struct perf_event *event) | 2370 | * Matches the smp_wmb() in perf_event_exit_task(). If we observe |
2261 | { | 2371 | * !owner it means the list deletion is complete and we can indeed |
2262 | int entry = sizeof(u64); /* value */ | 2372 | * free this event, otherwise we need to serialize on |
2263 | int size = 0; | 2373 | * owner->perf_event_mutex. |
2264 | int nr = 1; | 2374 | */ |
2265 | 2375 | smp_read_barrier_depends(); | |
2266 | if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) | 2376 | if (owner) { |
2267 | size += sizeof(u64); | 2377 | /* |
2268 | 2378 | * Since delayed_put_task_struct() also drops the last | |
2269 | if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) | 2379 | * task reference we can safely take a new reference |
2270 | size += sizeof(u64); | 2380 | * while holding the rcu_read_lock(). |
2271 | 2381 | */ | |
2272 | if (event->attr.read_format & PERF_FORMAT_ID) | 2382 | get_task_struct(owner); |
2273 | entry += sizeof(u64); | ||
2274 | |||
2275 | if (event->attr.read_format & PERF_FORMAT_GROUP) { | ||
2276 | nr += event->group_leader->nr_siblings; | ||
2277 | size += sizeof(u64); | ||
2278 | } | 2383 | } |
2384 | rcu_read_unlock(); | ||
2279 | 2385 | ||
2280 | size += entry * nr; | 2386 | if (owner) { |
2387 | mutex_lock(&owner->perf_event_mutex); | ||
2388 | /* | ||
2389 | * We have to re-check the event->owner field, if it is cleared | ||
2390 | * we raced with perf_event_exit_task(), acquiring the mutex | ||
2391 | * ensured they're done, and we can proceed with freeing the | ||
2392 | * event. | ||
2393 | */ | ||
2394 | if (event->owner) | ||
2395 | list_del_init(&event->owner_entry); | ||
2396 | mutex_unlock(&owner->perf_event_mutex); | ||
2397 | put_task_struct(owner); | ||
2398 | } | ||
2281 | 2399 | ||
2282 | return size; | 2400 | return perf_event_release_kernel(event); |
2283 | } | 2401 | } |
2284 | 2402 | ||
2285 | u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) | 2403 | u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) |
@@ -2396,7 +2514,7 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count) | |||
2396 | if (event->state == PERF_EVENT_STATE_ERROR) | 2514 | if (event->state == PERF_EVENT_STATE_ERROR) |
2397 | return 0; | 2515 | return 0; |
2398 | 2516 | ||
2399 | if (count < perf_event_read_size(event)) | 2517 | if (count < event->read_size) |
2400 | return -ENOSPC; | 2518 | return -ENOSPC; |
2401 | 2519 | ||
2402 | WARN_ON_ONCE(event->ctx->parent_ctx); | 2520 | WARN_ON_ONCE(event->ctx->parent_ctx); |
@@ -2482,7 +2600,7 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg) | |||
2482 | int ret = 0; | 2600 | int ret = 0; |
2483 | u64 value; | 2601 | u64 value; |
2484 | 2602 | ||
2485 | if (!event->attr.sample_period) | 2603 | if (!is_sampling_event(event)) |
2486 | return -EINVAL; | 2604 | return -EINVAL; |
2487 | 2605 | ||
2488 | if (copy_from_user(&value, arg, sizeof(value))) | 2606 | if (copy_from_user(&value, arg, sizeof(value))) |
@@ -3273,6 +3391,73 @@ __always_inline void perf_output_copy(struct perf_output_handle *handle, | |||
3273 | } while (len); | 3391 | } while (len); |
3274 | } | 3392 | } |
3275 | 3393 | ||
3394 | static void __perf_event_header__init_id(struct perf_event_header *header, | ||
3395 | struct perf_sample_data *data, | ||
3396 | struct perf_event *event) | ||
3397 | { | ||
3398 | u64 sample_type = event->attr.sample_type; | ||
3399 | |||
3400 | data->type = sample_type; | ||
3401 | header->size += event->id_header_size; | ||
3402 | |||
3403 | if (sample_type & PERF_SAMPLE_TID) { | ||
3404 | /* namespace issues */ | ||
3405 | data->tid_entry.pid = perf_event_pid(event, current); | ||
3406 | data->tid_entry.tid = perf_event_tid(event, current); | ||
3407 | } | ||
3408 | |||
3409 | if (sample_type & PERF_SAMPLE_TIME) | ||
3410 | data->time = perf_clock(); | ||
3411 | |||
3412 | if (sample_type & PERF_SAMPLE_ID) | ||
3413 | data->id = primary_event_id(event); | ||
3414 | |||
3415 | if (sample_type & PERF_SAMPLE_STREAM_ID) | ||
3416 | data->stream_id = event->id; | ||
3417 | |||
3418 | if (sample_type & PERF_SAMPLE_CPU) { | ||
3419 | data->cpu_entry.cpu = raw_smp_processor_id(); | ||
3420 | data->cpu_entry.reserved = 0; | ||
3421 | } | ||
3422 | } | ||
3423 | |||
3424 | static void perf_event_header__init_id(struct perf_event_header *header, | ||
3425 | struct perf_sample_data *data, | ||
3426 | struct perf_event *event) | ||
3427 | { | ||
3428 | if (event->attr.sample_id_all) | ||
3429 | __perf_event_header__init_id(header, data, event); | ||
3430 | } | ||
3431 | |||
3432 | static void __perf_event__output_id_sample(struct perf_output_handle *handle, | ||
3433 | struct perf_sample_data *data) | ||
3434 | { | ||
3435 | u64 sample_type = data->type; | ||
3436 | |||
3437 | if (sample_type & PERF_SAMPLE_TID) | ||
3438 | perf_output_put(handle, data->tid_entry); | ||
3439 | |||
3440 | if (sample_type & PERF_SAMPLE_TIME) | ||
3441 | perf_output_put(handle, data->time); | ||
3442 | |||
3443 | if (sample_type & PERF_SAMPLE_ID) | ||
3444 | perf_output_put(handle, data->id); | ||
3445 | |||
3446 | if (sample_type & PERF_SAMPLE_STREAM_ID) | ||
3447 | perf_output_put(handle, data->stream_id); | ||
3448 | |||
3449 | if (sample_type & PERF_SAMPLE_CPU) | ||
3450 | perf_output_put(handle, data->cpu_entry); | ||
3451 | } | ||
3452 | |||
3453 | static void perf_event__output_id_sample(struct perf_event *event, | ||
3454 | struct perf_output_handle *handle, | ||
3455 | struct perf_sample_data *sample) | ||
3456 | { | ||
3457 | if (event->attr.sample_id_all) | ||
3458 | __perf_event__output_id_sample(handle, sample); | ||
3459 | } | ||
3460 | |||
3276 | int perf_output_begin(struct perf_output_handle *handle, | 3461 | int perf_output_begin(struct perf_output_handle *handle, |
3277 | struct perf_event *event, unsigned int size, | 3462 | struct perf_event *event, unsigned int size, |
3278 | int nmi, int sample) | 3463 | int nmi, int sample) |
@@ -3280,6 +3465,7 @@ int perf_output_begin(struct perf_output_handle *handle, | |||
3280 | struct perf_buffer *buffer; | 3465 | struct perf_buffer *buffer; |
3281 | unsigned long tail, offset, head; | 3466 | unsigned long tail, offset, head; |
3282 | int have_lost; | 3467 | int have_lost; |
3468 | struct perf_sample_data sample_data; | ||
3283 | struct { | 3469 | struct { |
3284 | struct perf_event_header header; | 3470 | struct perf_event_header header; |
3285 | u64 id; | 3471 | u64 id; |
@@ -3306,8 +3492,12 @@ int perf_output_begin(struct perf_output_handle *handle, | |||
3306 | goto out; | 3492 | goto out; |
3307 | 3493 | ||
3308 | have_lost = local_read(&buffer->lost); | 3494 | have_lost = local_read(&buffer->lost); |
3309 | if (have_lost) | 3495 | if (have_lost) { |
3310 | size += sizeof(lost_event); | 3496 | lost_event.header.size = sizeof(lost_event); |
3497 | perf_event_header__init_id(&lost_event.header, &sample_data, | ||
3498 | event); | ||
3499 | size += lost_event.header.size; | ||
3500 | } | ||
3311 | 3501 | ||
3312 | perf_output_get_handle(handle); | 3502 | perf_output_get_handle(handle); |
3313 | 3503 | ||
@@ -3338,11 +3528,11 @@ int perf_output_begin(struct perf_output_handle *handle, | |||
3338 | if (have_lost) { | 3528 | if (have_lost) { |
3339 | lost_event.header.type = PERF_RECORD_LOST; | 3529 | lost_event.header.type = PERF_RECORD_LOST; |
3340 | lost_event.header.misc = 0; | 3530 | lost_event.header.misc = 0; |
3341 | lost_event.header.size = sizeof(lost_event); | ||
3342 | lost_event.id = event->id; | 3531 | lost_event.id = event->id; |
3343 | lost_event.lost = local_xchg(&buffer->lost, 0); | 3532 | lost_event.lost = local_xchg(&buffer->lost, 0); |
3344 | 3533 | ||
3345 | perf_output_put(handle, lost_event); | 3534 | perf_output_put(handle, lost_event); |
3535 | perf_event__output_id_sample(event, handle, &sample_data); | ||
3346 | } | 3536 | } |
3347 | 3537 | ||
3348 | return 0; | 3538 | return 0; |
@@ -3375,28 +3565,6 @@ void perf_output_end(struct perf_output_handle *handle) | |||
3375 | rcu_read_unlock(); | 3565 | rcu_read_unlock(); |
3376 | } | 3566 | } |
3377 | 3567 | ||
3378 | static u32 perf_event_pid(struct perf_event *event, struct task_struct *p) | ||
3379 | { | ||
3380 | /* | ||
3381 | * only top level events have the pid namespace they were created in | ||
3382 | */ | ||
3383 | if (event->parent) | ||
3384 | event = event->parent; | ||
3385 | |||
3386 | return task_tgid_nr_ns(p, event->ns); | ||
3387 | } | ||
3388 | |||
3389 | static u32 perf_event_tid(struct perf_event *event, struct task_struct *p) | ||
3390 | { | ||
3391 | /* | ||
3392 | * only top level events have the pid namespace they were created in | ||
3393 | */ | ||
3394 | if (event->parent) | ||
3395 | event = event->parent; | ||
3396 | |||
3397 | return task_pid_nr_ns(p, event->ns); | ||
3398 | } | ||
3399 | |||
3400 | static void perf_output_read_one(struct perf_output_handle *handle, | 3568 | static void perf_output_read_one(struct perf_output_handle *handle, |
3401 | struct perf_event *event, | 3569 | struct perf_event *event, |
3402 | u64 enabled, u64 running) | 3570 | u64 enabled, u64 running) |
@@ -3571,61 +3739,16 @@ void perf_prepare_sample(struct perf_event_header *header, | |||
3571 | { | 3739 | { |
3572 | u64 sample_type = event->attr.sample_type; | 3740 | u64 sample_type = event->attr.sample_type; |
3573 | 3741 | ||
3574 | data->type = sample_type; | ||
3575 | |||
3576 | header->type = PERF_RECORD_SAMPLE; | 3742 | header->type = PERF_RECORD_SAMPLE; |
3577 | header->size = sizeof(*header); | 3743 | header->size = sizeof(*header) + event->header_size; |
3578 | 3744 | ||
3579 | header->misc = 0; | 3745 | header->misc = 0; |
3580 | header->misc |= perf_misc_flags(regs); | 3746 | header->misc |= perf_misc_flags(regs); |
3581 | 3747 | ||
3582 | if (sample_type & PERF_SAMPLE_IP) { | 3748 | __perf_event_header__init_id(header, data, event); |
3583 | data->ip = perf_instruction_pointer(regs); | ||
3584 | |||
3585 | header->size += sizeof(data->ip); | ||
3586 | } | ||
3587 | |||
3588 | if (sample_type & PERF_SAMPLE_TID) { | ||
3589 | /* namespace issues */ | ||
3590 | data->tid_entry.pid = perf_event_pid(event, current); | ||
3591 | data->tid_entry.tid = perf_event_tid(event, current); | ||
3592 | |||
3593 | header->size += sizeof(data->tid_entry); | ||
3594 | } | ||
3595 | |||
3596 | if (sample_type & PERF_SAMPLE_TIME) { | ||
3597 | data->time = perf_clock(); | ||
3598 | |||
3599 | header->size += sizeof(data->time); | ||
3600 | } | ||
3601 | |||
3602 | if (sample_type & PERF_SAMPLE_ADDR) | ||
3603 | header->size += sizeof(data->addr); | ||
3604 | |||
3605 | if (sample_type & PERF_SAMPLE_ID) { | ||
3606 | data->id = primary_event_id(event); | ||
3607 | |||
3608 | header->size += sizeof(data->id); | ||
3609 | } | ||
3610 | |||
3611 | if (sample_type & PERF_SAMPLE_STREAM_ID) { | ||
3612 | data->stream_id = event->id; | ||
3613 | |||
3614 | header->size += sizeof(data->stream_id); | ||
3615 | } | ||
3616 | |||
3617 | if (sample_type & PERF_SAMPLE_CPU) { | ||
3618 | data->cpu_entry.cpu = raw_smp_processor_id(); | ||
3619 | data->cpu_entry.reserved = 0; | ||
3620 | |||
3621 | header->size += sizeof(data->cpu_entry); | ||
3622 | } | ||
3623 | |||
3624 | if (sample_type & PERF_SAMPLE_PERIOD) | ||
3625 | header->size += sizeof(data->period); | ||
3626 | 3749 | ||
3627 | if (sample_type & PERF_SAMPLE_READ) | 3750 | if (sample_type & PERF_SAMPLE_IP) |
3628 | header->size += perf_event_read_size(event); | 3751 | data->ip = perf_instruction_pointer(regs); |
3629 | 3752 | ||
3630 | if (sample_type & PERF_SAMPLE_CALLCHAIN) { | 3753 | if (sample_type & PERF_SAMPLE_CALLCHAIN) { |
3631 | int size = 1; | 3754 | int size = 1; |
@@ -3690,23 +3813,26 @@ perf_event_read_event(struct perf_event *event, | |||
3690 | struct task_struct *task) | 3813 | struct task_struct *task) |
3691 | { | 3814 | { |
3692 | struct perf_output_handle handle; | 3815 | struct perf_output_handle handle; |
3816 | struct perf_sample_data sample; | ||
3693 | struct perf_read_event read_event = { | 3817 | struct perf_read_event read_event = { |
3694 | .header = { | 3818 | .header = { |
3695 | .type = PERF_RECORD_READ, | 3819 | .type = PERF_RECORD_READ, |
3696 | .misc = 0, | 3820 | .misc = 0, |
3697 | .size = sizeof(read_event) + perf_event_read_size(event), | 3821 | .size = sizeof(read_event) + event->read_size, |
3698 | }, | 3822 | }, |
3699 | .pid = perf_event_pid(event, task), | 3823 | .pid = perf_event_pid(event, task), |
3700 | .tid = perf_event_tid(event, task), | 3824 | .tid = perf_event_tid(event, task), |
3701 | }; | 3825 | }; |
3702 | int ret; | 3826 | int ret; |
3703 | 3827 | ||
3828 | perf_event_header__init_id(&read_event.header, &sample, event); | ||
3704 | ret = perf_output_begin(&handle, event, read_event.header.size, 0, 0); | 3829 | ret = perf_output_begin(&handle, event, read_event.header.size, 0, 0); |
3705 | if (ret) | 3830 | if (ret) |
3706 | return; | 3831 | return; |
3707 | 3832 | ||
3708 | perf_output_put(&handle, read_event); | 3833 | perf_output_put(&handle, read_event); |
3709 | perf_output_read(&handle, event); | 3834 | perf_output_read(&handle, event); |
3835 | perf_event__output_id_sample(event, &handle, &sample); | ||
3710 | 3836 | ||
3711 | perf_output_end(&handle); | 3837 | perf_output_end(&handle); |
3712 | } | 3838 | } |
@@ -3736,14 +3862,16 @@ static void perf_event_task_output(struct perf_event *event, | |||
3736 | struct perf_task_event *task_event) | 3862 | struct perf_task_event *task_event) |
3737 | { | 3863 | { |
3738 | struct perf_output_handle handle; | 3864 | struct perf_output_handle handle; |
3865 | struct perf_sample_data sample; | ||
3739 | struct task_struct *task = task_event->task; | 3866 | struct task_struct *task = task_event->task; |
3740 | int size, ret; | 3867 | int ret, size = task_event->event_id.header.size; |
3741 | 3868 | ||
3742 | size = task_event->event_id.header.size; | 3869 | perf_event_header__init_id(&task_event->event_id.header, &sample, event); |
3743 | ret = perf_output_begin(&handle, event, size, 0, 0); | ||
3744 | 3870 | ||
3871 | ret = perf_output_begin(&handle, event, | ||
3872 | task_event->event_id.header.size, 0, 0); | ||
3745 | if (ret) | 3873 | if (ret) |
3746 | return; | 3874 | goto out; |
3747 | 3875 | ||
3748 | task_event->event_id.pid = perf_event_pid(event, task); | 3876 | task_event->event_id.pid = perf_event_pid(event, task); |
3749 | task_event->event_id.ppid = perf_event_pid(event, current); | 3877 | task_event->event_id.ppid = perf_event_pid(event, current); |
@@ -3753,7 +3881,11 @@ static void perf_event_task_output(struct perf_event *event, | |||
3753 | 3881 | ||
3754 | perf_output_put(&handle, task_event->event_id); | 3882 | perf_output_put(&handle, task_event->event_id); |
3755 | 3883 | ||
3884 | perf_event__output_id_sample(event, &handle, &sample); | ||
3885 | |||
3756 | perf_output_end(&handle); | 3886 | perf_output_end(&handle); |
3887 | out: | ||
3888 | task_event->event_id.header.size = size; | ||
3757 | } | 3889 | } |
3758 | 3890 | ||
3759 | static int perf_event_task_match(struct perf_event *event) | 3891 | static int perf_event_task_match(struct perf_event *event) |
@@ -3792,6 +3924,8 @@ static void perf_event_task_event(struct perf_task_event *task_event) | |||
3792 | rcu_read_lock(); | 3924 | rcu_read_lock(); |
3793 | list_for_each_entry_rcu(pmu, &pmus, entry) { | 3925 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
3794 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); | 3926 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); |
3927 | if (cpuctx->active_pmu != pmu) | ||
3928 | goto next; | ||
3795 | perf_event_task_ctx(&cpuctx->ctx, task_event); | 3929 | perf_event_task_ctx(&cpuctx->ctx, task_event); |
3796 | 3930 | ||
3797 | ctx = task_event->task_ctx; | 3931 | ctx = task_event->task_ctx; |
@@ -3866,11 +4000,16 @@ static void perf_event_comm_output(struct perf_event *event, | |||
3866 | struct perf_comm_event *comm_event) | 4000 | struct perf_comm_event *comm_event) |
3867 | { | 4001 | { |
3868 | struct perf_output_handle handle; | 4002 | struct perf_output_handle handle; |
4003 | struct perf_sample_data sample; | ||
3869 | int size = comm_event->event_id.header.size; | 4004 | int size = comm_event->event_id.header.size; |
3870 | int ret = perf_output_begin(&handle, event, size, 0, 0); | 4005 | int ret; |
4006 | |||
4007 | perf_event_header__init_id(&comm_event->event_id.header, &sample, event); | ||
4008 | ret = perf_output_begin(&handle, event, | ||
4009 | comm_event->event_id.header.size, 0, 0); | ||
3871 | 4010 | ||
3872 | if (ret) | 4011 | if (ret) |
3873 | return; | 4012 | goto out; |
3874 | 4013 | ||
3875 | comm_event->event_id.pid = perf_event_pid(event, comm_event->task); | 4014 | comm_event->event_id.pid = perf_event_pid(event, comm_event->task); |
3876 | comm_event->event_id.tid = perf_event_tid(event, comm_event->task); | 4015 | comm_event->event_id.tid = perf_event_tid(event, comm_event->task); |
@@ -3878,7 +4017,12 @@ static void perf_event_comm_output(struct perf_event *event, | |||
3878 | perf_output_put(&handle, comm_event->event_id); | 4017 | perf_output_put(&handle, comm_event->event_id); |
3879 | perf_output_copy(&handle, comm_event->comm, | 4018 | perf_output_copy(&handle, comm_event->comm, |
3880 | comm_event->comm_size); | 4019 | comm_event->comm_size); |
4020 | |||
4021 | perf_event__output_id_sample(event, &handle, &sample); | ||
4022 | |||
3881 | perf_output_end(&handle); | 4023 | perf_output_end(&handle); |
4024 | out: | ||
4025 | comm_event->event_id.header.size = size; | ||
3882 | } | 4026 | } |
3883 | 4027 | ||
3884 | static int perf_event_comm_match(struct perf_event *event) | 4028 | static int perf_event_comm_match(struct perf_event *event) |
@@ -3923,10 +4067,11 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) | |||
3923 | comm_event->comm_size = size; | 4067 | comm_event->comm_size = size; |
3924 | 4068 | ||
3925 | comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; | 4069 | comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; |
3926 | |||
3927 | rcu_read_lock(); | 4070 | rcu_read_lock(); |
3928 | list_for_each_entry_rcu(pmu, &pmus, entry) { | 4071 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
3929 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); | 4072 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); |
4073 | if (cpuctx->active_pmu != pmu) | ||
4074 | goto next; | ||
3930 | perf_event_comm_ctx(&cpuctx->ctx, comm_event); | 4075 | perf_event_comm_ctx(&cpuctx->ctx, comm_event); |
3931 | 4076 | ||
3932 | ctxn = pmu->task_ctx_nr; | 4077 | ctxn = pmu->task_ctx_nr; |
@@ -4002,11 +4147,15 @@ static void perf_event_mmap_output(struct perf_event *event, | |||
4002 | struct perf_mmap_event *mmap_event) | 4147 | struct perf_mmap_event *mmap_event) |
4003 | { | 4148 | { |
4004 | struct perf_output_handle handle; | 4149 | struct perf_output_handle handle; |
4150 | struct perf_sample_data sample; | ||
4005 | int size = mmap_event->event_id.header.size; | 4151 | int size = mmap_event->event_id.header.size; |
4006 | int ret = perf_output_begin(&handle, event, size, 0, 0); | 4152 | int ret; |
4007 | 4153 | ||
4154 | perf_event_header__init_id(&mmap_event->event_id.header, &sample, event); | ||
4155 | ret = perf_output_begin(&handle, event, | ||
4156 | mmap_event->event_id.header.size, 0, 0); | ||
4008 | if (ret) | 4157 | if (ret) |
4009 | return; | 4158 | goto out; |
4010 | 4159 | ||
4011 | mmap_event->event_id.pid = perf_event_pid(event, current); | 4160 | mmap_event->event_id.pid = perf_event_pid(event, current); |
4012 | mmap_event->event_id.tid = perf_event_tid(event, current); | 4161 | mmap_event->event_id.tid = perf_event_tid(event, current); |
@@ -4014,7 +4163,12 @@ static void perf_event_mmap_output(struct perf_event *event, | |||
4014 | perf_output_put(&handle, mmap_event->event_id); | 4163 | perf_output_put(&handle, mmap_event->event_id); |
4015 | perf_output_copy(&handle, mmap_event->file_name, | 4164 | perf_output_copy(&handle, mmap_event->file_name, |
4016 | mmap_event->file_size); | 4165 | mmap_event->file_size); |
4166 | |||
4167 | perf_event__output_id_sample(event, &handle, &sample); | ||
4168 | |||
4017 | perf_output_end(&handle); | 4169 | perf_output_end(&handle); |
4170 | out: | ||
4171 | mmap_event->event_id.header.size = size; | ||
4018 | } | 4172 | } |
4019 | 4173 | ||
4020 | static int perf_event_mmap_match(struct perf_event *event, | 4174 | static int perf_event_mmap_match(struct perf_event *event, |
@@ -4112,6 +4266,8 @@ got_name: | |||
4112 | rcu_read_lock(); | 4266 | rcu_read_lock(); |
4113 | list_for_each_entry_rcu(pmu, &pmus, entry) { | 4267 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
4114 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); | 4268 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); |
4269 | if (cpuctx->active_pmu != pmu) | ||
4270 | goto next; | ||
4115 | perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, | 4271 | perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, |
4116 | vma->vm_flags & VM_EXEC); | 4272 | vma->vm_flags & VM_EXEC); |
4117 | 4273 | ||
@@ -4167,6 +4323,7 @@ void perf_event_mmap(struct vm_area_struct *vma) | |||
4167 | static void perf_log_throttle(struct perf_event *event, int enable) | 4323 | static void perf_log_throttle(struct perf_event *event, int enable) |
4168 | { | 4324 | { |
4169 | struct perf_output_handle handle; | 4325 | struct perf_output_handle handle; |
4326 | struct perf_sample_data sample; | ||
4170 | int ret; | 4327 | int ret; |
4171 | 4328 | ||
4172 | struct { | 4329 | struct { |
@@ -4188,11 +4345,15 @@ static void perf_log_throttle(struct perf_event *event, int enable) | |||
4188 | if (enable) | 4345 | if (enable) |
4189 | throttle_event.header.type = PERF_RECORD_UNTHROTTLE; | 4346 | throttle_event.header.type = PERF_RECORD_UNTHROTTLE; |
4190 | 4347 | ||
4191 | ret = perf_output_begin(&handle, event, sizeof(throttle_event), 1, 0); | 4348 | perf_event_header__init_id(&throttle_event.header, &sample, event); |
4349 | |||
4350 | ret = perf_output_begin(&handle, event, | ||
4351 | throttle_event.header.size, 1, 0); | ||
4192 | if (ret) | 4352 | if (ret) |
4193 | return; | 4353 | return; |
4194 | 4354 | ||
4195 | perf_output_put(&handle, throttle_event); | 4355 | perf_output_put(&handle, throttle_event); |
4356 | perf_event__output_id_sample(event, &handle, &sample); | ||
4196 | perf_output_end(&handle); | 4357 | perf_output_end(&handle); |
4197 | } | 4358 | } |
4198 | 4359 | ||
@@ -4208,6 +4369,13 @@ static int __perf_event_overflow(struct perf_event *event, int nmi, | |||
4208 | struct hw_perf_event *hwc = &event->hw; | 4369 | struct hw_perf_event *hwc = &event->hw; |
4209 | int ret = 0; | 4370 | int ret = 0; |
4210 | 4371 | ||
4372 | /* | ||
4373 | * Non-sampling counters might still use the PMI to fold short | ||
4374 | * hardware counters, ignore those. | ||
4375 | */ | ||
4376 | if (unlikely(!is_sampling_event(event))) | ||
4377 | return 0; | ||
4378 | |||
4211 | if (!throttle) { | 4379 | if (!throttle) { |
4212 | hwc->interrupts++; | 4380 | hwc->interrupts++; |
4213 | } else { | 4381 | } else { |
@@ -4353,7 +4521,7 @@ static void perf_swevent_event(struct perf_event *event, u64 nr, | |||
4353 | if (!regs) | 4521 | if (!regs) |
4354 | return; | 4522 | return; |
4355 | 4523 | ||
4356 | if (!hwc->sample_period) | 4524 | if (!is_sampling_event(event)) |
4357 | return; | 4525 | return; |
4358 | 4526 | ||
4359 | if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq) | 4527 | if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq) |
@@ -4516,7 +4684,7 @@ static int perf_swevent_add(struct perf_event *event, int flags) | |||
4516 | struct hw_perf_event *hwc = &event->hw; | 4684 | struct hw_perf_event *hwc = &event->hw; |
4517 | struct hlist_head *head; | 4685 | struct hlist_head *head; |
4518 | 4686 | ||
4519 | if (hwc->sample_period) { | 4687 | if (is_sampling_event(event)) { |
4520 | hwc->last_period = hwc->sample_period; | 4688 | hwc->last_period = hwc->sample_period; |
4521 | perf_swevent_set_period(event); | 4689 | perf_swevent_set_period(event); |
4522 | } | 4690 | } |
@@ -4681,7 +4849,7 @@ static int perf_swevent_init(struct perf_event *event) | |||
4681 | break; | 4849 | break; |
4682 | } | 4850 | } |
4683 | 4851 | ||
4684 | if (event_id > PERF_COUNT_SW_MAX) | 4852 | if (event_id >= PERF_COUNT_SW_MAX) |
4685 | return -ENOENT; | 4853 | return -ENOENT; |
4686 | 4854 | ||
4687 | if (!event->parent) { | 4855 | if (!event->parent) { |
@@ -4773,15 +4941,6 @@ static int perf_tp_event_init(struct perf_event *event) | |||
4773 | if (event->attr.type != PERF_TYPE_TRACEPOINT) | 4941 | if (event->attr.type != PERF_TYPE_TRACEPOINT) |
4774 | return -ENOENT; | 4942 | return -ENOENT; |
4775 | 4943 | ||
4776 | /* | ||
4777 | * Raw tracepoint data is a severe data leak, only allow root to | ||
4778 | * have these. | ||
4779 | */ | ||
4780 | if ((event->attr.sample_type & PERF_SAMPLE_RAW) && | ||
4781 | perf_paranoid_tracepoint_raw() && | ||
4782 | !capable(CAP_SYS_ADMIN)) | ||
4783 | return -EPERM; | ||
4784 | |||
4785 | err = perf_trace_init(event); | 4944 | err = perf_trace_init(event); |
4786 | if (err) | 4945 | if (err) |
4787 | return err; | 4946 | return err; |
@@ -4804,7 +4963,7 @@ static struct pmu perf_tracepoint = { | |||
4804 | 4963 | ||
4805 | static inline void perf_tp_register(void) | 4964 | static inline void perf_tp_register(void) |
4806 | { | 4965 | { |
4807 | perf_pmu_register(&perf_tracepoint); | 4966 | perf_pmu_register(&perf_tracepoint, "tracepoint", PERF_TYPE_TRACEPOINT); |
4808 | } | 4967 | } |
4809 | 4968 | ||
4810 | static int perf_event_set_filter(struct perf_event *event, void __user *arg) | 4969 | static int perf_event_set_filter(struct perf_event *event, void __user *arg) |
@@ -4894,31 +5053,33 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) | |||
4894 | static void perf_swevent_start_hrtimer(struct perf_event *event) | 5053 | static void perf_swevent_start_hrtimer(struct perf_event *event) |
4895 | { | 5054 | { |
4896 | struct hw_perf_event *hwc = &event->hw; | 5055 | struct hw_perf_event *hwc = &event->hw; |
5056 | s64 period; | ||
5057 | |||
5058 | if (!is_sampling_event(event)) | ||
5059 | return; | ||
4897 | 5060 | ||
4898 | hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | 5061 | hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
4899 | hwc->hrtimer.function = perf_swevent_hrtimer; | 5062 | hwc->hrtimer.function = perf_swevent_hrtimer; |
4900 | if (hwc->sample_period) { | ||
4901 | s64 period = local64_read(&hwc->period_left); | ||
4902 | 5063 | ||
4903 | if (period) { | 5064 | period = local64_read(&hwc->period_left); |
4904 | if (period < 0) | 5065 | if (period) { |
4905 | period = 10000; | 5066 | if (period < 0) |
5067 | period = 10000; | ||
4906 | 5068 | ||
4907 | local64_set(&hwc->period_left, 0); | 5069 | local64_set(&hwc->period_left, 0); |
4908 | } else { | 5070 | } else { |
4909 | period = max_t(u64, 10000, hwc->sample_period); | 5071 | period = max_t(u64, 10000, hwc->sample_period); |
4910 | } | 5072 | } |
4911 | __hrtimer_start_range_ns(&hwc->hrtimer, | 5073 | __hrtimer_start_range_ns(&hwc->hrtimer, |
4912 | ns_to_ktime(period), 0, | 5074 | ns_to_ktime(period), 0, |
4913 | HRTIMER_MODE_REL_PINNED, 0); | 5075 | HRTIMER_MODE_REL_PINNED, 0); |
4914 | } | ||
4915 | } | 5076 | } |
4916 | 5077 | ||
4917 | static void perf_swevent_cancel_hrtimer(struct perf_event *event) | 5078 | static void perf_swevent_cancel_hrtimer(struct perf_event *event) |
4918 | { | 5079 | { |
4919 | struct hw_perf_event *hwc = &event->hw; | 5080 | struct hw_perf_event *hwc = &event->hw; |
4920 | 5081 | ||
4921 | if (hwc->sample_period) { | 5082 | if (is_sampling_event(event)) { |
4922 | ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); | 5083 | ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); |
4923 | local64_set(&hwc->period_left, ktime_to_ns(remaining)); | 5084 | local64_set(&hwc->period_left, ktime_to_ns(remaining)); |
4924 | 5085 | ||
@@ -5113,25 +5274,94 @@ static void *find_pmu_context(int ctxn) | |||
5113 | return NULL; | 5274 | return NULL; |
5114 | } | 5275 | } |
5115 | 5276 | ||
5116 | static void free_pmu_context(void * __percpu cpu_context) | 5277 | static void update_pmu_context(struct pmu *pmu, struct pmu *old_pmu) |
5117 | { | 5278 | { |
5118 | struct pmu *pmu; | 5279 | int cpu; |
5280 | |||
5281 | for_each_possible_cpu(cpu) { | ||
5282 | struct perf_cpu_context *cpuctx; | ||
5283 | |||
5284 | cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); | ||
5285 | |||
5286 | if (cpuctx->active_pmu == old_pmu) | ||
5287 | cpuctx->active_pmu = pmu; | ||
5288 | } | ||
5289 | } | ||
5290 | |||
5291 | static void free_pmu_context(struct pmu *pmu) | ||
5292 | { | ||
5293 | struct pmu *i; | ||
5119 | 5294 | ||
5120 | mutex_lock(&pmus_lock); | 5295 | mutex_lock(&pmus_lock); |
5121 | /* | 5296 | /* |
5122 | * Like a real lame refcount. | 5297 | * Like a real lame refcount. |
5123 | */ | 5298 | */ |
5124 | list_for_each_entry(pmu, &pmus, entry) { | 5299 | list_for_each_entry(i, &pmus, entry) { |
5125 | if (pmu->pmu_cpu_context == cpu_context) | 5300 | if (i->pmu_cpu_context == pmu->pmu_cpu_context) { |
5301 | update_pmu_context(i, pmu); | ||
5126 | goto out; | 5302 | goto out; |
5303 | } | ||
5127 | } | 5304 | } |
5128 | 5305 | ||
5129 | free_percpu(cpu_context); | 5306 | free_percpu(pmu->pmu_cpu_context); |
5130 | out: | 5307 | out: |
5131 | mutex_unlock(&pmus_lock); | 5308 | mutex_unlock(&pmus_lock); |
5132 | } | 5309 | } |
5310 | static struct idr pmu_idr; | ||
5311 | |||
5312 | static ssize_t | ||
5313 | type_show(struct device *dev, struct device_attribute *attr, char *page) | ||
5314 | { | ||
5315 | struct pmu *pmu = dev_get_drvdata(dev); | ||
5316 | |||
5317 | return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->type); | ||
5318 | } | ||
5319 | |||
5320 | static struct device_attribute pmu_dev_attrs[] = { | ||
5321 | __ATTR_RO(type), | ||
5322 | __ATTR_NULL, | ||
5323 | }; | ||
5324 | |||
5325 | static int pmu_bus_running; | ||
5326 | static struct bus_type pmu_bus = { | ||
5327 | .name = "event_source", | ||
5328 | .dev_attrs = pmu_dev_attrs, | ||
5329 | }; | ||
5330 | |||
5331 | static void pmu_dev_release(struct device *dev) | ||
5332 | { | ||
5333 | kfree(dev); | ||
5334 | } | ||
5335 | |||
5336 | static int pmu_dev_alloc(struct pmu *pmu) | ||
5337 | { | ||
5338 | int ret = -ENOMEM; | ||
5339 | |||
5340 | pmu->dev = kzalloc(sizeof(struct device), GFP_KERNEL); | ||
5341 | if (!pmu->dev) | ||
5342 | goto out; | ||
5343 | |||
5344 | device_initialize(pmu->dev); | ||
5345 | ret = dev_set_name(pmu->dev, "%s", pmu->name); | ||
5346 | if (ret) | ||
5347 | goto free_dev; | ||
5348 | |||
5349 | dev_set_drvdata(pmu->dev, pmu); | ||
5350 | pmu->dev->bus = &pmu_bus; | ||
5351 | pmu->dev->release = pmu_dev_release; | ||
5352 | ret = device_add(pmu->dev); | ||
5353 | if (ret) | ||
5354 | goto free_dev; | ||
5355 | |||
5356 | out: | ||
5357 | return ret; | ||
5358 | |||
5359 | free_dev: | ||
5360 | put_device(pmu->dev); | ||
5361 | goto out; | ||
5362 | } | ||
5133 | 5363 | ||
5134 | int perf_pmu_register(struct pmu *pmu) | 5364 | int perf_pmu_register(struct pmu *pmu, char *name, int type) |
5135 | { | 5365 | { |
5136 | int cpu, ret; | 5366 | int cpu, ret; |
5137 | 5367 | ||
@@ -5141,13 +5371,38 @@ int perf_pmu_register(struct pmu *pmu) | |||
5141 | if (!pmu->pmu_disable_count) | 5371 | if (!pmu->pmu_disable_count) |
5142 | goto unlock; | 5372 | goto unlock; |
5143 | 5373 | ||
5374 | pmu->type = -1; | ||
5375 | if (!name) | ||
5376 | goto skip_type; | ||
5377 | pmu->name = name; | ||
5378 | |||
5379 | if (type < 0) { | ||
5380 | int err = idr_pre_get(&pmu_idr, GFP_KERNEL); | ||
5381 | if (!err) | ||
5382 | goto free_pdc; | ||
5383 | |||
5384 | err = idr_get_new_above(&pmu_idr, pmu, PERF_TYPE_MAX, &type); | ||
5385 | if (err) { | ||
5386 | ret = err; | ||
5387 | goto free_pdc; | ||
5388 | } | ||
5389 | } | ||
5390 | pmu->type = type; | ||
5391 | |||
5392 | if (pmu_bus_running) { | ||
5393 | ret = pmu_dev_alloc(pmu); | ||
5394 | if (ret) | ||
5395 | goto free_idr; | ||
5396 | } | ||
5397 | |||
5398 | skip_type: | ||
5144 | pmu->pmu_cpu_context = find_pmu_context(pmu->task_ctx_nr); | 5399 | pmu->pmu_cpu_context = find_pmu_context(pmu->task_ctx_nr); |
5145 | if (pmu->pmu_cpu_context) | 5400 | if (pmu->pmu_cpu_context) |
5146 | goto got_cpu_context; | 5401 | goto got_cpu_context; |
5147 | 5402 | ||
5148 | pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context); | 5403 | pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context); |
5149 | if (!pmu->pmu_cpu_context) | 5404 | if (!pmu->pmu_cpu_context) |
5150 | goto free_pdc; | 5405 | goto free_dev; |
5151 | 5406 | ||
5152 | for_each_possible_cpu(cpu) { | 5407 | for_each_possible_cpu(cpu) { |
5153 | struct perf_cpu_context *cpuctx; | 5408 | struct perf_cpu_context *cpuctx; |
@@ -5158,6 +5413,7 @@ int perf_pmu_register(struct pmu *pmu) | |||
5158 | cpuctx->ctx.pmu = pmu; | 5413 | cpuctx->ctx.pmu = pmu; |
5159 | cpuctx->jiffies_interval = 1; | 5414 | cpuctx->jiffies_interval = 1; |
5160 | INIT_LIST_HEAD(&cpuctx->rotation_list); | 5415 | INIT_LIST_HEAD(&cpuctx->rotation_list); |
5416 | cpuctx->active_pmu = pmu; | ||
5161 | } | 5417 | } |
5162 | 5418 | ||
5163 | got_cpu_context: | 5419 | got_cpu_context: |
@@ -5190,6 +5446,14 @@ unlock: | |||
5190 | 5446 | ||
5191 | return ret; | 5447 | return ret; |
5192 | 5448 | ||
5449 | free_dev: | ||
5450 | device_del(pmu->dev); | ||
5451 | put_device(pmu->dev); | ||
5452 | |||
5453 | free_idr: | ||
5454 | if (pmu->type >= PERF_TYPE_MAX) | ||
5455 | idr_remove(&pmu_idr, pmu->type); | ||
5456 | |||
5193 | free_pdc: | 5457 | free_pdc: |
5194 | free_percpu(pmu->pmu_disable_count); | 5458 | free_percpu(pmu->pmu_disable_count); |
5195 | goto unlock; | 5459 | goto unlock; |
@@ -5209,7 +5473,11 @@ void perf_pmu_unregister(struct pmu *pmu) | |||
5209 | synchronize_rcu(); | 5473 | synchronize_rcu(); |
5210 | 5474 | ||
5211 | free_percpu(pmu->pmu_disable_count); | 5475 | free_percpu(pmu->pmu_disable_count); |
5212 | free_pmu_context(pmu->pmu_cpu_context); | 5476 | if (pmu->type >= PERF_TYPE_MAX) |
5477 | idr_remove(&pmu_idr, pmu->type); | ||
5478 | device_del(pmu->dev); | ||
5479 | put_device(pmu->dev); | ||
5480 | free_pmu_context(pmu); | ||
5213 | } | 5481 | } |
5214 | 5482 | ||
5215 | struct pmu *perf_init_event(struct perf_event *event) | 5483 | struct pmu *perf_init_event(struct perf_event *event) |
@@ -5218,6 +5486,13 @@ struct pmu *perf_init_event(struct perf_event *event) | |||
5218 | int idx; | 5486 | int idx; |
5219 | 5487 | ||
5220 | idx = srcu_read_lock(&pmus_srcu); | 5488 | idx = srcu_read_lock(&pmus_srcu); |
5489 | |||
5490 | rcu_read_lock(); | ||
5491 | pmu = idr_find(&pmu_idr, event->attr.type); | ||
5492 | rcu_read_unlock(); | ||
5493 | if (pmu) | ||
5494 | goto unlock; | ||
5495 | |||
5221 | list_for_each_entry_rcu(pmu, &pmus, entry) { | 5496 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
5222 | int ret = pmu->event_init(event); | 5497 | int ret = pmu->event_init(event); |
5223 | if (!ret) | 5498 | if (!ret) |
@@ -5677,12 +5952,18 @@ SYSCALL_DEFINE5(perf_event_open, | |||
5677 | mutex_unlock(&ctx->mutex); | 5952 | mutex_unlock(&ctx->mutex); |
5678 | 5953 | ||
5679 | event->owner = current; | 5954 | event->owner = current; |
5680 | get_task_struct(current); | 5955 | |
5681 | mutex_lock(¤t->perf_event_mutex); | 5956 | mutex_lock(¤t->perf_event_mutex); |
5682 | list_add_tail(&event->owner_entry, ¤t->perf_event_list); | 5957 | list_add_tail(&event->owner_entry, ¤t->perf_event_list); |
5683 | mutex_unlock(¤t->perf_event_mutex); | 5958 | mutex_unlock(¤t->perf_event_mutex); |
5684 | 5959 | ||
5685 | /* | 5960 | /* |
5961 | * Precalculate sample_data sizes | ||
5962 | */ | ||
5963 | perf_event__header_size(event); | ||
5964 | perf_event__id_header_size(event); | ||
5965 | |||
5966 | /* | ||
5686 | * Drop the reference on the group_event after placing the | 5967 | * Drop the reference on the group_event after placing the |
5687 | * new event on the sibling_list. This ensures destruction | 5968 | * new event on the sibling_list. This ensures destruction |
5688 | * of the group leader will find the pointer to itself in | 5969 | * of the group leader will find the pointer to itself in |
@@ -5745,12 +6026,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, | |||
5745 | ++ctx->generation; | 6026 | ++ctx->generation; |
5746 | mutex_unlock(&ctx->mutex); | 6027 | mutex_unlock(&ctx->mutex); |
5747 | 6028 | ||
5748 | event->owner = current; | ||
5749 | get_task_struct(current); | ||
5750 | mutex_lock(¤t->perf_event_mutex); | ||
5751 | list_add_tail(&event->owner_entry, ¤t->perf_event_list); | ||
5752 | mutex_unlock(¤t->perf_event_mutex); | ||
5753 | |||
5754 | return event; | 6029 | return event; |
5755 | 6030 | ||
5756 | err_free: | 6031 | err_free: |
@@ -5901,8 +6176,24 @@ again: | |||
5901 | */ | 6176 | */ |
5902 | void perf_event_exit_task(struct task_struct *child) | 6177 | void perf_event_exit_task(struct task_struct *child) |
5903 | { | 6178 | { |
6179 | struct perf_event *event, *tmp; | ||
5904 | int ctxn; | 6180 | int ctxn; |
5905 | 6181 | ||
6182 | mutex_lock(&child->perf_event_mutex); | ||
6183 | list_for_each_entry_safe(event, tmp, &child->perf_event_list, | ||
6184 | owner_entry) { | ||
6185 | list_del_init(&event->owner_entry); | ||
6186 | |||
6187 | /* | ||
6188 | * Ensure the list deletion is visible before we clear | ||
6189 | * the owner, closes a race against perf_release() where | ||
6190 | * we need to serialize on the owner->perf_event_mutex. | ||
6191 | */ | ||
6192 | smp_wmb(); | ||
6193 | event->owner = NULL; | ||
6194 | } | ||
6195 | mutex_unlock(&child->perf_event_mutex); | ||
6196 | |||
5906 | for_each_task_context_nr(ctxn) | 6197 | for_each_task_context_nr(ctxn) |
5907 | perf_event_exit_task_context(child, ctxn); | 6198 | perf_event_exit_task_context(child, ctxn); |
5908 | } | 6199 | } |
@@ -6025,6 +6316,12 @@ inherit_event(struct perf_event *parent_event, | |||
6025 | child_event->overflow_handler = parent_event->overflow_handler; | 6316 | child_event->overflow_handler = parent_event->overflow_handler; |
6026 | 6317 | ||
6027 | /* | 6318 | /* |
6319 | * Precalculate sample_data sizes | ||
6320 | */ | ||
6321 | perf_event__header_size(child_event); | ||
6322 | perf_event__id_header_size(child_event); | ||
6323 | |||
6324 | /* | ||
6028 | * Link it up in the child's context: | 6325 | * Link it up in the child's context: |
6029 | */ | 6326 | */ |
6030 | raw_spin_lock_irqsave(&child_ctx->lock, flags); | 6327 | raw_spin_lock_irqsave(&child_ctx->lock, flags); |
@@ -6122,6 +6419,7 @@ int perf_event_init_context(struct task_struct *child, int ctxn) | |||
6122 | struct perf_event *event; | 6419 | struct perf_event *event; |
6123 | struct task_struct *parent = current; | 6420 | struct task_struct *parent = current; |
6124 | int inherited_all = 1; | 6421 | int inherited_all = 1; |
6422 | unsigned long flags; | ||
6125 | int ret = 0; | 6423 | int ret = 0; |
6126 | 6424 | ||
6127 | child->perf_event_ctxp[ctxn] = NULL; | 6425 | child->perf_event_ctxp[ctxn] = NULL; |
@@ -6162,6 +6460,15 @@ int perf_event_init_context(struct task_struct *child, int ctxn) | |||
6162 | break; | 6460 | break; |
6163 | } | 6461 | } |
6164 | 6462 | ||
6463 | /* | ||
6464 | * We can't hold ctx->lock when iterating the ->flexible_group list due | ||
6465 | * to allocations, but we need to prevent rotation because | ||
6466 | * rotate_ctx() will change the list from interrupt context. | ||
6467 | */ | ||
6468 | raw_spin_lock_irqsave(&parent_ctx->lock, flags); | ||
6469 | parent_ctx->rotate_disable = 1; | ||
6470 | raw_spin_unlock_irqrestore(&parent_ctx->lock, flags); | ||
6471 | |||
6165 | list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) { | 6472 | list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) { |
6166 | ret = inherit_task_group(event, parent, parent_ctx, | 6473 | ret = inherit_task_group(event, parent, parent_ctx, |
6167 | child, ctxn, &inherited_all); | 6474 | child, ctxn, &inherited_all); |
@@ -6169,6 +6476,10 @@ int perf_event_init_context(struct task_struct *child, int ctxn) | |||
6169 | break; | 6476 | break; |
6170 | } | 6477 | } |
6171 | 6478 | ||
6479 | raw_spin_lock_irqsave(&parent_ctx->lock, flags); | ||
6480 | parent_ctx->rotate_disable = 0; | ||
6481 | raw_spin_unlock_irqrestore(&parent_ctx->lock, flags); | ||
6482 | |||
6172 | child_ctx = child->perf_event_ctxp[ctxn]; | 6483 | child_ctx = child->perf_event_ctxp[ctxn]; |
6173 | 6484 | ||
6174 | if (child_ctx && inherited_all) { | 6485 | if (child_ctx && inherited_all) { |
@@ -6241,7 +6552,7 @@ static void __cpuinit perf_event_init_cpu(int cpu) | |||
6241 | mutex_unlock(&swhash->hlist_mutex); | 6552 | mutex_unlock(&swhash->hlist_mutex); |
6242 | } | 6553 | } |
6243 | 6554 | ||
6244 | #ifdef CONFIG_HOTPLUG_CPU | 6555 | #if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC |
6245 | static void perf_pmu_rotate_stop(struct pmu *pmu) | 6556 | static void perf_pmu_rotate_stop(struct pmu *pmu) |
6246 | { | 6557 | { |
6247 | struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); | 6558 | struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); |
@@ -6295,6 +6606,26 @@ static void perf_event_exit_cpu(int cpu) | |||
6295 | static inline void perf_event_exit_cpu(int cpu) { } | 6606 | static inline void perf_event_exit_cpu(int cpu) { } |
6296 | #endif | 6607 | #endif |
6297 | 6608 | ||
6609 | static int | ||
6610 | perf_reboot(struct notifier_block *notifier, unsigned long val, void *v) | ||
6611 | { | ||
6612 | int cpu; | ||
6613 | |||
6614 | for_each_online_cpu(cpu) | ||
6615 | perf_event_exit_cpu(cpu); | ||
6616 | |||
6617 | return NOTIFY_OK; | ||
6618 | } | ||
6619 | |||
6620 | /* | ||
6621 | * Run the perf reboot notifier at the very last possible moment so that | ||
6622 | * the generic watchdog code runs as long as possible. | ||
6623 | */ | ||
6624 | static struct notifier_block perf_reboot_notifier = { | ||
6625 | .notifier_call = perf_reboot, | ||
6626 | .priority = INT_MIN, | ||
6627 | }; | ||
6628 | |||
6298 | static int __cpuinit | 6629 | static int __cpuinit |
6299 | perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) | 6630 | perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) |
6300 | { | 6631 | { |
@@ -6321,11 +6652,47 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) | |||
6321 | 6652 | ||
6322 | void __init perf_event_init(void) | 6653 | void __init perf_event_init(void) |
6323 | { | 6654 | { |
6655 | int ret; | ||
6656 | |||
6657 | idr_init(&pmu_idr); | ||
6658 | |||
6324 | perf_event_init_all_cpus(); | 6659 | perf_event_init_all_cpus(); |
6325 | init_srcu_struct(&pmus_srcu); | 6660 | init_srcu_struct(&pmus_srcu); |
6326 | perf_pmu_register(&perf_swevent); | 6661 | perf_pmu_register(&perf_swevent, "software", PERF_TYPE_SOFTWARE); |
6327 | perf_pmu_register(&perf_cpu_clock); | 6662 | perf_pmu_register(&perf_cpu_clock, NULL, -1); |
6328 | perf_pmu_register(&perf_task_clock); | 6663 | perf_pmu_register(&perf_task_clock, NULL, -1); |
6329 | perf_tp_register(); | 6664 | perf_tp_register(); |
6330 | perf_cpu_notifier(perf_cpu_notify); | 6665 | perf_cpu_notifier(perf_cpu_notify); |
6666 | register_reboot_notifier(&perf_reboot_notifier); | ||
6667 | |||
6668 | ret = init_hw_breakpoint(); | ||
6669 | WARN(ret, "hw_breakpoint initialization failed with: %d", ret); | ||
6670 | } | ||
6671 | |||
6672 | static int __init perf_event_sysfs_init(void) | ||
6673 | { | ||
6674 | struct pmu *pmu; | ||
6675 | int ret; | ||
6676 | |||
6677 | mutex_lock(&pmus_lock); | ||
6678 | |||
6679 | ret = bus_register(&pmu_bus); | ||
6680 | if (ret) | ||
6681 | goto unlock; | ||
6682 | |||
6683 | list_for_each_entry(pmu, &pmus, entry) { | ||
6684 | if (!pmu->name || pmu->type < 0) | ||
6685 | continue; | ||
6686 | |||
6687 | ret = pmu_dev_alloc(pmu); | ||
6688 | WARN(ret, "Failed to register pmu: %s, reason %d\n", pmu->name, ret); | ||
6689 | } | ||
6690 | pmu_bus_running = 1; | ||
6691 | ret = 0; | ||
6692 | |||
6693 | unlock: | ||
6694 | mutex_unlock(&pmus_lock); | ||
6695 | |||
6696 | return ret; | ||
6331 | } | 6697 | } |
6698 | device_initcall(perf_event_sysfs_init); | ||