Diffstat (limited to 'kernel/perf_event.c')
-rw-r--r-- | kernel/perf_event.c | 890
1 file changed, 654 insertions(+), 236 deletions(-)
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 517d827f4982..126a302c481c 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/mm.h> | 13 | #include <linux/mm.h> |
14 | #include <linux/cpu.h> | 14 | #include <linux/cpu.h> |
15 | #include <linux/smp.h> | 15 | #include <linux/smp.h> |
16 | #include <linux/idr.h> | ||
16 | #include <linux/file.h> | 17 | #include <linux/file.h> |
17 | #include <linux/poll.h> | 18 | #include <linux/poll.h> |
18 | #include <linux/slab.h> | 19 | #include <linux/slab.h> |
@@ -21,7 +22,9 @@ | |||
21 | #include <linux/dcache.h> | 22 | #include <linux/dcache.h> |
22 | #include <linux/percpu.h> | 23 | #include <linux/percpu.h> |
23 | #include <linux/ptrace.h> | 24 | #include <linux/ptrace.h> |
25 | #include <linux/reboot.h> | ||
24 | #include <linux/vmstat.h> | 26 | #include <linux/vmstat.h> |
27 | #include <linux/device.h> | ||
25 | #include <linux/vmalloc.h> | 28 | #include <linux/vmalloc.h> |
26 | #include <linux/hardirq.h> | 29 | #include <linux/hardirq.h> |
27 | #include <linux/rculist.h> | 30 | #include <linux/rculist.h> |
@@ -31,9 +34,16 @@ | |||
31 | #include <linux/kernel_stat.h> | 34 | #include <linux/kernel_stat.h> |
32 | #include <linux/perf_event.h> | 35 | #include <linux/perf_event.h> |
33 | #include <linux/ftrace_event.h> | 36 | #include <linux/ftrace_event.h> |
37 | #include <linux/hw_breakpoint.h> | ||
34 | 38 | ||
35 | #include <asm/irq_regs.h> | 39 | #include <asm/irq_regs.h> |
36 | 40 | ||
41 | enum event_type_t { | ||
42 | EVENT_FLEXIBLE = 0x1, | ||
43 | EVENT_PINNED = 0x2, | ||
44 | EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED, | ||
45 | }; | ||
46 | |||
37 | atomic_t perf_task_events __read_mostly; | 47 | atomic_t perf_task_events __read_mostly; |
38 | static atomic_t nr_mmap_events __read_mostly; | 48 | static atomic_t nr_mmap_events __read_mostly; |
39 | static atomic_t nr_comm_events __read_mostly; | 49 | static atomic_t nr_comm_events __read_mostly; |
@@ -61,6 +71,12 @@ int sysctl_perf_event_sample_rate __read_mostly = 100000; | |||
61 | 71 | ||
62 | static atomic64_t perf_event_id; | 72 | static atomic64_t perf_event_id; |
63 | 73 | ||
74 | static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx, | ||
75 | enum event_type_t event_type); | ||
76 | |||
77 | static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, | ||
78 | enum event_type_t event_type); | ||
79 | |||
64 | void __weak perf_event_print_debug(void) { } | 80 | void __weak perf_event_print_debug(void) { } |
65 | 81 | ||
66 | extern __weak const char *perf_pmu_name(void) | 82 | extern __weak const char *perf_pmu_name(void) |
@@ -68,6 +84,11 @@ extern __weak const char *perf_pmu_name(void) | |||
68 | return "pmu"; | 84 | return "pmu"; |
69 | } | 85 | } |
70 | 86 | ||
87 | static inline u64 perf_clock(void) | ||
88 | { | ||
89 | return local_clock(); | ||
90 | } | ||
91 | |||
71 | void perf_pmu_disable(struct pmu *pmu) | 92 | void perf_pmu_disable(struct pmu *pmu) |
72 | { | 93 | { |
73 | int *count = this_cpu_ptr(pmu->pmu_disable_count); | 94 | int *count = this_cpu_ptr(pmu->pmu_disable_count); |
@@ -132,6 +153,28 @@ static void unclone_ctx(struct perf_event_context *ctx) | |||
132 | } | 153 | } |
133 | } | 154 | } |
134 | 155 | ||
156 | static u32 perf_event_pid(struct perf_event *event, struct task_struct *p) | ||
157 | { | ||
158 | /* | ||
159 | * only top level events have the pid namespace they were created in | ||
160 | */ | ||
161 | if (event->parent) | ||
162 | event = event->parent; | ||
163 | |||
164 | return task_tgid_nr_ns(p, event->ns); | ||
165 | } | ||
166 | |||
167 | static u32 perf_event_tid(struct perf_event *event, struct task_struct *p) | ||
168 | { | ||
169 | /* | ||
170 | * only top level events have the pid namespace they were created in | ||
171 | */ | ||
172 | if (event->parent) | ||
173 | event = event->parent; | ||
174 | |||
175 | return task_pid_nr_ns(p, event->ns); | ||
176 | } | ||
177 | |||
135 | /* | 178 | /* |
136 | * If we inherit events we want to return the parent event id | 179 | * If we inherit events we want to return the parent event id |
137 | * to userspace. | 180 | * to userspace. |
@@ -214,11 +257,6 @@ static void perf_unpin_context(struct perf_event_context *ctx) | |||
214 | put_ctx(ctx); | 257 | put_ctx(ctx); |
215 | } | 258 | } |
216 | 259 | ||
217 | static inline u64 perf_clock(void) | ||
218 | { | ||
219 | return local_clock(); | ||
220 | } | ||
221 | |||
222 | /* | 260 | /* |
223 | * Update the record of the current time in a context. | 261 | * Update the record of the current time in a context. |
224 | */ | 262 | */ |
@@ -230,6 +268,12 @@ static void update_context_time(struct perf_event_context *ctx) | |||
230 | ctx->timestamp = now; | 268 | ctx->timestamp = now; |
231 | } | 269 | } |
232 | 270 | ||
271 | static u64 perf_event_time(struct perf_event *event) | ||
272 | { | ||
273 | struct perf_event_context *ctx = event->ctx; | ||
274 | return ctx ? ctx->time : 0; | ||
275 | } | ||
276 | |||
233 | /* | 277 | /* |
234 | * Update the total_time_enabled and total_time_running fields for a event. | 278 | * Update the total_time_enabled and total_time_running fields for a event. |
235 | */ | 279 | */ |
@@ -243,7 +287,7 @@ static void update_event_times(struct perf_event *event) | |||
243 | return; | 287 | return; |
244 | 288 | ||
245 | if (ctx->is_active) | 289 | if (ctx->is_active) |
246 | run_end = ctx->time; | 290 | run_end = perf_event_time(event); |
247 | else | 291 | else |
248 | run_end = event->tstamp_stopped; | 292 | run_end = event->tstamp_stopped; |
249 | 293 | ||
@@ -252,7 +296,7 @@ static void update_event_times(struct perf_event *event) | |||
252 | if (event->state == PERF_EVENT_STATE_INACTIVE) | 296 | if (event->state == PERF_EVENT_STATE_INACTIVE) |
253 | run_end = event->tstamp_stopped; | 297 | run_end = event->tstamp_stopped; |
254 | else | 298 | else |
255 | run_end = ctx->time; | 299 | run_end = perf_event_time(event); |
256 | 300 | ||
257 | event->total_time_running = run_end - event->tstamp_running; | 301 | event->total_time_running = run_end - event->tstamp_running; |
258 | } | 302 | } |
@@ -311,9 +355,84 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) | |||
311 | ctx->nr_stat++; | 355 | ctx->nr_stat++; |
312 | } | 356 | } |
313 | 357 | ||
358 | /* | ||
359 | * Called at perf_event creation and when events are attached/detached from a | ||
360 | * group. | ||
361 | */ | ||
362 | static void perf_event__read_size(struct perf_event *event) | ||
363 | { | ||
364 | int entry = sizeof(u64); /* value */ | ||
365 | int size = 0; | ||
366 | int nr = 1; | ||
367 | |||
368 | if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) | ||
369 | size += sizeof(u64); | ||
370 | |||
371 | if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) | ||
372 | size += sizeof(u64); | ||
373 | |||
374 | if (event->attr.read_format & PERF_FORMAT_ID) | ||
375 | entry += sizeof(u64); | ||
376 | |||
377 | if (event->attr.read_format & PERF_FORMAT_GROUP) { | ||
378 | nr += event->group_leader->nr_siblings; | ||
379 | size += sizeof(u64); | ||
380 | } | ||
381 | |||
382 | size += entry * nr; | ||
383 | event->read_size = size; | ||
384 | } | ||
385 | |||
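A worked example of the sizing above (illustrative, not part of the patch): a group leader with two siblings and read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID | PERF_FORMAT_TOTAL_TIME_ENABLED reads back as

    struct {
            u64 nr;              /* 8: PERF_FORMAT_GROUP                */
            u64 time_enabled;    /* 8: PERF_FORMAT_TOTAL_TIME_ENABLED   */
            struct {
                    u64 value;   /* 8 per event                         */
                    u64 id;      /* 8 per event: PERF_FORMAT_ID         */
            } cnt[3];            /* leader + 2 siblings                 */
    };                           /* read_size = 8 + 8 + 3*16 = 64 bytes */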
386 | static void perf_event__header_size(struct perf_event *event) | ||
387 | { | ||
388 | struct perf_sample_data *data; | ||
389 | u64 sample_type = event->attr.sample_type; | ||
390 | u16 size = 0; | ||
391 | |||
392 | perf_event__read_size(event); | ||
393 | |||
394 | if (sample_type & PERF_SAMPLE_IP) | ||
395 | size += sizeof(data->ip); | ||
396 | |||
397 | if (sample_type & PERF_SAMPLE_ADDR) | ||
398 | size += sizeof(data->addr); | ||
399 | |||
400 | if (sample_type & PERF_SAMPLE_PERIOD) | ||
401 | size += sizeof(data->period); | ||
402 | |||
403 | if (sample_type & PERF_SAMPLE_READ) | ||
404 | size += event->read_size; | ||
405 | |||
406 | event->header_size = size; | ||
407 | } | ||
408 | |||
409 | static void perf_event__id_header_size(struct perf_event *event) | ||
410 | { | ||
411 | struct perf_sample_data *data; | ||
412 | u64 sample_type = event->attr.sample_type; | ||
413 | u16 size = 0; | ||
414 | |||
415 | if (sample_type & PERF_SAMPLE_TID) | ||
416 | size += sizeof(data->tid_entry); | ||
417 | |||
418 | if (sample_type & PERF_SAMPLE_TIME) | ||
419 | size += sizeof(data->time); | ||
420 | |||
421 | if (sample_type & PERF_SAMPLE_ID) | ||
422 | size += sizeof(data->id); | ||
423 | |||
424 | if (sample_type & PERF_SAMPLE_STREAM_ID) | ||
425 | size += sizeof(data->stream_id); | ||
426 | |||
427 | if (sample_type & PERF_SAMPLE_CPU) | ||
428 | size += sizeof(data->cpu_entry); | ||
429 | |||
430 | event->id_header_size = size; | ||
431 | } | ||
432 | |||
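These cached sizes let the output fast path (including NMI context) size a record without re-walking the attribute bits every time. For example (illustrative), with sample_type = PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_CPU:

    /* id_header_size = sizeof(tid_entry) + sizeof(time) + sizeof(cpu_entry)
     *                = 8 + 8 + 8 = 24 bytes, appended to every record when
     *                  attr.sample_id_all is set (and always for samples). */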
314 | static void perf_group_attach(struct perf_event *event) | 433 | static void perf_group_attach(struct perf_event *event) |
315 | { | 434 | { |
316 | struct perf_event *group_leader = event->group_leader; | 435 | struct perf_event *group_leader = event->group_leader, *pos; |
317 | 436 | ||
318 | /* | 437 | /* |
319 | * We can have double attach due to group movement in perf_event_open. | 438 | * We can have double attach due to group movement in perf_event_open. |
@@ -332,6 +451,11 @@ static void perf_group_attach(struct perf_event *event) | |||
332 | 451 | ||
333 | list_add_tail(&event->group_entry, &group_leader->sibling_list); | 452 | list_add_tail(&event->group_entry, &group_leader->sibling_list); |
334 | group_leader->nr_siblings++; | 453 | group_leader->nr_siblings++; |
454 | |||
455 | perf_event__header_size(group_leader); | ||
456 | |||
457 | list_for_each_entry(pos, &group_leader->sibling_list, group_entry) | ||
458 | perf_event__header_size(pos); | ||
335 | } | 459 | } |
336 | 460 | ||
337 | /* | 461 | /* |
@@ -390,7 +514,7 @@ static void perf_group_detach(struct perf_event *event) | |||
390 | if (event->group_leader != event) { | 514 | if (event->group_leader != event) { |
391 | list_del_init(&event->group_entry); | 515 | list_del_init(&event->group_entry); |
392 | event->group_leader->nr_siblings--; | 516 | event->group_leader->nr_siblings--; |
393 | return; | 517 | goto out; |
394 | } | 518 | } |
395 | 519 | ||
396 | if (!list_empty(&event->group_entry)) | 520 | if (!list_empty(&event->group_entry)) |
@@ -409,6 +533,12 @@ static void perf_group_detach(struct perf_event *event) | |||
409 | /* Inherit group flags from the previous leader */ | 533 | /* Inherit group flags from the previous leader */ |
410 | sibling->group_flags = event->group_flags; | 534 | sibling->group_flags = event->group_flags; |
411 | } | 535 | } |
536 | |||
537 | out: | ||
538 | perf_event__header_size(event->group_leader); | ||
539 | |||
540 | list_for_each_entry(tmp, &event->group_leader->sibling_list, group_entry) | ||
541 | perf_event__header_size(tmp); | ||
412 | } | 542 | } |
413 | 543 | ||
414 | static inline int | 544 | static inline int |
@@ -422,6 +552,7 @@ event_sched_out(struct perf_event *event, | |||
422 | struct perf_cpu_context *cpuctx, | 552 | struct perf_cpu_context *cpuctx, |
423 | struct perf_event_context *ctx) | 553 | struct perf_event_context *ctx) |
424 | { | 554 | { |
555 | u64 tstamp = perf_event_time(event); | ||
425 | u64 delta; | 556 | u64 delta; |
426 | /* | 557 | /* |
427 | * An event which could not be activated because of | 558 | * An event which could not be activated because of |
@@ -433,7 +564,7 @@ event_sched_out(struct perf_event *event, | |||
433 | && !event_filter_match(event)) { | 564 | && !event_filter_match(event)) { |
434 | delta = ctx->time - event->tstamp_stopped; | 565 | delta = ctx->time - event->tstamp_stopped; |
435 | event->tstamp_running += delta; | 566 | event->tstamp_running += delta; |
436 | event->tstamp_stopped = ctx->time; | 567 | event->tstamp_stopped = tstamp; |
437 | } | 568 | } |
438 | 569 | ||
439 | if (event->state != PERF_EVENT_STATE_ACTIVE) | 570 | if (event->state != PERF_EVENT_STATE_ACTIVE) |
@@ -444,7 +575,7 @@ event_sched_out(struct perf_event *event, | |||
444 | event->pending_disable = 0; | 575 | event->pending_disable = 0; |
445 | event->state = PERF_EVENT_STATE_OFF; | 576 | event->state = PERF_EVENT_STATE_OFF; |
446 | } | 577 | } |
447 | event->tstamp_stopped = ctx->time; | 578 | event->tstamp_stopped = tstamp; |
448 | event->pmu->del(event, 0); | 579 | event->pmu->del(event, 0); |
449 | event->oncpu = -1; | 580 | event->oncpu = -1; |
450 | 581 | ||
@@ -656,6 +787,8 @@ event_sched_in(struct perf_event *event, | |||
656 | struct perf_cpu_context *cpuctx, | 787 | struct perf_cpu_context *cpuctx, |
657 | struct perf_event_context *ctx) | 788 | struct perf_event_context *ctx) |
658 | { | 789 | { |
790 | u64 tstamp = perf_event_time(event); | ||
791 | |||
659 | if (event->state <= PERF_EVENT_STATE_OFF) | 792 | if (event->state <= PERF_EVENT_STATE_OFF) |
660 | return 0; | 793 | return 0; |
661 | 794 | ||
@@ -672,7 +805,9 @@ event_sched_in(struct perf_event *event, | |||
672 | return -EAGAIN; | 805 | return -EAGAIN; |
673 | } | 806 | } |
674 | 807 | ||
675 | event->tstamp_running += ctx->time - event->tstamp_stopped; | 808 | event->tstamp_running += tstamp - event->tstamp_stopped; |
809 | |||
810 | event->shadow_ctx_time = tstamp - ctx->timestamp; | ||
676 | 811 | ||
677 | if (!is_software_event(event)) | 812 | if (!is_software_event(event)) |
678 | cpuctx->active_oncpu++; | 813 | cpuctx->active_oncpu++; |
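The new shadow_ctx_time field exists so that perf_output_read() can later recover enabled/running times from NMI context without taking ctx->lock. The arithmetic, spelled out (commentary, not patch text):

    /*
     * At sched-in:    shadow_ctx_time = ctx->time - ctx->timestamp
     *                 (tstamp == ctx->time; ctx->timestamp is the raw
     *                  perf_clock() value at the last update_context_time())
     *
     * At sample time (perf_output_read):
     *                 ctx_time = shadow_ctx_time + perf_clock()
     *                          = ctx->time + (now - ctx->timestamp)
     *                 i.e. an estimate of what ctx->time would read now, so
     *                 enabled = ctx_time - tstamp_enabled
     *                 running = ctx_time - tstamp_running
     */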
@@ -784,11 +919,13 @@ static int group_can_go_on(struct perf_event *event, | |||
784 | static void add_event_to_ctx(struct perf_event *event, | 919 | static void add_event_to_ctx(struct perf_event *event, |
785 | struct perf_event_context *ctx) | 920 | struct perf_event_context *ctx) |
786 | { | 921 | { |
922 | u64 tstamp = perf_event_time(event); | ||
923 | |||
787 | list_add_event(event, ctx); | 924 | list_add_event(event, ctx); |
788 | perf_group_attach(event); | 925 | perf_group_attach(event); |
789 | event->tstamp_enabled = ctx->time; | 926 | event->tstamp_enabled = tstamp; |
790 | event->tstamp_running = ctx->time; | 927 | event->tstamp_running = tstamp; |
791 | event->tstamp_stopped = ctx->time; | 928 | event->tstamp_stopped = tstamp; |
792 | } | 929 | } |
793 | 930 | ||
794 | /* | 931 | /* |
@@ -823,7 +960,7 @@ static void __perf_install_in_context(void *info) | |||
823 | 960 | ||
824 | add_event_to_ctx(event, ctx); | 961 | add_event_to_ctx(event, ctx); |
825 | 962 | ||
826 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | 963 | if (!event_filter_match(event)) |
827 | goto unlock; | 964 | goto unlock; |
828 | 965 | ||
829 | /* | 966 | /* |
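Several hunks in this patch replace the open-coded CPU test with event_filter_match(); its body is outside this diff, but the checks it replaces suggest it amounts to:

    static inline int event_filter_match(struct perf_event *event)
    {
            return event->cpu == -1 || event->cpu == smp_processor_id();
    }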
@@ -928,14 +1065,13 @@ static void __perf_event_mark_enabled(struct perf_event *event, | |||
928 | struct perf_event_context *ctx) | 1065 | struct perf_event_context *ctx) |
929 | { | 1066 | { |
930 | struct perf_event *sub; | 1067 | struct perf_event *sub; |
1068 | u64 tstamp = perf_event_time(event); | ||
931 | 1069 | ||
932 | event->state = PERF_EVENT_STATE_INACTIVE; | 1070 | event->state = PERF_EVENT_STATE_INACTIVE; |
933 | event->tstamp_enabled = ctx->time - event->total_time_enabled; | 1071 | event->tstamp_enabled = tstamp - event->total_time_enabled; |
934 | list_for_each_entry(sub, &event->sibling_list, group_entry) { | 1072 | list_for_each_entry(sub, &event->sibling_list, group_entry) { |
935 | if (sub->state >= PERF_EVENT_STATE_INACTIVE) { | 1073 | if (sub->state >= PERF_EVENT_STATE_INACTIVE) |
936 | sub->tstamp_enabled = | 1074 | sub->tstamp_enabled = tstamp - sub->total_time_enabled; |
937 | ctx->time - sub->total_time_enabled; | ||
938 | } | ||
939 | } | 1075 | } |
940 | } | 1076 | } |
941 | 1077 | ||
@@ -968,7 +1104,7 @@ static void __perf_event_enable(void *info) | |||
968 | goto unlock; | 1104 | goto unlock; |
969 | __perf_event_mark_enabled(event, ctx); | 1105 | __perf_event_mark_enabled(event, ctx); |
970 | 1106 | ||
971 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | 1107 | if (!event_filter_match(event)) |
972 | goto unlock; | 1108 | goto unlock; |
973 | 1109 | ||
974 | /* | 1110 | /* |
@@ -1070,7 +1206,7 @@ static int perf_event_refresh(struct perf_event *event, int refresh) | |||
1070 | /* | 1206 | /* |
1071 | * not supported on inherited events | 1207 | * not supported on inherited events |
1072 | */ | 1208 | */ |
1073 | if (event->attr.inherit) | 1209 | if (event->attr.inherit || !is_sampling_event(event)) |
1074 | return -EINVAL; | 1210 | return -EINVAL; |
1075 | 1211 | ||
1076 | atomic_add(refresh, &event->event_limit); | 1212 | atomic_add(refresh, &event->event_limit); |
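is_sampling_event() is likewise a helper defined outside this diff (in perf_event.h); judging from the sample_period tests it replaces throughout the patch, it is presumably just:

    static inline bool is_sampling_event(struct perf_event *event)
    {
            return event->attr.sample_period != 0;
    }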
@@ -1079,12 +1215,6 @@ static int perf_event_refresh(struct perf_event *event, int refresh) | |||
1079 | return 0; | 1215 | return 0; |
1080 | } | 1216 | } |
1081 | 1217 | ||
1082 | enum event_type_t { | ||
1083 | EVENT_FLEXIBLE = 0x1, | ||
1084 | EVENT_PINNED = 0x2, | ||
1085 | EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED, | ||
1086 | }; | ||
1087 | |||
1088 | static void ctx_sched_out(struct perf_event_context *ctx, | 1218 | static void ctx_sched_out(struct perf_event_context *ctx, |
1089 | struct perf_cpu_context *cpuctx, | 1219 | struct perf_cpu_context *cpuctx, |
1090 | enum event_type_t event_type) | 1220 | enum event_type_t event_type) |
@@ -1284,8 +1414,6 @@ void __perf_event_task_sched_out(struct task_struct *task, | |||
1284 | { | 1414 | { |
1285 | int ctxn; | 1415 | int ctxn; |
1286 | 1416 | ||
1287 | perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); | ||
1288 | |||
1289 | for_each_task_context_nr(ctxn) | 1417 | for_each_task_context_nr(ctxn) |
1290 | perf_event_context_sched_out(task, ctxn, next); | 1418 | perf_event_context_sched_out(task, ctxn, next); |
1291 | } | 1419 | } |
@@ -1323,7 +1451,7 @@ ctx_pinned_sched_in(struct perf_event_context *ctx, | |||
1323 | list_for_each_entry(event, &ctx->pinned_groups, group_entry) { | 1451 | list_for_each_entry(event, &ctx->pinned_groups, group_entry) { |
1324 | if (event->state <= PERF_EVENT_STATE_OFF) | 1452 | if (event->state <= PERF_EVENT_STATE_OFF) |
1325 | continue; | 1453 | continue; |
1326 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | 1454 | if (!event_filter_match(event)) |
1327 | continue; | 1455 | continue; |
1328 | 1456 | ||
1329 | if (group_can_go_on(event, cpuctx, 1)) | 1457 | if (group_can_go_on(event, cpuctx, 1)) |
@@ -1355,7 +1483,7 @@ ctx_flexible_sched_in(struct perf_event_context *ctx, | |||
1355 | * Listen to the 'cpu' scheduling filter constraint | 1483 | * Listen to the 'cpu' scheduling filter constraint |
1356 | * of events: | 1484 | * of events: |
1357 | */ | 1485 | */ |
1358 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | 1486 | if (!event_filter_match(event)) |
1359 | continue; | 1487 | continue; |
1360 | 1488 | ||
1361 | if (group_can_go_on(event, cpuctx, can_add_hw)) { | 1489 | if (group_can_go_on(event, cpuctx, can_add_hw)) { |
@@ -1582,7 +1710,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period) | |||
1582 | if (event->state != PERF_EVENT_STATE_ACTIVE) | 1710 | if (event->state != PERF_EVENT_STATE_ACTIVE) |
1583 | continue; | 1711 | continue; |
1584 | 1712 | ||
1585 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | 1713 | if (!event_filter_match(event)) |
1586 | continue; | 1714 | continue; |
1587 | 1715 | ||
1588 | hwc = &event->hw; | 1716 | hwc = &event->hw; |
@@ -1619,8 +1747,12 @@ static void rotate_ctx(struct perf_event_context *ctx) | |||
1619 | { | 1747 | { |
1620 | raw_spin_lock(&ctx->lock); | 1748 | raw_spin_lock(&ctx->lock); |
1621 | 1749 | ||
1622 | /* Rotate the first entry last of non-pinned groups */ | 1750 | /* |
1623 | list_rotate_left(&ctx->flexible_groups); | 1751 | * Rotate the first entry last of non-pinned groups. Rotation might be |
1752 | * disabled by the inheritance code. | ||
1753 | */ | ||
1754 | if (!ctx->rotate_disable) | ||
1755 | list_rotate_left(&ctx->flexible_groups); | ||
1624 | 1756 | ||
1625 | raw_spin_unlock(&ctx->lock); | 1757 | raw_spin_unlock(&ctx->lock); |
1626 | } | 1758 | } |
@@ -2069,13 +2201,6 @@ find_lively_task_by_vpid(pid_t vpid) | |||
2069 | if (!task) | 2201 | if (!task) |
2070 | return ERR_PTR(-ESRCH); | 2202 | return ERR_PTR(-ESRCH); |
2071 | 2203 | ||
2072 | /* | ||
2073 | * Can't attach events to a dying task. | ||
2074 | */ | ||
2075 | err = -ESRCH; | ||
2076 | if (task->flags & PF_EXITING) | ||
2077 | goto errout; | ||
2078 | |||
2079 | /* Reuse ptrace permission checks for now. */ | 2204 | /* Reuse ptrace permission checks for now. */ |
2080 | err = -EACCES; | 2205 | err = -EACCES; |
2081 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) | 2206 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) |
@@ -2096,14 +2221,11 @@ find_get_context(struct pmu *pmu, struct task_struct *task, int cpu) | |||
2096 | unsigned long flags; | 2221 | unsigned long flags; |
2097 | int ctxn, err; | 2222 | int ctxn, err; |
2098 | 2223 | ||
2099 | if (!task && cpu != -1) { | 2224 | if (!task) { |
2100 | /* Must be root to operate on a CPU event: */ | 2225 | /* Must be root to operate on a CPU event: */ |
2101 | if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) | 2226 | if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) |
2102 | return ERR_PTR(-EACCES); | 2227 | return ERR_PTR(-EACCES); |
2103 | 2228 | ||
2104 | if (cpu < 0 || cpu >= nr_cpumask_bits) | ||
2105 | return ERR_PTR(-EINVAL); | ||
2106 | |||
2107 | /* | 2229 | /* |
2108 | * We could be clever and allow to attach a event to an | 2230 | * We could be clever and allow to attach a event to an |
2109 | * offline CPU and activate it when the CPU comes up, but | 2231 | * offline CPU and activate it when the CPU comes up, but |
@@ -2139,14 +2261,27 @@ retry: | |||
2139 | 2261 | ||
2140 | get_ctx(ctx); | 2262 | get_ctx(ctx); |
2141 | 2263 | ||
2142 | if (cmpxchg(&task->perf_event_ctxp[ctxn], NULL, ctx)) { | 2264 | err = 0; |
2143 | /* | 2265 | mutex_lock(&task->perf_event_mutex); |
2144 | * We raced with some other task; use | 2266 | /* |
2145 | * the context they set. | 2267 | * If it has already passed perf_event_exit_task(). |
2146 | */ | 2268 | * we must see PF_EXITING, it takes this mutex too. |
2269 | */ | ||
2270 | if (task->flags & PF_EXITING) | ||
2271 | err = -ESRCH; | ||
2272 | else if (task->perf_event_ctxp[ctxn]) | ||
2273 | err = -EAGAIN; | ||
2274 | else | ||
2275 | rcu_assign_pointer(task->perf_event_ctxp[ctxn], ctx); | ||
2276 | mutex_unlock(&task->perf_event_mutex); | ||
2277 | |||
2278 | if (unlikely(err)) { | ||
2147 | put_task_struct(task); | 2279 | put_task_struct(task); |
2148 | kfree(ctx); | 2280 | kfree(ctx); |
2149 | goto retry; | 2281 | |
2282 | if (err == -EAGAIN) | ||
2283 | goto retry; | ||
2284 | goto errout; | ||
2150 | } | 2285 | } |
2151 | } | 2286 | } |
2152 | 2287 | ||
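The net effect of the two hunks above (the removed PF_EXITING check in find_lively_task_by_vpid() plus the new mutex-guarded install) is the following protocol, shown schematically:

    /*
     * mutex_lock(&task->perf_event_mutex);
     * if (task->flags & PF_EXITING)          -> -ESRCH: the task already ran
     *                                           perf_event_exit_task(), which
     *                                           takes the same mutex
     * else if (task->perf_event_ctxp[ctxn])  -> -EAGAIN: lost the race to a
     *                                           concurrent installer; retry
     * else
     *         rcu_assign_pointer(task->perf_event_ctxp[ctxn], ctx);
     * mutex_unlock(&task->perf_event_mutex);
     *
     * This closes the attach-to-a-dying-task race that the old unlocked
     * cmpxchg() plus the early PF_EXITING test could not.
     */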
@@ -2232,11 +2367,6 @@ int perf_event_release_kernel(struct perf_event *event) | |||
2232 | raw_spin_unlock_irq(&ctx->lock); | 2367 | raw_spin_unlock_irq(&ctx->lock); |
2233 | mutex_unlock(&ctx->mutex); | 2368 | mutex_unlock(&ctx->mutex); |
2234 | 2369 | ||
2235 | mutex_lock(&event->owner->perf_event_mutex); | ||
2236 | list_del_init(&event->owner_entry); | ||
2237 | mutex_unlock(&event->owner->perf_event_mutex); | ||
2238 | put_task_struct(event->owner); | ||
2239 | |||
2240 | free_event(event); | 2370 | free_event(event); |
2241 | 2371 | ||
2242 | return 0; | 2372 | return 0; |
@@ -2249,35 +2379,44 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel); | |||
2249 | static int perf_release(struct inode *inode, struct file *file) | 2379 | static int perf_release(struct inode *inode, struct file *file) |
2250 | { | 2380 | { |
2251 | struct perf_event *event = file->private_data; | 2381 | struct perf_event *event = file->private_data; |
2382 | struct task_struct *owner; | ||
2252 | 2383 | ||
2253 | file->private_data = NULL; | 2384 | file->private_data = NULL; |
2254 | 2385 | ||
2255 | return perf_event_release_kernel(event); | 2386 | rcu_read_lock(); |
2256 | } | 2387 | owner = ACCESS_ONCE(event->owner); |
2257 | 2388 | /* | |
2258 | static int perf_event_read_size(struct perf_event *event) | 2389 | * Matches the smp_wmb() in perf_event_exit_task(). If we observe |
2259 | { | 2390 | * !owner it means the list deletion is complete and we can indeed |
2260 | int entry = sizeof(u64); /* value */ | 2391 | * free this event, otherwise we need to serialize on |
2261 | int size = 0; | 2392 | * owner->perf_event_mutex. |
2262 | int nr = 1; | 2393 | */ |
2263 | 2394 | smp_read_barrier_depends(); | |
2264 | if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) | 2395 | if (owner) { |
2265 | size += sizeof(u64); | 2396 | /* |
2266 | 2397 | * Since delayed_put_task_struct() also drops the last | |
2267 | if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) | 2398 | * task reference we can safely take a new reference |
2268 | size += sizeof(u64); | 2399 | * while holding the rcu_read_lock(). |
2269 | 2400 | */ | |
2270 | if (event->attr.read_format & PERF_FORMAT_ID) | 2401 | get_task_struct(owner); |
2271 | entry += sizeof(u64); | ||
2272 | |||
2273 | if (event->attr.read_format & PERF_FORMAT_GROUP) { | ||
2274 | nr += event->group_leader->nr_siblings; | ||
2275 | size += sizeof(u64); | ||
2276 | } | 2402 | } |
2403 | rcu_read_unlock(); | ||
2277 | 2404 | ||
2278 | size += entry * nr; | 2405 | if (owner) { |
2406 | mutex_lock(&owner->perf_event_mutex); | ||
2407 | /* | ||
2408 | * We have to re-check the event->owner field, if it is cleared | ||
2409 | * we raced with perf_event_exit_task(), acquiring the mutex | ||
2410 | * ensured they're done, and we can proceed with freeing the | ||
2411 | * event. | ||
2412 | */ | ||
2413 | if (event->owner) | ||
2414 | list_del_init(&event->owner_entry); | ||
2415 | mutex_unlock(&owner->perf_event_mutex); | ||
2416 | put_task_struct(owner); | ||
2417 | } | ||
2279 | 2418 | ||
2280 | return size; | 2419 | return perf_event_release_kernel(event); |
2281 | } | 2420 | } |
2282 | 2421 | ||
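The smp_wmb() referred to in the comment lives in perf_event_exit_task(), which is not among the hunks shown here; a sketch of the assumed pairing side:

    /* In perf_event_exit_task() (assumed, outside this diff):
     *
     *     mutex_lock(&child->perf_event_mutex);
     *     list_for_each_entry_safe(event, tmp, &child->perf_event_list,
     *                              owner_entry) {
     *             list_del_init(&event->owner_entry);
     *             smp_wmb();            // publish the list removal ...
     *             event->owner = NULL;  // ... before clearing owner
     *     }
     *     mutex_unlock(&child->perf_event_mutex);
     */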
2283 | u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) | 2422 | u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) |
@@ -2394,7 +2533,7 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count) | |||
2394 | if (event->state == PERF_EVENT_STATE_ERROR) | 2533 | if (event->state == PERF_EVENT_STATE_ERROR) |
2395 | return 0; | 2534 | return 0; |
2396 | 2535 | ||
2397 | if (count < perf_event_read_size(event)) | 2536 | if (count < event->read_size) |
2398 | return -ENOSPC; | 2537 | return -ENOSPC; |
2399 | 2538 | ||
2400 | WARN_ON_ONCE(event->ctx->parent_ctx); | 2539 | WARN_ON_ONCE(event->ctx->parent_ctx); |
@@ -2480,7 +2619,7 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg) | |||
2480 | int ret = 0; | 2619 | int ret = 0; |
2481 | u64 value; | 2620 | u64 value; |
2482 | 2621 | ||
2483 | if (!event->attr.sample_period) | 2622 | if (!is_sampling_event(event)) |
2484 | return -EINVAL; | 2623 | return -EINVAL; |
2485 | 2624 | ||
2486 | if (copy_from_user(&value, arg, sizeof(value))) | 2625 | if (copy_from_user(&value, arg, sizeof(value))) |
@@ -3271,6 +3410,73 @@ __always_inline void perf_output_copy(struct perf_output_handle *handle, | |||
3271 | } while (len); | 3410 | } while (len); |
3272 | } | 3411 | } |
3273 | 3412 | ||
3413 | static void __perf_event_header__init_id(struct perf_event_header *header, | ||
3414 | struct perf_sample_data *data, | ||
3415 | struct perf_event *event) | ||
3416 | { | ||
3417 | u64 sample_type = event->attr.sample_type; | ||
3418 | |||
3419 | data->type = sample_type; | ||
3420 | header->size += event->id_header_size; | ||
3421 | |||
3422 | if (sample_type & PERF_SAMPLE_TID) { | ||
3423 | /* namespace issues */ | ||
3424 | data->tid_entry.pid = perf_event_pid(event, current); | ||
3425 | data->tid_entry.tid = perf_event_tid(event, current); | ||
3426 | } | ||
3427 | |||
3428 | if (sample_type & PERF_SAMPLE_TIME) | ||
3429 | data->time = perf_clock(); | ||
3430 | |||
3431 | if (sample_type & PERF_SAMPLE_ID) | ||
3432 | data->id = primary_event_id(event); | ||
3433 | |||
3434 | if (sample_type & PERF_SAMPLE_STREAM_ID) | ||
3435 | data->stream_id = event->id; | ||
3436 | |||
3437 | if (sample_type & PERF_SAMPLE_CPU) { | ||
3438 | data->cpu_entry.cpu = raw_smp_processor_id(); | ||
3439 | data->cpu_entry.reserved = 0; | ||
3440 | } | ||
3441 | } | ||
3442 | |||
3443 | static void perf_event_header__init_id(struct perf_event_header *header, | ||
3444 | struct perf_sample_data *data, | ||
3445 | struct perf_event *event) | ||
3446 | { | ||
3447 | if (event->attr.sample_id_all) | ||
3448 | __perf_event_header__init_id(header, data, event); | ||
3449 | } | ||
3450 | |||
3451 | static void __perf_event__output_id_sample(struct perf_output_handle *handle, | ||
3452 | struct perf_sample_data *data) | ||
3453 | { | ||
3454 | u64 sample_type = data->type; | ||
3455 | |||
3456 | if (sample_type & PERF_SAMPLE_TID) | ||
3457 | perf_output_put(handle, data->tid_entry); | ||
3458 | |||
3459 | if (sample_type & PERF_SAMPLE_TIME) | ||
3460 | perf_output_put(handle, data->time); | ||
3461 | |||
3462 | if (sample_type & PERF_SAMPLE_ID) | ||
3463 | perf_output_put(handle, data->id); | ||
3464 | |||
3465 | if (sample_type & PERF_SAMPLE_STREAM_ID) | ||
3466 | perf_output_put(handle, data->stream_id); | ||
3467 | |||
3468 | if (sample_type & PERF_SAMPLE_CPU) | ||
3469 | perf_output_put(handle, data->cpu_entry); | ||
3470 | } | ||
3471 | |||
3472 | static void perf_event__output_id_sample(struct perf_event *event, | ||
3473 | struct perf_output_handle *handle, | ||
3474 | struct perf_sample_data *sample) | ||
3475 | { | ||
3476 | if (event->attr.sample_id_all) | ||
3477 | __perf_event__output_id_sample(handle, sample); | ||
3478 | } | ||
3479 | |||
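With attr.sample_id_all set, every non-sample record grows a trailer built by these helpers. Illustrative layout for sample_type = PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_CPU on a PERF_RECORD_COMM:

    /* struct perf_event_header header;  // header.size includes the trailer
     * u32  pid, tid;
     * char comm[];                      // padded to a multiple of 8
     * // ---- appended by perf_event__output_id_sample() ----
     * u32  pid, tid;                    // PERF_SAMPLE_TID
     * u64  time;                        // PERF_SAMPLE_TIME
     * u32  cpu, res;                    // PERF_SAMPLE_CPU
     */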
3274 | int perf_output_begin(struct perf_output_handle *handle, | 3480 | int perf_output_begin(struct perf_output_handle *handle, |
3275 | struct perf_event *event, unsigned int size, | 3481 | struct perf_event *event, unsigned int size, |
3276 | int nmi, int sample) | 3482 | int nmi, int sample) |
@@ -3278,6 +3484,7 @@ int perf_output_begin(struct perf_output_handle *handle, | |||
3278 | struct perf_buffer *buffer; | 3484 | struct perf_buffer *buffer; |
3279 | unsigned long tail, offset, head; | 3485 | unsigned long tail, offset, head; |
3280 | int have_lost; | 3486 | int have_lost; |
3487 | struct perf_sample_data sample_data; | ||
3281 | struct { | 3488 | struct { |
3282 | struct perf_event_header header; | 3489 | struct perf_event_header header; |
3283 | u64 id; | 3490 | u64 id; |
@@ -3304,8 +3511,12 @@ int perf_output_begin(struct perf_output_handle *handle, | |||
3304 | goto out; | 3511 | goto out; |
3305 | 3512 | ||
3306 | have_lost = local_read(&buffer->lost); | 3513 | have_lost = local_read(&buffer->lost); |
3307 | if (have_lost) | 3514 | if (have_lost) { |
3308 | size += sizeof(lost_event); | 3515 | lost_event.header.size = sizeof(lost_event); |
3516 | perf_event_header__init_id(&lost_event.header, &sample_data, | ||
3517 | event); | ||
3518 | size += lost_event.header.size; | ||
3519 | } | ||
3309 | 3520 | ||
3310 | perf_output_get_handle(handle); | 3521 | perf_output_get_handle(handle); |
3311 | 3522 | ||
@@ -3336,11 +3547,11 @@ int perf_output_begin(struct perf_output_handle *handle, | |||
3336 | if (have_lost) { | 3547 | if (have_lost) { |
3337 | lost_event.header.type = PERF_RECORD_LOST; | 3548 | lost_event.header.type = PERF_RECORD_LOST; |
3338 | lost_event.header.misc = 0; | 3549 | lost_event.header.misc = 0; |
3339 | lost_event.header.size = sizeof(lost_event); | ||
3340 | lost_event.id = event->id; | 3550 | lost_event.id = event->id; |
3341 | lost_event.lost = local_xchg(&buffer->lost, 0); | 3551 | lost_event.lost = local_xchg(&buffer->lost, 0); |
3342 | 3552 | ||
3343 | perf_output_put(handle, lost_event); | 3553 | perf_output_put(handle, lost_event); |
3554 | perf_event__output_id_sample(event, handle, &sample_data); | ||
3344 | } | 3555 | } |
3345 | 3556 | ||
3346 | return 0; | 3557 | return 0; |
@@ -3373,30 +3584,9 @@ void perf_output_end(struct perf_output_handle *handle) | |||
3373 | rcu_read_unlock(); | 3584 | rcu_read_unlock(); |
3374 | } | 3585 | } |
3375 | 3586 | ||
3376 | static u32 perf_event_pid(struct perf_event *event, struct task_struct *p) | ||
3377 | { | ||
3378 | /* | ||
3379 | * only top level events have the pid namespace they were created in | ||
3380 | */ | ||
3381 | if (event->parent) | ||
3382 | event = event->parent; | ||
3383 | |||
3384 | return task_tgid_nr_ns(p, event->ns); | ||
3385 | } | ||
3386 | |||
3387 | static u32 perf_event_tid(struct perf_event *event, struct task_struct *p) | ||
3388 | { | ||
3389 | /* | ||
3390 | * only top level events have the pid namespace they were created in | ||
3391 | */ | ||
3392 | if (event->parent) | ||
3393 | event = event->parent; | ||
3394 | |||
3395 | return task_pid_nr_ns(p, event->ns); | ||
3396 | } | ||
3397 | |||
3398 | static void perf_output_read_one(struct perf_output_handle *handle, | 3587 | static void perf_output_read_one(struct perf_output_handle *handle, |
3399 | struct perf_event *event) | 3588 | struct perf_event *event, |
3589 | u64 enabled, u64 running) | ||
3400 | { | 3590 | { |
3401 | u64 read_format = event->attr.read_format; | 3591 | u64 read_format = event->attr.read_format; |
3402 | u64 values[4]; | 3592 | u64 values[4]; |
@@ -3404,11 +3594,11 @@ static void perf_output_read_one(struct perf_output_handle *handle, | |||
3404 | 3594 | ||
3405 | values[n++] = perf_event_count(event); | 3595 | values[n++] = perf_event_count(event); |
3406 | if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { | 3596 | if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { |
3407 | values[n++] = event->total_time_enabled + | 3597 | values[n++] = enabled + |
3408 | atomic64_read(&event->child_total_time_enabled); | 3598 | atomic64_read(&event->child_total_time_enabled); |
3409 | } | 3599 | } |
3410 | if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { | 3600 | if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { |
3411 | values[n++] = event->total_time_running + | 3601 | values[n++] = running + |
3412 | atomic64_read(&event->child_total_time_running); | 3602 | atomic64_read(&event->child_total_time_running); |
3413 | } | 3603 | } |
3414 | if (read_format & PERF_FORMAT_ID) | 3604 | if (read_format & PERF_FORMAT_ID) |
@@ -3421,7 +3611,8 @@ static void perf_output_read_one(struct perf_output_handle *handle, | |||
3421 | * XXX PERF_FORMAT_GROUP vs inherited events seems difficult. | 3611 | * XXX PERF_FORMAT_GROUP vs inherited events seems difficult. |
3422 | */ | 3612 | */ |
3423 | static void perf_output_read_group(struct perf_output_handle *handle, | 3613 | static void perf_output_read_group(struct perf_output_handle *handle, |
3424 | struct perf_event *event) | 3614 | struct perf_event *event, |
3615 | u64 enabled, u64 running) | ||
3425 | { | 3616 | { |
3426 | struct perf_event *leader = event->group_leader, *sub; | 3617 | struct perf_event *leader = event->group_leader, *sub; |
3427 | u64 read_format = event->attr.read_format; | 3618 | u64 read_format = event->attr.read_format; |
@@ -3431,10 +3622,10 @@ static void perf_output_read_group(struct perf_output_handle *handle, | |||
3431 | values[n++] = 1 + leader->nr_siblings; | 3622 | values[n++] = 1 + leader->nr_siblings; |
3432 | 3623 | ||
3433 | if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) | 3624 | if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) |
3434 | values[n++] = leader->total_time_enabled; | 3625 | values[n++] = enabled; |
3435 | 3626 | ||
3436 | if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) | 3627 | if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) |
3437 | values[n++] = leader->total_time_running; | 3628 | values[n++] = running; |
3438 | 3629 | ||
3439 | if (leader != event) | 3630 | if (leader != event) |
3440 | leader->pmu->read(leader); | 3631 | leader->pmu->read(leader); |
@@ -3459,13 +3650,35 @@ static void perf_output_read_group(struct perf_output_handle *handle, | |||
3459 | } | 3650 | } |
3460 | } | 3651 | } |
3461 | 3652 | ||
3653 | #define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\ | ||
3654 | PERF_FORMAT_TOTAL_TIME_RUNNING) | ||
3655 | |||
3462 | static void perf_output_read(struct perf_output_handle *handle, | 3656 | static void perf_output_read(struct perf_output_handle *handle, |
3463 | struct perf_event *event) | 3657 | struct perf_event *event) |
3464 | { | 3658 | { |
3659 | u64 enabled = 0, running = 0, now, ctx_time; | ||
3660 | u64 read_format = event->attr.read_format; | ||
3661 | |||
3662 | /* | ||
3663 | * compute total_time_enabled, total_time_running | ||
3664 | * based on snapshot values taken when the event | ||
3665 | * was last scheduled in. | ||
3666 | * | ||
3667 | * we cannot simply called update_context_time() | ||
3668 | * because of locking issue as we are called in | ||
3669 | * NMI context | ||
3670 | */ | ||
3671 | if (read_format & PERF_FORMAT_TOTAL_TIMES) { | ||
3672 | now = perf_clock(); | ||
3673 | ctx_time = event->shadow_ctx_time + now; | ||
3674 | enabled = ctx_time - event->tstamp_enabled; | ||
3675 | running = ctx_time - event->tstamp_running; | ||
3676 | } | ||
3677 | |||
3465 | if (event->attr.read_format & PERF_FORMAT_GROUP) | 3678 | if (event->attr.read_format & PERF_FORMAT_GROUP) |
3466 | perf_output_read_group(handle, event); | 3679 | perf_output_read_group(handle, event, enabled, running); |
3467 | else | 3680 | else |
3468 | perf_output_read_one(handle, event); | 3681 | perf_output_read_one(handle, event, enabled, running); |
3469 | } | 3682 | } |
3470 | 3683 | ||
3471 | void perf_output_sample(struct perf_output_handle *handle, | 3684 | void perf_output_sample(struct perf_output_handle *handle, |
@@ -3545,61 +3758,16 @@ void perf_prepare_sample(struct perf_event_header *header, | |||
3545 | { | 3758 | { |
3546 | u64 sample_type = event->attr.sample_type; | 3759 | u64 sample_type = event->attr.sample_type; |
3547 | 3760 | ||
3548 | data->type = sample_type; | ||
3549 | |||
3550 | header->type = PERF_RECORD_SAMPLE; | 3761 | header->type = PERF_RECORD_SAMPLE; |
3551 | header->size = sizeof(*header); | 3762 | header->size = sizeof(*header) + event->header_size; |
3552 | 3763 | ||
3553 | header->misc = 0; | 3764 | header->misc = 0; |
3554 | header->misc |= perf_misc_flags(regs); | 3765 | header->misc |= perf_misc_flags(regs); |
3555 | 3766 | ||
3556 | if (sample_type & PERF_SAMPLE_IP) { | 3767 | __perf_event_header__init_id(header, data, event); |
3557 | data->ip = perf_instruction_pointer(regs); | ||
3558 | |||
3559 | header->size += sizeof(data->ip); | ||
3560 | } | ||
3561 | |||
3562 | if (sample_type & PERF_SAMPLE_TID) { | ||
3563 | /* namespace issues */ | ||
3564 | data->tid_entry.pid = perf_event_pid(event, current); | ||
3565 | data->tid_entry.tid = perf_event_tid(event, current); | ||
3566 | |||
3567 | header->size += sizeof(data->tid_entry); | ||
3568 | } | ||
3569 | |||
3570 | if (sample_type & PERF_SAMPLE_TIME) { | ||
3571 | data->time = perf_clock(); | ||
3572 | |||
3573 | header->size += sizeof(data->time); | ||
3574 | } | ||
3575 | |||
3576 | if (sample_type & PERF_SAMPLE_ADDR) | ||
3577 | header->size += sizeof(data->addr); | ||
3578 | |||
3579 | if (sample_type & PERF_SAMPLE_ID) { | ||
3580 | data->id = primary_event_id(event); | ||
3581 | |||
3582 | header->size += sizeof(data->id); | ||
3583 | } | ||
3584 | 3768 | ||
3585 | if (sample_type & PERF_SAMPLE_STREAM_ID) { | 3769 | if (sample_type & PERF_SAMPLE_IP) |
3586 | data->stream_id = event->id; | 3770 | data->ip = perf_instruction_pointer(regs); |
3587 | |||
3588 | header->size += sizeof(data->stream_id); | ||
3589 | } | ||
3590 | |||
3591 | if (sample_type & PERF_SAMPLE_CPU) { | ||
3592 | data->cpu_entry.cpu = raw_smp_processor_id(); | ||
3593 | data->cpu_entry.reserved = 0; | ||
3594 | |||
3595 | header->size += sizeof(data->cpu_entry); | ||
3596 | } | ||
3597 | |||
3598 | if (sample_type & PERF_SAMPLE_PERIOD) | ||
3599 | header->size += sizeof(data->period); | ||
3600 | |||
3601 | if (sample_type & PERF_SAMPLE_READ) | ||
3602 | header->size += perf_event_read_size(event); | ||
3603 | 3771 | ||
3604 | if (sample_type & PERF_SAMPLE_CALLCHAIN) { | 3772 | if (sample_type & PERF_SAMPLE_CALLCHAIN) { |
3605 | int size = 1; | 3773 | int size = 1; |
@@ -3664,23 +3832,26 @@ perf_event_read_event(struct perf_event *event, | |||
3664 | struct task_struct *task) | 3832 | struct task_struct *task) |
3665 | { | 3833 | { |
3666 | struct perf_output_handle handle; | 3834 | struct perf_output_handle handle; |
3835 | struct perf_sample_data sample; | ||
3667 | struct perf_read_event read_event = { | 3836 | struct perf_read_event read_event = { |
3668 | .header = { | 3837 | .header = { |
3669 | .type = PERF_RECORD_READ, | 3838 | .type = PERF_RECORD_READ, |
3670 | .misc = 0, | 3839 | .misc = 0, |
3671 | .size = sizeof(read_event) + perf_event_read_size(event), | 3840 | .size = sizeof(read_event) + event->read_size, |
3672 | }, | 3841 | }, |
3673 | .pid = perf_event_pid(event, task), | 3842 | .pid = perf_event_pid(event, task), |
3674 | .tid = perf_event_tid(event, task), | 3843 | .tid = perf_event_tid(event, task), |
3675 | }; | 3844 | }; |
3676 | int ret; | 3845 | int ret; |
3677 | 3846 | ||
3847 | perf_event_header__init_id(&read_event.header, &sample, event); | ||
3678 | ret = perf_output_begin(&handle, event, read_event.header.size, 0, 0); | 3848 | ret = perf_output_begin(&handle, event, read_event.header.size, 0, 0); |
3679 | if (ret) | 3849 | if (ret) |
3680 | return; | 3850 | return; |
3681 | 3851 | ||
3682 | perf_output_put(&handle, read_event); | 3852 | perf_output_put(&handle, read_event); |
3683 | perf_output_read(&handle, event); | 3853 | perf_output_read(&handle, event); |
3854 | perf_event__output_id_sample(event, &handle, &sample); | ||
3684 | 3855 | ||
3685 | perf_output_end(&handle); | 3856 | perf_output_end(&handle); |
3686 | } | 3857 | } |
@@ -3710,14 +3881,16 @@ static void perf_event_task_output(struct perf_event *event, | |||
3710 | struct perf_task_event *task_event) | 3881 | struct perf_task_event *task_event) |
3711 | { | 3882 | { |
3712 | struct perf_output_handle handle; | 3883 | struct perf_output_handle handle; |
3884 | struct perf_sample_data sample; | ||
3713 | struct task_struct *task = task_event->task; | 3885 | struct task_struct *task = task_event->task; |
3714 | int size, ret; | 3886 | int ret, size = task_event->event_id.header.size; |
3715 | 3887 | ||
3716 | size = task_event->event_id.header.size; | 3888 | perf_event_header__init_id(&task_event->event_id.header, &sample, event); |
3717 | ret = perf_output_begin(&handle, event, size, 0, 0); | ||
3718 | 3889 | ||
3890 | ret = perf_output_begin(&handle, event, | ||
3891 | task_event->event_id.header.size, 0, 0); | ||
3719 | if (ret) | 3892 | if (ret) |
3720 | return; | 3893 | goto out; |
3721 | 3894 | ||
3722 | task_event->event_id.pid = perf_event_pid(event, task); | 3895 | task_event->event_id.pid = perf_event_pid(event, task); |
3723 | task_event->event_id.ppid = perf_event_pid(event, current); | 3896 | task_event->event_id.ppid = perf_event_pid(event, current); |
@@ -3727,7 +3900,11 @@ static void perf_event_task_output(struct perf_event *event, | |||
3727 | 3900 | ||
3728 | perf_output_put(&handle, task_event->event_id); | 3901 | perf_output_put(&handle, task_event->event_id); |
3729 | 3902 | ||
3903 | perf_event__output_id_sample(event, &handle, &sample); | ||
3904 | |||
3730 | perf_output_end(&handle); | 3905 | perf_output_end(&handle); |
3906 | out: | ||
3907 | task_event->event_id.header.size = size; | ||
3731 | } | 3908 | } |
3732 | 3909 | ||
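The task/comm/mmap emitters all follow the same save-and-restore pattern around the shared event_id template, schematically:

    int size = ev->event_id.header.size;               /* pristine size     */
    perf_event_header__init_id(&ev->event_id.header, &sample, event);
                                                       /* += id_header_size */
    ret = perf_output_begin(&handle, event, ev->event_id.header.size, 0, 0);
    if (ret)
            goto out;
    /* ... perf_output_put() + perf_event__output_id_sample() ... */
out:
    ev->event_id.header.size = size;   /* the template is reused for the next
                                        * event, whose id_header_size may
                                        * differ */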
3733 | static int perf_event_task_match(struct perf_event *event) | 3910 | static int perf_event_task_match(struct perf_event *event) |
@@ -3735,7 +3912,7 @@ static int perf_event_task_match(struct perf_event *event) | |||
3735 | if (event->state < PERF_EVENT_STATE_INACTIVE) | 3912 | if (event->state < PERF_EVENT_STATE_INACTIVE) |
3736 | return 0; | 3913 | return 0; |
3737 | 3914 | ||
3738 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | 3915 | if (!event_filter_match(event)) |
3739 | return 0; | 3916 | return 0; |
3740 | 3917 | ||
3741 | if (event->attr.comm || event->attr.mmap || | 3918 | if (event->attr.comm || event->attr.mmap || |
@@ -3766,6 +3943,8 @@ static void perf_event_task_event(struct perf_task_event *task_event) | |||
3766 | rcu_read_lock(); | 3943 | rcu_read_lock(); |
3767 | list_for_each_entry_rcu(pmu, &pmus, entry) { | 3944 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
3768 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); | 3945 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); |
3946 | if (cpuctx->active_pmu != pmu) | ||
3947 | goto next; | ||
3769 | perf_event_task_ctx(&cpuctx->ctx, task_event); | 3948 | perf_event_task_ctx(&cpuctx->ctx, task_event); |
3770 | 3949 | ||
3771 | ctx = task_event->task_ctx; | 3950 | ctx = task_event->task_ctx; |
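The new active_pmu test in these iteration loops prevents duplicate side-band records: several struct pmu instances can share one perf_cpu_context (contexts are shared between pmus with the same task_ctx_nr, cf. find_pmu_context()), and only the context's designated active_pmu should emit for it. Schematically:

    list_for_each_entry_rcu(pmu, &pmus, entry) {
            cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
            if (cpuctx->active_pmu != pmu)   /* shared ctx, owned elsewhere */
                    goto next;               /* skip: avoid emitting twice  */
            ...
    }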
@@ -3840,11 +4019,16 @@ static void perf_event_comm_output(struct perf_event *event, | |||
3840 | struct perf_comm_event *comm_event) | 4019 | struct perf_comm_event *comm_event) |
3841 | { | 4020 | { |
3842 | struct perf_output_handle handle; | 4021 | struct perf_output_handle handle; |
4022 | struct perf_sample_data sample; | ||
3843 | int size = comm_event->event_id.header.size; | 4023 | int size = comm_event->event_id.header.size; |
3844 | int ret = perf_output_begin(&handle, event, size, 0, 0); | 4024 | int ret; |
4025 | |||
4026 | perf_event_header__init_id(&comm_event->event_id.header, &sample, event); | ||
4027 | ret = perf_output_begin(&handle, event, | ||
4028 | comm_event->event_id.header.size, 0, 0); | ||
3845 | 4029 | ||
3846 | if (ret) | 4030 | if (ret) |
3847 | return; | 4031 | goto out; |
3848 | 4032 | ||
3849 | comm_event->event_id.pid = perf_event_pid(event, comm_event->task); | 4033 | comm_event->event_id.pid = perf_event_pid(event, comm_event->task); |
3850 | comm_event->event_id.tid = perf_event_tid(event, comm_event->task); | 4034 | comm_event->event_id.tid = perf_event_tid(event, comm_event->task); |
@@ -3852,7 +4036,12 @@ static void perf_event_comm_output(struct perf_event *event, | |||
3852 | perf_output_put(&handle, comm_event->event_id); | 4036 | perf_output_put(&handle, comm_event->event_id); |
3853 | perf_output_copy(&handle, comm_event->comm, | 4037 | perf_output_copy(&handle, comm_event->comm, |
3854 | comm_event->comm_size); | 4038 | comm_event->comm_size); |
4039 | |||
4040 | perf_event__output_id_sample(event, &handle, &sample); | ||
4041 | |||
3855 | perf_output_end(&handle); | 4042 | perf_output_end(&handle); |
4043 | out: | ||
4044 | comm_event->event_id.header.size = size; | ||
3856 | } | 4045 | } |
3857 | 4046 | ||
3858 | static int perf_event_comm_match(struct perf_event *event) | 4047 | static int perf_event_comm_match(struct perf_event *event) |
@@ -3860,7 +4049,7 @@ static int perf_event_comm_match(struct perf_event *event) | |||
3860 | if (event->state < PERF_EVENT_STATE_INACTIVE) | 4049 | if (event->state < PERF_EVENT_STATE_INACTIVE) |
3861 | return 0; | 4050 | return 0; |
3862 | 4051 | ||
3863 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | 4052 | if (!event_filter_match(event)) |
3864 | return 0; | 4053 | return 0; |
3865 | 4054 | ||
3866 | if (event->attr.comm) | 4055 | if (event->attr.comm) |
@@ -3897,10 +4086,11 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) | |||
3897 | comm_event->comm_size = size; | 4086 | comm_event->comm_size = size; |
3898 | 4087 | ||
3899 | comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; | 4088 | comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; |
3900 | |||
3901 | rcu_read_lock(); | 4089 | rcu_read_lock(); |
3902 | list_for_each_entry_rcu(pmu, &pmus, entry) { | 4090 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
3903 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); | 4091 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); |
4092 | if (cpuctx->active_pmu != pmu) | ||
4093 | goto next; | ||
3904 | perf_event_comm_ctx(&cpuctx->ctx, comm_event); | 4094 | perf_event_comm_ctx(&cpuctx->ctx, comm_event); |
3905 | 4095 | ||
3906 | ctxn = pmu->task_ctx_nr; | 4096 | ctxn = pmu->task_ctx_nr; |
@@ -3976,11 +4166,15 @@ static void perf_event_mmap_output(struct perf_event *event, | |||
3976 | struct perf_mmap_event *mmap_event) | 4166 | struct perf_mmap_event *mmap_event) |
3977 | { | 4167 | { |
3978 | struct perf_output_handle handle; | 4168 | struct perf_output_handle handle; |
4169 | struct perf_sample_data sample; | ||
3979 | int size = mmap_event->event_id.header.size; | 4170 | int size = mmap_event->event_id.header.size; |
3980 | int ret = perf_output_begin(&handle, event, size, 0, 0); | 4171 | int ret; |
3981 | 4172 | ||
4173 | perf_event_header__init_id(&mmap_event->event_id.header, &sample, event); | ||
4174 | ret = perf_output_begin(&handle, event, | ||
4175 | mmap_event->event_id.header.size, 0, 0); | ||
3982 | if (ret) | 4176 | if (ret) |
3983 | return; | 4177 | goto out; |
3984 | 4178 | ||
3985 | mmap_event->event_id.pid = perf_event_pid(event, current); | 4179 | mmap_event->event_id.pid = perf_event_pid(event, current); |
3986 | mmap_event->event_id.tid = perf_event_tid(event, current); | 4180 | mmap_event->event_id.tid = perf_event_tid(event, current); |
@@ -3988,7 +4182,12 @@ static void perf_event_mmap_output(struct perf_event *event, | |||
3988 | perf_output_put(&handle, mmap_event->event_id); | 4182 | perf_output_put(&handle, mmap_event->event_id); |
3989 | perf_output_copy(&handle, mmap_event->file_name, | 4183 | perf_output_copy(&handle, mmap_event->file_name, |
3990 | mmap_event->file_size); | 4184 | mmap_event->file_size); |
4185 | |||
4186 | perf_event__output_id_sample(event, &handle, &sample); | ||
4187 | |||
3991 | perf_output_end(&handle); | 4188 | perf_output_end(&handle); |
4189 | out: | ||
4190 | mmap_event->event_id.header.size = size; | ||
3992 | } | 4191 | } |
3993 | 4192 | ||
3994 | static int perf_event_mmap_match(struct perf_event *event, | 4193 | static int perf_event_mmap_match(struct perf_event *event, |
@@ -3998,7 +4197,7 @@ static int perf_event_mmap_match(struct perf_event *event, | |||
3998 | if (event->state < PERF_EVENT_STATE_INACTIVE) | 4197 | if (event->state < PERF_EVENT_STATE_INACTIVE) |
3999 | return 0; | 4198 | return 0; |
4000 | 4199 | ||
4001 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | 4200 | if (!event_filter_match(event)) |
4002 | return 0; | 4201 | return 0; |
4003 | 4202 | ||
4004 | if ((!executable && event->attr.mmap_data) || | 4203 | if ((!executable && event->attr.mmap_data) || |
@@ -4086,6 +4285,8 @@ got_name: | |||
4086 | rcu_read_lock(); | 4285 | rcu_read_lock(); |
4087 | list_for_each_entry_rcu(pmu, &pmus, entry) { | 4286 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
4088 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); | 4287 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); |
4288 | if (cpuctx->active_pmu != pmu) | ||
4289 | goto next; | ||
4089 | perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, | 4290 | perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, |
4090 | vma->vm_flags & VM_EXEC); | 4291 | vma->vm_flags & VM_EXEC); |
4091 | 4292 | ||
@@ -4141,6 +4342,7 @@ void perf_event_mmap(struct vm_area_struct *vma) | |||
4141 | static void perf_log_throttle(struct perf_event *event, int enable) | 4342 | static void perf_log_throttle(struct perf_event *event, int enable) |
4142 | { | 4343 | { |
4143 | struct perf_output_handle handle; | 4344 | struct perf_output_handle handle; |
4345 | struct perf_sample_data sample; | ||
4144 | int ret; | 4346 | int ret; |
4145 | 4347 | ||
4146 | struct { | 4348 | struct { |
@@ -4162,11 +4364,15 @@ static void perf_log_throttle(struct perf_event *event, int enable) | |||
4162 | if (enable) | 4364 | if (enable) |
4163 | throttle_event.header.type = PERF_RECORD_UNTHROTTLE; | 4365 | throttle_event.header.type = PERF_RECORD_UNTHROTTLE; |
4164 | 4366 | ||
4165 | ret = perf_output_begin(&handle, event, sizeof(throttle_event), 1, 0); | 4367 | perf_event_header__init_id(&throttle_event.header, &sample, event); |
4368 | |||
4369 | ret = perf_output_begin(&handle, event, | ||
4370 | throttle_event.header.size, 1, 0); | ||
4166 | if (ret) | 4371 | if (ret) |
4167 | return; | 4372 | return; |
4168 | 4373 | ||
4169 | perf_output_put(&handle, throttle_event); | 4374 | perf_output_put(&handle, throttle_event); |
4375 | perf_event__output_id_sample(event, &handle, &sample); | ||
4170 | perf_output_end(&handle); | 4376 | perf_output_end(&handle); |
4171 | } | 4377 | } |
4172 | 4378 | ||
@@ -4182,6 +4388,13 @@ static int __perf_event_overflow(struct perf_event *event, int nmi, | |||
4182 | struct hw_perf_event *hwc = &event->hw; | 4388 | struct hw_perf_event *hwc = &event->hw; |
4183 | int ret = 0; | 4389 | int ret = 0; |
4184 | 4390 | ||
4391 | /* | ||
4392 | * Non-sampling counters might still use the PMI to fold short | ||
4393 | * hardware counters, ignore those. | ||
4394 | */ | ||
4395 | if (unlikely(!is_sampling_event(event))) | ||
4396 | return 0; | ||
4397 | |||
4185 | if (!throttle) { | 4398 | if (!throttle) { |
4186 | hwc->interrupts++; | 4399 | hwc->interrupts++; |
4187 | } else { | 4400 | } else { |
@@ -4327,7 +4540,7 @@ static void perf_swevent_event(struct perf_event *event, u64 nr, | |||
4327 | if (!regs) | 4540 | if (!regs) |
4328 | return; | 4541 | return; |
4329 | 4542 | ||
4330 | if (!hwc->sample_period) | 4543 | if (!is_sampling_event(event)) |
4331 | return; | 4544 | return; |
4332 | 4545 | ||
4333 | if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq) | 4546 | if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq) |
@@ -4454,7 +4667,7 @@ int perf_swevent_get_recursion_context(void) | |||
4454 | } | 4667 | } |
4455 | EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); | 4668 | EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); |
4456 | 4669 | ||
4457 | void inline perf_swevent_put_recursion_context(int rctx) | 4670 | inline void perf_swevent_put_recursion_context(int rctx) |
4458 | { | 4671 | { |
4459 | struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); | 4672 | struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); |
4460 | 4673 | ||
@@ -4490,7 +4703,7 @@ static int perf_swevent_add(struct perf_event *event, int flags) | |||
4490 | struct hw_perf_event *hwc = &event->hw; | 4703 | struct hw_perf_event *hwc = &event->hw; |
4491 | struct hlist_head *head; | 4704 | struct hlist_head *head; |
4492 | 4705 | ||
4493 | if (hwc->sample_period) { | 4706 | if (is_sampling_event(event)) { |
4494 | hwc->last_period = hwc->sample_period; | 4707 | hwc->last_period = hwc->sample_period; |
4495 | perf_swevent_set_period(event); | 4708 | perf_swevent_set_period(event); |
4496 | } | 4709 | } |
@@ -4655,7 +4868,7 @@ static int perf_swevent_init(struct perf_event *event) | |||
4655 | break; | 4868 | break; |
4656 | } | 4869 | } |
4657 | 4870 | ||
4658 | if (event_id > PERF_COUNT_SW_MAX) | 4871 | if (event_id >= PERF_COUNT_SW_MAX) |
4659 | return -ENOENT; | 4872 | return -ENOENT; |
4660 | 4873 | ||
4661 | if (!event->parent) { | 4874 | if (!event->parent) { |
@@ -4747,15 +4960,6 @@ static int perf_tp_event_init(struct perf_event *event) | |||
4747 | if (event->attr.type != PERF_TYPE_TRACEPOINT) | 4960 | if (event->attr.type != PERF_TYPE_TRACEPOINT) |
4748 | return -ENOENT; | 4961 | return -ENOENT; |
4749 | 4962 | ||
4750 | /* | ||
4751 | * Raw tracepoint data is a severe data leak, only allow root to | ||
4752 | * have these. | ||
4753 | */ | ||
4754 | if ((event->attr.sample_type & PERF_SAMPLE_RAW) && | ||
4755 | perf_paranoid_tracepoint_raw() && | ||
4756 | !capable(CAP_SYS_ADMIN)) | ||
4757 | return -EPERM; | ||
4758 | |||
4759 | err = perf_trace_init(event); | 4963 | err = perf_trace_init(event); |
4760 | if (err) | 4964 | if (err) |
4761 | return err; | 4965 | return err; |
@@ -4778,7 +4982,7 @@ static struct pmu perf_tracepoint = { | |||
4778 | 4982 | ||
4779 | static inline void perf_tp_register(void) | 4983 | static inline void perf_tp_register(void) |
4780 | { | 4984 | { |
4781 | perf_pmu_register(&perf_tracepoint); | 4985 | perf_pmu_register(&perf_tracepoint, "tracepoint", PERF_TYPE_TRACEPOINT); |
4782 | } | 4986 | } |
4783 | 4987 | ||
4784 | static int perf_event_set_filter(struct perf_event *event, void __user *arg) | 4988 | static int perf_event_set_filter(struct perf_event *event, void __user *arg) |
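perf_pmu_register() gains a name and a type in this patch; its full definition lies past the end of this excerpt, but the call above implies a signature along the lines of:

    int perf_pmu_register(struct pmu *pmu, char *name, int type);
    /* 'type' is a fixed PERF_TYPE_* value here; a negative value asks for a
     * dynamically allocated id from pmu_idr, and 'name' is exported through
     * sysfs together with the type_show() attribute added at the end of
     * this diff. */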
@@ -4868,31 +5072,33 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) | |||
4868 | static void perf_swevent_start_hrtimer(struct perf_event *event) | 5072 | static void perf_swevent_start_hrtimer(struct perf_event *event) |
4869 | { | 5073 | { |
4870 | struct hw_perf_event *hwc = &event->hw; | 5074 | struct hw_perf_event *hwc = &event->hw; |
5075 | s64 period; | ||
5076 | |||
5077 | if (!is_sampling_event(event)) | ||
5078 | return; | ||
4871 | 5079 | ||
4872 | hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | 5080 | hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
4873 | hwc->hrtimer.function = perf_swevent_hrtimer; | 5081 | hwc->hrtimer.function = perf_swevent_hrtimer; |
4874 | if (hwc->sample_period) { | ||
4875 | s64 period = local64_read(&hwc->period_left); | ||
4876 | 5082 | ||
4877 | if (period) { | 5083 | period = local64_read(&hwc->period_left); |
4878 | if (period < 0) | 5084 | if (period) { |
4879 | period = 10000; | 5085 | if (period < 0) |
5086 | period = 10000; | ||
4880 | 5087 | ||
4881 | local64_set(&hwc->period_left, 0); | 5088 | local64_set(&hwc->period_left, 0); |
4882 | } else { | 5089 | } else { |
4883 | period = max_t(u64, 10000, hwc->sample_period); | 5090 | period = max_t(u64, 10000, hwc->sample_period); |
4884 | } | 5091 | } |
4885 | __hrtimer_start_range_ns(&hwc->hrtimer, | 5092 | __hrtimer_start_range_ns(&hwc->hrtimer, |
4886 | ns_to_ktime(period), 0, | 5093 | ns_to_ktime(period), 0, |
4887 | HRTIMER_MODE_REL_PINNED, 0); | 5094 | HRTIMER_MODE_REL_PINNED, 0); |
4888 | } | ||
4889 | } | 5095 | } |
4890 | 5096 | ||
4891 | static void perf_swevent_cancel_hrtimer(struct perf_event *event) | 5097 | static void perf_swevent_cancel_hrtimer(struct perf_event *event) |
4892 | { | 5098 | { |
4893 | struct hw_perf_event *hwc = &event->hw; | 5099 | struct hw_perf_event *hwc = &event->hw; |
4894 | 5100 | ||
4895 | if (hwc->sample_period) { | 5101 | if (is_sampling_event(event)) { |
4896 | ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); | 5102 | ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); |
4897 | local64_set(&hwc->period_left, ktime_to_ns(remaining)); | 5103 | local64_set(&hwc->period_left, ktime_to_ns(remaining)); |
4898 | 5104 | ||
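The interleaved rendering of the hrtimer hunk above is hard to follow; reassembled from the right-hand column, the new perf_swevent_start_hrtimer() is the old body with an early return for non-sampling events and the period handling un-indented (a reconstruction for readability, not copied from the tree):

	static void perf_swevent_start_hrtimer(struct perf_event *event)
	{
		struct hw_perf_event *hwc = &event->hw;
		s64 period;

		if (!is_sampling_event(event))
			return;		/* counting-only events never arm the timer */

		hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
		hwc->hrtimer.function = perf_swevent_hrtimer;

		/* resume a partially elapsed period, clamped to a 10us floor */
		period = local64_read(&hwc->period_left);
		if (period) {
			if (period < 0)
				period = 10000;
			local64_set(&hwc->period_left, 0);
		} else {
			period = max_t(u64, 10000, hwc->sample_period);
		}

		__hrtimer_start_range_ns(&hwc->hrtimer,
					 ns_to_ktime(period), 0,
					 HRTIMER_MODE_REL_PINNED, 0);
	}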
@@ -5087,25 +5293,96 @@ static void *find_pmu_context(int ctxn) | |||
5087 | return NULL; | 5293 | return NULL; |
5088 | } | 5294 | } |
5089 | 5295 | ||
5090 | static void free_pmu_context(void * __percpu cpu_context) | 5296 | static void update_pmu_context(struct pmu *pmu, struct pmu *old_pmu) |
5091 | { | 5297 | { |
5092 | struct pmu *pmu; | 5298 | int cpu; |
5299 | |||
5300 | for_each_possible_cpu(cpu) { | ||
5301 | struct perf_cpu_context *cpuctx; | ||
5302 | |||
5303 | cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); | ||
5304 | |||
5305 | if (cpuctx->active_pmu == old_pmu) | ||
5306 | cpuctx->active_pmu = pmu; | ||
5307 | } | ||
5308 | } | ||
5309 | |||
5310 | static void free_pmu_context(struct pmu *pmu) | ||
5311 | { | ||
5312 | struct pmu *i; | ||
5093 | 5313 | ||
5094 | mutex_lock(&pmus_lock); | 5314 | mutex_lock(&pmus_lock); |
5095 | /* | 5315 | /* |
5096 | * Like a real lame refcount. | 5316 | * Like a real lame refcount. |
5097 | */ | 5317 | */ |
5098 | list_for_each_entry(pmu, &pmus, entry) { | 5318 | list_for_each_entry(i, &pmus, entry) { |
5099 | if (pmu->pmu_cpu_context == cpu_context) | 5319 | if (i->pmu_cpu_context == pmu->pmu_cpu_context) { |
5320 | update_pmu_context(i, pmu); | ||
5100 | goto out; | 5321 | goto out; |
5322 | } | ||
5101 | } | 5323 | } |
5102 | 5324 | ||
5103 | free_percpu(cpu_context); | 5325 | free_percpu(pmu->pmu_cpu_context); |
5104 | out: | 5326 | out: |
5105 | mutex_unlock(&pmus_lock); | 5327 | mutex_unlock(&pmus_lock); |
5106 | } | 5328 | } |
5329 | static struct idr pmu_idr; | ||
5107 | 5330 | ||
5108 | int perf_pmu_register(struct pmu *pmu) | 5331 | static ssize_t |
5332 | type_show(struct device *dev, struct device_attribute *attr, char *page) | ||
5333 | { | ||
5334 | struct pmu *pmu = dev_get_drvdata(dev); | ||
5335 | |||
5336 | return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->type); | ||
5337 | } | ||
5338 | |||
5339 | static struct device_attribute pmu_dev_attrs[] = { | ||
5340 | __ATTR_RO(type), | ||
5341 | __ATTR_NULL, | ||
5342 | }; | ||
5343 | |||
5344 | static int pmu_bus_running; | ||
5345 | static struct bus_type pmu_bus = { | ||
5346 | .name = "event_source", | ||
5347 | .dev_attrs = pmu_dev_attrs, | ||
5348 | }; | ||
5349 | |||
5350 | static void pmu_dev_release(struct device *dev) | ||
5351 | { | ||
5352 | kfree(dev); | ||
5353 | } | ||
5354 | |||
5355 | static int pmu_dev_alloc(struct pmu *pmu) | ||
5356 | { | ||
5357 | int ret = -ENOMEM; | ||
5358 | |||
5359 | pmu->dev = kzalloc(sizeof(struct device), GFP_KERNEL); | ||
5360 | if (!pmu->dev) | ||
5361 | goto out; | ||
5362 | |||
5363 | device_initialize(pmu->dev); | ||
5364 | ret = dev_set_name(pmu->dev, "%s", pmu->name); | ||
5365 | if (ret) | ||
5366 | goto free_dev; | ||
5367 | |||
5368 | dev_set_drvdata(pmu->dev, pmu); | ||
5369 | pmu->dev->bus = &pmu_bus; | ||
5370 | pmu->dev->release = pmu_dev_release; | ||
5371 | ret = device_add(pmu->dev); | ||
5372 | if (ret) | ||
5373 | goto free_dev; | ||
5374 | |||
5375 | out: | ||
5376 | return ret; | ||
5377 | |||
5378 | free_dev: | ||
5379 | put_device(pmu->dev); | ||
5380 | goto out; | ||
5381 | } | ||
5382 | |||
5383 | static struct lock_class_key cpuctx_mutex; | ||
5384 | |||
5385 | int perf_pmu_register(struct pmu *pmu, char *name, int type) | ||
5109 | { | 5386 | { |
5110 | int cpu, ret; | 5387 | int cpu, ret; |
5111 | 5388 | ||
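pmu_dev_alloc() above hangs every named PMU off the new "event_source" bus, so each one appears as /sys/bus/event_source/devices/<name> with a read-only "type" attribute. A hedged userspace sketch that resolves a PMU name to its type id; the path layout is inferred from the bus and attribute names in this hunk:

	#include <stdio.h>

	/* returns the perf type id for a named PMU, or -1 on error */
	static int pmu_type_from_sysfs(const char *pmu_name)
	{
		char path[128];
		FILE *f;
		int type = -1;

		snprintf(path, sizeof(path),
			 "/sys/bus/event_source/devices/%s/type", pmu_name);
		f = fopen(path, "r");
		if (!f)
			return -1;
		if (fscanf(f, "%d", &type) != 1)
			type = -1;
		fclose(f);
		return type;
	}

For the PMUs registered later in this patch, pmu_type_from_sysfs("software") would be expected to return PERF_TYPE_SOFTWARE, while a dynamically registered PMU reports whatever id the idr handed out.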
@@ -5115,23 +5392,50 @@ int perf_pmu_register(struct pmu *pmu) | |||
5115 | if (!pmu->pmu_disable_count) | 5392 | if (!pmu->pmu_disable_count) |
5116 | goto unlock; | 5393 | goto unlock; |
5117 | 5394 | ||
5395 | pmu->type = -1; | ||
5396 | if (!name) | ||
5397 | goto skip_type; | ||
5398 | pmu->name = name; | ||
5399 | |||
5400 | if (type < 0) { | ||
5401 | int err = idr_pre_get(&pmu_idr, GFP_KERNEL); | ||
5402 | if (!err) | ||
5403 | goto free_pdc; | ||
5404 | |||
5405 | err = idr_get_new_above(&pmu_idr, pmu, PERF_TYPE_MAX, &type); | ||
5406 | if (err) { | ||
5407 | ret = err; | ||
5408 | goto free_pdc; | ||
5409 | } | ||
5410 | } | ||
5411 | pmu->type = type; | ||
5412 | |||
5413 | if (pmu_bus_running) { | ||
5414 | ret = pmu_dev_alloc(pmu); | ||
5415 | if (ret) | ||
5416 | goto free_idr; | ||
5417 | } | ||
5418 | |||
5419 | skip_type: | ||
5118 | pmu->pmu_cpu_context = find_pmu_context(pmu->task_ctx_nr); | 5420 | pmu->pmu_cpu_context = find_pmu_context(pmu->task_ctx_nr); |
5119 | if (pmu->pmu_cpu_context) | 5421 | if (pmu->pmu_cpu_context) |
5120 | goto got_cpu_context; | 5422 | goto got_cpu_context; |
5121 | 5423 | ||
5122 | pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context); | 5424 | pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context); |
5123 | if (!pmu->pmu_cpu_context) | 5425 | if (!pmu->pmu_cpu_context) |
5124 | goto free_pdc; | 5426 | goto free_dev; |
5125 | 5427 | ||
5126 | for_each_possible_cpu(cpu) { | 5428 | for_each_possible_cpu(cpu) { |
5127 | struct perf_cpu_context *cpuctx; | 5429 | struct perf_cpu_context *cpuctx; |
5128 | 5430 | ||
5129 | cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); | 5431 | cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); |
5130 | __perf_event_init_context(&cpuctx->ctx); | 5432 | __perf_event_init_context(&cpuctx->ctx); |
5433 | lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex); | ||
5131 | cpuctx->ctx.type = cpu_context; | 5434 | cpuctx->ctx.type = cpu_context; |
5132 | cpuctx->ctx.pmu = pmu; | 5435 | cpuctx->ctx.pmu = pmu; |
5133 | cpuctx->jiffies_interval = 1; | 5436 | cpuctx->jiffies_interval = 1; |
5134 | INIT_LIST_HEAD(&cpuctx->rotation_list); | 5437 | INIT_LIST_HEAD(&cpuctx->rotation_list); |
5438 | cpuctx->active_pmu = pmu; | ||
5135 | } | 5439 | } |
5136 | 5440 | ||
5137 | got_cpu_context: | 5441 | got_cpu_context: |
@@ -5164,6 +5468,14 @@ unlock: | |||
5164 | 5468 | ||
5165 | return ret; | 5469 | return ret; |
5166 | 5470 | ||
5471 | free_dev: | ||
5472 | device_del(pmu->dev); | ||
5473 | put_device(pmu->dev); | ||
5474 | |||
5475 | free_idr: | ||
5476 | if (pmu->type >= PERF_TYPE_MAX) | ||
5477 | idr_remove(&pmu_idr, pmu->type); | ||
5478 | |||
5167 | free_pdc: | 5479 | free_pdc: |
5168 | free_percpu(pmu->pmu_disable_count); | 5480 | free_percpu(pmu->pmu_disable_count); |
5169 | goto unlock; | 5481 | goto unlock; |
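Dynamic type ids come from the old two-step idr API: idr_pre_get() reserves memory, then idr_get_new_above() hands out an id at or above PERF_TYPE_MAX, which is already one past the last fixed perf_type_id value, so dynamic PMUs never shadow the built-in types. A hedged sketch of the usual retry idiom around that API; the hunk above takes the simpler bail-out path, and the helper name here is illustrative:

	static int pmu_alloc_type(struct idr *idr, struct pmu *pmu)
	{
		int id, err;

		do {
			if (!idr_pre_get(idr, GFP_KERNEL))	/* reserve a layer */
				return -ENOMEM;
			/* ids start at PERF_TYPE_MAX, clear of the fixed types */
			err = idr_get_new_above(idr, pmu, PERF_TYPE_MAX, &id);
		} while (err == -EAGAIN);

		return err ? err : id;
	}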
@@ -5183,7 +5495,11 @@ void perf_pmu_unregister(struct pmu *pmu) | |||
5183 | synchronize_rcu(); | 5495 | synchronize_rcu(); |
5184 | 5496 | ||
5185 | free_percpu(pmu->pmu_disable_count); | 5497 | free_percpu(pmu->pmu_disable_count); |
5186 | free_pmu_context(pmu->pmu_cpu_context); | 5498 | if (pmu->type >= PERF_TYPE_MAX) |
5499 | idr_remove(&pmu_idr, pmu->type); | ||
5500 | device_del(pmu->dev); | ||
5501 | put_device(pmu->dev); | ||
5502 | free_pmu_context(pmu); | ||
5187 | } | 5503 | } |
5188 | 5504 | ||
5189 | struct pmu *perf_init_event(struct perf_event *event) | 5505 | struct pmu *perf_init_event(struct perf_event *event) |
@@ -5192,6 +5508,13 @@ struct pmu *perf_init_event(struct perf_event *event) | |||
5192 | int idx; | 5508 | int idx; |
5193 | 5509 | ||
5194 | idx = srcu_read_lock(&pmus_srcu); | 5510 | idx = srcu_read_lock(&pmus_srcu); |
5511 | |||
5512 | rcu_read_lock(); | ||
5513 | pmu = idr_find(&pmu_idr, event->attr.type); | ||
5514 | rcu_read_unlock(); | ||
5515 | if (pmu) | ||
5516 | goto unlock; | ||
5517 | |||
5195 | list_for_each_entry_rcu(pmu, &pmus, entry) { | 5518 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
5196 | int ret = pmu->event_init(event); | 5519 | int ret = pmu->event_init(event); |
5197 | if (!ret) | 5520 | if (!ret) |
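perf_init_event() now tries an O(1) idr lookup keyed on attr.type before falling back to walking the pmus list and letting each ->event_init() claim the event. A condensed, hedged sketch of the resulting lookup order, with the srcu section and error bookkeeping of the real function elided:

	/* sketch: type-keyed fast path first, list walk as the legacy fallback */
	static struct pmu *lookup_pmu(struct perf_event *event)
	{
		struct pmu *pmu;

		rcu_read_lock();
		pmu = idr_find(&pmu_idr, event->attr.type);
		rcu_read_unlock();
		if (pmu)
			return pmu;

		list_for_each_entry_rcu(pmu, &pmus, entry) {
			if (!pmu->event_init(event))	/* 0 means this pmu accepts it */
				return pmu;
		}
		return ERR_PTR(-ENOENT);
	}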
@@ -5224,6 +5547,11 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, | |||
5224 | struct hw_perf_event *hwc; | 5547 | struct hw_perf_event *hwc; |
5225 | long err; | 5548 | long err; |
5226 | 5549 | ||
5550 | if ((unsigned)cpu >= nr_cpu_ids) { | ||
5551 | if (!task || cpu != -1) | ||
5552 | return ERR_PTR(-EINVAL); | ||
5553 | } | ||
5554 | |||
5227 | event = kzalloc(sizeof(*event), GFP_KERNEL); | 5555 | event = kzalloc(sizeof(*event), GFP_KERNEL); |
5228 | if (!event) | 5556 | if (!event) |
5229 | return ERR_PTR(-ENOMEM); | 5557 | return ERR_PTR(-ENOMEM); |
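The new check at the top of perf_event_alloc() rejects bogus CPU numbers before any allocation: the unsigned cast folds negative values into the out-of-range case, and the only tolerated out-of-range value is -1 on a task-bound event. An equivalent, slightly more literal spelling of the same condition, for readability only:

	if (cpu < 0 || cpu >= nr_cpu_ids) {
		/* only cpu == -1 on a per-task event is acceptable here */
		if (!task || cpu != -1)
			return ERR_PTR(-EINVAL);
	}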
@@ -5272,7 +5600,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, | |||
5272 | 5600 | ||
5273 | if (!overflow_handler && parent_event) | 5601 | if (!overflow_handler && parent_event) |
5274 | overflow_handler = parent_event->overflow_handler; | 5602 | overflow_handler = parent_event->overflow_handler; |
5275 | 5603 | ||
5276 | event->overflow_handler = overflow_handler; | 5604 | event->overflow_handler = overflow_handler; |
5277 | 5605 | ||
5278 | if (attr->disabled) | 5606 | if (attr->disabled) |
@@ -5651,12 +5979,18 @@ SYSCALL_DEFINE5(perf_event_open, | |||
5651 | mutex_unlock(&ctx->mutex); | 5979 | mutex_unlock(&ctx->mutex); |
5652 | 5980 | ||
5653 | event->owner = current; | 5981 | event->owner = current; |
5654 | get_task_struct(current); | 5982 | |
5655 | mutex_lock(¤t->perf_event_mutex); | 5983 | mutex_lock(¤t->perf_event_mutex); |
5656 | list_add_tail(&event->owner_entry, ¤t->perf_event_list); | 5984 | list_add_tail(&event->owner_entry, ¤t->perf_event_list); |
5657 | mutex_unlock(¤t->perf_event_mutex); | 5985 | mutex_unlock(¤t->perf_event_mutex); |
5658 | 5986 | ||
5659 | /* | 5987 | /* |
5988 | * Precalculate sample_data sizes | ||
5989 | */ | ||
5990 | perf_event__header_size(event); | ||
5991 | perf_event__id_header_size(event); | ||
5992 | |||
5993 | /* | ||
5660 | * Drop the reference on the group_event after placing the | 5994 | * Drop the reference on the group_event after placing the |
5661 | * new event on the sibling_list. This ensures destruction | 5995 | * new event on the sibling_list. This ensures destruction |
5662 | * of the group leader will find the pointer to itself in | 5996 | * of the group leader will find the pointer to itself in |
@@ -5719,12 +6053,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, | |||
5719 | ++ctx->generation; | 6053 | ++ctx->generation; |
5720 | mutex_unlock(&ctx->mutex); | 6054 | mutex_unlock(&ctx->mutex); |
5721 | 6055 | ||
5722 | event->owner = current; | ||
5723 | get_task_struct(current); | ||
5724 | mutex_lock(¤t->perf_event_mutex); | ||
5725 | list_add_tail(&event->owner_entry, ¤t->perf_event_list); | ||
5726 | mutex_unlock(¤t->perf_event_mutex); | ||
5727 | |||
5728 | return event; | 6056 | return event; |
5729 | 6057 | ||
5730 | err_free: | 6058 | err_free: |
@@ -5808,7 +6136,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn) | |||
5808 | * scheduled, so we are now safe from rescheduling changing | 6136 | * scheduled, so we are now safe from rescheduling changing |
5809 | * our context. | 6137 | * our context. |
5810 | */ | 6138 | */ |
5811 | child_ctx = child->perf_event_ctxp[ctxn]; | 6139 | child_ctx = rcu_dereference_raw(child->perf_event_ctxp[ctxn]); |
5812 | task_ctx_sched_out(child_ctx, EVENT_ALL); | 6140 | task_ctx_sched_out(child_ctx, EVENT_ALL); |
5813 | 6141 | ||
5814 | /* | 6142 | /* |
@@ -5875,8 +6203,24 @@ again: | |||
5875 | */ | 6203 | */ |
5876 | void perf_event_exit_task(struct task_struct *child) | 6204 | void perf_event_exit_task(struct task_struct *child) |
5877 | { | 6205 | { |
6206 | struct perf_event *event, *tmp; | ||
5878 | int ctxn; | 6207 | int ctxn; |
5879 | 6208 | ||
6209 | mutex_lock(&child->perf_event_mutex); | ||
6210 | list_for_each_entry_safe(event, tmp, &child->perf_event_list, | ||
6211 | owner_entry) { | ||
6212 | list_del_init(&event->owner_entry); | ||
6213 | |||
6214 | /* | ||
6215 | * Ensure the list deletion is visible before we clear | ||
6216 | * the owner, closes a race against perf_release() where | ||
6217 | * we need to serialize on the owner->perf_event_mutex. | ||
6218 | */ | ||
6219 | smp_wmb(); | ||
6220 | event->owner = NULL; | ||
6221 | } | ||
6222 | mutex_unlock(&child->perf_event_mutex); | ||
6223 | |||
5880 | for_each_task_context_nr(ctxn) | 6224 | for_each_task_context_nr(ctxn) |
5881 | perf_event_exit_task_context(child, ctxn); | 6225 | perf_event_exit_task_context(child, ctxn); |
5882 | } | 6226 | } |
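The smp_wmb() above orders the list deletion against clearing event->owner: the file-release path is expected to load event->owner, and if it still sees a task it must take that task's perf_event_mutex and re-check before touching owner_entry. A hedged sketch of roughly what that consumer side has to do; this illustrates the pairing described in the comment and is not the release code from this patch:

	static void detach_from_owner(struct perf_event *event)
	{
		struct task_struct *owner;

		rcu_read_lock();
		owner = ACCESS_ONCE(event->owner);
		/* pairs with the smp_wmb() in perf_event_exit_task() */
		smp_read_barrier_depends();
		if (owner && !atomic_inc_not_zero(&owner->usage))
			owner = NULL;	/* task already gone */
		rcu_read_unlock();

		if (owner) {
			mutex_lock(&owner->perf_event_mutex);
			/* re-check: the exit path may have unlinked us already */
			if (event->owner)
				list_del_init(&event->owner_entry);
			mutex_unlock(&owner->perf_event_mutex);
			put_task_struct(owner);
		}
	}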
@@ -5999,6 +6343,12 @@ inherit_event(struct perf_event *parent_event, | |||
5999 | child_event->overflow_handler = parent_event->overflow_handler; | 6343 | child_event->overflow_handler = parent_event->overflow_handler; |
6000 | 6344 | ||
6001 | /* | 6345 | /* |
6346 | * Precalculate sample_data sizes | ||
6347 | */ | ||
6348 | perf_event__header_size(child_event); | ||
6349 | perf_event__id_header_size(child_event); | ||
6350 | |||
6351 | /* | ||
6002 | * Link it up in the child's context: | 6352 | * Link it up in the child's context: |
6003 | */ | 6353 | */ |
6004 | raw_spin_lock_irqsave(&child_ctx->lock, flags); | 6354 | raw_spin_lock_irqsave(&child_ctx->lock, flags); |
@@ -6096,13 +6446,9 @@ int perf_event_init_context(struct task_struct *child, int ctxn) | |||
6096 | struct perf_event *event; | 6446 | struct perf_event *event; |
6097 | struct task_struct *parent = current; | 6447 | struct task_struct *parent = current; |
6098 | int inherited_all = 1; | 6448 | int inherited_all = 1; |
6449 | unsigned long flags; | ||
6099 | int ret = 0; | 6450 | int ret = 0; |
6100 | 6451 | ||
6101 | child->perf_event_ctxp[ctxn] = NULL; | ||
6102 | |||
6103 | mutex_init(&child->perf_event_mutex); | ||
6104 | INIT_LIST_HEAD(&child->perf_event_list); | ||
6105 | |||
6106 | if (likely(!parent->perf_event_ctxp[ctxn])) | 6452 | if (likely(!parent->perf_event_ctxp[ctxn])) |
6107 | return 0; | 6453 | return 0; |
6108 | 6454 | ||
@@ -6136,6 +6482,15 @@ int perf_event_init_context(struct task_struct *child, int ctxn) | |||
6136 | break; | 6482 | break; |
6137 | } | 6483 | } |
6138 | 6484 | ||
6485 | /* | ||
6486 | * We can't hold ctx->lock when iterating the ->flexible_group list due | ||
6487 | * to allocations, but we need to prevent rotation because | ||
6488 | * rotate_ctx() will change the list from interrupt context. | ||
6489 | */ | ||
6490 | raw_spin_lock_irqsave(&parent_ctx->lock, flags); | ||
6491 | parent_ctx->rotate_disable = 1; | ||
6492 | raw_spin_unlock_irqrestore(&parent_ctx->lock, flags); | ||
6493 | |||
6139 | list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) { | 6494 | list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) { |
6140 | ret = inherit_task_group(event, parent, parent_ctx, | 6495 | ret = inherit_task_group(event, parent, parent_ctx, |
6141 | child, ctxn, &inherited_all); | 6496 | child, ctxn, &inherited_all); |
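rotate_disable exists because this inheritance walk cannot hold ctx->lock (it allocates), yet the rotation path mutates flexible_groups from interrupt context. The rotation side presumably just checks the flag under the lock; a hedged sketch of that check (rotate_ctx() itself is not part of this diff):

	static void rotate_ctx(struct perf_event_context *ctx)
	{
		raw_spin_lock(&ctx->lock);
		/* skip the rotation while an inheritance walk is in flight */
		if (!ctx->rotate_disable)
			list_rotate_left(&ctx->flexible_groups);
		raw_spin_unlock(&ctx->lock);
	}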
@@ -6143,18 +6498,20 @@ int perf_event_init_context(struct task_struct *child, int ctxn) | |||
6143 | break; | 6498 | break; |
6144 | } | 6499 | } |
6145 | 6500 | ||
6501 | raw_spin_lock_irqsave(&parent_ctx->lock, flags); | ||
6502 | parent_ctx->rotate_disable = 0; | ||
6503 | |||
6146 | child_ctx = child->perf_event_ctxp[ctxn]; | 6504 | child_ctx = child->perf_event_ctxp[ctxn]; |
6147 | 6505 | ||
6148 | if (child_ctx && inherited_all) { | 6506 | if (child_ctx && inherited_all) { |
6149 | /* | 6507 | /* |
6150 | * Mark the child context as a clone of the parent | 6508 | * Mark the child context as a clone of the parent |
6151 | * context, or of whatever the parent is a clone of. | 6509 | * context, or of whatever the parent is a clone of. |
6152 | * Note that if the parent is a clone, it could get | 6510 | * |
6153 | * uncloned at any point, but that doesn't matter | 6511 | * Note that if the parent is a clone, the holding of |
6154 | * because the list of events and the generation | 6512 | * parent_ctx->lock avoids it from being uncloned. |
6155 | * count can't have changed since we took the mutex. | ||
6156 | */ | 6513 | */ |
6157 | cloned_ctx = rcu_dereference(parent_ctx->parent_ctx); | 6514 | cloned_ctx = parent_ctx->parent_ctx; |
6158 | if (cloned_ctx) { | 6515 | if (cloned_ctx) { |
6159 | child_ctx->parent_ctx = cloned_ctx; | 6516 | child_ctx->parent_ctx = cloned_ctx; |
6160 | child_ctx->parent_gen = parent_ctx->parent_gen; | 6517 | child_ctx->parent_gen = parent_ctx->parent_gen; |
@@ -6165,6 +6522,7 @@ int perf_event_init_context(struct task_struct *child, int ctxn) | |||
6165 | get_ctx(child_ctx->parent_ctx); | 6522 | get_ctx(child_ctx->parent_ctx); |
6166 | } | 6523 | } |
6167 | 6524 | ||
6525 | raw_spin_unlock_irqrestore(&parent_ctx->lock, flags); | ||
6168 | mutex_unlock(&parent_ctx->mutex); | 6526 | mutex_unlock(&parent_ctx->mutex); |
6169 | 6527 | ||
6170 | perf_unpin_context(parent_ctx); | 6528 | perf_unpin_context(parent_ctx); |
@@ -6179,6 +6537,10 @@ int perf_event_init_task(struct task_struct *child) | |||
6179 | { | 6537 | { |
6180 | int ctxn, ret; | 6538 | int ctxn, ret; |
6181 | 6539 | ||
6540 | memset(child->perf_event_ctxp, 0, sizeof(child->perf_event_ctxp)); | ||
6541 | mutex_init(&child->perf_event_mutex); | ||
6542 | INIT_LIST_HEAD(&child->perf_event_list); | ||
6543 | |||
6182 | for_each_task_context_nr(ctxn) { | 6544 | for_each_task_context_nr(ctxn) { |
6183 | ret = perf_event_init_context(child, ctxn); | 6545 | ret = perf_event_init_context(child, ctxn); |
6184 | if (ret) | 6546 | if (ret) |
@@ -6215,7 +6577,7 @@ static void __cpuinit perf_event_init_cpu(int cpu) | |||
6215 | mutex_unlock(&swhash->hlist_mutex); | 6577 | mutex_unlock(&swhash->hlist_mutex); |
6216 | } | 6578 | } |
6217 | 6579 | ||
6218 | #ifdef CONFIG_HOTPLUG_CPU | 6580 | #if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC |
6219 | static void perf_pmu_rotate_stop(struct pmu *pmu) | 6581 | static void perf_pmu_rotate_stop(struct pmu *pmu) |
6220 | { | 6582 | { |
6221 | struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); | 6583 | struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); |
@@ -6269,6 +6631,26 @@ static void perf_event_exit_cpu(int cpu) | |||
6269 | static inline void perf_event_exit_cpu(int cpu) { } | 6631 | static inline void perf_event_exit_cpu(int cpu) { } |
6270 | #endif | 6632 | #endif |
6271 | 6633 | ||
6634 | static int | ||
6635 | perf_reboot(struct notifier_block *notifier, unsigned long val, void *v) | ||
6636 | { | ||
6637 | int cpu; | ||
6638 | |||
6639 | for_each_online_cpu(cpu) | ||
6640 | perf_event_exit_cpu(cpu); | ||
6641 | |||
6642 | return NOTIFY_OK; | ||
6643 | } | ||
6644 | |||
6645 | /* | ||
6646 | * Run the perf reboot notifier at the very last possible moment so that | ||
6647 | * the generic watchdog code runs as long as possible. | ||
6648 | */ | ||
6649 | static struct notifier_block perf_reboot_notifier = { | ||
6650 | .notifier_call = perf_reboot, | ||
6651 | .priority = INT_MIN, | ||
6652 | }; | ||
6653 | |||
6272 | static int __cpuinit | 6654 | static int __cpuinit |
6273 | perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) | 6655 | perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) |
6274 | { | 6656 | { |
@@ -6295,11 +6677,47 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) | |||
6295 | 6677 | ||
6296 | void __init perf_event_init(void) | 6678 | void __init perf_event_init(void) |
6297 | { | 6679 | { |
6680 | int ret; | ||
6681 | |||
6682 | idr_init(&pmu_idr); | ||
6683 | |||
6298 | perf_event_init_all_cpus(); | 6684 | perf_event_init_all_cpus(); |
6299 | init_srcu_struct(&pmus_srcu); | 6685 | init_srcu_struct(&pmus_srcu); |
6300 | perf_pmu_register(&perf_swevent); | 6686 | perf_pmu_register(&perf_swevent, "software", PERF_TYPE_SOFTWARE); |
6301 | perf_pmu_register(&perf_cpu_clock); | 6687 | perf_pmu_register(&perf_cpu_clock, NULL, -1); |
6302 | perf_pmu_register(&perf_task_clock); | 6688 | perf_pmu_register(&perf_task_clock, NULL, -1); |
6303 | perf_tp_register(); | 6689 | perf_tp_register(); |
6304 | perf_cpu_notifier(perf_cpu_notify); | 6690 | perf_cpu_notifier(perf_cpu_notify); |
6691 | register_reboot_notifier(&perf_reboot_notifier); | ||
6692 | |||
6693 | ret = init_hw_breakpoint(); | ||
6694 | WARN(ret, "hw_breakpoint initialization failed with: %d", ret); | ||
6695 | } | ||
6696 | |||
6697 | static int __init perf_event_sysfs_init(void) | ||
6698 | { | ||
6699 | struct pmu *pmu; | ||
6700 | int ret; | ||
6701 | |||
6702 | mutex_lock(&pmus_lock); | ||
6703 | |||
6704 | ret = bus_register(&pmu_bus); | ||
6705 | if (ret) | ||
6706 | goto unlock; | ||
6707 | |||
6708 | list_for_each_entry(pmu, &pmus, entry) { | ||
6709 | if (!pmu->name || pmu->type < 0) | ||
6710 | continue; | ||
6711 | |||
6712 | ret = pmu_dev_alloc(pmu); | ||
6713 | WARN(ret, "Failed to register pmu: %s, reason %d\n", pmu->name, ret); | ||
6714 | } | ||
6715 | pmu_bus_running = 1; | ||
6716 | ret = 0; | ||
6717 | |||
6718 | unlock: | ||
6719 | mutex_unlock(&pmus_lock); | ||
6720 | |||
6721 | return ret; | ||
6305 | } | 6722 | } |
6723 | device_initcall(perf_event_sysfs_init); | ||
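With the software and tracepoint PMUs now registered by name and type, and dynamic PMUs getting idr-allocated ids, userspace is expected to read the type from sysfs rather than hard-code it. A hedged end-to-end example feeding the value from the earlier pmu_type_from_sysfs() sketch into perf_event_open(); the config value is up to the caller and purely illustrative:

	#include <string.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/perf_event.h>

	static int open_counter(int type, unsigned long long config)
	{
		struct perf_event_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.size   = sizeof(attr);
		attr.type   = type;	/* e.g. the value read from the sysfs "type" file */
		attr.config = config;

		/* count for the calling thread, on any cpu, no group, no flags */
		return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	}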