Diffstat (limited to 'kernel/perf_event.c')
| -rw-r--r-- | kernel/perf_event.c | 736 |
1 file changed, 522 insertions(+), 214 deletions(-)
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 2870feee81dd..999835b6112b 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
| @@ -13,6 +13,7 @@ | |||
| 13 | #include <linux/mm.h> | 13 | #include <linux/mm.h> |
| 14 | #include <linux/cpu.h> | 14 | #include <linux/cpu.h> |
| 15 | #include <linux/smp.h> | 15 | #include <linux/smp.h> |
| 16 | #include <linux/idr.h> | ||
| 16 | #include <linux/file.h> | 17 | #include <linux/file.h> |
| 17 | #include <linux/poll.h> | 18 | #include <linux/poll.h> |
| 18 | #include <linux/slab.h> | 19 | #include <linux/slab.h> |
| @@ -21,7 +22,9 @@ | |||
| 21 | #include <linux/dcache.h> | 22 | #include <linux/dcache.h> |
| 22 | #include <linux/percpu.h> | 23 | #include <linux/percpu.h> |
| 23 | #include <linux/ptrace.h> | 24 | #include <linux/ptrace.h> |
| 25 | #include <linux/reboot.h> | ||
| 24 | #include <linux/vmstat.h> | 26 | #include <linux/vmstat.h> |
| 27 | #include <linux/device.h> | ||
| 25 | #include <linux/vmalloc.h> | 28 | #include <linux/vmalloc.h> |
| 26 | #include <linux/hardirq.h> | 29 | #include <linux/hardirq.h> |
| 27 | #include <linux/rculist.h> | 30 | #include <linux/rculist.h> |
| @@ -35,6 +38,12 @@ | |||
| 35 | 38 | ||
| 36 | #include <asm/irq_regs.h> | 39 | #include <asm/irq_regs.h> |
| 37 | 40 | ||
| 41 | enum event_type_t { | ||
| 42 | EVENT_FLEXIBLE = 0x1, | ||
| 43 | EVENT_PINNED = 0x2, | ||
| 44 | EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED, | ||
| 45 | }; | ||
| 46 | |||
| 38 | atomic_t perf_task_events __read_mostly; | 47 | atomic_t perf_task_events __read_mostly; |
| 39 | static atomic_t nr_mmap_events __read_mostly; | 48 | static atomic_t nr_mmap_events __read_mostly; |
| 40 | static atomic_t nr_comm_events __read_mostly; | 49 | static atomic_t nr_comm_events __read_mostly; |
| @@ -62,6 +71,12 @@ int sysctl_perf_event_sample_rate __read_mostly = 100000; | |||
| 62 | 71 | ||
| 63 | static atomic64_t perf_event_id; | 72 | static atomic64_t perf_event_id; |
| 64 | 73 | ||
| 74 | static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx, | ||
| 75 | enum event_type_t event_type); | ||
| 76 | |||
| 77 | static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, | ||
| 78 | enum event_type_t event_type); | ||
| 79 | |||
| 65 | void __weak perf_event_print_debug(void) { } | 80 | void __weak perf_event_print_debug(void) { } |
| 66 | 81 | ||
| 67 | extern __weak const char *perf_pmu_name(void) | 82 | extern __weak const char *perf_pmu_name(void) |
| @@ -69,6 +84,11 @@ extern __weak const char *perf_pmu_name(void) | |||
| 69 | return "pmu"; | 84 | return "pmu"; |
| 70 | } | 85 | } |
| 71 | 86 | ||
| 87 | static inline u64 perf_clock(void) | ||
| 88 | { | ||
| 89 | return local_clock(); | ||
| 90 | } | ||
| 91 | |||
| 72 | void perf_pmu_disable(struct pmu *pmu) | 92 | void perf_pmu_disable(struct pmu *pmu) |
| 73 | { | 93 | { |
| 74 | int *count = this_cpu_ptr(pmu->pmu_disable_count); | 94 | int *count = this_cpu_ptr(pmu->pmu_disable_count); |
| @@ -133,6 +153,28 @@ static void unclone_ctx(struct perf_event_context *ctx) | |||
| 133 | } | 153 | } |
| 134 | } | 154 | } |
| 135 | 155 | ||
| 156 | static u32 perf_event_pid(struct perf_event *event, struct task_struct *p) | ||
| 157 | { | ||
| 158 | /* | ||
| 159 | * only top level events have the pid namespace they were created in | ||
| 160 | */ | ||
| 161 | if (event->parent) | ||
| 162 | event = event->parent; | ||
| 163 | |||
| 164 | return task_tgid_nr_ns(p, event->ns); | ||
| 165 | } | ||
| 166 | |||
| 167 | static u32 perf_event_tid(struct perf_event *event, struct task_struct *p) | ||
| 168 | { | ||
| 169 | /* | ||
| 170 | * only top level events have the pid namespace they were created in | ||
| 171 | */ | ||
| 172 | if (event->parent) | ||
| 173 | event = event->parent; | ||
| 174 | |||
| 175 | return task_pid_nr_ns(p, event->ns); | ||
| 176 | } | ||
| 177 | |||
| 136 | /* | 178 | /* |
| 137 | * If we inherit events we want to return the parent event id | 179 | * If we inherit events we want to return the parent event id |
| 138 | * to userspace. | 180 | * to userspace. |
| @@ -215,11 +257,6 @@ static void perf_unpin_context(struct perf_event_context *ctx) | |||
| 215 | put_ctx(ctx); | 257 | put_ctx(ctx); |
| 216 | } | 258 | } |
| 217 | 259 | ||
| 218 | static inline u64 perf_clock(void) | ||
| 219 | { | ||
| 220 | return local_clock(); | ||
| 221 | } | ||
| 222 | |||
| 223 | /* | 260 | /* |
| 224 | * Update the record of the current time in a context. | 261 | * Update the record of the current time in a context. |
| 225 | */ | 262 | */ |
| @@ -231,6 +268,12 @@ static void update_context_time(struct perf_event_context *ctx) | |||
| 231 | ctx->timestamp = now; | 268 | ctx->timestamp = now; |
| 232 | } | 269 | } |
| 233 | 270 | ||
| 271 | static u64 perf_event_time(struct perf_event *event) | ||
| 272 | { | ||
| 273 | struct perf_event_context *ctx = event->ctx; | ||
| 274 | return ctx ? ctx->time : 0; | ||
| 275 | } | ||
| 276 | |||
| 234 | /* | 277 | /* |
| 235 | * Update the total_time_enabled and total_time_running fields for a event. | 278 | * Update the total_time_enabled and total_time_running fields for a event. |
| 236 | */ | 279 | */ |
| @@ -244,7 +287,7 @@ static void update_event_times(struct perf_event *event) | |||
| 244 | return; | 287 | return; |
| 245 | 288 | ||
| 246 | if (ctx->is_active) | 289 | if (ctx->is_active) |
| 247 | run_end = ctx->time; | 290 | run_end = perf_event_time(event); |
| 248 | else | 291 | else |
| 249 | run_end = event->tstamp_stopped; | 292 | run_end = event->tstamp_stopped; |
| 250 | 293 | ||
| @@ -253,7 +296,7 @@ static void update_event_times(struct perf_event *event) | |||
| 253 | if (event->state == PERF_EVENT_STATE_INACTIVE) | 296 | if (event->state == PERF_EVENT_STATE_INACTIVE) |
| 254 | run_end = event->tstamp_stopped; | 297 | run_end = event->tstamp_stopped; |
| 255 | else | 298 | else |
| 256 | run_end = ctx->time; | 299 | run_end = perf_event_time(event); |
| 257 | 300 | ||
| 258 | event->total_time_running = run_end - event->tstamp_running; | 301 | event->total_time_running = run_end - event->tstamp_running; |
| 259 | } | 302 | } |
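The tstamp_* fields touched in this hunk drive the enabled/running times that PERF_FORMAT_TOTAL_TIME_ENABLED/RUNNING report back to userspace. A small worked example of that arithmetic (plain numbers chosen for illustration, not kernel code):

/* Worked example of the enabled/running accounting kept by the tstamp_*
 * fields. Numbers are arbitrary; tstamp_running is advanced across periods
 * the event was scheduled out, so "now - tstamp_running" is PMU time. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint64_t now            = 400; /* perf_event_time() / ctx->time */
    uint64_t tstamp_enabled = 100; /* event enabled at t=100 */
    uint64_t tstamp_running = 150; /* enabled 100..150 but not on the PMU */

    uint64_t total_time_enabled = now - tstamp_enabled; /* 300 */
    uint64_t total_time_running = now - tstamp_running; /* 250 */

    /* Userspace scales raw counts by enabled/running when the event was
     * multiplexed: scaled = count * enabled / running. */
    printf("enabled=%llu running=%llu\n",
           (unsigned long long)total_time_enabled,
           (unsigned long long)total_time_running);
    return 0;
}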
| @@ -312,9 +355,84 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) | |||
| 312 | ctx->nr_stat++; | 355 | ctx->nr_stat++; |
| 313 | } | 356 | } |
| 314 | 357 | ||
| 358 | /* | ||
| 359 | * Called at perf_event creation and when events are attached/detached from a | ||
| 360 | * group. | ||
| 361 | */ | ||
| 362 | static void perf_event__read_size(struct perf_event *event) | ||
| 363 | { | ||
| 364 | int entry = sizeof(u64); /* value */ | ||
| 365 | int size = 0; | ||
| 366 | int nr = 1; | ||
| 367 | |||
| 368 | if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) | ||
| 369 | size += sizeof(u64); | ||
| 370 | |||
| 371 | if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) | ||
| 372 | size += sizeof(u64); | ||
| 373 | |||
| 374 | if (event->attr.read_format & PERF_FORMAT_ID) | ||
| 375 | entry += sizeof(u64); | ||
| 376 | |||
| 377 | if (event->attr.read_format & PERF_FORMAT_GROUP) { | ||
| 378 | nr += event->group_leader->nr_siblings; | ||
| 379 | size += sizeof(u64); | ||
| 380 | } | ||
| 381 | |||
| 382 | size += entry * nr; | ||
| 383 | event->read_size = size; | ||
| 384 | } | ||
| 385 | |||
| 386 | static void perf_event__header_size(struct perf_event *event) | ||
| 387 | { | ||
| 388 | struct perf_sample_data *data; | ||
| 389 | u64 sample_type = event->attr.sample_type; | ||
| 390 | u16 size = 0; | ||
| 391 | |||
| 392 | perf_event__read_size(event); | ||
| 393 | |||
| 394 | if (sample_type & PERF_SAMPLE_IP) | ||
| 395 | size += sizeof(data->ip); | ||
| 396 | |||
| 397 | if (sample_type & PERF_SAMPLE_ADDR) | ||
| 398 | size += sizeof(data->addr); | ||
| 399 | |||
| 400 | if (sample_type & PERF_SAMPLE_PERIOD) | ||
| 401 | size += sizeof(data->period); | ||
| 402 | |||
| 403 | if (sample_type & PERF_SAMPLE_READ) | ||
| 404 | size += event->read_size; | ||
| 405 | |||
| 406 | event->header_size = size; | ||
| 407 | } | ||
| 408 | |||
| 409 | static void perf_event__id_header_size(struct perf_event *event) | ||
| 410 | { | ||
| 411 | struct perf_sample_data *data; | ||
| 412 | u64 sample_type = event->attr.sample_type; | ||
| 413 | u16 size = 0; | ||
| 414 | |||
| 415 | if (sample_type & PERF_SAMPLE_TID) | ||
| 416 | size += sizeof(data->tid_entry); | ||
| 417 | |||
| 418 | if (sample_type & PERF_SAMPLE_TIME) | ||
| 419 | size += sizeof(data->time); | ||
| 420 | |||
| 421 | if (sample_type & PERF_SAMPLE_ID) | ||
| 422 | size += sizeof(data->id); | ||
| 423 | |||
| 424 | if (sample_type & PERF_SAMPLE_STREAM_ID) | ||
| 425 | size += sizeof(data->stream_id); | ||
| 426 | |||
| 427 | if (sample_type & PERF_SAMPLE_CPU) | ||
| 428 | size += sizeof(data->cpu_entry); | ||
| 429 | |||
| 430 | event->id_header_size = size; | ||
| 431 | } | ||
| 432 | |||
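The three helpers added above precompute read_size, header_size and id_header_size once at attach/detach time instead of recomputing them on every output. For reference, a minimal userspace restatement of the read_size arithmetic, assuming the PERF_FORMAT_* bits from the perf_event ABI header; nr_siblings stands in for group_leader->nr_siblings and is an assumption of this sketch:

/* Sketch: size of the PERF_SAMPLE_READ payload for a given read_format,
 * mirroring the kernel's perf_event__read_size(). */
#include <stdio.h>
#include <stdint.h>
#include <linux/perf_event.h>

static uint64_t read_size(uint64_t read_format, int nr_siblings)
{
    uint64_t entry = sizeof(uint64_t); /* the counter value itself */
    uint64_t size = 0;
    int nr = 1;

    if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
        size += sizeof(uint64_t);
    if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
        size += sizeof(uint64_t);
    if (read_format & PERF_FORMAT_ID)
        entry += sizeof(uint64_t); /* each entry also carries an id */
    if (read_format & PERF_FORMAT_GROUP) {
        nr += nr_siblings;         /* one entry per group member */
        size += sizeof(uint64_t);  /* the leading 'nr' field */
    }
    return size + entry * nr;
}

int main(void)
{
    /* e.g. GROUP | ID with 3 siblings: 8 (nr) + 4 * 16 = 72 bytes */
    printf("%llu\n", (unsigned long long)
           read_size(PERF_FORMAT_GROUP | PERF_FORMAT_ID, 3));
    return 0;
}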
| 315 | static void perf_group_attach(struct perf_event *event) | 433 | static void perf_group_attach(struct perf_event *event) |
| 316 | { | 434 | { |
| 317 | struct perf_event *group_leader = event->group_leader; | 435 | struct perf_event *group_leader = event->group_leader, *pos; |
| 318 | 436 | ||
| 319 | /* | 437 | /* |
| 320 | * We can have double attach due to group movement in perf_event_open. | 438 | * We can have double attach due to group movement in perf_event_open. |
| @@ -333,6 +451,11 @@ static void perf_group_attach(struct perf_event *event) | |||
| 333 | 451 | ||
| 334 | list_add_tail(&event->group_entry, &group_leader->sibling_list); | 452 | list_add_tail(&event->group_entry, &group_leader->sibling_list); |
| 335 | group_leader->nr_siblings++; | 453 | group_leader->nr_siblings++; |
| 454 | |||
| 455 | perf_event__header_size(group_leader); | ||
| 456 | |||
| 457 | list_for_each_entry(pos, &group_leader->sibling_list, group_entry) | ||
| 458 | perf_event__header_size(pos); | ||
| 336 | } | 459 | } |
| 337 | 460 | ||
| 338 | /* | 461 | /* |
| @@ -391,7 +514,7 @@ static void perf_group_detach(struct perf_event *event) | |||
| 391 | if (event->group_leader != event) { | 514 | if (event->group_leader != event) { |
| 392 | list_del_init(&event->group_entry); | 515 | list_del_init(&event->group_entry); |
| 393 | event->group_leader->nr_siblings--; | 516 | event->group_leader->nr_siblings--; |
| 394 | return; | 517 | goto out; |
| 395 | } | 518 | } |
| 396 | 519 | ||
| 397 | if (!list_empty(&event->group_entry)) | 520 | if (!list_empty(&event->group_entry)) |
| @@ -410,6 +533,12 @@ static void perf_group_detach(struct perf_event *event) | |||
| 410 | /* Inherit group flags from the previous leader */ | 533 | /* Inherit group flags from the previous leader */ |
| 411 | sibling->group_flags = event->group_flags; | 534 | sibling->group_flags = event->group_flags; |
| 412 | } | 535 | } |
| 536 | |||
| 537 | out: | ||
| 538 | perf_event__header_size(event->group_leader); | ||
| 539 | |||
| 540 | list_for_each_entry(tmp, &event->group_leader->sibling_list, group_entry) | ||
| 541 | perf_event__header_size(tmp); | ||
| 413 | } | 542 | } |
| 414 | 543 | ||
| 415 | static inline int | 544 | static inline int |
| @@ -423,6 +552,7 @@ event_sched_out(struct perf_event *event, | |||
| 423 | struct perf_cpu_context *cpuctx, | 552 | struct perf_cpu_context *cpuctx, |
| 424 | struct perf_event_context *ctx) | 553 | struct perf_event_context *ctx) |
| 425 | { | 554 | { |
| 555 | u64 tstamp = perf_event_time(event); | ||
| 426 | u64 delta; | 556 | u64 delta; |
| 427 | /* | 557 | /* |
| 428 | * An event which could not be activated because of | 558 | * An event which could not be activated because of |
| @@ -434,7 +564,7 @@ event_sched_out(struct perf_event *event, | |||
| 434 | && !event_filter_match(event)) { | 564 | && !event_filter_match(event)) { |
| 435 | delta = ctx->time - event->tstamp_stopped; | 565 | delta = ctx->time - event->tstamp_stopped; |
| 436 | event->tstamp_running += delta; | 566 | event->tstamp_running += delta; |
| 437 | event->tstamp_stopped = ctx->time; | 567 | event->tstamp_stopped = tstamp; |
| 438 | } | 568 | } |
| 439 | 569 | ||
| 440 | if (event->state != PERF_EVENT_STATE_ACTIVE) | 570 | if (event->state != PERF_EVENT_STATE_ACTIVE) |
| @@ -445,7 +575,7 @@ event_sched_out(struct perf_event *event, | |||
| 445 | event->pending_disable = 0; | 575 | event->pending_disable = 0; |
| 446 | event->state = PERF_EVENT_STATE_OFF; | 576 | event->state = PERF_EVENT_STATE_OFF; |
| 447 | } | 577 | } |
| 448 | event->tstamp_stopped = ctx->time; | 578 | event->tstamp_stopped = tstamp; |
| 449 | event->pmu->del(event, 0); | 579 | event->pmu->del(event, 0); |
| 450 | event->oncpu = -1; | 580 | event->oncpu = -1; |
| 451 | 581 | ||
| @@ -657,6 +787,8 @@ event_sched_in(struct perf_event *event, | |||
| 657 | struct perf_cpu_context *cpuctx, | 787 | struct perf_cpu_context *cpuctx, |
| 658 | struct perf_event_context *ctx) | 788 | struct perf_event_context *ctx) |
| 659 | { | 789 | { |
| 790 | u64 tstamp = perf_event_time(event); | ||
| 791 | |||
| 660 | if (event->state <= PERF_EVENT_STATE_OFF) | 792 | if (event->state <= PERF_EVENT_STATE_OFF) |
| 661 | return 0; | 793 | return 0; |
| 662 | 794 | ||
| @@ -673,9 +805,9 @@ event_sched_in(struct perf_event *event, | |||
| 673 | return -EAGAIN; | 805 | return -EAGAIN; |
| 674 | } | 806 | } |
| 675 | 807 | ||
| 676 | event->tstamp_running += ctx->time - event->tstamp_stopped; | 808 | event->tstamp_running += tstamp - event->tstamp_stopped; |
| 677 | 809 | ||
| 678 | event->shadow_ctx_time = ctx->time - ctx->timestamp; | 810 | event->shadow_ctx_time = tstamp - ctx->timestamp; |
| 679 | 811 | ||
| 680 | if (!is_software_event(event)) | 812 | if (!is_software_event(event)) |
| 681 | cpuctx->active_oncpu++; | 813 | cpuctx->active_oncpu++; |
| @@ -787,11 +919,13 @@ static int group_can_go_on(struct perf_event *event, | |||
| 787 | static void add_event_to_ctx(struct perf_event *event, | 919 | static void add_event_to_ctx(struct perf_event *event, |
| 788 | struct perf_event_context *ctx) | 920 | struct perf_event_context *ctx) |
| 789 | { | 921 | { |
| 922 | u64 tstamp = perf_event_time(event); | ||
| 923 | |||
| 790 | list_add_event(event, ctx); | 924 | list_add_event(event, ctx); |
| 791 | perf_group_attach(event); | 925 | perf_group_attach(event); |
| 792 | event->tstamp_enabled = ctx->time; | 926 | event->tstamp_enabled = tstamp; |
| 793 | event->tstamp_running = ctx->time; | 927 | event->tstamp_running = tstamp; |
| 794 | event->tstamp_stopped = ctx->time; | 928 | event->tstamp_stopped = tstamp; |
| 795 | } | 929 | } |
| 796 | 930 | ||
| 797 | /* | 931 | /* |
| @@ -826,7 +960,7 @@ static void __perf_install_in_context(void *info) | |||
| 826 | 960 | ||
| 827 | add_event_to_ctx(event, ctx); | 961 | add_event_to_ctx(event, ctx); |
| 828 | 962 | ||
| 829 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | 963 | if (!event_filter_match(event)) |
| 830 | goto unlock; | 964 | goto unlock; |
| 831 | 965 | ||
| 832 | /* | 966 | /* |
| @@ -931,14 +1065,13 @@ static void __perf_event_mark_enabled(struct perf_event *event, | |||
| 931 | struct perf_event_context *ctx) | 1065 | struct perf_event_context *ctx) |
| 932 | { | 1066 | { |
| 933 | struct perf_event *sub; | 1067 | struct perf_event *sub; |
| 1068 | u64 tstamp = perf_event_time(event); | ||
| 934 | 1069 | ||
| 935 | event->state = PERF_EVENT_STATE_INACTIVE; | 1070 | event->state = PERF_EVENT_STATE_INACTIVE; |
| 936 | event->tstamp_enabled = ctx->time - event->total_time_enabled; | 1071 | event->tstamp_enabled = tstamp - event->total_time_enabled; |
| 937 | list_for_each_entry(sub, &event->sibling_list, group_entry) { | 1072 | list_for_each_entry(sub, &event->sibling_list, group_entry) { |
| 938 | if (sub->state >= PERF_EVENT_STATE_INACTIVE) { | 1073 | if (sub->state >= PERF_EVENT_STATE_INACTIVE) |
| 939 | sub->tstamp_enabled = | 1074 | sub->tstamp_enabled = tstamp - sub->total_time_enabled; |
| 940 | ctx->time - sub->total_time_enabled; | ||
| 941 | } | ||
| 942 | } | 1075 | } |
| 943 | } | 1076 | } |
| 944 | 1077 | ||
| @@ -971,7 +1104,7 @@ static void __perf_event_enable(void *info) | |||
| 971 | goto unlock; | 1104 | goto unlock; |
| 972 | __perf_event_mark_enabled(event, ctx); | 1105 | __perf_event_mark_enabled(event, ctx); |
| 973 | 1106 | ||
| 974 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | 1107 | if (!event_filter_match(event)) |
| 975 | goto unlock; | 1108 | goto unlock; |
| 976 | 1109 | ||
| 977 | /* | 1110 | /* |
| @@ -1073,7 +1206,7 @@ static int perf_event_refresh(struct perf_event *event, int refresh) | |||
| 1073 | /* | 1206 | /* |
| 1074 | * not supported on inherited events | 1207 | * not supported on inherited events |
| 1075 | */ | 1208 | */ |
| 1076 | if (event->attr.inherit) | 1209 | if (event->attr.inherit || !is_sampling_event(event)) |
| 1077 | return -EINVAL; | 1210 | return -EINVAL; |
| 1078 | 1211 | ||
| 1079 | atomic_add(refresh, &event->event_limit); | 1212 | atomic_add(refresh, &event->event_limit); |
| @@ -1082,12 +1215,6 @@ static int perf_event_refresh(struct perf_event *event, int refresh) | |||
| 1082 | return 0; | 1215 | return 0; |
| 1083 | } | 1216 | } |
| 1084 | 1217 | ||
| 1085 | enum event_type_t { | ||
| 1086 | EVENT_FLEXIBLE = 0x1, | ||
| 1087 | EVENT_PINNED = 0x2, | ||
| 1088 | EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED, | ||
| 1089 | }; | ||
| 1090 | |||
| 1091 | static void ctx_sched_out(struct perf_event_context *ctx, | 1218 | static void ctx_sched_out(struct perf_event_context *ctx, |
| 1092 | struct perf_cpu_context *cpuctx, | 1219 | struct perf_cpu_context *cpuctx, |
| 1093 | enum event_type_t event_type) | 1220 | enum event_type_t event_type) |
| @@ -1324,7 +1451,7 @@ ctx_pinned_sched_in(struct perf_event_context *ctx, | |||
| 1324 | list_for_each_entry(event, &ctx->pinned_groups, group_entry) { | 1451 | list_for_each_entry(event, &ctx->pinned_groups, group_entry) { |
| 1325 | if (event->state <= PERF_EVENT_STATE_OFF) | 1452 | if (event->state <= PERF_EVENT_STATE_OFF) |
| 1326 | continue; | 1453 | continue; |
| 1327 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | 1454 | if (!event_filter_match(event)) |
| 1328 | continue; | 1455 | continue; |
| 1329 | 1456 | ||
| 1330 | if (group_can_go_on(event, cpuctx, 1)) | 1457 | if (group_can_go_on(event, cpuctx, 1)) |
| @@ -1356,7 +1483,7 @@ ctx_flexible_sched_in(struct perf_event_context *ctx, | |||
| 1356 | * Listen to the 'cpu' scheduling filter constraint | 1483 | * Listen to the 'cpu' scheduling filter constraint |
| 1357 | * of events: | 1484 | * of events: |
| 1358 | */ | 1485 | */ |
| 1359 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | 1486 | if (!event_filter_match(event)) |
| 1360 | continue; | 1487 | continue; |
| 1361 | 1488 | ||
| 1362 | if (group_can_go_on(event, cpuctx, can_add_hw)) { | 1489 | if (group_can_go_on(event, cpuctx, can_add_hw)) { |
| @@ -1583,7 +1710,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period) | |||
| 1583 | if (event->state != PERF_EVENT_STATE_ACTIVE) | 1710 | if (event->state != PERF_EVENT_STATE_ACTIVE) |
| 1584 | continue; | 1711 | continue; |
| 1585 | 1712 | ||
| 1586 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | 1713 | if (!event_filter_match(event)) |
| 1587 | continue; | 1714 | continue; |
| 1588 | 1715 | ||
| 1589 | hwc = &event->hw; | 1716 | hwc = &event->hw; |
| @@ -1774,11 +1901,12 @@ static void __perf_event_read(void *info) | |||
| 1774 | return; | 1901 | return; |
| 1775 | 1902 | ||
| 1776 | raw_spin_lock(&ctx->lock); | 1903 | raw_spin_lock(&ctx->lock); |
| 1777 | update_context_time(ctx); | 1904 | if (ctx->is_active) |
| 1905 | update_context_time(ctx); | ||
| 1778 | update_event_times(event); | 1906 | update_event_times(event); |
| 1907 | if (event->state == PERF_EVENT_STATE_ACTIVE) | ||
| 1908 | event->pmu->read(event); | ||
| 1779 | raw_spin_unlock(&ctx->lock); | 1909 | raw_spin_unlock(&ctx->lock); |
| 1780 | |||
| 1781 | event->pmu->read(event); | ||
| 1782 | } | 1910 | } |
| 1783 | 1911 | ||
| 1784 | static inline u64 perf_event_count(struct perf_event *event) | 1912 | static inline u64 perf_event_count(struct perf_event *event) |
| @@ -1872,8 +2000,7 @@ static int alloc_callchain_buffers(void) | |||
| 1872 | * accessed from NMI. Use a temporary manual per cpu allocation | 2000 | * accessed from NMI. Use a temporary manual per cpu allocation |
| 1873 | * until that gets sorted out. | 2001 | * until that gets sorted out. |
| 1874 | */ | 2002 | */ |
| 1875 | size = sizeof(*entries) + sizeof(struct perf_callchain_entry *) * | 2003 | size = offsetof(struct callchain_cpus_entries, cpu_entries[nr_cpu_ids]); |
| 1876 | num_possible_cpus(); | ||
| 1877 | 2004 | ||
| 1878 | entries = kzalloc(size, GFP_KERNEL); | 2005 | entries = kzalloc(size, GFP_KERNEL); |
| 1879 | if (!entries) | 2006 | if (!entries) |
| @@ -2074,13 +2201,6 @@ find_lively_task_by_vpid(pid_t vpid) | |||
| 2074 | if (!task) | 2201 | if (!task) |
| 2075 | return ERR_PTR(-ESRCH); | 2202 | return ERR_PTR(-ESRCH); |
| 2076 | 2203 | ||
| 2077 | /* | ||
| 2078 | * Can't attach events to a dying task. | ||
| 2079 | */ | ||
| 2080 | err = -ESRCH; | ||
| 2081 | if (task->flags & PF_EXITING) | ||
| 2082 | goto errout; | ||
| 2083 | |||
| 2084 | /* Reuse ptrace permission checks for now. */ | 2204 | /* Reuse ptrace permission checks for now. */ |
| 2085 | err = -EACCES; | 2205 | err = -EACCES; |
| 2086 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) | 2206 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) |
| @@ -2101,14 +2221,11 @@ find_get_context(struct pmu *pmu, struct task_struct *task, int cpu) | |||
| 2101 | unsigned long flags; | 2221 | unsigned long flags; |
| 2102 | int ctxn, err; | 2222 | int ctxn, err; |
| 2103 | 2223 | ||
| 2104 | if (!task && cpu != -1) { | 2224 | if (!task) { |
| 2105 | /* Must be root to operate on a CPU event: */ | 2225 | /* Must be root to operate on a CPU event: */ |
| 2106 | if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) | 2226 | if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) |
| 2107 | return ERR_PTR(-EACCES); | 2227 | return ERR_PTR(-EACCES); |
| 2108 | 2228 | ||
| 2109 | if (cpu < 0 || cpu >= nr_cpumask_bits) | ||
| 2110 | return ERR_PTR(-EINVAL); | ||
| 2111 | |||
| 2112 | /* | 2229 | /* |
| 2113 | * We could be clever and allow to attach a event to an | 2230 | * We could be clever and allow to attach a event to an |
| 2114 | * offline CPU and activate it when the CPU comes up, but | 2231 | * offline CPU and activate it when the CPU comes up, but |
| @@ -2144,14 +2261,27 @@ retry: | |||
| 2144 | 2261 | ||
| 2145 | get_ctx(ctx); | 2262 | get_ctx(ctx); |
| 2146 | 2263 | ||
| 2147 | if (cmpxchg(&task->perf_event_ctxp[ctxn], NULL, ctx)) { | 2264 | err = 0; |
| 2148 | /* | 2265 | mutex_lock(&task->perf_event_mutex); |
| 2149 | * We raced with some other task; use | 2266 | /* |
| 2150 | * the context they set. | 2267 | * If it has already passed perf_event_exit_task(). |
| 2151 | */ | 2268 | * we must see PF_EXITING, it takes this mutex too. |
| 2269 | */ | ||
| 2270 | if (task->flags & PF_EXITING) | ||
| 2271 | err = -ESRCH; | ||
| 2272 | else if (task->perf_event_ctxp[ctxn]) | ||
| 2273 | err = -EAGAIN; | ||
| 2274 | else | ||
| 2275 | rcu_assign_pointer(task->perf_event_ctxp[ctxn], ctx); | ||
| 2276 | mutex_unlock(&task->perf_event_mutex); | ||
| 2277 | |||
| 2278 | if (unlikely(err)) { | ||
| 2152 | put_task_struct(task); | 2279 | put_task_struct(task); |
| 2153 | kfree(ctx); | 2280 | kfree(ctx); |
| 2154 | goto retry; | 2281 | |
| 2282 | if (err == -EAGAIN) | ||
| 2283 | goto retry; | ||
| 2284 | goto errout; | ||
| 2155 | } | 2285 | } |
| 2156 | } | 2286 | } |
| 2157 | 2287 | ||
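The hunk above replaces a bare cmpxchg() publication of the task context with publication under task->perf_event_mutex, so a task that has already run perf_event_exit_task() (which takes the same mutex and sets PF_EXITING) is detected reliably, and a lost race simply retries. A minimal pthread sketch of the same publish-once-or-bail pattern; all names here are illustrative, not kernel APIs:

/* Sketch of "publish once under a lock, bail if the owner is exiting". */
#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>

struct owner {
    pthread_mutex_t lock;   /* analogue of task->perf_event_mutex */
    bool exiting;           /* analogue of PF_EXITING */
    void *ctx;              /* analogue of task->perf_event_ctxp[] */
};

static int publish_ctx(struct owner *o, void *new_ctx)
{
    int err = 0;

    pthread_mutex_lock(&o->lock);
    if (o->exiting)
        err = -ESRCH;       /* owner is already tearing down */
    else if (o->ctx)
        err = -EAGAIN;      /* lost the race; caller retries */
    else
        o->ctx = new_ctx;   /* safe: the teardown path takes this lock too */
    pthread_mutex_unlock(&o->lock);

    if (err)
        free(new_ctx);      /* drop the caller-allocated context on failure */
    return err;
}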
| @@ -2289,31 +2419,6 @@ static int perf_release(struct inode *inode, struct file *file) | |||
| 2289 | return perf_event_release_kernel(event); | 2419 | return perf_event_release_kernel(event); |
| 2290 | } | 2420 | } |
| 2291 | 2421 | ||
| 2292 | static int perf_event_read_size(struct perf_event *event) | ||
| 2293 | { | ||
| 2294 | int entry = sizeof(u64); /* value */ | ||
| 2295 | int size = 0; | ||
| 2296 | int nr = 1; | ||
| 2297 | |||
| 2298 | if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) | ||
| 2299 | size += sizeof(u64); | ||
| 2300 | |||
| 2301 | if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) | ||
| 2302 | size += sizeof(u64); | ||
| 2303 | |||
| 2304 | if (event->attr.read_format & PERF_FORMAT_ID) | ||
| 2305 | entry += sizeof(u64); | ||
| 2306 | |||
| 2307 | if (event->attr.read_format & PERF_FORMAT_GROUP) { | ||
| 2308 | nr += event->group_leader->nr_siblings; | ||
| 2309 | size += sizeof(u64); | ||
| 2310 | } | ||
| 2311 | |||
| 2312 | size += entry * nr; | ||
| 2313 | |||
| 2314 | return size; | ||
| 2315 | } | ||
| 2316 | |||
| 2317 | u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) | 2422 | u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) |
| 2318 | { | 2423 | { |
| 2319 | struct perf_event *child; | 2424 | struct perf_event *child; |
| @@ -2428,7 +2533,7 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count) | |||
| 2428 | if (event->state == PERF_EVENT_STATE_ERROR) | 2533 | if (event->state == PERF_EVENT_STATE_ERROR) |
| 2429 | return 0; | 2534 | return 0; |
| 2430 | 2535 | ||
| 2431 | if (count < perf_event_read_size(event)) | 2536 | if (count < event->read_size) |
| 2432 | return -ENOSPC; | 2537 | return -ENOSPC; |
| 2433 | 2538 | ||
| 2434 | WARN_ON_ONCE(event->ctx->parent_ctx); | 2539 | WARN_ON_ONCE(event->ctx->parent_ctx); |
| @@ -2514,7 +2619,7 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg) | |||
| 2514 | int ret = 0; | 2619 | int ret = 0; |
| 2515 | u64 value; | 2620 | u64 value; |
| 2516 | 2621 | ||
| 2517 | if (!event->attr.sample_period) | 2622 | if (!is_sampling_event(event)) |
| 2518 | return -EINVAL; | 2623 | return -EINVAL; |
| 2519 | 2624 | ||
| 2520 | if (copy_from_user(&value, arg, sizeof(value))) | 2625 | if (copy_from_user(&value, arg, sizeof(value))) |
| @@ -3305,6 +3410,73 @@ __always_inline void perf_output_copy(struct perf_output_handle *handle, | |||
| 3305 | } while (len); | 3410 | } while (len); |
| 3306 | } | 3411 | } |
| 3307 | 3412 | ||
| 3413 | static void __perf_event_header__init_id(struct perf_event_header *header, | ||
| 3414 | struct perf_sample_data *data, | ||
| 3415 | struct perf_event *event) | ||
| 3416 | { | ||
| 3417 | u64 sample_type = event->attr.sample_type; | ||
| 3418 | |||
| 3419 | data->type = sample_type; | ||
| 3420 | header->size += event->id_header_size; | ||
| 3421 | |||
| 3422 | if (sample_type & PERF_SAMPLE_TID) { | ||
| 3423 | /* namespace issues */ | ||
| 3424 | data->tid_entry.pid = perf_event_pid(event, current); | ||
| 3425 | data->tid_entry.tid = perf_event_tid(event, current); | ||
| 3426 | } | ||
| 3427 | |||
| 3428 | if (sample_type & PERF_SAMPLE_TIME) | ||
| 3429 | data->time = perf_clock(); | ||
| 3430 | |||
| 3431 | if (sample_type & PERF_SAMPLE_ID) | ||
| 3432 | data->id = primary_event_id(event); | ||
| 3433 | |||
| 3434 | if (sample_type & PERF_SAMPLE_STREAM_ID) | ||
| 3435 | data->stream_id = event->id; | ||
| 3436 | |||
| 3437 | if (sample_type & PERF_SAMPLE_CPU) { | ||
| 3438 | data->cpu_entry.cpu = raw_smp_processor_id(); | ||
| 3439 | data->cpu_entry.reserved = 0; | ||
| 3440 | } | ||
| 3441 | } | ||
| 3442 | |||
| 3443 | static void perf_event_header__init_id(struct perf_event_header *header, | ||
| 3444 | struct perf_sample_data *data, | ||
| 3445 | struct perf_event *event) | ||
| 3446 | { | ||
| 3447 | if (event->attr.sample_id_all) | ||
| 3448 | __perf_event_header__init_id(header, data, event); | ||
| 3449 | } | ||
| 3450 | |||
| 3451 | static void __perf_event__output_id_sample(struct perf_output_handle *handle, | ||
| 3452 | struct perf_sample_data *data) | ||
| 3453 | { | ||
| 3454 | u64 sample_type = data->type; | ||
| 3455 | |||
| 3456 | if (sample_type & PERF_SAMPLE_TID) | ||
| 3457 | perf_output_put(handle, data->tid_entry); | ||
| 3458 | |||
| 3459 | if (sample_type & PERF_SAMPLE_TIME) | ||
| 3460 | perf_output_put(handle, data->time); | ||
| 3461 | |||
| 3462 | if (sample_type & PERF_SAMPLE_ID) | ||
| 3463 | perf_output_put(handle, data->id); | ||
| 3464 | |||
| 3465 | if (sample_type & PERF_SAMPLE_STREAM_ID) | ||
| 3466 | perf_output_put(handle, data->stream_id); | ||
| 3467 | |||
| 3468 | if (sample_type & PERF_SAMPLE_CPU) | ||
| 3469 | perf_output_put(handle, data->cpu_entry); | ||
| 3470 | } | ||
| 3471 | |||
| 3472 | static void perf_event__output_id_sample(struct perf_event *event, | ||
| 3473 | struct perf_output_handle *handle, | ||
| 3474 | struct perf_sample_data *sample) | ||
| 3475 | { | ||
| 3476 | if (event->attr.sample_id_all) | ||
| 3477 | __perf_event__output_id_sample(handle, sample); | ||
| 3478 | } | ||
| 3479 | |||
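With attr.sample_id_all set, every non-sample record (MMAP, COMM, LOST, ...) now gets the same ID fields appended via the helpers above; event->id_header_size is the precomputed length of that trailer. For reference, the same arithmetic restated in userspace terms, assuming the PERF_SAMPLE_* bits from <linux/perf_event.h> and the ABI field widths (TID and CPU are pairs of u32s, the rest single u64s):

/* Sketch: bytes appended to a non-sample record when sample_id_all is set,
 * mirroring perf_event__id_header_size(). */
#include <stdio.h>
#include <stdint.h>
#include <linux/perf_event.h>

static uint64_t id_header_size(uint64_t sample_type)
{
    uint64_t size = 0;

    if (sample_type & PERF_SAMPLE_TID)
        size += 8;  /* u32 pid + u32 tid */
    if (sample_type & PERF_SAMPLE_TIME)
        size += 8;
    if (sample_type & PERF_SAMPLE_ID)
        size += 8;
    if (sample_type & PERF_SAMPLE_STREAM_ID)
        size += 8;
    if (sample_type & PERF_SAMPLE_CPU)
        size += 8;  /* u32 cpu + u32 reserved */
    return size;
}

int main(void)
{
    printf("%llu\n", (unsigned long long)
           id_header_size(PERF_SAMPLE_TID | PERF_SAMPLE_TIME |
                          PERF_SAMPLE_CPU)); /* 24 */
    return 0;
}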
| 3308 | int perf_output_begin(struct perf_output_handle *handle, | 3480 | int perf_output_begin(struct perf_output_handle *handle, |
| 3309 | struct perf_event *event, unsigned int size, | 3481 | struct perf_event *event, unsigned int size, |
| 3310 | int nmi, int sample) | 3482 | int nmi, int sample) |
| @@ -3312,6 +3484,7 @@ int perf_output_begin(struct perf_output_handle *handle, | |||
| 3312 | struct perf_buffer *buffer; | 3484 | struct perf_buffer *buffer; |
| 3313 | unsigned long tail, offset, head; | 3485 | unsigned long tail, offset, head; |
| 3314 | int have_lost; | 3486 | int have_lost; |
| 3487 | struct perf_sample_data sample_data; | ||
| 3315 | struct { | 3488 | struct { |
| 3316 | struct perf_event_header header; | 3489 | struct perf_event_header header; |
| 3317 | u64 id; | 3490 | u64 id; |
| @@ -3338,8 +3511,12 @@ int perf_output_begin(struct perf_output_handle *handle, | |||
| 3338 | goto out; | 3511 | goto out; |
| 3339 | 3512 | ||
| 3340 | have_lost = local_read(&buffer->lost); | 3513 | have_lost = local_read(&buffer->lost); |
| 3341 | if (have_lost) | 3514 | if (have_lost) { |
| 3342 | size += sizeof(lost_event); | 3515 | lost_event.header.size = sizeof(lost_event); |
| 3516 | perf_event_header__init_id(&lost_event.header, &sample_data, | ||
| 3517 | event); | ||
| 3518 | size += lost_event.header.size; | ||
| 3519 | } | ||
| 3343 | 3520 | ||
| 3344 | perf_output_get_handle(handle); | 3521 | perf_output_get_handle(handle); |
| 3345 | 3522 | ||
| @@ -3370,11 +3547,11 @@ int perf_output_begin(struct perf_output_handle *handle, | |||
| 3370 | if (have_lost) { | 3547 | if (have_lost) { |
| 3371 | lost_event.header.type = PERF_RECORD_LOST; | 3548 | lost_event.header.type = PERF_RECORD_LOST; |
| 3372 | lost_event.header.misc = 0; | 3549 | lost_event.header.misc = 0; |
| 3373 | lost_event.header.size = sizeof(lost_event); | ||
| 3374 | lost_event.id = event->id; | 3550 | lost_event.id = event->id; |
| 3375 | lost_event.lost = local_xchg(&buffer->lost, 0); | 3551 | lost_event.lost = local_xchg(&buffer->lost, 0); |
| 3376 | 3552 | ||
| 3377 | perf_output_put(handle, lost_event); | 3553 | perf_output_put(handle, lost_event); |
| 3554 | perf_event__output_id_sample(event, handle, &sample_data); | ||
| 3378 | } | 3555 | } |
| 3379 | 3556 | ||
| 3380 | return 0; | 3557 | return 0; |
| @@ -3407,28 +3584,6 @@ void perf_output_end(struct perf_output_handle *handle) | |||
| 3407 | rcu_read_unlock(); | 3584 | rcu_read_unlock(); |
| 3408 | } | 3585 | } |
| 3409 | 3586 | ||
| 3410 | static u32 perf_event_pid(struct perf_event *event, struct task_struct *p) | ||
| 3411 | { | ||
| 3412 | /* | ||
| 3413 | * only top level events have the pid namespace they were created in | ||
| 3414 | */ | ||
| 3415 | if (event->parent) | ||
| 3416 | event = event->parent; | ||
| 3417 | |||
| 3418 | return task_tgid_nr_ns(p, event->ns); | ||
| 3419 | } | ||
| 3420 | |||
| 3421 | static u32 perf_event_tid(struct perf_event *event, struct task_struct *p) | ||
| 3422 | { | ||
| 3423 | /* | ||
| 3424 | * only top level events have the pid namespace they were created in | ||
| 3425 | */ | ||
| 3426 | if (event->parent) | ||
| 3427 | event = event->parent; | ||
| 3428 | |||
| 3429 | return task_pid_nr_ns(p, event->ns); | ||
| 3430 | } | ||
| 3431 | |||
| 3432 | static void perf_output_read_one(struct perf_output_handle *handle, | 3587 | static void perf_output_read_one(struct perf_output_handle *handle, |
| 3433 | struct perf_event *event, | 3588 | struct perf_event *event, |
| 3434 | u64 enabled, u64 running) | 3589 | u64 enabled, u64 running) |
| @@ -3603,61 +3758,16 @@ void perf_prepare_sample(struct perf_event_header *header, | |||
| 3603 | { | 3758 | { |
| 3604 | u64 sample_type = event->attr.sample_type; | 3759 | u64 sample_type = event->attr.sample_type; |
| 3605 | 3760 | ||
| 3606 | data->type = sample_type; | ||
| 3607 | |||
| 3608 | header->type = PERF_RECORD_SAMPLE; | 3761 | header->type = PERF_RECORD_SAMPLE; |
| 3609 | header->size = sizeof(*header); | 3762 | header->size = sizeof(*header) + event->header_size; |
| 3610 | 3763 | ||
| 3611 | header->misc = 0; | 3764 | header->misc = 0; |
| 3612 | header->misc |= perf_misc_flags(regs); | 3765 | header->misc |= perf_misc_flags(regs); |
| 3613 | 3766 | ||
| 3614 | if (sample_type & PERF_SAMPLE_IP) { | 3767 | __perf_event_header__init_id(header, data, event); |
| 3615 | data->ip = perf_instruction_pointer(regs); | ||
| 3616 | |||
| 3617 | header->size += sizeof(data->ip); | ||
| 3618 | } | ||
| 3619 | |||
| 3620 | if (sample_type & PERF_SAMPLE_TID) { | ||
| 3621 | /* namespace issues */ | ||
| 3622 | data->tid_entry.pid = perf_event_pid(event, current); | ||
| 3623 | data->tid_entry.tid = perf_event_tid(event, current); | ||
| 3624 | |||
| 3625 | header->size += sizeof(data->tid_entry); | ||
| 3626 | } | ||
| 3627 | |||
| 3628 | if (sample_type & PERF_SAMPLE_TIME) { | ||
| 3629 | data->time = perf_clock(); | ||
| 3630 | |||
| 3631 | header->size += sizeof(data->time); | ||
| 3632 | } | ||
| 3633 | |||
| 3634 | if (sample_type & PERF_SAMPLE_ADDR) | ||
| 3635 | header->size += sizeof(data->addr); | ||
| 3636 | |||
| 3637 | if (sample_type & PERF_SAMPLE_ID) { | ||
| 3638 | data->id = primary_event_id(event); | ||
| 3639 | |||
| 3640 | header->size += sizeof(data->id); | ||
| 3641 | } | ||
| 3642 | |||
| 3643 | if (sample_type & PERF_SAMPLE_STREAM_ID) { | ||
| 3644 | data->stream_id = event->id; | ||
| 3645 | |||
| 3646 | header->size += sizeof(data->stream_id); | ||
| 3647 | } | ||
| 3648 | |||
| 3649 | if (sample_type & PERF_SAMPLE_CPU) { | ||
| 3650 | data->cpu_entry.cpu = raw_smp_processor_id(); | ||
| 3651 | data->cpu_entry.reserved = 0; | ||
| 3652 | |||
| 3653 | header->size += sizeof(data->cpu_entry); | ||
| 3654 | } | ||
| 3655 | |||
| 3656 | if (sample_type & PERF_SAMPLE_PERIOD) | ||
| 3657 | header->size += sizeof(data->period); | ||
| 3658 | 3768 | ||
| 3659 | if (sample_type & PERF_SAMPLE_READ) | 3769 | if (sample_type & PERF_SAMPLE_IP) |
| 3660 | header->size += perf_event_read_size(event); | 3770 | data->ip = perf_instruction_pointer(regs); |
| 3661 | 3771 | ||
| 3662 | if (sample_type & PERF_SAMPLE_CALLCHAIN) { | 3772 | if (sample_type & PERF_SAMPLE_CALLCHAIN) { |
| 3663 | int size = 1; | 3773 | int size = 1; |
| @@ -3722,23 +3832,26 @@ perf_event_read_event(struct perf_event *event, | |||
| 3722 | struct task_struct *task) | 3832 | struct task_struct *task) |
| 3723 | { | 3833 | { |
| 3724 | struct perf_output_handle handle; | 3834 | struct perf_output_handle handle; |
| 3835 | struct perf_sample_data sample; | ||
| 3725 | struct perf_read_event read_event = { | 3836 | struct perf_read_event read_event = { |
| 3726 | .header = { | 3837 | .header = { |
| 3727 | .type = PERF_RECORD_READ, | 3838 | .type = PERF_RECORD_READ, |
| 3728 | .misc = 0, | 3839 | .misc = 0, |
| 3729 | .size = sizeof(read_event) + perf_event_read_size(event), | 3840 | .size = sizeof(read_event) + event->read_size, |
| 3730 | }, | 3841 | }, |
| 3731 | .pid = perf_event_pid(event, task), | 3842 | .pid = perf_event_pid(event, task), |
| 3732 | .tid = perf_event_tid(event, task), | 3843 | .tid = perf_event_tid(event, task), |
| 3733 | }; | 3844 | }; |
| 3734 | int ret; | 3845 | int ret; |
| 3735 | 3846 | ||
| 3847 | perf_event_header__init_id(&read_event.header, &sample, event); | ||
| 3736 | ret = perf_output_begin(&handle, event, read_event.header.size, 0, 0); | 3848 | ret = perf_output_begin(&handle, event, read_event.header.size, 0, 0); |
| 3737 | if (ret) | 3849 | if (ret) |
| 3738 | return; | 3850 | return; |
| 3739 | 3851 | ||
| 3740 | perf_output_put(&handle, read_event); | 3852 | perf_output_put(&handle, read_event); |
| 3741 | perf_output_read(&handle, event); | 3853 | perf_output_read(&handle, event); |
| 3854 | perf_event__output_id_sample(event, &handle, &sample); | ||
| 3742 | 3855 | ||
| 3743 | perf_output_end(&handle); | 3856 | perf_output_end(&handle); |
| 3744 | } | 3857 | } |
| @@ -3768,14 +3881,16 @@ static void perf_event_task_output(struct perf_event *event, | |||
| 3768 | struct perf_task_event *task_event) | 3881 | struct perf_task_event *task_event) |
| 3769 | { | 3882 | { |
| 3770 | struct perf_output_handle handle; | 3883 | struct perf_output_handle handle; |
| 3884 | struct perf_sample_data sample; | ||
| 3771 | struct task_struct *task = task_event->task; | 3885 | struct task_struct *task = task_event->task; |
| 3772 | int size, ret; | 3886 | int ret, size = task_event->event_id.header.size; |
| 3773 | 3887 | ||
| 3774 | size = task_event->event_id.header.size; | 3888 | perf_event_header__init_id(&task_event->event_id.header, &sample, event); |
| 3775 | ret = perf_output_begin(&handle, event, size, 0, 0); | ||
| 3776 | 3889 | ||
| 3890 | ret = perf_output_begin(&handle, event, | ||
| 3891 | task_event->event_id.header.size, 0, 0); | ||
| 3777 | if (ret) | 3892 | if (ret) |
| 3778 | return; | 3893 | goto out; |
| 3779 | 3894 | ||
| 3780 | task_event->event_id.pid = perf_event_pid(event, task); | 3895 | task_event->event_id.pid = perf_event_pid(event, task); |
| 3781 | task_event->event_id.ppid = perf_event_pid(event, current); | 3896 | task_event->event_id.ppid = perf_event_pid(event, current); |
| @@ -3785,7 +3900,11 @@ static void perf_event_task_output(struct perf_event *event, | |||
| 3785 | 3900 | ||
| 3786 | perf_output_put(&handle, task_event->event_id); | 3901 | perf_output_put(&handle, task_event->event_id); |
| 3787 | 3902 | ||
| 3903 | perf_event__output_id_sample(event, &handle, &sample); | ||
| 3904 | |||
| 3788 | perf_output_end(&handle); | 3905 | perf_output_end(&handle); |
| 3906 | out: | ||
| 3907 | task_event->event_id.header.size = size; | ||
| 3789 | } | 3908 | } |
| 3790 | 3909 | ||
| 3791 | static int perf_event_task_match(struct perf_event *event) | 3910 | static int perf_event_task_match(struct perf_event *event) |
| @@ -3793,7 +3912,7 @@ static int perf_event_task_match(struct perf_event *event) | |||
| 3793 | if (event->state < PERF_EVENT_STATE_INACTIVE) | 3912 | if (event->state < PERF_EVENT_STATE_INACTIVE) |
| 3794 | return 0; | 3913 | return 0; |
| 3795 | 3914 | ||
| 3796 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | 3915 | if (!event_filter_match(event)) |
| 3797 | return 0; | 3916 | return 0; |
| 3798 | 3917 | ||
| 3799 | if (event->attr.comm || event->attr.mmap || | 3918 | if (event->attr.comm || event->attr.mmap || |
| @@ -3900,11 +4019,16 @@ static void perf_event_comm_output(struct perf_event *event, | |||
| 3900 | struct perf_comm_event *comm_event) | 4019 | struct perf_comm_event *comm_event) |
| 3901 | { | 4020 | { |
| 3902 | struct perf_output_handle handle; | 4021 | struct perf_output_handle handle; |
| 4022 | struct perf_sample_data sample; | ||
| 3903 | int size = comm_event->event_id.header.size; | 4023 | int size = comm_event->event_id.header.size; |
| 3904 | int ret = perf_output_begin(&handle, event, size, 0, 0); | 4024 | int ret; |
| 4025 | |||
| 4026 | perf_event_header__init_id(&comm_event->event_id.header, &sample, event); | ||
| 4027 | ret = perf_output_begin(&handle, event, | ||
| 4028 | comm_event->event_id.header.size, 0, 0); | ||
| 3905 | 4029 | ||
| 3906 | if (ret) | 4030 | if (ret) |
| 3907 | return; | 4031 | goto out; |
| 3908 | 4032 | ||
| 3909 | comm_event->event_id.pid = perf_event_pid(event, comm_event->task); | 4033 | comm_event->event_id.pid = perf_event_pid(event, comm_event->task); |
| 3910 | comm_event->event_id.tid = perf_event_tid(event, comm_event->task); | 4034 | comm_event->event_id.tid = perf_event_tid(event, comm_event->task); |
| @@ -3912,7 +4036,12 @@ static void perf_event_comm_output(struct perf_event *event, | |||
| 3912 | perf_output_put(&handle, comm_event->event_id); | 4036 | perf_output_put(&handle, comm_event->event_id); |
| 3913 | perf_output_copy(&handle, comm_event->comm, | 4037 | perf_output_copy(&handle, comm_event->comm, |
| 3914 | comm_event->comm_size); | 4038 | comm_event->comm_size); |
| 4039 | |||
| 4040 | perf_event__output_id_sample(event, &handle, &sample); | ||
| 4041 | |||
| 3915 | perf_output_end(&handle); | 4042 | perf_output_end(&handle); |
| 4043 | out: | ||
| 4044 | comm_event->event_id.header.size = size; | ||
| 3916 | } | 4045 | } |
| 3917 | 4046 | ||
| 3918 | static int perf_event_comm_match(struct perf_event *event) | 4047 | static int perf_event_comm_match(struct perf_event *event) |
| @@ -3920,7 +4049,7 @@ static int perf_event_comm_match(struct perf_event *event) | |||
| 3920 | if (event->state < PERF_EVENT_STATE_INACTIVE) | 4049 | if (event->state < PERF_EVENT_STATE_INACTIVE) |
| 3921 | return 0; | 4050 | return 0; |
| 3922 | 4051 | ||
| 3923 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | 4052 | if (!event_filter_match(event)) |
| 3924 | return 0; | 4053 | return 0; |
| 3925 | 4054 | ||
| 3926 | if (event->attr.comm) | 4055 | if (event->attr.comm) |
| @@ -3957,7 +4086,6 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) | |||
| 3957 | comm_event->comm_size = size; | 4086 | comm_event->comm_size = size; |
| 3958 | 4087 | ||
| 3959 | comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; | 4088 | comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; |
| 3960 | |||
| 3961 | rcu_read_lock(); | 4089 | rcu_read_lock(); |
| 3962 | list_for_each_entry_rcu(pmu, &pmus, entry) { | 4090 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
| 3963 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); | 4091 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); |
| @@ -4038,11 +4166,15 @@ static void perf_event_mmap_output(struct perf_event *event, | |||
| 4038 | struct perf_mmap_event *mmap_event) | 4166 | struct perf_mmap_event *mmap_event) |
| 4039 | { | 4167 | { |
| 4040 | struct perf_output_handle handle; | 4168 | struct perf_output_handle handle; |
| 4169 | struct perf_sample_data sample; | ||
| 4041 | int size = mmap_event->event_id.header.size; | 4170 | int size = mmap_event->event_id.header.size; |
| 4042 | int ret = perf_output_begin(&handle, event, size, 0, 0); | 4171 | int ret; |
| 4043 | 4172 | ||
| 4173 | perf_event_header__init_id(&mmap_event->event_id.header, &sample, event); | ||
| 4174 | ret = perf_output_begin(&handle, event, | ||
| 4175 | mmap_event->event_id.header.size, 0, 0); | ||
| 4044 | if (ret) | 4176 | if (ret) |
| 4045 | return; | 4177 | goto out; |
| 4046 | 4178 | ||
| 4047 | mmap_event->event_id.pid = perf_event_pid(event, current); | 4179 | mmap_event->event_id.pid = perf_event_pid(event, current); |
| 4048 | mmap_event->event_id.tid = perf_event_tid(event, current); | 4180 | mmap_event->event_id.tid = perf_event_tid(event, current); |
| @@ -4050,7 +4182,12 @@ static void perf_event_mmap_output(struct perf_event *event, | |||
| 4050 | perf_output_put(&handle, mmap_event->event_id); | 4182 | perf_output_put(&handle, mmap_event->event_id); |
| 4051 | perf_output_copy(&handle, mmap_event->file_name, | 4183 | perf_output_copy(&handle, mmap_event->file_name, |
| 4052 | mmap_event->file_size); | 4184 | mmap_event->file_size); |
| 4185 | |||
| 4186 | perf_event__output_id_sample(event, &handle, &sample); | ||
| 4187 | |||
| 4053 | perf_output_end(&handle); | 4188 | perf_output_end(&handle); |
| 4189 | out: | ||
| 4190 | mmap_event->event_id.header.size = size; | ||
| 4054 | } | 4191 | } |
| 4055 | 4192 | ||
| 4056 | static int perf_event_mmap_match(struct perf_event *event, | 4193 | static int perf_event_mmap_match(struct perf_event *event, |
| @@ -4060,7 +4197,7 @@ static int perf_event_mmap_match(struct perf_event *event, | |||
| 4060 | if (event->state < PERF_EVENT_STATE_INACTIVE) | 4197 | if (event->state < PERF_EVENT_STATE_INACTIVE) |
| 4061 | return 0; | 4198 | return 0; |
| 4062 | 4199 | ||
| 4063 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | 4200 | if (!event_filter_match(event)) |
| 4064 | return 0; | 4201 | return 0; |
| 4065 | 4202 | ||
| 4066 | if ((!executable && event->attr.mmap_data) || | 4203 | if ((!executable && event->attr.mmap_data) || |
| @@ -4205,6 +4342,7 @@ void perf_event_mmap(struct vm_area_struct *vma) | |||
| 4205 | static void perf_log_throttle(struct perf_event *event, int enable) | 4342 | static void perf_log_throttle(struct perf_event *event, int enable) |
| 4206 | { | 4343 | { |
| 4207 | struct perf_output_handle handle; | 4344 | struct perf_output_handle handle; |
| 4345 | struct perf_sample_data sample; | ||
| 4208 | int ret; | 4346 | int ret; |
| 4209 | 4347 | ||
| 4210 | struct { | 4348 | struct { |
| @@ -4226,11 +4364,15 @@ static void perf_log_throttle(struct perf_event *event, int enable) | |||
| 4226 | if (enable) | 4364 | if (enable) |
| 4227 | throttle_event.header.type = PERF_RECORD_UNTHROTTLE; | 4365 | throttle_event.header.type = PERF_RECORD_UNTHROTTLE; |
| 4228 | 4366 | ||
| 4229 | ret = perf_output_begin(&handle, event, sizeof(throttle_event), 1, 0); | 4367 | perf_event_header__init_id(&throttle_event.header, &sample, event); |
| 4368 | |||
| 4369 | ret = perf_output_begin(&handle, event, | ||
| 4370 | throttle_event.header.size, 1, 0); | ||
| 4230 | if (ret) | 4371 | if (ret) |
| 4231 | return; | 4372 | return; |
| 4232 | 4373 | ||
| 4233 | perf_output_put(&handle, throttle_event); | 4374 | perf_output_put(&handle, throttle_event); |
| 4375 | perf_event__output_id_sample(event, &handle, &sample); | ||
| 4234 | perf_output_end(&handle); | 4376 | perf_output_end(&handle); |
| 4235 | } | 4377 | } |
| 4236 | 4378 | ||
| @@ -4246,6 +4388,13 @@ static int __perf_event_overflow(struct perf_event *event, int nmi, | |||
| 4246 | struct hw_perf_event *hwc = &event->hw; | 4388 | struct hw_perf_event *hwc = &event->hw; |
| 4247 | int ret = 0; | 4389 | int ret = 0; |
| 4248 | 4390 | ||
| 4391 | /* | ||
| 4392 | * Non-sampling counters might still use the PMI to fold short | ||
| 4393 | * hardware counters, ignore those. | ||
| 4394 | */ | ||
| 4395 | if (unlikely(!is_sampling_event(event))) | ||
| 4396 | return 0; | ||
| 4397 | |||
| 4249 | if (!throttle) { | 4398 | if (!throttle) { |
| 4250 | hwc->interrupts++; | 4399 | hwc->interrupts++; |
| 4251 | } else { | 4400 | } else { |
| @@ -4391,7 +4540,7 @@ static void perf_swevent_event(struct perf_event *event, u64 nr, | |||
| 4391 | if (!regs) | 4540 | if (!regs) |
| 4392 | return; | 4541 | return; |
| 4393 | 4542 | ||
| 4394 | if (!hwc->sample_period) | 4543 | if (!is_sampling_event(event)) |
| 4395 | return; | 4544 | return; |
| 4396 | 4545 | ||
| 4397 | if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq) | 4546 | if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq) |
| @@ -4518,7 +4667,7 @@ int perf_swevent_get_recursion_context(void) | |||
| 4518 | } | 4667 | } |
| 4519 | EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); | 4668 | EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); |
| 4520 | 4669 | ||
| 4521 | void inline perf_swevent_put_recursion_context(int rctx) | 4670 | inline void perf_swevent_put_recursion_context(int rctx) |
| 4522 | { | 4671 | { |
| 4523 | struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); | 4672 | struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); |
| 4524 | 4673 | ||
| @@ -4554,7 +4703,7 @@ static int perf_swevent_add(struct perf_event *event, int flags) | |||
| 4554 | struct hw_perf_event *hwc = &event->hw; | 4703 | struct hw_perf_event *hwc = &event->hw; |
| 4555 | struct hlist_head *head; | 4704 | struct hlist_head *head; |
| 4556 | 4705 | ||
| 4557 | if (hwc->sample_period) { | 4706 | if (is_sampling_event(event)) { |
| 4558 | hwc->last_period = hwc->sample_period; | 4707 | hwc->last_period = hwc->sample_period; |
| 4559 | perf_swevent_set_period(event); | 4708 | perf_swevent_set_period(event); |
| 4560 | } | 4709 | } |
| @@ -4811,15 +4960,6 @@ static int perf_tp_event_init(struct perf_event *event) | |||
| 4811 | if (event->attr.type != PERF_TYPE_TRACEPOINT) | 4960 | if (event->attr.type != PERF_TYPE_TRACEPOINT) |
| 4812 | return -ENOENT; | 4961 | return -ENOENT; |
| 4813 | 4962 | ||
| 4814 | /* | ||
| 4815 | * Raw tracepoint data is a severe data leak, only allow root to | ||
| 4816 | * have these. | ||
| 4817 | */ | ||
| 4818 | if ((event->attr.sample_type & PERF_SAMPLE_RAW) && | ||
| 4819 | perf_paranoid_tracepoint_raw() && | ||
| 4820 | !capable(CAP_SYS_ADMIN)) | ||
| 4821 | return -EPERM; | ||
| 4822 | |||
| 4823 | err = perf_trace_init(event); | 4963 | err = perf_trace_init(event); |
| 4824 | if (err) | 4964 | if (err) |
| 4825 | return err; | 4965 | return err; |
| @@ -4842,7 +4982,7 @@ static struct pmu perf_tracepoint = { | |||
| 4842 | 4982 | ||
| 4843 | static inline void perf_tp_register(void) | 4983 | static inline void perf_tp_register(void) |
| 4844 | { | 4984 | { |
| 4845 | perf_pmu_register(&perf_tracepoint); | 4985 | perf_pmu_register(&perf_tracepoint, "tracepoint", PERF_TYPE_TRACEPOINT); |
| 4846 | } | 4986 | } |
| 4847 | 4987 | ||
| 4848 | static int perf_event_set_filter(struct perf_event *event, void __user *arg) | 4988 | static int perf_event_set_filter(struct perf_event *event, void __user *arg) |
| @@ -4932,31 +5072,33 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) | |||
| 4932 | static void perf_swevent_start_hrtimer(struct perf_event *event) | 5072 | static void perf_swevent_start_hrtimer(struct perf_event *event) |
| 4933 | { | 5073 | { |
| 4934 | struct hw_perf_event *hwc = &event->hw; | 5074 | struct hw_perf_event *hwc = &event->hw; |
| 5075 | s64 period; | ||
| 5076 | |||
| 5077 | if (!is_sampling_event(event)) | ||
| 5078 | return; | ||
| 4935 | 5079 | ||
| 4936 | hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | 5080 | hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
| 4937 | hwc->hrtimer.function = perf_swevent_hrtimer; | 5081 | hwc->hrtimer.function = perf_swevent_hrtimer; |
| 4938 | if (hwc->sample_period) { | ||
| 4939 | s64 period = local64_read(&hwc->period_left); | ||
| 4940 | 5082 | ||
| 4941 | if (period) { | 5083 | period = local64_read(&hwc->period_left); |
| 4942 | if (period < 0) | 5084 | if (period) { |
| 4943 | period = 10000; | 5085 | if (period < 0) |
| 5086 | period = 10000; | ||
| 4944 | 5087 | ||
| 4945 | local64_set(&hwc->period_left, 0); | 5088 | local64_set(&hwc->period_left, 0); |
| 4946 | } else { | 5089 | } else { |
| 4947 | period = max_t(u64, 10000, hwc->sample_period); | 5090 | period = max_t(u64, 10000, hwc->sample_period); |
| 4948 | } | 5091 | } |
| 4949 | __hrtimer_start_range_ns(&hwc->hrtimer, | 5092 | __hrtimer_start_range_ns(&hwc->hrtimer, |
| 4950 | ns_to_ktime(period), 0, | 5093 | ns_to_ktime(period), 0, |
| 4951 | HRTIMER_MODE_REL_PINNED, 0); | 5094 | HRTIMER_MODE_REL_PINNED, 0); |
| 4952 | } | ||
| 4953 | } | 5095 | } |
| 4954 | 5096 | ||
| 4955 | static void perf_swevent_cancel_hrtimer(struct perf_event *event) | 5097 | static void perf_swevent_cancel_hrtimer(struct perf_event *event) |
| 4956 | { | 5098 | { |
| 4957 | struct hw_perf_event *hwc = &event->hw; | 5099 | struct hw_perf_event *hwc = &event->hw; |
| 4958 | 5100 | ||
| 4959 | if (hwc->sample_period) { | 5101 | if (is_sampling_event(event)) { |
| 4960 | ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); | 5102 | ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); |
| 4961 | local64_set(&hwc->period_left, ktime_to_ns(remaining)); | 5103 | local64_set(&hwc->period_left, ktime_to_ns(remaining)); |
| 4962 | 5104 | ||
| @@ -5184,8 +5326,63 @@ static void free_pmu_context(struct pmu *pmu) | |||
| 5184 | out: | 5326 | out: |
| 5185 | mutex_unlock(&pmus_lock); | 5327 | mutex_unlock(&pmus_lock); |
| 5186 | } | 5328 | } |
| 5329 | static struct idr pmu_idr; | ||
| 5330 | |||
| 5331 | static ssize_t | ||
| 5332 | type_show(struct device *dev, struct device_attribute *attr, char *page) | ||
| 5333 | { | ||
| 5334 | struct pmu *pmu = dev_get_drvdata(dev); | ||
| 5335 | |||
| 5336 | return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->type); | ||
| 5337 | } | ||
| 5338 | |||
| 5339 | static struct device_attribute pmu_dev_attrs[] = { | ||
| 5340 | __ATTR_RO(type), | ||
| 5341 | __ATTR_NULL, | ||
| 5342 | }; | ||
| 5343 | |||
| 5344 | static int pmu_bus_running; | ||
| 5345 | static struct bus_type pmu_bus = { | ||
| 5346 | .name = "event_source", | ||
| 5347 | .dev_attrs = pmu_dev_attrs, | ||
| 5348 | }; | ||
| 5349 | |||
| 5350 | static void pmu_dev_release(struct device *dev) | ||
| 5351 | { | ||
| 5352 | kfree(dev); | ||
| 5353 | } | ||
| 5187 | 5354 | ||
| 5188 | int perf_pmu_register(struct pmu *pmu) | 5355 | static int pmu_dev_alloc(struct pmu *pmu) |
| 5356 | { | ||
| 5357 | int ret = -ENOMEM; | ||
| 5358 | |||
| 5359 | pmu->dev = kzalloc(sizeof(struct device), GFP_KERNEL); | ||
| 5360 | if (!pmu->dev) | ||
| 5361 | goto out; | ||
| 5362 | |||
| 5363 | device_initialize(pmu->dev); | ||
| 5364 | ret = dev_set_name(pmu->dev, "%s", pmu->name); | ||
| 5365 | if (ret) | ||
| 5366 | goto free_dev; | ||
| 5367 | |||
| 5368 | dev_set_drvdata(pmu->dev, pmu); | ||
| 5369 | pmu->dev->bus = &pmu_bus; | ||
| 5370 | pmu->dev->release = pmu_dev_release; | ||
| 5371 | ret = device_add(pmu->dev); | ||
| 5372 | if (ret) | ||
| 5373 | goto free_dev; | ||
| 5374 | |||
| 5375 | out: | ||
| 5376 | return ret; | ||
| 5377 | |||
| 5378 | free_dev: | ||
| 5379 | put_device(pmu->dev); | ||
| 5380 | goto out; | ||
| 5381 | } | ||
| 5382 | |||
| 5383 | static struct lock_class_key cpuctx_mutex; | ||
| 5384 | |||
| 5385 | int perf_pmu_register(struct pmu *pmu, char *name, int type) | ||
| 5189 | { | 5386 | { |
| 5190 | int cpu, ret; | 5387 | int cpu, ret; |
| 5191 | 5388 | ||
| @@ -5195,19 +5392,45 @@ int perf_pmu_register(struct pmu *pmu) | |||
| 5195 | if (!pmu->pmu_disable_count) | 5392 | if (!pmu->pmu_disable_count) |
| 5196 | goto unlock; | 5393 | goto unlock; |
| 5197 | 5394 | ||
| 5395 | pmu->type = -1; | ||
| 5396 | if (!name) | ||
| 5397 | goto skip_type; | ||
| 5398 | pmu->name = name; | ||
| 5399 | |||
| 5400 | if (type < 0) { | ||
| 5401 | int err = idr_pre_get(&pmu_idr, GFP_KERNEL); | ||
| 5402 | if (!err) | ||
| 5403 | goto free_pdc; | ||
| 5404 | |||
| 5405 | err = idr_get_new_above(&pmu_idr, pmu, PERF_TYPE_MAX, &type); | ||
| 5406 | if (err) { | ||
| 5407 | ret = err; | ||
| 5408 | goto free_pdc; | ||
| 5409 | } | ||
| 5410 | } | ||
| 5411 | pmu->type = type; | ||
| 5412 | |||
| 5413 | if (pmu_bus_running) { | ||
| 5414 | ret = pmu_dev_alloc(pmu); | ||
| 5415 | if (ret) | ||
| 5416 | goto free_idr; | ||
| 5417 | } | ||
| 5418 | |||
| 5419 | skip_type: | ||
| 5198 | pmu->pmu_cpu_context = find_pmu_context(pmu->task_ctx_nr); | 5420 | pmu->pmu_cpu_context = find_pmu_context(pmu->task_ctx_nr); |
| 5199 | if (pmu->pmu_cpu_context) | 5421 | if (pmu->pmu_cpu_context) |
| 5200 | goto got_cpu_context; | 5422 | goto got_cpu_context; |
| 5201 | 5423 | ||
| 5202 | pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context); | 5424 | pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context); |
| 5203 | if (!pmu->pmu_cpu_context) | 5425 | if (!pmu->pmu_cpu_context) |
| 5204 | goto free_pdc; | 5426 | goto free_dev; |
| 5205 | 5427 | ||
| 5206 | for_each_possible_cpu(cpu) { | 5428 | for_each_possible_cpu(cpu) { |
| 5207 | struct perf_cpu_context *cpuctx; | 5429 | struct perf_cpu_context *cpuctx; |
| 5208 | 5430 | ||
| 5209 | cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); | 5431 | cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); |
| 5210 | __perf_event_init_context(&cpuctx->ctx); | 5432 | __perf_event_init_context(&cpuctx->ctx); |
| 5433 | lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex); | ||
| 5211 | cpuctx->ctx.type = cpu_context; | 5434 | cpuctx->ctx.type = cpu_context; |
| 5212 | cpuctx->ctx.pmu = pmu; | 5435 | cpuctx->ctx.pmu = pmu; |
| 5213 | cpuctx->jiffies_interval = 1; | 5436 | cpuctx->jiffies_interval = 1; |
| @@ -5245,6 +5468,14 @@ unlock: | |||
| 5245 | 5468 | ||
| 5246 | return ret; | 5469 | return ret; |
| 5247 | 5470 | ||
| 5471 | free_dev: | ||
| 5472 | device_del(pmu->dev); | ||
| 5473 | put_device(pmu->dev); | ||
| 5474 | |||
| 5475 | free_idr: | ||
| 5476 | if (pmu->type >= PERF_TYPE_MAX) | ||
| 5477 | idr_remove(&pmu_idr, pmu->type); | ||
| 5478 | |||
| 5248 | free_pdc: | 5479 | free_pdc: |
| 5249 | free_percpu(pmu->pmu_disable_count); | 5480 | free_percpu(pmu->pmu_disable_count); |
| 5250 | goto unlock; | 5481 | goto unlock; |
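The dynamic type allocation above uses the two-step IDR API of this kernel generation: idr_pre_get() to preload memory, then idr_get_new_above() to hand out an id above PERF_TYPE_MAX. A self-contained sketch of that idiom, with a hypothetical my_idr/my_get_id pair:

#include <linux/idr.h>

static DEFINE_IDR(my_idr);

/* Hand out an id above PERF_TYPE_MAX for ptr, mirroring the perf_pmu_register()
 * path above; my_get_id() is an illustrative helper, not part of the patch. */
static int my_get_id(void *ptr)
{
        int id, err;

        if (!idr_pre_get(&my_idr, GFP_KERNEL))  /* preload; returns 0 on failure */
                return -ENOMEM;

        err = idr_get_new_above(&my_idr, ptr, PERF_TYPE_MAX, &id);
        if (err)
                return err;

        return id;      /* look up with idr_find(), release with idr_remove() */
}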
| @@ -5264,6 +5495,10 @@ void perf_pmu_unregister(struct pmu *pmu) | |||
| 5264 | synchronize_rcu(); | 5495 | synchronize_rcu(); |
| 5265 | 5496 | ||
| 5266 | free_percpu(pmu->pmu_disable_count); | 5497 | free_percpu(pmu->pmu_disable_count); |
| 5498 | if (pmu->type >= PERF_TYPE_MAX) | ||
| 5499 | idr_remove(&pmu_idr, pmu->type); | ||
| 5500 | device_del(pmu->dev); | ||
| 5501 | put_device(pmu->dev); | ||
| 5267 | free_pmu_context(pmu); | 5502 | free_pmu_context(pmu); |
| 5268 | } | 5503 | } |
| 5269 | 5504 | ||
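With the extra name and type arguments, a PMU driver now registers and unregisters itself roughly as follows. This is a hedged sketch: my_pmu and my_event_init are hypothetical, and type -1 asks the core for a dynamically allocated id above PERF_TYPE_MAX.

#include <linux/module.h>
#include <linux/perf_event.h>

static struct pmu my_pmu;

/* Hypothetical minimal event_init; a real PMU also supplies add/del/start/stop/read. */
static int my_event_init(struct perf_event *event)
{
        if (event->attr.type != my_pmu.type)    /* only claim events aimed at this PMU */
                return -ENOENT;                 /* lets the core try the next pmu */
        return 0;
}

static struct pmu my_pmu = {
        .task_ctx_nr    = perf_invalid_context,
        .event_init     = my_event_init,
};

static int __init my_pmu_module_init(void)
{
        /* "my_pmu" becomes the sysfs device name; type -1 requests a dynamic id */
        return perf_pmu_register(&my_pmu, "my_pmu", -1);
}

static void __exit my_pmu_module_exit(void)
{
        /* releases the dynamic type id and the sysfs device, as shown above */
        perf_pmu_unregister(&my_pmu);
}

module_init(my_pmu_module_init);
module_exit(my_pmu_module_exit);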
| @@ -5273,6 +5508,13 @@ struct pmu *perf_init_event(struct perf_event *event) | |||
| 5273 | int idx; | 5508 | int idx; |
| 5274 | 5509 | ||
| 5275 | idx = srcu_read_lock(&pmus_srcu); | 5510 | idx = srcu_read_lock(&pmus_srcu); |
| 5511 | |||
| 5512 | rcu_read_lock(); | ||
| 5513 | pmu = idr_find(&pmu_idr, event->attr.type); | ||
| 5514 | rcu_read_unlock(); | ||
| 5515 | if (pmu) | ||
| 5516 | goto unlock; | ||
| 5517 | |||
| 5276 | list_for_each_entry_rcu(pmu, &pmus, entry) { | 5518 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
| 5277 | int ret = pmu->event_init(event); | 5519 | int ret = pmu->event_init(event); |
| 5278 | if (!ret) | 5520 | if (!ret) |
| @@ -5305,6 +5547,11 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, | |||
| 5305 | struct hw_perf_event *hwc; | 5547 | struct hw_perf_event *hwc; |
| 5306 | long err; | 5548 | long err; |
| 5307 | 5549 | ||
| 5550 | if ((unsigned)cpu >= nr_cpu_ids) { | ||
| 5551 | if (!task || cpu != -1) | ||
| 5552 | return ERR_PTR(-EINVAL); | ||
| 5553 | } | ||
| 5554 | |||
| 5308 | event = kzalloc(sizeof(*event), GFP_KERNEL); | 5555 | event = kzalloc(sizeof(*event), GFP_KERNEL); |
| 5309 | if (!event) | 5556 | if (!event) |
| 5310 | return ERR_PTR(-ENOMEM); | 5557 | return ERR_PTR(-ENOMEM); |
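The new bounds check in perf_event_alloc() rejects out-of-range CPU numbers early: cpu must index a possible CPU, or be -1 when a target task is supplied. From user space the accepted combinations look like this (a hedged sketch; my_open_counter is illustrative):

#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Illustrative helper: open a CPU-cycles counter for (pid, cpu). */
static int my_open_counter(pid_t pid, int cpu)
{
        struct perf_event_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_HARDWARE;
        attr.config = PERF_COUNT_HW_CPU_CYCLES;

        /*
         * pid ==  0, cpu == -1 : this task, any CPU   (task != NULL, cpu == -1: accepted)
         * pid == -1, cpu >=  0 : any task, one CPU    (cpu < nr_cpu_ids: accepted)
         * pid == -1, cpu == -1 : rejected; the check above returns -EINVAL
         */
        return syscall(__NR_perf_event_open, &attr, pid, cpu, -1, 0);
}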
| @@ -5353,7 +5600,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, | |||
| 5353 | 5600 | ||
| 5354 | if (!overflow_handler && parent_event) | 5601 | if (!overflow_handler && parent_event) |
| 5355 | overflow_handler = parent_event->overflow_handler; | 5602 | overflow_handler = parent_event->overflow_handler; |
| 5356 | 5603 | ||
| 5357 | event->overflow_handler = overflow_handler; | 5604 | event->overflow_handler = overflow_handler; |
| 5358 | 5605 | ||
| 5359 | if (attr->disabled) | 5606 | if (attr->disabled) |
| @@ -5738,6 +5985,12 @@ SYSCALL_DEFINE5(perf_event_open, | |||
| 5738 | mutex_unlock(¤t->perf_event_mutex); | 5985 | mutex_unlock(¤t->perf_event_mutex); |
| 5739 | 5986 | ||
| 5740 | /* | 5987 | /* |
| 5988 | * Precalculate sample_data sizes | ||
| 5989 | */ | ||
| 5990 | perf_event__header_size(event); | ||
| 5991 | perf_event__id_header_size(event); | ||
| 5992 | |||
| 5993 | /* | ||
| 5741 | * Drop the reference on the group_event after placing the | 5994 | * Drop the reference on the group_event after placing the |
| 5742 | * new event on the sibling_list. This ensures destruction | 5995 | * new event on the sibling_list. This ensures destruction |
| 5743 | * of the group leader will find the pointer to itself in | 5996 | * of the group leader will find the pointer to itself in |
| @@ -5883,7 +6136,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn) | |||
| 5883 | * scheduled, so we are now safe from rescheduling changing | 6136 | * scheduled, so we are now safe from rescheduling changing |
| 5884 | * our context. | 6137 | * our context. |
| 5885 | */ | 6138 | */ |
| 5886 | child_ctx = child->perf_event_ctxp[ctxn]; | 6139 | child_ctx = rcu_dereference_raw(child->perf_event_ctxp[ctxn]); |
| 5887 | task_ctx_sched_out(child_ctx, EVENT_ALL); | 6140 | task_ctx_sched_out(child_ctx, EVENT_ALL); |
| 5888 | 6141 | ||
| 5889 | /* | 6142 | /* |
| @@ -6090,6 +6343,12 @@ inherit_event(struct perf_event *parent_event, | |||
| 6090 | child_event->overflow_handler = parent_event->overflow_handler; | 6343 | child_event->overflow_handler = parent_event->overflow_handler; |
| 6091 | 6344 | ||
| 6092 | /* | 6345 | /* |
| 6346 | * Precalculate sample_data sizes | ||
| 6347 | */ | ||
| 6348 | perf_event__header_size(child_event); | ||
| 6349 | perf_event__id_header_size(child_event); | ||
| 6350 | |||
| 6351 | /* | ||
| 6093 | * Link it up in the child's context: | 6352 | * Link it up in the child's context: |
| 6094 | */ | 6353 | */ |
| 6095 | raw_spin_lock_irqsave(&child_ctx->lock, flags); | 6354 | raw_spin_lock_irqsave(&child_ctx->lock, flags); |
| @@ -6190,11 +6449,6 @@ int perf_event_init_context(struct task_struct *child, int ctxn) | |||
| 6190 | unsigned long flags; | 6449 | unsigned long flags; |
| 6191 | int ret = 0; | 6450 | int ret = 0; |
| 6192 | 6451 | ||
| 6193 | child->perf_event_ctxp[ctxn] = NULL; | ||
| 6194 | |||
| 6195 | mutex_init(&child->perf_event_mutex); | ||
| 6196 | INIT_LIST_HEAD(&child->perf_event_list); | ||
| 6197 | |||
| 6198 | if (likely(!parent->perf_event_ctxp[ctxn])) | 6452 | if (likely(!parent->perf_event_ctxp[ctxn])) |
| 6199 | return 0; | 6453 | return 0; |
| 6200 | 6454 | ||
| @@ -6246,7 +6500,6 @@ int perf_event_init_context(struct task_struct *child, int ctxn) | |||
| 6246 | 6500 | ||
| 6247 | raw_spin_lock_irqsave(&parent_ctx->lock, flags); | 6501 | raw_spin_lock_irqsave(&parent_ctx->lock, flags); |
| 6248 | parent_ctx->rotate_disable = 0; | 6502 | parent_ctx->rotate_disable = 0; |
| 6249 | raw_spin_unlock_irqrestore(&parent_ctx->lock, flags); | ||
| 6250 | 6503 | ||
| 6251 | child_ctx = child->perf_event_ctxp[ctxn]; | 6504 | child_ctx = child->perf_event_ctxp[ctxn]; |
| 6252 | 6505 | ||
| @@ -6254,12 +6507,11 @@ int perf_event_init_context(struct task_struct *child, int ctxn) | |||
| 6254 | /* | 6507 | /* |
| 6255 | * Mark the child context as a clone of the parent | 6508 | * Mark the child context as a clone of the parent |
| 6256 | * context, or of whatever the parent is a clone of. | 6509 | * context, or of whatever the parent is a clone of. |
| 6257 | * Note that if the parent is a clone, it could get | 6510 | * |
| 6258 | * uncloned at any point, but that doesn't matter | 6511 | * Note that if the parent is a clone, the holding of |
| 6259 | * because the list of events and the generation | 6512 | * parent_ctx->lock avoids it from being uncloned. |
| 6260 | * count can't have changed since we took the mutex. | ||
| 6261 | */ | 6513 | */ |
| 6262 | cloned_ctx = rcu_dereference(parent_ctx->parent_ctx); | 6514 | cloned_ctx = parent_ctx->parent_ctx; |
| 6263 | if (cloned_ctx) { | 6515 | if (cloned_ctx) { |
| 6264 | child_ctx->parent_ctx = cloned_ctx; | 6516 | child_ctx->parent_ctx = cloned_ctx; |
| 6265 | child_ctx->parent_gen = parent_ctx->parent_gen; | 6517 | child_ctx->parent_gen = parent_ctx->parent_gen; |
| @@ -6270,6 +6522,7 @@ int perf_event_init_context(struct task_struct *child, int ctxn) | |||
| 6270 | get_ctx(child_ctx->parent_ctx); | 6522 | get_ctx(child_ctx->parent_ctx); |
| 6271 | } | 6523 | } |
| 6272 | 6524 | ||
| 6525 | raw_spin_unlock_irqrestore(&parent_ctx->lock, flags); | ||
| 6273 | mutex_unlock(&parent_ctx->mutex); | 6526 | mutex_unlock(&parent_ctx->mutex); |
| 6274 | 6527 | ||
| 6275 | perf_unpin_context(parent_ctx); | 6528 | perf_unpin_context(parent_ctx); |
| @@ -6284,6 +6537,10 @@ int perf_event_init_task(struct task_struct *child) | |||
| 6284 | { | 6537 | { |
| 6285 | int ctxn, ret; | 6538 | int ctxn, ret; |
| 6286 | 6539 | ||
| 6540 | memset(child->perf_event_ctxp, 0, sizeof(child->perf_event_ctxp)); | ||
| 6541 | mutex_init(&child->perf_event_mutex); | ||
| 6542 | INIT_LIST_HEAD(&child->perf_event_list); | ||
| 6543 | |||
| 6287 | for_each_task_context_nr(ctxn) { | 6544 | for_each_task_context_nr(ctxn) { |
| 6288 | ret = perf_event_init_context(child, ctxn); | 6545 | ret = perf_event_init_context(child, ctxn); |
| 6289 | if (ret) | 6546 | if (ret) |
| @@ -6320,7 +6577,7 @@ static void __cpuinit perf_event_init_cpu(int cpu) | |||
| 6320 | mutex_unlock(&swhash->hlist_mutex); | 6577 | mutex_unlock(&swhash->hlist_mutex); |
| 6321 | } | 6578 | } |
| 6322 | 6579 | ||
| 6323 | #ifdef CONFIG_HOTPLUG_CPU | 6580 | #if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC |
| 6324 | static void perf_pmu_rotate_stop(struct pmu *pmu) | 6581 | static void perf_pmu_rotate_stop(struct pmu *pmu) |
| 6325 | { | 6582 | { |
| 6326 | struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); | 6583 | struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); |
| @@ -6374,6 +6631,26 @@ static void perf_event_exit_cpu(int cpu) | |||
| 6374 | static inline void perf_event_exit_cpu(int cpu) { } | 6631 | static inline void perf_event_exit_cpu(int cpu) { } |
| 6375 | #endif | 6632 | #endif |
| 6376 | 6633 | ||
| 6634 | static int | ||
| 6635 | perf_reboot(struct notifier_block *notifier, unsigned long val, void *v) | ||
| 6636 | { | ||
| 6637 | int cpu; | ||
| 6638 | |||
| 6639 | for_each_online_cpu(cpu) | ||
| 6640 | perf_event_exit_cpu(cpu); | ||
| 6641 | |||
| 6642 | return NOTIFY_OK; | ||
| 6643 | } | ||
| 6644 | |||
| 6645 | /* | ||
| 6646 | * Run the perf reboot notifier at the very last possible moment so that | ||
| 6647 | * the generic watchdog code runs as long as possible. | ||
| 6648 | */ | ||
| 6649 | static struct notifier_block perf_reboot_notifier = { | ||
| 6650 | .notifier_call = perf_reboot, | ||
| 6651 | .priority = INT_MIN, | ||
| 6652 | }; | ||
| 6653 | |||
| 6377 | static int __cpuinit | 6654 | static int __cpuinit |
| 6378 | perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) | 6655 | perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) |
| 6379 | { | 6656 | { |
| @@ -6402,14 +6679,45 @@ void __init perf_event_init(void) | |||
| 6402 | { | 6679 | { |
| 6403 | int ret; | 6680 | int ret; |
| 6404 | 6681 | ||
| 6682 | idr_init(&pmu_idr); | ||
| 6683 | |||
| 6405 | perf_event_init_all_cpus(); | 6684 | perf_event_init_all_cpus(); |
| 6406 | init_srcu_struct(&pmus_srcu); | 6685 | init_srcu_struct(&pmus_srcu); |
| 6407 | perf_pmu_register(&perf_swevent); | 6686 | perf_pmu_register(&perf_swevent, "software", PERF_TYPE_SOFTWARE); |
| 6408 | perf_pmu_register(&perf_cpu_clock); | 6687 | perf_pmu_register(&perf_cpu_clock, NULL, -1); |
| 6409 | perf_pmu_register(&perf_task_clock); | 6688 | perf_pmu_register(&perf_task_clock, NULL, -1); |
| 6410 | perf_tp_register(); | 6689 | perf_tp_register(); |
| 6411 | perf_cpu_notifier(perf_cpu_notify); | 6690 | perf_cpu_notifier(perf_cpu_notify); |
| 6691 | register_reboot_notifier(&perf_reboot_notifier); | ||
| 6412 | 6692 | ||
| 6413 | ret = init_hw_breakpoint(); | 6693 | ret = init_hw_breakpoint(); |
| 6414 | WARN(ret, "hw_breakpoint initialization failed with: %d", ret); | 6694 | WARN(ret, "hw_breakpoint initialization failed with: %d", ret); |
| 6415 | } | 6695 | } |
| 6696 | |||
| 6697 | static int __init perf_event_sysfs_init(void) | ||
| 6698 | { | ||
| 6699 | struct pmu *pmu; | ||
| 6700 | int ret; | ||
| 6701 | |||
| 6702 | mutex_lock(&pmus_lock); | ||
| 6703 | |||
| 6704 | ret = bus_register(&pmu_bus); | ||
| 6705 | if (ret) | ||
| 6706 | goto unlock; | ||
| 6707 | |||
| 6708 | list_for_each_entry(pmu, &pmus, entry) { | ||
| 6709 | if (!pmu->name || pmu->type < 0) | ||
| 6710 | continue; | ||
| 6711 | |||
| 6712 | ret = pmu_dev_alloc(pmu); | ||
| 6713 | WARN(ret, "Failed to register pmu: %s, reason %d\n", pmu->name, ret); | ||
| 6714 | } | ||
| 6715 | pmu_bus_running = 1; | ||
| 6716 | ret = 0; | ||
| 6717 | |||
| 6718 | unlock: | ||
| 6719 | mutex_unlock(&pmus_lock); | ||
| 6720 | |||
| 6721 | return ret; | ||
| 6722 | } | ||
| 6723 | device_initcall(perf_event_sysfs_init); | ||
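perf_event_sysfs_init() retrofits every already-registered, named PMU with a device once pmu_bus is up, then flips pmu_bus_running so later registrations go through pmu_dev_alloc() directly. Assuming the bus and per-PMU type attribute introduced elsewhere in this patch (the event_source bus name and the type file are assumptions, not visible in this hunk), user space can resolve a dynamically allocated type id like this:

#include <linux/perf_event.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Hypothetical helper: map a PMU name to its (possibly dynamic) type id and
 * open one of its events for the calling task. */
static int my_open_dynamic_pmu_event(const char *pmu_name, unsigned long long config)
{
        char path[128];
        struct perf_event_attr attr;
        FILE *f;
        int type;

        snprintf(path, sizeof(path),
                 "/sys/bus/event_source/devices/%s/type", pmu_name);
        f = fopen(path, "r");
        if (!f)
                return -1;
        if (fscanf(f, "%d", &type) != 1) {
                fclose(f);
                return -1;
        }
        fclose(f);

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = type;               /* the id handed out by the IDR above */
        attr.config = config;

        return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}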
