Diffstat (limited to 'kernel/perf_event.c')
-rw-r--r--  kernel/perf_event.c | 1164
 1 file changed, 765 insertions(+), 399 deletions(-)
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 0f86feb6db0c..603c0d8b5df1 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/percpu.h> | 20 | #include <linux/percpu.h> |
21 | #include <linux/ptrace.h> | 21 | #include <linux/ptrace.h> |
22 | #include <linux/vmstat.h> | 22 | #include <linux/vmstat.h> |
23 | #include <linux/vmalloc.h> | ||
23 | #include <linux/hardirq.h> | 24 | #include <linux/hardirq.h> |
24 | #include <linux/rculist.h> | 25 | #include <linux/rculist.h> |
25 | #include <linux/uaccess.h> | 26 | #include <linux/uaccess.h> |
@@ -27,13 +28,15 @@ | |||
27 | #include <linux/anon_inodes.h> | 28 | #include <linux/anon_inodes.h> |
28 | #include <linux/kernel_stat.h> | 29 | #include <linux/kernel_stat.h> |
29 | #include <linux/perf_event.h> | 30 | #include <linux/perf_event.h> |
31 | #include <linux/ftrace_event.h> | ||
32 | #include <linux/hw_breakpoint.h> | ||
30 | 33 | ||
31 | #include <asm/irq_regs.h> | 34 | #include <asm/irq_regs.h> |
32 | 35 | ||
33 | /* | 36 | /* |
34 | * Each CPU has a list of per CPU events: | 37 | * Each CPU has a list of per CPU events: |
35 | */ | 38 | */ |
36 | DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context); | 39 | static DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context); |
37 | 40 | ||
38 | int perf_max_events __read_mostly = 1; | 41 | int perf_max_events __read_mostly = 1; |
39 | static int perf_reserved_percpu __read_mostly; | 42 | static int perf_reserved_percpu __read_mostly; |
@@ -200,14 +203,14 @@ perf_lock_task_context(struct task_struct *task, unsigned long *flags) | |||
200 | * if so. If we locked the right context, then it | 203 | * if so. If we locked the right context, then it |
201 | * can't get swapped on us any more. | 204 | * can't get swapped on us any more. |
202 | */ | 205 | */ |
203 | spin_lock_irqsave(&ctx->lock, *flags); | 206 | raw_spin_lock_irqsave(&ctx->lock, *flags); |
204 | if (ctx != rcu_dereference(task->perf_event_ctxp)) { | 207 | if (ctx != rcu_dereference(task->perf_event_ctxp)) { |
205 | spin_unlock_irqrestore(&ctx->lock, *flags); | 208 | raw_spin_unlock_irqrestore(&ctx->lock, *flags); |
206 | goto retry; | 209 | goto retry; |
207 | } | 210 | } |
208 | 211 | ||
209 | if (!atomic_inc_not_zero(&ctx->refcount)) { | 212 | if (!atomic_inc_not_zero(&ctx->refcount)) { |
210 | spin_unlock_irqrestore(&ctx->lock, *flags); | 213 | raw_spin_unlock_irqrestore(&ctx->lock, *flags); |
211 | ctx = NULL; | 214 | ctx = NULL; |
212 | } | 215 | } |
213 | } | 216 | } |
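The hunk above is part of a tree-wide change in this commit that turns ctx->lock into a raw_spinlock_t, so the lock keeps behaving as a spinning lock even on configurations where ordinary spinlocks may sleep (the usual motivation for the raw_* variants). A minimal sketch of the locking pattern the patch assumes is shown below; the demo_* names are illustrative and not part of perf_event.c.

#include <linux/spinlock.h>

struct demo_ctx {
        raw_spinlock_t lock;            /* never becomes a sleeping lock */
};

static void demo_ctx_init(struct demo_ctx *ctx)
{
        raw_spin_lock_init(&ctx->lock);
}

static void demo_ctx_poke(struct demo_ctx *ctx)
{
        unsigned long flags;

        raw_spin_lock_irqsave(&ctx->lock, flags);
        /* short critical section with interrupts disabled */
        raw_spin_unlock_irqrestore(&ctx->lock, flags);
}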
@@ -228,7 +231,7 @@ static struct perf_event_context *perf_pin_task_context(struct task_struct *task | |||
228 | ctx = perf_lock_task_context(task, &flags); | 231 | ctx = perf_lock_task_context(task, &flags); |
229 | if (ctx) { | 232 | if (ctx) { |
230 | ++ctx->pin_count; | 233 | ++ctx->pin_count; |
231 | spin_unlock_irqrestore(&ctx->lock, flags); | 234 | raw_spin_unlock_irqrestore(&ctx->lock, flags); |
232 | } | 235 | } |
233 | return ctx; | 236 | return ctx; |
234 | } | 237 | } |
@@ -237,12 +240,55 @@ static void perf_unpin_context(struct perf_event_context *ctx) | |||
237 | { | 240 | { |
238 | unsigned long flags; | 241 | unsigned long flags; |
239 | 242 | ||
240 | spin_lock_irqsave(&ctx->lock, flags); | 243 | raw_spin_lock_irqsave(&ctx->lock, flags); |
241 | --ctx->pin_count; | 244 | --ctx->pin_count; |
242 | spin_unlock_irqrestore(&ctx->lock, flags); | 245 | raw_spin_unlock_irqrestore(&ctx->lock, flags); |
243 | put_ctx(ctx); | 246 | put_ctx(ctx); |
244 | } | 247 | } |
245 | 248 | ||
249 | static inline u64 perf_clock(void) | ||
250 | { | ||
251 | return cpu_clock(smp_processor_id()); | ||
252 | } | ||
253 | |||
254 | /* | ||
255 | * Update the record of the current time in a context. | ||
256 | */ | ||
257 | static void update_context_time(struct perf_event_context *ctx) | ||
258 | { | ||
259 | u64 now = perf_clock(); | ||
260 | |||
261 | ctx->time += now - ctx->timestamp; | ||
262 | ctx->timestamp = now; | ||
263 | } | ||
264 | |||
265 | /* | ||
266 | * Update the total_time_enabled and total_time_running fields for a event. | ||
267 | */ | ||
268 | static void update_event_times(struct perf_event *event) | ||
269 | { | ||
270 | struct perf_event_context *ctx = event->ctx; | ||
271 | u64 run_end; | ||
272 | |||
273 | if (event->state < PERF_EVENT_STATE_INACTIVE || | ||
274 | event->group_leader->state < PERF_EVENT_STATE_INACTIVE) | ||
275 | return; | ||
276 | |||
277 | if (ctx->is_active) | ||
278 | run_end = ctx->time; | ||
279 | else | ||
280 | run_end = event->tstamp_stopped; | ||
281 | |||
282 | event->total_time_enabled = run_end - event->tstamp_enabled; | ||
283 | |||
284 | if (event->state == PERF_EVENT_STATE_INACTIVE) | ||
285 | run_end = event->tstamp_stopped; | ||
286 | else | ||
287 | run_end = ctx->time; | ||
288 | |||
289 | event->total_time_running = run_end - event->tstamp_running; | ||
290 | } | ||
291 | |||
246 | /* | 292 | /* |
247 | * Add a event from the lists for its context. | 293 | * Add a event from the lists for its context. |
248 | * Must be called with ctx->mutex and ctx->lock held. | 294 | * Must be called with ctx->mutex and ctx->lock held. |
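To make the time bookkeeping moved and extended above concrete, here is a small worked sketch with made-up timestamps: for an INACTIVE event inside a still-active context, total_time_enabled keeps growing with the context clock, while total_time_running stopped accumulating at tstamp_stopped. The numbers and the demo_ helper are purely illustrative.

#include <linux/types.h>

static void demo_event_times(void)
{
        u64 ctx_time        = 200;      /* current context clock */
        u64 tstamp_enabled  = 100;      /* when the event was enabled */
        u64 tstamp_running  = 110;      /* when it last started counting */
        u64 tstamp_stopped  = 160;      /* when it was last scheduled out */

        /* context is active, event state is INACTIVE */
        u64 total_time_enabled = ctx_time - tstamp_enabled;        /* 100 */
        u64 total_time_running = tstamp_stopped - tstamp_running;  /*  50 */

        (void)total_time_enabled;
        (void)total_time_running;
}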
@@ -291,6 +337,18 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) | |||
291 | if (event->group_leader != event) | 337 | if (event->group_leader != event) |
292 | event->group_leader->nr_siblings--; | 338 | event->group_leader->nr_siblings--; |
293 | 339 | ||
340 | update_event_times(event); | ||
341 | |||
342 | /* | ||
343 | * If event was in error state, then keep it | ||
344 | * that way, otherwise bogus counts will be | ||
345 | * returned on read(). The only way to get out | ||
346 | * of error state is by explicit re-enabling | ||
347 | * of the event | ||
348 | */ | ||
349 | if (event->state > PERF_EVENT_STATE_OFF) | ||
350 | event->state = PERF_EVENT_STATE_OFF; | ||
351 | |||
294 | /* | 352 | /* |
295 | * If this was a group event with sibling events then | 353 | * If this was a group event with sibling events then |
296 | * upgrade the siblings to singleton events by adding them | 354 | * upgrade the siblings to singleton events by adding them |
@@ -369,7 +427,7 @@ static void __perf_event_remove_from_context(void *info) | |||
369 | if (ctx->task && cpuctx->task_ctx != ctx) | 427 | if (ctx->task && cpuctx->task_ctx != ctx) |
370 | return; | 428 | return; |
371 | 429 | ||
372 | spin_lock(&ctx->lock); | 430 | raw_spin_lock(&ctx->lock); |
373 | /* | 431 | /* |
374 | * Protect the list operation against NMI by disabling the | 432 | * Protect the list operation against NMI by disabling the |
375 | * events on a global level. | 433 | * events on a global level. |
@@ -391,7 +449,7 @@ static void __perf_event_remove_from_context(void *info) | |||
391 | } | 449 | } |
392 | 450 | ||
393 | perf_enable(); | 451 | perf_enable(); |
394 | spin_unlock(&ctx->lock); | 452 | raw_spin_unlock(&ctx->lock); |
395 | } | 453 | } |
396 | 454 | ||
397 | 455 | ||
@@ -418,7 +476,7 @@ static void perf_event_remove_from_context(struct perf_event *event) | |||
418 | if (!task) { | 476 | if (!task) { |
419 | /* | 477 | /* |
420 | * Per cpu events are removed via an smp call and | 478 | * Per cpu events are removed via an smp call and |
421 | * the removal is always sucessful. | 479 | * the removal is always successful. |
422 | */ | 480 | */ |
423 | smp_call_function_single(event->cpu, | 481 | smp_call_function_single(event->cpu, |
424 | __perf_event_remove_from_context, | 482 | __perf_event_remove_from_context, |
@@ -430,12 +488,12 @@ retry: | |||
430 | task_oncpu_function_call(task, __perf_event_remove_from_context, | 488 | task_oncpu_function_call(task, __perf_event_remove_from_context, |
431 | event); | 489 | event); |
432 | 490 | ||
433 | spin_lock_irq(&ctx->lock); | 491 | raw_spin_lock_irq(&ctx->lock); |
434 | /* | 492 | /* |
435 | * If the context is active we need to retry the smp call. | 493 | * If the context is active we need to retry the smp call. |
436 | */ | 494 | */ |
437 | if (ctx->nr_active && !list_empty(&event->group_entry)) { | 495 | if (ctx->nr_active && !list_empty(&event->group_entry)) { |
438 | spin_unlock_irq(&ctx->lock); | 496 | raw_spin_unlock_irq(&ctx->lock); |
439 | goto retry; | 497 | goto retry; |
440 | } | 498 | } |
441 | 499 | ||
@@ -444,48 +502,9 @@ retry: | |||
444 | * can remove the event safely, if the call above did not | 502 | * can remove the event safely, if the call above did not |
445 | * succeed. | 503 | * succeed. |
446 | */ | 504 | */ |
447 | if (!list_empty(&event->group_entry)) { | 505 | if (!list_empty(&event->group_entry)) |
448 | list_del_event(event, ctx); | 506 | list_del_event(event, ctx); |
449 | } | 507 | raw_spin_unlock_irq(&ctx->lock); |
450 | spin_unlock_irq(&ctx->lock); | ||
451 | } | ||
452 | |||
453 | static inline u64 perf_clock(void) | ||
454 | { | ||
455 | return cpu_clock(smp_processor_id()); | ||
456 | } | ||
457 | |||
458 | /* | ||
459 | * Update the record of the current time in a context. | ||
460 | */ | ||
461 | static void update_context_time(struct perf_event_context *ctx) | ||
462 | { | ||
463 | u64 now = perf_clock(); | ||
464 | |||
465 | ctx->time += now - ctx->timestamp; | ||
466 | ctx->timestamp = now; | ||
467 | } | ||
468 | |||
469 | /* | ||
470 | * Update the total_time_enabled and total_time_running fields for a event. | ||
471 | */ | ||
472 | static void update_event_times(struct perf_event *event) | ||
473 | { | ||
474 | struct perf_event_context *ctx = event->ctx; | ||
475 | u64 run_end; | ||
476 | |||
477 | if (event->state < PERF_EVENT_STATE_INACTIVE || | ||
478 | event->group_leader->state < PERF_EVENT_STATE_INACTIVE) | ||
479 | return; | ||
480 | |||
481 | event->total_time_enabled = ctx->time - event->tstamp_enabled; | ||
482 | |||
483 | if (event->state == PERF_EVENT_STATE_INACTIVE) | ||
484 | run_end = event->tstamp_stopped; | ||
485 | else | ||
486 | run_end = ctx->time; | ||
487 | |||
488 | event->total_time_running = run_end - event->tstamp_running; | ||
489 | } | 508 | } |
490 | 509 | ||
491 | /* | 510 | /* |
@@ -516,7 +535,7 @@ static void __perf_event_disable(void *info) | |||
516 | if (ctx->task && cpuctx->task_ctx != ctx) | 535 | if (ctx->task && cpuctx->task_ctx != ctx) |
517 | return; | 536 | return; |
518 | 537 | ||
519 | spin_lock(&ctx->lock); | 538 | raw_spin_lock(&ctx->lock); |
520 | 539 | ||
521 | /* | 540 | /* |
522 | * If the event is on, turn it off. | 541 | * If the event is on, turn it off. |
@@ -532,7 +551,7 @@ static void __perf_event_disable(void *info) | |||
532 | event->state = PERF_EVENT_STATE_OFF; | 551 | event->state = PERF_EVENT_STATE_OFF; |
533 | } | 552 | } |
534 | 553 | ||
535 | spin_unlock(&ctx->lock); | 554 | raw_spin_unlock(&ctx->lock); |
536 | } | 555 | } |
537 | 556 | ||
538 | /* | 557 | /* |
@@ -548,7 +567,7 @@ static void __perf_event_disable(void *info) | |||
548 | * is the current context on this CPU and preemption is disabled, | 567 | * is the current context on this CPU and preemption is disabled, |
549 | * hence we can't get into perf_event_task_sched_out for this context. | 568 | * hence we can't get into perf_event_task_sched_out for this context. |
550 | */ | 569 | */ |
551 | static void perf_event_disable(struct perf_event *event) | 570 | void perf_event_disable(struct perf_event *event) |
552 | { | 571 | { |
553 | struct perf_event_context *ctx = event->ctx; | 572 | struct perf_event_context *ctx = event->ctx; |
554 | struct task_struct *task = ctx->task; | 573 | struct task_struct *task = ctx->task; |
@@ -565,12 +584,12 @@ static void perf_event_disable(struct perf_event *event) | |||
565 | retry: | 584 | retry: |
566 | task_oncpu_function_call(task, __perf_event_disable, event); | 585 | task_oncpu_function_call(task, __perf_event_disable, event); |
567 | 586 | ||
568 | spin_lock_irq(&ctx->lock); | 587 | raw_spin_lock_irq(&ctx->lock); |
569 | /* | 588 | /* |
570 | * If the event is still active, we need to retry the cross-call. | 589 | * If the event is still active, we need to retry the cross-call. |
571 | */ | 590 | */ |
572 | if (event->state == PERF_EVENT_STATE_ACTIVE) { | 591 | if (event->state == PERF_EVENT_STATE_ACTIVE) { |
573 | spin_unlock_irq(&ctx->lock); | 592 | raw_spin_unlock_irq(&ctx->lock); |
574 | goto retry; | 593 | goto retry; |
575 | } | 594 | } |
576 | 595 | ||
@@ -583,7 +602,7 @@ static void perf_event_disable(struct perf_event *event) | |||
583 | event->state = PERF_EVENT_STATE_OFF; | 602 | event->state = PERF_EVENT_STATE_OFF; |
584 | } | 603 | } |
585 | 604 | ||
586 | spin_unlock_irq(&ctx->lock); | 605 | raw_spin_unlock_irq(&ctx->lock); |
587 | } | 606 | } |
588 | 607 | ||
589 | static int | 608 | static int |
@@ -751,7 +770,7 @@ static void __perf_install_in_context(void *info) | |||
751 | cpuctx->task_ctx = ctx; | 770 | cpuctx->task_ctx = ctx; |
752 | } | 771 | } |
753 | 772 | ||
754 | spin_lock(&ctx->lock); | 773 | raw_spin_lock(&ctx->lock); |
755 | ctx->is_active = 1; | 774 | ctx->is_active = 1; |
756 | update_context_time(ctx); | 775 | update_context_time(ctx); |
757 | 776 | ||
@@ -763,6 +782,9 @@ static void __perf_install_in_context(void *info) | |||
763 | 782 | ||
764 | add_event_to_ctx(event, ctx); | 783 | add_event_to_ctx(event, ctx); |
765 | 784 | ||
785 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | ||
786 | goto unlock; | ||
787 | |||
766 | /* | 788 | /* |
767 | * Don't put the event on if it is disabled or if | 789 | * Don't put the event on if it is disabled or if |
768 | * it is in a group and the group isn't on. | 790 | * it is in a group and the group isn't on. |
@@ -801,7 +823,7 @@ static void __perf_install_in_context(void *info) | |||
801 | unlock: | 823 | unlock: |
802 | perf_enable(); | 824 | perf_enable(); |
803 | 825 | ||
804 | spin_unlock(&ctx->lock); | 826 | raw_spin_unlock(&ctx->lock); |
805 | } | 827 | } |
806 | 828 | ||
807 | /* | 829 | /* |
@@ -826,7 +848,7 @@ perf_install_in_context(struct perf_event_context *ctx, | |||
826 | if (!task) { | 848 | if (!task) { |
827 | /* | 849 | /* |
828 | * Per cpu events are installed via an smp call and | 850 | * Per cpu events are installed via an smp call and |
829 | * the install is always sucessful. | 851 | * the install is always successful. |
830 | */ | 852 | */ |
831 | smp_call_function_single(cpu, __perf_install_in_context, | 853 | smp_call_function_single(cpu, __perf_install_in_context, |
832 | event, 1); | 854 | event, 1); |
@@ -837,12 +859,12 @@ retry: | |||
837 | task_oncpu_function_call(task, __perf_install_in_context, | 859 | task_oncpu_function_call(task, __perf_install_in_context, |
838 | event); | 860 | event); |
839 | 861 | ||
840 | spin_lock_irq(&ctx->lock); | 862 | raw_spin_lock_irq(&ctx->lock); |
841 | /* | 863 | /* |
842 | * we need to retry the smp call. | 864 | * we need to retry the smp call. |
843 | */ | 865 | */ |
844 | if (ctx->is_active && list_empty(&event->group_entry)) { | 866 | if (ctx->is_active && list_empty(&event->group_entry)) { |
845 | spin_unlock_irq(&ctx->lock); | 867 | raw_spin_unlock_irq(&ctx->lock); |
846 | goto retry; | 868 | goto retry; |
847 | } | 869 | } |
848 | 870 | ||
@@ -853,7 +875,7 @@ retry: | |||
853 | */ | 875 | */ |
854 | if (list_empty(&event->group_entry)) | 876 | if (list_empty(&event->group_entry)) |
855 | add_event_to_ctx(event, ctx); | 877 | add_event_to_ctx(event, ctx); |
856 | spin_unlock_irq(&ctx->lock); | 878 | raw_spin_unlock_irq(&ctx->lock); |
857 | } | 879 | } |
858 | 880 | ||
859 | /* | 881 | /* |
@@ -898,7 +920,7 @@ static void __perf_event_enable(void *info) | |||
898 | cpuctx->task_ctx = ctx; | 920 | cpuctx->task_ctx = ctx; |
899 | } | 921 | } |
900 | 922 | ||
901 | spin_lock(&ctx->lock); | 923 | raw_spin_lock(&ctx->lock); |
902 | ctx->is_active = 1; | 924 | ctx->is_active = 1; |
903 | update_context_time(ctx); | 925 | update_context_time(ctx); |
904 | 926 | ||
@@ -906,6 +928,9 @@ static void __perf_event_enable(void *info) | |||
906 | goto unlock; | 928 | goto unlock; |
907 | __perf_event_mark_enabled(event, ctx); | 929 | __perf_event_mark_enabled(event, ctx); |
908 | 930 | ||
931 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | ||
932 | goto unlock; | ||
933 | |||
909 | /* | 934 | /* |
910 | * If the event is in a group and isn't the group leader, | 935 | * If the event is in a group and isn't the group leader, |
911 | * then don't put it on unless the group is on. | 936 | * then don't put it on unless the group is on. |
@@ -940,7 +965,7 @@ static void __perf_event_enable(void *info) | |||
940 | } | 965 | } |
941 | 966 | ||
942 | unlock: | 967 | unlock: |
943 | spin_unlock(&ctx->lock); | 968 | raw_spin_unlock(&ctx->lock); |
944 | } | 969 | } |
945 | 970 | ||
946 | /* | 971 | /* |
@@ -952,7 +977,7 @@ static void __perf_event_enable(void *info) | |||
952 | * perf_event_for_each_child or perf_event_for_each as described | 977 | * perf_event_for_each_child or perf_event_for_each as described |
953 | * for perf_event_disable. | 978 | * for perf_event_disable. |
954 | */ | 979 | */ |
955 | static void perf_event_enable(struct perf_event *event) | 980 | void perf_event_enable(struct perf_event *event) |
956 | { | 981 | { |
957 | struct perf_event_context *ctx = event->ctx; | 982 | struct perf_event_context *ctx = event->ctx; |
958 | struct task_struct *task = ctx->task; | 983 | struct task_struct *task = ctx->task; |
@@ -966,7 +991,7 @@ static void perf_event_enable(struct perf_event *event) | |||
966 | return; | 991 | return; |
967 | } | 992 | } |
968 | 993 | ||
969 | spin_lock_irq(&ctx->lock); | 994 | raw_spin_lock_irq(&ctx->lock); |
970 | if (event->state >= PERF_EVENT_STATE_INACTIVE) | 995 | if (event->state >= PERF_EVENT_STATE_INACTIVE) |
971 | goto out; | 996 | goto out; |
972 | 997 | ||
@@ -981,10 +1006,10 @@ static void perf_event_enable(struct perf_event *event) | |||
981 | event->state = PERF_EVENT_STATE_OFF; | 1006 | event->state = PERF_EVENT_STATE_OFF; |
982 | 1007 | ||
983 | retry: | 1008 | retry: |
984 | spin_unlock_irq(&ctx->lock); | 1009 | raw_spin_unlock_irq(&ctx->lock); |
985 | task_oncpu_function_call(task, __perf_event_enable, event); | 1010 | task_oncpu_function_call(task, __perf_event_enable, event); |
986 | 1011 | ||
987 | spin_lock_irq(&ctx->lock); | 1012 | raw_spin_lock_irq(&ctx->lock); |
988 | 1013 | ||
989 | /* | 1014 | /* |
990 | * If the context is active and the event is still off, | 1015 | * If the context is active and the event is still off, |
@@ -1001,7 +1026,7 @@ static void perf_event_enable(struct perf_event *event) | |||
1001 | __perf_event_mark_enabled(event, ctx); | 1026 | __perf_event_mark_enabled(event, ctx); |
1002 | 1027 | ||
1003 | out: | 1028 | out: |
1004 | spin_unlock_irq(&ctx->lock); | 1029 | raw_spin_unlock_irq(&ctx->lock); |
1005 | } | 1030 | } |
1006 | 1031 | ||
1007 | static int perf_event_refresh(struct perf_event *event, int refresh) | 1032 | static int perf_event_refresh(struct perf_event *event, int refresh) |
@@ -1023,7 +1048,7 @@ void __perf_event_sched_out(struct perf_event_context *ctx, | |||
1023 | { | 1048 | { |
1024 | struct perf_event *event; | 1049 | struct perf_event *event; |
1025 | 1050 | ||
1026 | spin_lock(&ctx->lock); | 1051 | raw_spin_lock(&ctx->lock); |
1027 | ctx->is_active = 0; | 1052 | ctx->is_active = 0; |
1028 | if (likely(!ctx->nr_events)) | 1053 | if (likely(!ctx->nr_events)) |
1029 | goto out; | 1054 | goto out; |
@@ -1031,16 +1056,12 @@ void __perf_event_sched_out(struct perf_event_context *ctx, | |||
1031 | 1056 | ||
1032 | perf_disable(); | 1057 | perf_disable(); |
1033 | if (ctx->nr_active) { | 1058 | if (ctx->nr_active) { |
1034 | list_for_each_entry(event, &ctx->group_list, group_entry) { | 1059 | list_for_each_entry(event, &ctx->group_list, group_entry) |
1035 | if (event != event->group_leader) | 1060 | group_sched_out(event, cpuctx, ctx); |
1036 | event_sched_out(event, cpuctx, ctx); | ||
1037 | else | ||
1038 | group_sched_out(event, cpuctx, ctx); | ||
1039 | } | ||
1040 | } | 1061 | } |
1041 | perf_enable(); | 1062 | perf_enable(); |
1042 | out: | 1063 | out: |
1043 | spin_unlock(&ctx->lock); | 1064 | raw_spin_unlock(&ctx->lock); |
1044 | } | 1065 | } |
1045 | 1066 | ||
1046 | /* | 1067 | /* |
@@ -1062,8 +1083,6 @@ static int context_equiv(struct perf_event_context *ctx1, | |||
1062 | && !ctx1->pin_count && !ctx2->pin_count; | 1083 | && !ctx1->pin_count && !ctx2->pin_count; |
1063 | } | 1084 | } |
1064 | 1085 | ||
1065 | static void __perf_event_read(void *event); | ||
1066 | |||
1067 | static void __perf_event_sync_stat(struct perf_event *event, | 1086 | static void __perf_event_sync_stat(struct perf_event *event, |
1068 | struct perf_event *next_event) | 1087 | struct perf_event *next_event) |
1069 | { | 1088 | { |
@@ -1081,8 +1100,8 @@ static void __perf_event_sync_stat(struct perf_event *event, | |||
1081 | */ | 1100 | */ |
1082 | switch (event->state) { | 1101 | switch (event->state) { |
1083 | case PERF_EVENT_STATE_ACTIVE: | 1102 | case PERF_EVENT_STATE_ACTIVE: |
1084 | __perf_event_read(event); | 1103 | event->pmu->read(event); |
1085 | break; | 1104 | /* fall-through */ |
1086 | 1105 | ||
1087 | case PERF_EVENT_STATE_INACTIVE: | 1106 | case PERF_EVENT_STATE_INACTIVE: |
1088 | update_event_times(event); | 1107 | update_event_times(event); |
@@ -1121,6 +1140,8 @@ static void perf_event_sync_stat(struct perf_event_context *ctx, | |||
1121 | if (!ctx->nr_stat) | 1140 | if (!ctx->nr_stat) |
1122 | return; | 1141 | return; |
1123 | 1142 | ||
1143 | update_context_time(ctx); | ||
1144 | |||
1124 | event = list_first_entry(&ctx->event_list, | 1145 | event = list_first_entry(&ctx->event_list, |
1125 | struct perf_event, event_entry); | 1146 | struct perf_event, event_entry); |
1126 | 1147 | ||
@@ -1164,8 +1185,6 @@ void perf_event_task_sched_out(struct task_struct *task, | |||
1164 | if (likely(!ctx || !cpuctx->task_ctx)) | 1185 | if (likely(!ctx || !cpuctx->task_ctx)) |
1165 | return; | 1186 | return; |
1166 | 1187 | ||
1167 | update_context_time(ctx); | ||
1168 | |||
1169 | rcu_read_lock(); | 1188 | rcu_read_lock(); |
1170 | parent = rcu_dereference(ctx->parent_ctx); | 1189 | parent = rcu_dereference(ctx->parent_ctx); |
1171 | next_ctx = next->perf_event_ctxp; | 1190 | next_ctx = next->perf_event_ctxp; |
@@ -1180,8 +1199,8 @@ void perf_event_task_sched_out(struct task_struct *task, | |||
1180 | * order we take the locks because no other cpu could | 1199 | * order we take the locks because no other cpu could |
1181 | * be trying to lock both of these tasks. | 1200 | * be trying to lock both of these tasks. |
1182 | */ | 1201 | */ |
1183 | spin_lock(&ctx->lock); | 1202 | raw_spin_lock(&ctx->lock); |
1184 | spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING); | 1203 | raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING); |
1185 | if (context_equiv(ctx, next_ctx)) { | 1204 | if (context_equiv(ctx, next_ctx)) { |
1186 | /* | 1205 | /* |
1187 | * XXX do we need a memory barrier of sorts | 1206 | * XXX do we need a memory barrier of sorts |
@@ -1195,8 +1214,8 @@ void perf_event_task_sched_out(struct task_struct *task, | |||
1195 | 1214 | ||
1196 | perf_event_sync_stat(ctx, next_ctx); | 1215 | perf_event_sync_stat(ctx, next_ctx); |
1197 | } | 1216 | } |
1198 | spin_unlock(&next_ctx->lock); | 1217 | raw_spin_unlock(&next_ctx->lock); |
1199 | spin_unlock(&ctx->lock); | 1218 | raw_spin_unlock(&ctx->lock); |
1200 | } | 1219 | } |
1201 | rcu_read_unlock(); | 1220 | rcu_read_unlock(); |
1202 | 1221 | ||
@@ -1238,7 +1257,7 @@ __perf_event_sched_in(struct perf_event_context *ctx, | |||
1238 | struct perf_event *event; | 1257 | struct perf_event *event; |
1239 | int can_add_hw = 1; | 1258 | int can_add_hw = 1; |
1240 | 1259 | ||
1241 | spin_lock(&ctx->lock); | 1260 | raw_spin_lock(&ctx->lock); |
1242 | ctx->is_active = 1; | 1261 | ctx->is_active = 1; |
1243 | if (likely(!ctx->nr_events)) | 1262 | if (likely(!ctx->nr_events)) |
1244 | goto out; | 1263 | goto out; |
@@ -1258,12 +1277,8 @@ __perf_event_sched_in(struct perf_event_context *ctx, | |||
1258 | if (event->cpu != -1 && event->cpu != cpu) | 1277 | if (event->cpu != -1 && event->cpu != cpu) |
1259 | continue; | 1278 | continue; |
1260 | 1279 | ||
1261 | if (event != event->group_leader) | 1280 | if (group_can_go_on(event, cpuctx, 1)) |
1262 | event_sched_in(event, cpuctx, ctx, cpu); | 1281 | group_sched_in(event, cpuctx, ctx, cpu); |
1263 | else { | ||
1264 | if (group_can_go_on(event, cpuctx, 1)) | ||
1265 | group_sched_in(event, cpuctx, ctx, cpu); | ||
1266 | } | ||
1267 | 1282 | ||
1268 | /* | 1283 | /* |
1269 | * If this pinned group hasn't been scheduled, | 1284 | * If this pinned group hasn't been scheduled, |
@@ -1291,19 +1306,13 @@ __perf_event_sched_in(struct perf_event_context *ctx, | |||
1291 | if (event->cpu != -1 && event->cpu != cpu) | 1306 | if (event->cpu != -1 && event->cpu != cpu) |
1292 | continue; | 1307 | continue; |
1293 | 1308 | ||
1294 | if (event != event->group_leader) { | 1309 | if (group_can_go_on(event, cpuctx, can_add_hw)) |
1295 | if (event_sched_in(event, cpuctx, ctx, cpu)) | 1310 | if (group_sched_in(event, cpuctx, ctx, cpu)) |
1296 | can_add_hw = 0; | 1311 | can_add_hw = 0; |
1297 | } else { | ||
1298 | if (group_can_go_on(event, cpuctx, can_add_hw)) { | ||
1299 | if (group_sched_in(event, cpuctx, ctx, cpu)) | ||
1300 | can_add_hw = 0; | ||
1301 | } | ||
1302 | } | ||
1303 | } | 1312 | } |
1304 | perf_enable(); | 1313 | perf_enable(); |
1305 | out: | 1314 | out: |
1306 | spin_unlock(&ctx->lock); | 1315 | raw_spin_unlock(&ctx->lock); |
1307 | } | 1316 | } |
1308 | 1317 | ||
1309 | /* | 1318 | /* |
@@ -1367,11 +1376,14 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx) | |||
1367 | struct hw_perf_event *hwc; | 1376 | struct hw_perf_event *hwc; |
1368 | u64 interrupts, freq; | 1377 | u64 interrupts, freq; |
1369 | 1378 | ||
1370 | spin_lock(&ctx->lock); | 1379 | raw_spin_lock(&ctx->lock); |
1371 | list_for_each_entry(event, &ctx->group_list, group_entry) { | 1380 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { |
1372 | if (event->state != PERF_EVENT_STATE_ACTIVE) | 1381 | if (event->state != PERF_EVENT_STATE_ACTIVE) |
1373 | continue; | 1382 | continue; |
1374 | 1383 | ||
1384 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | ||
1385 | continue; | ||
1386 | |||
1375 | hwc = &event->hw; | 1387 | hwc = &event->hw; |
1376 | 1388 | ||
1377 | interrupts = hwc->interrupts; | 1389 | interrupts = hwc->interrupts; |
@@ -1422,7 +1434,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx) | |||
1422 | perf_enable(); | 1434 | perf_enable(); |
1423 | } | 1435 | } |
1424 | } | 1436 | } |
1425 | spin_unlock(&ctx->lock); | 1437 | raw_spin_unlock(&ctx->lock); |
1426 | } | 1438 | } |
1427 | 1439 | ||
1428 | /* | 1440 | /* |
@@ -1435,7 +1447,7 @@ static void rotate_ctx(struct perf_event_context *ctx) | |||
1435 | if (!ctx->nr_events) | 1447 | if (!ctx->nr_events) |
1436 | return; | 1448 | return; |
1437 | 1449 | ||
1438 | spin_lock(&ctx->lock); | 1450 | raw_spin_lock(&ctx->lock); |
1439 | /* | 1451 | /* |
1440 | * Rotate the first entry last (works just fine for group events too): | 1452 | * Rotate the first entry last (works just fine for group events too): |
1441 | */ | 1453 | */ |
@@ -1446,7 +1458,7 @@ static void rotate_ctx(struct perf_event_context *ctx) | |||
1446 | } | 1458 | } |
1447 | perf_enable(); | 1459 | perf_enable(); |
1448 | 1460 | ||
1449 | spin_unlock(&ctx->lock); | 1461 | raw_spin_unlock(&ctx->lock); |
1450 | } | 1462 | } |
1451 | 1463 | ||
1452 | void perf_event_task_tick(struct task_struct *curr, int cpu) | 1464 | void perf_event_task_tick(struct task_struct *curr, int cpu) |
@@ -1495,7 +1507,7 @@ static void perf_event_enable_on_exec(struct task_struct *task) | |||
1495 | 1507 | ||
1496 | __perf_event_task_sched_out(ctx); | 1508 | __perf_event_task_sched_out(ctx); |
1497 | 1509 | ||
1498 | spin_lock(&ctx->lock); | 1510 | raw_spin_lock(&ctx->lock); |
1499 | 1511 | ||
1500 | list_for_each_entry(event, &ctx->group_list, group_entry) { | 1512 | list_for_each_entry(event, &ctx->group_list, group_entry) { |
1501 | if (!event->attr.enable_on_exec) | 1513 | if (!event->attr.enable_on_exec) |
@@ -1513,7 +1525,7 @@ static void perf_event_enable_on_exec(struct task_struct *task) | |||
1513 | if (enabled) | 1525 | if (enabled) |
1514 | unclone_ctx(ctx); | 1526 | unclone_ctx(ctx); |
1515 | 1527 | ||
1516 | spin_unlock(&ctx->lock); | 1528 | raw_spin_unlock(&ctx->lock); |
1517 | 1529 | ||
1518 | perf_event_task_sched_in(task, smp_processor_id()); | 1530 | perf_event_task_sched_in(task, smp_processor_id()); |
1519 | out: | 1531 | out: |
@@ -1528,7 +1540,6 @@ static void __perf_event_read(void *info) | |||
1528 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | 1540 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); |
1529 | struct perf_event *event = info; | 1541 | struct perf_event *event = info; |
1530 | struct perf_event_context *ctx = event->ctx; | 1542 | struct perf_event_context *ctx = event->ctx; |
1531 | unsigned long flags; | ||
1532 | 1543 | ||
1533 | /* | 1544 | /* |
1534 | * If this is a task context, we need to check whether it is | 1545 | * If this is a task context, we need to check whether it is |
@@ -1540,12 +1551,12 @@ static void __perf_event_read(void *info) | |||
1540 | if (ctx->task && cpuctx->task_ctx != ctx) | 1551 | if (ctx->task && cpuctx->task_ctx != ctx) |
1541 | return; | 1552 | return; |
1542 | 1553 | ||
1543 | local_irq_save(flags); | 1554 | raw_spin_lock(&ctx->lock); |
1544 | if (ctx->is_active) | 1555 | update_context_time(ctx); |
1545 | update_context_time(ctx); | ||
1546 | event->pmu->read(event); | ||
1547 | update_event_times(event); | 1556 | update_event_times(event); |
1548 | local_irq_restore(flags); | 1557 | raw_spin_unlock(&ctx->lock); |
1558 | |||
1559 | event->pmu->read(event); | ||
1549 | } | 1560 | } |
1550 | 1561 | ||
1551 | static u64 perf_event_read(struct perf_event *event) | 1562 | static u64 perf_event_read(struct perf_event *event) |
@@ -1558,7 +1569,13 @@ static u64 perf_event_read(struct perf_event *event) | |||
1558 | smp_call_function_single(event->oncpu, | 1569 | smp_call_function_single(event->oncpu, |
1559 | __perf_event_read, event, 1); | 1570 | __perf_event_read, event, 1); |
1560 | } else if (event->state == PERF_EVENT_STATE_INACTIVE) { | 1571 | } else if (event->state == PERF_EVENT_STATE_INACTIVE) { |
1572 | struct perf_event_context *ctx = event->ctx; | ||
1573 | unsigned long flags; | ||
1574 | |||
1575 | raw_spin_lock_irqsave(&ctx->lock, flags); | ||
1576 | update_context_time(ctx); | ||
1561 | update_event_times(event); | 1577 | update_event_times(event); |
1578 | raw_spin_unlock_irqrestore(&ctx->lock, flags); | ||
1562 | } | 1579 | } |
1563 | 1580 | ||
1564 | return atomic64_read(&event->count); | 1581 | return atomic64_read(&event->count); |
@@ -1571,8 +1588,7 @@ static void | |||
1571 | __perf_event_init_context(struct perf_event_context *ctx, | 1588 | __perf_event_init_context(struct perf_event_context *ctx, |
1572 | struct task_struct *task) | 1589 | struct task_struct *task) |
1573 | { | 1590 | { |
1574 | memset(ctx, 0, sizeof(*ctx)); | 1591 | raw_spin_lock_init(&ctx->lock); |
1575 | spin_lock_init(&ctx->lock); | ||
1576 | mutex_init(&ctx->mutex); | 1592 | mutex_init(&ctx->mutex); |
1577 | INIT_LIST_HEAD(&ctx->group_list); | 1593 | INIT_LIST_HEAD(&ctx->group_list); |
1578 | INIT_LIST_HEAD(&ctx->event_list); | 1594 | INIT_LIST_HEAD(&ctx->event_list); |
@@ -1588,15 +1604,12 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu) | |||
1588 | unsigned long flags; | 1604 | unsigned long flags; |
1589 | int err; | 1605 | int err; |
1590 | 1606 | ||
1591 | /* | 1607 | if (pid == -1 && cpu != -1) { |
1592 | * If cpu is not a wildcard then this is a percpu event: | ||
1593 | */ | ||
1594 | if (cpu != -1) { | ||
1595 | /* Must be root to operate on a CPU event: */ | 1608 | /* Must be root to operate on a CPU event: */ |
1596 | if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) | 1609 | if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) |
1597 | return ERR_PTR(-EACCES); | 1610 | return ERR_PTR(-EACCES); |
1598 | 1611 | ||
1599 | if (cpu < 0 || cpu > num_possible_cpus()) | 1612 | if (cpu < 0 || cpu >= nr_cpumask_bits) |
1600 | return ERR_PTR(-EINVAL); | 1613 | return ERR_PTR(-EINVAL); |
1601 | 1614 | ||
1602 | /* | 1615 | /* |
@@ -1604,7 +1617,7 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu) | |||
1604 | * offline CPU and activate it when the CPU comes up, but | 1617 | * offline CPU and activate it when the CPU comes up, but |
1605 | * that's for later. | 1618 | * that's for later. |
1606 | */ | 1619 | */ |
1607 | if (!cpu_isset(cpu, cpu_online_map)) | 1620 | if (!cpu_online(cpu)) |
1608 | return ERR_PTR(-ENODEV); | 1621 | return ERR_PTR(-ENODEV); |
1609 | 1622 | ||
1610 | cpuctx = &per_cpu(perf_cpu_context, cpu); | 1623 | cpuctx = &per_cpu(perf_cpu_context, cpu); |
@@ -1642,11 +1655,11 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu) | |||
1642 | ctx = perf_lock_task_context(task, &flags); | 1655 | ctx = perf_lock_task_context(task, &flags); |
1643 | if (ctx) { | 1656 | if (ctx) { |
1644 | unclone_ctx(ctx); | 1657 | unclone_ctx(ctx); |
1645 | spin_unlock_irqrestore(&ctx->lock, flags); | 1658 | raw_spin_unlock_irqrestore(&ctx->lock, flags); |
1646 | } | 1659 | } |
1647 | 1660 | ||
1648 | if (!ctx) { | 1661 | if (!ctx) { |
1649 | ctx = kmalloc(sizeof(struct perf_event_context), GFP_KERNEL); | 1662 | ctx = kzalloc(sizeof(struct perf_event_context), GFP_KERNEL); |
1650 | err = -ENOMEM; | 1663 | err = -ENOMEM; |
1651 | if (!ctx) | 1664 | if (!ctx) |
1652 | goto errout; | 1665 | goto errout; |
@@ -1671,6 +1684,8 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu) | |||
1671 | return ERR_PTR(err); | 1684 | return ERR_PTR(err); |
1672 | } | 1685 | } |
1673 | 1686 | ||
1687 | static void perf_event_free_filter(struct perf_event *event); | ||
1688 | |||
1674 | static void free_event_rcu(struct rcu_head *head) | 1689 | static void free_event_rcu(struct rcu_head *head) |
1675 | { | 1690 | { |
1676 | struct perf_event *event; | 1691 | struct perf_event *event; |
@@ -1678,6 +1693,7 @@ static void free_event_rcu(struct rcu_head *head) | |||
1678 | event = container_of(head, struct perf_event, rcu_head); | 1693 | event = container_of(head, struct perf_event, rcu_head); |
1679 | if (event->ns) | 1694 | if (event->ns) |
1680 | put_pid_ns(event->ns); | 1695 | put_pid_ns(event->ns); |
1696 | perf_event_free_filter(event); | ||
1681 | kfree(event); | 1697 | kfree(event); |
1682 | } | 1698 | } |
1683 | 1699 | ||
@@ -1709,16 +1725,10 @@ static void free_event(struct perf_event *event) | |||
1709 | call_rcu(&event->rcu_head, free_event_rcu); | 1725 | call_rcu(&event->rcu_head, free_event_rcu); |
1710 | } | 1726 | } |
1711 | 1727 | ||
1712 | /* | 1728 | int perf_event_release_kernel(struct perf_event *event) |
1713 | * Called when the last reference to the file is gone. | ||
1714 | */ | ||
1715 | static int perf_release(struct inode *inode, struct file *file) | ||
1716 | { | 1729 | { |
1717 | struct perf_event *event = file->private_data; | ||
1718 | struct perf_event_context *ctx = event->ctx; | 1730 | struct perf_event_context *ctx = event->ctx; |
1719 | 1731 | ||
1720 | file->private_data = NULL; | ||
1721 | |||
1722 | WARN_ON_ONCE(ctx->parent_ctx); | 1732 | WARN_ON_ONCE(ctx->parent_ctx); |
1723 | mutex_lock(&ctx->mutex); | 1733 | mutex_lock(&ctx->mutex); |
1724 | perf_event_remove_from_context(event); | 1734 | perf_event_remove_from_context(event); |
@@ -1733,6 +1743,19 @@ static int perf_release(struct inode *inode, struct file *file) | |||
1733 | 1743 | ||
1734 | return 0; | 1744 | return 0; |
1735 | } | 1745 | } |
1746 | EXPORT_SYMBOL_GPL(perf_event_release_kernel); | ||
1747 | |||
1748 | /* | ||
1749 | * Called when the last reference to the file is gone. | ||
1750 | */ | ||
1751 | static int perf_release(struct inode *inode, struct file *file) | ||
1752 | { | ||
1753 | struct perf_event *event = file->private_data; | ||
1754 | |||
1755 | file->private_data = NULL; | ||
1756 | |||
1757 | return perf_event_release_kernel(event); | ||
1758 | } | ||
1736 | 1759 | ||
1737 | static int perf_event_read_size(struct perf_event *event) | 1760 | static int perf_event_read_size(struct perf_event *event) |
1738 | { | 1761 | { |
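With the two hunks above, perf_release() becomes a thin file-release wrapper and the actual teardown is exported as perf_event_release_kernel(), so in-kernel users can drop events they own without going through a file descriptor. A hedged sketch of such a call site follows; how the event was created (for instance via perf_event_create_kernel_counter(), whose exact signature varies between kernel versions) is outside this patch, and demo_put_event() is an invented name.

#include <linux/err.h>
#include <linux/perf_event.h>

static void demo_put_event(struct perf_event *event)
{
        /* event is assumed to have been created by in-kernel code */
        if (!IS_ERR_OR_NULL(event))
                perf_event_release_kernel(event);
}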
@@ -1759,91 +1782,94 @@ static int perf_event_read_size(struct perf_event *event) | |||
1759 | return size; | 1782 | return size; |
1760 | } | 1783 | } |
1761 | 1784 | ||
1762 | static u64 perf_event_read_value(struct perf_event *event) | 1785 | u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) |
1763 | { | 1786 | { |
1764 | struct perf_event *child; | 1787 | struct perf_event *child; |
1765 | u64 total = 0; | 1788 | u64 total = 0; |
1766 | 1789 | ||
1790 | *enabled = 0; | ||
1791 | *running = 0; | ||
1792 | |||
1793 | mutex_lock(&event->child_mutex); | ||
1767 | total += perf_event_read(event); | 1794 | total += perf_event_read(event); |
1768 | list_for_each_entry(child, &event->child_list, child_list) | 1795 | *enabled += event->total_time_enabled + |
1796 | atomic64_read(&event->child_total_time_enabled); | ||
1797 | *running += event->total_time_running + | ||
1798 | atomic64_read(&event->child_total_time_running); | ||
1799 | |||
1800 | list_for_each_entry(child, &event->child_list, child_list) { | ||
1769 | total += perf_event_read(child); | 1801 | total += perf_event_read(child); |
1802 | *enabled += child->total_time_enabled; | ||
1803 | *running += child->total_time_running; | ||
1804 | } | ||
1805 | mutex_unlock(&event->child_mutex); | ||
1770 | 1806 | ||
1771 | return total; | 1807 | return total; |
1772 | } | 1808 | } |
1773 | 1809 | EXPORT_SYMBOL_GPL(perf_event_read_value); | |
1774 | static int perf_event_read_entry(struct perf_event *event, | ||
1775 | u64 read_format, char __user *buf) | ||
1776 | { | ||
1777 | int n = 0, count = 0; | ||
1778 | u64 values[2]; | ||
1779 | |||
1780 | values[n++] = perf_event_read_value(event); | ||
1781 | if (read_format & PERF_FORMAT_ID) | ||
1782 | values[n++] = primary_event_id(event); | ||
1783 | |||
1784 | count = n * sizeof(u64); | ||
1785 | |||
1786 | if (copy_to_user(buf, values, count)) | ||
1787 | return -EFAULT; | ||
1788 | |||
1789 | return count; | ||
1790 | } | ||
1791 | 1810 | ||
1792 | static int perf_event_read_group(struct perf_event *event, | 1811 | static int perf_event_read_group(struct perf_event *event, |
1793 | u64 read_format, char __user *buf) | 1812 | u64 read_format, char __user *buf) |
1794 | { | 1813 | { |
1795 | struct perf_event *leader = event->group_leader, *sub; | 1814 | struct perf_event *leader = event->group_leader, *sub; |
1796 | int n = 0, size = 0, err = -EFAULT; | 1815 | int n = 0, size = 0, ret = -EFAULT; |
1797 | u64 values[3]; | 1816 | struct perf_event_context *ctx = leader->ctx; |
1817 | u64 values[5]; | ||
1818 | u64 count, enabled, running; | ||
1819 | |||
1820 | mutex_lock(&ctx->mutex); | ||
1821 | count = perf_event_read_value(leader, &enabled, &running); | ||
1798 | 1822 | ||
1799 | values[n++] = 1 + leader->nr_siblings; | 1823 | values[n++] = 1 + leader->nr_siblings; |
1800 | if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { | 1824 | if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) |
1801 | values[n++] = leader->total_time_enabled + | 1825 | values[n++] = enabled; |
1802 | atomic64_read(&leader->child_total_time_enabled); | 1826 | if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) |
1803 | } | 1827 | values[n++] = running; |
1804 | if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { | 1828 | values[n++] = count; |
1805 | values[n++] = leader->total_time_running + | 1829 | if (read_format & PERF_FORMAT_ID) |
1806 | atomic64_read(&leader->child_total_time_running); | 1830 | values[n++] = primary_event_id(leader); |
1807 | } | ||
1808 | 1831 | ||
1809 | size = n * sizeof(u64); | 1832 | size = n * sizeof(u64); |
1810 | 1833 | ||
1811 | if (copy_to_user(buf, values, size)) | 1834 | if (copy_to_user(buf, values, size)) |
1812 | return -EFAULT; | 1835 | goto unlock; |
1813 | |||
1814 | err = perf_event_read_entry(leader, read_format, buf + size); | ||
1815 | if (err < 0) | ||
1816 | return err; | ||
1817 | 1836 | ||
1818 | size += err; | 1837 | ret = size; |
1819 | 1838 | ||
1820 | list_for_each_entry(sub, &leader->sibling_list, group_entry) { | 1839 | list_for_each_entry(sub, &leader->sibling_list, group_entry) { |
1821 | err = perf_event_read_entry(sub, read_format, | 1840 | n = 0; |
1822 | buf + size); | ||
1823 | if (err < 0) | ||
1824 | return err; | ||
1825 | 1841 | ||
1826 | size += err; | 1842 | values[n++] = perf_event_read_value(sub, &enabled, &running); |
1843 | if (read_format & PERF_FORMAT_ID) | ||
1844 | values[n++] = primary_event_id(sub); | ||
1845 | |||
1846 | size = n * sizeof(u64); | ||
1847 | |||
1848 | if (copy_to_user(buf + ret, values, size)) { | ||
1849 | ret = -EFAULT; | ||
1850 | goto unlock; | ||
1851 | } | ||
1852 | |||
1853 | ret += size; | ||
1827 | } | 1854 | } |
1855 | unlock: | ||
1856 | mutex_unlock(&ctx->mutex); | ||
1828 | 1857 | ||
1829 | return size; | 1858 | return ret; |
1830 | } | 1859 | } |
1831 | 1860 | ||
1832 | static int perf_event_read_one(struct perf_event *event, | 1861 | static int perf_event_read_one(struct perf_event *event, |
1833 | u64 read_format, char __user *buf) | 1862 | u64 read_format, char __user *buf) |
1834 | { | 1863 | { |
1864 | u64 enabled, running; | ||
1835 | u64 values[4]; | 1865 | u64 values[4]; |
1836 | int n = 0; | 1866 | int n = 0; |
1837 | 1867 | ||
1838 | values[n++] = perf_event_read_value(event); | 1868 | values[n++] = perf_event_read_value(event, &enabled, &running); |
1839 | if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { | 1869 | if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) |
1840 | values[n++] = event->total_time_enabled + | 1870 | values[n++] = enabled; |
1841 | atomic64_read(&event->child_total_time_enabled); | 1871 | if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) |
1842 | } | 1872 | values[n++] = running; |
1843 | if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { | ||
1844 | values[n++] = event->total_time_running + | ||
1845 | atomic64_read(&event->child_total_time_running); | ||
1846 | } | ||
1847 | if (read_format & PERF_FORMAT_ID) | 1873 | if (read_format & PERF_FORMAT_ID) |
1848 | values[n++] = primary_event_id(event); | 1874 | values[n++] = primary_event_id(event); |
1849 | 1875 | ||
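perf_event_read_value() now takes the child_mutex itself, returns the summed count, and hands the aggregated enabled/running times back through out-parameters; it is also exported for in-kernel callers. A minimal sketch of the new calling convention, using an invented demo_read_event() wrapper around a valid in-kernel event:

#include <linux/kernel.h>
#include <linux/perf_event.h>

static u64 demo_read_event(struct perf_event *event)
{
        u64 enabled, running, count;

        count = perf_event_read_value(event, &enabled, &running);
        pr_debug("count=%llu enabled=%llu running=%llu\n",
                 (unsigned long long)count,
                 (unsigned long long)enabled,
                 (unsigned long long)running);
        return count;
}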
@@ -1874,12 +1900,10 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count) | |||
1874 | return -ENOSPC; | 1900 | return -ENOSPC; |
1875 | 1901 | ||
1876 | WARN_ON_ONCE(event->ctx->parent_ctx); | 1902 | WARN_ON_ONCE(event->ctx->parent_ctx); |
1877 | mutex_lock(&event->child_mutex); | ||
1878 | if (read_format & PERF_FORMAT_GROUP) | 1903 | if (read_format & PERF_FORMAT_GROUP) |
1879 | ret = perf_event_read_group(event, read_format, buf); | 1904 | ret = perf_event_read_group(event, read_format, buf); |
1880 | else | 1905 | else |
1881 | ret = perf_event_read_one(event, read_format, buf); | 1906 | ret = perf_event_read_one(event, read_format, buf); |
1882 | mutex_unlock(&event->child_mutex); | ||
1883 | 1907 | ||
1884 | return ret; | 1908 | return ret; |
1885 | } | 1909 | } |
@@ -1969,7 +1993,7 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg) | |||
1969 | if (!value) | 1993 | if (!value) |
1970 | return -EINVAL; | 1994 | return -EINVAL; |
1971 | 1995 | ||
1972 | spin_lock_irq(&ctx->lock); | 1996 | raw_spin_lock_irq(&ctx->lock); |
1973 | if (event->attr.freq) { | 1997 | if (event->attr.freq) { |
1974 | if (value > sysctl_perf_event_sample_rate) { | 1998 | if (value > sysctl_perf_event_sample_rate) { |
1975 | ret = -EINVAL; | 1999 | ret = -EINVAL; |
@@ -1982,12 +2006,13 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg) | |||
1982 | event->hw.sample_period = value; | 2006 | event->hw.sample_period = value; |
1983 | } | 2007 | } |
1984 | unlock: | 2008 | unlock: |
1985 | spin_unlock_irq(&ctx->lock); | 2009 | raw_spin_unlock_irq(&ctx->lock); |
1986 | 2010 | ||
1987 | return ret; | 2011 | return ret; |
1988 | } | 2012 | } |
1989 | 2013 | ||
1990 | int perf_event_set_output(struct perf_event *event, int output_fd); | 2014 | static int perf_event_set_output(struct perf_event *event, int output_fd); |
2015 | static int perf_event_set_filter(struct perf_event *event, void __user *arg); | ||
1991 | 2016 | ||
1992 | static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | 2017 | static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) |
1993 | { | 2018 | { |
@@ -2015,6 +2040,9 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
2015 | case PERF_EVENT_IOC_SET_OUTPUT: | 2040 | case PERF_EVENT_IOC_SET_OUTPUT: |
2016 | return perf_event_set_output(event, arg); | 2041 | return perf_event_set_output(event, arg); |
2017 | 2042 | ||
2043 | case PERF_EVENT_IOC_SET_FILTER: | ||
2044 | return perf_event_set_filter(event, (void __user *)arg); | ||
2045 | |||
2018 | default: | 2046 | default: |
2019 | return -ENOTTY; | 2047 | return -ENOTTY; |
2020 | } | 2048 | } |
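The new PERF_EVENT_IOC_SET_FILTER ioctl attaches an ftrace-style filter string to an event; the perf_event_set_filter() implementation it dispatches to is added further down in this commit (not shown in this excerpt). A userspace sketch of the call, assuming fd comes from perf_event_open() on a PERF_TYPE_TRACEPOINT event and using an arbitrary example filter:

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/perf_event.h>

static int demo_set_filter(int fd)
{
        const char *filter = "common_pid != 0";   /* ftrace event-filter syntax */

        if (ioctl(fd, PERF_EVENT_IOC_SET_FILTER, filter) < 0) {
                perror("PERF_EVENT_IOC_SET_FILTER");
                return -1;
        }
        return 0;
}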
@@ -2105,49 +2133,31 @@ unlock: | |||
2105 | rcu_read_unlock(); | 2133 | rcu_read_unlock(); |
2106 | } | 2134 | } |
2107 | 2135 | ||
2108 | static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | 2136 | static unsigned long perf_data_size(struct perf_mmap_data *data) |
2109 | { | 2137 | { |
2110 | struct perf_event *event = vma->vm_file->private_data; | 2138 | return data->nr_pages << (PAGE_SHIFT + data->data_order); |
2111 | struct perf_mmap_data *data; | 2139 | } |
2112 | int ret = VM_FAULT_SIGBUS; | ||
2113 | |||
2114 | if (vmf->flags & FAULT_FLAG_MKWRITE) { | ||
2115 | if (vmf->pgoff == 0) | ||
2116 | ret = 0; | ||
2117 | return ret; | ||
2118 | } | ||
2119 | |||
2120 | rcu_read_lock(); | ||
2121 | data = rcu_dereference(event->data); | ||
2122 | if (!data) | ||
2123 | goto unlock; | ||
2124 | |||
2125 | if (vmf->pgoff == 0) { | ||
2126 | vmf->page = virt_to_page(data->user_page); | ||
2127 | } else { | ||
2128 | int nr = vmf->pgoff - 1; | ||
2129 | |||
2130 | if ((unsigned)nr > data->nr_pages) | ||
2131 | goto unlock; | ||
2132 | 2140 | ||
2133 | if (vmf->flags & FAULT_FLAG_WRITE) | 2141 | #ifndef CONFIG_PERF_USE_VMALLOC |
2134 | goto unlock; | ||
2135 | 2142 | ||
2136 | vmf->page = virt_to_page(data->data_pages[nr]); | 2143 | /* |
2137 | } | 2144 | * Back perf_mmap() with regular GFP_KERNEL-0 pages. |
2145 | */ | ||
2138 | 2146 | ||
2139 | get_page(vmf->page); | 2147 | static struct page * |
2140 | vmf->page->mapping = vma->vm_file->f_mapping; | 2148 | perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff) |
2141 | vmf->page->index = vmf->pgoff; | 2149 | { |
2150 | if (pgoff > data->nr_pages) | ||
2151 | return NULL; | ||
2142 | 2152 | ||
2143 | ret = 0; | 2153 | if (pgoff == 0) |
2144 | unlock: | 2154 | return virt_to_page(data->user_page); |
2145 | rcu_read_unlock(); | ||
2146 | 2155 | ||
2147 | return ret; | 2156 | return virt_to_page(data->data_pages[pgoff - 1]); |
2148 | } | 2157 | } |
2149 | 2158 | ||
2150 | static int perf_mmap_data_alloc(struct perf_event *event, int nr_pages) | 2159 | static struct perf_mmap_data * |
2160 | perf_mmap_data_alloc(struct perf_event *event, int nr_pages) | ||
2151 | { | 2161 | { |
2152 | struct perf_mmap_data *data; | 2162 | struct perf_mmap_data *data; |
2153 | unsigned long size; | 2163 | unsigned long size; |
@@ -2172,19 +2182,10 @@ static int perf_mmap_data_alloc(struct perf_event *event, int nr_pages) | |||
2172 | goto fail_data_pages; | 2182 | goto fail_data_pages; |
2173 | } | 2183 | } |
2174 | 2184 | ||
2185 | data->data_order = 0; | ||
2175 | data->nr_pages = nr_pages; | 2186 | data->nr_pages = nr_pages; |
2176 | atomic_set(&data->lock, -1); | ||
2177 | 2187 | ||
2178 | if (event->attr.watermark) { | 2188 | return data; |
2179 | data->watermark = min_t(long, PAGE_SIZE * nr_pages, | ||
2180 | event->attr.wakeup_watermark); | ||
2181 | } | ||
2182 | if (!data->watermark) | ||
2183 | data->watermark = max(PAGE_SIZE, PAGE_SIZE * nr_pages / 4); | ||
2184 | |||
2185 | rcu_assign_pointer(event->data, data); | ||
2186 | |||
2187 | return 0; | ||
2188 | 2189 | ||
2189 | fail_data_pages: | 2190 | fail_data_pages: |
2190 | for (i--; i >= 0; i--) | 2191 | for (i--; i >= 0; i--) |
@@ -2196,7 +2197,7 @@ fail_user_page: | |||
2196 | kfree(data); | 2197 | kfree(data); |
2197 | 2198 | ||
2198 | fail: | 2199 | fail: |
2199 | return -ENOMEM; | 2200 | return NULL; |
2200 | } | 2201 | } |
2201 | 2202 | ||
2202 | static void perf_mmap_free_page(unsigned long addr) | 2203 | static void perf_mmap_free_page(unsigned long addr) |
@@ -2207,28 +2208,170 @@ static void perf_mmap_free_page(unsigned long addr) | |||
2207 | __free_page(page); | 2208 | __free_page(page); |
2208 | } | 2209 | } |
2209 | 2210 | ||
2210 | static void __perf_mmap_data_free(struct rcu_head *rcu_head) | 2211 | static void perf_mmap_data_free(struct perf_mmap_data *data) |
2211 | { | 2212 | { |
2212 | struct perf_mmap_data *data; | ||
2213 | int i; | 2213 | int i; |
2214 | 2214 | ||
2215 | data = container_of(rcu_head, struct perf_mmap_data, rcu_head); | ||
2216 | |||
2217 | perf_mmap_free_page((unsigned long)data->user_page); | 2215 | perf_mmap_free_page((unsigned long)data->user_page); |
2218 | for (i = 0; i < data->nr_pages; i++) | 2216 | for (i = 0; i < data->nr_pages; i++) |
2219 | perf_mmap_free_page((unsigned long)data->data_pages[i]); | 2217 | perf_mmap_free_page((unsigned long)data->data_pages[i]); |
2218 | kfree(data); | ||
2219 | } | ||
2220 | |||
2221 | #else | ||
2220 | 2222 | ||
2223 | /* | ||
2224 | * Back perf_mmap() with vmalloc memory. | ||
2225 | * | ||
2226 | * Required for architectures that have d-cache aliasing issues. | ||
2227 | */ | ||
2228 | |||
2229 | static struct page * | ||
2230 | perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff) | ||
2231 | { | ||
2232 | if (pgoff > (1UL << data->data_order)) | ||
2233 | return NULL; | ||
2234 | |||
2235 | return vmalloc_to_page((void *)data->user_page + pgoff * PAGE_SIZE); | ||
2236 | } | ||
2237 | |||
2238 | static void perf_mmap_unmark_page(void *addr) | ||
2239 | { | ||
2240 | struct page *page = vmalloc_to_page(addr); | ||
2241 | |||
2242 | page->mapping = NULL; | ||
2243 | } | ||
2244 | |||
2245 | static void perf_mmap_data_free_work(struct work_struct *work) | ||
2246 | { | ||
2247 | struct perf_mmap_data *data; | ||
2248 | void *base; | ||
2249 | int i, nr; | ||
2250 | |||
2251 | data = container_of(work, struct perf_mmap_data, work); | ||
2252 | nr = 1 << data->data_order; | ||
2253 | |||
2254 | base = data->user_page; | ||
2255 | for (i = 0; i < nr + 1; i++) | ||
2256 | perf_mmap_unmark_page(base + (i * PAGE_SIZE)); | ||
2257 | |||
2258 | vfree(base); | ||
2221 | kfree(data); | 2259 | kfree(data); |
2222 | } | 2260 | } |
2223 | 2261 | ||
2224 | static void perf_mmap_data_free(struct perf_event *event) | 2262 | static void perf_mmap_data_free(struct perf_mmap_data *data) |
2263 | { | ||
2264 | schedule_work(&data->work); | ||
2265 | } | ||
2266 | |||
2267 | static struct perf_mmap_data * | ||
2268 | perf_mmap_data_alloc(struct perf_event *event, int nr_pages) | ||
2269 | { | ||
2270 | struct perf_mmap_data *data; | ||
2271 | unsigned long size; | ||
2272 | void *all_buf; | ||
2273 | |||
2274 | WARN_ON(atomic_read(&event->mmap_count)); | ||
2275 | |||
2276 | size = sizeof(struct perf_mmap_data); | ||
2277 | size += sizeof(void *); | ||
2278 | |||
2279 | data = kzalloc(size, GFP_KERNEL); | ||
2280 | if (!data) | ||
2281 | goto fail; | ||
2282 | |||
2283 | INIT_WORK(&data->work, perf_mmap_data_free_work); | ||
2284 | |||
2285 | all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE); | ||
2286 | if (!all_buf) | ||
2287 | goto fail_all_buf; | ||
2288 | |||
2289 | data->user_page = all_buf; | ||
2290 | data->data_pages[0] = all_buf + PAGE_SIZE; | ||
2291 | data->data_order = ilog2(nr_pages); | ||
2292 | data->nr_pages = 1; | ||
2293 | |||
2294 | return data; | ||
2295 | |||
2296 | fail_all_buf: | ||
2297 | kfree(data); | ||
2298 | |||
2299 | fail: | ||
2300 | return NULL; | ||
2301 | } | ||
2302 | |||
2303 | #endif | ||
2304 | |||
2305 | static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | ||
2306 | { | ||
2307 | struct perf_event *event = vma->vm_file->private_data; | ||
2308 | struct perf_mmap_data *data; | ||
2309 | int ret = VM_FAULT_SIGBUS; | ||
2310 | |||
2311 | if (vmf->flags & FAULT_FLAG_MKWRITE) { | ||
2312 | if (vmf->pgoff == 0) | ||
2313 | ret = 0; | ||
2314 | return ret; | ||
2315 | } | ||
2316 | |||
2317 | rcu_read_lock(); | ||
2318 | data = rcu_dereference(event->data); | ||
2319 | if (!data) | ||
2320 | goto unlock; | ||
2321 | |||
2322 | if (vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE)) | ||
2323 | goto unlock; | ||
2324 | |||
2325 | vmf->page = perf_mmap_to_page(data, vmf->pgoff); | ||
2326 | if (!vmf->page) | ||
2327 | goto unlock; | ||
2328 | |||
2329 | get_page(vmf->page); | ||
2330 | vmf->page->mapping = vma->vm_file->f_mapping; | ||
2331 | vmf->page->index = vmf->pgoff; | ||
2332 | |||
2333 | ret = 0; | ||
2334 | unlock: | ||
2335 | rcu_read_unlock(); | ||
2336 | |||
2337 | return ret; | ||
2338 | } | ||
2339 | |||
2340 | static void | ||
2341 | perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data) | ||
2342 | { | ||
2343 | long max_size = perf_data_size(data); | ||
2344 | |||
2345 | atomic_set(&data->lock, -1); | ||
2346 | |||
2347 | if (event->attr.watermark) { | ||
2348 | data->watermark = min_t(long, max_size, | ||
2349 | event->attr.wakeup_watermark); | ||
2350 | } | ||
2351 | |||
2352 | if (!data->watermark) | ||
2353 | data->watermark = max_size / 2; | ||
2354 | |||
2355 | |||
2356 | rcu_assign_pointer(event->data, data); | ||
2357 | } | ||
2358 | |||
2359 | static void perf_mmap_data_free_rcu(struct rcu_head *rcu_head) | ||
2360 | { | ||
2361 | struct perf_mmap_data *data; | ||
2362 | |||
2363 | data = container_of(rcu_head, struct perf_mmap_data, rcu_head); | ||
2364 | perf_mmap_data_free(data); | ||
2365 | } | ||
2366 | |||
2367 | static void perf_mmap_data_release(struct perf_event *event) | ||
2225 | { | 2368 | { |
2226 | struct perf_mmap_data *data = event->data; | 2369 | struct perf_mmap_data *data = event->data; |
2227 | 2370 | ||
2228 | WARN_ON(atomic_read(&event->mmap_count)); | 2371 | WARN_ON(atomic_read(&event->mmap_count)); |
2229 | 2372 | ||
2230 | rcu_assign_pointer(event->data, NULL); | 2373 | rcu_assign_pointer(event->data, NULL); |
2231 | call_rcu(&data->rcu_head, __perf_mmap_data_free); | 2374 | call_rcu(&data->rcu_head, perf_mmap_data_free_rcu); |
2232 | } | 2375 | } |
2233 | 2376 | ||
2234 | static void perf_mmap_open(struct vm_area_struct *vma) | 2377 | static void perf_mmap_open(struct vm_area_struct *vma) |
@@ -2244,11 +2387,12 @@ static void perf_mmap_close(struct vm_area_struct *vma) | |||
2244 | 2387 | ||
2245 | WARN_ON_ONCE(event->ctx->parent_ctx); | 2388 | WARN_ON_ONCE(event->ctx->parent_ctx); |
2246 | if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) { | 2389 | if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) { |
2390 | unsigned long size = perf_data_size(event->data); | ||
2247 | struct user_struct *user = current_user(); | 2391 | struct user_struct *user = current_user(); |
2248 | 2392 | ||
2249 | atomic_long_sub(event->data->nr_pages + 1, &user->locked_vm); | 2393 | atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm); |
2250 | vma->vm_mm->locked_vm -= event->data->nr_locked; | 2394 | vma->vm_mm->locked_vm -= event->data->nr_locked; |
2251 | perf_mmap_data_free(event); | 2395 | perf_mmap_data_release(event); |
2252 | mutex_unlock(&event->mmap_mutex); | 2396 | mutex_unlock(&event->mmap_mutex); |
2253 | } | 2397 | } |
2254 | } | 2398 | } |
@@ -2266,6 +2410,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) | |||
2266 | unsigned long user_locked, user_lock_limit; | 2410 | unsigned long user_locked, user_lock_limit; |
2267 | struct user_struct *user = current_user(); | 2411 | struct user_struct *user = current_user(); |
2268 | unsigned long locked, lock_limit; | 2412 | unsigned long locked, lock_limit; |
2413 | struct perf_mmap_data *data; | ||
2269 | unsigned long vma_size; | 2414 | unsigned long vma_size; |
2270 | unsigned long nr_pages; | 2415 | unsigned long nr_pages; |
2271 | long user_extra, extra; | 2416 | long user_extra, extra; |
@@ -2328,10 +2473,15 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) | |||
2328 | } | 2473 | } |
2329 | 2474 | ||
2330 | WARN_ON(event->data); | 2475 | WARN_ON(event->data); |
2331 | ret = perf_mmap_data_alloc(event, nr_pages); | 2476 | |
2332 | if (ret) | 2477 | data = perf_mmap_data_alloc(event, nr_pages); |
2478 | ret = -ENOMEM; | ||
2479 | if (!data) | ||
2333 | goto unlock; | 2480 | goto unlock; |
2334 | 2481 | ||
2482 | ret = 0; | ||
2483 | perf_mmap_data_init(event, data); | ||
2484 | |||
2335 | atomic_set(&event->mmap_count, 1); | 2485 | atomic_set(&event->mmap_count, 1); |
2336 | atomic_long_add(user_extra, &user->locked_vm); | 2486 | atomic_long_add(user_extra, &user->locked_vm); |
2337 | vma->vm_mm->locked_vm += extra; | 2487 | vma->vm_mm->locked_vm += extra; |
@@ -2519,7 +2669,7 @@ static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail, | |||
2519 | if (!data->writable) | 2669 | if (!data->writable) |
2520 | return true; | 2670 | return true; |
2521 | 2671 | ||
2522 | mask = (data->nr_pages << PAGE_SHIFT) - 1; | 2672 | mask = perf_data_size(data) - 1; |
2523 | 2673 | ||
2524 | offset = (offset - tail) & mask; | 2674 | offset = (offset - tail) & mask; |
2525 | head = (head - tail) & mask; | 2675 | head = (head - tail) & mask; |
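perf_output_space() now derives the wrap mask from perf_data_size(), which accounts for the page order used when the buffer is one vmalloc'd chunk; the rest of the check (outside this hunk) is plain power-of-two ring arithmetic on free-running indices. A small user-space sketch of such a space test, not the kernel's exact expression:

#include <stdbool.h>
#include <stdio.h>

/* Sketch: does writing 'len' bytes at 'head' stay behind 'tail' in a
 * power-of-two ring of 'size' bytes?  Indices run freely and are only
 * masked on use, as in the perf ring buffer. */
static bool ring_has_space(unsigned long tail, unsigned long head,
                           unsigned long size, unsigned long len)
{
        unsigned long mask = size - 1;             /* size must be 2^n        */
        unsigned long used = (head - tail) & mask; /* bytes not yet consumed  */

        return size - used > len;                  /* keep one byte of slack  */
}

int main(void)
{
        printf("%d\n", ring_has_space(0, 100, 4096, 100));   /* 1: fits     */
        printf("%d\n", ring_has_space(0, 100, 4096, 4000));  /* 0: too big  */
        return 0;
}
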
@@ -2558,20 +2708,21 @@ static void perf_output_wakeup(struct perf_output_handle *handle) | |||
2558 | static void perf_output_lock(struct perf_output_handle *handle) | 2708 | static void perf_output_lock(struct perf_output_handle *handle) |
2559 | { | 2709 | { |
2560 | struct perf_mmap_data *data = handle->data; | 2710 | struct perf_mmap_data *data = handle->data; |
2561 | int cpu; | 2711 | int cur, cpu = get_cpu(); |
2562 | 2712 | ||
2563 | handle->locked = 0; | 2713 | handle->locked = 0; |
2564 | 2714 | ||
2565 | local_irq_save(handle->flags); | 2715 | for (;;) { |
2566 | cpu = smp_processor_id(); | 2716 | cur = atomic_cmpxchg(&data->lock, -1, cpu); |
2567 | 2717 | if (cur == -1) { | |
2568 | if (in_nmi() && atomic_read(&data->lock) == cpu) | 2718 | handle->locked = 1; |
2569 | return; | 2719 | break; |
2720 | } | ||
2721 | if (cur == cpu) | ||
2722 | break; | ||
2570 | 2723 | ||
2571 | while (atomic_cmpxchg(&data->lock, -1, cpu) != -1) | ||
2572 | cpu_relax(); | 2724 | cpu_relax(); |
2573 | 2725 | } | |
2574 | handle->locked = 1; | ||
2575 | } | 2726 | } |
2576 | 2727 | ||
2577 | static void perf_output_unlock(struct perf_output_handle *handle) | 2728 | static void perf_output_unlock(struct perf_output_handle *handle) |
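perf_output_lock() is rewritten around a single cmpxchg loop on data->lock (initialised to -1 in perf_mmap_data_init() above): the field holds the owning CPU id, so a nested attempt from the same CPU, e.g. an NMI arriving while the lock is already held, falls through without setting handle->locked, and only the outermost holder releases it (the matching put_cpu() is in the unlock hunk below). A user-space sketch of such a CPU-recursive spinlock, using GCC atomics rather than the kernel primitives:

#include <stdio.h>

/* Sketch of a CPU-recursive spinlock: -1 means unlocked, otherwise it holds
 * the owning CPU id.  A nested acquire by the same CPU succeeds immediately
 * and is remembered so only the outermost unlock frees the lock. */
struct rec_lock {
        int owner;      /* -1 or owning CPU id */
};

static int rec_lock_acquire(struct rec_lock *l, int cpu)
{
        int expected;

        for (;;) {
                expected = -1;
                if (__atomic_compare_exchange_n(&l->owner, &expected, cpu, 0,
                                                __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
                        return 1;               /* we took it: outermost holder */
                if (expected == cpu)
                        return 0;               /* nested: already ours         */
                /* otherwise spin until the current owner releases it */
        }
}

static void rec_lock_release(struct rec_lock *l, int outermost)
{
        if (outermost)
                __atomic_store_n(&l->owner, -1, __ATOMIC_RELEASE);
}

int main(void)
{
        struct rec_lock l = { .owner = -1 };
        int outer = rec_lock_acquire(&l, 3);    /* -> 1 */
        int inner = rec_lock_acquire(&l, 3);    /* -> 0, nested */

        rec_lock_release(&l, inner);            /* no-op */
        rec_lock_release(&l, outer);            /* frees the lock */
        printf("owner=%d\n", l.owner);          /* -1 */
        return 0;
}
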
@@ -2617,14 +2768,14 @@ again: | |||
2617 | if (atomic_xchg(&data->wakeup, 0)) | 2768 | if (atomic_xchg(&data->wakeup, 0)) |
2618 | perf_output_wakeup(handle); | 2769 | perf_output_wakeup(handle); |
2619 | out: | 2770 | out: |
2620 | local_irq_restore(handle->flags); | 2771 | put_cpu(); |
2621 | } | 2772 | } |
2622 | 2773 | ||
2623 | void perf_output_copy(struct perf_output_handle *handle, | 2774 | void perf_output_copy(struct perf_output_handle *handle, |
2624 | const void *buf, unsigned int len) | 2775 | const void *buf, unsigned int len) |
2625 | { | 2776 | { |
2626 | unsigned int pages_mask; | 2777 | unsigned int pages_mask; |
2627 | unsigned int offset; | 2778 | unsigned long offset; |
2628 | unsigned int size; | 2779 | unsigned int size; |
2629 | void **pages; | 2780 | void **pages; |
2630 | 2781 | ||
@@ -2633,12 +2784,14 @@ void perf_output_copy(struct perf_output_handle *handle, | |||
2633 | pages = handle->data->data_pages; | 2784 | pages = handle->data->data_pages; |
2634 | 2785 | ||
2635 | do { | 2786 | do { |
2636 | unsigned int page_offset; | 2787 | unsigned long page_offset; |
2788 | unsigned long page_size; | ||
2637 | int nr; | 2789 | int nr; |
2638 | 2790 | ||
2639 | nr = (offset >> PAGE_SHIFT) & pages_mask; | 2791 | nr = (offset >> PAGE_SHIFT) & pages_mask; |
2640 | page_offset = offset & (PAGE_SIZE - 1); | 2792 | page_size = 1UL << (handle->data->data_order + PAGE_SHIFT); |
2641 | size = min_t(unsigned int, PAGE_SIZE - page_offset, len); | 2793 | page_offset = offset & (page_size - 1); |
2794 | size = min_t(unsigned int, page_size - page_offset, len); | ||
2642 | 2795 | ||
2643 | memcpy(pages[nr] + page_offset, buf, size); | 2796 | memcpy(pages[nr] + page_offset, buf, size); |
2644 | 2797 | ||
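perf_output_copy() widens offset to unsigned long and derives page_size from data->data_order, so one memcpy can cover a whole higher-order chunk when the buffer was vmalloc'd as a single area. A user-space sketch of that chunked, wrapping copy (chunk geometry scaled down for the demo):

#include <stdio.h>
#include <string.h>

#define CHUNK_SHIFT 4                     /* 16-byte "pages" for the demo */
#define CHUNK_SIZE  (1UL << CHUNK_SHIFT)
#define NR_CHUNKS   4                     /* must be a power of two       */

static char chunks[NR_CHUNKS][CHUNK_SIZE];

/* Copy len bytes into the ring starting at a free-running offset, splitting
 * the copy at chunk boundaries and wrapping the chunk index with a mask. */
static void ring_copy(unsigned long offset, const void *buf, size_t len)
{
        const char *src = buf;

        while (len) {
                unsigned long nr   = (offset >> CHUNK_SHIFT) & (NR_CHUNKS - 1);
                unsigned long off  = offset & (CHUNK_SIZE - 1);
                size_t        part = CHUNK_SIZE - off < len ? CHUNK_SIZE - off : len;

                memcpy(&chunks[nr][off], src, part);
                offset += part;
                src    += part;
                len    -= part;
        }
}

int main(void)
{
        ring_copy(4 * CHUNK_SIZE - 4, "wrap-around!", 12);  /* spans two chunks */
        printf("%.4s|%.8s\n", &chunks[3][CHUNK_SIZE - 4], chunks[0]);
        return 0;
}
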
@@ -3115,6 +3268,9 @@ static void perf_event_task_output(struct perf_event *event, | |||
3115 | 3268 | ||
3116 | static int perf_event_task_match(struct perf_event *event) | 3269 | static int perf_event_task_match(struct perf_event *event) |
3117 | { | 3270 | { |
3271 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | ||
3272 | return 0; | ||
3273 | |||
3118 | if (event->attr.comm || event->attr.mmap || event->attr.task) | 3274 | if (event->attr.comm || event->attr.mmap || event->attr.task) |
3119 | return 1; | 3275 | return 1; |
3120 | 3276 | ||
@@ -3126,15 +3282,10 @@ static void perf_event_task_ctx(struct perf_event_context *ctx, | |||
3126 | { | 3282 | { |
3127 | struct perf_event *event; | 3283 | struct perf_event *event; |
3128 | 3284 | ||
3129 | if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) | ||
3130 | return; | ||
3131 | |||
3132 | rcu_read_lock(); | ||
3133 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { | 3285 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { |
3134 | if (perf_event_task_match(event)) | 3286 | if (perf_event_task_match(event)) |
3135 | perf_event_task_output(event, task_event); | 3287 | perf_event_task_output(event, task_event); |
3136 | } | 3288 | } |
3137 | rcu_read_unlock(); | ||
3138 | } | 3289 | } |
3139 | 3290 | ||
3140 | static void perf_event_task_event(struct perf_task_event *task_event) | 3291 | static void perf_event_task_event(struct perf_task_event *task_event) |
@@ -3142,15 +3293,14 @@ static void perf_event_task_event(struct perf_task_event *task_event) | |||
3142 | struct perf_cpu_context *cpuctx; | 3293 | struct perf_cpu_context *cpuctx; |
3143 | struct perf_event_context *ctx = task_event->task_ctx; | 3294 | struct perf_event_context *ctx = task_event->task_ctx; |
3144 | 3295 | ||
3296 | rcu_read_lock(); | ||
3145 | cpuctx = &get_cpu_var(perf_cpu_context); | 3297 | cpuctx = &get_cpu_var(perf_cpu_context); |
3146 | perf_event_task_ctx(&cpuctx->ctx, task_event); | 3298 | perf_event_task_ctx(&cpuctx->ctx, task_event); |
3147 | put_cpu_var(perf_cpu_context); | ||
3148 | |||
3149 | rcu_read_lock(); | ||
3150 | if (!ctx) | 3299 | if (!ctx) |
3151 | ctx = rcu_dereference(task_event->task->perf_event_ctxp); | 3300 | ctx = rcu_dereference(task_event->task->perf_event_ctxp); |
3152 | if (ctx) | 3301 | if (ctx) |
3153 | perf_event_task_ctx(ctx, task_event); | 3302 | perf_event_task_ctx(ctx, task_event); |
3303 | put_cpu_var(perf_cpu_context); | ||
3154 | rcu_read_unlock(); | 3304 | rcu_read_unlock(); |
3155 | } | 3305 | } |
3156 | 3306 | ||
@@ -3227,6 +3377,9 @@ static void perf_event_comm_output(struct perf_event *event, | |||
3227 | 3377 | ||
3228 | static int perf_event_comm_match(struct perf_event *event) | 3378 | static int perf_event_comm_match(struct perf_event *event) |
3229 | { | 3379 | { |
3380 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | ||
3381 | return 0; | ||
3382 | |||
3230 | if (event->attr.comm) | 3383 | if (event->attr.comm) |
3231 | return 1; | 3384 | return 1; |
3232 | 3385 | ||
@@ -3238,15 +3391,10 @@ static void perf_event_comm_ctx(struct perf_event_context *ctx, | |||
3238 | { | 3391 | { |
3239 | struct perf_event *event; | 3392 | struct perf_event *event; |
3240 | 3393 | ||
3241 | if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) | ||
3242 | return; | ||
3243 | |||
3244 | rcu_read_lock(); | ||
3245 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { | 3394 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { |
3246 | if (perf_event_comm_match(event)) | 3395 | if (perf_event_comm_match(event)) |
3247 | perf_event_comm_output(event, comm_event); | 3396 | perf_event_comm_output(event, comm_event); |
3248 | } | 3397 | } |
3249 | rcu_read_unlock(); | ||
3250 | } | 3398 | } |
3251 | 3399 | ||
3252 | static void perf_event_comm_event(struct perf_comm_event *comm_event) | 3400 | static void perf_event_comm_event(struct perf_comm_event *comm_event) |
@@ -3257,7 +3405,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) | |||
3257 | char comm[TASK_COMM_LEN]; | 3405 | char comm[TASK_COMM_LEN]; |
3258 | 3406 | ||
3259 | memset(comm, 0, sizeof(comm)); | 3407 | memset(comm, 0, sizeof(comm)); |
3260 | strncpy(comm, comm_event->task->comm, sizeof(comm)); | 3408 | strlcpy(comm, comm_event->task->comm, sizeof(comm)); |
3261 | size = ALIGN(strlen(comm)+1, sizeof(u64)); | 3409 | size = ALIGN(strlen(comm)+1, sizeof(u64)); |
3262 | 3410 | ||
3263 | comm_event->comm = comm; | 3411 | comm_event->comm = comm; |
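Switching the comm copy from strncpy() to strlcpy() guarantees NUL-termination when the source is as long as the destination; with strncpy() the strlen() on the next line could read past the end of comm[]. A small user-space illustration (glibc may lack strlcpy, so the demo carries its own truncating helper):

#include <stdio.h>
#include <string.h>

/* Minimal strlcpy-style helper so the demo is self-contained:
 * always NUL-terminates and returns the length of src. */
static size_t copy_trunc(char *dst, const char *src, size_t size)
{
        size_t len = strlen(src);

        if (size) {
                size_t n = len >= size ? size - 1 : len;
                memcpy(dst, src, n);
                dst[n] = '\0';
        }
        return len;
}

int main(void)
{
        char a[8], b[8];

        strncpy(a, "a_very_long_comm", sizeof(a));    /* fills a[], no NUL */
        copy_trunc(b, "a_very_long_comm", sizeof(b)); /* truncated + NUL   */

        (void)a;  /* strlen(a) here would read past the buffer - the bug strlcpy avoids */
        printf("b=\"%s\" (len %zu)\n", b, strlen(b)); /* safe: "a_very_" */
        return 0;
}
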
@@ -3265,18 +3413,13 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) | |||
3265 | 3413 | ||
3266 | comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; | 3414 | comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; |
3267 | 3415 | ||
3416 | rcu_read_lock(); | ||
3268 | cpuctx = &get_cpu_var(perf_cpu_context); | 3417 | cpuctx = &get_cpu_var(perf_cpu_context); |
3269 | perf_event_comm_ctx(&cpuctx->ctx, comm_event); | 3418 | perf_event_comm_ctx(&cpuctx->ctx, comm_event); |
3270 | put_cpu_var(perf_cpu_context); | ||
3271 | |||
3272 | rcu_read_lock(); | ||
3273 | /* | ||
3274 | * doesn't really matter which of the child contexts the | ||
3275 | * events ends up in. | ||
3276 | */ | ||
3277 | ctx = rcu_dereference(current->perf_event_ctxp); | 3419 | ctx = rcu_dereference(current->perf_event_ctxp); |
3278 | if (ctx) | 3420 | if (ctx) |
3279 | perf_event_comm_ctx(ctx, comm_event); | 3421 | perf_event_comm_ctx(ctx, comm_event); |
3422 | put_cpu_var(perf_cpu_context); | ||
3280 | rcu_read_unlock(); | 3423 | rcu_read_unlock(); |
3281 | } | 3424 | } |
3282 | 3425 | ||
@@ -3351,6 +3494,9 @@ static void perf_event_mmap_output(struct perf_event *event, | |||
3351 | static int perf_event_mmap_match(struct perf_event *event, | 3494 | static int perf_event_mmap_match(struct perf_event *event, |
3352 | struct perf_mmap_event *mmap_event) | 3495 | struct perf_mmap_event *mmap_event) |
3353 | { | 3496 | { |
3497 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | ||
3498 | return 0; | ||
3499 | |||
3354 | if (event->attr.mmap) | 3500 | if (event->attr.mmap) |
3355 | return 1; | 3501 | return 1; |
3356 | 3502 | ||
@@ -3362,15 +3508,10 @@ static void perf_event_mmap_ctx(struct perf_event_context *ctx, | |||
3362 | { | 3508 | { |
3363 | struct perf_event *event; | 3509 | struct perf_event *event; |
3364 | 3510 | ||
3365 | if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) | ||
3366 | return; | ||
3367 | |||
3368 | rcu_read_lock(); | ||
3369 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { | 3511 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { |
3370 | if (perf_event_mmap_match(event, mmap_event)) | 3512 | if (perf_event_mmap_match(event, mmap_event)) |
3371 | perf_event_mmap_output(event, mmap_event); | 3513 | perf_event_mmap_output(event, mmap_event); |
3372 | } | 3514 | } |
3373 | rcu_read_unlock(); | ||
3374 | } | 3515 | } |
3375 | 3516 | ||
3376 | static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) | 3517 | static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) |
@@ -3426,18 +3567,13 @@ got_name: | |||
3426 | 3567 | ||
3427 | mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; | 3568 | mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; |
3428 | 3569 | ||
3570 | rcu_read_lock(); | ||
3429 | cpuctx = &get_cpu_var(perf_cpu_context); | 3571 | cpuctx = &get_cpu_var(perf_cpu_context); |
3430 | perf_event_mmap_ctx(&cpuctx->ctx, mmap_event); | 3572 | perf_event_mmap_ctx(&cpuctx->ctx, mmap_event); |
3431 | put_cpu_var(perf_cpu_context); | ||
3432 | |||
3433 | rcu_read_lock(); | ||
3434 | /* | ||
3435 | * doesn't really matter which of the child contexts the | ||
3436 | * events ends up in. | ||
3437 | */ | ||
3438 | ctx = rcu_dereference(current->perf_event_ctxp); | 3573 | ctx = rcu_dereference(current->perf_event_ctxp); |
3439 | if (ctx) | 3574 | if (ctx) |
3440 | perf_event_mmap_ctx(ctx, mmap_event); | 3575 | perf_event_mmap_ctx(ctx, mmap_event); |
3576 | put_cpu_var(perf_cpu_context); | ||
3441 | rcu_read_unlock(); | 3577 | rcu_read_unlock(); |
3442 | 3578 | ||
3443 | kfree(buf); | 3579 | kfree(buf); |
@@ -3569,7 +3705,11 @@ static int __perf_event_overflow(struct perf_event *event, int nmi, | |||
3569 | perf_event_disable(event); | 3705 | perf_event_disable(event); |
3570 | } | 3706 | } |
3571 | 3707 | ||
3572 | perf_event_output(event, nmi, data, regs); | 3708 | if (event->overflow_handler) |
3709 | event->overflow_handler(event, nmi, data, regs); | ||
3710 | else | ||
3711 | perf_event_output(event, nmi, data, regs); | ||
3712 | |||
3573 | return ret; | 3713 | return ret; |
3574 | } | 3714 | } |
3575 | 3715 | ||
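With this hunk an event can carry its own overflow_handler, and __perf_event_overflow() dispatches to it instead of perf_event_output(); the in-kernel counter API added later in this patch passes such a handler through perf_event_alloc(). A hedged sketch of a callback with the signature this patch expects (the handler body is hypothetical):

#include <linux/kernel.h>
#include <linux/perf_event.h>

/*
 * Hypothetical in-kernel overflow callback.  A counter created with an
 * overflow_handler bypasses the mmap'd ring buffer, and the handler runs
 * in the overflow path (often NMI context), so it must not sleep or take
 * sleeping locks.
 */
static void my_overflow_handler(struct perf_event *event, int nmi,
                                struct perf_sample_data *data,
                                struct pt_regs *regs)
{
        pr_debug("counter %llu overflowed, period %llu, nmi=%d\n",
                 (unsigned long long)event->attr.config,
                 (unsigned long long)data->period, nmi);
}
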
@@ -3614,16 +3754,16 @@ again: | |||
3614 | return nr; | 3754 | return nr; |
3615 | } | 3755 | } |
3616 | 3756 | ||
3617 | static void perf_swevent_overflow(struct perf_event *event, | 3757 | static void perf_swevent_overflow(struct perf_event *event, u64 overflow, |
3618 | int nmi, struct perf_sample_data *data, | 3758 | int nmi, struct perf_sample_data *data, |
3619 | struct pt_regs *regs) | 3759 | struct pt_regs *regs) |
3620 | { | 3760 | { |
3621 | struct hw_perf_event *hwc = &event->hw; | 3761 | struct hw_perf_event *hwc = &event->hw; |
3622 | int throttle = 0; | 3762 | int throttle = 0; |
3623 | u64 overflow; | ||
3624 | 3763 | ||
3625 | data->period = event->hw.last_period; | 3764 | data->period = event->hw.last_period; |
3626 | overflow = perf_swevent_set_period(event); | 3765 | if (!overflow) |
3766 | overflow = perf_swevent_set_period(event); | ||
3627 | 3767 | ||
3628 | if (hwc->interrupts == MAX_INTERRUPTS) | 3768 | if (hwc->interrupts == MAX_INTERRUPTS) |
3629 | return; | 3769 | return; |
@@ -3656,14 +3796,19 @@ static void perf_swevent_add(struct perf_event *event, u64 nr, | |||
3656 | 3796 | ||
3657 | atomic64_add(nr, &event->count); | 3797 | atomic64_add(nr, &event->count); |
3658 | 3798 | ||
3799 | if (!regs) | ||
3800 | return; | ||
3801 | |||
3659 | if (!hwc->sample_period) | 3802 | if (!hwc->sample_period) |
3660 | return; | 3803 | return; |
3661 | 3804 | ||
3662 | if (!regs) | 3805 | if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq) |
3806 | return perf_swevent_overflow(event, 1, nmi, data, regs); | ||
3807 | |||
3808 | if (atomic64_add_negative(nr, &hwc->period_left)) | ||
3663 | return; | 3809 | return; |
3664 | 3810 | ||
3665 | if (!atomic64_add_negative(nr, &hwc->period_left)) | 3811 | perf_swevent_overflow(event, 0, nmi, data, regs); |
3666 | perf_swevent_overflow(event, nmi, data, regs); | ||
3667 | } | 3812 | } |
3668 | 3813 | ||
3669 | static int perf_swevent_is_counting(struct perf_event *event) | 3814 | static int perf_swevent_is_counting(struct perf_event *event) |
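perf_swevent_add() reorders the cheap bail-outs (no regs, no sample_period) and then does the period accounting: period_left counts up toward zero via atomic64_add_negative(), the overflow path only runs once it crosses into the non-negative range, and the period==1, non-freq case short-circuits with a precomputed overflow count of 1. A single-threaded user-space sketch of that accounting (the kernel does the same with atomics and div64_u64):

#include <stdio.h>

/* Sketch: software-event period accounting.  period_left counts up from
 * -period toward zero; once an add makes it non-negative, one or more
 * sample periods have elapsed and the overflow path runs. */
struct sw_counter {
        long long period_left;
        long long period;
        long long overflows;
};

static void sw_counter_add(struct sw_counter *c, long long nr)
{
        c->period_left += nr;
        if (c->period_left < 0)
                return;                         /* fast path: not due yet */

        /* slow path: account however many whole periods have elapsed */
        long long n = c->period_left / c->period + 1;
        c->period_left -= n * c->period;
        c->overflows   += n;
}

int main(void)
{
        struct sw_counter c = { .period_left = -100, .period = 100 };

        sw_counter_add(&c, 30);    /* -70: no overflow            */
        sw_counter_add(&c, 250);   /* 180: two periods elapsed    */
        printf("overflows=%lld period_left=%lld\n", c.overflows, c.period_left);
        return 0;
}
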
@@ -3696,25 +3841,47 @@ static int perf_swevent_is_counting(struct perf_event *event) | |||
3696 | return 1; | 3841 | return 1; |
3697 | } | 3842 | } |
3698 | 3843 | ||
3844 | static int perf_tp_event_match(struct perf_event *event, | ||
3845 | struct perf_sample_data *data); | ||
3846 | |||
3847 | static int perf_exclude_event(struct perf_event *event, | ||
3848 | struct pt_regs *regs) | ||
3849 | { | ||
3850 | if (regs) { | ||
3851 | if (event->attr.exclude_user && user_mode(regs)) | ||
3852 | return 1; | ||
3853 | |||
3854 | if (event->attr.exclude_kernel && !user_mode(regs)) | ||
3855 | return 1; | ||
3856 | } | ||
3857 | |||
3858 | return 0; | ||
3859 | } | ||
3860 | |||
3699 | static int perf_swevent_match(struct perf_event *event, | 3861 | static int perf_swevent_match(struct perf_event *event, |
3700 | enum perf_type_id type, | 3862 | enum perf_type_id type, |
3701 | u32 event_id, struct pt_regs *regs) | 3863 | u32 event_id, |
3864 | struct perf_sample_data *data, | ||
3865 | struct pt_regs *regs) | ||
3702 | { | 3866 | { |
3867 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | ||
3868 | return 0; | ||
3869 | |||
3703 | if (!perf_swevent_is_counting(event)) | 3870 | if (!perf_swevent_is_counting(event)) |
3704 | return 0; | 3871 | return 0; |
3705 | 3872 | ||
3706 | if (event->attr.type != type) | 3873 | if (event->attr.type != type) |
3707 | return 0; | 3874 | return 0; |
3875 | |||
3708 | if (event->attr.config != event_id) | 3876 | if (event->attr.config != event_id) |
3709 | return 0; | 3877 | return 0; |
3710 | 3878 | ||
3711 | if (regs) { | 3879 | if (perf_exclude_event(event, regs)) |
3712 | if (event->attr.exclude_user && user_mode(regs)) | 3880 | return 0; |
3713 | return 0; | ||
3714 | 3881 | ||
3715 | if (event->attr.exclude_kernel && !user_mode(regs)) | 3882 | if (event->attr.type == PERF_TYPE_TRACEPOINT && |
3716 | return 0; | 3883 | !perf_tp_event_match(event, data)) |
3717 | } | 3884 | return 0; |
3718 | 3885 | ||
3719 | return 1; | 3886 | return 1; |
3720 | } | 3887 | } |
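The exclude_user/exclude_kernel test moves into perf_exclude_event() so the breakpoint path added further down can share it, and perf_swevent_match() now also rejects events pinned to a different CPU and tracepoint events whose filter fails. A small user-space sketch of the exclusion predicate, with user_mode(regs) stood in by a flag:

#include <stdbool.h>
#include <stdio.h>

struct sample_ctx {
        bool from_user;         /* stands in for user_mode(regs) */
};

struct sw_event_attr {
        bool exclude_user;
        bool exclude_kernel;
};

/* Sketch of perf_exclude_event(): drop the sample if it originates from a
 * privilege level the event asked to exclude. */
static bool exclude_sample(const struct sw_event_attr *attr,
                           const struct sample_ctx *ctx)
{
        if (!ctx)
                return false;                   /* no regs: nothing to judge */
        if (attr->exclude_user && ctx->from_user)
                return true;
        if (attr->exclude_kernel && !ctx->from_user)
                return true;
        return false;
}

int main(void)
{
        struct sw_event_attr attr = { .exclude_kernel = true };
        struct sample_ctx user = { .from_user = true }, kern = { 0 };

        printf("%d %d\n", exclude_sample(&attr, &user),   /* 0: kept    */
                          exclude_sample(&attr, &kern));  /* 1: dropped */
        return 0;
}
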
@@ -3727,49 +3894,59 @@ static void perf_swevent_ctx_event(struct perf_event_context *ctx, | |||
3727 | { | 3894 | { |
3728 | struct perf_event *event; | 3895 | struct perf_event *event; |
3729 | 3896 | ||
3730 | if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) | ||
3731 | return; | ||
3732 | |||
3733 | rcu_read_lock(); | ||
3734 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { | 3897 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { |
3735 | if (perf_swevent_match(event, type, event_id, regs)) | 3898 | if (perf_swevent_match(event, type, event_id, data, regs)) |
3736 | perf_swevent_add(event, nr, nmi, data, regs); | 3899 | perf_swevent_add(event, nr, nmi, data, regs); |
3737 | } | 3900 | } |
3738 | rcu_read_unlock(); | ||
3739 | } | 3901 | } |
3740 | 3902 | ||
3741 | static int *perf_swevent_recursion_context(struct perf_cpu_context *cpuctx) | 3903 | int perf_swevent_get_recursion_context(void) |
3742 | { | 3904 | { |
3905 | struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); | ||
3906 | int rctx; | ||
3907 | |||
3743 | if (in_nmi()) | 3908 | if (in_nmi()) |
3744 | return &cpuctx->recursion[3]; | 3909 | rctx = 3; |
3910 | else if (in_irq()) | ||
3911 | rctx = 2; | ||
3912 | else if (in_softirq()) | ||
3913 | rctx = 1; | ||
3914 | else | ||
3915 | rctx = 0; | ||
3745 | 3916 | ||
3746 | if (in_irq()) | 3917 | if (cpuctx->recursion[rctx]) { |
3747 | return &cpuctx->recursion[2]; | 3918 | put_cpu_var(perf_cpu_context); |
3919 | return -1; | ||
3920 | } | ||
3748 | 3921 | ||
3749 | if (in_softirq()) | 3922 | cpuctx->recursion[rctx]++; |
3750 | return &cpuctx->recursion[1]; | 3923 | barrier(); |
3751 | 3924 | ||
3752 | return &cpuctx->recursion[0]; | 3925 | return rctx; |
3926 | } | ||
3927 | EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); | ||
3928 | |||
3929 | void perf_swevent_put_recursion_context(int rctx) | ||
3930 | { | ||
3931 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | ||
3932 | barrier(); | ||
3933 | cpuctx->recursion[rctx]--; | ||
3934 | put_cpu_var(perf_cpu_context); | ||
3753 | } | 3935 | } |
3936 | EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context); | ||
3754 | 3937 | ||
3755 | static void do_perf_sw_event(enum perf_type_id type, u32 event_id, | 3938 | static void do_perf_sw_event(enum perf_type_id type, u32 event_id, |
3756 | u64 nr, int nmi, | 3939 | u64 nr, int nmi, |
3757 | struct perf_sample_data *data, | 3940 | struct perf_sample_data *data, |
3758 | struct pt_regs *regs) | 3941 | struct pt_regs *regs) |
3759 | { | 3942 | { |
3760 | struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); | 3943 | struct perf_cpu_context *cpuctx; |
3761 | int *recursion = perf_swevent_recursion_context(cpuctx); | ||
3762 | struct perf_event_context *ctx; | 3944 | struct perf_event_context *ctx; |
3763 | 3945 | ||
3764 | if (*recursion) | 3946 | cpuctx = &__get_cpu_var(perf_cpu_context); |
3765 | goto out; | 3947 | rcu_read_lock(); |
3766 | |||
3767 | (*recursion)++; | ||
3768 | barrier(); | ||
3769 | |||
3770 | perf_swevent_ctx_event(&cpuctx->ctx, type, event_id, | 3948 | perf_swevent_ctx_event(&cpuctx->ctx, type, event_id, |
3771 | nr, nmi, data, regs); | 3949 | nr, nmi, data, regs); |
3772 | rcu_read_lock(); | ||
3773 | /* | 3950 | /* |
3774 | * doesn't really matter which of the child contexts the | 3951 | * doesn't really matter which of the child contexts the |
3775 | * events ends up in. | 3952 | * events ends up in. |
@@ -3778,23 +3955,24 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id, | |||
3778 | if (ctx) | 3955 | if (ctx) |
3779 | perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs); | 3956 | perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs); |
3780 | rcu_read_unlock(); | 3957 | rcu_read_unlock(); |
3781 | |||
3782 | barrier(); | ||
3783 | (*recursion)--; | ||
3784 | |||
3785 | out: | ||
3786 | put_cpu_var(perf_cpu_context); | ||
3787 | } | 3958 | } |
3788 | 3959 | ||
3789 | void __perf_sw_event(u32 event_id, u64 nr, int nmi, | 3960 | void __perf_sw_event(u32 event_id, u64 nr, int nmi, |
3790 | struct pt_regs *regs, u64 addr) | 3961 | struct pt_regs *regs, u64 addr) |
3791 | { | 3962 | { |
3792 | struct perf_sample_data data = { | 3963 | struct perf_sample_data data; |
3793 | .addr = addr, | 3964 | int rctx; |
3794 | }; | ||
3795 | 3965 | ||
3796 | do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, | 3966 | rctx = perf_swevent_get_recursion_context(); |
3797 | &data, regs); | 3967 | if (rctx < 0) |
3968 | return; | ||
3969 | |||
3970 | data.addr = addr; | ||
3971 | data.raw = NULL; | ||
3972 | |||
3973 | do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs); | ||
3974 | |||
3975 | perf_swevent_put_recursion_context(rctx); | ||
3798 | } | 3976 | } |
3799 | 3977 | ||
3800 | static void perf_swevent_read(struct perf_event *event) | 3978 | static void perf_swevent_read(struct perf_event *event) |
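The recursion guard becomes a public pair: perf_swevent_get_recursion_context() picks a slot by context level (task, softirq, hardirq, NMI), refuses to nest within the same level, and perf_swevent_put_recursion_context() drops it; __perf_sw_event() above is the first user, while trace events, already protected on their side, call do_perf_sw_event() directly. A hedged kernel-style sketch of wrapping a non-reentrant section in the exported pair (the protected body is hypothetical):

#include <linux/perf_event.h>

/*
 * Sketch: the exported guard pair protects a section that must not
 * re-enter itself from the same context level (task, softirq, irq, NMI).
 * This mirrors how the tracepoint glue uses it before feeding samples in.
 */
static void my_guarded_collect(void)
{
        int rctx = perf_swevent_get_recursion_context();

        if (rctx < 0)
                return;         /* already inside this level: bail out */

        /* ... hypothetical non-reentrant sample collection ... */

        perf_swevent_put_recursion_context(rctx);
}
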
@@ -3839,6 +4017,8 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) | |||
3839 | event->pmu->read(event); | 4017 | event->pmu->read(event); |
3840 | 4018 | ||
3841 | data.addr = 0; | 4019 | data.addr = 0; |
4020 | data.raw = NULL; | ||
4021 | data.period = event->hw.last_period; | ||
3842 | regs = get_irq_regs(); | 4022 | regs = get_irq_regs(); |
3843 | /* | 4023 | /* |
3844 | * In case we exclude kernel IPs or are somehow not in interrupt | 4024 | * In case we exclude kernel IPs or are somehow not in interrupt |
@@ -3849,8 +4029,9 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) | |||
3849 | regs = task_pt_regs(current); | 4029 | regs = task_pt_regs(current); |
3850 | 4030 | ||
3851 | if (regs) { | 4031 | if (regs) { |
3852 | if (perf_event_overflow(event, 0, &data, regs)) | 4032 | if (!(event->attr.exclude_idle && current->pid == 0)) |
3853 | ret = HRTIMER_NORESTART; | 4033 | if (perf_event_overflow(event, 0, &data, regs)) |
4034 | ret = HRTIMER_NORESTART; | ||
3854 | } | 4035 | } |
3855 | 4036 | ||
3856 | period = max_t(u64, 10000, event->hw.sample_period); | 4037 | period = max_t(u64, 10000, event->hw.sample_period); |
@@ -3859,6 +4040,42 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) | |||
3859 | return ret; | 4040 | return ret; |
3860 | } | 4041 | } |
3861 | 4042 | ||
4043 | static void perf_swevent_start_hrtimer(struct perf_event *event) | ||
4044 | { | ||
4045 | struct hw_perf_event *hwc = &event->hw; | ||
4046 | |||
4047 | hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
4048 | hwc->hrtimer.function = perf_swevent_hrtimer; | ||
4049 | if (hwc->sample_period) { | ||
4050 | u64 period; | ||
4051 | |||
4052 | if (hwc->remaining) { | ||
4053 | if (hwc->remaining < 0) | ||
4054 | period = 10000; | ||
4055 | else | ||
4056 | period = hwc->remaining; | ||
4057 | hwc->remaining = 0; | ||
4058 | } else { | ||
4059 | period = max_t(u64, 10000, hwc->sample_period); | ||
4060 | } | ||
4061 | __hrtimer_start_range_ns(&hwc->hrtimer, | ||
4062 | ns_to_ktime(period), 0, | ||
4063 | HRTIMER_MODE_REL, 0); | ||
4064 | } | ||
4065 | } | ||
4066 | |||
4067 | static void perf_swevent_cancel_hrtimer(struct perf_event *event) | ||
4068 | { | ||
4069 | struct hw_perf_event *hwc = &event->hw; | ||
4070 | |||
4071 | if (hwc->sample_period) { | ||
4072 | ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); | ||
4073 | hwc->remaining = ktime_to_ns(remaining); | ||
4074 | |||
4075 | hrtimer_cancel(&hwc->hrtimer); | ||
4076 | } | ||
4077 | } | ||
4078 | |||
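perf_swevent_start_hrtimer() and perf_swevent_cancel_hrtimer() factor out the duplicated hrtimer setup from the cpu-clock and task-clock events, and the cancel side stashes hrtimer_get_remaining() in hwc->remaining so a disable/enable cycle resumes the partial period instead of rearming a full one (overdue timers are clamped to a small minimum). A user-space sketch of that save-and-resume idea with plain timestamps instead of hrtimers:

#include <stdio.h>

/* Sketch: pause/resume a periodic deadline, preserving the time remaining.
 * Overdue timers fall back to a full period here, where the kernel clamps
 * to a small minimum instead. */
struct soft_timer {
        long long period_ns;
        long long deadline_ns;   /* absolute, 0 while stopped */
        long long remaining_ns;  /* saved across a stop/start */
};

static void timer_start(struct soft_timer *t, long long now_ns)
{
        long long delta = t->remaining_ns > 0 ? t->remaining_ns : t->period_ns;

        t->deadline_ns  = now_ns + delta;   /* resume with what was left */
        t->remaining_ns = 0;
}

static void timer_stop(struct soft_timer *t, long long now_ns)
{
        t->remaining_ns = t->deadline_ns - now_ns;  /* may be <= 0 if overdue */
        t->deadline_ns  = 0;
}

int main(void)
{
        struct soft_timer t = { .period_ns = 1000000 };

        timer_start(&t, 0);          /* fires at t=1000000            */
        timer_stop(&t, 400000);      /* 600000 ns still to go         */
        timer_start(&t, 900000);     /* resumes: fires at t=1500000   */
        printf("deadline=%lld\n", t.deadline_ns);
        return 0;
}
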
3862 | /* | 4079 | /* |
3863 | * Software event: cpu wall time clock | 4080 | * Software event: cpu wall time clock |
3864 | */ | 4081 | */ |
@@ -3870,8 +4087,7 @@ static void cpu_clock_perf_event_update(struct perf_event *event) | |||
3870 | u64 now; | 4087 | u64 now; |
3871 | 4088 | ||
3872 | now = cpu_clock(cpu); | 4089 | now = cpu_clock(cpu); |
3873 | prev = atomic64_read(&event->hw.prev_count); | 4090 | prev = atomic64_xchg(&event->hw.prev_count, now); |
3874 | atomic64_set(&event->hw.prev_count, now); | ||
3875 | atomic64_add(now - prev, &event->count); | 4091 | atomic64_add(now - prev, &event->count); |
3876 | } | 4092 | } |
3877 | 4093 | ||
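cpu_clock_perf_event_update() collapses the separate atomic64_read()/atomic64_set() of prev_count into one atomic64_xchg(), so two racing updates can no longer both observe the same prev value and account the interval twice. A user-space sketch of the same exchange-then-add step with GCC atomics:

#include <stdio.h>

static long long prev_count;
static long long count;

/* Sketch: advance a counter by (now - prev) where prev is read and replaced
 * in one atomic exchange, so two racing updaters cannot both consume the
 * same "prev" value and double-count the interval. */
static void clock_update(long long now)
{
        long long prev = __atomic_exchange_n(&prev_count, now, __ATOMIC_RELAXED);

        __atomic_add_fetch(&count, now - prev, __ATOMIC_RELAXED);
}

int main(void)
{
        clock_update(100);
        clock_update(250);
        printf("count=%lld\n", count);   /* 250: (100-0) + (250-100) */
        return 0;
}
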
@@ -3881,22 +4097,14 @@ static int cpu_clock_perf_event_enable(struct perf_event *event) | |||
3881 | int cpu = raw_smp_processor_id(); | 4097 | int cpu = raw_smp_processor_id(); |
3882 | 4098 | ||
3883 | atomic64_set(&hwc->prev_count, cpu_clock(cpu)); | 4099 | atomic64_set(&hwc->prev_count, cpu_clock(cpu)); |
3884 | hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | 4100 | perf_swevent_start_hrtimer(event); |
3885 | hwc->hrtimer.function = perf_swevent_hrtimer; | ||
3886 | if (hwc->sample_period) { | ||
3887 | u64 period = max_t(u64, 10000, hwc->sample_period); | ||
3888 | __hrtimer_start_range_ns(&hwc->hrtimer, | ||
3889 | ns_to_ktime(period), 0, | ||
3890 | HRTIMER_MODE_REL, 0); | ||
3891 | } | ||
3892 | 4101 | ||
3893 | return 0; | 4102 | return 0; |
3894 | } | 4103 | } |
3895 | 4104 | ||
3896 | static void cpu_clock_perf_event_disable(struct perf_event *event) | 4105 | static void cpu_clock_perf_event_disable(struct perf_event *event) |
3897 | { | 4106 | { |
3898 | if (event->hw.sample_period) | 4107 | perf_swevent_cancel_hrtimer(event); |
3899 | hrtimer_cancel(&event->hw.hrtimer); | ||
3900 | cpu_clock_perf_event_update(event); | 4108 | cpu_clock_perf_event_update(event); |
3901 | } | 4109 | } |
3902 | 4110 | ||
@@ -3933,22 +4141,15 @@ static int task_clock_perf_event_enable(struct perf_event *event) | |||
3933 | now = event->ctx->time; | 4141 | now = event->ctx->time; |
3934 | 4142 | ||
3935 | atomic64_set(&hwc->prev_count, now); | 4143 | atomic64_set(&hwc->prev_count, now); |
3936 | hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | 4144 | |
3937 | hwc->hrtimer.function = perf_swevent_hrtimer; | 4145 | perf_swevent_start_hrtimer(event); |
3938 | if (hwc->sample_period) { | ||
3939 | u64 period = max_t(u64, 10000, hwc->sample_period); | ||
3940 | __hrtimer_start_range_ns(&hwc->hrtimer, | ||
3941 | ns_to_ktime(period), 0, | ||
3942 | HRTIMER_MODE_REL, 0); | ||
3943 | } | ||
3944 | 4146 | ||
3945 | return 0; | 4147 | return 0; |
3946 | } | 4148 | } |
3947 | 4149 | ||
3948 | static void task_clock_perf_event_disable(struct perf_event *event) | 4150 | static void task_clock_perf_event_disable(struct perf_event *event) |
3949 | { | 4151 | { |
3950 | if (event->hw.sample_period) | 4152 | perf_swevent_cancel_hrtimer(event); |
3951 | hrtimer_cancel(&event->hw.hrtimer); | ||
3952 | task_clock_perf_event_update(event, event->ctx->time); | 4153 | task_clock_perf_event_update(event, event->ctx->time); |
3953 | 4154 | ||
3954 | } | 4155 | } |
@@ -3976,6 +4177,7 @@ static const struct pmu perf_ops_task_clock = { | |||
3976 | }; | 4177 | }; |
3977 | 4178 | ||
3978 | #ifdef CONFIG_EVENT_PROFILE | 4179 | #ifdef CONFIG_EVENT_PROFILE |
4180 | |||
3979 | void perf_tp_event(int event_id, u64 addr, u64 count, void *record, | 4181 | void perf_tp_event(int event_id, u64 addr, u64 count, void *record, |
3980 | int entry_size) | 4182 | int entry_size) |
3981 | { | 4183 | { |
@@ -3994,13 +4196,21 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record, | |||
3994 | if (!regs) | 4196 | if (!regs) |
3995 | regs = task_pt_regs(current); | 4197 | regs = task_pt_regs(current); |
3996 | 4198 | ||
4199 | /* Trace events already protected against recursion */ | ||
3997 | do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, | 4200 | do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, |
3998 | &data, regs); | 4201 | &data, regs); |
3999 | } | 4202 | } |
4000 | EXPORT_SYMBOL_GPL(perf_tp_event); | 4203 | EXPORT_SYMBOL_GPL(perf_tp_event); |
4001 | 4204 | ||
4002 | extern int ftrace_profile_enable(int); | 4205 | static int perf_tp_event_match(struct perf_event *event, |
4003 | extern void ftrace_profile_disable(int); | 4206 | struct perf_sample_data *data) |
4207 | { | ||
4208 | void *record = data->raw->data; | ||
4209 | |||
4210 | if (likely(!event->filter) || filter_match_preds(event->filter, record)) | ||
4211 | return 1; | ||
4212 | return 0; | ||
4213 | } | ||
4004 | 4214 | ||
4005 | static void tp_perf_event_destroy(struct perf_event *event) | 4215 | static void tp_perf_event_destroy(struct perf_event *event) |
4006 | { | 4216 | { |
@@ -4025,11 +4235,93 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event) | |||
4025 | 4235 | ||
4026 | return &perf_ops_generic; | 4236 | return &perf_ops_generic; |
4027 | } | 4237 | } |
4238 | |||
4239 | static int perf_event_set_filter(struct perf_event *event, void __user *arg) | ||
4240 | { | ||
4241 | char *filter_str; | ||
4242 | int ret; | ||
4243 | |||
4244 | if (event->attr.type != PERF_TYPE_TRACEPOINT) | ||
4245 | return -EINVAL; | ||
4246 | |||
4247 | filter_str = strndup_user(arg, PAGE_SIZE); | ||
4248 | if (IS_ERR(filter_str)) | ||
4249 | return PTR_ERR(filter_str); | ||
4250 | |||
4251 | ret = ftrace_profile_set_filter(event, event->attr.config, filter_str); | ||
4252 | |||
4253 | kfree(filter_str); | ||
4254 | return ret; | ||
4255 | } | ||
4256 | |||
4257 | static void perf_event_free_filter(struct perf_event *event) | ||
4258 | { | ||
4259 | ftrace_profile_free_filter(event); | ||
4260 | } | ||
4261 | |||
4028 | #else | 4262 | #else |
4263 | |||
4264 | static int perf_tp_event_match(struct perf_event *event, | ||
4265 | struct perf_sample_data *data) | ||
4266 | { | ||
4267 | return 1; | ||
4268 | } | ||
4269 | |||
4029 | static const struct pmu *tp_perf_event_init(struct perf_event *event) | 4270 | static const struct pmu *tp_perf_event_init(struct perf_event *event) |
4030 | { | 4271 | { |
4031 | return NULL; | 4272 | return NULL; |
4032 | } | 4273 | } |
4274 | |||
4275 | static int perf_event_set_filter(struct perf_event *event, void __user *arg) | ||
4276 | { | ||
4277 | return -ENOENT; | ||
4278 | } | ||
4279 | |||
4280 | static void perf_event_free_filter(struct perf_event *event) | ||
4281 | { | ||
4282 | } | ||
4283 | |||
4284 | #endif /* CONFIG_EVENT_PROFILE */ | ||
4285 | |||
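In the CONFIG_EVENT_PROFILE branch above, perf_event_set_filter() copies the user's filter expression with strndup_user(), bounded at PAGE_SIZE and returning an ERR_PTR on failure, hands it to ftrace_profile_set_filter(), and frees the temporary either way; the stubs under #else keep the ioctl path compiling when event profiling is off. A hedged kernel-style sketch of that copy-apply-free shape for a user-supplied string (the consumer function is hypothetical):

#include <linux/err.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/uaccess.h>

/* Hypothetical consumer of the copied string. */
static int apply_filter_string(const char *str)
{
        return 0;
}

/* Sketch: duplicate a NUL-terminated string from user space, bounded at
 * one page, act on it, and free the kernel copy regardless of the result. */
static int set_filter_from_user(void __user *arg)
{
        char *filter_str;
        int ret;

        filter_str = strndup_user(arg, PAGE_SIZE);
        if (IS_ERR(filter_str))
                return PTR_ERR(filter_str);

        ret = apply_filter_string(filter_str);

        kfree(filter_str);
        return ret;
}
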
4286 | #ifdef CONFIG_HAVE_HW_BREAKPOINT | ||
4287 | static void bp_perf_event_destroy(struct perf_event *event) | ||
4288 | { | ||
4289 | release_bp_slot(event); | ||
4290 | } | ||
4291 | |||
4292 | static const struct pmu *bp_perf_event_init(struct perf_event *bp) | ||
4293 | { | ||
4294 | int err; | ||
4295 | |||
4296 | err = register_perf_hw_breakpoint(bp); | ||
4297 | if (err) | ||
4298 | return ERR_PTR(err); | ||
4299 | |||
4300 | bp->destroy = bp_perf_event_destroy; | ||
4301 | |||
4302 | return &perf_ops_bp; | ||
4303 | } | ||
4304 | |||
4305 | void perf_bp_event(struct perf_event *bp, void *data) | ||
4306 | { | ||
4307 | struct perf_sample_data sample; | ||
4308 | struct pt_regs *regs = data; | ||
4309 | |||
4310 | sample.raw = NULL; | ||
4311 | sample.addr = bp->attr.bp_addr; | ||
4312 | |||
4313 | if (!perf_exclude_event(bp, regs)) | ||
4314 | perf_swevent_add(bp, 1, 1, &sample, regs); | ||
4315 | } | ||
4316 | #else | ||
4317 | static const struct pmu *bp_perf_event_init(struct perf_event *bp) | ||
4318 | { | ||
4319 | return NULL; | ||
4320 | } | ||
4321 | |||
4322 | void perf_bp_event(struct perf_event *bp, void *regs) | ||
4323 | { | ||
4324 | } | ||
4033 | #endif | 4325 | #endif |
4034 | 4326 | ||
4035 | atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; | 4327 | atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; |
@@ -4076,6 +4368,8 @@ static const struct pmu *sw_perf_event_init(struct perf_event *event) | |||
4076 | case PERF_COUNT_SW_PAGE_FAULTS_MAJ: | 4368 | case PERF_COUNT_SW_PAGE_FAULTS_MAJ: |
4077 | case PERF_COUNT_SW_CONTEXT_SWITCHES: | 4369 | case PERF_COUNT_SW_CONTEXT_SWITCHES: |
4078 | case PERF_COUNT_SW_CPU_MIGRATIONS: | 4370 | case PERF_COUNT_SW_CPU_MIGRATIONS: |
4371 | case PERF_COUNT_SW_ALIGNMENT_FAULTS: | ||
4372 | case PERF_COUNT_SW_EMULATION_FAULTS: | ||
4079 | if (!event->parent) { | 4373 | if (!event->parent) { |
4080 | atomic_inc(&perf_swevent_enabled[event_id]); | 4374 | atomic_inc(&perf_swevent_enabled[event_id]); |
4081 | event->destroy = sw_perf_event_destroy; | 4375 | event->destroy = sw_perf_event_destroy; |
@@ -4096,6 +4390,7 @@ perf_event_alloc(struct perf_event_attr *attr, | |||
4096 | struct perf_event_context *ctx, | 4390 | struct perf_event_context *ctx, |
4097 | struct perf_event *group_leader, | 4391 | struct perf_event *group_leader, |
4098 | struct perf_event *parent_event, | 4392 | struct perf_event *parent_event, |
4393 | perf_overflow_handler_t overflow_handler, | ||
4099 | gfp_t gfpflags) | 4394 | gfp_t gfpflags) |
4100 | { | 4395 | { |
4101 | const struct pmu *pmu; | 4396 | const struct pmu *pmu; |
@@ -4138,6 +4433,11 @@ perf_event_alloc(struct perf_event_attr *attr, | |||
4138 | 4433 | ||
4139 | event->state = PERF_EVENT_STATE_INACTIVE; | 4434 | event->state = PERF_EVENT_STATE_INACTIVE; |
4140 | 4435 | ||
4436 | if (!overflow_handler && parent_event) | ||
4437 | overflow_handler = parent_event->overflow_handler; | ||
4438 | |||
4439 | event->overflow_handler = overflow_handler; | ||
4440 | |||
4141 | if (attr->disabled) | 4441 | if (attr->disabled) |
4142 | event->state = PERF_EVENT_STATE_OFF; | 4442 | event->state = PERF_EVENT_STATE_OFF; |
4143 | 4443 | ||
@@ -4172,6 +4472,11 @@ perf_event_alloc(struct perf_event_attr *attr, | |||
4172 | pmu = tp_perf_event_init(event); | 4472 | pmu = tp_perf_event_init(event); |
4173 | break; | 4473 | break; |
4174 | 4474 | ||
4475 | case PERF_TYPE_BREAKPOINT: | ||
4476 | pmu = bp_perf_event_init(event); | ||
4477 | break; | ||
4478 | |||
4479 | |||
4175 | default: | 4480 | default: |
4176 | break; | 4481 | break; |
4177 | } | 4482 | } |
@@ -4266,7 +4571,7 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, | |||
4266 | if (attr->type >= PERF_TYPE_MAX) | 4571 | if (attr->type >= PERF_TYPE_MAX) |
4267 | return -EINVAL; | 4572 | return -EINVAL; |
4268 | 4573 | ||
4269 | if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3) | 4574 | if (attr->__reserved_1 || attr->__reserved_2) |
4270 | return -EINVAL; | 4575 | return -EINVAL; |
4271 | 4576 | ||
4272 | if (attr->sample_type & ~(PERF_SAMPLE_MAX-1)) | 4577 | if (attr->sample_type & ~(PERF_SAMPLE_MAX-1)) |
@@ -4284,7 +4589,7 @@ err_size: | |||
4284 | goto out; | 4589 | goto out; |
4285 | } | 4590 | } |
4286 | 4591 | ||
4287 | int perf_event_set_output(struct perf_event *event, int output_fd) | 4592 | static int perf_event_set_output(struct perf_event *event, int output_fd) |
4288 | { | 4593 | { |
4289 | struct perf_event *output_event = NULL; | 4594 | struct perf_event *output_event = NULL; |
4290 | struct file *output_file = NULL; | 4595 | struct file *output_file = NULL; |
@@ -4414,12 +4719,12 @@ SYSCALL_DEFINE5(perf_event_open, | |||
4414 | } | 4719 | } |
4415 | 4720 | ||
4416 | event = perf_event_alloc(&attr, cpu, ctx, group_leader, | 4721 | event = perf_event_alloc(&attr, cpu, ctx, group_leader, |
4417 | NULL, GFP_KERNEL); | 4722 | NULL, NULL, GFP_KERNEL); |
4418 | err = PTR_ERR(event); | 4723 | err = PTR_ERR(event); |
4419 | if (IS_ERR(event)) | 4724 | if (IS_ERR(event)) |
4420 | goto err_put_context; | 4725 | goto err_put_context; |
4421 | 4726 | ||
4422 | err = anon_inode_getfd("[perf_event]", &perf_fops, event, 0); | 4727 | err = anon_inode_getfd("[perf_event]", &perf_fops, event, O_RDWR); |
4423 | if (err < 0) | 4728 | if (err < 0) |
4424 | goto err_free_put_context; | 4729 | goto err_free_put_context; |
4425 | 4730 | ||
@@ -4462,6 +4767,61 @@ err_put_context: | |||
4462 | return err; | 4767 | return err; |
4463 | } | 4768 | } |
4464 | 4769 | ||
4770 | /** | ||
4771 | * perf_event_create_kernel_counter | ||
4772 | * | ||
4773 | * @attr: attributes of the counter to create | ||
4774 | * @cpu: cpu in which the counter is bound | ||
4775 | * @pid: task to profile | ||
4776 | */ | ||
4777 | struct perf_event * | ||
4778 | perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, | ||
4779 | pid_t pid, | ||
4780 | perf_overflow_handler_t overflow_handler) | ||
4781 | { | ||
4782 | struct perf_event *event; | ||
4783 | struct perf_event_context *ctx; | ||
4784 | int err; | ||
4785 | |||
4786 | /* | ||
4787 | * Get the target context (task or percpu): | ||
4788 | */ | ||
4789 | |||
4790 | ctx = find_get_context(pid, cpu); | ||
4791 | if (IS_ERR(ctx)) { | ||
4792 | err = PTR_ERR(ctx); | ||
4793 | goto err_exit; | ||
4794 | } | ||
4795 | |||
4796 | event = perf_event_alloc(attr, cpu, ctx, NULL, | ||
4797 | NULL, overflow_handler, GFP_KERNEL); | ||
4798 | if (IS_ERR(event)) { | ||
4799 | err = PTR_ERR(event); | ||
4800 | goto err_put_context; | ||
4801 | } | ||
4802 | |||
4803 | event->filp = NULL; | ||
4804 | WARN_ON_ONCE(ctx->parent_ctx); | ||
4805 | mutex_lock(&ctx->mutex); | ||
4806 | perf_install_in_context(ctx, event, cpu); | ||
4807 | ++ctx->generation; | ||
4808 | mutex_unlock(&ctx->mutex); | ||
4809 | |||
4810 | event->owner = current; | ||
4811 | get_task_struct(current); | ||
4812 | mutex_lock(¤t->perf_event_mutex); | ||
4813 | list_add_tail(&event->owner_entry, ¤t->perf_event_list); | ||
4814 | mutex_unlock(¤t->perf_event_mutex); | ||
4815 | |||
4816 | return event; | ||
4817 | |||
4818 | err_put_context: | ||
4819 | put_ctx(ctx); | ||
4820 | err_exit: | ||
4821 | return ERR_PTR(err); | ||
4822 | } | ||
4823 | EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter); | ||
4824 | |||
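perf_event_create_kernel_counter() gives kernel code (hw_breakpoint being the first user) a way to set up a counter without the syscall: it resolves the target context from pid/cpu, allocates the event with the caller's overflow handler, and installs it in that context. A hedged sketch of how a module might count context switches on one CPU with it (event choice, handler body and period are illustrative; teardown is omitted):

#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/perf_event.h>

static struct perf_event *my_counter;

static void my_handler(struct perf_event *event, int nmi,
                       struct perf_sample_data *data, struct pt_regs *regs)
{
        /* Runs from the overflow path; keep it NMI-safe. */
        pr_debug("sample: period=%llu\n", (unsigned long long)data->period);
}

static int my_counter_start(int cpu)
{
        struct perf_event_attr attr = {
                .type          = PERF_TYPE_SOFTWARE,
                .config        = PERF_COUNT_SW_CONTEXT_SWITCHES,
                .size          = sizeof(attr),
                .sample_period = 1000,   /* call the handler every 1000 events */
        };

        /* cpu-bound counter, not tied to a task (pid == -1) */
        my_counter = perf_event_create_kernel_counter(&attr, cpu, -1, my_handler);
        if (IS_ERR(my_counter))
                return PTR_ERR(my_counter);
        return 0;
}
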
4465 | /* | 4825 | /* |
4466 | * inherit a event from parent task to child task: | 4826 | * inherit a event from parent task to child task: |
4467 | */ | 4827 | */ |
@@ -4487,7 +4847,7 @@ inherit_event(struct perf_event *parent_event, | |||
4487 | child_event = perf_event_alloc(&parent_event->attr, | 4847 | child_event = perf_event_alloc(&parent_event->attr, |
4488 | parent_event->cpu, child_ctx, | 4848 | parent_event->cpu, child_ctx, |
4489 | group_leader, parent_event, | 4849 | group_leader, parent_event, |
4490 | GFP_KERNEL); | 4850 | NULL, GFP_KERNEL); |
4491 | if (IS_ERR(child_event)) | 4851 | if (IS_ERR(child_event)) |
4492 | return child_event; | 4852 | return child_event; |
4493 | get_ctx(child_ctx); | 4853 | get_ctx(child_ctx); |
@@ -4505,6 +4865,8 @@ inherit_event(struct perf_event *parent_event, | |||
4505 | if (parent_event->attr.freq) | 4865 | if (parent_event->attr.freq) |
4506 | child_event->hw.sample_period = parent_event->hw.sample_period; | 4866 | child_event->hw.sample_period = parent_event->hw.sample_period; |
4507 | 4867 | ||
4868 | child_event->overflow_handler = parent_event->overflow_handler; | ||
4869 | |||
4508 | /* | 4870 | /* |
4509 | * Link it up in the child's context: | 4871 | * Link it up in the child's context: |
4510 | */ | 4872 | */ |
@@ -4594,7 +4956,6 @@ __perf_event_exit_task(struct perf_event *child_event, | |||
4594 | { | 4956 | { |
4595 | struct perf_event *parent_event; | 4957 | struct perf_event *parent_event; |
4596 | 4958 | ||
4597 | update_event_times(child_event); | ||
4598 | perf_event_remove_from_context(child_event); | 4959 | perf_event_remove_from_context(child_event); |
4599 | 4960 | ||
4600 | parent_event = child_event->parent; | 4961 | parent_event = child_event->parent; |
@@ -4638,7 +4999,7 @@ void perf_event_exit_task(struct task_struct *child) | |||
4638 | * reading child->perf_event_ctxp, we wait until it has | 4999 | * reading child->perf_event_ctxp, we wait until it has |
4639 | * incremented the context's refcount before we do put_ctx below. | 5000 | * incremented the context's refcount before we do put_ctx below. |
4640 | */ | 5001 | */ |
4641 | spin_lock(&child_ctx->lock); | 5002 | raw_spin_lock(&child_ctx->lock); |
4642 | child->perf_event_ctxp = NULL; | 5003 | child->perf_event_ctxp = NULL; |
4643 | /* | 5004 | /* |
4644 | * If this context is a clone; unclone it so it can't get | 5005 | * If this context is a clone; unclone it so it can't get |
@@ -4646,7 +5007,8 @@ void perf_event_exit_task(struct task_struct *child) | |||
4646 | * the events from it. | 5007 | * the events from it. |
4647 | */ | 5008 | */ |
4648 | unclone_ctx(child_ctx); | 5009 | unclone_ctx(child_ctx); |
4649 | spin_unlock_irqrestore(&child_ctx->lock, flags); | 5010 | update_context_time(child_ctx); |
5011 | raw_spin_unlock_irqrestore(&child_ctx->lock, flags); | ||
4650 | 5012 | ||
4651 | /* | 5013 | /* |
4652 | * Report the task dead after unscheduling the events so that we | 5014 | * Report the task dead after unscheduling the events so that we |
@@ -4729,7 +5091,7 @@ again: | |||
4729 | */ | 5091 | */ |
4730 | int perf_event_init_task(struct task_struct *child) | 5092 | int perf_event_init_task(struct task_struct *child) |
4731 | { | 5093 | { |
4732 | struct perf_event_context *child_ctx, *parent_ctx; | 5094 | struct perf_event_context *child_ctx = NULL, *parent_ctx; |
4733 | struct perf_event_context *cloned_ctx; | 5095 | struct perf_event_context *cloned_ctx; |
4734 | struct perf_event *event; | 5096 | struct perf_event *event; |
4735 | struct task_struct *parent = current; | 5097 | struct task_struct *parent = current; |
@@ -4745,20 +5107,6 @@ int perf_event_init_task(struct task_struct *child) | |||
4745 | return 0; | 5107 | return 0; |
4746 | 5108 | ||
4747 | /* | 5109 | /* |
4748 | * This is executed from the parent task context, so inherit | ||
4749 | * events that have been marked for cloning. | ||
4750 | * First allocate and initialize a context for the child. | ||
4751 | */ | ||
4752 | |||
4753 | child_ctx = kmalloc(sizeof(struct perf_event_context), GFP_KERNEL); | ||
4754 | if (!child_ctx) | ||
4755 | return -ENOMEM; | ||
4756 | |||
4757 | __perf_event_init_context(child_ctx, child); | ||
4758 | child->perf_event_ctxp = child_ctx; | ||
4759 | get_task_struct(child); | ||
4760 | |||
4761 | /* | ||
4762 | * If the parent's context is a clone, pin it so it won't get | 5110 | * If the parent's context is a clone, pin it so it won't get |
4763 | * swapped under us. | 5111 | * swapped under us. |
4764 | */ | 5112 | */ |
@@ -4781,15 +5129,33 @@ int perf_event_init_task(struct task_struct *child) | |||
4781 | * We dont have to disable NMIs - we are only looking at | 5129 | * We dont have to disable NMIs - we are only looking at |
4782 | * the list, not manipulating it: | 5130 | * the list, not manipulating it: |
4783 | */ | 5131 | */ |
4784 | list_for_each_entry_rcu(event, &parent_ctx->event_list, event_entry) { | 5132 | list_for_each_entry(event, &parent_ctx->group_list, group_entry) { |
4785 | if (event != event->group_leader) | ||
4786 | continue; | ||
4787 | 5133 | ||
4788 | if (!event->attr.inherit) { | 5134 | if (!event->attr.inherit) { |
4789 | inherited_all = 0; | 5135 | inherited_all = 0; |
4790 | continue; | 5136 | continue; |
4791 | } | 5137 | } |
4792 | 5138 | ||
5139 | if (!child->perf_event_ctxp) { | ||
5140 | /* | ||
5141 | * This is executed from the parent task context, so | ||
5142 | * inherit events that have been marked for cloning. | ||
5143 | * First allocate and initialize a context for the | ||
5144 | * child. | ||
5145 | */ | ||
5146 | |||
5147 | child_ctx = kzalloc(sizeof(struct perf_event_context), | ||
5148 | GFP_KERNEL); | ||
5149 | if (!child_ctx) { | ||
5150 | ret = -ENOMEM; | ||
5151 | break; | ||
5152 | } | ||
5153 | |||
5154 | __perf_event_init_context(child_ctx, child); | ||
5155 | child->perf_event_ctxp = child_ctx; | ||
5156 | get_task_struct(child); | ||
5157 | } | ||
5158 | |||
4793 | ret = inherit_group(event, parent, parent_ctx, | 5159 | ret = inherit_group(event, parent, parent_ctx, |
4794 | child, child_ctx); | 5160 | child, child_ctx); |
4795 | if (ret) { | 5161 | if (ret) { |
@@ -4798,7 +5164,7 @@ int perf_event_init_task(struct task_struct *child) | |||
4798 | } | 5164 | } |
4799 | } | 5165 | } |
4800 | 5166 | ||
4801 | if (inherited_all) { | 5167 | if (child_ctx && inherited_all) { |
4802 | /* | 5168 | /* |
4803 | * Mark the child context as a clone of the parent | 5169 | * Mark the child context as a clone of the parent |
4804 | * context, or of whatever the parent is a clone of. | 5170 | * context, or of whatever the parent is a clone of. |
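The last two hunks make perf_event_init_task() allocate the child context lazily: the context is created only when the walk over the parent's group list meets the first inheritable event, and the clone-marking at the end is guarded by child_ctx being non-NULL, so children whose parent has nothing marked inherit never carry an empty context. A small user-space sketch of that allocate-on-first-use loop:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct child_ctx { int nr_events; };

/* Sketch: walk the parent's events and create the child context only when
 * the first inheritable event is seen; return NULL if nothing was inherited. */
static struct child_ctx *inherit_events(const bool *inherit, int n)
{
        struct child_ctx *ctx = NULL;

        for (int i = 0; i < n; i++) {
                if (!inherit[i])
                        continue;
                if (!ctx) {
                        ctx = calloc(1, sizeof(*ctx));  /* first use: allocate */
                        if (!ctx)
                                return NULL;
                }
                ctx->nr_events++;                       /* "clone" the event */
        }
        return ctx;
}

int main(void)
{
        bool parent_events[] = { false, true, true, false };
        struct child_ctx *ctx = inherit_events(parent_events, 4);

        printf("%d\n", ctx ? ctx->nr_events : 0);       /* 2 */
        free(ctx);
        return 0;
}
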
@@ -4932,11 +5298,11 @@ perf_set_reserve_percpu(struct sysdev_class *class, | |||
4932 | perf_reserved_percpu = val; | 5298 | perf_reserved_percpu = val; |
4933 | for_each_online_cpu(cpu) { | 5299 | for_each_online_cpu(cpu) { |
4934 | cpuctx = &per_cpu(perf_cpu_context, cpu); | 5300 | cpuctx = &per_cpu(perf_cpu_context, cpu); |
4935 | spin_lock_irq(&cpuctx->ctx.lock); | 5301 | raw_spin_lock_irq(&cpuctx->ctx.lock); |
4936 | mpt = min(perf_max_events - cpuctx->ctx.nr_events, | 5302 | mpt = min(perf_max_events - cpuctx->ctx.nr_events, |
4937 | perf_max_events - perf_reserved_percpu); | 5303 | perf_max_events - perf_reserved_percpu); |
4938 | cpuctx->max_pertask = mpt; | 5304 | cpuctx->max_pertask = mpt; |
4939 | spin_unlock_irq(&cpuctx->ctx.lock); | 5305 | raw_spin_unlock_irq(&cpuctx->ctx.lock); |
4940 | } | 5306 | } |
4941 | spin_unlock(&perf_resource_lock); | 5307 | spin_unlock(&perf_resource_lock); |
4942 | 5308 | ||