path: root/kernel/perf_event.c
Diffstat (limited to 'kernel/perf_event.c')
-rw-r--r--  kernel/perf_event.c  789
1 file changed, 511 insertions(+), 278 deletions(-)
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 7f29643c8985..8ab86988bd24 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -28,13 +28,15 @@
28#include <linux/anon_inodes.h> 28#include <linux/anon_inodes.h>
29#include <linux/kernel_stat.h> 29#include <linux/kernel_stat.h>
30#include <linux/perf_event.h> 30#include <linux/perf_event.h>
31#include <linux/ftrace_event.h>
32#include <linux/hw_breakpoint.h>
31 33
32#include <asm/irq_regs.h> 34#include <asm/irq_regs.h>
33 35
34/* 36/*
35 * Each CPU has a list of per CPU events: 37 * Each CPU has a list of per CPU events:
36 */ 38 */
37DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context); 39static DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context);
38 40
39int perf_max_events __read_mostly = 1; 41int perf_max_events __read_mostly = 1;
40static int perf_reserved_percpu __read_mostly; 42static int perf_reserved_percpu __read_mostly;
@@ -201,14 +203,14 @@ perf_lock_task_context(struct task_struct *task, unsigned long *flags)
201 * if so. If we locked the right context, then it 203 * if so. If we locked the right context, then it
202 * can't get swapped on us any more. 204 * can't get swapped on us any more.
203 */ 205 */
204 spin_lock_irqsave(&ctx->lock, *flags); 206 raw_spin_lock_irqsave(&ctx->lock, *flags);
205 if (ctx != rcu_dereference(task->perf_event_ctxp)) { 207 if (ctx != rcu_dereference(task->perf_event_ctxp)) {
206 spin_unlock_irqrestore(&ctx->lock, *flags); 208 raw_spin_unlock_irqrestore(&ctx->lock, *flags);
207 goto retry; 209 goto retry;
208 } 210 }
209 211
210 if (!atomic_inc_not_zero(&ctx->refcount)) { 212 if (!atomic_inc_not_zero(&ctx->refcount)) {
211 spin_unlock_irqrestore(&ctx->lock, *flags); 213 raw_spin_unlock_irqrestore(&ctx->lock, *flags);
212 ctx = NULL; 214 ctx = NULL;
213 } 215 }
214 } 216 }
@@ -229,7 +231,7 @@ static struct perf_event_context *perf_pin_task_context(struct task_struct *task
229 ctx = perf_lock_task_context(task, &flags); 231 ctx = perf_lock_task_context(task, &flags);
230 if (ctx) { 232 if (ctx) {
231 ++ctx->pin_count; 233 ++ctx->pin_count;
232 spin_unlock_irqrestore(&ctx->lock, flags); 234 raw_spin_unlock_irqrestore(&ctx->lock, flags);
233 } 235 }
234 return ctx; 236 return ctx;
235} 237}
@@ -238,12 +240,55 @@ static void perf_unpin_context(struct perf_event_context *ctx)
238{ 240{
239 unsigned long flags; 241 unsigned long flags;
240 242
241 spin_lock_irqsave(&ctx->lock, flags); 243 raw_spin_lock_irqsave(&ctx->lock, flags);
242 --ctx->pin_count; 244 --ctx->pin_count;
243 spin_unlock_irqrestore(&ctx->lock, flags); 245 raw_spin_unlock_irqrestore(&ctx->lock, flags);
244 put_ctx(ctx); 246 put_ctx(ctx);
245} 247}
246 248
249static inline u64 perf_clock(void)
250{
251 return cpu_clock(smp_processor_id());
252}
253
254/*
255 * Update the record of the current time in a context.
256 */
257static void update_context_time(struct perf_event_context *ctx)
258{
259 u64 now = perf_clock();
260
261 ctx->time += now - ctx->timestamp;
262 ctx->timestamp = now;
263}
264
265/*
266 * Update the total_time_enabled and total_time_running fields for a event.
267 */
268static void update_event_times(struct perf_event *event)
269{
270 struct perf_event_context *ctx = event->ctx;
271 u64 run_end;
272
273 if (event->state < PERF_EVENT_STATE_INACTIVE ||
274 event->group_leader->state < PERF_EVENT_STATE_INACTIVE)
275 return;
276
277 if (ctx->is_active)
278 run_end = ctx->time;
279 else
280 run_end = event->tstamp_stopped;
281
282 event->total_time_enabled = run_end - event->tstamp_enabled;
283
284 if (event->state == PERF_EVENT_STATE_INACTIVE)
285 run_end = event->tstamp_stopped;
286 else
287 run_end = ctx->time;
288
289 event->total_time_running = run_end - event->tstamp_running;
290}
291
247/* 292/*
248 * Add a event from the lists for its context. 293 * Add a event from the lists for its context.
249 * Must be called with ctx->mutex and ctx->lock held. 294 * Must be called with ctx->mutex and ctx->lock held.
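The update_event_times() helper introduced above is what keeps total_time_enabled and total_time_running current; userspace later uses that pair to compensate for multiplexing by scaling a raw count by enabled/running. A minimal, purely illustrative userspace sketch of that scaling (not part of this patch):

#include <stdint.h>

/* Scale a raw counter value the way perf tooling conventionally does when
 * the event was only on the PMU for part of the time it was enabled. */
static uint64_t perf_scale_count(uint64_t count, uint64_t enabled, uint64_t running)
{
        if (!running)
                return 0;       /* event never ran; no meaningful estimate */
        return (uint64_t)((double)count * enabled / running);
}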
@@ -292,6 +337,18 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
292 if (event->group_leader != event) 337 if (event->group_leader != event)
293 event->group_leader->nr_siblings--; 338 event->group_leader->nr_siblings--;
294 339
340 update_event_times(event);
341
342 /*
343 * If event was in error state, then keep it
344 * that way, otherwise bogus counts will be
345 * returned on read(). The only way to get out
346 * of error state is by explicit re-enabling
347 * of the event
348 */
349 if (event->state > PERF_EVENT_STATE_OFF)
350 event->state = PERF_EVENT_STATE_OFF;
351
295 /* 352 /*
296 * If this was a group event with sibling events then 353 * If this was a group event with sibling events then
297 * upgrade the siblings to singleton events by adding them 354 * upgrade the siblings to singleton events by adding them
@@ -370,7 +427,7 @@ static void __perf_event_remove_from_context(void *info)
370 if (ctx->task && cpuctx->task_ctx != ctx) 427 if (ctx->task && cpuctx->task_ctx != ctx)
371 return; 428 return;
372 429
373 spin_lock(&ctx->lock); 430 raw_spin_lock(&ctx->lock);
374 /* 431 /*
375 * Protect the list operation against NMI by disabling the 432 * Protect the list operation against NMI by disabling the
376 * events on a global level. 433 * events on a global level.
@@ -392,7 +449,7 @@ static void __perf_event_remove_from_context(void *info)
392 } 449 }
393 450
394 perf_enable(); 451 perf_enable();
395 spin_unlock(&ctx->lock); 452 raw_spin_unlock(&ctx->lock);
396} 453}
397 454
398 455
@@ -419,7 +476,7 @@ static void perf_event_remove_from_context(struct perf_event *event)
419 if (!task) { 476 if (!task) {
420 /* 477 /*
421 * Per cpu events are removed via an smp call and 478 * Per cpu events are removed via an smp call and
422 * the removal is always sucessful. 479 * the removal is always successful.
423 */ 480 */
424 smp_call_function_single(event->cpu, 481 smp_call_function_single(event->cpu,
425 __perf_event_remove_from_context, 482 __perf_event_remove_from_context,
@@ -431,12 +488,12 @@ retry:
431 task_oncpu_function_call(task, __perf_event_remove_from_context, 488 task_oncpu_function_call(task, __perf_event_remove_from_context,
432 event); 489 event);
433 490
434 spin_lock_irq(&ctx->lock); 491 raw_spin_lock_irq(&ctx->lock);
435 /* 492 /*
436 * If the context is active we need to retry the smp call. 493 * If the context is active we need to retry the smp call.
437 */ 494 */
438 if (ctx->nr_active && !list_empty(&event->group_entry)) { 495 if (ctx->nr_active && !list_empty(&event->group_entry)) {
439 spin_unlock_irq(&ctx->lock); 496 raw_spin_unlock_irq(&ctx->lock);
440 goto retry; 497 goto retry;
441 } 498 }
442 499
@@ -445,48 +502,9 @@ retry:
445 * can remove the event safely, if the call above did not 502 * can remove the event safely, if the call above did not
446 * succeed. 503 * succeed.
447 */ 504 */
448 if (!list_empty(&event->group_entry)) { 505 if (!list_empty(&event->group_entry))
449 list_del_event(event, ctx); 506 list_del_event(event, ctx);
450 } 507 raw_spin_unlock_irq(&ctx->lock);
451 spin_unlock_irq(&ctx->lock);
452}
453
454static inline u64 perf_clock(void)
455{
456 return cpu_clock(smp_processor_id());
457}
458
459/*
460 * Update the record of the current time in a context.
461 */
462static void update_context_time(struct perf_event_context *ctx)
463{
464 u64 now = perf_clock();
465
466 ctx->time += now - ctx->timestamp;
467 ctx->timestamp = now;
468}
469
470/*
471 * Update the total_time_enabled and total_time_running fields for a event.
472 */
473static void update_event_times(struct perf_event *event)
474{
475 struct perf_event_context *ctx = event->ctx;
476 u64 run_end;
477
478 if (event->state < PERF_EVENT_STATE_INACTIVE ||
479 event->group_leader->state < PERF_EVENT_STATE_INACTIVE)
480 return;
481
482 event->total_time_enabled = ctx->time - event->tstamp_enabled;
483
484 if (event->state == PERF_EVENT_STATE_INACTIVE)
485 run_end = event->tstamp_stopped;
486 else
487 run_end = ctx->time;
488
489 event->total_time_running = run_end - event->tstamp_running;
490} 508}
491 509
492/* 510/*
@@ -517,7 +535,7 @@ static void __perf_event_disable(void *info)
517 if (ctx->task && cpuctx->task_ctx != ctx) 535 if (ctx->task && cpuctx->task_ctx != ctx)
518 return; 536 return;
519 537
520 spin_lock(&ctx->lock); 538 raw_spin_lock(&ctx->lock);
521 539
522 /* 540 /*
523 * If the event is on, turn it off. 541 * If the event is on, turn it off.
@@ -533,7 +551,7 @@ static void __perf_event_disable(void *info)
533 event->state = PERF_EVENT_STATE_OFF; 551 event->state = PERF_EVENT_STATE_OFF;
534 } 552 }
535 553
536 spin_unlock(&ctx->lock); 554 raw_spin_unlock(&ctx->lock);
537} 555}
538 556
539/* 557/*
@@ -549,7 +567,7 @@ static void __perf_event_disable(void *info)
549 * is the current context on this CPU and preemption is disabled, 567 * is the current context on this CPU and preemption is disabled,
550 * hence we can't get into perf_event_task_sched_out for this context. 568 * hence we can't get into perf_event_task_sched_out for this context.
551 */ 569 */
552static void perf_event_disable(struct perf_event *event) 570void perf_event_disable(struct perf_event *event)
553{ 571{
554 struct perf_event_context *ctx = event->ctx; 572 struct perf_event_context *ctx = event->ctx;
555 struct task_struct *task = ctx->task; 573 struct task_struct *task = ctx->task;
@@ -566,12 +584,12 @@ static void perf_event_disable(struct perf_event *event)
566 retry: 584 retry:
567 task_oncpu_function_call(task, __perf_event_disable, event); 585 task_oncpu_function_call(task, __perf_event_disable, event);
568 586
569 spin_lock_irq(&ctx->lock); 587 raw_spin_lock_irq(&ctx->lock);
570 /* 588 /*
571 * If the event is still active, we need to retry the cross-call. 589 * If the event is still active, we need to retry the cross-call.
572 */ 590 */
573 if (event->state == PERF_EVENT_STATE_ACTIVE) { 591 if (event->state == PERF_EVENT_STATE_ACTIVE) {
574 spin_unlock_irq(&ctx->lock); 592 raw_spin_unlock_irq(&ctx->lock);
575 goto retry; 593 goto retry;
576 } 594 }
577 595
@@ -584,7 +602,7 @@ static void perf_event_disable(struct perf_event *event)
584 event->state = PERF_EVENT_STATE_OFF; 602 event->state = PERF_EVENT_STATE_OFF;
585 } 603 }
586 604
587 spin_unlock_irq(&ctx->lock); 605 raw_spin_unlock_irq(&ctx->lock);
588} 606}
589 607
590static int 608static int
@@ -752,7 +770,7 @@ static void __perf_install_in_context(void *info)
752 cpuctx->task_ctx = ctx; 770 cpuctx->task_ctx = ctx;
753 } 771 }
754 772
755 spin_lock(&ctx->lock); 773 raw_spin_lock(&ctx->lock);
756 ctx->is_active = 1; 774 ctx->is_active = 1;
757 update_context_time(ctx); 775 update_context_time(ctx);
758 776
@@ -764,6 +782,9 @@ static void __perf_install_in_context(void *info)
764 782
765 add_event_to_ctx(event, ctx); 783 add_event_to_ctx(event, ctx);
766 784
785 if (event->cpu != -1 && event->cpu != smp_processor_id())
786 goto unlock;
787
767 /* 788 /*
768 * Don't put the event on if it is disabled or if 789 * Don't put the event on if it is disabled or if
769 * it is in a group and the group isn't on. 790 * it is in a group and the group isn't on.
@@ -802,7 +823,7 @@ static void __perf_install_in_context(void *info)
802 unlock: 823 unlock:
803 perf_enable(); 824 perf_enable();
804 825
805 spin_unlock(&ctx->lock); 826 raw_spin_unlock(&ctx->lock);
806} 827}
807 828
808/* 829/*
@@ -827,7 +848,7 @@ perf_install_in_context(struct perf_event_context *ctx,
827 if (!task) { 848 if (!task) {
828 /* 849 /*
829 * Per cpu events are installed via an smp call and 850 * Per cpu events are installed via an smp call and
830 * the install is always sucessful. 851 * the install is always successful.
831 */ 852 */
832 smp_call_function_single(cpu, __perf_install_in_context, 853 smp_call_function_single(cpu, __perf_install_in_context,
833 event, 1); 854 event, 1);
@@ -838,12 +859,12 @@ retry:
838 task_oncpu_function_call(task, __perf_install_in_context, 859 task_oncpu_function_call(task, __perf_install_in_context,
839 event); 860 event);
840 861
841 spin_lock_irq(&ctx->lock); 862 raw_spin_lock_irq(&ctx->lock);
842 /* 863 /*
843 * we need to retry the smp call. 864 * we need to retry the smp call.
844 */ 865 */
845 if (ctx->is_active && list_empty(&event->group_entry)) { 866 if (ctx->is_active && list_empty(&event->group_entry)) {
846 spin_unlock_irq(&ctx->lock); 867 raw_spin_unlock_irq(&ctx->lock);
847 goto retry; 868 goto retry;
848 } 869 }
849 870
@@ -854,7 +875,7 @@ retry:
854 */ 875 */
855 if (list_empty(&event->group_entry)) 876 if (list_empty(&event->group_entry))
856 add_event_to_ctx(event, ctx); 877 add_event_to_ctx(event, ctx);
857 spin_unlock_irq(&ctx->lock); 878 raw_spin_unlock_irq(&ctx->lock);
858} 879}
859 880
860/* 881/*
@@ -899,7 +920,7 @@ static void __perf_event_enable(void *info)
899 cpuctx->task_ctx = ctx; 920 cpuctx->task_ctx = ctx;
900 } 921 }
901 922
902 spin_lock(&ctx->lock); 923 raw_spin_lock(&ctx->lock);
903 ctx->is_active = 1; 924 ctx->is_active = 1;
904 update_context_time(ctx); 925 update_context_time(ctx);
905 926
@@ -907,6 +928,9 @@ static void __perf_event_enable(void *info)
907 goto unlock; 928 goto unlock;
908 __perf_event_mark_enabled(event, ctx); 929 __perf_event_mark_enabled(event, ctx);
909 930
931 if (event->cpu != -1 && event->cpu != smp_processor_id())
932 goto unlock;
933
910 /* 934 /*
911 * If the event is in a group and isn't the group leader, 935 * If the event is in a group and isn't the group leader,
912 * then don't put it on unless the group is on. 936 * then don't put it on unless the group is on.
@@ -941,7 +965,7 @@ static void __perf_event_enable(void *info)
941 } 965 }
942 966
943 unlock: 967 unlock:
944 spin_unlock(&ctx->lock); 968 raw_spin_unlock(&ctx->lock);
945} 969}
946 970
947/* 971/*
@@ -953,7 +977,7 @@ static void __perf_event_enable(void *info)
953 * perf_event_for_each_child or perf_event_for_each as described 977 * perf_event_for_each_child or perf_event_for_each as described
954 * for perf_event_disable. 978 * for perf_event_disable.
955 */ 979 */
956static void perf_event_enable(struct perf_event *event) 980void perf_event_enable(struct perf_event *event)
957{ 981{
958 struct perf_event_context *ctx = event->ctx; 982 struct perf_event_context *ctx = event->ctx;
959 struct task_struct *task = ctx->task; 983 struct task_struct *task = ctx->task;
@@ -967,7 +991,7 @@ static void perf_event_enable(struct perf_event *event)
967 return; 991 return;
968 } 992 }
969 993
970 spin_lock_irq(&ctx->lock); 994 raw_spin_lock_irq(&ctx->lock);
971 if (event->state >= PERF_EVENT_STATE_INACTIVE) 995 if (event->state >= PERF_EVENT_STATE_INACTIVE)
972 goto out; 996 goto out;
973 997
@@ -982,10 +1006,10 @@ static void perf_event_enable(struct perf_event *event)
982 event->state = PERF_EVENT_STATE_OFF; 1006 event->state = PERF_EVENT_STATE_OFF;
983 1007
984 retry: 1008 retry:
985 spin_unlock_irq(&ctx->lock); 1009 raw_spin_unlock_irq(&ctx->lock);
986 task_oncpu_function_call(task, __perf_event_enable, event); 1010 task_oncpu_function_call(task, __perf_event_enable, event);
987 1011
988 spin_lock_irq(&ctx->lock); 1012 raw_spin_lock_irq(&ctx->lock);
989 1013
990 /* 1014 /*
991 * If the context is active and the event is still off, 1015 * If the context is active and the event is still off,
@@ -1002,7 +1026,7 @@ static void perf_event_enable(struct perf_event *event)
1002 __perf_event_mark_enabled(event, ctx); 1026 __perf_event_mark_enabled(event, ctx);
1003 1027
1004 out: 1028 out:
1005 spin_unlock_irq(&ctx->lock); 1029 raw_spin_unlock_irq(&ctx->lock);
1006} 1030}
1007 1031
1008static int perf_event_refresh(struct perf_event *event, int refresh) 1032static int perf_event_refresh(struct perf_event *event, int refresh)
@@ -1024,20 +1048,20 @@ void __perf_event_sched_out(struct perf_event_context *ctx,
1024{ 1048{
1025 struct perf_event *event; 1049 struct perf_event *event;
1026 1050
1027 spin_lock(&ctx->lock); 1051 raw_spin_lock(&ctx->lock);
1028 ctx->is_active = 0; 1052 ctx->is_active = 0;
1029 if (likely(!ctx->nr_events)) 1053 if (likely(!ctx->nr_events))
1030 goto out; 1054 goto out;
1031 update_context_time(ctx); 1055 update_context_time(ctx);
1032 1056
1033 perf_disable(); 1057 perf_disable();
1034 if (ctx->nr_active) 1058 if (ctx->nr_active) {
1035 list_for_each_entry(event, &ctx->group_list, group_entry) 1059 list_for_each_entry(event, &ctx->group_list, group_entry)
1036 group_sched_out(event, cpuctx, ctx); 1060 group_sched_out(event, cpuctx, ctx);
1037 1061 }
1038 perf_enable(); 1062 perf_enable();
1039 out: 1063 out:
1040 spin_unlock(&ctx->lock); 1064 raw_spin_unlock(&ctx->lock);
1041} 1065}
1042 1066
1043/* 1067/*
@@ -1059,8 +1083,6 @@ static int context_equiv(struct perf_event_context *ctx1,
1059 && !ctx1->pin_count && !ctx2->pin_count; 1083 && !ctx1->pin_count && !ctx2->pin_count;
1060} 1084}
1061 1085
1062static void __perf_event_read(void *event);
1063
1064static void __perf_event_sync_stat(struct perf_event *event, 1086static void __perf_event_sync_stat(struct perf_event *event,
1065 struct perf_event *next_event) 1087 struct perf_event *next_event)
1066{ 1088{
@@ -1078,8 +1100,8 @@ static void __perf_event_sync_stat(struct perf_event *event,
1078 */ 1100 */
1079 switch (event->state) { 1101 switch (event->state) {
1080 case PERF_EVENT_STATE_ACTIVE: 1102 case PERF_EVENT_STATE_ACTIVE:
1081 __perf_event_read(event); 1103 event->pmu->read(event);
1082 break; 1104 /* fall-through */
1083 1105
1084 case PERF_EVENT_STATE_INACTIVE: 1106 case PERF_EVENT_STATE_INACTIVE:
1085 update_event_times(event); 1107 update_event_times(event);
@@ -1118,6 +1140,8 @@ static void perf_event_sync_stat(struct perf_event_context *ctx,
1118 if (!ctx->nr_stat) 1140 if (!ctx->nr_stat)
1119 return; 1141 return;
1120 1142
1143 update_context_time(ctx);
1144
1121 event = list_first_entry(&ctx->event_list, 1145 event = list_first_entry(&ctx->event_list,
1122 struct perf_event, event_entry); 1146 struct perf_event, event_entry);
1123 1147
@@ -1161,8 +1185,6 @@ void perf_event_task_sched_out(struct task_struct *task,
1161 if (likely(!ctx || !cpuctx->task_ctx)) 1185 if (likely(!ctx || !cpuctx->task_ctx))
1162 return; 1186 return;
1163 1187
1164 update_context_time(ctx);
1165
1166 rcu_read_lock(); 1188 rcu_read_lock();
1167 parent = rcu_dereference(ctx->parent_ctx); 1189 parent = rcu_dereference(ctx->parent_ctx);
1168 next_ctx = next->perf_event_ctxp; 1190 next_ctx = next->perf_event_ctxp;
@@ -1177,8 +1199,8 @@ void perf_event_task_sched_out(struct task_struct *task,
1177 * order we take the locks because no other cpu could 1199 * order we take the locks because no other cpu could
1178 * be trying to lock both of these tasks. 1200 * be trying to lock both of these tasks.
1179 */ 1201 */
1180 spin_lock(&ctx->lock); 1202 raw_spin_lock(&ctx->lock);
1181 spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING); 1203 raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
1182 if (context_equiv(ctx, next_ctx)) { 1204 if (context_equiv(ctx, next_ctx)) {
1183 /* 1205 /*
1184 * XXX do we need a memory barrier of sorts 1206 * XXX do we need a memory barrier of sorts
@@ -1192,8 +1214,8 @@ void perf_event_task_sched_out(struct task_struct *task,
1192 1214
1193 perf_event_sync_stat(ctx, next_ctx); 1215 perf_event_sync_stat(ctx, next_ctx);
1194 } 1216 }
1195 spin_unlock(&next_ctx->lock); 1217 raw_spin_unlock(&next_ctx->lock);
1196 spin_unlock(&ctx->lock); 1218 raw_spin_unlock(&ctx->lock);
1197 } 1219 }
1198 rcu_read_unlock(); 1220 rcu_read_unlock();
1199 1221
@@ -1235,7 +1257,7 @@ __perf_event_sched_in(struct perf_event_context *ctx,
1235 struct perf_event *event; 1257 struct perf_event *event;
1236 int can_add_hw = 1; 1258 int can_add_hw = 1;
1237 1259
1238 spin_lock(&ctx->lock); 1260 raw_spin_lock(&ctx->lock);
1239 ctx->is_active = 1; 1261 ctx->is_active = 1;
1240 if (likely(!ctx->nr_events)) 1262 if (likely(!ctx->nr_events))
1241 goto out; 1263 goto out;
@@ -1290,7 +1312,7 @@ __perf_event_sched_in(struct perf_event_context *ctx,
1290 } 1312 }
1291 perf_enable(); 1313 perf_enable();
1292 out: 1314 out:
1293 spin_unlock(&ctx->lock); 1315 raw_spin_unlock(&ctx->lock);
1294} 1316}
1295 1317
1296/* 1318/*
@@ -1354,7 +1376,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
1354 struct hw_perf_event *hwc; 1376 struct hw_perf_event *hwc;
1355 u64 interrupts, freq; 1377 u64 interrupts, freq;
1356 1378
1357 spin_lock(&ctx->lock); 1379 raw_spin_lock(&ctx->lock);
1358 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 1380 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
1359 if (event->state != PERF_EVENT_STATE_ACTIVE) 1381 if (event->state != PERF_EVENT_STATE_ACTIVE)
1360 continue; 1382 continue;
@@ -1409,7 +1431,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
1409 perf_enable(); 1431 perf_enable();
1410 } 1432 }
1411 } 1433 }
1412 spin_unlock(&ctx->lock); 1434 raw_spin_unlock(&ctx->lock);
1413} 1435}
1414 1436
1415/* 1437/*
@@ -1422,7 +1444,7 @@ static void rotate_ctx(struct perf_event_context *ctx)
1422 if (!ctx->nr_events) 1444 if (!ctx->nr_events)
1423 return; 1445 return;
1424 1446
1425 spin_lock(&ctx->lock); 1447 raw_spin_lock(&ctx->lock);
1426 /* 1448 /*
1427 * Rotate the first entry last (works just fine for group events too): 1449 * Rotate the first entry last (works just fine for group events too):
1428 */ 1450 */
@@ -1433,7 +1455,7 @@ static void rotate_ctx(struct perf_event_context *ctx)
1433 } 1455 }
1434 perf_enable(); 1456 perf_enable();
1435 1457
1436 spin_unlock(&ctx->lock); 1458 raw_spin_unlock(&ctx->lock);
1437} 1459}
1438 1460
1439void perf_event_task_tick(struct task_struct *curr, int cpu) 1461void perf_event_task_tick(struct task_struct *curr, int cpu)
@@ -1482,7 +1504,7 @@ static void perf_event_enable_on_exec(struct task_struct *task)
1482 1504
1483 __perf_event_task_sched_out(ctx); 1505 __perf_event_task_sched_out(ctx);
1484 1506
1485 spin_lock(&ctx->lock); 1507 raw_spin_lock(&ctx->lock);
1486 1508
1487 list_for_each_entry(event, &ctx->group_list, group_entry) { 1509 list_for_each_entry(event, &ctx->group_list, group_entry) {
1488 if (!event->attr.enable_on_exec) 1510 if (!event->attr.enable_on_exec)
@@ -1500,7 +1522,7 @@ static void perf_event_enable_on_exec(struct task_struct *task)
1500 if (enabled) 1522 if (enabled)
1501 unclone_ctx(ctx); 1523 unclone_ctx(ctx);
1502 1524
1503 spin_unlock(&ctx->lock); 1525 raw_spin_unlock(&ctx->lock);
1504 1526
1505 perf_event_task_sched_in(task, smp_processor_id()); 1527 perf_event_task_sched_in(task, smp_processor_id());
1506 out: 1528 out:
@@ -1515,7 +1537,6 @@ static void __perf_event_read(void *info)
1515 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); 1537 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
1516 struct perf_event *event = info; 1538 struct perf_event *event = info;
1517 struct perf_event_context *ctx = event->ctx; 1539 struct perf_event_context *ctx = event->ctx;
1518 unsigned long flags;
1519 1540
1520 /* 1541 /*
1521 * If this is a task context, we need to check whether it is 1542 * If this is a task context, we need to check whether it is
@@ -1527,12 +1548,12 @@ static void __perf_event_read(void *info)
1527 if (ctx->task && cpuctx->task_ctx != ctx) 1548 if (ctx->task && cpuctx->task_ctx != ctx)
1528 return; 1549 return;
1529 1550
1530 local_irq_save(flags); 1551 raw_spin_lock(&ctx->lock);
1531 if (ctx->is_active) 1552 update_context_time(ctx);
1532 update_context_time(ctx);
1533 event->pmu->read(event);
1534 update_event_times(event); 1553 update_event_times(event);
1535 local_irq_restore(flags); 1554 raw_spin_unlock(&ctx->lock);
1555
1556 event->pmu->read(event);
1536} 1557}
1537 1558
1538static u64 perf_event_read(struct perf_event *event) 1559static u64 perf_event_read(struct perf_event *event)
@@ -1545,7 +1566,13 @@ static u64 perf_event_read(struct perf_event *event)
1545 smp_call_function_single(event->oncpu, 1566 smp_call_function_single(event->oncpu,
1546 __perf_event_read, event, 1); 1567 __perf_event_read, event, 1);
1547 } else if (event->state == PERF_EVENT_STATE_INACTIVE) { 1568 } else if (event->state == PERF_EVENT_STATE_INACTIVE) {
1569 struct perf_event_context *ctx = event->ctx;
1570 unsigned long flags;
1571
1572 raw_spin_lock_irqsave(&ctx->lock, flags);
1573 update_context_time(ctx);
1548 update_event_times(event); 1574 update_event_times(event);
1575 raw_spin_unlock_irqrestore(&ctx->lock, flags);
1549 } 1576 }
1550 1577
1551 return atomic64_read(&event->count); 1578 return atomic64_read(&event->count);
@@ -1558,8 +1585,7 @@ static void
1558__perf_event_init_context(struct perf_event_context *ctx, 1585__perf_event_init_context(struct perf_event_context *ctx,
1559 struct task_struct *task) 1586 struct task_struct *task)
1560{ 1587{
1561 memset(ctx, 0, sizeof(*ctx)); 1588 raw_spin_lock_init(&ctx->lock);
1562 spin_lock_init(&ctx->lock);
1563 mutex_init(&ctx->mutex); 1589 mutex_init(&ctx->mutex);
1564 INIT_LIST_HEAD(&ctx->group_list); 1590 INIT_LIST_HEAD(&ctx->group_list);
1565 INIT_LIST_HEAD(&ctx->event_list); 1591 INIT_LIST_HEAD(&ctx->event_list);
@@ -1575,15 +1601,12 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu)
1575 unsigned long flags; 1601 unsigned long flags;
1576 int err; 1602 int err;
1577 1603
1578 /* 1604 if (pid == -1 && cpu != -1) {
1579 * If cpu is not a wildcard then this is a percpu event:
1580 */
1581 if (cpu != -1) {
1582 /* Must be root to operate on a CPU event: */ 1605 /* Must be root to operate on a CPU event: */
1583 if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) 1606 if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
1584 return ERR_PTR(-EACCES); 1607 return ERR_PTR(-EACCES);
1585 1608
1586 if (cpu < 0 || cpu > num_possible_cpus()) 1609 if (cpu < 0 || cpu >= nr_cpumask_bits)
1587 return ERR_PTR(-EINVAL); 1610 return ERR_PTR(-EINVAL);
1588 1611
1589 /* 1612 /*
@@ -1629,11 +1652,11 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu)
1629 ctx = perf_lock_task_context(task, &flags); 1652 ctx = perf_lock_task_context(task, &flags);
1630 if (ctx) { 1653 if (ctx) {
1631 unclone_ctx(ctx); 1654 unclone_ctx(ctx);
1632 spin_unlock_irqrestore(&ctx->lock, flags); 1655 raw_spin_unlock_irqrestore(&ctx->lock, flags);
1633 } 1656 }
1634 1657
1635 if (!ctx) { 1658 if (!ctx) {
1636 ctx = kmalloc(sizeof(struct perf_event_context), GFP_KERNEL); 1659 ctx = kzalloc(sizeof(struct perf_event_context), GFP_KERNEL);
1637 err = -ENOMEM; 1660 err = -ENOMEM;
1638 if (!ctx) 1661 if (!ctx)
1639 goto errout; 1662 goto errout;
@@ -1658,6 +1681,8 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu)
1658 return ERR_PTR(err); 1681 return ERR_PTR(err);
1659} 1682}
1660 1683
1684static void perf_event_free_filter(struct perf_event *event);
1685
1661static void free_event_rcu(struct rcu_head *head) 1686static void free_event_rcu(struct rcu_head *head)
1662{ 1687{
1663 struct perf_event *event; 1688 struct perf_event *event;
@@ -1665,6 +1690,7 @@ static void free_event_rcu(struct rcu_head *head)
1665 event = container_of(head, struct perf_event, rcu_head); 1690 event = container_of(head, struct perf_event, rcu_head);
1666 if (event->ns) 1691 if (event->ns)
1667 put_pid_ns(event->ns); 1692 put_pid_ns(event->ns);
1693 perf_event_free_filter(event);
1668 kfree(event); 1694 kfree(event);
1669} 1695}
1670 1696
@@ -1696,16 +1722,10 @@ static void free_event(struct perf_event *event)
1696 call_rcu(&event->rcu_head, free_event_rcu); 1722 call_rcu(&event->rcu_head, free_event_rcu);
1697} 1723}
1698 1724
1699/* 1725int perf_event_release_kernel(struct perf_event *event)
1700 * Called when the last reference to the file is gone.
1701 */
1702static int perf_release(struct inode *inode, struct file *file)
1703{ 1726{
1704 struct perf_event *event = file->private_data;
1705 struct perf_event_context *ctx = event->ctx; 1727 struct perf_event_context *ctx = event->ctx;
1706 1728
1707 file->private_data = NULL;
1708
1709 WARN_ON_ONCE(ctx->parent_ctx); 1729 WARN_ON_ONCE(ctx->parent_ctx);
1710 mutex_lock(&ctx->mutex); 1730 mutex_lock(&ctx->mutex);
1711 perf_event_remove_from_context(event); 1731 perf_event_remove_from_context(event);
@@ -1720,6 +1740,19 @@ static int perf_release(struct inode *inode, struct file *file)
1720 1740
1721 return 0; 1741 return 0;
1722} 1742}
1743EXPORT_SYMBOL_GPL(perf_event_release_kernel);
1744
1745/*
1746 * Called when the last reference to the file is gone.
1747 */
1748static int perf_release(struct inode *inode, struct file *file)
1749{
1750 struct perf_event *event = file->private_data;
1751
1752 file->private_data = NULL;
1753
1754 return perf_event_release_kernel(event);
1755}
1723 1756
1724static int perf_event_read_size(struct perf_event *event) 1757static int perf_event_read_size(struct perf_event *event)
1725{ 1758{
@@ -1746,91 +1779,94 @@ static int perf_event_read_size(struct perf_event *event)
1746 return size; 1779 return size;
1747} 1780}
1748 1781
1749static u64 perf_event_read_value(struct perf_event *event) 1782u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
1750{ 1783{
1751 struct perf_event *child; 1784 struct perf_event *child;
1752 u64 total = 0; 1785 u64 total = 0;
1753 1786
1787 *enabled = 0;
1788 *running = 0;
1789
1790 mutex_lock(&event->child_mutex);
1754 total += perf_event_read(event); 1791 total += perf_event_read(event);
1755 list_for_each_entry(child, &event->child_list, child_list) 1792 *enabled += event->total_time_enabled +
1793 atomic64_read(&event->child_total_time_enabled);
1794 *running += event->total_time_running +
1795 atomic64_read(&event->child_total_time_running);
1796
1797 list_for_each_entry(child, &event->child_list, child_list) {
1756 total += perf_event_read(child); 1798 total += perf_event_read(child);
1799 *enabled += child->total_time_enabled;
1800 *running += child->total_time_running;
1801 }
1802 mutex_unlock(&event->child_mutex);
1757 1803
1758 return total; 1804 return total;
1759} 1805}
1760 1806EXPORT_SYMBOL_GPL(perf_event_read_value);
1761static int perf_event_read_entry(struct perf_event *event,
1762 u64 read_format, char __user *buf)
1763{
1764 int n = 0, count = 0;
1765 u64 values[2];
1766
1767 values[n++] = perf_event_read_value(event);
1768 if (read_format & PERF_FORMAT_ID)
1769 values[n++] = primary_event_id(event);
1770
1771 count = n * sizeof(u64);
1772
1773 if (copy_to_user(buf, values, count))
1774 return -EFAULT;
1775
1776 return count;
1777}
1778 1807
1779static int perf_event_read_group(struct perf_event *event, 1808static int perf_event_read_group(struct perf_event *event,
1780 u64 read_format, char __user *buf) 1809 u64 read_format, char __user *buf)
1781{ 1810{
1782 struct perf_event *leader = event->group_leader, *sub; 1811 struct perf_event *leader = event->group_leader, *sub;
1783 int n = 0, size = 0, err = -EFAULT; 1812 int n = 0, size = 0, ret = -EFAULT;
1784 u64 values[3]; 1813 struct perf_event_context *ctx = leader->ctx;
1814 u64 values[5];
1815 u64 count, enabled, running;
1816
1817 mutex_lock(&ctx->mutex);
1818 count = perf_event_read_value(leader, &enabled, &running);
1785 1819
1786 values[n++] = 1 + leader->nr_siblings; 1820 values[n++] = 1 + leader->nr_siblings;
1787 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { 1821 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
1788 values[n++] = leader->total_time_enabled + 1822 values[n++] = enabled;
1789 atomic64_read(&leader->child_total_time_enabled); 1823 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
1790 } 1824 values[n++] = running;
1791 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { 1825 values[n++] = count;
1792 values[n++] = leader->total_time_running + 1826 if (read_format & PERF_FORMAT_ID)
1793 atomic64_read(&leader->child_total_time_running); 1827 values[n++] = primary_event_id(leader);
1794 }
1795 1828
1796 size = n * sizeof(u64); 1829 size = n * sizeof(u64);
1797 1830
1798 if (copy_to_user(buf, values, size)) 1831 if (copy_to_user(buf, values, size))
1799 return -EFAULT; 1832 goto unlock;
1800
1801 err = perf_event_read_entry(leader, read_format, buf + size);
1802 if (err < 0)
1803 return err;
1804 1833
1805 size += err; 1834 ret = size;
1806 1835
1807 list_for_each_entry(sub, &leader->sibling_list, group_entry) { 1836 list_for_each_entry(sub, &leader->sibling_list, group_entry) {
1808 err = perf_event_read_entry(sub, read_format, 1837 n = 0;
1809 buf + size); 1838
1810 if (err < 0) 1839 values[n++] = perf_event_read_value(sub, &enabled, &running);
1811 return err; 1840 if (read_format & PERF_FORMAT_ID)
1841 values[n++] = primary_event_id(sub);
1842
1843 size = n * sizeof(u64);
1844
1845 if (copy_to_user(buf + ret, values, size)) {
1846 ret = -EFAULT;
1847 goto unlock;
1848 }
1812 1849
1813 size += err; 1850 ret += size;
1814 } 1851 }
1852unlock:
1853 mutex_unlock(&ctx->mutex);
1815 1854
1816 return size; 1855 return ret;
1817} 1856}
1818 1857
1819static int perf_event_read_one(struct perf_event *event, 1858static int perf_event_read_one(struct perf_event *event,
1820 u64 read_format, char __user *buf) 1859 u64 read_format, char __user *buf)
1821{ 1860{
1861 u64 enabled, running;
1822 u64 values[4]; 1862 u64 values[4];
1823 int n = 0; 1863 int n = 0;
1824 1864
1825 values[n++] = perf_event_read_value(event); 1865 values[n++] = perf_event_read_value(event, &enabled, &running);
1826 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { 1866 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
1827 values[n++] = event->total_time_enabled + 1867 values[n++] = enabled;
1828 atomic64_read(&event->child_total_time_enabled); 1868 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
1829 } 1869 values[n++] = running;
1830 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
1831 values[n++] = event->total_time_running +
1832 atomic64_read(&event->child_total_time_running);
1833 }
1834 if (read_format & PERF_FORMAT_ID) 1870 if (read_format & PERF_FORMAT_ID)
1835 values[n++] = primary_event_id(event); 1871 values[n++] = primary_event_id(event);
1836 1872
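The rewritten perf_event_read_group() above emits a PERF_FORMAT_GROUP read as: nr, optional time_enabled, optional time_running, then the leader's {value, optional id} followed by one {value, optional id} per sibling, all under ctx->mutex so the group is read consistently. A hedged userspace sketch of parsing that buffer, assuming all of TOTAL_TIME_ENABLED, TOTAL_TIME_RUNNING and ID were requested (struct and function names are illustrative):

#include <stdint.h>
#include <stdio.h>

/* Mirrors the values[] ordering produced by perf_event_read_group() when
 * PERF_FORMAT_GROUP | TOTAL_TIME_ENABLED | TOTAL_TIME_RUNNING | ID is set. */
struct group_read {
        uint64_t nr;            /* 1 + number of siblings */
        uint64_t time_enabled;
        uint64_t time_running;
        struct {
                uint64_t value;
                uint64_t id;
        } cntr[];               /* leader first, then each sibling */
};

static void dump_group(const struct group_read *g)
{
        for (uint64_t i = 0; i < g->nr; i++)
                printf("id %llu: value %llu (enabled %llu, running %llu)\n",
                       (unsigned long long)g->cntr[i].id,
                       (unsigned long long)g->cntr[i].value,
                       (unsigned long long)g->time_enabled,
                       (unsigned long long)g->time_running);
}

Only the leader's enabled/running times are reported; the siblings are scheduled together with the leader, which is why the per-sibling entries carry just a value and an id.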
@@ -1861,12 +1897,10 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count)
1861 return -ENOSPC; 1897 return -ENOSPC;
1862 1898
1863 WARN_ON_ONCE(event->ctx->parent_ctx); 1899 WARN_ON_ONCE(event->ctx->parent_ctx);
1864 mutex_lock(&event->child_mutex);
1865 if (read_format & PERF_FORMAT_GROUP) 1900 if (read_format & PERF_FORMAT_GROUP)
1866 ret = perf_event_read_group(event, read_format, buf); 1901 ret = perf_event_read_group(event, read_format, buf);
1867 else 1902 else
1868 ret = perf_event_read_one(event, read_format, buf); 1903 ret = perf_event_read_one(event, read_format, buf);
1869 mutex_unlock(&event->child_mutex);
1870 1904
1871 return ret; 1905 return ret;
1872} 1906}
@@ -1956,7 +1990,7 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
1956 if (!value) 1990 if (!value)
1957 return -EINVAL; 1991 return -EINVAL;
1958 1992
1959 spin_lock_irq(&ctx->lock); 1993 raw_spin_lock_irq(&ctx->lock);
1960 if (event->attr.freq) { 1994 if (event->attr.freq) {
1961 if (value > sysctl_perf_event_sample_rate) { 1995 if (value > sysctl_perf_event_sample_rate) {
1962 ret = -EINVAL; 1996 ret = -EINVAL;
@@ -1969,12 +2003,13 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
1969 event->hw.sample_period = value; 2003 event->hw.sample_period = value;
1970 } 2004 }
1971unlock: 2005unlock:
1972 spin_unlock_irq(&ctx->lock); 2006 raw_spin_unlock_irq(&ctx->lock);
1973 2007
1974 return ret; 2008 return ret;
1975} 2009}
1976 2010
1977int perf_event_set_output(struct perf_event *event, int output_fd); 2011static int perf_event_set_output(struct perf_event *event, int output_fd);
2012static int perf_event_set_filter(struct perf_event *event, void __user *arg);
1978 2013
1979static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 2014static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1980{ 2015{
@@ -2002,6 +2037,9 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2002 case PERF_EVENT_IOC_SET_OUTPUT: 2037 case PERF_EVENT_IOC_SET_OUTPUT:
2003 return perf_event_set_output(event, arg); 2038 return perf_event_set_output(event, arg);
2004 2039
2040 case PERF_EVENT_IOC_SET_FILTER:
2041 return perf_event_set_filter(event, (void __user *)arg);
2042
2005 default: 2043 default:
2006 return -ENOTTY; 2044 return -ENOTTY;
2007 } 2045 }
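The new PERF_EVENT_IOC_SET_FILTER ioctl hands an ftrace-style filter string to a tracepoint event; events that fail the filter are dropped in perf_tp_event_match() further down. A userspace sketch of setting such a filter (the fd and the filter expression are placeholders):

#include <sys/ioctl.h>
#include <linux/perf_event.h>

/* fd must refer to a PERF_TYPE_TRACEPOINT event; the expression uses the
 * tracepoint's field names, here a hypothetical "skip pid 0" condition. */
static int set_tp_filter(int fd)
{
        return ioctl(fd, PERF_EVENT_IOC_SET_FILTER, "common_pid != 0");
}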
@@ -2174,6 +2212,7 @@ static void perf_mmap_data_free(struct perf_mmap_data *data)
2174 perf_mmap_free_page((unsigned long)data->user_page); 2212 perf_mmap_free_page((unsigned long)data->user_page);
2175 for (i = 0; i < data->nr_pages; i++) 2213 for (i = 0; i < data->nr_pages; i++)
2176 perf_mmap_free_page((unsigned long)data->data_pages[i]); 2214 perf_mmap_free_page((unsigned long)data->data_pages[i]);
2215 kfree(data);
2177} 2216}
2178 2217
2179#else 2218#else
@@ -2214,6 +2253,7 @@ static void perf_mmap_data_free_work(struct work_struct *work)
2214 perf_mmap_unmark_page(base + (i * PAGE_SIZE)); 2253 perf_mmap_unmark_page(base + (i * PAGE_SIZE));
2215 2254
2216 vfree(base); 2255 vfree(base);
2256 kfree(data);
2217} 2257}
2218 2258
2219static void perf_mmap_data_free(struct perf_mmap_data *data) 2259static void perf_mmap_data_free(struct perf_mmap_data *data)
@@ -2307,7 +2347,7 @@ perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data)
2307 } 2347 }
2308 2348
2309 if (!data->watermark) 2349 if (!data->watermark)
2310 data->watermark = max_t(long, PAGE_SIZE, max_size / 2); 2350 data->watermark = max_size / 2;
2311 2351
2312 2352
2313 rcu_assign_pointer(event->data, data); 2353 rcu_assign_pointer(event->data, data);
@@ -2319,7 +2359,6 @@ static void perf_mmap_data_free_rcu(struct rcu_head *rcu_head)
2319 2359
2320 data = container_of(rcu_head, struct perf_mmap_data, rcu_head); 2360 data = container_of(rcu_head, struct perf_mmap_data, rcu_head);
2321 perf_mmap_data_free(data); 2361 perf_mmap_data_free(data);
2322 kfree(data);
2323} 2362}
2324 2363
2325static void perf_mmap_data_release(struct perf_event *event) 2364static void perf_mmap_data_release(struct perf_event *event)
@@ -2666,20 +2705,21 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
2666static void perf_output_lock(struct perf_output_handle *handle) 2705static void perf_output_lock(struct perf_output_handle *handle)
2667{ 2706{
2668 struct perf_mmap_data *data = handle->data; 2707 struct perf_mmap_data *data = handle->data;
2669 int cpu; 2708 int cur, cpu = get_cpu();
2670 2709
2671 handle->locked = 0; 2710 handle->locked = 0;
2672 2711
2673 local_irq_save(handle->flags); 2712 for (;;) {
2674 cpu = smp_processor_id(); 2713 cur = atomic_cmpxchg(&data->lock, -1, cpu);
2675 2714 if (cur == -1) {
2676 if (in_nmi() && atomic_read(&data->lock) == cpu) 2715 handle->locked = 1;
2677 return; 2716 break;
2717 }
2718 if (cur == cpu)
2719 break;
2678 2720
2679 while (atomic_cmpxchg(&data->lock, -1, cpu) != -1)
2680 cpu_relax(); 2721 cpu_relax();
2681 2722 }
2682 handle->locked = 1;
2683} 2723}
2684 2724
2685static void perf_output_unlock(struct perf_output_handle *handle) 2725static void perf_output_unlock(struct perf_output_handle *handle)
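The rewritten perf_output_lock() above collapses the old irq-disable plus spin logic into one cmpxchg loop on data->lock: claim it if it is free (-1), fall through without claiming it if this CPU already owns it (the nested/NMI case), otherwise spin. A simplified userspace model of that ownership-recursive lock, using C11 atomics and intended only as an illustration of the idea:

#include <stdatomic.h>
#include <stdbool.h>

struct out_lock {
        atomic_int owner;       /* initialise to -1: free */
};

/* Returns true if this call took the lock (and must release it later),
 * false if the caller already owned it, i.e. a nested acquisition. */
static bool out_lock_acquire(struct out_lock *l, int id)
{
        for (;;) {
                int expected = -1;
                if (atomic_compare_exchange_strong(&l->owner, &expected, id))
                        return true;            /* we took it */
                if (expected == id)
                        return false;           /* already ours: nested use */
                /* held by someone else: spin */
        }
}

static void out_lock_release(struct out_lock *l, bool locked)
{
        if (locked)
                atomic_store(&l->owner, -1);
}

The release-only-if-locked rule matches handle->locked above: only the outermost owner eventually clears the lock word.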
@@ -2725,7 +2765,7 @@ again:
2725 if (atomic_xchg(&data->wakeup, 0)) 2765 if (atomic_xchg(&data->wakeup, 0))
2726 perf_output_wakeup(handle); 2766 perf_output_wakeup(handle);
2727out: 2767out:
2728 local_irq_restore(handle->flags); 2768 put_cpu();
2729} 2769}
2730 2770
2731void perf_output_copy(struct perf_output_handle *handle, 2771void perf_output_copy(struct perf_output_handle *handle,
@@ -3236,15 +3276,10 @@ static void perf_event_task_ctx(struct perf_event_context *ctx,
3236{ 3276{
3237 struct perf_event *event; 3277 struct perf_event *event;
3238 3278
3239 if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
3240 return;
3241
3242 rcu_read_lock();
3243 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 3279 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
3244 if (perf_event_task_match(event)) 3280 if (perf_event_task_match(event))
3245 perf_event_task_output(event, task_event); 3281 perf_event_task_output(event, task_event);
3246 } 3282 }
3247 rcu_read_unlock();
3248} 3283}
3249 3284
3250static void perf_event_task_event(struct perf_task_event *task_event) 3285static void perf_event_task_event(struct perf_task_event *task_event)
@@ -3252,11 +3287,11 @@ static void perf_event_task_event(struct perf_task_event *task_event)
3252 struct perf_cpu_context *cpuctx; 3287 struct perf_cpu_context *cpuctx;
3253 struct perf_event_context *ctx = task_event->task_ctx; 3288 struct perf_event_context *ctx = task_event->task_ctx;
3254 3289
3290 rcu_read_lock();
3255 cpuctx = &get_cpu_var(perf_cpu_context); 3291 cpuctx = &get_cpu_var(perf_cpu_context);
3256 perf_event_task_ctx(&cpuctx->ctx, task_event); 3292 perf_event_task_ctx(&cpuctx->ctx, task_event);
3257 put_cpu_var(perf_cpu_context); 3293 put_cpu_var(perf_cpu_context);
3258 3294
3259 rcu_read_lock();
3260 if (!ctx) 3295 if (!ctx)
3261 ctx = rcu_dereference(task_event->task->perf_event_ctxp); 3296 ctx = rcu_dereference(task_event->task->perf_event_ctxp);
3262 if (ctx) 3297 if (ctx)
@@ -3348,15 +3383,10 @@ static void perf_event_comm_ctx(struct perf_event_context *ctx,
3348{ 3383{
3349 struct perf_event *event; 3384 struct perf_event *event;
3350 3385
3351 if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
3352 return;
3353
3354 rcu_read_lock();
3355 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 3386 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
3356 if (perf_event_comm_match(event)) 3387 if (perf_event_comm_match(event))
3357 perf_event_comm_output(event, comm_event); 3388 perf_event_comm_output(event, comm_event);
3358 } 3389 }
3359 rcu_read_unlock();
3360} 3390}
3361 3391
3362static void perf_event_comm_event(struct perf_comm_event *comm_event) 3392static void perf_event_comm_event(struct perf_comm_event *comm_event)
@@ -3367,7 +3397,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
3367 char comm[TASK_COMM_LEN]; 3397 char comm[TASK_COMM_LEN];
3368 3398
3369 memset(comm, 0, sizeof(comm)); 3399 memset(comm, 0, sizeof(comm));
3370 strncpy(comm, comm_event->task->comm, sizeof(comm)); 3400 strlcpy(comm, comm_event->task->comm, sizeof(comm));
3371 size = ALIGN(strlen(comm)+1, sizeof(u64)); 3401 size = ALIGN(strlen(comm)+1, sizeof(u64));
3372 3402
3373 comm_event->comm = comm; 3403 comm_event->comm = comm;
@@ -3375,11 +3405,11 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
3375 3405
3376 comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; 3406 comm_event->event_id.header.size = sizeof(comm_event->event_id) + size;
3377 3407
3408 rcu_read_lock();
3378 cpuctx = &get_cpu_var(perf_cpu_context); 3409 cpuctx = &get_cpu_var(perf_cpu_context);
3379 perf_event_comm_ctx(&cpuctx->ctx, comm_event); 3410 perf_event_comm_ctx(&cpuctx->ctx, comm_event);
3380 put_cpu_var(perf_cpu_context); 3411 put_cpu_var(perf_cpu_context);
3381 3412
3382 rcu_read_lock();
3383 /* 3413 /*
3384 * doesn't really matter which of the child contexts the 3414 * doesn't really matter which of the child contexts the
3385 * events ends up in. 3415 * events ends up in.
@@ -3472,15 +3502,10 @@ static void perf_event_mmap_ctx(struct perf_event_context *ctx,
3472{ 3502{
3473 struct perf_event *event; 3503 struct perf_event *event;
3474 3504
3475 if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
3476 return;
3477
3478 rcu_read_lock();
3479 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 3505 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
3480 if (perf_event_mmap_match(event, mmap_event)) 3506 if (perf_event_mmap_match(event, mmap_event))
3481 perf_event_mmap_output(event, mmap_event); 3507 perf_event_mmap_output(event, mmap_event);
3482 } 3508 }
3483 rcu_read_unlock();
3484} 3509}
3485 3510
3486static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) 3511static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
@@ -3536,11 +3561,11 @@ got_name:
3536 3561
3537 mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; 3562 mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size;
3538 3563
3564 rcu_read_lock();
3539 cpuctx = &get_cpu_var(perf_cpu_context); 3565 cpuctx = &get_cpu_var(perf_cpu_context);
3540 perf_event_mmap_ctx(&cpuctx->ctx, mmap_event); 3566 perf_event_mmap_ctx(&cpuctx->ctx, mmap_event);
3541 put_cpu_var(perf_cpu_context); 3567 put_cpu_var(perf_cpu_context);
3542 3568
3543 rcu_read_lock();
3544 /* 3569 /*
3545 * doesn't really matter which of the child contexts the 3570 * doesn't really matter which of the child contexts the
3546 * events ends up in. 3571 * events ends up in.
@@ -3679,7 +3704,11 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
3679 perf_event_disable(event); 3704 perf_event_disable(event);
3680 } 3705 }
3681 3706
3682 perf_event_output(event, nmi, data, regs); 3707 if (event->overflow_handler)
3708 event->overflow_handler(event, nmi, data, regs);
3709 else
3710 perf_event_output(event, nmi, data, regs);
3711
3683 return ret; 3712 return ret;
3684} 3713}
3685 3714
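An event can now carry its own overflow callback, invoked here instead of the default perf_event_output() path (which remains the fallback). A kernel-style sketch of a callback matching that call site; this is only an illustration, and the registration side (perf_event_alloc() gaining an overflow_handler argument, see below) is not shown:

/* Matches the invocation event->overflow_handler(event, nmi, data, regs)
 * in __perf_event_overflow() above; the body is a placeholder. */
static void my_overflow_handler(struct perf_event *event, int nmi,
                                struct perf_sample_data *data,
                                struct pt_regs *regs)
{
        /* e.g. note the hit, wake a consumer, or re-arm the event */
}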
@@ -3724,16 +3753,16 @@ again:
3724 return nr; 3753 return nr;
3725} 3754}
3726 3755
3727static void perf_swevent_overflow(struct perf_event *event, 3756static void perf_swevent_overflow(struct perf_event *event, u64 overflow,
3728 int nmi, struct perf_sample_data *data, 3757 int nmi, struct perf_sample_data *data,
3729 struct pt_regs *regs) 3758 struct pt_regs *regs)
3730{ 3759{
3731 struct hw_perf_event *hwc = &event->hw; 3760 struct hw_perf_event *hwc = &event->hw;
3732 int throttle = 0; 3761 int throttle = 0;
3733 u64 overflow;
3734 3762
3735 data->period = event->hw.last_period; 3763 data->period = event->hw.last_period;
3736 overflow = perf_swevent_set_period(event); 3764 if (!overflow)
3765 overflow = perf_swevent_set_period(event);
3737 3766
3738 if (hwc->interrupts == MAX_INTERRUPTS) 3767 if (hwc->interrupts == MAX_INTERRUPTS)
3739 return; 3768 return;
@@ -3766,14 +3795,19 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,
3766 3795
3767 atomic64_add(nr, &event->count); 3796 atomic64_add(nr, &event->count);
3768 3797
3798 if (!regs)
3799 return;
3800
3769 if (!hwc->sample_period) 3801 if (!hwc->sample_period)
3770 return; 3802 return;
3771 3803
3772 if (!regs) 3804 if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq)
3805 return perf_swevent_overflow(event, 1, nmi, data, regs);
3806
3807 if (atomic64_add_negative(nr, &hwc->period_left))
3773 return; 3808 return;
3774 3809
3775 if (!atomic64_add_negative(nr, &hwc->period_left)) 3810 perf_swevent_overflow(event, 0, nmi, data, regs);
3776 perf_swevent_overflow(event, nmi, data, regs);
3777} 3811}
3778 3812
3779static int perf_swevent_is_counting(struct perf_event *event) 3813static int perf_swevent_is_counting(struct perf_event *event)
@@ -3806,25 +3840,44 @@ static int perf_swevent_is_counting(struct perf_event *event)
3806 return 1; 3840 return 1;
3807} 3841}
3808 3842
3843static int perf_tp_event_match(struct perf_event *event,
3844 struct perf_sample_data *data);
3845
3846static int perf_exclude_event(struct perf_event *event,
3847 struct pt_regs *regs)
3848{
3849 if (regs) {
3850 if (event->attr.exclude_user && user_mode(regs))
3851 return 1;
3852
3853 if (event->attr.exclude_kernel && !user_mode(regs))
3854 return 1;
3855 }
3856
3857 return 0;
3858}
3859
3809static int perf_swevent_match(struct perf_event *event, 3860static int perf_swevent_match(struct perf_event *event,
3810 enum perf_type_id type, 3861 enum perf_type_id type,
3811 u32 event_id, struct pt_regs *regs) 3862 u32 event_id,
3863 struct perf_sample_data *data,
3864 struct pt_regs *regs)
3812{ 3865{
3813 if (!perf_swevent_is_counting(event)) 3866 if (!perf_swevent_is_counting(event))
3814 return 0; 3867 return 0;
3815 3868
3816 if (event->attr.type != type) 3869 if (event->attr.type != type)
3817 return 0; 3870 return 0;
3871
3818 if (event->attr.config != event_id) 3872 if (event->attr.config != event_id)
3819 return 0; 3873 return 0;
3820 3874
3821 if (regs) { 3875 if (perf_exclude_event(event, regs))
3822 if (event->attr.exclude_user && user_mode(regs)) 3876 return 0;
3823 return 0;
3824 3877
3825 if (event->attr.exclude_kernel && !user_mode(regs)) 3878 if (event->attr.type == PERF_TYPE_TRACEPOINT &&
3826 return 0; 3879 !perf_tp_event_match(event, data))
3827 } 3880 return 0;
3828 3881
3829 return 1; 3882 return 1;
3830} 3883}
@@ -3837,49 +3890,59 @@ static void perf_swevent_ctx_event(struct perf_event_context *ctx,
3837{ 3890{
3838 struct perf_event *event; 3891 struct perf_event *event;
3839 3892
3840 if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
3841 return;
3842
3843 rcu_read_lock();
3844 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 3893 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
3845 if (perf_swevent_match(event, type, event_id, regs)) 3894 if (perf_swevent_match(event, type, event_id, data, regs))
3846 perf_swevent_add(event, nr, nmi, data, regs); 3895 perf_swevent_add(event, nr, nmi, data, regs);
3847 } 3896 }
3848 rcu_read_unlock();
3849} 3897}
3850 3898
3851static int *perf_swevent_recursion_context(struct perf_cpu_context *cpuctx) 3899int perf_swevent_get_recursion_context(void)
3852{ 3900{
3901 struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
3902 int rctx;
3903
3853 if (in_nmi()) 3904 if (in_nmi())
3854 return &cpuctx->recursion[3]; 3905 rctx = 3;
3906 else if (in_irq())
3907 rctx = 2;
3908 else if (in_softirq())
3909 rctx = 1;
3910 else
3911 rctx = 0;
3855 3912
3856 if (in_irq()) 3913 if (cpuctx->recursion[rctx]) {
3857 return &cpuctx->recursion[2]; 3914 put_cpu_var(perf_cpu_context);
3915 return -1;
3916 }
3858 3917
3859 if (in_softirq()) 3918 cpuctx->recursion[rctx]++;
3860 return &cpuctx->recursion[1]; 3919 barrier();
3861 3920
3862 return &cpuctx->recursion[0]; 3921 return rctx;
3863} 3922}
3923EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);
3924
3925void perf_swevent_put_recursion_context(int rctx)
3926{
3927 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
3928 barrier();
3929 cpuctx->recursion[rctx]--;
3930 put_cpu_var(perf_cpu_context);
3931}
3932EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context);
3864 3933
3865static void do_perf_sw_event(enum perf_type_id type, u32 event_id, 3934static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
3866 u64 nr, int nmi, 3935 u64 nr, int nmi,
3867 struct perf_sample_data *data, 3936 struct perf_sample_data *data,
3868 struct pt_regs *regs) 3937 struct pt_regs *regs)
3869{ 3938{
3870 struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); 3939 struct perf_cpu_context *cpuctx;
3871 int *recursion = perf_swevent_recursion_context(cpuctx);
3872 struct perf_event_context *ctx; 3940 struct perf_event_context *ctx;
3873 3941
3874 if (*recursion) 3942 cpuctx = &__get_cpu_var(perf_cpu_context);
3875 goto out; 3943 rcu_read_lock();
3876
3877 (*recursion)++;
3878 barrier();
3879
3880 perf_swevent_ctx_event(&cpuctx->ctx, type, event_id, 3944 perf_swevent_ctx_event(&cpuctx->ctx, type, event_id,
3881 nr, nmi, data, regs); 3945 nr, nmi, data, regs);
3882 rcu_read_lock();
3883 /* 3946 /*
3884 * doesn't really matter which of the child contexts the 3947 * doesn't really matter which of the child contexts the
3885 * events ends up in. 3948 * events ends up in.
@@ -3888,23 +3951,24 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
3888 if (ctx) 3951 if (ctx)
3889 perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs); 3952 perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs);
3890 rcu_read_unlock(); 3953 rcu_read_unlock();
3891
3892 barrier();
3893 (*recursion)--;
3894
3895out:
3896 put_cpu_var(perf_cpu_context);
3897} 3954}
3898 3955
3899void __perf_sw_event(u32 event_id, u64 nr, int nmi, 3956void __perf_sw_event(u32 event_id, u64 nr, int nmi,
3900 struct pt_regs *regs, u64 addr) 3957 struct pt_regs *regs, u64 addr)
3901{ 3958{
3902 struct perf_sample_data data = { 3959 struct perf_sample_data data;
3903 .addr = addr, 3960 int rctx;
3904 };
3905 3961
3906 do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, 3962 rctx = perf_swevent_get_recursion_context();
3907 &data, regs); 3963 if (rctx < 0)
3964 return;
3965
3966 data.addr = addr;
3967 data.raw = NULL;
3968
3969 do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs);
3970
3971 perf_swevent_put_recursion_context(rctx);
3908} 3972}
3909 3973
3910static void perf_swevent_read(struct perf_event *event) 3974static void perf_swevent_read(struct perf_event *event)
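perf_swevent_get_recursion_context()/perf_swevent_put_recursion_context() are exported above, so other in-kernel emitters can reuse the per-CPU, per-context (task/softirq/irq/NMI) recursion guard that __perf_sw_event() now uses. The expected calling pattern, sketched with the emission step left as a placeholder:

/* Guard a software-event emission against same-context recursion;
 * mirrors the pattern in __perf_sw_event() above. */
int rctx = perf_swevent_get_recursion_context();
if (rctx < 0)
        return;                 /* already nested at this context level */

/* ... fill in perf_sample_data and emit the event ... */

perf_swevent_put_recursion_context(rctx);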
@@ -3949,6 +4013,8 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
3949 event->pmu->read(event); 4013 event->pmu->read(event);
3950 4014
3951 data.addr = 0; 4015 data.addr = 0;
4016 data.raw = NULL;
4017 data.period = event->hw.last_period;
3952 regs = get_irq_regs(); 4018 regs = get_irq_regs();
3953 /* 4019 /*
3954 * In case we exclude kernel IPs or are somehow not in interrupt 4020 * In case we exclude kernel IPs or are somehow not in interrupt
@@ -4017,8 +4083,7 @@ static void cpu_clock_perf_event_update(struct perf_event *event)
4017 u64 now; 4083 u64 now;
4018 4084
4019 now = cpu_clock(cpu); 4085 now = cpu_clock(cpu);
4020 prev = atomic64_read(&event->hw.prev_count); 4086 prev = atomic64_xchg(&event->hw.prev_count, now);
4021 atomic64_set(&event->hw.prev_count, now);
4022 atomic64_add(now - prev, &event->count); 4087 atomic64_add(now - prev, &event->count);
4023} 4088}
4024 4089
@@ -4108,6 +4173,7 @@ static const struct pmu perf_ops_task_clock = {
4108}; 4173};
4109 4174
4110#ifdef CONFIG_EVENT_PROFILE 4175#ifdef CONFIG_EVENT_PROFILE
4176
4111void perf_tp_event(int event_id, u64 addr, u64 count, void *record, 4177void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
4112 int entry_size) 4178 int entry_size)
4113{ 4179{
@@ -4126,13 +4192,21 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
4126 if (!regs) 4192 if (!regs)
4127 regs = task_pt_regs(current); 4193 regs = task_pt_regs(current);
4128 4194
4195 /* Trace events already protected against recursion */
4129 do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, 4196 do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1,
4130 &data, regs); 4197 &data, regs);
4131} 4198}
4132EXPORT_SYMBOL_GPL(perf_tp_event); 4199EXPORT_SYMBOL_GPL(perf_tp_event);
4133 4200
4134extern int ftrace_profile_enable(int); 4201static int perf_tp_event_match(struct perf_event *event,
4135extern void ftrace_profile_disable(int); 4202 struct perf_sample_data *data)
4203{
4204 void *record = data->raw->data;
4205
4206 if (likely(!event->filter) || filter_match_preds(event->filter, record))
4207 return 1;
4208 return 0;
4209}
4136 4210
4137static void tp_perf_event_destroy(struct perf_event *event) 4211static void tp_perf_event_destroy(struct perf_event *event)
4138{ 4212{
@@ -4157,11 +4231,93 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event)
4157 4231
4158 return &perf_ops_generic; 4232 return &perf_ops_generic;
4159} 4233}
4234
4235static int perf_event_set_filter(struct perf_event *event, void __user *arg)
4236{
4237 char *filter_str;
4238 int ret;
4239
4240 if (event->attr.type != PERF_TYPE_TRACEPOINT)
4241 return -EINVAL;
4242
4243 filter_str = strndup_user(arg, PAGE_SIZE);
4244 if (IS_ERR(filter_str))
4245 return PTR_ERR(filter_str);
4246
4247 ret = ftrace_profile_set_filter(event, event->attr.config, filter_str);
4248
4249 kfree(filter_str);
4250 return ret;
4251}
4252
4253static void perf_event_free_filter(struct perf_event *event)
4254{
4255 ftrace_profile_free_filter(event);
4256}
4257
4160#else 4258#else
4259
4260static int perf_tp_event_match(struct perf_event *event,
4261 struct perf_sample_data *data)
4262{
4263 return 1;
4264}
4265
4161static const struct pmu *tp_perf_event_init(struct perf_event *event) 4266static const struct pmu *tp_perf_event_init(struct perf_event *event)
4162{ 4267{
4163 return NULL; 4268 return NULL;
4164} 4269}
4270
4271static int perf_event_set_filter(struct perf_event *event, void __user *arg)
4272{
4273 return -ENOENT;
4274}
4275
4276static void perf_event_free_filter(struct perf_event *event)
4277{
4278}
4279
4280#endif /* CONFIG_EVENT_PROFILE */
4281
4282#ifdef CONFIG_HAVE_HW_BREAKPOINT
4283static void bp_perf_event_destroy(struct perf_event *event)
4284{
4285 release_bp_slot(event);
4286}
4287
4288static const struct pmu *bp_perf_event_init(struct perf_event *bp)
4289{
4290 int err;
4291
4292 err = register_perf_hw_breakpoint(bp);
4293 if (err)
4294 return ERR_PTR(err);
4295
4296 bp->destroy = bp_perf_event_destroy;
4297
4298 return &perf_ops_bp;
4299}
4300
4301void perf_bp_event(struct perf_event *bp, void *data)
4302{
4303 struct perf_sample_data sample;
4304 struct pt_regs *regs = data;
4305
4306 sample.raw = NULL;
4307 sample.addr = bp->attr.bp_addr;
4308
4309 if (!perf_exclude_event(bp, regs))
4310 perf_swevent_add(bp, 1, 1, &sample, regs);
4311}
4312#else
4313static const struct pmu *bp_perf_event_init(struct perf_event *bp)
4314{
4315 return NULL;
4316}
4317
4318void perf_bp_event(struct perf_event *bp, void *regs)
4319{
4320}
4165#endif 4321#endif
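In the CONFIG_HAVE_HW_BREAKPOINT block above, bp_perf_event_init() reserves a breakpoint slot and hooks a destroy callback that releases it, and perf_bp_event() turns a breakpoint hit into a software-style sample whose address is the watched location. A hedged sketch of the attribute such an event might carry; the bp_addr/bp_type/bp_len field names and the HW_BREAKPOINT_* constants come from this series, while the make_write_watchpoint() helper and the watched address are purely illustrative:

#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>

/* Illustrative only: build an attr that watches 4 bytes for writes. */
static struct perf_event_attr make_write_watchpoint(unsigned long addr)
{
	struct perf_event_attr attr = {
		.type		= PERF_TYPE_BREAKPOINT,
		.size		= sizeof(attr),
		.bp_addr	= addr,
		.bp_type	= HW_BREAKPOINT_W,	/* trigger on writes */
		.bp_len		= HW_BREAKPOINT_LEN_4,	/* watch 4 bytes */
		.sample_period	= 1,			/* report every hit */
	};

	return attr;
}
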
4166 4322
4167atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; 4323atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
@@ -4208,6 +4364,8 @@ static const struct pmu *sw_perf_event_init(struct perf_event *event)
4208 case PERF_COUNT_SW_PAGE_FAULTS_MAJ: 4364 case PERF_COUNT_SW_PAGE_FAULTS_MAJ:
4209 case PERF_COUNT_SW_CONTEXT_SWITCHES: 4365 case PERF_COUNT_SW_CONTEXT_SWITCHES:
4210 case PERF_COUNT_SW_CPU_MIGRATIONS: 4366 case PERF_COUNT_SW_CPU_MIGRATIONS:
4367 case PERF_COUNT_SW_ALIGNMENT_FAULTS:
4368 case PERF_COUNT_SW_EMULATION_FAULTS:
4211 if (!event->parent) { 4369 if (!event->parent) {
4212 atomic_inc(&perf_swevent_enabled[event_id]); 4370 atomic_inc(&perf_swevent_enabled[event_id]);
4213 event->destroy = sw_perf_event_destroy; 4371 event->destroy = sw_perf_event_destroy;
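The two new ids above, PERF_COUNT_SW_ALIGNMENT_FAULTS and PERF_COUNT_SW_EMULATION_FAULTS, become countable software events once an architecture reports them from its fault paths. A hedged sketch of such a report site; the handler name is invented and the perf_sw_event() signature (including the nmi argument) is assumed from this kernel generation:

#include <linux/perf_event.h>

/* Hypothetical arch fault path: one alignment fixup at 'addr'. */
static void report_alignment_fault(struct pt_regs *regs, unsigned long addr)
{
	/* cheap no-op while no PERF_COUNT_SW_ALIGNMENT_FAULTS event is enabled */
	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, addr);
}
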
@@ -4228,6 +4386,7 @@ perf_event_alloc(struct perf_event_attr *attr,
4228 struct perf_event_context *ctx, 4386 struct perf_event_context *ctx,
4229 struct perf_event *group_leader, 4387 struct perf_event *group_leader,
4230 struct perf_event *parent_event, 4388 struct perf_event *parent_event,
4389 perf_overflow_handler_t overflow_handler,
4231 gfp_t gfpflags) 4390 gfp_t gfpflags)
4232{ 4391{
4233 const struct pmu *pmu; 4392 const struct pmu *pmu;
@@ -4270,6 +4429,11 @@ perf_event_alloc(struct perf_event_attr *attr,
4270 4429
4271 event->state = PERF_EVENT_STATE_INACTIVE; 4430 event->state = PERF_EVENT_STATE_INACTIVE;
4272 4431
4432 if (!overflow_handler && parent_event)
4433 overflow_handler = parent_event->overflow_handler;
4434
4435 event->overflow_handler = overflow_handler;
4436
4273 if (attr->disabled) 4437 if (attr->disabled)
4274 event->state = PERF_EVENT_STATE_OFF; 4438 event->state = PERF_EVENT_STATE_OFF;
4275 4439
@@ -4304,6 +4468,11 @@ perf_event_alloc(struct perf_event_attr *attr,
4304 pmu = tp_perf_event_init(event); 4468 pmu = tp_perf_event_init(event);
4305 break; 4469 break;
4306 4470
4471 case PERF_TYPE_BREAKPOINT:
4472 pmu = bp_perf_event_init(event);
4473 break;
4474
4475
4307 default: 4476 default:
4308 break; 4477 break;
4309 } 4478 }
@@ -4398,7 +4567,7 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
4398 if (attr->type >= PERF_TYPE_MAX) 4567 if (attr->type >= PERF_TYPE_MAX)
4399 return -EINVAL; 4568 return -EINVAL;
4400 4569
4401 if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3) 4570 if (attr->__reserved_1 || attr->__reserved_2)
4402 return -EINVAL; 4571 return -EINVAL;
4403 4572
4404 if (attr->sample_type & ~(PERF_SAMPLE_MAX-1)) 4573 if (attr->sample_type & ~(PERF_SAMPLE_MAX-1))
@@ -4416,7 +4585,7 @@ err_size:
4416 goto out; 4585 goto out;
4417} 4586}
4418 4587
4419int perf_event_set_output(struct perf_event *event, int output_fd) 4588static int perf_event_set_output(struct perf_event *event, int output_fd)
4420{ 4589{
4421 struct perf_event *output_event = NULL; 4590 struct perf_event *output_event = NULL;
4422 struct file *output_file = NULL; 4591 struct file *output_file = NULL;
@@ -4546,7 +4715,7 @@ SYSCALL_DEFINE5(perf_event_open,
4546 } 4715 }
4547 4716
4548 event = perf_event_alloc(&attr, cpu, ctx, group_leader, 4717 event = perf_event_alloc(&attr, cpu, ctx, group_leader,
4549 NULL, GFP_KERNEL); 4718 NULL, NULL, GFP_KERNEL);
4550 err = PTR_ERR(event); 4719 err = PTR_ERR(event);
4551 if (IS_ERR(event)) 4720 if (IS_ERR(event))
4552 goto err_put_context; 4721 goto err_put_context;
@@ -4594,6 +4763,61 @@ err_put_context:
4594 return err; 4763 return err;
4595} 4764}
4596 4765
4766/**
4767 * perf_event_create_kernel_counter
4768 *
4769 * @attr: attributes of the counter to create
4770 * @cpu: cpu to which the counter is bound
4771 * @pid: task to profile
4772 */
4773struct perf_event *
4774perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
4775 pid_t pid,
4776 perf_overflow_handler_t overflow_handler)
4777{
4778 struct perf_event *event;
4779 struct perf_event_context *ctx;
4780 int err;
4781
4782 /*
4783 * Get the target context (task or percpu):
4784 */
4785
4786 ctx = find_get_context(pid, cpu);
4787 if (IS_ERR(ctx)) {
4788 err = PTR_ERR(ctx);
4789 goto err_exit;
4790 }
4791
4792 event = perf_event_alloc(attr, cpu, ctx, NULL,
4793 NULL, overflow_handler, GFP_KERNEL);
4794 if (IS_ERR(event)) {
4795 err = PTR_ERR(event);
4796 goto err_put_context;
4797 }
4798
4799 event->filp = NULL;
4800 WARN_ON_ONCE(ctx->parent_ctx);
4801 mutex_lock(&ctx->mutex);
4802 perf_install_in_context(ctx, event, cpu);
4803 ++ctx->generation;
4804 mutex_unlock(&ctx->mutex);
4805
4806 event->owner = current;
4807 get_task_struct(current);
4808 mutex_lock(&current->perf_event_mutex);
4809 list_add_tail(&event->owner_entry, &current->perf_event_list);
4810 mutex_unlock(&current->perf_event_mutex);
4811
4812 return event;
4813
4814 err_put_context:
4815 put_ctx(ctx);
4816 err_exit:
4817 return ERR_PTR(err);
4818}
4819EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);
4820
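perf_event_create_kernel_counter() above gives in-kernel users (the hw-breakpoint layer being the first) a way to create a counter without a file descriptor and to receive overflows through a callback instead of the ring buffer. A hedged sketch of a caller; the four-argument perf_overflow_handler_t signature is taken from this series, while the demo_* names, the chosen software event, the sample period, and the pid = -1 / cpu = 0 per-CPU binding are illustrative assumptions. Teardown is omitted because the kernel-side release path is not part of this hunk.

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/err.h>
#include <linux/perf_event.h>

static struct perf_event *demo_event;

/* Runs from the overflow path; keep it short and interrupt-safe. */
static void demo_overflow(struct perf_event *event, int nmi,
			  struct perf_sample_data *data, struct pt_regs *regs)
{
	pr_info("perf demo: counter overflowed (config=%llu)\n",
		(unsigned long long)event->attr.config);
}

static int __init demo_init(void)
{
	struct perf_event_attr attr = {
		.type		= PERF_TYPE_SOFTWARE,
		.size		= sizeof(attr),
		.config		= PERF_COUNT_SW_CONTEXT_SWITCHES,
		.sample_period	= 100,	/* callback every 100 switches */
	};

	/* cpu 0, pid -1: bind the counter to CPU 0 rather than to a task */
	demo_event = perf_event_create_kernel_counter(&attr, 0, -1,
						      demo_overflow);
	if (IS_ERR(demo_event))
		return PTR_ERR(demo_event);

	return 0;
}
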
4597/* 4821/*
4598 * inherit a event from parent task to child task: 4822 * inherit a event from parent task to child task:
4599 */ 4823 */
@@ -4619,7 +4843,7 @@ inherit_event(struct perf_event *parent_event,
4619 child_event = perf_event_alloc(&parent_event->attr, 4843 child_event = perf_event_alloc(&parent_event->attr,
4620 parent_event->cpu, child_ctx, 4844 parent_event->cpu, child_ctx,
4621 group_leader, parent_event, 4845 group_leader, parent_event,
4622 GFP_KERNEL); 4846 NULL, GFP_KERNEL);
4623 if (IS_ERR(child_event)) 4847 if (IS_ERR(child_event))
4624 return child_event; 4848 return child_event;
4625 get_ctx(child_ctx); 4849 get_ctx(child_ctx);
@@ -4637,6 +4861,8 @@ inherit_event(struct perf_event *parent_event,
4637 if (parent_event->attr.freq) 4861 if (parent_event->attr.freq)
4638 child_event->hw.sample_period = parent_event->hw.sample_period; 4862 child_event->hw.sample_period = parent_event->hw.sample_period;
4639 4863
4864 child_event->overflow_handler = parent_event->overflow_handler;
4865
4640 /* 4866 /*
4641 * Link it up in the child's context: 4867 * Link it up in the child's context:
4642 */ 4868 */
@@ -4726,7 +4952,6 @@ __perf_event_exit_task(struct perf_event *child_event,
4726{ 4952{
4727 struct perf_event *parent_event; 4953 struct perf_event *parent_event;
4728 4954
4729 update_event_times(child_event);
4730 perf_event_remove_from_context(child_event); 4955 perf_event_remove_from_context(child_event);
4731 4956
4732 parent_event = child_event->parent; 4957 parent_event = child_event->parent;
@@ -4770,7 +4995,7 @@ void perf_event_exit_task(struct task_struct *child)
4770 * reading child->perf_event_ctxp, we wait until it has 4995 * reading child->perf_event_ctxp, we wait until it has
4771 * incremented the context's refcount before we do put_ctx below. 4996 * incremented the context's refcount before we do put_ctx below.
4772 */ 4997 */
4773 spin_lock(&child_ctx->lock); 4998 raw_spin_lock(&child_ctx->lock);
4774 child->perf_event_ctxp = NULL; 4999 child->perf_event_ctxp = NULL;
4775 /* 5000 /*
4776 * If this context is a clone; unclone it so it can't get 5001 * If this context is a clone; unclone it so it can't get
@@ -4778,7 +5003,8 @@ void perf_event_exit_task(struct task_struct *child)
4778 * the events from it. 5003 * the events from it.
4779 */ 5004 */
4780 unclone_ctx(child_ctx); 5005 unclone_ctx(child_ctx);
4781 spin_unlock_irqrestore(&child_ctx->lock, flags); 5006 update_context_time(child_ctx);
5007 raw_spin_unlock_irqrestore(&child_ctx->lock, flags);
4782 5008
4783 /* 5009 /*
4784 * Report the task dead after unscheduling the events so that we 5010 * Report the task dead after unscheduling the events so that we
@@ -4861,7 +5087,7 @@ again:
4861 */ 5087 */
4862int perf_event_init_task(struct task_struct *child) 5088int perf_event_init_task(struct task_struct *child)
4863{ 5089{
4864 struct perf_event_context *child_ctx, *parent_ctx; 5090 struct perf_event_context *child_ctx = NULL, *parent_ctx;
4865 struct perf_event_context *cloned_ctx; 5091 struct perf_event_context *cloned_ctx;
4866 struct perf_event *event; 5092 struct perf_event *event;
4867 struct task_struct *parent = current; 5093 struct task_struct *parent = current;
@@ -4877,20 +5103,6 @@ int perf_event_init_task(struct task_struct *child)
4877 return 0; 5103 return 0;
4878 5104
4879 /* 5105 /*
4880 * This is executed from the parent task context, so inherit
4881 * events that have been marked for cloning.
4882 * First allocate and initialize a context for the child.
4883 */
4884
4885 child_ctx = kmalloc(sizeof(struct perf_event_context), GFP_KERNEL);
4886 if (!child_ctx)
4887 return -ENOMEM;
4888
4889 __perf_event_init_context(child_ctx, child);
4890 child->perf_event_ctxp = child_ctx;
4891 get_task_struct(child);
4892
4893 /*
4894 * If the parent's context is a clone, pin it so it won't get 5106 * If the parent's context is a clone, pin it so it won't get
4895 * swapped under us. 5107 * swapped under us.
4896 */ 5108 */
@@ -4920,6 +5132,26 @@ int perf_event_init_task(struct task_struct *child)
4920 continue; 5132 continue;
4921 } 5133 }
4922 5134
5135 if (!child->perf_event_ctxp) {
5136 /*
5137 * This is executed from the parent task context, so
5138 * inherit events that have been marked for cloning.
5139 * First allocate and initialize a context for the
5140 * child.
5141 */
5142
5143 child_ctx = kzalloc(sizeof(struct perf_event_context),
5144 GFP_KERNEL);
5145 if (!child_ctx) {
5146 ret = -ENOMEM;
5147 goto exit;
5148 }
5149
5150 __perf_event_init_context(child_ctx, child);
5151 child->perf_event_ctxp = child_ctx;
5152 get_task_struct(child);
5153 }
5154
4923 ret = inherit_group(event, parent, parent_ctx, 5155 ret = inherit_group(event, parent, parent_ctx,
4924 child, child_ctx); 5156 child, child_ctx);
4925 if (ret) { 5157 if (ret) {
@@ -4948,6 +5180,7 @@ int perf_event_init_task(struct task_struct *child)
4948 get_ctx(child_ctx->parent_ctx); 5180 get_ctx(child_ctx->parent_ctx);
4949 } 5181 }
4950 5182
5183exit:
4951 mutex_unlock(&parent_ctx->mutex); 5184 mutex_unlock(&parent_ctx->mutex);
4952 5185
4953 perf_unpin_context(parent_ctx); 5186 perf_unpin_context(parent_ctx);
@@ -5062,11 +5295,11 @@ perf_set_reserve_percpu(struct sysdev_class *class,
5062 perf_reserved_percpu = val; 5295 perf_reserved_percpu = val;
5063 for_each_online_cpu(cpu) { 5296 for_each_online_cpu(cpu) {
5064 cpuctx = &per_cpu(perf_cpu_context, cpu); 5297 cpuctx = &per_cpu(perf_cpu_context, cpu);
5065 spin_lock_irq(&cpuctx->ctx.lock); 5298 raw_spin_lock_irq(&cpuctx->ctx.lock);
5066 mpt = min(perf_max_events - cpuctx->ctx.nr_events, 5299 mpt = min(perf_max_events - cpuctx->ctx.nr_events,
5067 perf_max_events - perf_reserved_percpu); 5300 perf_max_events - perf_reserved_percpu);
5068 cpuctx->max_pertask = mpt; 5301 cpuctx->max_pertask = mpt;
5069 spin_unlock_irq(&cpuctx->ctx.lock); 5302 raw_spin_unlock_irq(&cpuctx->ctx.lock);
5070 } 5303 }
5071 spin_unlock(&perf_resource_lock); 5304 spin_unlock(&perf_resource_lock);
5072 5305
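The raw_spin_lock conversions sprinkled through these hunks (ctx->lock and the per-CPU context lock above) move perf's context locks from spinlock_t to raw_spinlock_t, that is, to locks that keep spinning even on preempt-rt where ordinary spinlocks turn into sleeping locks. A minimal sketch of the declaration and usage pair these call sites assume, with an invented demo_ctx structure standing in for perf_event_context:

#include <linux/spinlock.h>
#include <linux/types.h>

struct demo_ctx {
	raw_spinlock_t	lock;	/* stays a spinning lock on preempt-rt */
	u64		time;
};

static void demo_ctx_init(struct demo_ctx *ctx)
{
	raw_spin_lock_init(&ctx->lock);
	ctx->time = 0;
}

static void demo_ctx_add(struct demo_ctx *ctx, u64 delta)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&ctx->lock, flags);
	ctx->time += delta;
	raw_spin_unlock_irqrestore(&ctx->lock, flags);
}
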