Diffstat (limited to 'kernel/perf_event.c')
 -rw-r--r--  kernel/perf_event.c | 825
 1 files changed, 531 insertions, 294 deletions
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 7f29643c8985..1f38270f08c7 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -28,13 +28,15 @@
28#include <linux/anon_inodes.h> 28#include <linux/anon_inodes.h>
29#include <linux/kernel_stat.h> 29#include <linux/kernel_stat.h>
30#include <linux/perf_event.h> 30#include <linux/perf_event.h>
31#include <linux/ftrace_event.h>
32#include <linux/hw_breakpoint.h>
31 33
32#include <asm/irq_regs.h> 34#include <asm/irq_regs.h>
33 35
34/* 36/*
35 * Each CPU has a list of per CPU events: 37 * Each CPU has a list of per CPU events:
36 */ 38 */
37DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context); 39static DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context);
38 40
39int perf_max_events __read_mostly = 1; 41int perf_max_events __read_mostly = 1;
40static int perf_reserved_percpu __read_mostly; 42static int perf_reserved_percpu __read_mostly;
@@ -201,14 +203,14 @@ perf_lock_task_context(struct task_struct *task, unsigned long *flags)
201 * if so. If we locked the right context, then it 203 * if so. If we locked the right context, then it
202 * can't get swapped on us any more. 204 * can't get swapped on us any more.
203 */ 205 */
204 spin_lock_irqsave(&ctx->lock, *flags); 206 raw_spin_lock_irqsave(&ctx->lock, *flags);
205 if (ctx != rcu_dereference(task->perf_event_ctxp)) { 207 if (ctx != rcu_dereference(task->perf_event_ctxp)) {
206 spin_unlock_irqrestore(&ctx->lock, *flags); 208 raw_spin_unlock_irqrestore(&ctx->lock, *flags);
207 goto retry; 209 goto retry;
208 } 210 }
209 211
210 if (!atomic_inc_not_zero(&ctx->refcount)) { 212 if (!atomic_inc_not_zero(&ctx->refcount)) {
211 spin_unlock_irqrestore(&ctx->lock, *flags); 213 raw_spin_unlock_irqrestore(&ctx->lock, *flags);
212 ctx = NULL; 214 ctx = NULL;
213 } 215 }
214 } 216 }
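
This hunk is part of the file-wide conversion of ctx->lock from spinlock_t to raw_spinlock_t, which keeps the lock a true spinning lock even on preempt-rt kernels where ordinary spinlocks can sleep. A minimal sketch of the init/acquire pattern the converted call sites follow, using a hypothetical lock name:

	#include <linux/spinlock.h>

	static DEFINE_RAW_SPINLOCK(demo_lock);	/* hypothetical lock, for illustration */

	static void demo_lock_pattern(void)
	{
		unsigned long flags;

		raw_spin_lock_irqsave(&demo_lock, flags);
		/* critical section: IRQs off, code here must not sleep */
		raw_spin_unlock_irqrestore(&demo_lock, flags);
	}
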
@@ -229,7 +231,7 @@ static struct perf_event_context *perf_pin_task_context(struct task_struct *task
229 ctx = perf_lock_task_context(task, &flags); 231 ctx = perf_lock_task_context(task, &flags);
230 if (ctx) { 232 if (ctx) {
231 ++ctx->pin_count; 233 ++ctx->pin_count;
232 spin_unlock_irqrestore(&ctx->lock, flags); 234 raw_spin_unlock_irqrestore(&ctx->lock, flags);
233 } 235 }
234 return ctx; 236 return ctx;
235} 237}
@@ -238,12 +240,55 @@ static void perf_unpin_context(struct perf_event_context *ctx)
238{ 240{
239 unsigned long flags; 241 unsigned long flags;
240 242
241 spin_lock_irqsave(&ctx->lock, flags); 243 raw_spin_lock_irqsave(&ctx->lock, flags);
242 --ctx->pin_count; 244 --ctx->pin_count;
243 spin_unlock_irqrestore(&ctx->lock, flags); 245 raw_spin_unlock_irqrestore(&ctx->lock, flags);
244 put_ctx(ctx); 246 put_ctx(ctx);
245} 247}
246 248
249static inline u64 perf_clock(void)
250{
251 return cpu_clock(smp_processor_id());
252}
253
254/*
255 * Update the record of the current time in a context.
256 */
257static void update_context_time(struct perf_event_context *ctx)
258{
259 u64 now = perf_clock();
260
261 ctx->time += now - ctx->timestamp;
262 ctx->timestamp = now;
263}
264
265/*
266 * Update the total_time_enabled and total_time_running fields for a event.
267 */
268static void update_event_times(struct perf_event *event)
269{
270 struct perf_event_context *ctx = event->ctx;
271 u64 run_end;
272
273 if (event->state < PERF_EVENT_STATE_INACTIVE ||
274 event->group_leader->state < PERF_EVENT_STATE_INACTIVE)
275 return;
276
277 if (ctx->is_active)
278 run_end = ctx->time;
279 else
280 run_end = event->tstamp_stopped;
281
282 event->total_time_enabled = run_end - event->tstamp_enabled;
283
284 if (event->state == PERF_EVENT_STATE_INACTIVE)
285 run_end = event->tstamp_stopped;
286 else
287 run_end = ctx->time;
288
289 event->total_time_running = run_end - event->tstamp_running;
290}
291
247/* 292/*
248 * Add a event from the lists for its context. 293 * Add a event from the lists for its context.
249 * Must be called with ctx->mutex and ctx->lock held. 294 * Must be called with ctx->mutex and ctx->lock held.
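
The helpers above (perf_clock(), update_context_time(), update_event_times()) are moved ahead of list_del_event() so the removal path can snapshot times before the event leaves its context, and total_time_enabled now stops at tstamp_stopped when the context itself is no longer active instead of always using ctx->time. A small worked example of that arithmetic, with made-up timestamps for an INACTIVE event in an inactive context:

	static void demo_event_times(void)
	{
		/* illustrative values: enabled at 100, scheduled in at 120, out at 170 */
		u64 tstamp_enabled = 100, tstamp_running = 120, tstamp_stopped = 170;
		u64 run_end = tstamp_stopped;	/* ctx inactive, event INACTIVE */

		u64 total_time_enabled = run_end - tstamp_enabled;	/* 70 */
		u64 total_time_running = run_end - tstamp_running;	/* 50 */
	}
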
@@ -292,6 +337,18 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
292 if (event->group_leader != event) 337 if (event->group_leader != event)
293 event->group_leader->nr_siblings--; 338 event->group_leader->nr_siblings--;
294 339
340 update_event_times(event);
341
342 /*
343 * If event was in error state, then keep it
344 * that way, otherwise bogus counts will be
345 * returned on read(). The only way to get out
346 * of error state is by explicit re-enabling
347 * of the event
348 */
349 if (event->state > PERF_EVENT_STATE_OFF)
350 event->state = PERF_EVENT_STATE_OFF;
351
295 /* 352 /*
296 * If this was a group event with sibling events then 353 * If this was a group event with sibling events then
297 * upgrade the siblings to singleton events by adding them 354 * upgrade the siblings to singleton events by adding them
@@ -370,7 +427,7 @@ static void __perf_event_remove_from_context(void *info)
370 if (ctx->task && cpuctx->task_ctx != ctx) 427 if (ctx->task && cpuctx->task_ctx != ctx)
371 return; 428 return;
372 429
373 spin_lock(&ctx->lock); 430 raw_spin_lock(&ctx->lock);
374 /* 431 /*
375 * Protect the list operation against NMI by disabling the 432 * Protect the list operation against NMI by disabling the
376 * events on a global level. 433 * events on a global level.
@@ -392,7 +449,7 @@ static void __perf_event_remove_from_context(void *info)
392 } 449 }
393 450
394 perf_enable(); 451 perf_enable();
395 spin_unlock(&ctx->lock); 452 raw_spin_unlock(&ctx->lock);
396} 453}
397 454
398 455
@@ -419,7 +476,7 @@ static void perf_event_remove_from_context(struct perf_event *event)
419 if (!task) { 476 if (!task) {
420 /* 477 /*
421 * Per cpu events are removed via an smp call and 478 * Per cpu events are removed via an smp call and
422 * the removal is always sucessful. 479 * the removal is always successful.
423 */ 480 */
424 smp_call_function_single(event->cpu, 481 smp_call_function_single(event->cpu,
425 __perf_event_remove_from_context, 482 __perf_event_remove_from_context,
@@ -431,12 +488,12 @@ retry:
431 task_oncpu_function_call(task, __perf_event_remove_from_context, 488 task_oncpu_function_call(task, __perf_event_remove_from_context,
432 event); 489 event);
433 490
434 spin_lock_irq(&ctx->lock); 491 raw_spin_lock_irq(&ctx->lock);
435 /* 492 /*
436 * If the context is active we need to retry the smp call. 493 * If the context is active we need to retry the smp call.
437 */ 494 */
438 if (ctx->nr_active && !list_empty(&event->group_entry)) { 495 if (ctx->nr_active && !list_empty(&event->group_entry)) {
439 spin_unlock_irq(&ctx->lock); 496 raw_spin_unlock_irq(&ctx->lock);
440 goto retry; 497 goto retry;
441 } 498 }
442 499
@@ -445,48 +502,9 @@ retry:
445 * can remove the event safely, if the call above did not 502 * can remove the event safely, if the call above did not
446 * succeed. 503 * succeed.
447 */ 504 */
448 if (!list_empty(&event->group_entry)) { 505 if (!list_empty(&event->group_entry))
449 list_del_event(event, ctx); 506 list_del_event(event, ctx);
450 } 507 raw_spin_unlock_irq(&ctx->lock);
451 spin_unlock_irq(&ctx->lock);
452}
453
454static inline u64 perf_clock(void)
455{
456 return cpu_clock(smp_processor_id());
457}
458
459/*
460 * Update the record of the current time in a context.
461 */
462static void update_context_time(struct perf_event_context *ctx)
463{
464 u64 now = perf_clock();
465
466 ctx->time += now - ctx->timestamp;
467 ctx->timestamp = now;
468}
469
470/*
471 * Update the total_time_enabled and total_time_running fields for a event.
472 */
473static void update_event_times(struct perf_event *event)
474{
475 struct perf_event_context *ctx = event->ctx;
476 u64 run_end;
477
478 if (event->state < PERF_EVENT_STATE_INACTIVE ||
479 event->group_leader->state < PERF_EVENT_STATE_INACTIVE)
480 return;
481
482 event->total_time_enabled = ctx->time - event->tstamp_enabled;
483
484 if (event->state == PERF_EVENT_STATE_INACTIVE)
485 run_end = event->tstamp_stopped;
486 else
487 run_end = ctx->time;
488
489 event->total_time_running = run_end - event->tstamp_running;
490} 508}
491 509
492/* 510/*
@@ -517,7 +535,7 @@ static void __perf_event_disable(void *info)
517 if (ctx->task && cpuctx->task_ctx != ctx) 535 if (ctx->task && cpuctx->task_ctx != ctx)
518 return; 536 return;
519 537
520 spin_lock(&ctx->lock); 538 raw_spin_lock(&ctx->lock);
521 539
522 /* 540 /*
523 * If the event is on, turn it off. 541 * If the event is on, turn it off.
@@ -533,7 +551,7 @@ static void __perf_event_disable(void *info)
533 event->state = PERF_EVENT_STATE_OFF; 551 event->state = PERF_EVENT_STATE_OFF;
534 } 552 }
535 553
536 spin_unlock(&ctx->lock); 554 raw_spin_unlock(&ctx->lock);
537} 555}
538 556
539/* 557/*
@@ -549,7 +567,7 @@ static void __perf_event_disable(void *info)
549 * is the current context on this CPU and preemption is disabled, 567 * is the current context on this CPU and preemption is disabled,
550 * hence we can't get into perf_event_task_sched_out for this context. 568 * hence we can't get into perf_event_task_sched_out for this context.
551 */ 569 */
552static void perf_event_disable(struct perf_event *event) 570void perf_event_disable(struct perf_event *event)
553{ 571{
554 struct perf_event_context *ctx = event->ctx; 572 struct perf_event_context *ctx = event->ctx;
555 struct task_struct *task = ctx->task; 573 struct task_struct *task = ctx->task;
@@ -566,12 +584,12 @@ static void perf_event_disable(struct perf_event *event)
566 retry: 584 retry:
567 task_oncpu_function_call(task, __perf_event_disable, event); 585 task_oncpu_function_call(task, __perf_event_disable, event);
568 586
569 spin_lock_irq(&ctx->lock); 587 raw_spin_lock_irq(&ctx->lock);
570 /* 588 /*
571 * If the event is still active, we need to retry the cross-call. 589 * If the event is still active, we need to retry the cross-call.
572 */ 590 */
573 if (event->state == PERF_EVENT_STATE_ACTIVE) { 591 if (event->state == PERF_EVENT_STATE_ACTIVE) {
574 spin_unlock_irq(&ctx->lock); 592 raw_spin_unlock_irq(&ctx->lock);
575 goto retry; 593 goto retry;
576 } 594 }
577 595
@@ -584,7 +602,7 @@ static void perf_event_disable(struct perf_event *event)
584 event->state = PERF_EVENT_STATE_OFF; 602 event->state = PERF_EVENT_STATE_OFF;
585 } 603 }
586 604
587 spin_unlock_irq(&ctx->lock); 605 raw_spin_unlock_irq(&ctx->lock);
588} 606}
589 607
590static int 608static int
@@ -752,7 +770,7 @@ static void __perf_install_in_context(void *info)
752 cpuctx->task_ctx = ctx; 770 cpuctx->task_ctx = ctx;
753 } 771 }
754 772
755 spin_lock(&ctx->lock); 773 raw_spin_lock(&ctx->lock);
756 ctx->is_active = 1; 774 ctx->is_active = 1;
757 update_context_time(ctx); 775 update_context_time(ctx);
758 776
@@ -764,6 +782,9 @@ static void __perf_install_in_context(void *info)
764 782
765 add_event_to_ctx(event, ctx); 783 add_event_to_ctx(event, ctx);
766 784
785 if (event->cpu != -1 && event->cpu != smp_processor_id())
786 goto unlock;
787
767 /* 788 /*
768 * Don't put the event on if it is disabled or if 789 * Don't put the event on if it is disabled or if
769 * it is in a group and the group isn't on. 790 * it is in a group and the group isn't on.
@@ -802,7 +823,7 @@ static void __perf_install_in_context(void *info)
802 unlock: 823 unlock:
803 perf_enable(); 824 perf_enable();
804 825
805 spin_unlock(&ctx->lock); 826 raw_spin_unlock(&ctx->lock);
806} 827}
807 828
808/* 829/*
@@ -827,7 +848,7 @@ perf_install_in_context(struct perf_event_context *ctx,
827 if (!task) { 848 if (!task) {
828 /* 849 /*
829 * Per cpu events are installed via an smp call and 850 * Per cpu events are installed via an smp call and
830 * the install is always sucessful. 851 * the install is always successful.
831 */ 852 */
832 smp_call_function_single(cpu, __perf_install_in_context, 853 smp_call_function_single(cpu, __perf_install_in_context,
833 event, 1); 854 event, 1);
@@ -838,12 +859,12 @@ retry:
838 task_oncpu_function_call(task, __perf_install_in_context, 859 task_oncpu_function_call(task, __perf_install_in_context,
839 event); 860 event);
840 861
841 spin_lock_irq(&ctx->lock); 862 raw_spin_lock_irq(&ctx->lock);
842 /* 863 /*
843 * we need to retry the smp call. 864 * we need to retry the smp call.
844 */ 865 */
845 if (ctx->is_active && list_empty(&event->group_entry)) { 866 if (ctx->is_active && list_empty(&event->group_entry)) {
846 spin_unlock_irq(&ctx->lock); 867 raw_spin_unlock_irq(&ctx->lock);
847 goto retry; 868 goto retry;
848 } 869 }
849 870
@@ -854,7 +875,7 @@ retry:
854 */ 875 */
855 if (list_empty(&event->group_entry)) 876 if (list_empty(&event->group_entry))
856 add_event_to_ctx(event, ctx); 877 add_event_to_ctx(event, ctx);
857 spin_unlock_irq(&ctx->lock); 878 raw_spin_unlock_irq(&ctx->lock);
858} 879}
859 880
860/* 881/*
@@ -899,7 +920,7 @@ static void __perf_event_enable(void *info)
899 cpuctx->task_ctx = ctx; 920 cpuctx->task_ctx = ctx;
900 } 921 }
901 922
902 spin_lock(&ctx->lock); 923 raw_spin_lock(&ctx->lock);
903 ctx->is_active = 1; 924 ctx->is_active = 1;
904 update_context_time(ctx); 925 update_context_time(ctx);
905 926
@@ -907,6 +928,9 @@ static void __perf_event_enable(void *info)
907 goto unlock; 928 goto unlock;
908 __perf_event_mark_enabled(event, ctx); 929 __perf_event_mark_enabled(event, ctx);
909 930
931 if (event->cpu != -1 && event->cpu != smp_processor_id())
932 goto unlock;
933
910 /* 934 /*
911 * If the event is in a group and isn't the group leader, 935 * If the event is in a group and isn't the group leader,
912 * then don't put it on unless the group is on. 936 * then don't put it on unless the group is on.
@@ -941,7 +965,7 @@ static void __perf_event_enable(void *info)
941 } 965 }
942 966
943 unlock: 967 unlock:
944 spin_unlock(&ctx->lock); 968 raw_spin_unlock(&ctx->lock);
945} 969}
946 970
947/* 971/*
@@ -953,7 +977,7 @@ static void __perf_event_enable(void *info)
953 * perf_event_for_each_child or perf_event_for_each as described 977 * perf_event_for_each_child or perf_event_for_each as described
954 * for perf_event_disable. 978 * for perf_event_disable.
955 */ 979 */
956static void perf_event_enable(struct perf_event *event) 980void perf_event_enable(struct perf_event *event)
957{ 981{
958 struct perf_event_context *ctx = event->ctx; 982 struct perf_event_context *ctx = event->ctx;
959 struct task_struct *task = ctx->task; 983 struct task_struct *task = ctx->task;
@@ -967,7 +991,7 @@ static void perf_event_enable(struct perf_event *event)
967 return; 991 return;
968 } 992 }
969 993
970 spin_lock_irq(&ctx->lock); 994 raw_spin_lock_irq(&ctx->lock);
971 if (event->state >= PERF_EVENT_STATE_INACTIVE) 995 if (event->state >= PERF_EVENT_STATE_INACTIVE)
972 goto out; 996 goto out;
973 997
@@ -982,10 +1006,10 @@ static void perf_event_enable(struct perf_event *event)
982 event->state = PERF_EVENT_STATE_OFF; 1006 event->state = PERF_EVENT_STATE_OFF;
983 1007
984 retry: 1008 retry:
985 spin_unlock_irq(&ctx->lock); 1009 raw_spin_unlock_irq(&ctx->lock);
986 task_oncpu_function_call(task, __perf_event_enable, event); 1010 task_oncpu_function_call(task, __perf_event_enable, event);
987 1011
988 spin_lock_irq(&ctx->lock); 1012 raw_spin_lock_irq(&ctx->lock);
989 1013
990 /* 1014 /*
991 * If the context is active and the event is still off, 1015 * If the context is active and the event is still off,
@@ -1002,7 +1026,7 @@ static void perf_event_enable(struct perf_event *event)
1002 __perf_event_mark_enabled(event, ctx); 1026 __perf_event_mark_enabled(event, ctx);
1003 1027
1004 out: 1028 out:
1005 spin_unlock_irq(&ctx->lock); 1029 raw_spin_unlock_irq(&ctx->lock);
1006} 1030}
1007 1031
1008static int perf_event_refresh(struct perf_event *event, int refresh) 1032static int perf_event_refresh(struct perf_event *event, int refresh)
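
perf_event_disable() and perf_event_enable() drop their static qualifier in this patch, so code outside this file can toggle an existing event. A sketch of the pairing, assuming a hypothetical caller that already holds a struct perf_event pointer:

	static void demo_quiesce(struct perf_event *event)
	{
		/* keep the event from counting across a region, then resume it */
		perf_event_disable(event);

		/* ... region that must not be counted ... */

		perf_event_enable(event);
	}
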
@@ -1024,20 +1048,20 @@ void __perf_event_sched_out(struct perf_event_context *ctx,
1024{ 1048{
1025 struct perf_event *event; 1049 struct perf_event *event;
1026 1050
1027 spin_lock(&ctx->lock); 1051 raw_spin_lock(&ctx->lock);
1028 ctx->is_active = 0; 1052 ctx->is_active = 0;
1029 if (likely(!ctx->nr_events)) 1053 if (likely(!ctx->nr_events))
1030 goto out; 1054 goto out;
1031 update_context_time(ctx); 1055 update_context_time(ctx);
1032 1056
1033 perf_disable(); 1057 perf_disable();
1034 if (ctx->nr_active) 1058 if (ctx->nr_active) {
1035 list_for_each_entry(event, &ctx->group_list, group_entry) 1059 list_for_each_entry(event, &ctx->group_list, group_entry)
1036 group_sched_out(event, cpuctx, ctx); 1060 group_sched_out(event, cpuctx, ctx);
1037 1061 }
1038 perf_enable(); 1062 perf_enable();
1039 out: 1063 out:
1040 spin_unlock(&ctx->lock); 1064 raw_spin_unlock(&ctx->lock);
1041} 1065}
1042 1066
1043/* 1067/*
@@ -1059,8 +1083,6 @@ static int context_equiv(struct perf_event_context *ctx1,
1059 && !ctx1->pin_count && !ctx2->pin_count; 1083 && !ctx1->pin_count && !ctx2->pin_count;
1060} 1084}
1061 1085
1062static void __perf_event_read(void *event);
1063
1064static void __perf_event_sync_stat(struct perf_event *event, 1086static void __perf_event_sync_stat(struct perf_event *event,
1065 struct perf_event *next_event) 1087 struct perf_event *next_event)
1066{ 1088{
@@ -1078,8 +1100,8 @@ static void __perf_event_sync_stat(struct perf_event *event,
1078 */ 1100 */
1079 switch (event->state) { 1101 switch (event->state) {
1080 case PERF_EVENT_STATE_ACTIVE: 1102 case PERF_EVENT_STATE_ACTIVE:
1081 __perf_event_read(event); 1103 event->pmu->read(event);
1082 break; 1104 /* fall-through */
1083 1105
1084 case PERF_EVENT_STATE_INACTIVE: 1106 case PERF_EVENT_STATE_INACTIVE:
1085 update_event_times(event); 1107 update_event_times(event);
@@ -1118,6 +1140,8 @@ static void perf_event_sync_stat(struct perf_event_context *ctx,
1118 if (!ctx->nr_stat) 1140 if (!ctx->nr_stat)
1119 return; 1141 return;
1120 1142
1143 update_context_time(ctx);
1144
1121 event = list_first_entry(&ctx->event_list, 1145 event = list_first_entry(&ctx->event_list,
1122 struct perf_event, event_entry); 1146 struct perf_event, event_entry);
1123 1147
@@ -1161,8 +1185,6 @@ void perf_event_task_sched_out(struct task_struct *task,
1161 if (likely(!ctx || !cpuctx->task_ctx)) 1185 if (likely(!ctx || !cpuctx->task_ctx))
1162 return; 1186 return;
1163 1187
1164 update_context_time(ctx);
1165
1166 rcu_read_lock(); 1188 rcu_read_lock();
1167 parent = rcu_dereference(ctx->parent_ctx); 1189 parent = rcu_dereference(ctx->parent_ctx);
1168 next_ctx = next->perf_event_ctxp; 1190 next_ctx = next->perf_event_ctxp;
@@ -1177,8 +1199,8 @@ void perf_event_task_sched_out(struct task_struct *task,
1177 * order we take the locks because no other cpu could 1199 * order we take the locks because no other cpu could
1178 * be trying to lock both of these tasks. 1200 * be trying to lock both of these tasks.
1179 */ 1201 */
1180 spin_lock(&ctx->lock); 1202 raw_spin_lock(&ctx->lock);
1181 spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING); 1203 raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
1182 if (context_equiv(ctx, next_ctx)) { 1204 if (context_equiv(ctx, next_ctx)) {
1183 /* 1205 /*
1184 * XXX do we need a memory barrier of sorts 1206 * XXX do we need a memory barrier of sorts
@@ -1192,8 +1214,8 @@ void perf_event_task_sched_out(struct task_struct *task,
1192 1214
1193 perf_event_sync_stat(ctx, next_ctx); 1215 perf_event_sync_stat(ctx, next_ctx);
1194 } 1216 }
1195 spin_unlock(&next_ctx->lock); 1217 raw_spin_unlock(&next_ctx->lock);
1196 spin_unlock(&ctx->lock); 1218 raw_spin_unlock(&ctx->lock);
1197 } 1219 }
1198 rcu_read_unlock(); 1220 rcu_read_unlock();
1199 1221
@@ -1235,7 +1257,7 @@ __perf_event_sched_in(struct perf_event_context *ctx,
1235 struct perf_event *event; 1257 struct perf_event *event;
1236 int can_add_hw = 1; 1258 int can_add_hw = 1;
1237 1259
1238 spin_lock(&ctx->lock); 1260 raw_spin_lock(&ctx->lock);
1239 ctx->is_active = 1; 1261 ctx->is_active = 1;
1240 if (likely(!ctx->nr_events)) 1262 if (likely(!ctx->nr_events))
1241 goto out; 1263 goto out;
@@ -1290,7 +1312,7 @@ __perf_event_sched_in(struct perf_event_context *ctx,
1290 } 1312 }
1291 perf_enable(); 1313 perf_enable();
1292 out: 1314 out:
1293 spin_unlock(&ctx->lock); 1315 raw_spin_unlock(&ctx->lock);
1294} 1316}
1295 1317
1296/* 1318/*
@@ -1354,11 +1376,14 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
1354 struct hw_perf_event *hwc; 1376 struct hw_perf_event *hwc;
1355 u64 interrupts, freq; 1377 u64 interrupts, freq;
1356 1378
1357 spin_lock(&ctx->lock); 1379 raw_spin_lock(&ctx->lock);
1358 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 1380 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
1359 if (event->state != PERF_EVENT_STATE_ACTIVE) 1381 if (event->state != PERF_EVENT_STATE_ACTIVE)
1360 continue; 1382 continue;
1361 1383
1384 if (event->cpu != -1 && event->cpu != smp_processor_id())
1385 continue;
1386
1362 hwc = &event->hw; 1387 hwc = &event->hw;
1363 1388
1364 interrupts = hwc->interrupts; 1389 interrupts = hwc->interrupts;
@@ -1409,7 +1434,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
1409 perf_enable(); 1434 perf_enable();
1410 } 1435 }
1411 } 1436 }
1412 spin_unlock(&ctx->lock); 1437 raw_spin_unlock(&ctx->lock);
1413} 1438}
1414 1439
1415/* 1440/*
@@ -1422,7 +1447,7 @@ static void rotate_ctx(struct perf_event_context *ctx)
1422 if (!ctx->nr_events) 1447 if (!ctx->nr_events)
1423 return; 1448 return;
1424 1449
1425 spin_lock(&ctx->lock); 1450 raw_spin_lock(&ctx->lock);
1426 /* 1451 /*
1427 * Rotate the first entry last (works just fine for group events too): 1452 * Rotate the first entry last (works just fine for group events too):
1428 */ 1453 */
@@ -1433,7 +1458,7 @@ static void rotate_ctx(struct perf_event_context *ctx)
1433 } 1458 }
1434 perf_enable(); 1459 perf_enable();
1435 1460
1436 spin_unlock(&ctx->lock); 1461 raw_spin_unlock(&ctx->lock);
1437} 1462}
1438 1463
1439void perf_event_task_tick(struct task_struct *curr, int cpu) 1464void perf_event_task_tick(struct task_struct *curr, int cpu)
@@ -1482,7 +1507,7 @@ static void perf_event_enable_on_exec(struct task_struct *task)
1482 1507
1483 __perf_event_task_sched_out(ctx); 1508 __perf_event_task_sched_out(ctx);
1484 1509
1485 spin_lock(&ctx->lock); 1510 raw_spin_lock(&ctx->lock);
1486 1511
1487 list_for_each_entry(event, &ctx->group_list, group_entry) { 1512 list_for_each_entry(event, &ctx->group_list, group_entry) {
1488 if (!event->attr.enable_on_exec) 1513 if (!event->attr.enable_on_exec)
@@ -1500,7 +1525,7 @@ static void perf_event_enable_on_exec(struct task_struct *task)
1500 if (enabled) 1525 if (enabled)
1501 unclone_ctx(ctx); 1526 unclone_ctx(ctx);
1502 1527
1503 spin_unlock(&ctx->lock); 1528 raw_spin_unlock(&ctx->lock);
1504 1529
1505 perf_event_task_sched_in(task, smp_processor_id()); 1530 perf_event_task_sched_in(task, smp_processor_id());
1506 out: 1531 out:
@@ -1515,7 +1540,6 @@ static void __perf_event_read(void *info)
1515 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); 1540 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
1516 struct perf_event *event = info; 1541 struct perf_event *event = info;
1517 struct perf_event_context *ctx = event->ctx; 1542 struct perf_event_context *ctx = event->ctx;
1518 unsigned long flags;
1519 1543
1520 /* 1544 /*
1521 * If this is a task context, we need to check whether it is 1545 * If this is a task context, we need to check whether it is
@@ -1527,12 +1551,12 @@ static void __perf_event_read(void *info)
1527 if (ctx->task && cpuctx->task_ctx != ctx) 1551 if (ctx->task && cpuctx->task_ctx != ctx)
1528 return; 1552 return;
1529 1553
1530 local_irq_save(flags); 1554 raw_spin_lock(&ctx->lock);
1531 if (ctx->is_active) 1555 update_context_time(ctx);
1532 update_context_time(ctx);
1533 event->pmu->read(event);
1534 update_event_times(event); 1556 update_event_times(event);
1535 local_irq_restore(flags); 1557 raw_spin_unlock(&ctx->lock);
1558
1559 event->pmu->read(event);
1536} 1560}
1537 1561
1538static u64 perf_event_read(struct perf_event *event) 1562static u64 perf_event_read(struct perf_event *event)
@@ -1545,7 +1569,13 @@ static u64 perf_event_read(struct perf_event *event)
1545 smp_call_function_single(event->oncpu, 1569 smp_call_function_single(event->oncpu,
1546 __perf_event_read, event, 1); 1570 __perf_event_read, event, 1);
1547 } else if (event->state == PERF_EVENT_STATE_INACTIVE) { 1571 } else if (event->state == PERF_EVENT_STATE_INACTIVE) {
1572 struct perf_event_context *ctx = event->ctx;
1573 unsigned long flags;
1574
1575 raw_spin_lock_irqsave(&ctx->lock, flags);
1576 update_context_time(ctx);
1548 update_event_times(event); 1577 update_event_times(event);
1578 raw_spin_unlock_irqrestore(&ctx->lock, flags);
1549 } 1579 }
1550 1580
1551 return atomic64_read(&event->count); 1581 return atomic64_read(&event->count);
@@ -1558,8 +1588,7 @@ static void
1558__perf_event_init_context(struct perf_event_context *ctx, 1588__perf_event_init_context(struct perf_event_context *ctx,
1559 struct task_struct *task) 1589 struct task_struct *task)
1560{ 1590{
1561 memset(ctx, 0, sizeof(*ctx)); 1591 raw_spin_lock_init(&ctx->lock);
1562 spin_lock_init(&ctx->lock);
1563 mutex_init(&ctx->mutex); 1592 mutex_init(&ctx->mutex);
1564 INIT_LIST_HEAD(&ctx->group_list); 1593 INIT_LIST_HEAD(&ctx->group_list);
1565 INIT_LIST_HEAD(&ctx->event_list); 1594 INIT_LIST_HEAD(&ctx->event_list);
@@ -1575,15 +1604,12 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu)
1575 unsigned long flags; 1604 unsigned long flags;
1576 int err; 1605 int err;
1577 1606
1578 /* 1607 if (pid == -1 && cpu != -1) {
1579 * If cpu is not a wildcard then this is a percpu event:
1580 */
1581 if (cpu != -1) {
1582 /* Must be root to operate on a CPU event: */ 1608 /* Must be root to operate on a CPU event: */
1583 if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) 1609 if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
1584 return ERR_PTR(-EACCES); 1610 return ERR_PTR(-EACCES);
1585 1611
1586 if (cpu < 0 || cpu > num_possible_cpus()) 1612 if (cpu < 0 || cpu >= nr_cpumask_bits)
1587 return ERR_PTR(-EINVAL); 1613 return ERR_PTR(-EINVAL);
1588 1614
1589 /* 1615 /*
@@ -1591,7 +1617,7 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu)
1591 * offline CPU and activate it when the CPU comes up, but 1617 * offline CPU and activate it when the CPU comes up, but
1592 * that's for later. 1618 * that's for later.
1593 */ 1619 */
1594 if (!cpu_isset(cpu, cpu_online_map)) 1620 if (!cpu_online(cpu))
1595 return ERR_PTR(-ENODEV); 1621 return ERR_PTR(-ENODEV);
1596 1622
1597 cpuctx = &per_cpu(perf_cpu_context, cpu); 1623 cpuctx = &per_cpu(perf_cpu_context, cpu);
@@ -1629,11 +1655,11 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu)
1629 ctx = perf_lock_task_context(task, &flags); 1655 ctx = perf_lock_task_context(task, &flags);
1630 if (ctx) { 1656 if (ctx) {
1631 unclone_ctx(ctx); 1657 unclone_ctx(ctx);
1632 spin_unlock_irqrestore(&ctx->lock, flags); 1658 raw_spin_unlock_irqrestore(&ctx->lock, flags);
1633 } 1659 }
1634 1660
1635 if (!ctx) { 1661 if (!ctx) {
1636 ctx = kmalloc(sizeof(struct perf_event_context), GFP_KERNEL); 1662 ctx = kzalloc(sizeof(struct perf_event_context), GFP_KERNEL);
1637 err = -ENOMEM; 1663 err = -ENOMEM;
1638 if (!ctx) 1664 if (!ctx)
1639 goto errout; 1665 goto errout;
@@ -1658,6 +1684,8 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu)
1658 return ERR_PTR(err); 1684 return ERR_PTR(err);
1659} 1685}
1660 1686
1687static void perf_event_free_filter(struct perf_event *event);
1688
1661static void free_event_rcu(struct rcu_head *head) 1689static void free_event_rcu(struct rcu_head *head)
1662{ 1690{
1663 struct perf_event *event; 1691 struct perf_event *event;
@@ -1665,6 +1693,7 @@ static void free_event_rcu(struct rcu_head *head)
1665 event = container_of(head, struct perf_event, rcu_head); 1693 event = container_of(head, struct perf_event, rcu_head);
1666 if (event->ns) 1694 if (event->ns)
1667 put_pid_ns(event->ns); 1695 put_pid_ns(event->ns);
1696 perf_event_free_filter(event);
1668 kfree(event); 1697 kfree(event);
1669} 1698}
1670 1699
@@ -1696,16 +1725,10 @@ static void free_event(struct perf_event *event)
1696 call_rcu(&event->rcu_head, free_event_rcu); 1725 call_rcu(&event->rcu_head, free_event_rcu);
1697} 1726}
1698 1727
1699/* 1728int perf_event_release_kernel(struct perf_event *event)
1700 * Called when the last reference to the file is gone.
1701 */
1702static int perf_release(struct inode *inode, struct file *file)
1703{ 1729{
1704 struct perf_event *event = file->private_data;
1705 struct perf_event_context *ctx = event->ctx; 1730 struct perf_event_context *ctx = event->ctx;
1706 1731
1707 file->private_data = NULL;
1708
1709 WARN_ON_ONCE(ctx->parent_ctx); 1732 WARN_ON_ONCE(ctx->parent_ctx);
1710 mutex_lock(&ctx->mutex); 1733 mutex_lock(&ctx->mutex);
1711 perf_event_remove_from_context(event); 1734 perf_event_remove_from_context(event);
@@ -1720,6 +1743,19 @@ static int perf_release(struct inode *inode, struct file *file)
1720 1743
1721 return 0; 1744 return 0;
1722} 1745}
1746EXPORT_SYMBOL_GPL(perf_event_release_kernel);
1747
1748/*
1749 * Called when the last reference to the file is gone.
1750 */
1751static int perf_release(struct inode *inode, struct file *file)
1752{
1753 struct perf_event *event = file->private_data;
1754
1755 file->private_data = NULL;
1756
1757 return perf_event_release_kernel(event);
1758}
1723 1759
1724static int perf_event_read_size(struct perf_event *event) 1760static int perf_event_read_size(struct perf_event *event)
1725{ 1761{
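
The file-release path is now a thin wrapper around an exported perf_event_release_kernel(), so events created by kernel code (with no file attached) can be torn down through the same path. Illustrative only; the creation side of the in-kernel API is not part of this diff:

	static void demo_put_kernel_event(struct perf_event *event)
	{
		/*
		 * Detaches the event from its context and frees it, exactly
		 * as the final fput() on a perf fd would.
		 */
		perf_event_release_kernel(event);
	}
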
@@ -1746,91 +1782,94 @@ static int perf_event_read_size(struct perf_event *event)
1746 return size; 1782 return size;
1747} 1783}
1748 1784
1749static u64 perf_event_read_value(struct perf_event *event) 1785u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
1750{ 1786{
1751 struct perf_event *child; 1787 struct perf_event *child;
1752 u64 total = 0; 1788 u64 total = 0;
1753 1789
1790 *enabled = 0;
1791 *running = 0;
1792
1793 mutex_lock(&event->child_mutex);
1754 total += perf_event_read(event); 1794 total += perf_event_read(event);
1755 list_for_each_entry(child, &event->child_list, child_list) 1795 *enabled += event->total_time_enabled +
1796 atomic64_read(&event->child_total_time_enabled);
1797 *running += event->total_time_running +
1798 atomic64_read(&event->child_total_time_running);
1799
1800 list_for_each_entry(child, &event->child_list, child_list) {
1756 total += perf_event_read(child); 1801 total += perf_event_read(child);
1802 *enabled += child->total_time_enabled;
1803 *running += child->total_time_running;
1804 }
1805 mutex_unlock(&event->child_mutex);
1757 1806
1758 return total; 1807 return total;
1759} 1808}
1760 1809EXPORT_SYMBOL_GPL(perf_event_read_value);
1761static int perf_event_read_entry(struct perf_event *event,
1762 u64 read_format, char __user *buf)
1763{
1764 int n = 0, count = 0;
1765 u64 values[2];
1766
1767 values[n++] = perf_event_read_value(event);
1768 if (read_format & PERF_FORMAT_ID)
1769 values[n++] = primary_event_id(event);
1770
1771 count = n * sizeof(u64);
1772
1773 if (copy_to_user(buf, values, count))
1774 return -EFAULT;
1775
1776 return count;
1777}
1778 1810
1779static int perf_event_read_group(struct perf_event *event, 1811static int perf_event_read_group(struct perf_event *event,
1780 u64 read_format, char __user *buf) 1812 u64 read_format, char __user *buf)
1781{ 1813{
1782 struct perf_event *leader = event->group_leader, *sub; 1814 struct perf_event *leader = event->group_leader, *sub;
1783 int n = 0, size = 0, err = -EFAULT; 1815 int n = 0, size = 0, ret = -EFAULT;
1784 u64 values[3]; 1816 struct perf_event_context *ctx = leader->ctx;
1817 u64 values[5];
1818 u64 count, enabled, running;
1819
1820 mutex_lock(&ctx->mutex);
1821 count = perf_event_read_value(leader, &enabled, &running);
1785 1822
1786 values[n++] = 1 + leader->nr_siblings; 1823 values[n++] = 1 + leader->nr_siblings;
1787 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { 1824 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
1788 values[n++] = leader->total_time_enabled + 1825 values[n++] = enabled;
1789 atomic64_read(&leader->child_total_time_enabled); 1826 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
1790 } 1827 values[n++] = running;
1791 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { 1828 values[n++] = count;
1792 values[n++] = leader->total_time_running + 1829 if (read_format & PERF_FORMAT_ID)
1793 atomic64_read(&leader->child_total_time_running); 1830 values[n++] = primary_event_id(leader);
1794 }
1795 1831
1796 size = n * sizeof(u64); 1832 size = n * sizeof(u64);
1797 1833
1798 if (copy_to_user(buf, values, size)) 1834 if (copy_to_user(buf, values, size))
1799 return -EFAULT; 1835 goto unlock;
1800
1801 err = perf_event_read_entry(leader, read_format, buf + size);
1802 if (err < 0)
1803 return err;
1804 1836
1805 size += err; 1837 ret = size;
1806 1838
1807 list_for_each_entry(sub, &leader->sibling_list, group_entry) { 1839 list_for_each_entry(sub, &leader->sibling_list, group_entry) {
1808 err = perf_event_read_entry(sub, read_format, 1840 n = 0;
1809 buf + size); 1841
1810 if (err < 0) 1842 values[n++] = perf_event_read_value(sub, &enabled, &running);
1811 return err; 1843 if (read_format & PERF_FORMAT_ID)
1844 values[n++] = primary_event_id(sub);
1845
1846 size = n * sizeof(u64);
1812 1847
1813 size += err; 1848 if (copy_to_user(buf + ret, values, size)) {
1849 ret = -EFAULT;
1850 goto unlock;
1851 }
1852
1853 ret += size;
1814 } 1854 }
1855unlock:
1856 mutex_unlock(&ctx->mutex);
1815 1857
1816 return size; 1858 return ret;
1817} 1859}
1818 1860
1819static int perf_event_read_one(struct perf_event *event, 1861static int perf_event_read_one(struct perf_event *event,
1820 u64 read_format, char __user *buf) 1862 u64 read_format, char __user *buf)
1821{ 1863{
1864 u64 enabled, running;
1822 u64 values[4]; 1865 u64 values[4];
1823 int n = 0; 1866 int n = 0;
1824 1867
1825 values[n++] = perf_event_read_value(event); 1868 values[n++] = perf_event_read_value(event, &enabled, &running);
1826 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { 1869 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
1827 values[n++] = event->total_time_enabled + 1870 values[n++] = enabled;
1828 atomic64_read(&event->child_total_time_enabled); 1871 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
1829 } 1872 values[n++] = running;
1830 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
1831 values[n++] = event->total_time_running +
1832 atomic64_read(&event->child_total_time_running);
1833 }
1834 if (read_format & PERF_FORMAT_ID) 1873 if (read_format & PERF_FORMAT_ID)
1835 values[n++] = primary_event_id(event); 1874 values[n++] = primary_event_id(event);
1836 1875
@@ -1861,12 +1900,10 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count)
1861 return -ENOSPC; 1900 return -ENOSPC;
1862 1901
1863 WARN_ON_ONCE(event->ctx->parent_ctx); 1902 WARN_ON_ONCE(event->ctx->parent_ctx);
1864 mutex_lock(&event->child_mutex);
1865 if (read_format & PERF_FORMAT_GROUP) 1903 if (read_format & PERF_FORMAT_GROUP)
1866 ret = perf_event_read_group(event, read_format, buf); 1904 ret = perf_event_read_group(event, read_format, buf);
1867 else 1905 else
1868 ret = perf_event_read_one(event, read_format, buf); 1906 ret = perf_event_read_one(event, read_format, buf);
1869 mutex_unlock(&event->child_mutex);
1870 1907
1871 return ret; 1908 return ret;
1872} 1909}
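
perf_event_read_value() now takes child_mutex itself and hands back the enabled/running times alongside the summed count, which is what lets the group and single-event read paths above fill PERF_FORMAT_TOTAL_TIME_ENABLED/RUNNING from one call. A sketch of the kernel-side calling convention:

	static u64 demo_read(struct perf_event *event)
	{
		u64 enabled, running;

		/*
		 * Sums the event and all inherited child events under
		 * child_mutex and reports total enabled/running time, so the
		 * caller no longer walks child_list itself.
		 */
		return perf_event_read_value(event, &enabled, &running);
	}
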
@@ -1956,7 +1993,7 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
1956 if (!value) 1993 if (!value)
1957 return -EINVAL; 1994 return -EINVAL;
1958 1995
1959 spin_lock_irq(&ctx->lock); 1996 raw_spin_lock_irq(&ctx->lock);
1960 if (event->attr.freq) { 1997 if (event->attr.freq) {
1961 if (value > sysctl_perf_event_sample_rate) { 1998 if (value > sysctl_perf_event_sample_rate) {
1962 ret = -EINVAL; 1999 ret = -EINVAL;
@@ -1969,12 +2006,13 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
1969 event->hw.sample_period = value; 2006 event->hw.sample_period = value;
1970 } 2007 }
1971unlock: 2008unlock:
1972 spin_unlock_irq(&ctx->lock); 2009 raw_spin_unlock_irq(&ctx->lock);
1973 2010
1974 return ret; 2011 return ret;
1975} 2012}
1976 2013
1977int perf_event_set_output(struct perf_event *event, int output_fd); 2014static int perf_event_set_output(struct perf_event *event, int output_fd);
2015static int perf_event_set_filter(struct perf_event *event, void __user *arg);
1978 2016
1979static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 2017static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1980{ 2018{
@@ -2002,6 +2040,9 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2002 case PERF_EVENT_IOC_SET_OUTPUT: 2040 case PERF_EVENT_IOC_SET_OUTPUT:
2003 return perf_event_set_output(event, arg); 2041 return perf_event_set_output(event, arg);
2004 2042
2043 case PERF_EVENT_IOC_SET_FILTER:
2044 return perf_event_set_filter(event, (void __user *)arg);
2045
2005 default: 2046 default:
2006 return -ENOTTY; 2047 return -ENOTTY;
2007 } 2048 }
@@ -2174,6 +2215,7 @@ static void perf_mmap_data_free(struct perf_mmap_data *data)
2174 perf_mmap_free_page((unsigned long)data->user_page); 2215 perf_mmap_free_page((unsigned long)data->user_page);
2175 for (i = 0; i < data->nr_pages; i++) 2216 for (i = 0; i < data->nr_pages; i++)
2176 perf_mmap_free_page((unsigned long)data->data_pages[i]); 2217 perf_mmap_free_page((unsigned long)data->data_pages[i]);
2218 kfree(data);
2177} 2219}
2178 2220
2179#else 2221#else
@@ -2214,6 +2256,7 @@ static void perf_mmap_data_free_work(struct work_struct *work)
2214 perf_mmap_unmark_page(base + (i * PAGE_SIZE)); 2256 perf_mmap_unmark_page(base + (i * PAGE_SIZE));
2215 2257
2216 vfree(base); 2258 vfree(base);
2259 kfree(data);
2217} 2260}
2218 2261
2219static void perf_mmap_data_free(struct perf_mmap_data *data) 2262static void perf_mmap_data_free(struct perf_mmap_data *data)
@@ -2307,7 +2350,7 @@ perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data)
2307 } 2350 }
2308 2351
2309 if (!data->watermark) 2352 if (!data->watermark)
2310 data->watermark = max_t(long, PAGE_SIZE, max_size / 2); 2353 data->watermark = max_size / 2;
2311 2354
2312 2355
2313 rcu_assign_pointer(event->data, data); 2356 rcu_assign_pointer(event->data, data);
@@ -2319,7 +2362,6 @@ static void perf_mmap_data_free_rcu(struct rcu_head *rcu_head)
2319 2362
2320 data = container_of(rcu_head, struct perf_mmap_data, rcu_head); 2363 data = container_of(rcu_head, struct perf_mmap_data, rcu_head);
2321 perf_mmap_data_free(data); 2364 perf_mmap_data_free(data);
2322 kfree(data);
2323} 2365}
2324 2366
2325static void perf_mmap_data_release(struct perf_event *event) 2367static void perf_mmap_data_release(struct perf_event *event)
@@ -2666,20 +2708,21 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
2666static void perf_output_lock(struct perf_output_handle *handle) 2708static void perf_output_lock(struct perf_output_handle *handle)
2667{ 2709{
2668 struct perf_mmap_data *data = handle->data; 2710 struct perf_mmap_data *data = handle->data;
2669 int cpu; 2711 int cur, cpu = get_cpu();
2670 2712
2671 handle->locked = 0; 2713 handle->locked = 0;
2672 2714
2673 local_irq_save(handle->flags); 2715 for (;;) {
2674 cpu = smp_processor_id(); 2716 cur = atomic_cmpxchg(&data->lock, -1, cpu);
2675 2717 if (cur == -1) {
2676 if (in_nmi() && atomic_read(&data->lock) == cpu) 2718 handle->locked = 1;
2677 return; 2719 break;
2720 }
2721 if (cur == cpu)
2722 break;
2678 2723
2679 while (atomic_cmpxchg(&data->lock, -1, cpu) != -1)
2680 cpu_relax(); 2724 cpu_relax();
2681 2725 }
2682 handle->locked = 1;
2683} 2726}
2684 2727
2685static void perf_output_unlock(struct perf_output_handle *handle) 2728static void perf_output_unlock(struct perf_output_handle *handle)
@@ -2725,7 +2768,7 @@ again:
2725 if (atomic_xchg(&data->wakeup, 0)) 2768 if (atomic_xchg(&data->wakeup, 0))
2726 perf_output_wakeup(handle); 2769 perf_output_wakeup(handle);
2727out: 2770out:
2728 local_irq_restore(handle->flags); 2771 put_cpu();
2729} 2772}
2730 2773
2731void perf_output_copy(struct perf_output_handle *handle, 2774void perf_output_copy(struct perf_output_handle *handle,
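
The rewritten perf_output_lock() replaces the irq-disable plus in_nmi() special case with a compare-and-swap loop on data->lock: the owner's CPU number is the lock word (-1 means free), and a nested attempt from the same CPU (say, an NMI hitting inside the locked region) falls through with handle->locked left at 0, so only the outermost frame unlocks. A stripped-down sketch of that scheme over a hypothetical atomic_t:

	static atomic_t demo_owner = ATOMIC_INIT(-1);	/* -1 == unlocked */

	static int demo_output_trylock(void)	/* returns 1 if we must unlock later */
	{
		int cur, cpu = get_cpu();	/* paired put_cpu() on the unlock side */

		for (;;) {
			cur = atomic_cmpxchg(&demo_owner, -1, cpu);
			if (cur == -1)
				return 1;	/* took ownership */
			if (cur == cpu)
				return 0;	/* recursion on this CPU: already held */
			cpu_relax();		/* another CPU owns it: spin */
		}
	}
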
@@ -3225,6 +3268,9 @@ static void perf_event_task_output(struct perf_event *event,
3225 3268
3226static int perf_event_task_match(struct perf_event *event) 3269static int perf_event_task_match(struct perf_event *event)
3227{ 3270{
3271 if (event->cpu != -1 && event->cpu != smp_processor_id())
3272 return 0;
3273
3228 if (event->attr.comm || event->attr.mmap || event->attr.task) 3274 if (event->attr.comm || event->attr.mmap || event->attr.task)
3229 return 1; 3275 return 1;
3230 3276
@@ -3236,15 +3282,10 @@ static void perf_event_task_ctx(struct perf_event_context *ctx,
3236{ 3282{
3237 struct perf_event *event; 3283 struct perf_event *event;
3238 3284
3239 if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
3240 return;
3241
3242 rcu_read_lock();
3243 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 3285 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
3244 if (perf_event_task_match(event)) 3286 if (perf_event_task_match(event))
3245 perf_event_task_output(event, task_event); 3287 perf_event_task_output(event, task_event);
3246 } 3288 }
3247 rcu_read_unlock();
3248} 3289}
3249 3290
3250static void perf_event_task_event(struct perf_task_event *task_event) 3291static void perf_event_task_event(struct perf_task_event *task_event)
@@ -3252,15 +3293,14 @@ static void perf_event_task_event(struct perf_task_event *task_event)
3252 struct perf_cpu_context *cpuctx; 3293 struct perf_cpu_context *cpuctx;
3253 struct perf_event_context *ctx = task_event->task_ctx; 3294 struct perf_event_context *ctx = task_event->task_ctx;
3254 3295
3296 rcu_read_lock();
3255 cpuctx = &get_cpu_var(perf_cpu_context); 3297 cpuctx = &get_cpu_var(perf_cpu_context);
3256 perf_event_task_ctx(&cpuctx->ctx, task_event); 3298 perf_event_task_ctx(&cpuctx->ctx, task_event);
3257 put_cpu_var(perf_cpu_context);
3258
3259 rcu_read_lock();
3260 if (!ctx) 3299 if (!ctx)
3261 ctx = rcu_dereference(task_event->task->perf_event_ctxp); 3300 ctx = rcu_dereference(task_event->task->perf_event_ctxp);
3262 if (ctx) 3301 if (ctx)
3263 perf_event_task_ctx(ctx, task_event); 3302 perf_event_task_ctx(ctx, task_event);
3303 put_cpu_var(perf_cpu_context);
3264 rcu_read_unlock(); 3304 rcu_read_unlock();
3265} 3305}
3266 3306
@@ -3337,6 +3377,9 @@ static void perf_event_comm_output(struct perf_event *event,
3337 3377
3338static int perf_event_comm_match(struct perf_event *event) 3378static int perf_event_comm_match(struct perf_event *event)
3339{ 3379{
3380 if (event->cpu != -1 && event->cpu != smp_processor_id())
3381 return 0;
3382
3340 if (event->attr.comm) 3383 if (event->attr.comm)
3341 return 1; 3384 return 1;
3342 3385
@@ -3348,15 +3391,10 @@ static void perf_event_comm_ctx(struct perf_event_context *ctx,
3348{ 3391{
3349 struct perf_event *event; 3392 struct perf_event *event;
3350 3393
3351 if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
3352 return;
3353
3354 rcu_read_lock();
3355 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 3394 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
3356 if (perf_event_comm_match(event)) 3395 if (perf_event_comm_match(event))
3357 perf_event_comm_output(event, comm_event); 3396 perf_event_comm_output(event, comm_event);
3358 } 3397 }
3359 rcu_read_unlock();
3360} 3398}
3361 3399
3362static void perf_event_comm_event(struct perf_comm_event *comm_event) 3400static void perf_event_comm_event(struct perf_comm_event *comm_event)
@@ -3367,7 +3405,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
3367 char comm[TASK_COMM_LEN]; 3405 char comm[TASK_COMM_LEN];
3368 3406
3369 memset(comm, 0, sizeof(comm)); 3407 memset(comm, 0, sizeof(comm));
3370 strncpy(comm, comm_event->task->comm, sizeof(comm)); 3408 strlcpy(comm, comm_event->task->comm, sizeof(comm));
3371 size = ALIGN(strlen(comm)+1, sizeof(u64)); 3409 size = ALIGN(strlen(comm)+1, sizeof(u64));
3372 3410
3373 comm_event->comm = comm; 3411 comm_event->comm = comm;
@@ -3375,18 +3413,13 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
3375 3413
3376 comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; 3414 comm_event->event_id.header.size = sizeof(comm_event->event_id) + size;
3377 3415
3416 rcu_read_lock();
3378 cpuctx = &get_cpu_var(perf_cpu_context); 3417 cpuctx = &get_cpu_var(perf_cpu_context);
3379 perf_event_comm_ctx(&cpuctx->ctx, comm_event); 3418 perf_event_comm_ctx(&cpuctx->ctx, comm_event);
3380 put_cpu_var(perf_cpu_context);
3381
3382 rcu_read_lock();
3383 /*
3384 * doesn't really matter which of the child contexts the
3385 * events ends up in.
3386 */
3387 ctx = rcu_dereference(current->perf_event_ctxp); 3419 ctx = rcu_dereference(current->perf_event_ctxp);
3388 if (ctx) 3420 if (ctx)
3389 perf_event_comm_ctx(ctx, comm_event); 3421 perf_event_comm_ctx(ctx, comm_event);
3422 put_cpu_var(perf_cpu_context);
3390 rcu_read_unlock(); 3423 rcu_read_unlock();
3391} 3424}
3392 3425
@@ -3461,6 +3494,9 @@ static void perf_event_mmap_output(struct perf_event *event,
3461static int perf_event_mmap_match(struct perf_event *event, 3494static int perf_event_mmap_match(struct perf_event *event,
3462 struct perf_mmap_event *mmap_event) 3495 struct perf_mmap_event *mmap_event)
3463{ 3496{
3497 if (event->cpu != -1 && event->cpu != smp_processor_id())
3498 return 0;
3499
3464 if (event->attr.mmap) 3500 if (event->attr.mmap)
3465 return 1; 3501 return 1;
3466 3502
@@ -3472,15 +3508,10 @@ static void perf_event_mmap_ctx(struct perf_event_context *ctx,
3472{ 3508{
3473 struct perf_event *event; 3509 struct perf_event *event;
3474 3510
3475 if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
3476 return;
3477
3478 rcu_read_lock();
3479 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 3511 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
3480 if (perf_event_mmap_match(event, mmap_event)) 3512 if (perf_event_mmap_match(event, mmap_event))
3481 perf_event_mmap_output(event, mmap_event); 3513 perf_event_mmap_output(event, mmap_event);
3482 } 3514 }
3483 rcu_read_unlock();
3484} 3515}
3485 3516
3486static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) 3517static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
@@ -3536,18 +3567,13 @@ got_name:
3536 3567
3537 mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; 3568 mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size;
3538 3569
3570 rcu_read_lock();
3539 cpuctx = &get_cpu_var(perf_cpu_context); 3571 cpuctx = &get_cpu_var(perf_cpu_context);
3540 perf_event_mmap_ctx(&cpuctx->ctx, mmap_event); 3572 perf_event_mmap_ctx(&cpuctx->ctx, mmap_event);
3541 put_cpu_var(perf_cpu_context);
3542
3543 rcu_read_lock();
3544 /*
3545 * doesn't really matter which of the child contexts the
3546 * events ends up in.
3547 */
3548 ctx = rcu_dereference(current->perf_event_ctxp); 3573 ctx = rcu_dereference(current->perf_event_ctxp);
3549 if (ctx) 3574 if (ctx)
3550 perf_event_mmap_ctx(ctx, mmap_event); 3575 perf_event_mmap_ctx(ctx, mmap_event);
3576 put_cpu_var(perf_cpu_context);
3551 rcu_read_unlock(); 3577 rcu_read_unlock();
3552 3578
3553 kfree(buf); 3579 kfree(buf);
@@ -3679,7 +3705,11 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
3679 perf_event_disable(event); 3705 perf_event_disable(event);
3680 } 3706 }
3681 3707
3682 perf_event_output(event, nmi, data, regs); 3708 if (event->overflow_handler)
3709 event->overflow_handler(event, nmi, data, regs);
3710 else
3711 perf_event_output(event, nmi, data, regs);
3712
3683 return ret; 3713 return ret;
3684} 3714}
3685 3715
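
__perf_event_overflow() now prefers event->overflow_handler over the default perf_event_output(), letting in-kernel users consume samples directly instead of going through the mmap ring buffer. A hypothetical handler with the shape implied by the call site above:

	static void demo_overflow_handler(struct perf_event *event, int nmi,
					  struct perf_sample_data *data,
					  struct pt_regs *regs)
	{
		/*
		 * Invoked instead of perf_event_output(), so the sample never
		 * reaches the ring buffer. May run in NMI context, so it must
		 * not sleep.
		 */
	}
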
@@ -3724,16 +3754,16 @@ again:
3724 return nr; 3754 return nr;
3725} 3755}
3726 3756
3727static void perf_swevent_overflow(struct perf_event *event, 3757static void perf_swevent_overflow(struct perf_event *event, u64 overflow,
3728 int nmi, struct perf_sample_data *data, 3758 int nmi, struct perf_sample_data *data,
3729 struct pt_regs *regs) 3759 struct pt_regs *regs)
3730{ 3760{
3731 struct hw_perf_event *hwc = &event->hw; 3761 struct hw_perf_event *hwc = &event->hw;
3732 int throttle = 0; 3762 int throttle = 0;
3733 u64 overflow;
3734 3763
3735 data->period = event->hw.last_period; 3764 data->period = event->hw.last_period;
3736 overflow = perf_swevent_set_period(event); 3765 if (!overflow)
3766 overflow = perf_swevent_set_period(event);
3737 3767
3738 if (hwc->interrupts == MAX_INTERRUPTS) 3768 if (hwc->interrupts == MAX_INTERRUPTS)
3739 return; 3769 return;
@@ -3766,14 +3796,19 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,
3766 3796
3767 atomic64_add(nr, &event->count); 3797 atomic64_add(nr, &event->count);
3768 3798
3799 if (!regs)
3800 return;
3801
3769 if (!hwc->sample_period) 3802 if (!hwc->sample_period)
3770 return; 3803 return;
3771 3804
3772 if (!regs) 3805 if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq)
3806 return perf_swevent_overflow(event, 1, nmi, data, regs);
3807
3808 if (atomic64_add_negative(nr, &hwc->period_left))
3773 return; 3809 return;
3774 3810
3775 if (!atomic64_add_negative(nr, &hwc->period_left)) 3811 perf_swevent_overflow(event, 0, nmi, data, regs);
3776 perf_swevent_overflow(event, nmi, data, regs);
3777} 3812}
3778 3813
3779static int perf_swevent_is_counting(struct perf_event *event) 3814static int perf_swevent_is_counting(struct perf_event *event)
@@ -3806,25 +3841,47 @@ static int perf_swevent_is_counting(struct perf_event *event)
3806 return 1; 3841 return 1;
3807} 3842}
3808 3843
3844static int perf_tp_event_match(struct perf_event *event,
3845 struct perf_sample_data *data);
3846
3847static int perf_exclude_event(struct perf_event *event,
3848 struct pt_regs *regs)
3849{
3850 if (regs) {
3851 if (event->attr.exclude_user && user_mode(regs))
3852 return 1;
3853
3854 if (event->attr.exclude_kernel && !user_mode(regs))
3855 return 1;
3856 }
3857
3858 return 0;
3859}
3860
3809static int perf_swevent_match(struct perf_event *event, 3861static int perf_swevent_match(struct perf_event *event,
3810 enum perf_type_id type, 3862 enum perf_type_id type,
3811 u32 event_id, struct pt_regs *regs) 3863 u32 event_id,
3864 struct perf_sample_data *data,
3865 struct pt_regs *regs)
3812{ 3866{
3867 if (event->cpu != -1 && event->cpu != smp_processor_id())
3868 return 0;
3869
3813 if (!perf_swevent_is_counting(event)) 3870 if (!perf_swevent_is_counting(event))
3814 return 0; 3871 return 0;
3815 3872
3816 if (event->attr.type != type) 3873 if (event->attr.type != type)
3817 return 0; 3874 return 0;
3875
3818 if (event->attr.config != event_id) 3876 if (event->attr.config != event_id)
3819 return 0; 3877 return 0;
3820 3878
3821 if (regs) { 3879 if (perf_exclude_event(event, regs))
3822 if (event->attr.exclude_user && user_mode(regs)) 3880 return 0;
3823 return 0;
3824 3881
3825 if (event->attr.exclude_kernel && !user_mode(regs)) 3882 if (event->attr.type == PERF_TYPE_TRACEPOINT &&
3826 return 0; 3883 !perf_tp_event_match(event, data))
3827 } 3884 return 0;
3828 3885
3829 return 1; 3886 return 1;
3830} 3887}
@@ -3837,49 +3894,59 @@ static void perf_swevent_ctx_event(struct perf_event_context *ctx,
3837{ 3894{
3838 struct perf_event *event; 3895 struct perf_event *event;
3839 3896
3840 if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
3841 return;
3842
3843 rcu_read_lock();
3844 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 3897 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
3845 if (perf_swevent_match(event, type, event_id, regs)) 3898 if (perf_swevent_match(event, type, event_id, data, regs))
3846 perf_swevent_add(event, nr, nmi, data, regs); 3899 perf_swevent_add(event, nr, nmi, data, regs);
3847 } 3900 }
3848 rcu_read_unlock();
3849} 3901}
3850 3902
3851static int *perf_swevent_recursion_context(struct perf_cpu_context *cpuctx) 3903int perf_swevent_get_recursion_context(void)
3852{ 3904{
3905 struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
3906 int rctx;
3907
3853 if (in_nmi()) 3908 if (in_nmi())
3854 return &cpuctx->recursion[3]; 3909 rctx = 3;
3910 else if (in_irq())
3911 rctx = 2;
3912 else if (in_softirq())
3913 rctx = 1;
3914 else
3915 rctx = 0;
3855 3916
3856 if (in_irq()) 3917 if (cpuctx->recursion[rctx]) {
3857 return &cpuctx->recursion[2]; 3918 put_cpu_var(perf_cpu_context);
3919 return -1;
3920 }
3858 3921
3859 if (in_softirq()) 3922 cpuctx->recursion[rctx]++;
3860 return &cpuctx->recursion[1]; 3923 barrier();
3861 3924
3862 return &cpuctx->recursion[0]; 3925 return rctx;
3926}
3927EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);
3928
3929void perf_swevent_put_recursion_context(int rctx)
3930{
3931 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
3932 barrier();
3933 cpuctx->recursion[rctx]--;
3934 put_cpu_var(perf_cpu_context);
3863} 3935}
3936EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context);
3864 3937
3865static void do_perf_sw_event(enum perf_type_id type, u32 event_id, 3938static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
3866 u64 nr, int nmi, 3939 u64 nr, int nmi,
3867 struct perf_sample_data *data, 3940 struct perf_sample_data *data,
3868 struct pt_regs *regs) 3941 struct pt_regs *regs)
3869{ 3942{
3870 struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); 3943 struct perf_cpu_context *cpuctx;
3871 int *recursion = perf_swevent_recursion_context(cpuctx);
3872 struct perf_event_context *ctx; 3944 struct perf_event_context *ctx;
3873 3945
3874 if (*recursion) 3946 cpuctx = &__get_cpu_var(perf_cpu_context);
3875 goto out; 3947 rcu_read_lock();
3876
3877 (*recursion)++;
3878 barrier();
3879
3880 perf_swevent_ctx_event(&cpuctx->ctx, type, event_id, 3948 perf_swevent_ctx_event(&cpuctx->ctx, type, event_id,
3881 nr, nmi, data, regs); 3949 nr, nmi, data, regs);
3882 rcu_read_lock();
3883 /* 3950 /*
3884 * doesn't really matter which of the child contexts the 3951 * doesn't really matter which of the child contexts the
3885 * events ends up in. 3952 * events ends up in.
@@ -3888,23 +3955,24 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
3888 if (ctx) 3955 if (ctx)
3889 perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs); 3956 perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs);
3890 rcu_read_unlock(); 3957 rcu_read_unlock();
3891
3892 barrier();
3893 (*recursion)--;
3894
3895out:
3896 put_cpu_var(perf_cpu_context);
3897} 3958}
3898 3959
3899void __perf_sw_event(u32 event_id, u64 nr, int nmi, 3960void __perf_sw_event(u32 event_id, u64 nr, int nmi,
3900 struct pt_regs *regs, u64 addr) 3961 struct pt_regs *regs, u64 addr)
3901{ 3962{
3902 struct perf_sample_data data = { 3963 struct perf_sample_data data;
3903 .addr = addr, 3964 int rctx;
3904 };
3905 3965
3906 do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, 3966 rctx = perf_swevent_get_recursion_context();
3907 &data, regs); 3967 if (rctx < 0)
3968 return;
3969
3970 data.addr = addr;
3971 data.raw = NULL;
3972
3973 do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs);
3974
3975 perf_swevent_put_recursion_context(rctx);
3908} 3976}
3909 3977
3910static void perf_swevent_read(struct perf_event *event) 3978static void perf_swevent_read(struct perf_event *event)
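
perf_swevent_get_recursion_context()/perf_swevent_put_recursion_context() are split out and exported so callers outside this file can do their own recursion guarding (the tracepoint path notes below that trace events are "already protected against recursion"). The calling convention, as used by __perf_sw_event() above:

	static void demo_emit_sw_event(void)
	{
		int rctx = perf_swevent_get_recursion_context();

		if (rctx < 0)
			return;		/* already nested at this context level */

		/* ... build the sample data and emit the software event ... */

		perf_swevent_put_recursion_context(rctx);
	}
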
@@ -3949,6 +4017,8 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
3949 event->pmu->read(event); 4017 event->pmu->read(event);
3950 4018
3951 data.addr = 0; 4019 data.addr = 0;
4020 data.raw = NULL;
4021 data.period = event->hw.last_period;
3952 regs = get_irq_regs(); 4022 regs = get_irq_regs();
3953 /* 4023 /*
3954 * In case we exclude kernel IPs or are somehow not in interrupt 4024 * In case we exclude kernel IPs or are somehow not in interrupt
@@ -4017,8 +4087,7 @@ static void cpu_clock_perf_event_update(struct perf_event *event)
4017 u64 now; 4087 u64 now;
4018 4088
4019 now = cpu_clock(cpu); 4089 now = cpu_clock(cpu);
4020 prev = atomic64_read(&event->hw.prev_count); 4090 prev = atomic64_xchg(&event->hw.prev_count, now);
4021 atomic64_set(&event->hw.prev_count, now);
4022 atomic64_add(now - prev, &event->count); 4091 atomic64_add(now - prev, &event->count);
4023} 4092}
4024 4093
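
The cpu-clock update switches from a separate read and set of hw.prev_count to a single atomic64_xchg(), so a racing update cannot observe a stale prev_count between the read and the store. The resulting idiom, sketched over hypothetical state:

	static atomic64_t demo_prev;	/* illustrative counters only */
	static atomic64_t demo_count;

	static void demo_clock_update(u64 now)
	{
		u64 prev = atomic64_xchg(&demo_prev, now);	/* read + set in one step */

		atomic64_add(now - prev, &demo_count);
	}
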
@@ -4108,6 +4177,7 @@ static const struct pmu perf_ops_task_clock = {
4108}; 4177};
4109 4178
4110#ifdef CONFIG_EVENT_PROFILE 4179#ifdef CONFIG_EVENT_PROFILE
4180
4111void perf_tp_event(int event_id, u64 addr, u64 count, void *record, 4181void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
4112 int entry_size) 4182 int entry_size)
4113{ 4183{
@@ -4126,13 +4196,21 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
4126 if (!regs) 4196 if (!regs)
4127 regs = task_pt_regs(current); 4197 regs = task_pt_regs(current);
4128 4198
4199 /* Trace events already protected against recursion */
4129 do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, 4200 do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1,
4130 &data, regs); 4201 &data, regs);
4131} 4202}
4132EXPORT_SYMBOL_GPL(perf_tp_event); 4203EXPORT_SYMBOL_GPL(perf_tp_event);
4133 4204
4134extern int ftrace_profile_enable(int); 4205static int perf_tp_event_match(struct perf_event *event,
4135extern void ftrace_profile_disable(int); 4206 struct perf_sample_data *data)
4207{
4208 void *record = data->raw->data;
4209
4210 if (likely(!event->filter) || filter_match_preds(event->filter, record))
4211 return 1;
4212 return 0;
4213}
4136 4214
4137static void tp_perf_event_destroy(struct perf_event *event) 4215static void tp_perf_event_destroy(struct perf_event *event)
4138{ 4216{
@@ -4157,11 +4235,93 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event)
4157 4235
4158 return &perf_ops_generic; 4236 return &perf_ops_generic;
4159} 4237}
4238
4239static int perf_event_set_filter(struct perf_event *event, void __user *arg)
4240{
4241 char *filter_str;
4242 int ret;
4243
4244 if (event->attr.type != PERF_TYPE_TRACEPOINT)
4245 return -EINVAL;
4246
4247 filter_str = strndup_user(arg, PAGE_SIZE);
4248 if (IS_ERR(filter_str))
4249 return PTR_ERR(filter_str);
4250
4251 ret = ftrace_profile_set_filter(event, event->attr.config, filter_str);
4252
4253 kfree(filter_str);
4254 return ret;
4255}
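
For illustration (not part of the patch): the filter string handed to perf_event_set_filter() normally arrives from user space over the event file descriptor. A rough user-space sketch, assuming the PERF_EVENT_IOC_SET_FILTER ioctl this series adds and a tracepoint id read from debugfs:

#include <linux/perf_event.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Open a counter on a tracepoint whose numeric id was read from
 * .../tracing/events/<subsys>/<event>/id (the id source is an assumption). */
static int open_tracepoint_counter(unsigned long long tp_id)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_TRACEPOINT;
	attr.size = sizeof(attr);
	attr.config = tp_id;
	attr.sample_period = 1;

	/* current task, any cpu, no group leader, no flags */
	return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}

/* Attach an ftrace-style predicate; only matching records are counted. */
static int set_counter_filter(int fd)
{
	return ioctl(fd, PERF_EVENT_IOC_SET_FILTER, "common_pid != 0");
}
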
4256
4257static void perf_event_free_filter(struct perf_event *event)
4258{
4259 ftrace_profile_free_filter(event);
4260}
4261
4160#else 4262#else
4263
4264static int perf_tp_event_match(struct perf_event *event,
4265 struct perf_sample_data *data)
4266{
4267 return 1;
4268}
4269
4161static const struct pmu *tp_perf_event_init(struct perf_event *event) 4270static const struct pmu *tp_perf_event_init(struct perf_event *event)
4162{ 4271{
4163 return NULL; 4272 return NULL;
4164} 4273}
4274
4275static int perf_event_set_filter(struct perf_event *event, void __user *arg)
4276{
4277 return -ENOENT;
4278}
4279
4280static void perf_event_free_filter(struct perf_event *event)
4281{
4282}
4283
4284#endif /* CONFIG_EVENT_PROFILE */
4285
4286#ifdef CONFIG_HAVE_HW_BREAKPOINT
4287static void bp_perf_event_destroy(struct perf_event *event)
4288{
4289 release_bp_slot(event);
4290}
4291
4292static const struct pmu *bp_perf_event_init(struct perf_event *bp)
4293{
4294 int err;
4295
4296 err = register_perf_hw_breakpoint(bp);
4297 if (err)
4298 return ERR_PTR(err);
4299
4300 bp->destroy = bp_perf_event_destroy;
4301
4302 return &perf_ops_bp;
4303}
4304
4305void perf_bp_event(struct perf_event *bp, void *data)
4306{
4307 struct perf_sample_data sample;
4308 struct pt_regs *regs = data;
4309
4310 sample.raw = NULL;
4311 sample.addr = bp->attr.bp_addr;
4312
4313 if (!perf_exclude_event(bp, regs))
4314 perf_swevent_add(bp, 1, 1, &sample, regs);
4315}
4316#else
4317static const struct pmu *bp_perf_event_init(struct perf_event *bp)
4318{
4319 return NULL;
4320}
4321
4322void perf_bp_event(struct perf_event *bp, void *regs)
4323{
4324}
4165#endif 4325#endif
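
For illustration (not part of the patch): with bp_perf_event_init() wired into perf_event_alloc() below, a kernel-side watchpoint becomes an ordinary perf attribute plus an overflow callback. A sketch under the assumption that the bp_addr/bp_len/bp_type attribute fields and the HW_BREAKPOINT_* constants from the hw_breakpoint work are available, and that perf_overflow_handler_t has the (event, nmi, data, regs) prototype used here:

#include <linux/kernel.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>

static int watched_value;

/* Assumed perf_overflow_handler_t prototype at this stage of the series. */
static void watch_triggered(struct perf_event *bp, int nmi,
			    struct perf_sample_data *data,
			    struct pt_regs *regs)
{
	pr_info("watched_value was written\n");
}

static struct perf_event *install_watchpoint(int cpu)
{
	struct perf_event_attr attr = {
		.type		= PERF_TYPE_BREAKPOINT,
		.size		= sizeof(attr),
		.bp_addr	= (unsigned long)&watched_value,
		.bp_len		= HW_BREAKPOINT_LEN_4,
		.bp_type	= HW_BREAKPOINT_W,
		.sample_period	= 1,
	};

	/* pid == -1, cpu >= 0: bind the counter to a cpu rather than a task */
	return perf_event_create_kernel_counter(&attr, cpu, -1, watch_triggered);
}
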
4166 4326
4167atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; 4327atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
@@ -4208,6 +4368,8 @@ static const struct pmu *sw_perf_event_init(struct perf_event *event)
4208 case PERF_COUNT_SW_PAGE_FAULTS_MAJ: 4368 case PERF_COUNT_SW_PAGE_FAULTS_MAJ:
4209 case PERF_COUNT_SW_CONTEXT_SWITCHES: 4369 case PERF_COUNT_SW_CONTEXT_SWITCHES:
4210 case PERF_COUNT_SW_CPU_MIGRATIONS: 4370 case PERF_COUNT_SW_CPU_MIGRATIONS:
4371 case PERF_COUNT_SW_ALIGNMENT_FAULTS:
4372 case PERF_COUNT_SW_EMULATION_FAULTS:
4211 if (!event->parent) { 4373 if (!event->parent) {
4212 atomic_inc(&perf_swevent_enabled[event_id]); 4374 atomic_inc(&perf_swevent_enabled[event_id]);
4213 event->destroy = sw_perf_event_destroy; 4375 event->destroy = sw_perf_event_destroy;
@@ -4228,6 +4390,7 @@ perf_event_alloc(struct perf_event_attr *attr,
4228 struct perf_event_context *ctx, 4390 struct perf_event_context *ctx,
4229 struct perf_event *group_leader, 4391 struct perf_event *group_leader,
4230 struct perf_event *parent_event, 4392 struct perf_event *parent_event,
4393 perf_overflow_handler_t overflow_handler,
4231 gfp_t gfpflags) 4394 gfp_t gfpflags)
4232{ 4395{
4233 const struct pmu *pmu; 4396 const struct pmu *pmu;
@@ -4270,6 +4433,11 @@ perf_event_alloc(struct perf_event_attr *attr,
4270 4433
4271 event->state = PERF_EVENT_STATE_INACTIVE; 4434 event->state = PERF_EVENT_STATE_INACTIVE;
4272 4435
4436 if (!overflow_handler && parent_event)
4437 overflow_handler = parent_event->overflow_handler;
4438
4439 event->overflow_handler = overflow_handler;
4440
4273 if (attr->disabled) 4441 if (attr->disabled)
4274 event->state = PERF_EVENT_STATE_OFF; 4442 event->state = PERF_EVENT_STATE_OFF;
4275 4443
@@ -4304,6 +4472,11 @@ perf_event_alloc(struct perf_event_attr *attr,
4304 pmu = tp_perf_event_init(event); 4472 pmu = tp_perf_event_init(event);
4305 break; 4473 break;
4306 4474
4475 case PERF_TYPE_BREAKPOINT:
4476 pmu = bp_perf_event_init(event);
4477 break;
4478
4479
4307 default: 4480 default:
4308 break; 4481 break;
4309 } 4482 }
@@ -4398,7 +4571,7 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
4398 if (attr->type >= PERF_TYPE_MAX) 4571 if (attr->type >= PERF_TYPE_MAX)
4399 return -EINVAL; 4572 return -EINVAL;
4400 4573
4401 if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3) 4574 if (attr->__reserved_1 || attr->__reserved_2)
4402 return -EINVAL; 4575 return -EINVAL;
4403 4576
4404 if (attr->sample_type & ~(PERF_SAMPLE_MAX-1)) 4577 if (attr->sample_type & ~(PERF_SAMPLE_MAX-1))
@@ -4416,7 +4589,7 @@ err_size:
4416 goto out; 4589 goto out;
4417} 4590}
4418 4591
4419int perf_event_set_output(struct perf_event *event, int output_fd) 4592static int perf_event_set_output(struct perf_event *event, int output_fd)
4420{ 4593{
4421 struct perf_event *output_event = NULL; 4594 struct perf_event *output_event = NULL;
4422 struct file *output_file = NULL; 4595 struct file *output_file = NULL;
@@ -4546,12 +4719,12 @@ SYSCALL_DEFINE5(perf_event_open,
4546 } 4719 }
4547 4720
4548 event = perf_event_alloc(&attr, cpu, ctx, group_leader, 4721 event = perf_event_alloc(&attr, cpu, ctx, group_leader,
4549 NULL, GFP_KERNEL); 4722 NULL, NULL, GFP_KERNEL);
4550 err = PTR_ERR(event); 4723 err = PTR_ERR(event);
4551 if (IS_ERR(event)) 4724 if (IS_ERR(event))
4552 goto err_put_context; 4725 goto err_put_context;
4553 4726
4554 err = anon_inode_getfd("[perf_event]", &perf_fops, event, 0); 4727 err = anon_inode_getfd("[perf_event]", &perf_fops, event, O_RDWR);
4555 if (err < 0) 4728 if (err < 0)
4556 goto err_free_put_context; 4729 goto err_free_put_context;
4557 4730
@@ -4594,6 +4767,61 @@ err_put_context:
4594 return err; 4767 return err;
4595} 4768}
4596 4769
4770/**
4771 * perf_event_create_kernel_counter
4772 *
4773 * @attr: attributes of the counter to create
4774 * @cpu: cpu on which to bind the counter
4775 * @pid: task to profile
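 * @overflow_handler: callback to run when the counter overflows (may be NULL)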
4776 */
4777struct perf_event *
4778perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
4779 pid_t pid,
4780 perf_overflow_handler_t overflow_handler)
4781{
4782 struct perf_event *event;
4783 struct perf_event_context *ctx;
4784 int err;
4785
4786 /*
4787 * Get the target context (task or percpu):
4788 */
4789
4790 ctx = find_get_context(pid, cpu);
4791 if (IS_ERR(ctx)) {
4792 err = PTR_ERR(ctx);
4793 goto err_exit;
4794 }
4795
4796 event = perf_event_alloc(attr, cpu, ctx, NULL,
4797 NULL, overflow_handler, GFP_KERNEL);
4798 if (IS_ERR(event)) {
4799 err = PTR_ERR(event);
4800 goto err_put_context;
4801 }
4802
4803 event->filp = NULL;
4804 WARN_ON_ONCE(ctx->parent_ctx);
4805 mutex_lock(&ctx->mutex);
4806 perf_install_in_context(ctx, event, cpu);
4807 ++ctx->generation;
4808 mutex_unlock(&ctx->mutex);
4809
4810 event->owner = current;
4811 get_task_struct(current);
4812 mutex_lock(&current->perf_event_mutex);
4813 list_add_tail(&event->owner_entry, &current->perf_event_list);
4814 mutex_unlock(&current->perf_event_mutex);
4815
4816 return event;
4817
4818 err_put_context:
4819 put_ctx(ctx);
4820 err_exit:
4821 return ERR_PTR(err);
4822}
4823EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);
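
For illustration (not part of the patch): because the new overflow_handler argument is threaded through perf_event_alloc(), an in-kernel user can receive a callback on every counter overflow without touching the mmap ring buffer. A minimal sketch of a caller, again assuming the (event, nmi, data, regs) handler prototype used at this stage:

#include <linux/kernel.h>
#include <linux/perf_event.h>

static void cycles_overflow(struct perf_event *event, int nmi,
			    struct perf_sample_data *data,
			    struct pt_regs *regs)
{
	/* Runs from the PMU interrupt/NMI path; keep it minimal. */
	pr_debug("cycle counter overflow, last period %llu\n",
		 (unsigned long long)event->hw.last_period);
}

static struct perf_event *start_cycle_counter(int cpu)
{
	struct perf_event_attr attr = {
		.type		= PERF_TYPE_HARDWARE,
		.config		= PERF_COUNT_HW_CPU_CYCLES,
		.size		= sizeof(attr),
		.sample_period	= 1000000,	/* callback roughly every 1M cycles */
	};

	/* pid == -1, cpu >= 0: a per-cpu counter not bound to one task */
	return perf_event_create_kernel_counter(&attr, cpu, -1, cycles_overflow);
}
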
4824
4597/* 4825/*
4598 * inherit an event from parent task to child task: 4826 * inherit an event from parent task to child task:
4599 */ 4827 */
@@ -4619,7 +4847,7 @@ inherit_event(struct perf_event *parent_event,
4619 child_event = perf_event_alloc(&parent_event->attr, 4847 child_event = perf_event_alloc(&parent_event->attr,
4620 parent_event->cpu, child_ctx, 4848 parent_event->cpu, child_ctx,
4621 group_leader, parent_event, 4849 group_leader, parent_event,
4622 GFP_KERNEL); 4850 NULL, GFP_KERNEL);
4623 if (IS_ERR(child_event)) 4851 if (IS_ERR(child_event))
4624 return child_event; 4852 return child_event;
4625 get_ctx(child_ctx); 4853 get_ctx(child_ctx);
@@ -4637,6 +4865,8 @@ inherit_event(struct perf_event *parent_event,
4637 if (parent_event->attr.freq) 4865 if (parent_event->attr.freq)
4638 child_event->hw.sample_period = parent_event->hw.sample_period; 4866 child_event->hw.sample_period = parent_event->hw.sample_period;
4639 4867
4868 child_event->overflow_handler = parent_event->overflow_handler;
4869
4640 /* 4870 /*
4641 * Link it up in the child's context: 4871 * Link it up in the child's context:
4642 */ 4872 */
@@ -4726,7 +4956,6 @@ __perf_event_exit_task(struct perf_event *child_event,
4726{ 4956{
4727 struct perf_event *parent_event; 4957 struct perf_event *parent_event;
4728 4958
4729 update_event_times(child_event);
4730 perf_event_remove_from_context(child_event); 4959 perf_event_remove_from_context(child_event);
4731 4960
4732 parent_event = child_event->parent; 4961 parent_event = child_event->parent;
@@ -4770,7 +4999,7 @@ void perf_event_exit_task(struct task_struct *child)
4770 * reading child->perf_event_ctxp, we wait until it has 4999 * reading child->perf_event_ctxp, we wait until it has
4771 * incremented the context's refcount before we do put_ctx below. 5000 * incremented the context's refcount before we do put_ctx below.
4772 */ 5001 */
4773 spin_lock(&child_ctx->lock); 5002 raw_spin_lock(&child_ctx->lock);
4774 child->perf_event_ctxp = NULL; 5003 child->perf_event_ctxp = NULL;
4775 /* 5004 /*
4776 * If this context is a clone; unclone it so it can't get 5005 * If this context is a clone; unclone it so it can't get
@@ -4778,7 +5007,8 @@ void perf_event_exit_task(struct task_struct *child)
4778 * the events from it. 5007 * the events from it.
4779 */ 5008 */
4780 unclone_ctx(child_ctx); 5009 unclone_ctx(child_ctx);
4781 spin_unlock_irqrestore(&child_ctx->lock, flags); 5010 update_context_time(child_ctx);
5011 raw_spin_unlock_irqrestore(&child_ctx->lock, flags);
4782 5012
4783 /* 5013 /*
4784 * Report the task dead after unscheduling the events so that we 5014 * Report the task dead after unscheduling the events so that we
@@ -4861,7 +5091,7 @@ again:
4861 */ 5091 */
4862int perf_event_init_task(struct task_struct *child) 5092int perf_event_init_task(struct task_struct *child)
4863{ 5093{
4864 struct perf_event_context *child_ctx, *parent_ctx; 5094 struct perf_event_context *child_ctx = NULL, *parent_ctx;
4865 struct perf_event_context *cloned_ctx; 5095 struct perf_event_context *cloned_ctx;
4866 struct perf_event *event; 5096 struct perf_event *event;
4867 struct task_struct *parent = current; 5097 struct task_struct *parent = current;
@@ -4877,20 +5107,6 @@ int perf_event_init_task(struct task_struct *child)
4877 return 0; 5107 return 0;
4878 5108
4879 /* 5109 /*
4880 * This is executed from the parent task context, so inherit
4881 * events that have been marked for cloning.
4882 * First allocate and initialize a context for the child.
4883 */
4884
4885 child_ctx = kmalloc(sizeof(struct perf_event_context), GFP_KERNEL);
4886 if (!child_ctx)
4887 return -ENOMEM;
4888
4889 __perf_event_init_context(child_ctx, child);
4890 child->perf_event_ctxp = child_ctx;
4891 get_task_struct(child);
4892
4893 /*
4894 * If the parent's context is a clone, pin it so it won't get 5110 * If the parent's context is a clone, pin it so it won't get
4895 * swapped under us. 5111 * swapped under us.
4896 */ 5112 */
@@ -4920,6 +5136,26 @@ int perf_event_init_task(struct task_struct *child)
4920 continue; 5136 continue;
4921 } 5137 }
4922 5138
5139 if (!child->perf_event_ctxp) {
5140 /*
5141 * This is executed from the parent task context, so
5142 * inherit events that have been marked for cloning.
5143 * First allocate and initialize a context for the
5144 * child.
5145 */
5146
5147 child_ctx = kzalloc(sizeof(struct perf_event_context),
5148 GFP_KERNEL);
5149 if (!child_ctx) {
5150 ret = -ENOMEM;
5151 goto exit;
5152 }
5153
5154 __perf_event_init_context(child_ctx, child);
5155 child->perf_event_ctxp = child_ctx;
5156 get_task_struct(child);
5157 }
5158
4923 ret = inherit_group(event, parent, parent_ctx, 5159 ret = inherit_group(event, parent, parent_ctx,
4924 child, child_ctx); 5160 child, child_ctx);
4925 if (ret) { 5161 if (ret) {
@@ -4948,6 +5184,7 @@ int perf_event_init_task(struct task_struct *child)
4948 get_ctx(child_ctx->parent_ctx); 5184 get_ctx(child_ctx->parent_ctx);
4949 } 5185 }
4950 5186
5187exit:
4951 mutex_unlock(&parent_ctx->mutex); 5188 mutex_unlock(&parent_ctx->mutex);
4952 5189
4953 perf_unpin_context(parent_ctx); 5190 perf_unpin_context(parent_ctx);
@@ -5062,11 +5299,11 @@ perf_set_reserve_percpu(struct sysdev_class *class,
5062 perf_reserved_percpu = val; 5299 perf_reserved_percpu = val;
5063 for_each_online_cpu(cpu) { 5300 for_each_online_cpu(cpu) {
5064 cpuctx = &per_cpu(perf_cpu_context, cpu); 5301 cpuctx = &per_cpu(perf_cpu_context, cpu);
5065 spin_lock_irq(&cpuctx->ctx.lock); 5302 raw_spin_lock_irq(&cpuctx->ctx.lock);
5066 mpt = min(perf_max_events - cpuctx->ctx.nr_events, 5303 mpt = min(perf_max_events - cpuctx->ctx.nr_events,
5067 perf_max_events - perf_reserved_percpu); 5304 perf_max_events - perf_reserved_percpu);
5068 cpuctx->max_pertask = mpt; 5305 cpuctx->max_pertask = mpt;
5069 spin_unlock_irq(&cpuctx->ctx.lock); 5306 raw_spin_unlock_irq(&cpuctx->ctx.lock);
5070 } 5307 }
5071 spin_unlock(&perf_resource_lock); 5308 spin_unlock(&perf_resource_lock);
5072 5309