path: root/kernel/perf_event.c
Diffstat (limited to 'kernel/perf_event.c')
-rw-r--r--  kernel/perf_event.c  |  835
1 file changed, 540 insertions(+), 295 deletions(-)
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 7f29643c8985..d27746bd3a06 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -28,13 +28,15 @@
28#include <linux/anon_inodes.h> 28#include <linux/anon_inodes.h>
29#include <linux/kernel_stat.h> 29#include <linux/kernel_stat.h>
30#include <linux/perf_event.h> 30#include <linux/perf_event.h>
31#include <linux/ftrace_event.h>
32#include <linux/hw_breakpoint.h>
31 33
32#include <asm/irq_regs.h> 34#include <asm/irq_regs.h>
33 35
34/* 36/*
35 * Each CPU has a list of per CPU events: 37 * Each CPU has a list of per CPU events:
36 */ 38 */
37DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context); 39static DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context);
38 40
39int perf_max_events __read_mostly = 1; 41int perf_max_events __read_mostly = 1;
40static int perf_reserved_percpu __read_mostly; 42static int perf_reserved_percpu __read_mostly;
@@ -201,14 +203,14 @@ perf_lock_task_context(struct task_struct *task, unsigned long *flags)
201 * if so. If we locked the right context, then it 203 * if so. If we locked the right context, then it
202 * can't get swapped on us any more. 204 * can't get swapped on us any more.
203 */ 205 */
204 spin_lock_irqsave(&ctx->lock, *flags); 206 raw_spin_lock_irqsave(&ctx->lock, *flags);
205 if (ctx != rcu_dereference(task->perf_event_ctxp)) { 207 if (ctx != rcu_dereference(task->perf_event_ctxp)) {
206 spin_unlock_irqrestore(&ctx->lock, *flags); 208 raw_spin_unlock_irqrestore(&ctx->lock, *flags);
207 goto retry; 209 goto retry;
208 } 210 }
209 211
210 if (!atomic_inc_not_zero(&ctx->refcount)) { 212 if (!atomic_inc_not_zero(&ctx->refcount)) {
211 spin_unlock_irqrestore(&ctx->lock, *flags); 213 raw_spin_unlock_irqrestore(&ctx->lock, *flags);
212 ctx = NULL; 214 ctx = NULL;
213 } 215 }
214 } 216 }
@@ -229,7 +231,7 @@ static struct perf_event_context *perf_pin_task_context(struct task_struct *task
229 ctx = perf_lock_task_context(task, &flags); 231 ctx = perf_lock_task_context(task, &flags);
230 if (ctx) { 232 if (ctx) {
231 ++ctx->pin_count; 233 ++ctx->pin_count;
232 spin_unlock_irqrestore(&ctx->lock, flags); 234 raw_spin_unlock_irqrestore(&ctx->lock, flags);
233 } 235 }
234 return ctx; 236 return ctx;
235} 237}
@@ -238,12 +240,55 @@ static void perf_unpin_context(struct perf_event_context *ctx)
238{ 240{
239 unsigned long flags; 241 unsigned long flags;
240 242
241 spin_lock_irqsave(&ctx->lock, flags); 243 raw_spin_lock_irqsave(&ctx->lock, flags);
242 --ctx->pin_count; 244 --ctx->pin_count;
243 spin_unlock_irqrestore(&ctx->lock, flags); 245 raw_spin_unlock_irqrestore(&ctx->lock, flags);
244 put_ctx(ctx); 246 put_ctx(ctx);
245} 247}
246 248
249static inline u64 perf_clock(void)
250{
251 return cpu_clock(smp_processor_id());
252}
253
254/*
255 * Update the record of the current time in a context.
256 */
257static void update_context_time(struct perf_event_context *ctx)
258{
259 u64 now = perf_clock();
260
261 ctx->time += now - ctx->timestamp;
262 ctx->timestamp = now;
263}
264
265/*
266 * Update the total_time_enabled and total_time_running fields for a event.
267 */
268static void update_event_times(struct perf_event *event)
269{
270 struct perf_event_context *ctx = event->ctx;
271 u64 run_end;
272
273 if (event->state < PERF_EVENT_STATE_INACTIVE ||
274 event->group_leader->state < PERF_EVENT_STATE_INACTIVE)
275 return;
276
277 if (ctx->is_active)
278 run_end = ctx->time;
279 else
280 run_end = event->tstamp_stopped;
281
282 event->total_time_enabled = run_end - event->tstamp_enabled;
283
284 if (event->state == PERF_EVENT_STATE_INACTIVE)
285 run_end = event->tstamp_stopped;
286 else
287 run_end = ctx->time;
288
289 event->total_time_running = run_end - event->tstamp_running;
290}
291
247/* 292/*
248 * Add a event from the lists for its context. 293 * Add a event from the lists for its context.
249 * Must be called with ctx->mutex and ctx->lock held. 294 * Must be called with ctx->mutex and ctx->lock held.
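[Editor's sketch, not part of the patch] The hunk above moves perf_clock(), update_context_time() and update_event_times() earlier in the file so that list_del_event() (next hunk) can refresh an event's times before the event is torn down. A minimal userspace model of the accounting, using a toy struct whose fields only mimic the kernel's and ignoring the extra "context inactive" case the kernel handles:

/*
 * Toy model of update_event_times(): "enabled" time runs from
 * tstamp_enabled to now; "running" time stops at tstamp_stopped once
 * the event is no longer on a PMU.
 */
#include <stdint.h>
#include <stdio.h>

struct toy_event {
	int      active;            /* 1 while scheduled on a PMU            */
	uint64_t tstamp_enabled;    /* ctx->time when the event was enabled  */
	uint64_t tstamp_running;    /* ctx->time when it last started running */
	uint64_t tstamp_stopped;    /* ctx->time when it last stopped        */
	uint64_t total_time_enabled;
	uint64_t total_time_running;
};

static void toy_update_event_times(struct toy_event *e, uint64_t ctx_time)
{
	uint64_t run_end = e->active ? ctx_time : e->tstamp_stopped;

	e->total_time_enabled = ctx_time - e->tstamp_enabled;
	e->total_time_running = run_end - e->tstamp_running;
}

int main(void)
{
	/* enabled at t=100, ran until it was stopped at t=250, now t=400 */
	struct toy_event e = {
		.active = 0,
		.tstamp_enabled = 100,
		.tstamp_running = 100,
		.tstamp_stopped = 250,
	};

	toy_update_event_times(&e, 400);
	printf("enabled=%llu running=%llu\n",
	       (unsigned long long)e.total_time_enabled,
	       (unsigned long long)e.total_time_running);  /* 300 and 150 */
	return 0;
}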
@@ -292,6 +337,18 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
292 if (event->group_leader != event) 337 if (event->group_leader != event)
293 event->group_leader->nr_siblings--; 338 event->group_leader->nr_siblings--;
294 339
340 update_event_times(event);
341
342 /*
343 * If event was in error state, then keep it
344 * that way, otherwise bogus counts will be
345 * returned on read(). The only way to get out
346 * of error state is by explicit re-enabling
347 * of the event
348 */
349 if (event->state > PERF_EVENT_STATE_OFF)
350 event->state = PERF_EVENT_STATE_OFF;
351
295 /* 352 /*
296 * If this was a group event with sibling events then 353 * If this was a group event with sibling events then
297 * upgrade the siblings to singleton events by adding them 354 * upgrade the siblings to singleton events by adding them
@@ -370,7 +427,7 @@ static void __perf_event_remove_from_context(void *info)
370 if (ctx->task && cpuctx->task_ctx != ctx) 427 if (ctx->task && cpuctx->task_ctx != ctx)
371 return; 428 return;
372 429
373 spin_lock(&ctx->lock); 430 raw_spin_lock(&ctx->lock);
374 /* 431 /*
375 * Protect the list operation against NMI by disabling the 432 * Protect the list operation against NMI by disabling the
376 * events on a global level. 433 * events on a global level.
@@ -392,7 +449,7 @@ static void __perf_event_remove_from_context(void *info)
392 } 449 }
393 450
394 perf_enable(); 451 perf_enable();
395 spin_unlock(&ctx->lock); 452 raw_spin_unlock(&ctx->lock);
396} 453}
397 454
398 455
@@ -419,7 +476,7 @@ static void perf_event_remove_from_context(struct perf_event *event)
419 if (!task) { 476 if (!task) {
420 /* 477 /*
421 * Per cpu events are removed via an smp call and 478 * Per cpu events are removed via an smp call and
422 * the removal is always sucessful. 479 * the removal is always successful.
423 */ 480 */
424 smp_call_function_single(event->cpu, 481 smp_call_function_single(event->cpu,
425 __perf_event_remove_from_context, 482 __perf_event_remove_from_context,
@@ -431,12 +488,12 @@ retry:
431 task_oncpu_function_call(task, __perf_event_remove_from_context, 488 task_oncpu_function_call(task, __perf_event_remove_from_context,
432 event); 489 event);
433 490
434 spin_lock_irq(&ctx->lock); 491 raw_spin_lock_irq(&ctx->lock);
435 /* 492 /*
436 * If the context is active we need to retry the smp call. 493 * If the context is active we need to retry the smp call.
437 */ 494 */
438 if (ctx->nr_active && !list_empty(&event->group_entry)) { 495 if (ctx->nr_active && !list_empty(&event->group_entry)) {
439 spin_unlock_irq(&ctx->lock); 496 raw_spin_unlock_irq(&ctx->lock);
440 goto retry; 497 goto retry;
441 } 498 }
442 499
@@ -445,48 +502,9 @@ retry:
445 * can remove the event safely, if the call above did not 502 * can remove the event safely, if the call above did not
446 * succeed. 503 * succeed.
447 */ 504 */
448 if (!list_empty(&event->group_entry)) { 505 if (!list_empty(&event->group_entry))
449 list_del_event(event, ctx); 506 list_del_event(event, ctx);
450 } 507 raw_spin_unlock_irq(&ctx->lock);
451 spin_unlock_irq(&ctx->lock);
452}
453
454static inline u64 perf_clock(void)
455{
456 return cpu_clock(smp_processor_id());
457}
458
459/*
460 * Update the record of the current time in a context.
461 */
462static void update_context_time(struct perf_event_context *ctx)
463{
464 u64 now = perf_clock();
465
466 ctx->time += now - ctx->timestamp;
467 ctx->timestamp = now;
468}
469
470/*
471 * Update the total_time_enabled and total_time_running fields for a event.
472 */
473static void update_event_times(struct perf_event *event)
474{
475 struct perf_event_context *ctx = event->ctx;
476 u64 run_end;
477
478 if (event->state < PERF_EVENT_STATE_INACTIVE ||
479 event->group_leader->state < PERF_EVENT_STATE_INACTIVE)
480 return;
481
482 event->total_time_enabled = ctx->time - event->tstamp_enabled;
483
484 if (event->state == PERF_EVENT_STATE_INACTIVE)
485 run_end = event->tstamp_stopped;
486 else
487 run_end = ctx->time;
488
489 event->total_time_running = run_end - event->tstamp_running;
490} 508}
491 509
492/* 510/*
@@ -517,7 +535,7 @@ static void __perf_event_disable(void *info)
517 if (ctx->task && cpuctx->task_ctx != ctx) 535 if (ctx->task && cpuctx->task_ctx != ctx)
518 return; 536 return;
519 537
520 spin_lock(&ctx->lock); 538 raw_spin_lock(&ctx->lock);
521 539
522 /* 540 /*
523 * If the event is on, turn it off. 541 * If the event is on, turn it off.
@@ -533,7 +551,7 @@ static void __perf_event_disable(void *info)
533 event->state = PERF_EVENT_STATE_OFF; 551 event->state = PERF_EVENT_STATE_OFF;
534 } 552 }
535 553
536 spin_unlock(&ctx->lock); 554 raw_spin_unlock(&ctx->lock);
537} 555}
538 556
539/* 557/*
@@ -549,7 +567,7 @@ static void __perf_event_disable(void *info)
549 * is the current context on this CPU and preemption is disabled, 567 * is the current context on this CPU and preemption is disabled,
550 * hence we can't get into perf_event_task_sched_out for this context. 568 * hence we can't get into perf_event_task_sched_out for this context.
551 */ 569 */
552static void perf_event_disable(struct perf_event *event) 570void perf_event_disable(struct perf_event *event)
553{ 571{
554 struct perf_event_context *ctx = event->ctx; 572 struct perf_event_context *ctx = event->ctx;
555 struct task_struct *task = ctx->task; 573 struct task_struct *task = ctx->task;
@@ -566,12 +584,12 @@ static void perf_event_disable(struct perf_event *event)
566 retry: 584 retry:
567 task_oncpu_function_call(task, __perf_event_disable, event); 585 task_oncpu_function_call(task, __perf_event_disable, event);
568 586
569 spin_lock_irq(&ctx->lock); 587 raw_spin_lock_irq(&ctx->lock);
570 /* 588 /*
571 * If the event is still active, we need to retry the cross-call. 589 * If the event is still active, we need to retry the cross-call.
572 */ 590 */
573 if (event->state == PERF_EVENT_STATE_ACTIVE) { 591 if (event->state == PERF_EVENT_STATE_ACTIVE) {
574 spin_unlock_irq(&ctx->lock); 592 raw_spin_unlock_irq(&ctx->lock);
575 goto retry; 593 goto retry;
576 } 594 }
577 595
@@ -584,7 +602,7 @@ static void perf_event_disable(struct perf_event *event)
584 event->state = PERF_EVENT_STATE_OFF; 602 event->state = PERF_EVENT_STATE_OFF;
585 } 603 }
586 604
587 spin_unlock_irq(&ctx->lock); 605 raw_spin_unlock_irq(&ctx->lock);
588} 606}
589 607
590static int 608static int
@@ -752,7 +770,7 @@ static void __perf_install_in_context(void *info)
752 cpuctx->task_ctx = ctx; 770 cpuctx->task_ctx = ctx;
753 } 771 }
754 772
755 spin_lock(&ctx->lock); 773 raw_spin_lock(&ctx->lock);
756 ctx->is_active = 1; 774 ctx->is_active = 1;
757 update_context_time(ctx); 775 update_context_time(ctx);
758 776
@@ -764,6 +782,9 @@ static void __perf_install_in_context(void *info)
764 782
765 add_event_to_ctx(event, ctx); 783 add_event_to_ctx(event, ctx);
766 784
785 if (event->cpu != -1 && event->cpu != smp_processor_id())
786 goto unlock;
787
767 /* 788 /*
768 * Don't put the event on if it is disabled or if 789 * Don't put the event on if it is disabled or if
769 * it is in a group and the group isn't on. 790 * it is in a group and the group isn't on.
@@ -802,7 +823,7 @@ static void __perf_install_in_context(void *info)
802 unlock: 823 unlock:
803 perf_enable(); 824 perf_enable();
804 825
805 spin_unlock(&ctx->lock); 826 raw_spin_unlock(&ctx->lock);
806} 827}
807 828
808/* 829/*
@@ -827,7 +848,7 @@ perf_install_in_context(struct perf_event_context *ctx,
827 if (!task) { 848 if (!task) {
828 /* 849 /*
829 * Per cpu events are installed via an smp call and 850 * Per cpu events are installed via an smp call and
830 * the install is always sucessful. 851 * the install is always successful.
831 */ 852 */
832 smp_call_function_single(cpu, __perf_install_in_context, 853 smp_call_function_single(cpu, __perf_install_in_context,
833 event, 1); 854 event, 1);
@@ -838,12 +859,12 @@ retry:
838 task_oncpu_function_call(task, __perf_install_in_context, 859 task_oncpu_function_call(task, __perf_install_in_context,
839 event); 860 event);
840 861
841 spin_lock_irq(&ctx->lock); 862 raw_spin_lock_irq(&ctx->lock);
842 /* 863 /*
843 * we need to retry the smp call. 864 * we need to retry the smp call.
844 */ 865 */
845 if (ctx->is_active && list_empty(&event->group_entry)) { 866 if (ctx->is_active && list_empty(&event->group_entry)) {
846 spin_unlock_irq(&ctx->lock); 867 raw_spin_unlock_irq(&ctx->lock);
847 goto retry; 868 goto retry;
848 } 869 }
849 870
@@ -854,7 +875,7 @@ retry:
854 */ 875 */
855 if (list_empty(&event->group_entry)) 876 if (list_empty(&event->group_entry))
856 add_event_to_ctx(event, ctx); 877 add_event_to_ctx(event, ctx);
857 spin_unlock_irq(&ctx->lock); 878 raw_spin_unlock_irq(&ctx->lock);
858} 879}
859 880
860/* 881/*
@@ -899,7 +920,7 @@ static void __perf_event_enable(void *info)
899 cpuctx->task_ctx = ctx; 920 cpuctx->task_ctx = ctx;
900 } 921 }
901 922
902 spin_lock(&ctx->lock); 923 raw_spin_lock(&ctx->lock);
903 ctx->is_active = 1; 924 ctx->is_active = 1;
904 update_context_time(ctx); 925 update_context_time(ctx);
905 926
@@ -907,6 +928,9 @@ static void __perf_event_enable(void *info)
907 goto unlock; 928 goto unlock;
908 __perf_event_mark_enabled(event, ctx); 929 __perf_event_mark_enabled(event, ctx);
909 930
931 if (event->cpu != -1 && event->cpu != smp_processor_id())
932 goto unlock;
933
910 /* 934 /*
911 * If the event is in a group and isn't the group leader, 935 * If the event is in a group and isn't the group leader,
912 * then don't put it on unless the group is on. 936 * then don't put it on unless the group is on.
@@ -941,7 +965,7 @@ static void __perf_event_enable(void *info)
941 } 965 }
942 966
943 unlock: 967 unlock:
944 spin_unlock(&ctx->lock); 968 raw_spin_unlock(&ctx->lock);
945} 969}
946 970
947/* 971/*
@@ -953,7 +977,7 @@ static void __perf_event_enable(void *info)
953 * perf_event_for_each_child or perf_event_for_each as described 977 * perf_event_for_each_child or perf_event_for_each as described
954 * for perf_event_disable. 978 * for perf_event_disable.
955 */ 979 */
956static void perf_event_enable(struct perf_event *event) 980void perf_event_enable(struct perf_event *event)
957{ 981{
958 struct perf_event_context *ctx = event->ctx; 982 struct perf_event_context *ctx = event->ctx;
959 struct task_struct *task = ctx->task; 983 struct task_struct *task = ctx->task;
@@ -967,7 +991,7 @@ static void perf_event_enable(struct perf_event *event)
967 return; 991 return;
968 } 992 }
969 993
970 spin_lock_irq(&ctx->lock); 994 raw_spin_lock_irq(&ctx->lock);
971 if (event->state >= PERF_EVENT_STATE_INACTIVE) 995 if (event->state >= PERF_EVENT_STATE_INACTIVE)
972 goto out; 996 goto out;
973 997
@@ -982,10 +1006,10 @@ static void perf_event_enable(struct perf_event *event)
982 event->state = PERF_EVENT_STATE_OFF; 1006 event->state = PERF_EVENT_STATE_OFF;
983 1007
984 retry: 1008 retry:
985 spin_unlock_irq(&ctx->lock); 1009 raw_spin_unlock_irq(&ctx->lock);
986 task_oncpu_function_call(task, __perf_event_enable, event); 1010 task_oncpu_function_call(task, __perf_event_enable, event);
987 1011
988 spin_lock_irq(&ctx->lock); 1012 raw_spin_lock_irq(&ctx->lock);
989 1013
990 /* 1014 /*
991 * If the context is active and the event is still off, 1015 * If the context is active and the event is still off,
@@ -1002,7 +1026,7 @@ static void perf_event_enable(struct perf_event *event)
1002 __perf_event_mark_enabled(event, ctx); 1026 __perf_event_mark_enabled(event, ctx);
1003 1027
1004 out: 1028 out:
1005 spin_unlock_irq(&ctx->lock); 1029 raw_spin_unlock_irq(&ctx->lock);
1006} 1030}
1007 1031
1008static int perf_event_refresh(struct perf_event *event, int refresh) 1032static int perf_event_refresh(struct perf_event *event, int refresh)
@@ -1024,20 +1048,20 @@ void __perf_event_sched_out(struct perf_event_context *ctx,
1024{ 1048{
1025 struct perf_event *event; 1049 struct perf_event *event;
1026 1050
1027 spin_lock(&ctx->lock); 1051 raw_spin_lock(&ctx->lock);
1028 ctx->is_active = 0; 1052 ctx->is_active = 0;
1029 if (likely(!ctx->nr_events)) 1053 if (likely(!ctx->nr_events))
1030 goto out; 1054 goto out;
1031 update_context_time(ctx); 1055 update_context_time(ctx);
1032 1056
1033 perf_disable(); 1057 perf_disable();
1034 if (ctx->nr_active) 1058 if (ctx->nr_active) {
1035 list_for_each_entry(event, &ctx->group_list, group_entry) 1059 list_for_each_entry(event, &ctx->group_list, group_entry)
1036 group_sched_out(event, cpuctx, ctx); 1060 group_sched_out(event, cpuctx, ctx);
1037 1061 }
1038 perf_enable(); 1062 perf_enable();
1039 out: 1063 out:
1040 spin_unlock(&ctx->lock); 1064 raw_spin_unlock(&ctx->lock);
1041} 1065}
1042 1066
1043/* 1067/*
@@ -1059,8 +1083,6 @@ static int context_equiv(struct perf_event_context *ctx1,
1059 && !ctx1->pin_count && !ctx2->pin_count; 1083 && !ctx1->pin_count && !ctx2->pin_count;
1060} 1084}
1061 1085
1062static void __perf_event_read(void *event);
1063
1064static void __perf_event_sync_stat(struct perf_event *event, 1086static void __perf_event_sync_stat(struct perf_event *event,
1065 struct perf_event *next_event) 1087 struct perf_event *next_event)
1066{ 1088{
@@ -1078,8 +1100,8 @@ static void __perf_event_sync_stat(struct perf_event *event,
1078 */ 1100 */
1079 switch (event->state) { 1101 switch (event->state) {
1080 case PERF_EVENT_STATE_ACTIVE: 1102 case PERF_EVENT_STATE_ACTIVE:
1081 __perf_event_read(event); 1103 event->pmu->read(event);
1082 break; 1104 /* fall-through */
1083 1105
1084 case PERF_EVENT_STATE_INACTIVE: 1106 case PERF_EVENT_STATE_INACTIVE:
1085 update_event_times(event); 1107 update_event_times(event);
@@ -1118,6 +1140,8 @@ static void perf_event_sync_stat(struct perf_event_context *ctx,
1118 if (!ctx->nr_stat) 1140 if (!ctx->nr_stat)
1119 return; 1141 return;
1120 1142
1143 update_context_time(ctx);
1144
1121 event = list_first_entry(&ctx->event_list, 1145 event = list_first_entry(&ctx->event_list,
1122 struct perf_event, event_entry); 1146 struct perf_event, event_entry);
1123 1147
@@ -1161,8 +1185,6 @@ void perf_event_task_sched_out(struct task_struct *task,
1161 if (likely(!ctx || !cpuctx->task_ctx)) 1185 if (likely(!ctx || !cpuctx->task_ctx))
1162 return; 1186 return;
1163 1187
1164 update_context_time(ctx);
1165
1166 rcu_read_lock(); 1188 rcu_read_lock();
1167 parent = rcu_dereference(ctx->parent_ctx); 1189 parent = rcu_dereference(ctx->parent_ctx);
1168 next_ctx = next->perf_event_ctxp; 1190 next_ctx = next->perf_event_ctxp;
@@ -1177,8 +1199,8 @@ void perf_event_task_sched_out(struct task_struct *task,
1177 * order we take the locks because no other cpu could 1199 * order we take the locks because no other cpu could
1178 * be trying to lock both of these tasks. 1200 * be trying to lock both of these tasks.
1179 */ 1201 */
1180 spin_lock(&ctx->lock); 1202 raw_spin_lock(&ctx->lock);
1181 spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING); 1203 raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
1182 if (context_equiv(ctx, next_ctx)) { 1204 if (context_equiv(ctx, next_ctx)) {
1183 /* 1205 /*
1184 * XXX do we need a memory barrier of sorts 1206 * XXX do we need a memory barrier of sorts
@@ -1192,8 +1214,8 @@ void perf_event_task_sched_out(struct task_struct *task,
1192 1214
1193 perf_event_sync_stat(ctx, next_ctx); 1215 perf_event_sync_stat(ctx, next_ctx);
1194 } 1216 }
1195 spin_unlock(&next_ctx->lock); 1217 raw_spin_unlock(&next_ctx->lock);
1196 spin_unlock(&ctx->lock); 1218 raw_spin_unlock(&ctx->lock);
1197 } 1219 }
1198 rcu_read_unlock(); 1220 rcu_read_unlock();
1199 1221
@@ -1235,7 +1257,7 @@ __perf_event_sched_in(struct perf_event_context *ctx,
1235 struct perf_event *event; 1257 struct perf_event *event;
1236 int can_add_hw = 1; 1258 int can_add_hw = 1;
1237 1259
1238 spin_lock(&ctx->lock); 1260 raw_spin_lock(&ctx->lock);
1239 ctx->is_active = 1; 1261 ctx->is_active = 1;
1240 if (likely(!ctx->nr_events)) 1262 if (likely(!ctx->nr_events))
1241 goto out; 1263 goto out;
@@ -1290,7 +1312,7 @@ __perf_event_sched_in(struct perf_event_context *ctx,
1290 } 1312 }
1291 perf_enable(); 1313 perf_enable();
1292 out: 1314 out:
1293 spin_unlock(&ctx->lock); 1315 raw_spin_unlock(&ctx->lock);
1294} 1316}
1295 1317
1296/* 1318/*
@@ -1354,11 +1376,14 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
1354 struct hw_perf_event *hwc; 1376 struct hw_perf_event *hwc;
1355 u64 interrupts, freq; 1377 u64 interrupts, freq;
1356 1378
1357 spin_lock(&ctx->lock); 1379 raw_spin_lock(&ctx->lock);
1358 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 1380 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
1359 if (event->state != PERF_EVENT_STATE_ACTIVE) 1381 if (event->state != PERF_EVENT_STATE_ACTIVE)
1360 continue; 1382 continue;
1361 1383
1384 if (event->cpu != -1 && event->cpu != smp_processor_id())
1385 continue;
1386
1362 hwc = &event->hw; 1387 hwc = &event->hw;
1363 1388
1364 interrupts = hwc->interrupts; 1389 interrupts = hwc->interrupts;
@@ -1409,7 +1434,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
1409 perf_enable(); 1434 perf_enable();
1410 } 1435 }
1411 } 1436 }
1412 spin_unlock(&ctx->lock); 1437 raw_spin_unlock(&ctx->lock);
1413} 1438}
1414 1439
1415/* 1440/*
@@ -1422,7 +1447,7 @@ static void rotate_ctx(struct perf_event_context *ctx)
1422 if (!ctx->nr_events) 1447 if (!ctx->nr_events)
1423 return; 1448 return;
1424 1449
1425 spin_lock(&ctx->lock); 1450 raw_spin_lock(&ctx->lock);
1426 /* 1451 /*
1427 * Rotate the first entry last (works just fine for group events too): 1452 * Rotate the first entry last (works just fine for group events too):
1428 */ 1453 */
@@ -1433,7 +1458,7 @@ static void rotate_ctx(struct perf_event_context *ctx)
1433 } 1458 }
1434 perf_enable(); 1459 perf_enable();
1435 1460
1436 spin_unlock(&ctx->lock); 1461 raw_spin_unlock(&ctx->lock);
1437} 1462}
1438 1463
1439void perf_event_task_tick(struct task_struct *curr, int cpu) 1464void perf_event_task_tick(struct task_struct *curr, int cpu)
@@ -1482,7 +1507,7 @@ static void perf_event_enable_on_exec(struct task_struct *task)
1482 1507
1483 __perf_event_task_sched_out(ctx); 1508 __perf_event_task_sched_out(ctx);
1484 1509
1485 spin_lock(&ctx->lock); 1510 raw_spin_lock(&ctx->lock);
1486 1511
1487 list_for_each_entry(event, &ctx->group_list, group_entry) { 1512 list_for_each_entry(event, &ctx->group_list, group_entry) {
1488 if (!event->attr.enable_on_exec) 1513 if (!event->attr.enable_on_exec)
@@ -1500,7 +1525,7 @@ static void perf_event_enable_on_exec(struct task_struct *task)
1500 if (enabled) 1525 if (enabled)
1501 unclone_ctx(ctx); 1526 unclone_ctx(ctx);
1502 1527
1503 spin_unlock(&ctx->lock); 1528 raw_spin_unlock(&ctx->lock);
1504 1529
1505 perf_event_task_sched_in(task, smp_processor_id()); 1530 perf_event_task_sched_in(task, smp_processor_id());
1506 out: 1531 out:
@@ -1515,7 +1540,6 @@ static void __perf_event_read(void *info)
1515 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); 1540 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
1516 struct perf_event *event = info; 1541 struct perf_event *event = info;
1517 struct perf_event_context *ctx = event->ctx; 1542 struct perf_event_context *ctx = event->ctx;
1518 unsigned long flags;
1519 1543
1520 /* 1544 /*
1521 * If this is a task context, we need to check whether it is 1545 * If this is a task context, we need to check whether it is
@@ -1527,12 +1551,12 @@ static void __perf_event_read(void *info)
1527 if (ctx->task && cpuctx->task_ctx != ctx) 1551 if (ctx->task && cpuctx->task_ctx != ctx)
1528 return; 1552 return;
1529 1553
1530 local_irq_save(flags); 1554 raw_spin_lock(&ctx->lock);
1531 if (ctx->is_active) 1555 update_context_time(ctx);
1532 update_context_time(ctx);
1533 event->pmu->read(event);
1534 update_event_times(event); 1556 update_event_times(event);
1535 local_irq_restore(flags); 1557 raw_spin_unlock(&ctx->lock);
1558
1559 event->pmu->read(event);
1536} 1560}
1537 1561
1538static u64 perf_event_read(struct perf_event *event) 1562static u64 perf_event_read(struct perf_event *event)
@@ -1545,7 +1569,13 @@ static u64 perf_event_read(struct perf_event *event)
1545 smp_call_function_single(event->oncpu, 1569 smp_call_function_single(event->oncpu,
1546 __perf_event_read, event, 1); 1570 __perf_event_read, event, 1);
1547 } else if (event->state == PERF_EVENT_STATE_INACTIVE) { 1571 } else if (event->state == PERF_EVENT_STATE_INACTIVE) {
1572 struct perf_event_context *ctx = event->ctx;
1573 unsigned long flags;
1574
1575 raw_spin_lock_irqsave(&ctx->lock, flags);
1576 update_context_time(ctx);
1548 update_event_times(event); 1577 update_event_times(event);
1578 raw_spin_unlock_irqrestore(&ctx->lock, flags);
1549 } 1579 }
1550 1580
1551 return atomic64_read(&event->count); 1581 return atomic64_read(&event->count);
@@ -1558,8 +1588,7 @@ static void
1558__perf_event_init_context(struct perf_event_context *ctx, 1588__perf_event_init_context(struct perf_event_context *ctx,
1559 struct task_struct *task) 1589 struct task_struct *task)
1560{ 1590{
1561 memset(ctx, 0, sizeof(*ctx)); 1591 raw_spin_lock_init(&ctx->lock);
1562 spin_lock_init(&ctx->lock);
1563 mutex_init(&ctx->mutex); 1592 mutex_init(&ctx->mutex);
1564 INIT_LIST_HEAD(&ctx->group_list); 1593 INIT_LIST_HEAD(&ctx->group_list);
1565 INIT_LIST_HEAD(&ctx->event_list); 1594 INIT_LIST_HEAD(&ctx->event_list);
@@ -1575,15 +1604,12 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu)
1575 unsigned long flags; 1604 unsigned long flags;
1576 int err; 1605 int err;
1577 1606
1578 /* 1607 if (pid == -1 && cpu != -1) {
1579 * If cpu is not a wildcard then this is a percpu event:
1580 */
1581 if (cpu != -1) {
1582 /* Must be root to operate on a CPU event: */ 1608 /* Must be root to operate on a CPU event: */
1583 if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) 1609 if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
1584 return ERR_PTR(-EACCES); 1610 return ERR_PTR(-EACCES);
1585 1611
1586 if (cpu < 0 || cpu > num_possible_cpus()) 1612 if (cpu < 0 || cpu >= nr_cpumask_bits)
1587 return ERR_PTR(-EINVAL); 1613 return ERR_PTR(-EINVAL);
1588 1614
1589 /* 1615 /*
@@ -1591,7 +1617,7 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu)
1591 * offline CPU and activate it when the CPU comes up, but 1617 * offline CPU and activate it when the CPU comes up, but
1592 * that's for later. 1618 * that's for later.
1593 */ 1619 */
1594 if (!cpu_isset(cpu, cpu_online_map)) 1620 if (!cpu_online(cpu))
1595 return ERR_PTR(-ENODEV); 1621 return ERR_PTR(-ENODEV);
1596 1622
1597 cpuctx = &per_cpu(perf_cpu_context, cpu); 1623 cpuctx = &per_cpu(perf_cpu_context, cpu);
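[Editor's sketch, not part of the patch] From userspace, the checks above mean a CPU-wide event (pid == -1, cpu >= 0) can fail with EACCES (perf_event_paranoid restricts CPU events and the caller lacks CAP_SYS_ADMIN), EINVAL (cpu index out of range, now bounded by nr_cpumask_bits) or ENODEV (CPU offline). A hedged example, using the usual raw syscall wrapper since glibc provides none:

#define _GNU_SOURCE
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

static int perf_event_open(struct perf_event_attr *attr, pid_t pid,
			   int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(int argc, char **argv)
{
	struct perf_event_attr attr;
	int cpu = argc > 1 ? atoi(argv[1]) : 0;
	uint64_t count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_HARDWARE;
	attr.size = sizeof(attr);
	attr.config = PERF_COUNT_HW_CPU_CYCLES;

	/* pid == -1, cpu >= 0: count cycles for everything on that CPU */
	fd = perf_event_open(&attr, -1, cpu, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");  /* EACCES/EINVAL/ENODEV as above */
		return 1;
	}

	sleep(1);
	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("cpu%d cycles: %llu\n", cpu, (unsigned long long)count);
	close(fd);
	return 0;
}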
@@ -1629,11 +1655,11 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu)
1629 ctx = perf_lock_task_context(task, &flags); 1655 ctx = perf_lock_task_context(task, &flags);
1630 if (ctx) { 1656 if (ctx) {
1631 unclone_ctx(ctx); 1657 unclone_ctx(ctx);
1632 spin_unlock_irqrestore(&ctx->lock, flags); 1658 raw_spin_unlock_irqrestore(&ctx->lock, flags);
1633 } 1659 }
1634 1660
1635 if (!ctx) { 1661 if (!ctx) {
1636 ctx = kmalloc(sizeof(struct perf_event_context), GFP_KERNEL); 1662 ctx = kzalloc(sizeof(struct perf_event_context), GFP_KERNEL);
1637 err = -ENOMEM; 1663 err = -ENOMEM;
1638 if (!ctx) 1664 if (!ctx)
1639 goto errout; 1665 goto errout;
@@ -1658,6 +1684,8 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu)
1658 return ERR_PTR(err); 1684 return ERR_PTR(err);
1659} 1685}
1660 1686
1687static void perf_event_free_filter(struct perf_event *event);
1688
1661static void free_event_rcu(struct rcu_head *head) 1689static void free_event_rcu(struct rcu_head *head)
1662{ 1690{
1663 struct perf_event *event; 1691 struct perf_event *event;
@@ -1665,6 +1693,7 @@ static void free_event_rcu(struct rcu_head *head)
1665 event = container_of(head, struct perf_event, rcu_head); 1693 event = container_of(head, struct perf_event, rcu_head);
1666 if (event->ns) 1694 if (event->ns)
1667 put_pid_ns(event->ns); 1695 put_pid_ns(event->ns);
1696 perf_event_free_filter(event);
1668 kfree(event); 1697 kfree(event);
1669} 1698}
1670 1699
@@ -1696,16 +1725,10 @@ static void free_event(struct perf_event *event)
1696 call_rcu(&event->rcu_head, free_event_rcu); 1725 call_rcu(&event->rcu_head, free_event_rcu);
1697} 1726}
1698 1727
1699/* 1728int perf_event_release_kernel(struct perf_event *event)
1700 * Called when the last reference to the file is gone.
1701 */
1702static int perf_release(struct inode *inode, struct file *file)
1703{ 1729{
1704 struct perf_event *event = file->private_data;
1705 struct perf_event_context *ctx = event->ctx; 1730 struct perf_event_context *ctx = event->ctx;
1706 1731
1707 file->private_data = NULL;
1708
1709 WARN_ON_ONCE(ctx->parent_ctx); 1732 WARN_ON_ONCE(ctx->parent_ctx);
1710 mutex_lock(&ctx->mutex); 1733 mutex_lock(&ctx->mutex);
1711 perf_event_remove_from_context(event); 1734 perf_event_remove_from_context(event);
@@ -1720,6 +1743,19 @@ static int perf_release(struct inode *inode, struct file *file)
1720 1743
1721 return 0; 1744 return 0;
1722} 1745}
1746EXPORT_SYMBOL_GPL(perf_event_release_kernel);
1747
1748/*
1749 * Called when the last reference to the file is gone.
1750 */
1751static int perf_release(struct inode *inode, struct file *file)
1752{
1753 struct perf_event *event = file->private_data;
1754
1755 file->private_data = NULL;
1756
1757 return perf_event_release_kernel(event);
1758}
1723 1759
1724static int perf_event_read_size(struct perf_event *event) 1760static int perf_event_read_size(struct perf_event *event)
1725{ 1761{
@@ -1746,91 +1782,94 @@ static int perf_event_read_size(struct perf_event *event)
1746 return size; 1782 return size;
1747} 1783}
1748 1784
1749static u64 perf_event_read_value(struct perf_event *event) 1785u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
1750{ 1786{
1751 struct perf_event *child; 1787 struct perf_event *child;
1752 u64 total = 0; 1788 u64 total = 0;
1753 1789
1790 *enabled = 0;
1791 *running = 0;
1792
1793 mutex_lock(&event->child_mutex);
1754 total += perf_event_read(event); 1794 total += perf_event_read(event);
1755 list_for_each_entry(child, &event->child_list, child_list) 1795 *enabled += event->total_time_enabled +
1796 atomic64_read(&event->child_total_time_enabled);
1797 *running += event->total_time_running +
1798 atomic64_read(&event->child_total_time_running);
1799
1800 list_for_each_entry(child, &event->child_list, child_list) {
1756 total += perf_event_read(child); 1801 total += perf_event_read(child);
1802 *enabled += child->total_time_enabled;
1803 *running += child->total_time_running;
1804 }
1805 mutex_unlock(&event->child_mutex);
1757 1806
1758 return total; 1807 return total;
1759} 1808}
1760 1809EXPORT_SYMBOL_GPL(perf_event_read_value);
1761static int perf_event_read_entry(struct perf_event *event,
1762 u64 read_format, char __user *buf)
1763{
1764 int n = 0, count = 0;
1765 u64 values[2];
1766
1767 values[n++] = perf_event_read_value(event);
1768 if (read_format & PERF_FORMAT_ID)
1769 values[n++] = primary_event_id(event);
1770
1771 count = n * sizeof(u64);
1772
1773 if (copy_to_user(buf, values, count))
1774 return -EFAULT;
1775
1776 return count;
1777}
1778 1810
1779static int perf_event_read_group(struct perf_event *event, 1811static int perf_event_read_group(struct perf_event *event,
1780 u64 read_format, char __user *buf) 1812 u64 read_format, char __user *buf)
1781{ 1813{
1782 struct perf_event *leader = event->group_leader, *sub; 1814 struct perf_event *leader = event->group_leader, *sub;
1783 int n = 0, size = 0, err = -EFAULT; 1815 int n = 0, size = 0, ret = -EFAULT;
1784 u64 values[3]; 1816 struct perf_event_context *ctx = leader->ctx;
1817 u64 values[5];
1818 u64 count, enabled, running;
1819
1820 mutex_lock(&ctx->mutex);
1821 count = perf_event_read_value(leader, &enabled, &running);
1785 1822
1786 values[n++] = 1 + leader->nr_siblings; 1823 values[n++] = 1 + leader->nr_siblings;
1787 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { 1824 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
1788 values[n++] = leader->total_time_enabled + 1825 values[n++] = enabled;
1789 atomic64_read(&leader->child_total_time_enabled); 1826 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
1790 } 1827 values[n++] = running;
1791 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { 1828 values[n++] = count;
1792 values[n++] = leader->total_time_running + 1829 if (read_format & PERF_FORMAT_ID)
1793 atomic64_read(&leader->child_total_time_running); 1830 values[n++] = primary_event_id(leader);
1794 }
1795 1831
1796 size = n * sizeof(u64); 1832 size = n * sizeof(u64);
1797 1833
1798 if (copy_to_user(buf, values, size)) 1834 if (copy_to_user(buf, values, size))
1799 return -EFAULT; 1835 goto unlock;
1800
1801 err = perf_event_read_entry(leader, read_format, buf + size);
1802 if (err < 0)
1803 return err;
1804 1836
1805 size += err; 1837 ret = size;
1806 1838
1807 list_for_each_entry(sub, &leader->sibling_list, group_entry) { 1839 list_for_each_entry(sub, &leader->sibling_list, group_entry) {
1808 err = perf_event_read_entry(sub, read_format, 1840 n = 0;
1809 buf + size); 1841
1810 if (err < 0) 1842 values[n++] = perf_event_read_value(sub, &enabled, &running);
1811 return err; 1843 if (read_format & PERF_FORMAT_ID)
1844 values[n++] = primary_event_id(sub);
1812 1845
1813 size += err; 1846 size = n * sizeof(u64);
1847
1848 if (copy_to_user(buf + ret, values, size)) {
1849 ret = -EFAULT;
1850 goto unlock;
1851 }
1852
1853 ret += size;
1814 } 1854 }
1855unlock:
1856 mutex_unlock(&ctx->mutex);
1815 1857
1816 return size; 1858 return ret;
1817} 1859}
1818 1860
1819static int perf_event_read_one(struct perf_event *event, 1861static int perf_event_read_one(struct perf_event *event,
1820 u64 read_format, char __user *buf) 1862 u64 read_format, char __user *buf)
1821{ 1863{
1864 u64 enabled, running;
1822 u64 values[4]; 1865 u64 values[4];
1823 int n = 0; 1866 int n = 0;
1824 1867
1825 values[n++] = perf_event_read_value(event); 1868 values[n++] = perf_event_read_value(event, &enabled, &running);
1826 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { 1869 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
1827 values[n++] = event->total_time_enabled + 1870 values[n++] = enabled;
1828 atomic64_read(&event->child_total_time_enabled); 1871 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
1829 } 1872 values[n++] = running;
1830 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
1831 values[n++] = event->total_time_running +
1832 atomic64_read(&event->child_total_time_running);
1833 }
1834 if (read_format & PERF_FORMAT_ID) 1873 if (read_format & PERF_FORMAT_ID)
1835 values[n++] = primary_event_id(event); 1874 values[n++] = primary_event_id(event);
1836 1875
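[Editor's sketch, not part of the patch] The rewritten perf_event_read_group() emits, for a PERF_FORMAT_GROUP read, one nr field, the optional enabled/running times once for the whole group, then a {value, id} pair per event with the leader first. A userspace example of that layout, assuming a cycles+instructions group is available on the machine:

#define _GNU_SOURCE
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>
#include <stdint.h>

static int perf_event_open(struct perf_event_attr *attr, pid_t pid,
			   int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

struct read_group {
	uint64_t nr;            /* 1 + leader->nr_siblings        */
	uint64_t time_enabled;  /* PERF_FORMAT_TOTAL_TIME_ENABLED */
	uint64_t time_running;  /* PERF_FORMAT_TOTAL_TIME_RUNNING */
	struct { uint64_t value, id; } cnt[2];
};

int main(void)
{
	struct perf_event_attr attr;
	struct read_group r;
	int leader, sibling;

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_HARDWARE;
	attr.size = sizeof(attr);
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID |
			   PERF_FORMAT_TOTAL_TIME_ENABLED |
			   PERF_FORMAT_TOTAL_TIME_RUNNING;

	leader = perf_event_open(&attr, 0, -1, -1, 0);   /* this task, any CPU */
	if (leader < 0) {
		perror("perf_event_open (leader)");
		return 1;
	}

	attr.config = PERF_COUNT_HW_INSTRUCTIONS;
	sibling = perf_event_open(&attr, 0, -1, leader, 0);  /* join the group */
	if (sibling < 0) {
		perror("perf_event_open (sibling)");
		return 1;
	}

	sleep(1);
	if (read(leader, &r, sizeof(r)) > 0) {
		printf("%llu events, enabled=%llu running=%llu\n",
		       (unsigned long long)r.nr,
		       (unsigned long long)r.time_enabled,
		       (unsigned long long)r.time_running);
		printf("cycles=%llu instructions=%llu\n",
		       (unsigned long long)r.cnt[0].value,
		       (unsigned long long)r.cnt[1].value);
	}
	return 0;
}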
@@ -1861,12 +1900,10 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count)
1861 return -ENOSPC; 1900 return -ENOSPC;
1862 1901
1863 WARN_ON_ONCE(event->ctx->parent_ctx); 1902 WARN_ON_ONCE(event->ctx->parent_ctx);
1864 mutex_lock(&event->child_mutex);
1865 if (read_format & PERF_FORMAT_GROUP) 1903 if (read_format & PERF_FORMAT_GROUP)
1866 ret = perf_event_read_group(event, read_format, buf); 1904 ret = perf_event_read_group(event, read_format, buf);
1867 else 1905 else
1868 ret = perf_event_read_one(event, read_format, buf); 1906 ret = perf_event_read_one(event, read_format, buf);
1869 mutex_unlock(&event->child_mutex);
1870 1907
1871 return ret; 1908 return ret;
1872} 1909}
@@ -1956,7 +1993,7 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
1956 if (!value) 1993 if (!value)
1957 return -EINVAL; 1994 return -EINVAL;
1958 1995
1959 spin_lock_irq(&ctx->lock); 1996 raw_spin_lock_irq(&ctx->lock);
1960 if (event->attr.freq) { 1997 if (event->attr.freq) {
1961 if (value > sysctl_perf_event_sample_rate) { 1998 if (value > sysctl_perf_event_sample_rate) {
1962 ret = -EINVAL; 1999 ret = -EINVAL;
@@ -1969,12 +2006,13 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
1969 event->hw.sample_period = value; 2006 event->hw.sample_period = value;
1970 } 2007 }
1971unlock: 2008unlock:
1972 spin_unlock_irq(&ctx->lock); 2009 raw_spin_unlock_irq(&ctx->lock);
1973 2010
1974 return ret; 2011 return ret;
1975} 2012}
1976 2013
1977int perf_event_set_output(struct perf_event *event, int output_fd); 2014static int perf_event_set_output(struct perf_event *event, int output_fd);
2015static int perf_event_set_filter(struct perf_event *event, void __user *arg);
1978 2016
1979static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 2017static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1980{ 2018{
@@ -2002,6 +2040,9 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2002 case PERF_EVENT_IOC_SET_OUTPUT: 2040 case PERF_EVENT_IOC_SET_OUTPUT:
2003 return perf_event_set_output(event, arg); 2041 return perf_event_set_output(event, arg);
2004 2042
2043 case PERF_EVENT_IOC_SET_FILTER:
2044 return perf_event_set_filter(event, (void __user *)arg);
2045
2005 default: 2046 default:
2006 return -ENOTTY; 2047 return -ENOTTY;
2007 } 2048 }
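[Editor's sketch, not part of the patch] The new PERF_EVENT_IOC_SET_FILTER ioctl hands an ftrace-style filter string to a tracepoint event (see perf_event_set_filter() further down; it only works with CONFIG_EVENT_PROFILE). A hedged userspace example; the debugfs path, the sched_switch field name and the need for sufficient privileges are assumptions about the running system:

#define _GNU_SOURCE
#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>

static int perf_event_open(struct perf_event_attr *attr, pid_t pid,
			   int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	const char *id_path =
		"/sys/kernel/debug/tracing/events/sched/sched_switch/id";
	struct perf_event_attr attr;
	char buf[32];
	uint64_t hits;
	FILE *f;
	int fd;

	f = fopen(id_path, "r");
	if (!f || !fgets(buf, sizeof(buf), f)) {
		perror(id_path);
		return 1;
	}
	fclose(f);

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_TRACEPOINT;
	attr.size = sizeof(attr);
	attr.config = strtoull(buf, NULL, 10);   /* tracepoint event id */

	fd = perf_event_open(&attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	/* Only count switches away from tasks not named "swapper". */
	if (ioctl(fd, PERF_EVENT_IOC_SET_FILTER, "prev_comm != \"swapper\""))
		perror("PERF_EVENT_IOC_SET_FILTER");

	sleep(1);
	if (read(fd, &hits, sizeof(hits)) == sizeof(hits))
		printf("filtered sched_switch hits: %llu\n",
		       (unsigned long long)hits);
	close(fd);
	return 0;
}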
@@ -2174,6 +2215,7 @@ static void perf_mmap_data_free(struct perf_mmap_data *data)
2174 perf_mmap_free_page((unsigned long)data->user_page); 2215 perf_mmap_free_page((unsigned long)data->user_page);
2175 for (i = 0; i < data->nr_pages; i++) 2216 for (i = 0; i < data->nr_pages; i++)
2176 perf_mmap_free_page((unsigned long)data->data_pages[i]); 2217 perf_mmap_free_page((unsigned long)data->data_pages[i]);
2218 kfree(data);
2177} 2219}
2178 2220
2179#else 2221#else
@@ -2214,6 +2256,7 @@ static void perf_mmap_data_free_work(struct work_struct *work)
2214 perf_mmap_unmark_page(base + (i * PAGE_SIZE)); 2256 perf_mmap_unmark_page(base + (i * PAGE_SIZE));
2215 2257
2216 vfree(base); 2258 vfree(base);
2259 kfree(data);
2217} 2260}
2218 2261
2219static void perf_mmap_data_free(struct perf_mmap_data *data) 2262static void perf_mmap_data_free(struct perf_mmap_data *data)
@@ -2307,7 +2350,7 @@ perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data)
2307 } 2350 }
2308 2351
2309 if (!data->watermark) 2352 if (!data->watermark)
2310 data->watermark = max_t(long, PAGE_SIZE, max_size / 2); 2353 data->watermark = max_size / 2;
2311 2354
2312 2355
2313 rcu_assign_pointer(event->data, data); 2356 rcu_assign_pointer(event->data, data);
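[Editor's sketch, not part of the patch] Context for the watermark change: the default wakeup watermark becomes half the mmap buffer with no PAGE_SIZE floor, and userspace can still pick its own threshold via attr.watermark/attr.wakeup_watermark. A rough userspace sketch of that side (the event, period and buffer size are arbitrary illustration values; record parsing is omitted):

#define _GNU_SOURCE
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/mman.h>
#include <poll.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>

int main(void)
{
	struct perf_event_attr attr;
	size_t page = sysconf(_SC_PAGESIZE);
	struct pollfd pfd;
	void *ring;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_SOFTWARE;
	attr.size = sizeof(attr);
	attr.config = PERF_COUNT_SW_TASK_CLOCK;
	attr.sample_period = 100000;        /* sample every 100us of task time   */
	attr.sample_type = PERF_SAMPLE_IP;
	attr.watermark = 1;
	attr.wakeup_watermark = 2 * page;   /* flag a wakeup after ~2 pages fill */

	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	/* 1 metadata page + a power-of-two number of data pages */
	ring = mmap(NULL, (1 + 8) * page, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (ring == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* burn some CPU so samples accumulate past the watermark */
	for (volatile long i = 0; i < 200000000; i++)
		;

	pfd.fd = fd;
	pfd.events = POLLIN;
	if (poll(&pfd, 1, 0) > 0)
		printf("woken: at least wakeup_watermark bytes are buffered\n");

	munmap(ring, (1 + 8) * page);
	close(fd);
	return 0;
}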
@@ -2319,7 +2362,6 @@ static void perf_mmap_data_free_rcu(struct rcu_head *rcu_head)
2319 2362
2320 data = container_of(rcu_head, struct perf_mmap_data, rcu_head); 2363 data = container_of(rcu_head, struct perf_mmap_data, rcu_head);
2321 perf_mmap_data_free(data); 2364 perf_mmap_data_free(data);
2322 kfree(data);
2323} 2365}
2324 2366
2325static void perf_mmap_data_release(struct perf_event *event) 2367static void perf_mmap_data_release(struct perf_event *event)
@@ -2666,20 +2708,21 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
2666static void perf_output_lock(struct perf_output_handle *handle) 2708static void perf_output_lock(struct perf_output_handle *handle)
2667{ 2709{
2668 struct perf_mmap_data *data = handle->data; 2710 struct perf_mmap_data *data = handle->data;
2669 int cpu; 2711 int cur, cpu = get_cpu();
2670 2712
2671 handle->locked = 0; 2713 handle->locked = 0;
2672 2714
2673 local_irq_save(handle->flags); 2715 for (;;) {
2674 cpu = smp_processor_id(); 2716 cur = atomic_cmpxchg(&data->lock, -1, cpu);
2675 2717 if (cur == -1) {
2676 if (in_nmi() && atomic_read(&data->lock) == cpu) 2718 handle->locked = 1;
2677 return; 2719 break;
2720 }
2721 if (cur == cpu)
2722 break;
2678 2723
2679 while (atomic_cmpxchg(&data->lock, -1, cpu) != -1)
2680 cpu_relax(); 2724 cpu_relax();
2681 2725 }
2682 handle->locked = 1;
2683} 2726}
2684 2727
2685static void perf_output_unlock(struct perf_output_handle *handle) 2728static void perf_output_unlock(struct perf_output_handle *handle)
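[Editor's sketch, not part of the patch] A standalone C11 model of the revised perf_output_lock(): the lock word is -1 when free, otherwise the owning CPU id, so a single cmpxchg loop both takes the lock and recognises recursion on the same CPU without the old local_irq_save()/in_nmi() special case. Here the "CPU id" is just an integer passed by the caller:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int lock_word = -1;   /* -1: free, otherwise owner CPU id */

/* Returns true if we actually took the lock, false on same-owner recursion. */
static bool output_lock(int cpu)
{
	for (;;) {
		int cur = -1;

		if (atomic_compare_exchange_strong(&lock_word, &cur, cpu))
			return true;    /* was free, now ours               */
		if (cur == cpu)
			return false;   /* nested on this CPU: do not spin  */
		/* held by another CPU: retry (cpu_relax() in the kernel)   */
	}
}

static void output_unlock(bool locked)
{
	if (locked)
		atomic_store(&lock_word, -1);
}

int main(void)
{
	bool outer = output_lock(0);   /* takes the lock        */
	bool inner = output_lock(0);   /* recursion: not locked */

	printf("outer=%d inner=%d\n", outer, inner);
	output_unlock(inner);
	output_unlock(outer);
	return 0;
}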
@@ -2725,7 +2768,7 @@ again:
2725 if (atomic_xchg(&data->wakeup, 0)) 2768 if (atomic_xchg(&data->wakeup, 0))
2726 perf_output_wakeup(handle); 2769 perf_output_wakeup(handle);
2727out: 2770out:
2728 local_irq_restore(handle->flags); 2771 put_cpu();
2729} 2772}
2730 2773
2731void perf_output_copy(struct perf_output_handle *handle, 2774void perf_output_copy(struct perf_output_handle *handle,
@@ -3225,6 +3268,12 @@ static void perf_event_task_output(struct perf_event *event,
3225 3268
3226static int perf_event_task_match(struct perf_event *event) 3269static int perf_event_task_match(struct perf_event *event)
3227{ 3270{
3271 if (event->state != PERF_EVENT_STATE_ACTIVE)
3272 return 0;
3273
3274 if (event->cpu != -1 && event->cpu != smp_processor_id())
3275 return 0;
3276
3228 if (event->attr.comm || event->attr.mmap || event->attr.task) 3277 if (event->attr.comm || event->attr.mmap || event->attr.task)
3229 return 1; 3278 return 1;
3230 3279
@@ -3236,15 +3285,10 @@ static void perf_event_task_ctx(struct perf_event_context *ctx,
3236{ 3285{
3237 struct perf_event *event; 3286 struct perf_event *event;
3238 3287
3239 if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
3240 return;
3241
3242 rcu_read_lock();
3243 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 3288 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
3244 if (perf_event_task_match(event)) 3289 if (perf_event_task_match(event))
3245 perf_event_task_output(event, task_event); 3290 perf_event_task_output(event, task_event);
3246 } 3291 }
3247 rcu_read_unlock();
3248} 3292}
3249 3293
3250static void perf_event_task_event(struct perf_task_event *task_event) 3294static void perf_event_task_event(struct perf_task_event *task_event)
@@ -3252,15 +3296,14 @@ static void perf_event_task_event(struct perf_task_event *task_event)
3252 struct perf_cpu_context *cpuctx; 3296 struct perf_cpu_context *cpuctx;
3253 struct perf_event_context *ctx = task_event->task_ctx; 3297 struct perf_event_context *ctx = task_event->task_ctx;
3254 3298
3299 rcu_read_lock();
3255 cpuctx = &get_cpu_var(perf_cpu_context); 3300 cpuctx = &get_cpu_var(perf_cpu_context);
3256 perf_event_task_ctx(&cpuctx->ctx, task_event); 3301 perf_event_task_ctx(&cpuctx->ctx, task_event);
3257 put_cpu_var(perf_cpu_context);
3258
3259 rcu_read_lock();
3260 if (!ctx) 3302 if (!ctx)
3261 ctx = rcu_dereference(task_event->task->perf_event_ctxp); 3303 ctx = rcu_dereference(task_event->task->perf_event_ctxp);
3262 if (ctx) 3304 if (ctx)
3263 perf_event_task_ctx(ctx, task_event); 3305 perf_event_task_ctx(ctx, task_event);
3306 put_cpu_var(perf_cpu_context);
3264 rcu_read_unlock(); 3307 rcu_read_unlock();
3265} 3308}
3266 3309
@@ -3337,6 +3380,12 @@ static void perf_event_comm_output(struct perf_event *event,
3337 3380
3338static int perf_event_comm_match(struct perf_event *event) 3381static int perf_event_comm_match(struct perf_event *event)
3339{ 3382{
3383 if (event->state != PERF_EVENT_STATE_ACTIVE)
3384 return 0;
3385
3386 if (event->cpu != -1 && event->cpu != smp_processor_id())
3387 return 0;
3388
3340 if (event->attr.comm) 3389 if (event->attr.comm)
3341 return 1; 3390 return 1;
3342 3391
@@ -3348,15 +3397,10 @@ static void perf_event_comm_ctx(struct perf_event_context *ctx,
3348{ 3397{
3349 struct perf_event *event; 3398 struct perf_event *event;
3350 3399
3351 if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
3352 return;
3353
3354 rcu_read_lock();
3355 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 3400 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
3356 if (perf_event_comm_match(event)) 3401 if (perf_event_comm_match(event))
3357 perf_event_comm_output(event, comm_event); 3402 perf_event_comm_output(event, comm_event);
3358 } 3403 }
3359 rcu_read_unlock();
3360} 3404}
3361 3405
3362static void perf_event_comm_event(struct perf_comm_event *comm_event) 3406static void perf_event_comm_event(struct perf_comm_event *comm_event)
@@ -3367,7 +3411,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
3367 char comm[TASK_COMM_LEN]; 3411 char comm[TASK_COMM_LEN];
3368 3412
3369 memset(comm, 0, sizeof(comm)); 3413 memset(comm, 0, sizeof(comm));
3370 strncpy(comm, comm_event->task->comm, sizeof(comm)); 3414 strlcpy(comm, comm_event->task->comm, sizeof(comm));
3371 size = ALIGN(strlen(comm)+1, sizeof(u64)); 3415 size = ALIGN(strlen(comm)+1, sizeof(u64));
3372 3416
3373 comm_event->comm = comm; 3417 comm_event->comm = comm;
@@ -3375,18 +3419,13 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
3375 3419
3376 comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; 3420 comm_event->event_id.header.size = sizeof(comm_event->event_id) + size;
3377 3421
3422 rcu_read_lock();
3378 cpuctx = &get_cpu_var(perf_cpu_context); 3423 cpuctx = &get_cpu_var(perf_cpu_context);
3379 perf_event_comm_ctx(&cpuctx->ctx, comm_event); 3424 perf_event_comm_ctx(&cpuctx->ctx, comm_event);
3380 put_cpu_var(perf_cpu_context);
3381
3382 rcu_read_lock();
3383 /*
3384 * doesn't really matter which of the child contexts the
3385 * events ends up in.
3386 */
3387 ctx = rcu_dereference(current->perf_event_ctxp); 3425 ctx = rcu_dereference(current->perf_event_ctxp);
3388 if (ctx) 3426 if (ctx)
3389 perf_event_comm_ctx(ctx, comm_event); 3427 perf_event_comm_ctx(ctx, comm_event);
3428 put_cpu_var(perf_cpu_context);
3390 rcu_read_unlock(); 3429 rcu_read_unlock();
3391} 3430}
3392 3431
@@ -3461,6 +3500,12 @@ static void perf_event_mmap_output(struct perf_event *event,
3461static int perf_event_mmap_match(struct perf_event *event, 3500static int perf_event_mmap_match(struct perf_event *event,
3462 struct perf_mmap_event *mmap_event) 3501 struct perf_mmap_event *mmap_event)
3463{ 3502{
3503 if (event->state != PERF_EVENT_STATE_ACTIVE)
3504 return 0;
3505
3506 if (event->cpu != -1 && event->cpu != smp_processor_id())
3507 return 0;
3508
3464 if (event->attr.mmap) 3509 if (event->attr.mmap)
3465 return 1; 3510 return 1;
3466 3511
@@ -3472,15 +3517,10 @@ static void perf_event_mmap_ctx(struct perf_event_context *ctx,
3472{ 3517{
3473 struct perf_event *event; 3518 struct perf_event *event;
3474 3519
3475 if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
3476 return;
3477
3478 rcu_read_lock();
3479 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 3520 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
3480 if (perf_event_mmap_match(event, mmap_event)) 3521 if (perf_event_mmap_match(event, mmap_event))
3481 perf_event_mmap_output(event, mmap_event); 3522 perf_event_mmap_output(event, mmap_event);
3482 } 3523 }
3483 rcu_read_unlock();
3484} 3524}
3485 3525
3486static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) 3526static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
@@ -3536,18 +3576,13 @@ got_name:
3536 3576
3537 mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; 3577 mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size;
3538 3578
3579 rcu_read_lock();
3539 cpuctx = &get_cpu_var(perf_cpu_context); 3580 cpuctx = &get_cpu_var(perf_cpu_context);
3540 perf_event_mmap_ctx(&cpuctx->ctx, mmap_event); 3581 perf_event_mmap_ctx(&cpuctx->ctx, mmap_event);
3541 put_cpu_var(perf_cpu_context);
3542
3543 rcu_read_lock();
3544 /*
3545 * doesn't really matter which of the child contexts the
3546 * events ends up in.
3547 */
3548 ctx = rcu_dereference(current->perf_event_ctxp); 3582 ctx = rcu_dereference(current->perf_event_ctxp);
3549 if (ctx) 3583 if (ctx)
3550 perf_event_mmap_ctx(ctx, mmap_event); 3584 perf_event_mmap_ctx(ctx, mmap_event);
3585 put_cpu_var(perf_cpu_context);
3551 rcu_read_unlock(); 3586 rcu_read_unlock();
3552 3587
3553 kfree(buf); 3588 kfree(buf);
@@ -3679,7 +3714,11 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
3679 perf_event_disable(event); 3714 perf_event_disable(event);
3680 } 3715 }
3681 3716
3682 perf_event_output(event, nmi, data, regs); 3717 if (event->overflow_handler)
3718 event->overflow_handler(event, nmi, data, regs);
3719 else
3720 perf_event_output(event, nmi, data, regs);
3721
3683 return ret; 3722 return ret;
3684} 3723}
3685 3724
@@ -3724,16 +3763,16 @@ again:
3724 return nr; 3763 return nr;
3725} 3764}
3726 3765
3727static void perf_swevent_overflow(struct perf_event *event, 3766static void perf_swevent_overflow(struct perf_event *event, u64 overflow,
3728 int nmi, struct perf_sample_data *data, 3767 int nmi, struct perf_sample_data *data,
3729 struct pt_regs *regs) 3768 struct pt_regs *regs)
3730{ 3769{
3731 struct hw_perf_event *hwc = &event->hw; 3770 struct hw_perf_event *hwc = &event->hw;
3732 int throttle = 0; 3771 int throttle = 0;
3733 u64 overflow;
3734 3772
3735 data->period = event->hw.last_period; 3773 data->period = event->hw.last_period;
3736 overflow = perf_swevent_set_period(event); 3774 if (!overflow)
3775 overflow = perf_swevent_set_period(event);
3737 3776
3738 if (hwc->interrupts == MAX_INTERRUPTS) 3777 if (hwc->interrupts == MAX_INTERRUPTS)
3739 return; 3778 return;
@@ -3766,14 +3805,19 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,
3766 3805
3767 atomic64_add(nr, &event->count); 3806 atomic64_add(nr, &event->count);
3768 3807
3808 if (!regs)
3809 return;
3810
3769 if (!hwc->sample_period) 3811 if (!hwc->sample_period)
3770 return; 3812 return;
3771 3813
3772 if (!regs) 3814 if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq)
3815 return perf_swevent_overflow(event, 1, nmi, data, regs);
3816
3817 if (atomic64_add_negative(nr, &hwc->period_left))
3773 return; 3818 return;
3774 3819
3775 if (!atomic64_add_negative(nr, &hwc->period_left)) 3820 perf_swevent_overflow(event, 0, nmi, data, regs);
3776 perf_swevent_overflow(event, nmi, data, regs);
3777} 3821}
3778 3822
3779static int perf_swevent_is_counting(struct perf_event *event) 3823static int perf_swevent_is_counting(struct perf_event *event)
@@ -3806,25 +3850,47 @@ static int perf_swevent_is_counting(struct perf_event *event)
3806 return 1; 3850 return 1;
3807} 3851}
3808 3852
3853static int perf_tp_event_match(struct perf_event *event,
3854 struct perf_sample_data *data);
3855
3856static int perf_exclude_event(struct perf_event *event,
3857 struct pt_regs *regs)
3858{
3859 if (regs) {
3860 if (event->attr.exclude_user && user_mode(regs))
3861 return 1;
3862
3863 if (event->attr.exclude_kernel && !user_mode(regs))
3864 return 1;
3865 }
3866
3867 return 0;
3868}
3869
3809static int perf_swevent_match(struct perf_event *event, 3870static int perf_swevent_match(struct perf_event *event,
3810 enum perf_type_id type, 3871 enum perf_type_id type,
3811 u32 event_id, struct pt_regs *regs) 3872 u32 event_id,
3873 struct perf_sample_data *data,
3874 struct pt_regs *regs)
3812{ 3875{
3876 if (event->cpu != -1 && event->cpu != smp_processor_id())
3877 return 0;
3878
3813 if (!perf_swevent_is_counting(event)) 3879 if (!perf_swevent_is_counting(event))
3814 return 0; 3880 return 0;
3815 3881
3816 if (event->attr.type != type) 3882 if (event->attr.type != type)
3817 return 0; 3883 return 0;
3884
3818 if (event->attr.config != event_id) 3885 if (event->attr.config != event_id)
3819 return 0; 3886 return 0;
3820 3887
3821 if (regs) { 3888 if (perf_exclude_event(event, regs))
3822 if (event->attr.exclude_user && user_mode(regs)) 3889 return 0;
3823 return 0;
3824 3890
3825 if (event->attr.exclude_kernel && !user_mode(regs)) 3891 if (event->attr.type == PERF_TYPE_TRACEPOINT &&
3826 return 0; 3892 !perf_tp_event_match(event, data))
3827 } 3893 return 0;
3828 3894
3829 return 1; 3895 return 1;
3830} 3896}
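[Editor's sketch, not part of the patch] The checks factored out into perf_exclude_event() correspond to the attr.exclude_user/exclude_kernel bits visible from userspace. A small example: count page faults for the calling task, dropping any taken while not in user mode:

#define _GNU_SOURCE
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	struct perf_event_attr attr;
	uint64_t faults;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_SOFTWARE;
	attr.size = sizeof(attr);
	attr.config = PERF_COUNT_SW_PAGE_FAULTS;
	attr.exclude_kernel = 1;   /* drop events where !user_mode(regs) */

	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	/* touch some memory, then read the user-mode fault count */
	char buf[1 << 20];
	memset(buf, 0, sizeof(buf));

	if (read(fd, &faults, sizeof(faults)) == sizeof(faults))
		printf("user-mode page faults: %llu\n",
		       (unsigned long long)faults);
	close(fd);
	return 0;
}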
@@ -3837,49 +3903,59 @@ static void perf_swevent_ctx_event(struct perf_event_context *ctx,
3837{ 3903{
3838 struct perf_event *event; 3904 struct perf_event *event;
3839 3905
3840 if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
3841 return;
3842
3843 rcu_read_lock();
3844 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 3906 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
3845 if (perf_swevent_match(event, type, event_id, regs)) 3907 if (perf_swevent_match(event, type, event_id, data, regs))
3846 perf_swevent_add(event, nr, nmi, data, regs); 3908 perf_swevent_add(event, nr, nmi, data, regs);
3847 } 3909 }
3848 rcu_read_unlock();
3849} 3910}
3850 3911
3851static int *perf_swevent_recursion_context(struct perf_cpu_context *cpuctx) 3912int perf_swevent_get_recursion_context(void)
3852{ 3913{
3914 struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
3915 int rctx;
3916
3853 if (in_nmi()) 3917 if (in_nmi())
3854 return &cpuctx->recursion[3]; 3918 rctx = 3;
3919 else if (in_irq())
3920 rctx = 2;
3921 else if (in_softirq())
3922 rctx = 1;
3923 else
3924 rctx = 0;
3925
3926 if (cpuctx->recursion[rctx]) {
3927 put_cpu_var(perf_cpu_context);
3928 return -1;
3929 }
3855 3930
3856 if (in_irq()) 3931 cpuctx->recursion[rctx]++;
3857 return &cpuctx->recursion[2]; 3932 barrier();
3858 3933
3859 if (in_softirq()) 3934 return rctx;
3860 return &cpuctx->recursion[1]; 3935}
3936EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);
3861 3937
3862 return &cpuctx->recursion[0]; 3938void perf_swevent_put_recursion_context(int rctx)
3939{
3940 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
3941 barrier();
3942 cpuctx->recursion[rctx]--;
3943 put_cpu_var(perf_cpu_context);
3863} 3944}
3945EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context);
3864 3946
3865static void do_perf_sw_event(enum perf_type_id type, u32 event_id, 3947static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
3866 u64 nr, int nmi, 3948 u64 nr, int nmi,
3867 struct perf_sample_data *data, 3949 struct perf_sample_data *data,
3868 struct pt_regs *regs) 3950 struct pt_regs *regs)
3869{ 3951{
3870 struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); 3952 struct perf_cpu_context *cpuctx;
3871 int *recursion = perf_swevent_recursion_context(cpuctx);
3872 struct perf_event_context *ctx; 3953 struct perf_event_context *ctx;
3873 3954
3874 if (*recursion) 3955 cpuctx = &__get_cpu_var(perf_cpu_context);
3875 goto out; 3956 rcu_read_lock();
3876
3877 (*recursion)++;
3878 barrier();
3879
3880 perf_swevent_ctx_event(&cpuctx->ctx, type, event_id, 3957 perf_swevent_ctx_event(&cpuctx->ctx, type, event_id,
3881 nr, nmi, data, regs); 3958 nr, nmi, data, regs);
3882 rcu_read_lock();
3883 /* 3959 /*
3884 * doesn't really matter which of the child contexts the 3960 * doesn't really matter which of the child contexts the
3885 * events ends up in. 3961 * events ends up in.
@@ -3888,23 +3964,24 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
3888 if (ctx) 3964 if (ctx)
3889 perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs); 3965 perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs);
3890 rcu_read_unlock(); 3966 rcu_read_unlock();
3891
3892 barrier();
3893 (*recursion)--;
3894
3895out:
3896 put_cpu_var(perf_cpu_context);
3897} 3967}
3898 3968
3899void __perf_sw_event(u32 event_id, u64 nr, int nmi, 3969void __perf_sw_event(u32 event_id, u64 nr, int nmi,
3900 struct pt_regs *regs, u64 addr) 3970 struct pt_regs *regs, u64 addr)
3901{ 3971{
3902 struct perf_sample_data data = { 3972 struct perf_sample_data data;
3903 .addr = addr, 3973 int rctx;
3904 };
3905 3974
3906 do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, 3975 rctx = perf_swevent_get_recursion_context();
3907 &data, regs); 3976 if (rctx < 0)
3977 return;
3978
3979 data.addr = addr;
3980 data.raw = NULL;
3981
3982 do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs);
3983
3984 perf_swevent_put_recursion_context(rctx);
3908} 3985}
3909 3986
3910static void perf_swevent_read(struct perf_event *event) 3987static void perf_swevent_read(struct perf_event *event)
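[Editor's sketch, not part of the patch] A standalone model of the recursion protection that perf_swevent_get_recursion_context()/perf_swevent_put_recursion_context() introduce above: one counter per context level (task, softirq, irq, NMI), and a second software event raised at a level that is already active is dropped. The kernel derives the level from in_nmi()/in_irq()/in_softirq() and keeps the counters per CPU; here the level is simply passed in:

#include <stdio.h>

enum { CTX_TASK, CTX_SOFTIRQ, CTX_IRQ, CTX_NMI, CTX_LEVELS };

static int recursion[CTX_LEVELS];   /* per-CPU array in the kernel */

static int get_recursion_context(int level)
{
	if (recursion[level])
		return -1;     /* already inside a swevent at this level */
	recursion[level]++;
	return level;          /* "rctx" handed back to the caller       */
}

static void put_recursion_context(int rctx)
{
	recursion[rctx]--;
}

static void swevent(int level, const char *what)
{
	int rctx = get_recursion_context(level);

	if (rctx < 0) {
		printf("%s: dropped (recursion at level %d)\n", what, level);
		return;
	}
	printf("%s: handled at level %d\n", what, level);
	/* ...event processing runs here and may itself raise swevents... */
	put_recursion_context(rctx);
}

int main(void)
{
	int rctx = get_recursion_context(CTX_IRQ);   /* outer IRQ-level event */

	swevent(CTX_IRQ, "nested irq event");        /* dropped               */
	swevent(CTX_NMI, "nmi event");               /* different level: ok   */
	put_recursion_context(rctx);
	return 0;
}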
@@ -3949,6 +4026,8 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
3949 event->pmu->read(event); 4026 event->pmu->read(event);
3950 4027
3951 data.addr = 0; 4028 data.addr = 0;
4029 data.raw = NULL;
4030 data.period = event->hw.last_period;
3952 regs = get_irq_regs(); 4031 regs = get_irq_regs();
3953 /* 4032 /*
3954 * In case we exclude kernel IPs or are somehow not in interrupt 4033 * In case we exclude kernel IPs or are somehow not in interrupt
@@ -4017,8 +4096,7 @@ static void cpu_clock_perf_event_update(struct perf_event *event)
4017 u64 now; 4096 u64 now;
4018 4097
4019 now = cpu_clock(cpu); 4098 now = cpu_clock(cpu);
4020 prev = atomic64_read(&event->hw.prev_count); 4099 prev = atomic64_xchg(&event->hw.prev_count, now);
4021 atomic64_set(&event->hw.prev_count, now);
4022 atomic64_add(now - prev, &event->count); 4100 atomic64_add(now - prev, &event->count);
4023} 4101}
4024 4102
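cpu_clock_perf_event_update() now folds the separate atomic64_read()/atomic64_set() of hw.prev_count into a single atomic64_xchg(), so the timestamp that is read back is always exactly the one the new timestamp replaced and no concurrent update can slip in between the two steps. A sketch of the resulting pattern, with a hypothetical wrapper name:

static void clock_event_accumulate(struct perf_event *event, u64 now)
{
        u64 prev;

        /* One atomic step: install 'now' and get back the value it
         * replaced, so the delta below can never be lost or doubled. */
        prev = atomic64_xchg(&event->hw.prev_count, now);
        atomic64_add(now - prev, &event->count);
}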
@@ -4108,6 +4186,7 @@ static const struct pmu perf_ops_task_clock = {
4108}; 4186};
4109 4187
4110#ifdef CONFIG_EVENT_PROFILE 4188#ifdef CONFIG_EVENT_PROFILE
4189
4111void perf_tp_event(int event_id, u64 addr, u64 count, void *record, 4190void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
4112 int entry_size) 4191 int entry_size)
4113{ 4192{
@@ -4126,13 +4205,21 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
4126 if (!regs) 4205 if (!regs)
4127 regs = task_pt_regs(current); 4206 regs = task_pt_regs(current);
4128 4207
4208	/* Trace events are already protected against recursion */
4129 do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, 4209 do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1,
4130 &data, regs); 4210 &data, regs);
4131} 4211}
4132EXPORT_SYMBOL_GPL(perf_tp_event); 4212EXPORT_SYMBOL_GPL(perf_tp_event);
4133 4213
4134extern int ftrace_profile_enable(int); 4214static int perf_tp_event_match(struct perf_event *event,
4135extern void ftrace_profile_disable(int); 4215 struct perf_sample_data *data)
4216{
4217 void *record = data->raw->data;
4218
4219 if (likely(!event->filter) || filter_match_preds(event->filter, record))
4220 return 1;
4221 return 0;
4222}
4136 4223
4137static void tp_perf_event_destroy(struct perf_event *event) 4224static void tp_perf_event_destroy(struct perf_event *event)
4138{ 4225{
@@ -4157,11 +4244,93 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event)
4157 4244
4158 return &perf_ops_generic; 4245 return &perf_ops_generic;
4159} 4246}
4247
4248static int perf_event_set_filter(struct perf_event *event, void __user *arg)
4249{
4250 char *filter_str;
4251 int ret;
4252
4253 if (event->attr.type != PERF_TYPE_TRACEPOINT)
4254 return -EINVAL;
4255
4256 filter_str = strndup_user(arg, PAGE_SIZE);
4257 if (IS_ERR(filter_str))
4258 return PTR_ERR(filter_str);
4259
4260 ret = ftrace_profile_set_filter(event, event->attr.config, filter_str);
4261
4262 kfree(filter_str);
4263 return ret;
4264}
4265
4266static void perf_event_free_filter(struct perf_event *event)
4267{
4268 ftrace_profile_free_filter(event);
4269}
4270
4160#else 4271#else
4272
4273static int perf_tp_event_match(struct perf_event *event,
4274 struct perf_sample_data *data)
4275{
4276 return 1;
4277}
4278
4161static const struct pmu *tp_perf_event_init(struct perf_event *event) 4279static const struct pmu *tp_perf_event_init(struct perf_event *event)
4162{ 4280{
4163 return NULL; 4281 return NULL;
4164} 4282}
4283
4284static int perf_event_set_filter(struct perf_event *event, void __user *arg)
4285{
4286 return -ENOENT;
4287}
4288
4289static void perf_event_free_filter(struct perf_event *event)
4290{
4291}
4292
4293#endif /* CONFIG_EVENT_PROFILE */
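Under CONFIG_EVENT_PROFILE a tracepoint event can now carry an ftrace filter: perf_event_set_filter() copies the string from userspace with strndup_user() and hands it to ftrace_profile_set_filter(), and perf_tp_event_match() later discards samples whose raw record fails filter_match_preds(). This hunk does not show how userspace reaches perf_event_set_filter(); assuming it is wired up through the perf event ioctl interface as PERF_EVENT_IOC_SET_FILTER, a userspace sketch would look like:

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/perf_event.h>

/* fd: a perf_event_open() fd of type PERF_TYPE_TRACEPOINT; anything else
 * makes perf_event_set_filter() return -EINVAL.  The ioctl name is an
 * assumption, it does not appear in this hunk. */
static int set_tracepoint_filter(int fd, const char *filter)
{
        if (ioctl(fd, PERF_EVENT_IOC_SET_FILTER, filter) < 0) {
                perror("PERF_EVENT_IOC_SET_FILTER");
                return -1;
        }
        return 0;
}

A non-tracepoint event is rejected with -EINVAL before the string is even copied, and strings without a NUL inside PAGE_SIZE bytes fail the strndup_user() copy.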
4294
4295#ifdef CONFIG_HAVE_HW_BREAKPOINT
4296static void bp_perf_event_destroy(struct perf_event *event)
4297{
4298 release_bp_slot(event);
4299}
4300
4301static const struct pmu *bp_perf_event_init(struct perf_event *bp)
4302{
4303 int err;
4304
4305 err = register_perf_hw_breakpoint(bp);
4306 if (err)
4307 return ERR_PTR(err);
4308
4309 bp->destroy = bp_perf_event_destroy;
4310
4311 return &perf_ops_bp;
4312}
4313
4314void perf_bp_event(struct perf_event *bp, void *data)
4315{
4316 struct perf_sample_data sample;
4317 struct pt_regs *regs = data;
4318
4319 sample.raw = NULL;
4320 sample.addr = bp->attr.bp_addr;
4321
4322 if (!perf_exclude_event(bp, regs))
4323 perf_swevent_add(bp, 1, 1, &sample, regs);
4324}
4325#else
4326static const struct pmu *bp_perf_event_init(struct perf_event *bp)
4327{
4328 return NULL;
4329}
4330
4331void perf_bp_event(struct perf_event *bp, void *regs)
4332{
4333}
4165#endif 4334#endif
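PERF_TYPE_BREAKPOINT events are routed to the new bp_perf_event_init(), which reserves a breakpoint slot via register_perf_hw_breakpoint() and then reuses the software-event plumbing (perf_ops_bp, with perf_bp_event() feeding perf_swevent_add() from the breakpoint exception path). A hedged sketch of the attribute setup such an event would use; only bp_addr appears in this hunk, while the bp_type/bp_len fields and the HW_BREAKPOINT_* constants are assumed from <linux/hw_breakpoint.h> in this series:

#include <linux/string.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>

/* Illustrative only: describe a 4-byte write watchpoint at 'addr'. */
static void init_write_watchpoint(struct perf_event_attr *attr,
                                  unsigned long addr)
{
        memset(attr, 0, sizeof(*attr));
        attr->type          = PERF_TYPE_BREAKPOINT;
        attr->size          = sizeof(*attr);
        attr->bp_addr       = addr;
        attr->bp_type       = HW_BREAKPOINT_W;     /* assumed constant */
        attr->bp_len        = HW_BREAKPOINT_LEN_4; /* assumed constant */
        attr->sample_period = 1;                   /* fire on every hit */
}

Such an attr then goes through perf_event_alloc(), where the added PERF_TYPE_BREAKPOINT case picks bp_perf_event_init(), or through perf_event_create_kernel_counter() further down for in-kernel users.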
4166 4335
4167atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; 4336atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
@@ -4208,6 +4377,8 @@ static const struct pmu *sw_perf_event_init(struct perf_event *event)
4208 case PERF_COUNT_SW_PAGE_FAULTS_MAJ: 4377 case PERF_COUNT_SW_PAGE_FAULTS_MAJ:
4209 case PERF_COUNT_SW_CONTEXT_SWITCHES: 4378 case PERF_COUNT_SW_CONTEXT_SWITCHES:
4210 case PERF_COUNT_SW_CPU_MIGRATIONS: 4379 case PERF_COUNT_SW_CPU_MIGRATIONS:
4380 case PERF_COUNT_SW_ALIGNMENT_FAULTS:
4381 case PERF_COUNT_SW_EMULATION_FAULTS:
4211 if (!event->parent) { 4382 if (!event->parent) {
4212 atomic_inc(&perf_swevent_enabled[event_id]); 4383 atomic_inc(&perf_swevent_enabled[event_id]);
4213 event->destroy = sw_perf_event_destroy; 4384 event->destroy = sw_perf_event_destroy;
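sw_perf_event_init() now also accepts PERF_COUNT_SW_ALIGNMENT_FAULTS and PERF_COUNT_SW_EMULATION_FAULTS, so architecture fault and emulation paths can feed them through the ordinary software-event helper. A sketch of the call such a path would make; the wrapper function is hypothetical, and __perf_sw_event()'s signature is the one visible earlier in this diff:

#include <linux/perf_event.h>

/* Hypothetical hook for an arch alignment-fault handler. */
static void count_alignment_fault(struct pt_regs *regs, unsigned long address)
{
        /* one fault, not from NMI context */
        __perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, address);
}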
@@ -4228,6 +4399,7 @@ perf_event_alloc(struct perf_event_attr *attr,
4228 struct perf_event_context *ctx, 4399 struct perf_event_context *ctx,
4229 struct perf_event *group_leader, 4400 struct perf_event *group_leader,
4230 struct perf_event *parent_event, 4401 struct perf_event *parent_event,
4402 perf_overflow_handler_t overflow_handler,
4231 gfp_t gfpflags) 4403 gfp_t gfpflags)
4232{ 4404{
4233 const struct pmu *pmu; 4405 const struct pmu *pmu;
@@ -4270,6 +4442,11 @@ perf_event_alloc(struct perf_event_attr *attr,
4270 4442
4271 event->state = PERF_EVENT_STATE_INACTIVE; 4443 event->state = PERF_EVENT_STATE_INACTIVE;
4272 4444
4445 if (!overflow_handler && parent_event)
4446 overflow_handler = parent_event->overflow_handler;
4447
4448 event->overflow_handler = overflow_handler;
4449
4273 if (attr->disabled) 4450 if (attr->disabled)
4274 event->state = PERF_EVENT_STATE_OFF; 4451 event->state = PERF_EVENT_STATE_OFF;
4275 4452
@@ -4304,6 +4481,11 @@ perf_event_alloc(struct perf_event_attr *attr,
4304 pmu = tp_perf_event_init(event); 4481 pmu = tp_perf_event_init(event);
4305 break; 4482 break;
4306 4483
4484 case PERF_TYPE_BREAKPOINT:
4485 pmu = bp_perf_event_init(event);
4486 break;
4487
4488
4307 default: 4489 default:
4308 break; 4490 break;
4309 } 4491 }
@@ -4398,7 +4580,7 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
4398 if (attr->type >= PERF_TYPE_MAX) 4580 if (attr->type >= PERF_TYPE_MAX)
4399 return -EINVAL; 4581 return -EINVAL;
4400 4582
4401 if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3) 4583 if (attr->__reserved_1 || attr->__reserved_2)
4402 return -EINVAL; 4584 return -EINVAL;
4403 4585
4404 if (attr->sample_type & ~(PERF_SAMPLE_MAX-1)) 4586 if (attr->sample_type & ~(PERF_SAMPLE_MAX-1))
@@ -4416,7 +4598,7 @@ err_size:
4416 goto out; 4598 goto out;
4417} 4599}
4418 4600
4419int perf_event_set_output(struct perf_event *event, int output_fd) 4601static int perf_event_set_output(struct perf_event *event, int output_fd)
4420{ 4602{
4421 struct perf_event *output_event = NULL; 4603 struct perf_event *output_event = NULL;
4422 struct file *output_file = NULL; 4604 struct file *output_file = NULL;
@@ -4546,12 +4728,12 @@ SYSCALL_DEFINE5(perf_event_open,
4546 } 4728 }
4547 4729
4548 event = perf_event_alloc(&attr, cpu, ctx, group_leader, 4730 event = perf_event_alloc(&attr, cpu, ctx, group_leader,
4549 NULL, GFP_KERNEL); 4731 NULL, NULL, GFP_KERNEL);
4550 err = PTR_ERR(event); 4732 err = PTR_ERR(event);
4551 if (IS_ERR(event)) 4733 if (IS_ERR(event))
4552 goto err_put_context; 4734 goto err_put_context;
4553 4735
4554 err = anon_inode_getfd("[perf_event]", &perf_fops, event, 0); 4736 err = anon_inode_getfd("[perf_event]", &perf_fops, event, O_RDWR);
4555 if (err < 0) 4737 if (err < 0)
4556 goto err_free_put_context; 4738 goto err_free_put_context;
4557 4739
@@ -4594,6 +4776,61 @@ err_put_context:
4594 return err; 4776 return err;
4595} 4777}
4596 4778
4779/**
4780 * perf_event_create_kernel_counter
4781 *
4782 * @attr: attributes of the counter to create
4783 * @cpu: cpu on which the counter is bound
4784 * @pid: task to profile
4785 */
4786struct perf_event *
4787perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
4788 pid_t pid,
4789 perf_overflow_handler_t overflow_handler)
4790{
4791 struct perf_event *event;
4792 struct perf_event_context *ctx;
4793 int err;
4794
4795 /*
4796 * Get the target context (task or percpu):
4797 */
4798
4799 ctx = find_get_context(pid, cpu);
4800 if (IS_ERR(ctx)) {
4801 err = PTR_ERR(ctx);
4802 goto err_exit;
4803 }
4804
4805 event = perf_event_alloc(attr, cpu, ctx, NULL,
4806 NULL, overflow_handler, GFP_KERNEL);
4807 if (IS_ERR(event)) {
4808 err = PTR_ERR(event);
4809 goto err_put_context;
4810 }
4811
4812 event->filp = NULL;
4813 WARN_ON_ONCE(ctx->parent_ctx);
4814 mutex_lock(&ctx->mutex);
4815 perf_install_in_context(ctx, event, cpu);
4816 ++ctx->generation;
4817 mutex_unlock(&ctx->mutex);
4818
4819 event->owner = current;
4820 get_task_struct(current);
4821 mutex_lock(&current->perf_event_mutex);
4822 list_add_tail(&event->owner_entry, &current->perf_event_list);
4823 mutex_unlock(&current->perf_event_mutex);
4824
4825 return event;
4826
4827 err_put_context:
4828 put_ctx(ctx);
4829 err_exit:
4830 return ERR_PTR(err);
4831}
4832EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);
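perf_event_create_kernel_counter() is exported for in-kernel users and, together with the overflow_handler argument threaded through perf_event_alloc(), lets a caller receive a callback on every overflow instead of draining a file descriptor. A hedged usage sketch; the handler prototype follows perf_overflow_handler_t as used in this series, and every name below is illustrative:

#include <asm/atomic.h>
#include <linux/perf_event.h>

static atomic_t example_overflows = ATOMIC_INIT(0);

/* Called from the sampling interrupt; keep it NMI-safe. */
static void example_overflow(struct perf_event *event, int nmi,
                             struct perf_sample_data *data,
                             struct pt_regs *regs)
{
        atomic_inc(&example_overflows);
}

static struct perf_event *start_cpu_clock_counter(int cpu)
{
        struct perf_event_attr attr = {
                .type          = PERF_TYPE_SOFTWARE,
                .config        = PERF_COUNT_SW_CPU_CLOCK,
                .size          = sizeof(attr),
                .sample_period = 1000000,          /* illustrative */
        };

        /* pid == -1 binds the counter to 'cpu' rather than to a task */
        return perf_event_create_kernel_counter(&attr, cpu, -1,
                                                example_overflow);
}

The return value is checked with IS_ERR()/PTR_ERR(), matching the error path inside the function above.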
4833
4597/* 4834/*
4598 * inherit a event from parent task to child task: 4835 * inherit a event from parent task to child task:
4599 */ 4836 */
@@ -4619,7 +4856,7 @@ inherit_event(struct perf_event *parent_event,
4619 child_event = perf_event_alloc(&parent_event->attr, 4856 child_event = perf_event_alloc(&parent_event->attr,
4620 parent_event->cpu, child_ctx, 4857 parent_event->cpu, child_ctx,
4621 group_leader, parent_event, 4858 group_leader, parent_event,
4622 GFP_KERNEL); 4859 NULL, GFP_KERNEL);
4623 if (IS_ERR(child_event)) 4860 if (IS_ERR(child_event))
4624 return child_event; 4861 return child_event;
4625 get_ctx(child_ctx); 4862 get_ctx(child_ctx);
@@ -4637,6 +4874,8 @@ inherit_event(struct perf_event *parent_event,
4637 if (parent_event->attr.freq) 4874 if (parent_event->attr.freq)
4638 child_event->hw.sample_period = parent_event->hw.sample_period; 4875 child_event->hw.sample_period = parent_event->hw.sample_period;
4639 4876
4877 child_event->overflow_handler = parent_event->overflow_handler;
4878
4640 /* 4879 /*
4641 * Link it up in the child's context: 4880 * Link it up in the child's context:
4642 */ 4881 */
@@ -4726,7 +4965,6 @@ __perf_event_exit_task(struct perf_event *child_event,
4726{ 4965{
4727 struct perf_event *parent_event; 4966 struct perf_event *parent_event;
4728 4967
4729 update_event_times(child_event);
4730 perf_event_remove_from_context(child_event); 4968 perf_event_remove_from_context(child_event);
4731 4969
4732 parent_event = child_event->parent; 4970 parent_event = child_event->parent;
@@ -4770,7 +5008,7 @@ void perf_event_exit_task(struct task_struct *child)
4770 * reading child->perf_event_ctxp, we wait until it has 5008 * reading child->perf_event_ctxp, we wait until it has
4771 * incremented the context's refcount before we do put_ctx below. 5009 * incremented the context's refcount before we do put_ctx below.
4772 */ 5010 */
4773 spin_lock(&child_ctx->lock); 5011 raw_spin_lock(&child_ctx->lock);
4774 child->perf_event_ctxp = NULL; 5012 child->perf_event_ctxp = NULL;
4775 /* 5013 /*
4776 * If this context is a clone; unclone it so it can't get 5014 * If this context is a clone; unclone it so it can't get
@@ -4778,7 +5016,8 @@ void perf_event_exit_task(struct task_struct *child)
4778 * the events from it. 5016 * the events from it.
4779 */ 5017 */
4780 unclone_ctx(child_ctx); 5018 unclone_ctx(child_ctx);
4781 spin_unlock_irqrestore(&child_ctx->lock, flags); 5019 update_context_time(child_ctx);
5020 raw_spin_unlock_irqrestore(&child_ctx->lock, flags);
4782 5021
4783 /* 5022 /*
4784 * Report the task dead after unscheduling the events so that we 5023 * Report the task dead after unscheduling the events so that we
@@ -4861,7 +5100,7 @@ again:
4861 */ 5100 */
4862int perf_event_init_task(struct task_struct *child) 5101int perf_event_init_task(struct task_struct *child)
4863{ 5102{
4864 struct perf_event_context *child_ctx, *parent_ctx; 5103 struct perf_event_context *child_ctx = NULL, *parent_ctx;
4865 struct perf_event_context *cloned_ctx; 5104 struct perf_event_context *cloned_ctx;
4866 struct perf_event *event; 5105 struct perf_event *event;
4867 struct task_struct *parent = current; 5106 struct task_struct *parent = current;
@@ -4877,20 +5116,6 @@ int perf_event_init_task(struct task_struct *child)
4877 return 0; 5116 return 0;
4878 5117
4879 /* 5118 /*
4880 * This is executed from the parent task context, so inherit
4881 * events that have been marked for cloning.
4882 * First allocate and initialize a context for the child.
4883 */
4884
4885 child_ctx = kmalloc(sizeof(struct perf_event_context), GFP_KERNEL);
4886 if (!child_ctx)
4887 return -ENOMEM;
4888
4889 __perf_event_init_context(child_ctx, child);
4890 child->perf_event_ctxp = child_ctx;
4891 get_task_struct(child);
4892
4893 /*
4894 * If the parent's context is a clone, pin it so it won't get 5119 * If the parent's context is a clone, pin it so it won't get
4895 * swapped under us. 5120 * swapped under us.
4896 */ 5121 */
@@ -4920,6 +5145,26 @@ int perf_event_init_task(struct task_struct *child)
4920 continue; 5145 continue;
4921 } 5146 }
4922 5147
5148 if (!child->perf_event_ctxp) {
5149 /*
5150 * This is executed from the parent task context, so
5151 * inherit events that have been marked for cloning.
5152 * First allocate and initialize a context for the
5153 * child.
5154 */
5155
5156 child_ctx = kzalloc(sizeof(struct perf_event_context),
5157 GFP_KERNEL);
5158 if (!child_ctx) {
5159 ret = -ENOMEM;
5160 break;
5161 }
5162
5163 __perf_event_init_context(child_ctx, child);
5164 child->perf_event_ctxp = child_ctx;
5165 get_task_struct(child);
5166 }
5167
4923 ret = inherit_group(event, parent, parent_ctx, 5168 ret = inherit_group(event, parent, parent_ctx,
4924 child, child_ctx); 5169 child, child_ctx);
4925 if (ret) { 5170 if (ret) {
@@ -4928,7 +5173,7 @@ int perf_event_init_task(struct task_struct *child)
4928 } 5173 }
4929 } 5174 }
4930 5175
4931 if (inherited_all) { 5176 if (child_ctx && inherited_all) {
4932 /* 5177 /*
4933 * Mark the child context as a clone of the parent 5178 * Mark the child context as a clone of the parent
4934 * context, or of whatever the parent is a clone of. 5179 * context, or of whatever the parent is a clone of.
@@ -5062,11 +5307,11 @@ perf_set_reserve_percpu(struct sysdev_class *class,
5062 perf_reserved_percpu = val; 5307 perf_reserved_percpu = val;
5063 for_each_online_cpu(cpu) { 5308 for_each_online_cpu(cpu) {
5064 cpuctx = &per_cpu(perf_cpu_context, cpu); 5309 cpuctx = &per_cpu(perf_cpu_context, cpu);
5065 spin_lock_irq(&cpuctx->ctx.lock); 5310 raw_spin_lock_irq(&cpuctx->ctx.lock);
5066 mpt = min(perf_max_events - cpuctx->ctx.nr_events, 5311 mpt = min(perf_max_events - cpuctx->ctx.nr_events,
5067 perf_max_events - perf_reserved_percpu); 5312 perf_max_events - perf_reserved_percpu);
5068 cpuctx->max_pertask = mpt; 5313 cpuctx->max_pertask = mpt;
5069 spin_unlock_irq(&cpuctx->ctx.lock); 5314 raw_spin_unlock_irq(&cpuctx->ctx.lock);
5070 } 5315 }
5071 spin_unlock(&perf_resource_lock); 5316 spin_unlock(&perf_resource_lock);
5072 5317