Diffstat (limited to 'kernel/perf_event.c')
-rw-r--r--  kernel/perf_event.c  |  774
1 file changed, 502 insertions(+), 272 deletions(-)
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 7f29643c8985..9052d6c8c9fd 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -28,13 +28,15 @@
28#include <linux/anon_inodes.h> 28#include <linux/anon_inodes.h>
29#include <linux/kernel_stat.h> 29#include <linux/kernel_stat.h>
30#include <linux/perf_event.h> 30#include <linux/perf_event.h>
31#include <linux/ftrace_event.h>
32#include <linux/hw_breakpoint.h>
31 33
32#include <asm/irq_regs.h> 34#include <asm/irq_regs.h>
33 35
34/* 36/*
35 * Each CPU has a list of per CPU events: 37 * Each CPU has a list of per CPU events:
36 */ 38 */
37DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context); 39static DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context);
38 40
39int perf_max_events __read_mostly = 1; 41int perf_max_events __read_mostly = 1;
40static int perf_reserved_percpu __read_mostly; 42static int perf_reserved_percpu __read_mostly;
@@ -201,14 +203,14 @@ perf_lock_task_context(struct task_struct *task, unsigned long *flags)
201 * if so. If we locked the right context, then it 203 * if so. If we locked the right context, then it
202 * can't get swapped on us any more. 204 * can't get swapped on us any more.
203 */ 205 */
204 spin_lock_irqsave(&ctx->lock, *flags); 206 raw_spin_lock_irqsave(&ctx->lock, *flags);
205 if (ctx != rcu_dereference(task->perf_event_ctxp)) { 207 if (ctx != rcu_dereference(task->perf_event_ctxp)) {
206 spin_unlock_irqrestore(&ctx->lock, *flags); 208 raw_spin_unlock_irqrestore(&ctx->lock, *flags);
207 goto retry; 209 goto retry;
208 } 210 }
209 211
210 if (!atomic_inc_not_zero(&ctx->refcount)) { 212 if (!atomic_inc_not_zero(&ctx->refcount)) {
211 spin_unlock_irqrestore(&ctx->lock, *flags); 213 raw_spin_unlock_irqrestore(&ctx->lock, *flags);
212 ctx = NULL; 214 ctx = NULL;
213 } 215 }
214 } 216 }
@@ -229,7 +231,7 @@ static struct perf_event_context *perf_pin_task_context(struct task_struct *task
229 ctx = perf_lock_task_context(task, &flags); 231 ctx = perf_lock_task_context(task, &flags);
230 if (ctx) { 232 if (ctx) {
231 ++ctx->pin_count; 233 ++ctx->pin_count;
232 spin_unlock_irqrestore(&ctx->lock, flags); 234 raw_spin_unlock_irqrestore(&ctx->lock, flags);
233 } 235 }
234 return ctx; 236 return ctx;
235} 237}
@@ -238,12 +240,55 @@ static void perf_unpin_context(struct perf_event_context *ctx)
238{ 240{
239 unsigned long flags; 241 unsigned long flags;
240 242
241 spin_lock_irqsave(&ctx->lock, flags); 243 raw_spin_lock_irqsave(&ctx->lock, flags);
242 --ctx->pin_count; 244 --ctx->pin_count;
243 spin_unlock_irqrestore(&ctx->lock, flags); 245 raw_spin_unlock_irqrestore(&ctx->lock, flags);
244 put_ctx(ctx); 246 put_ctx(ctx);
245} 247}
246 248
249static inline u64 perf_clock(void)
250{
251 return cpu_clock(smp_processor_id());
252}
253
254/*
255 * Update the record of the current time in a context.
256 */
257static void update_context_time(struct perf_event_context *ctx)
258{
259 u64 now = perf_clock();
260
261 ctx->time += now - ctx->timestamp;
262 ctx->timestamp = now;
263}
264
265/*
266 * Update the total_time_enabled and total_time_running fields for a event.
267 */
268static void update_event_times(struct perf_event *event)
269{
270 struct perf_event_context *ctx = event->ctx;
271 u64 run_end;
272
273 if (event->state < PERF_EVENT_STATE_INACTIVE ||
274 event->group_leader->state < PERF_EVENT_STATE_INACTIVE)
275 return;
276
277 if (ctx->is_active)
278 run_end = ctx->time;
279 else
280 run_end = event->tstamp_stopped;
281
282 event->total_time_enabled = run_end - event->tstamp_enabled;
283
284 if (event->state == PERF_EVENT_STATE_INACTIVE)
285 run_end = event->tstamp_stopped;
286 else
287 run_end = ctx->time;
288
289 event->total_time_running = run_end - event->tstamp_running;
290}
291
247/* 292/*
248 * Add a event from the lists for its context. 293 * Add a event from the lists for its context.
249 * Must be called with ctx->mutex and ctx->lock held. 294 * Must be called with ctx->mutex and ctx->lock held.
@@ -292,6 +337,18 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
292 if (event->group_leader != event) 337 if (event->group_leader != event)
293 event->group_leader->nr_siblings--; 338 event->group_leader->nr_siblings--;
294 339
340 update_event_times(event);
341
342 /*
343 * If event was in error state, then keep it
344 * that way, otherwise bogus counts will be
345 * returned on read(). The only way to get out
346 * of error state is by explicit re-enabling
347 * of the event
348 */
349 if (event->state > PERF_EVENT_STATE_OFF)
350 event->state = PERF_EVENT_STATE_OFF;
351
295 /* 352 /*
296 * If this was a group event with sibling events then 353 * If this was a group event with sibling events then
297 * upgrade the siblings to singleton events by adding them 354 * upgrade the siblings to singleton events by adding them
@@ -370,7 +427,7 @@ static void __perf_event_remove_from_context(void *info)
370 if (ctx->task && cpuctx->task_ctx != ctx) 427 if (ctx->task && cpuctx->task_ctx != ctx)
371 return; 428 return;
372 429
373 spin_lock(&ctx->lock); 430 raw_spin_lock(&ctx->lock);
374 /* 431 /*
375 * Protect the list operation against NMI by disabling the 432 * Protect the list operation against NMI by disabling the
376 * events on a global level. 433 * events on a global level.
@@ -392,7 +449,7 @@ static void __perf_event_remove_from_context(void *info)
392 } 449 }
393 450
394 perf_enable(); 451 perf_enable();
395 spin_unlock(&ctx->lock); 452 raw_spin_unlock(&ctx->lock);
396} 453}
397 454
398 455
@@ -419,7 +476,7 @@ static void perf_event_remove_from_context(struct perf_event *event)
419 if (!task) { 476 if (!task) {
420 /* 477 /*
421 * Per cpu events are removed via an smp call and 478 * Per cpu events are removed via an smp call and
422 * the removal is always sucessful. 479 * the removal is always successful.
423 */ 480 */
424 smp_call_function_single(event->cpu, 481 smp_call_function_single(event->cpu,
425 __perf_event_remove_from_context, 482 __perf_event_remove_from_context,
@@ -431,12 +488,12 @@ retry:
431 task_oncpu_function_call(task, __perf_event_remove_from_context, 488 task_oncpu_function_call(task, __perf_event_remove_from_context,
432 event); 489 event);
433 490
434 spin_lock_irq(&ctx->lock); 491 raw_spin_lock_irq(&ctx->lock);
435 /* 492 /*
436 * If the context is active we need to retry the smp call. 493 * If the context is active we need to retry the smp call.
437 */ 494 */
438 if (ctx->nr_active && !list_empty(&event->group_entry)) { 495 if (ctx->nr_active && !list_empty(&event->group_entry)) {
439 spin_unlock_irq(&ctx->lock); 496 raw_spin_unlock_irq(&ctx->lock);
440 goto retry; 497 goto retry;
441 } 498 }
442 499
@@ -445,48 +502,9 @@ retry:
445 * can remove the event safely, if the call above did not 502 * can remove the event safely, if the call above did not
446 * succeed. 503 * succeed.
447 */ 504 */
448 if (!list_empty(&event->group_entry)) { 505 if (!list_empty(&event->group_entry))
449 list_del_event(event, ctx); 506 list_del_event(event, ctx);
450 } 507 raw_spin_unlock_irq(&ctx->lock);
451 spin_unlock_irq(&ctx->lock);
452}
453
454static inline u64 perf_clock(void)
455{
456 return cpu_clock(smp_processor_id());
457}
458
459/*
460 * Update the record of the current time in a context.
461 */
462static void update_context_time(struct perf_event_context *ctx)
463{
464 u64 now = perf_clock();
465
466 ctx->time += now - ctx->timestamp;
467 ctx->timestamp = now;
468}
469
470/*
471 * Update the total_time_enabled and total_time_running fields for a event.
472 */
473static void update_event_times(struct perf_event *event)
474{
475 struct perf_event_context *ctx = event->ctx;
476 u64 run_end;
477
478 if (event->state < PERF_EVENT_STATE_INACTIVE ||
479 event->group_leader->state < PERF_EVENT_STATE_INACTIVE)
480 return;
481
482 event->total_time_enabled = ctx->time - event->tstamp_enabled;
483
484 if (event->state == PERF_EVENT_STATE_INACTIVE)
485 run_end = event->tstamp_stopped;
486 else
487 run_end = ctx->time;
488
489 event->total_time_running = run_end - event->tstamp_running;
490} 508}
491 509
492/* 510/*
@@ -517,7 +535,7 @@ static void __perf_event_disable(void *info)
517 if (ctx->task && cpuctx->task_ctx != ctx) 535 if (ctx->task && cpuctx->task_ctx != ctx)
518 return; 536 return;
519 537
520 spin_lock(&ctx->lock); 538 raw_spin_lock(&ctx->lock);
521 539
522 /* 540 /*
523 * If the event is on, turn it off. 541 * If the event is on, turn it off.
@@ -533,7 +551,7 @@ static void __perf_event_disable(void *info)
533 event->state = PERF_EVENT_STATE_OFF; 551 event->state = PERF_EVENT_STATE_OFF;
534 } 552 }
535 553
536 spin_unlock(&ctx->lock); 554 raw_spin_unlock(&ctx->lock);
537} 555}
538 556
539/* 557/*
@@ -549,7 +567,7 @@ static void __perf_event_disable(void *info)
549 * is the current context on this CPU and preemption is disabled, 567 * is the current context on this CPU and preemption is disabled,
550 * hence we can't get into perf_event_task_sched_out for this context. 568 * hence we can't get into perf_event_task_sched_out for this context.
551 */ 569 */
552static void perf_event_disable(struct perf_event *event) 570void perf_event_disable(struct perf_event *event)
553{ 571{
554 struct perf_event_context *ctx = event->ctx; 572 struct perf_event_context *ctx = event->ctx;
555 struct task_struct *task = ctx->task; 573 struct task_struct *task = ctx->task;
@@ -566,12 +584,12 @@ static void perf_event_disable(struct perf_event *event)
566 retry: 584 retry:
567 task_oncpu_function_call(task, __perf_event_disable, event); 585 task_oncpu_function_call(task, __perf_event_disable, event);
568 586
569 spin_lock_irq(&ctx->lock); 587 raw_spin_lock_irq(&ctx->lock);
570 /* 588 /*
571 * If the event is still active, we need to retry the cross-call. 589 * If the event is still active, we need to retry the cross-call.
572 */ 590 */
573 if (event->state == PERF_EVENT_STATE_ACTIVE) { 591 if (event->state == PERF_EVENT_STATE_ACTIVE) {
574 spin_unlock_irq(&ctx->lock); 592 raw_spin_unlock_irq(&ctx->lock);
575 goto retry; 593 goto retry;
576 } 594 }
577 595
@@ -584,7 +602,7 @@ static void perf_event_disable(struct perf_event *event)
584 event->state = PERF_EVENT_STATE_OFF; 602 event->state = PERF_EVENT_STATE_OFF;
585 } 603 }
586 604
587 spin_unlock_irq(&ctx->lock); 605 raw_spin_unlock_irq(&ctx->lock);
588} 606}
589 607
590static int 608static int
@@ -752,7 +770,7 @@ static void __perf_install_in_context(void *info)
752 cpuctx->task_ctx = ctx; 770 cpuctx->task_ctx = ctx;
753 } 771 }
754 772
755 spin_lock(&ctx->lock); 773 raw_spin_lock(&ctx->lock);
756 ctx->is_active = 1; 774 ctx->is_active = 1;
757 update_context_time(ctx); 775 update_context_time(ctx);
758 776
@@ -802,7 +820,7 @@ static void __perf_install_in_context(void *info)
802 unlock: 820 unlock:
803 perf_enable(); 821 perf_enable();
804 822
805 spin_unlock(&ctx->lock); 823 raw_spin_unlock(&ctx->lock);
806} 824}
807 825
808/* 826/*
@@ -827,7 +845,7 @@ perf_install_in_context(struct perf_event_context *ctx,
827 if (!task) { 845 if (!task) {
828 /* 846 /*
829 * Per cpu events are installed via an smp call and 847 * Per cpu events are installed via an smp call and
830 * the install is always sucessful. 848 * the install is always successful.
831 */ 849 */
832 smp_call_function_single(cpu, __perf_install_in_context, 850 smp_call_function_single(cpu, __perf_install_in_context,
833 event, 1); 851 event, 1);
@@ -838,12 +856,12 @@ retry:
838 task_oncpu_function_call(task, __perf_install_in_context, 856 task_oncpu_function_call(task, __perf_install_in_context,
839 event); 857 event);
840 858
841 spin_lock_irq(&ctx->lock); 859 raw_spin_lock_irq(&ctx->lock);
842 /* 860 /*
843 * we need to retry the smp call. 861 * we need to retry the smp call.
844 */ 862 */
845 if (ctx->is_active && list_empty(&event->group_entry)) { 863 if (ctx->is_active && list_empty(&event->group_entry)) {
846 spin_unlock_irq(&ctx->lock); 864 raw_spin_unlock_irq(&ctx->lock);
847 goto retry; 865 goto retry;
848 } 866 }
849 867
@@ -854,7 +872,7 @@ retry:
854 */ 872 */
855 if (list_empty(&event->group_entry)) 873 if (list_empty(&event->group_entry))
856 add_event_to_ctx(event, ctx); 874 add_event_to_ctx(event, ctx);
857 spin_unlock_irq(&ctx->lock); 875 raw_spin_unlock_irq(&ctx->lock);
858} 876}
859 877
860/* 878/*
@@ -899,7 +917,7 @@ static void __perf_event_enable(void *info)
899 cpuctx->task_ctx = ctx; 917 cpuctx->task_ctx = ctx;
900 } 918 }
901 919
902 spin_lock(&ctx->lock); 920 raw_spin_lock(&ctx->lock);
903 ctx->is_active = 1; 921 ctx->is_active = 1;
904 update_context_time(ctx); 922 update_context_time(ctx);
905 923
@@ -941,7 +959,7 @@ static void __perf_event_enable(void *info)
941 } 959 }
942 960
943 unlock: 961 unlock:
944 spin_unlock(&ctx->lock); 962 raw_spin_unlock(&ctx->lock);
945} 963}
946 964
947/* 965/*
@@ -953,7 +971,7 @@ static void __perf_event_enable(void *info)
953 * perf_event_for_each_child or perf_event_for_each as described 971 * perf_event_for_each_child or perf_event_for_each as described
954 * for perf_event_disable. 972 * for perf_event_disable.
955 */ 973 */
956static void perf_event_enable(struct perf_event *event) 974void perf_event_enable(struct perf_event *event)
957{ 975{
958 struct perf_event_context *ctx = event->ctx; 976 struct perf_event_context *ctx = event->ctx;
959 struct task_struct *task = ctx->task; 977 struct task_struct *task = ctx->task;
@@ -967,7 +985,7 @@ static void perf_event_enable(struct perf_event *event)
967 return; 985 return;
968 } 986 }
969 987
970 spin_lock_irq(&ctx->lock); 988 raw_spin_lock_irq(&ctx->lock);
971 if (event->state >= PERF_EVENT_STATE_INACTIVE) 989 if (event->state >= PERF_EVENT_STATE_INACTIVE)
972 goto out; 990 goto out;
973 991
@@ -982,10 +1000,10 @@ static void perf_event_enable(struct perf_event *event)
982 event->state = PERF_EVENT_STATE_OFF; 1000 event->state = PERF_EVENT_STATE_OFF;
983 1001
984 retry: 1002 retry:
985 spin_unlock_irq(&ctx->lock); 1003 raw_spin_unlock_irq(&ctx->lock);
986 task_oncpu_function_call(task, __perf_event_enable, event); 1004 task_oncpu_function_call(task, __perf_event_enable, event);
987 1005
988 spin_lock_irq(&ctx->lock); 1006 raw_spin_lock_irq(&ctx->lock);
989 1007
990 /* 1008 /*
991 * If the context is active and the event is still off, 1009 * If the context is active and the event is still off,
@@ -1002,7 +1020,7 @@ static void perf_event_enable(struct perf_event *event)
1002 __perf_event_mark_enabled(event, ctx); 1020 __perf_event_mark_enabled(event, ctx);
1003 1021
1004 out: 1022 out:
1005 spin_unlock_irq(&ctx->lock); 1023 raw_spin_unlock_irq(&ctx->lock);
1006} 1024}
1007 1025
1008static int perf_event_refresh(struct perf_event *event, int refresh) 1026static int perf_event_refresh(struct perf_event *event, int refresh)
@@ -1024,20 +1042,20 @@ void __perf_event_sched_out(struct perf_event_context *ctx,
1024{ 1042{
1025 struct perf_event *event; 1043 struct perf_event *event;
1026 1044
1027 spin_lock(&ctx->lock); 1045 raw_spin_lock(&ctx->lock);
1028 ctx->is_active = 0; 1046 ctx->is_active = 0;
1029 if (likely(!ctx->nr_events)) 1047 if (likely(!ctx->nr_events))
1030 goto out; 1048 goto out;
1031 update_context_time(ctx); 1049 update_context_time(ctx);
1032 1050
1033 perf_disable(); 1051 perf_disable();
1034 if (ctx->nr_active) 1052 if (ctx->nr_active) {
1035 list_for_each_entry(event, &ctx->group_list, group_entry) 1053 list_for_each_entry(event, &ctx->group_list, group_entry)
1036 group_sched_out(event, cpuctx, ctx); 1054 group_sched_out(event, cpuctx, ctx);
1037 1055 }
1038 perf_enable(); 1056 perf_enable();
1039 out: 1057 out:
1040 spin_unlock(&ctx->lock); 1058 raw_spin_unlock(&ctx->lock);
1041} 1059}
1042 1060
1043/* 1061/*
@@ -1059,8 +1077,6 @@ static int context_equiv(struct perf_event_context *ctx1,
1059 && !ctx1->pin_count && !ctx2->pin_count; 1077 && !ctx1->pin_count && !ctx2->pin_count;
1060} 1078}
1061 1079
1062static void __perf_event_read(void *event);
1063
1064static void __perf_event_sync_stat(struct perf_event *event, 1080static void __perf_event_sync_stat(struct perf_event *event,
1065 struct perf_event *next_event) 1081 struct perf_event *next_event)
1066{ 1082{
@@ -1078,8 +1094,8 @@ static void __perf_event_sync_stat(struct perf_event *event,
1078 */ 1094 */
1079 switch (event->state) { 1095 switch (event->state) {
1080 case PERF_EVENT_STATE_ACTIVE: 1096 case PERF_EVENT_STATE_ACTIVE:
1081 __perf_event_read(event); 1097 event->pmu->read(event);
1082 break; 1098 /* fall-through */
1083 1099
1084 case PERF_EVENT_STATE_INACTIVE: 1100 case PERF_EVENT_STATE_INACTIVE:
1085 update_event_times(event); 1101 update_event_times(event);
@@ -1118,6 +1134,8 @@ static void perf_event_sync_stat(struct perf_event_context *ctx,
1118 if (!ctx->nr_stat) 1134 if (!ctx->nr_stat)
1119 return; 1135 return;
1120 1136
1137 update_context_time(ctx);
1138
1121 event = list_first_entry(&ctx->event_list, 1139 event = list_first_entry(&ctx->event_list,
1122 struct perf_event, event_entry); 1140 struct perf_event, event_entry);
1123 1141
@@ -1161,8 +1179,6 @@ void perf_event_task_sched_out(struct task_struct *task,
1161 if (likely(!ctx || !cpuctx->task_ctx)) 1179 if (likely(!ctx || !cpuctx->task_ctx))
1162 return; 1180 return;
1163 1181
1164 update_context_time(ctx);
1165
1166 rcu_read_lock(); 1182 rcu_read_lock();
1167 parent = rcu_dereference(ctx->parent_ctx); 1183 parent = rcu_dereference(ctx->parent_ctx);
1168 next_ctx = next->perf_event_ctxp; 1184 next_ctx = next->perf_event_ctxp;
@@ -1177,8 +1193,8 @@ void perf_event_task_sched_out(struct task_struct *task,
1177 * order we take the locks because no other cpu could 1193 * order we take the locks because no other cpu could
1178 * be trying to lock both of these tasks. 1194 * be trying to lock both of these tasks.
1179 */ 1195 */
1180 spin_lock(&ctx->lock); 1196 raw_spin_lock(&ctx->lock);
1181 spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING); 1197 raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
1182 if (context_equiv(ctx, next_ctx)) { 1198 if (context_equiv(ctx, next_ctx)) {
1183 /* 1199 /*
1184 * XXX do we need a memory barrier of sorts 1200 * XXX do we need a memory barrier of sorts
@@ -1192,8 +1208,8 @@ void perf_event_task_sched_out(struct task_struct *task,
1192 1208
1193 perf_event_sync_stat(ctx, next_ctx); 1209 perf_event_sync_stat(ctx, next_ctx);
1194 } 1210 }
1195 spin_unlock(&next_ctx->lock); 1211 raw_spin_unlock(&next_ctx->lock);
1196 spin_unlock(&ctx->lock); 1212 raw_spin_unlock(&ctx->lock);
1197 } 1213 }
1198 rcu_read_unlock(); 1214 rcu_read_unlock();
1199 1215
@@ -1235,7 +1251,7 @@ __perf_event_sched_in(struct perf_event_context *ctx,
1235 struct perf_event *event; 1251 struct perf_event *event;
1236 int can_add_hw = 1; 1252 int can_add_hw = 1;
1237 1253
1238 spin_lock(&ctx->lock); 1254 raw_spin_lock(&ctx->lock);
1239 ctx->is_active = 1; 1255 ctx->is_active = 1;
1240 if (likely(!ctx->nr_events)) 1256 if (likely(!ctx->nr_events))
1241 goto out; 1257 goto out;
@@ -1290,7 +1306,7 @@ __perf_event_sched_in(struct perf_event_context *ctx,
1290 } 1306 }
1291 perf_enable(); 1307 perf_enable();
1292 out: 1308 out:
1293 spin_unlock(&ctx->lock); 1309 raw_spin_unlock(&ctx->lock);
1294} 1310}
1295 1311
1296/* 1312/*
@@ -1354,7 +1370,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
1354 struct hw_perf_event *hwc; 1370 struct hw_perf_event *hwc;
1355 u64 interrupts, freq; 1371 u64 interrupts, freq;
1356 1372
1357 spin_lock(&ctx->lock); 1373 raw_spin_lock(&ctx->lock);
1358 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 1374 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
1359 if (event->state != PERF_EVENT_STATE_ACTIVE) 1375 if (event->state != PERF_EVENT_STATE_ACTIVE)
1360 continue; 1376 continue;
@@ -1409,7 +1425,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
1409 perf_enable(); 1425 perf_enable();
1410 } 1426 }
1411 } 1427 }
1412 spin_unlock(&ctx->lock); 1428 raw_spin_unlock(&ctx->lock);
1413} 1429}
1414 1430
1415/* 1431/*
@@ -1422,7 +1438,7 @@ static void rotate_ctx(struct perf_event_context *ctx)
1422 if (!ctx->nr_events) 1438 if (!ctx->nr_events)
1423 return; 1439 return;
1424 1440
1425 spin_lock(&ctx->lock); 1441 raw_spin_lock(&ctx->lock);
1426 /* 1442 /*
1427 * Rotate the first entry last (works just fine for group events too): 1443 * Rotate the first entry last (works just fine for group events too):
1428 */ 1444 */
@@ -1433,7 +1449,7 @@ static void rotate_ctx(struct perf_event_context *ctx)
1433 } 1449 }
1434 perf_enable(); 1450 perf_enable();
1435 1451
1436 spin_unlock(&ctx->lock); 1452 raw_spin_unlock(&ctx->lock);
1437} 1453}
1438 1454
1439void perf_event_task_tick(struct task_struct *curr, int cpu) 1455void perf_event_task_tick(struct task_struct *curr, int cpu)
@@ -1482,7 +1498,7 @@ static void perf_event_enable_on_exec(struct task_struct *task)
1482 1498
1483 __perf_event_task_sched_out(ctx); 1499 __perf_event_task_sched_out(ctx);
1484 1500
1485 spin_lock(&ctx->lock); 1501 raw_spin_lock(&ctx->lock);
1486 1502
1487 list_for_each_entry(event, &ctx->group_list, group_entry) { 1503 list_for_each_entry(event, &ctx->group_list, group_entry) {
1488 if (!event->attr.enable_on_exec) 1504 if (!event->attr.enable_on_exec)
@@ -1500,7 +1516,7 @@ static void perf_event_enable_on_exec(struct task_struct *task)
1500 if (enabled) 1516 if (enabled)
1501 unclone_ctx(ctx); 1517 unclone_ctx(ctx);
1502 1518
1503 spin_unlock(&ctx->lock); 1519 raw_spin_unlock(&ctx->lock);
1504 1520
1505 perf_event_task_sched_in(task, smp_processor_id()); 1521 perf_event_task_sched_in(task, smp_processor_id());
1506 out: 1522 out:
@@ -1515,7 +1531,6 @@ static void __perf_event_read(void *info)
1515 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); 1531 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
1516 struct perf_event *event = info; 1532 struct perf_event *event = info;
1517 struct perf_event_context *ctx = event->ctx; 1533 struct perf_event_context *ctx = event->ctx;
1518 unsigned long flags;
1519 1534
1520 /* 1535 /*
1521 * If this is a task context, we need to check whether it is 1536 * If this is a task context, we need to check whether it is
@@ -1527,12 +1542,12 @@ static void __perf_event_read(void *info)
1527 if (ctx->task && cpuctx->task_ctx != ctx) 1542 if (ctx->task && cpuctx->task_ctx != ctx)
1528 return; 1543 return;
1529 1544
1530 local_irq_save(flags); 1545 raw_spin_lock(&ctx->lock);
1531 if (ctx->is_active) 1546 update_context_time(ctx);
1532 update_context_time(ctx);
1533 event->pmu->read(event);
1534 update_event_times(event); 1547 update_event_times(event);
1535 local_irq_restore(flags); 1548 raw_spin_unlock(&ctx->lock);
1549
1550 event->pmu->read(event);
1536} 1551}
1537 1552
1538static u64 perf_event_read(struct perf_event *event) 1553static u64 perf_event_read(struct perf_event *event)
@@ -1545,7 +1560,13 @@ static u64 perf_event_read(struct perf_event *event)
1545 smp_call_function_single(event->oncpu, 1560 smp_call_function_single(event->oncpu,
1546 __perf_event_read, event, 1); 1561 __perf_event_read, event, 1);
1547 } else if (event->state == PERF_EVENT_STATE_INACTIVE) { 1562 } else if (event->state == PERF_EVENT_STATE_INACTIVE) {
1563 struct perf_event_context *ctx = event->ctx;
1564 unsigned long flags;
1565
1566 raw_spin_lock_irqsave(&ctx->lock, flags);
1567 update_context_time(ctx);
1548 update_event_times(event); 1568 update_event_times(event);
1569 raw_spin_unlock_irqrestore(&ctx->lock, flags);
1549 } 1570 }
1550 1571
1551 return atomic64_read(&event->count); 1572 return atomic64_read(&event->count);
@@ -1558,8 +1579,7 @@ static void
1558__perf_event_init_context(struct perf_event_context *ctx, 1579__perf_event_init_context(struct perf_event_context *ctx,
1559 struct task_struct *task) 1580 struct task_struct *task)
1560{ 1581{
1561 memset(ctx, 0, sizeof(*ctx)); 1582 raw_spin_lock_init(&ctx->lock);
1562 spin_lock_init(&ctx->lock);
1563 mutex_init(&ctx->mutex); 1583 mutex_init(&ctx->mutex);
1564 INIT_LIST_HEAD(&ctx->group_list); 1584 INIT_LIST_HEAD(&ctx->group_list);
1565 INIT_LIST_HEAD(&ctx->event_list); 1585 INIT_LIST_HEAD(&ctx->event_list);
@@ -1629,11 +1649,11 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu)
1629 ctx = perf_lock_task_context(task, &flags); 1649 ctx = perf_lock_task_context(task, &flags);
1630 if (ctx) { 1650 if (ctx) {
1631 unclone_ctx(ctx); 1651 unclone_ctx(ctx);
1632 spin_unlock_irqrestore(&ctx->lock, flags); 1652 raw_spin_unlock_irqrestore(&ctx->lock, flags);
1633 } 1653 }
1634 1654
1635 if (!ctx) { 1655 if (!ctx) {
1636 ctx = kmalloc(sizeof(struct perf_event_context), GFP_KERNEL); 1656 ctx = kzalloc(sizeof(struct perf_event_context), GFP_KERNEL);
1637 err = -ENOMEM; 1657 err = -ENOMEM;
1638 if (!ctx) 1658 if (!ctx)
1639 goto errout; 1659 goto errout;
@@ -1658,6 +1678,8 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu)
1658 return ERR_PTR(err); 1678 return ERR_PTR(err);
1659} 1679}
1660 1680
1681static void perf_event_free_filter(struct perf_event *event);
1682
1661static void free_event_rcu(struct rcu_head *head) 1683static void free_event_rcu(struct rcu_head *head)
1662{ 1684{
1663 struct perf_event *event; 1685 struct perf_event *event;
@@ -1665,6 +1687,7 @@ static void free_event_rcu(struct rcu_head *head)
1665 event = container_of(head, struct perf_event, rcu_head); 1687 event = container_of(head, struct perf_event, rcu_head);
1666 if (event->ns) 1688 if (event->ns)
1667 put_pid_ns(event->ns); 1689 put_pid_ns(event->ns);
1690 perf_event_free_filter(event);
1668 kfree(event); 1691 kfree(event);
1669} 1692}
1670 1693
@@ -1696,16 +1719,10 @@ static void free_event(struct perf_event *event)
1696 call_rcu(&event->rcu_head, free_event_rcu); 1719 call_rcu(&event->rcu_head, free_event_rcu);
1697} 1720}
1698 1721
1699/* 1722int perf_event_release_kernel(struct perf_event *event)
1700 * Called when the last reference to the file is gone.
1701 */
1702static int perf_release(struct inode *inode, struct file *file)
1703{ 1723{
1704 struct perf_event *event = file->private_data;
1705 struct perf_event_context *ctx = event->ctx; 1724 struct perf_event_context *ctx = event->ctx;
1706 1725
1707 file->private_data = NULL;
1708
1709 WARN_ON_ONCE(ctx->parent_ctx); 1726 WARN_ON_ONCE(ctx->parent_ctx);
1710 mutex_lock(&ctx->mutex); 1727 mutex_lock(&ctx->mutex);
1711 perf_event_remove_from_context(event); 1728 perf_event_remove_from_context(event);
@@ -1720,6 +1737,19 @@ static int perf_release(struct inode *inode, struct file *file)
1720 1737
1721 return 0; 1738 return 0;
1722} 1739}
1740EXPORT_SYMBOL_GPL(perf_event_release_kernel);
1741
1742/*
1743 * Called when the last reference to the file is gone.
1744 */
1745static int perf_release(struct inode *inode, struct file *file)
1746{
1747 struct perf_event *event = file->private_data;
1748
1749 file->private_data = NULL;
1750
1751 return perf_event_release_kernel(event);
1752}
1723 1753
1724static int perf_event_read_size(struct perf_event *event) 1754static int perf_event_read_size(struct perf_event *event)
1725{ 1755{
@@ -1746,91 +1776,94 @@ static int perf_event_read_size(struct perf_event *event)
1746 return size; 1776 return size;
1747} 1777}
1748 1778
1749static u64 perf_event_read_value(struct perf_event *event) 1779u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
1750{ 1780{
1751 struct perf_event *child; 1781 struct perf_event *child;
1752 u64 total = 0; 1782 u64 total = 0;
1753 1783
1784 *enabled = 0;
1785 *running = 0;
1786
1787 mutex_lock(&event->child_mutex);
1754 total += perf_event_read(event); 1788 total += perf_event_read(event);
1755 list_for_each_entry(child, &event->child_list, child_list) 1789 *enabled += event->total_time_enabled +
1790 atomic64_read(&event->child_total_time_enabled);
1791 *running += event->total_time_running +
1792 atomic64_read(&event->child_total_time_running);
1793
1794 list_for_each_entry(child, &event->child_list, child_list) {
1756 total += perf_event_read(child); 1795 total += perf_event_read(child);
1796 *enabled += child->total_time_enabled;
1797 *running += child->total_time_running;
1798 }
1799 mutex_unlock(&event->child_mutex);
1757 1800
1758 return total; 1801 return total;
1759} 1802}
1760 1803EXPORT_SYMBOL_GPL(perf_event_read_value);
1761static int perf_event_read_entry(struct perf_event *event,
1762 u64 read_format, char __user *buf)
1763{
1764 int n = 0, count = 0;
1765 u64 values[2];
1766
1767 values[n++] = perf_event_read_value(event);
1768 if (read_format & PERF_FORMAT_ID)
1769 values[n++] = primary_event_id(event);
1770
1771 count = n * sizeof(u64);
1772
1773 if (copy_to_user(buf, values, count))
1774 return -EFAULT;
1775
1776 return count;
1777}
1778 1804
1779static int perf_event_read_group(struct perf_event *event, 1805static int perf_event_read_group(struct perf_event *event,
1780 u64 read_format, char __user *buf) 1806 u64 read_format, char __user *buf)
1781{ 1807{
1782 struct perf_event *leader = event->group_leader, *sub; 1808 struct perf_event *leader = event->group_leader, *sub;
1783 int n = 0, size = 0, err = -EFAULT; 1809 int n = 0, size = 0, ret = -EFAULT;
1784 u64 values[3]; 1810 struct perf_event_context *ctx = leader->ctx;
1811 u64 values[5];
1812 u64 count, enabled, running;
1813
1814 mutex_lock(&ctx->mutex);
1815 count = perf_event_read_value(leader, &enabled, &running);
1785 1816
1786 values[n++] = 1 + leader->nr_siblings; 1817 values[n++] = 1 + leader->nr_siblings;
1787 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { 1818 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
1788 values[n++] = leader->total_time_enabled + 1819 values[n++] = enabled;
1789 atomic64_read(&leader->child_total_time_enabled); 1820 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
1790 } 1821 values[n++] = running;
1791 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { 1822 values[n++] = count;
1792 values[n++] = leader->total_time_running + 1823 if (read_format & PERF_FORMAT_ID)
1793 atomic64_read(&leader->child_total_time_running); 1824 values[n++] = primary_event_id(leader);
1794 }
1795 1825
1796 size = n * sizeof(u64); 1826 size = n * sizeof(u64);
1797 1827
1798 if (copy_to_user(buf, values, size)) 1828 if (copy_to_user(buf, values, size))
1799 return -EFAULT; 1829 goto unlock;
1800
1801 err = perf_event_read_entry(leader, read_format, buf + size);
1802 if (err < 0)
1803 return err;
1804 1830
1805 size += err; 1831 ret = size;
1806 1832
1807 list_for_each_entry(sub, &leader->sibling_list, group_entry) { 1833 list_for_each_entry(sub, &leader->sibling_list, group_entry) {
1808 err = perf_event_read_entry(sub, read_format, 1834 n = 0;
1809 buf + size); 1835
1810 if (err < 0) 1836 values[n++] = perf_event_read_value(sub, &enabled, &running);
1811 return err; 1837 if (read_format & PERF_FORMAT_ID)
1838 values[n++] = primary_event_id(sub);
1839
1840 size = n * sizeof(u64);
1841
1842 if (copy_to_user(buf + ret, values, size)) {
1843 ret = -EFAULT;
1844 goto unlock;
1845 }
1812 1846
1813 size += err; 1847 ret += size;
1814 } 1848 }
1849unlock:
1850 mutex_unlock(&ctx->mutex);
1815 1851
1816 return size; 1852 return ret;
1817} 1853}
1818 1854
1819static int perf_event_read_one(struct perf_event *event, 1855static int perf_event_read_one(struct perf_event *event,
1820 u64 read_format, char __user *buf) 1856 u64 read_format, char __user *buf)
1821{ 1857{
1858 u64 enabled, running;
1822 u64 values[4]; 1859 u64 values[4];
1823 int n = 0; 1860 int n = 0;
1824 1861
1825 values[n++] = perf_event_read_value(event); 1862 values[n++] = perf_event_read_value(event, &enabled, &running);
1826 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { 1863 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
1827 values[n++] = event->total_time_enabled + 1864 values[n++] = enabled;
1828 atomic64_read(&event->child_total_time_enabled); 1865 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
1829 } 1866 values[n++] = running;
1830 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
1831 values[n++] = event->total_time_running +
1832 atomic64_read(&event->child_total_time_running);
1833 }
1834 if (read_format & PERF_FORMAT_ID) 1867 if (read_format & PERF_FORMAT_ID)
1835 values[n++] = primary_event_id(event); 1868 values[n++] = primary_event_id(event);
1836 1869
@@ -1861,12 +1894,10 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count)
1861 return -ENOSPC; 1894 return -ENOSPC;
1862 1895
1863 WARN_ON_ONCE(event->ctx->parent_ctx); 1896 WARN_ON_ONCE(event->ctx->parent_ctx);
1864 mutex_lock(&event->child_mutex);
1865 if (read_format & PERF_FORMAT_GROUP) 1897 if (read_format & PERF_FORMAT_GROUP)
1866 ret = perf_event_read_group(event, read_format, buf); 1898 ret = perf_event_read_group(event, read_format, buf);
1867 else 1899 else
1868 ret = perf_event_read_one(event, read_format, buf); 1900 ret = perf_event_read_one(event, read_format, buf);
1869 mutex_unlock(&event->child_mutex);
1870 1901
1871 return ret; 1902 return ret;
1872} 1903}
@@ -1956,7 +1987,7 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
1956 if (!value) 1987 if (!value)
1957 return -EINVAL; 1988 return -EINVAL;
1958 1989
1959 spin_lock_irq(&ctx->lock); 1990 raw_spin_lock_irq(&ctx->lock);
1960 if (event->attr.freq) { 1991 if (event->attr.freq) {
1961 if (value > sysctl_perf_event_sample_rate) { 1992 if (value > sysctl_perf_event_sample_rate) {
1962 ret = -EINVAL; 1993 ret = -EINVAL;
@@ -1969,12 +2000,13 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
1969 event->hw.sample_period = value; 2000 event->hw.sample_period = value;
1970 } 2001 }
1971unlock: 2002unlock:
1972 spin_unlock_irq(&ctx->lock); 2003 raw_spin_unlock_irq(&ctx->lock);
1973 2004
1974 return ret; 2005 return ret;
1975} 2006}
1976 2007
1977int perf_event_set_output(struct perf_event *event, int output_fd); 2008static int perf_event_set_output(struct perf_event *event, int output_fd);
2009static int perf_event_set_filter(struct perf_event *event, void __user *arg);
1978 2010
1979static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 2011static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1980{ 2012{
@@ -2002,6 +2034,9 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2002 case PERF_EVENT_IOC_SET_OUTPUT: 2034 case PERF_EVENT_IOC_SET_OUTPUT:
2003 return perf_event_set_output(event, arg); 2035 return perf_event_set_output(event, arg);
2004 2036
2037 case PERF_EVENT_IOC_SET_FILTER:
2038 return perf_event_set_filter(event, (void __user *)arg);
2039
2005 default: 2040 default:
2006 return -ENOTTY; 2041 return -ENOTTY;
2007 } 2042 }
@@ -2174,6 +2209,7 @@ static void perf_mmap_data_free(struct perf_mmap_data *data)
2174 perf_mmap_free_page((unsigned long)data->user_page); 2209 perf_mmap_free_page((unsigned long)data->user_page);
2175 for (i = 0; i < data->nr_pages; i++) 2210 for (i = 0; i < data->nr_pages; i++)
2176 perf_mmap_free_page((unsigned long)data->data_pages[i]); 2211 perf_mmap_free_page((unsigned long)data->data_pages[i]);
2212 kfree(data);
2177} 2213}
2178 2214
2179#else 2215#else
@@ -2214,6 +2250,7 @@ static void perf_mmap_data_free_work(struct work_struct *work)
2214 perf_mmap_unmark_page(base + (i * PAGE_SIZE)); 2250 perf_mmap_unmark_page(base + (i * PAGE_SIZE));
2215 2251
2216 vfree(base); 2252 vfree(base);
2253 kfree(data);
2217} 2254}
2218 2255
2219static void perf_mmap_data_free(struct perf_mmap_data *data) 2256static void perf_mmap_data_free(struct perf_mmap_data *data)
@@ -2307,7 +2344,7 @@ perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data)
2307 } 2344 }
2308 2345
2309 if (!data->watermark) 2346 if (!data->watermark)
2310 data->watermark = max_t(long, PAGE_SIZE, max_size / 2); 2347 data->watermark = max_size / 2;
2311 2348
2312 2349
2313 rcu_assign_pointer(event->data, data); 2350 rcu_assign_pointer(event->data, data);
@@ -2319,7 +2356,6 @@ static void perf_mmap_data_free_rcu(struct rcu_head *rcu_head)
2319 2356
2320 data = container_of(rcu_head, struct perf_mmap_data, rcu_head); 2357 data = container_of(rcu_head, struct perf_mmap_data, rcu_head);
2321 perf_mmap_data_free(data); 2358 perf_mmap_data_free(data);
2322 kfree(data);
2323} 2359}
2324 2360
2325static void perf_mmap_data_release(struct perf_event *event) 2361static void perf_mmap_data_release(struct perf_event *event)
@@ -2666,20 +2702,21 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
2666static void perf_output_lock(struct perf_output_handle *handle) 2702static void perf_output_lock(struct perf_output_handle *handle)
2667{ 2703{
2668 struct perf_mmap_data *data = handle->data; 2704 struct perf_mmap_data *data = handle->data;
2669 int cpu; 2705 int cur, cpu = get_cpu();
2670 2706
2671 handle->locked = 0; 2707 handle->locked = 0;
2672 2708
2673 local_irq_save(handle->flags); 2709 for (;;) {
2674 cpu = smp_processor_id(); 2710 cur = atomic_cmpxchg(&data->lock, -1, cpu);
2675 2711 if (cur == -1) {
2676 if (in_nmi() && atomic_read(&data->lock) == cpu) 2712 handle->locked = 1;
2677 return; 2713 break;
2714 }
2715 if (cur == cpu)
2716 break;
2678 2717
2679 while (atomic_cmpxchg(&data->lock, -1, cpu) != -1)
2680 cpu_relax(); 2718 cpu_relax();
2681 2719 }
2682 handle->locked = 1;
2683} 2720}
2684 2721
2685static void perf_output_unlock(struct perf_output_handle *handle) 2722static void perf_output_unlock(struct perf_output_handle *handle)
@@ -2725,7 +2762,7 @@ again:
2725 if (atomic_xchg(&data->wakeup, 0)) 2762 if (atomic_xchg(&data->wakeup, 0))
2726 perf_output_wakeup(handle); 2763 perf_output_wakeup(handle);
2727out: 2764out:
2728 local_irq_restore(handle->flags); 2765 put_cpu();
2729} 2766}
2730 2767
2731void perf_output_copy(struct perf_output_handle *handle, 2768void perf_output_copy(struct perf_output_handle *handle,
@@ -3236,15 +3273,10 @@ static void perf_event_task_ctx(struct perf_event_context *ctx,
3236{ 3273{
3237 struct perf_event *event; 3274 struct perf_event *event;
3238 3275
3239 if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
3240 return;
3241
3242 rcu_read_lock();
3243 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 3276 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
3244 if (perf_event_task_match(event)) 3277 if (perf_event_task_match(event))
3245 perf_event_task_output(event, task_event); 3278 perf_event_task_output(event, task_event);
3246 } 3279 }
3247 rcu_read_unlock();
3248} 3280}
3249 3281
3250static void perf_event_task_event(struct perf_task_event *task_event) 3282static void perf_event_task_event(struct perf_task_event *task_event)
@@ -3252,11 +3284,11 @@ static void perf_event_task_event(struct perf_task_event *task_event)
3252 struct perf_cpu_context *cpuctx; 3284 struct perf_cpu_context *cpuctx;
3253 struct perf_event_context *ctx = task_event->task_ctx; 3285 struct perf_event_context *ctx = task_event->task_ctx;
3254 3286
3287 rcu_read_lock();
3255 cpuctx = &get_cpu_var(perf_cpu_context); 3288 cpuctx = &get_cpu_var(perf_cpu_context);
3256 perf_event_task_ctx(&cpuctx->ctx, task_event); 3289 perf_event_task_ctx(&cpuctx->ctx, task_event);
3257 put_cpu_var(perf_cpu_context); 3290 put_cpu_var(perf_cpu_context);
3258 3291
3259 rcu_read_lock();
3260 if (!ctx) 3292 if (!ctx)
3261 ctx = rcu_dereference(task_event->task->perf_event_ctxp); 3293 ctx = rcu_dereference(task_event->task->perf_event_ctxp);
3262 if (ctx) 3294 if (ctx)
@@ -3348,15 +3380,10 @@ static void perf_event_comm_ctx(struct perf_event_context *ctx,
3348{ 3380{
3349 struct perf_event *event; 3381 struct perf_event *event;
3350 3382
3351 if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
3352 return;
3353
3354 rcu_read_lock();
3355 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 3383 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
3356 if (perf_event_comm_match(event)) 3384 if (perf_event_comm_match(event))
3357 perf_event_comm_output(event, comm_event); 3385 perf_event_comm_output(event, comm_event);
3358 } 3386 }
3359 rcu_read_unlock();
3360} 3387}
3361 3388
3362static void perf_event_comm_event(struct perf_comm_event *comm_event) 3389static void perf_event_comm_event(struct perf_comm_event *comm_event)
@@ -3367,7 +3394,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
3367 char comm[TASK_COMM_LEN]; 3394 char comm[TASK_COMM_LEN];
3368 3395
3369 memset(comm, 0, sizeof(comm)); 3396 memset(comm, 0, sizeof(comm));
3370 strncpy(comm, comm_event->task->comm, sizeof(comm)); 3397 strlcpy(comm, comm_event->task->comm, sizeof(comm));
3371 size = ALIGN(strlen(comm)+1, sizeof(u64)); 3398 size = ALIGN(strlen(comm)+1, sizeof(u64));
3372 3399
3373 comm_event->comm = comm; 3400 comm_event->comm = comm;
@@ -3375,11 +3402,11 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
3375 3402
3376 comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; 3403 comm_event->event_id.header.size = sizeof(comm_event->event_id) + size;
3377 3404
3405 rcu_read_lock();
3378 cpuctx = &get_cpu_var(perf_cpu_context); 3406 cpuctx = &get_cpu_var(perf_cpu_context);
3379 perf_event_comm_ctx(&cpuctx->ctx, comm_event); 3407 perf_event_comm_ctx(&cpuctx->ctx, comm_event);
3380 put_cpu_var(perf_cpu_context); 3408 put_cpu_var(perf_cpu_context);
3381 3409
3382 rcu_read_lock();
3383 /* 3410 /*
3384 * doesn't really matter which of the child contexts the 3411 * doesn't really matter which of the child contexts the
3385 * events ends up in. 3412 * events ends up in.
@@ -3472,15 +3499,10 @@ static void perf_event_mmap_ctx(struct perf_event_context *ctx,
3472{ 3499{
3473 struct perf_event *event; 3500 struct perf_event *event;
3474 3501
3475 if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
3476 return;
3477
3478 rcu_read_lock();
3479 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 3502 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
3480 if (perf_event_mmap_match(event, mmap_event)) 3503 if (perf_event_mmap_match(event, mmap_event))
3481 perf_event_mmap_output(event, mmap_event); 3504 perf_event_mmap_output(event, mmap_event);
3482 } 3505 }
3483 rcu_read_unlock();
3484} 3506}
3485 3507
3486static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) 3508static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
@@ -3536,11 +3558,11 @@ got_name:
3536 3558
3537 mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; 3559 mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size;
3538 3560
3561 rcu_read_lock();
3539 cpuctx = &get_cpu_var(perf_cpu_context); 3562 cpuctx = &get_cpu_var(perf_cpu_context);
3540 perf_event_mmap_ctx(&cpuctx->ctx, mmap_event); 3563 perf_event_mmap_ctx(&cpuctx->ctx, mmap_event);
3541 put_cpu_var(perf_cpu_context); 3564 put_cpu_var(perf_cpu_context);
3542 3565
3543 rcu_read_lock();
3544 /* 3566 /*
3545 * doesn't really matter which of the child contexts the 3567 * doesn't really matter which of the child contexts the
3546 * events ends up in. 3568 * events ends up in.
@@ -3679,7 +3701,11 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
3679 perf_event_disable(event); 3701 perf_event_disable(event);
3680 } 3702 }
3681 3703
3682 perf_event_output(event, nmi, data, regs); 3704 if (event->overflow_handler)
3705 event->overflow_handler(event, nmi, data, regs);
3706 else
3707 perf_event_output(event, nmi, data, regs);
3708
3683 return ret; 3709 return ret;
3684} 3710}
3685 3711
@@ -3724,16 +3750,16 @@ again:
3724 return nr; 3750 return nr;
3725} 3751}
3726 3752
3727static void perf_swevent_overflow(struct perf_event *event, 3753static void perf_swevent_overflow(struct perf_event *event, u64 overflow,
3728 int nmi, struct perf_sample_data *data, 3754 int nmi, struct perf_sample_data *data,
3729 struct pt_regs *regs) 3755 struct pt_regs *regs)
3730{ 3756{
3731 struct hw_perf_event *hwc = &event->hw; 3757 struct hw_perf_event *hwc = &event->hw;
3732 int throttle = 0; 3758 int throttle = 0;
3733 u64 overflow;
3734 3759
3735 data->period = event->hw.last_period; 3760 data->period = event->hw.last_period;
3736 overflow = perf_swevent_set_period(event); 3761 if (!overflow)
3762 overflow = perf_swevent_set_period(event);
3737 3763
3738 if (hwc->interrupts == MAX_INTERRUPTS) 3764 if (hwc->interrupts == MAX_INTERRUPTS)
3739 return; 3765 return;
@@ -3766,14 +3792,19 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,
3766 3792
3767 atomic64_add(nr, &event->count); 3793 atomic64_add(nr, &event->count);
3768 3794
3795 if (!regs)
3796 return;
3797
3769 if (!hwc->sample_period) 3798 if (!hwc->sample_period)
3770 return; 3799 return;
3771 3800
3772 if (!regs) 3801 if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq)
3802 return perf_swevent_overflow(event, 1, nmi, data, regs);
3803
3804 if (atomic64_add_negative(nr, &hwc->period_left))
3773 return; 3805 return;
3774 3806
3775 if (!atomic64_add_negative(nr, &hwc->period_left)) 3807 perf_swevent_overflow(event, 0, nmi, data, regs);
3776 perf_swevent_overflow(event, nmi, data, regs);
3777} 3808}
3778 3809
3779static int perf_swevent_is_counting(struct perf_event *event) 3810static int perf_swevent_is_counting(struct perf_event *event)
@@ -3806,25 +3837,44 @@ static int perf_swevent_is_counting(struct perf_event *event)
3806 return 1; 3837 return 1;
3807} 3838}
3808 3839
3840static int perf_tp_event_match(struct perf_event *event,
3841 struct perf_sample_data *data);
3842
3843static int perf_exclude_event(struct perf_event *event,
3844 struct pt_regs *regs)
3845{
3846 if (regs) {
3847 if (event->attr.exclude_user && user_mode(regs))
3848 return 1;
3849
3850 if (event->attr.exclude_kernel && !user_mode(regs))
3851 return 1;
3852 }
3853
3854 return 0;
3855}
3856
3809static int perf_swevent_match(struct perf_event *event, 3857static int perf_swevent_match(struct perf_event *event,
3810 enum perf_type_id type, 3858 enum perf_type_id type,
3811 u32 event_id, struct pt_regs *regs) 3859 u32 event_id,
3860 struct perf_sample_data *data,
3861 struct pt_regs *regs)
3812{ 3862{
3813 if (!perf_swevent_is_counting(event)) 3863 if (!perf_swevent_is_counting(event))
3814 return 0; 3864 return 0;
3815 3865
3816 if (event->attr.type != type) 3866 if (event->attr.type != type)
3817 return 0; 3867 return 0;
3868
3818 if (event->attr.config != event_id) 3869 if (event->attr.config != event_id)
3819 return 0; 3870 return 0;
3820 3871
3821 if (regs) { 3872 if (perf_exclude_event(event, regs))
3822 if (event->attr.exclude_user && user_mode(regs)) 3873 return 0;
3823 return 0;
3824 3874
3825 if (event->attr.exclude_kernel && !user_mode(regs)) 3875 if (event->attr.type == PERF_TYPE_TRACEPOINT &&
3826 return 0; 3876 !perf_tp_event_match(event, data))
3827 } 3877 return 0;
3828 3878
3829 return 1; 3879 return 1;
3830} 3880}
@@ -3837,49 +3887,59 @@ static void perf_swevent_ctx_event(struct perf_event_context *ctx,
3837{ 3887{
3838 struct perf_event *event; 3888 struct perf_event *event;
3839 3889
3840 if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
3841 return;
3842
3843 rcu_read_lock();
3844 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 3890 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
3845 if (perf_swevent_match(event, type, event_id, regs)) 3891 if (perf_swevent_match(event, type, event_id, data, regs))
3846 perf_swevent_add(event, nr, nmi, data, regs); 3892 perf_swevent_add(event, nr, nmi, data, regs);
3847 } 3893 }
3848 rcu_read_unlock();
3849} 3894}
3850 3895
3851static int *perf_swevent_recursion_context(struct perf_cpu_context *cpuctx) 3896int perf_swevent_get_recursion_context(void)
3852{ 3897{
3898 struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
3899 int rctx;
3900
3853 if (in_nmi()) 3901 if (in_nmi())
3854 return &cpuctx->recursion[3]; 3902 rctx = 3;
3903 else if (in_irq())
3904 rctx = 2;
3905 else if (in_softirq())
3906 rctx = 1;
3907 else
3908 rctx = 0;
3855 3909
3856 if (in_irq()) 3910 if (cpuctx->recursion[rctx]) {
3857 return &cpuctx->recursion[2]; 3911 put_cpu_var(perf_cpu_context);
3912 return -1;
3913 }
3858 3914
3859 if (in_softirq()) 3915 cpuctx->recursion[rctx]++;
3860 return &cpuctx->recursion[1]; 3916 barrier();
3861 3917
3862 return &cpuctx->recursion[0]; 3918 return rctx;
3863} 3919}
3920EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);
3921
3922void perf_swevent_put_recursion_context(int rctx)
3923{
3924 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
3925 barrier();
3926 cpuctx->recursion[rctx]--;
3927 put_cpu_var(perf_cpu_context);
3928}
3929EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context);
3864 3930
3865static void do_perf_sw_event(enum perf_type_id type, u32 event_id, 3931static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
3866 u64 nr, int nmi, 3932 u64 nr, int nmi,
3867 struct perf_sample_data *data, 3933 struct perf_sample_data *data,
3868 struct pt_regs *regs) 3934 struct pt_regs *regs)
3869{ 3935{
3870 struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); 3936 struct perf_cpu_context *cpuctx;
3871 int *recursion = perf_swevent_recursion_context(cpuctx);
3872 struct perf_event_context *ctx; 3937 struct perf_event_context *ctx;
3873 3938
3874 if (*recursion) 3939 cpuctx = &__get_cpu_var(perf_cpu_context);
3875 goto out; 3940 rcu_read_lock();
3876
3877 (*recursion)++;
3878 barrier();
3879
3880 perf_swevent_ctx_event(&cpuctx->ctx, type, event_id, 3941 perf_swevent_ctx_event(&cpuctx->ctx, type, event_id,
3881 nr, nmi, data, regs); 3942 nr, nmi, data, regs);
3882 rcu_read_lock();
3883 /* 3943 /*
3884 * doesn't really matter which of the child contexts the 3944 * doesn't really matter which of the child contexts the
3885 * events ends up in. 3945 * events ends up in.
@@ -3888,23 +3948,24 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
3888 if (ctx) 3948 if (ctx)
3889 perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs); 3949 perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs);
3890 rcu_read_unlock(); 3950 rcu_read_unlock();
3891
3892 barrier();
3893 (*recursion)--;
3894
3895out:
3896 put_cpu_var(perf_cpu_context);
3897} 3951}
3898 3952
3899void __perf_sw_event(u32 event_id, u64 nr, int nmi, 3953void __perf_sw_event(u32 event_id, u64 nr, int nmi,
3900 struct pt_regs *regs, u64 addr) 3954 struct pt_regs *regs, u64 addr)
3901{ 3955{
3902 struct perf_sample_data data = { 3956 struct perf_sample_data data;
3903 .addr = addr, 3957 int rctx;
3904 };
3905 3958
3906 do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, 3959 rctx = perf_swevent_get_recursion_context();
3907 &data, regs); 3960 if (rctx < 0)
3961 return;
3962
3963 data.addr = addr;
3964 data.raw = NULL;
3965
3966 do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs);
3967
3968 perf_swevent_put_recursion_context(rctx);
3908} 3969}
3909 3970
3910static void perf_swevent_read(struct perf_event *event) 3971static void perf_swevent_read(struct perf_event *event)
@@ -3949,6 +4010,8 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
3949 event->pmu->read(event); 4010 event->pmu->read(event);
3950 4011
3951 data.addr = 0; 4012 data.addr = 0;
4013 data.raw = NULL;
4014 data.period = event->hw.last_period;
3952 regs = get_irq_regs(); 4015 regs = get_irq_regs();
3953 /* 4016 /*
3954 * In case we exclude kernel IPs or are somehow not in interrupt 4017 * In case we exclude kernel IPs or are somehow not in interrupt
@@ -4017,8 +4080,7 @@ static void cpu_clock_perf_event_update(struct perf_event *event)
4017 u64 now; 4080 u64 now;
4018 4081
4019 now = cpu_clock(cpu); 4082 now = cpu_clock(cpu);
4020 prev = atomic64_read(&event->hw.prev_count); 4083 prev = atomic64_xchg(&event->hw.prev_count, now);
4021 atomic64_set(&event->hw.prev_count, now);
4022 atomic64_add(now - prev, &event->count); 4084 atomic64_add(now - prev, &event->count);
4023} 4085}
4024 4086
@@ -4108,6 +4170,7 @@ static const struct pmu perf_ops_task_clock = {
4108}; 4170};
4109 4171
4110#ifdef CONFIG_EVENT_PROFILE 4172#ifdef CONFIG_EVENT_PROFILE
4173
4111void perf_tp_event(int event_id, u64 addr, u64 count, void *record, 4174void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
4112 int entry_size) 4175 int entry_size)
4113{ 4176{
@@ -4126,13 +4189,21 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
4126 if (!regs) 4189 if (!regs)
4127 regs = task_pt_regs(current); 4190 regs = task_pt_regs(current);
4128 4191
4192 /* Trace events already protected against recursion */
4129 do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, 4193 do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1,
4130 &data, regs); 4194 &data, regs);
4131} 4195}
4132EXPORT_SYMBOL_GPL(perf_tp_event); 4196EXPORT_SYMBOL_GPL(perf_tp_event);
4133 4197
4134extern int ftrace_profile_enable(int); 4198static int perf_tp_event_match(struct perf_event *event,
4135extern void ftrace_profile_disable(int); 4199 struct perf_sample_data *data)
4200{
4201 void *record = data->raw->data;
4202
4203 if (likely(!event->filter) || filter_match_preds(event->filter, record))
4204 return 1;
4205 return 0;
4206}
4136 4207
4137static void tp_perf_event_destroy(struct perf_event *event) 4208static void tp_perf_event_destroy(struct perf_event *event)
4138{ 4209{
@@ -4157,11 +4228,93 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event)
4157 4228
4158 return &perf_ops_generic; 4229 return &perf_ops_generic;
4159} 4230}
4231
4232static int perf_event_set_filter(struct perf_event *event, void __user *arg)
4233{
4234 char *filter_str;
4235 int ret;
4236
4237 if (event->attr.type != PERF_TYPE_TRACEPOINT)
4238 return -EINVAL;
4239
4240 filter_str = strndup_user(arg, PAGE_SIZE);
4241 if (IS_ERR(filter_str))
4242 return PTR_ERR(filter_str);
4243
4244 ret = ftrace_profile_set_filter(event, event->attr.config, filter_str);
4245
4246 kfree(filter_str);
4247 return ret;
4248}
4249
4250static void perf_event_free_filter(struct perf_event *event)
4251{
4252 ftrace_profile_free_filter(event);
4253}
4254
4160#else 4255#else
4256
4257static int perf_tp_event_match(struct perf_event *event,
4258 struct perf_sample_data *data)
4259{
4260 return 1;
4261}
4262
4161static const struct pmu *tp_perf_event_init(struct perf_event *event) 4263static const struct pmu *tp_perf_event_init(struct perf_event *event)
4162{ 4264{
4163 return NULL; 4265 return NULL;
4164} 4266}
4267
4268static int perf_event_set_filter(struct perf_event *event, void __user *arg)
4269{
4270 return -ENOENT;
4271}
4272
4273static void perf_event_free_filter(struct perf_event *event)
4274{
4275}
4276
4277#endif /* CONFIG_EVENT_PROFILE */
4278
4279#ifdef CONFIG_HAVE_HW_BREAKPOINT
4280static void bp_perf_event_destroy(struct perf_event *event)
4281{
4282 release_bp_slot(event);
4283}
4284
4285static const struct pmu *bp_perf_event_init(struct perf_event *bp)
4286{
4287 int err;
4288
4289 err = register_perf_hw_breakpoint(bp);
4290 if (err)
4291 return ERR_PTR(err);
4292
4293 bp->destroy = bp_perf_event_destroy;
4294
4295 return &perf_ops_bp;
4296}
4297
4298void perf_bp_event(struct perf_event *bp, void *data)
4299{
4300 struct perf_sample_data sample;
4301 struct pt_regs *regs = data;
4302
4303 sample.raw = NULL;
4304 sample.addr = bp->attr.bp_addr;
4305
4306 if (!perf_exclude_event(bp, regs))
4307 perf_swevent_add(bp, 1, 1, &sample, regs);
4308}
4309#else
4310static const struct pmu *bp_perf_event_init(struct perf_event *bp)
4311{
4312 return NULL;
4313}
4314
4315void perf_bp_event(struct perf_event *bp, void *regs)
4316{
4317}
4165#endif 4318#endif
4166 4319
4167atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; 4320atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
@@ -4208,6 +4361,8 @@ static const struct pmu *sw_perf_event_init(struct perf_event *event)
4208 case PERF_COUNT_SW_PAGE_FAULTS_MAJ: 4361 case PERF_COUNT_SW_PAGE_FAULTS_MAJ:
4209 case PERF_COUNT_SW_CONTEXT_SWITCHES: 4362 case PERF_COUNT_SW_CONTEXT_SWITCHES:
4210 case PERF_COUNT_SW_CPU_MIGRATIONS: 4363 case PERF_COUNT_SW_CPU_MIGRATIONS:
4364 case PERF_COUNT_SW_ALIGNMENT_FAULTS:
4365 case PERF_COUNT_SW_EMULATION_FAULTS:
4211 if (!event->parent) { 4366 if (!event->parent) {
4212 atomic_inc(&perf_swevent_enabled[event_id]); 4367 atomic_inc(&perf_swevent_enabled[event_id]);
4213 event->destroy = sw_perf_event_destroy; 4368 event->destroy = sw_perf_event_destroy;
@@ -4228,6 +4383,7 @@ perf_event_alloc(struct perf_event_attr *attr,
4228 struct perf_event_context *ctx, 4383 struct perf_event_context *ctx,
4229 struct perf_event *group_leader, 4384 struct perf_event *group_leader,
4230 struct perf_event *parent_event, 4385 struct perf_event *parent_event,
4386 perf_overflow_handler_t overflow_handler,
4231 gfp_t gfpflags) 4387 gfp_t gfpflags)
4232{ 4388{
4233 const struct pmu *pmu; 4389 const struct pmu *pmu;
@@ -4270,6 +4426,11 @@ perf_event_alloc(struct perf_event_attr *attr,
4270 4426
4271 event->state = PERF_EVENT_STATE_INACTIVE; 4427 event->state = PERF_EVENT_STATE_INACTIVE;
4272 4428
4429 if (!overflow_handler && parent_event)
4430 overflow_handler = parent_event->overflow_handler;
4431
4432 event->overflow_handler = overflow_handler;
4433
4273 if (attr->disabled) 4434 if (attr->disabled)
4274 event->state = PERF_EVENT_STATE_OFF; 4435 event->state = PERF_EVENT_STATE_OFF;
4275 4436
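The overflow_handler threaded through perf_event_alloc() above replaces the default perf_event_output() path when it is set. A minimal handler sketch, assuming the perf_overflow_handler_t signature of this kernel; the handler name and message are illustrative:

#include <linux/kernel.h>
#include <linux/perf_event.h>

static void my_overflow_handler(struct perf_event *event, int nmi,
				struct perf_sample_data *data,
				struct pt_regs *regs)
{
	/* called from the overflow path instead of perf_event_output() */
	pr_debug("perf event %p overflowed\n", event);
}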
@@ -4304,6 +4465,11 @@ perf_event_alloc(struct perf_event_attr *attr,
4304 pmu = tp_perf_event_init(event); 4465 pmu = tp_perf_event_init(event);
4305 break; 4466 break;
4306 4467
4468 case PERF_TYPE_BREAKPOINT:
4469 pmu = bp_perf_event_init(event);
4470 break;
4471
4472
4307 default: 4473 default:
4308 break; 4474 break;
4309 } 4475 }
@@ -4416,7 +4582,7 @@ err_size:
4416 goto out; 4582 goto out;
4417} 4583}
4418 4584
4419int perf_event_set_output(struct perf_event *event, int output_fd) 4585static int perf_event_set_output(struct perf_event *event, int output_fd)
4420{ 4586{
4421 struct perf_event *output_event = NULL; 4587 struct perf_event *output_event = NULL;
4422 struct file *output_file = NULL; 4588 struct file *output_file = NULL;
@@ -4546,7 +4712,7 @@ SYSCALL_DEFINE5(perf_event_open,
4546 } 4712 }
4547 4713
4548 event = perf_event_alloc(&attr, cpu, ctx, group_leader, 4714 event = perf_event_alloc(&attr, cpu, ctx, group_leader,
4549 NULL, GFP_KERNEL); 4715 NULL, NULL, GFP_KERNEL);
4550 err = PTR_ERR(event); 4716 err = PTR_ERR(event);
4551 if (IS_ERR(event)) 4717 if (IS_ERR(event))
4552 goto err_put_context; 4718 goto err_put_context;
@@ -4594,6 +4760,61 @@ err_put_context:
4594 return err; 4760 return err;
4595} 4761}
4596 4762
4763/**
4764 * perf_event_create_kernel_counter - create and attach a kernel-internal counter
4765 *
4766 * @attr: attributes of the counter to create
4767 * @cpu: cpu on which the counter is bound
4768 * @pid: task to profile
4769 */
4770struct perf_event *
4771perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
4772 pid_t pid,
4773 perf_overflow_handler_t overflow_handler)
4774{
4775 struct perf_event *event;
4776 struct perf_event_context *ctx;
4777 int err;
4778
4779 /*
4780 * Get the target context (task or percpu):
4781 */
4782
4783 ctx = find_get_context(pid, cpu);
4784 if (IS_ERR(ctx)) {
4785 err = PTR_ERR(ctx);
4786 goto err_exit;
4787 }
4788
4789 event = perf_event_alloc(attr, cpu, ctx, NULL,
4790 NULL, overflow_handler, GFP_KERNEL);
4791 if (IS_ERR(event)) {
4792 err = PTR_ERR(event);
4793 goto err_put_context;
4794 }
4795
4796 event->filp = NULL;
4797 WARN_ON_ONCE(ctx->parent_ctx);
4798 mutex_lock(&ctx->mutex);
4799 perf_install_in_context(ctx, event, cpu);
4800 ++ctx->generation;
4801 mutex_unlock(&ctx->mutex);
4802
4803 event->owner = current;
4804 get_task_struct(current);
4805 mutex_lock(&current->perf_event_mutex);
4806 list_add_tail(&event->owner_entry, &current->perf_event_list);
4807 mutex_unlock(&current->perf_event_mutex);
4808
4809 return event;
4810
4811 err_put_context:
4812 put_ctx(ctx);
4813 err_exit:
4814 return ERR_PTR(err);
4815}
4816EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);
4817
4597/* 4818/*
4598 * inherit an event from parent task to child task: 4819
4599 */ 4820 */
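A minimal sketch of how kernel code might use the counter API exported in the hunk above, assuming this kernel's handler signature and the standard PERF_TYPE_HARDWARE attributes; the handler, variable names and sample period are illustrative:

#include <linux/perf_event.h>
#include <linux/err.h>

static struct perf_event *cycle_event;

static void cycle_overflow(struct perf_event *event, int nmi,
			   struct perf_sample_data *data,
			   struct pt_regs *regs)
{
	/* invoked through event->overflow_handler every sample_period cycles */
}

static int create_cycle_counter(void)
{
	struct perf_event_attr attr = {
		.type		= PERF_TYPE_HARDWARE,
		.config		= PERF_COUNT_HW_CPU_CYCLES,
		.size		= sizeof(attr),
		.sample_period	= 1000000,
	};

	/* cpu 0, pid -1: a per-cpu counter bound to CPU 0 */
	cycle_event = perf_event_create_kernel_counter(&attr, 0, -1,
						       cycle_overflow);
	return IS_ERR(cycle_event) ? PTR_ERR(cycle_event) : 0;
}

Teardown is not shown; it would presumably go through perf_event_release_kernel(), which lives outside this hunk.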
@@ -4619,7 +4840,7 @@ inherit_event(struct perf_event *parent_event,
4619 child_event = perf_event_alloc(&parent_event->attr, 4840 child_event = perf_event_alloc(&parent_event->attr,
4620 parent_event->cpu, child_ctx, 4841 parent_event->cpu, child_ctx,
4621 group_leader, parent_event, 4842 group_leader, parent_event,
4622 GFP_KERNEL); 4843 NULL, GFP_KERNEL);
4623 if (IS_ERR(child_event)) 4844 if (IS_ERR(child_event))
4624 return child_event; 4845 return child_event;
4625 get_ctx(child_ctx); 4846 get_ctx(child_ctx);
@@ -4637,6 +4858,8 @@ inherit_event(struct perf_event *parent_event,
4637 if (parent_event->attr.freq) 4858 if (parent_event->attr.freq)
4638 child_event->hw.sample_period = parent_event->hw.sample_period; 4859 child_event->hw.sample_period = parent_event->hw.sample_period;
4639 4860
4861 child_event->overflow_handler = parent_event->overflow_handler;
4862
4640 /* 4863 /*
4641 * Link it up in the child's context: 4864 * Link it up in the child's context:
4642 */ 4865 */
@@ -4726,7 +4949,6 @@ __perf_event_exit_task(struct perf_event *child_event,
4726{ 4949{
4727 struct perf_event *parent_event; 4950 struct perf_event *parent_event;
4728 4951
4729 update_event_times(child_event);
4730 perf_event_remove_from_context(child_event); 4952 perf_event_remove_from_context(child_event);
4731 4953
4732 parent_event = child_event->parent; 4954 parent_event = child_event->parent;
@@ -4770,7 +4992,7 @@ void perf_event_exit_task(struct task_struct *child)
4770 * reading child->perf_event_ctxp, we wait until it has 4992 * reading child->perf_event_ctxp, we wait until it has
4771 * incremented the context's refcount before we do put_ctx below. 4993 * incremented the context's refcount before we do put_ctx below.
4772 */ 4994 */
4773 spin_lock(&child_ctx->lock); 4995 raw_spin_lock(&child_ctx->lock);
4774 child->perf_event_ctxp = NULL; 4996 child->perf_event_ctxp = NULL;
4775 /* 4997 /*
4776 * If this context is a clone; unclone it so it can't get 4998 * If this context is a clone; unclone it so it can't get
@@ -4778,7 +5000,8 @@ void perf_event_exit_task(struct task_struct *child)
4778 * the events from it. 5000 * the events from it.
4779 */ 5001 */
4780 unclone_ctx(child_ctx); 5002 unclone_ctx(child_ctx);
4781 spin_unlock_irqrestore(&child_ctx->lock, flags); 5003 update_context_time(child_ctx);
5004 raw_spin_unlock_irqrestore(&child_ctx->lock, flags);
4782 5005
4783 /* 5006 /*
4784 * Report the task dead after unscheduling the events so that we 5007 * Report the task dead after unscheduling the events so that we
@@ -4861,7 +5084,7 @@ again:
4861 */ 5084 */
4862int perf_event_init_task(struct task_struct *child) 5085int perf_event_init_task(struct task_struct *child)
4863{ 5086{
4864 struct perf_event_context *child_ctx, *parent_ctx; 5087 struct perf_event_context *child_ctx = NULL, *parent_ctx;
4865 struct perf_event_context *cloned_ctx; 5088 struct perf_event_context *cloned_ctx;
4866 struct perf_event *event; 5089 struct perf_event *event;
4867 struct task_struct *parent = current; 5090 struct task_struct *parent = current;
@@ -4877,20 +5100,6 @@ int perf_event_init_task(struct task_struct *child)
4877 return 0; 5100 return 0;
4878 5101
4879 /* 5102 /*
4880 * This is executed from the parent task context, so inherit
4881 * events that have been marked for cloning.
4882 * First allocate and initialize a context for the child.
4883 */
4884
4885 child_ctx = kmalloc(sizeof(struct perf_event_context), GFP_KERNEL);
4886 if (!child_ctx)
4887 return -ENOMEM;
4888
4889 __perf_event_init_context(child_ctx, child);
4890 child->perf_event_ctxp = child_ctx;
4891 get_task_struct(child);
4892
4893 /*
4894 * If the parent's context is a clone, pin it so it won't get 5103 * If the parent's context is a clone, pin it so it won't get
4895 * swapped under us. 5104 * swapped under us.
4896 */ 5105 */
@@ -4920,6 +5129,26 @@ int perf_event_init_task(struct task_struct *child)
4920 continue; 5129 continue;
4921 } 5130 }
4922 5131
5132 if (!child->perf_event_ctxp) {
5133 /*
5134 * This is executed from the parent task context, so
5135 * inherit events that have been marked for cloning.
5136 * First allocate and initialize a context for the
5137 * child.
5138 */
5139
5140 child_ctx = kzalloc(sizeof(struct perf_event_context),
5141 GFP_KERNEL);
5142 if (!child_ctx) {
5143 ret = -ENOMEM;
5144 goto exit;
5145 }
5146
5147 __perf_event_init_context(child_ctx, child);
5148 child->perf_event_ctxp = child_ctx;
5149 get_task_struct(child);
5150 }
5151
4923 ret = inherit_group(event, parent, parent_ctx, 5152 ret = inherit_group(event, parent, parent_ctx,
4924 child, child_ctx); 5153 child, child_ctx);
4925 if (ret) { 5154 if (ret) {
@@ -4948,6 +5177,7 @@ int perf_event_init_task(struct task_struct *child)
4948 get_ctx(child_ctx->parent_ctx); 5177 get_ctx(child_ctx->parent_ctx);
4949 } 5178 }
4950 5179
5180exit:
4951 mutex_unlock(&parent_ctx->mutex); 5181 mutex_unlock(&parent_ctx->mutex);
4952 5182
4953 perf_unpin_context(parent_ctx); 5183 perf_unpin_context(parent_ctx);
@@ -5062,11 +5292,11 @@ perf_set_reserve_percpu(struct sysdev_class *class,
5062 perf_reserved_percpu = val; 5292 perf_reserved_percpu = val;
5063 for_each_online_cpu(cpu) { 5293 for_each_online_cpu(cpu) {
5064 cpuctx = &per_cpu(perf_cpu_context, cpu); 5294 cpuctx = &per_cpu(perf_cpu_context, cpu);
5065 spin_lock_irq(&cpuctx->ctx.lock); 5295 raw_spin_lock_irq(&cpuctx->ctx.lock);
5066 mpt = min(perf_max_events - cpuctx->ctx.nr_events, 5296 mpt = min(perf_max_events - cpuctx->ctx.nr_events,
5067 perf_max_events - perf_reserved_percpu); 5297 perf_max_events - perf_reserved_percpu);
5068 cpuctx->max_pertask = mpt; 5298 cpuctx->max_pertask = mpt;
5069 spin_unlock_irq(&cpuctx->ctx.lock); 5299 raw_spin_unlock_irq(&cpuctx->ctx.lock);
5070 } 5300 }
5071 spin_unlock(&perf_resource_lock); 5301 spin_unlock(&perf_resource_lock);
5072 5302