Diffstat (limited to 'kernel/perf_event.c')
-rw-r--r--   kernel/perf_event.c | 642
1 file changed, 413 insertions(+), 229 deletions(-)
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 2ae7409bf38f..482d5e1d3764 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -56,21 +56,6 @@ static atomic_t nr_task_events __read_mostly;
  */
 int sysctl_perf_event_paranoid __read_mostly = 1;
 
-static inline bool perf_paranoid_tracepoint_raw(void)
-{
-	return sysctl_perf_event_paranoid > -1;
-}
-
-static inline bool perf_paranoid_cpu(void)
-{
-	return sysctl_perf_event_paranoid > 0;
-}
-
-static inline bool perf_paranoid_kernel(void)
-{
-	return sysctl_perf_event_paranoid > 1;
-}
-
 int sysctl_perf_event_mlock __read_mostly = 512; /* 'free' kb per user */
 
 /*
@@ -98,11 +83,12 @@ void __weak hw_perf_enable(void) { barrier(); }
 
 void __weak hw_perf_event_setup(int cpu) { barrier(); }
 void __weak hw_perf_event_setup_online(int cpu) { barrier(); }
+void __weak hw_perf_event_setup_offline(int cpu) { barrier(); }
 
 int __weak
 hw_perf_group_sched_in(struct perf_event *group_leader,
	       struct perf_cpu_context *cpuctx,
-	       struct perf_event_context *ctx, int cpu)
+	       struct perf_event_context *ctx)
 {
 	return 0;
 }
@@ -248,7 +234,7 @@ static void perf_unpin_context(struct perf_event_context *ctx)
 
 static inline u64 perf_clock(void)
 {
-	return cpu_clock(smp_processor_id());
+	return cpu_clock(raw_smp_processor_id());
 }
 
 /*
@@ -289,6 +275,15 @@ static void update_event_times(struct perf_event *event)
 	event->total_time_running = run_end - event->tstamp_running;
 }
 
+static struct list_head *
+ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
+{
+	if (event->attr.pinned)
+		return &ctx->pinned_groups;
+	else
+		return &ctx->flexible_groups;
+}
+
 /*
  * Add a event from the lists for its context.
  * Must be called with ctx->mutex and ctx->lock held.
@@ -303,9 +298,19 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 	 * add it straight to the context's event list, or to the group
 	 * leader's sibling list:
 	 */
-	if (group_leader == event)
-		list_add_tail(&event->group_entry, &ctx->group_list);
-	else {
+	if (group_leader == event) {
+		struct list_head *list;
+
+		if (is_software_event(event))
+			event->group_flags |= PERF_GROUP_SOFTWARE;
+
+		list = ctx_group_list(event, ctx);
+		list_add_tail(&event->group_entry, list);
+	} else {
+		if (group_leader->group_flags & PERF_GROUP_SOFTWARE &&
+		    !is_software_event(event))
+			group_leader->group_flags &= ~PERF_GROUP_SOFTWARE;
+
 		list_add_tail(&event->group_entry, &group_leader->sibling_list);
 		group_leader->nr_siblings++;
 	}
@@ -355,9 +360,14 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 	 * to the context list directly:
 	 */
 	list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) {
+		struct list_head *list;
 
-		list_move_tail(&sibling->group_entry, &ctx->group_list);
+		list = ctx_group_list(event, ctx);
+		list_move_tail(&sibling->group_entry, list);
 		sibling->group_leader = sibling;
+
+		/* Inherit group flags from the previous leader */
+		sibling->group_flags = event->group_flags;
 	}
 }
 
@@ -608,14 +618,13 @@ void perf_event_disable(struct perf_event *event)
 static int
 event_sched_in(struct perf_event *event,
		 struct perf_cpu_context *cpuctx,
-		 struct perf_event_context *ctx,
-		 int cpu)
+		 struct perf_event_context *ctx)
 {
 	if (event->state <= PERF_EVENT_STATE_OFF)
 		return 0;
 
 	event->state = PERF_EVENT_STATE_ACTIVE;
-	event->oncpu = cpu;	/* TODO: put 'cpu' into cpuctx->cpu */
+	event->oncpu = smp_processor_id();
 	/*
 	 * The new state must be visible before we turn it on in the hardware:
 	 */
@@ -642,8 +651,7 @@ event_sched_in(struct perf_event *event,
 static int
 group_sched_in(struct perf_event *group_event,
	       struct perf_cpu_context *cpuctx,
-	       struct perf_event_context *ctx,
-	       int cpu)
+	       struct perf_event_context *ctx)
 {
 	struct perf_event *event, *partial_group;
 	int ret;
@@ -651,18 +659,18 @@ group_sched_in(struct perf_event *group_event,
 	if (group_event->state == PERF_EVENT_STATE_OFF)
 		return 0;
 
-	ret = hw_perf_group_sched_in(group_event, cpuctx, ctx, cpu);
+	ret = hw_perf_group_sched_in(group_event, cpuctx, ctx);
 	if (ret)
 		return ret < 0 ? ret : 0;
 
-	if (event_sched_in(group_event, cpuctx, ctx, cpu))
+	if (event_sched_in(group_event, cpuctx, ctx))
 		return -EAGAIN;
 
 	/*
 	 * Schedule in siblings as one group (if any):
 	 */
 	list_for_each_entry(event, &group_event->sibling_list, group_entry) {
-		if (event_sched_in(event, cpuctx, ctx, cpu)) {
+		if (event_sched_in(event, cpuctx, ctx)) {
 			partial_group = event;
 			goto group_error;
 		}
@@ -686,24 +694,6 @@ group_error:
 }
 
 /*
- * Return 1 for a group consisting entirely of software events,
- * 0 if the group contains any hardware events.
- */
-static int is_software_only_group(struct perf_event *leader)
-{
-	struct perf_event *event;
-
-	if (!is_software_event(leader))
-		return 0;
-
-	list_for_each_entry(event, &leader->sibling_list, group_entry)
-		if (!is_software_event(event))
-			return 0;
-
-	return 1;
-}
-
-/*
  * Work out whether we can put this event group on the CPU now.
  */
 static int group_can_go_on(struct perf_event *event,
@@ -713,7 +703,7 @@ static int group_can_go_on(struct perf_event *event,
 	/*
 	 * Groups consisting entirely of software events can always go on.
 	 */
-	if (is_software_only_group(event))
+	if (event->group_flags & PERF_GROUP_SOFTWARE)
 		return 1;
 	/*
 	 * If an exclusive group is already on, no other hardware
@@ -754,7 +744,6 @@ static void __perf_install_in_context(void *info)
 	struct perf_event *event = info;
 	struct perf_event_context *ctx = event->ctx;
 	struct perf_event *leader = event->group_leader;
-	int cpu = smp_processor_id();
 	int err;
 
 	/*
@@ -801,7 +790,7 @@ static void __perf_install_in_context(void *info)
 	if (!group_can_go_on(event, cpuctx, 1))
 		err = -EEXIST;
 	else
-		err = event_sched_in(event, cpuctx, ctx, cpu);
+		err = event_sched_in(event, cpuctx, ctx);
 
 	if (err) {
 		/*
@@ -943,11 +932,9 @@ static void __perf_event_enable(void *info)
 	} else {
 		perf_disable();
 		if (event == leader)
-			err = group_sched_in(event, cpuctx, ctx,
-					     smp_processor_id());
+			err = group_sched_in(event, cpuctx, ctx);
 		else
-			err = event_sched_in(event, cpuctx, ctx,
-					     smp_processor_id());
+			err = event_sched_in(event, cpuctx, ctx);
 		perf_enable();
 	}
 
@@ -1043,8 +1030,15 @@ static int perf_event_refresh(struct perf_event *event, int refresh)
 	return 0;
 }
 
-void __perf_event_sched_out(struct perf_event_context *ctx,
-			    struct perf_cpu_context *cpuctx)
+enum event_type_t {
+	EVENT_FLEXIBLE = 0x1,
+	EVENT_PINNED = 0x2,
+	EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
+};
+
+static void ctx_sched_out(struct perf_event_context *ctx,
+			  struct perf_cpu_context *cpuctx,
+			  enum event_type_t event_type)
 {
 	struct perf_event *event;
 
@@ -1055,10 +1049,18 @@ void __perf_event_sched_out(struct perf_event_context *ctx,
 	update_context_time(ctx);
 
 	perf_disable();
-	if (ctx->nr_active) {
-		list_for_each_entry(event, &ctx->group_list, group_entry)
+	if (!ctx->nr_active)
+		goto out_enable;
+
+	if (event_type & EVENT_PINNED)
+		list_for_each_entry(event, &ctx->pinned_groups, group_entry)
 			group_sched_out(event, cpuctx, ctx);
-	}
+
+	if (event_type & EVENT_FLEXIBLE)
+		list_for_each_entry(event, &ctx->flexible_groups, group_entry)
+			group_sched_out(event, cpuctx, ctx);
+
+ out_enable:
 	perf_enable();
  out:
 	raw_spin_unlock(&ctx->lock);
@@ -1170,9 +1172,9 @@ static void perf_event_sync_stat(struct perf_event_context *ctx,
  * not restart the event.
  */
 void perf_event_task_sched_out(struct task_struct *task,
-				 struct task_struct *next, int cpu)
+				 struct task_struct *next)
 {
-	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
 	struct perf_event_context *ctx = task->perf_event_ctxp;
 	struct perf_event_context *next_ctx;
 	struct perf_event_context *parent;
@@ -1220,15 +1222,13 @@ void perf_event_task_sched_out(struct task_struct *task,
 	rcu_read_unlock();
 
 	if (do_switch) {
-		__perf_event_sched_out(ctx, cpuctx);
+		ctx_sched_out(ctx, cpuctx, EVENT_ALL);
 		cpuctx->task_ctx = NULL;
 	}
 }
 
-/*
- * Called with IRQs disabled
- */
-static void __perf_event_task_sched_out(struct perf_event_context *ctx)
+static void task_ctx_sched_out(struct perf_event_context *ctx,
+			       enum event_type_t event_type)
 {
 	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
 
@@ -1238,47 +1238,41 @@ static void __perf_event_task_sched_out(struct perf_event_context *ctx)
 	if (WARN_ON_ONCE(ctx != cpuctx->task_ctx))
 		return;
 
-	__perf_event_sched_out(ctx, cpuctx);
+	ctx_sched_out(ctx, cpuctx, event_type);
 	cpuctx->task_ctx = NULL;
 }
 
 /*
  * Called with IRQs disabled
  */
-static void perf_event_cpu_sched_out(struct perf_cpu_context *cpuctx)
+static void __perf_event_task_sched_out(struct perf_event_context *ctx)
+{
+	task_ctx_sched_out(ctx, EVENT_ALL);
+}
+
+/*
+ * Called with IRQs disabled
+ */
+static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
+			      enum event_type_t event_type)
 {
-	__perf_event_sched_out(&cpuctx->ctx, cpuctx);
+	ctx_sched_out(&cpuctx->ctx, cpuctx, event_type);
 }
 
 static void
-__perf_event_sched_in(struct perf_event_context *ctx,
-			struct perf_cpu_context *cpuctx, int cpu)
+ctx_pinned_sched_in(struct perf_event_context *ctx,
+		    struct perf_cpu_context *cpuctx)
 {
 	struct perf_event *event;
-	int can_add_hw = 1;
-
-	raw_spin_lock(&ctx->lock);
-	ctx->is_active = 1;
-	if (likely(!ctx->nr_events))
-		goto out;
-
-	ctx->timestamp = perf_clock();
-
-	perf_disable();
 
-	/*
-	 * First go through the list and put on any pinned groups
-	 * in order to give them the best chance of going on.
-	 */
-	list_for_each_entry(event, &ctx->group_list, group_entry) {
-		if (event->state <= PERF_EVENT_STATE_OFF ||
-		    !event->attr.pinned)
+	list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
+		if (event->state <= PERF_EVENT_STATE_OFF)
 			continue;
-		if (event->cpu != -1 && event->cpu != cpu)
+		if (event->cpu != -1 && event->cpu != smp_processor_id())
 			continue;
 
 		if (group_can_go_on(event, cpuctx, 1))
-			group_sched_in(event, cpuctx, ctx, cpu);
+			group_sched_in(event, cpuctx, ctx);
 
 		/*
 		 * If this pinned group hasn't been scheduled,
@@ -1289,32 +1283,83 @@ __perf_event_sched_in(struct perf_event_context *ctx,
 			event->state = PERF_EVENT_STATE_ERROR;
 	}
 }
+}
 
-	list_for_each_entry(event, &ctx->group_list, group_entry) {
-		/*
-		 * Ignore events in OFF or ERROR state, and
-		 * ignore pinned events since we did them already.
-		 */
-		if (event->state <= PERF_EVENT_STATE_OFF ||
-		    event->attr.pinned)
-			continue;
+static void
+ctx_flexible_sched_in(struct perf_event_context *ctx,
+		      struct perf_cpu_context *cpuctx)
+{
+	struct perf_event *event;
+	int can_add_hw = 1;
 
+	list_for_each_entry(event, &ctx->flexible_groups, group_entry) {
+		/* Ignore events in OFF or ERROR state */
+		if (event->state <= PERF_EVENT_STATE_OFF)
+			continue;
 		/*
 		 * Listen to the 'cpu' scheduling filter constraint
 		 * of events:
 		 */
-		if (event->cpu != -1 && event->cpu != cpu)
+		if (event->cpu != -1 && event->cpu != smp_processor_id())
 			continue;
 
 		if (group_can_go_on(event, cpuctx, can_add_hw))
-			if (group_sched_in(event, cpuctx, ctx, cpu))
+			if (group_sched_in(event, cpuctx, ctx))
 				can_add_hw = 0;
 	}
+}
+
+static void
+ctx_sched_in(struct perf_event_context *ctx,
+	     struct perf_cpu_context *cpuctx,
+	     enum event_type_t event_type)
+{
+	raw_spin_lock(&ctx->lock);
+	ctx->is_active = 1;
+	if (likely(!ctx->nr_events))
+		goto out;
+
+	ctx->timestamp = perf_clock();
+
+	perf_disable();
+
+	/*
+	 * First go through the list and put on any pinned groups
+	 * in order to give them the best chance of going on.
+	 */
+	if (event_type & EVENT_PINNED)
+		ctx_pinned_sched_in(ctx, cpuctx);
+
+	/* Then walk through the lower prio flexible groups */
+	if (event_type & EVENT_FLEXIBLE)
+		ctx_flexible_sched_in(ctx, cpuctx);
+
 	perf_enable();
  out:
 	raw_spin_unlock(&ctx->lock);
 }
 
+static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
+			     enum event_type_t event_type)
+{
+	struct perf_event_context *ctx = &cpuctx->ctx;
+
+	ctx_sched_in(ctx, cpuctx, event_type);
+}
+
+static void task_ctx_sched_in(struct task_struct *task,
+			      enum event_type_t event_type)
+{
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+	struct perf_event_context *ctx = task->perf_event_ctxp;
+
+	if (likely(!ctx))
+		return;
+	if (cpuctx->task_ctx == ctx)
+		return;
+	ctx_sched_in(ctx, cpuctx, event_type);
+	cpuctx->task_ctx = ctx;
+}
 /*
  * Called from scheduler to add the events of the current task
  * with interrupts disabled.
@@ -1326,38 +1371,128 @@ __perf_event_sched_in(struct perf_event_context *ctx,
  * accessing the event control register. If a NMI hits, then it will
  * keep the event running.
  */
-void perf_event_task_sched_in(struct task_struct *task, int cpu)
+void perf_event_task_sched_in(struct task_struct *task)
 {
-	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
 	struct perf_event_context *ctx = task->perf_event_ctxp;
 
 	if (likely(!ctx))
 		return;
+
 	if (cpuctx->task_ctx == ctx)
 		return;
-	__perf_event_sched_in(ctx, cpuctx, cpu);
+
+	/*
+	 * We want to keep the following priority order:
+	 * cpu pinned (that don't need to move), task pinned,
+	 * cpu flexible, task flexible.
+	 */
+	cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
+
+	ctx_sched_in(ctx, cpuctx, EVENT_PINNED);
+	cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE);
+	ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE);
+
 	cpuctx->task_ctx = ctx;
 }
 
-static void perf_event_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
+#define MAX_INTERRUPTS (~0ULL)
+
+static void perf_log_throttle(struct perf_event *event, int enable);
+
+static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
 {
-	struct perf_event_context *ctx = &cpuctx->ctx;
+	u64 frequency = event->attr.sample_freq;
+	u64 sec = NSEC_PER_SEC;
+	u64 divisor, dividend;
+
+	int count_fls, nsec_fls, frequency_fls, sec_fls;
+
+	count_fls = fls64(count);
+	nsec_fls = fls64(nsec);
+	frequency_fls = fls64(frequency);
+	sec_fls = 30;
+
+	/*
+	 * We got @count in @nsec, with a target of sample_freq HZ
+	 * the target period becomes:
+	 *
+	 *             @count * 10^9
+	 * period = -------------------
+	 *          @nsec * sample_freq
+	 *
+	 */
+
+	/*
+	 * Reduce accuracy by one bit such that @a and @b converge
+	 * to a similar magnitude.
+	 */
+#define REDUCE_FLS(a, b)		\
+do {					\
+	if (a##_fls > b##_fls) {	\
+		a >>= 1;		\
+		a##_fls--;		\
+	} else {			\
+		b >>= 1;		\
+		b##_fls--;		\
+	}				\
+} while (0)
+
+	/*
+	 * Reduce accuracy until either term fits in a u64, then proceed with
+	 * the other, so that finally we can do a u64/u64 division.
+	 */
+	while (count_fls + sec_fls > 64 && nsec_fls + frequency_fls > 64) {
+		REDUCE_FLS(nsec, frequency);
+		REDUCE_FLS(sec, count);
+	}
+
+	if (count_fls + sec_fls > 64) {
+		divisor = nsec * frequency;
+
+		while (count_fls + sec_fls > 64) {
+			REDUCE_FLS(count, sec);
+			divisor >>= 1;
+		}
 
-	__perf_event_sched_in(ctx, cpuctx, cpu);
+		dividend = count * sec;
+	} else {
+		dividend = count * sec;
+
+		while (nsec_fls + frequency_fls > 64) {
+			REDUCE_FLS(nsec, frequency);
+			dividend >>= 1;
+		}
+
+		divisor = nsec * frequency;
+	}
+
+	return div64_u64(dividend, divisor);
 }
 
-#define MAX_INTERRUPTS (~0ULL)
+static void perf_event_stop(struct perf_event *event)
+{
+	if (!event->pmu->stop)
+		return event->pmu->disable(event);
 
-static void perf_log_throttle(struct perf_event *event, int enable);
+	return event->pmu->stop(event);
+}
+
+static int perf_event_start(struct perf_event *event)
+{
+	if (!event->pmu->start)
+		return event->pmu->enable(event);
 
-static void perf_adjust_period(struct perf_event *event, u64 events)
+	return event->pmu->start(event);
+}
+
+static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	u64 period, sample_period;
 	s64 delta;
 
-	events *= hwc->sample_period;
-	period = div64_u64(events, event->attr.sample_freq);
+	period = perf_calculate_period(event, nsec, count);
 
 	delta = (s64)(period - hwc->sample_period);
 	delta = (delta + 7) / 8; /* low pass filter */
@@ -1368,13 +1503,22 @@ static void perf_adjust_period(struct perf_event *event, u64 events)
 		sample_period = 1;
 
 	hwc->sample_period = sample_period;
+
+	if (atomic64_read(&hwc->period_left) > 8*sample_period) {
+		perf_disable();
+		perf_event_stop(event);
+		atomic64_set(&hwc->period_left, 0);
+		perf_event_start(event);
+		perf_enable();
+	}
 }
 
 static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
 {
 	struct perf_event *event;
 	struct hw_perf_event *hwc;
-	u64 interrupts, freq;
+	u64 interrupts, now;
+	s64 delta;
 
 	raw_spin_lock(&ctx->lock);
 	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
@@ -1395,44 +1539,18 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
 		if (interrupts == MAX_INTERRUPTS) {
 			perf_log_throttle(event, 1);
 			event->pmu->unthrottle(event);
-			interrupts = 2*sysctl_perf_event_sample_rate/HZ;
 		}
 
 		if (!event->attr.freq || !event->attr.sample_freq)
 			continue;
 
-		/*
-		 * if the specified freq < HZ then we need to skip ticks
-		 */
-		if (event->attr.sample_freq < HZ) {
-			freq = event->attr.sample_freq;
-
-			hwc->freq_count += freq;
-			hwc->freq_interrupts += interrupts;
-
-			if (hwc->freq_count < HZ)
-				continue;
-
-			interrupts = hwc->freq_interrupts;
-			hwc->freq_interrupts = 0;
-			hwc->freq_count -= HZ;
-		} else
-			freq = HZ;
-
-		perf_adjust_period(event, freq * interrupts);
+		event->pmu->read(event);
+		now = atomic64_read(&event->count);
+		delta = now - hwc->freq_count_stamp;
+		hwc->freq_count_stamp = now;
 
-		/*
-		 * In order to avoid being stalled by an (accidental) huge
-		 * sample period, force reset the sample period if we didn't
-		 * get any events in this freq period.
-		 */
-		if (!interrupts) {
-			perf_disable();
-			event->pmu->disable(event);
-			atomic64_set(&hwc->period_left, 0);
-			event->pmu->enable(event);
-			perf_enable();
-		}
+		if (delta > 0)
+			perf_adjust_period(event, TICK_NSEC, delta);
 	}
 	raw_spin_unlock(&ctx->lock);
 }
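The two hunks above replace the tick-counting frequency estimator with a direct computation: perf_calculate_period() turns "count events observed in nsec nanoseconds" plus the requested sample_freq into a target period of count * 10^9 / (nsec * sample_freq), and perf_ctx_adjust_freq() now feeds it the per-tick delta of the event count. The following is only a minimal userspace sketch of that arithmetic, not kernel code: the helper name calc_period() and the example numbers are invented for illustration, and it uses a 128-bit intermediate where the kernel instead trims bits with REDUCE_FLS so a plain u64/u64 divide suffices.

```c
#include <stdint.h>
#include <stdio.h>

/*
 * Sketch of the period formula introduced above:
 *
 *                count * 10^9
 *   period = -------------------
 *             nsec * sample_freq
 *
 * The kernel avoids 128-bit division; here __int128 shows the exact
 * value that perf_calculate_period() approximates.
 */
static uint64_t calc_period(uint64_t count, uint64_t nsec, uint64_t sample_freq)
{
	unsigned __int128 dividend = (unsigned __int128)count * 1000000000ULL;
	unsigned __int128 divisor  = (unsigned __int128)nsec * sample_freq;

	if (divisor == 0)
		return 0;
	return (uint64_t)(dividend / divisor);
}

int main(void)
{
	/* e.g. 40000 events in one 4 ms tick, with a 1000 Hz sampling target */
	printf("period = %llu events\n",
	       (unsigned long long)calc_period(40000, 4000000, 1000));
	return 0;	/* prints 10000: one sample every 10000 events */
}
```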
@@ -1442,26 +1560,18 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
  */
 static void rotate_ctx(struct perf_event_context *ctx)
 {
-	struct perf_event *event;
-
 	if (!ctx->nr_events)
 		return;
 
 	raw_spin_lock(&ctx->lock);
-	/*
-	 * Rotate the first entry last (works just fine for group events too):
-	 */
-	perf_disable();
-	list_for_each_entry(event, &ctx->group_list, group_entry) {
-		list_move_tail(&event->group_entry, &ctx->group_list);
-		break;
-	}
-	perf_enable();
+
+	/* Rotate the first entry last of non-pinned groups */
+	list_rotate_left(&ctx->flexible_groups);
 
 	raw_spin_unlock(&ctx->lock);
 }
 
-void perf_event_task_tick(struct task_struct *curr, int cpu)
+void perf_event_task_tick(struct task_struct *curr)
 {
 	struct perf_cpu_context *cpuctx;
 	struct perf_event_context *ctx;
@@ -1469,24 +1579,43 @@ void perf_event_task_tick(struct task_struct *curr, int cpu)
 	if (!atomic_read(&nr_events))
 		return;
 
-	cpuctx = &per_cpu(perf_cpu_context, cpu);
+	cpuctx = &__get_cpu_var(perf_cpu_context);
 	ctx = curr->perf_event_ctxp;
 
+	perf_disable();
+
 	perf_ctx_adjust_freq(&cpuctx->ctx);
 	if (ctx)
 		perf_ctx_adjust_freq(ctx);
 
-	perf_event_cpu_sched_out(cpuctx);
+	cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
 	if (ctx)
-		__perf_event_task_sched_out(ctx);
+		task_ctx_sched_out(ctx, EVENT_FLEXIBLE);
 
 	rotate_ctx(&cpuctx->ctx);
 	if (ctx)
 		rotate_ctx(ctx);
 
-	perf_event_cpu_sched_in(cpuctx, cpu);
+	cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE);
 	if (ctx)
-		perf_event_task_sched_in(curr, cpu);
+		task_ctx_sched_in(curr, EVENT_FLEXIBLE);
+
+	perf_enable();
+}
+
+static int event_enable_on_exec(struct perf_event *event,
+				struct perf_event_context *ctx)
+{
+	if (!event->attr.enable_on_exec)
+		return 0;
+
+	event->attr.enable_on_exec = 0;
+	if (event->state >= PERF_EVENT_STATE_INACTIVE)
+		return 0;
+
+	__perf_event_mark_enabled(event, ctx);
+
+	return 1;
 }
 
 /*
@@ -1499,6 +1628,7 @@ static void perf_event_enable_on_exec(struct task_struct *task)
 	struct perf_event *event;
 	unsigned long flags;
 	int enabled = 0;
+	int ret;
 
 	local_irq_save(flags);
 	ctx = task->perf_event_ctxp;
@@ -1509,14 +1639,16 @@ static void perf_event_enable_on_exec(struct task_struct *task)
 
 	raw_spin_lock(&ctx->lock);
 
-	list_for_each_entry(event, &ctx->group_list, group_entry) {
-		if (!event->attr.enable_on_exec)
-			continue;
-		event->attr.enable_on_exec = 0;
-		if (event->state >= PERF_EVENT_STATE_INACTIVE)
-			continue;
-		__perf_event_mark_enabled(event, ctx);
-		enabled = 1;
+	list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
+		ret = event_enable_on_exec(event, ctx);
+		if (ret)
+			enabled = 1;
+	}
+
+	list_for_each_entry(event, &ctx->flexible_groups, group_entry) {
+		ret = event_enable_on_exec(event, ctx);
+		if (ret)
+			enabled = 1;
 	}
 
 	/*
@@ -1527,7 +1659,7 @@ static void perf_event_enable_on_exec(struct task_struct *task)
 
 	raw_spin_unlock(&ctx->lock);
 
-	perf_event_task_sched_in(task, smp_processor_id());
+	perf_event_task_sched_in(task);
  out:
 	local_irq_restore(flags);
 }
@@ -1590,7 +1722,8 @@ __perf_event_init_context(struct perf_event_context *ctx,
 {
 	raw_spin_lock_init(&ctx->lock);
 	mutex_init(&ctx->mutex);
-	INIT_LIST_HEAD(&ctx->group_list);
+	INIT_LIST_HEAD(&ctx->pinned_groups);
+	INIT_LIST_HEAD(&ctx->flexible_groups);
 	INIT_LIST_HEAD(&ctx->event_list);
 	atomic_set(&ctx->refcount, 1);
 	ctx->task = task;
@@ -3608,7 +3741,7 @@ void __perf_event_mmap(struct vm_area_struct *vma)
 			/* .tid */
 			.start  = vma->vm_start,
 			.len    = vma->vm_end - vma->vm_start,
-			.pgoff  = vma->vm_pgoff,
+			.pgoff  = (u64)vma->vm_pgoff << PAGE_SHIFT,
 		},
 	};
 
@@ -3688,12 +3821,12 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
 
 	if (event->attr.freq) {
 		u64 now = perf_clock();
-		s64 delta = now - hwc->freq_stamp;
+		s64 delta = now - hwc->freq_time_stamp;
 
-		hwc->freq_stamp = now;
+		hwc->freq_time_stamp = now;
 
-		if (delta > 0 && delta < TICK_NSEC)
-			perf_adjust_period(event, NSEC_PER_SEC / (int)delta);
+		if (delta > 0 && delta < 2*TICK_NSEC)
+			perf_adjust_period(event, delta, hwc->last_period);
 	}
 
 	/*
@@ -4184,7 +4317,7 @@ static const struct pmu perf_ops_task_clock = {
 	.read		= task_clock_perf_event_read,
 };
 
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_EVENT_TRACING
 
 void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
			  int entry_size)
@@ -4289,7 +4422,7 @@ static void perf_event_free_filter(struct perf_event *event)
 {
 }
 
-#endif /* CONFIG_EVENT_PROFILE */
+#endif /* CONFIG_EVENT_TRACING */
 
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 static void bp_perf_event_destroy(struct perf_event *event)
@@ -4870,8 +5003,15 @@ inherit_event(struct perf_event *parent_event,
 	else
 		child_event->state = PERF_EVENT_STATE_OFF;
 
-	if (parent_event->attr.freq)
-		child_event->hw.sample_period = parent_event->hw.sample_period;
+	if (parent_event->attr.freq) {
+		u64 sample_period = parent_event->hw.sample_period;
+		struct hw_perf_event *hwc = &child_event->hw;
+
+		hwc->sample_period = sample_period;
+		hwc->last_period   = sample_period;
+
+		atomic64_set(&hwc->period_left, sample_period);
+	}
 
 	child_event->overflow_handler = parent_event->overflow_handler;
 
@@ -5039,7 +5179,11 @@ void perf_event_exit_task(struct task_struct *child)
 	mutex_lock_nested(&child_ctx->mutex, SINGLE_DEPTH_NESTING);
 
 again:
-	list_for_each_entry_safe(child_event, tmp, &child_ctx->group_list,
+	list_for_each_entry_safe(child_event, tmp, &child_ctx->pinned_groups,
+				 group_entry)
+		__perf_event_exit_task(child_event, child_ctx, child);
+
+	list_for_each_entry_safe(child_event, tmp, &child_ctx->flexible_groups,
				 group_entry)
 		__perf_event_exit_task(child_event, child_ctx, child);
 
@@ -5048,7 +5192,8 @@ again:
 	 * its siblings to the list, but we obtained 'tmp' before that which
 	 * will still point to the list head terminating the iteration.
 	 */
-	if (!list_empty(&child_ctx->group_list))
+	if (!list_empty(&child_ctx->pinned_groups) ||
+	    !list_empty(&child_ctx->flexible_groups))
 		goto again;
 
 	mutex_unlock(&child_ctx->mutex);
@@ -5056,6 +5201,24 @@ again:
 	put_ctx(child_ctx);
 }
 
+static void perf_free_event(struct perf_event *event,
+			    struct perf_event_context *ctx)
+{
+	struct perf_event *parent = event->parent;
+
+	if (WARN_ON_ONCE(!parent))
+		return;
+
+	mutex_lock(&parent->child_mutex);
+	list_del_init(&event->child_list);
+	mutex_unlock(&parent->child_mutex);
+
+	fput(parent->filp);
+
+	list_del_event(event, ctx);
+	free_event(event);
+}
+
 /*
  * free an unexposed, unused context as created by inheritance by
  * init_task below, used by fork() in case of fail.
@@ -5070,36 +5233,70 @@ void perf_event_free_task(struct task_struct *task)
 
 	mutex_lock(&ctx->mutex);
 again:
-	list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry) {
-		struct perf_event *parent = event->parent;
+	list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry)
+		perf_free_event(event, ctx);
 
-		if (WARN_ON_ONCE(!parent))
-			continue;
+	list_for_each_entry_safe(event, tmp, &ctx->flexible_groups,
+				 group_entry)
+		perf_free_event(event, ctx);
+
+	if (!list_empty(&ctx->pinned_groups) ||
+	    !list_empty(&ctx->flexible_groups))
+		goto again;
 
-		mutex_lock(&parent->child_mutex);
-		list_del_init(&event->child_list);
-		mutex_unlock(&parent->child_mutex);
+	mutex_unlock(&ctx->mutex);
 
-		fput(parent->filp);
+	put_ctx(ctx);
+}
 
-		list_del_event(event, ctx);
-		free_event(event);
+static int
+inherit_task_group(struct perf_event *event, struct task_struct *parent,
+		   struct perf_event_context *parent_ctx,
+		   struct task_struct *child,
+		   int *inherited_all)
+{
+	int ret;
+	struct perf_event_context *child_ctx = child->perf_event_ctxp;
+
+	if (!event->attr.inherit) {
+		*inherited_all = 0;
+		return 0;
 	}
 
-	if (!list_empty(&ctx->group_list))
-		goto again;
+	if (!child_ctx) {
+		/*
+		 * This is executed from the parent task context, so
+		 * inherit events that have been marked for cloning.
+		 * First allocate and initialize a context for the
+		 * child.
+		 */
 
-	mutex_unlock(&ctx->mutex);
+		child_ctx = kzalloc(sizeof(struct perf_event_context),
+				    GFP_KERNEL);
+		if (!child_ctx)
+			return -ENOMEM;
 
-	put_ctx(ctx);
+		__perf_event_init_context(child_ctx, child);
+		child->perf_event_ctxp = child_ctx;
+		get_task_struct(child);
+	}
+
+	ret = inherit_group(event, parent, parent_ctx,
+			    child, child_ctx);
+
+	if (ret)
+		*inherited_all = 0;
+
+	return ret;
 }
 
+
 /*
  * Initialize the perf_event context in task_struct
  */
 int perf_event_init_task(struct task_struct *child)
 {
-	struct perf_event_context *child_ctx = NULL, *parent_ctx;
+	struct perf_event_context *child_ctx, *parent_ctx;
 	struct perf_event_context *cloned_ctx;
 	struct perf_event *event;
 	struct task_struct *parent = current;
@@ -5137,41 +5334,22 @@ int perf_event_init_task(struct task_struct *child)
 	 * We dont have to disable NMIs - we are only looking at
 	 * the list, not manipulating it:
 	 */
-	list_for_each_entry(event, &parent_ctx->group_list, group_entry) {
-
-		if (!event->attr.inherit) {
-			inherited_all = 0;
-			continue;
-		}
-
-		if (!child->perf_event_ctxp) {
-			/*
-			 * This is executed from the parent task context, so
-			 * inherit events that have been marked for cloning.
-			 * First allocate and initialize a context for the
-			 * child.
-			 */
-
-			child_ctx = kzalloc(sizeof(struct perf_event_context),
-					    GFP_KERNEL);
-			if (!child_ctx) {
-				ret = -ENOMEM;
-				break;
-			}
-
-			__perf_event_init_context(child_ctx, child);
-			child->perf_event_ctxp = child_ctx;
-			get_task_struct(child);
-		}
+	list_for_each_entry(event, &parent_ctx->pinned_groups, group_entry) {
+		ret = inherit_task_group(event, parent, parent_ctx, child,
+					 &inherited_all);
+		if (ret)
+			break;
+	}
 
-		ret = inherit_group(event, parent, parent_ctx,
-					   child, child_ctx);
-		if (ret) {
-			inherited_all = 0;
-			break;
-		}
+	list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) {
+		ret = inherit_task_group(event, parent, parent_ctx, child,
+					 &inherited_all);
+		if (ret)
+			break;
 	}
 
+	child_ctx = child->perf_event_ctxp;
+
 	if (child_ctx && inherited_all) {
 		/*
 		 * Mark the child context as a clone of the parent
@@ -5220,7 +5398,9 @@ static void __perf_event_exit_cpu(void *info)
 	struct perf_event_context *ctx = &cpuctx->ctx;
 	struct perf_event *event, *tmp;
 
-	list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry)
+	list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry)
+		__perf_event_remove_from_context(event);
+	list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry)
 		__perf_event_remove_from_context(event);
 }
 static void perf_event_exit_cpu(int cpu)
@@ -5258,6 +5438,10 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
 		perf_event_exit_cpu(cpu);
 		break;
 
+	case CPU_DEAD:
+		hw_perf_event_setup_offline(cpu);
+		break;
+
 	default:
 		break;
 	}
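Taken together, these hunks split each context's single group_list into pinned_groups and flexible_groups: pinned groups are scheduled first (and put into PERF_EVENT_STATE_ERROR if they cannot get a counter), while flexible groups share whatever hardware remains and are rotated with list_rotate_left() on every tick. The program below is only a toy illustration of that multiplexing idea, not kernel code; the counter budget NCOUNTERS, the group names, and the array-based rotation are all invented for the example.

```c
#include <stdio.h>

/*
 * Toy model of the pinned/flexible split: pinned groups always get a
 * counter first, flexible groups fill the remainder starting from an
 * offset that advances each tick (the rotation), so over time every
 * flexible group gets a share of the PMU.
 */
#define NCOUNTERS 2

static const char *pinned[]   = { "cycles(pinned)" };
static const char *flexible[] = { "cache-misses", "branches", "instructions" };
#define NPINNED (sizeof(pinned) / sizeof(pinned[0]))
#define NFLEX   (sizeof(flexible) / sizeof(flexible[0]))

static void schedule_tick(unsigned int tick)
{
	unsigned int used = 0, i;

	printf("tick %u:", tick);

	/* Pinned groups go on first and always get a counter. */
	for (i = 0; i < NPINNED && used < NCOUNTERS; i++, used++)
		printf(" %s", pinned[i]);

	/* Flexible groups fill the rest, starting at a rotating offset. */
	for (i = 0; i < NFLEX && used < NCOUNTERS; i++, used++)
		printf(" %s", flexible[(tick + i) % NFLEX]);

	printf("\n");
}

int main(void)
{
	unsigned int tick;

	for (tick = 0; tick < 4; tick++)
		schedule_tick(tick);
	return 0;
}
```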