diff options
Diffstat (limited to 'kernel/perf_event.c')
-rw-r--r-- | kernel/perf_event.c | 627 |
1 files changed, 413 insertions, 214 deletions
diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 2ae7409bf38f..a661e7991865 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c | |||
@@ -98,11 +98,12 @@ void __weak hw_perf_enable(void) { barrier(); } | |||
98 | 98 | ||
99 | void __weak hw_perf_event_setup(int cpu) { barrier(); } | 99 | void __weak hw_perf_event_setup(int cpu) { barrier(); } |
100 | void __weak hw_perf_event_setup_online(int cpu) { barrier(); } | 100 | void __weak hw_perf_event_setup_online(int cpu) { barrier(); } |
101 | void __weak hw_perf_event_setup_offline(int cpu) { barrier(); } | ||
101 | 102 | ||
102 | int __weak | 103 | int __weak |
103 | hw_perf_group_sched_in(struct perf_event *group_leader, | 104 | hw_perf_group_sched_in(struct perf_event *group_leader, |
104 | struct perf_cpu_context *cpuctx, | 105 | struct perf_cpu_context *cpuctx, |
105 | struct perf_event_context *ctx, int cpu) | 106 | struct perf_event_context *ctx) |
106 | { | 107 | { |
107 | return 0; | 108 | return 0; |
108 | } | 109 | } |
@@ -248,7 +249,7 @@ static void perf_unpin_context(struct perf_event_context *ctx) | |||
248 | 249 | ||
249 | static inline u64 perf_clock(void) | 250 | static inline u64 perf_clock(void) |
250 | { | 251 | { |
251 | return cpu_clock(smp_processor_id()); | 252 | return cpu_clock(raw_smp_processor_id()); |
252 | } | 253 | } |
253 | 254 | ||
254 | /* | 255 | /* |
@@ -289,6 +290,15 @@ static void update_event_times(struct perf_event *event) | |||
289 | event->total_time_running = run_end - event->tstamp_running; | 290 | event->total_time_running = run_end - event->tstamp_running; |
290 | } | 291 | } |
291 | 292 | ||
293 | static struct list_head * | ||
294 | ctx_group_list(struct perf_event *event, struct perf_event_context *ctx) | ||
295 | { | ||
296 | if (event->attr.pinned) | ||
297 | return &ctx->pinned_groups; | ||
298 | else | ||
299 | return &ctx->flexible_groups; | ||
300 | } | ||
301 | |||
292 | /* | 302 | /* |
293 | * Add a event from the lists for its context. | 303 | * Add a event from the lists for its context. |
294 | * Must be called with ctx->mutex and ctx->lock held. | 304 | * Must be called with ctx->mutex and ctx->lock held. |
@@ -303,9 +313,19 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) | |||
303 | * add it straight to the context's event list, or to the group | 313 | * add it straight to the context's event list, or to the group |
304 | * leader's sibling list: | 314 | * leader's sibling list: |
305 | */ | 315 | */ |
306 | if (group_leader == event) | 316 | if (group_leader == event) { |
307 | list_add_tail(&event->group_entry, &ctx->group_list); | 317 | struct list_head *list; |
308 | else { | 318 | |
319 | if (is_software_event(event)) | ||
320 | event->group_flags |= PERF_GROUP_SOFTWARE; | ||
321 | |||
322 | list = ctx_group_list(event, ctx); | ||
323 | list_add_tail(&event->group_entry, list); | ||
324 | } else { | ||
325 | if (group_leader->group_flags & PERF_GROUP_SOFTWARE && | ||
326 | !is_software_event(event)) | ||
327 | group_leader->group_flags &= ~PERF_GROUP_SOFTWARE; | ||
328 | |||
309 | list_add_tail(&event->group_entry, &group_leader->sibling_list); | 329 | list_add_tail(&event->group_entry, &group_leader->sibling_list); |
310 | group_leader->nr_siblings++; | 330 | group_leader->nr_siblings++; |
311 | } | 331 | } |
@@ -355,9 +375,14 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) | |||
355 | * to the context list directly: | 375 | * to the context list directly: |
356 | */ | 376 | */ |
357 | list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) { | 377 | list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) { |
378 | struct list_head *list; | ||
358 | 379 | ||
359 | list_move_tail(&sibling->group_entry, &ctx->group_list); | 380 | list = ctx_group_list(event, ctx); |
381 | list_move_tail(&sibling->group_entry, list); | ||
360 | sibling->group_leader = sibling; | 382 | sibling->group_leader = sibling; |
383 | |||
384 | /* Inherit group flags from the previous leader */ | ||
385 | sibling->group_flags = event->group_flags; | ||
361 | } | 386 | } |
362 | } | 387 | } |
363 | 388 | ||
@@ -608,14 +633,13 @@ void perf_event_disable(struct perf_event *event) | |||
608 | static int | 633 | static int |
609 | event_sched_in(struct perf_event *event, | 634 | event_sched_in(struct perf_event *event, |
610 | struct perf_cpu_context *cpuctx, | 635 | struct perf_cpu_context *cpuctx, |
611 | struct perf_event_context *ctx, | 636 | struct perf_event_context *ctx) |
612 | int cpu) | ||
613 | { | 637 | { |
614 | if (event->state <= PERF_EVENT_STATE_OFF) | 638 | if (event->state <= PERF_EVENT_STATE_OFF) |
615 | return 0; | 639 | return 0; |
616 | 640 | ||
617 | event->state = PERF_EVENT_STATE_ACTIVE; | 641 | event->state = PERF_EVENT_STATE_ACTIVE; |
618 | event->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */ | 642 | event->oncpu = smp_processor_id(); |
619 | /* | 643 | /* |
620 | * The new state must be visible before we turn it on in the hardware: | 644 | * The new state must be visible before we turn it on in the hardware: |
621 | */ | 645 | */ |
@@ -642,8 +666,7 @@ event_sched_in(struct perf_event *event, | |||
642 | static int | 666 | static int |
643 | group_sched_in(struct perf_event *group_event, | 667 | group_sched_in(struct perf_event *group_event, |
644 | struct perf_cpu_context *cpuctx, | 668 | struct perf_cpu_context *cpuctx, |
645 | struct perf_event_context *ctx, | 669 | struct perf_event_context *ctx) |
646 | int cpu) | ||
647 | { | 670 | { |
648 | struct perf_event *event, *partial_group; | 671 | struct perf_event *event, *partial_group; |
649 | int ret; | 672 | int ret; |
@@ -651,18 +674,18 @@ group_sched_in(struct perf_event *group_event, | |||
651 | if (group_event->state == PERF_EVENT_STATE_OFF) | 674 | if (group_event->state == PERF_EVENT_STATE_OFF) |
652 | return 0; | 675 | return 0; |
653 | 676 | ||
654 | ret = hw_perf_group_sched_in(group_event, cpuctx, ctx, cpu); | 677 | ret = hw_perf_group_sched_in(group_event, cpuctx, ctx); |
655 | if (ret) | 678 | if (ret) |
656 | return ret < 0 ? ret : 0; | 679 | return ret < 0 ? ret : 0; |
657 | 680 | ||
658 | if (event_sched_in(group_event, cpuctx, ctx, cpu)) | 681 | if (event_sched_in(group_event, cpuctx, ctx)) |
659 | return -EAGAIN; | 682 | return -EAGAIN; |
660 | 683 | ||
661 | /* | 684 | /* |
662 | * Schedule in siblings as one group (if any): | 685 | * Schedule in siblings as one group (if any): |
663 | */ | 686 | */ |
664 | list_for_each_entry(event, &group_event->sibling_list, group_entry) { | 687 | list_for_each_entry(event, &group_event->sibling_list, group_entry) { |
665 | if (event_sched_in(event, cpuctx, ctx, cpu)) { | 688 | if (event_sched_in(event, cpuctx, ctx)) { |
666 | partial_group = event; | 689 | partial_group = event; |
667 | goto group_error; | 690 | goto group_error; |
668 | } | 691 | } |
@@ -686,24 +709,6 @@ group_error: | |||
686 | } | 709 | } |
687 | 710 | ||
688 | /* | 711 | /* |
689 | * Return 1 for a group consisting entirely of software events, | ||
690 | * 0 if the group contains any hardware events. | ||
691 | */ | ||
692 | static int is_software_only_group(struct perf_event *leader) | ||
693 | { | ||
694 | struct perf_event *event; | ||
695 | |||
696 | if (!is_software_event(leader)) | ||
697 | return 0; | ||
698 | |||
699 | list_for_each_entry(event, &leader->sibling_list, group_entry) | ||
700 | if (!is_software_event(event)) | ||
701 | return 0; | ||
702 | |||
703 | return 1; | ||
704 | } | ||
705 | |||
706 | /* | ||
707 | * Work out whether we can put this event group on the CPU now. | 712 | * Work out whether we can put this event group on the CPU now. |
708 | */ | 713 | */ |
709 | static int group_can_go_on(struct perf_event *event, | 714 | static int group_can_go_on(struct perf_event *event, |
@@ -713,7 +718,7 @@ static int group_can_go_on(struct perf_event *event, | |||
713 | /* | 718 | /* |
714 | * Groups consisting entirely of software events can always go on. | 719 | * Groups consisting entirely of software events can always go on. |
715 | */ | 720 | */ |
716 | if (is_software_only_group(event)) | 721 | if (event->group_flags & PERF_GROUP_SOFTWARE) |
717 | return 1; | 722 | return 1; |
718 | /* | 723 | /* |
719 | * If an exclusive group is already on, no other hardware | 724 | * If an exclusive group is already on, no other hardware |
@@ -754,7 +759,6 @@ static void __perf_install_in_context(void *info) | |||
754 | struct perf_event *event = info; | 759 | struct perf_event *event = info; |
755 | struct perf_event_context *ctx = event->ctx; | 760 | struct perf_event_context *ctx = event->ctx; |
756 | struct perf_event *leader = event->group_leader; | 761 | struct perf_event *leader = event->group_leader; |
757 | int cpu = smp_processor_id(); | ||
758 | int err; | 762 | int err; |
759 | 763 | ||
760 | /* | 764 | /* |
@@ -801,7 +805,7 @@ static void __perf_install_in_context(void *info) | |||
801 | if (!group_can_go_on(event, cpuctx, 1)) | 805 | if (!group_can_go_on(event, cpuctx, 1)) |
802 | err = -EEXIST; | 806 | err = -EEXIST; |
803 | else | 807 | else |
804 | err = event_sched_in(event, cpuctx, ctx, cpu); | 808 | err = event_sched_in(event, cpuctx, ctx); |
805 | 809 | ||
806 | if (err) { | 810 | if (err) { |
807 | /* | 811 | /* |
@@ -943,11 +947,9 @@ static void __perf_event_enable(void *info) | |||
943 | } else { | 947 | } else { |
944 | perf_disable(); | 948 | perf_disable(); |
945 | if (event == leader) | 949 | if (event == leader) |
946 | err = group_sched_in(event, cpuctx, ctx, | 950 | err = group_sched_in(event, cpuctx, ctx); |
947 | smp_processor_id()); | ||
948 | else | 951 | else |
949 | err = event_sched_in(event, cpuctx, ctx, | 952 | err = event_sched_in(event, cpuctx, ctx); |
950 | smp_processor_id()); | ||
951 | perf_enable(); | 953 | perf_enable(); |
952 | } | 954 | } |
953 | 955 | ||
@@ -1043,8 +1045,15 @@ static int perf_event_refresh(struct perf_event *event, int refresh) | |||
1043 | return 0; | 1045 | return 0; |
1044 | } | 1046 | } |
1045 | 1047 | ||
1046 | void __perf_event_sched_out(struct perf_event_context *ctx, | 1048 | enum event_type_t { |
1047 | struct perf_cpu_context *cpuctx) | 1049 | EVENT_FLEXIBLE = 0x1, |
1050 | EVENT_PINNED = 0x2, | ||
1051 | EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED, | ||
1052 | }; | ||
1053 | |||
1054 | static void ctx_sched_out(struct perf_event_context *ctx, | ||
1055 | struct perf_cpu_context *cpuctx, | ||
1056 | enum event_type_t event_type) | ||
1048 | { | 1057 | { |
1049 | struct perf_event *event; | 1058 | struct perf_event *event; |
1050 | 1059 | ||
@@ -1055,10 +1064,18 @@ void __perf_event_sched_out(struct perf_event_context *ctx, | |||
1055 | update_context_time(ctx); | 1064 | update_context_time(ctx); |
1056 | 1065 | ||
1057 | perf_disable(); | 1066 | perf_disable(); |
1058 | if (ctx->nr_active) { | 1067 | if (!ctx->nr_active) |
1059 | list_for_each_entry(event, &ctx->group_list, group_entry) | 1068 | goto out_enable; |
1069 | |||
1070 | if (event_type & EVENT_PINNED) | ||
1071 | list_for_each_entry(event, &ctx->pinned_groups, group_entry) | ||
1060 | group_sched_out(event, cpuctx, ctx); | 1072 | group_sched_out(event, cpuctx, ctx); |
1061 | } | 1073 | |
1074 | if (event_type & EVENT_FLEXIBLE) | ||
1075 | list_for_each_entry(event, &ctx->flexible_groups, group_entry) | ||
1076 | group_sched_out(event, cpuctx, ctx); | ||
1077 | |||
1078 | out_enable: | ||
1062 | perf_enable(); | 1079 | perf_enable(); |
1063 | out: | 1080 | out: |
1064 | raw_spin_unlock(&ctx->lock); | 1081 | raw_spin_unlock(&ctx->lock); |
@@ -1170,9 +1187,9 @@ static void perf_event_sync_stat(struct perf_event_context *ctx, | |||
1170 | * not restart the event. | 1187 | * not restart the event. |
1171 | */ | 1188 | */ |
1172 | void perf_event_task_sched_out(struct task_struct *task, | 1189 | void perf_event_task_sched_out(struct task_struct *task, |
1173 | struct task_struct *next, int cpu) | 1190 | struct task_struct *next) |
1174 | { | 1191 | { |
1175 | struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); | 1192 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); |
1176 | struct perf_event_context *ctx = task->perf_event_ctxp; | 1193 | struct perf_event_context *ctx = task->perf_event_ctxp; |
1177 | struct perf_event_context *next_ctx; | 1194 | struct perf_event_context *next_ctx; |
1178 | struct perf_event_context *parent; | 1195 | struct perf_event_context *parent; |
@@ -1220,15 +1237,13 @@ void perf_event_task_sched_out(struct task_struct *task, | |||
1220 | rcu_read_unlock(); | 1237 | rcu_read_unlock(); |
1221 | 1238 | ||
1222 | if (do_switch) { | 1239 | if (do_switch) { |
1223 | __perf_event_sched_out(ctx, cpuctx); | 1240 | ctx_sched_out(ctx, cpuctx, EVENT_ALL); |
1224 | cpuctx->task_ctx = NULL; | 1241 | cpuctx->task_ctx = NULL; |
1225 | } | 1242 | } |
1226 | } | 1243 | } |
1227 | 1244 | ||
1228 | /* | 1245 | static void task_ctx_sched_out(struct perf_event_context *ctx, |
1229 | * Called with IRQs disabled | 1246 | enum event_type_t event_type) |
1230 | */ | ||
1231 | static void __perf_event_task_sched_out(struct perf_event_context *ctx) | ||
1232 | { | 1247 | { |
1233 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | 1248 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); |
1234 | 1249 | ||
@@ -1238,47 +1253,41 @@ static void __perf_event_task_sched_out(struct perf_event_context *ctx) | |||
1238 | if (WARN_ON_ONCE(ctx != cpuctx->task_ctx)) | 1253 | if (WARN_ON_ONCE(ctx != cpuctx->task_ctx)) |
1239 | return; | 1254 | return; |
1240 | 1255 | ||
1241 | __perf_event_sched_out(ctx, cpuctx); | 1256 | ctx_sched_out(ctx, cpuctx, event_type); |
1242 | cpuctx->task_ctx = NULL; | 1257 | cpuctx->task_ctx = NULL; |
1243 | } | 1258 | } |
1244 | 1259 | ||
1245 | /* | 1260 | /* |
1246 | * Called with IRQs disabled | 1261 | * Called with IRQs disabled |
1247 | */ | 1262 | */ |
1248 | static void perf_event_cpu_sched_out(struct perf_cpu_context *cpuctx) | 1263 | static void __perf_event_task_sched_out(struct perf_event_context *ctx) |
1264 | { | ||
1265 | task_ctx_sched_out(ctx, EVENT_ALL); | ||
1266 | } | ||
1267 | |||
1268 | /* | ||
1269 | * Called with IRQs disabled | ||
1270 | */ | ||
1271 | static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx, | ||
1272 | enum event_type_t event_type) | ||
1249 | { | 1273 | { |
1250 | __perf_event_sched_out(&cpuctx->ctx, cpuctx); | 1274 | ctx_sched_out(&cpuctx->ctx, cpuctx, event_type); |
1251 | } | 1275 | } |
1252 | 1276 | ||
1253 | static void | 1277 | static void |
1254 | __perf_event_sched_in(struct perf_event_context *ctx, | 1278 | ctx_pinned_sched_in(struct perf_event_context *ctx, |
1255 | struct perf_cpu_context *cpuctx, int cpu) | 1279 | struct perf_cpu_context *cpuctx) |
1256 | { | 1280 | { |
1257 | struct perf_event *event; | 1281 | struct perf_event *event; |
1258 | int can_add_hw = 1; | ||
1259 | |||
1260 | raw_spin_lock(&ctx->lock); | ||
1261 | ctx->is_active = 1; | ||
1262 | if (likely(!ctx->nr_events)) | ||
1263 | goto out; | ||
1264 | 1282 | ||
1265 | ctx->timestamp = perf_clock(); | 1283 | list_for_each_entry(event, &ctx->pinned_groups, group_entry) { |
1266 | 1284 | if (event->state <= PERF_EVENT_STATE_OFF) | |
1267 | perf_disable(); | ||
1268 | |||
1269 | /* | ||
1270 | * First go through the list and put on any pinned groups | ||
1271 | * in order to give them the best chance of going on. | ||
1272 | */ | ||
1273 | list_for_each_entry(event, &ctx->group_list, group_entry) { | ||
1274 | if (event->state <= PERF_EVENT_STATE_OFF || | ||
1275 | !event->attr.pinned) | ||
1276 | continue; | 1285 | continue; |
1277 | if (event->cpu != -1 && event->cpu != cpu) | 1286 | if (event->cpu != -1 && event->cpu != smp_processor_id()) |
1278 | continue; | 1287 | continue; |
1279 | 1288 | ||
1280 | if (group_can_go_on(event, cpuctx, 1)) | 1289 | if (group_can_go_on(event, cpuctx, 1)) |
1281 | group_sched_in(event, cpuctx, ctx, cpu); | 1290 | group_sched_in(event, cpuctx, ctx); |
1282 | 1291 | ||
1283 | /* | 1292 | /* |
1284 | * If this pinned group hasn't been scheduled, | 1293 | * If this pinned group hasn't been scheduled, |
@@ -1289,32 +1298,83 @@ __perf_event_sched_in(struct perf_event_context *ctx, | |||
1289 | event->state = PERF_EVENT_STATE_ERROR; | 1298 | event->state = PERF_EVENT_STATE_ERROR; |
1290 | } | 1299 | } |
1291 | } | 1300 | } |
1301 | } | ||
1292 | 1302 | ||
1293 | list_for_each_entry(event, &ctx->group_list, group_entry) { | 1303 | static void |
1294 | /* | 1304 | ctx_flexible_sched_in(struct perf_event_context *ctx, |
1295 | * Ignore events in OFF or ERROR state, and | 1305 | struct perf_cpu_context *cpuctx) |
1296 | * ignore pinned events since we did them already. | 1306 | { |
1297 | */ | 1307 | struct perf_event *event; |
1298 | if (event->state <= PERF_EVENT_STATE_OFF || | 1308 | int can_add_hw = 1; |
1299 | event->attr.pinned) | ||
1300 | continue; | ||
1301 | 1309 | ||
1310 | list_for_each_entry(event, &ctx->flexible_groups, group_entry) { | ||
1311 | /* Ignore events in OFF or ERROR state */ | ||
1312 | if (event->state <= PERF_EVENT_STATE_OFF) | ||
1313 | continue; | ||
1302 | /* | 1314 | /* |
1303 | * Listen to the 'cpu' scheduling filter constraint | 1315 | * Listen to the 'cpu' scheduling filter constraint |
1304 | * of events: | 1316 | * of events: |
1305 | */ | 1317 | */ |
1306 | if (event->cpu != -1 && event->cpu != cpu) | 1318 | if (event->cpu != -1 && event->cpu != smp_processor_id()) |
1307 | continue; | 1319 | continue; |
1308 | 1320 | ||
1309 | if (group_can_go_on(event, cpuctx, can_add_hw)) | 1321 | if (group_can_go_on(event, cpuctx, can_add_hw)) |
1310 | if (group_sched_in(event, cpuctx, ctx, cpu)) | 1322 | if (group_sched_in(event, cpuctx, ctx)) |
1311 | can_add_hw = 0; | 1323 | can_add_hw = 0; |
1312 | } | 1324 | } |
1325 | } | ||
1326 | |||
1327 | static void | ||
1328 | ctx_sched_in(struct perf_event_context *ctx, | ||
1329 | struct perf_cpu_context *cpuctx, | ||
1330 | enum event_type_t event_type) | ||
1331 | { | ||
1332 | raw_spin_lock(&ctx->lock); | ||
1333 | ctx->is_active = 1; | ||
1334 | if (likely(!ctx->nr_events)) | ||
1335 | goto out; | ||
1336 | |||
1337 | ctx->timestamp = perf_clock(); | ||
1338 | |||
1339 | perf_disable(); | ||
1340 | |||
1341 | /* | ||
1342 | * First go through the list and put on any pinned groups | ||
1343 | * in order to give them the best chance of going on. | ||
1344 | */ | ||
1345 | if (event_type & EVENT_PINNED) | ||
1346 | ctx_pinned_sched_in(ctx, cpuctx); | ||
1347 | |||
1348 | /* Then walk through the lower prio flexible groups */ | ||
1349 | if (event_type & EVENT_FLEXIBLE) | ||
1350 | ctx_flexible_sched_in(ctx, cpuctx); | ||
1351 | |||
1313 | perf_enable(); | 1352 | perf_enable(); |
1314 | out: | 1353 | out: |
1315 | raw_spin_unlock(&ctx->lock); | 1354 | raw_spin_unlock(&ctx->lock); |
1316 | } | 1355 | } |
1317 | 1356 | ||
1357 | static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, | ||
1358 | enum event_type_t event_type) | ||
1359 | { | ||
1360 | struct perf_event_context *ctx = &cpuctx->ctx; | ||
1361 | |||
1362 | ctx_sched_in(ctx, cpuctx, event_type); | ||
1363 | } | ||
1364 | |||
1365 | static void task_ctx_sched_in(struct task_struct *task, | ||
1366 | enum event_type_t event_type) | ||
1367 | { | ||
1368 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | ||
1369 | struct perf_event_context *ctx = task->perf_event_ctxp; | ||
1370 | |||
1371 | if (likely(!ctx)) | ||
1372 | return; | ||
1373 | if (cpuctx->task_ctx == ctx) | ||
1374 | return; | ||
1375 | ctx_sched_in(ctx, cpuctx, event_type); | ||
1376 | cpuctx->task_ctx = ctx; | ||
1377 | } | ||
1318 | /* | 1378 | /* |
1319 | * Called from scheduler to add the events of the current task | 1379 | * Called from scheduler to add the events of the current task |
1320 | * with interrupts disabled. | 1380 | * with interrupts disabled. |
@@ -1326,38 +1386,128 @@ __perf_event_sched_in(struct perf_event_context *ctx, | |||
1326 | * accessing the event control register. If a NMI hits, then it will | 1386 | * accessing the event control register. If a NMI hits, then it will |
1327 | * keep the event running. | 1387 | * keep the event running. |
1328 | */ | 1388 | */ |
1329 | void perf_event_task_sched_in(struct task_struct *task, int cpu) | 1389 | void perf_event_task_sched_in(struct task_struct *task) |
1330 | { | 1390 | { |
1331 | struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); | 1391 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); |
1332 | struct perf_event_context *ctx = task->perf_event_ctxp; | 1392 | struct perf_event_context *ctx = task->perf_event_ctxp; |
1333 | 1393 | ||
1334 | if (likely(!ctx)) | 1394 | if (likely(!ctx)) |
1335 | return; | 1395 | return; |
1396 | |||
1336 | if (cpuctx->task_ctx == ctx) | 1397 | if (cpuctx->task_ctx == ctx) |
1337 | return; | 1398 | return; |
1338 | __perf_event_sched_in(ctx, cpuctx, cpu); | 1399 | |
1400 | /* | ||
1401 | * We want to keep the following priority order: | ||
1402 | * cpu pinned (that don't need to move), task pinned, | ||
1403 | * cpu flexible, task flexible. | ||
1404 | */ | ||
1405 | cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); | ||
1406 | |||
1407 | ctx_sched_in(ctx, cpuctx, EVENT_PINNED); | ||
1408 | cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); | ||
1409 | ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE); | ||
1410 | |||
1339 | cpuctx->task_ctx = ctx; | 1411 | cpuctx->task_ctx = ctx; |
1340 | } | 1412 | } |
1341 | 1413 | ||
1342 | static void perf_event_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu) | 1414 | #define MAX_INTERRUPTS (~0ULL) |
1415 | |||
1416 | static void perf_log_throttle(struct perf_event *event, int enable); | ||
1417 | |||
1418 | static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) | ||
1343 | { | 1419 | { |
1344 | struct perf_event_context *ctx = &cpuctx->ctx; | 1420 | u64 frequency = event->attr.sample_freq; |
1421 | u64 sec = NSEC_PER_SEC; | ||
1422 | u64 divisor, dividend; | ||
1423 | |||
1424 | int count_fls, nsec_fls, frequency_fls, sec_fls; | ||
1425 | |||
1426 | count_fls = fls64(count); | ||
1427 | nsec_fls = fls64(nsec); | ||
1428 | frequency_fls = fls64(frequency); | ||
1429 | sec_fls = 30; | ||
1345 | 1430 | ||
1346 | __perf_event_sched_in(ctx, cpuctx, cpu); | 1431 | /* |
1432 | * We got @count in @nsec, with a target of sample_freq HZ | ||
1433 | * the target period becomes: | ||
1434 | * | ||
1435 | * @count * 10^9 | ||
1436 | * period = ------------------- | ||
1437 | * @nsec * sample_freq | ||
1438 | * | ||
1439 | */ | ||
1440 | |||
1441 | /* | ||
1442 | * Reduce accuracy by one bit such that @a and @b converge | ||
1443 | * to a similar magnitude. | ||
1444 | */ | ||
1445 | #define REDUCE_FLS(a, b) \ | ||
1446 | do { \ | ||
1447 | if (a##_fls > b##_fls) { \ | ||
1448 | a >>= 1; \ | ||
1449 | a##_fls--; \ | ||
1450 | } else { \ | ||
1451 | b >>= 1; \ | ||
1452 | b##_fls--; \ | ||
1453 | } \ | ||
1454 | } while (0) | ||
1455 | |||
1456 | /* | ||
1457 | * Reduce accuracy until either term fits in a u64, then proceed with | ||
1458 | * the other, so that finally we can do a u64/u64 division. | ||
1459 | */ | ||
1460 | while (count_fls + sec_fls > 64 && nsec_fls + frequency_fls > 64) { | ||
1461 | REDUCE_FLS(nsec, frequency); | ||
1462 | REDUCE_FLS(sec, count); | ||
1463 | } | ||
1464 | |||
1465 | if (count_fls + sec_fls > 64) { | ||
1466 | divisor = nsec * frequency; | ||
1467 | |||
1468 | while (count_fls + sec_fls > 64) { | ||
1469 | REDUCE_FLS(count, sec); | ||
1470 | divisor >>= 1; | ||
1471 | } | ||
1472 | |||
1473 | dividend = count * sec; | ||
1474 | } else { | ||
1475 | dividend = count * sec; | ||
1476 | |||
1477 | while (nsec_fls + frequency_fls > 64) { | ||
1478 | REDUCE_FLS(nsec, frequency); | ||
1479 | dividend >>= 1; | ||
1480 | } | ||
1481 | |||
1482 | divisor = nsec * frequency; | ||
1483 | } | ||
1484 | |||
1485 | return div64_u64(dividend, divisor); | ||
1347 | } | 1486 | } |
1348 | 1487 | ||
1349 | #define MAX_INTERRUPTS (~0ULL) | 1488 | static void perf_event_stop(struct perf_event *event) |
1489 | { | ||
1490 | if (!event->pmu->stop) | ||
1491 | return event->pmu->disable(event); | ||
1350 | 1492 | ||
1351 | static void perf_log_throttle(struct perf_event *event, int enable); | 1493 | return event->pmu->stop(event); |
1494 | } | ||
1495 | |||
1496 | static int perf_event_start(struct perf_event *event) | ||
1497 | { | ||
1498 | if (!event->pmu->start) | ||
1499 | return event->pmu->enable(event); | ||
1352 | 1500 | ||
1353 | static void perf_adjust_period(struct perf_event *event, u64 events) | 1501 | return event->pmu->start(event); |
1502 | } | ||
1503 | |||
1504 | static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) | ||
1354 | { | 1505 | { |
1355 | struct hw_perf_event *hwc = &event->hw; | 1506 | struct hw_perf_event *hwc = &event->hw; |
1356 | u64 period, sample_period; | 1507 | u64 period, sample_period; |
1357 | s64 delta; | 1508 | s64 delta; |
1358 | 1509 | ||
1359 | events *= hwc->sample_period; | 1510 | period = perf_calculate_period(event, nsec, count); |
1360 | period = div64_u64(events, event->attr.sample_freq); | ||
1361 | 1511 | ||
1362 | delta = (s64)(period - hwc->sample_period); | 1512 | delta = (s64)(period - hwc->sample_period); |
1363 | delta = (delta + 7) / 8; /* low pass filter */ | 1513 | delta = (delta + 7) / 8; /* low pass filter */ |
@@ -1368,13 +1518,22 @@ static void perf_adjust_period(struct perf_event *event, u64 events) | |||
1368 | sample_period = 1; | 1518 | sample_period = 1; |
1369 | 1519 | ||
1370 | hwc->sample_period = sample_period; | 1520 | hwc->sample_period = sample_period; |
1521 | |||
1522 | if (atomic64_read(&hwc->period_left) > 8*sample_period) { | ||
1523 | perf_disable(); | ||
1524 | perf_event_stop(event); | ||
1525 | atomic64_set(&hwc->period_left, 0); | ||
1526 | perf_event_start(event); | ||
1527 | perf_enable(); | ||
1528 | } | ||
1371 | } | 1529 | } |
1372 | 1530 | ||
1373 | static void perf_ctx_adjust_freq(struct perf_event_context *ctx) | 1531 | static void perf_ctx_adjust_freq(struct perf_event_context *ctx) |
1374 | { | 1532 | { |
1375 | struct perf_event *event; | 1533 | struct perf_event *event; |
1376 | struct hw_perf_event *hwc; | 1534 | struct hw_perf_event *hwc; |
1377 | u64 interrupts, freq; | 1535 | u64 interrupts, now; |
1536 | s64 delta; | ||
1378 | 1537 | ||
1379 | raw_spin_lock(&ctx->lock); | 1538 | raw_spin_lock(&ctx->lock); |
1380 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { | 1539 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { |
@@ -1395,44 +1554,18 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx) | |||
1395 | if (interrupts == MAX_INTERRUPTS) { | 1554 | if (interrupts == MAX_INTERRUPTS) { |
1396 | perf_log_throttle(event, 1); | 1555 | perf_log_throttle(event, 1); |
1397 | event->pmu->unthrottle(event); | 1556 | event->pmu->unthrottle(event); |
1398 | interrupts = 2*sysctl_perf_event_sample_rate/HZ; | ||
1399 | } | 1557 | } |
1400 | 1558 | ||
1401 | if (!event->attr.freq || !event->attr.sample_freq) | 1559 | if (!event->attr.freq || !event->attr.sample_freq) |
1402 | continue; | 1560 | continue; |
1403 | 1561 | ||
1404 | /* | 1562 | event->pmu->read(event); |
1405 | * if the specified freq < HZ then we need to skip ticks | 1563 | now = atomic64_read(&event->count); |
1406 | */ | 1564 | delta = now - hwc->freq_count_stamp; |
1407 | if (event->attr.sample_freq < HZ) { | 1565 | hwc->freq_count_stamp = now; |
1408 | freq = event->attr.sample_freq; | ||
1409 | |||
1410 | hwc->freq_count += freq; | ||
1411 | hwc->freq_interrupts += interrupts; | ||
1412 | |||
1413 | if (hwc->freq_count < HZ) | ||
1414 | continue; | ||
1415 | |||
1416 | interrupts = hwc->freq_interrupts; | ||
1417 | hwc->freq_interrupts = 0; | ||
1418 | hwc->freq_count -= HZ; | ||
1419 | } else | ||
1420 | freq = HZ; | ||
1421 | |||
1422 | perf_adjust_period(event, freq * interrupts); | ||
1423 | 1566 | ||
1424 | /* | 1567 | if (delta > 0) |
1425 | * In order to avoid being stalled by an (accidental) huge | 1568 | perf_adjust_period(event, TICK_NSEC, delta); |
1426 | * sample period, force reset the sample period if we didn't | ||
1427 | * get any events in this freq period. | ||
1428 | */ | ||
1429 | if (!interrupts) { | ||
1430 | perf_disable(); | ||
1431 | event->pmu->disable(event); | ||
1432 | atomic64_set(&hwc->period_left, 0); | ||
1433 | event->pmu->enable(event); | ||
1434 | perf_enable(); | ||
1435 | } | ||
1436 | } | 1569 | } |
1437 | raw_spin_unlock(&ctx->lock); | 1570 | raw_spin_unlock(&ctx->lock); |
1438 | } | 1571 | } |
@@ -1442,26 +1575,18 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx) | |||
1442 | */ | 1575 | */ |
1443 | static void rotate_ctx(struct perf_event_context *ctx) | 1576 | static void rotate_ctx(struct perf_event_context *ctx) |
1444 | { | 1577 | { |
1445 | struct perf_event *event; | ||
1446 | |||
1447 | if (!ctx->nr_events) | 1578 | if (!ctx->nr_events) |
1448 | return; | 1579 | return; |
1449 | 1580 | ||
1450 | raw_spin_lock(&ctx->lock); | 1581 | raw_spin_lock(&ctx->lock); |
1451 | /* | 1582 | |
1452 | * Rotate the first entry last (works just fine for group events too): | 1583 | /* Rotate the first entry last of non-pinned groups */ |
1453 | */ | 1584 | list_rotate_left(&ctx->flexible_groups); |
1454 | perf_disable(); | ||
1455 | list_for_each_entry(event, &ctx->group_list, group_entry) { | ||
1456 | list_move_tail(&event->group_entry, &ctx->group_list); | ||
1457 | break; | ||
1458 | } | ||
1459 | perf_enable(); | ||
1460 | 1585 | ||
1461 | raw_spin_unlock(&ctx->lock); | 1586 | raw_spin_unlock(&ctx->lock); |
1462 | } | 1587 | } |
1463 | 1588 | ||
1464 | void perf_event_task_tick(struct task_struct *curr, int cpu) | 1589 | void perf_event_task_tick(struct task_struct *curr) |
1465 | { | 1590 | { |
1466 | struct perf_cpu_context *cpuctx; | 1591 | struct perf_cpu_context *cpuctx; |
1467 | struct perf_event_context *ctx; | 1592 | struct perf_event_context *ctx; |
@@ -1469,24 +1594,43 @@ void perf_event_task_tick(struct task_struct *curr, int cpu) | |||
1469 | if (!atomic_read(&nr_events)) | 1594 | if (!atomic_read(&nr_events)) |
1470 | return; | 1595 | return; |
1471 | 1596 | ||
1472 | cpuctx = &per_cpu(perf_cpu_context, cpu); | 1597 | cpuctx = &__get_cpu_var(perf_cpu_context); |
1473 | ctx = curr->perf_event_ctxp; | 1598 | ctx = curr->perf_event_ctxp; |
1474 | 1599 | ||
1600 | perf_disable(); | ||
1601 | |||
1475 | perf_ctx_adjust_freq(&cpuctx->ctx); | 1602 | perf_ctx_adjust_freq(&cpuctx->ctx); |
1476 | if (ctx) | 1603 | if (ctx) |
1477 | perf_ctx_adjust_freq(ctx); | 1604 | perf_ctx_adjust_freq(ctx); |
1478 | 1605 | ||
1479 | perf_event_cpu_sched_out(cpuctx); | 1606 | cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); |
1480 | if (ctx) | 1607 | if (ctx) |
1481 | __perf_event_task_sched_out(ctx); | 1608 | task_ctx_sched_out(ctx, EVENT_FLEXIBLE); |
1482 | 1609 | ||
1483 | rotate_ctx(&cpuctx->ctx); | 1610 | rotate_ctx(&cpuctx->ctx); |
1484 | if (ctx) | 1611 | if (ctx) |
1485 | rotate_ctx(ctx); | 1612 | rotate_ctx(ctx); |
1486 | 1613 | ||
1487 | perf_event_cpu_sched_in(cpuctx, cpu); | 1614 | cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); |
1488 | if (ctx) | 1615 | if (ctx) |
1489 | perf_event_task_sched_in(curr, cpu); | 1616 | task_ctx_sched_in(curr, EVENT_FLEXIBLE); |
1617 | |||
1618 | perf_enable(); | ||
1619 | } | ||
1620 | |||
1621 | static int event_enable_on_exec(struct perf_event *event, | ||
1622 | struct perf_event_context *ctx) | ||
1623 | { | ||
1624 | if (!event->attr.enable_on_exec) | ||
1625 | return 0; | ||
1626 | |||
1627 | event->attr.enable_on_exec = 0; | ||
1628 | if (event->state >= PERF_EVENT_STATE_INACTIVE) | ||
1629 | return 0; | ||
1630 | |||
1631 | __perf_event_mark_enabled(event, ctx); | ||
1632 | |||
1633 | return 1; | ||
1490 | } | 1634 | } |
1491 | 1635 | ||
1492 | /* | 1636 | /* |
@@ -1499,6 +1643,7 @@ static void perf_event_enable_on_exec(struct task_struct *task) | |||
1499 | struct perf_event *event; | 1643 | struct perf_event *event; |
1500 | unsigned long flags; | 1644 | unsigned long flags; |
1501 | int enabled = 0; | 1645 | int enabled = 0; |
1646 | int ret; | ||
1502 | 1647 | ||
1503 | local_irq_save(flags); | 1648 | local_irq_save(flags); |
1504 | ctx = task->perf_event_ctxp; | 1649 | ctx = task->perf_event_ctxp; |
@@ -1509,14 +1654,16 @@ static void perf_event_enable_on_exec(struct task_struct *task) | |||
1509 | 1654 | ||
1510 | raw_spin_lock(&ctx->lock); | 1655 | raw_spin_lock(&ctx->lock); |
1511 | 1656 | ||
1512 | list_for_each_entry(event, &ctx->group_list, group_entry) { | 1657 | list_for_each_entry(event, &ctx->pinned_groups, group_entry) { |
1513 | if (!event->attr.enable_on_exec) | 1658 | ret = event_enable_on_exec(event, ctx); |
1514 | continue; | 1659 | if (ret) |
1515 | event->attr.enable_on_exec = 0; | 1660 | enabled = 1; |
1516 | if (event->state >= PERF_EVENT_STATE_INACTIVE) | 1661 | } |
1517 | continue; | 1662 | |
1518 | __perf_event_mark_enabled(event, ctx); | 1663 | list_for_each_entry(event, &ctx->flexible_groups, group_entry) { |
1519 | enabled = 1; | 1664 | ret = event_enable_on_exec(event, ctx); |
1665 | if (ret) | ||
1666 | enabled = 1; | ||
1520 | } | 1667 | } |
1521 | 1668 | ||
1522 | /* | 1669 | /* |
@@ -1527,7 +1674,7 @@ static void perf_event_enable_on_exec(struct task_struct *task) | |||
1527 | 1674 | ||
1528 | raw_spin_unlock(&ctx->lock); | 1675 | raw_spin_unlock(&ctx->lock); |
1529 | 1676 | ||
1530 | perf_event_task_sched_in(task, smp_processor_id()); | 1677 | perf_event_task_sched_in(task); |
1531 | out: | 1678 | out: |
1532 | local_irq_restore(flags); | 1679 | local_irq_restore(flags); |
1533 | } | 1680 | } |
@@ -1590,7 +1737,8 @@ __perf_event_init_context(struct perf_event_context *ctx, | |||
1590 | { | 1737 | { |
1591 | raw_spin_lock_init(&ctx->lock); | 1738 | raw_spin_lock_init(&ctx->lock); |
1592 | mutex_init(&ctx->mutex); | 1739 | mutex_init(&ctx->mutex); |
1593 | INIT_LIST_HEAD(&ctx->group_list); | 1740 | INIT_LIST_HEAD(&ctx->pinned_groups); |
1741 | INIT_LIST_HEAD(&ctx->flexible_groups); | ||
1594 | INIT_LIST_HEAD(&ctx->event_list); | 1742 | INIT_LIST_HEAD(&ctx->event_list); |
1595 | atomic_set(&ctx->refcount, 1); | 1743 | atomic_set(&ctx->refcount, 1); |
1596 | ctx->task = task; | 1744 | ctx->task = task; |
@@ -3608,7 +3756,7 @@ void __perf_event_mmap(struct vm_area_struct *vma) | |||
3608 | /* .tid */ | 3756 | /* .tid */ |
3609 | .start = vma->vm_start, | 3757 | .start = vma->vm_start, |
3610 | .len = vma->vm_end - vma->vm_start, | 3758 | .len = vma->vm_end - vma->vm_start, |
3611 | .pgoff = vma->vm_pgoff, | 3759 | .pgoff = (u64)vma->vm_pgoff << PAGE_SHIFT, |
3612 | }, | 3760 | }, |
3613 | }; | 3761 | }; |
3614 | 3762 | ||
@@ -3688,12 +3836,12 @@ static int __perf_event_overflow(struct perf_event *event, int nmi, | |||
3688 | 3836 | ||
3689 | if (event->attr.freq) { | 3837 | if (event->attr.freq) { |
3690 | u64 now = perf_clock(); | 3838 | u64 now = perf_clock(); |
3691 | s64 delta = now - hwc->freq_stamp; | 3839 | s64 delta = now - hwc->freq_time_stamp; |
3692 | 3840 | ||
3693 | hwc->freq_stamp = now; | 3841 | hwc->freq_time_stamp = now; |
3694 | 3842 | ||
3695 | if (delta > 0 && delta < TICK_NSEC) | 3843 | if (delta > 0 && delta < 2*TICK_NSEC) |
3696 | perf_adjust_period(event, NSEC_PER_SEC / (int)delta); | 3844 | perf_adjust_period(event, delta, hwc->last_period); |
3697 | } | 3845 | } |
3698 | 3846 | ||
3699 | /* | 3847 | /* |
@@ -4184,7 +4332,7 @@ static const struct pmu perf_ops_task_clock = { | |||
4184 | .read = task_clock_perf_event_read, | 4332 | .read = task_clock_perf_event_read, |
4185 | }; | 4333 | }; |
4186 | 4334 | ||
4187 | #ifdef CONFIG_EVENT_PROFILE | 4335 | #ifdef CONFIG_EVENT_TRACING |
4188 | 4336 | ||
4189 | void perf_tp_event(int event_id, u64 addr, u64 count, void *record, | 4337 | void perf_tp_event(int event_id, u64 addr, u64 count, void *record, |
4190 | int entry_size) | 4338 | int entry_size) |
@@ -4289,7 +4437,7 @@ static void perf_event_free_filter(struct perf_event *event) | |||
4289 | { | 4437 | { |
4290 | } | 4438 | } |
4291 | 4439 | ||
4292 | #endif /* CONFIG_EVENT_PROFILE */ | 4440 | #endif /* CONFIG_EVENT_TRACING */ |
4293 | 4441 | ||
4294 | #ifdef CONFIG_HAVE_HW_BREAKPOINT | 4442 | #ifdef CONFIG_HAVE_HW_BREAKPOINT |
4295 | static void bp_perf_event_destroy(struct perf_event *event) | 4443 | static void bp_perf_event_destroy(struct perf_event *event) |
@@ -4870,8 +5018,15 @@ inherit_event(struct perf_event *parent_event, | |||
4870 | else | 5018 | else |
4871 | child_event->state = PERF_EVENT_STATE_OFF; | 5019 | child_event->state = PERF_EVENT_STATE_OFF; |
4872 | 5020 | ||
4873 | if (parent_event->attr.freq) | 5021 | if (parent_event->attr.freq) { |
4874 | child_event->hw.sample_period = parent_event->hw.sample_period; | 5022 | u64 sample_period = parent_event->hw.sample_period; |
5023 | struct hw_perf_event *hwc = &child_event->hw; | ||
5024 | |||
5025 | hwc->sample_period = sample_period; | ||
5026 | hwc->last_period = sample_period; | ||
5027 | |||
5028 | atomic64_set(&hwc->period_left, sample_period); | ||
5029 | } | ||
4875 | 5030 | ||
4876 | child_event->overflow_handler = parent_event->overflow_handler; | 5031 | child_event->overflow_handler = parent_event->overflow_handler; |
4877 | 5032 | ||
@@ -5039,7 +5194,11 @@ void perf_event_exit_task(struct task_struct *child) | |||
5039 | mutex_lock_nested(&child_ctx->mutex, SINGLE_DEPTH_NESTING); | 5194 | mutex_lock_nested(&child_ctx->mutex, SINGLE_DEPTH_NESTING); |
5040 | 5195 | ||
5041 | again: | 5196 | again: |
5042 | list_for_each_entry_safe(child_event, tmp, &child_ctx->group_list, | 5197 | list_for_each_entry_safe(child_event, tmp, &child_ctx->pinned_groups, |
5198 | group_entry) | ||
5199 | __perf_event_exit_task(child_event, child_ctx, child); | ||
5200 | |||
5201 | list_for_each_entry_safe(child_event, tmp, &child_ctx->flexible_groups, | ||
5043 | group_entry) | 5202 | group_entry) |
5044 | __perf_event_exit_task(child_event, child_ctx, child); | 5203 | __perf_event_exit_task(child_event, child_ctx, child); |
5045 | 5204 | ||
@@ -5048,7 +5207,8 @@ again: | |||
5048 | * its siblings to the list, but we obtained 'tmp' before that which | 5207 | * its siblings to the list, but we obtained 'tmp' before that which |
5049 | * will still point to the list head terminating the iteration. | 5208 | * will still point to the list head terminating the iteration. |
5050 | */ | 5209 | */ |
5051 | if (!list_empty(&child_ctx->group_list)) | 5210 | if (!list_empty(&child_ctx->pinned_groups) || |
5211 | !list_empty(&child_ctx->flexible_groups)) | ||
5052 | goto again; | 5212 | goto again; |
5053 | 5213 | ||
5054 | mutex_unlock(&child_ctx->mutex); | 5214 | mutex_unlock(&child_ctx->mutex); |
@@ -5056,6 +5216,24 @@ again: | |||
5056 | put_ctx(child_ctx); | 5216 | put_ctx(child_ctx); |
5057 | } | 5217 | } |
5058 | 5218 | ||
5219 | static void perf_free_event(struct perf_event *event, | ||
5220 | struct perf_event_context *ctx) | ||
5221 | { | ||
5222 | struct perf_event *parent = event->parent; | ||
5223 | |||
5224 | if (WARN_ON_ONCE(!parent)) | ||
5225 | return; | ||
5226 | |||
5227 | mutex_lock(&parent->child_mutex); | ||
5228 | list_del_init(&event->child_list); | ||
5229 | mutex_unlock(&parent->child_mutex); | ||
5230 | |||
5231 | fput(parent->filp); | ||
5232 | |||
5233 | list_del_event(event, ctx); | ||
5234 | free_event(event); | ||
5235 | } | ||
5236 | |||
5059 | /* | 5237 | /* |
5060 | * free an unexposed, unused context as created by inheritance by | 5238 | * free an unexposed, unused context as created by inheritance by |
5061 | * init_task below, used by fork() in case of fail. | 5239 | * init_task below, used by fork() in case of fail. |
@@ -5070,36 +5248,70 @@ void perf_event_free_task(struct task_struct *task) | |||
5070 | 5248 | ||
5071 | mutex_lock(&ctx->mutex); | 5249 | mutex_lock(&ctx->mutex); |
5072 | again: | 5250 | again: |
5073 | list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry) { | 5251 | list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) |
5074 | struct perf_event *parent = event->parent; | 5252 | perf_free_event(event, ctx); |
5075 | 5253 | ||
5076 | if (WARN_ON_ONCE(!parent)) | 5254 | list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, |
5077 | continue; | 5255 | group_entry) |
5256 | perf_free_event(event, ctx); | ||
5078 | 5257 | ||
5079 | mutex_lock(&parent->child_mutex); | 5258 | if (!list_empty(&ctx->pinned_groups) || |
5080 | list_del_init(&event->child_list); | 5259 | !list_empty(&ctx->flexible_groups)) |
5081 | mutex_unlock(&parent->child_mutex); | 5260 | goto again; |
5082 | 5261 | ||
5083 | fput(parent->filp); | 5262 | mutex_unlock(&ctx->mutex); |
5084 | 5263 | ||
5085 | list_del_event(event, ctx); | 5264 | put_ctx(ctx); |
5086 | free_event(event); | 5265 | } |
5266 | |||
5267 | static int | ||
5268 | inherit_task_group(struct perf_event *event, struct task_struct *parent, | ||
5269 | struct perf_event_context *parent_ctx, | ||
5270 | struct task_struct *child, | ||
5271 | int *inherited_all) | ||
5272 | { | ||
5273 | int ret; | ||
5274 | struct perf_event_context *child_ctx = child->perf_event_ctxp; | ||
5275 | |||
5276 | if (!event->attr.inherit) { | ||
5277 | *inherited_all = 0; | ||
5278 | return 0; | ||
5087 | } | 5279 | } |
5088 | 5280 | ||
5089 | if (!list_empty(&ctx->group_list)) | 5281 | if (!child_ctx) { |
5090 | goto again; | 5282 | /* |
5283 | * This is executed from the parent task context, so | ||
5284 | * inherit events that have been marked for cloning. | ||
5285 | * First allocate and initialize a context for the | ||
5286 | * child. | ||
5287 | */ | ||
5091 | 5288 | ||
5092 | mutex_unlock(&ctx->mutex); | 5289 | child_ctx = kzalloc(sizeof(struct perf_event_context), |
5290 | GFP_KERNEL); | ||
5291 | if (!child_ctx) | ||
5292 | return -ENOMEM; | ||
5093 | 5293 | ||
5094 | put_ctx(ctx); | 5294 | __perf_event_init_context(child_ctx, child); |
5295 | child->perf_event_ctxp = child_ctx; | ||
5296 | get_task_struct(child); | ||
5297 | } | ||
5298 | |||
5299 | ret = inherit_group(event, parent, parent_ctx, | ||
5300 | child, child_ctx); | ||
5301 | |||
5302 | if (ret) | ||
5303 | *inherited_all = 0; | ||
5304 | |||
5305 | return ret; | ||
5095 | } | 5306 | } |
5096 | 5307 | ||
5308 | |||
5097 | /* | 5309 | /* |
5098 | * Initialize the perf_event context in task_struct | 5310 | * Initialize the perf_event context in task_struct |
5099 | */ | 5311 | */ |
5100 | int perf_event_init_task(struct task_struct *child) | 5312 | int perf_event_init_task(struct task_struct *child) |
5101 | { | 5313 | { |
5102 | struct perf_event_context *child_ctx = NULL, *parent_ctx; | 5314 | struct perf_event_context *child_ctx, *parent_ctx; |
5103 | struct perf_event_context *cloned_ctx; | 5315 | struct perf_event_context *cloned_ctx; |
5104 | struct perf_event *event; | 5316 | struct perf_event *event; |
5105 | struct task_struct *parent = current; | 5317 | struct task_struct *parent = current; |
@@ -5137,41 +5349,22 @@ int perf_event_init_task(struct task_struct *child) | |||
5137 | * We dont have to disable NMIs - we are only looking at | 5349 | * We dont have to disable NMIs - we are only looking at |
5138 | * the list, not manipulating it: | 5350 | * the list, not manipulating it: |
5139 | */ | 5351 | */ |
5140 | list_for_each_entry(event, &parent_ctx->group_list, group_entry) { | 5352 | list_for_each_entry(event, &parent_ctx->pinned_groups, group_entry) { |
5141 | 5353 | ret = inherit_task_group(event, parent, parent_ctx, child, | |
5142 | if (!event->attr.inherit) { | 5354 | &inherited_all); |
5143 | inherited_all = 0; | 5355 | if (ret) |
5144 | continue; | 5356 | break; |
5145 | } | 5357 | } |
5146 | |||
5147 | if (!child->perf_event_ctxp) { | ||
5148 | /* | ||
5149 | * This is executed from the parent task context, so | ||
5150 | * inherit events that have been marked for cloning. | ||
5151 | * First allocate and initialize a context for the | ||
5152 | * child. | ||
5153 | */ | ||
5154 | |||
5155 | child_ctx = kzalloc(sizeof(struct perf_event_context), | ||
5156 | GFP_KERNEL); | ||
5157 | if (!child_ctx) { | ||
5158 | ret = -ENOMEM; | ||
5159 | break; | ||
5160 | } | ||
5161 | |||
5162 | __perf_event_init_context(child_ctx, child); | ||
5163 | child->perf_event_ctxp = child_ctx; | ||
5164 | get_task_struct(child); | ||
5165 | } | ||
5166 | 5358 | ||
5167 | ret = inherit_group(event, parent, parent_ctx, | 5359 | list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) { |
5168 | child, child_ctx); | 5360 | ret = inherit_task_group(event, parent, parent_ctx, child, |
5169 | if (ret) { | 5361 | &inherited_all); |
5170 | inherited_all = 0; | 5362 | if (ret) |
5171 | break; | 5363 | break; |
5172 | } | ||
5173 | } | 5364 | } |
5174 | 5365 | ||
5366 | child_ctx = child->perf_event_ctxp; | ||
5367 | |||
5175 | if (child_ctx && inherited_all) { | 5368 | if (child_ctx && inherited_all) { |
5176 | /* | 5369 | /* |
5177 | * Mark the child context as a clone of the parent | 5370 | * Mark the child context as a clone of the parent |
@@ -5220,7 +5413,9 @@ static void __perf_event_exit_cpu(void *info) | |||
5220 | struct perf_event_context *ctx = &cpuctx->ctx; | 5413 | struct perf_event_context *ctx = &cpuctx->ctx; |
5221 | struct perf_event *event, *tmp; | 5414 | struct perf_event *event, *tmp; |
5222 | 5415 | ||
5223 | list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry) | 5416 | list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) |
5417 | __perf_event_remove_from_context(event); | ||
5418 | list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry) | ||
5224 | __perf_event_remove_from_context(event); | 5419 | __perf_event_remove_from_context(event); |
5225 | } | 5420 | } |
5226 | static void perf_event_exit_cpu(int cpu) | 5421 | static void perf_event_exit_cpu(int cpu) |
@@ -5258,6 +5453,10 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) | |||
5258 | perf_event_exit_cpu(cpu); | 5453 | perf_event_exit_cpu(cpu); |
5259 | break; | 5454 | break; |
5260 | 5455 | ||
5456 | case CPU_DEAD: | ||
5457 | hw_perf_event_setup_offline(cpu); | ||
5458 | break; | ||
5459 | |||
5261 | default: | 5460 | default: |
5262 | break; | 5461 | break; |
5263 | } | 5462 | } |