Diffstat (limited to 'kernel/perf_event.c')
-rw-r--r--    kernel/perf_event.c    640
1 file changed, 419 insertions, 221 deletions
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index d27746bd3a06..a661e7991865 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
| @@ -98,11 +98,12 @@ void __weak hw_perf_enable(void) { barrier(); } | |||
| 98 | 98 | ||
| 99 | void __weak hw_perf_event_setup(int cpu) { barrier(); } | 99 | void __weak hw_perf_event_setup(int cpu) { barrier(); } |
| 100 | void __weak hw_perf_event_setup_online(int cpu) { barrier(); } | 100 | void __weak hw_perf_event_setup_online(int cpu) { barrier(); } |
| 101 | void __weak hw_perf_event_setup_offline(int cpu) { barrier(); } | ||
| 101 | 102 | ||
| 102 | int __weak | 103 | int __weak |
| 103 | hw_perf_group_sched_in(struct perf_event *group_leader, | 104 | hw_perf_group_sched_in(struct perf_event *group_leader, |
| 104 | struct perf_cpu_context *cpuctx, | 105 | struct perf_cpu_context *cpuctx, |
| 105 | struct perf_event_context *ctx, int cpu) | 106 | struct perf_event_context *ctx) |
| 106 | { | 107 | { |
| 107 | return 0; | 108 | return 0; |
| 108 | } | 109 | } |
| @@ -248,7 +249,7 @@ static void perf_unpin_context(struct perf_event_context *ctx) | |||
| 248 | 249 | ||
| 249 | static inline u64 perf_clock(void) | 250 | static inline u64 perf_clock(void) |
| 250 | { | 251 | { |
| 251 | return cpu_clock(smp_processor_id()); | 252 | return cpu_clock(raw_smp_processor_id()); |
| 252 | } | 253 | } |
| 253 | 254 | ||
| 254 | /* | 255 | /* |
| @@ -289,6 +290,15 @@ static void update_event_times(struct perf_event *event) | |||
| 289 | event->total_time_running = run_end - event->tstamp_running; | 290 | event->total_time_running = run_end - event->tstamp_running; |
| 290 | } | 291 | } |
| 291 | 292 | ||
| 293 | static struct list_head * | ||
| 294 | ctx_group_list(struct perf_event *event, struct perf_event_context *ctx) | ||
| 295 | { | ||
| 296 | if (event->attr.pinned) | ||
| 297 | return &ctx->pinned_groups; | ||
| 298 | else | ||
| 299 | return &ctx->flexible_groups; | ||
| 300 | } | ||
| 301 | |||
| 292 | /* | 302 | /* |
| 293 | * Add a event from the lists for its context. | 303 | * Add a event from the lists for its context. |
| 294 | * Must be called with ctx->mutex and ctx->lock held. | 304 | * Must be called with ctx->mutex and ctx->lock held. |
| @@ -303,9 +313,19 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) | |||
| 303 | * add it straight to the context's event list, or to the group | 313 | * add it straight to the context's event list, or to the group |
| 304 | * leader's sibling list: | 314 | * leader's sibling list: |
| 305 | */ | 315 | */ |
| 306 | if (group_leader == event) | 316 | if (group_leader == event) { |
| 307 | list_add_tail(&event->group_entry, &ctx->group_list); | 317 | struct list_head *list; |
| 308 | else { | 318 | |
| 319 | if (is_software_event(event)) | ||
| 320 | event->group_flags |= PERF_GROUP_SOFTWARE; | ||
| 321 | |||
| 322 | list = ctx_group_list(event, ctx); | ||
| 323 | list_add_tail(&event->group_entry, list); | ||
| 324 | } else { | ||
| 325 | if (group_leader->group_flags & PERF_GROUP_SOFTWARE && | ||
| 326 | !is_software_event(event)) | ||
| 327 | group_leader->group_flags &= ~PERF_GROUP_SOFTWARE; | ||
| 328 | |||
| 309 | list_add_tail(&event->group_entry, &group_leader->sibling_list); | 329 | list_add_tail(&event->group_entry, &group_leader->sibling_list); |
| 310 | group_leader->nr_siblings++; | 330 | group_leader->nr_siblings++; |
| 311 | } | 331 | } |
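
Side note (not part of the patch): the pinned/flexible split keyed off event->attr.pinned is the same `pinned` bit a user sets in struct perf_event_attr when opening a counter; pinned groups get first claim on the PMU and are put into error state if they cannot be scheduled, while flexible groups are the ones rotated at each tick. A minimal userspace sketch, assuming a Linux system where the perf_event_open() syscall is available:

    /* Illustration only: open one pinned and one flexible (default)
     * hardware counter on the current task. */
    #include <linux/perf_event.h>
    #include <sys/syscall.h>
    #include <string.h>
    #include <stdio.h>
    #include <unistd.h>

    static int open_cycles_counter(int pinned)
    {
            struct perf_event_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.size   = sizeof(attr);
            attr.type   = PERF_TYPE_HARDWARE;
            attr.config = PERF_COUNT_HW_CPU_CYCLES;
            attr.pinned = pinned;   /* 1: lands on ctx->pinned_groups */

            /* current task, any CPU, no group leader, no flags */
            return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
    }

    int main(void)
    {
            int pinned_fd   = open_cycles_counter(1);
            int flexible_fd = open_cycles_counter(0);

            printf("pinned fd=%d, flexible fd=%d\n", pinned_fd, flexible_fd);
            return 0;
    }
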
| @@ -355,9 +375,14 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) | |||
| 355 | * to the context list directly: | 375 | * to the context list directly: |
| 356 | */ | 376 | */ |
| 357 | list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) { | 377 | list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) { |
| 378 | struct list_head *list; | ||
| 358 | 379 | ||
| 359 | list_move_tail(&sibling->group_entry, &ctx->group_list); | 380 | list = ctx_group_list(event, ctx); |
| 381 | list_move_tail(&sibling->group_entry, list); | ||
| 360 | sibling->group_leader = sibling; | 382 | sibling->group_leader = sibling; |
| 383 | |||
| 384 | /* Inherit group flags from the previous leader */ | ||
| 385 | sibling->group_flags = event->group_flags; | ||
| 361 | } | 386 | } |
| 362 | } | 387 | } |
| 363 | 388 | ||
| @@ -608,14 +633,13 @@ void perf_event_disable(struct perf_event *event) | |||
| 608 | static int | 633 | static int |
| 609 | event_sched_in(struct perf_event *event, | 634 | event_sched_in(struct perf_event *event, |
| 610 | struct perf_cpu_context *cpuctx, | 635 | struct perf_cpu_context *cpuctx, |
| 611 | struct perf_event_context *ctx, | 636 | struct perf_event_context *ctx) |
| 612 | int cpu) | ||
| 613 | { | 637 | { |
| 614 | if (event->state <= PERF_EVENT_STATE_OFF) | 638 | if (event->state <= PERF_EVENT_STATE_OFF) |
| 615 | return 0; | 639 | return 0; |
| 616 | 640 | ||
| 617 | event->state = PERF_EVENT_STATE_ACTIVE; | 641 | event->state = PERF_EVENT_STATE_ACTIVE; |
| 618 | event->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */ | 642 | event->oncpu = smp_processor_id(); |
| 619 | /* | 643 | /* |
| 620 | * The new state must be visible before we turn it on in the hardware: | 644 | * The new state must be visible before we turn it on in the hardware: |
| 621 | */ | 645 | */ |
| @@ -642,8 +666,7 @@ event_sched_in(struct perf_event *event, | |||
| 642 | static int | 666 | static int |
| 643 | group_sched_in(struct perf_event *group_event, | 667 | group_sched_in(struct perf_event *group_event, |
| 644 | struct perf_cpu_context *cpuctx, | 668 | struct perf_cpu_context *cpuctx, |
| 645 | struct perf_event_context *ctx, | 669 | struct perf_event_context *ctx) |
| 646 | int cpu) | ||
| 647 | { | 670 | { |
| 648 | struct perf_event *event, *partial_group; | 671 | struct perf_event *event, *partial_group; |
| 649 | int ret; | 672 | int ret; |
| @@ -651,18 +674,18 @@ group_sched_in(struct perf_event *group_event, | |||
| 651 | if (group_event->state == PERF_EVENT_STATE_OFF) | 674 | if (group_event->state == PERF_EVENT_STATE_OFF) |
| 652 | return 0; | 675 | return 0; |
| 653 | 676 | ||
| 654 | ret = hw_perf_group_sched_in(group_event, cpuctx, ctx, cpu); | 677 | ret = hw_perf_group_sched_in(group_event, cpuctx, ctx); |
| 655 | if (ret) | 678 | if (ret) |
| 656 | return ret < 0 ? ret : 0; | 679 | return ret < 0 ? ret : 0; |
| 657 | 680 | ||
| 658 | if (event_sched_in(group_event, cpuctx, ctx, cpu)) | 681 | if (event_sched_in(group_event, cpuctx, ctx)) |
| 659 | return -EAGAIN; | 682 | return -EAGAIN; |
| 660 | 683 | ||
| 661 | /* | 684 | /* |
| 662 | * Schedule in siblings as one group (if any): | 685 | * Schedule in siblings as one group (if any): |
| 663 | */ | 686 | */ |
| 664 | list_for_each_entry(event, &group_event->sibling_list, group_entry) { | 687 | list_for_each_entry(event, &group_event->sibling_list, group_entry) { |
| 665 | if (event_sched_in(event, cpuctx, ctx, cpu)) { | 688 | if (event_sched_in(event, cpuctx, ctx)) { |
| 666 | partial_group = event; | 689 | partial_group = event; |
| 667 | goto group_error; | 690 | goto group_error; |
| 668 | } | 691 | } |
| @@ -686,24 +709,6 @@ group_error: | |||
| 686 | } | 709 | } |
| 687 | 710 | ||
| 688 | /* | 711 | /* |
| 689 | * Return 1 for a group consisting entirely of software events, | ||
| 690 | * 0 if the group contains any hardware events. | ||
| 691 | */ | ||
| 692 | static int is_software_only_group(struct perf_event *leader) | ||
| 693 | { | ||
| 694 | struct perf_event *event; | ||
| 695 | |||
| 696 | if (!is_software_event(leader)) | ||
| 697 | return 0; | ||
| 698 | |||
| 699 | list_for_each_entry(event, &leader->sibling_list, group_entry) | ||
| 700 | if (!is_software_event(event)) | ||
| 701 | return 0; | ||
| 702 | |||
| 703 | return 1; | ||
| 704 | } | ||
| 705 | |||
| 706 | /* | ||
| 707 | * Work out whether we can put this event group on the CPU now. | 712 | * Work out whether we can put this event group on the CPU now. |
| 708 | */ | 713 | */ |
| 709 | static int group_can_go_on(struct perf_event *event, | 714 | static int group_can_go_on(struct perf_event *event, |
| @@ -713,7 +718,7 @@ static int group_can_go_on(struct perf_event *event, | |||
| 713 | /* | 718 | /* |
| 714 | * Groups consisting entirely of software events can always go on. | 719 | * Groups consisting entirely of software events can always go on. |
| 715 | */ | 720 | */ |
| 716 | if (is_software_only_group(event)) | 721 | if (event->group_flags & PERF_GROUP_SOFTWARE) |
| 717 | return 1; | 722 | return 1; |
| 718 | /* | 723 | /* |
| 719 | * If an exclusive group is already on, no other hardware | 724 | * If an exclusive group is already on, no other hardware |
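
Aside, not from the patch itself: the deleted is_software_only_group() walked the whole sibling list on every call to group_can_go_on(), whereas PERF_GROUP_SOFTWARE is now maintained once, when events are added to or removed from a group, so the test above becomes a single bit check. A toy model of that invariant (all names here are illustrative, not the kernel's):

    #include <stdbool.h>
    #include <stdio.h>

    #define GROUP_SOFTWARE 0x1

    struct toy_event {
            bool is_software;
            unsigned int group_flags;   /* only meaningful on a group leader */
    };

    /* A leader that is itself a software event starts as a software-only group. */
    static void toy_init_leader(struct toy_event *leader)
    {
            if (leader->is_software)
                    leader->group_flags |= GROUP_SOFTWARE;
    }

    /* Adding a hardware sibling clears the flag, so later scheduling
     * decisions never have to walk the sibling list again. */
    static void toy_add_sibling(struct toy_event *leader, const struct toy_event *sibling)
    {
            if ((leader->group_flags & GROUP_SOFTWARE) && !sibling->is_software)
                    leader->group_flags &= ~GROUP_SOFTWARE;
    }

    int main(void)
    {
            struct toy_event leader = { .is_software = true };
            struct toy_event hw_sib = { .is_software = false };

            toy_init_leader(&leader);
            printf("software-only before: %d\n", !!(leader.group_flags & GROUP_SOFTWARE));
            toy_add_sibling(&leader, &hw_sib);
            printf("software-only after:  %d\n", !!(leader.group_flags & GROUP_SOFTWARE));
            return 0;
    }
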
| @@ -754,7 +759,6 @@ static void __perf_install_in_context(void *info) | |||
| 754 | struct perf_event *event = info; | 759 | struct perf_event *event = info; |
| 755 | struct perf_event_context *ctx = event->ctx; | 760 | struct perf_event_context *ctx = event->ctx; |
| 756 | struct perf_event *leader = event->group_leader; | 761 | struct perf_event *leader = event->group_leader; |
| 757 | int cpu = smp_processor_id(); | ||
| 758 | int err; | 762 | int err; |
| 759 | 763 | ||
| 760 | /* | 764 | /* |
| @@ -801,7 +805,7 @@ static void __perf_install_in_context(void *info) | |||
| 801 | if (!group_can_go_on(event, cpuctx, 1)) | 805 | if (!group_can_go_on(event, cpuctx, 1)) |
| 802 | err = -EEXIST; | 806 | err = -EEXIST; |
| 803 | else | 807 | else |
| 804 | err = event_sched_in(event, cpuctx, ctx, cpu); | 808 | err = event_sched_in(event, cpuctx, ctx); |
| 805 | 809 | ||
| 806 | if (err) { | 810 | if (err) { |
| 807 | /* | 811 | /* |
| @@ -943,11 +947,9 @@ static void __perf_event_enable(void *info) | |||
| 943 | } else { | 947 | } else { |
| 944 | perf_disable(); | 948 | perf_disable(); |
| 945 | if (event == leader) | 949 | if (event == leader) |
| 946 | err = group_sched_in(event, cpuctx, ctx, | 950 | err = group_sched_in(event, cpuctx, ctx); |
| 947 | smp_processor_id()); | ||
| 948 | else | 951 | else |
| 949 | err = event_sched_in(event, cpuctx, ctx, | 952 | err = event_sched_in(event, cpuctx, ctx); |
| 950 | smp_processor_id()); | ||
| 951 | perf_enable(); | 953 | perf_enable(); |
| 952 | } | 954 | } |
| 953 | 955 | ||
| @@ -1043,8 +1045,15 @@ static int perf_event_refresh(struct perf_event *event, int refresh) | |||
| 1043 | return 0; | 1045 | return 0; |
| 1044 | } | 1046 | } |
| 1045 | 1047 | ||
| 1046 | void __perf_event_sched_out(struct perf_event_context *ctx, | 1048 | enum event_type_t { |
| 1047 | struct perf_cpu_context *cpuctx) | 1049 | EVENT_FLEXIBLE = 0x1, |
| 1050 | EVENT_PINNED = 0x2, | ||
| 1051 | EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED, | ||
| 1052 | }; | ||
| 1053 | |||
| 1054 | static void ctx_sched_out(struct perf_event_context *ctx, | ||
| 1055 | struct perf_cpu_context *cpuctx, | ||
| 1056 | enum event_type_t event_type) | ||
| 1048 | { | 1057 | { |
| 1049 | struct perf_event *event; | 1058 | struct perf_event *event; |
| 1050 | 1059 | ||
| @@ -1055,10 +1064,18 @@ void __perf_event_sched_out(struct perf_event_context *ctx, | |||
| 1055 | update_context_time(ctx); | 1064 | update_context_time(ctx); |
| 1056 | 1065 | ||
| 1057 | perf_disable(); | 1066 | perf_disable(); |
| 1058 | if (ctx->nr_active) { | 1067 | if (!ctx->nr_active) |
| 1059 | list_for_each_entry(event, &ctx->group_list, group_entry) | 1068 | goto out_enable; |
| 1069 | |||
| 1070 | if (event_type & EVENT_PINNED) | ||
| 1071 | list_for_each_entry(event, &ctx->pinned_groups, group_entry) | ||
| 1060 | group_sched_out(event, cpuctx, ctx); | 1072 | group_sched_out(event, cpuctx, ctx); |
| 1061 | } | 1073 | |
| 1074 | if (event_type & EVENT_FLEXIBLE) | ||
| 1075 | list_for_each_entry(event, &ctx->flexible_groups, group_entry) | ||
| 1076 | group_sched_out(event, cpuctx, ctx); | ||
| 1077 | |||
| 1078 | out_enable: | ||
| 1062 | perf_enable(); | 1079 | perf_enable(); |
| 1063 | out: | 1080 | out: |
| 1064 | raw_spin_unlock(&ctx->lock); | 1081 | raw_spin_unlock(&ctx->lock); |
| @@ -1170,9 +1187,9 @@ static void perf_event_sync_stat(struct perf_event_context *ctx, | |||
| 1170 | * not restart the event. | 1187 | * not restart the event. |
| 1171 | */ | 1188 | */ |
| 1172 | void perf_event_task_sched_out(struct task_struct *task, | 1189 | void perf_event_task_sched_out(struct task_struct *task, |
| 1173 | struct task_struct *next, int cpu) | 1190 | struct task_struct *next) |
| 1174 | { | 1191 | { |
| 1175 | struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); | 1192 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); |
| 1176 | struct perf_event_context *ctx = task->perf_event_ctxp; | 1193 | struct perf_event_context *ctx = task->perf_event_ctxp; |
| 1177 | struct perf_event_context *next_ctx; | 1194 | struct perf_event_context *next_ctx; |
| 1178 | struct perf_event_context *parent; | 1195 | struct perf_event_context *parent; |
| @@ -1220,15 +1237,13 @@ void perf_event_task_sched_out(struct task_struct *task, | |||
| 1220 | rcu_read_unlock(); | 1237 | rcu_read_unlock(); |
| 1221 | 1238 | ||
| 1222 | if (do_switch) { | 1239 | if (do_switch) { |
| 1223 | __perf_event_sched_out(ctx, cpuctx); | 1240 | ctx_sched_out(ctx, cpuctx, EVENT_ALL); |
| 1224 | cpuctx->task_ctx = NULL; | 1241 | cpuctx->task_ctx = NULL; |
| 1225 | } | 1242 | } |
| 1226 | } | 1243 | } |
| 1227 | 1244 | ||
| 1228 | /* | 1245 | static void task_ctx_sched_out(struct perf_event_context *ctx, |
| 1229 | * Called with IRQs disabled | 1246 | enum event_type_t event_type) |
| 1230 | */ | ||
| 1231 | static void __perf_event_task_sched_out(struct perf_event_context *ctx) | ||
| 1232 | { | 1247 | { |
| 1233 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | 1248 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); |
| 1234 | 1249 | ||
| @@ -1238,47 +1253,41 @@ static void __perf_event_task_sched_out(struct perf_event_context *ctx) | |||
| 1238 | if (WARN_ON_ONCE(ctx != cpuctx->task_ctx)) | 1253 | if (WARN_ON_ONCE(ctx != cpuctx->task_ctx)) |
| 1239 | return; | 1254 | return; |
| 1240 | 1255 | ||
| 1241 | __perf_event_sched_out(ctx, cpuctx); | 1256 | ctx_sched_out(ctx, cpuctx, event_type); |
| 1242 | cpuctx->task_ctx = NULL; | 1257 | cpuctx->task_ctx = NULL; |
| 1243 | } | 1258 | } |
| 1244 | 1259 | ||
| 1245 | /* | 1260 | /* |
| 1246 | * Called with IRQs disabled | 1261 | * Called with IRQs disabled |
| 1247 | */ | 1262 | */ |
| 1248 | static void perf_event_cpu_sched_out(struct perf_cpu_context *cpuctx) | 1263 | static void __perf_event_task_sched_out(struct perf_event_context *ctx) |
| 1264 | { | ||
| 1265 | task_ctx_sched_out(ctx, EVENT_ALL); | ||
| 1266 | } | ||
| 1267 | |||
| 1268 | /* | ||
| 1269 | * Called with IRQs disabled | ||
| 1270 | */ | ||
| 1271 | static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx, | ||
| 1272 | enum event_type_t event_type) | ||
| 1249 | { | 1273 | { |
| 1250 | __perf_event_sched_out(&cpuctx->ctx, cpuctx); | 1274 | ctx_sched_out(&cpuctx->ctx, cpuctx, event_type); |
| 1251 | } | 1275 | } |
| 1252 | 1276 | ||
| 1253 | static void | 1277 | static void |
| 1254 | __perf_event_sched_in(struct perf_event_context *ctx, | 1278 | ctx_pinned_sched_in(struct perf_event_context *ctx, |
| 1255 | struct perf_cpu_context *cpuctx, int cpu) | 1279 | struct perf_cpu_context *cpuctx) |
| 1256 | { | 1280 | { |
| 1257 | struct perf_event *event; | 1281 | struct perf_event *event; |
| 1258 | int can_add_hw = 1; | ||
| 1259 | |||
| 1260 | raw_spin_lock(&ctx->lock); | ||
| 1261 | ctx->is_active = 1; | ||
| 1262 | if (likely(!ctx->nr_events)) | ||
| 1263 | goto out; | ||
| 1264 | |||
| 1265 | ctx->timestamp = perf_clock(); | ||
| 1266 | |||
| 1267 | perf_disable(); | ||
| 1268 | 1282 | ||
| 1269 | /* | 1283 | list_for_each_entry(event, &ctx->pinned_groups, group_entry) { |
| 1270 | * First go through the list and put on any pinned groups | 1284 | if (event->state <= PERF_EVENT_STATE_OFF) |
| 1271 | * in order to give them the best chance of going on. | ||
| 1272 | */ | ||
| 1273 | list_for_each_entry(event, &ctx->group_list, group_entry) { | ||
| 1274 | if (event->state <= PERF_EVENT_STATE_OFF || | ||
| 1275 | !event->attr.pinned) | ||
| 1276 | continue; | 1285 | continue; |
| 1277 | if (event->cpu != -1 && event->cpu != cpu) | 1286 | if (event->cpu != -1 && event->cpu != smp_processor_id()) |
| 1278 | continue; | 1287 | continue; |
| 1279 | 1288 | ||
| 1280 | if (group_can_go_on(event, cpuctx, 1)) | 1289 | if (group_can_go_on(event, cpuctx, 1)) |
| 1281 | group_sched_in(event, cpuctx, ctx, cpu); | 1290 | group_sched_in(event, cpuctx, ctx); |
| 1282 | 1291 | ||
| 1283 | /* | 1292 | /* |
| 1284 | * If this pinned group hasn't been scheduled, | 1293 | * If this pinned group hasn't been scheduled, |
| @@ -1289,32 +1298,83 @@ __perf_event_sched_in(struct perf_event_context *ctx, | |||
| 1289 | event->state = PERF_EVENT_STATE_ERROR; | 1298 | event->state = PERF_EVENT_STATE_ERROR; |
| 1290 | } | 1299 | } |
| 1291 | } | 1300 | } |
| 1301 | } | ||
| 1292 | 1302 | ||
| 1293 | list_for_each_entry(event, &ctx->group_list, group_entry) { | 1303 | static void |
| 1294 | /* | 1304 | ctx_flexible_sched_in(struct perf_event_context *ctx, |
| 1295 | * Ignore events in OFF or ERROR state, and | 1305 | struct perf_cpu_context *cpuctx) |
| 1296 | * ignore pinned events since we did them already. | 1306 | { |
| 1297 | */ | 1307 | struct perf_event *event; |
| 1298 | if (event->state <= PERF_EVENT_STATE_OFF || | 1308 | int can_add_hw = 1; |
| 1299 | event->attr.pinned) | ||
| 1300 | continue; | ||
| 1301 | 1309 | ||
| 1310 | list_for_each_entry(event, &ctx->flexible_groups, group_entry) { | ||
| 1311 | /* Ignore events in OFF or ERROR state */ | ||
| 1312 | if (event->state <= PERF_EVENT_STATE_OFF) | ||
| 1313 | continue; | ||
| 1302 | /* | 1314 | /* |
| 1303 | * Listen to the 'cpu' scheduling filter constraint | 1315 | * Listen to the 'cpu' scheduling filter constraint |
| 1304 | * of events: | 1316 | * of events: |
| 1305 | */ | 1317 | */ |
| 1306 | if (event->cpu != -1 && event->cpu != cpu) | 1318 | if (event->cpu != -1 && event->cpu != smp_processor_id()) |
| 1307 | continue; | 1319 | continue; |
| 1308 | 1320 | ||
| 1309 | if (group_can_go_on(event, cpuctx, can_add_hw)) | 1321 | if (group_can_go_on(event, cpuctx, can_add_hw)) |
| 1310 | if (group_sched_in(event, cpuctx, ctx, cpu)) | 1322 | if (group_sched_in(event, cpuctx, ctx)) |
| 1311 | can_add_hw = 0; | 1323 | can_add_hw = 0; |
| 1312 | } | 1324 | } |
| 1325 | } | ||
| 1326 | |||
| 1327 | static void | ||
| 1328 | ctx_sched_in(struct perf_event_context *ctx, | ||
| 1329 | struct perf_cpu_context *cpuctx, | ||
| 1330 | enum event_type_t event_type) | ||
| 1331 | { | ||
| 1332 | raw_spin_lock(&ctx->lock); | ||
| 1333 | ctx->is_active = 1; | ||
| 1334 | if (likely(!ctx->nr_events)) | ||
| 1335 | goto out; | ||
| 1336 | |||
| 1337 | ctx->timestamp = perf_clock(); | ||
| 1338 | |||
| 1339 | perf_disable(); | ||
| 1340 | |||
| 1341 | /* | ||
| 1342 | * First go through the list and put on any pinned groups | ||
| 1343 | * in order to give them the best chance of going on. | ||
| 1344 | */ | ||
| 1345 | if (event_type & EVENT_PINNED) | ||
| 1346 | ctx_pinned_sched_in(ctx, cpuctx); | ||
| 1347 | |||
| 1348 | /* Then walk through the lower prio flexible groups */ | ||
| 1349 | if (event_type & EVENT_FLEXIBLE) | ||
| 1350 | ctx_flexible_sched_in(ctx, cpuctx); | ||
| 1351 | |||
| 1313 | perf_enable(); | 1352 | perf_enable(); |
| 1314 | out: | 1353 | out: |
| 1315 | raw_spin_unlock(&ctx->lock); | 1354 | raw_spin_unlock(&ctx->lock); |
| 1316 | } | 1355 | } |
| 1317 | 1356 | ||
| 1357 | static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, | ||
| 1358 | enum event_type_t event_type) | ||
| 1359 | { | ||
| 1360 | struct perf_event_context *ctx = &cpuctx->ctx; | ||
| 1361 | |||
| 1362 | ctx_sched_in(ctx, cpuctx, event_type); | ||
| 1363 | } | ||
| 1364 | |||
| 1365 | static void task_ctx_sched_in(struct task_struct *task, | ||
| 1366 | enum event_type_t event_type) | ||
| 1367 | { | ||
| 1368 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | ||
| 1369 | struct perf_event_context *ctx = task->perf_event_ctxp; | ||
| 1370 | |||
| 1371 | if (likely(!ctx)) | ||
| 1372 | return; | ||
| 1373 | if (cpuctx->task_ctx == ctx) | ||
| 1374 | return; | ||
| 1375 | ctx_sched_in(ctx, cpuctx, event_type); | ||
| 1376 | cpuctx->task_ctx = ctx; | ||
| 1377 | } | ||
| 1318 | /* | 1378 | /* |
| 1319 | * Called from scheduler to add the events of the current task | 1379 | * Called from scheduler to add the events of the current task |
| 1320 | * with interrupts disabled. | 1380 | * with interrupts disabled. |
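
Aside (not part of the patch): ctx_sched_in()/ctx_sched_out() now take an event_type bitmask so callers can move only the flexible groups while leaving pinned ones in place, which is what the tick rotation path later in the diff relies on. A tiny standalone model of that dispatch (the enum values are copied from the patch, the rest is made up):

    #include <stdio.h>

    enum event_type_t {
            EVENT_FLEXIBLE = 0x1,
            EVENT_PINNED   = 0x2,
            EVENT_ALL      = EVENT_FLEXIBLE | EVENT_PINNED,
    };

    static void toy_ctx_sched_in(enum event_type_t event_type)
    {
            if (event_type & EVENT_PINNED)
                    printf("  walk pinned_groups\n");
            if (event_type & EVENT_FLEXIBLE)
                    printf("  walk flexible_groups\n");
    }

    int main(void)
    {
            printf("context-switch-out path uses EVENT_ALL:\n");
            toy_ctx_sched_in(EVENT_ALL);
            printf("tick/rotation path uses EVENT_FLEXIBLE:\n");
            toy_ctx_sched_in(EVENT_FLEXIBLE);
            return 0;
    }
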
| @@ -1326,38 +1386,128 @@ __perf_event_sched_in(struct perf_event_context *ctx, | |||
| 1326 | * accessing the event control register. If a NMI hits, then it will | 1386 | * accessing the event control register. If a NMI hits, then it will |
| 1327 | * keep the event running. | 1387 | * keep the event running. |
| 1328 | */ | 1388 | */ |
| 1329 | void perf_event_task_sched_in(struct task_struct *task, int cpu) | 1389 | void perf_event_task_sched_in(struct task_struct *task) |
| 1330 | { | 1390 | { |
| 1331 | struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); | 1391 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); |
| 1332 | struct perf_event_context *ctx = task->perf_event_ctxp; | 1392 | struct perf_event_context *ctx = task->perf_event_ctxp; |
| 1333 | 1393 | ||
| 1334 | if (likely(!ctx)) | 1394 | if (likely(!ctx)) |
| 1335 | return; | 1395 | return; |
| 1396 | |||
| 1336 | if (cpuctx->task_ctx == ctx) | 1397 | if (cpuctx->task_ctx == ctx) |
| 1337 | return; | 1398 | return; |
| 1338 | __perf_event_sched_in(ctx, cpuctx, cpu); | 1399 | |
| 1400 | /* | ||
| 1401 | * We want to keep the following priority order: | ||
| 1402 | * cpu pinned (that don't need to move), task pinned, | ||
| 1403 | * cpu flexible, task flexible. | ||
| 1404 | */ | ||
| 1405 | cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); | ||
| 1406 | |||
| 1407 | ctx_sched_in(ctx, cpuctx, EVENT_PINNED); | ||
| 1408 | cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); | ||
| 1409 | ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE); | ||
| 1410 | |||
| 1339 | cpuctx->task_ctx = ctx; | 1411 | cpuctx->task_ctx = ctx; |
| 1340 | } | 1412 | } |
| 1341 | 1413 | ||
| 1342 | static void perf_event_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu) | 1414 | #define MAX_INTERRUPTS (~0ULL) |
| 1415 | |||
| 1416 | static void perf_log_throttle(struct perf_event *event, int enable); | ||
| 1417 | |||
| 1418 | static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) | ||
| 1343 | { | 1419 | { |
| 1344 | struct perf_event_context *ctx = &cpuctx->ctx; | 1420 | u64 frequency = event->attr.sample_freq; |
| 1421 | u64 sec = NSEC_PER_SEC; | ||
| 1422 | u64 divisor, dividend; | ||
| 1423 | |||
| 1424 | int count_fls, nsec_fls, frequency_fls, sec_fls; | ||
| 1425 | |||
| 1426 | count_fls = fls64(count); | ||
| 1427 | nsec_fls = fls64(nsec); | ||
| 1428 | frequency_fls = fls64(frequency); | ||
| 1429 | sec_fls = 30; | ||
| 1430 | |||
| 1431 | /* | ||
| 1432 | * We got @count in @nsec, with a target of sample_freq HZ | ||
| 1433 | * the target period becomes: | ||
| 1434 | * | ||
| 1435 | * @count * 10^9 | ||
| 1436 | * period = ------------------- | ||
| 1437 | * @nsec * sample_freq | ||
| 1438 | * | ||
| 1439 | */ | ||
| 1440 | |||
| 1441 | /* | ||
| 1442 | * Reduce accuracy by one bit such that @a and @b converge | ||
| 1443 | * to a similar magnitude. | ||
| 1444 | */ | ||
| 1445 | #define REDUCE_FLS(a, b) \ | ||
| 1446 | do { \ | ||
| 1447 | if (a##_fls > b##_fls) { \ | ||
| 1448 | a >>= 1; \ | ||
| 1449 | a##_fls--; \ | ||
| 1450 | } else { \ | ||
| 1451 | b >>= 1; \ | ||
| 1452 | b##_fls--; \ | ||
| 1453 | } \ | ||
| 1454 | } while (0) | ||
| 1455 | |||
| 1456 | /* | ||
| 1457 | * Reduce accuracy until either term fits in a u64, then proceed with | ||
| 1458 | * the other, so that finally we can do a u64/u64 division. | ||
| 1459 | */ | ||
| 1460 | while (count_fls + sec_fls > 64 && nsec_fls + frequency_fls > 64) { | ||
| 1461 | REDUCE_FLS(nsec, frequency); | ||
| 1462 | REDUCE_FLS(sec, count); | ||
| 1463 | } | ||
| 1464 | |||
| 1465 | if (count_fls + sec_fls > 64) { | ||
| 1466 | divisor = nsec * frequency; | ||
| 1345 | 1467 | ||
| 1346 | __perf_event_sched_in(ctx, cpuctx, cpu); | 1468 | while (count_fls + sec_fls > 64) { |
| 1469 | REDUCE_FLS(count, sec); | ||
| 1470 | divisor >>= 1; | ||
| 1471 | } | ||
| 1472 | |||
| 1473 | dividend = count * sec; | ||
| 1474 | } else { | ||
| 1475 | dividend = count * sec; | ||
| 1476 | |||
| 1477 | while (nsec_fls + frequency_fls > 64) { | ||
| 1478 | REDUCE_FLS(nsec, frequency); | ||
| 1479 | dividend >>= 1; | ||
| 1480 | } | ||
| 1481 | |||
| 1482 | divisor = nsec * frequency; | ||
| 1483 | } | ||
| 1484 | |||
| 1485 | return div64_u64(dividend, divisor); | ||
| 1347 | } | 1486 | } |
| 1348 | 1487 | ||
| 1349 | #define MAX_INTERRUPTS (~0ULL) | 1488 | static void perf_event_stop(struct perf_event *event) |
| 1489 | { | ||
| 1490 | if (!event->pmu->stop) | ||
| 1491 | return event->pmu->disable(event); | ||
| 1350 | 1492 | ||
| 1351 | static void perf_log_throttle(struct perf_event *event, int enable); | 1493 | return event->pmu->stop(event); |
| 1494 | } | ||
| 1495 | |||
| 1496 | static int perf_event_start(struct perf_event *event) | ||
| 1497 | { | ||
| 1498 | if (!event->pmu->start) | ||
| 1499 | return event->pmu->enable(event); | ||
| 1500 | |||
| 1501 | return event->pmu->start(event); | ||
| 1502 | } | ||
| 1352 | 1503 | ||
| 1353 | static void perf_adjust_period(struct perf_event *event, u64 events) | 1504 | static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) |
| 1354 | { | 1505 | { |
| 1355 | struct hw_perf_event *hwc = &event->hw; | 1506 | struct hw_perf_event *hwc = &event->hw; |
| 1356 | u64 period, sample_period; | 1507 | u64 period, sample_period; |
| 1357 | s64 delta; | 1508 | s64 delta; |
| 1358 | 1509 | ||
| 1359 | events *= hwc->sample_period; | 1510 | period = perf_calculate_period(event, nsec, count); |
| 1360 | period = div64_u64(events, event->attr.sample_freq); | ||
| 1361 | 1511 | ||
| 1362 | delta = (s64)(period - hwc->sample_period); | 1512 | delta = (s64)(period - hwc->sample_period); |
| 1363 | delta = (delta + 7) / 8; /* low pass filter */ | 1513 | delta = (delta + 7) / 8; /* low pass filter */ |
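
Worked example (not part of the patch): the comment in perf_calculate_period() gives period = (count * 10^9) / (nsec * sample_freq), and the REDUCE_FLS() loop only exists so both terms fit in 64 bits before the final div64_u64(). The same arithmetic in a standalone sketch, sidestepping the bit-shaving with a 128-bit intermediate (GCC/Clang extension), using made-up sample values:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define NSEC_PER_SEC 1000000000ULL

    /* Naive version of the target-period formula; the kernel instead
     * shaves low bits with REDUCE_FLS() until a u64/u64 divide is safe. */
    static uint64_t calc_period(uint64_t count, uint64_t nsec, uint64_t freq)
    {
            unsigned __int128 dividend = (unsigned __int128)count * NSEC_PER_SEC;
            unsigned __int128 divisor  = (unsigned __int128)nsec * freq;

            return divisor ? (uint64_t)(dividend / divisor) : 0;
    }

    int main(void)
    {
            /* 4,000,000 events counted over a 1 ms tick, sample_freq = 4000 Hz:
             * period = 4e6 * 1e9 / (1e6 * 4000) = 1,000,000 events per sample. */
            printf("%" PRIu64 "\n", calc_period(4000000, 1000000, 4000));
            return 0;
    }
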
| @@ -1368,13 +1518,22 @@ static void perf_adjust_period(struct perf_event *event, u64 events) | |||
| 1368 | sample_period = 1; | 1518 | sample_period = 1; |
| 1369 | 1519 | ||
| 1370 | hwc->sample_period = sample_period; | 1520 | hwc->sample_period = sample_period; |
| 1521 | |||
| 1522 | if (atomic64_read(&hwc->period_left) > 8*sample_period) { | ||
| 1523 | perf_disable(); | ||
| 1524 | perf_event_stop(event); | ||
| 1525 | atomic64_set(&hwc->period_left, 0); | ||
| 1526 | perf_event_start(event); | ||
| 1527 | perf_enable(); | ||
| 1528 | } | ||
| 1371 | } | 1529 | } |
| 1372 | 1530 | ||
| 1373 | static void perf_ctx_adjust_freq(struct perf_event_context *ctx) | 1531 | static void perf_ctx_adjust_freq(struct perf_event_context *ctx) |
| 1374 | { | 1532 | { |
| 1375 | struct perf_event *event; | 1533 | struct perf_event *event; |
| 1376 | struct hw_perf_event *hwc; | 1534 | struct hw_perf_event *hwc; |
| 1377 | u64 interrupts, freq; | 1535 | u64 interrupts, now; |
| 1536 | s64 delta; | ||
| 1378 | 1537 | ||
| 1379 | raw_spin_lock(&ctx->lock); | 1538 | raw_spin_lock(&ctx->lock); |
| 1380 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { | 1539 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { |
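
Aside (not from the patch): perf_adjust_period() still smooths the jump with delta = (delta + 7) / 8, i.e. the sample period only moves about an eighth of the way toward the freshly computed target on each adjustment, and the hunk above additionally restarts the event (stop, clear period_left, start) once more than eight periods' worth of count is still pending, so a stale, overly large period cannot stall sampling. A toy run of the smoothing alone:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            int64_t sample_period = 1000000;    /* current period            */
            int64_t target        = 10000;      /* freshly calculated period */

            for (int tick = 0; tick < 8; tick++) {
                    int64_t delta = target - sample_period;

                    delta = (delta + 7) / 8;    /* low pass filter */
                    sample_period += delta;
                    printf("tick %d: period %lld\n", tick, (long long)sample_period);
            }
            return 0;
    }
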
| @@ -1395,44 +1554,18 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx) | |||
| 1395 | if (interrupts == MAX_INTERRUPTS) { | 1554 | if (interrupts == MAX_INTERRUPTS) { |
| 1396 | perf_log_throttle(event, 1); | 1555 | perf_log_throttle(event, 1); |
| 1397 | event->pmu->unthrottle(event); | 1556 | event->pmu->unthrottle(event); |
| 1398 | interrupts = 2*sysctl_perf_event_sample_rate/HZ; | ||
| 1399 | } | 1557 | } |
| 1400 | 1558 | ||
| 1401 | if (!event->attr.freq || !event->attr.sample_freq) | 1559 | if (!event->attr.freq || !event->attr.sample_freq) |
| 1402 | continue; | 1560 | continue; |
| 1403 | 1561 | ||
| 1404 | /* | 1562 | event->pmu->read(event); |
| 1405 | * if the specified freq < HZ then we need to skip ticks | 1563 | now = atomic64_read(&event->count); |
| 1406 | */ | 1564 | delta = now - hwc->freq_count_stamp; |
| 1407 | if (event->attr.sample_freq < HZ) { | 1565 | hwc->freq_count_stamp = now; |
| 1408 | freq = event->attr.sample_freq; | ||
| 1409 | |||
| 1410 | hwc->freq_count += freq; | ||
| 1411 | hwc->freq_interrupts += interrupts; | ||
| 1412 | |||
| 1413 | if (hwc->freq_count < HZ) | ||
| 1414 | continue; | ||
| 1415 | |||
| 1416 | interrupts = hwc->freq_interrupts; | ||
| 1417 | hwc->freq_interrupts = 0; | ||
| 1418 | hwc->freq_count -= HZ; | ||
| 1419 | } else | ||
| 1420 | freq = HZ; | ||
| 1421 | |||
| 1422 | perf_adjust_period(event, freq * interrupts); | ||
| 1423 | 1566 | ||
| 1424 | /* | 1567 | if (delta > 0) |
| 1425 | * In order to avoid being stalled by an (accidental) huge | 1568 | perf_adjust_period(event, TICK_NSEC, delta); |
| 1426 | * sample period, force reset the sample period if we didn't | ||
| 1427 | * get any events in this freq period. | ||
| 1428 | */ | ||
| 1429 | if (!interrupts) { | ||
| 1430 | perf_disable(); | ||
| 1431 | event->pmu->disable(event); | ||
| 1432 | atomic64_set(&hwc->period_left, 0); | ||
| 1433 | event->pmu->enable(event); | ||
| 1434 | perf_enable(); | ||
| 1435 | } | ||
| 1436 | } | 1569 | } |
| 1437 | raw_spin_unlock(&ctx->lock); | 1570 | raw_spin_unlock(&ctx->lock); |
| 1438 | } | 1571 | } |
| @@ -1442,26 +1575,18 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx) | |||
| 1442 | */ | 1575 | */ |
| 1443 | static void rotate_ctx(struct perf_event_context *ctx) | 1576 | static void rotate_ctx(struct perf_event_context *ctx) |
| 1444 | { | 1577 | { |
| 1445 | struct perf_event *event; | ||
| 1446 | |||
| 1447 | if (!ctx->nr_events) | 1578 | if (!ctx->nr_events) |
| 1448 | return; | 1579 | return; |
| 1449 | 1580 | ||
| 1450 | raw_spin_lock(&ctx->lock); | 1581 | raw_spin_lock(&ctx->lock); |
| 1451 | /* | 1582 | |
| 1452 | * Rotate the first entry last (works just fine for group events too): | 1583 | /* Rotate the first entry last of non-pinned groups */ |
| 1453 | */ | 1584 | list_rotate_left(&ctx->flexible_groups); |
| 1454 | perf_disable(); | ||
| 1455 | list_for_each_entry(event, &ctx->group_list, group_entry) { | ||
| 1456 | list_move_tail(&event->group_entry, &ctx->group_list); | ||
| 1457 | break; | ||
| 1458 | } | ||
| 1459 | perf_enable(); | ||
| 1460 | 1585 | ||
| 1461 | raw_spin_unlock(&ctx->lock); | 1586 | raw_spin_unlock(&ctx->lock); |
| 1462 | } | 1587 | } |
| 1463 | 1588 | ||
| 1464 | void perf_event_task_tick(struct task_struct *curr, int cpu) | 1589 | void perf_event_task_tick(struct task_struct *curr) |
| 1465 | { | 1590 | { |
| 1466 | struct perf_cpu_context *cpuctx; | 1591 | struct perf_cpu_context *cpuctx; |
| 1467 | struct perf_event_context *ctx; | 1592 | struct perf_event_context *ctx; |
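
Aside (not from the patch): rotate_ctx() now rotates only ctx->flexible_groups, via list_rotate_left(), which moves the first list entry to the tail; pinned groups are never rotated. The round-robin effect, modelled on a plain array instead of a kernel list_head:

    #include <stdio.h>
    #include <string.h>

    /* Move the first element to the back, like list_rotate_left(). */
    static void rotate_left(const char **groups, int n)
    {
            const char *first = groups[0];

            memmove(groups, groups + 1, (n - 1) * sizeof(*groups));
            groups[n - 1] = first;
    }

    int main(void)
    {
            const char *flexible[] = { "grpA", "grpB", "grpC" };

            /* The group at the head gets first pick of the PMU each tick. */
            for (int tick = 0; tick < 3; tick++) {
                    printf("tick %d: %s %s %s\n", tick,
                           flexible[0], flexible[1], flexible[2]);
                    rotate_left(flexible, 3);
            }
            return 0;
    }
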
| @@ -1469,24 +1594,43 @@ void perf_event_task_tick(struct task_struct *curr, int cpu) | |||
| 1469 | if (!atomic_read(&nr_events)) | 1594 | if (!atomic_read(&nr_events)) |
| 1470 | return; | 1595 | return; |
| 1471 | 1596 | ||
| 1472 | cpuctx = &per_cpu(perf_cpu_context, cpu); | 1597 | cpuctx = &__get_cpu_var(perf_cpu_context); |
| 1473 | ctx = curr->perf_event_ctxp; | 1598 | ctx = curr->perf_event_ctxp; |
| 1474 | 1599 | ||
| 1600 | perf_disable(); | ||
| 1601 | |||
| 1475 | perf_ctx_adjust_freq(&cpuctx->ctx); | 1602 | perf_ctx_adjust_freq(&cpuctx->ctx); |
| 1476 | if (ctx) | 1603 | if (ctx) |
| 1477 | perf_ctx_adjust_freq(ctx); | 1604 | perf_ctx_adjust_freq(ctx); |
| 1478 | 1605 | ||
| 1479 | perf_event_cpu_sched_out(cpuctx); | 1606 | cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); |
| 1480 | if (ctx) | 1607 | if (ctx) |
| 1481 | __perf_event_task_sched_out(ctx); | 1608 | task_ctx_sched_out(ctx, EVENT_FLEXIBLE); |
| 1482 | 1609 | ||
| 1483 | rotate_ctx(&cpuctx->ctx); | 1610 | rotate_ctx(&cpuctx->ctx); |
| 1484 | if (ctx) | 1611 | if (ctx) |
| 1485 | rotate_ctx(ctx); | 1612 | rotate_ctx(ctx); |
| 1486 | 1613 | ||
| 1487 | perf_event_cpu_sched_in(cpuctx, cpu); | 1614 | cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); |
| 1488 | if (ctx) | 1615 | if (ctx) |
| 1489 | perf_event_task_sched_in(curr, cpu); | 1616 | task_ctx_sched_in(curr, EVENT_FLEXIBLE); |
| 1617 | |||
| 1618 | perf_enable(); | ||
| 1619 | } | ||
| 1620 | |||
| 1621 | static int event_enable_on_exec(struct perf_event *event, | ||
| 1622 | struct perf_event_context *ctx) | ||
| 1623 | { | ||
| 1624 | if (!event->attr.enable_on_exec) | ||
| 1625 | return 0; | ||
| 1626 | |||
| 1627 | event->attr.enable_on_exec = 0; | ||
| 1628 | if (event->state >= PERF_EVENT_STATE_INACTIVE) | ||
| 1629 | return 0; | ||
| 1630 | |||
| 1631 | __perf_event_mark_enabled(event, ctx); | ||
| 1632 | |||
| 1633 | return 1; | ||
| 1490 | } | 1634 | } |
| 1491 | 1635 | ||
| 1492 | /* | 1636 | /* |
| @@ -1499,6 +1643,7 @@ static void perf_event_enable_on_exec(struct task_struct *task) | |||
| 1499 | struct perf_event *event; | 1643 | struct perf_event *event; |
| 1500 | unsigned long flags; | 1644 | unsigned long flags; |
| 1501 | int enabled = 0; | 1645 | int enabled = 0; |
| 1646 | int ret; | ||
| 1502 | 1647 | ||
| 1503 | local_irq_save(flags); | 1648 | local_irq_save(flags); |
| 1504 | ctx = task->perf_event_ctxp; | 1649 | ctx = task->perf_event_ctxp; |
| @@ -1509,14 +1654,16 @@ static void perf_event_enable_on_exec(struct task_struct *task) | |||
| 1509 | 1654 | ||
| 1510 | raw_spin_lock(&ctx->lock); | 1655 | raw_spin_lock(&ctx->lock); |
| 1511 | 1656 | ||
| 1512 | list_for_each_entry(event, &ctx->group_list, group_entry) { | 1657 | list_for_each_entry(event, &ctx->pinned_groups, group_entry) { |
| 1513 | if (!event->attr.enable_on_exec) | 1658 | ret = event_enable_on_exec(event, ctx); |
| 1514 | continue; | 1659 | if (ret) |
| 1515 | event->attr.enable_on_exec = 0; | 1660 | enabled = 1; |
| 1516 | if (event->state >= PERF_EVENT_STATE_INACTIVE) | 1661 | } |
| 1517 | continue; | 1662 | |
| 1518 | __perf_event_mark_enabled(event, ctx); | 1663 | list_for_each_entry(event, &ctx->flexible_groups, group_entry) { |
| 1519 | enabled = 1; | 1664 | ret = event_enable_on_exec(event, ctx); |
| 1665 | if (ret) | ||
| 1666 | enabled = 1; | ||
| 1520 | } | 1667 | } |
| 1521 | 1668 | ||
| 1522 | /* | 1669 | /* |
| @@ -1527,7 +1674,7 @@ static void perf_event_enable_on_exec(struct task_struct *task) | |||
| 1527 | 1674 | ||
| 1528 | raw_spin_unlock(&ctx->lock); | 1675 | raw_spin_unlock(&ctx->lock); |
| 1529 | 1676 | ||
| 1530 | perf_event_task_sched_in(task, smp_processor_id()); | 1677 | perf_event_task_sched_in(task); |
| 1531 | out: | 1678 | out: |
| 1532 | local_irq_restore(flags); | 1679 | local_irq_restore(flags); |
| 1533 | } | 1680 | } |
| @@ -1590,7 +1737,8 @@ __perf_event_init_context(struct perf_event_context *ctx, | |||
| 1590 | { | 1737 | { |
| 1591 | raw_spin_lock_init(&ctx->lock); | 1738 | raw_spin_lock_init(&ctx->lock); |
| 1592 | mutex_init(&ctx->mutex); | 1739 | mutex_init(&ctx->mutex); |
| 1593 | INIT_LIST_HEAD(&ctx->group_list); | 1740 | INIT_LIST_HEAD(&ctx->pinned_groups); |
| 1741 | INIT_LIST_HEAD(&ctx->flexible_groups); | ||
| 1594 | INIT_LIST_HEAD(&ctx->event_list); | 1742 | INIT_LIST_HEAD(&ctx->event_list); |
| 1595 | atomic_set(&ctx->refcount, 1); | 1743 | atomic_set(&ctx->refcount, 1); |
| 1596 | ctx->task = task; | 1744 | ctx->task = task; |
| @@ -3259,8 +3407,6 @@ static void perf_event_task_output(struct perf_event *event, | |||
| 3259 | task_event->event_id.tid = perf_event_tid(event, task); | 3407 | task_event->event_id.tid = perf_event_tid(event, task); |
| 3260 | task_event->event_id.ptid = perf_event_tid(event, current); | 3408 | task_event->event_id.ptid = perf_event_tid(event, current); |
| 3261 | 3409 | ||
| 3262 | task_event->event_id.time = perf_clock(); | ||
| 3263 | |||
| 3264 | perf_output_put(&handle, task_event->event_id); | 3410 | perf_output_put(&handle, task_event->event_id); |
| 3265 | 3411 | ||
| 3266 | perf_output_end(&handle); | 3412 | perf_output_end(&handle); |
| @@ -3268,7 +3414,7 @@ static void perf_event_task_output(struct perf_event *event, | |||
| 3268 | 3414 | ||
| 3269 | static int perf_event_task_match(struct perf_event *event) | 3415 | static int perf_event_task_match(struct perf_event *event) |
| 3270 | { | 3416 | { |
| 3271 | if (event->state != PERF_EVENT_STATE_ACTIVE) | 3417 | if (event->state < PERF_EVENT_STATE_INACTIVE) |
| 3272 | return 0; | 3418 | return 0; |
| 3273 | 3419 | ||
| 3274 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | 3420 | if (event->cpu != -1 && event->cpu != smp_processor_id()) |
| @@ -3300,7 +3446,7 @@ static void perf_event_task_event(struct perf_task_event *task_event) | |||
| 3300 | cpuctx = &get_cpu_var(perf_cpu_context); | 3446 | cpuctx = &get_cpu_var(perf_cpu_context); |
| 3301 | perf_event_task_ctx(&cpuctx->ctx, task_event); | 3447 | perf_event_task_ctx(&cpuctx->ctx, task_event); |
| 3302 | if (!ctx) | 3448 | if (!ctx) |
| 3303 | ctx = rcu_dereference(task_event->task->perf_event_ctxp); | 3449 | ctx = rcu_dereference(current->perf_event_ctxp); |
| 3304 | if (ctx) | 3450 | if (ctx) |
| 3305 | perf_event_task_ctx(ctx, task_event); | 3451 | perf_event_task_ctx(ctx, task_event); |
| 3306 | put_cpu_var(perf_cpu_context); | 3452 | put_cpu_var(perf_cpu_context); |
| @@ -3331,6 +3477,7 @@ static void perf_event_task(struct task_struct *task, | |||
| 3331 | /* .ppid */ | 3477 | /* .ppid */ |
| 3332 | /* .tid */ | 3478 | /* .tid */ |
| 3333 | /* .ptid */ | 3479 | /* .ptid */ |
| 3480 | .time = perf_clock(), | ||
| 3334 | }, | 3481 | }, |
| 3335 | }; | 3482 | }; |
| 3336 | 3483 | ||
| @@ -3380,7 +3527,7 @@ static void perf_event_comm_output(struct perf_event *event, | |||
| 3380 | 3527 | ||
| 3381 | static int perf_event_comm_match(struct perf_event *event) | 3528 | static int perf_event_comm_match(struct perf_event *event) |
| 3382 | { | 3529 | { |
| 3383 | if (event->state != PERF_EVENT_STATE_ACTIVE) | 3530 | if (event->state < PERF_EVENT_STATE_INACTIVE) |
| 3384 | return 0; | 3531 | return 0; |
| 3385 | 3532 | ||
| 3386 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | 3533 | if (event->cpu != -1 && event->cpu != smp_processor_id()) |
| @@ -3500,7 +3647,7 @@ static void perf_event_mmap_output(struct perf_event *event, | |||
| 3500 | static int perf_event_mmap_match(struct perf_event *event, | 3647 | static int perf_event_mmap_match(struct perf_event *event, |
| 3501 | struct perf_mmap_event *mmap_event) | 3648 | struct perf_mmap_event *mmap_event) |
| 3502 | { | 3649 | { |
| 3503 | if (event->state != PERF_EVENT_STATE_ACTIVE) | 3650 | if (event->state < PERF_EVENT_STATE_INACTIVE) |
| 3504 | return 0; | 3651 | return 0; |
| 3505 | 3652 | ||
| 3506 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | 3653 | if (event->cpu != -1 && event->cpu != smp_processor_id()) |
| @@ -3609,7 +3756,7 @@ void __perf_event_mmap(struct vm_area_struct *vma) | |||
| 3609 | /* .tid */ | 3756 | /* .tid */ |
| 3610 | .start = vma->vm_start, | 3757 | .start = vma->vm_start, |
| 3611 | .len = vma->vm_end - vma->vm_start, | 3758 | .len = vma->vm_end - vma->vm_start, |
| 3612 | .pgoff = vma->vm_pgoff, | 3759 | .pgoff = (u64)vma->vm_pgoff << PAGE_SHIFT, |
| 3613 | }, | 3760 | }, |
| 3614 | }; | 3761 | }; |
| 3615 | 3762 | ||
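
Aside (not from the patch): vm_pgoff is a file offset expressed in pages, while consumers of the mmap record want a byte offset they can combine with mapping addresses, hence the new `(u64)vma->vm_pgoff << PAGE_SHIFT`. With 4 KiB pages, for instance:

    #include <inttypes.h>
    #include <stdio.h>

    #define PAGE_SHIFT 12   /* 4 KiB pages assumed for the example */

    int main(void)
    {
            unsigned long vm_pgoff = 3;                          /* in pages */
            uint64_t pgoff = (uint64_t)vm_pgoff << PAGE_SHIFT;   /* in bytes */

            printf("page offset %lu -> byte offset %" PRIu64 "\n",
                   vm_pgoff, pgoff);
            return 0;
    }
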
| @@ -3689,12 +3836,12 @@ static int __perf_event_overflow(struct perf_event *event, int nmi, | |||
| 3689 | 3836 | ||
| 3690 | if (event->attr.freq) { | 3837 | if (event->attr.freq) { |
| 3691 | u64 now = perf_clock(); | 3838 | u64 now = perf_clock(); |
| 3692 | s64 delta = now - hwc->freq_stamp; | 3839 | s64 delta = now - hwc->freq_time_stamp; |
| 3693 | 3840 | ||
| 3694 | hwc->freq_stamp = now; | 3841 | hwc->freq_time_stamp = now; |
| 3695 | 3842 | ||
| 3696 | if (delta > 0 && delta < TICK_NSEC) | 3843 | if (delta > 0 && delta < 2*TICK_NSEC) |
| 3697 | perf_adjust_period(event, NSEC_PER_SEC / (int)delta); | 3844 | perf_adjust_period(event, delta, hwc->last_period); |
| 3698 | } | 3845 | } |
| 3699 | 3846 | ||
| 3700 | /* | 3847 | /* |
| @@ -4185,7 +4332,7 @@ static const struct pmu perf_ops_task_clock = { | |||
| 4185 | .read = task_clock_perf_event_read, | 4332 | .read = task_clock_perf_event_read, |
| 4186 | }; | 4333 | }; |
| 4187 | 4334 | ||
| 4188 | #ifdef CONFIG_EVENT_PROFILE | 4335 | #ifdef CONFIG_EVENT_TRACING |
| 4189 | 4336 | ||
| 4190 | void perf_tp_event(int event_id, u64 addr, u64 count, void *record, | 4337 | void perf_tp_event(int event_id, u64 addr, u64 count, void *record, |
| 4191 | int entry_size) | 4338 | int entry_size) |
| @@ -4290,7 +4437,7 @@ static void perf_event_free_filter(struct perf_event *event) | |||
| 4290 | { | 4437 | { |
| 4291 | } | 4438 | } |
| 4292 | 4439 | ||
| 4293 | #endif /* CONFIG_EVENT_PROFILE */ | 4440 | #endif /* CONFIG_EVENT_TRACING */ |
| 4294 | 4441 | ||
| 4295 | #ifdef CONFIG_HAVE_HW_BREAKPOINT | 4442 | #ifdef CONFIG_HAVE_HW_BREAKPOINT |
| 4296 | static void bp_perf_event_destroy(struct perf_event *event) | 4443 | static void bp_perf_event_destroy(struct perf_event *event) |
| @@ -4580,7 +4727,7 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, | |||
| 4580 | if (attr->type >= PERF_TYPE_MAX) | 4727 | if (attr->type >= PERF_TYPE_MAX) |
| 4581 | return -EINVAL; | 4728 | return -EINVAL; |
| 4582 | 4729 | ||
| 4583 | if (attr->__reserved_1 || attr->__reserved_2) | 4730 | if (attr->__reserved_1) |
| 4584 | return -EINVAL; | 4731 | return -EINVAL; |
| 4585 | 4732 | ||
| 4586 | if (attr->sample_type & ~(PERF_SAMPLE_MAX-1)) | 4733 | if (attr->sample_type & ~(PERF_SAMPLE_MAX-1)) |
| @@ -4871,8 +5018,15 @@ inherit_event(struct perf_event *parent_event, | |||
| 4871 | else | 5018 | else |
| 4872 | child_event->state = PERF_EVENT_STATE_OFF; | 5019 | child_event->state = PERF_EVENT_STATE_OFF; |
| 4873 | 5020 | ||
| 4874 | if (parent_event->attr.freq) | 5021 | if (parent_event->attr.freq) { |
| 4875 | child_event->hw.sample_period = parent_event->hw.sample_period; | 5022 | u64 sample_period = parent_event->hw.sample_period; |
| 5023 | struct hw_perf_event *hwc = &child_event->hw; | ||
| 5024 | |||
| 5025 | hwc->sample_period = sample_period; | ||
| 5026 | hwc->last_period = sample_period; | ||
| 5027 | |||
| 5028 | atomic64_set(&hwc->period_left, sample_period); | ||
| 5029 | } | ||
| 4876 | 5030 | ||
| 4877 | child_event->overflow_handler = parent_event->overflow_handler; | 5031 | child_event->overflow_handler = parent_event->overflow_handler; |
| 4878 | 5032 | ||
| @@ -5040,7 +5194,11 @@ void perf_event_exit_task(struct task_struct *child) | |||
| 5040 | mutex_lock_nested(&child_ctx->mutex, SINGLE_DEPTH_NESTING); | 5194 | mutex_lock_nested(&child_ctx->mutex, SINGLE_DEPTH_NESTING); |
| 5041 | 5195 | ||
| 5042 | again: | 5196 | again: |
| 5043 | list_for_each_entry_safe(child_event, tmp, &child_ctx->group_list, | 5197 | list_for_each_entry_safe(child_event, tmp, &child_ctx->pinned_groups, |
| 5198 | group_entry) | ||
| 5199 | __perf_event_exit_task(child_event, child_ctx, child); | ||
| 5200 | |||
| 5201 | list_for_each_entry_safe(child_event, tmp, &child_ctx->flexible_groups, | ||
| 5044 | group_entry) | 5202 | group_entry) |
| 5045 | __perf_event_exit_task(child_event, child_ctx, child); | 5203 | __perf_event_exit_task(child_event, child_ctx, child); |
| 5046 | 5204 | ||
| @@ -5049,7 +5207,8 @@ again: | |||
| 5049 | * its siblings to the list, but we obtained 'tmp' before that which | 5207 | * its siblings to the list, but we obtained 'tmp' before that which |
| 5050 | * will still point to the list head terminating the iteration. | 5208 | * will still point to the list head terminating the iteration. |
| 5051 | */ | 5209 | */ |
| 5052 | if (!list_empty(&child_ctx->group_list)) | 5210 | if (!list_empty(&child_ctx->pinned_groups) || |
| 5211 | !list_empty(&child_ctx->flexible_groups)) | ||
| 5053 | goto again; | 5212 | goto again; |
| 5054 | 5213 | ||
| 5055 | mutex_unlock(&child_ctx->mutex); | 5214 | mutex_unlock(&child_ctx->mutex); |
| @@ -5057,6 +5216,24 @@ again: | |||
| 5057 | put_ctx(child_ctx); | 5216 | put_ctx(child_ctx); |
| 5058 | } | 5217 | } |
| 5059 | 5218 | ||
| 5219 | static void perf_free_event(struct perf_event *event, | ||
| 5220 | struct perf_event_context *ctx) | ||
| 5221 | { | ||
| 5222 | struct perf_event *parent = event->parent; | ||
| 5223 | |||
| 5224 | if (WARN_ON_ONCE(!parent)) | ||
| 5225 | return; | ||
| 5226 | |||
| 5227 | mutex_lock(&parent->child_mutex); | ||
| 5228 | list_del_init(&event->child_list); | ||
| 5229 | mutex_unlock(&parent->child_mutex); | ||
| 5230 | |||
| 5231 | fput(parent->filp); | ||
| 5232 | |||
| 5233 | list_del_event(event, ctx); | ||
| 5234 | free_event(event); | ||
| 5235 | } | ||
| 5236 | |||
| 5060 | /* | 5237 | /* |
| 5061 | * free an unexposed, unused context as created by inheritance by | 5238 | * free an unexposed, unused context as created by inheritance by |
| 5062 | * init_task below, used by fork() in case of fail. | 5239 | * init_task below, used by fork() in case of fail. |
| @@ -5071,36 +5248,70 @@ void perf_event_free_task(struct task_struct *task) | |||
| 5071 | 5248 | ||
| 5072 | mutex_lock(&ctx->mutex); | 5249 | mutex_lock(&ctx->mutex); |
| 5073 | again: | 5250 | again: |
| 5074 | list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry) { | 5251 | list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) |
| 5075 | struct perf_event *parent = event->parent; | 5252 | perf_free_event(event, ctx); |
| 5076 | 5253 | ||
| 5077 | if (WARN_ON_ONCE(!parent)) | 5254 | list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, |
| 5078 | continue; | 5255 | group_entry) |
| 5256 | perf_free_event(event, ctx); | ||
| 5257 | |||
| 5258 | if (!list_empty(&ctx->pinned_groups) || | ||
| 5259 | !list_empty(&ctx->flexible_groups)) | ||
| 5260 | goto again; | ||
| 5079 | 5261 | ||
| 5080 | mutex_lock(&parent->child_mutex); | 5262 | mutex_unlock(&ctx->mutex); |
| 5081 | list_del_init(&event->child_list); | ||
| 5082 | mutex_unlock(&parent->child_mutex); | ||
| 5083 | 5263 | ||
| 5084 | fput(parent->filp); | 5264 | put_ctx(ctx); |
| 5265 | } | ||
| 5085 | 5266 | ||
| 5086 | list_del_event(event, ctx); | 5267 | static int |
| 5087 | free_event(event); | 5268 | inherit_task_group(struct perf_event *event, struct task_struct *parent, |
| 5269 | struct perf_event_context *parent_ctx, | ||
| 5270 | struct task_struct *child, | ||
| 5271 | int *inherited_all) | ||
| 5272 | { | ||
| 5273 | int ret; | ||
| 5274 | struct perf_event_context *child_ctx = child->perf_event_ctxp; | ||
| 5275 | |||
| 5276 | if (!event->attr.inherit) { | ||
| 5277 | *inherited_all = 0; | ||
| 5278 | return 0; | ||
| 5088 | } | 5279 | } |
| 5089 | 5280 | ||
| 5090 | if (!list_empty(&ctx->group_list)) | 5281 | if (!child_ctx) { |
| 5091 | goto again; | 5282 | /* |
| 5283 | * This is executed from the parent task context, so | ||
| 5284 | * inherit events that have been marked for cloning. | ||
| 5285 | * First allocate and initialize a context for the | ||
| 5286 | * child. | ||
| 5287 | */ | ||
| 5092 | 5288 | ||
| 5093 | mutex_unlock(&ctx->mutex); | 5289 | child_ctx = kzalloc(sizeof(struct perf_event_context), |
| 5290 | GFP_KERNEL); | ||
| 5291 | if (!child_ctx) | ||
| 5292 | return -ENOMEM; | ||
| 5094 | 5293 | ||
| 5095 | put_ctx(ctx); | 5294 | __perf_event_init_context(child_ctx, child); |
| 5295 | child->perf_event_ctxp = child_ctx; | ||
| 5296 | get_task_struct(child); | ||
| 5297 | } | ||
| 5298 | |||
| 5299 | ret = inherit_group(event, parent, parent_ctx, | ||
| 5300 | child, child_ctx); | ||
| 5301 | |||
| 5302 | if (ret) | ||
| 5303 | *inherited_all = 0; | ||
| 5304 | |||
| 5305 | return ret; | ||
| 5096 | } | 5306 | } |
| 5097 | 5307 | ||
| 5308 | |||
| 5098 | /* | 5309 | /* |
| 5099 | * Initialize the perf_event context in task_struct | 5310 | * Initialize the perf_event context in task_struct |
| 5100 | */ | 5311 | */ |
| 5101 | int perf_event_init_task(struct task_struct *child) | 5312 | int perf_event_init_task(struct task_struct *child) |
| 5102 | { | 5313 | { |
| 5103 | struct perf_event_context *child_ctx = NULL, *parent_ctx; | 5314 | struct perf_event_context *child_ctx, *parent_ctx; |
| 5104 | struct perf_event_context *cloned_ctx; | 5315 | struct perf_event_context *cloned_ctx; |
| 5105 | struct perf_event *event; | 5316 | struct perf_event *event; |
| 5106 | struct task_struct *parent = current; | 5317 | struct task_struct *parent = current; |
| @@ -5138,41 +5349,22 @@ int perf_event_init_task(struct task_struct *child) | |||
| 5138 | * We dont have to disable NMIs - we are only looking at | 5349 | * We dont have to disable NMIs - we are only looking at |
| 5139 | * the list, not manipulating it: | 5350 | * the list, not manipulating it: |
| 5140 | */ | 5351 | */ |
| 5141 | list_for_each_entry(event, &parent_ctx->group_list, group_entry) { | 5352 | list_for_each_entry(event, &parent_ctx->pinned_groups, group_entry) { |
| 5142 | 5353 | ret = inherit_task_group(event, parent, parent_ctx, child, | |
| 5143 | if (!event->attr.inherit) { | 5354 | &inherited_all); |
| 5144 | inherited_all = 0; | 5355 | if (ret) |
| 5145 | continue; | 5356 | break; |
| 5146 | } | 5357 | } |
| 5147 | |||
| 5148 | if (!child->perf_event_ctxp) { | ||
| 5149 | /* | ||
| 5150 | * This is executed from the parent task context, so | ||
| 5151 | * inherit events that have been marked for cloning. | ||
| 5152 | * First allocate and initialize a context for the | ||
| 5153 | * child. | ||
| 5154 | */ | ||
| 5155 | |||
| 5156 | child_ctx = kzalloc(sizeof(struct perf_event_context), | ||
| 5157 | GFP_KERNEL); | ||
| 5158 | if (!child_ctx) { | ||
| 5159 | ret = -ENOMEM; | ||
| 5160 | break; | ||
| 5161 | } | ||
| 5162 | |||
| 5163 | __perf_event_init_context(child_ctx, child); | ||
| 5164 | child->perf_event_ctxp = child_ctx; | ||
| 5165 | get_task_struct(child); | ||
| 5166 | } | ||
| 5167 | 5358 | ||
| 5168 | ret = inherit_group(event, parent, parent_ctx, | 5359 | list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) { |
| 5169 | child, child_ctx); | 5360 | ret = inherit_task_group(event, parent, parent_ctx, child, |
| 5170 | if (ret) { | 5361 | &inherited_all); |
| 5171 | inherited_all = 0; | 5362 | if (ret) |
| 5172 | break; | 5363 | break; |
| 5173 | } | ||
| 5174 | } | 5364 | } |
| 5175 | 5365 | ||
| 5366 | child_ctx = child->perf_event_ctxp; | ||
| 5367 | |||
| 5176 | if (child_ctx && inherited_all) { | 5368 | if (child_ctx && inherited_all) { |
| 5177 | /* | 5369 | /* |
| 5178 | * Mark the child context as a clone of the parent | 5370 | * Mark the child context as a clone of the parent |
| @@ -5221,7 +5413,9 @@ static void __perf_event_exit_cpu(void *info) | |||
| 5221 | struct perf_event_context *ctx = &cpuctx->ctx; | 5413 | struct perf_event_context *ctx = &cpuctx->ctx; |
| 5222 | struct perf_event *event, *tmp; | 5414 | struct perf_event *event, *tmp; |
| 5223 | 5415 | ||
| 5224 | list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry) | 5416 | list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) |
| 5417 | __perf_event_remove_from_context(event); | ||
| 5418 | list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry) | ||
| 5225 | __perf_event_remove_from_context(event); | 5419 | __perf_event_remove_from_context(event); |
| 5226 | } | 5420 | } |
| 5227 | static void perf_event_exit_cpu(int cpu) | 5421 | static void perf_event_exit_cpu(int cpu) |
| @@ -5259,6 +5453,10 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) | |||
| 5259 | perf_event_exit_cpu(cpu); | 5453 | perf_event_exit_cpu(cpu); |
| 5260 | break; | 5454 | break; |
| 5261 | 5455 | ||
| 5456 | case CPU_DEAD: | ||
| 5457 | hw_perf_event_setup_offline(cpu); | ||
| 5458 | break; | ||
| 5459 | |||
| 5262 | default: | 5460 | default: |
| 5263 | break; | 5461 | break; |
| 5264 | } | 5462 | } |
