Diffstat (limited to 'kernel/perf_counter.c')
-rw-r--r--	kernel/perf_counter.c	226
1 file changed, 154 insertions(+), 72 deletions(-)
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 52f2f526248e..faf671b29566 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -93,6 +93,25 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 	}
 }
 
+static void
+counter_sched_out(struct perf_counter *counter,
+		  struct perf_cpu_context *cpuctx,
+		  struct perf_counter_context *ctx)
+{
+	if (counter->state != PERF_COUNTER_STATE_ACTIVE)
+		return;
+
+	counter->state = PERF_COUNTER_STATE_INACTIVE;
+	counter->hw_ops->disable(counter);
+	counter->oncpu = -1;
+
+	if (!is_software_counter(counter))
+		cpuctx->active_oncpu--;
+	ctx->nr_active--;
+	if (counter->hw_event.exclusive || !cpuctx->active_oncpu)
+		cpuctx->exclusive = 0;
+}
+
 /*
  * Cross CPU call to remove a performance counter
  *
@@ -118,14 +137,9 @@ static void __perf_counter_remove_from_context(void *info)
 	curr_rq_lock_irq_save(&flags);
 	spin_lock(&ctx->lock);
 
-	if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
-		counter->state = PERF_COUNTER_STATE_INACTIVE;
-		counter->hw_ops->disable(counter);
-		ctx->nr_active--;
-		cpuctx->active_oncpu--;
-		counter->task = NULL;
-		counter->oncpu = -1;
-	}
+	counter_sched_out(counter, cpuctx, ctx);
+
+	counter->task = NULL;
 	ctx->nr_counters--;
 
 	/*
@@ -207,7 +221,7 @@ counter_sched_in(struct perf_counter *counter,
 		 struct perf_counter_context *ctx,
 		 int cpu)
 {
-	if (counter->state == PERF_COUNTER_STATE_OFF)
+	if (counter->state <= PERF_COUNTER_STATE_OFF)
 		return 0;
 
 	counter->state = PERF_COUNTER_STATE_ACTIVE;
@@ -223,13 +237,64 @@ counter_sched_in(struct perf_counter *counter,
 		return -EAGAIN;
 	}
 
-	cpuctx->active_oncpu++;
+	if (!is_software_counter(counter))
+		cpuctx->active_oncpu++;
 	ctx->nr_active++;
 
+	if (counter->hw_event.exclusive)
+		cpuctx->exclusive = 1;
+
 	return 0;
 }
 
 /*
+ * Return 1 for a group consisting entirely of software counters,
+ * 0 if the group contains any hardware counters.
+ */
+static int is_software_only_group(struct perf_counter *leader)
+{
+	struct perf_counter *counter;
+
+	if (!is_software_counter(leader))
+		return 0;
+	list_for_each_entry(counter, &leader->sibling_list, list_entry)
+		if (!is_software_counter(counter))
+			return 0;
+	return 1;
+}
+
+/*
+ * Work out whether we can put this counter group on the CPU now.
+ */
+static int group_can_go_on(struct perf_counter *counter,
+			   struct perf_cpu_context *cpuctx,
+			   int can_add_hw)
+{
+	/*
+	 * Groups consisting entirely of software counters can always go on.
+	 */
+	if (is_software_only_group(counter))
+		return 1;
+	/*
+	 * If an exclusive group is already on, no other hardware
+	 * counters can go on.
+	 */
+	if (cpuctx->exclusive)
+		return 0;
+	/*
+	 * If this group is exclusive and there are already
+	 * counters on the CPU, it can't go on.
+	 */
+	if (counter->hw_event.exclusive && cpuctx->active_oncpu)
+		return 0;
+	/*
+	 * Otherwise, try to add it if all previous groups were able
+	 * to go on.
+	 */
+	return can_add_hw;
+}
+
+/*
  * Cross CPU call to install and enable a performance counter
  */
 static void __perf_install_in_context(void *info)
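
The rule that group_can_go_on() encodes in the hunk above (software-only groups always fit, an exclusive group blocks and is blocked by other hardware counters, otherwise defer to can_add_hw) can be exercised in isolation. The following standalone sketch models the same decision; model_group, model_cpu_context and the main() driver are simplified stand-ins invented for illustration, not kernel types.

/*
 * Standalone model of the group_can_go_on() decision. Only the fields
 * that feed the decision are kept.
 */
#include <stdio.h>

struct model_cpu_context {
	int active_oncpu;	/* hardware counters currently on the PMU */
	int exclusive;		/* an exclusive group currently owns the PMU */
};

struct model_group {
	int software_only;	/* group consists entirely of software counters */
	int exclusive;		/* group was created with the exclusive attribute */
};

static int model_group_can_go_on(const struct model_group *grp,
				 const struct model_cpu_context *cpuctx,
				 int can_add_hw)
{
	/* Software-only groups never compete for hardware counters. */
	if (grp->software_only)
		return 1;
	/* An exclusive group already on the PMU blocks everyone else. */
	if (cpuctx->exclusive)
		return 0;
	/* An exclusive group cannot share the PMU with existing counters. */
	if (grp->exclusive && cpuctx->active_oncpu)
		return 0;
	/* Otherwise, only if all earlier groups fitted. */
	return can_add_hw;
}

int main(void)
{
	struct model_cpu_context cpu = { .active_oncpu = 2, .exclusive = 0 };
	struct model_group sw   = { .software_only = 1, .exclusive = 0 };
	struct model_group excl = { .software_only = 0, .exclusive = 1 };

	/* Prints 1: software-only groups always go on. */
	printf("software-only group: %d\n",
	       model_group_can_go_on(&sw, &cpu, 0));
	/* Prints 0: an exclusive group cannot join a non-empty PMU. */
	printf("exclusive group with counters on: %d\n",
	       model_group_can_go_on(&excl, &cpu, 1));
	return 0;
}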
@@ -240,6 +305,7 @@ static void __perf_install_in_context(void *info)
 	int cpu = smp_processor_id();
 	unsigned long flags;
 	u64 perf_flags;
+	int err;
 
 	/*
 	 * If this is a task context, we need to check whether it is
@@ -261,9 +327,21 @@ static void __perf_install_in_context(void *info)
 	list_add_counter(counter, ctx);
 	ctx->nr_counters++;
 
-	counter_sched_in(counter, cpuctx, ctx, cpu);
+	/*
+	 * An exclusive counter can't go on if there are already active
+	 * hardware counters, and no hardware counter can go on if there
+	 * is already an exclusive counter on.
+	 */
+	if (counter->state == PERF_COUNTER_STATE_INACTIVE &&
+	    !group_can_go_on(counter, cpuctx, 1))
+		err = -EEXIST;
+	else
+		err = counter_sched_in(counter, cpuctx, ctx, cpu);
+
+	if (err && counter->hw_event.pinned)
+		counter->state = PERF_COUNTER_STATE_ERROR;
 
-	if (!ctx->task && cpuctx->max_pertask)
+	if (!err && !ctx->task && cpuctx->max_pertask)
 		cpuctx->max_pertask--;
 
 	hw_perf_restore(perf_flags);
@@ -327,22 +405,6 @@ retry:
 }
 
 static void
-counter_sched_out(struct perf_counter *counter,
-		  struct perf_cpu_context *cpuctx,
-		  struct perf_counter_context *ctx)
-{
-	if (counter->state != PERF_COUNTER_STATE_ACTIVE)
-		return;
-
-	counter->state = PERF_COUNTER_STATE_INACTIVE;
-	counter->hw_ops->disable(counter);
-	counter->oncpu = -1;
-
-	cpuctx->active_oncpu--;
-	ctx->nr_active--;
-}
-
-static void
 group_sched_out(struct perf_counter *group_counter,
 		struct perf_cpu_context *cpuctx,
 		struct perf_counter_context *ctx)
@@ -359,6 +421,9 @@ group_sched_out(struct perf_counter *group_counter,
 	 */
 	list_for_each_entry(counter, &group_counter->sibling_list, list_entry)
 		counter_sched_out(counter, cpuctx, ctx);
+
+	if (group_counter->hw_event.exclusive)
+		cpuctx->exclusive = 0;
 }
 
 void __perf_counter_sched_out(struct perf_counter_context *ctx,
@@ -455,30 +520,6 @@ group_error:
 	return -EAGAIN;
 }
 
-/*
- * Return 1 for a software counter, 0 for a hardware counter
- */
-static inline int is_software_counter(struct perf_counter *counter)
-{
-	return !counter->hw_event.raw && counter->hw_event.type < 0;
-}
-
-/*
- * Return 1 for a group consisting entirely of software counters,
- * 0 if the group contains any hardware counters.
- */
-static int is_software_only_group(struct perf_counter *leader)
-{
-	struct perf_counter *counter;
-
-	if (!is_software_counter(leader))
-		return 0;
-	list_for_each_entry(counter, &leader->sibling_list, list_entry)
-		if (!is_software_counter(counter))
-			return 0;
-	return 1;
-}
-
 static void
 __perf_counter_sched_in(struct perf_counter_context *ctx,
 			struct perf_cpu_context *cpuctx, int cpu)
@@ -492,22 +533,49 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
 
 	spin_lock(&ctx->lock);
 	flags = hw_perf_save_disable();
+
+	/*
+	 * First go through the list and put on any pinned groups
+	 * in order to give them the best chance of going on.
+	 */
+	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+		if (counter->state <= PERF_COUNTER_STATE_OFF ||
+		    !counter->hw_event.pinned)
+			continue;
+		if (counter->cpu != -1 && counter->cpu != cpu)
+			continue;
+
+		if (group_can_go_on(counter, cpuctx, 1))
+			group_sched_in(counter, cpuctx, ctx, cpu);
+
+		/*
+		 * If this pinned group hasn't been scheduled,
+		 * put it in error state.
+		 */
+		if (counter->state == PERF_COUNTER_STATE_INACTIVE)
+			counter->state = PERF_COUNTER_STATE_ERROR;
+	}
+
 	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
 		/*
+		 * Ignore counters in OFF or ERROR state, and
+		 * ignore pinned counters since we did them already.
+		 */
+		if (counter->state <= PERF_COUNTER_STATE_OFF ||
+		    counter->hw_event.pinned)
+			continue;
+
+		/*
 		 * Listen to the 'cpu' scheduling filter constraint
 		 * of counters:
 		 */
		if (counter->cpu != -1 && counter->cpu != cpu)
 			continue;
 
-		/*
-		 * If we scheduled in a group atomically and exclusively,
-		 * or if this group can't go on, don't add any more
-		 * hardware counters.
-		 */
-		if (can_add_hw || is_software_only_group(counter))
+		if (group_can_go_on(counter, cpuctx, can_add_hw)) {
 			if (group_sched_in(counter, cpuctx, ctx, cpu))
 				can_add_hw = 0;
+		}
 	}
 	hw_perf_restore(flags);
 	spin_unlock(&ctx->lock);
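
The rewritten __perf_counter_sched_in() above makes two passes over the context: pinned groups are offered the PMU first and are put into error state if they do not fit, then the remaining groups are packed in until a hardware group fails to schedule. The sketch below is a minimal, userspace-buildable model of that two-pass loop; the types, the fixed two-slot "PMU", and the omission of the software-only special case are simplifications invented for illustration.

/*
 * Model of the two-pass scheduling loop: pinned groups first (failure is
 * fatal for them), then the rest while hardware slots remain.
 */
#include <stdio.h>

enum state { STATE_ERROR = -1, STATE_OFF = 0, STATE_INACTIVE = 1, STATE_ACTIVE = 2 };

struct model_counter {
	const char *name;
	int pinned;
	int hw_cost;		/* hardware counters the group needs */
	enum state state;
};

static int try_sched_in(struct model_counter *c, int *free_slots)
{
	if (c->hw_cost > *free_slots)
		return -1;	/* does not fit */
	*free_slots -= c->hw_cost;
	c->state = STATE_ACTIVE;
	return 0;
}

static void sched_in_all(struct model_counter *list, int n, int free_slots)
{
	int can_add_hw = 1;
	int i;

	/* Pass 1: pinned groups get first pick; failure puts them in error. */
	for (i = 0; i < n; i++) {
		if (!list[i].pinned || list[i].state <= STATE_OFF)
			continue;
		if (try_sched_in(&list[i], &free_slots))
			list[i].state = STATE_ERROR;
	}

	/* Pass 2: remaining groups; stop adding hardware once one fails. */
	for (i = 0; i < n; i++) {
		if (list[i].pinned || list[i].state <= STATE_OFF)
			continue;
		if (can_add_hw && try_sched_in(&list[i], &free_slots))
			can_add_hw = 0;
	}
}

int main(void)
{
	struct model_counter counters[] = {
		{ "pinned-A",   1, 1, STATE_INACTIVE },
		{ "flexible-B", 0, 1, STATE_INACTIVE },
		{ "flexible-C", 0, 1, STATE_INACTIVE },
	};
	int i;

	sched_in_all(counters, 3, 2);	/* pretend the PMU has 2 slots */
	for (i = 0; i < 3; i++)
		printf("%s -> state %d\n", counters[i].name, counters[i].state);
	return 0;
}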
@@ -567,8 +635,10 @@ int perf_counter_task_disable(void)
 	 */
 	perf_flags = hw_perf_save_disable();
 
-	list_for_each_entry(counter, &ctx->counter_list, list_entry)
-		counter->state = PERF_COUNTER_STATE_OFF;
+	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+		if (counter->state != PERF_COUNTER_STATE_ERROR)
+			counter->state = PERF_COUNTER_STATE_OFF;
+	}
 
 	hw_perf_restore(perf_flags);
 
@@ -607,7 +677,7 @@ int perf_counter_task_enable(void)
 	perf_flags = hw_perf_save_disable();
 
 	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
-		if (counter->state != PERF_COUNTER_STATE_OFF)
+		if (counter->state > PERF_COUNTER_STATE_OFF)
 			continue;
 		counter->state = PERF_COUNTER_STATE_INACTIVE;
 		counter->hw_event.disabled = 0;
@@ -849,6 +919,14 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
 	if (count != sizeof(cntval))
 		return -EINVAL;
 
+	/*
+	 * Return end-of-file for a read on a counter that is in
+	 * error state (i.e. because it was pinned but it couldn't be
+	 * scheduled on to the CPU at some point).
+	 */
+	if (counter->state == PERF_COUNTER_STATE_ERROR)
+		return 0;
+
 	mutex_lock(&counter->mutex);
 	cntval = perf_counter_read(counter);
 	mutex_unlock(&counter->mutex);
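
From userspace, the effect of the perf_read_hw() change above is that read() on a counter that ended up in error state returns 0 (end-of-file) instead of a counter value. A hedged sketch of how a caller might react, assuming counter_fd is an already-open counter file descriptor (how it is opened is not shown here):

#include <stdio.h>
#include <stdint.h>
#include <unistd.h>

/* Returns 0 on success, -1 if the counter is dead or the read failed. */
static int read_counter(int counter_fd, uint64_t *value)
{
	ssize_t n = read(counter_fd, value, sizeof(*value));

	if (n < 0) {
		perror("read");
		return -1;
	}
	if (n == 0) {
		/* EOF: the counter is in error state; stop using it. */
		fprintf(stderr, "counter in error state\n");
		return -1;
	}
	if ((size_t)n != sizeof(*value))
		return -1;	/* short read; not expected for this interface */
	return 0;
}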
@@ -884,7 +962,7 @@ perf_read_irq_data(struct perf_counter *counter,
 {
 	struct perf_data *irqdata, *usrdata;
 	DECLARE_WAITQUEUE(wait, current);
-	ssize_t res;
+	ssize_t res, res2;
 
 	irqdata = counter->irqdata;
 	usrdata = counter->usrdata;
@@ -905,6 +983,9 @@ perf_read_irq_data(struct perf_counter *counter,
 		if (signal_pending(current))
 			break;
 
+		if (counter->state == PERF_COUNTER_STATE_ERROR)
+			break;
+
 		spin_unlock_irq(&counter->waitq.lock);
 		schedule();
 		spin_lock_irq(&counter->waitq.lock);
@@ -913,7 +994,8 @@ perf_read_irq_data(struct perf_counter *counter,
 	__set_current_state(TASK_RUNNING);
 	spin_unlock_irq(&counter->waitq.lock);
 
-	if (usrdata->len + irqdata->len < count)
+	if (usrdata->len + irqdata->len < count &&
+	    counter->state != PERF_COUNTER_STATE_ERROR)
 		return -ERESTARTSYS;
 read_pending:
 	mutex_lock(&counter->mutex);
@@ -925,11 +1007,12 @@ read_pending:
 
 	/* Switch irq buffer: */
 	usrdata = perf_switch_irq_data(counter);
-	if (perf_copy_usrdata(usrdata, buf + res, count - res) < 0) {
+	res2 = perf_copy_usrdata(usrdata, buf + res, count - res);
+	if (res2 < 0) {
 		if (!res)
 			res = -EFAULT;
 	} else {
-		res = count;
+		res += res2;
 	}
 out:
 	mutex_unlock(&counter->mutex);
@@ -1348,6 +1431,11 @@ sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr __user,
 		 */
 		if (group_leader->ctx != ctx)
 			goto err_put_context;
+		/*
+		 * Only a group leader can be exclusive or pinned
+		 */
+		if (hw_event.exclusive || hw_event.pinned)
+			goto err_put_context;
 	}
 
 	ret = -EINVAL;
@@ -1473,13 +1561,7 @@ __perf_counter_exit_task(struct task_struct *child,
 
 	cpuctx = &__get_cpu_var(perf_cpu_context);
 
-	if (child_counter->state == PERF_COUNTER_STATE_ACTIVE) {
-		child_counter->state = PERF_COUNTER_STATE_INACTIVE;
-		child_counter->hw_ops->disable(child_counter);
-		cpuctx->active_oncpu--;
-		child_ctx->nr_active--;
-		child_counter->oncpu = -1;
-	}
+	counter_sched_out(child_counter, cpuctx, child_ctx);
 
 	list_del_init(&child_counter->list_entry);
 