path: root/kernel/perf_counter.c
Diffstat (limited to 'kernel/perf_counter.c')
-rw-r--r--    kernel/perf_counter.c    226
1 file changed, 154 insertions(+), 72 deletions(-)
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 52f2f526248e..faf671b29566 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -93,6 +93,25 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
         }
 }
 
+static void
+counter_sched_out(struct perf_counter *counter,
+                  struct perf_cpu_context *cpuctx,
+                  struct perf_counter_context *ctx)
+{
+        if (counter->state != PERF_COUNTER_STATE_ACTIVE)
+                return;
+
+        counter->state = PERF_COUNTER_STATE_INACTIVE;
+        counter->hw_ops->disable(counter);
+        counter->oncpu = -1;
+
+        if (!is_software_counter(counter))
+                cpuctx->active_oncpu--;
+        ctx->nr_active--;
+        if (counter->hw_event.exclusive || !cpuctx->active_oncpu)
+                cpuctx->exclusive = 0;
+}
+
 /*
  * Cross CPU call to remove a performance counter
  *
@@ -118,14 +137,9 @@ static void __perf_counter_remove_from_context(void *info)
         curr_rq_lock_irq_save(&flags);
         spin_lock(&ctx->lock);
 
-        if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
-                counter->state = PERF_COUNTER_STATE_INACTIVE;
-                counter->hw_ops->disable(counter);
-                ctx->nr_active--;
-                cpuctx->active_oncpu--;
-                counter->task = NULL;
-                counter->oncpu = -1;
-        }
+        counter_sched_out(counter, cpuctx, ctx);
+
+        counter->task = NULL;
         ctx->nr_counters--;
 
         /*
@@ -207,7 +221,7 @@ counter_sched_in(struct perf_counter *counter,
                  struct perf_counter_context *ctx,
                  int cpu)
 {
-        if (counter->state == PERF_COUNTER_STATE_OFF)
+        if (counter->state <= PERF_COUNTER_STATE_OFF)
                 return 0;
 
         counter->state = PERF_COUNTER_STATE_ACTIVE;
@@ -223,13 +237,64 @@ counter_sched_in(struct perf_counter *counter,
                 return -EAGAIN;
         }
 
-        cpuctx->active_oncpu++;
+        if (!is_software_counter(counter))
+                cpuctx->active_oncpu++;
         ctx->nr_active++;
 
+        if (counter->hw_event.exclusive)
+                cpuctx->exclusive = 1;
+
         return 0;
 }
 
 /*
+ * Return 1 for a group consisting entirely of software counters,
+ * 0 if the group contains any hardware counters.
+ */
+static int is_software_only_group(struct perf_counter *leader)
+{
+        struct perf_counter *counter;
+
+        if (!is_software_counter(leader))
+                return 0;
+        list_for_each_entry(counter, &leader->sibling_list, list_entry)
+                if (!is_software_counter(counter))
+                        return 0;
+        return 1;
+}
+
+/*
+ * Work out whether we can put this counter group on the CPU now.
+ */
+static int group_can_go_on(struct perf_counter *counter,
+                           struct perf_cpu_context *cpuctx,
+                           int can_add_hw)
+{
+        /*
+         * Groups consisting entirely of software counters can always go on.
+         */
+        if (is_software_only_group(counter))
+                return 1;
+        /*
+         * If an exclusive group is already on, no other hardware
+         * counters can go on.
+         */
+        if (cpuctx->exclusive)
+                return 0;
+        /*
+         * If this group is exclusive and there are already
+         * counters on the CPU, it can't go on.
+         */
+        if (counter->hw_event.exclusive && cpuctx->active_oncpu)
+                return 0;
+        /*
+         * Otherwise, try to add it if all previous groups were able
+         * to go on.
+         */
+        return can_add_hw;
+}
+
+/*
  * Cross CPU call to install and enable a performance counter
  */
 static void __perf_install_in_context(void *info)
@@ -240,6 +305,7 @@ static void __perf_install_in_context(void *info)
         int cpu = smp_processor_id();
         unsigned long flags;
         u64 perf_flags;
+        int err;
 
         /*
          * If this is a task context, we need to check whether it is
@@ -261,9 +327,21 @@ static void __perf_install_in_context(void *info)
         list_add_counter(counter, ctx);
         ctx->nr_counters++;
 
-        counter_sched_in(counter, cpuctx, ctx, cpu);
+        /*
+         * An exclusive counter can't go on if there are already active
+         * hardware counters, and no hardware counter can go on if there
+         * is already an exclusive counter on.
+         */
+        if (counter->state == PERF_COUNTER_STATE_INACTIVE &&
+            !group_can_go_on(counter, cpuctx, 1))
+                err = -EEXIST;
+        else
+                err = counter_sched_in(counter, cpuctx, ctx, cpu);
+
+        if (err && counter->hw_event.pinned)
+                counter->state = PERF_COUNTER_STATE_ERROR;
 
-        if (!ctx->task && cpuctx->max_pertask)
+        if (!err && !ctx->task && cpuctx->max_pertask)
                 cpuctx->max_pertask--;
 
         hw_perf_restore(perf_flags);
@@ -327,22 +405,6 @@ retry:
 }
 
 static void
-counter_sched_out(struct perf_counter *counter,
-                  struct perf_cpu_context *cpuctx,
-                  struct perf_counter_context *ctx)
-{
-        if (counter->state != PERF_COUNTER_STATE_ACTIVE)
-                return;
-
-        counter->state = PERF_COUNTER_STATE_INACTIVE;
-        counter->hw_ops->disable(counter);
-        counter->oncpu = -1;
-
-        cpuctx->active_oncpu--;
-        ctx->nr_active--;
-}
-
-static void
 group_sched_out(struct perf_counter *group_counter,
                 struct perf_cpu_context *cpuctx,
                 struct perf_counter_context *ctx)
@@ -359,6 +421,9 @@ group_sched_out(struct perf_counter *group_counter,
          */
         list_for_each_entry(counter, &group_counter->sibling_list, list_entry)
                 counter_sched_out(counter, cpuctx, ctx);
+
+        if (group_counter->hw_event.exclusive)
+                cpuctx->exclusive = 0;
 }
 
 void __perf_counter_sched_out(struct perf_counter_context *ctx,
@@ -455,30 +520,6 @@ group_error:
         return -EAGAIN;
 }
 
-/*
- * Return 1 for a software counter, 0 for a hardware counter
- */
-static inline int is_software_counter(struct perf_counter *counter)
-{
-        return !counter->hw_event.raw && counter->hw_event.type < 0;
-}
-
-/*
- * Return 1 for a group consisting entirely of software counters,
- * 0 if the group contains any hardware counters.
- */
-static int is_software_only_group(struct perf_counter *leader)
-{
-        struct perf_counter *counter;
-
-        if (!is_software_counter(leader))
-                return 0;
-        list_for_each_entry(counter, &leader->sibling_list, list_entry)
-                if (!is_software_counter(counter))
-                        return 0;
-        return 1;
-}
-
 static void
 __perf_counter_sched_in(struct perf_counter_context *ctx,
                         struct perf_cpu_context *cpuctx, int cpu)
@@ -492,22 +533,49 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
 
         spin_lock(&ctx->lock);
         flags = hw_perf_save_disable();
+
+        /*
+         * First go through the list and put on any pinned groups
+         * in order to give them the best chance of going on.
+         */
+        list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+                if (counter->state <= PERF_COUNTER_STATE_OFF ||
+                    !counter->hw_event.pinned)
+                        continue;
+                if (counter->cpu != -1 && counter->cpu != cpu)
+                        continue;
+
+                if (group_can_go_on(counter, cpuctx, 1))
+                        group_sched_in(counter, cpuctx, ctx, cpu);
+
+                /*
+                 * If this pinned group hasn't been scheduled,
+                 * put it in error state.
+                 */
+                if (counter->state == PERF_COUNTER_STATE_INACTIVE)
+                        counter->state = PERF_COUNTER_STATE_ERROR;
+        }
+
         list_for_each_entry(counter, &ctx->counter_list, list_entry) {
                 /*
+                 * Ignore counters in OFF or ERROR state, and
+                 * ignore pinned counters since we did them already.
+                 */
+                if (counter->state <= PERF_COUNTER_STATE_OFF ||
+                    counter->hw_event.pinned)
+                        continue;
+
+                /*
                  * Listen to the 'cpu' scheduling filter constraint
                  * of counters:
                  */
                 if (counter->cpu != -1 && counter->cpu != cpu)
                         continue;
 
-                /*
-                 * If we scheduled in a group atomically and exclusively,
-                 * or if this group can't go on, don't add any more
-                 * hardware counters.
-                 */
-                if (can_add_hw || is_software_only_group(counter))
+                if (group_can_go_on(counter, cpuctx, can_add_hw)) {
                         if (group_sched_in(counter, cpuctx, ctx, cpu))
                                 can_add_hw = 0;
+                }
         }
         hw_perf_restore(flags);
         spin_unlock(&ctx->lock);
@@ -567,8 +635,10 @@ int perf_counter_task_disable(void)
          */
         perf_flags = hw_perf_save_disable();
 
-        list_for_each_entry(counter, &ctx->counter_list, list_entry)
-                counter->state = PERF_COUNTER_STATE_OFF;
+        list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+                if (counter->state != PERF_COUNTER_STATE_ERROR)
+                        counter->state = PERF_COUNTER_STATE_OFF;
+        }
 
         hw_perf_restore(perf_flags);
 
@@ -607,7 +677,7 @@ int perf_counter_task_enable(void)
         perf_flags = hw_perf_save_disable();
 
         list_for_each_entry(counter, &ctx->counter_list, list_entry) {
-                if (counter->state != PERF_COUNTER_STATE_OFF)
+                if (counter->state > PERF_COUNTER_STATE_OFF)
                         continue;
                 counter->state = PERF_COUNTER_STATE_INACTIVE;
                 counter->hw_event.disabled = 0;
@@ -849,6 +919,14 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
         if (count != sizeof(cntval))
                 return -EINVAL;
 
+        /*
+         * Return end-of-file for a read on a counter that is in
+         * error state (i.e. because it was pinned but it couldn't be
+         * scheduled on to the CPU at some point).
+         */
+        if (counter->state == PERF_COUNTER_STATE_ERROR)
+                return 0;
+
         mutex_lock(&counter->mutex);
         cntval = perf_counter_read(counter);
         mutex_unlock(&counter->mutex);
@@ -884,7 +962,7 @@ perf_read_irq_data(struct perf_counter *counter,
 {
         struct perf_data *irqdata, *usrdata;
         DECLARE_WAITQUEUE(wait, current);
-        ssize_t res;
+        ssize_t res, res2;
 
         irqdata = counter->irqdata;
         usrdata = counter->usrdata;
@@ -905,6 +983,9 @@ perf_read_irq_data(struct perf_counter *counter,
                 if (signal_pending(current))
                         break;
 
+                if (counter->state == PERF_COUNTER_STATE_ERROR)
+                        break;
+
                 spin_unlock_irq(&counter->waitq.lock);
                 schedule();
                 spin_lock_irq(&counter->waitq.lock);
@@ -913,7 +994,8 @@ perf_read_irq_data(struct perf_counter *counter,
         __set_current_state(TASK_RUNNING);
         spin_unlock_irq(&counter->waitq.lock);
 
-        if (usrdata->len + irqdata->len < count)
+        if (usrdata->len + irqdata->len < count &&
+            counter->state != PERF_COUNTER_STATE_ERROR)
                 return -ERESTARTSYS;
 read_pending:
         mutex_lock(&counter->mutex);
@@ -925,11 +1007,12 @@ read_pending:
 
         /* Switch irq buffer: */
         usrdata = perf_switch_irq_data(counter);
-        if (perf_copy_usrdata(usrdata, buf + res, count - res) < 0) {
+        res2 = perf_copy_usrdata(usrdata, buf + res, count - res);
+        if (res2 < 0) {
                 if (!res)
                         res = -EFAULT;
         } else {
-                res = count;
+                res += res2;
         }
 out:
         mutex_unlock(&counter->mutex);
@@ -1348,6 +1431,11 @@ sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr __user,
                  */
                 if (group_leader->ctx != ctx)
                         goto err_put_context;
+                /*
+                 * Only a group leader can be exclusive or pinned
+                 */
+                if (hw_event.exclusive || hw_event.pinned)
+                        goto err_put_context;
         }
 
         ret = -EINVAL;
@@ -1473,13 +1561,7 @@ __perf_counter_exit_task(struct task_struct *child,
 
         cpuctx = &__get_cpu_var(perf_cpu_context);
 
-        if (child_counter->state == PERF_COUNTER_STATE_ACTIVE) {
-                child_counter->state = PERF_COUNTER_STATE_INACTIVE;
-                child_counter->hw_ops->disable(child_counter);
-                cpuctx->active_oncpu--;
-                child_ctx->nr_active--;
-                child_counter->oncpu = -1;
-        }
+        counter_sched_out(child_counter, cpuctx, child_ctx);
 
         list_del_init(&child_counter->list_entry);
 