-rw-r--r--	arch/powerpc/kernel/perf_counter.c	 10
-rw-r--r--	include/linux/perf_counter.h		 15
-rw-r--r--	kernel/perf_counter.c			226
3 files changed, 169 insertions(+), 82 deletions(-)
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 85ad25923c2c..5b0211348c73 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -36,14 +36,6 @@ void perf_counter_print_debug(void)
 }
 
 /*
- * Return 1 for a software counter, 0 for a hardware counter
- */
-static inline int is_software_counter(struct perf_counter *counter)
-{
-	return !counter->hw_event.raw && counter->hw_event.type < 0;
-}
-
-/*
  * Read one performance monitor counter (PMC).
  */
 static unsigned long read_pmc(int idx)
@@ -443,6 +435,7 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader,
 	 */
 	for (i = n0; i < n0 + n; ++i)
 		cpuhw->counter[i]->hw.config = cpuhw->events[i];
+	cpuctx->active_oncpu += n;
 	n = 1;
 	counter_sched_in(group_leader, cpu);
 	list_for_each_entry(sub, &group_leader->sibling_list, list_entry) {
@@ -451,7 +444,6 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader,
 			++n;
 		}
 	}
-	cpuctx->active_oncpu += n;
 	ctx->nr_active += n;
 
 	return 1;
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index b21d1ea4c054..7ab8e5f96f5b 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -86,7 +86,10 @@ struct perf_counter_hw_event {
 		nmi	       :  1, /* NMI sampling                */
 		raw	       :  1, /* raw event type              */
 		inherit	       :  1, /* children inherit it         */
-		__reserved_1   : 28;
+		pinned	       :  1, /* must always be on PMU       */
+		exclusive      :  1, /* only counter on PMU         */
+
+		__reserved_1   : 26;
 
 	u64			__reserved_2;
 };
@@ -141,6 +144,7 @@ struct hw_perf_counter_ops {
  * enum perf_counter_active_state - the states of a counter
  */
 enum perf_counter_active_state {
+	PERF_COUNTER_STATE_ERROR	= -2,
 	PERF_COUNTER_STATE_OFF		= -1,
 	PERF_COUNTER_STATE_INACTIVE	=  0,
 	PERF_COUNTER_STATE_ACTIVE	=  1,
@@ -214,6 +218,7 @@ struct perf_cpu_context {
 	struct perf_counter_context	*task_ctx;
 	int				active_oncpu;
 	int				max_pertask;
+	int				exclusive;
 };
 
 /*
@@ -240,6 +245,14 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader,
 	       struct perf_cpu_context *cpuctx,
 	       struct perf_counter_context *ctx, int cpu);
 
+/*
+ * Return 1 for a software counter, 0 for a hardware counter
+ */
+static inline int is_software_counter(struct perf_counter *counter)
+{
+	return !counter->hw_event.raw && counter->hw_event.type < 0;
+}
+
 #else
 static inline void
 perf_counter_task_sched_in(struct task_struct *task, int cpu)	{ }
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 52f2f526248e..faf671b29566 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -93,6 +93,25 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 	}
 }
 
+static void
+counter_sched_out(struct perf_counter *counter,
+		  struct perf_cpu_context *cpuctx,
+		  struct perf_counter_context *ctx)
+{
+	if (counter->state != PERF_COUNTER_STATE_ACTIVE)
+		return;
+
+	counter->state = PERF_COUNTER_STATE_INACTIVE;
+	counter->hw_ops->disable(counter);
+	counter->oncpu = -1;
+
+	if (!is_software_counter(counter))
+		cpuctx->active_oncpu--;
+	ctx->nr_active--;
+	if (counter->hw_event.exclusive || !cpuctx->active_oncpu)
+		cpuctx->exclusive = 0;
+}
+
 /*
  * Cross CPU call to remove a performance counter
  *
@@ -118,14 +137,9 @@ static void __perf_counter_remove_from_context(void *info)
 	curr_rq_lock_irq_save(&flags);
 	spin_lock(&ctx->lock);
 
-	if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
-		counter->state = PERF_COUNTER_STATE_INACTIVE;
-		counter->hw_ops->disable(counter);
-		ctx->nr_active--;
-		cpuctx->active_oncpu--;
-		counter->task = NULL;
-		counter->oncpu = -1;
-	}
+	counter_sched_out(counter, cpuctx, ctx);
+
+	counter->task = NULL;
 	ctx->nr_counters--;
 
 	/*
@@ -207,7 +221,7 @@ counter_sched_in(struct perf_counter *counter,
 		 struct perf_counter_context *ctx,
 		 int cpu)
 {
-	if (counter->state == PERF_COUNTER_STATE_OFF)
+	if (counter->state <= PERF_COUNTER_STATE_OFF)
 		return 0;
 
 	counter->state = PERF_COUNTER_STATE_ACTIVE;
@@ -223,13 +237,64 @@ counter_sched_in(struct perf_counter *counter,
 		return -EAGAIN;
 	}
 
-	cpuctx->active_oncpu++;
+	if (!is_software_counter(counter))
+		cpuctx->active_oncpu++;
 	ctx->nr_active++;
 
+	if (counter->hw_event.exclusive)
+		cpuctx->exclusive = 1;
+
 	return 0;
 }
 
 /*
+ * Return 1 for a group consisting entirely of software counters,
+ * 0 if the group contains any hardware counters.
+ */
+static int is_software_only_group(struct perf_counter *leader)
+{
+	struct perf_counter *counter;
+
+	if (!is_software_counter(leader))
+		return 0;
+	list_for_each_entry(counter, &leader->sibling_list, list_entry)
+		if (!is_software_counter(counter))
+			return 0;
+	return 1;
+}
+
+/*
+ * Work out whether we can put this counter group on the CPU now.
+ */
+static int group_can_go_on(struct perf_counter *counter,
+			   struct perf_cpu_context *cpuctx,
+			   int can_add_hw)
+{
+	/*
+	 * Groups consisting entirely of software counters can always go on.
+	 */
+	if (is_software_only_group(counter))
+		return 1;
+	/*
+	 * If an exclusive group is already on, no other hardware
+	 * counters can go on.
+	 */
+	if (cpuctx->exclusive)
+		return 0;
+	/*
+	 * If this group is exclusive and there are already
+	 * counters on the CPU, it can't go on.
+	 */
+	if (counter->hw_event.exclusive && cpuctx->active_oncpu)
+		return 0;
+	/*
+	 * Otherwise, try to add it if all previous groups were able
+	 * to go on.
+	 */
+	return can_add_hw;
+}
+
+/*
  * Cross CPU call to install and enable a performance counter
  */
 static void __perf_install_in_context(void *info)
@@ -240,6 +305,7 @@ static void __perf_install_in_context(void *info)
 	int cpu = smp_processor_id();
 	unsigned long flags;
 	u64 perf_flags;
+	int err;
 
 	/*
 	 * If this is a task context, we need to check whether it is
@@ -261,9 +327,21 @@ static void __perf_install_in_context(void *info)
 	list_add_counter(counter, ctx);
 	ctx->nr_counters++;
 
-	counter_sched_in(counter, cpuctx, ctx, cpu);
+	/*
+	 * An exclusive counter can't go on if there are already active
+	 * hardware counters, and no hardware counter can go on if there
+	 * is already an exclusive counter on.
+	 */
+	if (counter->state == PERF_COUNTER_STATE_INACTIVE &&
+	    !group_can_go_on(counter, cpuctx, 1))
+		err = -EEXIST;
+	else
+		err = counter_sched_in(counter, cpuctx, ctx, cpu);
+
+	if (err && counter->hw_event.pinned)
+		counter->state = PERF_COUNTER_STATE_ERROR;
 
-	if (!ctx->task && cpuctx->max_pertask)
+	if (!err && !ctx->task && cpuctx->max_pertask)
 		cpuctx->max_pertask--;
 
 	hw_perf_restore(perf_flags);
@@ -327,22 +405,6 @@ retry:
 }
 
 static void
-counter_sched_out(struct perf_counter *counter,
-		  struct perf_cpu_context *cpuctx,
-		  struct perf_counter_context *ctx)
-{
-	if (counter->state != PERF_COUNTER_STATE_ACTIVE)
-		return;
-
-	counter->state = PERF_COUNTER_STATE_INACTIVE;
-	counter->hw_ops->disable(counter);
-	counter->oncpu = -1;
-
-	cpuctx->active_oncpu--;
-	ctx->nr_active--;
-}
-
-static void
 group_sched_out(struct perf_counter *group_counter,
 		struct perf_cpu_context *cpuctx,
 		struct perf_counter_context *ctx)
@@ -359,6 +421,9 @@ group_sched_out(struct perf_counter *group_counter,
 	 */
 	list_for_each_entry(counter, &group_counter->sibling_list, list_entry)
 		counter_sched_out(counter, cpuctx, ctx);
+
+	if (group_counter->hw_event.exclusive)
+		cpuctx->exclusive = 0;
 }
 
 void __perf_counter_sched_out(struct perf_counter_context *ctx,
@@ -455,30 +520,6 @@ group_error:
 	return -EAGAIN;
 }
 
-/*
- * Return 1 for a software counter, 0 for a hardware counter
- */
-static inline int is_software_counter(struct perf_counter *counter)
-{
-	return !counter->hw_event.raw && counter->hw_event.type < 0;
-}
-
-/*
- * Return 1 for a group consisting entirely of software counters,
- * 0 if the group contains any hardware counters.
- */
-static int is_software_only_group(struct perf_counter *leader)
-{
-	struct perf_counter *counter;
-
-	if (!is_software_counter(leader))
-		return 0;
-	list_for_each_entry(counter, &leader->sibling_list, list_entry)
-		if (!is_software_counter(counter))
-			return 0;
-	return 1;
-}
-
 static void
 __perf_counter_sched_in(struct perf_counter_context *ctx,
 			struct perf_cpu_context *cpuctx, int cpu)
@@ -492,22 +533,49 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
 
 	spin_lock(&ctx->lock);
 	flags = hw_perf_save_disable();
+
+	/*
+	 * First go through the list and put on any pinned groups
+	 * in order to give them the best chance of going on.
+	 */
+	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+		if (counter->state <= PERF_COUNTER_STATE_OFF ||
+		    !counter->hw_event.pinned)
+			continue;
+		if (counter->cpu != -1 && counter->cpu != cpu)
+			continue;
+
+		if (group_can_go_on(counter, cpuctx, 1))
+			group_sched_in(counter, cpuctx, ctx, cpu);
+
+		/*
+		 * If this pinned group hasn't been scheduled,
+		 * put it in error state.
+		 */
+		if (counter->state == PERF_COUNTER_STATE_INACTIVE)
+			counter->state = PERF_COUNTER_STATE_ERROR;
+	}
+
 	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
 		/*
+		 * Ignore counters in OFF or ERROR state, and
+		 * ignore pinned counters since we did them already.
+		 */
+		if (counter->state <= PERF_COUNTER_STATE_OFF ||
+		    counter->hw_event.pinned)
+			continue;
+
+		/*
 		 * Listen to the 'cpu' scheduling filter constraint
 		 * of counters:
 		 */
 		if (counter->cpu != -1 && counter->cpu != cpu)
 			continue;
 
-		/*
-		 * If we scheduled in a group atomically and exclusively,
-		 * or if this group can't go on, don't add any more
-		 * hardware counters.
-		 */
-		if (can_add_hw || is_software_only_group(counter))
+		if (group_can_go_on(counter, cpuctx, can_add_hw)) {
 			if (group_sched_in(counter, cpuctx, ctx, cpu))
 				can_add_hw = 0;
+		}
 	}
 	hw_perf_restore(flags);
 	spin_unlock(&ctx->lock);
@@ -567,8 +635,10 @@ int perf_counter_task_disable(void)
 	 */
 	perf_flags = hw_perf_save_disable();
 
-	list_for_each_entry(counter, &ctx->counter_list, list_entry)
-		counter->state = PERF_COUNTER_STATE_OFF;
+	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+		if (counter->state != PERF_COUNTER_STATE_ERROR)
+			counter->state = PERF_COUNTER_STATE_OFF;
+	}
 
 	hw_perf_restore(perf_flags);
 
@@ -607,7 +677,7 @@ int perf_counter_task_enable(void)
 	perf_flags = hw_perf_save_disable();
 
 	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
-		if (counter->state != PERF_COUNTER_STATE_OFF)
+		if (counter->state > PERF_COUNTER_STATE_OFF)
 			continue;
 		counter->state = PERF_COUNTER_STATE_INACTIVE;
 		counter->hw_event.disabled = 0;
@@ -849,6 +919,14 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
 	if (count != sizeof(cntval))
 		return -EINVAL;
 
+	/*
+	 * Return end-of-file for a read on a counter that is in
+	 * error state (i.e. because it was pinned but it couldn't be
+	 * scheduled on to the CPU at some point).
+	 */
+	if (counter->state == PERF_COUNTER_STATE_ERROR)
+		return 0;
+
 	mutex_lock(&counter->mutex);
 	cntval = perf_counter_read(counter);
 	mutex_unlock(&counter->mutex);
@@ -884,7 +962,7 @@ perf_read_irq_data(struct perf_counter *counter,
 {
 	struct perf_data *irqdata, *usrdata;
 	DECLARE_WAITQUEUE(wait, current);
-	ssize_t res;
+	ssize_t res, res2;
 
 	irqdata = counter->irqdata;
 	usrdata = counter->usrdata;
@@ -905,6 +983,9 @@ perf_read_irq_data(struct perf_counter *counter,
 		if (signal_pending(current))
 			break;
 
+		if (counter->state == PERF_COUNTER_STATE_ERROR)
+			break;
+
 		spin_unlock_irq(&counter->waitq.lock);
 		schedule();
 		spin_lock_irq(&counter->waitq.lock);
@@ -913,7 +994,8 @@ perf_read_irq_data(struct perf_counter *counter,
 	__set_current_state(TASK_RUNNING);
 	spin_unlock_irq(&counter->waitq.lock);
 
-	if (usrdata->len + irqdata->len < count)
+	if (usrdata->len + irqdata->len < count &&
+	    counter->state != PERF_COUNTER_STATE_ERROR)
 		return -ERESTARTSYS;
 read_pending:
 	mutex_lock(&counter->mutex);
@@ -925,11 +1007,12 @@ read_pending:
 
 	/* Switch irq buffer: */
 	usrdata = perf_switch_irq_data(counter);
-	if (perf_copy_usrdata(usrdata, buf + res, count - res) < 0) {
+	res2 = perf_copy_usrdata(usrdata, buf + res, count - res);
+	if (res2 < 0) {
 		if (!res)
 			res = -EFAULT;
 	} else {
-		res = count;
+		res += res2;
 	}
 out:
 	mutex_unlock(&counter->mutex);
@@ -1348,6 +1431,11 @@ sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr __user,
 		 */
 		if (group_leader->ctx != ctx)
 			goto err_put_context;
+		/*
+		 * Only a group leader can be exclusive or pinned
+		 */
+		if (hw_event.exclusive || hw_event.pinned)
+			goto err_put_context;
 	}
 
 	ret = -EINVAL;
@@ -1473,13 +1561,7 @@ __perf_counter_exit_task(struct task_struct *child,
 
 	cpuctx = &__get_cpu_var(perf_cpu_context);
 
-	if (child_counter->state == PERF_COUNTER_STATE_ACTIVE) {
-		child_counter->state = PERF_COUNTER_STATE_INACTIVE;
-		child_counter->hw_ops->disable(child_counter);
-		cpuctx->active_oncpu--;
-		child_ctx->nr_active--;
-		child_counter->oncpu = -1;
-	}
+	counter_sched_out(child_counter, cpuctx, child_ctx);
 
 	list_del_init(&child_counter->list_entry);
 
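
Usage sketch (not part of the patch): the hunks above add two per-counter bits, pinned (the counter must always be on the PMU, otherwise it is put into PERF_COUNTER_STATE_ERROR and reads return end-of-file) and exclusive (no other hardware counters may be on the PMU while this group is scheduled in). The short C sketch below illustrates how userspace might set them; it assumes a perf_counter_open() wrapper around a sys_perf_counter_open(hw_event, pid, cpu, group_fd) syscall and a PERF_COUNT_CPU_CYCLES hardware event type from <linux/perf_counter.h>, both of which are assumptions about the surrounding tree rather than anything this diff establishes.

/*
 * Hypothetical example only: open one pinned, exclusive cycle-counting
 * group leader bound to CPU 0 for the calling task.
 */
#include <string.h>
#include <sys/types.h>
#include <linux/perf_counter.h>

/* Assumed wrapper around the raw sys_perf_counter_open() syscall. */
extern int perf_counter_open(struct perf_counter_hw_event *hw_event,
			     pid_t pid, int cpu, int group_fd);

static int open_pinned_exclusive_cycles(void)
{
	struct perf_counter_hw_event hw_event;

	memset(&hw_event, 0, sizeof(hw_event));
	hw_event.type	   = PERF_COUNT_CPU_CYCLES;	/* hardware event, type >= 0 */
	hw_event.pinned	   = 1;	/* must always be on the PMU; otherwise the
				 * counter goes to PERF_COUNTER_STATE_ERROR
				 * and read() returns 0 (end-of-file) */
	hw_event.exclusive = 1;	/* no other hardware counters may be on the
				 * PMU while this group is scheduled in */

	/* pid 0: current task; cpu 0; group_fd -1: create a new group leader
	 * (the patch rejects pinned/exclusive on non-leaders). */
	return perf_counter_open(&hw_event, 0, 0, -1);
}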