about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- include/linux/perf_counter.h 21
-rw-r--r-- kernel/perf_counter.c 455
2 files changed, 415 insertions, 61 deletions
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 7ab8e5f96f5b..33ba9fe0a781 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -14,6 +14,7 @@
14#define _LINUX_PERF_COUNTER_H 14#define _LINUX_PERF_COUNTER_H
15 15
16#include <asm/atomic.h> 16#include <asm/atomic.h>
17#include <asm/ioctl.h>
17 18
18#ifdef CONFIG_PERF_COUNTERS 19#ifdef CONFIG_PERF_COUNTERS
19# include <asm/perf_counter.h> 20# include <asm/perf_counter.h>
@@ -95,6 +96,12 @@ struct perf_counter_hw_event {
95}; 96};
96 97
97/* 98/*
99 * Ioctls that can be done on a perf counter fd:
100 */
101#define PERF_COUNTER_IOC_ENABLE _IO('$', 0)
102#define PERF_COUNTER_IOC_DISABLE _IO('$', 1)
103
104/*
98 * Kernel-internal data types: 105 * Kernel-internal data types:
99 */ 106 */
100 107
@@ -173,8 +180,10 @@ struct perf_counter {
173 struct file *filp; 180 struct file *filp;
174 181
175 struct perf_counter *parent; 182 struct perf_counter *parent;
183 struct list_head child_list;
184
176 /* 185 /*
177 * Protect attach/detach: 186 * Protect attach/detach and child_list:
178 */ 187 */
179 struct mutex mutex; 188 struct mutex mutex;
180 189
@@ -199,13 +208,21 @@ struct perf_counter {
199struct perf_counter_context { 208struct perf_counter_context {
200#ifdef CONFIG_PERF_COUNTERS 209#ifdef CONFIG_PERF_COUNTERS
201 /* 210 /*
202 * Protect the list of counters: 211 * Protect the states of the counters in the list,
212 * nr_active, and the list:
203 */ 213 */
204 spinlock_t lock; 214 spinlock_t lock;
215 /*
216 * Protect the list of counters. Locking either mutex or lock
217 * is sufficient to ensure the list doesn't change; to change
218 * the list you need to lock both the mutex and the spinlock.
219 */
220 struct mutex mutex;
205 221
206 struct list_head counter_list; 222 struct list_head counter_list;
207 int nr_counters; 223 int nr_counters;
208 int nr_active; 224 int nr_active;
225 int is_active;
209 struct task_struct *task; 226 struct task_struct *task;
210#endif 227#endif
211}; 228};
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index faf671b29566..1ac18daa424f 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -112,6 +112,28 @@ counter_sched_out(struct perf_counter *counter,
112 cpuctx->exclusive = 0; 112 cpuctx->exclusive = 0;
113} 113}
114 114
115static void
116group_sched_out(struct perf_counter *group_counter,
117 struct perf_cpu_context *cpuctx,
118 struct perf_counter_context *ctx)
119{
120 struct perf_counter *counter;
121
122 if (group_counter->state != PERF_COUNTER_STATE_ACTIVE)
123 return;
124
125 counter_sched_out(group_counter, cpuctx, ctx);
126
127 /*
128 * Schedule out siblings (if any):
129 */
130 list_for_each_entry(counter, &group_counter->sibling_list, list_entry)
131 counter_sched_out(counter, cpuctx, ctx);
132
133 if (group_counter->hw_event.exclusive)
134 cpuctx->exclusive = 0;
135}
136
115/* 137/*
116 * Cross CPU call to remove a performance counter 138 * Cross CPU call to remove a performance counter
117 * 139 *
@@ -168,7 +190,7 @@ static void __perf_counter_remove_from_context(void *info)
168/* 190/*
169 * Remove the counter from a task's (or a CPU's) list of counters. 191 * Remove the counter from a task's (or a CPU's) list of counters.
170 * 192 *
171 * Must be called with counter->mutex held. 193 * Must be called with counter->mutex and ctx->mutex held.
172 * 194 *
173 * CPU counters are removed with a smp call. For task counters we only 195 * CPU counters are removed with a smp call. For task counters we only
174 * call when the task is on a CPU. 196 * call when the task is on a CPU.
@@ -215,6 +237,99 @@ retry:
215 spin_unlock_irq(&ctx->lock); 237 spin_unlock_irq(&ctx->lock);
216} 238}
217 239
240/*
241 * Cross CPU call to disable a performance counter
242 */
243static void __perf_counter_disable(void *info)
244{
245 struct perf_counter *counter = info;
246 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
247 struct perf_counter_context *ctx = counter->ctx;
248 unsigned long flags;
249
250 /*
251 * If this is a per-task counter, need to check whether this
252 * counter's task is the current task on this cpu.
253 */
254 if (ctx->task && cpuctx->task_ctx != ctx)
255 return;
256
257 curr_rq_lock_irq_save(&flags);
258 spin_lock(&ctx->lock);
259
260 /*
261 * If the counter is on, turn it off.
262 * If it is in error state, leave it in error state.
263 */
264 if (counter->state >= PERF_COUNTER_STATE_INACTIVE) {
265 if (counter == counter->group_leader)
266 group_sched_out(counter, cpuctx, ctx);
267 else
268 counter_sched_out(counter, cpuctx, ctx);
269 counter->state = PERF_COUNTER_STATE_OFF;
270 }
271
272 spin_unlock(&ctx->lock);
273 curr_rq_unlock_irq_restore(&flags);
274}
275
276/*
277 * Disable a counter.
278 */
279static void perf_counter_disable(struct perf_counter *counter)
280{
281 struct perf_counter_context *ctx = counter->ctx;
282 struct task_struct *task = ctx->task;
283
284 if (!task) {
285 /*
286 * Disable the counter on the cpu that it's on
287 */
288 smp_call_function_single(counter->cpu, __perf_counter_disable,
289 counter, 1);
290 return;
291 }
292
293 retry:
294 task_oncpu_function_call(task, __perf_counter_disable, counter);
295
296 spin_lock_irq(&ctx->lock);
297 /*
298 * If the counter is still active, we need to retry the cross-call.
299 */
300 if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
301 spin_unlock_irq(&ctx->lock);
302 goto retry;
303 }
304
305 /*
306 * Since we have the lock this context can't be scheduled
307 * in, so we can change the state safely.
308 */
309 if (counter->state == PERF_COUNTER_STATE_INACTIVE)
310 counter->state = PERF_COUNTER_STATE_OFF;
311
312 spin_unlock_irq(&ctx->lock);
313}
314
315/*
316 * Disable a counter and all its children.
317 */
318static void perf_counter_disable_family(struct perf_counter *counter)
319{
320 struct perf_counter *child;
321
322 perf_counter_disable(counter);
323
324 /*
325 * Lock the mutex to protect the list of children
326 */
327 mutex_lock(&counter->mutex);
328 list_for_each_entry(child, &counter->child_list, child_list)
329 perf_counter_disable(child);
330 mutex_unlock(&counter->mutex);
331}
332
218static int 333static int
219counter_sched_in(struct perf_counter *counter, 334counter_sched_in(struct perf_counter *counter,
220 struct perf_cpu_context *cpuctx, 335 struct perf_cpu_context *cpuctx,
@@ -302,6 +417,7 @@ static void __perf_install_in_context(void *info)
302 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); 417 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
303 struct perf_counter *counter = info; 418 struct perf_counter *counter = info;
304 struct perf_counter_context *ctx = counter->ctx; 419 struct perf_counter_context *ctx = counter->ctx;
420 struct perf_counter *leader = counter->group_leader;
305 int cpu = smp_processor_id(); 421 int cpu = smp_processor_id();
306 unsigned long flags; 422 unsigned long flags;
307 u64 perf_flags; 423 u64 perf_flags;
@@ -328,22 +444,39 @@ static void __perf_install_in_context(void *info)
328 ctx->nr_counters++; 444 ctx->nr_counters++;
329 445
330 /* 446 /*
447 * Don't put the counter on if it is disabled or if
448 * it is in a group and the group isn't on.
449 */
450 if (counter->state != PERF_COUNTER_STATE_INACTIVE ||
451 (leader != counter && leader->state != PERF_COUNTER_STATE_ACTIVE))
452 goto unlock;
453
454 /*
331 * An exclusive counter can't go on if there are already active 455 * An exclusive counter can't go on if there are already active
332 * hardware counters, and no hardware counter can go on if there 456 * hardware counters, and no hardware counter can go on if there
333 * is already an exclusive counter on. 457 * is already an exclusive counter on.
334 */ 458 */
335 if (counter->state == PERF_COUNTER_STATE_INACTIVE && 459 if (!group_can_go_on(counter, cpuctx, 1))
336 !group_can_go_on(counter, cpuctx, 1))
337 err = -EEXIST; 460 err = -EEXIST;
338 else 461 else
339 err = counter_sched_in(counter, cpuctx, ctx, cpu); 462 err = counter_sched_in(counter, cpuctx, ctx, cpu);
340 463
341 if (err && counter->hw_event.pinned) 464 if (err) {
342 counter->state = PERF_COUNTER_STATE_ERROR; 465 /*
466 * This counter couldn't go on. If it is in a group
467 * then we have to pull the whole group off.
468 * If the counter group is pinned then put it in error state.
469 */
470 if (leader != counter)
471 group_sched_out(leader, cpuctx, ctx);
472 if (leader->hw_event.pinned)
473 leader->state = PERF_COUNTER_STATE_ERROR;
474 }
343 475
344 if (!err && !ctx->task && cpuctx->max_pertask) 476 if (!err && !ctx->task && cpuctx->max_pertask)
345 cpuctx->max_pertask--; 477 cpuctx->max_pertask--;
346 478
479 unlock:
347 hw_perf_restore(perf_flags); 480 hw_perf_restore(perf_flags);
348 481
349 spin_unlock(&ctx->lock); 482 spin_unlock(&ctx->lock);
@@ -359,6 +492,8 @@ static void __perf_install_in_context(void *info)
359 * If the counter is attached to a task which is on a CPU we use a smp 492 * If the counter is attached to a task which is on a CPU we use a smp
360 * call to enable it in the task context. The task might have been 493 * call to enable it in the task context. The task might have been
361 * scheduled away, but we check this in the smp call again. 494 * scheduled away, but we check this in the smp call again.
495 *
496 * Must be called with ctx->mutex held.
362 */ 497 */
363static void 498static void
364perf_install_in_context(struct perf_counter_context *ctx, 499perf_install_in_context(struct perf_counter_context *ctx,
@@ -387,7 +522,7 @@ retry:
387 /* 522 /*
388 * we need to retry the smp call. 523 * we need to retry the smp call.
389 */ 524 */
390 if (ctx->nr_active && list_empty(&counter->list_entry)) { 525 if (ctx->is_active && list_empty(&counter->list_entry)) {
391 spin_unlock_irq(&ctx->lock); 526 spin_unlock_irq(&ctx->lock);
392 goto retry; 527 goto retry;
393 } 528 }
@@ -404,26 +539,131 @@ retry:
404 spin_unlock_irq(&ctx->lock); 539 spin_unlock_irq(&ctx->lock);
405} 540}
406 541
407static void 542/*
408group_sched_out(struct perf_counter *group_counter, 543 * Cross CPU call to enable a performance counter
409 struct perf_cpu_context *cpuctx, 544 */
410 struct perf_counter_context *ctx) 545static void __perf_counter_enable(void *info)
411{ 546{
412 struct perf_counter *counter; 547 struct perf_counter *counter = info;
548 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
549 struct perf_counter_context *ctx = counter->ctx;
550 struct perf_counter *leader = counter->group_leader;
551 unsigned long flags;
552 int err;
413 553
414 if (group_counter->state != PERF_COUNTER_STATE_ACTIVE) 554 /*
555 * If this is a per-task counter, need to check whether this
556 * counter's task is the current task on this cpu.
557 */
558 if (ctx->task && cpuctx->task_ctx != ctx)
415 return; 559 return;
416 560
417 counter_sched_out(group_counter, cpuctx, ctx); 561 curr_rq_lock_irq_save(&flags);
562 spin_lock(&ctx->lock);
563
564 if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
565 goto unlock;
566 counter->state = PERF_COUNTER_STATE_INACTIVE;
418 567
419 /* 568 /*
420 * Schedule out siblings (if any): 569 * If the counter is in a group and isn't the group leader,
570 * then don't put it on unless the group is on.
421 */ 571 */
422 list_for_each_entry(counter, &group_counter->sibling_list, list_entry) 572 if (leader != counter && leader->state != PERF_COUNTER_STATE_ACTIVE)
423 counter_sched_out(counter, cpuctx, ctx); 573 goto unlock;
424 574
425 if (group_counter->hw_event.exclusive) 575 if (!group_can_go_on(counter, cpuctx, 1))
426 cpuctx->exclusive = 0; 576 err = -EEXIST;
577 else
578 err = counter_sched_in(counter, cpuctx, ctx,
579 smp_processor_id());
580
581 if (err) {
582 /*
583 * If this counter can't go on and it's part of a
584 * group, then the whole group has to come off.
585 */
586 if (leader != counter)
587 group_sched_out(leader, cpuctx, ctx);
588 if (leader->hw_event.pinned)
589 leader->state = PERF_COUNTER_STATE_ERROR;
590 }
591
592 unlock:
593 spin_unlock(&ctx->lock);
594 curr_rq_unlock_irq_restore(&flags);
595}
596
597/*
598 * Enable a counter.
599 */
600static void perf_counter_enable(struct perf_counter *counter)
601{
602 struct perf_counter_context *ctx = counter->ctx;
603 struct task_struct *task = ctx->task;
604
605 if (!task) {
606 /*
607 * Enable the counter on the cpu that it's on
608 */
609 smp_call_function_single(counter->cpu, __perf_counter_enable,
610 counter, 1);
611 return;
612 }
613
614 spin_lock_irq(&ctx->lock);
615 if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
616 goto out;
617
618 /*
619 * If the counter is in error state, clear that first.
620 * That way, if we see the counter in error state below, we
621 * know that it has gone back into error state, as distinct
622 * from the task having been scheduled away before the
623 * cross-call arrived.
624 */
625 if (counter->state == PERF_COUNTER_STATE_ERROR)
626 counter->state = PERF_COUNTER_STATE_OFF;
627
628 retry:
629 spin_unlock_irq(&ctx->lock);
630 task_oncpu_function_call(task, __perf_counter_enable, counter);
631
632 spin_lock_irq(&ctx->lock);
633
634 /*
635 * If the context is active and the counter is still off,
636 * we need to retry the cross-call.
637 */
638 if (ctx->is_active && counter->state == PERF_COUNTER_STATE_OFF)
639 goto retry;
640
641 /*
642 * Since we have the lock this context can't be scheduled
643 * in, so we can change the state safely.
644 */
645 if (counter->state == PERF_COUNTER_STATE_OFF)
646 counter->state = PERF_COUNTER_STATE_INACTIVE;
647 out:
648 spin_unlock_irq(&ctx->lock);
649}
650
651/*
652 * Enable a counter and all its children.
653 */
654static void perf_counter_enable_family(struct perf_counter *counter)
655{
656 struct perf_counter *child;
657
658 perf_counter_enable(counter);
659
660 /*
661 * Lock the mutex to protect the list of children
662 */
663 mutex_lock(&counter->mutex);
664 list_for_each_entry(child, &counter->child_list, child_list)
665 perf_counter_enable(child);
666 mutex_unlock(&counter->mutex);
427} 667}
428 668
429void __perf_counter_sched_out(struct perf_counter_context *ctx, 669void __perf_counter_sched_out(struct perf_counter_context *ctx,
@@ -432,16 +672,18 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx,
432 struct perf_counter *counter; 672 struct perf_counter *counter;
433 u64 flags; 673 u64 flags;
434 674
675 spin_lock(&ctx->lock);
676 ctx->is_active = 0;
435 if (likely(!ctx->nr_counters)) 677 if (likely(!ctx->nr_counters))
436 return; 678 goto out;
437 679
438 spin_lock(&ctx->lock);
439 flags = hw_perf_save_disable(); 680 flags = hw_perf_save_disable();
440 if (ctx->nr_active) { 681 if (ctx->nr_active) {
441 list_for_each_entry(counter, &ctx->counter_list, list_entry) 682 list_for_each_entry(counter, &ctx->counter_list, list_entry)
442 group_sched_out(counter, cpuctx, ctx); 683 group_sched_out(counter, cpuctx, ctx);
443 } 684 }
444 hw_perf_restore(flags); 685 hw_perf_restore(flags);
686 out:
445 spin_unlock(&ctx->lock); 687 spin_unlock(&ctx->lock);
446} 688}
447 689
@@ -528,10 +770,11 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
528 u64 flags; 770 u64 flags;
529 int can_add_hw = 1; 771 int can_add_hw = 1;
530 772
773 spin_lock(&ctx->lock);
774 ctx->is_active = 1;
531 if (likely(!ctx->nr_counters)) 775 if (likely(!ctx->nr_counters))
532 return; 776 goto out;
533 777
534 spin_lock(&ctx->lock);
535 flags = hw_perf_save_disable(); 778 flags = hw_perf_save_disable();
536 779
537 /* 780 /*
@@ -578,6 +821,7 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
578 } 821 }
579 } 822 }
580 hw_perf_restore(flags); 823 hw_perf_restore(flags);
824 out:
581 spin_unlock(&ctx->lock); 825 spin_unlock(&ctx->lock);
582} 826}
583 827
@@ -896,12 +1140,14 @@ static int perf_release(struct inode *inode, struct file *file)
896 1140
897 file->private_data = NULL; 1141 file->private_data = NULL;
898 1142
1143 mutex_lock(&ctx->mutex);
899 mutex_lock(&counter->mutex); 1144 mutex_lock(&counter->mutex);
900 1145
901 perf_counter_remove_from_context(counter); 1146 perf_counter_remove_from_context(counter);
902 put_context(ctx); 1147 put_context(ctx);
903 1148
904 mutex_unlock(&counter->mutex); 1149 mutex_unlock(&counter->mutex);
1150 mutex_unlock(&ctx->mutex);
905 1151
906 kfree(counter); 1152 kfree(counter);
907 1153
@@ -1053,10 +1299,30 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
1053 return events; 1299 return events;
1054} 1300}
1055 1301
1302static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1303{
1304 struct perf_counter *counter = file->private_data;
1305 int err = 0;
1306
1307 switch (cmd) {
1308 case PERF_COUNTER_IOC_ENABLE:
1309 perf_counter_enable_family(counter);
1310 break;
1311 case PERF_COUNTER_IOC_DISABLE:
1312 perf_counter_disable_family(counter);
1313 break;
1314 default:
1315 err = -ENOTTY;
1316 }
1317 return err;
1318}
1319
1056static const struct file_operations perf_fops = { 1320static const struct file_operations perf_fops = {
1057 .release = perf_release, 1321 .release = perf_release,
1058 .read = perf_read, 1322 .read = perf_read,
1059 .poll = perf_poll, 1323 .poll = perf_poll,
1324 .unlocked_ioctl = perf_ioctl,
1325 .compat_ioctl = perf_ioctl,
1060}; 1326};
1061 1327
1062static int cpu_clock_perf_counter_enable(struct perf_counter *counter) 1328static int cpu_clock_perf_counter_enable(struct perf_counter *counter)
@@ -1348,6 +1614,8 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
1348 INIT_LIST_HEAD(&counter->sibling_list); 1614 INIT_LIST_HEAD(&counter->sibling_list);
1349 init_waitqueue_head(&counter->waitq); 1615 init_waitqueue_head(&counter->waitq);
1350 1616
1617 INIT_LIST_HEAD(&counter->child_list);
1618
1351 counter->irqdata = &counter->data[0]; 1619 counter->irqdata = &counter->data[0];
1352 counter->usrdata = &counter->data[1]; 1620 counter->usrdata = &counter->data[1];
1353 counter->cpu = cpu; 1621 counter->cpu = cpu;
@@ -1452,7 +1720,9 @@ sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr __user,
1452 goto err_free_put_context; 1720 goto err_free_put_context;
1453 1721
1454 counter->filp = counter_file; 1722 counter->filp = counter_file;
1723 mutex_lock(&ctx->mutex);
1455 perf_install_in_context(ctx, counter, cpu); 1724 perf_install_in_context(ctx, counter, cpu);
1725 mutex_unlock(&ctx->mutex);
1456 1726
1457 fput_light(counter_file, fput_needed2); 1727 fput_light(counter_file, fput_needed2);
1458 1728
@@ -1479,6 +1749,7 @@ __perf_counter_init_context(struct perf_counter_context *ctx,
1479{ 1749{
1480 memset(ctx, 0, sizeof(*ctx)); 1750 memset(ctx, 0, sizeof(*ctx));
1481 spin_lock_init(&ctx->lock); 1751 spin_lock_init(&ctx->lock);
1752 mutex_init(&ctx->mutex);
1482 INIT_LIST_HEAD(&ctx->counter_list); 1753 INIT_LIST_HEAD(&ctx->counter_list);
1483 ctx->task = task; 1754 ctx->task = task;
1484} 1755}
@@ -1486,20 +1757,30 @@ __perf_counter_init_context(struct perf_counter_context *ctx,
1486/* 1757/*
1487 * inherit a counter from parent task to child task: 1758 * inherit a counter from parent task to child task:
1488 */ 1759 */
1489static int 1760static struct perf_counter *
1490inherit_counter(struct perf_counter *parent_counter, 1761inherit_counter(struct perf_counter *parent_counter,
1491 struct task_struct *parent, 1762 struct task_struct *parent,
1492 struct perf_counter_context *parent_ctx, 1763 struct perf_counter_context *parent_ctx,
1493 struct task_struct *child, 1764 struct task_struct *child,
1765 struct perf_counter *group_leader,
1494 struct perf_counter_context *child_ctx) 1766 struct perf_counter_context *child_ctx)
1495{ 1767{
1496 struct perf_counter *child_counter; 1768 struct perf_counter *child_counter;
1497 1769
1770 /*
1771 * Instead of creating recursive hierarchies of counters,
1772 * we link inherited counters back to the original parent,
1773 * which has a filp for sure, which we use as the reference
1774 * count:
1775 */
1776 if (parent_counter->parent)
1777 parent_counter = parent_counter->parent;
1778
1498 child_counter = perf_counter_alloc(&parent_counter->hw_event, 1779 child_counter = perf_counter_alloc(&parent_counter->hw_event,
1499 parent_counter->cpu, NULL, 1780 parent_counter->cpu, group_leader,
1500 GFP_ATOMIC); 1781 GFP_KERNEL);
1501 if (!child_counter) 1782 if (!child_counter)
1502 return -ENOMEM; 1783 return NULL;
1503 1784
1504 /* 1785 /*
1505 * Link it up in the child's context: 1786 * Link it up in the child's context:
@@ -1523,16 +1804,82 @@ inherit_counter(struct perf_counter *parent_counter,
1523 */ 1804 */
1524 atomic_long_inc(&parent_counter->filp->f_count); 1805 atomic_long_inc(&parent_counter->filp->f_count);
1525 1806
1807 /*
1808 * Link this into the parent counter's child list
1809 */
1810 mutex_lock(&parent_counter->mutex);
1811 list_add_tail(&child_counter->child_list, &parent_counter->child_list);
1812
1813 /*
1814 * Make the child state follow the state of the parent counter,
1815 * not its hw_event.disabled bit. We hold the parent's mutex,
1816 * so we won't race with perf_counter_{en,dis}able_family.
1817 */
1818 if (parent_counter->state >= PERF_COUNTER_STATE_INACTIVE)
1819 child_counter->state = PERF_COUNTER_STATE_INACTIVE;
1820 else
1821 child_counter->state = PERF_COUNTER_STATE_OFF;
1822
1823 mutex_unlock(&parent_counter->mutex);
1824
1825 return child_counter;
1826}
1827
1828static int inherit_group(struct perf_counter *parent_counter,
1829 struct task_struct *parent,
1830 struct perf_counter_context *parent_ctx,
1831 struct task_struct *child,
1832 struct perf_counter_context *child_ctx)
1833{
1834 struct perf_counter *leader;
1835 struct perf_counter *sub;
1836
1837 leader = inherit_counter(parent_counter, parent, parent_ctx,
1838 child, NULL, child_ctx);
1839 if (!leader)
1840 return -ENOMEM;
1841 list_for_each_entry(sub, &parent_counter->sibling_list, list_entry) {
1842 if (!inherit_counter(sub, parent, parent_ctx,
1843 child, leader, child_ctx))
1844 return -ENOMEM;
1845 }
1526 return 0; 1846 return 0;
1527} 1847}
1528 1848
1849static void sync_child_counter(struct perf_counter *child_counter,
1850 struct perf_counter *parent_counter)
1851{
1852 u64 parent_val, child_val;
1853
1854 parent_val = atomic64_read(&parent_counter->count);
1855 child_val = atomic64_read(&child_counter->count);
1856
1857 /*
1858 * Add back the child's count to the parent's count:
1859 */
1860 atomic64_add(child_val, &parent_counter->count);
1861
1862 /*
1863 * Remove this counter from the parent's list
1864 */
1865 mutex_lock(&parent_counter->mutex);
1866 list_del_init(&child_counter->child_list);
1867 mutex_unlock(&parent_counter->mutex);
1868
1869 /*
1870 * Release the parent counter, if this was the last
1871 * reference to it.
1872 */
1873 fput(parent_counter->filp);
1874}
1875
1529static void 1876static void
1530__perf_counter_exit_task(struct task_struct *child, 1877__perf_counter_exit_task(struct task_struct *child,
1531 struct perf_counter *child_counter, 1878 struct perf_counter *child_counter,
1532 struct perf_counter_context *child_ctx) 1879 struct perf_counter_context *child_ctx)
1533{ 1880{
1534 struct perf_counter *parent_counter; 1881 struct perf_counter *parent_counter;
1535 u64 parent_val, child_val; 1882 struct perf_counter *sub, *tmp;
1536 1883
1537 /* 1884 /*
1538 * If we do not self-reap then we have to wait for the 1885 * If we do not self-reap then we have to wait for the
@@ -1561,7 +1908,7 @@ __perf_counter_exit_task(struct task_struct *child,
1561 1908
1562 cpuctx = &__get_cpu_var(perf_cpu_context); 1909 cpuctx = &__get_cpu_var(perf_cpu_context);
1563 1910
1564 counter_sched_out(child_counter, cpuctx, child_ctx); 1911 group_sched_out(child_counter, cpuctx, child_ctx);
1565 1912
1566 list_del_init(&child_counter->list_entry); 1913 list_del_init(&child_counter->list_entry);
1567 1914
@@ -1577,26 +1924,23 @@ __perf_counter_exit_task(struct task_struct *child,
1577 * that are still around due to the child reference. These 1924 * that are still around due to the child reference. These
1578 * counters need to be zapped - but otherwise linger. 1925 * counters need to be zapped - but otherwise linger.
1579 */ 1926 */
1580 if (!parent_counter) 1927 if (parent_counter) {
1581 return; 1928 sync_child_counter(child_counter, parent_counter);
1582 1929 list_for_each_entry_safe(sub, tmp, &child_counter->sibling_list,
1583 parent_val = atomic64_read(&parent_counter->count); 1930 list_entry) {
1584 child_val = atomic64_read(&child_counter->count); 1931 if (sub->parent)
1585 1932 sync_child_counter(sub, sub->parent);
1586 /* 1933 kfree(sub);
1587 * Add back the child's count to the parent's count: 1934 }
1588 */ 1935 }
1589 atomic64_add(child_val, &parent_counter->count);
1590
1591 fput(parent_counter->filp);
1592 1936
1593 kfree(child_counter); 1937 kfree(child_counter);
1594} 1938}
1595 1939
1596/* 1940/*
1597 * When a child task exist, feed back counter values to parent counters. 1941 * When a child task exits, feed back counter values to parent counters.
1598 * 1942 *
1599 * Note: we are running in child context, but the PID is not hashed 1943 * Note: we may be running in child context, but the PID is not hashed
1600 * anymore so new counters will not be added. 1944 * anymore so new counters will not be added.
1601 */ 1945 */
1602void perf_counter_exit_task(struct task_struct *child) 1946void perf_counter_exit_task(struct task_struct *child)
@@ -1620,9 +1964,8 @@ void perf_counter_exit_task(struct task_struct *child)
1620void perf_counter_init_task(struct task_struct *child) 1964void perf_counter_init_task(struct task_struct *child)
1621{ 1965{
1622 struct perf_counter_context *child_ctx, *parent_ctx; 1966 struct perf_counter_context *child_ctx, *parent_ctx;
1623 struct perf_counter *counter, *parent_counter; 1967 struct perf_counter *counter;
1624 struct task_struct *parent = current; 1968 struct task_struct *parent = current;
1625 unsigned long flags;
1626 1969
1627 child_ctx = &child->perf_counter_ctx; 1970 child_ctx = &child->perf_counter_ctx;
1628 parent_ctx = &parent->perf_counter_ctx; 1971 parent_ctx = &parent->perf_counter_ctx;
@@ -1641,32 +1984,22 @@ void perf_counter_init_task(struct task_struct *child)
1641 * Lock the parent list. No need to lock the child - not PID 1984 * Lock the parent list. No need to lock the child - not PID
1642 * hashed yet and not running, so nobody can access it. 1985 * hashed yet and not running, so nobody can access it.
1643 */ 1986 */
1644 spin_lock_irqsave(&parent_ctx->lock, flags); 1987 mutex_lock(&parent_ctx->mutex);
1645 1988
1646 /* 1989 /*
1647 * We dont have to disable NMIs - we are only looking at 1990 * We dont have to disable NMIs - we are only looking at
1648 * the list, not manipulating it: 1991 * the list, not manipulating it:
1649 */ 1992 */
1650 list_for_each_entry(counter, &parent_ctx->counter_list, list_entry) { 1993 list_for_each_entry(counter, &parent_ctx->counter_list, list_entry) {
1651 if (!counter->hw_event.inherit || counter->group_leader != counter) 1994 if (!counter->hw_event.inherit)
1652 continue; 1995 continue;
1653 1996
1654 /* 1997 if (inherit_group(counter, parent,
1655 * Instead of creating recursive hierarchies of counters,
1656 * we link inheritd counters back to the original parent,
1657 * which has a filp for sure, which we use as the reference
1658 * count:
1659 */
1660 parent_counter = counter;
1661 if (counter->parent)
1662 parent_counter = counter->parent;
1663
1664 if (inherit_counter(parent_counter, parent,
1665 parent_ctx, child, child_ctx)) 1998 parent_ctx, child, child_ctx))
1666 break; 1999 break;
1667 } 2000 }
1668 2001
1669 spin_unlock_irqrestore(&parent_ctx->lock, flags); 2002 mutex_unlock(&parent_ctx->mutex);
1670} 2003}
1671 2004
1672static void __cpuinit perf_counter_init_cpu(int cpu) 2005static void __cpuinit perf_counter_init_cpu(int cpu)
@@ -1692,11 +2025,15 @@ static void __perf_counter_exit_cpu(void *info)
1692 2025
1693 list_for_each_entry_safe(counter, tmp, &ctx->counter_list, list_entry) 2026 list_for_each_entry_safe(counter, tmp, &ctx->counter_list, list_entry)
1694 __perf_counter_remove_from_context(counter); 2027 __perf_counter_remove_from_context(counter);
1695
1696} 2028}
1697static void perf_counter_exit_cpu(int cpu) 2029static void perf_counter_exit_cpu(int cpu)
1698{ 2030{
2031 struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
2032 struct perf_counter_context *ctx = &cpuctx->ctx;
2033
2034 mutex_lock(&ctx->mutex);
1699 smp_call_function_single(cpu, __perf_counter_exit_cpu, NULL, 1); 2035 smp_call_function_single(cpu, __perf_counter_exit_cpu, NULL, 1);
2036 mutex_unlock(&ctx->mutex);
1700} 2037}
1701#else 2038#else
1702static inline void perf_counter_exit_cpu(int cpu) { } 2039static inline void perf_counter_exit_cpu(int cpu) { }