author:    Ingo Molnar <mingo@elte.hu>  2008-12-11 02:38:42 -0500
committer: Ingo Molnar <mingo@elte.hu>  2008-12-11 09:45:49 -0500
commit:    04289bb9891882202d7e961c4c04d2376930e9f9
tree:      13340847915efc809a62bf91b3cd45e0e0416deb /kernel/perf_counter.c
parent:    9f66a3810fe0d4100972db84290f3ae4a4d77025
perf counters: add support for group counters
Impact: add group counters
This patch adds the "counter groups" abstraction.
Groups of counters behave much like normal 'single' counters, with a
few semantic and behavioral extensions on top of that.
A counter group is created by opening a new counter with the open()
syscall's group_fd file descriptor parameter pointing to another,
already existing counter, which acts as the group leader.
Groups of counters are scheduled in and out in one atomic group, and
they are also round-robin scheduled atomically.
Counters that are members of a group can also record events with an
(atomic) extended timestamp that extends to all members of the group,
if the record type is set to PERF_RECORD_GROUP.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/perf_counter.c')
-rw-r--r--  kernel/perf_counter.c | 282
1 file changed, 216 insertions(+), 66 deletions(-)
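For orientation, here is a minimal, hypothetical userspace sketch of how a counter group might be driven through this interface. It is not part of the patch: the syscall-number macro, the wrapper function, the pid/cpu conventions and the struct perf_counter_hw_event layout shown here are assumptions; only the argument order follows sys_perf_counter_open() in the diff below.

```c
/* Hypothetical usage sketch: open a group leader, then attach a sibling via group_fd. */
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/types.h>

/*
 * Placeholder declaration -- the real layout is provided by the kernel
 * headers of this patch series; the fields below are assumptions.
 */
struct perf_counter_hw_event {
	long long		type;		/* assumed: which event to count */
	unsigned long long	__pad[4];	/* placeholder for the remaining fields */
};

/* Assumed raw-syscall wrapper; __NR_perf_counter_open must match the target kernel. */
static int perf_counter_open(struct perf_counter_hw_event *hw_event,
			     pid_t pid, int cpu, int group_fd)
{
	return (int) syscall(__NR_perf_counter_open, hw_event, pid, cpu, group_fd);
}

int main(void)
{
	struct perf_counter_hw_event leader_event  = { 0 };	/* e.g. cycles */
	struct perf_counter_hw_event sibling_event = { 0 };	/* e.g. instructions */
	int leader_fd, sibling_fd;

	/* group_fd == -1: this counter becomes its own group leader. */
	leader_fd = perf_counter_open(&leader_event, 0 /* assumed: current task */, -1, -1);
	if (leader_fd < 0) {
		perror("leader");
		return 1;
	}

	/* Passing the leader's fd as group_fd attaches this counter as a sibling. */
	sibling_fd = perf_counter_open(&sibling_event, 0, -1, leader_fd);
	if (sibling_fd < 0) {
		perror("sibling");
		return 1;
	}

	/* Both counters are now scheduled in and out as one atomic group. */
	return 0;
}
```

Per the commit message above, setting the record type to PERF_RECORD_GROUP would additionally make recorded events carry an extended timestamp covering all members of the group.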
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 0d323ceda3a4..fa59fe8c02d5 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -10,6 +10,7 @@ | |||
10 | #include <linux/fs.h> | 10 | #include <linux/fs.h> |
11 | #include <linux/cpu.h> | 11 | #include <linux/cpu.h> |
12 | #include <linux/smp.h> | 12 | #include <linux/smp.h> |
13 | #include <linux/file.h> | ||
13 | #include <linux/poll.h> | 14 | #include <linux/poll.h> |
14 | #include <linux/sysfs.h> | 15 | #include <linux/sysfs.h> |
15 | #include <linux/ptrace.h> | 16 | #include <linux/ptrace.h> |
@@ -55,7 +56,7 @@ void __weak hw_perf_counter_setup(void) { } | |||
55 | * Read the cached counter in counter safe against cross CPU / NMI | 56 | * Read the cached counter in counter safe against cross CPU / NMI |
56 | * modifications. 64 bit version - no complications. | 57 | * modifications. 64 bit version - no complications. |
57 | */ | 58 | */ |
58 | static inline u64 perf_read_counter_safe(struct perf_counter *counter) | 59 | static inline u64 perf_counter_read_safe(struct perf_counter *counter) |
59 | { | 60 | { |
60 | return (u64) atomic64_read(&counter->count); | 61 | return (u64) atomic64_read(&counter->count); |
61 | } | 62 | } |
@@ -66,7 +67,7 @@ static inline u64 perf_read_counter_safe(struct perf_counter *counter) | |||
66 | * Read the cached counter in counter safe against cross CPU / NMI | 67 | * Read the cached counter in counter safe against cross CPU / NMI |
67 | * modifications. 32 bit version. | 68 | * modifications. 32 bit version. |
68 | */ | 69 | */ |
69 | static u64 perf_read_counter_safe(struct perf_counter *counter) | 70 | static u64 perf_counter_read_safe(struct perf_counter *counter) |
70 | { | 71 | { |
71 | u32 cntl, cnth; | 72 | u32 cntl, cnth; |
72 | 73 | ||
@@ -83,13 +84,55 @@ static u64 perf_read_counter_safe(struct perf_counter *counter) | |||
83 | 84 | ||
84 | #endif | 85 | #endif |
85 | 86 | ||
87 | static void | ||
88 | list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx) | ||
89 | { | ||
90 | struct perf_counter *group_leader = counter->group_leader; | ||
91 | |||
92 | /* | ||
93 | * Depending on whether it is a standalone or sibling counter, | ||
94 | * add it straight to the context's counter list, or to the group | ||
95 | * leader's sibling list: | ||
96 | */ | ||
97 | if (counter->group_leader == counter) | ||
98 | list_add_tail(&counter->list_entry, &ctx->counter_list); | ||
99 | else | ||
100 | list_add_tail(&counter->list_entry, &group_leader->sibling_list); | ||
101 | } | ||
102 | |||
103 | static void | ||
104 | list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx) | ||
105 | { | ||
106 | struct perf_counter *sibling, *tmp; | ||
107 | |||
108 | list_del_init(&counter->list_entry); | ||
109 | |||
110 | if (list_empty(&counter->sibling_list)) | ||
111 | return; | ||
112 | |||
113 | /* | ||
114 | * If this was a group counter with sibling counters then | ||
115 | * upgrade the siblings to singleton counters by adding them | ||
116 | * to the context list directly: | ||
117 | */ | ||
118 | list_for_each_entry_safe(sibling, tmp, | ||
119 | &counter->sibling_list, list_entry) { | ||
120 | |||
121 | list_del_init(&sibling->list_entry); | ||
122 | list_add_tail(&sibling->list_entry, &ctx->counter_list); | ||
123 | WARN_ON_ONCE(!sibling->group_leader); | ||
124 | WARN_ON_ONCE(sibling->group_leader == sibling); | ||
125 | sibling->group_leader = sibling; | ||
126 | } | ||
127 | } | ||
128 | |||
86 | /* | 129 | /* |
87 | * Cross CPU call to remove a performance counter | 130 | * Cross CPU call to remove a performance counter |
88 | * | 131 | * |
89 | * We disable the counter on the hardware level first. After that we | 132 | * We disable the counter on the hardware level first. After that we |
90 | * remove it from the context list. | 133 | * remove it from the context list. |
91 | */ | 134 | */ |
92 | static void __perf_remove_from_context(void *info) | 135 | static void __perf_counter_remove_from_context(void *info) |
93 | { | 136 | { |
94 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | 137 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); |
95 | struct perf_counter *counter = info; | 138 | struct perf_counter *counter = info; |
@@ -119,7 +162,7 @@ static void __perf_remove_from_context(void *info) | |||
119 | * counters on a global level. NOP for non NMI based counters. | 162 | * counters on a global level. NOP for non NMI based counters. |
120 | */ | 163 | */ |
121 | hw_perf_disable_all(); | 164 | hw_perf_disable_all(); |
122 | list_del_init(&counter->list); | 165 | list_del_counter(counter, ctx); |
123 | hw_perf_enable_all(); | 166 | hw_perf_enable_all(); |
124 | 167 | ||
125 | if (!ctx->task) { | 168 | if (!ctx->task) { |
@@ -144,7 +187,7 @@ static void __perf_remove_from_context(void *info) | |||
144 | * CPU counters are removed with a smp call. For task counters we only | 187 | * CPU counters are removed with a smp call. For task counters we only |
145 | * call when the task is on a CPU. | 188 | * call when the task is on a CPU. |
146 | */ | 189 | */ |
147 | static void perf_remove_from_context(struct perf_counter *counter) | 190 | static void perf_counter_remove_from_context(struct perf_counter *counter) |
148 | { | 191 | { |
149 | struct perf_counter_context *ctx = counter->ctx; | 192 | struct perf_counter_context *ctx = counter->ctx; |
150 | struct task_struct *task = ctx->task; | 193 | struct task_struct *task = ctx->task; |
@@ -155,32 +198,32 @@ static void perf_remove_from_context(struct perf_counter *counter) | |||
155 | * the removal is always sucessful. | 198 | * the removal is always sucessful. |
156 | */ | 199 | */ |
157 | smp_call_function_single(counter->cpu, | 200 | smp_call_function_single(counter->cpu, |
158 | __perf_remove_from_context, | 201 | __perf_counter_remove_from_context, |
159 | counter, 1); | 202 | counter, 1); |
160 | return; | 203 | return; |
161 | } | 204 | } |
162 | 205 | ||
163 | retry: | 206 | retry: |
164 | task_oncpu_function_call(task, __perf_remove_from_context, | 207 | task_oncpu_function_call(task, __perf_counter_remove_from_context, |
165 | counter); | 208 | counter); |
166 | 209 | ||
167 | spin_lock_irq(&ctx->lock); | 210 | spin_lock_irq(&ctx->lock); |
168 | /* | 211 | /* |
169 | * If the context is active we need to retry the smp call. | 212 | * If the context is active we need to retry the smp call. |
170 | */ | 213 | */ |
171 | if (ctx->nr_active && !list_empty(&counter->list)) { | 214 | if (ctx->nr_active && !list_empty(&counter->list_entry)) { |
172 | spin_unlock_irq(&ctx->lock); | 215 | spin_unlock_irq(&ctx->lock); |
173 | goto retry; | 216 | goto retry; |
174 | } | 217 | } |
175 | 218 | ||
176 | /* | 219 | /* |
177 | * The lock prevents that this context is scheduled in so we | 220 | * The lock prevents that this context is scheduled in so we |
178 | * can remove the counter safely, if it the call above did not | 221 | * can remove the counter safely, if the call above did not |
179 | * succeed. | 222 | * succeed. |
180 | */ | 223 | */ |
181 | if (!list_empty(&counter->list)) { | 224 | if (!list_empty(&counter->list_entry)) { |
182 | ctx->nr_counters--; | 225 | ctx->nr_counters--; |
183 | list_del_init(&counter->list); | 226 | list_del_counter(counter, ctx); |
184 | counter->task = NULL; | 227 | counter->task = NULL; |
185 | } | 228 | } |
186 | spin_unlock_irq(&ctx->lock); | 229 | spin_unlock_irq(&ctx->lock); |
@@ -211,7 +254,7 @@ static void __perf_install_in_context(void *info) | |||
211 | * counters on a global level. NOP for non NMI based counters. | 254 | * counters on a global level. NOP for non NMI based counters. |
212 | */ | 255 | */ |
213 | hw_perf_disable_all(); | 256 | hw_perf_disable_all(); |
214 | list_add_tail(&counter->list, &ctx->counters); | 257 | list_add_counter(counter, ctx); |
215 | hw_perf_enable_all(); | 258 | hw_perf_enable_all(); |
216 | 259 | ||
217 | ctx->nr_counters++; | 260 | ctx->nr_counters++; |
@@ -268,7 +311,7 @@ retry: | |||
268 | * If the context is active and the counter has not been added | 311 | * If the context is active and the counter has not been added |
269 | * we need to retry the smp call. | 312 | * we need to retry the smp call. |
270 | */ | 313 | */ |
271 | if (ctx->nr_active && list_empty(&counter->list)) { | 314 | if (ctx->nr_active && list_empty(&counter->list_entry)) { |
272 | spin_unlock_irq(&ctx->lock); | 315 | spin_unlock_irq(&ctx->lock); |
273 | goto retry; | 316 | goto retry; |
274 | } | 317 | } |
@@ -278,13 +321,45 @@ retry: | |||
278 | * can add the counter safely, if it the call above did not | 321 | * can add the counter safely, if it the call above did not |
279 | * succeed. | 322 | * succeed. |
280 | */ | 323 | */ |
281 | if (list_empty(&counter->list)) { | 324 | if (list_empty(&counter->list_entry)) { |
282 | list_add_tail(&counter->list, &ctx->counters); | 325 | list_add_counter(counter, ctx); |
283 | ctx->nr_counters++; | 326 | ctx->nr_counters++; |
284 | } | 327 | } |
285 | spin_unlock_irq(&ctx->lock); | 328 | spin_unlock_irq(&ctx->lock); |
286 | } | 329 | } |
287 | 330 | ||
331 | static void | ||
332 | counter_sched_out(struct perf_counter *counter, | ||
333 | struct perf_cpu_context *cpuctx, | ||
334 | struct perf_counter_context *ctx) | ||
335 | { | ||
336 | if (!counter->active) | ||
337 | return; | ||
338 | |||
339 | hw_perf_counter_disable(counter); | ||
340 | counter->active = 0; | ||
341 | counter->oncpu = -1; | ||
342 | |||
343 | cpuctx->active_oncpu--; | ||
344 | ctx->nr_active--; | ||
345 | } | ||
346 | |||
347 | static void | ||
348 | group_sched_out(struct perf_counter *group_counter, | ||
349 | struct perf_cpu_context *cpuctx, | ||
350 | struct perf_counter_context *ctx) | ||
351 | { | ||
352 | struct perf_counter *counter; | ||
353 | |||
354 | counter_sched_out(group_counter, cpuctx, ctx); | ||
355 | |||
356 | /* | ||
357 | * Schedule out siblings (if any): | ||
358 | */ | ||
359 | list_for_each_entry(counter, &group_counter->sibling_list, list_entry) | ||
360 | counter_sched_out(counter, cpuctx, ctx); | ||
361 | } | ||
362 | |||
288 | /* | 363 | /* |
289 | * Called from scheduler to remove the counters of the current task, | 364 | * Called from scheduler to remove the counters of the current task, |
290 | * with interrupts disabled. | 365 | * with interrupts disabled. |
@@ -306,21 +381,48 @@ void perf_counter_task_sched_out(struct task_struct *task, int cpu) | |||
306 | return; | 381 | return; |
307 | 382 | ||
308 | spin_lock(&ctx->lock); | 383 | spin_lock(&ctx->lock); |
309 | list_for_each_entry(counter, &ctx->counters, list) { | 384 | if (ctx->nr_active) { |
310 | if (!ctx->nr_active) | 385 | list_for_each_entry(counter, &ctx->counter_list, list_entry) |
311 | break; | 386 | group_sched_out(counter, cpuctx, ctx); |
312 | if (counter->active) { | ||
313 | hw_perf_counter_disable(counter); | ||
314 | counter->active = 0; | ||
315 | counter->oncpu = -1; | ||
316 | ctx->nr_active--; | ||
317 | cpuctx->active_oncpu--; | ||
318 | } | ||
319 | } | 387 | } |
320 | spin_unlock(&ctx->lock); | 388 | spin_unlock(&ctx->lock); |
321 | cpuctx->task_ctx = NULL; | 389 | cpuctx->task_ctx = NULL; |
322 | } | 390 | } |
323 | 391 | ||
392 | static void | ||
393 | counter_sched_in(struct perf_counter *counter, | ||
394 | struct perf_cpu_context *cpuctx, | ||
395 | struct perf_counter_context *ctx, | ||
396 | int cpu) | ||
397 | { | ||
398 | if (!counter->active) | ||
399 | return; | ||
400 | |||
401 | hw_perf_counter_enable(counter); | ||
402 | counter->active = 1; | ||
403 | counter->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */ | ||
404 | |||
405 | cpuctx->active_oncpu++; | ||
406 | ctx->nr_active++; | ||
407 | } | ||
408 | |||
409 | static void | ||
410 | group_sched_in(struct perf_counter *group_counter, | ||
411 | struct perf_cpu_context *cpuctx, | ||
412 | struct perf_counter_context *ctx, | ||
413 | int cpu) | ||
414 | { | ||
415 | struct perf_counter *counter; | ||
416 | |||
417 | counter_sched_in(group_counter, cpuctx, ctx, cpu); | ||
418 | |||
419 | /* | ||
420 | * Schedule in siblings as one group (if any): | ||
421 | */ | ||
422 | list_for_each_entry(counter, &group_counter->sibling_list, list_entry) | ||
423 | counter_sched_in(counter, cpuctx, ctx, cpu); | ||
424 | } | ||
425 | |||
324 | /* | 426 | /* |
325 | * Called from scheduler to add the counters of the current task | 427 | * Called from scheduler to add the counters of the current task |
326 | * with interrupts disabled. | 428 | * with interrupts disabled. |
@@ -342,19 +444,21 @@ void perf_counter_task_sched_in(struct task_struct *task, int cpu) | |||
342 | return; | 444 | return; |
343 | 445 | ||
344 | spin_lock(&ctx->lock); | 446 | spin_lock(&ctx->lock); |
345 | list_for_each_entry(counter, &ctx->counters, list) { | 447 | list_for_each_entry(counter, &ctx->counter_list, list_entry) { |
346 | if (ctx->nr_active == cpuctx->max_pertask) | 448 | if (ctx->nr_active == cpuctx->max_pertask) |
347 | break; | 449 | break; |
450 | |||
451 | /* | ||
452 | * Listen to the 'cpu' scheduling filter constraint | ||
453 | * of counters: | ||
454 | */ | ||
348 | if (counter->cpu != -1 && counter->cpu != cpu) | 455 | if (counter->cpu != -1 && counter->cpu != cpu) |
349 | continue; | 456 | continue; |
350 | 457 | ||
351 | hw_perf_counter_enable(counter); | 458 | group_sched_in(counter, cpuctx, ctx, cpu); |
352 | counter->active = 1; | ||
353 | counter->oncpu = cpu; | ||
354 | ctx->nr_active++; | ||
355 | cpuctx->active_oncpu++; | ||
356 | } | 459 | } |
357 | spin_unlock(&ctx->lock); | 460 | spin_unlock(&ctx->lock); |
461 | |||
358 | cpuctx->task_ctx = ctx; | 462 | cpuctx->task_ctx = ctx; |
359 | } | 463 | } |
360 | 464 | ||
@@ -371,12 +475,12 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu) | |||
371 | spin_lock(&ctx->lock); | 475 | spin_lock(&ctx->lock); |
372 | 476 | ||
373 | /* | 477 | /* |
374 | * Rotate the first entry last: | 478 | * Rotate the first entry last (works just fine for group counters too): |
375 | */ | 479 | */ |
376 | hw_perf_disable_all(); | 480 | hw_perf_disable_all(); |
377 | list_for_each_entry(counter, &ctx->counters, list) { | 481 | list_for_each_entry(counter, &ctx->counter_list, list_entry) { |
378 | list_del(&counter->list); | 482 | list_del(&counter->list_entry); |
379 | list_add_tail(&counter->list, &ctx->counters); | 483 | list_add_tail(&counter->list_entry, &ctx->counter_list); |
380 | break; | 484 | break; |
381 | } | 485 | } |
382 | hw_perf_enable_all(); | 486 | hw_perf_enable_all(); |
@@ -387,16 +491,23 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu) | |||
387 | } | 491 | } |
388 | 492 | ||
389 | /* | 493 | /* |
494 | * Initialize the perf_counter context in a task_struct: | ||
495 | */ | ||
496 | static void | ||
497 | __perf_counter_init_context(struct perf_counter_context *ctx, | ||
498 | struct task_struct *task) | ||
499 | { | ||
500 | spin_lock_init(&ctx->lock); | ||
501 | INIT_LIST_HEAD(&ctx->counter_list); | ||
502 | ctx->nr_counters = 0; | ||
503 | ctx->task = task; | ||
504 | } | ||
505 | /* | ||
390 | * Initialize the perf_counter context in task_struct | 506 | * Initialize the perf_counter context in task_struct |
391 | */ | 507 | */ |
392 | void perf_counter_init_task(struct task_struct *task) | 508 | void perf_counter_init_task(struct task_struct *task) |
393 | { | 509 | { |
394 | struct perf_counter_context *ctx = &task->perf_counter_ctx; | 510 | __perf_counter_init_context(&task->perf_counter_ctx, task); |
395 | |||
396 | spin_lock_init(&ctx->lock); | ||
397 | INIT_LIST_HEAD(&ctx->counters); | ||
398 | ctx->nr_counters = 0; | ||
399 | ctx->task = task; | ||
400 | } | 511 | } |
401 | 512 | ||
402 | /* | 513 | /* |
@@ -407,7 +518,7 @@ static void __hw_perf_counter_read(void *info) | |||
407 | hw_perf_counter_read(info); | 518 | hw_perf_counter_read(info); |
408 | } | 519 | } |
409 | 520 | ||
410 | static u64 perf_read_counter(struct perf_counter *counter) | 521 | static u64 perf_counter_read(struct perf_counter *counter) |
411 | { | 522 | { |
412 | /* | 523 | /* |
413 | * If counter is enabled and currently active on a CPU, update the | 524 | * If counter is enabled and currently active on a CPU, update the |
@@ -418,7 +529,7 @@ static u64 perf_read_counter(struct perf_counter *counter) | |||
418 | __hw_perf_counter_read, counter, 1); | 529 | __hw_perf_counter_read, counter, 1); |
419 | } | 530 | } |
420 | 531 | ||
421 | return perf_read_counter_safe(counter); | 532 | return perf_counter_read_safe(counter); |
422 | } | 533 | } |
423 | 534 | ||
424 | /* | 535 | /* |
@@ -555,7 +666,7 @@ static int perf_release(struct inode *inode, struct file *file) | |||
555 | 666 | ||
556 | mutex_lock(&counter->mutex); | 667 | mutex_lock(&counter->mutex); |
557 | 668 | ||
558 | perf_remove_from_context(counter); | 669 | perf_counter_remove_from_context(counter); |
559 | put_context(ctx); | 670 | put_context(ctx); |
560 | 671 | ||
561 | mutex_unlock(&counter->mutex); | 672 | mutex_unlock(&counter->mutex); |
@@ -577,7 +688,7 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) | |||
577 | return -EINVAL; | 688 | return -EINVAL; |
578 | 689 | ||
579 | mutex_lock(&counter->mutex); | 690 | mutex_lock(&counter->mutex); |
580 | cntval = perf_read_counter(counter); | 691 | cntval = perf_counter_read(counter); |
581 | mutex_unlock(&counter->mutex); | 692 | mutex_unlock(&counter->mutex); |
582 | 693 | ||
583 | return put_user(cntval, (u64 __user *) buf) ? -EFAULT : sizeof(cntval); | 694 | return put_user(cntval, (u64 __user *) buf) ? -EFAULT : sizeof(cntval); |
@@ -707,15 +818,25 @@ static const struct file_operations perf_fops = { | |||
707 | * Allocate and initialize a counter structure | 818 | * Allocate and initialize a counter structure |
708 | */ | 819 | */ |
709 | static struct perf_counter * | 820 | static struct perf_counter * |
710 | perf_counter_alloc(struct perf_counter_hw_event *hw_event, int cpu) | 821 | perf_counter_alloc(struct perf_counter_hw_event *hw_event, |
822 | int cpu, | ||
823 | struct perf_counter *group_leader) | ||
711 | { | 824 | { |
712 | struct perf_counter *counter = kzalloc(sizeof(*counter), GFP_KERNEL); | 825 | struct perf_counter *counter = kzalloc(sizeof(*counter), GFP_KERNEL); |
713 | 826 | ||
714 | if (!counter) | 827 | if (!counter) |
715 | return NULL; | 828 | return NULL; |
716 | 829 | ||
830 | /* | ||
831 | * Single counters are their own group leaders, with an | ||
832 | * empty sibling list: | ||
833 | */ | ||
834 | if (!group_leader) | ||
835 | group_leader = counter; | ||
836 | |||
717 | mutex_init(&counter->mutex); | 837 | mutex_init(&counter->mutex); |
718 | INIT_LIST_HEAD(&counter->list); | 838 | INIT_LIST_HEAD(&counter->list_entry); |
839 | INIT_LIST_HEAD(&counter->sibling_list); | ||
719 | init_waitqueue_head(&counter->waitq); | 840 | init_waitqueue_head(&counter->waitq); |
720 | 841 | ||
721 | counter->irqdata = &counter->data[0]; | 842 | counter->irqdata = &counter->data[0]; |
@@ -723,6 +844,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, int cpu) | |||
723 | counter->cpu = cpu; | 844 | counter->cpu = cpu; |
724 | counter->hw_event = *hw_event; | 845 | counter->hw_event = *hw_event; |
725 | counter->wakeup_pending = 0; | 846 | counter->wakeup_pending = 0; |
847 | counter->group_leader = group_leader; | ||
726 | 848 | ||
727 | return counter; | 849 | return counter; |
728 | } | 850 | } |
@@ -743,20 +865,45 @@ asmlinkage int sys_perf_counter_open( | |||
743 | int group_fd) | 865 | int group_fd) |
744 | 866 | ||
745 | { | 867 | { |
746 | struct perf_counter_context *ctx; | 868 | struct perf_counter *counter, *group_leader; |
747 | struct perf_counter_hw_event hw_event; | 869 | struct perf_counter_hw_event hw_event; |
748 | struct perf_counter *counter; | 870 | struct perf_counter_context *ctx; |
871 | struct file *group_file = NULL; | ||
872 | int fput_needed = 0; | ||
749 | int ret; | 873 | int ret; |
750 | 874 | ||
751 | if (copy_from_user(&hw_event, hw_event_uptr, sizeof(hw_event)) != 0) | 875 | if (copy_from_user(&hw_event, hw_event_uptr, sizeof(hw_event)) != 0) |
752 | return -EFAULT; | 876 | return -EFAULT; |
753 | 877 | ||
878 | /* | ||
879 | * Look up the group leader: | ||
880 | */ | ||
881 | group_leader = NULL; | ||
882 | if (group_fd != -1) { | ||
883 | ret = -EINVAL; | ||
884 | group_file = fget_light(group_fd, &fput_needed); | ||
885 | if (!group_file) | ||
886 | goto out_fput; | ||
887 | if (group_file->f_op != &perf_fops) | ||
888 | goto out_fput; | ||
889 | |||
890 | group_leader = group_file->private_data; | ||
891 | /* | ||
892 | * Do not allow a recursive hierarchy: | ||
893 | */ | ||
894 | if (group_leader->group_leader) | ||
895 | goto out_fput; | ||
896 | } | ||
897 | |||
898 | /* | ||
899 | * Get the target context (task or percpu): | ||
900 | */ | ||
754 | ctx = find_get_context(pid, cpu); | 901 | ctx = find_get_context(pid, cpu); |
755 | if (IS_ERR(ctx)) | 902 | if (IS_ERR(ctx)) |
756 | return PTR_ERR(ctx); | 903 | return PTR_ERR(ctx); |
757 | 904 | ||
758 | ret = -ENOMEM; | 905 | ret = -ENOMEM; |
759 | counter = perf_counter_alloc(&hw_event, cpu); | 906 | counter = perf_counter_alloc(&hw_event, cpu, group_leader); |
760 | if (!counter) | 907 | if (!counter) |
761 | goto err_put_context; | 908 | goto err_put_context; |
762 | 909 | ||
@@ -770,11 +917,14 @@ asmlinkage int sys_perf_counter_open( | |||
770 | if (ret < 0) | 917 | if (ret < 0) |
771 | goto err_remove_free_put_context; | 918 | goto err_remove_free_put_context; |
772 | 919 | ||
920 | out_fput: | ||
921 | fput_light(group_file, fput_needed); | ||
922 | |||
773 | return ret; | 923 | return ret; |
774 | 924 | ||
775 | err_remove_free_put_context: | 925 | err_remove_free_put_context: |
776 | mutex_lock(&counter->mutex); | 926 | mutex_lock(&counter->mutex); |
777 | perf_remove_from_context(counter); | 927 | perf_counter_remove_from_context(counter); |
778 | mutex_unlock(&counter->mutex); | 928 | mutex_unlock(&counter->mutex); |
779 | 929 | ||
780 | err_free_put_context: | 930 | err_free_put_context: |
@@ -783,40 +933,40 @@ err_free_put_context: | |||
783 | err_put_context: | 933 | err_put_context: |
784 | put_context(ctx); | 934 | put_context(ctx); |
785 | 935 | ||
786 | return ret; | 936 | goto out_fput; |
787 | } | 937 | } |
788 | 938 | ||
789 | static void __cpuinit perf_init_cpu(int cpu) | 939 | static void __cpuinit perf_counter_init_cpu(int cpu) |
790 | { | 940 | { |
791 | struct perf_cpu_context *ctx; | 941 | struct perf_cpu_context *cpuctx; |
792 | 942 | ||
793 | ctx = &per_cpu(perf_cpu_context, cpu); | 943 | cpuctx = &per_cpu(perf_cpu_context, cpu); |
794 | spin_lock_init(&ctx->ctx.lock); | 944 | __perf_counter_init_context(&cpuctx->ctx, NULL); |
795 | INIT_LIST_HEAD(&ctx->ctx.counters); | ||
796 | 945 | ||
797 | mutex_lock(&perf_resource_mutex); | 946 | mutex_lock(&perf_resource_mutex); |
798 | ctx->max_pertask = perf_max_counters - perf_reserved_percpu; | 947 | cpuctx->max_pertask = perf_max_counters - perf_reserved_percpu; |
799 | mutex_unlock(&perf_resource_mutex); | 948 | mutex_unlock(&perf_resource_mutex); |
949 | |||
800 | hw_perf_counter_setup(); | 950 | hw_perf_counter_setup(); |
801 | } | 951 | } |
802 | 952 | ||
803 | #ifdef CONFIG_HOTPLUG_CPU | 953 | #ifdef CONFIG_HOTPLUG_CPU |
804 | static void __perf_exit_cpu(void *info) | 954 | static void __perf_counter_exit_cpu(void *info) |
805 | { | 955 | { |
806 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | 956 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); |
807 | struct perf_counter_context *ctx = &cpuctx->ctx; | 957 | struct perf_counter_context *ctx = &cpuctx->ctx; |
808 | struct perf_counter *counter, *tmp; | 958 | struct perf_counter *counter, *tmp; |
809 | 959 | ||
810 | list_for_each_entry_safe(counter, tmp, &ctx->counters, list) | 960 | list_for_each_entry_safe(counter, tmp, &ctx->counter_list, list_entry) |
811 | __perf_remove_from_context(counter); | 961 | __perf_counter_remove_from_context(counter); |
812 | 962 | ||
813 | } | 963 | } |
814 | static void perf_exit_cpu(int cpu) | 964 | static void perf_counter_exit_cpu(int cpu) |
815 | { | 965 | { |
816 | smp_call_function_single(cpu, __perf_exit_cpu, NULL, 1); | 966 | smp_call_function_single(cpu, __perf_counter_exit_cpu, NULL, 1); |
817 | } | 967 | } |
818 | #else | 968 | #else |
819 | static inline void perf_exit_cpu(int cpu) { } | 969 | static inline void perf_counter_exit_cpu(int cpu) { } |
820 | #endif | 970 | #endif |
821 | 971 | ||
822 | static int __cpuinit | 972 | static int __cpuinit |
@@ -828,12 +978,12 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) | |||
828 | 978 | ||
829 | case CPU_UP_PREPARE: | 979 | case CPU_UP_PREPARE: |
830 | case CPU_UP_PREPARE_FROZEN: | 980 | case CPU_UP_PREPARE_FROZEN: |
831 | perf_init_cpu(cpu); | 981 | perf_counter_init_cpu(cpu); |
832 | break; | 982 | break; |
833 | 983 | ||
834 | case CPU_DOWN_PREPARE: | 984 | case CPU_DOWN_PREPARE: |
835 | case CPU_DOWN_PREPARE_FROZEN: | 985 | case CPU_DOWN_PREPARE_FROZEN: |
836 | perf_exit_cpu(cpu); | 986 | perf_counter_exit_cpu(cpu); |
837 | break; | 987 | break; |
838 | 988 | ||
839 | default: | 989 | default: |