author     Ingo Molnar <mingo@elte.hu>    2008-12-11 02:38:42 -0500
committer  Ingo Molnar <mingo@elte.hu>    2008-12-11 09:45:49 -0500
commit     04289bb9891882202d7e961c4c04d2376930e9f9 (patch)
tree       13340847915efc809a62bf91b3cd45e0e0416deb
parent     9f66a3810fe0d4100972db84290f3ae4a4d77025 (diff)
perf counters: add support for group counters
Impact: add group counters

This patch adds the "counter groups" abstraction.

Groups of counters behave much like normal 'single' counters, with a few semantic and behavioral extensions on top of that.

A counter group is created by creating a new counter with the open() syscall's group-leader group_fd file descriptor parameter pointing to another, already existing counter.

Groups of counters are scheduled in and out in one atomic group, and they are also round-robin scheduled atomically.

Counters that are members of a group can also record events with an (atomic) extended timestamp that extends to all members of the group, if the record type is set to PERF_RECORD_GROUP.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--    arch/x86/kernel/cpu/perf_counter.c    28
-rw-r--r--    include/linux/perf_counter.h           8
-rw-r--r--    kernel/perf_counter.c                282
3 files changed, 236 insertions, 82 deletions
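The group-leader semantics described in the commit message can be illustrated with a small userspace sketch. Everything below is illustrative only and is not part of the patch: the perf_counter_open() wrapper, the __NR_perf_counter_open placeholder, the stand-in struct layout and the pid/cpu values are assumptions; only the group_fd semantics (-1 for a standalone counter, an existing counter fd to join that counter's group) come from this commit.

/*
 * Hypothetical userspace sketch -- not part of the patch.  The wrapper
 * name, syscall number placeholder and stand-in event structure are
 * assumptions; the real definitions live in <linux/perf_counter.h>.
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>

/* stand-in for the kernel's struct perf_counter_hw_event */
struct perf_counter_hw_event {
        int64_t  type;          /* which hardware event to count */
        uint32_t record_type;   /* e.g. PERF_RECORD_GROUP for group timestamps */
};

#ifndef __NR_perf_counter_open
#define __NR_perf_counter_open -1   /* placeholder: the real number is per-arch */
#endif

/* hypothetical wrapper around the new syscall */
static int perf_counter_open(struct perf_counter_hw_event *hw_event,
                             pid_t pid, int cpu, int group_fd)
{
        return syscall(__NR_perf_counter_open, hw_event, pid, cpu, group_fd);
}

int main(void)
{
        struct perf_counter_hw_event leader_event  = { .type = 0 };
        struct perf_counter_hw_event sibling_event = { .type = 1 };
        int leader_fd, sibling_fd;

        /* group_fd == -1: this counter becomes its own group leader */
        leader_fd = perf_counter_open(&leader_event, 0 /* current task */, -1, -1);
        if (leader_fd < 0) {
                perror("perf_counter_open (leader)");
                return 1;
        }

        /* group_fd == leader_fd: this counter joins the leader's group */
        sibling_fd = perf_counter_open(&sibling_event, 0, -1, leader_fd);
        if (sibling_fd < 0) {
                perror("perf_counter_open (sibling)");
                return 1;
        }

        /* both counters are now scheduled in and out as one atomic group */
        return 0;
}

Passing group_fd == -1 keeps today's single-counter behaviour: as the diff below shows, perf_counter_alloc() then makes the counter its own group leader with an empty sibling list.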
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index ef1936a871aa..54b4ad0cce68 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -346,18 +346,22 @@ static void perf_save_and_restart(struct perf_counter *counter)
 }
 
 static void
-perf_handle_group(struct perf_counter *leader, u64 *status, u64 *overflown)
+perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown)
 {
-        struct perf_counter_context *ctx = leader->ctx;
-        struct perf_counter *counter;
+        struct perf_counter *counter, *group_leader = sibling->group_leader;
         int bit;
 
-        list_for_each_entry(counter, &ctx->counters, list) {
-                if (counter->hw_event.record_type != PERF_RECORD_SIMPLE ||
-                    counter == leader)
-                        continue;
+        /*
+         * Store the counter's own timestamp first:
+         */
+        perf_store_irq_data(sibling, sibling->hw_event.type);
+        perf_store_irq_data(sibling, atomic64_counter_read(sibling));
 
-                if (counter->active) {
+        /*
+         * Then store sibling timestamps (if any):
+         */
+        list_for_each_entry(counter, &group_leader->sibling_list, list_entry) {
+                if (!counter->active) {
                         /*
                          * When counter was not in the overflow mask, we have to
                          * read it from hardware. We read it as well, when it
@@ -371,8 +375,8 @@ perf_handle_group(struct perf_counter *leader, u64 *status, u64 *overflown)
                                 perf_save_and_restart(counter);
                         }
                 }
-                perf_store_irq_data(leader, counter->hw_event.type);
-                perf_store_irq_data(leader, atomic64_counter_read(counter));
+                perf_store_irq_data(sibling, counter->hw_event.type);
+                perf_store_irq_data(sibling, atomic64_counter_read(counter));
         }
 }
 
@@ -416,10 +420,6 @@ again:
                 perf_store_irq_data(counter, instruction_pointer(regs));
                 break;
         case PERF_RECORD_GROUP:
-                perf_store_irq_data(counter,
-                                counter->hw_event.type);
-                perf_store_irq_data(counter,
-                                atomic64_counter_read(counter));
                 perf_handle_group(counter, &status, &ack);
                 break;
         }
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index a2b4852e2d70..7af7d8965460 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -117,7 +117,10 @@ struct perf_data {
  * struct perf_counter - performance counter kernel representation:
  */
 struct perf_counter {
-        struct list_head                list;
+        struct list_head                list_entry;
+        struct list_head                sibling_list;
+        struct perf_counter             *group_leader;
+
         int                             active;
 #if BITS_PER_LONG == 64
         atomic64_t                      count;
@@ -158,7 +161,8 @@ struct perf_counter_context {
          * Protect the list of counters:
          */
         spinlock_t              lock;
-        struct list_head        counters;
+
+        struct list_head        counter_list;
         int                     nr_counters;
         int                     nr_active;
         struct task_struct      *task;
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 0d323ceda3a4..fa59fe8c02d5 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -10,6 +10,7 @@
 #include <linux/fs.h>
 #include <linux/cpu.h>
 #include <linux/smp.h>
+#include <linux/file.h>
 #include <linux/poll.h>
 #include <linux/sysfs.h>
 #include <linux/ptrace.h>
@@ -55,7 +56,7 @@ void __weak hw_perf_counter_setup(void) { }
  * Read the cached counter in counter safe against cross CPU / NMI
  * modifications. 64 bit version - no complications.
  */
-static inline u64 perf_read_counter_safe(struct perf_counter *counter)
+static inline u64 perf_counter_read_safe(struct perf_counter *counter)
 {
         return (u64) atomic64_read(&counter->count);
 }
@@ -66,7 +67,7 @@ static inline u64 perf_read_counter_safe(struct perf_counter *counter)
  * Read the cached counter in counter safe against cross CPU / NMI
  * modifications. 32 bit version.
  */
-static u64 perf_read_counter_safe(struct perf_counter *counter)
+static u64 perf_counter_read_safe(struct perf_counter *counter)
 {
         u32 cntl, cnth;
 
@@ -83,13 +84,55 @@ static u64 perf_read_counter_safe(struct perf_counter *counter)
 
 #endif
 
+static void
+list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
+{
+        struct perf_counter *group_leader = counter->group_leader;
+
+        /*
+         * Depending on whether it is a standalone or sibling counter,
+         * add it straight to the context's counter list, or to the group
+         * leader's sibling list:
+         */
+        if (counter->group_leader == counter)
+                list_add_tail(&counter->list_entry, &ctx->counter_list);
+        else
+                list_add_tail(&counter->list_entry, &group_leader->sibling_list);
+}
+
+static void
+list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
+{
+        struct perf_counter *sibling, *tmp;
+
+        list_del_init(&counter->list_entry);
+
+        if (list_empty(&counter->sibling_list))
+                return;
+
+        /*
+         * If this was a group counter with sibling counters then
+         * upgrade the siblings to singleton counters by adding them
+         * to the context list directly:
+         */
+        list_for_each_entry_safe(sibling, tmp,
+                                 &counter->sibling_list, list_entry) {
+
+                list_del_init(&sibling->list_entry);
+                list_add_tail(&sibling->list_entry, &ctx->counter_list);
+                WARN_ON_ONCE(!sibling->group_leader);
+                WARN_ON_ONCE(sibling->group_leader == sibling);
+                sibling->group_leader = sibling;
+        }
+}
+
 /*
  * Cross CPU call to remove a performance counter
  *
  * We disable the counter on the hardware level first. After that we
  * remove it from the context list.
  */
-static void __perf_remove_from_context(void *info)
+static void __perf_counter_remove_from_context(void *info)
 {
         struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
         struct perf_counter *counter = info;
@@ -119,7 +162,7 @@ static void __perf_remove_from_context(void *info)
          * counters on a global level. NOP for non NMI based counters.
          */
         hw_perf_disable_all();
-        list_del_init(&counter->list);
+        list_del_counter(counter, ctx);
         hw_perf_enable_all();
 
         if (!ctx->task) {
@@ -144,7 +187,7 @@ static void __perf_remove_from_context(void *info)
  * CPU counters are removed with a smp call. For task counters we only
  * call when the task is on a CPU.
  */
-static void perf_remove_from_context(struct perf_counter *counter)
+static void perf_counter_remove_from_context(struct perf_counter *counter)
 {
         struct perf_counter_context *ctx = counter->ctx;
         struct task_struct *task = ctx->task;
@@ -155,32 +198,32 @@ static void perf_remove_from_context(struct perf_counter *counter)
                  * the removal is always sucessful.
                  */
                 smp_call_function_single(counter->cpu,
-                                         __perf_remove_from_context,
+                                         __perf_counter_remove_from_context,
                                          counter, 1);
                 return;
         }
 
 retry:
-        task_oncpu_function_call(task, __perf_remove_from_context,
+        task_oncpu_function_call(task, __perf_counter_remove_from_context,
                                  counter);
 
         spin_lock_irq(&ctx->lock);
         /*
          * If the context is active we need to retry the smp call.
          */
-        if (ctx->nr_active && !list_empty(&counter->list)) {
+        if (ctx->nr_active && !list_empty(&counter->list_entry)) {
                 spin_unlock_irq(&ctx->lock);
                 goto retry;
         }
 
         /*
          * The lock prevents that this context is scheduled in so we
-         * can remove the counter safely, if it the call above did not
+         * can remove the counter safely, if the call above did not
          * succeed.
          */
-        if (!list_empty(&counter->list)) {
+        if (!list_empty(&counter->list_entry)) {
                 ctx->nr_counters--;
-                list_del_init(&counter->list);
+                list_del_counter(counter, ctx);
                 counter->task = NULL;
         }
         spin_unlock_irq(&ctx->lock);
@@ -211,7 +254,7 @@ static void __perf_install_in_context(void *info)
          * counters on a global level. NOP for non NMI based counters.
          */
         hw_perf_disable_all();
-        list_add_tail(&counter->list, &ctx->counters);
+        list_add_counter(counter, ctx);
         hw_perf_enable_all();
 
         ctx->nr_counters++;
@@ -268,7 +311,7 @@ retry:
          * If the context is active and the counter has not been added
          * we need to retry the smp call.
          */
-        if (ctx->nr_active && list_empty(&counter->list)) {
+        if (ctx->nr_active && list_empty(&counter->list_entry)) {
                 spin_unlock_irq(&ctx->lock);
                 goto retry;
         }
@@ -278,13 +321,45 @@ retry:
          * can add the counter safely, if it the call above did not
          * succeed.
          */
-        if (list_empty(&counter->list)) {
-                list_add_tail(&counter->list, &ctx->counters);
+        if (list_empty(&counter->list_entry)) {
+                list_add_counter(counter, ctx);
                 ctx->nr_counters++;
         }
         spin_unlock_irq(&ctx->lock);
 }
 
+static void
+counter_sched_out(struct perf_counter *counter,
+                  struct perf_cpu_context *cpuctx,
+                  struct perf_counter_context *ctx)
+{
+        if (!counter->active)
+                return;
+
+        hw_perf_counter_disable(counter);
+        counter->active = 0;
+        counter->oncpu  = -1;
+
+        cpuctx->active_oncpu--;
+        ctx->nr_active--;
+}
+
+static void
+group_sched_out(struct perf_counter *group_counter,
+                struct perf_cpu_context *cpuctx,
+                struct perf_counter_context *ctx)
+{
+        struct perf_counter *counter;
+
+        counter_sched_out(group_counter, cpuctx, ctx);
+
+        /*
+         * Schedule out siblings (if any):
+         */
+        list_for_each_entry(counter, &group_counter->sibling_list, list_entry)
+                counter_sched_out(counter, cpuctx, ctx);
+}
+
 /*
  * Called from scheduler to remove the counters of the current task,
  * with interrupts disabled.
@@ -306,21 +381,48 @@ void perf_counter_task_sched_out(struct task_struct *task, int cpu)
                 return;
 
         spin_lock(&ctx->lock);
-        list_for_each_entry(counter, &ctx->counters, list) {
-                if (!ctx->nr_active)
-                        break;
-                if (counter->active) {
-                        hw_perf_counter_disable(counter);
-                        counter->active = 0;
-                        counter->oncpu = -1;
-                        ctx->nr_active--;
-                        cpuctx->active_oncpu--;
-                }
+        if (ctx->nr_active) {
+                list_for_each_entry(counter, &ctx->counter_list, list_entry)
+                        group_sched_out(counter, cpuctx, ctx);
         }
         spin_unlock(&ctx->lock);
         cpuctx->task_ctx = NULL;
 }
 
+static void
+counter_sched_in(struct perf_counter *counter,
+                 struct perf_cpu_context *cpuctx,
+                 struct perf_counter_context *ctx,
+                 int cpu)
+{
+        if (!counter->active)
+                return;
+
+        hw_perf_counter_enable(counter);
+        counter->active = 1;
+        counter->oncpu  = cpu;  /* TODO: put 'cpu' into cpuctx->cpu */
+
+        cpuctx->active_oncpu++;
+        ctx->nr_active++;
+}
+
+static void
+group_sched_in(struct perf_counter *group_counter,
+               struct perf_cpu_context *cpuctx,
+               struct perf_counter_context *ctx,
+               int cpu)
+{
+        struct perf_counter *counter;
+
+        counter_sched_in(group_counter, cpuctx, ctx, cpu);
+
+        /*
+         * Schedule in siblings as one group (if any):
+         */
+        list_for_each_entry(counter, &group_counter->sibling_list, list_entry)
+                counter_sched_in(counter, cpuctx, ctx, cpu);
+}
+
 /*
  * Called from scheduler to add the counters of the current task
  * with interrupts disabled.
@@ -342,19 +444,21 @@ void perf_counter_task_sched_in(struct task_struct *task, int cpu)
                 return;
 
         spin_lock(&ctx->lock);
-        list_for_each_entry(counter, &ctx->counters, list) {
+        list_for_each_entry(counter, &ctx->counter_list, list_entry) {
                 if (ctx->nr_active == cpuctx->max_pertask)
                         break;
+
+                /*
+                 * Listen to the 'cpu' scheduling filter constraint
+                 * of counters:
+                 */
                 if (counter->cpu != -1 && counter->cpu != cpu)
                         continue;
 
-                hw_perf_counter_enable(counter);
-                counter->active = 1;
-                counter->oncpu = cpu;
-                ctx->nr_active++;
-                cpuctx->active_oncpu++;
+                group_sched_in(counter, cpuctx, ctx, cpu);
         }
         spin_unlock(&ctx->lock);
+
         cpuctx->task_ctx = ctx;
 }
 
@@ -371,12 +475,12 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
         spin_lock(&ctx->lock);
 
         /*
-         * Rotate the first entry last:
+         * Rotate the first entry last (works just fine for group counters too):
          */
         hw_perf_disable_all();
-        list_for_each_entry(counter, &ctx->counters, list) {
-                list_del(&counter->list);
-                list_add_tail(&counter->list, &ctx->counters);
+        list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+                list_del(&counter->list_entry);
+                list_add_tail(&counter->list_entry, &ctx->counter_list);
                 break;
         }
         hw_perf_enable_all();
@@ -387,16 +491,23 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 }
 
 /*
+ * Initialize the perf_counter context in a task_struct:
+ */
+static void
+__perf_counter_init_context(struct perf_counter_context *ctx,
+                            struct task_struct *task)
+{
+        spin_lock_init(&ctx->lock);
+        INIT_LIST_HEAD(&ctx->counter_list);
+        ctx->nr_counters = 0;
+        ctx->task = task;
+}
+/*
  * Initialize the perf_counter context in task_struct
  */
 void perf_counter_init_task(struct task_struct *task)
 {
-        struct perf_counter_context *ctx = &task->perf_counter_ctx;
-
-        spin_lock_init(&ctx->lock);
-        INIT_LIST_HEAD(&ctx->counters);
-        ctx->nr_counters = 0;
-        ctx->task = task;
+        __perf_counter_init_context(&task->perf_counter_ctx, task);
 }
 
 /*
@@ -407,7 +518,7 @@ static void __hw_perf_counter_read(void *info)
         hw_perf_counter_read(info);
 }
 
-static u64 perf_read_counter(struct perf_counter *counter)
+static u64 perf_counter_read(struct perf_counter *counter)
 {
         /*
          * If counter is enabled and currently active on a CPU, update the
@@ -418,7 +529,7 @@ static u64 perf_read_counter(struct perf_counter *counter)
                                          __hw_perf_counter_read, counter, 1);
         }
 
-        return perf_read_counter_safe(counter);
+        return perf_counter_read_safe(counter);
 }
 
 /*
@@ -555,7 +666,7 @@ static int perf_release(struct inode *inode, struct file *file)
 
         mutex_lock(&counter->mutex);
 
-        perf_remove_from_context(counter);
+        perf_counter_remove_from_context(counter);
         put_context(ctx);
 
         mutex_unlock(&counter->mutex);
@@ -577,7 +688,7 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
                 return -EINVAL;
 
         mutex_lock(&counter->mutex);
-        cntval = perf_read_counter(counter);
+        cntval = perf_counter_read(counter);
         mutex_unlock(&counter->mutex);
 
         return put_user(cntval, (u64 __user *) buf) ? -EFAULT : sizeof(cntval);
@@ -707,15 +818,25 @@ static const struct file_operations perf_fops = {
  * Allocate and initialize a counter structure
  */
 static struct perf_counter *
-perf_counter_alloc(struct perf_counter_hw_event *hw_event, int cpu)
+perf_counter_alloc(struct perf_counter_hw_event *hw_event,
+                   int cpu,
+                   struct perf_counter *group_leader)
 {
         struct perf_counter *counter = kzalloc(sizeof(*counter), GFP_KERNEL);
 
         if (!counter)
                 return NULL;
 
+        /*
+         * Single counters are their own group leaders, with an
+         * empty sibling list:
+         */
+        if (!group_leader)
+                group_leader = counter;
+
         mutex_init(&counter->mutex);
-        INIT_LIST_HEAD(&counter->list);
+        INIT_LIST_HEAD(&counter->list_entry);
+        INIT_LIST_HEAD(&counter->sibling_list);
         init_waitqueue_head(&counter->waitq);
 
         counter->irqdata = &counter->data[0];
@@ -723,6 +844,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, int cpu)
         counter->cpu = cpu;
         counter->hw_event = *hw_event;
         counter->wakeup_pending = 0;
+        counter->group_leader = group_leader;
 
         return counter;
 }
@@ -743,20 +865,45 @@ asmlinkage int sys_perf_counter_open(
         int                     group_fd)
 
 {
-        struct perf_counter_context *ctx;
+        struct perf_counter *counter, *group_leader;
         struct perf_counter_hw_event hw_event;
-        struct perf_counter *counter;
+        struct perf_counter_context *ctx;
+        struct file *group_file = NULL;
+        int fput_needed = 0;
         int ret;
 
         if (copy_from_user(&hw_event, hw_event_uptr, sizeof(hw_event)) != 0)
                 return -EFAULT;
 
+        /*
+         * Look up the group leader:
+         */
+        group_leader = NULL;
+        if (group_fd != -1) {
+                ret = -EINVAL;
+                group_file = fget_light(group_fd, &fput_needed);
+                if (!group_file)
+                        goto out_fput;
+                if (group_file->f_op != &perf_fops)
+                        goto out_fput;
+
+                group_leader = group_file->private_data;
+                /*
+                 * Do not allow a recursive hierarchy:
+                 */
+                if (group_leader->group_leader)
+                        goto out_fput;
+        }
+
+        /*
+         * Get the target context (task or percpu):
+         */
         ctx = find_get_context(pid, cpu);
         if (IS_ERR(ctx))
                 return PTR_ERR(ctx);
 
         ret = -ENOMEM;
-        counter = perf_counter_alloc(&hw_event, cpu);
+        counter = perf_counter_alloc(&hw_event, cpu, group_leader);
         if (!counter)
                 goto err_put_context;
 
@@ -770,11 +917,14 @@ asmlinkage int sys_perf_counter_open(
         if (ret < 0)
                 goto err_remove_free_put_context;
 
+out_fput:
+        fput_light(group_file, fput_needed);
+
         return ret;
 
 err_remove_free_put_context:
         mutex_lock(&counter->mutex);
-        perf_remove_from_context(counter);
+        perf_counter_remove_from_context(counter);
         mutex_unlock(&counter->mutex);
 
 err_free_put_context:
@@ -783,40 +933,40 @@ err_free_put_context:
 err_put_context:
         put_context(ctx);
 
-        return ret;
+        goto out_fput;
 }
 
-static void __cpuinit perf_init_cpu(int cpu)
+static void __cpuinit perf_counter_init_cpu(int cpu)
 {
-        struct perf_cpu_context *ctx;
+        struct perf_cpu_context *cpuctx;
 
-        ctx = &per_cpu(perf_cpu_context, cpu);
-        spin_lock_init(&ctx->ctx.lock);
-        INIT_LIST_HEAD(&ctx->ctx.counters);
+        cpuctx = &per_cpu(perf_cpu_context, cpu);
+        __perf_counter_init_context(&cpuctx->ctx, NULL);
 
         mutex_lock(&perf_resource_mutex);
-        ctx->max_pertask = perf_max_counters - perf_reserved_percpu;
+        cpuctx->max_pertask = perf_max_counters - perf_reserved_percpu;
         mutex_unlock(&perf_resource_mutex);
+
         hw_perf_counter_setup();
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-static void __perf_exit_cpu(void *info)
+static void __perf_counter_exit_cpu(void *info)
 {
         struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
         struct perf_counter_context *ctx = &cpuctx->ctx;
         struct perf_counter *counter, *tmp;
 
-        list_for_each_entry_safe(counter, tmp, &ctx->counters, list)
-                __perf_remove_from_context(counter);
+        list_for_each_entry_safe(counter, tmp, &ctx->counter_list, list_entry)
+                __perf_counter_remove_from_context(counter);
 
 }
-static void perf_exit_cpu(int cpu)
+static void perf_counter_exit_cpu(int cpu)
 {
-        smp_call_function_single(cpu, __perf_exit_cpu, NULL, 1);
+        smp_call_function_single(cpu, __perf_counter_exit_cpu, NULL, 1);
 }
 #else
-static inline void perf_exit_cpu(int cpu) { }
+static inline void perf_counter_exit_cpu(int cpu) { }
 #endif
 
 static int __cpuinit
@@ -828,12 +978,12 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
 
         case CPU_UP_PREPARE:
         case CPU_UP_PREPARE_FROZEN:
-                perf_init_cpu(cpu);
+                perf_counter_init_cpu(cpu);
                 break;
 
         case CPU_DOWN_PREPARE:
         case CPU_DOWN_PREPARE_FROZEN:
-                perf_exit_cpu(cpu);
+                perf_counter_exit_cpu(cpu);
                 break;
 
         default: