author    Ingo Molnar <mingo@elte.hu>    2008-12-21 08:43:25 -0500
committer Ingo Molnar <mingo@elte.hu>    2008-12-23 06:45:23 -0500
commit    235c7fc7c500e4fd1700c4ad01b5612bcdc1b449
tree      837db278456caa0eb4720afdc36adf47e7dd542f /kernel/perf_counter.c
parent    8fe91e61cdc407c7556d3cd71cf20141a25bbcea
perfcounters: generalize the counter scheduler
Impact: clean up and refactor code

Refactor the counter scheduler: separate out in/out functions and
introduce a counter-rotation function as well.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/perf_counter.c')
-rw-r--r--   kernel/perf_counter.c   220
1 files changed, 142 insertions(+), 78 deletions(-)
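The refactor splits the counter scheduler into small helpers: counter_sched_in()/counter_sched_out() handle a single counter, __perf_counter_sched_in()/__perf_counter_sched_out() work on a whole context (task or per-CPU), and rotate_ctx() round-robins a context's counter list on every tick so that more counters than hardware slots can take turns. As a rough user-space sketch of the rotation step only (not kernel code; the array representation, rotate(), counters[] and NR_COUNTERS below are purely illustrative), each tick simply moves the first counter to the tail:

/*
 * Toy illustration (user space, not kernel code) of the round-robin idea
 * behind rotate_ctx(): on every tick the first counter in the context's
 * list moves to the tail, so over successive ticks every counter gets a
 * turn at the front of the list.
 */
#include <stdio.h>

#define NR_COUNTERS 4

static int counters[NR_COUNTERS] = { 0, 1, 2, 3 };

/* Move the first entry last -- the effect rotate_ctx() achieves with
 * list_del()/list_add_tail() on ctx->counter_list. */
static void rotate(int *list, int n)
{
	int first = list[0];
	int i;

	for (i = 0; i < n - 1; i++)
		list[i] = list[i + 1];
	list[n - 1] = first;
}

int main(void)
{
	int tick, i;

	for (tick = 0; tick < NR_COUNTERS; tick++) {
		printf("tick %d:", tick);
		for (i = 0; i < NR_COUNTERS; i++)
			printf(" %d", counters[i]);
		printf("\n");
		rotate(counters, NR_COUNTERS);
	}
	return 0;
}

After NR_COUNTERS ticks the list is back in its original order and every counter has spent one tick at the head, mirroring how __perf_counter_sched_in() walks ctx->counter_list in order and stops scheduling once the hardware is full.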
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 48e1dbcdc1cd..d7a79f321b1c 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -111,11 +111,12 @@ static void __perf_counter_remove_from_context(void *info)
 	spin_lock(&ctx->lock);
 
 	if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
-		counter->hw_ops->disable(counter);
 		counter->state = PERF_COUNTER_STATE_INACTIVE;
+		counter->hw_ops->disable(counter);
 		ctx->nr_active--;
 		cpuctx->active_oncpu--;
 		counter->task = NULL;
+		counter->oncpu = -1;
 	}
 	ctx->nr_counters--;
 
@@ -192,8 +193,36 @@ retry:
 	spin_unlock_irq(&ctx->lock);
 }
 
+static int
+counter_sched_in(struct perf_counter *counter,
+		 struct perf_cpu_context *cpuctx,
+		 struct perf_counter_context *ctx,
+		 int cpu)
+{
+	if (counter->state == PERF_COUNTER_STATE_OFF)
+		return 0;
+
+	counter->state = PERF_COUNTER_STATE_ACTIVE;
+	counter->oncpu = cpu;	/* TODO: put 'cpu' into cpuctx->cpu */
+	/*
+	 * The new state must be visible before we turn it on in the hardware:
+	 */
+	smp_wmb();
+
+	if (counter->hw_ops->enable(counter)) {
+		counter->state = PERF_COUNTER_STATE_INACTIVE;
+		counter->oncpu = -1;
+		return -EAGAIN;
+	}
+
+	cpuctx->active_oncpu++;
+	ctx->nr_active++;
+
+	return 0;
+}
+
 /*
- * Cross CPU call to install and enable a preformance counter
+ * Cross CPU call to install and enable a performance counter
  */
 static void __perf_install_in_context(void *info)
 {
@@ -220,22 +249,17 @@ static void __perf_install_in_context(void *info)
 	 * counters on a global level. NOP for non NMI based counters.
 	 */
 	perf_flags = hw_perf_save_disable();
-	list_add_counter(counter, ctx);
-	hw_perf_restore(perf_flags);
 
+	list_add_counter(counter, ctx);
 	ctx->nr_counters++;
 
-	if (cpuctx->active_oncpu < perf_max_counters) {
-		counter->state = PERF_COUNTER_STATE_ACTIVE;
-		counter->oncpu = cpu;
-		ctx->nr_active++;
-		cpuctx->active_oncpu++;
-		counter->hw_ops->enable(counter);
-	}
+	counter_sched_in(counter, cpuctx, ctx, cpu);
 
 	if (!ctx->task && cpuctx->max_pertask)
 		cpuctx->max_pertask--;
 
+	hw_perf_restore(perf_flags);
+
 	spin_unlock(&ctx->lock);
 	curr_rq_unlock_irq_restore(&flags);
 }
@@ -302,8 +326,8 @@ counter_sched_out(struct perf_counter *counter,
 	if (counter->state != PERF_COUNTER_STATE_ACTIVE)
 		return;
 
-	counter->hw_ops->disable(counter);
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
+	counter->hw_ops->disable(counter);
 	counter->oncpu = -1;
 
 	cpuctx->active_oncpu--;
@@ -326,6 +350,22 @@ group_sched_out(struct perf_counter *group_counter,
 		counter_sched_out(counter, cpuctx, ctx);
 }
 
+void __perf_counter_sched_out(struct perf_counter_context *ctx,
+			      struct perf_cpu_context *cpuctx)
+{
+	struct perf_counter *counter;
+
+	if (likely(!ctx->nr_counters))
+		return;
+
+	spin_lock(&ctx->lock);
+	if (ctx->nr_active) {
+		list_for_each_entry(counter, &ctx->counter_list, list_entry)
+			group_sched_out(counter, cpuctx, ctx);
+	}
+	spin_unlock(&ctx->lock);
+}
+
 /*
  * Called from scheduler to remove the counters of the current task,
  * with interrupts disabled.
@@ -341,39 +381,18 @@ void perf_counter_task_sched_out(struct task_struct *task, int cpu)
 {
 	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
 	struct perf_counter_context *ctx = &task->perf_counter_ctx;
-	struct perf_counter *counter;
 
 	if (likely(!cpuctx->task_ctx))
 		return;
 
-	spin_lock(&ctx->lock);
-	if (ctx->nr_active) {
-		list_for_each_entry(counter, &ctx->counter_list, list_entry)
-			group_sched_out(counter, cpuctx, ctx);
-	}
-	spin_unlock(&ctx->lock);
+	__perf_counter_sched_out(ctx, cpuctx);
+
 	cpuctx->task_ctx = NULL;
 }
 
-static int
-counter_sched_in(struct perf_counter *counter,
-		 struct perf_cpu_context *cpuctx,
-		 struct perf_counter_context *ctx,
-		 int cpu)
+static void perf_counter_cpu_sched_out(struct perf_cpu_context *cpuctx)
 {
-	if (counter->state == PERF_COUNTER_STATE_OFF)
-		return 0;
-
-	if (counter->hw_ops->enable(counter))
-		return -EAGAIN;
-
-	counter->state = PERF_COUNTER_STATE_ACTIVE;
-	counter->oncpu = cpu;	/* TODO: put 'cpu' into cpuctx->cpu */
-
-	cpuctx->active_oncpu++;
-	ctx->nr_active++;
-
-	return 0;
+	__perf_counter_sched_out(&cpuctx->ctx, cpuctx);
 }
 
 static int
@@ -416,21 +435,10 @@ group_error:
 	return -EAGAIN;
 }
 
-/*
- * Called from scheduler to add the counters of the current task
- * with interrupts disabled.
- *
- * We restore the counter value and then enable it.
- *
- * This does not protect us against NMI, but enable()
- * sets the enabled bit in the control field of counter _before_
- * accessing the counter control register. If a NMI hits, then it will
- * keep the counter running.
- */
-void perf_counter_task_sched_in(struct task_struct *task, int cpu)
+static void
+__perf_counter_sched_in(struct perf_counter_context *ctx,
+			struct perf_cpu_context *cpuctx, int cpu)
 {
-	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
-	struct perf_counter_context *ctx = &task->perf_counter_ctx;
 	struct perf_counter *counter;
 
 	if (likely(!ctx->nr_counters))
@@ -453,10 +461,35 @@ void perf_counter_task_sched_in(struct task_struct *task, int cpu)
 			break;
 	}
 	spin_unlock(&ctx->lock);
+}
 
+/*
+ * Called from scheduler to add the counters of the current task
+ * with interrupts disabled.
+ *
+ * We restore the counter value and then enable it.
+ *
+ * This does not protect us against NMI, but enable()
+ * sets the enabled bit in the control field of counter _before_
+ * accessing the counter control register. If a NMI hits, then it will
+ * keep the counter running.
+ */
+void perf_counter_task_sched_in(struct task_struct *task, int cpu)
+{
+	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+	struct perf_counter_context *ctx = &task->perf_counter_ctx;
+
+	__perf_counter_sched_in(ctx, cpuctx, cpu);
 	cpuctx->task_ctx = ctx;
 }
 
+static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
+{
+	struct perf_counter_context *ctx = &cpuctx->ctx;
+
+	__perf_counter_sched_in(ctx, cpuctx, cpu);
+}
+
 int perf_counter_task_disable(void)
 {
 	struct task_struct *curr = current;
@@ -514,6 +547,8 @@ int perf_counter_task_enable(void)
 	/* force the update of the task clock: */
 	__task_delta_exec(curr, 1);
 
+	perf_counter_task_sched_out(curr, cpu);
+
 	spin_lock(&ctx->lock);
 
 	/*
@@ -538,19 +573,18 @@ int perf_counter_task_enable(void)
 	return 0;
 }
 
-void perf_counter_task_tick(struct task_struct *curr, int cpu)
+/*
+ * Round-robin a context's counters:
+ */
+static void rotate_ctx(struct perf_counter_context *ctx)
 {
-	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
 	struct perf_counter *counter;
 	u64 perf_flags;
 
-	if (likely(!ctx->nr_counters))
+	if (!ctx->nr_counters)
 		return;
 
-	perf_counter_task_sched_out(curr, cpu);
-
 	spin_lock(&ctx->lock);
-
 	/*
 	 * Rotate the first entry last (works just fine for group counters too):
 	 */
@@ -563,7 +597,24 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 	hw_perf_restore(perf_flags);
 
 	spin_unlock(&ctx->lock);
+}
+
+void perf_counter_task_tick(struct task_struct *curr, int cpu)
+{
+	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
+	const int rotate_percpu = 0;
+
+	if (rotate_percpu)
+		perf_counter_cpu_sched_out(cpuctx);
+	perf_counter_task_sched_out(curr, cpu);
 
+	if (rotate_percpu)
+		rotate_ctx(&cpuctx->ctx);
+	rotate_ctx(ctx);
+
+	if (rotate_percpu)
+		perf_counter_cpu_sched_in(cpuctx, cpu);
 	perf_counter_task_sched_in(curr, cpu);
 }
 
@@ -905,8 +956,6 @@ static u64 task_clock_perf_counter_val(struct perf_counter *counter, int update)
 	struct task_struct *curr = counter->task;
 	u64 delta;
 
-	WARN_ON_ONCE(counter->task != current);
-
 	delta = __task_delta_exec(curr, update);
 
 	return curr->se.sum_exec_runtime + delta;
@@ -1160,6 +1209,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 	counter->group_leader = group_leader;
 	counter->hw_ops = NULL;
 
+	counter->state = PERF_COUNTER_STATE_INACTIVE;
 	if (hw_event->disabled)
 		counter->state = PERF_COUNTER_STATE_OFF;
 
@@ -1331,35 +1381,49 @@ __perf_counter_exit_task(struct task_struct *child,
 {
 	struct perf_counter *parent_counter;
 	u64 parent_val, child_val;
-	unsigned long flags;
-	u64 perf_flags;
 
 	/*
-	 * Disable and unlink this counter.
-	 *
-	 * Be careful about zapping the list - IRQ/NMI context
-	 * could still be processing it:
+	 * If we do not self-reap then we have to wait for the
+	 * child task to unschedule (it will happen for sure),
+	 * so that its counter is at its final count. (This
+	 * condition triggers rarely - child tasks usually get
+	 * off their CPU before the parent has a chance to
+	 * get this far into the reaping action)
 	 */
-	curr_rq_lock_irq_save(&flags);
-	perf_flags = hw_perf_save_disable();
-
-	if (child_counter->state == PERF_COUNTER_STATE_ACTIVE) {
+	if (child != current) {
+		wait_task_inactive(child, 0);
+		list_del_init(&child_counter->list_entry);
+	} else {
 		struct perf_cpu_context *cpuctx;
+		unsigned long flags;
+		u64 perf_flags;
+
+		/*
+		 * Disable and unlink this counter.
+		 *
+		 * Be careful about zapping the list - IRQ/NMI context
+		 * could still be processing it:
+		 */
+		curr_rq_lock_irq_save(&flags);
+		perf_flags = hw_perf_save_disable();
 
 		cpuctx = &__get_cpu_var(perf_cpu_context);
 
-		child_counter->hw_ops->disable(child_counter);
-		child_counter->state = PERF_COUNTER_STATE_INACTIVE;
-		child_counter->oncpu = -1;
+		if (child_counter->state == PERF_COUNTER_STATE_ACTIVE) {
+			child_counter->state = PERF_COUNTER_STATE_INACTIVE;
+			child_counter->hw_ops->disable(child_counter);
+			cpuctx->active_oncpu--;
+			child_ctx->nr_active--;
+			child_counter->oncpu = -1;
+		}
 
-		cpuctx->active_oncpu--;
-		child_ctx->nr_active--;
-	}
+		list_del_init(&child_counter->list_entry);
 
-	list_del_init(&child_counter->list_entry);
+		child_ctx->nr_counters--;
 
-	hw_perf_restore(perf_flags);
-	curr_rq_unlock_irq_restore(&flags);
+		hw_perf_restore(perf_flags);
+		curr_rq_unlock_irq_restore(&flags);
+	}
 
 	parent_counter = child_counter->parent;
 	/*