author     Ingo Molnar <mingo@elte.hu>    2008-12-21 08:43:25 -0500
committer  Ingo Molnar <mingo@elte.hu>    2008-12-23 06:45:23 -0500
commit     235c7fc7c500e4fd1700c4ad01b5612bcdc1b449 (patch)
tree       837db278456caa0eb4720afdc36adf47e7dd542f /kernel/perf_counter.c
parent     8fe91e61cdc407c7556d3cd71cf20141a25bbcea (diff)
perfcounters: generalize the counter scheduler
Impact: clean up and refactor code
Refactor the counter scheduler: separate out in/out functions and
introduce a counter-rotation function as well.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/perf_counter.c')
-rw-r--r--  kernel/perf_counter.c | 220
1 file changed, 142 insertions, 78 deletions
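In outline, the patch splits counter scheduling into sched-in/sched-out helpers plus a rotation step, and rebuilds the tick handler on top of them. The following is a simplified, non-verbatim sketch of the new tick path as introduced by the hunks below; note that rotate_percpu is hard-coded to 0 in this patch, so the per-CPU branches stay disabled and only the task context is actually rotated:

/*
 * Simplified sketch of perf_counter_task_tick() after this patch:
 * deschedule the task's counters, rotate the context so another
 * counter (group) gets scheduled first next time, then reschedule.
 */
void perf_counter_task_tick(struct task_struct *curr, int cpu)
{
	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
	const int rotate_percpu = 0;	/* per-CPU rotation disabled for now */

	if (rotate_percpu)
		perf_counter_cpu_sched_out(cpuctx);	/* wraps __perf_counter_sched_out() */
	perf_counter_task_sched_out(curr, cpu);

	if (rotate_percpu)
		rotate_ctx(&cpuctx->ctx);
	rotate_ctx(ctx);			/* move the first entry to the tail */

	if (rotate_percpu)
		perf_counter_cpu_sched_in(cpuctx, cpu);	/* wraps __perf_counter_sched_in() */
	perf_counter_task_sched_in(curr, cpu);
}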
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 48e1dbcdc1cd..d7a79f321b1c 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -111,11 +111,12 @@ static void __perf_counter_remove_from_context(void *info)
 	spin_lock(&ctx->lock);
 
 	if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
-		counter->hw_ops->disable(counter);
 		counter->state = PERF_COUNTER_STATE_INACTIVE;
+		counter->hw_ops->disable(counter);
 		ctx->nr_active--;
 		cpuctx->active_oncpu--;
 		counter->task = NULL;
+		counter->oncpu = -1;
 	}
 	ctx->nr_counters--;
 
@@ -192,8 +193,36 @@ retry:
 	spin_unlock_irq(&ctx->lock);
 }
 
+static int
+counter_sched_in(struct perf_counter *counter,
+		 struct perf_cpu_context *cpuctx,
+		 struct perf_counter_context *ctx,
+		 int cpu)
+{
+	if (counter->state == PERF_COUNTER_STATE_OFF)
+		return 0;
+
+	counter->state = PERF_COUNTER_STATE_ACTIVE;
+	counter->oncpu = cpu;	/* TODO: put 'cpu' into cpuctx->cpu */
+	/*
+	 * The new state must be visible before we turn it on in the hardware:
+	 */
+	smp_wmb();
+
+	if (counter->hw_ops->enable(counter)) {
+		counter->state = PERF_COUNTER_STATE_INACTIVE;
+		counter->oncpu = -1;
+		return -EAGAIN;
+	}
+
+	cpuctx->active_oncpu++;
+	ctx->nr_active++;
+
+	return 0;
+}
+
 /*
- * Cross CPU call to install and enable a preformance counter
+ * Cross CPU call to install and enable a performance counter
  */
 static void __perf_install_in_context(void *info)
 {
@@ -220,22 +249,17 @@ static void __perf_install_in_context(void *info)
 	 * counters on a global level. NOP for non NMI based counters.
 	 */
 	perf_flags = hw_perf_save_disable();
-	list_add_counter(counter, ctx);
-	hw_perf_restore(perf_flags);
 
+	list_add_counter(counter, ctx);
 	ctx->nr_counters++;
 
-	if (cpuctx->active_oncpu < perf_max_counters) {
-		counter->state = PERF_COUNTER_STATE_ACTIVE;
-		counter->oncpu = cpu;
-		ctx->nr_active++;
-		cpuctx->active_oncpu++;
-		counter->hw_ops->enable(counter);
-	}
+	counter_sched_in(counter, cpuctx, ctx, cpu);
 
 	if (!ctx->task && cpuctx->max_pertask)
 		cpuctx->max_pertask--;
 
+	hw_perf_restore(perf_flags);
+
 	spin_unlock(&ctx->lock);
 	curr_rq_unlock_irq_restore(&flags);
 }
@@ -302,8 +326,8 @@ counter_sched_out(struct perf_counter *counter,
 	if (counter->state != PERF_COUNTER_STATE_ACTIVE)
 		return;
 
-	counter->hw_ops->disable(counter);
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
+	counter->hw_ops->disable(counter);
 	counter->oncpu = -1;
 
 	cpuctx->active_oncpu--;
@@ -326,6 +350,22 @@ group_sched_out(struct perf_counter *group_counter,
 		counter_sched_out(counter, cpuctx, ctx);
 }
 
+void __perf_counter_sched_out(struct perf_counter_context *ctx,
+			      struct perf_cpu_context *cpuctx)
+{
+	struct perf_counter *counter;
+
+	if (likely(!ctx->nr_counters))
+		return;
+
+	spin_lock(&ctx->lock);
+	if (ctx->nr_active) {
+		list_for_each_entry(counter, &ctx->counter_list, list_entry)
+			group_sched_out(counter, cpuctx, ctx);
+	}
+	spin_unlock(&ctx->lock);
+}
+
 /*
  * Called from scheduler to remove the counters of the current task,
  * with interrupts disabled.
@@ -341,39 +381,18 @@ void perf_counter_task_sched_out(struct task_struct *task, int cpu)
 {
 	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
 	struct perf_counter_context *ctx = &task->perf_counter_ctx;
-	struct perf_counter *counter;
 
 	if (likely(!cpuctx->task_ctx))
 		return;
 
-	spin_lock(&ctx->lock);
-	if (ctx->nr_active) {
-		list_for_each_entry(counter, &ctx->counter_list, list_entry)
-			group_sched_out(counter, cpuctx, ctx);
-	}
-	spin_unlock(&ctx->lock);
+	__perf_counter_sched_out(ctx, cpuctx);
+
 	cpuctx->task_ctx = NULL;
 }
 
-static int
-counter_sched_in(struct perf_counter *counter,
-		 struct perf_cpu_context *cpuctx,
-		 struct perf_counter_context *ctx,
-		 int cpu)
+static void perf_counter_cpu_sched_out(struct perf_cpu_context *cpuctx)
 {
-	if (counter->state == PERF_COUNTER_STATE_OFF)
-		return 0;
-
-	if (counter->hw_ops->enable(counter))
-		return -EAGAIN;
-
-	counter->state = PERF_COUNTER_STATE_ACTIVE;
-	counter->oncpu = cpu;	/* TODO: put 'cpu' into cpuctx->cpu */
-
-	cpuctx->active_oncpu++;
-	ctx->nr_active++;
-
-	return 0;
+	__perf_counter_sched_out(&cpuctx->ctx, cpuctx);
 }
 
 static int
@@ -416,21 +435,10 @@ group_error:
 	return -EAGAIN;
 }
 
-/*
- * Called from scheduler to add the counters of the current task
- * with interrupts disabled.
- *
- * We restore the counter value and then enable it.
- *
- * This does not protect us against NMI, but enable()
- * sets the enabled bit in the control field of counter _before_
- * accessing the counter control register. If a NMI hits, then it will
- * keep the counter running.
- */
-void perf_counter_task_sched_in(struct task_struct *task, int cpu)
+static void
+__perf_counter_sched_in(struct perf_counter_context *ctx,
+			struct perf_cpu_context *cpuctx, int cpu)
 {
-	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
-	struct perf_counter_context *ctx = &task->perf_counter_ctx;
 	struct perf_counter *counter;
 
 	if (likely(!ctx->nr_counters))
@@ -453,10 +461,35 @@ void perf_counter_task_sched_in(struct task_struct *task, int cpu)
 			break;
 	}
 	spin_unlock(&ctx->lock);
+}
 
+/*
+ * Called from scheduler to add the counters of the current task
+ * with interrupts disabled.
+ *
+ * We restore the counter value and then enable it.
+ *
+ * This does not protect us against NMI, but enable()
+ * sets the enabled bit in the control field of counter _before_
+ * accessing the counter control register. If a NMI hits, then it will
+ * keep the counter running.
+ */
+void perf_counter_task_sched_in(struct task_struct *task, int cpu)
+{
+	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+	struct perf_counter_context *ctx = &task->perf_counter_ctx;
+
+	__perf_counter_sched_in(ctx, cpuctx, cpu);
 	cpuctx->task_ctx = ctx;
 }
 
+static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
+{
+	struct perf_counter_context *ctx = &cpuctx->ctx;
+
+	__perf_counter_sched_in(ctx, cpuctx, cpu);
+}
+
 int perf_counter_task_disable(void)
 {
 	struct task_struct *curr = current;
@@ -514,6 +547,8 @@ int perf_counter_task_enable(void)
 	/* force the update of the task clock: */
 	__task_delta_exec(curr, 1);
 
+	perf_counter_task_sched_out(curr, cpu);
+
 	spin_lock(&ctx->lock);
 
 	/*
@@ -538,19 +573,18 @@ int perf_counter_task_enable(void)
 	return 0;
 }
 
-void perf_counter_task_tick(struct task_struct *curr, int cpu)
+/*
+ * Round-robin a context's counters:
+ */
+static void rotate_ctx(struct perf_counter_context *ctx)
 {
-	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
 	struct perf_counter *counter;
 	u64 perf_flags;
 
-	if (likely(!ctx->nr_counters))
+	if (!ctx->nr_counters)
 		return;
 
-	perf_counter_task_sched_out(curr, cpu);
-
 	spin_lock(&ctx->lock);
-
 	/*
 	 * Rotate the first entry last (works just fine for group counters too):
 	 */
@@ -563,7 +597,24 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 	hw_perf_restore(perf_flags);
 
 	spin_unlock(&ctx->lock);
+}
+
+void perf_counter_task_tick(struct task_struct *curr, int cpu)
+{
+	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
+	const int rotate_percpu = 0;
+
+	if (rotate_percpu)
+		perf_counter_cpu_sched_out(cpuctx);
+	perf_counter_task_sched_out(curr, cpu);
 
+	if (rotate_percpu)
+		rotate_ctx(&cpuctx->ctx);
+	rotate_ctx(ctx);
+
+	if (rotate_percpu)
+		perf_counter_cpu_sched_in(cpuctx, cpu);
 	perf_counter_task_sched_in(curr, cpu);
 }
 
@@ -905,8 +956,6 @@ static u64 task_clock_perf_counter_val(struct perf_counter *counter, int update)
 	struct task_struct *curr = counter->task;
 	u64 delta;
 
-	WARN_ON_ONCE(counter->task != current);
-
 	delta = __task_delta_exec(curr, update);
 
 	return curr->se.sum_exec_runtime + delta;
@@ -1160,6 +1209,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 	counter->group_leader = group_leader;
 	counter->hw_ops = NULL;
 
+	counter->state = PERF_COUNTER_STATE_INACTIVE;
 	if (hw_event->disabled)
 		counter->state = PERF_COUNTER_STATE_OFF;
 
@@ -1331,35 +1381,49 @@ __perf_counter_exit_task(struct task_struct *child,
 {
 	struct perf_counter *parent_counter;
 	u64 parent_val, child_val;
-	unsigned long flags;
-	u64 perf_flags;
 
 	/*
-	 * Disable and unlink this counter.
-	 *
-	 * Be careful about zapping the list - IRQ/NMI context
-	 * could still be processing it:
+	 * If we do not self-reap then we have to wait for the
+	 * child task to unschedule (it will happen for sure),
+	 * so that its counter is at its final count. (This
+	 * condition triggers rarely - child tasks usually get
+	 * off their CPU before the parent has a chance to
+	 * get this far into the reaping action)
 	 */
-	curr_rq_lock_irq_save(&flags);
-	perf_flags = hw_perf_save_disable();
-
-	if (child_counter->state == PERF_COUNTER_STATE_ACTIVE) {
+	if (child != current) {
+		wait_task_inactive(child, 0);
+		list_del_init(&child_counter->list_entry);
+	} else {
 		struct perf_cpu_context *cpuctx;
+		unsigned long flags;
+		u64 perf_flags;
+
+		/*
+		 * Disable and unlink this counter.
+		 *
+		 * Be careful about zapping the list - IRQ/NMI context
+		 * could still be processing it:
+		 */
+		curr_rq_lock_irq_save(&flags);
+		perf_flags = hw_perf_save_disable();
 
 		cpuctx = &__get_cpu_var(perf_cpu_context);
 
-		child_counter->hw_ops->disable(child_counter);
-		child_counter->state = PERF_COUNTER_STATE_INACTIVE;
-		child_counter->oncpu = -1;
+		if (child_counter->state == PERF_COUNTER_STATE_ACTIVE) {
+			child_counter->state = PERF_COUNTER_STATE_INACTIVE;
+			child_counter->hw_ops->disable(child_counter);
+			cpuctx->active_oncpu--;
+			child_ctx->nr_active--;
+			child_counter->oncpu = -1;
+		}
 
-		cpuctx->active_oncpu--;
-		child_ctx->nr_active--;
-	}
+		list_del_init(&child_counter->list_entry);
 
-	list_del_init(&child_counter->list_entry);
+		child_ctx->nr_counters--;
 
-	hw_perf_restore(perf_flags);
-	curr_rq_unlock_irq_restore(&flags);
+		hw_perf_restore(perf_flags);
+		curr_rq_unlock_irq_restore(&flags);
+	}
 
 	parent_counter = child_counter->parent;
 	/*