diff options
author | Steven Rostedt <srostedt@redhat.com> | 2009-03-24 20:50:39 -0400 |
---|---|---|
committer | Steven Rostedt <srostedt@redhat.com> | 2009-03-24 23:41:10 -0400 |
commit | cafb168a1c92e4c9e1731fe3d666c39611762c49 (patch) | |
tree | 536d4bb6d33499d746ffb674126d2983e5d6e361 /kernel | |
parent | 0706f1c48ca8a7ab478090b4e38f2e578ae2bfe0 (diff) |
tracing: make the function profiler per cpu
Impact: speed enhancement
By making the function profiler record into per-cpu data, we not only
get better readings and avoid races, we also no longer have to take
any locks.
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/trace/ftrace.c | 199 |
1 files changed, 130 insertions, 69 deletions
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index a9ccd71fc922..ed1fc5021d44 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
@@ -257,28 +257,28 @@ struct ftrace_profile_page { | |||
257 | struct ftrace_profile records[]; | 257 | struct ftrace_profile records[]; |
258 | }; | 258 | }; |
259 | 259 | ||
260 | struct ftrace_profile_stat { | ||
261 | atomic_t disabled; | ||
262 | struct hlist_head *hash; | ||
263 | struct ftrace_profile_page *pages; | ||
264 | struct ftrace_profile_page *start; | ||
265 | struct tracer_stat stat; | ||
266 | }; | ||
267 | |||
260 | #define PROFILE_RECORDS_SIZE \ | 268 | #define PROFILE_RECORDS_SIZE \ |
261 | (PAGE_SIZE - offsetof(struct ftrace_profile_page, records)) | 269 | (PAGE_SIZE - offsetof(struct ftrace_profile_page, records)) |
262 | 270 | ||
263 | #define PROFILES_PER_PAGE \ | 271 | #define PROFILES_PER_PAGE \ |
264 | (PROFILE_RECORDS_SIZE / sizeof(struct ftrace_profile)) | 272 | (PROFILE_RECORDS_SIZE / sizeof(struct ftrace_profile)) |
265 | 273 | ||
266 | /* TODO: make these percpu, to prevent cache line bouncing */ | ||
267 | static struct ftrace_profile_page *profile_pages_start; | ||
268 | static struct ftrace_profile_page *profile_pages; | ||
269 | |||
270 | static struct hlist_head *ftrace_profile_hash; | ||
271 | static int ftrace_profile_bits; | 274 | static int ftrace_profile_bits; |
272 | static int ftrace_profile_enabled; | 275 | static int ftrace_profile_enabled; |
273 | static DEFINE_MUTEX(ftrace_profile_lock); | 276 | static DEFINE_MUTEX(ftrace_profile_lock); |
274 | 277 | ||
275 | static DEFINE_PER_CPU(atomic_t, ftrace_profile_disable); | 278 | static DEFINE_PER_CPU(struct ftrace_profile_stat, ftrace_profile_stats); |
276 | 279 | ||
277 | #define FTRACE_PROFILE_HASH_SIZE 1024 /* must be power of 2 */ | 280 | #define FTRACE_PROFILE_HASH_SIZE 1024 /* must be power of 2 */ |
278 | 281 | ||
279 | static raw_spinlock_t ftrace_profile_rec_lock = | ||
280 | (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; | ||
281 | |||
282 | static void * | 282 | static void * |
283 | function_stat_next(void *v, int idx) | 283 | function_stat_next(void *v, int idx) |
284 | { | 284 | { |
@@ -303,7 +303,13 @@ function_stat_next(void *v, int idx) | |||
303 | 303 | ||
304 | static void *function_stat_start(struct tracer_stat *trace) | 304 | static void *function_stat_start(struct tracer_stat *trace) |
305 | { | 305 | { |
306 | return function_stat_next(&profile_pages_start->records[0], 0); | 306 | struct ftrace_profile_stat *stat = |
307 | container_of(trace, struct ftrace_profile_stat, stat); | ||
308 | |||
309 | if (!stat || !stat->start) | ||
310 | return NULL; | ||
311 | |||
312 | return function_stat_next(&stat->start->records[0], 0); | ||
307 | } | 313 | } |
308 | 314 | ||
309 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | 315 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
@@ -374,20 +380,11 @@ static int function_stat_show(struct seq_file *m, void *v) | |||
374 | return 0; | 380 | return 0; |
375 | } | 381 | } |
376 | 382 | ||
377 | static struct tracer_stat function_stats = { | 383 | static void ftrace_profile_reset(struct ftrace_profile_stat *stat) |
378 | .name = "functions", | ||
379 | .stat_start = function_stat_start, | ||
380 | .stat_next = function_stat_next, | ||
381 | .stat_cmp = function_stat_cmp, | ||
382 | .stat_headers = function_stat_headers, | ||
383 | .stat_show = function_stat_show | ||
384 | }; | ||
385 | |||
386 | static void ftrace_profile_reset(void) | ||
387 | { | 384 | { |
388 | struct ftrace_profile_page *pg; | 385 | struct ftrace_profile_page *pg; |
389 | 386 | ||
390 | pg = profile_pages = profile_pages_start; | 387 | pg = stat->pages = stat->start; |
391 | 388 | ||
392 | while (pg) { | 389 | while (pg) { |
393 | memset(pg->records, 0, PROFILE_RECORDS_SIZE); | 390 | memset(pg->records, 0, PROFILE_RECORDS_SIZE); |
@@ -395,24 +392,24 @@ static void ftrace_profile_reset(void) | |||
395 | pg = pg->next; | 392 | pg = pg->next; |
396 | } | 393 | } |
397 | 394 | ||
398 | memset(ftrace_profile_hash, 0, | 395 | memset(stat->hash, 0, |
399 | FTRACE_PROFILE_HASH_SIZE * sizeof(struct hlist_head)); | 396 | FTRACE_PROFILE_HASH_SIZE * sizeof(struct hlist_head)); |
400 | } | 397 | } |
401 | 398 | ||
402 | int ftrace_profile_pages_init(void) | 399 | int ftrace_profile_pages_init(struct ftrace_profile_stat *stat) |
403 | { | 400 | { |
404 | struct ftrace_profile_page *pg; | 401 | struct ftrace_profile_page *pg; |
405 | int i; | 402 | int i; |
406 | 403 | ||
407 | /* If we already allocated, do nothing */ | 404 | /* If we already allocated, do nothing */ |
408 | if (profile_pages) | 405 | if (stat->pages) |
409 | return 0; | 406 | return 0; |
410 | 407 | ||
411 | profile_pages = (void *)get_zeroed_page(GFP_KERNEL); | 408 | stat->pages = (void *)get_zeroed_page(GFP_KERNEL); |
412 | if (!profile_pages) | 409 | if (!stat->pages) |
413 | return -ENOMEM; | 410 | return -ENOMEM; |
414 | 411 | ||
415 | pg = profile_pages_start = profile_pages; | 412 | pg = stat->start = stat->pages; |
416 | 413 | ||
417 | /* allocate 10 more pages to start */ | 414 | /* allocate 10 more pages to start */ |
418 | for (i = 0; i < 10; i++) { | 415 | for (i = 0; i < 10; i++) { |
@@ -430,13 +427,16 @@ int ftrace_profile_pages_init(void) | |||
430 | return 0; | 427 | return 0; |
431 | } | 428 | } |
432 | 429 | ||
433 | static int ftrace_profile_init(void) | 430 | static int ftrace_profile_init_cpu(int cpu) |
434 | { | 431 | { |
432 | struct ftrace_profile_stat *stat; | ||
435 | int size; | 433 | int size; |
436 | 434 | ||
437 | if (ftrace_profile_hash) { | 435 | stat = &per_cpu(ftrace_profile_stats, cpu); |
436 | |||
437 | if (stat->hash) { | ||
438 | /* If the profile is already created, simply reset it */ | 438 | /* If the profile is already created, simply reset it */ |
439 | ftrace_profile_reset(); | 439 | ftrace_profile_reset(stat); |
440 | return 0; | 440 | return 0; |
441 | } | 441 | } |
442 | 442 | ||
@@ -446,29 +446,45 @@ static int ftrace_profile_init(void) | |||
446 | */ | 446 | */ |
447 | size = FTRACE_PROFILE_HASH_SIZE; | 447 | size = FTRACE_PROFILE_HASH_SIZE; |
448 | 448 | ||
449 | ftrace_profile_hash = | 449 | stat->hash = kzalloc(sizeof(struct hlist_head) * size, GFP_KERNEL); |
450 | kzalloc(sizeof(struct hlist_head) * size, GFP_KERNEL); | ||
451 | 450 | ||
452 | if (!ftrace_profile_hash) | 451 | if (!stat->hash) |
453 | return -ENOMEM; | 452 | return -ENOMEM; |
454 | 453 | ||
455 | size--; | 454 | if (!ftrace_profile_bits) { |
455 | size--; | ||
456 | 456 | ||
457 | for (; size; size >>= 1) | 457 | for (; size; size >>= 1) |
458 | ftrace_profile_bits++; | 458 | ftrace_profile_bits++; |
459 | } | ||
459 | 460 | ||
460 | /* Preallocate a few pages */ | 461 | /* Preallocate a few pages */ |
461 | if (ftrace_profile_pages_init() < 0) { | 462 | if (ftrace_profile_pages_init(stat) < 0) { |
462 | kfree(ftrace_profile_hash); | 463 | kfree(stat->hash); |
463 | ftrace_profile_hash = NULL; | 464 | stat->hash = NULL; |
464 | return -ENOMEM; | 465 | return -ENOMEM; |
465 | } | 466 | } |
466 | 467 | ||
467 | return 0; | 468 | return 0; |
468 | } | 469 | } |
469 | 470 | ||
471 | static int ftrace_profile_init(void) | ||
472 | { | ||
473 | int cpu; | ||
474 | int ret = 0; | ||
475 | |||
476 | for_each_online_cpu(cpu) { | ||
477 | ret = ftrace_profile_init_cpu(cpu); | ||
478 | if (ret) | ||
479 | break; | ||
480 | } | ||
481 | |||
482 | return ret; | ||
483 | } | ||
484 | |||
470 | /* interrupts must be disabled */ | 485 | /* interrupts must be disabled */ |
471 | static struct ftrace_profile *ftrace_find_profiled_func(unsigned long ip) | 486 | static struct ftrace_profile * |
487 | ftrace_find_profiled_func(struct ftrace_profile_stat *stat, unsigned long ip) | ||
472 | { | 488 | { |
473 | struct ftrace_profile *rec; | 489 | struct ftrace_profile *rec; |
474 | struct hlist_head *hhd; | 490 | struct hlist_head *hhd; |
@@ -476,7 +492,7 @@ static struct ftrace_profile *ftrace_find_profiled_func(unsigned long ip) | |||
476 | unsigned long key; | 492 | unsigned long key; |
477 | 493 | ||
478 | key = hash_long(ip, ftrace_profile_bits); | 494 | key = hash_long(ip, ftrace_profile_bits); |
479 | hhd = &ftrace_profile_hash[key]; | 495 | hhd = &stat->hash[key]; |
480 | 496 | ||
481 | if (hlist_empty(hhd)) | 497 | if (hlist_empty(hhd)) |
482 | return NULL; | 498 | return NULL; |
@@ -489,52 +505,50 @@ static struct ftrace_profile *ftrace_find_profiled_func(unsigned long ip) | |||
489 | return NULL; | 505 | return NULL; |
490 | } | 506 | } |
491 | 507 | ||
492 | static void ftrace_add_profile(struct ftrace_profile *rec) | 508 | static void ftrace_add_profile(struct ftrace_profile_stat *stat, |
509 | struct ftrace_profile *rec) | ||
493 | { | 510 | { |
494 | unsigned long key; | 511 | unsigned long key; |
495 | 512 | ||
496 | key = hash_long(rec->ip, ftrace_profile_bits); | 513 | key = hash_long(rec->ip, ftrace_profile_bits); |
497 | hlist_add_head_rcu(&rec->node, &ftrace_profile_hash[key]); | 514 | hlist_add_head_rcu(&rec->node, &stat->hash[key]); |
498 | } | 515 | } |
499 | 516 | ||
500 | /* Interrupts must be disabled calling this */ | 517 | /* Interrupts must be disabled calling this */ |
501 | static struct ftrace_profile * | 518 | static struct ftrace_profile * |
502 | ftrace_profile_alloc(unsigned long ip, bool alloc_safe) | 519 | ftrace_profile_alloc(struct ftrace_profile_stat *stat, |
520 | unsigned long ip, bool alloc_safe) | ||
503 | { | 521 | { |
504 | struct ftrace_profile *rec = NULL; | 522 | struct ftrace_profile *rec = NULL; |
505 | 523 | ||
506 | /* prevent recursion */ | 524 | /* prevent recursion */ |
507 | if (atomic_inc_return(&__get_cpu_var(ftrace_profile_disable)) != 1) | 525 | if (atomic_inc_return(&stat->disabled) != 1) |
508 | goto out; | 526 | goto out; |
509 | 527 | ||
510 | __raw_spin_lock(&ftrace_profile_rec_lock); | ||
511 | |||
512 | /* Try to always keep another page available */ | 528 | /* Try to always keep another page available */ |
513 | if (!profile_pages->next && alloc_safe) | 529 | if (!stat->pages->next && alloc_safe) |
514 | profile_pages->next = (void *)get_zeroed_page(GFP_ATOMIC); | 530 | stat->pages->next = (void *)get_zeroed_page(GFP_ATOMIC); |
515 | 531 | ||
516 | /* | 532 | /* |
517 | * Try to find the function again since another | 533 | * Try to find the function again since another |
518 | * task on another CPU could have added it | 534 | * task on another CPU could have added it |
519 | */ | 535 | */ |
520 | rec = ftrace_find_profiled_func(ip); | 536 | rec = ftrace_find_profiled_func(stat, ip); |
521 | if (rec) | 537 | if (rec) |
522 | goto out_unlock; | 538 | goto out; |
523 | 539 | ||
524 | if (profile_pages->index == PROFILES_PER_PAGE) { | 540 | if (stat->pages->index == PROFILES_PER_PAGE) { |
525 | if (!profile_pages->next) | 541 | if (!stat->pages->next) |
526 | goto out_unlock; | 542 | goto out; |
527 | profile_pages = profile_pages->next; | 543 | stat->pages = stat->pages->next; |
528 | } | 544 | } |
529 | 545 | ||
530 | rec = &profile_pages->records[profile_pages->index++]; | 546 | rec = &stat->pages->records[stat->pages->index++]; |
531 | rec->ip = ip; | 547 | rec->ip = ip; |
532 | ftrace_add_profile(rec); | 548 | ftrace_add_profile(stat, rec); |
533 | 549 | ||
534 | out_unlock: | ||
535 | __raw_spin_unlock(&ftrace_profile_rec_lock); | ||
536 | out: | 550 | out: |
537 | atomic_dec(&__get_cpu_var(ftrace_profile_disable)); | 551 | atomic_dec(&stat->disabled); |
538 | 552 | ||
539 | return rec; | 553 | return rec; |
540 | } | 554 | } |
@@ -552,6 +566,7 @@ static bool ftrace_safe_to_allocate(void) | |||
552 | static void | 566 | static void |
553 | function_profile_call(unsigned long ip, unsigned long parent_ip) | 567 | function_profile_call(unsigned long ip, unsigned long parent_ip) |
554 | { | 568 | { |
569 | struct ftrace_profile_stat *stat; | ||
555 | struct ftrace_profile *rec; | 570 | struct ftrace_profile *rec; |
556 | unsigned long flags; | 571 | unsigned long flags; |
557 | bool alloc_safe; | 572 | bool alloc_safe; |
@@ -562,9 +577,14 @@ function_profile_call(unsigned long ip, unsigned long parent_ip) | |||
562 | alloc_safe = ftrace_safe_to_allocate(); | 577 | alloc_safe = ftrace_safe_to_allocate(); |
563 | 578 | ||
564 | local_irq_save(flags); | 579 | local_irq_save(flags); |
565 | rec = ftrace_find_profiled_func(ip); | 580 | |
581 | stat = &__get_cpu_var(ftrace_profile_stats); | ||
582 | if (!stat->hash) | ||
583 | goto out; | ||
584 | |||
585 | rec = ftrace_find_profiled_func(stat, ip); | ||
566 | if (!rec) { | 586 | if (!rec) { |
567 | rec = ftrace_profile_alloc(ip, alloc_safe); | 587 | rec = ftrace_profile_alloc(stat, ip, alloc_safe); |
568 | if (!rec) | 588 | if (!rec) |
569 | goto out; | 589 | goto out; |
570 | } | 590 | } |
@@ -583,13 +603,19 @@ static int profile_graph_entry(struct ftrace_graph_ent *trace) | |||
583 | 603 | ||
584 | static void profile_graph_return(struct ftrace_graph_ret *trace) | 604 | static void profile_graph_return(struct ftrace_graph_ret *trace) |
585 | { | 605 | { |
586 | unsigned long flags; | 606 | struct ftrace_profile_stat *stat; |
587 | struct ftrace_profile *rec; | 607 | struct ftrace_profile *rec; |
608 | unsigned long flags; | ||
588 | 609 | ||
589 | local_irq_save(flags); | 610 | local_irq_save(flags); |
590 | rec = ftrace_find_profiled_func(trace->func); | 611 | stat = &__get_cpu_var(ftrace_profile_stats); |
612 | if (!stat->hash) | ||
613 | goto out; | ||
614 | |||
615 | rec = ftrace_find_profiled_func(stat, trace->func); | ||
591 | if (rec) | 616 | if (rec) |
592 | rec->time += trace->rettime - trace->calltime; | 617 | rec->time += trace->rettime - trace->calltime; |
618 | out: | ||
593 | local_irq_restore(flags); | 619 | local_irq_restore(flags); |
594 | } | 620 | } |
595 | 621 | ||
@@ -687,16 +713,51 @@ static const struct file_operations ftrace_profile_fops = { | |||
687 | .write = ftrace_profile_write, | 713 | .write = ftrace_profile_write, |
688 | }; | 714 | }; |
689 | 715 | ||
716 | /* used to initialize the real stat files */ | ||
717 | static struct tracer_stat function_stats __initdata = { | ||
718 | .name = "functions", | ||
719 | .stat_start = function_stat_start, | ||
720 | .stat_next = function_stat_next, | ||
721 | .stat_cmp = function_stat_cmp, | ||
722 | .stat_headers = function_stat_headers, | ||
723 | .stat_show = function_stat_show | ||
724 | }; | ||
725 | |||
690 | static void ftrace_profile_debugfs(struct dentry *d_tracer) | 726 | static void ftrace_profile_debugfs(struct dentry *d_tracer) |
691 | { | 727 | { |
728 | struct ftrace_profile_stat *stat; | ||
692 | struct dentry *entry; | 729 | struct dentry *entry; |
730 | char *name; | ||
693 | int ret; | 731 | int ret; |
732 | int cpu; | ||
694 | 733 | ||
695 | ret = register_stat_tracer(&function_stats); | 734 | for_each_possible_cpu(cpu) { |
696 | if (ret) { | 735 | stat = &per_cpu(ftrace_profile_stats, cpu); |
697 | pr_warning("Warning: could not register " | 736 | |
698 | "function stats\n"); | 737 | /* allocate enough for function name + cpu number */ |
699 | return; | 738 | name = kmalloc(32, GFP_KERNEL); |
739 | if (!name) { | ||
740 | /* | ||
741 | * The files created are permanent, if something happens | ||
742 | * we still do not free memory. | ||
743 | */ | ||
744 | kfree(stat); | ||
745 | WARN(1, | ||
746 | "Could not allocate stat file for cpu %d\n", | ||
747 | cpu); | ||
748 | return; | ||
749 | } | ||
750 | stat->stat = function_stats; | ||
751 | snprintf(name, 32, "function%d", cpu); | ||
752 | stat->stat.name = name; | ||
753 | ret = register_stat_tracer(&stat->stat); | ||
754 | if (ret) { | ||
755 | WARN(1, | ||
756 | "Could not register function stat for cpu %d\n", | ||
757 | cpu); | ||
758 | kfree(name); | ||
759 | return; | ||
760 | } | ||
700 | } | 761 | } |
701 | 762 | ||
702 | entry = debugfs_create_file("function_profile_enabled", 0644, | 763 | entry = debugfs_create_file("function_profile_enabled", 0644, |