aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
author: Steven Rostedt <srostedt@redhat.com> 2009-03-24 20:50:39 -0400
committer: Steven Rostedt <srostedt@redhat.com> 2009-03-24 23:41:10 -0400
commit: cafb168a1c92e4c9e1731fe3d666c39611762c49 (patch)
tree: 536d4bb6d33499d746ffb674126d2983e5d6e361 /kernel
parent: 0706f1c48ca8a7ab478090b4e38f2e578ae2bfe0 (diff)
tracing: make the function profiler per cpu
Impact: speed enhancement

By making the function profiler record in per cpu data we not only get better readings, avoid races, we also do not have to take any locks.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/trace/ftrace.c 199
1 files changed, 130 insertions, 69 deletions
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index a9ccd71fc922..ed1fc5021d44 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -257,28 +257,28 @@ struct ftrace_profile_page {
257 struct ftrace_profile records[]; 257 struct ftrace_profile records[];
258}; 258};
259 259
260struct ftrace_profile_stat {
261 atomic_t disabled;
262 struct hlist_head *hash;
263 struct ftrace_profile_page *pages;
264 struct ftrace_profile_page *start;
265 struct tracer_stat stat;
266};
267
260#define PROFILE_RECORDS_SIZE \ 268#define PROFILE_RECORDS_SIZE \
261 (PAGE_SIZE - offsetof(struct ftrace_profile_page, records)) 269 (PAGE_SIZE - offsetof(struct ftrace_profile_page, records))
262 270
263#define PROFILES_PER_PAGE \ 271#define PROFILES_PER_PAGE \
264 (PROFILE_RECORDS_SIZE / sizeof(struct ftrace_profile)) 272 (PROFILE_RECORDS_SIZE / sizeof(struct ftrace_profile))
265 273
266/* TODO: make these percpu, to prevent cache line bouncing */
267static struct ftrace_profile_page *profile_pages_start;
268static struct ftrace_profile_page *profile_pages;
269
270static struct hlist_head *ftrace_profile_hash;
271static int ftrace_profile_bits; 274static int ftrace_profile_bits;
272static int ftrace_profile_enabled; 275static int ftrace_profile_enabled;
273static DEFINE_MUTEX(ftrace_profile_lock); 276static DEFINE_MUTEX(ftrace_profile_lock);
274 277
275static DEFINE_PER_CPU(atomic_t, ftrace_profile_disable); 278static DEFINE_PER_CPU(struct ftrace_profile_stat, ftrace_profile_stats);
276 279
277#define FTRACE_PROFILE_HASH_SIZE 1024 /* must be power of 2 */ 280#define FTRACE_PROFILE_HASH_SIZE 1024 /* must be power of 2 */
278 281
279static raw_spinlock_t ftrace_profile_rec_lock =
280 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
281
282static void * 282static void *
283function_stat_next(void *v, int idx) 283function_stat_next(void *v, int idx)
284{ 284{
@@ -303,7 +303,13 @@ function_stat_next(void *v, int idx)
303 303
304static void *function_stat_start(struct tracer_stat *trace) 304static void *function_stat_start(struct tracer_stat *trace)
305{ 305{
306 return function_stat_next(&profile_pages_start->records[0], 0); 306 struct ftrace_profile_stat *stat =
307 container_of(trace, struct ftrace_profile_stat, stat);
308
309 if (!stat || !stat->start)
310 return NULL;
311
312 return function_stat_next(&stat->start->records[0], 0);
307} 313}
308 314
309#ifdef CONFIG_FUNCTION_GRAPH_TRACER 315#ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -374,20 +380,11 @@ static int function_stat_show(struct seq_file *m, void *v)
374 return 0; 380 return 0;
375} 381}
376 382
377static struct tracer_stat function_stats = { 383static void ftrace_profile_reset(struct ftrace_profile_stat *stat)
378 .name = "functions",
379 .stat_start = function_stat_start,
380 .stat_next = function_stat_next,
381 .stat_cmp = function_stat_cmp,
382 .stat_headers = function_stat_headers,
383 .stat_show = function_stat_show
384};
385
386static void ftrace_profile_reset(void)
387{ 384{
388 struct ftrace_profile_page *pg; 385 struct ftrace_profile_page *pg;
389 386
390 pg = profile_pages = profile_pages_start; 387 pg = stat->pages = stat->start;
391 388
392 while (pg) { 389 while (pg) {
393 memset(pg->records, 0, PROFILE_RECORDS_SIZE); 390 memset(pg->records, 0, PROFILE_RECORDS_SIZE);
@@ -395,24 +392,24 @@ static void ftrace_profile_reset(void)
395 pg = pg->next; 392 pg = pg->next;
396 } 393 }
397 394
398 memset(ftrace_profile_hash, 0, 395 memset(stat->hash, 0,
399 FTRACE_PROFILE_HASH_SIZE * sizeof(struct hlist_head)); 396 FTRACE_PROFILE_HASH_SIZE * sizeof(struct hlist_head));
400} 397}
401 398
402int ftrace_profile_pages_init(void) 399int ftrace_profile_pages_init(struct ftrace_profile_stat *stat)
403{ 400{
404 struct ftrace_profile_page *pg; 401 struct ftrace_profile_page *pg;
405 int i; 402 int i;
406 403
407 /* If we already allocated, do nothing */ 404 /* If we already allocated, do nothing */
408 if (profile_pages) 405 if (stat->pages)
409 return 0; 406 return 0;
410 407
411 profile_pages = (void *)get_zeroed_page(GFP_KERNEL); 408 stat->pages = (void *)get_zeroed_page(GFP_KERNEL);
412 if (!profile_pages) 409 if (!stat->pages)
413 return -ENOMEM; 410 return -ENOMEM;
414 411
415 pg = profile_pages_start = profile_pages; 412 pg = stat->start = stat->pages;
416 413
417 /* allocate 10 more pages to start */ 414 /* allocate 10 more pages to start */
418 for (i = 0; i < 10; i++) { 415 for (i = 0; i < 10; i++) {
@@ -430,13 +427,16 @@ int ftrace_profile_pages_init(void)
430 return 0; 427 return 0;
431} 428}
432 429
433static int ftrace_profile_init(void) 430static int ftrace_profile_init_cpu(int cpu)
434{ 431{
432 struct ftrace_profile_stat *stat;
435 int size; 433 int size;
436 434
437 if (ftrace_profile_hash) { 435 stat = &per_cpu(ftrace_profile_stats, cpu);
436
437 if (stat->hash) {
438 /* If the profile is already created, simply reset it */ 438 /* If the profile is already created, simply reset it */
439 ftrace_profile_reset(); 439 ftrace_profile_reset(stat);
440 return 0; 440 return 0;
441 } 441 }
442 442
@@ -446,29 +446,45 @@ static int ftrace_profile_init(void)
446 */ 446 */
447 size = FTRACE_PROFILE_HASH_SIZE; 447 size = FTRACE_PROFILE_HASH_SIZE;
448 448
449 ftrace_profile_hash = 449 stat->hash = kzalloc(sizeof(struct hlist_head) * size, GFP_KERNEL);
450 kzalloc(sizeof(struct hlist_head) * size, GFP_KERNEL);
451 450
452 if (!ftrace_profile_hash) 451 if (!stat->hash)
453 return -ENOMEM; 452 return -ENOMEM;
454 453
455 size--; 454 if (!ftrace_profile_bits) {
455 size--;
456 456
457 for (; size; size >>= 1) 457 for (; size; size >>= 1)
458 ftrace_profile_bits++; 458 ftrace_profile_bits++;
459 }
459 460
460 /* Preallocate a few pages */ 461 /* Preallocate a few pages */
461 if (ftrace_profile_pages_init() < 0) { 462 if (ftrace_profile_pages_init(stat) < 0) {
462 kfree(ftrace_profile_hash); 463 kfree(stat->hash);
463 ftrace_profile_hash = NULL; 464 stat->hash = NULL;
464 return -ENOMEM; 465 return -ENOMEM;
465 } 466 }
466 467
467 return 0; 468 return 0;
468} 469}
469 470
471static int ftrace_profile_init(void)
472{
473 int cpu;
474 int ret = 0;
475
476 for_each_online_cpu(cpu) {
477 ret = ftrace_profile_init_cpu(cpu);
478 if (ret)
479 break;
480 }
481
482 return ret;
483}
484
470/* interrupts must be disabled */ 485/* interrupts must be disabled */
471static struct ftrace_profile *ftrace_find_profiled_func(unsigned long ip) 486static struct ftrace_profile *
487ftrace_find_profiled_func(struct ftrace_profile_stat *stat, unsigned long ip)
472{ 488{
473 struct ftrace_profile *rec; 489 struct ftrace_profile *rec;
474 struct hlist_head *hhd; 490 struct hlist_head *hhd;
@@ -476,7 +492,7 @@ static struct ftrace_profile *ftrace_find_profiled_func(unsigned long ip)
476 unsigned long key; 492 unsigned long key;
477 493
478 key = hash_long(ip, ftrace_profile_bits); 494 key = hash_long(ip, ftrace_profile_bits);
479 hhd = &ftrace_profile_hash[key]; 495 hhd = &stat->hash[key];
480 496
481 if (hlist_empty(hhd)) 497 if (hlist_empty(hhd))
482 return NULL; 498 return NULL;
@@ -489,52 +505,50 @@ static struct ftrace_profile *ftrace_find_profiled_func(unsigned long ip)
489 return NULL; 505 return NULL;
490} 506}
491 507
492static void ftrace_add_profile(struct ftrace_profile *rec) 508static void ftrace_add_profile(struct ftrace_profile_stat *stat,
509 struct ftrace_profile *rec)
493{ 510{
494 unsigned long key; 511 unsigned long key;
495 512
496 key = hash_long(rec->ip, ftrace_profile_bits); 513 key = hash_long(rec->ip, ftrace_profile_bits);
497 hlist_add_head_rcu(&rec->node, &ftrace_profile_hash[key]); 514 hlist_add_head_rcu(&rec->node, &stat->hash[key]);
498} 515}
499 516
500/* Interrupts must be disabled calling this */ 517/* Interrupts must be disabled calling this */
501static struct ftrace_profile * 518static struct ftrace_profile *
502ftrace_profile_alloc(unsigned long ip, bool alloc_safe) 519ftrace_profile_alloc(struct ftrace_profile_stat *stat,
520 unsigned long ip, bool alloc_safe)
503{ 521{
504 struct ftrace_profile *rec = NULL; 522 struct ftrace_profile *rec = NULL;
505 523
506 /* prevent recursion */ 524 /* prevent recursion */
507 if (atomic_inc_return(&__get_cpu_var(ftrace_profile_disable)) != 1) 525 if (atomic_inc_return(&stat->disabled) != 1)
508 goto out; 526 goto out;
509 527
510 __raw_spin_lock(&ftrace_profile_rec_lock);
511
512 /* Try to always keep another page available */ 528 /* Try to always keep another page available */
513 if (!profile_pages->next && alloc_safe) 529 if (!stat->pages->next && alloc_safe)
514 profile_pages->next = (void *)get_zeroed_page(GFP_ATOMIC); 530 stat->pages->next = (void *)get_zeroed_page(GFP_ATOMIC);
515 531
516 /* 532 /*
517 * Try to find the function again since another 533 * Try to find the function again since another
518 * task on another CPU could have added it 534 * task on another CPU could have added it
519 */ 535 */
520 rec = ftrace_find_profiled_func(ip); 536 rec = ftrace_find_profiled_func(stat, ip);
521 if (rec) 537 if (rec)
522 goto out_unlock; 538 goto out;
523 539
524 if (profile_pages->index == PROFILES_PER_PAGE) { 540 if (stat->pages->index == PROFILES_PER_PAGE) {
525 if (!profile_pages->next) 541 if (!stat->pages->next)
526 goto out_unlock; 542 goto out;
527 profile_pages = profile_pages->next; 543 stat->pages = stat->pages->next;
528 } 544 }
529 545
530 rec = &profile_pages->records[profile_pages->index++]; 546 rec = &stat->pages->records[stat->pages->index++];
531 rec->ip = ip; 547 rec->ip = ip;
532 ftrace_add_profile(rec); 548 ftrace_add_profile(stat, rec);
533 549
534 out_unlock:
535 __raw_spin_unlock(&ftrace_profile_rec_lock);
536 out: 550 out:
537 atomic_dec(&__get_cpu_var(ftrace_profile_disable)); 551 atomic_dec(&stat->disabled);
538 552
539 return rec; 553 return rec;
540} 554}
@@ -552,6 +566,7 @@ static bool ftrace_safe_to_allocate(void)
552static void 566static void
553function_profile_call(unsigned long ip, unsigned long parent_ip) 567function_profile_call(unsigned long ip, unsigned long parent_ip)
554{ 568{
569 struct ftrace_profile_stat *stat;
555 struct ftrace_profile *rec; 570 struct ftrace_profile *rec;
556 unsigned long flags; 571 unsigned long flags;
557 bool alloc_safe; 572 bool alloc_safe;
@@ -562,9 +577,14 @@ function_profile_call(unsigned long ip, unsigned long parent_ip)
562 alloc_safe = ftrace_safe_to_allocate(); 577 alloc_safe = ftrace_safe_to_allocate();
563 578
564 local_irq_save(flags); 579 local_irq_save(flags);
565 rec = ftrace_find_profiled_func(ip); 580
581 stat = &__get_cpu_var(ftrace_profile_stats);
582 if (!stat->hash)
583 goto out;
584
585 rec = ftrace_find_profiled_func(stat, ip);
566 if (!rec) { 586 if (!rec) {
567 rec = ftrace_profile_alloc(ip, alloc_safe); 587 rec = ftrace_profile_alloc(stat, ip, alloc_safe);
568 if (!rec) 588 if (!rec)
569 goto out; 589 goto out;
570 } 590 }
@@ -583,13 +603,19 @@ static int profile_graph_entry(struct ftrace_graph_ent *trace)
583 603
584static void profile_graph_return(struct ftrace_graph_ret *trace) 604static void profile_graph_return(struct ftrace_graph_ret *trace)
585{ 605{
586 unsigned long flags; 606 struct ftrace_profile_stat *stat;
587 struct ftrace_profile *rec; 607 struct ftrace_profile *rec;
608 unsigned long flags;
588 609
589 local_irq_save(flags); 610 local_irq_save(flags);
590 rec = ftrace_find_profiled_func(trace->func); 611 stat = &__get_cpu_var(ftrace_profile_stats);
612 if (!stat->hash)
613 goto out;
614
615 rec = ftrace_find_profiled_func(stat, trace->func);
591 if (rec) 616 if (rec)
592 rec->time += trace->rettime - trace->calltime; 617 rec->time += trace->rettime - trace->calltime;
618 out:
593 local_irq_restore(flags); 619 local_irq_restore(flags);
594} 620}
595 621
@@ -687,16 +713,51 @@ static const struct file_operations ftrace_profile_fops = {
687 .write = ftrace_profile_write, 713 .write = ftrace_profile_write,
688}; 714};
689 715
716/* used to initialize the real stat files */
717static struct tracer_stat function_stats __initdata = {
718 .name = "functions",
719 .stat_start = function_stat_start,
720 .stat_next = function_stat_next,
721 .stat_cmp = function_stat_cmp,
722 .stat_headers = function_stat_headers,
723 .stat_show = function_stat_show
724};
725
690static void ftrace_profile_debugfs(struct dentry *d_tracer) 726static void ftrace_profile_debugfs(struct dentry *d_tracer)
691{ 727{
728 struct ftrace_profile_stat *stat;
692 struct dentry *entry; 729 struct dentry *entry;
730 char *name;
693 int ret; 731 int ret;
732 int cpu;
694 733
695 ret = register_stat_tracer(&function_stats); 734 for_each_possible_cpu(cpu) {
696 if (ret) { 735 stat = &per_cpu(ftrace_profile_stats, cpu);
697 pr_warning("Warning: could not register " 736
698 "function stats\n"); 737 /* allocate enough for function name + cpu number */
699 return; 738 name = kmalloc(32, GFP_KERNEL);
739 if (!name) {
740 /*
741 * The files created are permanent, if something happens
742 * we still do not free memory.
743 */
744 kfree(stat);
745 WARN(1,
746 "Could not allocate stat file for cpu %d\n",
747 cpu);
748 return;
749 }
750 stat->stat = function_stats;
751 snprintf(name, 32, "function%d", cpu);
752 stat->stat.name = name;
753 ret = register_stat_tracer(&stat->stat);
754 if (ret) {
755 WARN(1,
756 "Could not register function stat for cpu %d\n",
757 cpu);
758 kfree(name);
759 return;
760 }
700 } 761 }
701 762
702 entry = debugfs_create_file("function_profile_enabled", 0644, 763 entry = debugfs_create_file("function_profile_enabled", 0644,