author		Ingo Molnar <mingo@elte.hu>	2009-09-19 06:05:25 -0400
committer	Ingo Molnar <mingo@elte.hu>	2009-09-19 06:05:25 -0400
commit		be4bdbfbae6b303c21ebe446648f617908a794b5 (patch)
tree		4ea485b2fa95cb17aded35b0ac83d6bdf9a0396d
parent		fc5377668c3d808e1d53c4aee152c836f55c3490 (diff)
parent		20ab4425a77a1f34028cc6ce57053c22c184ba5f (diff)
Merge branch 'tracing/core-v3' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic/random-tracing into tracing/urgent
-rw-r--r--	include/linux/ftrace_event.h		 10
-rw-r--r--	include/linux/syscalls.h		 24
-rw-r--r--	include/trace/ftrace.h			111
-rw-r--r--	kernel/trace/trace_event_profile.c	 79
-rw-r--r--	kernel/trace/trace_syscalls.c		 97
5 files changed, 234 insertions(+), 87 deletions(-)
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index bd099ba82ccc..4ec5e67e18cf 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -4,6 +4,7 @@
 #include <linux/ring_buffer.h>
 #include <linux/trace_seq.h>
 #include <linux/percpu.h>
+#include <linux/hardirq.h>
 
 struct trace_array;
 struct tracer;
@@ -130,10 +131,15 @@ struct ftrace_event_call {
 	void			*data;
 
 	atomic_t		profile_count;
-	int			(*profile_enable)(struct ftrace_event_call *);
-	void			(*profile_disable)(struct ftrace_event_call *);
+	int			(*profile_enable)(void);
+	void			(*profile_disable)(void);
 };
 
+#define FTRACE_MAX_PROFILE_SIZE	2048
+
+extern char *trace_profile_buf;
+extern char *trace_profile_buf_nmi;
+
 #define MAX_FILTER_PRED		32
 #define MAX_FILTER_STR_VAL	256	/* Should handle KSYM_SYMBOL_LEN */
 
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index a8e37821cc60..7d9803cbb20f 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -100,33 +100,25 @@ struct perf_counter_attr;
 
 #ifdef CONFIG_EVENT_PROFILE
 #define TRACE_SYS_ENTER_PROFILE(sname) \
-static int prof_sysenter_enable_##sname(struct ftrace_event_call *event_call) \
+static int prof_sysenter_enable_##sname(void) \
 { \
-	int ret = 0; \
-	if (!atomic_inc_return(&event_enter_##sname.profile_count)) \
-		ret = reg_prof_syscall_enter("sys"#sname); \
-	return ret; \
+	return reg_prof_syscall_enter("sys"#sname); \
 } \
 	\
-static void prof_sysenter_disable_##sname(struct ftrace_event_call *event_call)\
+static void prof_sysenter_disable_##sname(void) \
 { \
-	if (atomic_add_negative(-1, &event_enter_##sname.profile_count)) \
-		unreg_prof_syscall_enter("sys"#sname); \
+	unreg_prof_syscall_enter("sys"#sname); \
 }
 
 #define TRACE_SYS_EXIT_PROFILE(sname) \
-static int prof_sysexit_enable_##sname(struct ftrace_event_call *event_call) \
+static int prof_sysexit_enable_##sname(void) \
 { \
-	int ret = 0; \
-	if (!atomic_inc_return(&event_exit_##sname.profile_count)) \
-		ret = reg_prof_syscall_exit("sys"#sname); \
-	return ret; \
+	return reg_prof_syscall_exit("sys"#sname); \
 } \
 	\
-static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \
+static void prof_sysexit_disable_##sname(void) \
 { \
-	if (atomic_add_negative(-1, &event_exit_##sname.profile_count)) \
-		unreg_prof_syscall_exit("sys"#sname); \
+	unreg_prof_syscall_exit("sys"#sname); \
 }
 
 #define TRACE_SYS_ENTER_PROFILE_INIT(sname) \
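
With the per-event refcounting moved out of these macros and into kernel/trace/trace_event_profile.c, the generated wrappers become one-liners. As a rough illustration only (assuming SYSCALL_DEFINEx() passes sname with a leading underscore, e.g. "_open" for sys_open; that naming convention is inferred and not part of this diff), TRACE_SYS_ENTER_PROFILE(_open) would now expand to something like:

/* Illustrative expansion only -- not generated output from this patch */
static int prof_sysenter_enable__open(void)
{
	/*
	 * Registration is unconditional here; the per-event refcount and
	 * the shared buffer allocation now live in
	 * ftrace_profile_enable_event().
	 */
	return reg_prof_syscall_enter("sys_open");
}

static void prof_sysenter_disable__open(void)
{
	unreg_prof_syscall_enter("sys_open");
}
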
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 72a3b437b829..a0361cb69769 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -382,20 +382,14 @@ static inline int ftrace_get_offsets_##call( \
  *
  * NOTE: The insertion profile callback (ftrace_profile_<call>) is defined later
  *
- * static int ftrace_profile_enable_<call>(struct ftrace_event_call *event_call)
+ * static int ftrace_profile_enable_<call>(void)
  * {
- * 	int ret = 0;
- *
- * 	if (!atomic_inc_return(&event_call->profile_count))
- * 		ret = register_trace_<call>(ftrace_profile_<call>);
- *
- * 	return ret;
+ * 	return register_trace_<call>(ftrace_profile_<call>);
  * }
  *
- * static void ftrace_profile_disable_<call>(struct ftrace_event_call *event_call)
+ * static void ftrace_profile_disable_<call>(void)
  * {
- * 	if (atomic_add_negative(-1, &event->call->profile_count))
- * 		unregister_trace_<call>(ftrace_profile_<call>);
+ * 	unregister_trace_<call>(ftrace_profile_<call>);
  * }
  *
  */
@@ -405,20 +399,14 @@ static inline int ftrace_get_offsets_##call( \
 	\
 static void ftrace_profile_##call(proto); \
 	\
-static int ftrace_profile_enable_##call(struct ftrace_event_call *event_call) \
+static int ftrace_profile_enable_##call(void) \
 { \
-	int ret = 0; \
-	\
-	if (!atomic_inc_return(&event_call->profile_count)) \
-		ret = register_trace_##call(ftrace_profile_##call); \
-	\
-	return ret; \
+	return register_trace_##call(ftrace_profile_##call); \
 } \
 	\
-static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\
+static void ftrace_profile_disable_##call(void) \
 { \
-	if (atomic_add_negative(-1, &event_call->profile_count)) \
-		unregister_trace_##call(ftrace_profile_##call); \
+	unregister_trace_##call(ftrace_profile_##call); \
 }
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
@@ -660,11 +648,12 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
  *	struct ftrace_raw_##call *entry;
  *	u64 __addr = 0, __count = 1;
  *	unsigned long irq_flags;
+ *	struct trace_entry *ent;
  *	int __entry_size;
  *	int __data_size;
+ *	int __cpu
  *	int pc;
  *
- *	local_save_flags(irq_flags);
  *	pc = preempt_count();
  *
  *	__data_size = ftrace_get_offsets_<call>(&__data_offsets, args);
@@ -675,25 +664,34 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
  *			     sizeof(u64));
  *	__entry_size -= sizeof(u32);
  *
- *	do {
- *		char raw_data[__entry_size]; <- allocate our sample in the stack
- *		struct trace_entry *ent;
+ *	// Protect the non nmi buffer
+ *	// This also protects the rcu read side
+ *	local_irq_save(irq_flags);
+ *	__cpu = smp_processor_id();
+ *
+ *	if (in_nmi())
+ *		raw_data = rcu_dereference(trace_profile_buf_nmi);
+ *	else
+ *		raw_data = rcu_dereference(trace_profile_buf);
+ *
+ *	if (!raw_data)
+ *		goto end;
  *
- *		zero dead bytes from alignment to avoid stack leak to userspace:
+ *	raw_data = per_cpu_ptr(raw_data, __cpu);
  *
- *		*(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL;
- *		entry = (struct ftrace_raw_<call> *)raw_data;
- *		ent = &entry->ent;
- *		tracing_generic_entry_update(ent, irq_flags, pc);
- *		ent->type = event_call->id;
+ *	//zero dead bytes from alignment to avoid stack leak to userspace:
+ *	*(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL;
+ *	entry = (struct ftrace_raw_<call> *)raw_data;
+ *	ent = &entry->ent;
+ *	tracing_generic_entry_update(ent, irq_flags, pc);
+ *	ent->type = event_call->id;
  *
  *	<tstruct> <- do some jobs with dynamic arrays
  *
  *	<assign> <- affect our values
  *
  *	perf_tpcounter_event(event_call->id, __addr, __count, entry,
  *		     __entry_size); <- submit them to perf counter
- *	} while (0);
  *
  * }
  */
@@ -716,11 +714,13 @@ static void ftrace_profile_##call(proto) \
 	struct ftrace_raw_##call *entry; \
 	u64 __addr = 0, __count = 1; \
 	unsigned long irq_flags; \
+	struct trace_entry *ent; \
 	int __entry_size; \
 	int __data_size; \
+	char *raw_data; \
+	int __cpu; \
 	int pc; \
 	\
-	local_save_flags(irq_flags); \
 	pc = preempt_count(); \
 	\
 	__data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
@@ -728,23 +728,38 @@ static void ftrace_profile_##call(proto) \
 			     sizeof(u64)); \
 	__entry_size -= sizeof(u32); \
 	\
-	do { \
-		char raw_data[__entry_size]; \
-		struct trace_entry *ent; \
+	if (WARN_ONCE(__entry_size > FTRACE_MAX_PROFILE_SIZE, \
+		      "profile buffer not large enough")) \
+		return; \
+	\
+	local_irq_save(irq_flags); \
+	__cpu = smp_processor_id(); \
 	\
-		*(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \
-		entry = (struct ftrace_raw_##call *)raw_data; \
-		ent = &entry->ent; \
-		tracing_generic_entry_update(ent, irq_flags, pc); \
-		ent->type = event_call->id; \
+	if (in_nmi()) \
+		raw_data = rcu_dereference(trace_profile_buf_nmi); \
+	else \
+		raw_data = rcu_dereference(trace_profile_buf); \
 	\
-		tstruct \
+	if (!raw_data) \
+		goto end; \
 	\
-		{ assign; } \
+	raw_data = per_cpu_ptr(raw_data, __cpu); \
 	\
-		perf_tpcounter_event(event_call->id, __addr, __count, entry,\
+	*(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \
+	entry = (struct ftrace_raw_##call *)raw_data; \
+	ent = &entry->ent; \
+	tracing_generic_entry_update(ent, irq_flags, pc); \
+	ent->type = event_call->id; \
+	\
+	tstruct \
+	\
+	{ assign; } \
+	\
+	perf_tpcounter_event(event_call->id, __addr, __count, entry, \
 			__entry_size); \
-	} while (0); \
+	\
+end: \
+	local_irq_restore(irq_flags); \
 	\
 }
 
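
The profile fast path generated by this macro now builds the sample in a shared per-cpu buffer instead of a variable-length array on the stack, and a separate buffer is reserved for NMI context so an NMI arriving while a regular tracepoint handler is using its CPU slot does not corrupt the sample being assembled. Factored out of the macro purely for readability, the buffer selection amounts to the hypothetical helper below (not part of the patch; it must run with interrupts disabled, which is what makes the rcu_dereference() safe against the synchronize_sched() in trace_event_profile.c):

/* Hypothetical helper, equivalent to the open-coded logic above */
static char *profile_buf_for_this_cpu(void)
{
	char *buf;

	if (in_nmi())
		buf = rcu_dereference(trace_profile_buf_nmi);
	else
		buf = rcu_dereference(trace_profile_buf);

	if (!buf)
		return NULL;	/* profiling torn down or never enabled */

	/* each CPU owns one FTRACE_MAX_PROFILE_SIZE slot of the allocation */
	return per_cpu_ptr(buf, smp_processor_id());
}
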
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index 55a25c933d15..3aaa77c3309b 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -8,6 +8,54 @@
 #include <linux/module.h>
 #include "trace.h"
 
+/*
+ * We can't use a size but a type in alloc_percpu()
+ * So let's create a dummy type that matches the desired size
+ */
+typedef struct {char buf[FTRACE_MAX_PROFILE_SIZE];} profile_buf_t;
+
+char *trace_profile_buf;
+char *trace_profile_buf_nmi;
+
+/* Count the events in use (per event id, not per instance) */
+static int	total_profile_count;
+
+static int ftrace_profile_enable_event(struct ftrace_event_call *event)
+{
+	char *buf;
+	int ret = -ENOMEM;
+
+	if (atomic_inc_return(&event->profile_count))
+		return 0;
+
+	if (!total_profile_count++) {
+		buf = (char *)alloc_percpu(profile_buf_t);
+		if (!buf)
+			goto fail_buf;
+
+		rcu_assign_pointer(trace_profile_buf, buf);
+
+		buf = (char *)alloc_percpu(profile_buf_t);
+		if (!buf)
+			goto fail_buf_nmi;
+
+		rcu_assign_pointer(trace_profile_buf_nmi, buf);
+	}
+
+	ret = event->profile_enable();
+	if (!ret)
+		return 0;
+
+	kfree(trace_profile_buf_nmi);
+fail_buf_nmi:
+	kfree(trace_profile_buf);
+fail_buf:
+	total_profile_count--;
+	atomic_dec(&event->profile_count);
+
+	return ret;
+}
+
 int ftrace_profile_enable(int event_id)
 {
 	struct ftrace_event_call *event;
@@ -17,7 +65,7 @@ int ftrace_profile_enable(int event_id)
 	list_for_each_entry(event, &ftrace_events, list) {
 		if (event->id == event_id && event->profile_enable &&
 		    try_module_get(event->mod)) {
-			ret = event->profile_enable(event);
+			ret = ftrace_profile_enable_event(event);
 			break;
 		}
 	}
@@ -26,6 +74,33 @@ int ftrace_profile_enable(int event_id)
 	return ret;
 }
 
+static void ftrace_profile_disable_event(struct ftrace_event_call *event)
+{
+	char *buf, *nmi_buf;
+
+	if (!atomic_add_negative(-1, &event->profile_count))
+		return;
+
+	event->profile_disable();
+
+	if (!--total_profile_count) {
+		buf = trace_profile_buf;
+		rcu_assign_pointer(trace_profile_buf, NULL);
+
+		nmi_buf = trace_profile_buf_nmi;
+		rcu_assign_pointer(trace_profile_buf_nmi, NULL);
+
+		/*
+		 * Ensure every events in profiling have finished before
+		 * releasing the buffers
+		 */
+		synchronize_sched();
+
+		free_percpu(buf);
+		free_percpu(nmi_buf);
+	}
+}
+
 void ftrace_profile_disable(int event_id)
 {
 	struct ftrace_event_call *event;
@@ -33,7 +108,7 @@ void ftrace_profile_disable(int event_id)
 	mutex_lock(&event_mutex);
 	list_for_each_entry(event, &ftrace_events, list) {
 		if (event->id == event_id) {
-			event->profile_disable(event);
+			ftrace_profile_disable_event(event);
 			module_put(event->mod);
 			break;
 		}
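
The lifetime rules implemented above: each event keeps its own profile_count, total_profile_count counts enabled event ids globally, and the two per-cpu buffers are allocated on the first enable and released when the last event is disabled. Release is safe because every user of the buffers runs with interrupts disabled, which counts as an RCU-sched read-side critical section, so synchronize_sched() cannot return while any CPU is still inside a handler. (Note that the enable error path above releases the areas with kfree(); free_percpu(), as used in the teardown path, is the matching release for alloc_percpu().) A minimal sketch of the pattern with hypothetical names, independent of this file:

/* Hypothetical illustration of the publish/retract pattern, not patch code */
static char *example_buf;			/* set via alloc_percpu() elsewhere */

static void example_writer(void)		/* tracepoint or NMI side */
{
	unsigned long flags;
	char *buf;

	local_irq_save(flags);			/* implicit RCU-sched read side */
	buf = rcu_dereference(example_buf);
	if (buf)
		*per_cpu_ptr(buf, smp_processor_id()) = 0;
	local_irq_restore(flags);		/* read side ends */
}

static void example_teardown(void)		/* last profile user gone */
{
	char *old = example_buf;

	rcu_assign_pointer(example_buf, NULL);
	synchronize_sched();			/* wait out every irq-off reader */
	free_percpu(old);
}
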
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 8712ce3c6a0e..7a3550cf2597 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -384,10 +384,13 @@ static int sys_prof_refcount_exit;
 
 static void prof_syscall_enter(struct pt_regs *regs, long id)
 {
-	struct syscall_trace_enter *rec;
 	struct syscall_metadata *sys_data;
+	struct syscall_trace_enter *rec;
+	unsigned long flags;
+	char *raw_data;
 	int syscall_nr;
 	int size;
+	int cpu;
 
 	syscall_nr = syscall_get_nr(current, regs);
 	if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
@@ -402,20 +405,38 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
 	size = ALIGN(size + sizeof(u32), sizeof(u64));
 	size -= sizeof(u32);
 
-	do {
-		char raw_data[size];
+	if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
+		      "profile buffer not large enough"))
+		return;
+
+	/* Protect the per cpu buffer, begin the rcu read side */
+	local_irq_save(flags);
 
-		/* zero the dead bytes from align to not leak stack to user */
-		*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+	cpu = smp_processor_id();
+
+	if (in_nmi())
+		raw_data = rcu_dereference(trace_profile_buf_nmi);
+	else
+		raw_data = rcu_dereference(trace_profile_buf);
+
+	if (!raw_data)
+		goto end;
 
-		rec = (struct syscall_trace_enter *) raw_data;
-		tracing_generic_entry_update(&rec->ent, 0, 0);
-		rec->ent.type = sys_data->enter_id;
-		rec->nr = syscall_nr;
-		syscall_get_arguments(current, regs, 0, sys_data->nb_args,
-			       (unsigned long *)&rec->args);
-		perf_tpcounter_event(sys_data->enter_id, 0, 1, rec, size);
-	} while(0);
+	raw_data = per_cpu_ptr(raw_data, cpu);
+
+	/* zero the dead bytes from align to not leak stack to user */
+	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+
+	rec = (struct syscall_trace_enter *) raw_data;
+	tracing_generic_entry_update(&rec->ent, 0, 0);
+	rec->ent.type = sys_data->enter_id;
+	rec->nr = syscall_nr;
+	syscall_get_arguments(current, regs, 0, sys_data->nb_args,
+		       (unsigned long *)&rec->args);
+	perf_tpcounter_event(sys_data->enter_id, 0, 1, rec, size);
+
+end:
+	local_irq_restore(flags);
 }
 
 int reg_prof_syscall_enter(char *name)
@@ -460,8 +481,12 @@ void unreg_prof_syscall_enter(char *name)
 static void prof_syscall_exit(struct pt_regs *regs, long ret)
 {
 	struct syscall_metadata *sys_data;
-	struct syscall_trace_exit rec;
+	struct syscall_trace_exit *rec;
+	unsigned long flags;
 	int syscall_nr;
+	char *raw_data;
+	int size;
+	int cpu;
 
 	syscall_nr = syscall_get_nr(current, regs);
 	if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
@@ -471,12 +496,46 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
 	if (!sys_data)
 		return;
 
-	tracing_generic_entry_update(&rec.ent, 0, 0);
-	rec.ent.type = sys_data->exit_id;
-	rec.nr = syscall_nr;
-	rec.ret = syscall_get_return_value(current, regs);
+	/* We can probably do that at build time */
+	size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
+	size -= sizeof(u32);
 
-	perf_tpcounter_event(sys_data->exit_id, 0, 1, &rec, sizeof(rec));
+	/*
+	 * Impossible, but be paranoid with the future
+	 * How to put this check outside runtime?
+	 */
+	if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
+		      "exit event has grown above profile buffer size"))
+		return;
+
+	/* Protect the per cpu buffer, begin the rcu read side */
+	local_irq_save(flags);
+	cpu = smp_processor_id();
+
+	if (in_nmi())
+		raw_data = rcu_dereference(trace_profile_buf_nmi);
+	else
+		raw_data = rcu_dereference(trace_profile_buf);
+
+	if (!raw_data)
+		goto end;
+
+	raw_data = per_cpu_ptr(raw_data, cpu);
+
+	/* zero the dead bytes from align to not leak stack to user */
+	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+
+	rec = (struct syscall_trace_exit *)raw_data;
+
+	tracing_generic_entry_update(&rec->ent, 0, 0);
+	rec->ent.type = sys_data->exit_id;
+	rec->nr = syscall_nr;
+	rec->ret = syscall_get_return_value(current, regs);
+
+	perf_tpcounter_event(sys_data->exit_id, 0, 1, rec, size);
+
+end:
+	local_irq_restore(flags);
 }
 
 int reg_prof_syscall_exit(char *name)