diff options
author | Jason Baron <jbaron@redhat.com> | 2009-08-10 16:52:47 -0400 |
---|---|---|
committer | Frederic Weisbecker <fweisbec@gmail.com> | 2009-08-11 14:35:28 -0400 |
commit | fb34a08c3469b2be9eae626ccb96476b4687b810 (patch) | |
tree | f308cd109de2c967a1f8bd485eb9c398992a9414 | |
parent | 69fd4f0eb2ececbf8ade55e31a933e174965745e (diff) |
tracing: Add trace events for each syscall entry/exit
Layer Frederic's syscall tracer on top of tracepoints. We create trace events
by hooking into the SYSCALL_DEFINE macros. This allows us to
individually toggle syscall entry and exit points on/off.
Signed-off-by: Jason Baron <jbaron@redhat.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Cc: Jiaying Zhang <jiayingz@google.com>
Cc: Martin Bligh <mbligh@google.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
-rw-r--r-- | include/linux/syscalls.h | 61 | ||||
-rw-r--r-- | include/trace/syscall.h | 18 | ||||
-rw-r--r-- | kernel/trace/trace_syscalls.c | 183 |
3 files changed, 159 insertions, 103 deletions
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 80de7003d8c2..5e5b4d33a31c 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h | |||
@@ -64,6 +64,7 @@ struct perf_counter_attr; | |||
64 | #include <linux/sem.h> | 64 | #include <linux/sem.h> |
65 | #include <asm/siginfo.h> | 65 | #include <asm/siginfo.h> |
66 | #include <asm/signal.h> | 66 | #include <asm/signal.h> |
67 | #include <linux/unistd.h> | ||
67 | #include <linux/quota.h> | 68 | #include <linux/quota.h> |
68 | #include <linux/key.h> | 69 | #include <linux/key.h> |
69 | #include <trace/syscall.h> | 70 | #include <trace/syscall.h> |
@@ -112,6 +113,59 @@ struct perf_counter_attr; | |||
112 | #define __SC_STR_TDECL5(t, a, ...) #t, __SC_STR_TDECL4(__VA_ARGS__) | 113 | #define __SC_STR_TDECL5(t, a, ...) #t, __SC_STR_TDECL4(__VA_ARGS__) |
113 | #define __SC_STR_TDECL6(t, a, ...) #t, __SC_STR_TDECL5(__VA_ARGS__) | 114 | #define __SC_STR_TDECL6(t, a, ...) #t, __SC_STR_TDECL5(__VA_ARGS__) |
114 | 115 | ||
116 | |||
117 | #define SYSCALL_TRACE_ENTER_EVENT(sname) \ | ||
118 | static struct ftrace_event_call event_enter_##sname; \ | ||
119 | static int init_enter_##sname(void) \ | ||
120 | { \ | ||
121 | int num; \ | ||
122 | num = syscall_name_to_nr("sys"#sname); \ | ||
123 | if (num < 0) \ | ||
124 | return -ENOSYS; \ | ||
125 | register_ftrace_event(&event_syscall_enter); \ | ||
126 | INIT_LIST_HEAD(&event_enter_##sname.fields); \ | ||
127 | init_preds(&event_enter_##sname); \ | ||
128 | return 0; \ | ||
129 | } \ | ||
130 | static struct ftrace_event_call __used \ | ||
131 | __attribute__((__aligned__(4))) \ | ||
132 | __attribute__((section("_ftrace_events"))) \ | ||
133 | event_enter_##sname = { \ | ||
134 | .name = "sys_enter"#sname, \ | ||
135 | .system = "syscalls", \ | ||
136 | .event = &event_syscall_enter, \ | ||
137 | .raw_init = init_enter_##sname, \ | ||
138 | .regfunc = reg_event_syscall_enter, \ | ||
139 | .unregfunc = unreg_event_syscall_enter, \ | ||
140 | .data = "sys"#sname, \ | ||
141 | } | ||
142 | |||
143 | #define SYSCALL_TRACE_EXIT_EVENT(sname) \ | ||
144 | static struct ftrace_event_call event_exit_##sname; \ | ||
145 | static int init_exit_##sname(void) \ | ||
146 | { \ | ||
147 | int num; \ | ||
148 | num = syscall_name_to_nr("sys"#sname); \ | ||
149 | if (num < 0) \ | ||
150 | return -ENOSYS; \ | ||
151 | register_ftrace_event(&event_syscall_exit); \ | ||
152 | INIT_LIST_HEAD(&event_exit_##sname.fields); \ | ||
153 | init_preds(&event_exit_##sname); \ | ||
154 | return 0; \ | ||
155 | } \ | ||
156 | static struct ftrace_event_call __used \ | ||
157 | __attribute__((__aligned__(4))) \ | ||
158 | __attribute__((section("_ftrace_events"))) \ | ||
159 | event_exit_##sname = { \ | ||
160 | .name = "sys_exit"#sname, \ | ||
161 | .system = "syscalls", \ | ||
162 | .event = &event_syscall_exit, \ | ||
163 | .raw_init = init_exit_##sname, \ | ||
164 | .regfunc = reg_event_syscall_exit, \ | ||
165 | .unregfunc = unreg_event_syscall_exit, \ | ||
166 | .data = "sys"#sname, \ | ||
167 | } | ||
168 | |||
115 | #define SYSCALL_METADATA(sname, nb) \ | 169 | #define SYSCALL_METADATA(sname, nb) \ |
116 | static const struct syscall_metadata __used \ | 170 | static const struct syscall_metadata __used \ |
117 | __attribute__((__aligned__(4))) \ | 171 | __attribute__((__aligned__(4))) \ |
@@ -121,7 +175,9 @@ struct perf_counter_attr; | |||
121 | .nb_args = nb, \ | 175 | .nb_args = nb, \ |
122 | .types = types_##sname, \ | 176 | .types = types_##sname, \ |
123 | .args = args_##sname, \ | 177 | .args = args_##sname, \ |
124 | } | 178 | }; \ |
179 | SYSCALL_TRACE_ENTER_EVENT(sname); \ | ||
180 | SYSCALL_TRACE_EXIT_EVENT(sname); | ||
125 | 181 | ||
126 | #define SYSCALL_DEFINE0(sname) \ | 182 | #define SYSCALL_DEFINE0(sname) \ |
127 | static const struct syscall_metadata __used \ | 183 | static const struct syscall_metadata __used \ |
@@ -131,8 +187,9 @@ struct perf_counter_attr; | |||
131 | .name = "sys_"#sname, \ | 187 | .name = "sys_"#sname, \ |
132 | .nb_args = 0, \ | 188 | .nb_args = 0, \ |
133 | }; \ | 189 | }; \ |
190 | SYSCALL_TRACE_ENTER_EVENT(_##sname); \ | ||
191 | SYSCALL_TRACE_EXIT_EVENT(_##sname); \ | ||
134 | asmlinkage long sys_##sname(void) | 192 | asmlinkage long sys_##sname(void) |
135 | |||
136 | #else | 193 | #else |
137 | #define SYSCALL_DEFINE0(name) asmlinkage long sys_##name(void) | 194 | #define SYSCALL_DEFINE0(name) asmlinkage long sys_##name(void) |
138 | #endif | 195 | #endif |
diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 3951d774de18..73fb8b4a9955 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h | |||
@@ -2,6 +2,8 @@ | |||
2 | #define _TRACE_SYSCALL_H | 2 | #define _TRACE_SYSCALL_H |
3 | 3 | ||
4 | #include <linux/tracepoint.h> | 4 | #include <linux/tracepoint.h> |
5 | #include <linux/unistd.h> | ||
6 | #include <linux/ftrace_event.h> | ||
5 | 7 | ||
6 | #include <asm/ptrace.h> | 8 | #include <asm/ptrace.h> |
7 | 9 | ||
@@ -40,15 +42,13 @@ struct syscall_metadata { | |||
40 | 42 | ||
41 | #ifdef CONFIG_FTRACE_SYSCALLS | 43 | #ifdef CONFIG_FTRACE_SYSCALLS |
42 | extern struct syscall_metadata *syscall_nr_to_meta(int nr); | 44 | extern struct syscall_metadata *syscall_nr_to_meta(int nr); |
43 | extern void start_ftrace_syscalls(void); | 45 | extern int syscall_name_to_nr(char *name); |
44 | extern void stop_ftrace_syscalls(void); | 46 | extern struct trace_event event_syscall_enter; |
45 | extern void ftrace_syscall_enter(struct pt_regs *regs); | 47 | extern struct trace_event event_syscall_exit; |
46 | extern void ftrace_syscall_exit(struct pt_regs *regs); | 48 | extern int reg_event_syscall_enter(void *ptr); |
47 | #else | 49 | extern void unreg_event_syscall_enter(void *ptr); |
48 | static inline void start_ftrace_syscalls(void) { } | 50 | extern int reg_event_syscall_exit(void *ptr); |
49 | static inline void stop_ftrace_syscalls(void) { } | 51 | extern void unreg_event_syscall_exit(void *ptr); |
50 | static inline void ftrace_syscall_enter(struct pt_regs *regs) { } | ||
51 | static inline void ftrace_syscall_exit(struct pt_regs *regs) { } | ||
52 | #endif | 52 | #endif |
53 | 53 | ||
54 | #endif /* _TRACE_SYSCALL_H */ | 54 | #endif /* _TRACE_SYSCALL_H */ |
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 08aed439feaf..c7ae25ee95d8 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c | |||
@@ -1,15 +1,16 @@ | |||
1 | #include <trace/syscall.h> | 1 | #include <trace/syscall.h> |
2 | #include <linux/kernel.h> | 2 | #include <linux/kernel.h> |
3 | #include <linux/ftrace.h> | ||
3 | #include <asm/syscall.h> | 4 | #include <asm/syscall.h> |
4 | 5 | ||
5 | #include "trace_output.h" | 6 | #include "trace_output.h" |
6 | #include "trace.h" | 7 | #include "trace.h" |
7 | 8 | ||
8 | /* Keep a counter of the syscall tracing users */ | ||
9 | static int refcount; | ||
10 | |||
11 | /* Prevent from races on thread flags toggling */ | ||
12 | static DEFINE_MUTEX(syscall_trace_lock); | 9 | static DEFINE_MUTEX(syscall_trace_lock); |
10 | static int sys_refcount_enter; | ||
11 | static int sys_refcount_exit; | ||
12 | static DECLARE_BITMAP(enabled_enter_syscalls, FTRACE_SYSCALL_MAX); | ||
13 | static DECLARE_BITMAP(enabled_exit_syscalls, FTRACE_SYSCALL_MAX); | ||
13 | 14 | ||
14 | /* Option to display the parameters types */ | 15 | /* Option to display the parameters types */ |
15 | enum { | 16 | enum { |
@@ -95,53 +96,7 @@ print_syscall_exit(struct trace_iterator *iter, int flags) | |||
95 | return TRACE_TYPE_HANDLED; | 96 | return TRACE_TYPE_HANDLED; |
96 | } | 97 | } |
97 | 98 | ||
98 | void start_ftrace_syscalls(void) | 99 | void ftrace_syscall_enter(struct pt_regs *regs, long id) |
99 | { | ||
100 | unsigned long flags; | ||
101 | struct task_struct *g, *t; | ||
102 | |||
103 | mutex_lock(&syscall_trace_lock); | ||
104 | |||
105 | /* Don't enable the flag on the tasks twice */ | ||
106 | if (++refcount != 1) | ||
107 | goto unlock; | ||
108 | |||
109 | read_lock_irqsave(&tasklist_lock, flags); | ||
110 | |||
111 | do_each_thread(g, t) { | ||
112 | set_tsk_thread_flag(t, TIF_SYSCALL_FTRACE); | ||
113 | } while_each_thread(g, t); | ||
114 | |||
115 | read_unlock_irqrestore(&tasklist_lock, flags); | ||
116 | |||
117 | unlock: | ||
118 | mutex_unlock(&syscall_trace_lock); | ||
119 | } | ||
120 | |||
121 | void stop_ftrace_syscalls(void) | ||
122 | { | ||
123 | unsigned long flags; | ||
124 | struct task_struct *g, *t; | ||
125 | |||
126 | mutex_lock(&syscall_trace_lock); | ||
127 | |||
128 | /* There are perhaps still some users */ | ||
129 | if (--refcount) | ||
130 | goto unlock; | ||
131 | |||
132 | read_lock_irqsave(&tasklist_lock, flags); | ||
133 | |||
134 | do_each_thread(g, t) { | ||
135 | clear_tsk_thread_flag(t, TIF_SYSCALL_FTRACE); | ||
136 | } while_each_thread(g, t); | ||
137 | |||
138 | read_unlock_irqrestore(&tasklist_lock, flags); | ||
139 | |||
140 | unlock: | ||
141 | mutex_unlock(&syscall_trace_lock); | ||
142 | } | ||
143 | |||
144 | void ftrace_syscall_enter(struct pt_regs *regs) | ||
145 | { | 100 | { |
146 | struct syscall_trace_enter *entry; | 101 | struct syscall_trace_enter *entry; |
147 | struct syscall_metadata *sys_data; | 102 | struct syscall_metadata *sys_data; |
@@ -150,6 +105,8 @@ void ftrace_syscall_enter(struct pt_regs *regs) | |||
150 | int syscall_nr; | 105 | int syscall_nr; |
151 | 106 | ||
152 | syscall_nr = syscall_get_nr(current, regs); | 107 | syscall_nr = syscall_get_nr(current, regs); |
108 | if (!test_bit(syscall_nr, enabled_enter_syscalls)) | ||
109 | return; | ||
153 | 110 | ||
154 | sys_data = syscall_nr_to_meta(syscall_nr); | 111 | sys_data = syscall_nr_to_meta(syscall_nr); |
155 | if (!sys_data) | 112 | if (!sys_data) |
@@ -170,7 +127,7 @@ void ftrace_syscall_enter(struct pt_regs *regs) | |||
170 | trace_wake_up(); | 127 | trace_wake_up(); |
171 | } | 128 | } |
172 | 129 | ||
173 | void ftrace_syscall_exit(struct pt_regs *regs) | 130 | void ftrace_syscall_exit(struct pt_regs *regs, long ret) |
174 | { | 131 | { |
175 | struct syscall_trace_exit *entry; | 132 | struct syscall_trace_exit *entry; |
176 | struct syscall_metadata *sys_data; | 133 | struct syscall_metadata *sys_data; |
@@ -178,6 +135,8 @@ void ftrace_syscall_exit(struct pt_regs *regs) | |||
178 | int syscall_nr; | 135 | int syscall_nr; |
179 | 136 | ||
180 | syscall_nr = syscall_get_nr(current, regs); | 137 | syscall_nr = syscall_get_nr(current, regs); |
138 | if (!test_bit(syscall_nr, enabled_exit_syscalls)) | ||
139 | return; | ||
181 | 140 | ||
182 | sys_data = syscall_nr_to_meta(syscall_nr); | 141 | sys_data = syscall_nr_to_meta(syscall_nr); |
183 | if (!sys_data) | 142 | if (!sys_data) |
@@ -196,54 +155,94 @@ void ftrace_syscall_exit(struct pt_regs *regs) | |||
196 | trace_wake_up(); | 155 | trace_wake_up(); |
197 | } | 156 | } |
198 | 157 | ||
199 | static int init_syscall_tracer(struct trace_array *tr) | 158 | int reg_event_syscall_enter(void *ptr) |
200 | { | 159 | { |
201 | start_ftrace_syscalls(); | 160 | int ret = 0; |
202 | 161 | int num; | |
203 | return 0; | 162 | char *name; |
163 | |||
164 | name = (char *)ptr; | ||
165 | num = syscall_name_to_nr(name); | ||
166 | if (num < 0 || num >= FTRACE_SYSCALL_MAX) | ||
167 | return -ENOSYS; | ||
168 | mutex_lock(&syscall_trace_lock); | ||
169 | if (!sys_refcount_enter) | ||
170 | ret = register_trace_syscall_enter(ftrace_syscall_enter); | ||
171 | if (ret) { | ||
172 | pr_info("event trace: Could not activate" | ||
173 | "syscall entry trace point"); | ||
174 | } else { | ||
175 | set_bit(num, enabled_enter_syscalls); | ||
176 | sys_refcount_enter++; | ||
177 | } | ||
178 | mutex_unlock(&syscall_trace_lock); | ||
179 | return ret; | ||
204 | } | 180 | } |
205 | 181 | ||
206 | static void reset_syscall_tracer(struct trace_array *tr) | 182 | void unreg_event_syscall_enter(void *ptr) |
207 | { | 183 | { |
208 | stop_ftrace_syscalls(); | 184 | int num; |
209 | tracing_reset_online_cpus(tr); | 185 | char *name; |
210 | } | ||
211 | |||
212 | static struct trace_event syscall_enter_event = { | ||
213 | .type = TRACE_SYSCALL_ENTER, | ||
214 | .trace = print_syscall_enter, | ||
215 | }; | ||
216 | |||
217 | static struct trace_event syscall_exit_event = { | ||
218 | .type = TRACE_SYSCALL_EXIT, | ||
219 | .trace = print_syscall_exit, | ||
220 | }; | ||
221 | 186 | ||
222 | static struct tracer syscall_tracer __read_mostly = { | 187 | name = (char *)ptr; |
223 | .name = "syscall", | 188 | num = syscall_name_to_nr(name); |
224 | .init = init_syscall_tracer, | 189 | if (num < 0 || num >= FTRACE_SYSCALL_MAX) |
225 | .reset = reset_syscall_tracer, | 190 | return; |
226 | .flags = &syscalls_flags, | 191 | mutex_lock(&syscall_trace_lock); |
227 | }; | 192 | sys_refcount_enter--; |
193 | clear_bit(num, enabled_enter_syscalls); | ||
194 | if (!sys_refcount_enter) | ||
195 | unregister_trace_syscall_enter(ftrace_syscall_enter); | ||
196 | mutex_unlock(&syscall_trace_lock); | ||
197 | } | ||
228 | 198 | ||
229 | __init int register_ftrace_syscalls(void) | 199 | int reg_event_syscall_exit(void *ptr) |
230 | { | 200 | { |
231 | int ret; | 201 | int ret = 0; |
232 | 202 | int num; | |
233 | ret = register_ftrace_event(&syscall_enter_event); | 203 | char *name; |
234 | if (!ret) { | 204 | |
235 | printk(KERN_WARNING "event %d failed to register\n", | 205 | name = (char *)ptr; |
236 | syscall_enter_event.type); | 206 | num = syscall_name_to_nr(name); |
237 | WARN_ON_ONCE(1); | 207 | if (num < 0 || num >= FTRACE_SYSCALL_MAX) |
208 | return -ENOSYS; | ||
209 | mutex_lock(&syscall_trace_lock); | ||
210 | if (!sys_refcount_exit) | ||
211 | ret = register_trace_syscall_exit(ftrace_syscall_exit); | ||
212 | if (ret) { | ||
213 | pr_info("event trace: Could not activate" | ||
214 | "syscall exit trace point"); | ||
215 | } else { | ||
216 | set_bit(num, enabled_exit_syscalls); | ||
217 | sys_refcount_exit++; | ||
238 | } | 218 | } |
219 | mutex_unlock(&syscall_trace_lock); | ||
220 | return ret; | ||
221 | } | ||
239 | 222 | ||
240 | ret = register_ftrace_event(&syscall_exit_event); | 223 | void unreg_event_syscall_exit(void *ptr) |
241 | if (!ret) { | 224 | { |
242 | printk(KERN_WARNING "event %d failed to register\n", | 225 | int num; |
243 | syscall_exit_event.type); | 226 | char *name; |
244 | WARN_ON_ONCE(1); | ||
245 | } | ||
246 | 227 | ||
247 | return register_tracer(&syscall_tracer); | 228 | name = (char *)ptr; |
229 | num = syscall_name_to_nr(name); | ||
230 | if (num < 0 || num >= FTRACE_SYSCALL_MAX) | ||
231 | return; | ||
232 | mutex_lock(&syscall_trace_lock); | ||
233 | sys_refcount_exit--; | ||
234 | clear_bit(num, enabled_exit_syscalls); | ||
235 | if (!sys_refcount_exit) | ||
236 | unregister_trace_syscall_exit(ftrace_syscall_exit); | ||
237 | mutex_unlock(&syscall_trace_lock); | ||
248 | } | 238 | } |
249 | device_initcall(register_ftrace_syscalls); | 239 | |
240 | struct trace_event event_syscall_enter = { | ||
241 | .trace = print_syscall_enter, | ||
242 | .type = TRACE_SYSCALL_ENTER | ||
243 | }; | ||
244 | |||
245 | struct trace_event event_syscall_exit = { | ||
246 | .trace = print_syscall_exit, | ||
247 | .type = TRACE_SYSCALL_EXIT | ||
248 | }; | ||