aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Jason Baron <jbaron@redhat.com> 2009-08-10 16:52:47 -0400
committer Frederic Weisbecker <fweisbec@gmail.com> 2009-08-11 14:35:28 -0400
commit fb34a08c3469b2be9eae626ccb96476b4687b810 (patch)
tree f308cd109de2c967a1f8bd485eb9c398992a9414
parent 69fd4f0eb2ececbf8ade55e31a933e174965745e (diff)
tracing: Add trace events for each syscall entry/exit
Layer Frederic's syscall tracer on tracepoints. We create trace events via
hooking into the SYSCALL_DEFINE macros. This allows us to individually toggle
syscall entry and exit points on/off.

Signed-off-by: Jason Baron <jbaron@redhat.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Cc: Jiaying Zhang <jiayingz@google.com>
Cc: Martin Bligh <mbligh@google.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
-rw-r--r-- include/linux/syscalls.h 61
-rw-r--r-- include/trace/syscall.h 18
-rw-r--r-- kernel/trace/trace_syscalls.c 183
3 files changed, 159 insertions, 103 deletions
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 80de7003d8c2..5e5b4d33a31c 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -64,6 +64,7 @@ struct perf_counter_attr;
64#include <linux/sem.h> 64#include <linux/sem.h>
65#include <asm/siginfo.h> 65#include <asm/siginfo.h>
66#include <asm/signal.h> 66#include <asm/signal.h>
67#include <linux/unistd.h>
67#include <linux/quota.h> 68#include <linux/quota.h>
68#include <linux/key.h> 69#include <linux/key.h>
69#include <trace/syscall.h> 70#include <trace/syscall.h>
@@ -112,6 +113,59 @@ struct perf_counter_attr;
112#define __SC_STR_TDECL5(t, a, ...) #t, __SC_STR_TDECL4(__VA_ARGS__) 113#define __SC_STR_TDECL5(t, a, ...) #t, __SC_STR_TDECL4(__VA_ARGS__)
113#define __SC_STR_TDECL6(t, a, ...) #t, __SC_STR_TDECL5(__VA_ARGS__) 114#define __SC_STR_TDECL6(t, a, ...) #t, __SC_STR_TDECL5(__VA_ARGS__)
114 115
116
117#define SYSCALL_TRACE_ENTER_EVENT(sname) \
118 static struct ftrace_event_call event_enter_##sname; \
119 static int init_enter_##sname(void) \
120 { \
121 int num; \
122 num = syscall_name_to_nr("sys"#sname); \
123 if (num < 0) \
124 return -ENOSYS; \
125 register_ftrace_event(&event_syscall_enter); \
126 INIT_LIST_HEAD(&event_enter_##sname.fields); \
127 init_preds(&event_enter_##sname); \
128 return 0; \
129 } \
130 static struct ftrace_event_call __used \
131 __attribute__((__aligned__(4))) \
132 __attribute__((section("_ftrace_events"))) \
133 event_enter_##sname = { \
134 .name = "sys_enter"#sname, \
135 .system = "syscalls", \
136 .event = &event_syscall_enter, \
137 .raw_init = init_enter_##sname, \
138 .regfunc = reg_event_syscall_enter, \
139 .unregfunc = unreg_event_syscall_enter, \
140 .data = "sys"#sname, \
141 }
142
143#define SYSCALL_TRACE_EXIT_EVENT(sname) \
144 static struct ftrace_event_call event_exit_##sname; \
145 static int init_exit_##sname(void) \
146 { \
147 int num; \
148 num = syscall_name_to_nr("sys"#sname); \
149 if (num < 0) \
150 return -ENOSYS; \
151 register_ftrace_event(&event_syscall_exit); \
152 INIT_LIST_HEAD(&event_exit_##sname.fields); \
153 init_preds(&event_exit_##sname); \
154 return 0; \
155 } \
156 static struct ftrace_event_call __used \
157 __attribute__((__aligned__(4))) \
158 __attribute__((section("_ftrace_events"))) \
159 event_exit_##sname = { \
160 .name = "sys_exit"#sname, \
161 .system = "syscalls", \
162 .event = &event_syscall_exit, \
163 .raw_init = init_exit_##sname, \
164 .regfunc = reg_event_syscall_exit, \
165 .unregfunc = unreg_event_syscall_exit, \
166 .data = "sys"#sname, \
167 }
168
115#define SYSCALL_METADATA(sname, nb) \ 169#define SYSCALL_METADATA(sname, nb) \
116 static const struct syscall_metadata __used \ 170 static const struct syscall_metadata __used \
117 __attribute__((__aligned__(4))) \ 171 __attribute__((__aligned__(4))) \
@@ -121,7 +175,9 @@ struct perf_counter_attr;
121 .nb_args = nb, \ 175 .nb_args = nb, \
122 .types = types_##sname, \ 176 .types = types_##sname, \
123 .args = args_##sname, \ 177 .args = args_##sname, \
124 } 178 }; \
179 SYSCALL_TRACE_ENTER_EVENT(sname); \
180 SYSCALL_TRACE_EXIT_EVENT(sname);
125 181
126#define SYSCALL_DEFINE0(sname) \ 182#define SYSCALL_DEFINE0(sname) \
127 static const struct syscall_metadata __used \ 183 static const struct syscall_metadata __used \
@@ -131,8 +187,9 @@ struct perf_counter_attr;
131 .name = "sys_"#sname, \ 187 .name = "sys_"#sname, \
132 .nb_args = 0, \ 188 .nb_args = 0, \
133 }; \ 189 }; \
190 SYSCALL_TRACE_ENTER_EVENT(_##sname); \
191 SYSCALL_TRACE_EXIT_EVENT(_##sname); \
134 asmlinkage long sys_##sname(void) 192 asmlinkage long sys_##sname(void)
135
136#else 193#else
137#define SYSCALL_DEFINE0(name) asmlinkage long sys_##name(void) 194#define SYSCALL_DEFINE0(name) asmlinkage long sys_##name(void)
138#endif 195#endif
diff --git a/include/trace/syscall.h b/include/trace/syscall.h
index 3951d774de18..73fb8b4a9955 100644
--- a/include/trace/syscall.h
+++ b/include/trace/syscall.h
@@ -2,6 +2,8 @@
2#define _TRACE_SYSCALL_H 2#define _TRACE_SYSCALL_H
3 3
4#include <linux/tracepoint.h> 4#include <linux/tracepoint.h>
5#include <linux/unistd.h>
6#include <linux/ftrace_event.h>
5 7
6#include <asm/ptrace.h> 8#include <asm/ptrace.h>
7 9
@@ -40,15 +42,13 @@ struct syscall_metadata {
40 42
41#ifdef CONFIG_FTRACE_SYSCALLS 43#ifdef CONFIG_FTRACE_SYSCALLS
42extern struct syscall_metadata *syscall_nr_to_meta(int nr); 44extern struct syscall_metadata *syscall_nr_to_meta(int nr);
43extern void start_ftrace_syscalls(void); 45extern int syscall_name_to_nr(char *name);
44extern void stop_ftrace_syscalls(void); 46extern struct trace_event event_syscall_enter;
45extern void ftrace_syscall_enter(struct pt_regs *regs); 47extern struct trace_event event_syscall_exit;
46extern void ftrace_syscall_exit(struct pt_regs *regs); 48extern int reg_event_syscall_enter(void *ptr);
47#else 49extern void unreg_event_syscall_enter(void *ptr);
48static inline void start_ftrace_syscalls(void) { } 50extern int reg_event_syscall_exit(void *ptr);
49static inline void stop_ftrace_syscalls(void) { } 51extern void unreg_event_syscall_exit(void *ptr);
50static inline void ftrace_syscall_enter(struct pt_regs *regs) { }
51static inline void ftrace_syscall_exit(struct pt_regs *regs) { }
52#endif 52#endif
53 53
54#endif /* _TRACE_SYSCALL_H */ 54#endif /* _TRACE_SYSCALL_H */
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 08aed439feaf..c7ae25ee95d8 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -1,15 +1,16 @@
1#include <trace/syscall.h> 1#include <trace/syscall.h>
2#include <linux/kernel.h> 2#include <linux/kernel.h>
3#include <linux/ftrace.h>
3#include <asm/syscall.h> 4#include <asm/syscall.h>
4 5
5#include "trace_output.h" 6#include "trace_output.h"
6#include "trace.h" 7#include "trace.h"
7 8
8/* Keep a counter of the syscall tracing users */
9static int refcount;
10
11/* Prevent from races on thread flags toggling */
12static DEFINE_MUTEX(syscall_trace_lock); 9static DEFINE_MUTEX(syscall_trace_lock);
10static int sys_refcount_enter;
11static int sys_refcount_exit;
12static DECLARE_BITMAP(enabled_enter_syscalls, FTRACE_SYSCALL_MAX);
13static DECLARE_BITMAP(enabled_exit_syscalls, FTRACE_SYSCALL_MAX);
13 14
14/* Option to display the parameters types */ 15/* Option to display the parameters types */
15enum { 16enum {
@@ -95,53 +96,7 @@ print_syscall_exit(struct trace_iterator *iter, int flags)
95 return TRACE_TYPE_HANDLED; 96 return TRACE_TYPE_HANDLED;
96} 97}
97 98
98void start_ftrace_syscalls(void) 99void ftrace_syscall_enter(struct pt_regs *regs, long id)
99{
100 unsigned long flags;
101 struct task_struct *g, *t;
102
103 mutex_lock(&syscall_trace_lock);
104
105 /* Don't enable the flag on the tasks twice */
106 if (++refcount != 1)
107 goto unlock;
108
109 read_lock_irqsave(&tasklist_lock, flags);
110
111 do_each_thread(g, t) {
112 set_tsk_thread_flag(t, TIF_SYSCALL_FTRACE);
113 } while_each_thread(g, t);
114
115 read_unlock_irqrestore(&tasklist_lock, flags);
116
117unlock:
118 mutex_unlock(&syscall_trace_lock);
119}
120
121void stop_ftrace_syscalls(void)
122{
123 unsigned long flags;
124 struct task_struct *g, *t;
125
126 mutex_lock(&syscall_trace_lock);
127
128 /* There are perhaps still some users */
129 if (--refcount)
130 goto unlock;
131
132 read_lock_irqsave(&tasklist_lock, flags);
133
134 do_each_thread(g, t) {
135 clear_tsk_thread_flag(t, TIF_SYSCALL_FTRACE);
136 } while_each_thread(g, t);
137
138 read_unlock_irqrestore(&tasklist_lock, flags);
139
140unlock:
141 mutex_unlock(&syscall_trace_lock);
142}
143
144void ftrace_syscall_enter(struct pt_regs *regs)
145{ 100{
146 struct syscall_trace_enter *entry; 101 struct syscall_trace_enter *entry;
147 struct syscall_metadata *sys_data; 102 struct syscall_metadata *sys_data;
@@ -150,6 +105,8 @@ void ftrace_syscall_enter(struct pt_regs *regs)
150 int syscall_nr; 105 int syscall_nr;
151 106
152 syscall_nr = syscall_get_nr(current, regs); 107 syscall_nr = syscall_get_nr(current, regs);
108 if (!test_bit(syscall_nr, enabled_enter_syscalls))
109 return;
153 110
154 sys_data = syscall_nr_to_meta(syscall_nr); 111 sys_data = syscall_nr_to_meta(syscall_nr);
155 if (!sys_data) 112 if (!sys_data)
@@ -170,7 +127,7 @@ void ftrace_syscall_enter(struct pt_regs *regs)
170 trace_wake_up(); 127 trace_wake_up();
171} 128}
172 129
173void ftrace_syscall_exit(struct pt_regs *regs) 130void ftrace_syscall_exit(struct pt_regs *regs, long ret)
174{ 131{
175 struct syscall_trace_exit *entry; 132 struct syscall_trace_exit *entry;
176 struct syscall_metadata *sys_data; 133 struct syscall_metadata *sys_data;
@@ -178,6 +135,8 @@ void ftrace_syscall_exit(struct pt_regs *regs)
178 int syscall_nr; 135 int syscall_nr;
179 136
180 syscall_nr = syscall_get_nr(current, regs); 137 syscall_nr = syscall_get_nr(current, regs);
138 if (!test_bit(syscall_nr, enabled_exit_syscalls))
139 return;
181 140
182 sys_data = syscall_nr_to_meta(syscall_nr); 141 sys_data = syscall_nr_to_meta(syscall_nr);
183 if (!sys_data) 142 if (!sys_data)
@@ -196,54 +155,94 @@ void ftrace_syscall_exit(struct pt_regs *regs)
196 trace_wake_up(); 155 trace_wake_up();
197} 156}
198 157
199static int init_syscall_tracer(struct trace_array *tr) 158int reg_event_syscall_enter(void *ptr)
200{ 159{
201 start_ftrace_syscalls(); 160 int ret = 0;
202 161 int num;
203 return 0; 162 char *name;
163
164 name = (char *)ptr;
165 num = syscall_name_to_nr(name);
166 if (num < 0 || num >= FTRACE_SYSCALL_MAX)
167 return -ENOSYS;
168 mutex_lock(&syscall_trace_lock);
169 if (!sys_refcount_enter)
170 ret = register_trace_syscall_enter(ftrace_syscall_enter);
171 if (ret) {
172 pr_info("event trace: Could not activate"
173 "syscall entry trace point");
174 } else {
175 set_bit(num, enabled_enter_syscalls);
176 sys_refcount_enter++;
177 }
178 mutex_unlock(&syscall_trace_lock);
179 return ret;
204} 180}
205 181
206static void reset_syscall_tracer(struct trace_array *tr) 182void unreg_event_syscall_enter(void *ptr)
207{ 183{
208 stop_ftrace_syscalls(); 184 int num;
209 tracing_reset_online_cpus(tr); 185 char *name;
210}
211
212static struct trace_event syscall_enter_event = {
213 .type = TRACE_SYSCALL_ENTER,
214 .trace = print_syscall_enter,
215};
216
217static struct trace_event syscall_exit_event = {
218 .type = TRACE_SYSCALL_EXIT,
219 .trace = print_syscall_exit,
220};
221 186
222static struct tracer syscall_tracer __read_mostly = { 187 name = (char *)ptr;
223 .name = "syscall", 188 num = syscall_name_to_nr(name);
224 .init = init_syscall_tracer, 189 if (num < 0 || num >= FTRACE_SYSCALL_MAX)
225 .reset = reset_syscall_tracer, 190 return;
226 .flags = &syscalls_flags, 191 mutex_lock(&syscall_trace_lock);
227}; 192 sys_refcount_enter--;
193 clear_bit(num, enabled_enter_syscalls);
194 if (!sys_refcount_enter)
195 unregister_trace_syscall_enter(ftrace_syscall_enter);
196 mutex_unlock(&syscall_trace_lock);
197}
228 198
229__init int register_ftrace_syscalls(void) 199int reg_event_syscall_exit(void *ptr)
230{ 200{
231 int ret; 201 int ret = 0;
232 202 int num;
233 ret = register_ftrace_event(&syscall_enter_event); 203 char *name;
234 if (!ret) { 204
235 printk(KERN_WARNING "event %d failed to register\n", 205 name = (char *)ptr;
236 syscall_enter_event.type); 206 num = syscall_name_to_nr(name);
237 WARN_ON_ONCE(1); 207 if (num < 0 || num >= FTRACE_SYSCALL_MAX)
208 return -ENOSYS;
209 mutex_lock(&syscall_trace_lock);
210 if (!sys_refcount_exit)
211 ret = register_trace_syscall_exit(ftrace_syscall_exit);
212 if (ret) {
213 pr_info("event trace: Could not activate"
214 "syscall exit trace point");
215 } else {
216 set_bit(num, enabled_exit_syscalls);
217 sys_refcount_exit++;
238 } 218 }
219 mutex_unlock(&syscall_trace_lock);
220 return ret;
221}
239 222
240 ret = register_ftrace_event(&syscall_exit_event); 223void unreg_event_syscall_exit(void *ptr)
241 if (!ret) { 224{
242 printk(KERN_WARNING "event %d failed to register\n", 225 int num;
243 syscall_exit_event.type); 226 char *name;
244 WARN_ON_ONCE(1);
245 }
246 227
247 return register_tracer(&syscall_tracer); 228 name = (char *)ptr;
229 num = syscall_name_to_nr(name);
230 if (num < 0 || num >= FTRACE_SYSCALL_MAX)
231 return;
232 mutex_lock(&syscall_trace_lock);
233 sys_refcount_exit--;
234 clear_bit(num, enabled_exit_syscalls);
235 if (!sys_refcount_exit)
236 unregister_trace_syscall_exit(ftrace_syscall_exit);
237 mutex_unlock(&syscall_trace_lock);
248} 238}
249device_initcall(register_ftrace_syscalls); 239
240struct trace_event event_syscall_enter = {
241 .trace = print_syscall_enter,
242 .type = TRACE_SYSCALL_ENTER
243};
244
245struct trace_event event_syscall_exit = {
246 .trace = print_syscall_exit,
247 .type = TRACE_SYSCALL_EXIT
248};