Diffstat (limited to 'kernel/trace')
-rw-r--r--  kernel/trace/Kconfig                |   17
-rw-r--r--  kernel/trace/Makefile               |    1
-rw-r--r--  kernel/trace/trace.h                |   23
-rw-r--r--  kernel/trace/trace_event_profile.c  |   45
-rw-r--r--  kernel/trace/trace_events.c         |  144
-rw-r--r--  kernel/trace/trace_export.c         |   14
-rw-r--r--  kernel/trace/trace_kprobe.c         | 1513
-rw-r--r--  kernel/trace/trace_syscalls.c       |   60
8 files changed, 1725 insertions, 92 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index b416512ad17f..f05671609a89 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -428,6 +428,23 @@ config BLK_DEV_IO_TRACE
428 428
429 If unsure, say N. 429 If unsure, say N.
430 430
431config KPROBE_EVENT
432 depends on KPROBES
433 depends on X86
434 bool "Enable kprobes-based dynamic events"
435 select TRACING
436 default y
437 help
438 This allows the user to add tracing events (similar to tracepoints) on the fly
439 via the ftrace interface. See Documentation/trace/kprobetrace.txt
440 for more details.
441
442 Those events can be inserted wherever kprobes can probe, and record
443 various register and memory values.
444
445 This option is also required by the perf-probe subcommand of perf tools. If
446 you want to use perf tools, this option is strongly recommended.
447
431config DYNAMIC_FTRACE 448config DYNAMIC_FTRACE
432 bool "enable/disable ftrace tracepoints dynamically" 449 bool "enable/disable ftrace tracepoints dynamically"
433 depends on FUNCTION_TRACER 450 depends on FUNCTION_TRACER
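
As the help text notes, probes are created at run time through the ftrace debugfs interface described in Documentation/trace/kprobetrace.txt. A minimal user-space sketch follows; the debugfs mount point, event name and probed symbol are illustrative assumptions, not part of this patch:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* Assumed debugfs mount point; adjust if mounted elsewhere. */
	const char *events = "/sys/kernel/debug/tracing/kprobe_events";
	/* Illustrative definition: probe do_sys_open and record two arguments. */
	const char *def = "p:myopen do_sys_open $arg0 $arg1\n";
	int fd = open(events, O_WRONLY | O_APPEND);

	if (fd < 0) {
		perror("open kprobe_events");
		return 1;
	}
	if (write(fd, def, strlen(def)) < 0)
		perror("write probe definition");
	close(fd);

	/*
	 * The probe then appears under events/kprobes/myopen/ and is
	 * switched on through its 'enable' file like any other event.
	 */
	return 0;
}
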
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 26f03ac07c2b..edc3a3cca1a1 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -53,6 +53,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_export.o
53obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o 53obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
54obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o 54obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
55obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o 55obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
56obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
56obj-$(CONFIG_EVENT_TRACING) += power-traces.o 57obj-$(CONFIG_EVENT_TRACING) += power-traces.o
57 58
58libftrace-y := ftrace.o 59libftrace-y := ftrace.o
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 4959ada9e0bb..b4e4212e66d7 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -101,6 +101,29 @@ struct syscall_trace_exit {
101 unsigned long ret; 101 unsigned long ret;
102}; 102};
103 103
104struct kprobe_trace_entry {
105 struct trace_entry ent;
106 unsigned long ip;
107 int nargs;
108 unsigned long args[];
109};
110
111#define SIZEOF_KPROBE_TRACE_ENTRY(n) \
112 (offsetof(struct kprobe_trace_entry, args) + \
113 (sizeof(unsigned long) * (n)))
114
115struct kretprobe_trace_entry {
116 struct trace_entry ent;
117 unsigned long func;
118 unsigned long ret_ip;
119 int nargs;
120 unsigned long args[];
121};
122
123#define SIZEOF_KRETPROBE_TRACE_ENTRY(n) \
124 (offsetof(struct kretprobe_trace_entry, args) + \
125 (sizeof(unsigned long) * (n)))
126
104/* 127/*
105 * trace_flag_type is an enumeration that holds different 128 * trace_flag_type is an enumeration that holds different
106 * states when a trace occurs. These are: 129 * states when a trace occurs. These are:
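
Both entry structures end in a flexible args[] array, so a record's size depends on how many values a probe fetches; the SIZEOF_* macros above compute it with offsetof(). Below is a stand-alone sketch of the same sizing idiom, using a simplified mirror struct (the trace_entry header is omitted here for brevity):

#include <stddef.h>
#include <stdio.h>

/* Simplified mirror of kprobe_trace_entry: fixed header plus N argument slots. */
struct entry {
	unsigned long ip;
	int nargs;
	unsigned long args[];
};

#define SIZEOF_ENTRY(n) \
	(offsetof(struct entry, args) + sizeof(unsigned long) * (n))

int main(void)
{
	/* Each ring-buffer record reserves only the space its arguments need. */
	printf("0 args -> %zu bytes\n", SIZEOF_ENTRY(0));
	printf("4 args -> %zu bytes\n", SIZEOF_ENTRY(4));
	return 0;
}
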
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index 8d5c171cc998..e0d351b01f5a 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -8,44 +8,39 @@
8#include <linux/module.h> 8#include <linux/module.h>
9#include "trace.h" 9#include "trace.h"
10 10
11/*
12 * We can't use a size but a type in alloc_percpu()
13 * So let's create a dummy type that matches the desired size
14 */
15typedef struct {char buf[FTRACE_MAX_PROFILE_SIZE];} profile_buf_t;
16 11
17char *trace_profile_buf; 12struct perf_trace_buf *perf_trace_buf;
18EXPORT_SYMBOL_GPL(trace_profile_buf); 13EXPORT_SYMBOL_GPL(perf_trace_buf);
19 14
20char *trace_profile_buf_nmi; 15struct perf_trace_buf *perf_trace_buf_nmi;
21EXPORT_SYMBOL_GPL(trace_profile_buf_nmi); 16EXPORT_SYMBOL_GPL(perf_trace_buf_nmi);
22 17
23/* Count the events in use (per event id, not per instance) */ 18/* Count the events in use (per event id, not per instance) */
24static int total_profile_count; 19static int total_profile_count;
25 20
26static int ftrace_profile_enable_event(struct ftrace_event_call *event) 21static int ftrace_profile_enable_event(struct ftrace_event_call *event)
27{ 22{
28 char *buf; 23 struct perf_trace_buf *buf;
29 int ret = -ENOMEM; 24 int ret = -ENOMEM;
30 25
31 if (atomic_inc_return(&event->profile_count)) 26 if (atomic_inc_return(&event->profile_count))
32 return 0; 27 return 0;
33 28
34 if (!total_profile_count) { 29 if (!total_profile_count) {
35 buf = (char *)alloc_percpu(profile_buf_t); 30 buf = alloc_percpu(struct perf_trace_buf);
36 if (!buf) 31 if (!buf)
37 goto fail_buf; 32 goto fail_buf;
38 33
39 rcu_assign_pointer(trace_profile_buf, buf); 34 rcu_assign_pointer(perf_trace_buf, buf);
40 35
41 buf = (char *)alloc_percpu(profile_buf_t); 36 buf = alloc_percpu(struct perf_trace_buf);
42 if (!buf) 37 if (!buf)
43 goto fail_buf_nmi; 38 goto fail_buf_nmi;
44 39
45 rcu_assign_pointer(trace_profile_buf_nmi, buf); 40 rcu_assign_pointer(perf_trace_buf_nmi, buf);
46 } 41 }
47 42
48 ret = event->profile_enable(); 43 ret = event->profile_enable(event);
49 if (!ret) { 44 if (!ret) {
50 total_profile_count++; 45 total_profile_count++;
51 return 0; 46 return 0;
@@ -53,10 +48,10 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event)
53 48
54fail_buf_nmi: 49fail_buf_nmi:
55 if (!total_profile_count) { 50 if (!total_profile_count) {
56 free_percpu(trace_profile_buf_nmi); 51 free_percpu(perf_trace_buf_nmi);
57 free_percpu(trace_profile_buf); 52 free_percpu(perf_trace_buf);
58 trace_profile_buf_nmi = NULL; 53 perf_trace_buf_nmi = NULL;
59 trace_profile_buf = NULL; 54 perf_trace_buf = NULL;
60 } 55 }
61fail_buf: 56fail_buf:
62 atomic_dec(&event->profile_count); 57 atomic_dec(&event->profile_count);
@@ -84,19 +79,19 @@ int ftrace_profile_enable(int event_id)
84 79
85static void ftrace_profile_disable_event(struct ftrace_event_call *event) 80static void ftrace_profile_disable_event(struct ftrace_event_call *event)
86{ 81{
87 char *buf, *nmi_buf; 82 struct perf_trace_buf *buf, *nmi_buf;
88 83
89 if (!atomic_add_negative(-1, &event->profile_count)) 84 if (!atomic_add_negative(-1, &event->profile_count))
90 return; 85 return;
91 86
92 event->profile_disable(); 87 event->profile_disable(event);
93 88
94 if (!--total_profile_count) { 89 if (!--total_profile_count) {
95 buf = trace_profile_buf; 90 buf = perf_trace_buf;
96 rcu_assign_pointer(trace_profile_buf, NULL); 91 rcu_assign_pointer(perf_trace_buf, NULL);
97 92
98 nmi_buf = trace_profile_buf_nmi; 93 nmi_buf = perf_trace_buf_nmi;
99 rcu_assign_pointer(trace_profile_buf_nmi, NULL); 94 rcu_assign_pointer(perf_trace_buf_nmi, NULL);
100 95
101 /* 96 /*
102 * Ensure every events in profiling have finished before 97 * Ensure every events in profiling have finished before
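
The per-cpu profile buffer changes here from a raw char array to a struct perf_trace_buf. That structure is defined outside this diff; judging from how the kprobe profile handlers below use it (trace_buf->buf and trace_buf->recursion), it presumably looks roughly like the following, shown only as an inferred shape:

/*
 * Inferred shape of the per-cpu buffer, based on its users later in this
 * patch; the real definition lives in the ftrace event header, not here.
 */
struct perf_trace_buf {
	char	buf[FTRACE_MAX_PROFILE_SIZE];	/* scratch space for one event */
	int	recursion;			/* guards against nested use */
};
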
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 7c18d154ea28..1d18315dc836 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -93,9 +93,7 @@ int trace_define_common_fields(struct ftrace_event_call *call)
93} 93}
94EXPORT_SYMBOL_GPL(trace_define_common_fields); 94EXPORT_SYMBOL_GPL(trace_define_common_fields);
95 95
96#ifdef CONFIG_MODULES 96void trace_destroy_fields(struct ftrace_event_call *call)
97
98static void trace_destroy_fields(struct ftrace_event_call *call)
99{ 97{
100 struct ftrace_event_field *field, *next; 98 struct ftrace_event_field *field, *next;
101 99
@@ -107,8 +105,6 @@ static void trace_destroy_fields(struct ftrace_event_call *call)
107 } 105 }
108} 106}
109 107
110#endif /* CONFIG_MODULES */
111
112static void ftrace_event_enable_disable(struct ftrace_event_call *call, 108static void ftrace_event_enable_disable(struct ftrace_event_call *call,
113 int enable) 109 int enable)
114{ 110{
@@ -117,14 +113,14 @@ static void ftrace_event_enable_disable(struct ftrace_event_call *call,
117 if (call->enabled) { 113 if (call->enabled) {
118 call->enabled = 0; 114 call->enabled = 0;
119 tracing_stop_cmdline_record(); 115 tracing_stop_cmdline_record();
120 call->unregfunc(call->data); 116 call->unregfunc(call);
121 } 117 }
122 break; 118 break;
123 case 1: 119 case 1:
124 if (!call->enabled) { 120 if (!call->enabled) {
125 call->enabled = 1; 121 call->enabled = 1;
126 tracing_start_cmdline_record(); 122 tracing_start_cmdline_record();
127 call->regfunc(call->data); 123 call->regfunc(call);
128 } 124 }
129 break; 125 break;
130 } 126 }
@@ -937,27 +933,46 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
937 return 0; 933 return 0;
938} 934}
939 935
940#define for_each_event(event, start, end) \ 936static int __trace_add_event_call(struct ftrace_event_call *call)
941 for (event = start; \ 937{
942 (unsigned long)event < (unsigned long)end; \ 938 struct dentry *d_events;
943 event++) 939 int ret;
944 940
945#ifdef CONFIG_MODULES 941 if (!call->name)
942 return -EINVAL;
946 943
947static LIST_HEAD(ftrace_module_file_list); 944 if (call->raw_init) {
945 ret = call->raw_init(call);
946 if (ret < 0) {
947 if (ret != -ENOSYS)
948 pr_warning("Could not initialize trace "
949 "events/%s\n", call->name);
950 return ret;
951 }
952 }
948 953
949/* 954 d_events = event_trace_events_dir();
950 * Modules must own their file_operations to keep up with 955 if (!d_events)
951 * reference counting. 956 return -ENOENT;
952 */ 957
953struct ftrace_module_file_ops { 958 ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
954 struct list_head list; 959 &ftrace_enable_fops, &ftrace_event_filter_fops,
955 struct module *mod; 960 &ftrace_event_format_fops);
956 struct file_operations id; 961 if (!ret)
957 struct file_operations enable; 962 list_add(&call->list, &ftrace_events);
958 struct file_operations format; 963
959 struct file_operations filter; 964 return ret;
960}; 965}
966
967/* Add an additional event_call dynamically */
968int trace_add_event_call(struct ftrace_event_call *call)
969{
970 int ret;
971 mutex_lock(&event_mutex);
972 ret = __trace_add_event_call(call);
973 mutex_unlock(&event_mutex);
974 return ret;
975}
961 976
962static void remove_subsystem_dir(const char *name) 977static void remove_subsystem_dir(const char *name)
963{ 978{
@@ -985,6 +1000,53 @@ static void remove_subsystem_dir(const char *name)
985 } 1000 }
986} 1001}
987 1002
1003/*
1004 * Must be called under locking both of event_mutex and trace_event_mutex.
1005 */
1006static void __trace_remove_event_call(struct ftrace_event_call *call)
1007{
1008 ftrace_event_enable_disable(call, 0);
1009 if (call->event)
1010 __unregister_ftrace_event(call->event);
1011 debugfs_remove_recursive(call->dir);
1012 list_del(&call->list);
1013 trace_destroy_fields(call);
1014 destroy_preds(call);
1015 remove_subsystem_dir(call->system);
1016}
1017
1018/* Remove an event_call */
1019void trace_remove_event_call(struct ftrace_event_call *call)
1020{
1021 mutex_lock(&event_mutex);
1022 down_write(&trace_event_mutex);
1023 __trace_remove_event_call(call);
1024 up_write(&trace_event_mutex);
1025 mutex_unlock(&event_mutex);
1026}
1027
1028#define for_each_event(event, start, end) \
1029 for (event = start; \
1030 (unsigned long)event < (unsigned long)end; \
1031 event++)
1032
1033#ifdef CONFIG_MODULES
1034
1035static LIST_HEAD(ftrace_module_file_list);
1036
1037/*
1038 * Modules must own their file_operations to keep up with
1039 * reference counting.
1040 */
1041struct ftrace_module_file_ops {
1042 struct list_head list;
1043 struct module *mod;
1044 struct file_operations id;
1045 struct file_operations enable;
1046 struct file_operations format;
1047 struct file_operations filter;
1048};
1049
988static struct ftrace_module_file_ops * 1050static struct ftrace_module_file_ops *
989trace_create_file_ops(struct module *mod) 1051trace_create_file_ops(struct module *mod)
990{ 1052{
@@ -1042,7 +1104,7 @@ static void trace_module_add_events(struct module *mod)
1042 if (!call->name) 1104 if (!call->name)
1043 continue; 1105 continue;
1044 if (call->raw_init) { 1106 if (call->raw_init) {
1045 ret = call->raw_init(); 1107 ret = call->raw_init(call);
1046 if (ret < 0) { 1108 if (ret < 0) {
1047 if (ret != -ENOSYS) 1109 if (ret != -ENOSYS)
1048 pr_warning("Could not initialize trace " 1110 pr_warning("Could not initialize trace "
@@ -1060,10 +1122,11 @@ static void trace_module_add_events(struct module *mod)
1060 return; 1122 return;
1061 } 1123 }
1062 call->mod = mod; 1124 call->mod = mod;
1063 list_add(&call->list, &ftrace_events); 1125 ret = event_create_dir(call, d_events,
1064 event_create_dir(call, d_events, 1126 &file_ops->id, &file_ops->enable,
1065 &file_ops->id, &file_ops->enable, 1127 &file_ops->filter, &file_ops->format);
1066 &file_ops->filter, &file_ops->format); 1128 if (!ret)
1129 list_add(&call->list, &ftrace_events);
1067 } 1130 }
1068} 1131}
1069 1132
@@ -1077,14 +1140,7 @@ static void trace_module_remove_events(struct module *mod)
1077 list_for_each_entry_safe(call, p, &ftrace_events, list) { 1140 list_for_each_entry_safe(call, p, &ftrace_events, list) {
1078 if (call->mod == mod) { 1141 if (call->mod == mod) {
1079 found = true; 1142 found = true;
1080 ftrace_event_enable_disable(call, 0); 1143 __trace_remove_event_call(call);
1081 if (call->event)
1082 __unregister_ftrace_event(call->event);
1083 debugfs_remove_recursive(call->dir);
1084 list_del(&call->list);
1085 trace_destroy_fields(call);
1086 destroy_preds(call);
1087 remove_subsystem_dir(call->system);
1088 } 1144 }
1089 } 1145 }
1090 1146
@@ -1202,7 +1258,7 @@ static __init int event_trace_init(void)
1202 if (!call->name) 1258 if (!call->name)
1203 continue; 1259 continue;
1204 if (call->raw_init) { 1260 if (call->raw_init) {
1205 ret = call->raw_init(); 1261 ret = call->raw_init(call);
1206 if (ret < 0) { 1262 if (ret < 0) {
1207 if (ret != -ENOSYS) 1263 if (ret != -ENOSYS)
1208 pr_warning("Could not initialize trace " 1264 pr_warning("Could not initialize trace "
@@ -1210,10 +1266,12 @@ static __init int event_trace_init(void)
1210 continue; 1266 continue;
1211 } 1267 }
1212 } 1268 }
1213 list_add(&call->list, &ftrace_events); 1269 ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
1214 event_create_dir(call, d_events, &ftrace_event_id_fops, 1270 &ftrace_enable_fops,
1215 &ftrace_enable_fops, &ftrace_event_filter_fops, 1271 &ftrace_event_filter_fops,
1216 &ftrace_event_format_fops); 1272 &ftrace_event_format_fops);
1273 if (!ret)
1274 list_add(&call->list, &ftrace_events);
1217 } 1275 }
1218 1276
1219 while (true) { 1277 while (true) {
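
trace_add_event_call() and trace_remove_event_call() let code outside the static _ftrace_events section register events at run time. A rough sketch of the calling pattern, modeled on register_probe_event()/unregister_probe_event() in trace_kprobe.c further below; every my_* name is a placeholder, not part of this patch:

static int my_raw_init(struct ftrace_event_call *call)
{
	INIT_LIST_HEAD(&call->fields);
	return 0;
}

static int my_reg(struct ftrace_event_call *call)
{
	return 0;	/* arm the underlying event source here */
}

static void my_unreg(struct ftrace_event_call *call)
{
	/* disarm the underlying event source here */
}

static struct ftrace_event_call my_call = {
	.name		= "my_dynamic_event",	/* example name */
	.system		= "my_subsys",		/* example subsystem */
	.raw_init	= my_raw_init,
	.regfunc	= my_reg,
	.unregfunc	= my_unreg,
};

static int my_provider_init(void)
{
	/* creates events/my_subsys/my_dynamic_event/ under event_mutex */
	return trace_add_event_call(&my_call);
}

static void my_provider_exit(void)
{
	trace_remove_event_call(&my_call);	/* removes dir, fields, preds */
}
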
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 31da218ee10f..934d81fb4ca4 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -134,7 +134,6 @@ ftrace_format_##name(struct ftrace_event_call *unused, \
134 134
135#include "trace_entries.h" 135#include "trace_entries.h"
136 136
137
138#undef __field 137#undef __field
139#define __field(type, item) \ 138#define __field(type, item) \
140 ret = trace_define_field(event_call, #type, #item, \ 139 ret = trace_define_field(event_call, #type, #item, \
@@ -196,6 +195,11 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
196 195
197#include "trace_entries.h" 196#include "trace_entries.h"
198 197
198static int ftrace_raw_init_event(struct ftrace_event_call *call)
199{
200 INIT_LIST_HEAD(&call->fields);
201 return 0;
202}
199 203
200#undef __field 204#undef __field
201#define __field(type, item) 205#define __field(type, item)
@@ -214,7 +218,6 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
214 218
215#undef FTRACE_ENTRY 219#undef FTRACE_ENTRY
216#define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \ 220#define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \
217static int ftrace_raw_init_event_##call(void); \
218 \ 221 \
219struct ftrace_event_call __used \ 222struct ftrace_event_call __used \
220__attribute__((__aligned__(4))) \ 223__attribute__((__aligned__(4))) \
@@ -222,14 +225,9 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
222 .name = #call, \ 225 .name = #call, \
223 .id = type, \ 226 .id = type, \
224 .system = __stringify(TRACE_SYSTEM), \ 227 .system = __stringify(TRACE_SYSTEM), \
225 .raw_init = ftrace_raw_init_event_##call, \ 228 .raw_init = ftrace_raw_init_event, \
226 .show_format = ftrace_format_##call, \ 229 .show_format = ftrace_format_##call, \
227 .define_fields = ftrace_define_fields_##call, \ 230 .define_fields = ftrace_define_fields_##call, \
228}; \ 231}; \
229static int ftrace_raw_init_event_##call(void) \
230{ \
231 INIT_LIST_HEAD(&event_##call.fields); \
232 return 0; \
233} \
234 232
235#include "trace_entries.h" 233#include "trace_entries.h"
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
new file mode 100644
index 000000000000..3696476f307d
--- /dev/null
+++ b/kernel/trace/trace_kprobe.c
@@ -0,0 +1,1513 @@
1/*
2 * Kprobes-based tracing events
3 *
4 * Created by Masami Hiramatsu <mhiramat@redhat.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#include <linux/module.h>
21#include <linux/uaccess.h>
22#include <linux/kprobes.h>
23#include <linux/seq_file.h>
24#include <linux/slab.h>
25#include <linux/smp.h>
26#include <linux/debugfs.h>
27#include <linux/types.h>
28#include <linux/string.h>
29#include <linux/ctype.h>
30#include <linux/ptrace.h>
31#include <linux/perf_event.h>
32
33#include "trace.h"
34#include "trace_output.h"
35
36#define MAX_TRACE_ARGS 128
37#define MAX_ARGSTR_LEN 63
38#define MAX_EVENT_NAME_LEN 64
39#define KPROBE_EVENT_SYSTEM "kprobes"
40
41/* Reserved field names */
42#define FIELD_STRING_IP "__probe_ip"
43#define FIELD_STRING_NARGS "__probe_nargs"
44#define FIELD_STRING_RETIP "__probe_ret_ip"
45#define FIELD_STRING_FUNC "__probe_func"
46
47const char *reserved_field_names[] = {
48 "common_type",
49 "common_flags",
50 "common_preempt_count",
51 "common_pid",
52 "common_tgid",
53 "common_lock_depth",
54 FIELD_STRING_IP,
55 FIELD_STRING_NARGS,
56 FIELD_STRING_RETIP,
57 FIELD_STRING_FUNC,
58};
59
60struct fetch_func {
61 unsigned long (*func)(struct pt_regs *, void *);
62 void *data;
63};
64
65static __kprobes unsigned long call_fetch(struct fetch_func *f,
66 struct pt_regs *regs)
67{
68 return f->func(regs, f->data);
69}
70
71/* fetch handlers */
72static __kprobes unsigned long fetch_register(struct pt_regs *regs,
73 void *offset)
74{
75 return regs_get_register(regs, (unsigned int)((unsigned long)offset));
76}
77
78static __kprobes unsigned long fetch_stack(struct pt_regs *regs,
79 void *num)
80{
81 return regs_get_kernel_stack_nth(regs,
82 (unsigned int)((unsigned long)num));
83}
84
85static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr)
86{
87 unsigned long retval;
88
89 if (probe_kernel_address(addr, retval))
90 return 0;
91 return retval;
92}
93
94static __kprobes unsigned long fetch_argument(struct pt_regs *regs, void *num)
95{
96 return regs_get_argument_nth(regs, (unsigned int)((unsigned long)num));
97}
98
99static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs,
100 void *dummy)
101{
102 return regs_return_value(regs);
103}
104
105static __kprobes unsigned long fetch_stack_address(struct pt_regs *regs,
106 void *dummy)
107{
108 return kernel_stack_pointer(regs);
109}
110
111/* Memory fetching by symbol */
112struct symbol_cache {
113 char *symbol;
114 long offset;
115 unsigned long addr;
116};
117
118static unsigned long update_symbol_cache(struct symbol_cache *sc)
119{
120 sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol);
121 if (sc->addr)
122 sc->addr += sc->offset;
123 return sc->addr;
124}
125
126static void free_symbol_cache(struct symbol_cache *sc)
127{
128 kfree(sc->symbol);
129 kfree(sc);
130}
131
132static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
133{
134 struct symbol_cache *sc;
135
136 if (!sym || strlen(sym) == 0)
137 return NULL;
138 sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL);
139 if (!sc)
140 return NULL;
141
142 sc->symbol = kstrdup(sym, GFP_KERNEL);
143 if (!sc->symbol) {
144 kfree(sc);
145 return NULL;
146 }
147 sc->offset = offset;
148
149 update_symbol_cache(sc);
150 return sc;
151}
152
153static __kprobes unsigned long fetch_symbol(struct pt_regs *regs, void *data)
154{
155 struct symbol_cache *sc = data;
156
157 if (sc->addr)
158 return fetch_memory(regs, (void *)sc->addr);
159 else
160 return 0;
161}
162
163/* Special indirect memory access interface */
164struct indirect_fetch_data {
165 struct fetch_func orig;
166 long offset;
167};
168
169static __kprobes unsigned long fetch_indirect(struct pt_regs *regs, void *data)
170{
171 struct indirect_fetch_data *ind = data;
172 unsigned long addr;
173
174 addr = call_fetch(&ind->orig, regs);
175 if (addr) {
176 addr += ind->offset;
177 return fetch_memory(regs, (void *)addr);
178 } else
179 return 0;
180}
181
182static __kprobes void free_indirect_fetch_data(struct indirect_fetch_data *data)
183{
184 if (data->orig.func == fetch_indirect)
185 free_indirect_fetch_data(data->orig.data);
186 else if (data->orig.func == fetch_symbol)
187 free_symbol_cache(data->orig.data);
188 kfree(data);
189}
190
191/**
192 * Kprobe event core functions
193 */
194
195struct probe_arg {
196 struct fetch_func fetch;
197 const char *name;
198};
199
200/* Flags for trace_probe */
201#define TP_FLAG_TRACE 1
202#define TP_FLAG_PROFILE 2
203
204struct trace_probe {
205 struct list_head list;
206 struct kretprobe rp; /* Use rp.kp for kprobe use */
207 unsigned long nhit;
208 unsigned int flags; /* For TP_FLAG_* */
209 const char *symbol; /* symbol name */
210 struct ftrace_event_call call;
211 struct trace_event event;
212 unsigned int nr_args;
213 struct probe_arg args[];
214};
215
216#define SIZEOF_TRACE_PROBE(n) \
217 (offsetof(struct trace_probe, args) + \
218 (sizeof(struct probe_arg) * (n)))
219
220static __kprobes int probe_is_return(struct trace_probe *tp)
221{
222 return tp->rp.handler != NULL;
223}
224
225static __kprobes const char *probe_symbol(struct trace_probe *tp)
226{
227 return tp->symbol ? tp->symbol : "unknown";
228}
229
230static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff)
231{
232 int ret = -EINVAL;
233
234 if (ff->func == fetch_argument)
235 ret = snprintf(buf, n, "$arg%lu", (unsigned long)ff->data);
236 else if (ff->func == fetch_register) {
237 const char *name;
238 name = regs_query_register_name((unsigned int)((long)ff->data));
239 ret = snprintf(buf, n, "%%%s", name);
240 } else if (ff->func == fetch_stack)
241 ret = snprintf(buf, n, "$stack%lu", (unsigned long)ff->data);
242 else if (ff->func == fetch_memory)
243 ret = snprintf(buf, n, "@0x%p", ff->data);
244 else if (ff->func == fetch_symbol) {
245 struct symbol_cache *sc = ff->data;
246 ret = snprintf(buf, n, "@%s%+ld", sc->symbol, sc->offset);
247 } else if (ff->func == fetch_retvalue)
248 ret = snprintf(buf, n, "$retval");
249 else if (ff->func == fetch_stack_address)
250 ret = snprintf(buf, n, "$stack");
251 else if (ff->func == fetch_indirect) {
252 struct indirect_fetch_data *id = ff->data;
253 size_t l = 0;
254 ret = snprintf(buf, n, "%+ld(", id->offset);
255 if (ret >= n)
256 goto end;
257 l += ret;
258 ret = probe_arg_string(buf + l, n - l, &id->orig);
259 if (ret < 0)
260 goto end;
261 l += ret;
262 ret = snprintf(buf + l, n - l, ")");
263 ret += l;
264 }
265end:
266 if (ret >= n)
267 return -ENOSPC;
268 return ret;
269}
270
271static int register_probe_event(struct trace_probe *tp);
272static void unregister_probe_event(struct trace_probe *tp);
273
274static DEFINE_MUTEX(probe_lock);
275static LIST_HEAD(probe_list);
276
277static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
278static int kretprobe_dispatcher(struct kretprobe_instance *ri,
279 struct pt_regs *regs);
280
281/*
282 * Allocate new trace_probe and initialize it (including kprobes).
283 */
284static struct trace_probe *alloc_trace_probe(const char *group,
285 const char *event,
286 void *addr,
287 const char *symbol,
288 unsigned long offs,
289 int nargs, int is_return)
290{
291 struct trace_probe *tp;
292
293 tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL);
294 if (!tp)
295 return ERR_PTR(-ENOMEM);
296
297 if (symbol) {
298 tp->symbol = kstrdup(symbol, GFP_KERNEL);
299 if (!tp->symbol)
300 goto error;
301 tp->rp.kp.symbol_name = tp->symbol;
302 tp->rp.kp.offset = offs;
303 } else
304 tp->rp.kp.addr = addr;
305
306 if (is_return)
307 tp->rp.handler = kretprobe_dispatcher;
308 else
309 tp->rp.kp.pre_handler = kprobe_dispatcher;
310
311 if (!event)
312 goto error;
313 tp->call.name = kstrdup(event, GFP_KERNEL);
314 if (!tp->call.name)
315 goto error;
316
317 if (!group)
318 goto error;
319 tp->call.system = kstrdup(group, GFP_KERNEL);
320 if (!tp->call.system)
321 goto error;
322
323 INIT_LIST_HEAD(&tp->list);
324 return tp;
325error:
326 kfree(tp->call.name);
327 kfree(tp->symbol);
328 kfree(tp);
329 return ERR_PTR(-ENOMEM);
330}
331
332static void free_probe_arg(struct probe_arg *arg)
333{
334 if (arg->fetch.func == fetch_symbol)
335 free_symbol_cache(arg->fetch.data);
336 else if (arg->fetch.func == fetch_indirect)
337 free_indirect_fetch_data(arg->fetch.data);
338 kfree(arg->name);
339}
340
341static void free_trace_probe(struct trace_probe *tp)
342{
343 int i;
344
345 for (i = 0; i < tp->nr_args; i++)
346 free_probe_arg(&tp->args[i]);
347
348 kfree(tp->call.system);
349 kfree(tp->call.name);
350 kfree(tp->symbol);
351 kfree(tp);
352}
353
354static struct trace_probe *find_probe_event(const char *event,
355 const char *group)
356{
357 struct trace_probe *tp;
358
359 list_for_each_entry(tp, &probe_list, list)
360 if (strcmp(tp->call.name, event) == 0 &&
361 strcmp(tp->call.system, group) == 0)
362 return tp;
363 return NULL;
364}
365
366/* Unregister a trace_probe and probe_event: call with locking probe_lock */
367static void unregister_trace_probe(struct trace_probe *tp)
368{
369 if (probe_is_return(tp))
370 unregister_kretprobe(&tp->rp);
371 else
372 unregister_kprobe(&tp->rp.kp);
373 list_del(&tp->list);
374 unregister_probe_event(tp);
375}
376
377/* Register a trace_probe and probe_event */
378static int register_trace_probe(struct trace_probe *tp)
379{
380 struct trace_probe *old_tp;
381 int ret;
382
383 mutex_lock(&probe_lock);
384
385 /* register as an event */
386 old_tp = find_probe_event(tp->call.name, tp->call.system);
387 if (old_tp) {
388 /* delete old event */
389 unregister_trace_probe(old_tp);
390 free_trace_probe(old_tp);
391 }
392 ret = register_probe_event(tp);
393 if (ret) {
394 pr_warning("Failed to register probe event(%d)\n", ret);
395 goto end;
396 }
397
398 tp->rp.kp.flags |= KPROBE_FLAG_DISABLED;
399 if (probe_is_return(tp))
400 ret = register_kretprobe(&tp->rp);
401 else
402 ret = register_kprobe(&tp->rp.kp);
403
404 if (ret) {
405 pr_warning("Could not insert probe(%d)\n", ret);
406 if (ret == -EILSEQ) {
407 pr_warning("Probing address(0x%p) is not an "
408 "instruction boundary.\n",
409 tp->rp.kp.addr);
410 ret = -EINVAL;
411 }
412 unregister_probe_event(tp);
413 } else
414 list_add_tail(&tp->list, &probe_list);
415end:
416 mutex_unlock(&probe_lock);
417 return ret;
418}
419
420/* Split symbol and offset. */
421static int split_symbol_offset(char *symbol, unsigned long *offset)
422{
423 char *tmp;
424 int ret;
425
426 if (!offset)
427 return -EINVAL;
428
429 tmp = strchr(symbol, '+');
430 if (tmp) {
431 /* skip sign because strict_strtol doesn't accept '+' */
432 ret = strict_strtoul(tmp + 1, 0, offset);
433 if (ret)
434 return ret;
435 *tmp = '\0';
436 } else
437 *offset = 0;
438 return 0;
439}
440
441#define PARAM_MAX_ARGS 16
442#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long))
443
444static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return)
445{
446 int ret = 0;
447 unsigned long param;
448
449 if (strcmp(arg, "retval") == 0) {
450 if (is_return) {
451 ff->func = fetch_retvalue;
452 ff->data = NULL;
453 } else
454 ret = -EINVAL;
455 } else if (strncmp(arg, "stack", 5) == 0) {
456 if (arg[5] == '\0') {
457 ff->func = fetch_stack_address;
458 ff->data = NULL;
459 } else if (isdigit(arg[5])) {
460 ret = strict_strtoul(arg + 5, 10, &param);
461 if (ret || param > PARAM_MAX_STACK)
462 ret = -EINVAL;
463 else {
464 ff->func = fetch_stack;
465 ff->data = (void *)param;
466 }
467 } else
468 ret = -EINVAL;
469 } else if (strncmp(arg, "arg", 3) == 0 && isdigit(arg[3])) {
470 ret = strict_strtoul(arg + 3, 10, &param);
471 if (ret || param > PARAM_MAX_ARGS)
472 ret = -EINVAL;
473 else {
474 ff->func = fetch_argument;
475 ff->data = (void *)param;
476 }
477 } else
478 ret = -EINVAL;
479 return ret;
480}
481
482static int parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
483{
484 int ret = 0;
485 unsigned long param;
486 long offset;
487 char *tmp;
488
489 switch (arg[0]) {
490 case '$':
491 ret = parse_probe_vars(arg + 1, ff, is_return);
492 break;
493 case '%': /* named register */
494 ret = regs_query_register_offset(arg + 1);
495 if (ret >= 0) {
496 ff->func = fetch_register;
497 ff->data = (void *)(unsigned long)ret;
498 ret = 0;
499 }
500 break;
501 case '@': /* memory or symbol */
502 if (isdigit(arg[1])) {
503 ret = strict_strtoul(arg + 1, 0, &param);
504 if (ret)
505 break;
506 ff->func = fetch_memory;
507 ff->data = (void *)param;
508 } else {
509 ret = split_symbol_offset(arg + 1, &offset);
510 if (ret)
511 break;
512 ff->data = alloc_symbol_cache(arg + 1, offset);
513 if (ff->data)
514 ff->func = fetch_symbol;
515 else
516 ret = -EINVAL;
517 }
518 break;
519 case '+': /* indirect memory */
520 case '-':
521 tmp = strchr(arg, '(');
522 if (!tmp) {
523 ret = -EINVAL;
524 break;
525 }
526 *tmp = '\0';
527 ret = strict_strtol(arg + 1, 0, &offset);
528 if (ret)
529 break;
530 if (arg[0] == '-')
531 offset = -offset;
532 arg = tmp + 1;
533 tmp = strrchr(arg, ')');
534 if (tmp) {
535 struct indirect_fetch_data *id;
536 *tmp = '\0';
537 id = kzalloc(sizeof(struct indirect_fetch_data),
538 GFP_KERNEL);
539 if (!id)
540 return -ENOMEM;
541 id->offset = offset;
542 ret = parse_probe_arg(arg, &id->orig, is_return);
543 if (ret)
544 kfree(id);
545 else {
546 ff->func = fetch_indirect;
547 ff->data = (void *)id;
548 }
549 } else
550 ret = -EINVAL;
551 break;
552 default:
553 /* TODO: support custom handler */
554 ret = -EINVAL;
555 }
556 return ret;
557}
558
559/* Return 1 if name is reserved or already used by another argument */
560static int conflict_field_name(const char *name,
561 struct probe_arg *args, int narg)
562{
563 int i;
564 for (i = 0; i < ARRAY_SIZE(reserved_field_names); i++)
565 if (strcmp(reserved_field_names[i], name) == 0)
566 return 1;
567 for (i = 0; i < narg; i++)
568 if (strcmp(args[i].name, name) == 0)
569 return 1;
570 return 0;
571}
572
573static int create_trace_probe(int argc, char **argv)
574{
575 /*
576 * Argument syntax:
577 * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS]
578 * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS]
579 * Fetch args:
580 * $argN : fetch Nth of function argument. (N:0-)
581 * $retval : fetch return value
582 * $stack : fetch stack address
583 * $stackN : fetch Nth of stack (N:0-)
584 * @ADDR : fetch memory at ADDR (ADDR should be in kernel)
585 * @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
586 * %REG : fetch register REG
587 * Indirect memory fetch:
588 * +|-offs(ARG) : fetch memory at ARG +|- offs address.
589 * Alias name of args:
590 * NAME=FETCHARG : set NAME as alias of FETCHARG.
591 */
592 struct trace_probe *tp;
593 int i, ret = 0;
594 int is_return = 0;
595 char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL;
596 unsigned long offset = 0;
597 void *addr = NULL;
598 char buf[MAX_EVENT_NAME_LEN];
599
600 if (argc < 2) {
601 pr_info("Probe point is not specified.\n");
602 return -EINVAL;
603 }
604
605 if (argv[0][0] == 'p')
606 is_return = 0;
607 else if (argv[0][0] == 'r')
608 is_return = 1;
609 else {
610 pr_info("Probe definition must start with 'p' or 'r'.\n");
611 return -EINVAL;
612 }
613
614 if (argv[0][1] == ':') {
615 event = &argv[0][2];
616 if (strchr(event, '/')) {
617 group = event;
618 event = strchr(group, '/') + 1;
619 event[-1] = '\0';
620 if (strlen(group) == 0) {
621 pr_info("Group name is not specified\n");
622 return -EINVAL;
623 }
624 }
625 if (strlen(event) == 0) {
626 pr_info("Event name is not specified\n");
627 return -EINVAL;
628 }
629 }
630
631 if (isdigit(argv[1][0])) {
632 if (is_return) {
633 pr_info("Return probe point must be a symbol.\n");
634 return -EINVAL;
635 }
636 /* an address specified */
637 ret = strict_strtoul(argv[1], 0, (unsigned long *)&addr);
638 if (ret) {
639 pr_info("Failed to parse address.\n");
640 return ret;
641 }
642 } else {
643 /* a symbol specified */
644 symbol = argv[1];
645 /* TODO: support .init module functions */
646 ret = split_symbol_offset(symbol, &offset);
647 if (ret) {
648 pr_info("Failed to parse symbol.\n");
649 return ret;
650 }
651 if (offset && is_return) {
652 pr_info("Return probe must be used without offset.\n");
653 return -EINVAL;
654 }
655 }
656 argc -= 2; argv += 2;
657
658 /* setup a probe */
659 if (!group)
660 group = KPROBE_EVENT_SYSTEM;
661 if (!event) {
662 /* Make a new event name */
663 if (symbol)
664 snprintf(buf, MAX_EVENT_NAME_LEN, "%c@%s%+ld",
665 is_return ? 'r' : 'p', symbol, offset);
666 else
667 snprintf(buf, MAX_EVENT_NAME_LEN, "%c@0x%p",
668 is_return ? 'r' : 'p', addr);
669 event = buf;
670 }
671 tp = alloc_trace_probe(group, event, addr, symbol, offset, argc,
672 is_return);
673 if (IS_ERR(tp)) {
674 pr_info("Failed to allocate trace_probe.(%d)\n",
675 (int)PTR_ERR(tp));
676 return PTR_ERR(tp);
677 }
678
679 /* parse arguments */
680 ret = 0;
681 for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
682 /* Parse argument name */
683 arg = strchr(argv[i], '=');
684 if (arg)
685 *arg++ = '\0';
686 else
687 arg = argv[i];
688
689 if (conflict_field_name(argv[i], tp->args, i)) {
690 pr_info("Argument%d name '%s' conflicts with "
691 "another field.\n", i, argv[i]);
692 ret = -EINVAL;
693 goto error;
694 }
695
696 tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
697
698 /* Parse fetch argument */
699 if (strlen(arg) > MAX_ARGSTR_LEN) {
700 pr_info("Argument%d(%s) is too long.\n", i, arg);
701 ret = -ENOSPC;
702 goto error;
703 }
704 ret = parse_probe_arg(arg, &tp->args[i].fetch, is_return);
705 if (ret) {
706 pr_info("Parse error at argument%d. (%d)\n", i, ret);
707 goto error;
708 }
709 }
710 tp->nr_args = i;
711
712 ret = register_trace_probe(tp);
713 if (ret)
714 goto error;
715 return 0;
716
717error:
718 free_trace_probe(tp);
719 return ret;
720}
721
722static void cleanup_all_probes(void)
723{
724 struct trace_probe *tp;
725
726 mutex_lock(&probe_lock);
727 /* TODO: Use batch unregistration */
728 while (!list_empty(&probe_list)) {
729 tp = list_entry(probe_list.next, struct trace_probe, list);
730 unregister_trace_probe(tp);
731 free_trace_probe(tp);
732 }
733 mutex_unlock(&probe_lock);
734}
735
736
737/* Probes listing interfaces */
738static void *probes_seq_start(struct seq_file *m, loff_t *pos)
739{
740 mutex_lock(&probe_lock);
741 return seq_list_start(&probe_list, *pos);
742}
743
744static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
745{
746 return seq_list_next(v, &probe_list, pos);
747}
748
749static void probes_seq_stop(struct seq_file *m, void *v)
750{
751 mutex_unlock(&probe_lock);
752}
753
754static int probes_seq_show(struct seq_file *m, void *v)
755{
756 struct trace_probe *tp = v;
757 int i, ret;
758 char buf[MAX_ARGSTR_LEN + 1];
759
760 seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p');
761 seq_printf(m, ":%s", tp->call.name);
762
763 if (tp->symbol)
764 seq_printf(m, " %s+%u", probe_symbol(tp), tp->rp.kp.offset);
765 else
766 seq_printf(m, " 0x%p", tp->rp.kp.addr);
767
768 for (i = 0; i < tp->nr_args; i++) {
769 ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i].fetch);
770 if (ret < 0) {
771 pr_warning("Argument%d decoding error(%d).\n", i, ret);
772 return ret;
773 }
774 seq_printf(m, " %s=%s", tp->args[i].name, buf);
775 }
776 seq_printf(m, "\n");
777 return 0;
778}
779
780static const struct seq_operations probes_seq_op = {
781 .start = probes_seq_start,
782 .next = probes_seq_next,
783 .stop = probes_seq_stop,
784 .show = probes_seq_show
785};
786
787static int probes_open(struct inode *inode, struct file *file)
788{
789 if ((file->f_mode & FMODE_WRITE) &&
790 (file->f_flags & O_TRUNC))
791 cleanup_all_probes();
792
793 return seq_open(file, &probes_seq_op);
794}
795
796static int command_trace_probe(const char *buf)
797{
798 char **argv;
799 int argc = 0, ret = 0;
800
801 argv = argv_split(GFP_KERNEL, buf, &argc);
802 if (!argv)
803 return -ENOMEM;
804
805 if (argc)
806 ret = create_trace_probe(argc, argv);
807
808 argv_free(argv);
809 return ret;
810}
811
812#define WRITE_BUFSIZE 128
813
814static ssize_t probes_write(struct file *file, const char __user *buffer,
815 size_t count, loff_t *ppos)
816{
817 char *kbuf, *tmp;
818 int ret;
819 size_t done;
820 size_t size;
821
822 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
823 if (!kbuf)
824 return -ENOMEM;
825
826 ret = done = 0;
827 while (done < count) {
828 size = count - done;
829 if (size >= WRITE_BUFSIZE)
830 size = WRITE_BUFSIZE - 1;
831 if (copy_from_user(kbuf, buffer + done, size)) {
832 ret = -EFAULT;
833 goto out;
834 }
835 kbuf[size] = '\0';
836 tmp = strchr(kbuf, '\n');
837 if (tmp) {
838 *tmp = '\0';
839 size = tmp - kbuf + 1;
840 } else if (done + size < count) {
841 pr_warning("Line is too long: "
842 "should be less than %d.", WRITE_BUFSIZE);
843 ret = -EINVAL;
844 goto out;
845 }
846 done += size;
847 /* Remove comments */
848 tmp = strchr(kbuf, '#');
849 if (tmp)
850 *tmp = '\0';
851
852 ret = command_trace_probe(kbuf);
853 if (ret)
854 goto out;
855 }
856 ret = done;
857out:
858 kfree(kbuf);
859 return ret;
860}
861
862static const struct file_operations kprobe_events_ops = {
863 .owner = THIS_MODULE,
864 .open = probes_open,
865 .read = seq_read,
866 .llseek = seq_lseek,
867 .release = seq_release,
868 .write = probes_write,
869};
870
871/* Probes profiling interfaces */
872static int probes_profile_seq_show(struct seq_file *m, void *v)
873{
874 struct trace_probe *tp = v;
875
876 seq_printf(m, " %-44s %15lu %15lu\n", tp->call.name, tp->nhit,
877 tp->rp.kp.nmissed);
878
879 return 0;
880}
881
882static const struct seq_operations profile_seq_op = {
883 .start = probes_seq_start,
884 .next = probes_seq_next,
885 .stop = probes_seq_stop,
886 .show = probes_profile_seq_show
887};
888
889static int profile_open(struct inode *inode, struct file *file)
890{
891 return seq_open(file, &profile_seq_op);
892}
893
894static const struct file_operations kprobe_profile_ops = {
895 .owner = THIS_MODULE,
896 .open = profile_open,
897 .read = seq_read,
898 .llseek = seq_lseek,
899 .release = seq_release,
900};
901
902/* Kprobe handler */
903static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
904{
905 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
906 struct kprobe_trace_entry *entry;
907 struct ring_buffer_event *event;
908 struct ring_buffer *buffer;
909 int size, i, pc;
910 unsigned long irq_flags;
911 struct ftrace_event_call *call = &tp->call;
912
913 tp->nhit++;
914
915 local_save_flags(irq_flags);
916 pc = preempt_count();
917
918 size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
919
920 event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
921 irq_flags, pc);
922 if (!event)
923 return 0;
924
925 entry = ring_buffer_event_data(event);
926 entry->nargs = tp->nr_args;
927 entry->ip = (unsigned long)kp->addr;
928 for (i = 0; i < tp->nr_args; i++)
929 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
930
931 if (!filter_current_check_discard(buffer, call, entry, event))
932 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
933 return 0;
934}
935
936/* Kretprobe handler */
937static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
938 struct pt_regs *regs)
939{
940 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
941 struct kretprobe_trace_entry *entry;
942 struct ring_buffer_event *event;
943 struct ring_buffer *buffer;
944 int size, i, pc;
945 unsigned long irq_flags;
946 struct ftrace_event_call *call = &tp->call;
947
948 local_save_flags(irq_flags);
949 pc = preempt_count();
950
951 size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
952
953 event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
954 irq_flags, pc);
955 if (!event)
956 return 0;
957
958 entry = ring_buffer_event_data(event);
959 entry->nargs = tp->nr_args;
960 entry->func = (unsigned long)tp->rp.kp.addr;
961 entry->ret_ip = (unsigned long)ri->ret_addr;
962 for (i = 0; i < tp->nr_args; i++)
963 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
964
965 if (!filter_current_check_discard(buffer, call, entry, event))
966 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
967
968 return 0;
969}
970
971/* Event entry printers */
972enum print_line_t
973print_kprobe_event(struct trace_iterator *iter, int flags)
974{
975 struct kprobe_trace_entry *field;
976 struct trace_seq *s = &iter->seq;
977 struct trace_event *event;
978 struct trace_probe *tp;
979 int i;
980
981 field = (struct kprobe_trace_entry *)iter->ent;
982 event = ftrace_find_event(field->ent.type);
983 tp = container_of(event, struct trace_probe, event);
984
985 if (!trace_seq_printf(s, "%s: (", tp->call.name))
986 goto partial;
987
988 if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
989 goto partial;
990
991 if (!trace_seq_puts(s, ")"))
992 goto partial;
993
994 for (i = 0; i < field->nargs; i++)
995 if (!trace_seq_printf(s, " %s=%lx",
996 tp->args[i].name, field->args[i]))
997 goto partial;
998
999 if (!trace_seq_puts(s, "\n"))
1000 goto partial;
1001
1002 return TRACE_TYPE_HANDLED;
1003partial:
1004 return TRACE_TYPE_PARTIAL_LINE;
1005}
1006
1007enum print_line_t
1008print_kretprobe_event(struct trace_iterator *iter, int flags)
1009{
1010 struct kretprobe_trace_entry *field;
1011 struct trace_seq *s = &iter->seq;
1012 struct trace_event *event;
1013 struct trace_probe *tp;
1014 int i;
1015
1016 field = (struct kretprobe_trace_entry *)iter->ent;
1017 event = ftrace_find_event(field->ent.type);
1018 tp = container_of(event, struct trace_probe, event);
1019
1020 if (!trace_seq_printf(s, "%s: (", tp->call.name))
1021 goto partial;
1022
1023 if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
1024 goto partial;
1025
1026 if (!trace_seq_puts(s, " <- "))
1027 goto partial;
1028
1029 if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
1030 goto partial;
1031
1032 if (!trace_seq_puts(s, ")"))
1033 goto partial;
1034
1035 for (i = 0; i < field->nargs; i++)
1036 if (!trace_seq_printf(s, " %s=%lx",
1037 tp->args[i].name, field->args[i]))
1038 goto partial;
1039
1040 if (!trace_seq_puts(s, "\n"))
1041 goto partial;
1042
1043 return TRACE_TYPE_HANDLED;
1044partial:
1045 return TRACE_TYPE_PARTIAL_LINE;
1046}
1047
1048static int probe_event_enable(struct ftrace_event_call *call)
1049{
1050 struct trace_probe *tp = (struct trace_probe *)call->data;
1051
1052 tp->flags |= TP_FLAG_TRACE;
1053 if (probe_is_return(tp))
1054 return enable_kretprobe(&tp->rp);
1055 else
1056 return enable_kprobe(&tp->rp.kp);
1057}
1058
1059static void probe_event_disable(struct ftrace_event_call *call)
1060{
1061 struct trace_probe *tp = (struct trace_probe *)call->data;
1062
1063 tp->flags &= ~TP_FLAG_TRACE;
1064 if (!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE))) {
1065 if (probe_is_return(tp))
1066 disable_kretprobe(&tp->rp);
1067 else
1068 disable_kprobe(&tp->rp.kp);
1069 }
1070}
1071
1072static int probe_event_raw_init(struct ftrace_event_call *event_call)
1073{
1074 INIT_LIST_HEAD(&event_call->fields);
1075
1076 return 0;
1077}
1078
1079#undef DEFINE_FIELD
1080#define DEFINE_FIELD(type, item, name, is_signed) \
1081 do { \
1082 ret = trace_define_field(event_call, #type, name, \
1083 offsetof(typeof(field), item), \
1084 sizeof(field.item), is_signed, \
1085 FILTER_OTHER); \
1086 if (ret) \
1087 return ret; \
1088 } while (0)
1089
1090static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
1091{
1092 int ret, i;
1093 struct kprobe_trace_entry field;
1094 struct trace_probe *tp = (struct trace_probe *)event_call->data;
1095
1096 ret = trace_define_common_fields(event_call);
1097 if (ret)
1098 return ret;
1099
1100 DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
1101 DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
1102 /* Set argument names as fields */
1103 for (i = 0; i < tp->nr_args; i++)
1104 DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0);
1105 return 0;
1106}
1107
1108static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
1109{
1110 int ret, i;
1111 struct kretprobe_trace_entry field;
1112 struct trace_probe *tp = (struct trace_probe *)event_call->data;
1113
1114 ret = trace_define_common_fields(event_call);
1115 if (ret)
1116 return ret;
1117
1118 DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
1119 DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
1120 DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
1121 /* Set argument names as fields */
1122 for (i = 0; i < tp->nr_args; i++)
1123 DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0);
1124 return 0;
1125}
1126
1127static int __probe_event_show_format(struct trace_seq *s,
1128 struct trace_probe *tp, const char *fmt,
1129 const char *arg)
1130{
1131 int i;
1132
1133 /* Show format */
1134 if (!trace_seq_printf(s, "\nprint fmt: \"%s", fmt))
1135 return 0;
1136
1137 for (i = 0; i < tp->nr_args; i++)
1138 if (!trace_seq_printf(s, " %s=%%lx", tp->args[i].name))
1139 return 0;
1140
1141 if (!trace_seq_printf(s, "\", %s", arg))
1142 return 0;
1143
1144 for (i = 0; i < tp->nr_args; i++)
1145 if (!trace_seq_printf(s, ", REC->%s", tp->args[i].name))
1146 return 0;
1147
1148 return trace_seq_puts(s, "\n");
1149}
1150
1151#undef SHOW_FIELD
1152#define SHOW_FIELD(type, item, name) \
1153 do { \
1154 ret = trace_seq_printf(s, "\tfield: " #type " %s;\t" \
1155 "offset:%u;\tsize:%u;\n", name, \
1156 (unsigned int)offsetof(typeof(field), item),\
1157 (unsigned int)sizeof(type)); \
1158 if (!ret) \
1159 return 0; \
1160 } while (0)
1161
1162static int kprobe_event_show_format(struct ftrace_event_call *call,
1163 struct trace_seq *s)
1164{
1165 struct kprobe_trace_entry field __attribute__((unused));
1166 int ret, i;
1167 struct trace_probe *tp = (struct trace_probe *)call->data;
1168
1169 SHOW_FIELD(unsigned long, ip, FIELD_STRING_IP);
1170 SHOW_FIELD(int, nargs, FIELD_STRING_NARGS);
1171
1172 /* Show fields */
1173 for (i = 0; i < tp->nr_args; i++)
1174 SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
1175 trace_seq_puts(s, "\n");
1176
1177 return __probe_event_show_format(s, tp, "(%lx)",
1178 "REC->" FIELD_STRING_IP);
1179}
1180
1181static int kretprobe_event_show_format(struct ftrace_event_call *call,
1182 struct trace_seq *s)
1183{
1184 struct kretprobe_trace_entry field __attribute__((unused));
1185 int ret, i;
1186 struct trace_probe *tp = (struct trace_probe *)call->data;
1187
1188 SHOW_FIELD(unsigned long, func, FIELD_STRING_FUNC);
1189 SHOW_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP);
1190 SHOW_FIELD(int, nargs, FIELD_STRING_NARGS);
1191
1192 /* Show fields */
1193 for (i = 0; i < tp->nr_args; i++)
1194 SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
1195 trace_seq_puts(s, "\n");
1196
1197 return __probe_event_show_format(s, tp, "(%lx <- %lx)",
1198 "REC->" FIELD_STRING_FUNC
1199 ", REC->" FIELD_STRING_RETIP);
1200}
1201
1202#ifdef CONFIG_EVENT_PROFILE
1203
1204/* Kprobe profile handler */
1205static __kprobes int kprobe_profile_func(struct kprobe *kp,
1206 struct pt_regs *regs)
1207{
1208 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1209 struct ftrace_event_call *call = &tp->call;
1210 struct kprobe_trace_entry *entry;
1211 struct perf_trace_buf *trace_buf;
1212 struct trace_entry *ent;
1213 int size, __size, i, pc, __cpu;
1214 unsigned long irq_flags;
1215 char *raw_data;
1216
1217 pc = preempt_count();
1218 __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
1219 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1220 size -= sizeof(u32);
1221 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
1222 "profile buffer not large enough"))
1223 return 0;
1224
1225 /*
1226 * Protect the non nmi buffer
1227 * This also protects the rcu read side
1228 */
1229 local_irq_save(irq_flags);
1230 __cpu = smp_processor_id();
1231
1232 if (in_nmi())
1233 trace_buf = rcu_dereference(perf_trace_buf_nmi);
1234 else
1235 trace_buf = rcu_dereference(perf_trace_buf);
1236
1237 if (!trace_buf)
1238 goto end;
1239
1240 trace_buf = per_cpu_ptr(trace_buf, __cpu);
1241
1242 if (trace_buf->recursion++)
1243 goto end_recursion;
1244
1245 /*
1246 * Make recursion update visible before entering perf_tp_event
1247 * so that we protect from perf recursions.
1248 */
1249 barrier();
1250
1251 raw_data = trace_buf->buf;
1252
1253 /* Zero dead bytes from alignment to avoid buffer leak to userspace */
1254 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
1255 entry = (struct kprobe_trace_entry *)raw_data;
1256 ent = &entry->ent;
1257
1258 tracing_generic_entry_update(ent, irq_flags, pc);
1259 ent->type = call->id;
1260 entry->nargs = tp->nr_args;
1261 entry->ip = (unsigned long)kp->addr;
1262 for (i = 0; i < tp->nr_args; i++)
1263 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1264 perf_tp_event(call->id, entry->ip, 1, entry, size);
1265
1266end_recursion:
1267 trace_buf->recursion--;
1268end:
1269 local_irq_restore(irq_flags);
1270
1271 return 0;
1272}
1273
1274/* Kretprobe profile handler */
1275static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
1276 struct pt_regs *regs)
1277{
1278 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1279 struct ftrace_event_call *call = &tp->call;
1280 struct kretprobe_trace_entry *entry;
1281 struct perf_trace_buf *trace_buf;
1282 struct trace_entry *ent;
1283 int size, __size, i, pc, __cpu;
1284 unsigned long irq_flags;
1285 char *raw_data;
1286
1287 pc = preempt_count();
1288 __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
1289 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1290 size -= sizeof(u32);
1291 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
1292 "profile buffer not large enough"))
1293 return 0;
1294
1295 /*
1296 * Protect the non nmi buffer
1297 * This also protects the rcu read side
1298 */
1299 local_irq_save(irq_flags);
1300 __cpu = smp_processor_id();
1301
1302 if (in_nmi())
1303 trace_buf = rcu_dereference(perf_trace_buf_nmi);
1304 else
1305 trace_buf = rcu_dereference(perf_trace_buf);
1306
1307 if (!trace_buf)
1308 goto end;
1309
1310 trace_buf = per_cpu_ptr(trace_buf, __cpu);
1311
1312 if (trace_buf->recursion++)
1313 goto end_recursion;
1314
1315 /*
1316 * Make recursion update visible before entering perf_tp_event
1317 * so that we protect from perf recursions.
1318 */
1319 barrier();
1320
1321 raw_data = trace_buf->buf;
1322
1323 /* Zero dead bytes from alignment to avoid buffer leak to userspace */
1324 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
1325 entry = (struct kretprobe_trace_entry *)raw_data;
1326 ent = &entry->ent;
1327
1328 tracing_generic_entry_update(ent, irq_flags, pc);
1329 ent->type = call->id;
1330 entry->nargs = tp->nr_args;
1331 entry->func = (unsigned long)tp->rp.kp.addr;
1332 entry->ret_ip = (unsigned long)ri->ret_addr;
1333 for (i = 0; i < tp->nr_args; i++)
1334 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1335 perf_tp_event(call->id, entry->ret_ip, 1, entry, size);
1336
1337end_recursion:
1338 trace_buf->recursion--;
1339end:
1340 local_irq_restore(irq_flags);
1341
1342 return 0;
1343}
1344
1345static int probe_profile_enable(struct ftrace_event_call *call)
1346{
1347 struct trace_probe *tp = (struct trace_probe *)call->data;
1348
1349 tp->flags |= TP_FLAG_PROFILE;
1350
1351 if (probe_is_return(tp))
1352 return enable_kretprobe(&tp->rp);
1353 else
1354 return enable_kprobe(&tp->rp.kp);
1355}
1356
1357static void probe_profile_disable(struct ftrace_event_call *call)
1358{
1359 struct trace_probe *tp = (struct trace_probe *)call->data;
1360
1361 tp->flags &= ~TP_FLAG_PROFILE;
1362
1363 if (!(tp->flags & TP_FLAG_TRACE)) {
1364 if (probe_is_return(tp))
1365 disable_kretprobe(&tp->rp);
1366 else
1367 disable_kprobe(&tp->rp.kp);
1368 }
1369}
1370#endif /* CONFIG_EVENT_PROFILE */
1371
1372
1373static __kprobes
1374int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
1375{
1376 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1377
1378 if (tp->flags & TP_FLAG_TRACE)
1379 kprobe_trace_func(kp, regs);
1380#ifdef CONFIG_EVENT_PROFILE
1381 if (tp->flags & TP_FLAG_PROFILE)
1382 kprobe_profile_func(kp, regs);
1383#endif /* CONFIG_EVENT_PROFILE */
1384 return 0; /* We don't tweak the kernel, so just return 0 */
1385}
1386
1387static __kprobes
1388int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
1389{
1390 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1391
1392 if (tp->flags & TP_FLAG_TRACE)
1393 kretprobe_trace_func(ri, regs);
1394#ifdef CONFIG_EVENT_PROFILE
1395 if (tp->flags & TP_FLAG_PROFILE)
1396 kretprobe_profile_func(ri, regs);
1397#endif /* CONFIG_EVENT_PROFILE */
1398 return 0; /* We don't tweak the kernel, so just return 0 */
1399}
1400
1401static int register_probe_event(struct trace_probe *tp)
1402{
1403 struct ftrace_event_call *call = &tp->call;
1404 int ret;
1405
1406 /* Initialize ftrace_event_call */
1407 if (probe_is_return(tp)) {
1408 tp->event.trace = print_kretprobe_event;
1409 call->raw_init = probe_event_raw_init;
1410 call->show_format = kretprobe_event_show_format;
1411 call->define_fields = kretprobe_event_define_fields;
1412 } else {
1413 tp->event.trace = print_kprobe_event;
1414 call->raw_init = probe_event_raw_init;
1415 call->show_format = kprobe_event_show_format;
1416 call->define_fields = kprobe_event_define_fields;
1417 }
1418 call->event = &tp->event;
1419 call->id = register_ftrace_event(&tp->event);
1420 if (!call->id)
1421 return -ENODEV;
1422 call->enabled = 0;
1423 call->regfunc = probe_event_enable;
1424 call->unregfunc = probe_event_disable;
1425
1426#ifdef CONFIG_EVENT_PROFILE
1427 atomic_set(&call->profile_count, -1);
1428 call->profile_enable = probe_profile_enable;
1429 call->profile_disable = probe_profile_disable;
1430#endif
1431 call->data = tp;
1432 ret = trace_add_event_call(call);
1433 if (ret) {
1434 pr_info("Failed to register kprobe event: %s\n", call->name);
1435 unregister_ftrace_event(&tp->event);
1436 }
1437 return ret;
1438}
1439
1440static void unregister_probe_event(struct trace_probe *tp)
1441{
1442 /* tp->event is unregistered in trace_remove_event_call() */
1443 trace_remove_event_call(&tp->call);
1444}
1445
1446/* Make a debugfs interface for controlling probe points */
1447static __init int init_kprobe_trace(void)
1448{
1449 struct dentry *d_tracer;
1450 struct dentry *entry;
1451
1452 d_tracer = tracing_init_dentry();
1453 if (!d_tracer)
1454 return 0;
1455
1456 entry = debugfs_create_file("kprobe_events", 0644, d_tracer,
1457 NULL, &kprobe_events_ops);
1458
1459 /* Event list interface */
1460 if (!entry)
1461 pr_warning("Could not create debugfs "
1462 "'kprobe_events' entry\n");
1463
1464 /* Profile interface */
1465 entry = debugfs_create_file("kprobe_profile", 0444, d_tracer,
1466 NULL, &kprobe_profile_ops);
1467
1468 if (!entry)
1469 pr_warning("Could not create debugfs "
1470 "'kprobe_profile' entry\n");
1471 return 0;
1472}
1473fs_initcall(init_kprobe_trace);
1474
1475
1476#ifdef CONFIG_FTRACE_STARTUP_TEST
1477
1478static int kprobe_trace_selftest_target(int a1, int a2, int a3,
1479 int a4, int a5, int a6)
1480{
1481 return a1 + a2 + a3 + a4 + a5 + a6;
1482}
1483
1484static __init int kprobe_trace_self_tests_init(void)
1485{
1486 int ret;
1487 int (*target)(int, int, int, int, int, int);
1488
1489 target = kprobe_trace_selftest_target;
1490
1491 pr_info("Testing kprobe tracing: ");
1492
1493 ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target "
1494 "$arg1 $arg2 $arg3 $arg4 $stack $stack0");
1495 if (WARN_ON_ONCE(ret))
1496 pr_warning("error enabling function entry\n");
1497
1498 ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target "
1499 "$retval");
1500 if (WARN_ON_ONCE(ret))
1501 pr_warning("error enabling function return\n");
1502
1503 ret = target(1, 2, 3, 4, 5, 6);
1504
1505 cleanup_all_probes();
1506
1507 pr_cont("OK\n");
1508 return 0;
1509}
1510
1511late_initcall(kprobe_trace_self_tests_init);
1512
1513#endif
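
For reference, a hedged user-space sketch of driving the new debugfs file by hand. The mount point, the probe specification, and the assumption that truncating the file clears all probes (as the selftest's cleanup_all_probes() suggests) are illustrative, not taken verbatim from this patch.

/*
 * Hedged sketch: add a probe through kprobe_events using the same
 * "p:NAME SYMBOL args" form exercised by the selftest above, then clear
 * the probe list by truncating the file (assumed behaviour).
 */
#include <stdio.h>

int main(void)
{
	const char *path = "/sys/kernel/debug/tracing/kprobe_events";
	FILE *f;

	/* Append a new probe definition. */
	f = fopen(path, "a");
	if (!f) {
		perror("fopen");
		return 1;
	}
	fputs("p:myprobe kprobe_trace_selftest_target $arg1 $stack0\n", f);
	fclose(f);

	/* Opening with truncation is assumed to remove every dynamic probe. */
	f = fopen(path, "w");
	if (f)
		fclose(f);
	return 0;
}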
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index d00d1a8f1f26..51213b0aa81b 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -354,13 +354,13 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret)
354 trace_current_buffer_unlock_commit(buffer, event, 0, 0); 354 trace_current_buffer_unlock_commit(buffer, event, 0, 0);
355} 355}
356 356
357int reg_event_syscall_enter(void *ptr) 357int reg_event_syscall_enter(struct ftrace_event_call *call)
358{ 358{
359 int ret = 0; 359 int ret = 0;
360 int num; 360 int num;
361 char *name; 361 char *name;
362 362
363 name = (char *)ptr; 363 name = (char *)call->data;
364 num = syscall_name_to_nr(name); 364 num = syscall_name_to_nr(name);
365 if (num < 0 || num >= NR_syscalls) 365 if (num < 0 || num >= NR_syscalls)
366 return -ENOSYS; 366 return -ENOSYS;
@@ -378,12 +378,12 @@ int reg_event_syscall_enter(void *ptr)
378 return ret; 378 return ret;
379} 379}
380 380
381void unreg_event_syscall_enter(void *ptr) 381void unreg_event_syscall_enter(struct ftrace_event_call *call)
382{ 382{
383 int num; 383 int num;
384 char *name; 384 char *name;
385 385
386 name = (char *)ptr; 386 name = (char *)call->data;
387 num = syscall_name_to_nr(name); 387 num = syscall_name_to_nr(name);
388 if (num < 0 || num >= NR_syscalls) 388 if (num < 0 || num >= NR_syscalls)
389 return; 389 return;
@@ -395,13 +395,13 @@ void unreg_event_syscall_enter(void *ptr)
395 mutex_unlock(&syscall_trace_lock); 395 mutex_unlock(&syscall_trace_lock);
396} 396}
397 397
398int reg_event_syscall_exit(void *ptr) 398int reg_event_syscall_exit(struct ftrace_event_call *call)
399{ 399{
400 int ret = 0; 400 int ret = 0;
401 int num; 401 int num;
402 char *name; 402 char *name;
403 403
404 name = (char *)ptr; 404 name = call->data;
405 num = syscall_name_to_nr(name); 405 num = syscall_name_to_nr(name);
406 if (num < 0 || num >= NR_syscalls) 406 if (num < 0 || num >= NR_syscalls)
407 return -ENOSYS; 407 return -ENOSYS;
@@ -419,12 +419,12 @@ int reg_event_syscall_exit(void *ptr)
419 return ret; 419 return ret;
420} 420}
421 421
422void unreg_event_syscall_exit(void *ptr) 422void unreg_event_syscall_exit(struct ftrace_event_call *call)
423{ 423{
424 int num; 424 int num;
425 char *name; 425 char *name;
426 426
427 name = (char *)ptr; 427 name = call->data;
428 num = syscall_name_to_nr(name); 428 num = syscall_name_to_nr(name);
429 if (num < 0 || num >= NR_syscalls) 429 if (num < 0 || num >= NR_syscalls)
430 return; 430 return;
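
The hunks above change reg/unreg_event_syscall_enter/exit to take the ftrace_event_call directly instead of an opaque pointer, recovering the syscall name from call->data. A minimal sketch of the caller side this implies, assuming regfunc keeps the int return type that reg_event_syscall_enter has; the wrapping function is hypothetical.

/*
 * Hedged sketch: with the new prototypes the event core passes the
 * ftrace_event_call itself to the registration callback, rather than
 * handing it call->data.  example_event_enable() is hypothetical.
 */
static int example_event_enable(struct ftrace_event_call *call)
{
	/* before this patch: return call->regfunc(call->data); */
	return call->regfunc(call);
}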
@@ -477,6 +477,7 @@ static int sys_prof_refcount_exit;
477static void prof_syscall_enter(struct pt_regs *regs, long id) 477static void prof_syscall_enter(struct pt_regs *regs, long id)
478{ 478{
479 struct syscall_metadata *sys_data; 479 struct syscall_metadata *sys_data;
480 struct perf_trace_buf *trace_buf;
480 struct syscall_trace_enter *rec; 481 struct syscall_trace_enter *rec;
481 unsigned long flags; 482 unsigned long flags;
482 char *raw_data; 483 char *raw_data;
@@ -507,14 +508,25 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
507 cpu = smp_processor_id(); 508 cpu = smp_processor_id();
508 509
509 if (in_nmi()) 510 if (in_nmi())
510 raw_data = rcu_dereference(trace_profile_buf_nmi); 511 trace_buf = rcu_dereference(perf_trace_buf_nmi);
511 else 512 else
512 raw_data = rcu_dereference(trace_profile_buf); 513 trace_buf = rcu_dereference(perf_trace_buf);
513 514
514 if (!raw_data) 515 if (!trace_buf)
515 goto end; 516 goto end;
516 517
517 raw_data = per_cpu_ptr(raw_data, cpu); 518 trace_buf = per_cpu_ptr(trace_buf, cpu);
519
520 if (trace_buf->recursion++)
521 goto end_recursion;
522
523 /*
524 * Make recursion update visible before entering perf_tp_event
 525	 * so that we are protected against perf recursion.
526 */
527 barrier();
528
529 raw_data = trace_buf->buf;
518 530
519 /* zero the dead bytes from align to not leak stack to user */ 531 /* zero the dead bytes from align to not leak stack to user */
520 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; 532 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
@@ -527,6 +539,8 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
527 (unsigned long *)&rec->args); 539 (unsigned long *)&rec->args);
528 perf_tp_event(sys_data->enter_id, 0, 1, rec, size); 540 perf_tp_event(sys_data->enter_id, 0, 1, rec, size);
529 541
542end_recursion:
543 trace_buf->recursion--;
530end: 544end:
531 local_irq_restore(flags); 545 local_irq_restore(flags);
532} 546}
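
The hunk above replaces the raw per-cpu buffer lookup with a perf_trace_buf that carries a recursion counter. A hedged sketch of the guard pattern it introduces; the struct layout is inferred from the ->recursion and ->buf accesses in this hunk, and the real perf_trace_buf definition lives elsewhere in the series.

/*
 * Hedged sketch of the recursion guard used in prof_syscall_enter/exit.
 */
struct perf_trace_buf_sketch {
	char	buf[8192];	/* per-cpu scratch buffer; size illustrative */
	int	recursion;	/* non-zero while this CPU is inside the handler */
};

static char *perf_buf_get_sketch(struct perf_trace_buf_sketch *trace_buf)
{
	if (trace_buf->recursion++) {
		/* Re-entered on this CPU: undo the bump and refuse the buffer. */
		trace_buf->recursion--;
		return NULL;
	}
	/*
	 * Make the recursion update visible before perf_tp_event() runs,
	 * so a nested handler observes the counter and backs off.
	 */
	barrier();
	return trace_buf->buf;
}

static void perf_buf_put_sketch(struct perf_trace_buf_sketch *trace_buf)
{
	trace_buf->recursion--;
}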
@@ -574,6 +588,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
574{ 588{
575 struct syscall_metadata *sys_data; 589 struct syscall_metadata *sys_data;
576 struct syscall_trace_exit *rec; 590 struct syscall_trace_exit *rec;
591 struct perf_trace_buf *trace_buf;
577 unsigned long flags; 592 unsigned long flags;
578 int syscall_nr; 593 int syscall_nr;
579 char *raw_data; 594 char *raw_data;
@@ -605,14 +620,25 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
605 cpu = smp_processor_id(); 620 cpu = smp_processor_id();
606 621
607 if (in_nmi()) 622 if (in_nmi())
608 raw_data = rcu_dereference(trace_profile_buf_nmi); 623 trace_buf = rcu_dereference(perf_trace_buf_nmi);
609 else 624 else
610 raw_data = rcu_dereference(trace_profile_buf); 625 trace_buf = rcu_dereference(perf_trace_buf);
611 626
612 if (!raw_data) 627 if (!trace_buf)
613 goto end; 628 goto end;
614 629
615 raw_data = per_cpu_ptr(raw_data, cpu); 630 trace_buf = per_cpu_ptr(trace_buf, cpu);
631
632 if (trace_buf->recursion++)
633 goto end_recursion;
634
635 /*
636 * Make recursion update visible before entering perf_tp_event
 637	 * so that we are protected against perf recursion.
638 */
639 barrier();
640
641 raw_data = trace_buf->buf;
616 642
617 /* zero the dead bytes from align to not leak stack to user */ 643 /* zero the dead bytes from align to not leak stack to user */
618 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; 644 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
@@ -626,6 +652,8 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
626 652
627 perf_tp_event(sys_data->exit_id, 0, 1, rec, size); 653 perf_tp_event(sys_data->exit_id, 0, 1, rec, size);
628 654
655end_recursion:
656 trace_buf->recursion--;
629end: 657end:
630 local_irq_restore(flags); 658 local_irq_restore(flags);
631} 659}