aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/trace
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/trace')
-rw-r--r--kernel/trace/Kconfig38
-rw-r--r--kernel/trace/Makefile2
-rw-r--r--kernel/trace/blktrace.c39
-rw-r--r--kernel/trace/ftrace.c410
-rw-r--r--kernel/trace/kmemtrace.c2
-rw-r--r--kernel/trace/ring_buffer.c38
-rw-r--r--kernel/trace/ring_buffer_benchmark.c85
-rw-r--r--kernel/trace/trace.c78
-rw-r--r--kernel/trace/trace.h82
-rw-r--r--kernel/trace/trace_branch.c8
-rw-r--r--kernel/trace/trace_clock.c8
-rw-r--r--kernel/trace/trace_entries.h16
-rw-r--r--kernel/trace/trace_event_profile.c50
-rw-r--r--kernel/trace/trace_events.c191
-rw-r--r--kernel/trace/trace_events_filter.c426
-rw-r--r--kernel/trace/trace_export.c43
-rw-r--r--kernel/trace/trace_functions_graph.c165
-rw-r--r--kernel/trace/trace_hw_branches.c8
-rw-r--r--kernel/trace/trace_kprobe.c1542
-rw-r--r--kernel/trace/trace_ksym.c551
-rw-r--r--kernel/trace/trace_output.c98
-rw-r--r--kernel/trace/trace_selftest.c55
-rw-r--r--kernel/trace/trace_syscalls.c231
23 files changed, 3545 insertions, 621 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index b416512ad17f..d006554888dc 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -339,6 +339,27 @@ config POWER_TRACER
339 power management decisions, specifically the C-state and P-state 339 power management decisions, specifically the C-state and P-state
340 behavior. 340 behavior.
341 341
342config KSYM_TRACER
343 bool "Trace read and write access on kernel memory locations"
344 depends on HAVE_HW_BREAKPOINT
345 select TRACING
346 help
347 This tracer helps find read and write operations on any given kernel
348 symbol i.e. /proc/kallsyms.
349
350config PROFILE_KSYM_TRACER
351 bool "Profile all kernel memory accesses on 'watched' variables"
352 depends on KSYM_TRACER
353 help
354 This tracer profiles kernel accesses on variables watched through the
355 ksym tracer ftrace plugin. Depending upon the hardware, all read
356 and write operations on kernel variables can be monitored for
357 accesses.
358
359 The results will be displayed in:
360 /debugfs/tracing/profile_ksym
361
362 Say N if unsure.
342 363
343config STACK_TRACER 364config STACK_TRACER
344 bool "Trace max stack" 365 bool "Trace max stack"
@@ -428,6 +449,23 @@ config BLK_DEV_IO_TRACE
428 449
429 If unsure, say N. 450 If unsure, say N.
430 451
452config KPROBE_EVENT
453 depends on KPROBES
454 depends on X86
455 bool "Enable kprobes-based dynamic events"
456 select TRACING
457 default y
458 help
459 This allows the user to add tracing events (similar to tracepoints) on the fly
460 via the ftrace interface. See Documentation/trace/kprobetrace.txt
461 for more details.
462
463 Those events can be inserted wherever kprobes can probe, and record
464 various register and memory values.
465
466 This option is also required by perf-probe subcommand of perf tools. If
467 you want to use perf tools, this option is strongly recommended.
468
431config DYNAMIC_FTRACE 469config DYNAMIC_FTRACE
432 bool "enable/disable ftrace tracepoints dynamically" 470 bool "enable/disable ftrace tracepoints dynamically"
433 depends on FUNCTION_TRACER 471 depends on FUNCTION_TRACER
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 26f03ac07c2b..cd9ecd89ec77 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -53,6 +53,8 @@ obj-$(CONFIG_EVENT_TRACING) += trace_export.o
53obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o 53obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
54obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o 54obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
55obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o 55obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
56obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
57obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
56obj-$(CONFIG_EVENT_TRACING) += power-traces.o 58obj-$(CONFIG_EVENT_TRACING) += power-traces.o
57 59
58libftrace-y := ftrace.o 60libftrace-y := ftrace.o
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 3eb159c277c8..d9d6206e0b14 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -856,6 +856,37 @@ static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
856} 856}
857 857
858/** 858/**
859 * blk_add_trace_rq_remap - Add a trace for a request-remap operation
860 * @q: queue the io is for
861 * @rq: the source request
862 * @dev: target device
863 * @from: source sector
864 *
865 * Description:
866 * Device mapper remaps request to other devices.
867 * Add a trace for that action.
868 *
869 **/
870static void blk_add_trace_rq_remap(struct request_queue *q,
871 struct request *rq, dev_t dev,
872 sector_t from)
873{
874 struct blk_trace *bt = q->blk_trace;
875 struct blk_io_trace_remap r;
876
877 if (likely(!bt))
878 return;
879
880 r.device_from = cpu_to_be32(dev);
881 r.device_to = cpu_to_be32(disk_devt(rq->rq_disk));
882 r.sector_from = cpu_to_be64(from);
883
884 __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
885 rq_data_dir(rq), BLK_TA_REMAP, !!rq->errors,
886 sizeof(r), &r);
887}
888
889/**
859 * blk_add_driver_data - Add binary message with driver-specific data 890 * blk_add_driver_data - Add binary message with driver-specific data
860 * @q: queue the io is for 891 * @q: queue the io is for
861 * @rq: io request 892 * @rq: io request
@@ -922,10 +953,13 @@ static void blk_register_tracepoints(void)
922 WARN_ON(ret); 953 WARN_ON(ret);
923 ret = register_trace_block_remap(blk_add_trace_remap); 954 ret = register_trace_block_remap(blk_add_trace_remap);
924 WARN_ON(ret); 955 WARN_ON(ret);
956 ret = register_trace_block_rq_remap(blk_add_trace_rq_remap);
957 WARN_ON(ret);
925} 958}
926 959
927static void blk_unregister_tracepoints(void) 960static void blk_unregister_tracepoints(void)
928{ 961{
962 unregister_trace_block_rq_remap(blk_add_trace_rq_remap);
929 unregister_trace_block_remap(blk_add_trace_remap); 963 unregister_trace_block_remap(blk_add_trace_remap);
930 unregister_trace_block_split(blk_add_trace_split); 964 unregister_trace_block_split(blk_add_trace_split);
931 unregister_trace_block_unplug_io(blk_add_trace_unplug_io); 965 unregister_trace_block_unplug_io(blk_add_trace_unplug_io);
@@ -1657,6 +1691,11 @@ int blk_trace_init_sysfs(struct device *dev)
1657 return sysfs_create_group(&dev->kobj, &blk_trace_attr_group); 1691 return sysfs_create_group(&dev->kobj, &blk_trace_attr_group);
1658} 1692}
1659 1693
1694void blk_trace_remove_sysfs(struct device *dev)
1695{
1696 sysfs_remove_group(&dev->kobj, &blk_trace_attr_group);
1697}
1698
1660#endif /* CONFIG_BLK_DEV_IO_TRACE */ 1699#endif /* CONFIG_BLK_DEV_IO_TRACE */
1661 1700
1662#ifdef CONFIG_EVENT_TRACING 1701#ifdef CONFIG_EVENT_TRACING
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 46592feab5a6..e51a1bcb7bed 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -60,6 +60,13 @@ static int last_ftrace_enabled;
60/* Quick disabling of function tracer. */ 60/* Quick disabling of function tracer. */
61int function_trace_stop; 61int function_trace_stop;
62 62
63/* List for set_ftrace_pid's pids. */
64LIST_HEAD(ftrace_pids);
65struct ftrace_pid {
66 struct list_head list;
67 struct pid *pid;
68};
69
63/* 70/*
64 * ftrace_disabled is set when an anomaly is discovered. 71 * ftrace_disabled is set when an anomaly is discovered.
65 * ftrace_disabled is much stronger than ftrace_enabled. 72 * ftrace_disabled is much stronger than ftrace_enabled.
@@ -78,6 +85,10 @@ ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
78ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub; 85ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
79ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; 86ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
80 87
88#ifdef CONFIG_FUNCTION_GRAPH_TRACER
89static int ftrace_set_func(unsigned long *array, int *idx, char *buffer);
90#endif
91
81static void ftrace_list_func(unsigned long ip, unsigned long parent_ip) 92static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
82{ 93{
83 struct ftrace_ops *op = ftrace_list; 94 struct ftrace_ops *op = ftrace_list;
@@ -155,7 +166,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
155 else 166 else
156 func = ftrace_list_func; 167 func = ftrace_list_func;
157 168
158 if (ftrace_pid_trace) { 169 if (!list_empty(&ftrace_pids)) {
159 set_ftrace_pid_function(func); 170 set_ftrace_pid_function(func);
160 func = ftrace_pid_func; 171 func = ftrace_pid_func;
161 } 172 }
@@ -203,7 +214,7 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
203 if (ftrace_list->next == &ftrace_list_end) { 214 if (ftrace_list->next == &ftrace_list_end) {
204 ftrace_func_t func = ftrace_list->func; 215 ftrace_func_t func = ftrace_list->func;
205 216
206 if (ftrace_pid_trace) { 217 if (!list_empty(&ftrace_pids)) {
207 set_ftrace_pid_function(func); 218 set_ftrace_pid_function(func);
208 func = ftrace_pid_func; 219 func = ftrace_pid_func;
209 } 220 }
@@ -225,9 +236,13 @@ static void ftrace_update_pid_func(void)
225 if (ftrace_trace_function == ftrace_stub) 236 if (ftrace_trace_function == ftrace_stub)
226 return; 237 return;
227 238
239#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
228 func = ftrace_trace_function; 240 func = ftrace_trace_function;
241#else
242 func = __ftrace_trace_function;
243#endif
229 244
230 if (ftrace_pid_trace) { 245 if (!list_empty(&ftrace_pids)) {
231 set_ftrace_pid_function(func); 246 set_ftrace_pid_function(func);
232 func = ftrace_pid_func; 247 func = ftrace_pid_func;
233 } else { 248 } else {
@@ -736,7 +751,7 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf,
736 out: 751 out:
737 mutex_unlock(&ftrace_profile_lock); 752 mutex_unlock(&ftrace_profile_lock);
738 753
739 filp->f_pos += cnt; 754 *ppos += cnt;
740 755
741 return cnt; 756 return cnt;
742} 757}
@@ -817,8 +832,6 @@ static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
817} 832}
818#endif /* CONFIG_FUNCTION_PROFILER */ 833#endif /* CONFIG_FUNCTION_PROFILER */
819 834
820/* set when tracing only a pid */
821struct pid *ftrace_pid_trace;
822static struct pid * const ftrace_swapper_pid = &init_struct_pid; 835static struct pid * const ftrace_swapper_pid = &init_struct_pid;
823 836
824#ifdef CONFIG_DYNAMIC_FTRACE 837#ifdef CONFIG_DYNAMIC_FTRACE
@@ -1074,14 +1087,9 @@ static void ftrace_replace_code(int enable)
1074 failed = __ftrace_replace_code(rec, enable); 1087 failed = __ftrace_replace_code(rec, enable);
1075 if (failed) { 1088 if (failed) {
1076 rec->flags |= FTRACE_FL_FAILED; 1089 rec->flags |= FTRACE_FL_FAILED;
1077 if ((system_state == SYSTEM_BOOTING) || 1090 ftrace_bug(failed, rec->ip);
1078 !core_kernel_text(rec->ip)) { 1091 /* Stop processing */
1079 ftrace_free_rec(rec); 1092 return;
1080 } else {
1081 ftrace_bug(failed, rec->ip);
1082 /* Stop processing */
1083 return;
1084 }
1085 } 1093 }
1086 } while_for_each_ftrace_rec(); 1094 } while_for_each_ftrace_rec();
1087} 1095}
@@ -1262,12 +1270,34 @@ static int ftrace_update_code(struct module *mod)
1262 ftrace_new_addrs = p->newlist; 1270 ftrace_new_addrs = p->newlist;
1263 p->flags = 0L; 1271 p->flags = 0L;
1264 1272
1265 /* convert record (i.e, patch mcount-call with NOP) */ 1273 /*
1266 if (ftrace_code_disable(mod, p)) { 1274 * Do the initial record convertion from mcount jump
1267 p->flags |= FTRACE_FL_CONVERTED; 1275 * to the NOP instructions.
1268 ftrace_update_cnt++; 1276 */
1269 } else 1277 if (!ftrace_code_disable(mod, p)) {
1270 ftrace_free_rec(p); 1278 ftrace_free_rec(p);
1279 continue;
1280 }
1281
1282 p->flags |= FTRACE_FL_CONVERTED;
1283 ftrace_update_cnt++;
1284
1285 /*
1286 * If the tracing is enabled, go ahead and enable the record.
1287 *
1288 * The reason not to enable the record immediatelly is the
1289 * inherent check of ftrace_make_nop/ftrace_make_call for
1290 * correct previous instructions. Making first the NOP
1291 * conversion puts the module to the correct state, thus
1292 * passing the ftrace_make_call check.
1293 */
1294 if (ftrace_start_up) {
1295 int failed = __ftrace_replace_code(p, 1);
1296 if (failed) {
1297 ftrace_bug(failed, p->ip);
1298 ftrace_free_rec(p);
1299 }
1300 }
1271 } 1301 }
1272 1302
1273 stop = ftrace_now(raw_smp_processor_id()); 1303 stop = ftrace_now(raw_smp_processor_id());
@@ -1657,60 +1687,6 @@ ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
1657 return ret; 1687 return ret;
1658} 1688}
1659 1689
1660enum {
1661 MATCH_FULL,
1662 MATCH_FRONT_ONLY,
1663 MATCH_MIDDLE_ONLY,
1664 MATCH_END_ONLY,
1665};
1666
1667/*
1668 * (static function - no need for kernel doc)
1669 *
1670 * Pass in a buffer containing a glob and this function will
1671 * set search to point to the search part of the buffer and
1672 * return the type of search it is (see enum above).
1673 * This does modify buff.
1674 *
1675 * Returns enum type.
1676 * search returns the pointer to use for comparison.
1677 * not returns 1 if buff started with a '!'
1678 * 0 otherwise.
1679 */
1680static int
1681ftrace_setup_glob(char *buff, int len, char **search, int *not)
1682{
1683 int type = MATCH_FULL;
1684 int i;
1685
1686 if (buff[0] == '!') {
1687 *not = 1;
1688 buff++;
1689 len--;
1690 } else
1691 *not = 0;
1692
1693 *search = buff;
1694
1695 for (i = 0; i < len; i++) {
1696 if (buff[i] == '*') {
1697 if (!i) {
1698 *search = buff + 1;
1699 type = MATCH_END_ONLY;
1700 } else {
1701 if (type == MATCH_END_ONLY)
1702 type = MATCH_MIDDLE_ONLY;
1703 else
1704 type = MATCH_FRONT_ONLY;
1705 buff[i] = 0;
1706 break;
1707 }
1708 }
1709 }
1710
1711 return type;
1712}
1713
1714static int ftrace_match(char *str, char *regex, int len, int type) 1690static int ftrace_match(char *str, char *regex, int len, int type)
1715{ 1691{
1716 int matched = 0; 1692 int matched = 0;
@@ -1759,7 +1735,7 @@ static void ftrace_match_records(char *buff, int len, int enable)
1759 int not; 1735 int not;
1760 1736
1761 flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; 1737 flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
1762 type = ftrace_setup_glob(buff, len, &search, &not); 1738 type = filter_parse_regex(buff, len, &search, &not);
1763 1739
1764 search_len = strlen(search); 1740 search_len = strlen(search);
1765 1741
@@ -1827,7 +1803,7 @@ static void ftrace_match_module_records(char *buff, char *mod, int enable)
1827 } 1803 }
1828 1804
1829 if (strlen(buff)) { 1805 if (strlen(buff)) {
1830 type = ftrace_setup_glob(buff, strlen(buff), &search, &not); 1806 type = filter_parse_regex(buff, strlen(buff), &search, &not);
1831 search_len = strlen(search); 1807 search_len = strlen(search);
1832 } 1808 }
1833 1809
@@ -1992,7 +1968,7 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
1992 int count = 0; 1968 int count = 0;
1993 char *search; 1969 char *search;
1994 1970
1995 type = ftrace_setup_glob(glob, strlen(glob), &search, &not); 1971 type = filter_parse_regex(glob, strlen(glob), &search, &not);
1996 len = strlen(search); 1972 len = strlen(search);
1997 1973
1998 /* we do not support '!' for function probes */ 1974 /* we do not support '!' for function probes */
@@ -2069,7 +2045,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
2069 else if (glob) { 2045 else if (glob) {
2070 int not; 2046 int not;
2071 2047
2072 type = ftrace_setup_glob(glob, strlen(glob), &search, &not); 2048 type = filter_parse_regex(glob, strlen(glob), &search, &not);
2073 len = strlen(search); 2049 len = strlen(search);
2074 2050
2075 /* we do not support '!' for function probes */ 2051 /* we do not support '!' for function probes */
@@ -2223,15 +2199,15 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
2223 ret = ftrace_process_regex(parser->buffer, 2199 ret = ftrace_process_regex(parser->buffer,
2224 parser->idx, enable); 2200 parser->idx, enable);
2225 if (ret) 2201 if (ret)
2226 goto out; 2202 goto out_unlock;
2227 2203
2228 trace_parser_clear(parser); 2204 trace_parser_clear(parser);
2229 } 2205 }
2230 2206
2231 ret = read; 2207 ret = read;
2232 2208out_unlock:
2233 mutex_unlock(&ftrace_regex_lock); 2209 mutex_unlock(&ftrace_regex_lock);
2234out: 2210
2235 return ret; 2211 return ret;
2236} 2212}
2237 2213
@@ -2313,6 +2289,32 @@ static int __init set_ftrace_filter(char *str)
2313} 2289}
2314__setup("ftrace_filter=", set_ftrace_filter); 2290__setup("ftrace_filter=", set_ftrace_filter);
2315 2291
2292#ifdef CONFIG_FUNCTION_GRAPH_TRACER
2293static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata;
2294static int __init set_graph_function(char *str)
2295{
2296 strlcpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE);
2297 return 1;
2298}
2299__setup("ftrace_graph_filter=", set_graph_function);
2300
2301static void __init set_ftrace_early_graph(char *buf)
2302{
2303 int ret;
2304 char *func;
2305
2306 while (buf) {
2307 func = strsep(&buf, ",");
2308 /* we allow only one expression at a time */
2309 ret = ftrace_set_func(ftrace_graph_funcs, &ftrace_graph_count,
2310 func);
2311 if (ret)
2312 printk(KERN_DEBUG "ftrace: function %s not "
2313 "traceable\n", func);
2314 }
2315}
2316#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
2317
2316static void __init set_ftrace_early_filter(char *buf, int enable) 2318static void __init set_ftrace_early_filter(char *buf, int enable)
2317{ 2319{
2318 char *func; 2320 char *func;
@@ -2329,6 +2331,10 @@ static void __init set_ftrace_early_filters(void)
2329 set_ftrace_early_filter(ftrace_filter_buf, 1); 2331 set_ftrace_early_filter(ftrace_filter_buf, 1);
2330 if (ftrace_notrace_buf[0]) 2332 if (ftrace_notrace_buf[0])
2331 set_ftrace_early_filter(ftrace_notrace_buf, 0); 2333 set_ftrace_early_filter(ftrace_notrace_buf, 0);
2334#ifdef CONFIG_FUNCTION_GRAPH_TRACER
2335 if (ftrace_graph_buf[0])
2336 set_ftrace_early_graph(ftrace_graph_buf);
2337#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
2332} 2338}
2333 2339
2334static int 2340static int
@@ -2514,7 +2520,7 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
2514 return -ENODEV; 2520 return -ENODEV;
2515 2521
2516 /* decode regex */ 2522 /* decode regex */
2517 type = ftrace_setup_glob(buffer, strlen(buffer), &search, &not); 2523 type = filter_parse_regex(buffer, strlen(buffer), &search, &not);
2518 if (not) 2524 if (not)
2519 return -EINVAL; 2525 return -EINVAL;
2520 2526
@@ -2625,7 +2631,7 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
2625 return 0; 2631 return 0;
2626} 2632}
2627 2633
2628static int ftrace_convert_nops(struct module *mod, 2634static int ftrace_process_locs(struct module *mod,
2629 unsigned long *start, 2635 unsigned long *start,
2630 unsigned long *end) 2636 unsigned long *end)
2631{ 2637{
@@ -2658,19 +2664,17 @@ static int ftrace_convert_nops(struct module *mod,
2658} 2664}
2659 2665
2660#ifdef CONFIG_MODULES 2666#ifdef CONFIG_MODULES
2661void ftrace_release(void *start, void *end) 2667void ftrace_release_mod(struct module *mod)
2662{ 2668{
2663 struct dyn_ftrace *rec; 2669 struct dyn_ftrace *rec;
2664 struct ftrace_page *pg; 2670 struct ftrace_page *pg;
2665 unsigned long s = (unsigned long)start;
2666 unsigned long e = (unsigned long)end;
2667 2671
2668 if (ftrace_disabled || !start || start == end) 2672 if (ftrace_disabled)
2669 return; 2673 return;
2670 2674
2671 mutex_lock(&ftrace_lock); 2675 mutex_lock(&ftrace_lock);
2672 do_for_each_ftrace_rec(pg, rec) { 2676 do_for_each_ftrace_rec(pg, rec) {
2673 if ((rec->ip >= s) && (rec->ip < e)) { 2677 if (within_module_core(rec->ip, mod)) {
2674 /* 2678 /*
2675 * rec->ip is changed in ftrace_free_rec() 2679 * rec->ip is changed in ftrace_free_rec()
2676 * It should not between s and e if record was freed. 2680 * It should not between s and e if record was freed.
@@ -2687,7 +2691,7 @@ static void ftrace_init_module(struct module *mod,
2687{ 2691{
2688 if (ftrace_disabled || start == end) 2692 if (ftrace_disabled || start == end)
2689 return; 2693 return;
2690 ftrace_convert_nops(mod, start, end); 2694 ftrace_process_locs(mod, start, end);
2691} 2695}
2692 2696
2693static int ftrace_module_notify(struct notifier_block *self, 2697static int ftrace_module_notify(struct notifier_block *self,
@@ -2702,9 +2706,7 @@ static int ftrace_module_notify(struct notifier_block *self,
2702 mod->num_ftrace_callsites); 2706 mod->num_ftrace_callsites);
2703 break; 2707 break;
2704 case MODULE_STATE_GOING: 2708 case MODULE_STATE_GOING:
2705 ftrace_release(mod->ftrace_callsites, 2709 ftrace_release_mod(mod);
2706 mod->ftrace_callsites +
2707 mod->num_ftrace_callsites);
2708 break; 2710 break;
2709 } 2711 }
2710 2712
@@ -2750,7 +2752,7 @@ void __init ftrace_init(void)
2750 2752
2751 last_ftrace_enabled = ftrace_enabled = 1; 2753 last_ftrace_enabled = ftrace_enabled = 1;
2752 2754
2753 ret = ftrace_convert_nops(NULL, 2755 ret = ftrace_process_locs(NULL,
2754 __start_mcount_loc, 2756 __start_mcount_loc,
2755 __stop_mcount_loc); 2757 __stop_mcount_loc);
2756 2758
@@ -2783,23 +2785,6 @@ static inline void ftrace_startup_enable(int command) { }
2783# define ftrace_shutdown_sysctl() do { } while (0) 2785# define ftrace_shutdown_sysctl() do { } while (0)
2784#endif /* CONFIG_DYNAMIC_FTRACE */ 2786#endif /* CONFIG_DYNAMIC_FTRACE */
2785 2787
2786static ssize_t
2787ftrace_pid_read(struct file *file, char __user *ubuf,
2788 size_t cnt, loff_t *ppos)
2789{
2790 char buf[64];
2791 int r;
2792
2793 if (ftrace_pid_trace == ftrace_swapper_pid)
2794 r = sprintf(buf, "swapper tasks\n");
2795 else if (ftrace_pid_trace)
2796 r = sprintf(buf, "%u\n", pid_vnr(ftrace_pid_trace));
2797 else
2798 r = sprintf(buf, "no pid\n");
2799
2800 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2801}
2802
2803static void clear_ftrace_swapper(void) 2788static void clear_ftrace_swapper(void)
2804{ 2789{
2805 struct task_struct *p; 2790 struct task_struct *p;
@@ -2850,14 +2835,12 @@ static void set_ftrace_pid(struct pid *pid)
2850 rcu_read_unlock(); 2835 rcu_read_unlock();
2851} 2836}
2852 2837
2853static void clear_ftrace_pid_task(struct pid **pid) 2838static void clear_ftrace_pid_task(struct pid *pid)
2854{ 2839{
2855 if (*pid == ftrace_swapper_pid) 2840 if (pid == ftrace_swapper_pid)
2856 clear_ftrace_swapper(); 2841 clear_ftrace_swapper();
2857 else 2842 else
2858 clear_ftrace_pid(*pid); 2843 clear_ftrace_pid(pid);
2859
2860 *pid = NULL;
2861} 2844}
2862 2845
2863static void set_ftrace_pid_task(struct pid *pid) 2846static void set_ftrace_pid_task(struct pid *pid)
@@ -2868,74 +2851,184 @@ static void set_ftrace_pid_task(struct pid *pid)
2868 set_ftrace_pid(pid); 2851 set_ftrace_pid(pid);
2869} 2852}
2870 2853
2871static ssize_t 2854static int ftrace_pid_add(int p)
2872ftrace_pid_write(struct file *filp, const char __user *ubuf,
2873 size_t cnt, loff_t *ppos)
2874{ 2855{
2875 struct pid *pid; 2856 struct pid *pid;
2876 char buf[64]; 2857 struct ftrace_pid *fpid;
2877 long val; 2858 int ret = -EINVAL;
2878 int ret;
2879 2859
2880 if (cnt >= sizeof(buf)) 2860 mutex_lock(&ftrace_lock);
2881 return -EINVAL;
2882 2861
2883 if (copy_from_user(&buf, ubuf, cnt)) 2862 if (!p)
2884 return -EFAULT; 2863 pid = ftrace_swapper_pid;
2864 else
2865 pid = find_get_pid(p);
2885 2866
2886 buf[cnt] = 0; 2867 if (!pid)
2868 goto out;
2887 2869
2888 ret = strict_strtol(buf, 10, &val); 2870 ret = 0;
2889 if (ret < 0)
2890 return ret;
2891 2871
2892 mutex_lock(&ftrace_lock); 2872 list_for_each_entry(fpid, &ftrace_pids, list)
2893 if (val < 0) { 2873 if (fpid->pid == pid)
2894 /* disable pid tracing */ 2874 goto out_put;
2895 if (!ftrace_pid_trace)
2896 goto out;
2897 2875
2898 clear_ftrace_pid_task(&ftrace_pid_trace); 2876 ret = -ENOMEM;
2899 2877
2900 } else { 2878 fpid = kmalloc(sizeof(*fpid), GFP_KERNEL);
2901 /* swapper task is special */ 2879 if (!fpid)
2902 if (!val) { 2880 goto out_put;
2903 pid = ftrace_swapper_pid;
2904 if (pid == ftrace_pid_trace)
2905 goto out;
2906 } else {
2907 pid = find_get_pid(val);
2908 2881
2909 if (pid == ftrace_pid_trace) { 2882 list_add(&fpid->list, &ftrace_pids);
2910 put_pid(pid); 2883 fpid->pid = pid;
2911 goto out;
2912 }
2913 }
2914 2884
2915 if (ftrace_pid_trace) 2885 set_ftrace_pid_task(pid);
2916 clear_ftrace_pid_task(&ftrace_pid_trace);
2917 2886
2918 if (!pid) 2887 ftrace_update_pid_func();
2919 goto out; 2888 ftrace_startup_enable(0);
2889
2890 mutex_unlock(&ftrace_lock);
2891 return 0;
2892
2893out_put:
2894 if (pid != ftrace_swapper_pid)
2895 put_pid(pid);
2920 2896
2921 ftrace_pid_trace = pid; 2897out:
2898 mutex_unlock(&ftrace_lock);
2899 return ret;
2900}
2901
2902static void ftrace_pid_reset(void)
2903{
2904 struct ftrace_pid *fpid, *safe;
2922 2905
2923 set_ftrace_pid_task(ftrace_pid_trace); 2906 mutex_lock(&ftrace_lock);
2907 list_for_each_entry_safe(fpid, safe, &ftrace_pids, list) {
2908 struct pid *pid = fpid->pid;
2909
2910 clear_ftrace_pid_task(pid);
2911
2912 list_del(&fpid->list);
2913 kfree(fpid);
2924 } 2914 }
2925 2915
2926 /* update the function call */
2927 ftrace_update_pid_func(); 2916 ftrace_update_pid_func();
2928 ftrace_startup_enable(0); 2917 ftrace_startup_enable(0);
2929 2918
2930 out:
2931 mutex_unlock(&ftrace_lock); 2919 mutex_unlock(&ftrace_lock);
2920}
2932 2921
2933 return cnt; 2922static void *fpid_start(struct seq_file *m, loff_t *pos)
2923{
2924 mutex_lock(&ftrace_lock);
2925
2926 if (list_empty(&ftrace_pids) && (!*pos))
2927 return (void *) 1;
2928
2929 return seq_list_start(&ftrace_pids, *pos);
2930}
2931
2932static void *fpid_next(struct seq_file *m, void *v, loff_t *pos)
2933{
2934 if (v == (void *)1)
2935 return NULL;
2936
2937 return seq_list_next(v, &ftrace_pids, pos);
2938}
2939
2940static void fpid_stop(struct seq_file *m, void *p)
2941{
2942 mutex_unlock(&ftrace_lock);
2943}
2944
2945static int fpid_show(struct seq_file *m, void *v)
2946{
2947 const struct ftrace_pid *fpid = list_entry(v, struct ftrace_pid, list);
2948
2949 if (v == (void *)1) {
2950 seq_printf(m, "no pid\n");
2951 return 0;
2952 }
2953
2954 if (fpid->pid == ftrace_swapper_pid)
2955 seq_printf(m, "swapper tasks\n");
2956 else
2957 seq_printf(m, "%u\n", pid_vnr(fpid->pid));
2958
2959 return 0;
2960}
2961
2962static const struct seq_operations ftrace_pid_sops = {
2963 .start = fpid_start,
2964 .next = fpid_next,
2965 .stop = fpid_stop,
2966 .show = fpid_show,
2967};
2968
2969static int
2970ftrace_pid_open(struct inode *inode, struct file *file)
2971{
2972 int ret = 0;
2973
2974 if ((file->f_mode & FMODE_WRITE) &&
2975 (file->f_flags & O_TRUNC))
2976 ftrace_pid_reset();
2977
2978 if (file->f_mode & FMODE_READ)
2979 ret = seq_open(file, &ftrace_pid_sops);
2980
2981 return ret;
2982}
2983
2984static ssize_t
2985ftrace_pid_write(struct file *filp, const char __user *ubuf,
2986 size_t cnt, loff_t *ppos)
2987{
2988 char buf[64], *tmp;
2989 long val;
2990 int ret;
2991
2992 if (cnt >= sizeof(buf))
2993 return -EINVAL;
2994
2995 if (copy_from_user(&buf, ubuf, cnt))
2996 return -EFAULT;
2997
2998 buf[cnt] = 0;
2999
3000 /*
3001 * Allow "echo > set_ftrace_pid" or "echo -n '' > set_ftrace_pid"
3002 * to clean the filter quietly.
3003 */
3004 tmp = strstrip(buf);
3005 if (strlen(tmp) == 0)
3006 return 1;
3007
3008 ret = strict_strtol(tmp, 10, &val);
3009 if (ret < 0)
3010 return ret;
3011
3012 ret = ftrace_pid_add(val);
3013
3014 return ret ? ret : cnt;
3015}
3016
3017static int
3018ftrace_pid_release(struct inode *inode, struct file *file)
3019{
3020 if (file->f_mode & FMODE_READ)
3021 seq_release(inode, file);
3022
3023 return 0;
2934} 3024}
2935 3025
2936static const struct file_operations ftrace_pid_fops = { 3026static const struct file_operations ftrace_pid_fops = {
2937 .read = ftrace_pid_read, 3027 .open = ftrace_pid_open,
2938 .write = ftrace_pid_write, 3028 .write = ftrace_pid_write,
3029 .read = seq_read,
3030 .llseek = seq_lseek,
3031 .release = ftrace_pid_release,
2939}; 3032};
2940 3033
2941static __init int ftrace_init_debugfs(void) 3034static __init int ftrace_init_debugfs(void)
@@ -3298,4 +3391,3 @@ void ftrace_graph_stop(void)
3298 ftrace_stop(); 3391 ftrace_stop();
3299} 3392}
3300#endif 3393#endif
3301
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
index 81b1645c8549..a91da69f153a 100644
--- a/kernel/trace/kmemtrace.c
+++ b/kernel/trace/kmemtrace.c
@@ -501,7 +501,7 @@ static int __init init_kmem_tracer(void)
501 return 1; 501 return 1;
502 } 502 }
503 503
504 if (!register_tracer(&kmem_tracer)) { 504 if (register_tracer(&kmem_tracer) != 0) {
505 pr_warning("Warning: could not register the kmem tracer\n"); 505 pr_warning("Warning: could not register the kmem tracer\n");
506 return 1; 506 return 1;
507 } 507 }
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index d4ff01970547..a1ca4956ab5e 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -397,18 +397,21 @@ int ring_buffer_print_page_header(struct trace_seq *s)
397 int ret; 397 int ret;
398 398
399 ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t" 399 ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t"
400 "offset:0;\tsize:%u;\n", 400 "offset:0;\tsize:%u;\tsigned:%u;\n",
401 (unsigned int)sizeof(field.time_stamp)); 401 (unsigned int)sizeof(field.time_stamp),
402 (unsigned int)is_signed_type(u64));
402 403
403 ret = trace_seq_printf(s, "\tfield: local_t commit;\t" 404 ret = trace_seq_printf(s, "\tfield: local_t commit;\t"
404 "offset:%u;\tsize:%u;\n", 405 "offset:%u;\tsize:%u;\tsigned:%u;\n",
405 (unsigned int)offsetof(typeof(field), commit), 406 (unsigned int)offsetof(typeof(field), commit),
406 (unsigned int)sizeof(field.commit)); 407 (unsigned int)sizeof(field.commit),
408 (unsigned int)is_signed_type(long));
407 409
408 ret = trace_seq_printf(s, "\tfield: char data;\t" 410 ret = trace_seq_printf(s, "\tfield: char data;\t"
409 "offset:%u;\tsize:%u;\n", 411 "offset:%u;\tsize:%u;\tsigned:%u;\n",
410 (unsigned int)offsetof(typeof(field), data), 412 (unsigned int)offsetof(typeof(field), data),
411 (unsigned int)BUF_PAGE_SIZE); 413 (unsigned int)BUF_PAGE_SIZE,
414 (unsigned int)is_signed_type(char));
412 415
413 return ret; 416 return ret;
414} 417}
@@ -483,7 +486,7 @@ struct ring_buffer_iter {
483/* Up this if you want to test the TIME_EXTENTS and normalization */ 486/* Up this if you want to test the TIME_EXTENTS and normalization */
484#define DEBUG_SHIFT 0 487#define DEBUG_SHIFT 0
485 488
486static inline u64 rb_time_stamp(struct ring_buffer *buffer, int cpu) 489static inline u64 rb_time_stamp(struct ring_buffer *buffer)
487{ 490{
488 /* shift to debug/test normalization and TIME_EXTENTS */ 491 /* shift to debug/test normalization and TIME_EXTENTS */
489 return buffer->clock() << DEBUG_SHIFT; 492 return buffer->clock() << DEBUG_SHIFT;
@@ -494,7 +497,7 @@ u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu)
494 u64 time; 497 u64 time;
495 498
496 preempt_disable_notrace(); 499 preempt_disable_notrace();
497 time = rb_time_stamp(buffer, cpu); 500 time = rb_time_stamp(buffer);
498 preempt_enable_no_resched_notrace(); 501 preempt_enable_no_resched_notrace();
499 502
500 return time; 503 return time;
@@ -599,7 +602,7 @@ static struct list_head *rb_list_head(struct list_head *list)
599} 602}
600 603
601/* 604/*
602 * rb_is_head_page - test if the give page is the head page 605 * rb_is_head_page - test if the given page is the head page
603 * 606 *
604 * Because the reader may move the head_page pointer, we can 607 * Because the reader may move the head_page pointer, we can
605 * not trust what the head page is (it may be pointing to 608 * not trust what the head page is (it may be pointing to
@@ -1193,6 +1196,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
1193 atomic_inc(&cpu_buffer->record_disabled); 1196 atomic_inc(&cpu_buffer->record_disabled);
1194 synchronize_sched(); 1197 synchronize_sched();
1195 1198
1199 spin_lock_irq(&cpu_buffer->reader_lock);
1196 rb_head_page_deactivate(cpu_buffer); 1200 rb_head_page_deactivate(cpu_buffer);
1197 1201
1198 for (i = 0; i < nr_pages; i++) { 1202 for (i = 0; i < nr_pages; i++) {
@@ -1207,6 +1211,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
1207 return; 1211 return;
1208 1212
1209 rb_reset_cpu(cpu_buffer); 1213 rb_reset_cpu(cpu_buffer);
1214 spin_unlock_irq(&cpu_buffer->reader_lock);
1210 1215
1211 rb_check_pages(cpu_buffer); 1216 rb_check_pages(cpu_buffer);
1212 1217
@@ -1785,9 +1790,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
1785static struct ring_buffer_event * 1790static struct ring_buffer_event *
1786rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, 1791rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
1787 unsigned long length, unsigned long tail, 1792 unsigned long length, unsigned long tail,
1788 struct buffer_page *commit_page,
1789 struct buffer_page *tail_page, u64 *ts) 1793 struct buffer_page *tail_page, u64 *ts)
1790{ 1794{
1795 struct buffer_page *commit_page = cpu_buffer->commit_page;
1791 struct ring_buffer *buffer = cpu_buffer->buffer; 1796 struct ring_buffer *buffer = cpu_buffer->buffer;
1792 struct buffer_page *next_page; 1797 struct buffer_page *next_page;
1793 int ret; 1798 int ret;
@@ -1868,7 +1873,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
1868 * Nested commits always have zero deltas, so 1873 * Nested commits always have zero deltas, so
1869 * just reread the time stamp 1874 * just reread the time stamp
1870 */ 1875 */
1871 *ts = rb_time_stamp(buffer, cpu_buffer->cpu); 1876 *ts = rb_time_stamp(buffer);
1872 next_page->page->time_stamp = *ts; 1877 next_page->page->time_stamp = *ts;
1873 } 1878 }
1874 1879
@@ -1890,13 +1895,10 @@ static struct ring_buffer_event *
1890__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, 1895__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1891 unsigned type, unsigned long length, u64 *ts) 1896 unsigned type, unsigned long length, u64 *ts)
1892{ 1897{
1893 struct buffer_page *tail_page, *commit_page; 1898 struct buffer_page *tail_page;
1894 struct ring_buffer_event *event; 1899 struct ring_buffer_event *event;
1895 unsigned long tail, write; 1900 unsigned long tail, write;
1896 1901
1897 commit_page = cpu_buffer->commit_page;
1898 /* we just need to protect against interrupts */
1899 barrier();
1900 tail_page = cpu_buffer->tail_page; 1902 tail_page = cpu_buffer->tail_page;
1901 write = local_add_return(length, &tail_page->write); 1903 write = local_add_return(length, &tail_page->write);
1902 1904
@@ -1907,7 +1909,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1907 /* See if we shot pass the end of this buffer page */ 1909 /* See if we shot pass the end of this buffer page */
1908 if (write > BUF_PAGE_SIZE) 1910 if (write > BUF_PAGE_SIZE)
1909 return rb_move_tail(cpu_buffer, length, tail, 1911 return rb_move_tail(cpu_buffer, length, tail,
1910 commit_page, tail_page, ts); 1912 tail_page, ts);
1911 1913
1912 /* We reserved something on the buffer */ 1914 /* We reserved something on the buffer */
1913 1915
@@ -2111,7 +2113,7 @@ rb_reserve_next_event(struct ring_buffer *buffer,
2111 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000)) 2113 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
2112 goto out_fail; 2114 goto out_fail;
2113 2115
2114 ts = rb_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu); 2116 ts = rb_time_stamp(cpu_buffer->buffer);
2115 2117
2116 /* 2118 /*
2117 * Only the first commit can update the timestamp. 2119 * Only the first commit can update the timestamp.
@@ -2681,7 +2683,7 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer)
2681EXPORT_SYMBOL_GPL(ring_buffer_entries); 2683EXPORT_SYMBOL_GPL(ring_buffer_entries);
2682 2684
2683/** 2685/**
2684 * ring_buffer_overrun_cpu - get the number of overruns in buffer 2686 * ring_buffer_overruns - get the number of overruns in buffer
2685 * @buffer: The ring buffer 2687 * @buffer: The ring buffer
2686 * 2688 *
2687 * Returns the total number of overruns in the ring buffer 2689 * Returns the total number of overruns in the ring buffer
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index 573d3cc762c3..b2477caf09c2 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -35,6 +35,28 @@ static int disable_reader;
35module_param(disable_reader, uint, 0644); 35module_param(disable_reader, uint, 0644);
36MODULE_PARM_DESC(disable_reader, "only run producer"); 36MODULE_PARM_DESC(disable_reader, "only run producer");
37 37
38static int write_iteration = 50;
39module_param(write_iteration, uint, 0644);
40MODULE_PARM_DESC(write_iteration, "# of writes between timestamp readings");
41
42static int producer_nice = 19;
43static int consumer_nice = 19;
44
45static int producer_fifo = -1;
46static int consumer_fifo = -1;
47
48module_param(producer_nice, uint, 0644);
49MODULE_PARM_DESC(producer_nice, "nice prio for producer");
50
51module_param(consumer_nice, uint, 0644);
52MODULE_PARM_DESC(consumer_nice, "nice prio for consumer");
53
54module_param(producer_fifo, uint, 0644);
55MODULE_PARM_DESC(producer_fifo, "fifo prio for producer");
56
57module_param(consumer_fifo, uint, 0644);
58MODULE_PARM_DESC(consumer_fifo, "fifo prio for consumer");
59
38static int read_events; 60static int read_events;
39 61
40static int kill_test; 62static int kill_test;
@@ -208,15 +230,18 @@ static void ring_buffer_producer(void)
208 do { 230 do {
209 struct ring_buffer_event *event; 231 struct ring_buffer_event *event;
210 int *entry; 232 int *entry;
211 233 int i;
212 event = ring_buffer_lock_reserve(buffer, 10); 234
213 if (!event) { 235 for (i = 0; i < write_iteration; i++) {
214 missed++; 236 event = ring_buffer_lock_reserve(buffer, 10);
215 } else { 237 if (!event) {
216 hit++; 238 missed++;
217 entry = ring_buffer_event_data(event); 239 } else {
218 *entry = smp_processor_id(); 240 hit++;
219 ring_buffer_unlock_commit(buffer, event); 241 entry = ring_buffer_event_data(event);
242 *entry = smp_processor_id();
243 ring_buffer_unlock_commit(buffer, event);
244 }
220 } 245 }
221 do_gettimeofday(&end_tv); 246 do_gettimeofday(&end_tv);
222 247
@@ -263,6 +288,27 @@ static void ring_buffer_producer(void)
263 288
264 if (kill_test) 289 if (kill_test)
265 trace_printk("ERROR!\n"); 290 trace_printk("ERROR!\n");
291
292 if (!disable_reader) {
293 if (consumer_fifo < 0)
294 trace_printk("Running Consumer at nice: %d\n",
295 consumer_nice);
296 else
297 trace_printk("Running Consumer at SCHED_FIFO %d\n",
298 consumer_fifo);
299 }
300 if (producer_fifo < 0)
301 trace_printk("Running Producer at nice: %d\n",
302 producer_nice);
303 else
304 trace_printk("Running Producer at SCHED_FIFO %d\n",
305 producer_fifo);
306
307 /* Let the user know that the test is running at low priority */
308 if (producer_fifo < 0 && consumer_fifo < 0 &&
309 producer_nice == 19 && consumer_nice == 19)
310 trace_printk("WARNING!!! This test is running at lowest priority.\n");
311
266 trace_printk("Time: %lld (usecs)\n", time); 312 trace_printk("Time: %lld (usecs)\n", time);
267 trace_printk("Overruns: %lld\n", overruns); 313 trace_printk("Overruns: %lld\n", overruns);
268 if (disable_reader) 314 if (disable_reader)
@@ -392,6 +438,27 @@ static int __init ring_buffer_benchmark_init(void)
392 if (IS_ERR(producer)) 438 if (IS_ERR(producer))
393 goto out_kill; 439 goto out_kill;
394 440
441 /*
442 * Run them as low-prio background tasks by default:
443 */
444 if (!disable_reader) {
445 if (consumer_fifo >= 0) {
446 struct sched_param param = {
447 .sched_priority = consumer_fifo
448 };
449 sched_setscheduler(consumer, SCHED_FIFO, &param);
450 } else
451 set_user_nice(consumer, consumer_nice);
452 }
453
454 if (producer_fifo >= 0) {
455 struct sched_param param = {
456 .sched_priority = consumer_fifo
457 };
458 sched_setscheduler(producer, SCHED_FIFO, &param);
459 } else
460 set_user_nice(producer, producer_nice);
461
395 return 0; 462 return 0;
396 463
397 out_kill: 464 out_kill:
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 85a5ed70b5b2..c82dfd92fdfd 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -129,7 +129,7 @@ static int tracing_set_tracer(const char *buf);
129static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata; 129static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
130static char *default_bootup_tracer; 130static char *default_bootup_tracer;
131 131
132static int __init set_ftrace(char *str) 132static int __init set_cmdline_ftrace(char *str)
133{ 133{
134 strncpy(bootup_tracer_buf, str, MAX_TRACER_SIZE); 134 strncpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
135 default_bootup_tracer = bootup_tracer_buf; 135 default_bootup_tracer = bootup_tracer_buf;
@@ -137,7 +137,7 @@ static int __init set_ftrace(char *str)
137 ring_buffer_expanded = 1; 137 ring_buffer_expanded = 1;
138 return 1; 138 return 1;
139} 139}
140__setup("ftrace=", set_ftrace); 140__setup("ftrace=", set_cmdline_ftrace);
141 141
142static int __init set_ftrace_dump_on_oops(char *str) 142static int __init set_ftrace_dump_on_oops(char *str)
143{ 143{
@@ -1363,9 +1363,6 @@ int trace_array_vprintk(struct trace_array *tr,
1363 __raw_spin_lock(&trace_buf_lock); 1363 __raw_spin_lock(&trace_buf_lock);
1364 len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args); 1364 len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
1365 1365
1366 len = min(len, TRACE_BUF_SIZE-1);
1367 trace_buf[len] = 0;
1368
1369 size = sizeof(*entry) + len + 1; 1366 size = sizeof(*entry) + len + 1;
1370 buffer = tr->buffer; 1367 buffer = tr->buffer;
1371 event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, 1368 event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
@@ -1373,10 +1370,10 @@ int trace_array_vprintk(struct trace_array *tr,
1373 if (!event) 1370 if (!event)
1374 goto out_unlock; 1371 goto out_unlock;
1375 entry = ring_buffer_event_data(event); 1372 entry = ring_buffer_event_data(event);
1376 entry->ip = ip; 1373 entry->ip = ip;
1377 1374
1378 memcpy(&entry->buf, trace_buf, len); 1375 memcpy(&entry->buf, trace_buf, len);
1379 entry->buf[len] = 0; 1376 entry->buf[len] = '\0';
1380 if (!filter_check_discard(call, entry, buffer, event)) 1377 if (!filter_check_discard(call, entry, buffer, event))
1381 ring_buffer_unlock_commit(buffer, event); 1378 ring_buffer_unlock_commit(buffer, event);
1382 1379
@@ -1393,7 +1390,7 @@ int trace_array_vprintk(struct trace_array *tr,
1393 1390
1394int trace_vprintk(unsigned long ip, const char *fmt, va_list args) 1391int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
1395{ 1392{
1396 return trace_array_printk(&global_trace, ip, fmt, args); 1393 return trace_array_vprintk(&global_trace, ip, fmt, args);
1397} 1394}
1398EXPORT_SYMBOL_GPL(trace_vprintk); 1395EXPORT_SYMBOL_GPL(trace_vprintk);
1399 1396
@@ -1515,6 +1512,8 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos)
1515 int i = (int)*pos; 1512 int i = (int)*pos;
1516 void *ent; 1513 void *ent;
1517 1514
1515 WARN_ON_ONCE(iter->leftover);
1516
1518 (*pos)++; 1517 (*pos)++;
1519 1518
1520 /* can't go backwards */ 1519 /* can't go backwards */
@@ -1613,8 +1612,16 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1613 ; 1612 ;
1614 1613
1615 } else { 1614 } else {
1616 l = *pos - 1; 1615 /*
1617 p = s_next(m, p, &l); 1616 * If we overflowed the seq_file before, then we want
1617 * to just reuse the trace_seq buffer again.
1618 */
1619 if (iter->leftover)
1620 p = iter;
1621 else {
1622 l = *pos - 1;
1623 p = s_next(m, p, &l);
1624 }
1618 } 1625 }
1619 1626
1620 trace_event_read_lock(); 1627 trace_event_read_lock();
@@ -1922,6 +1929,7 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter)
1922static int s_show(struct seq_file *m, void *v) 1929static int s_show(struct seq_file *m, void *v)
1923{ 1930{
1924 struct trace_iterator *iter = v; 1931 struct trace_iterator *iter = v;
1932 int ret;
1925 1933
1926 if (iter->ent == NULL) { 1934 if (iter->ent == NULL) {
1927 if (iter->tr) { 1935 if (iter->tr) {
@@ -1941,9 +1949,27 @@ static int s_show(struct seq_file *m, void *v)
1941 if (!(trace_flags & TRACE_ITER_VERBOSE)) 1949 if (!(trace_flags & TRACE_ITER_VERBOSE))
1942 print_func_help_header(m); 1950 print_func_help_header(m);
1943 } 1951 }
1952 } else if (iter->leftover) {
1953 /*
1954 * If we filled the seq_file buffer earlier, we
1955 * want to just show it now.
1956 */
1957 ret = trace_print_seq(m, &iter->seq);
1958
1959 /* ret should this time be zero, but you never know */
1960 iter->leftover = ret;
1961
1944 } else { 1962 } else {
1945 print_trace_line(iter); 1963 print_trace_line(iter);
1946 trace_print_seq(m, &iter->seq); 1964 ret = trace_print_seq(m, &iter->seq);
1965 /*
1966 * If we overflow the seq_file buffer, then it will
1967 * ask us for this data again at start up.
1968 * Use that instead.
1969 * ret is 0 if seq_file write succeeded.
1970 * -1 otherwise.
1971 */
1972 iter->leftover = ret;
1947 } 1973 }
1948 1974
1949 return 0; 1975 return 0;
@@ -2440,7 +2466,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2440 return ret; 2466 return ret;
2441 } 2467 }
2442 2468
2443 filp->f_pos += cnt; 2469 *ppos += cnt;
2444 2470
2445 return cnt; 2471 return cnt;
2446} 2472}
@@ -2582,7 +2608,7 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf,
2582 } 2608 }
2583 mutex_unlock(&trace_types_lock); 2609 mutex_unlock(&trace_types_lock);
2584 2610
2585 filp->f_pos += cnt; 2611 *ppos += cnt;
2586 2612
2587 return cnt; 2613 return cnt;
2588} 2614}
@@ -2764,7 +2790,7 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2764 if (err) 2790 if (err)
2765 return err; 2791 return err;
2766 2792
2767 filp->f_pos += ret; 2793 *ppos += ret;
2768 2794
2769 return ret; 2795 return ret;
2770} 2796}
@@ -2897,6 +2923,10 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
2897 else 2923 else
2898 cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask); 2924 cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask);
2899 2925
2926
2927 if (iter->trace->pipe_close)
2928 iter->trace->pipe_close(iter);
2929
2900 mutex_unlock(&trace_types_lock); 2930 mutex_unlock(&trace_types_lock);
2901 2931
2902 free_cpumask_var(iter->started); 2932 free_cpumask_var(iter->started);
@@ -3299,7 +3329,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
3299 } 3329 }
3300 } 3330 }
3301 3331
3302 filp->f_pos += cnt; 3332 *ppos += cnt;
3303 3333
3304 /* If check pages failed, return ENOMEM */ 3334 /* If check pages failed, return ENOMEM */
3305 if (tracing_disabled) 3335 if (tracing_disabled)
@@ -3334,7 +3364,6 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
3334 size_t cnt, loff_t *fpos) 3364 size_t cnt, loff_t *fpos)
3335{ 3365{
3336 char *buf; 3366 char *buf;
3337 char *end;
3338 3367
3339 if (tracing_disabled) 3368 if (tracing_disabled)
3340 return -EINVAL; 3369 return -EINVAL;
@@ -3342,7 +3371,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
3342 if (cnt > TRACE_BUF_SIZE) 3371 if (cnt > TRACE_BUF_SIZE)
3343 cnt = TRACE_BUF_SIZE; 3372 cnt = TRACE_BUF_SIZE;
3344 3373
3345 buf = kmalloc(cnt + 1, GFP_KERNEL); 3374 buf = kmalloc(cnt + 2, GFP_KERNEL);
3346 if (buf == NULL) 3375 if (buf == NULL)
3347 return -ENOMEM; 3376 return -ENOMEM;
3348 3377
@@ -3350,14 +3379,13 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
3350 kfree(buf); 3379 kfree(buf);
3351 return -EFAULT; 3380 return -EFAULT;
3352 } 3381 }
3382 if (buf[cnt-1] != '\n') {
3383 buf[cnt] = '\n';
3384 buf[cnt+1] = '\0';
3385 } else
3386 buf[cnt] = '\0';
3353 3387
3354 /* Cut from the first nil or newline. */ 3388 cnt = mark_printk("%s", buf);
3355 buf[cnt] = '\0';
3356 end = strchr(buf, '\n');
3357 if (end)
3358 *end = '\0';
3359
3360 cnt = mark_printk("%s\n", buf);
3361 kfree(buf); 3389 kfree(buf);
3362 *fpos += cnt; 3390 *fpos += cnt;
3363 3391
@@ -3730,7 +3758,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
3730 3758
3731 s = kmalloc(sizeof(*s), GFP_KERNEL); 3759 s = kmalloc(sizeof(*s), GFP_KERNEL);
3732 if (!s) 3760 if (!s)
3733 return ENOMEM; 3761 return -ENOMEM;
3734 3762
3735 trace_seq_init(s); 3763 trace_seq_init(s);
3736 3764
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 542f45554883..a52bed2eedd8 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -11,6 +11,7 @@
11#include <linux/ftrace.h> 11#include <linux/ftrace.h>
12#include <trace/boot.h> 12#include <trace/boot.h>
13#include <linux/kmemtrace.h> 13#include <linux/kmemtrace.h>
14#include <linux/hw_breakpoint.h>
14 15
15#include <linux/trace_seq.h> 16#include <linux/trace_seq.h>
16#include <linux/ftrace_event.h> 17#include <linux/ftrace_event.h>
@@ -37,6 +38,7 @@ enum trace_type {
37 TRACE_KMEM_ALLOC, 38 TRACE_KMEM_ALLOC,
38 TRACE_KMEM_FREE, 39 TRACE_KMEM_FREE,
39 TRACE_BLK, 40 TRACE_BLK,
41 TRACE_KSYM,
40 42
41 __TRACE_LAST_TYPE, 43 __TRACE_LAST_TYPE,
42}; 44};
@@ -98,9 +100,32 @@ struct syscall_trace_enter {
98struct syscall_trace_exit { 100struct syscall_trace_exit {
99 struct trace_entry ent; 101 struct trace_entry ent;
100 int nr; 102 int nr;
101 unsigned long ret; 103 long ret;
102}; 104};
103 105
106struct kprobe_trace_entry {
107 struct trace_entry ent;
108 unsigned long ip;
109 int nargs;
110 unsigned long args[];
111};
112
113#define SIZEOF_KPROBE_TRACE_ENTRY(n) \
114 (offsetof(struct kprobe_trace_entry, args) + \
115 (sizeof(unsigned long) * (n)))
116
117struct kretprobe_trace_entry {
118 struct trace_entry ent;
119 unsigned long func;
120 unsigned long ret_ip;
121 int nargs;
122 unsigned long args[];
123};
124
125#define SIZEOF_KRETPROBE_TRACE_ENTRY(n) \
126 (offsetof(struct kretprobe_trace_entry, args) + \
127 (sizeof(unsigned long) * (n)))
128
104/* 129/*
105 * trace_flag_type is an enumeration that holds different 130 * trace_flag_type is an enumeration that holds different
106 * states when a trace occurs. These are: 131 * states when a trace occurs. These are:
@@ -209,6 +234,7 @@ extern void __ftrace_bad_type(void);
209 TRACE_KMEM_ALLOC); \ 234 TRACE_KMEM_ALLOC); \
210 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ 235 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
211 TRACE_KMEM_FREE); \ 236 TRACE_KMEM_FREE); \
237 IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\
212 __ftrace_bad_type(); \ 238 __ftrace_bad_type(); \
213 } while (0) 239 } while (0)
214 240
@@ -246,6 +272,7 @@ struct tracer_flags {
246 * @pipe_open: called when the trace_pipe file is opened 272 * @pipe_open: called when the trace_pipe file is opened
247 * @wait_pipe: override how the user waits for traces on trace_pipe 273 * @wait_pipe: override how the user waits for traces on trace_pipe
248 * @close: called when the trace file is released 274 * @close: called when the trace file is released
275 * @pipe_close: called when the trace_pipe file is released
249 * @read: override the default read callback on trace_pipe 276 * @read: override the default read callback on trace_pipe
250 * @splice_read: override the default splice_read callback on trace_pipe 277 * @splice_read: override the default splice_read callback on trace_pipe
251 * @selftest: selftest to run on boot (see trace_selftest.c) 278 * @selftest: selftest to run on boot (see trace_selftest.c)
@@ -264,6 +291,7 @@ struct tracer {
264 void (*pipe_open)(struct trace_iterator *iter); 291 void (*pipe_open)(struct trace_iterator *iter);
265 void (*wait_pipe)(struct trace_iterator *iter); 292 void (*wait_pipe)(struct trace_iterator *iter);
266 void (*close)(struct trace_iterator *iter); 293 void (*close)(struct trace_iterator *iter);
294 void (*pipe_close)(struct trace_iterator *iter);
267 ssize_t (*read)(struct trace_iterator *iter, 295 ssize_t (*read)(struct trace_iterator *iter,
268 struct file *filp, char __user *ubuf, 296 struct file *filp, char __user *ubuf,
269 size_t cnt, loff_t *ppos); 297 size_t cnt, loff_t *ppos);
@@ -364,6 +392,8 @@ int register_tracer(struct tracer *type);
364void unregister_tracer(struct tracer *type); 392void unregister_tracer(struct tracer *type);
365int is_tracing_stopped(void); 393int is_tracing_stopped(void);
366 394
395extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr);
396
367extern unsigned long nsecs_to_usecs(unsigned long nsecs); 397extern unsigned long nsecs_to_usecs(unsigned long nsecs);
368 398
369#ifdef CONFIG_TRACER_MAX_TRACE 399#ifdef CONFIG_TRACER_MAX_TRACE
@@ -438,6 +468,8 @@ extern int trace_selftest_startup_branch(struct tracer *trace,
438 struct trace_array *tr); 468 struct trace_array *tr);
439extern int trace_selftest_startup_hw_branches(struct tracer *trace, 469extern int trace_selftest_startup_hw_branches(struct tracer *trace,
440 struct trace_array *tr); 470 struct trace_array *tr);
471extern int trace_selftest_startup_ksym(struct tracer *trace,
472 struct trace_array *tr);
441#endif /* CONFIG_FTRACE_STARTUP_TEST */ 473#endif /* CONFIG_FTRACE_STARTUP_TEST */
442 474
443extern void *head_page(struct trace_array_cpu *data); 475extern void *head_page(struct trace_array_cpu *data);
@@ -483,10 +515,6 @@ static inline int ftrace_graph_addr(unsigned long addr)
483 return 0; 515 return 0;
484} 516}
485#else 517#else
486static inline int ftrace_trace_addr(unsigned long addr)
487{
488 return 1;
489}
490static inline int ftrace_graph_addr(unsigned long addr) 518static inline int ftrace_graph_addr(unsigned long addr)
491{ 519{
492 return 1; 520 return 1;
@@ -500,12 +528,12 @@ print_graph_function(struct trace_iterator *iter)
500} 528}
501#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 529#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
502 530
503extern struct pid *ftrace_pid_trace; 531extern struct list_head ftrace_pids;
504 532
505#ifdef CONFIG_FUNCTION_TRACER 533#ifdef CONFIG_FUNCTION_TRACER
506static inline int ftrace_trace_task(struct task_struct *task) 534static inline int ftrace_trace_task(struct task_struct *task)
507{ 535{
508 if (!ftrace_pid_trace) 536 if (list_empty(&ftrace_pids))
509 return 1; 537 return 1;
510 538
511 return test_tsk_trace_trace(task); 539 return test_tsk_trace_trace(task);
@@ -687,7 +715,6 @@ struct event_filter {
687 int n_preds; 715 int n_preds;
688 struct filter_pred **preds; 716 struct filter_pred **preds;
689 char *filter_string; 717 char *filter_string;
690 bool no_reset;
691}; 718};
692 719
693struct event_subsystem { 720struct event_subsystem {
@@ -699,22 +726,40 @@ struct event_subsystem {
699}; 726};
700 727
701struct filter_pred; 728struct filter_pred;
729struct regex;
702 730
703typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event, 731typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event,
704 int val1, int val2); 732 int val1, int val2);
705 733
734typedef int (*regex_match_func)(char *str, struct regex *r, int len);
735
736enum regex_type {
737 MATCH_FULL = 0,
738 MATCH_FRONT_ONLY,
739 MATCH_MIDDLE_ONLY,
740 MATCH_END_ONLY,
741};
742
743struct regex {
744 char pattern[MAX_FILTER_STR_VAL];
745 int len;
746 int field_len;
747 regex_match_func match;
748};
749
706struct filter_pred { 750struct filter_pred {
707 filter_pred_fn_t fn; 751 filter_pred_fn_t fn;
708 u64 val; 752 u64 val;
709 char str_val[MAX_FILTER_STR_VAL]; 753 struct regex regex;
710 int str_len; 754 char *field_name;
711 char *field_name; 755 int offset;
712 int offset; 756 int not;
713 int not; 757 int op;
714 int op; 758 int pop_n;
715 int pop_n;
716}; 759};
717 760
761extern enum regex_type
762filter_parse_regex(char *buff, int len, char **search, int *not);
718extern void print_event_filter(struct ftrace_event_call *call, 763extern void print_event_filter(struct ftrace_event_call *call,
719 struct trace_seq *s); 764 struct trace_seq *s);
720extern int apply_event_filter(struct ftrace_event_call *call, 765extern int apply_event_filter(struct ftrace_event_call *call,
@@ -730,7 +775,8 @@ filter_check_discard(struct ftrace_event_call *call, void *rec,
730 struct ring_buffer *buffer, 775 struct ring_buffer *buffer,
731 struct ring_buffer_event *event) 776 struct ring_buffer_event *event)
732{ 777{
733 if (unlikely(call->filter_active) && !filter_match_preds(call, rec)) { 778 if (unlikely(call->filter_active) &&
779 !filter_match_preds(call->filter, rec)) {
734 ring_buffer_discard_commit(buffer, event); 780 ring_buffer_discard_commit(buffer, event);
735 return 1; 781 return 1;
736 } 782 }
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 7a7a9fd249a9..4a194f08f88c 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -34,6 +34,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
34 struct trace_array *tr = branch_tracer; 34 struct trace_array *tr = branch_tracer;
35 struct ring_buffer_event *event; 35 struct ring_buffer_event *event;
36 struct trace_branch *entry; 36 struct trace_branch *entry;
37 struct ring_buffer *buffer;
37 unsigned long flags; 38 unsigned long flags;
38 int cpu, pc; 39 int cpu, pc;
39 const char *p; 40 const char *p;
@@ -54,7 +55,8 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
54 goto out; 55 goto out;
55 56
56 pc = preempt_count(); 57 pc = preempt_count();
57 event = trace_buffer_lock_reserve(tr, TRACE_BRANCH, 58 buffer = tr->buffer;
59 event = trace_buffer_lock_reserve(buffer, TRACE_BRANCH,
58 sizeof(*entry), flags, pc); 60 sizeof(*entry), flags, pc);
59 if (!event) 61 if (!event)
60 goto out; 62 goto out;
@@ -74,8 +76,8 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
74 entry->line = f->line; 76 entry->line = f->line;
75 entry->correct = val == expect; 77 entry->correct = val == expect;
76 78
77 if (!filter_check_discard(call, entry, tr->buffer, event)) 79 if (!filter_check_discard(call, entry, buffer, event))
78 ring_buffer_unlock_commit(tr->buffer, event); 80 ring_buffer_unlock_commit(buffer, event);
79 81
80 out: 82 out:
81 atomic_dec(&tr->data[cpu]->disabled); 83 atomic_dec(&tr->data[cpu]->disabled);
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 20c5f92e28a8..878c03f386ba 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -20,6 +20,8 @@
20#include <linux/ktime.h> 20#include <linux/ktime.h>
21#include <linux/trace_clock.h> 21#include <linux/trace_clock.h>
22 22
23#include "trace.h"
24
23/* 25/*
24 * trace_clock_local(): the simplest and least coherent tracing clock. 26 * trace_clock_local(): the simplest and least coherent tracing clock.
25 * 27 *
@@ -28,17 +30,17 @@
28 */ 30 */
29u64 notrace trace_clock_local(void) 31u64 notrace trace_clock_local(void)
30{ 32{
31 unsigned long flags;
32 u64 clock; 33 u64 clock;
34 int resched;
33 35
34 /* 36 /*
35 * sched_clock() is an architecture implemented, fast, scalable, 37 * sched_clock() is an architecture implemented, fast, scalable,
36 * lockless clock. It is not guaranteed to be coherent across 38 * lockless clock. It is not guaranteed to be coherent across
37 * CPUs, nor across CPU idle events. 39 * CPUs, nor across CPU idle events.
38 */ 40 */
39 raw_local_irq_save(flags); 41 resched = ftrace_preempt_disable();
40 clock = sched_clock(); 42 clock = sched_clock();
41 raw_local_irq_restore(flags); 43 ftrace_preempt_enable(resched);
42 44
43 return clock; 45 return clock;
44} 46}
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index ead3d724599d..c16a08f399df 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -364,3 +364,19 @@ FTRACE_ENTRY(kmem_free, kmemtrace_free_entry,
364 F_printk("type:%u call_site:%lx ptr:%p", 364 F_printk("type:%u call_site:%lx ptr:%p",
365 __entry->type_id, __entry->call_site, __entry->ptr) 365 __entry->type_id, __entry->call_site, __entry->ptr)
366); 366);
367
368FTRACE_ENTRY(ksym_trace, ksym_trace_entry,
369
370 TRACE_KSYM,
371
372 F_STRUCT(
373 __field( unsigned long, ip )
374 __field( unsigned char, type )
375 __array( char , cmd, TASK_COMM_LEN )
376 __field( unsigned long, addr )
377 ),
378
379 F_printk("ip: %pF type: %d ksym_name: %pS cmd: %s",
380 (void *)__entry->ip, (unsigned int)__entry->type,
381 (void *)__entry->addr, __entry->cmd)
382);
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index dd44b8768867..d9c60f80aa0d 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -8,17 +8,14 @@
8#include <linux/module.h> 8#include <linux/module.h>
9#include "trace.h" 9#include "trace.h"
10 10
11/*
12 * We can't use a size but a type in alloc_percpu()
13 * So let's create a dummy type that matches the desired size
14 */
15typedef struct {char buf[FTRACE_MAX_PROFILE_SIZE];} profile_buf_t;
16 11
17char *trace_profile_buf; 12char *perf_trace_buf;
18EXPORT_SYMBOL_GPL(trace_profile_buf); 13EXPORT_SYMBOL_GPL(perf_trace_buf);
14
15char *perf_trace_buf_nmi;
16EXPORT_SYMBOL_GPL(perf_trace_buf_nmi);
19 17
20char *trace_profile_buf_nmi; 18typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ;
21EXPORT_SYMBOL_GPL(trace_profile_buf_nmi);
22 19
23/* Count the events in use (per event id, not per instance) */ 20/* Count the events in use (per event id, not per instance) */
24static int total_profile_count; 21static int total_profile_count;
@@ -31,29 +28,34 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event)
31 if (atomic_inc_return(&event->profile_count)) 28 if (atomic_inc_return(&event->profile_count))
32 return 0; 29 return 0;
33 30
34 if (!total_profile_count++) { 31 if (!total_profile_count) {
35 buf = (char *)alloc_percpu(profile_buf_t); 32 buf = (char *)alloc_percpu(perf_trace_t);
36 if (!buf) 33 if (!buf)
37 goto fail_buf; 34 goto fail_buf;
38 35
39 rcu_assign_pointer(trace_profile_buf, buf); 36 rcu_assign_pointer(perf_trace_buf, buf);
40 37
41 buf = (char *)alloc_percpu(profile_buf_t); 38 buf = (char *)alloc_percpu(perf_trace_t);
42 if (!buf) 39 if (!buf)
43 goto fail_buf_nmi; 40 goto fail_buf_nmi;
44 41
45 rcu_assign_pointer(trace_profile_buf_nmi, buf); 42 rcu_assign_pointer(perf_trace_buf_nmi, buf);
46 } 43 }
47 44
48 ret = event->profile_enable(); 45 ret = event->profile_enable(event);
49 if (!ret) 46 if (!ret) {
47 total_profile_count++;
50 return 0; 48 return 0;
49 }
51 50
52 kfree(trace_profile_buf_nmi);
53fail_buf_nmi: 51fail_buf_nmi:
54 kfree(trace_profile_buf); 52 if (!total_profile_count) {
53 free_percpu(perf_trace_buf_nmi);
54 free_percpu(perf_trace_buf);
55 perf_trace_buf_nmi = NULL;
56 perf_trace_buf = NULL;
57 }
55fail_buf: 58fail_buf:
56 total_profile_count--;
57 atomic_dec(&event->profile_count); 59 atomic_dec(&event->profile_count);
58 60
59 return ret; 61 return ret;
@@ -84,14 +86,14 @@ static void ftrace_profile_disable_event(struct ftrace_event_call *event)
84 if (!atomic_add_negative(-1, &event->profile_count)) 86 if (!atomic_add_negative(-1, &event->profile_count))
85 return; 87 return;
86 88
87 event->profile_disable(); 89 event->profile_disable(event);
88 90
89 if (!--total_profile_count) { 91 if (!--total_profile_count) {
90 buf = trace_profile_buf; 92 buf = perf_trace_buf;
91 rcu_assign_pointer(trace_profile_buf, NULL); 93 rcu_assign_pointer(perf_trace_buf, NULL);
92 94
93 nmi_buf = trace_profile_buf_nmi; 95 nmi_buf = perf_trace_buf_nmi;
94 rcu_assign_pointer(trace_profile_buf_nmi, NULL); 96 rcu_assign_pointer(perf_trace_buf_nmi, NULL);
95 97
96 /* 98 /*
97 * Ensure every events in profiling have finished before 99 * Ensure every events in profiling have finished before
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index d128f65778e6..1d18315dc836 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -93,9 +93,7 @@ int trace_define_common_fields(struct ftrace_event_call *call)
93} 93}
94EXPORT_SYMBOL_GPL(trace_define_common_fields); 94EXPORT_SYMBOL_GPL(trace_define_common_fields);
95 95
96#ifdef CONFIG_MODULES 96void trace_destroy_fields(struct ftrace_event_call *call)
97
98static void trace_destroy_fields(struct ftrace_event_call *call)
99{ 97{
100 struct ftrace_event_field *field, *next; 98 struct ftrace_event_field *field, *next;
101 99
@@ -107,8 +105,6 @@ static void trace_destroy_fields(struct ftrace_event_call *call)
107 } 105 }
108} 106}
109 107
110#endif /* CONFIG_MODULES */
111
112static void ftrace_event_enable_disable(struct ftrace_event_call *call, 108static void ftrace_event_enable_disable(struct ftrace_event_call *call,
113 int enable) 109 int enable)
114{ 110{
@@ -117,14 +113,14 @@ static void ftrace_event_enable_disable(struct ftrace_event_call *call,
117 if (call->enabled) { 113 if (call->enabled) {
118 call->enabled = 0; 114 call->enabled = 0;
119 tracing_stop_cmdline_record(); 115 tracing_stop_cmdline_record();
120 call->unregfunc(call->data); 116 call->unregfunc(call);
121 } 117 }
122 break; 118 break;
123 case 1: 119 case 1:
124 if (!call->enabled) { 120 if (!call->enabled) {
125 call->enabled = 1; 121 call->enabled = 1;
126 tracing_start_cmdline_record(); 122 tracing_start_cmdline_record();
127 call->regfunc(call->data); 123 call->regfunc(call);
128 } 124 }
129 break; 125 break;
130 } 126 }
@@ -507,7 +503,7 @@ extern char *__bad_type_size(void);
507#define FIELD(type, name) \ 503#define FIELD(type, name) \
508 sizeof(type) != sizeof(field.name) ? __bad_type_size() : \ 504 sizeof(type) != sizeof(field.name) ? __bad_type_size() : \
509 #type, "common_" #name, offsetof(typeof(field), name), \ 505 #type, "common_" #name, offsetof(typeof(field), name), \
510 sizeof(field.name) 506 sizeof(field.name), is_signed_type(type)
511 507
512static int trace_write_header(struct trace_seq *s) 508static int trace_write_header(struct trace_seq *s)
513{ 509{
@@ -515,17 +511,17 @@ static int trace_write_header(struct trace_seq *s)
515 511
516 /* struct trace_entry */ 512 /* struct trace_entry */
517 return trace_seq_printf(s, 513 return trace_seq_printf(s,
518 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" 514 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
519 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" 515 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
520 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" 516 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
521 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" 517 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
522 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" 518 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
523 "\n", 519 "\n",
524 FIELD(unsigned short, type), 520 FIELD(unsigned short, type),
525 FIELD(unsigned char, flags), 521 FIELD(unsigned char, flags),
526 FIELD(unsigned char, preempt_count), 522 FIELD(unsigned char, preempt_count),
527 FIELD(int, pid), 523 FIELD(int, pid),
528 FIELD(int, lock_depth)); 524 FIELD(int, lock_depth));
529} 525}
530 526
531static ssize_t 527static ssize_t
@@ -878,9 +874,9 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
878 "'%s/filter' entry\n", name); 874 "'%s/filter' entry\n", name);
879 } 875 }
880 876
881 entry = trace_create_file("enable", 0644, system->entry, 877 trace_create_file("enable", 0644, system->entry,
882 (void *)system->name, 878 (void *)system->name,
883 &ftrace_system_enable_fops); 879 &ftrace_system_enable_fops);
884 880
885 return system->entry; 881 return system->entry;
886} 882}
@@ -892,7 +888,6 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
892 const struct file_operations *filter, 888 const struct file_operations *filter,
893 const struct file_operations *format) 889 const struct file_operations *format)
894{ 890{
895 struct dentry *entry;
896 int ret; 891 int ret;
897 892
898 /* 893 /*
@@ -910,12 +905,12 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
910 } 905 }
911 906
912 if (call->regfunc) 907 if (call->regfunc)
913 entry = trace_create_file("enable", 0644, call->dir, call, 908 trace_create_file("enable", 0644, call->dir, call,
914 enable); 909 enable);
915 910
916 if (call->id && call->profile_enable) 911 if (call->id && call->profile_enable)
917 entry = trace_create_file("id", 0444, call->dir, call, 912 trace_create_file("id", 0444, call->dir, call,
918 id); 913 id);
919 914
920 if (call->define_fields) { 915 if (call->define_fields) {
921 ret = call->define_fields(call); 916 ret = call->define_fields(call);
@@ -924,41 +919,60 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
924 " events/%s\n", call->name); 919 " events/%s\n", call->name);
925 return ret; 920 return ret;
926 } 921 }
927 entry = trace_create_file("filter", 0644, call->dir, call, 922 trace_create_file("filter", 0644, call->dir, call,
928 filter); 923 filter);
929 } 924 }
930 925
931 /* A trace may not want to export its format */ 926 /* A trace may not want to export its format */
932 if (!call->show_format) 927 if (!call->show_format)
933 return 0; 928 return 0;
934 929
935 entry = trace_create_file("format", 0444, call->dir, call, 930 trace_create_file("format", 0444, call->dir, call,
936 format); 931 format);
937 932
938 return 0; 933 return 0;
939} 934}
940 935
941#define for_each_event(event, start, end) \ 936static int __trace_add_event_call(struct ftrace_event_call *call)
942 for (event = start; \ 937{
943 (unsigned long)event < (unsigned long)end; \ 938 struct dentry *d_events;
944 event++) 939 int ret;
945 940
946#ifdef CONFIG_MODULES 941 if (!call->name)
942 return -EINVAL;
947 943
948static LIST_HEAD(ftrace_module_file_list); 944 if (call->raw_init) {
945 ret = call->raw_init(call);
946 if (ret < 0) {
947 if (ret != -ENOSYS)
948 pr_warning("Could not initialize trace "
949 "events/%s\n", call->name);
950 return ret;
951 }
952 }
949 953
950/* 954 d_events = event_trace_events_dir();
951 * Modules must own their file_operations to keep up with 955 if (!d_events)
952 * reference counting. 956 return -ENOENT;
953 */ 957
954struct ftrace_module_file_ops { 958 ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
955 struct list_head list; 959 &ftrace_enable_fops, &ftrace_event_filter_fops,
956 struct module *mod; 960 &ftrace_event_format_fops);
957 struct file_operations id; 961 if (!ret)
958 struct file_operations enable; 962 list_add(&call->list, &ftrace_events);
959 struct file_operations format; 963
960 struct file_operations filter; 964 return ret;
961}; 965}
966
967/* Add an additional event_call dynamically */
968int trace_add_event_call(struct ftrace_event_call *call)
969{
970 int ret;
971 mutex_lock(&event_mutex);
972 ret = __trace_add_event_call(call);
973 mutex_unlock(&event_mutex);
974 return ret;
975}
962 976
963static void remove_subsystem_dir(const char *name) 977static void remove_subsystem_dir(const char *name)
964{ 978{
@@ -986,6 +1000,53 @@ static void remove_subsystem_dir(const char *name)
986 } 1000 }
987} 1001}
988 1002
1003/*
1004 * Must be called under locking both of event_mutex and trace_event_mutex.
1005 */
1006static void __trace_remove_event_call(struct ftrace_event_call *call)
1007{
1008 ftrace_event_enable_disable(call, 0);
1009 if (call->event)
1010 __unregister_ftrace_event(call->event);
1011 debugfs_remove_recursive(call->dir);
1012 list_del(&call->list);
1013 trace_destroy_fields(call);
1014 destroy_preds(call);
1015 remove_subsystem_dir(call->system);
1016}
1017
1018/* Remove an event_call */
1019void trace_remove_event_call(struct ftrace_event_call *call)
1020{
1021 mutex_lock(&event_mutex);
1022 down_write(&trace_event_mutex);
1023 __trace_remove_event_call(call);
1024 up_write(&trace_event_mutex);
1025 mutex_unlock(&event_mutex);
1026}
1027
1028#define for_each_event(event, start, end) \
1029 for (event = start; \
1030 (unsigned long)event < (unsigned long)end; \
1031 event++)
1032
1033#ifdef CONFIG_MODULES
1034
1035static LIST_HEAD(ftrace_module_file_list);
1036
1037/*
1038 * Modules must own their file_operations to keep up with
1039 * reference counting.
1040 */
1041struct ftrace_module_file_ops {
1042 struct list_head list;
1043 struct module *mod;
1044 struct file_operations id;
1045 struct file_operations enable;
1046 struct file_operations format;
1047 struct file_operations filter;
1048};
1049
989static struct ftrace_module_file_ops * 1050static struct ftrace_module_file_ops *
990trace_create_file_ops(struct module *mod) 1051trace_create_file_ops(struct module *mod)
991{ 1052{
@@ -1043,7 +1104,7 @@ static void trace_module_add_events(struct module *mod)
1043 if (!call->name) 1104 if (!call->name)
1044 continue; 1105 continue;
1045 if (call->raw_init) { 1106 if (call->raw_init) {
1046 ret = call->raw_init(); 1107 ret = call->raw_init(call);
1047 if (ret < 0) { 1108 if (ret < 0) {
1048 if (ret != -ENOSYS) 1109 if (ret != -ENOSYS)
1049 pr_warning("Could not initialize trace " 1110 pr_warning("Could not initialize trace "
@@ -1061,10 +1122,11 @@ static void trace_module_add_events(struct module *mod)
1061 return; 1122 return;
1062 } 1123 }
1063 call->mod = mod; 1124 call->mod = mod;
1064 list_add(&call->list, &ftrace_events); 1125 ret = event_create_dir(call, d_events,
1065 event_create_dir(call, d_events, 1126 &file_ops->id, &file_ops->enable,
1066 &file_ops->id, &file_ops->enable, 1127 &file_ops->filter, &file_ops->format);
1067 &file_ops->filter, &file_ops->format); 1128 if (!ret)
1129 list_add(&call->list, &ftrace_events);
1068 } 1130 }
1069} 1131}
1070 1132
@@ -1078,14 +1140,7 @@ static void trace_module_remove_events(struct module *mod)
1078 list_for_each_entry_safe(call, p, &ftrace_events, list) { 1140 list_for_each_entry_safe(call, p, &ftrace_events, list) {
1079 if (call->mod == mod) { 1141 if (call->mod == mod) {
1080 found = true; 1142 found = true;
1081 ftrace_event_enable_disable(call, 0); 1143 __trace_remove_event_call(call);
1082 if (call->event)
1083 __unregister_ftrace_event(call->event);
1084 debugfs_remove_recursive(call->dir);
1085 list_del(&call->list);
1086 trace_destroy_fields(call);
1087 destroy_preds(call);
1088 remove_subsystem_dir(call->system);
1089 } 1144 }
1090 } 1145 }
1091 1146
@@ -1203,7 +1258,7 @@ static __init int event_trace_init(void)
1203 if (!call->name) 1258 if (!call->name)
1204 continue; 1259 continue;
1205 if (call->raw_init) { 1260 if (call->raw_init) {
1206 ret = call->raw_init(); 1261 ret = call->raw_init(call);
1207 if (ret < 0) { 1262 if (ret < 0) {
1208 if (ret != -ENOSYS) 1263 if (ret != -ENOSYS)
1209 pr_warning("Could not initialize trace " 1264 pr_warning("Could not initialize trace "
@@ -1211,10 +1266,12 @@ static __init int event_trace_init(void)
1211 continue; 1266 continue;
1212 } 1267 }
1213 } 1268 }
1214 list_add(&call->list, &ftrace_events); 1269 ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
1215 event_create_dir(call, d_events, &ftrace_event_id_fops, 1270 &ftrace_enable_fops,
1216 &ftrace_enable_fops, &ftrace_event_filter_fops, 1271 &ftrace_event_filter_fops,
1217 &ftrace_event_format_fops); 1272 &ftrace_event_format_fops);
1273 if (!ret)
1274 list_add(&call->list, &ftrace_events);
1218 } 1275 }
1219 1276
1220 while (true) { 1277 while (true) {
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 23245785927f..50504cb228de 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -18,11 +18,10 @@
18 * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com> 18 * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
19 */ 19 */
20 20
21#include <linux/debugfs.h>
22#include <linux/uaccess.h>
23#include <linux/module.h> 21#include <linux/module.h>
24#include <linux/ctype.h> 22#include <linux/ctype.h>
25#include <linux/mutex.h> 23#include <linux/mutex.h>
24#include <linux/perf_event.h>
26 25
27#include "trace.h" 26#include "trace.h"
28#include "trace_output.h" 27#include "trace_output.h"
@@ -31,6 +30,7 @@ enum filter_op_ids
31{ 30{
32 OP_OR, 31 OP_OR,
33 OP_AND, 32 OP_AND,
33 OP_GLOB,
34 OP_NE, 34 OP_NE,
35 OP_EQ, 35 OP_EQ,
36 OP_LT, 36 OP_LT,
@@ -48,16 +48,17 @@ struct filter_op {
48}; 48};
49 49
50static struct filter_op filter_ops[] = { 50static struct filter_op filter_ops[] = {
51 { OP_OR, "||", 1 }, 51 { OP_OR, "||", 1 },
52 { OP_AND, "&&", 2 }, 52 { OP_AND, "&&", 2 },
53 { OP_NE, "!=", 4 }, 53 { OP_GLOB, "~", 4 },
54 { OP_EQ, "==", 4 }, 54 { OP_NE, "!=", 4 },
55 { OP_LT, "<", 5 }, 55 { OP_EQ, "==", 4 },
56 { OP_LE, "<=", 5 }, 56 { OP_LT, "<", 5 },
57 { OP_GT, ">", 5 }, 57 { OP_LE, "<=", 5 },
58 { OP_GE, ">=", 5 }, 58 { OP_GT, ">", 5 },
59 { OP_NONE, "OP_NONE", 0 }, 59 { OP_GE, ">=", 5 },
60 { OP_OPEN_PAREN, "(", 0 }, 60 { OP_NONE, "OP_NONE", 0 },
61 { OP_OPEN_PAREN, "(", 0 },
61}; 62};
62 63
63enum { 64enum {
@@ -197,9 +198,9 @@ static int filter_pred_string(struct filter_pred *pred, void *event,
197 char *addr = (char *)(event + pred->offset); 198 char *addr = (char *)(event + pred->offset);
198 int cmp, match; 199 int cmp, match;
199 200
200 cmp = strncmp(addr, pred->str_val, pred->str_len); 201 cmp = pred->regex.match(addr, &pred->regex, pred->regex.field_len);
201 202
202 match = (!cmp) ^ pred->not; 203 match = cmp ^ pred->not;
203 204
204 return match; 205 return match;
205} 206}
@@ -211,9 +212,9 @@ static int filter_pred_pchar(struct filter_pred *pred, void *event,
211 char **addr = (char **)(event + pred->offset); 212 char **addr = (char **)(event + pred->offset);
212 int cmp, match; 213 int cmp, match;
213 214
214 cmp = strncmp(*addr, pred->str_val, pred->str_len); 215 cmp = pred->regex.match(*addr, &pred->regex, pred->regex.field_len);
215 216
216 match = (!cmp) ^ pred->not; 217 match = cmp ^ pred->not;
217 218
218 return match; 219 return match;
219} 220}
@@ -237,9 +238,9 @@ static int filter_pred_strloc(struct filter_pred *pred, void *event,
237 char *addr = (char *)(event + str_loc); 238 char *addr = (char *)(event + str_loc);
238 int cmp, match; 239 int cmp, match;
239 240
240 cmp = strncmp(addr, pred->str_val, str_len); 241 cmp = pred->regex.match(addr, &pred->regex, str_len);
241 242
242 match = (!cmp) ^ pred->not; 243 match = cmp ^ pred->not;
243 244
244 return match; 245 return match;
245} 246}
@@ -250,10 +251,121 @@ static int filter_pred_none(struct filter_pred *pred, void *event,
250 return 0; 251 return 0;
251} 252}
252 253
254/* Basic regex callbacks */
255static int regex_match_full(char *str, struct regex *r, int len)
256{
257 if (strncmp(str, r->pattern, len) == 0)
258 return 1;
259 return 0;
260}
261
262static int regex_match_front(char *str, struct regex *r, int len)
263{
264 if (strncmp(str, r->pattern, len) == 0)
265 return 1;
266 return 0;
267}
268
269static int regex_match_middle(char *str, struct regex *r, int len)
270{
271 if (strstr(str, r->pattern))
272 return 1;
273 return 0;
274}
275
276static int regex_match_end(char *str, struct regex *r, int len)
277{
278 char *ptr = strstr(str, r->pattern);
279
280 if (ptr && (ptr[r->len] == 0))
281 return 1;
282 return 0;
283}
284
285/**
286 * filter_parse_regex - parse a basic regex
287 * @buff: the raw regex
288 * @len: length of the regex
289 * @search: will point to the beginning of the string to compare
290 * @not: tell whether the match will have to be inverted
291 *
292 * This passes in a buffer containing a regex and this function will
293 * set search to point to the search part of the buffer and
294 * return the type of search it is (see enum above).
295 * This does modify buff.
296 *
297 * Returns enum type.
298 * search returns the pointer to use for comparison.
299 * not returns 1 if buff started with a '!'
300 * 0 otherwise.
301 */
302enum regex_type filter_parse_regex(char *buff, int len, char **search, int *not)
303{
304 int type = MATCH_FULL;
305 int i;
306
307 if (buff[0] == '!') {
308 *not = 1;
309 buff++;
310 len--;
311 } else
312 *not = 0;
313
314 *search = buff;
315
316 for (i = 0; i < len; i++) {
317 if (buff[i] == '*') {
318 if (!i) {
319 *search = buff + 1;
320 type = MATCH_END_ONLY;
321 } else {
322 if (type == MATCH_END_ONLY)
323 type = MATCH_MIDDLE_ONLY;
324 else
325 type = MATCH_FRONT_ONLY;
326 buff[i] = 0;
327 break;
328 }
329 }
330 }
331
332 return type;
333}
334
335static void filter_build_regex(struct filter_pred *pred)
336{
337 struct regex *r = &pred->regex;
338 char *search;
339 enum regex_type type = MATCH_FULL;
340 int not = 0;
341
342 if (pred->op == OP_GLOB) {
343 type = filter_parse_regex(r->pattern, r->len, &search, &not);
344 r->len = strlen(search);
345 memmove(r->pattern, search, r->len+1);
346 }
347
348 switch (type) {
349 case MATCH_FULL:
350 r->match = regex_match_full;
351 break;
352 case MATCH_FRONT_ONLY:
353 r->match = regex_match_front;
354 break;
355 case MATCH_MIDDLE_ONLY:
356 r->match = regex_match_middle;
357 break;
358 case MATCH_END_ONLY:
359 r->match = regex_match_end;
360 break;
361 }
362
363 pred->not ^= not;
364}
365
253/* return 1 if event matches, 0 otherwise (discard) */ 366/* return 1 if event matches, 0 otherwise (discard) */
254int filter_match_preds(struct ftrace_event_call *call, void *rec) 367int filter_match_preds(struct event_filter *filter, void *rec)
255{ 368{
256 struct event_filter *filter = call->filter;
257 int match, top = 0, val1 = 0, val2 = 0; 369 int match, top = 0, val1 = 0, val2 = 0;
258 int stack[MAX_FILTER_PRED]; 370 int stack[MAX_FILTER_PRED];
259 struct filter_pred *pred; 371 struct filter_pred *pred;
@@ -396,7 +508,7 @@ static void filter_clear_pred(struct filter_pred *pred)
396{ 508{
397 kfree(pred->field_name); 509 kfree(pred->field_name);
398 pred->field_name = NULL; 510 pred->field_name = NULL;
399 pred->str_len = 0; 511 pred->regex.len = 0;
400} 512}
401 513
402static int filter_set_pred(struct filter_pred *dest, 514static int filter_set_pred(struct filter_pred *dest,
@@ -426,9 +538,8 @@ static void filter_disable_preds(struct ftrace_event_call *call)
426 filter->preds[i]->fn = filter_pred_none; 538 filter->preds[i]->fn = filter_pred_none;
427} 539}
428 540
429void destroy_preds(struct ftrace_event_call *call) 541static void __free_preds(struct event_filter *filter)
430{ 542{
431 struct event_filter *filter = call->filter;
432 int i; 543 int i;
433 544
434 if (!filter) 545 if (!filter)
@@ -441,21 +552,24 @@ void destroy_preds(struct ftrace_event_call *call)
441 kfree(filter->preds); 552 kfree(filter->preds);
442 kfree(filter->filter_string); 553 kfree(filter->filter_string);
443 kfree(filter); 554 kfree(filter);
555}
556
557void destroy_preds(struct ftrace_event_call *call)
558{
559 __free_preds(call->filter);
444 call->filter = NULL; 560 call->filter = NULL;
561 call->filter_active = 0;
445} 562}
446 563
447static int init_preds(struct ftrace_event_call *call) 564static struct event_filter *__alloc_preds(void)
448{ 565{
449 struct event_filter *filter; 566 struct event_filter *filter;
450 struct filter_pred *pred; 567 struct filter_pred *pred;
451 int i; 568 int i;
452 569
453 if (call->filter) 570 filter = kzalloc(sizeof(*filter), GFP_KERNEL);
454 return 0; 571 if (!filter)
455 572 return ERR_PTR(-ENOMEM);
456 filter = call->filter = kzalloc(sizeof(*filter), GFP_KERNEL);
457 if (!call->filter)
458 return -ENOMEM;
459 573
460 filter->n_preds = 0; 574 filter->n_preds = 0;
461 575
@@ -471,12 +585,24 @@ static int init_preds(struct ftrace_event_call *call)
471 filter->preds[i] = pred; 585 filter->preds[i] = pred;
472 } 586 }
473 587
474 return 0; 588 return filter;
475 589
476oom: 590oom:
477 destroy_preds(call); 591 __free_preds(filter);
592 return ERR_PTR(-ENOMEM);
593}
594
595static int init_preds(struct ftrace_event_call *call)
596{
597 if (call->filter)
598 return 0;
599
600 call->filter_active = 0;
601 call->filter = __alloc_preds();
602 if (IS_ERR(call->filter))
603 return PTR_ERR(call->filter);
478 604
479 return -ENOMEM; 605 return 0;
480} 606}
481 607
482static int init_subsystem_preds(struct event_subsystem *system) 608static int init_subsystem_preds(struct event_subsystem *system)
@@ -499,14 +625,7 @@ static int init_subsystem_preds(struct event_subsystem *system)
499 return 0; 625 return 0;
500} 626}
501 627
502enum { 628static void filter_free_subsystem_preds(struct event_subsystem *system)
503 FILTER_DISABLE_ALL,
504 FILTER_INIT_NO_RESET,
505 FILTER_SKIP_NO_RESET,
506};
507
508static void filter_free_subsystem_preds(struct event_subsystem *system,
509 int flag)
510{ 629{
511 struct ftrace_event_call *call; 630 struct ftrace_event_call *call;
512 631
@@ -517,14 +636,6 @@ static void filter_free_subsystem_preds(struct event_subsystem *system,
517 if (strcmp(call->system, system->name) != 0) 636 if (strcmp(call->system, system->name) != 0)
518 continue; 637 continue;
519 638
520 if (flag == FILTER_INIT_NO_RESET) {
521 call->filter->no_reset = false;
522 continue;
523 }
524
525 if (flag == FILTER_SKIP_NO_RESET && call->filter->no_reset)
526 continue;
527
528 filter_disable_preds(call); 639 filter_disable_preds(call);
529 remove_filter_string(call->filter); 640 remove_filter_string(call->filter);
530 } 641 }
@@ -532,10 +643,10 @@ static void filter_free_subsystem_preds(struct event_subsystem *system,
532 643
533static int filter_add_pred_fn(struct filter_parse_state *ps, 644static int filter_add_pred_fn(struct filter_parse_state *ps,
534 struct ftrace_event_call *call, 645 struct ftrace_event_call *call,
646 struct event_filter *filter,
535 struct filter_pred *pred, 647 struct filter_pred *pred,
536 filter_pred_fn_t fn) 648 filter_pred_fn_t fn)
537{ 649{
538 struct event_filter *filter = call->filter;
539 int idx, err; 650 int idx, err;
540 651
541 if (filter->n_preds == MAX_FILTER_PRED) { 652 if (filter->n_preds == MAX_FILTER_PRED) {
@@ -550,7 +661,6 @@ static int filter_add_pred_fn(struct filter_parse_state *ps,
550 return err; 661 return err;
551 662
552 filter->n_preds++; 663 filter->n_preds++;
553 call->filter_active = 1;
554 664
555 return 0; 665 return 0;
556} 666}
@@ -575,7 +685,10 @@ static bool is_string_field(struct ftrace_event_field *field)
575 685
576static int is_legal_op(struct ftrace_event_field *field, int op) 686static int is_legal_op(struct ftrace_event_field *field, int op)
577{ 687{
578 if (is_string_field(field) && (op != OP_EQ && op != OP_NE)) 688 if (is_string_field(field) &&
689 (op != OP_EQ && op != OP_NE && op != OP_GLOB))
690 return 0;
691 if (!is_string_field(field) && op == OP_GLOB)
579 return 0; 692 return 0;
580 693
581 return 1; 694 return 1;
@@ -626,6 +739,7 @@ static filter_pred_fn_t select_comparison_fn(int op, int field_size,
626 739
627static int filter_add_pred(struct filter_parse_state *ps, 740static int filter_add_pred(struct filter_parse_state *ps,
628 struct ftrace_event_call *call, 741 struct ftrace_event_call *call,
742 struct event_filter *filter,
629 struct filter_pred *pred, 743 struct filter_pred *pred,
630 bool dry_run) 744 bool dry_run)
631{ 745{
@@ -660,21 +774,22 @@ static int filter_add_pred(struct filter_parse_state *ps,
660 } 774 }
661 775
662 if (is_string_field(field)) { 776 if (is_string_field(field)) {
663 pred->str_len = field->size; 777 filter_build_regex(pred);
664 778
665 if (field->filter_type == FILTER_STATIC_STRING) 779 if (field->filter_type == FILTER_STATIC_STRING) {
666 fn = filter_pred_string; 780 fn = filter_pred_string;
667 else if (field->filter_type == FILTER_DYN_STRING) 781 pred->regex.field_len = field->size;
782 } else if (field->filter_type == FILTER_DYN_STRING)
668 fn = filter_pred_strloc; 783 fn = filter_pred_strloc;
669 else { 784 else {
670 fn = filter_pred_pchar; 785 fn = filter_pred_pchar;
671 pred->str_len = strlen(pred->str_val); 786 pred->regex.field_len = strlen(pred->regex.pattern);
672 } 787 }
673 } else { 788 } else {
674 if (field->is_signed) 789 if (field->is_signed)
675 ret = strict_strtoll(pred->str_val, 0, &val); 790 ret = strict_strtoll(pred->regex.pattern, 0, &val);
676 else 791 else
677 ret = strict_strtoull(pred->str_val, 0, &val); 792 ret = strict_strtoull(pred->regex.pattern, 0, &val);
678 if (ret) { 793 if (ret) {
679 parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0); 794 parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0);
680 return -EINVAL; 795 return -EINVAL;
@@ -694,45 +809,7 @@ static int filter_add_pred(struct filter_parse_state *ps,
694 809
695add_pred_fn: 810add_pred_fn:
696 if (!dry_run) 811 if (!dry_run)
697 return filter_add_pred_fn(ps, call, pred, fn); 812 return filter_add_pred_fn(ps, call, filter, pred, fn);
698 return 0;
699}
700
701static int filter_add_subsystem_pred(struct filter_parse_state *ps,
702 struct event_subsystem *system,
703 struct filter_pred *pred,
704 char *filter_string,
705 bool dry_run)
706{
707 struct ftrace_event_call *call;
708 int err = 0;
709 bool fail = true;
710
711 list_for_each_entry(call, &ftrace_events, list) {
712
713 if (!call->define_fields)
714 continue;
715
716 if (strcmp(call->system, system->name))
717 continue;
718
719 if (call->filter->no_reset)
720 continue;
721
722 err = filter_add_pred(ps, call, pred, dry_run);
723 if (err)
724 call->filter->no_reset = true;
725 else
726 fail = false;
727
728 if (!dry_run)
729 replace_filter_string(call->filter, filter_string);
730 }
731
732 if (fail) {
733 parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
734 return err;
735 }
736 return 0; 813 return 0;
737} 814}
738 815
@@ -933,8 +1010,9 @@ static void postfix_clear(struct filter_parse_state *ps)
933 1010
934 while (!list_empty(&ps->postfix)) { 1011 while (!list_empty(&ps->postfix)) {
935 elt = list_first_entry(&ps->postfix, struct postfix_elt, list); 1012 elt = list_first_entry(&ps->postfix, struct postfix_elt, list);
936 kfree(elt->operand);
937 list_del(&elt->list); 1013 list_del(&elt->list);
1014 kfree(elt->operand);
1015 kfree(elt);
938 } 1016 }
939} 1017}
940 1018
@@ -1044,8 +1122,8 @@ static struct filter_pred *create_pred(int op, char *operand1, char *operand2)
1044 return NULL; 1122 return NULL;
1045 } 1123 }
1046 1124
1047 strcpy(pred->str_val, operand2); 1125 strcpy(pred->regex.pattern, operand2);
1048 pred->str_len = strlen(operand2); 1126 pred->regex.len = strlen(pred->regex.pattern);
1049 1127
1050 pred->op = op; 1128 pred->op = op;
1051 1129
@@ -1089,8 +1167,8 @@ static int check_preds(struct filter_parse_state *ps)
1089 return 0; 1167 return 0;
1090} 1168}
1091 1169
1092static int replace_preds(struct event_subsystem *system, 1170static int replace_preds(struct ftrace_event_call *call,
1093 struct ftrace_event_call *call, 1171 struct event_filter *filter,
1094 struct filter_parse_state *ps, 1172 struct filter_parse_state *ps,
1095 char *filter_string, 1173 char *filter_string,
1096 bool dry_run) 1174 bool dry_run)
@@ -1137,11 +1215,7 @@ static int replace_preds(struct event_subsystem *system,
1137add_pred: 1215add_pred:
1138 if (!pred) 1216 if (!pred)
1139 return -ENOMEM; 1217 return -ENOMEM;
1140 if (call) 1218 err = filter_add_pred(ps, call, filter, pred, dry_run);
1141 err = filter_add_pred(ps, call, pred, false);
1142 else
1143 err = filter_add_subsystem_pred(ps, system, pred,
1144 filter_string, dry_run);
1145 filter_free_pred(pred); 1219 filter_free_pred(pred);
1146 if (err) 1220 if (err)
1147 return err; 1221 return err;
@@ -1152,10 +1226,50 @@ add_pred:
1152 return 0; 1226 return 0;
1153} 1227}
1154 1228
1155int apply_event_filter(struct ftrace_event_call *call, char *filter_string) 1229static int replace_system_preds(struct event_subsystem *system,
1230 struct filter_parse_state *ps,
1231 char *filter_string)
1156{ 1232{
1233 struct ftrace_event_call *call;
1234 bool fail = true;
1157 int err; 1235 int err;
1158 1236
1237 list_for_each_entry(call, &ftrace_events, list) {
1238 struct event_filter *filter = call->filter;
1239
1240 if (!call->define_fields)
1241 continue;
1242
1243 if (strcmp(call->system, system->name) != 0)
1244 continue;
1245
1246 /* try to see if the filter can be applied */
1247 err = replace_preds(call, filter, ps, filter_string, true);
1248 if (err)
1249 continue;
1250
1251 /* really apply the filter */
1252 filter_disable_preds(call);
1253 err = replace_preds(call, filter, ps, filter_string, false);
1254 if (err)
1255 filter_disable_preds(call);
1256 else {
1257 call->filter_active = 1;
1258 replace_filter_string(filter, filter_string);
1259 }
1260 fail = false;
1261 }
1262
1263 if (fail) {
1264 parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
1265 return -EINVAL;
1266 }
1267 return 0;
1268}
1269
1270int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1271{
1272 int err;
1159 struct filter_parse_state *ps; 1273 struct filter_parse_state *ps;
1160 1274
1161 mutex_lock(&event_mutex); 1275 mutex_lock(&event_mutex);
@@ -1167,8 +1281,7 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1167 if (!strcmp(strstrip(filter_string), "0")) { 1281 if (!strcmp(strstrip(filter_string), "0")) {
1168 filter_disable_preds(call); 1282 filter_disable_preds(call);
1169 remove_filter_string(call->filter); 1283 remove_filter_string(call->filter);
1170 mutex_unlock(&event_mutex); 1284 goto out_unlock;
1171 return 0;
1172 } 1285 }
1173 1286
1174 err = -ENOMEM; 1287 err = -ENOMEM;
@@ -1186,10 +1299,11 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1186 goto out; 1299 goto out;
1187 } 1300 }
1188 1301
1189 err = replace_preds(NULL, call, ps, filter_string, false); 1302 err = replace_preds(call, call->filter, ps, filter_string, false);
1190 if (err) 1303 if (err)
1191 append_filter_err(ps, call->filter); 1304 append_filter_err(ps, call->filter);
1192 1305 else
1306 call->filter_active = 1;
1193out: 1307out:
1194 filter_opstack_clear(ps); 1308 filter_opstack_clear(ps);
1195 postfix_clear(ps); 1309 postfix_clear(ps);
@@ -1204,7 +1318,6 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1204 char *filter_string) 1318 char *filter_string)
1205{ 1319{
1206 int err; 1320 int err;
1207
1208 struct filter_parse_state *ps; 1321 struct filter_parse_state *ps;
1209 1322
1210 mutex_lock(&event_mutex); 1323 mutex_lock(&event_mutex);
@@ -1214,10 +1327,9 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1214 goto out_unlock; 1327 goto out_unlock;
1215 1328
1216 if (!strcmp(strstrip(filter_string), "0")) { 1329 if (!strcmp(strstrip(filter_string), "0")) {
1217 filter_free_subsystem_preds(system, FILTER_DISABLE_ALL); 1330 filter_free_subsystem_preds(system);
1218 remove_filter_string(system->filter); 1331 remove_filter_string(system->filter);
1219 mutex_unlock(&event_mutex); 1332 goto out_unlock;
1220 return 0;
1221 } 1333 }
1222 1334
1223 err = -ENOMEM; 1335 err = -ENOMEM;
@@ -1234,31 +1346,87 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1234 goto out; 1346 goto out;
1235 } 1347 }
1236 1348
1237 filter_free_subsystem_preds(system, FILTER_INIT_NO_RESET); 1349 err = replace_system_preds(system, ps, filter_string);
1238 1350 if (err)
1239 /* try to see the filter can be applied to which events */
1240 err = replace_preds(system, NULL, ps, filter_string, true);
1241 if (err) {
1242 append_filter_err(ps, system->filter); 1351 append_filter_err(ps, system->filter);
1243 goto out; 1352
1353out:
1354 filter_opstack_clear(ps);
1355 postfix_clear(ps);
1356 kfree(ps);
1357out_unlock:
1358 mutex_unlock(&event_mutex);
1359
1360 return err;
1361}
1362
1363#ifdef CONFIG_EVENT_PROFILE
1364
1365void ftrace_profile_free_filter(struct perf_event *event)
1366{
1367 struct event_filter *filter = event->filter;
1368
1369 event->filter = NULL;
1370 __free_preds(filter);
1371}
1372
1373int ftrace_profile_set_filter(struct perf_event *event, int event_id,
1374 char *filter_str)
1375{
1376 int err;
1377 struct event_filter *filter;
1378 struct filter_parse_state *ps;
1379 struct ftrace_event_call *call = NULL;
1380
1381 mutex_lock(&event_mutex);
1382
1383 list_for_each_entry(call, &ftrace_events, list) {
1384 if (call->id == event_id)
1385 break;
1244 } 1386 }
1245 1387
1246 filter_free_subsystem_preds(system, FILTER_SKIP_NO_RESET); 1388 err = -EINVAL;
1389 if (!call)
1390 goto out_unlock;
1247 1391
1248 /* really apply the filter to the events */ 1392 err = -EEXIST;
1249 err = replace_preds(system, NULL, ps, filter_string, false); 1393 if (event->filter)
1250 if (err) { 1394 goto out_unlock;
1251 append_filter_err(ps, system->filter); 1395
1252 filter_free_subsystem_preds(system, 2); 1396 filter = __alloc_preds();
1397 if (IS_ERR(filter)) {
1398 err = PTR_ERR(filter);
1399 goto out_unlock;
1253 } 1400 }
1254 1401
1255out: 1402 err = -ENOMEM;
1403 ps = kzalloc(sizeof(*ps), GFP_KERNEL);
1404 if (!ps)
1405 goto free_preds;
1406
1407 parse_init(ps, filter_ops, filter_str);
1408 err = filter_parse(ps);
1409 if (err)
1410 goto free_ps;
1411
1412 err = replace_preds(call, filter, ps, filter_str, false);
1413 if (!err)
1414 event->filter = filter;
1415
1416free_ps:
1256 filter_opstack_clear(ps); 1417 filter_opstack_clear(ps);
1257 postfix_clear(ps); 1418 postfix_clear(ps);
1258 kfree(ps); 1419 kfree(ps);
1420
1421free_preds:
1422 if (err)
1423 __free_preds(filter);
1424
1259out_unlock: 1425out_unlock:
1260 mutex_unlock(&event_mutex); 1426 mutex_unlock(&event_mutex);
1261 1427
1262 return err; 1428 return err;
1263} 1429}
1264 1430
1431#endif /* CONFIG_EVENT_PROFILE */
1432
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 9753fcc61bc5..dff8c84ddf17 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -48,11 +48,11 @@
48struct ____ftrace_##name { \ 48struct ____ftrace_##name { \
49 tstruct \ 49 tstruct \
50}; \ 50}; \
51static void __used ____ftrace_check_##name(void) \ 51static void __always_unused ____ftrace_check_##name(void) \
52{ \ 52{ \
53 struct ____ftrace_##name *__entry = NULL; \ 53 struct ____ftrace_##name *__entry = NULL; \
54 \ 54 \
55 /* force cmpile-time check on F_printk() */ \ 55 /* force compile-time check on F_printk() */ \
56 printk(print); \ 56 printk(print); \
57} 57}
58 58
@@ -66,44 +66,47 @@ static void __used ____ftrace_check_##name(void) \
66#undef __field 66#undef __field
67#define __field(type, item) \ 67#define __field(type, item) \
68 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ 68 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
69 "offset:%zu;\tsize:%zu;\n", \ 69 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
70 offsetof(typeof(field), item), \ 70 offsetof(typeof(field), item), \
71 sizeof(field.item)); \ 71 sizeof(field.item), is_signed_type(type)); \
72 if (!ret) \ 72 if (!ret) \
73 return 0; 73 return 0;
74 74
75#undef __field_desc 75#undef __field_desc
76#define __field_desc(type, container, item) \ 76#define __field_desc(type, container, item) \
77 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ 77 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
78 "offset:%zu;\tsize:%zu;\n", \ 78 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
79 offsetof(typeof(field), container.item), \ 79 offsetof(typeof(field), container.item), \
80 sizeof(field.container.item)); \ 80 sizeof(field.container.item), \
81 is_signed_type(type)); \
81 if (!ret) \ 82 if (!ret) \
82 return 0; 83 return 0;
83 84
84#undef __array 85#undef __array
85#define __array(type, item, len) \ 86#define __array(type, item, len) \
86 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ 87 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
87 "offset:%zu;\tsize:%zu;\n", \ 88 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
88 offsetof(typeof(field), item), \ 89 offsetof(typeof(field), item), \
89 sizeof(field.item)); \ 90 sizeof(field.item), is_signed_type(type)); \
90 if (!ret) \ 91 if (!ret) \
91 return 0; 92 return 0;
92 93
93#undef __array_desc 94#undef __array_desc
94#define __array_desc(type, container, item, len) \ 95#define __array_desc(type, container, item, len) \
95 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ 96 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
96 "offset:%zu;\tsize:%zu;\n", \ 97 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
97 offsetof(typeof(field), container.item), \ 98 offsetof(typeof(field), container.item), \
98 sizeof(field.container.item)); \ 99 sizeof(field.container.item), \
100 is_signed_type(type)); \
99 if (!ret) \ 101 if (!ret) \
100 return 0; 102 return 0;
101 103
102#undef __dynamic_array 104#undef __dynamic_array
103#define __dynamic_array(type, item) \ 105#define __dynamic_array(type, item) \
104 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ 106 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
105 "offset:%zu;\tsize:0;\n", \ 107 "offset:%zu;\tsize:0;\tsigned:%u;\n", \
106 offsetof(typeof(field), item)); \ 108 offsetof(typeof(field), item), \
109 is_signed_type(type)); \
107 if (!ret) \ 110 if (!ret) \
108 return 0; 111 return 0;
109 112
@@ -131,7 +134,6 @@ ftrace_format_##name(struct ftrace_event_call *unused, \
131 134
132#include "trace_entries.h" 135#include "trace_entries.h"
133 136
134
135#undef __field 137#undef __field
136#define __field(type, item) \ 138#define __field(type, item) \
137 ret = trace_define_field(event_call, #type, #item, \ 139 ret = trace_define_field(event_call, #type, #item, \
@@ -193,6 +195,11 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
193 195
194#include "trace_entries.h" 196#include "trace_entries.h"
195 197
198static int ftrace_raw_init_event(struct ftrace_event_call *call)
199{
200 INIT_LIST_HEAD(&call->fields);
201 return 0;
202}
196 203
197#undef __field 204#undef __field
198#define __field(type, item) 205#define __field(type, item)
@@ -211,7 +218,6 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
211 218
212#undef FTRACE_ENTRY 219#undef FTRACE_ENTRY
213#define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \ 220#define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \
214static int ftrace_raw_init_event_##call(void); \
215 \ 221 \
216struct ftrace_event_call __used \ 222struct ftrace_event_call __used \
217__attribute__((__aligned__(4))) \ 223__attribute__((__aligned__(4))) \
@@ -219,14 +225,9 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
219 .name = #call, \ 225 .name = #call, \
220 .id = type, \ 226 .id = type, \
221 .system = __stringify(TRACE_SYSTEM), \ 227 .system = __stringify(TRACE_SYSTEM), \
222 .raw_init = ftrace_raw_init_event_##call, \ 228 .raw_init = ftrace_raw_init_event, \
223 .show_format = ftrace_format_##call, \ 229 .show_format = ftrace_format_##call, \
224 .define_fields = ftrace_define_fields_##call, \ 230 .define_fields = ftrace_define_fields_##call, \
225}; \ 231}; \
226static int ftrace_raw_init_event_##call(void) \
227{ \
228 INIT_LIST_HEAD(&event_##call.fields); \
229 return 0; \
230} \
231 232
232#include "trace_entries.h" 233#include "trace_entries.h"
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 90a6daa10962..b1342c5d37cf 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -14,9 +14,20 @@
14#include "trace.h" 14#include "trace.h"
15#include "trace_output.h" 15#include "trace_output.h"
16 16
17struct fgraph_data { 17struct fgraph_cpu_data {
18 pid_t last_pid; 18 pid_t last_pid;
19 int depth; 19 int depth;
20 int ignore;
21};
22
23struct fgraph_data {
24 struct fgraph_cpu_data *cpu_data;
25
26 /* Place to preserve last processed entry. */
27 struct ftrace_graph_ent_entry ent;
28 struct ftrace_graph_ret_entry ret;
29 int failed;
30 int cpu;
20}; 31};
21 32
22#define TRACE_GRAPH_INDENT 2 33#define TRACE_GRAPH_INDENT 2
@@ -384,7 +395,7 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data)
384 if (!data) 395 if (!data)
385 return TRACE_TYPE_HANDLED; 396 return TRACE_TYPE_HANDLED;
386 397
387 last_pid = &(per_cpu_ptr(data, cpu)->last_pid); 398 last_pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
388 399
389 if (*last_pid == pid) 400 if (*last_pid == pid)
390 return TRACE_TYPE_HANDLED; 401 return TRACE_TYPE_HANDLED;
@@ -435,26 +446,49 @@ static struct ftrace_graph_ret_entry *
435get_return_for_leaf(struct trace_iterator *iter, 446get_return_for_leaf(struct trace_iterator *iter,
436 struct ftrace_graph_ent_entry *curr) 447 struct ftrace_graph_ent_entry *curr)
437{ 448{
438 struct ring_buffer_iter *ring_iter; 449 struct fgraph_data *data = iter->private;
450 struct ring_buffer_iter *ring_iter = NULL;
439 struct ring_buffer_event *event; 451 struct ring_buffer_event *event;
440 struct ftrace_graph_ret_entry *next; 452 struct ftrace_graph_ret_entry *next;
441 453
442 ring_iter = iter->buffer_iter[iter->cpu]; 454 /*
455 * If the previous output failed to write to the seq buffer,
456 * then we just reuse the data from before.
457 */
458 if (data && data->failed) {
459 curr = &data->ent;
460 next = &data->ret;
461 } else {
443 462
444 /* First peek to compare current entry and the next one */ 463 ring_iter = iter->buffer_iter[iter->cpu];
445 if (ring_iter) 464
446 event = ring_buffer_iter_peek(ring_iter, NULL); 465 /* First peek to compare current entry and the next one */
447 else { 466 if (ring_iter)
448 /* We need to consume the current entry to see the next one */ 467 event = ring_buffer_iter_peek(ring_iter, NULL);
449 ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL); 468 else {
450 event = ring_buffer_peek(iter->tr->buffer, iter->cpu, 469 /*
451 NULL); 470 * We need to consume the current entry to see
452 } 471 * the next one.
472 */
473 ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
474 event = ring_buffer_peek(iter->tr->buffer, iter->cpu,
475 NULL);
476 }
453 477
454 if (!event) 478 if (!event)
455 return NULL; 479 return NULL;
480
481 next = ring_buffer_event_data(event);
456 482
457 next = ring_buffer_event_data(event); 483 if (data) {
484 /*
485 * Save current and next entries for later reference
486 * if the output fails.
487 */
488 data->ent = *curr;
489 data->ret = *next;
490 }
491 }
458 492
459 if (next->ent.type != TRACE_GRAPH_RET) 493 if (next->ent.type != TRACE_GRAPH_RET)
460 return NULL; 494 return NULL;
@@ -640,7 +674,7 @@ print_graph_entry_leaf(struct trace_iterator *iter,
640 674
641 if (data) { 675 if (data) {
642 int cpu = iter->cpu; 676 int cpu = iter->cpu;
643 int *depth = &(per_cpu_ptr(data, cpu)->depth); 677 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
644 678
645 /* 679 /*
646 * Comments display at + 1 to depth. Since 680 * Comments display at + 1 to depth. Since
@@ -688,7 +722,7 @@ print_graph_entry_nested(struct trace_iterator *iter,
688 722
689 if (data) { 723 if (data) {
690 int cpu = iter->cpu; 724 int cpu = iter->cpu;
691 int *depth = &(per_cpu_ptr(data, cpu)->depth); 725 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
692 726
693 *depth = call->depth; 727 *depth = call->depth;
694 } 728 }
@@ -782,19 +816,34 @@ static enum print_line_t
782print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, 816print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
783 struct trace_iterator *iter) 817 struct trace_iterator *iter)
784{ 818{
785 int cpu = iter->cpu; 819 struct fgraph_data *data = iter->private;
786 struct ftrace_graph_ent *call = &field->graph_ent; 820 struct ftrace_graph_ent *call = &field->graph_ent;
787 struct ftrace_graph_ret_entry *leaf_ret; 821 struct ftrace_graph_ret_entry *leaf_ret;
822 static enum print_line_t ret;
823 int cpu = iter->cpu;
788 824
789 if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func)) 825 if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func))
790 return TRACE_TYPE_PARTIAL_LINE; 826 return TRACE_TYPE_PARTIAL_LINE;
791 827
792 leaf_ret = get_return_for_leaf(iter, field); 828 leaf_ret = get_return_for_leaf(iter, field);
793 if (leaf_ret) 829 if (leaf_ret)
794 return print_graph_entry_leaf(iter, field, leaf_ret, s); 830 ret = print_graph_entry_leaf(iter, field, leaf_ret, s);
795 else 831 else
796 return print_graph_entry_nested(iter, field, s, cpu); 832 ret = print_graph_entry_nested(iter, field, s, cpu);
797 833
834 if (data) {
835 /*
836 * If we failed to write our output, then we need to make
837 * note of it. Because we already consumed our entry.
838 */
839 if (s->full) {
840 data->failed = 1;
841 data->cpu = cpu;
842 } else
843 data->failed = 0;
844 }
845
846 return ret;
798} 847}
799 848
800static enum print_line_t 849static enum print_line_t
@@ -810,7 +859,7 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
810 859
811 if (data) { 860 if (data) {
812 int cpu = iter->cpu; 861 int cpu = iter->cpu;
813 int *depth = &(per_cpu_ptr(data, cpu)->depth); 862 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
814 863
815 /* 864 /*
816 * Comments display at + 1 to depth. This is the 865 * Comments display at + 1 to depth. This is the
@@ -873,7 +922,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
873 int i; 922 int i;
874 923
875 if (data) 924 if (data)
876 depth = per_cpu_ptr(data, iter->cpu)->depth; 925 depth = per_cpu_ptr(data->cpu_data, iter->cpu)->depth;
877 926
878 if (print_graph_prologue(iter, s, 0, 0)) 927 if (print_graph_prologue(iter, s, 0, 0))
879 return TRACE_TYPE_PARTIAL_LINE; 928 return TRACE_TYPE_PARTIAL_LINE;
@@ -941,8 +990,33 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
941enum print_line_t 990enum print_line_t
942print_graph_function(struct trace_iterator *iter) 991print_graph_function(struct trace_iterator *iter)
943{ 992{
993 struct ftrace_graph_ent_entry *field;
994 struct fgraph_data *data = iter->private;
944 struct trace_entry *entry = iter->ent; 995 struct trace_entry *entry = iter->ent;
945 struct trace_seq *s = &iter->seq; 996 struct trace_seq *s = &iter->seq;
997 int cpu = iter->cpu;
998 int ret;
999
1000 if (data && per_cpu_ptr(data->cpu_data, cpu)->ignore) {
1001 per_cpu_ptr(data->cpu_data, cpu)->ignore = 0;
1002 return TRACE_TYPE_HANDLED;
1003 }
1004
1005 /*
1006 * If the last output failed, there's a possibility we need
1007 * to print out the missing entry which would never go out.
1008 */
1009 if (data && data->failed) {
1010 field = &data->ent;
1011 iter->cpu = data->cpu;
1012 ret = print_graph_entry(field, s, iter);
1013 if (ret == TRACE_TYPE_HANDLED && iter->cpu != cpu) {
1014 per_cpu_ptr(data->cpu_data, iter->cpu)->ignore = 1;
1015 ret = TRACE_TYPE_NO_CONSUME;
1016 }
1017 iter->cpu = cpu;
1018 return ret;
1019 }
946 1020
947 switch (entry->type) { 1021 switch (entry->type) {
948 case TRACE_GRAPH_ENT: { 1022 case TRACE_GRAPH_ENT: {
@@ -952,7 +1026,7 @@ print_graph_function(struct trace_iterator *iter)
952 * sizeof(struct ftrace_graph_ent_entry) is very small, 1026 * sizeof(struct ftrace_graph_ent_entry) is very small,
953 * it can be safely saved at the stack. 1027 * it can be safely saved at the stack.
954 */ 1028 */
955 struct ftrace_graph_ent_entry *field, saved; 1029 struct ftrace_graph_ent_entry saved;
956 trace_assign_type(field, entry); 1030 trace_assign_type(field, entry);
957 saved = *field; 1031 saved = *field;
958 return print_graph_entry(&saved, s, iter); 1032 return print_graph_entry(&saved, s, iter);
@@ -1030,31 +1104,54 @@ static void print_graph_headers(struct seq_file *s)
1030static void graph_trace_open(struct trace_iterator *iter) 1104static void graph_trace_open(struct trace_iterator *iter)
1031{ 1105{
1032 /* pid and depth on the last trace processed */ 1106 /* pid and depth on the last trace processed */
1033 struct fgraph_data *data = alloc_percpu(struct fgraph_data); 1107 struct fgraph_data *data;
1034 int cpu; 1108 int cpu;
1035 1109
1110 iter->private = NULL;
1111
1112 data = kzalloc(sizeof(*data), GFP_KERNEL);
1036 if (!data) 1113 if (!data)
1037 pr_warning("function graph tracer: not enough memory\n"); 1114 goto out_err;
1038 else 1115
1039 for_each_possible_cpu(cpu) { 1116 data->cpu_data = alloc_percpu(struct fgraph_cpu_data);
1040 pid_t *pid = &(per_cpu_ptr(data, cpu)->last_pid); 1117 if (!data->cpu_data)
1041 int *depth = &(per_cpu_ptr(data, cpu)->depth); 1118 goto out_err_free;
1042 *pid = -1; 1119
1043 *depth = 0; 1120 for_each_possible_cpu(cpu) {
1044 } 1121 pid_t *pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
1122 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
1123 int *ignore = &(per_cpu_ptr(data->cpu_data, cpu)->ignore);
1124 *pid = -1;
1125 *depth = 0;
1126 *ignore = 0;
1127 }
1045 1128
1046 iter->private = data; 1129 iter->private = data;
1130
1131 return;
1132
1133 out_err_free:
1134 kfree(data);
1135 out_err:
1136 pr_warning("function graph tracer: not enough memory\n");
1047} 1137}
1048 1138
1049static void graph_trace_close(struct trace_iterator *iter) 1139static void graph_trace_close(struct trace_iterator *iter)
1050{ 1140{
1051 free_percpu(iter->private); 1141 struct fgraph_data *data = iter->private;
1142
1143 if (data) {
1144 free_percpu(data->cpu_data);
1145 kfree(data);
1146 }
1052} 1147}
1053 1148
1054static struct tracer graph_trace __read_mostly = { 1149static struct tracer graph_trace __read_mostly = {
1055 .name = "function_graph", 1150 .name = "function_graph",
1056 .open = graph_trace_open, 1151 .open = graph_trace_open,
1152 .pipe_open = graph_trace_open,
1057 .close = graph_trace_close, 1153 .close = graph_trace_close,
1154 .pipe_close = graph_trace_close,
1058 .wait_pipe = poll_wait_pipe, 1155 .wait_pipe = poll_wait_pipe,
1059 .init = graph_trace_init, 1156 .init = graph_trace_init,
1060 .reset = graph_trace_reset, 1157 .reset = graph_trace_reset,
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
index adaf7a39d0dc..7b97000745f5 100644
--- a/kernel/trace/trace_hw_branches.c
+++ b/kernel/trace/trace_hw_branches.c
@@ -166,6 +166,7 @@ void trace_hw_branch(u64 from, u64 to)
166 struct ftrace_event_call *call = &event_hw_branch; 166 struct ftrace_event_call *call = &event_hw_branch;
167 struct trace_array *tr = hw_branch_trace; 167 struct trace_array *tr = hw_branch_trace;
168 struct ring_buffer_event *event; 168 struct ring_buffer_event *event;
169 struct ring_buffer *buf;
169 struct hw_branch_entry *entry; 170 struct hw_branch_entry *entry;
170 unsigned long irq1; 171 unsigned long irq1;
171 int cpu; 172 int cpu;
@@ -181,7 +182,8 @@ void trace_hw_branch(u64 from, u64 to)
181 if (atomic_inc_return(&tr->data[cpu]->disabled) != 1) 182 if (atomic_inc_return(&tr->data[cpu]->disabled) != 1)
182 goto out; 183 goto out;
183 184
184 event = trace_buffer_lock_reserve(tr, TRACE_HW_BRANCHES, 185 buf = tr->buffer;
186 event = trace_buffer_lock_reserve(buf, TRACE_HW_BRANCHES,
185 sizeof(*entry), 0, 0); 187 sizeof(*entry), 0, 0);
186 if (!event) 188 if (!event)
187 goto out; 189 goto out;
@@ -190,8 +192,8 @@ void trace_hw_branch(u64 from, u64 to)
190 entry->ent.type = TRACE_HW_BRANCHES; 192 entry->ent.type = TRACE_HW_BRANCHES;
191 entry->from = from; 193 entry->from = from;
192 entry->to = to; 194 entry->to = to;
193 if (!filter_check_discard(call, entry, tr->buffer, event)) 195 if (!filter_check_discard(call, entry, buf, event))
194 trace_buffer_unlock_commit(tr, event, 0, 0); 196 trace_buffer_unlock_commit(buf, event, 0, 0);
195 197
196 out: 198 out:
197 atomic_dec(&tr->data[cpu]->disabled); 199 atomic_dec(&tr->data[cpu]->disabled);
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
new file mode 100644
index 000000000000..b52d397e57eb
--- /dev/null
+++ b/kernel/trace/trace_kprobe.c
@@ -0,0 +1,1542 @@
1/*
2 * Kprobes-based tracing events
3 *
4 * Created by Masami Hiramatsu <mhiramat@redhat.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#include <linux/module.h>
21#include <linux/uaccess.h>
22#include <linux/kprobes.h>
23#include <linux/seq_file.h>
24#include <linux/slab.h>
25#include <linux/smp.h>
26#include <linux/debugfs.h>
27#include <linux/types.h>
28#include <linux/string.h>
29#include <linux/ctype.h>
30#include <linux/ptrace.h>
31#include <linux/perf_event.h>
32
33#include "trace.h"
34#include "trace_output.h"
35
36#define MAX_TRACE_ARGS 128
37#define MAX_ARGSTR_LEN 63
38#define MAX_EVENT_NAME_LEN 64
39#define KPROBE_EVENT_SYSTEM "kprobes"
40
41/* Reserved field names */
42#define FIELD_STRING_IP "__probe_ip"
43#define FIELD_STRING_NARGS "__probe_nargs"
44#define FIELD_STRING_RETIP "__probe_ret_ip"
45#define FIELD_STRING_FUNC "__probe_func"
46
47const char *reserved_field_names[] = {
48 "common_type",
49 "common_flags",
50 "common_preempt_count",
51 "common_pid",
52 "common_tgid",
53 "common_lock_depth",
54 FIELD_STRING_IP,
55 FIELD_STRING_NARGS,
56 FIELD_STRING_RETIP,
57 FIELD_STRING_FUNC,
58};
59
60struct fetch_func {
61 unsigned long (*func)(struct pt_regs *, void *);
62 void *data;
63};
64
65static __kprobes unsigned long call_fetch(struct fetch_func *f,
66 struct pt_regs *regs)
67{
68 return f->func(regs, f->data);
69}
70
71/* fetch handlers */
72static __kprobes unsigned long fetch_register(struct pt_regs *regs,
73 void *offset)
74{
75 return regs_get_register(regs, (unsigned int)((unsigned long)offset));
76}
77
78static __kprobes unsigned long fetch_stack(struct pt_regs *regs,
79 void *num)
80{
81 return regs_get_kernel_stack_nth(regs,
82 (unsigned int)((unsigned long)num));
83}
84
85static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr)
86{
87 unsigned long retval;
88
89 if (probe_kernel_address(addr, retval))
90 return 0;
91 return retval;
92}
93
94static __kprobes unsigned long fetch_argument(struct pt_regs *regs, void *num)
95{
96 return regs_get_argument_nth(regs, (unsigned int)((unsigned long)num));
97}
98
99static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs,
100 void *dummy)
101{
102 return regs_return_value(regs);
103}
104
105static __kprobes unsigned long fetch_stack_address(struct pt_regs *regs,
106 void *dummy)
107{
108 return kernel_stack_pointer(regs);
109}
110
111/* Memory fetching by symbol */
112struct symbol_cache {
113 char *symbol;
114 long offset;
115 unsigned long addr;
116};
117
118static unsigned long update_symbol_cache(struct symbol_cache *sc)
119{
120 sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol);
121 if (sc->addr)
122 sc->addr += sc->offset;
123 return sc->addr;
124}
125
126static void free_symbol_cache(struct symbol_cache *sc)
127{
128 kfree(sc->symbol);
129 kfree(sc);
130}
131
132static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
133{
134 struct symbol_cache *sc;
135
136 if (!sym || strlen(sym) == 0)
137 return NULL;
138 sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL);
139 if (!sc)
140 return NULL;
141
142 sc->symbol = kstrdup(sym, GFP_KERNEL);
143 if (!sc->symbol) {
144 kfree(sc);
145 return NULL;
146 }
147 sc->offset = offset;
148
149 update_symbol_cache(sc);
150 return sc;
151}
152
153static __kprobes unsigned long fetch_symbol(struct pt_regs *regs, void *data)
154{
155 struct symbol_cache *sc = data;
156
157 if (sc->addr)
158 return fetch_memory(regs, (void *)sc->addr);
159 else
160 return 0;
161}
162
163/* Special indirect memory access interface */
164struct indirect_fetch_data {
165 struct fetch_func orig;
166 long offset;
167};
168
169static __kprobes unsigned long fetch_indirect(struct pt_regs *regs, void *data)
170{
171 struct indirect_fetch_data *ind = data;
172 unsigned long addr;
173
174 addr = call_fetch(&ind->orig, regs);
175 if (addr) {
176 addr += ind->offset;
177 return fetch_memory(regs, (void *)addr);
178 } else
179 return 0;
180}
181
182static __kprobes void free_indirect_fetch_data(struct indirect_fetch_data *data)
183{
184 if (data->orig.func == fetch_indirect)
185 free_indirect_fetch_data(data->orig.data);
186 else if (data->orig.func == fetch_symbol)
187 free_symbol_cache(data->orig.data);
188 kfree(data);
189}
190
191/**
192 * Kprobe event core functions
193 */
194
195struct probe_arg {
196 struct fetch_func fetch;
197 const char *name;
198};
199
200/* Flags for trace_probe */
201#define TP_FLAG_TRACE 1
202#define TP_FLAG_PROFILE 2
203
204struct trace_probe {
205 struct list_head list;
206 struct kretprobe rp; /* Use rp.kp for kprobe use */
207 unsigned long nhit;
208 unsigned int flags; /* For TP_FLAG_* */
209 const char *symbol; /* symbol name */
210 struct ftrace_event_call call;
211 struct trace_event event;
212 unsigned int nr_args;
213 struct probe_arg args[];
214};
215
216#define SIZEOF_TRACE_PROBE(n) \
217 (offsetof(struct trace_probe, args) + \
218 (sizeof(struct probe_arg) * (n)))
219
220static __kprobes int probe_is_return(struct trace_probe *tp)
221{
222 return tp->rp.handler != NULL;
223}
224
225static __kprobes const char *probe_symbol(struct trace_probe *tp)
226{
227 return tp->symbol ? tp->symbol : "unknown";
228}
229
230static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff)
231{
232 int ret = -EINVAL;
233
234 if (ff->func == fetch_argument)
235 ret = snprintf(buf, n, "$arg%lu", (unsigned long)ff->data);
236 else if (ff->func == fetch_register) {
237 const char *name;
238 name = regs_query_register_name((unsigned int)((long)ff->data));
239 ret = snprintf(buf, n, "%%%s", name);
240 } else if (ff->func == fetch_stack)
241 ret = snprintf(buf, n, "$stack%lu", (unsigned long)ff->data);
242 else if (ff->func == fetch_memory)
243 ret = snprintf(buf, n, "@0x%p", ff->data);
244 else if (ff->func == fetch_symbol) {
245 struct symbol_cache *sc = ff->data;
246 if (sc->offset)
247 ret = snprintf(buf, n, "@%s%+ld", sc->symbol,
248 sc->offset);
249 else
250 ret = snprintf(buf, n, "@%s", sc->symbol);
251 } else if (ff->func == fetch_retvalue)
252 ret = snprintf(buf, n, "$retval");
253 else if (ff->func == fetch_stack_address)
254 ret = snprintf(buf, n, "$stack");
255 else if (ff->func == fetch_indirect) {
256 struct indirect_fetch_data *id = ff->data;
257 size_t l = 0;
258 ret = snprintf(buf, n, "%+ld(", id->offset);
259 if (ret >= n)
260 goto end;
261 l += ret;
262 ret = probe_arg_string(buf + l, n - l, &id->orig);
263 if (ret < 0)
264 goto end;
265 l += ret;
266 ret = snprintf(buf + l, n - l, ")");
267 ret += l;
268 }
269end:
270 if (ret >= n)
271 return -ENOSPC;
272 return ret;
273}
274
275static int register_probe_event(struct trace_probe *tp);
276static void unregister_probe_event(struct trace_probe *tp);
277
278static DEFINE_MUTEX(probe_lock);
279static LIST_HEAD(probe_list);
280
281static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
282static int kretprobe_dispatcher(struct kretprobe_instance *ri,
283 struct pt_regs *regs);
284
285/*
286 * Allocate new trace_probe and initialize it (including kprobes).
287 */
288static struct trace_probe *alloc_trace_probe(const char *group,
289 const char *event,
290 void *addr,
291 const char *symbol,
292 unsigned long offs,
293 int nargs, int is_return)
294{
295 struct trace_probe *tp;
296
297 tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL);
298 if (!tp)
299 return ERR_PTR(-ENOMEM);
300
301 if (symbol) {
302 tp->symbol = kstrdup(symbol, GFP_KERNEL);
303 if (!tp->symbol)
304 goto error;
305 tp->rp.kp.symbol_name = tp->symbol;
306 tp->rp.kp.offset = offs;
307 } else
308 tp->rp.kp.addr = addr;
309
310 if (is_return)
311 tp->rp.handler = kretprobe_dispatcher;
312 else
313 tp->rp.kp.pre_handler = kprobe_dispatcher;
314
315 if (!event)
316 goto error;
317 tp->call.name = kstrdup(event, GFP_KERNEL);
318 if (!tp->call.name)
319 goto error;
320
321 if (!group)
322 goto error;
323 tp->call.system = kstrdup(group, GFP_KERNEL);
324 if (!tp->call.system)
325 goto error;
326
327 INIT_LIST_HEAD(&tp->list);
328 return tp;
329error:
330 kfree(tp->call.name);
331 kfree(tp->symbol);
332 kfree(tp);
333 return ERR_PTR(-ENOMEM);
334}
335
336static void free_probe_arg(struct probe_arg *arg)
337{
338 if (arg->fetch.func == fetch_symbol)
339 free_symbol_cache(arg->fetch.data);
340 else if (arg->fetch.func == fetch_indirect)
341 free_indirect_fetch_data(arg->fetch.data);
342 kfree(arg->name);
343}
344
345static void free_trace_probe(struct trace_probe *tp)
346{
347 int i;
348
349 for (i = 0; i < tp->nr_args; i++)
350 free_probe_arg(&tp->args[i]);
351
352 kfree(tp->call.system);
353 kfree(tp->call.name);
354 kfree(tp->symbol);
355 kfree(tp);
356}
357
358static struct trace_probe *find_probe_event(const char *event,
359 const char *group)
360{
361 struct trace_probe *tp;
362
363 list_for_each_entry(tp, &probe_list, list)
364 if (strcmp(tp->call.name, event) == 0 &&
365 strcmp(tp->call.system, group) == 0)
366 return tp;
367 return NULL;
368}
369
370/* Unregister a trace_probe and probe_event: call with locking probe_lock */
371static void unregister_trace_probe(struct trace_probe *tp)
372{
373 if (probe_is_return(tp))
374 unregister_kretprobe(&tp->rp);
375 else
376 unregister_kprobe(&tp->rp.kp);
377 list_del(&tp->list);
378 unregister_probe_event(tp);
379}
380
381/* Register a trace_probe and probe_event */
382static int register_trace_probe(struct trace_probe *tp)
383{
384 struct trace_probe *old_tp;
385 int ret;
386
387 mutex_lock(&probe_lock);
388
389 /* register as an event */
390 old_tp = find_probe_event(tp->call.name, tp->call.system);
391 if (old_tp) {
392 /* delete old event */
393 unregister_trace_probe(old_tp);
394 free_trace_probe(old_tp);
395 }
396 ret = register_probe_event(tp);
397 if (ret) {
398 pr_warning("Faild to register probe event(%d)\n", ret);
399 goto end;
400 }
401
402 tp->rp.kp.flags |= KPROBE_FLAG_DISABLED;
403 if (probe_is_return(tp))
404 ret = register_kretprobe(&tp->rp);
405 else
406 ret = register_kprobe(&tp->rp.kp);
407
408 if (ret) {
409 pr_warning("Could not insert probe(%d)\n", ret);
410 if (ret == -EILSEQ) {
411 pr_warning("Probing address(0x%p) is not an "
412 "instruction boundary.\n",
413 tp->rp.kp.addr);
414 ret = -EINVAL;
415 }
416 unregister_probe_event(tp);
417 } else
418 list_add_tail(&tp->list, &probe_list);
419end:
420 mutex_unlock(&probe_lock);
421 return ret;
422}
423
424/* Split symbol and offset. */
425static int split_symbol_offset(char *symbol, unsigned long *offset)
426{
427 char *tmp;
428 int ret;
429
430 if (!offset)
431 return -EINVAL;
432
433 tmp = strchr(symbol, '+');
434 if (tmp) {
435 /* skip sign because strict_strtol doesn't accept '+' */
436 ret = strict_strtoul(tmp + 1, 0, offset);
437 if (ret)
438 return ret;
439 *tmp = '\0';
440 } else
441 *offset = 0;
442 return 0;
443}
444
445#define PARAM_MAX_ARGS 16
446#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long))
447
448static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return)
449{
450 int ret = 0;
451 unsigned long param;
452
453 if (strcmp(arg, "retval") == 0) {
454 if (is_return) {
455 ff->func = fetch_retvalue;
456 ff->data = NULL;
457 } else
458 ret = -EINVAL;
459 } else if (strncmp(arg, "stack", 5) == 0) {
460 if (arg[5] == '\0') {
461 ff->func = fetch_stack_address;
462 ff->data = NULL;
463 } else if (isdigit(arg[5])) {
464 ret = strict_strtoul(arg + 5, 10, &param);
465 if (ret || param > PARAM_MAX_STACK)
466 ret = -EINVAL;
467 else {
468 ff->func = fetch_stack;
469 ff->data = (void *)param;
470 }
471 } else
472 ret = -EINVAL;
473 } else if (strncmp(arg, "arg", 3) == 0 && isdigit(arg[3])) {
474 ret = strict_strtoul(arg + 3, 10, &param);
475 if (ret || param > PARAM_MAX_ARGS)
476 ret = -EINVAL;
477 else {
478 ff->func = fetch_argument;
479 ff->data = (void *)param;
480 }
481 } else
482 ret = -EINVAL;
483 return ret;
484}
485
486/* Recursive argument parser */
487static int __parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
488{
489 int ret = 0;
490 unsigned long param;
491 long offset;
492 char *tmp;
493
494 switch (arg[0]) {
495 case '$':
496 ret = parse_probe_vars(arg + 1, ff, is_return);
497 break;
498 case '%': /* named register */
499 ret = regs_query_register_offset(arg + 1);
500 if (ret >= 0) {
501 ff->func = fetch_register;
502 ff->data = (void *)(unsigned long)ret;
503 ret = 0;
504 }
505 break;
506 case '@': /* memory or symbol */
507 if (isdigit(arg[1])) {
508 ret = strict_strtoul(arg + 1, 0, &param);
509 if (ret)
510 break;
511 ff->func = fetch_memory;
512 ff->data = (void *)param;
513 } else {
514 ret = split_symbol_offset(arg + 1, &offset);
515 if (ret)
516 break;
517 ff->data = alloc_symbol_cache(arg + 1, offset);
518 if (ff->data)
519 ff->func = fetch_symbol;
520 else
521 ret = -EINVAL;
522 }
523 break;
524 case '+': /* indirect memory */
525 case '-':
526 tmp = strchr(arg, '(');
527 if (!tmp) {
528 ret = -EINVAL;
529 break;
530 }
531 *tmp = '\0';
532 ret = strict_strtol(arg + 1, 0, &offset);
533 if (ret)
534 break;
535 if (arg[0] == '-')
536 offset = -offset;
537 arg = tmp + 1;
538 tmp = strrchr(arg, ')');
539 if (tmp) {
540 struct indirect_fetch_data *id;
541 *tmp = '\0';
542 id = kzalloc(sizeof(struct indirect_fetch_data),
543 GFP_KERNEL);
544 if (!id)
545 return -ENOMEM;
546 id->offset = offset;
547 ret = __parse_probe_arg(arg, &id->orig, is_return);
548 if (ret)
549 kfree(id);
550 else {
551 ff->func = fetch_indirect;
552 ff->data = (void *)id;
553 }
554 } else
555 ret = -EINVAL;
556 break;
557 default:
558 /* TODO: support custom handler */
559 ret = -EINVAL;
560 }
561 return ret;
562}
563
564/* String length checking wrapper */
565static int parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
566{
567 if (strlen(arg) > MAX_ARGSTR_LEN) {
568 pr_info("Argument is too long.: %s\n", arg);
569 return -ENOSPC;
570 }
571 return __parse_probe_arg(arg, ff, is_return);
572}
573
574/* Return 1 if name is reserved or already used by another argument */
575static int conflict_field_name(const char *name,
576 struct probe_arg *args, int narg)
577{
578 int i;
579 for (i = 0; i < ARRAY_SIZE(reserved_field_names); i++)
580 if (strcmp(reserved_field_names[i], name) == 0)
581 return 1;
582 for (i = 0; i < narg; i++)
583 if (strcmp(args[i].name, name) == 0)
584 return 1;
585 return 0;
586}
587
588static int create_trace_probe(int argc, char **argv)
589{
590 /*
591 * Argument syntax:
592 * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS]
593 * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS]
594 * Fetch args:
595 * $argN : fetch Nth of function argument. (N:0-)
596 * $retval : fetch return value
597 * $stack : fetch stack address
598 * $stackN : fetch Nth of stack (N:0-)
599 * @ADDR : fetch memory at ADDR (ADDR should be in kernel)
600 * @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
601 * %REG : fetch register REG
602 * Indirect memory fetch:
603 * +|-offs(ARG) : fetch memory at ARG +|- offs address.
604 * Alias name of args:
605 * NAME=FETCHARG : set NAME as alias of FETCHARG.
606 */
607 struct trace_probe *tp;
608 int i, ret = 0;
609 int is_return = 0, is_delete = 0;
610 char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL;
611 unsigned long offset = 0;
612 void *addr = NULL;
613 char buf[MAX_EVENT_NAME_LEN];
614
615 /* argc must be >= 1 */
616 if (argv[0][0] == 'p')
617 is_return = 0;
618 else if (argv[0][0] == 'r')
619 is_return = 1;
620 else if (argv[0][0] == '-')
621 is_delete = 1;
622 else {
623 pr_info("Probe definition must be started with 'p', 'r' or"
624 " '-'.\n");
625 return -EINVAL;
626 }
627
628 if (argv[0][1] == ':') {
629 event = &argv[0][2];
630 if (strchr(event, '/')) {
631 group = event;
632 event = strchr(group, '/') + 1;
633 event[-1] = '\0';
634 if (strlen(group) == 0) {
635 pr_info("Group name is not specifiled\n");
636 return -EINVAL;
637 }
638 }
639 if (strlen(event) == 0) {
640 pr_info("Event name is not specifiled\n");
641 return -EINVAL;
642 }
643 }
644 if (!group)
645 group = KPROBE_EVENT_SYSTEM;
646
647 if (is_delete) {
648 if (!event) {
649 pr_info("Delete command needs an event name.\n");
650 return -EINVAL;
651 }
652 tp = find_probe_event(event, group);
653 if (!tp) {
654 pr_info("Event %s/%s doesn't exist.\n", group, event);
655 return -ENOENT;
656 }
657 /* delete an event */
658 unregister_trace_probe(tp);
659 free_trace_probe(tp);
660 return 0;
661 }
662
663 if (argc < 2) {
664 pr_info("Probe point is not specified.\n");
665 return -EINVAL;
666 }
667 if (isdigit(argv[1][0])) {
668 if (is_return) {
669 pr_info("Return probe point must be a symbol.\n");
670 return -EINVAL;
671 }
672 /* an address specified */
673 ret = strict_strtoul(&argv[0][2], 0, (unsigned long *)&addr);
674 if (ret) {
675 pr_info("Failed to parse address.\n");
676 return ret;
677 }
678 } else {
679 /* a symbol specified */
680 symbol = argv[1];
681 /* TODO: support .init module functions */
682 ret = split_symbol_offset(symbol, &offset);
683 if (ret) {
684 pr_info("Failed to parse symbol.\n");
685 return ret;
686 }
687 if (offset && is_return) {
688 pr_info("Return probe must be used without offset.\n");
689 return -EINVAL;
690 }
691 }
692 argc -= 2; argv += 2;
693
694 /* setup a probe */
695 if (!event) {
696 /* Make a new event name */
697 if (symbol)
698 snprintf(buf, MAX_EVENT_NAME_LEN, "%c@%s%+ld",
699 is_return ? 'r' : 'p', symbol, offset);
700 else
701 snprintf(buf, MAX_EVENT_NAME_LEN, "%c@0x%p",
702 is_return ? 'r' : 'p', addr);
703 event = buf;
704 }
705 tp = alloc_trace_probe(group, event, addr, symbol, offset, argc,
706 is_return);
707 if (IS_ERR(tp)) {
708 pr_info("Failed to allocate trace_probe.(%d)\n",
709 (int)PTR_ERR(tp));
710 return PTR_ERR(tp);
711 }
712
713 /* parse arguments */
714 ret = 0;
715 for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
716 /* Parse argument name */
717 arg = strchr(argv[i], '=');
718 if (arg)
719 *arg++ = '\0';
720 else
721 arg = argv[i];
722
723 if (conflict_field_name(argv[i], tp->args, i)) {
724 pr_info("Argument%d name '%s' conflicts with "
725 "another field.\n", i, argv[i]);
726 ret = -EINVAL;
727 goto error;
728 }
729
730 tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
731 if (!tp->args[i].name) {
732 pr_info("Failed to allocate argument%d name '%s'.\n",
733 i, argv[i]);
734 ret = -ENOMEM;
735 goto error;
736 }
737
738 /* Parse fetch argument */
739 ret = parse_probe_arg(arg, &tp->args[i].fetch, is_return);
740 if (ret) {
741 pr_info("Parse error at argument%d. (%d)\n", i, ret);
742 kfree(tp->args[i].name);
743 goto error;
744 }
745
746 tp->nr_args++;
747 }
748
749 ret = register_trace_probe(tp);
750 if (ret)
751 goto error;
752 return 0;
753
754error:
755 free_trace_probe(tp);
756 return ret;
757}
758
759static void cleanup_all_probes(void)
760{
761 struct trace_probe *tp;
762
763 mutex_lock(&probe_lock);
764 /* TODO: Use batch unregistration */
765 while (!list_empty(&probe_list)) {
766 tp = list_entry(probe_list.next, struct trace_probe, list);
767 unregister_trace_probe(tp);
768 free_trace_probe(tp);
769 }
770 mutex_unlock(&probe_lock);
771}
772
773
774/* Probes listing interfaces */
775static void *probes_seq_start(struct seq_file *m, loff_t *pos)
776{
777 mutex_lock(&probe_lock);
778 return seq_list_start(&probe_list, *pos);
779}
780
781static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
782{
783 return seq_list_next(v, &probe_list, pos);
784}
785
786static void probes_seq_stop(struct seq_file *m, void *v)
787{
788 mutex_unlock(&probe_lock);
789}
790
791static int probes_seq_show(struct seq_file *m, void *v)
792{
793 struct trace_probe *tp = v;
794 int i, ret;
795 char buf[MAX_ARGSTR_LEN + 1];
796
797 seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p');
798 seq_printf(m, ":%s/%s", tp->call.system, tp->call.name);
799
800 if (!tp->symbol)
801 seq_printf(m, " 0x%p", tp->rp.kp.addr);
802 else if (tp->rp.kp.offset)
803 seq_printf(m, " %s+%u", probe_symbol(tp), tp->rp.kp.offset);
804 else
805 seq_printf(m, " %s", probe_symbol(tp));
806
807 for (i = 0; i < tp->nr_args; i++) {
808 ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i].fetch);
809 if (ret < 0) {
810 pr_warning("Argument%d decoding error(%d).\n", i, ret);
811 return ret;
812 }
813 seq_printf(m, " %s=%s", tp->args[i].name, buf);
814 }
815 seq_printf(m, "\n");
816 return 0;
817}
818
819static const struct seq_operations probes_seq_op = {
820 .start = probes_seq_start,
821 .next = probes_seq_next,
822 .stop = probes_seq_stop,
823 .show = probes_seq_show
824};
825
826static int probes_open(struct inode *inode, struct file *file)
827{
828 if ((file->f_mode & FMODE_WRITE) &&
829 (file->f_flags & O_TRUNC))
830 cleanup_all_probes();
831
832 return seq_open(file, &probes_seq_op);
833}
834
835static int command_trace_probe(const char *buf)
836{
837 char **argv;
838 int argc = 0, ret = 0;
839
840 argv = argv_split(GFP_KERNEL, buf, &argc);
841 if (!argv)
842 return -ENOMEM;
843
844 if (argc)
845 ret = create_trace_probe(argc, argv);
846
847 argv_free(argv);
848 return ret;
849}
850
851#define WRITE_BUFSIZE 128
852
853static ssize_t probes_write(struct file *file, const char __user *buffer,
854 size_t count, loff_t *ppos)
855{
856 char *kbuf, *tmp;
857 int ret;
858 size_t done;
859 size_t size;
860
861 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
862 if (!kbuf)
863 return -ENOMEM;
864
865 ret = done = 0;
866 while (done < count) {
867 size = count - done;
868 if (size >= WRITE_BUFSIZE)
869 size = WRITE_BUFSIZE - 1;
870 if (copy_from_user(kbuf, buffer + done, size)) {
871 ret = -EFAULT;
872 goto out;
873 }
874 kbuf[size] = '\0';
875 tmp = strchr(kbuf, '\n');
876 if (tmp) {
877 *tmp = '\0';
878 size = tmp - kbuf + 1;
879 } else if (done + size < count) {
880 pr_warning("Line length is too long: "
881 "Should be less than %d.", WRITE_BUFSIZE);
882 ret = -EINVAL;
883 goto out;
884 }
885 done += size;
886 /* Remove comments */
887 tmp = strchr(kbuf, '#');
888 if (tmp)
889 *tmp = '\0';
890
891 ret = command_trace_probe(kbuf);
892 if (ret)
893 goto out;
894 }
895 ret = done;
896out:
897 kfree(kbuf);
898 return ret;
899}
900
901static const struct file_operations kprobe_events_ops = {
902 .owner = THIS_MODULE,
903 .open = probes_open,
904 .read = seq_read,
905 .llseek = seq_lseek,
906 .release = seq_release,
907 .write = probes_write,
908};
909
910/* Probes profiling interfaces */
911static int probes_profile_seq_show(struct seq_file *m, void *v)
912{
913 struct trace_probe *tp = v;
914
915 seq_printf(m, " %-44s %15lu %15lu\n", tp->call.name, tp->nhit,
916 tp->rp.kp.nmissed);
917
918 return 0;
919}
920
921static const struct seq_operations profile_seq_op = {
922 .start = probes_seq_start,
923 .next = probes_seq_next,
924 .stop = probes_seq_stop,
925 .show = probes_profile_seq_show
926};
927
928static int profile_open(struct inode *inode, struct file *file)
929{
930 return seq_open(file, &profile_seq_op);
931}
932
933static const struct file_operations kprobe_profile_ops = {
934 .owner = THIS_MODULE,
935 .open = profile_open,
936 .read = seq_read,
937 .llseek = seq_lseek,
938 .release = seq_release,
939};
940
941/* Kprobe handler */
942static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
943{
944 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
945 struct kprobe_trace_entry *entry;
946 struct ring_buffer_event *event;
947 struct ring_buffer *buffer;
948 int size, i, pc;
949 unsigned long irq_flags;
950 struct ftrace_event_call *call = &tp->call;
951
952 tp->nhit++;
953
954 local_save_flags(irq_flags);
955 pc = preempt_count();
956
957 size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
958
959 event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
960 irq_flags, pc);
961 if (!event)
962 return 0;
963
964 entry = ring_buffer_event_data(event);
965 entry->nargs = tp->nr_args;
966 entry->ip = (unsigned long)kp->addr;
967 for (i = 0; i < tp->nr_args; i++)
968 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
969
970 if (!filter_current_check_discard(buffer, call, entry, event))
971 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
972 return 0;
973}
974
975/* Kretprobe handler */
976static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
977 struct pt_regs *regs)
978{
979 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
980 struct kretprobe_trace_entry *entry;
981 struct ring_buffer_event *event;
982 struct ring_buffer *buffer;
983 int size, i, pc;
984 unsigned long irq_flags;
985 struct ftrace_event_call *call = &tp->call;
986
987 local_save_flags(irq_flags);
988 pc = preempt_count();
989
990 size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
991
992 event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
993 irq_flags, pc);
994 if (!event)
995 return 0;
996
997 entry = ring_buffer_event_data(event);
998 entry->nargs = tp->nr_args;
999 entry->func = (unsigned long)tp->rp.kp.addr;
1000 entry->ret_ip = (unsigned long)ri->ret_addr;
1001 for (i = 0; i < tp->nr_args; i++)
1002 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1003
1004 if (!filter_current_check_discard(buffer, call, entry, event))
1005 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
1006
1007 return 0;
1008}
1009
1010/* Event entry printers */
1011enum print_line_t
1012print_kprobe_event(struct trace_iterator *iter, int flags)
1013{
1014 struct kprobe_trace_entry *field;
1015 struct trace_seq *s = &iter->seq;
1016 struct trace_event *event;
1017 struct trace_probe *tp;
1018 int i;
1019
1020 field = (struct kprobe_trace_entry *)iter->ent;
1021 event = ftrace_find_event(field->ent.type);
1022 tp = container_of(event, struct trace_probe, event);
1023
1024 if (!trace_seq_printf(s, "%s: (", tp->call.name))
1025 goto partial;
1026
1027 if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
1028 goto partial;
1029
1030 if (!trace_seq_puts(s, ")"))
1031 goto partial;
1032
1033 for (i = 0; i < field->nargs; i++)
1034 if (!trace_seq_printf(s, " %s=%lx",
1035 tp->args[i].name, field->args[i]))
1036 goto partial;
1037
1038 if (!trace_seq_puts(s, "\n"))
1039 goto partial;
1040
1041 return TRACE_TYPE_HANDLED;
1042partial:
1043 return TRACE_TYPE_PARTIAL_LINE;
1044}
1045
1046enum print_line_t
1047print_kretprobe_event(struct trace_iterator *iter, int flags)
1048{
1049 struct kretprobe_trace_entry *field;
1050 struct trace_seq *s = &iter->seq;
1051 struct trace_event *event;
1052 struct trace_probe *tp;
1053 int i;
1054
1055 field = (struct kretprobe_trace_entry *)iter->ent;
1056 event = ftrace_find_event(field->ent.type);
1057 tp = container_of(event, struct trace_probe, event);
1058
1059 if (!trace_seq_printf(s, "%s: (", tp->call.name))
1060 goto partial;
1061
1062 if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
1063 goto partial;
1064
1065 if (!trace_seq_puts(s, " <- "))
1066 goto partial;
1067
1068 if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
1069 goto partial;
1070
1071 if (!trace_seq_puts(s, ")"))
1072 goto partial;
1073
1074 for (i = 0; i < field->nargs; i++)
1075 if (!trace_seq_printf(s, " %s=%lx",
1076 tp->args[i].name, field->args[i]))
1077 goto partial;
1078
1079 if (!trace_seq_puts(s, "\n"))
1080 goto partial;
1081
1082 return TRACE_TYPE_HANDLED;
1083partial:
1084 return TRACE_TYPE_PARTIAL_LINE;
1085}
1086
1087static int probe_event_enable(struct ftrace_event_call *call)
1088{
1089 struct trace_probe *tp = (struct trace_probe *)call->data;
1090
1091 tp->flags |= TP_FLAG_TRACE;
1092 if (probe_is_return(tp))
1093 return enable_kretprobe(&tp->rp);
1094 else
1095 return enable_kprobe(&tp->rp.kp);
1096}
1097
1098static void probe_event_disable(struct ftrace_event_call *call)
1099{
1100 struct trace_probe *tp = (struct trace_probe *)call->data;
1101
1102 tp->flags &= ~TP_FLAG_TRACE;
1103 if (!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE))) {
1104 if (probe_is_return(tp))
1105 disable_kretprobe(&tp->rp);
1106 else
1107 disable_kprobe(&tp->rp.kp);
1108 }
1109}
1110
1111static int probe_event_raw_init(struct ftrace_event_call *event_call)
1112{
1113 INIT_LIST_HEAD(&event_call->fields);
1114
1115 return 0;
1116}
1117
1118#undef DEFINE_FIELD
1119#define DEFINE_FIELD(type, item, name, is_signed) \
1120 do { \
1121 ret = trace_define_field(event_call, #type, name, \
1122 offsetof(typeof(field), item), \
1123 sizeof(field.item), is_signed, \
1124 FILTER_OTHER); \
1125 if (ret) \
1126 return ret; \
1127 } while (0)
1128
1129static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
1130{
1131 int ret, i;
1132 struct kprobe_trace_entry field;
1133 struct trace_probe *tp = (struct trace_probe *)event_call->data;
1134
1135 ret = trace_define_common_fields(event_call);
1136 if (ret)
1137 return ret;
1138
1139 DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
1140 DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
1141 /* Set argument names as fields */
1142 for (i = 0; i < tp->nr_args; i++)
1143 DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0);
1144 return 0;
1145}
1146
1147static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
1148{
1149 int ret, i;
1150 struct kretprobe_trace_entry field;
1151 struct trace_probe *tp = (struct trace_probe *)event_call->data;
1152
1153 ret = trace_define_common_fields(event_call);
1154 if (ret)
1155 return ret;
1156
1157 DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
1158 DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
1159 DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
1160 /* Set argument names as fields */
1161 for (i = 0; i < tp->nr_args; i++)
1162 DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0);
1163 return 0;
1164}
1165
1166static int __probe_event_show_format(struct trace_seq *s,
1167 struct trace_probe *tp, const char *fmt,
1168 const char *arg)
1169{
1170 int i;
1171
1172 /* Show format */
1173 if (!trace_seq_printf(s, "\nprint fmt: \"%s", fmt))
1174 return 0;
1175
1176 for (i = 0; i < tp->nr_args; i++)
1177 if (!trace_seq_printf(s, " %s=%%lx", tp->args[i].name))
1178 return 0;
1179
1180 if (!trace_seq_printf(s, "\", %s", arg))
1181 return 0;
1182
1183 for (i = 0; i < tp->nr_args; i++)
1184 if (!trace_seq_printf(s, ", REC->%s", tp->args[i].name))
1185 return 0;
1186
1187 return trace_seq_puts(s, "\n");
1188}
1189
1190#undef SHOW_FIELD
1191#define SHOW_FIELD(type, item, name) \
1192 do { \
1193 ret = trace_seq_printf(s, "\tfield: " #type " %s;\t" \
1194 "offset:%u;\tsize:%u;\n", name, \
1195 (unsigned int)offsetof(typeof(field), item),\
1196 (unsigned int)sizeof(type)); \
1197 if (!ret) \
1198 return 0; \
1199 } while (0)
1200
1201static int kprobe_event_show_format(struct ftrace_event_call *call,
1202 struct trace_seq *s)
1203{
1204 struct kprobe_trace_entry field __attribute__((unused));
1205 int ret, i;
1206 struct trace_probe *tp = (struct trace_probe *)call->data;
1207
1208 SHOW_FIELD(unsigned long, ip, FIELD_STRING_IP);
1209 SHOW_FIELD(int, nargs, FIELD_STRING_NARGS);
1210
1211 /* Show fields */
1212 for (i = 0; i < tp->nr_args; i++)
1213 SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
1214 trace_seq_puts(s, "\n");
1215
1216 return __probe_event_show_format(s, tp, "(%lx)",
1217 "REC->" FIELD_STRING_IP);
1218}
1219
1220static int kretprobe_event_show_format(struct ftrace_event_call *call,
1221 struct trace_seq *s)
1222{
1223 struct kretprobe_trace_entry field __attribute__((unused));
1224 int ret, i;
1225 struct trace_probe *tp = (struct trace_probe *)call->data;
1226
1227 SHOW_FIELD(unsigned long, func, FIELD_STRING_FUNC);
1228 SHOW_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP);
1229 SHOW_FIELD(int, nargs, FIELD_STRING_NARGS);
1230
1231 /* Show fields */
1232 for (i = 0; i < tp->nr_args; i++)
1233 SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
1234 trace_seq_puts(s, "\n");
1235
1236 return __probe_event_show_format(s, tp, "(%lx <- %lx)",
1237 "REC->" FIELD_STRING_FUNC
1238 ", REC->" FIELD_STRING_RETIP);
1239}
1240
1241#ifdef CONFIG_EVENT_PROFILE
1242
1243/* Kprobe profile handler */
1244static __kprobes int kprobe_profile_func(struct kprobe *kp,
1245 struct pt_regs *regs)
1246{
1247 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1248 struct ftrace_event_call *call = &tp->call;
1249 struct kprobe_trace_entry *entry;
1250 struct trace_entry *ent;
1251 int size, __size, i, pc, __cpu;
1252 unsigned long irq_flags;
1253 char *trace_buf;
1254 char *raw_data;
1255 int rctx;
1256
1257 pc = preempt_count();
1258 __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
1259 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1260 size -= sizeof(u32);
1261 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
1262 "profile buffer not large enough"))
1263 return 0;
1264
1265 /*
1266 * Protect the non nmi buffer
1267 * This also protects the rcu read side
1268 */
1269 local_irq_save(irq_flags);
1270
1271 rctx = perf_swevent_get_recursion_context();
1272 if (rctx < 0)
1273 goto end_recursion;
1274
1275 __cpu = smp_processor_id();
1276
1277 if (in_nmi())
1278 trace_buf = rcu_dereference(perf_trace_buf_nmi);
1279 else
1280 trace_buf = rcu_dereference(perf_trace_buf);
1281
1282 if (!trace_buf)
1283 goto end;
1284
1285 raw_data = per_cpu_ptr(trace_buf, __cpu);
1286
1287 /* Zero dead bytes from alignment to avoid buffer leak to userspace */
1288 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
1289 entry = (struct kprobe_trace_entry *)raw_data;
1290 ent = &entry->ent;
1291
1292 tracing_generic_entry_update(ent, irq_flags, pc);
1293 ent->type = call->id;
1294 entry->nargs = tp->nr_args;
1295 entry->ip = (unsigned long)kp->addr;
1296 for (i = 0; i < tp->nr_args; i++)
1297 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1298 perf_tp_event(call->id, entry->ip, 1, entry, size);
1299
1300end:
1301 perf_swevent_put_recursion_context(rctx);
1302end_recursion:
1303 local_irq_restore(irq_flags);
1304
1305 return 0;
1306}
1307
1308/* Kretprobe profile handler */
1309static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
1310 struct pt_regs *regs)
1311{
1312 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1313 struct ftrace_event_call *call = &tp->call;
1314 struct kretprobe_trace_entry *entry;
1315 struct trace_entry *ent;
1316 int size, __size, i, pc, __cpu;
1317 unsigned long irq_flags;
1318 char *trace_buf;
1319 char *raw_data;
1320 int rctx;
1321
1322 pc = preempt_count();
1323 __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
1324 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1325 size -= sizeof(u32);
1326 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
1327 "profile buffer not large enough"))
1328 return 0;
1329
1330 /*
1331 * Protect the non nmi buffer
1332 * This also protects the rcu read side
1333 */
1334 local_irq_save(irq_flags);
1335
1336 rctx = perf_swevent_get_recursion_context();
1337 if (rctx < 0)
1338 goto end_recursion;
1339
1340 __cpu = smp_processor_id();
1341
1342 if (in_nmi())
1343 trace_buf = rcu_dereference(perf_trace_buf_nmi);
1344 else
1345 trace_buf = rcu_dereference(perf_trace_buf);
1346
1347 if (!trace_buf)
1348 goto end;
1349
1350 raw_data = per_cpu_ptr(trace_buf, __cpu);
1351
1352 /* Zero dead bytes from alignment to avoid buffer leak to userspace */
1353 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
1354 entry = (struct kretprobe_trace_entry *)raw_data;
1355 ent = &entry->ent;
1356
1357 tracing_generic_entry_update(ent, irq_flags, pc);
1358 ent->type = call->id;
1359 entry->nargs = tp->nr_args;
1360 entry->func = (unsigned long)tp->rp.kp.addr;
1361 entry->ret_ip = (unsigned long)ri->ret_addr;
1362 for (i = 0; i < tp->nr_args; i++)
1363 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1364 perf_tp_event(call->id, entry->ret_ip, 1, entry, size);
1365
1366end:
1367 perf_swevent_put_recursion_context(rctx);
1368end_recursion:
1369 local_irq_restore(irq_flags);
1370
1371 return 0;
1372}
1373
1374static int probe_profile_enable(struct ftrace_event_call *call)
1375{
1376 struct trace_probe *tp = (struct trace_probe *)call->data;
1377
1378 tp->flags |= TP_FLAG_PROFILE;
1379
1380 if (probe_is_return(tp))
1381 return enable_kretprobe(&tp->rp);
1382 else
1383 return enable_kprobe(&tp->rp.kp);
1384}
1385
1386static void probe_profile_disable(struct ftrace_event_call *call)
1387{
1388 struct trace_probe *tp = (struct trace_probe *)call->data;
1389
1390 tp->flags &= ~TP_FLAG_PROFILE;
1391
1392 if (!(tp->flags & TP_FLAG_TRACE)) {
1393 if (probe_is_return(tp))
1394 disable_kretprobe(&tp->rp);
1395 else
1396 disable_kprobe(&tp->rp.kp);
1397 }
1398}
1399#endif /* CONFIG_EVENT_PROFILE */
1400
1401
1402static __kprobes
1403int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
1404{
1405 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1406
1407 if (tp->flags & TP_FLAG_TRACE)
1408 kprobe_trace_func(kp, regs);
1409#ifdef CONFIG_EVENT_PROFILE
1410 if (tp->flags & TP_FLAG_PROFILE)
1411 kprobe_profile_func(kp, regs);
1412#endif /* CONFIG_EVENT_PROFILE */
1413 return 0; /* We don't tweek kernel, so just return 0 */
1414}
1415
1416static __kprobes
1417int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
1418{
1419 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1420
1421 if (tp->flags & TP_FLAG_TRACE)
1422 kretprobe_trace_func(ri, regs);
1423#ifdef CONFIG_EVENT_PROFILE
1424 if (tp->flags & TP_FLAG_PROFILE)
1425 kretprobe_profile_func(ri, regs);
1426#endif /* CONFIG_EVENT_PROFILE */
1427 return 0; /* We don't tweek kernel, so just return 0 */
1428}
1429
1430static int register_probe_event(struct trace_probe *tp)
1431{
1432 struct ftrace_event_call *call = &tp->call;
1433 int ret;
1434
1435 /* Initialize ftrace_event_call */
1436 if (probe_is_return(tp)) {
1437 tp->event.trace = print_kretprobe_event;
1438 call->raw_init = probe_event_raw_init;
1439 call->show_format = kretprobe_event_show_format;
1440 call->define_fields = kretprobe_event_define_fields;
1441 } else {
1442 tp->event.trace = print_kprobe_event;
1443 call->raw_init = probe_event_raw_init;
1444 call->show_format = kprobe_event_show_format;
1445 call->define_fields = kprobe_event_define_fields;
1446 }
1447 call->event = &tp->event;
1448 call->id = register_ftrace_event(&tp->event);
1449 if (!call->id)
1450 return -ENODEV;
1451 call->enabled = 0;
1452 call->regfunc = probe_event_enable;
1453 call->unregfunc = probe_event_disable;
1454
1455#ifdef CONFIG_EVENT_PROFILE
1456 atomic_set(&call->profile_count, -1);
1457 call->profile_enable = probe_profile_enable;
1458 call->profile_disable = probe_profile_disable;
1459#endif
1460 call->data = tp;
1461 ret = trace_add_event_call(call);
1462 if (ret) {
1463 pr_info("Failed to register kprobe event: %s\n", call->name);
1464 unregister_ftrace_event(&tp->event);
1465 }
1466 return ret;
1467}
1468
1469static void unregister_probe_event(struct trace_probe *tp)
1470{
1471 /* tp->event is unregistered in trace_remove_event_call() */
1472 trace_remove_event_call(&tp->call);
1473}
1474
1475/* Make a debugfs interface for controling probe points */
1476static __init int init_kprobe_trace(void)
1477{
1478 struct dentry *d_tracer;
1479 struct dentry *entry;
1480
1481 d_tracer = tracing_init_dentry();
1482 if (!d_tracer)
1483 return 0;
1484
1485 entry = debugfs_create_file("kprobe_events", 0644, d_tracer,
1486 NULL, &kprobe_events_ops);
1487
1488 /* Event list interface */
1489 if (!entry)
1490 pr_warning("Could not create debugfs "
1491 "'kprobe_events' entry\n");
1492
1493 /* Profile interface */
1494 entry = debugfs_create_file("kprobe_profile", 0444, d_tracer,
1495 NULL, &kprobe_profile_ops);
1496
1497 if (!entry)
1498 pr_warning("Could not create debugfs "
1499 "'kprobe_profile' entry\n");
1500 return 0;
1501}
1502fs_initcall(init_kprobe_trace);
1503
1504
1505#ifdef CONFIG_FTRACE_STARTUP_TEST
1506
1507static int kprobe_trace_selftest_target(int a1, int a2, int a3,
1508 int a4, int a5, int a6)
1509{
1510 return a1 + a2 + a3 + a4 + a5 + a6;
1511}
1512
1513static __init int kprobe_trace_self_tests_init(void)
1514{
1515 int ret;
1516 int (*target)(int, int, int, int, int, int);
1517
1518 target = kprobe_trace_selftest_target;
1519
1520 pr_info("Testing kprobe tracing: ");
1521
1522 ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target "
1523 "$arg1 $arg2 $arg3 $arg4 $stack $stack0");
1524 if (WARN_ON_ONCE(ret))
1525 pr_warning("error enabling function entry\n");
1526
1527 ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target "
1528 "$retval");
1529 if (WARN_ON_ONCE(ret))
1530 pr_warning("error enabling function return\n");
1531
1532 ret = target(1, 2, 3, 4, 5, 6);
1533
1534 cleanup_all_probes();
1535
1536 pr_cont("OK\n");
1537 return 0;
1538}
1539
1540late_initcall(kprobe_trace_self_tests_init);
1541
1542#endif
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
new file mode 100644
index 000000000000..acb87d4a4ac1
--- /dev/null
+++ b/kernel/trace/trace_ksym.c
@@ -0,0 +1,551 @@
1/*
2 * trace_ksym.c - Kernel Symbol Tracer
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright (C) IBM Corporation, 2009
19 */
20
21#include <linux/kallsyms.h>
22#include <linux/uaccess.h>
23#include <linux/debugfs.h>
24#include <linux/ftrace.h>
25#include <linux/module.h>
26#include <linux/fs.h>
27
28#include "trace_output.h"
29#include "trace_stat.h"
30#include "trace.h"
31
32#include <linux/hw_breakpoint.h>
33#include <asm/hw_breakpoint.h>
34
35/*
36 * For now, let us restrict the no. of symbols traced simultaneously to number
37 * of available hardware breakpoint registers.
38 */
39#define KSYM_TRACER_MAX HBP_NUM
40
41#define KSYM_TRACER_OP_LEN 3 /* rw- */
42
43struct trace_ksym {
44 struct perf_event **ksym_hbp;
45 struct perf_event_attr attr;
46#ifdef CONFIG_PROFILE_KSYM_TRACER
47 unsigned long counter;
48#endif
49 struct hlist_node ksym_hlist;
50};
51
52static struct trace_array *ksym_trace_array;
53
54static unsigned int ksym_filter_entry_count;
55static unsigned int ksym_tracing_enabled;
56
57static HLIST_HEAD(ksym_filter_head);
58
59static DEFINE_MUTEX(ksym_tracer_mutex);
60
61#ifdef CONFIG_PROFILE_KSYM_TRACER
62
63#define MAX_UL_INT 0xffffffff
64
65void ksym_collect_stats(unsigned long hbp_hit_addr)
66{
67 struct hlist_node *node;
68 struct trace_ksym *entry;
69
70 rcu_read_lock();
71 hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
72 if ((entry->attr.bp_addr == hbp_hit_addr) &&
73 (entry->counter <= MAX_UL_INT)) {
74 entry->counter++;
75 break;
76 }
77 }
78 rcu_read_unlock();
79}
80#endif /* CONFIG_PROFILE_KSYM_TRACER */
81
82void ksym_hbp_handler(struct perf_event *hbp, int nmi,
83 struct perf_sample_data *data,
84 struct pt_regs *regs)
85{
86 struct ring_buffer_event *event;
87 struct ksym_trace_entry *entry;
88 struct ring_buffer *buffer;
89 int pc;
90
91 if (!ksym_tracing_enabled)
92 return;
93
94 buffer = ksym_trace_array->buffer;
95
96 pc = preempt_count();
97
98 event = trace_buffer_lock_reserve(buffer, TRACE_KSYM,
99 sizeof(*entry), 0, pc);
100 if (!event)
101 return;
102
103 entry = ring_buffer_event_data(event);
104 entry->ip = instruction_pointer(regs);
105 entry->type = hw_breakpoint_type(hbp);
106 entry->addr = hw_breakpoint_addr(hbp);
107 strlcpy(entry->cmd, current->comm, TASK_COMM_LEN);
108
109#ifdef CONFIG_PROFILE_KSYM_TRACER
110 ksym_collect_stats(hw_breakpoint_addr(hbp));
111#endif /* CONFIG_PROFILE_KSYM_TRACER */
112
113 trace_buffer_unlock_commit(buffer, event, 0, pc);
114}
115
116/* Valid access types are represented as
117 *
118 * rw- : Set Read/Write Access Breakpoint
119 * -w- : Set Write Access Breakpoint
120 * --- : Clear Breakpoints
121 * --x : Set Execution Break points (Not available yet)
122 *
123 */
124static int ksym_trace_get_access_type(char *str)
125{
126 int access = 0;
127
128 if (str[0] == 'r')
129 access |= HW_BREAKPOINT_R;
130
131 if (str[1] == 'w')
132 access |= HW_BREAKPOINT_W;
133
134 if (str[2] == 'x')
135 access |= HW_BREAKPOINT_X;
136
137 switch (access) {
138 case HW_BREAKPOINT_R:
139 case HW_BREAKPOINT_W:
140 case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
141 return access;
142 default:
143 return -EINVAL;
144 }
145}
146
147/*
148 * There can be several possible malformed requests and we attempt to capture
149 * all of them. We enumerate some of the rules
150 * 1. We will not allow kernel symbols with ':' since it is used as a delimiter.
151 * i.e. multiple ':' symbols disallowed. Possible uses are of the form
152 * <module>:<ksym_name>:<op>.
153 * 2. No delimiter symbol ':' in the input string
154 * 3. Spurious operator symbols or symbols not in their respective positions
155 * 4. <ksym_name>:--- i.e. clear breakpoint request when ksym_name not in file
156 * 5. Kernel symbol not a part of /proc/kallsyms
157 * 6. Duplicate requests
158 */
159static int parse_ksym_trace_str(char *input_string, char **ksymname,
160 unsigned long *addr)
161{
162 int ret;
163
164 *ksymname = strsep(&input_string, ":");
165 *addr = kallsyms_lookup_name(*ksymname);
166
167 /* Check for malformed request: (2), (1) and (5) */
168 if ((!input_string) ||
169 (strlen(input_string) != KSYM_TRACER_OP_LEN) ||
170 (*addr == 0))
171 return -EINVAL;;
172
173 ret = ksym_trace_get_access_type(input_string);
174
175 return ret;
176}
177
178int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
179{
180 struct trace_ksym *entry;
181 int ret = -ENOMEM;
182
183 if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
184 printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No"
185 " new requests for tracing can be accepted now.\n",
186 KSYM_TRACER_MAX);
187 return -ENOSPC;
188 }
189
190 entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
191 if (!entry)
192 return -ENOMEM;
193
194 hw_breakpoint_init(&entry->attr);
195
196 entry->attr.bp_type = op;
197 entry->attr.bp_addr = addr;
198 entry->attr.bp_len = HW_BREAKPOINT_LEN_4;
199
200 ret = -EAGAIN;
201 entry->ksym_hbp = register_wide_hw_breakpoint(&entry->attr,
202 ksym_hbp_handler);
203
204 if (IS_ERR(entry->ksym_hbp)) {
205 ret = PTR_ERR(entry->ksym_hbp);
206 printk(KERN_INFO "ksym_tracer request failed. Try again"
207 " later!!\n");
208 goto err;
209 }
210
211 hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head);
212 ksym_filter_entry_count++;
213
214 return 0;
215
216err:
217 kfree(entry);
218
219 return ret;
220}
221
222static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
223 size_t count, loff_t *ppos)
224{
225 struct trace_ksym *entry;
226 struct hlist_node *node;
227 struct trace_seq *s;
228 ssize_t cnt = 0;
229 int ret;
230
231 s = kmalloc(sizeof(*s), GFP_KERNEL);
232 if (!s)
233 return -ENOMEM;
234 trace_seq_init(s);
235
236 mutex_lock(&ksym_tracer_mutex);
237
238 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
239 ret = trace_seq_printf(s, "%pS:", (void *)entry->attr.bp_addr);
240 if (entry->attr.bp_type == HW_BREAKPOINT_R)
241 ret = trace_seq_puts(s, "r--\n");
242 else if (entry->attr.bp_type == HW_BREAKPOINT_W)
243 ret = trace_seq_puts(s, "-w-\n");
244 else if (entry->attr.bp_type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R))
245 ret = trace_seq_puts(s, "rw-\n");
246 WARN_ON_ONCE(!ret);
247 }
248
249 cnt = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
250
251 mutex_unlock(&ksym_tracer_mutex);
252
253 kfree(s);
254
255 return cnt;
256}
257
258static void __ksym_trace_reset(void)
259{
260 struct trace_ksym *entry;
261 struct hlist_node *node, *node1;
262
263 mutex_lock(&ksym_tracer_mutex);
264 hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
265 ksym_hlist) {
266 unregister_wide_hw_breakpoint(entry->ksym_hbp);
267 ksym_filter_entry_count--;
268 hlist_del_rcu(&(entry->ksym_hlist));
269 synchronize_rcu();
270 kfree(entry);
271 }
272 mutex_unlock(&ksym_tracer_mutex);
273}
274
275static ssize_t ksym_trace_filter_write(struct file *file,
276 const char __user *buffer,
277 size_t count, loff_t *ppos)
278{
279 struct trace_ksym *entry;
280 struct hlist_node *node;
281 char *input_string, *ksymname = NULL;
282 unsigned long ksym_addr = 0;
283 int ret, op, changed = 0;
284
285 input_string = kzalloc(count + 1, GFP_KERNEL);
286 if (!input_string)
287 return -ENOMEM;
288
289 if (copy_from_user(input_string, buffer, count)) {
290 kfree(input_string);
291 return -EFAULT;
292 }
293 input_string[count] = '\0';
294
295 strstrip(input_string);
296
297 /*
298 * Clear all breakpoints if:
299 * 1: echo > ksym_trace_filter
300 * 2: echo 0 > ksym_trace_filter
301 * 3: echo "*:---" > ksym_trace_filter
302 */
303 if (!input_string[0] || !strcmp(input_string, "0") ||
304 !strcmp(input_string, "*:---")) {
305 __ksym_trace_reset();
306 kfree(input_string);
307 return count;
308 }
309
310 ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
311 if (ret < 0) {
312 kfree(input_string);
313 return ret;
314 }
315
316 mutex_lock(&ksym_tracer_mutex);
317
318 ret = -EINVAL;
319 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
320 if (entry->attr.bp_addr == ksym_addr) {
321 /* Check for malformed request: (6) */
322 if (entry->attr.bp_type != op)
323 changed = 1;
324 else
325 goto out;
326 break;
327 }
328 }
329 if (changed) {
330 unregister_wide_hw_breakpoint(entry->ksym_hbp);
331 entry->attr.bp_type = op;
332 ret = 0;
333 if (op > 0) {
334 entry->ksym_hbp =
335 register_wide_hw_breakpoint(&entry->attr,
336 ksym_hbp_handler);
337 if (IS_ERR(entry->ksym_hbp))
338 ret = PTR_ERR(entry->ksym_hbp);
339 else
340 goto out;
341 }
342 /* Error or "symbol:---" case: drop it */
343 ksym_filter_entry_count--;
344 hlist_del_rcu(&(entry->ksym_hlist));
345 synchronize_rcu();
346 kfree(entry);
347 goto out;
348 } else {
349 /* Check for malformed request: (4) */
350 if (op == 0)
351 goto out;
352 ret = process_new_ksym_entry(ksymname, op, ksym_addr);
353 }
354out:
355 mutex_unlock(&ksym_tracer_mutex);
356
357 kfree(input_string);
358
359 if (!ret)
360 ret = count;
361 return ret;
362}
363
364static const struct file_operations ksym_tracing_fops = {
365 .open = tracing_open_generic,
366 .read = ksym_trace_filter_read,
367 .write = ksym_trace_filter_write,
368};
369
370static void ksym_trace_reset(struct trace_array *tr)
371{
372 ksym_tracing_enabled = 0;
373 __ksym_trace_reset();
374}
375
376static int ksym_trace_init(struct trace_array *tr)
377{
378 int cpu, ret = 0;
379
380 for_each_online_cpu(cpu)
381 tracing_reset(tr, cpu);
382 ksym_tracing_enabled = 1;
383 ksym_trace_array = tr;
384
385 return ret;
386}
387
388static void ksym_trace_print_header(struct seq_file *m)
389{
390 seq_puts(m,
391 "# TASK-PID CPU# Symbol "
392 "Type Function\n");
393 seq_puts(m,
394 "# | | | "
395 " | |\n");
396}
397
398static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
399{
400 struct trace_entry *entry = iter->ent;
401 struct trace_seq *s = &iter->seq;
402 struct ksym_trace_entry *field;
403 char str[KSYM_SYMBOL_LEN];
404 int ret;
405
406 if (entry->type != TRACE_KSYM)
407 return TRACE_TYPE_UNHANDLED;
408
409 trace_assign_type(field, entry);
410
411 ret = trace_seq_printf(s, "%11s-%-5d [%03d] %pS", field->cmd,
412 entry->pid, iter->cpu, (char *)field->addr);
413 if (!ret)
414 return TRACE_TYPE_PARTIAL_LINE;
415
416 switch (field->type) {
417 case HW_BREAKPOINT_R:
418 ret = trace_seq_printf(s, " R ");
419 break;
420 case HW_BREAKPOINT_W:
421 ret = trace_seq_printf(s, " W ");
422 break;
423 case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
424 ret = trace_seq_printf(s, " RW ");
425 break;
426 default:
427 return TRACE_TYPE_PARTIAL_LINE;
428 }
429
430 if (!ret)
431 return TRACE_TYPE_PARTIAL_LINE;
432
433 sprint_symbol(str, field->ip);
434 ret = trace_seq_printf(s, "%s\n", str);
435 if (!ret)
436 return TRACE_TYPE_PARTIAL_LINE;
437
438 return TRACE_TYPE_HANDLED;
439}
440
441struct tracer ksym_tracer __read_mostly =
442{
443 .name = "ksym_tracer",
444 .init = ksym_trace_init,
445 .reset = ksym_trace_reset,
446#ifdef CONFIG_FTRACE_SELFTEST
447 .selftest = trace_selftest_startup_ksym,
448#endif
449 .print_header = ksym_trace_print_header,
450 .print_line = ksym_trace_output
451};
452
453__init static int init_ksym_trace(void)
454{
455 struct dentry *d_tracer;
456 struct dentry *entry;
457
458 d_tracer = tracing_init_dentry();
459 ksym_filter_entry_count = 0;
460
461 entry = debugfs_create_file("ksym_trace_filter", 0644, d_tracer,
462 NULL, &ksym_tracing_fops);
463 if (!entry)
464 pr_warning("Could not create debugfs "
465 "'ksym_trace_filter' file\n");
466
467 return register_tracer(&ksym_tracer);
468}
469device_initcall(init_ksym_trace);
470
471
472#ifdef CONFIG_PROFILE_KSYM_TRACER
473static int ksym_tracer_stat_headers(struct seq_file *m)
474{
475 seq_puts(m, " Access Type ");
476 seq_puts(m, " Symbol Counter\n");
477 seq_puts(m, " ----------- ");
478 seq_puts(m, " ------ -------\n");
479 return 0;
480}
481
482static int ksym_tracer_stat_show(struct seq_file *m, void *v)
483{
484 struct hlist_node *stat = v;
485 struct trace_ksym *entry;
486 int access_type = 0;
487 char fn_name[KSYM_NAME_LEN];
488
489 entry = hlist_entry(stat, struct trace_ksym, ksym_hlist);
490
491 access_type = entry->attr.bp_type;
492
493 switch (access_type) {
494 case HW_BREAKPOINT_R:
495 seq_puts(m, " R ");
496 break;
497 case HW_BREAKPOINT_W:
498 seq_puts(m, " W ");
499 break;
500 case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
501 seq_puts(m, " RW ");
502 break;
503 default:
504 seq_puts(m, " NA ");
505 }
506
507 if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0)
508 seq_printf(m, " %-36s", fn_name);
509 else
510 seq_printf(m, " %-36s", "<NA>");
511 seq_printf(m, " %15lu\n", entry->counter);
512
513 return 0;
514}
515
516static void *ksym_tracer_stat_start(struct tracer_stat *trace)
517{
518 return ksym_filter_head.first;
519}
520
521static void *
522ksym_tracer_stat_next(void *v, int idx)
523{
524 struct hlist_node *stat = v;
525
526 return stat->next;
527}
528
529static struct tracer_stat ksym_tracer_stats = {
530 .name = "ksym_tracer",
531 .stat_start = ksym_tracer_stat_start,
532 .stat_next = ksym_tracer_stat_next,
533 .stat_headers = ksym_tracer_stat_headers,
534 .stat_show = ksym_tracer_stat_show
535};
536
537__init static int ksym_tracer_stat_init(void)
538{
539 int ret;
540
541 ret = register_stat_tracer(&ksym_tracer_stats);
542 if (ret) {
543 printk(KERN_WARNING "Warning: could not register "
544 "ksym tracer stats\n");
545 return 1;
546 }
547
548 return 0;
549}
550fs_initcall(ksym_tracer_stat_init);
551#endif /* CONFIG_PROFILE_KSYM_TRACER */
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index f572f44c6e1e..8e46b3323cdc 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -23,13 +23,21 @@ static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
23 23
24static int next_event_type = __TRACE_LAST_TYPE + 1; 24static int next_event_type = __TRACE_LAST_TYPE + 1;
25 25
26void trace_print_seq(struct seq_file *m, struct trace_seq *s) 26int trace_print_seq(struct seq_file *m, struct trace_seq *s)
27{ 27{
28 int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len; 28 int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
29 int ret;
30
31 ret = seq_write(m, s->buffer, len);
29 32
30 seq_write(m, s->buffer, len); 33 /*
34 * Only reset this buffer if we successfully wrote to the
35 * seq_file buffer.
36 */
37 if (!ret)
38 trace_seq_init(s);
31 39
32 trace_seq_init(s); 40 return ret;
33} 41}
34 42
35enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter) 43enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter)
@@ -69,6 +77,9 @@ enum print_line_t trace_print_printk_msg_only(struct trace_iterator *iter)
69 * @s: trace sequence descriptor 77 * @s: trace sequence descriptor
70 * @fmt: printf format string 78 * @fmt: printf format string
71 * 79 *
80 * It returns 0 if the trace oversizes the buffer's free
81 * space, 1 otherwise.
82 *
72 * The tracer may use either sequence operations or its own 83 * The tracer may use either sequence operations or its own
73 * copy to user routines. To simplify formating of a trace 84 * copy to user routines. To simplify formating of a trace
74 * trace_seq_printf is used to store strings into a special 85 * trace_seq_printf is used to store strings into a special
@@ -82,7 +93,7 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
82 va_list ap; 93 va_list ap;
83 int ret; 94 int ret;
84 95
85 if (!len) 96 if (s->full || !len)
86 return 0; 97 return 0;
87 98
88 va_start(ap, fmt); 99 va_start(ap, fmt);
@@ -90,12 +101,14 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
90 va_end(ap); 101 va_end(ap);
91 102
92 /* If we can't write it all, don't bother writing anything */ 103 /* If we can't write it all, don't bother writing anything */
93 if (ret >= len) 104 if (ret >= len) {
105 s->full = 1;
94 return 0; 106 return 0;
107 }
95 108
96 s->len += ret; 109 s->len += ret;
97 110
98 return len; 111 return 1;
99} 112}
100EXPORT_SYMBOL_GPL(trace_seq_printf); 113EXPORT_SYMBOL_GPL(trace_seq_printf);
101 114
@@ -116,14 +129,16 @@ trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
116 int len = (PAGE_SIZE - 1) - s->len; 129 int len = (PAGE_SIZE - 1) - s->len;
117 int ret; 130 int ret;
118 131
119 if (!len) 132 if (s->full || !len)
120 return 0; 133 return 0;
121 134
122 ret = vsnprintf(s->buffer + s->len, len, fmt, args); 135 ret = vsnprintf(s->buffer + s->len, len, fmt, args);
123 136
124 /* If we can't write it all, don't bother writing anything */ 137 /* If we can't write it all, don't bother writing anything */
125 if (ret >= len) 138 if (ret >= len) {
139 s->full = 1;
126 return 0; 140 return 0;
141 }
127 142
128 s->len += ret; 143 s->len += ret;
129 144
@@ -136,14 +151,16 @@ int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
136 int len = (PAGE_SIZE - 1) - s->len; 151 int len = (PAGE_SIZE - 1) - s->len;
137 int ret; 152 int ret;
138 153
139 if (!len) 154 if (s->full || !len)
140 return 0; 155 return 0;
141 156
142 ret = bstr_printf(s->buffer + s->len, len, fmt, binary); 157 ret = bstr_printf(s->buffer + s->len, len, fmt, binary);
143 158
144 /* If we can't write it all, don't bother writing anything */ 159 /* If we can't write it all, don't bother writing anything */
145 if (ret >= len) 160 if (ret >= len) {
161 s->full = 1;
146 return 0; 162 return 0;
163 }
147 164
148 s->len += ret; 165 s->len += ret;
149 166
@@ -164,9 +181,14 @@ int trace_seq_puts(struct trace_seq *s, const char *str)
164{ 181{
165 int len = strlen(str); 182 int len = strlen(str);
166 183
167 if (len > ((PAGE_SIZE - 1) - s->len)) 184 if (s->full)
168 return 0; 185 return 0;
169 186
187 if (len > ((PAGE_SIZE - 1) - s->len)) {
188 s->full = 1;
189 return 0;
190 }
191
170 memcpy(s->buffer + s->len, str, len); 192 memcpy(s->buffer + s->len, str, len);
171 s->len += len; 193 s->len += len;
172 194
@@ -175,9 +197,14 @@ int trace_seq_puts(struct trace_seq *s, const char *str)
175 197
176int trace_seq_putc(struct trace_seq *s, unsigned char c) 198int trace_seq_putc(struct trace_seq *s, unsigned char c)
177{ 199{
178 if (s->len >= (PAGE_SIZE - 1)) 200 if (s->full)
179 return 0; 201 return 0;
180 202
203 if (s->len >= (PAGE_SIZE - 1)) {
204 s->full = 1;
205 return 0;
206 }
207
181 s->buffer[s->len++] = c; 208 s->buffer[s->len++] = c;
182 209
183 return 1; 210 return 1;
@@ -185,8 +212,13 @@ int trace_seq_putc(struct trace_seq *s, unsigned char c)
185 212
186int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len) 213int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len)
187{ 214{
188 if (len > ((PAGE_SIZE - 1) - s->len)) 215 if (s->full)
216 return 0;
217
218 if (len > ((PAGE_SIZE - 1) - s->len)) {
219 s->full = 1;
189 return 0; 220 return 0;
221 }
190 222
191 memcpy(s->buffer + s->len, mem, len); 223 memcpy(s->buffer + s->len, mem, len);
192 s->len += len; 224 s->len += len;
@@ -200,6 +232,9 @@ int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, size_t len)
200 const unsigned char *data = mem; 232 const unsigned char *data = mem;
201 int i, j; 233 int i, j;
202 234
235 if (s->full)
236 return 0;
237
203#ifdef __BIG_ENDIAN 238#ifdef __BIG_ENDIAN
204 for (i = 0, j = 0; i < len; i++) { 239 for (i = 0, j = 0; i < len; i++) {
205#else 240#else
@@ -217,8 +252,13 @@ void *trace_seq_reserve(struct trace_seq *s, size_t len)
217{ 252{
218 void *ret; 253 void *ret;
219 254
220 if (len > ((PAGE_SIZE - 1) - s->len)) 255 if (s->full)
256 return 0;
257
258 if (len > ((PAGE_SIZE - 1) - s->len)) {
259 s->full = 1;
221 return NULL; 260 return NULL;
261 }
222 262
223 ret = s->buffer + s->len; 263 ret = s->buffer + s->len;
224 s->len += len; 264 s->len += len;
@@ -230,8 +270,14 @@ int trace_seq_path(struct trace_seq *s, struct path *path)
230{ 270{
231 unsigned char *p; 271 unsigned char *p;
232 272
233 if (s->len >= (PAGE_SIZE - 1)) 273 if (s->full)
274 return 0;
275
276 if (s->len >= (PAGE_SIZE - 1)) {
277 s->full = 1;
234 return 0; 278 return 0;
279 }
280
235 p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len); 281 p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
236 if (!IS_ERR(p)) { 282 if (!IS_ERR(p)) {
237 p = mangle_path(s->buffer + s->len, p, "\n"); 283 p = mangle_path(s->buffer + s->len, p, "\n");
@@ -244,6 +290,7 @@ int trace_seq_path(struct trace_seq *s, struct path *path)
244 return 1; 290 return 1;
245 } 291 }
246 292
293 s->full = 1;
247 return 0; 294 return 0;
248} 295}
249 296
@@ -370,6 +417,9 @@ int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
370 unsigned long vmstart = 0; 417 unsigned long vmstart = 0;
371 int ret = 1; 418 int ret = 1;
372 419
420 if (s->full)
421 return 0;
422
373 if (mm) { 423 if (mm) {
374 const struct vm_area_struct *vma; 424 const struct vm_area_struct *vma;
375 425
@@ -486,16 +536,18 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
486 hardirq ? 'h' : softirq ? 's' : '.')) 536 hardirq ? 'h' : softirq ? 's' : '.'))
487 return 0; 537 return 0;
488 538
489 if (entry->lock_depth < 0) 539 if (entry->preempt_count)
490 ret = trace_seq_putc(s, '.'); 540 ret = trace_seq_printf(s, "%x", entry->preempt_count);
491 else 541 else
492 ret = trace_seq_printf(s, "%d", entry->lock_depth); 542 ret = trace_seq_putc(s, '.');
543
493 if (!ret) 544 if (!ret)
494 return 0; 545 return 0;
495 546
496 if (entry->preempt_count) 547 if (entry->lock_depth < 0)
497 return trace_seq_printf(s, "%x", entry->preempt_count); 548 return trace_seq_putc(s, '.');
498 return trace_seq_putc(s, '.'); 549
550 return trace_seq_printf(s, "%d", entry->lock_depth);
499} 551}
500 552
501static int 553static int
@@ -883,7 +935,7 @@ static int trace_ctxwake_raw(struct trace_iterator *iter, char S)
883 trace_assign_type(field, iter->ent); 935 trace_assign_type(field, iter->ent);
884 936
885 if (!S) 937 if (!S)
886 task_state_char(field->prev_state); 938 S = task_state_char(field->prev_state);
887 T = task_state_char(field->next_state); 939 T = task_state_char(field->next_state);
888 if (!trace_seq_printf(&iter->seq, "%d %d %c %d %d %d %c\n", 940 if (!trace_seq_printf(&iter->seq, "%d %d %c %d %d %d %c\n",
889 field->prev_pid, 941 field->prev_pid,
@@ -918,7 +970,7 @@ static int trace_ctxwake_hex(struct trace_iterator *iter, char S)
918 trace_assign_type(field, iter->ent); 970 trace_assign_type(field, iter->ent);
919 971
920 if (!S) 972 if (!S)
921 task_state_char(field->prev_state); 973 S = task_state_char(field->prev_state);
922 T = task_state_char(field->next_state); 974 T = task_state_char(field->next_state);
923 975
924 SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid); 976 SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid);
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index d2cdbabb4ead..dc98309e839a 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -17,6 +17,7 @@ static inline int trace_valid_entry(struct trace_entry *entry)
17 case TRACE_GRAPH_ENT: 17 case TRACE_GRAPH_ENT:
18 case TRACE_GRAPH_RET: 18 case TRACE_GRAPH_RET:
19 case TRACE_HW_BRANCHES: 19 case TRACE_HW_BRANCHES:
20 case TRACE_KSYM:
20 return 1; 21 return 1;
21 } 22 }
22 return 0; 23 return 0;
@@ -808,3 +809,57 @@ trace_selftest_startup_hw_branches(struct tracer *trace,
808 return ret; 809 return ret;
809} 810}
810#endif /* CONFIG_HW_BRANCH_TRACER */ 811#endif /* CONFIG_HW_BRANCH_TRACER */
812
813#ifdef CONFIG_KSYM_TRACER
814static int ksym_selftest_dummy;
815
816int
817trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
818{
819 unsigned long count;
820 int ret;
821
822 /* start the tracing */
823 ret = tracer_init(trace, tr);
824 if (ret) {
825 warn_failed_init_tracer(trace, ret);
826 return ret;
827 }
828
829 ksym_selftest_dummy = 0;
830 /* Register the read-write tracing request */
831
832 ret = process_new_ksym_entry("ksym_selftest_dummy",
833 HW_BREAKPOINT_R | HW_BREAKPOINT_W,
834 (unsigned long)(&ksym_selftest_dummy));
835
836 if (ret < 0) {
837 printk(KERN_CONT "ksym_trace read-write startup test failed\n");
838 goto ret_path;
839 }
840 /* Perform a read and a write operation over the dummy variable to
841 * trigger the tracer
842 */
843 if (ksym_selftest_dummy == 0)
844 ksym_selftest_dummy++;
845
846 /* stop the tracing. */
847 tracing_stop();
848 /* check the trace buffer */
849 ret = trace_test_buffer(tr, &count);
850 trace->reset(tr);
851 tracing_start();
852
853 /* read & write operations - one each is performed on the dummy variable
854 * triggering two entries in the trace buffer
855 */
856 if (!ret && count != 2) {
857 printk(KERN_CONT "Ksym tracer startup test failed");
858 ret = -1;
859 }
860
861ret_path:
862 return ret;
863}
864#endif /* CONFIG_KSYM_TRACER */
865
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 9fbce6c9d2e1..57501d90096a 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -14,6 +14,43 @@ static int sys_refcount_exit;
14static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls); 14static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
15static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls); 15static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
16 16
17extern unsigned long __start_syscalls_metadata[];
18extern unsigned long __stop_syscalls_metadata[];
19
20static struct syscall_metadata **syscalls_metadata;
21
22static struct syscall_metadata *find_syscall_meta(unsigned long syscall)
23{
24 struct syscall_metadata *start;
25 struct syscall_metadata *stop;
26 char str[KSYM_SYMBOL_LEN];
27
28
29 start = (struct syscall_metadata *)__start_syscalls_metadata;
30 stop = (struct syscall_metadata *)__stop_syscalls_metadata;
31 kallsyms_lookup(syscall, NULL, NULL, NULL, str);
32
33 for ( ; start < stop; start++) {
34 /*
35 * Only compare after the "sys" prefix. Archs that use
36 * syscall wrappers may have syscalls symbols aliases prefixed
37 * with "SyS" instead of "sys", leading to an unwanted
38 * mismatch.
39 */
40 if (start->name && !strcmp(start->name + 3, str + 3))
41 return start;
42 }
43 return NULL;
44}
45
46static struct syscall_metadata *syscall_nr_to_meta(int nr)
47{
48 if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
49 return NULL;
50
51 return syscalls_metadata[nr];
52}
53
17enum print_line_t 54enum print_line_t
18print_syscall_enter(struct trace_iterator *iter, int flags) 55print_syscall_enter(struct trace_iterator *iter, int flags)
19{ 56{
@@ -30,7 +67,7 @@ print_syscall_enter(struct trace_iterator *iter, int flags)
30 if (!entry) 67 if (!entry)
31 goto end; 68 goto end;
32 69
33 if (entry->enter_id != ent->type) { 70 if (entry->enter_event->id != ent->type) {
34 WARN_ON_ONCE(1); 71 WARN_ON_ONCE(1);
35 goto end; 72 goto end;
36 } 73 }
@@ -85,7 +122,7 @@ print_syscall_exit(struct trace_iterator *iter, int flags)
85 return TRACE_TYPE_HANDLED; 122 return TRACE_TYPE_HANDLED;
86 } 123 }
87 124
88 if (entry->exit_id != ent->type) { 125 if (entry->exit_event->id != ent->type) {
89 WARN_ON_ONCE(1); 126 WARN_ON_ONCE(1);
90 return TRACE_TYPE_UNHANDLED; 127 return TRACE_TYPE_UNHANDLED;
91 } 128 }
@@ -103,24 +140,19 @@ extern char *__bad_type_size(void);
103#define SYSCALL_FIELD(type, name) \ 140#define SYSCALL_FIELD(type, name) \
104 sizeof(type) != sizeof(trace.name) ? \ 141 sizeof(type) != sizeof(trace.name) ? \
105 __bad_type_size() : \ 142 __bad_type_size() : \
106 #type, #name, offsetof(typeof(trace), name), sizeof(trace.name) 143 #type, #name, offsetof(typeof(trace), name), \
144 sizeof(trace.name), is_signed_type(type)
107 145
108int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) 146int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
109{ 147{
110 int i; 148 int i;
111 int nr;
112 int ret; 149 int ret;
113 struct syscall_metadata *entry; 150 struct syscall_metadata *entry = call->data;
114 struct syscall_trace_enter trace; 151 struct syscall_trace_enter trace;
115 int offset = offsetof(struct syscall_trace_enter, args); 152 int offset = offsetof(struct syscall_trace_enter, args);
116 153
117 nr = syscall_name_to_nr(call->data); 154 ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
118 entry = syscall_nr_to_meta(nr); 155 "\tsigned:%u;\n",
119
120 if (!entry)
121 return 0;
122
123 ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n",
124 SYSCALL_FIELD(int, nr)); 156 SYSCALL_FIELD(int, nr));
125 if (!ret) 157 if (!ret)
126 return 0; 158 return 0;
@@ -130,8 +162,10 @@ int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
130 entry->args[i]); 162 entry->args[i]);
131 if (!ret) 163 if (!ret)
132 return 0; 164 return 0;
133 ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;\n", offset, 165 ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;"
134 sizeof(unsigned long)); 166 "\tsigned:%u;\n", offset,
167 sizeof(unsigned long),
168 is_signed_type(unsigned long));
135 if (!ret) 169 if (!ret)
136 return 0; 170 return 0;
137 offset += sizeof(unsigned long); 171 offset += sizeof(unsigned long);
@@ -163,10 +197,12 @@ int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s)
163 struct syscall_trace_exit trace; 197 struct syscall_trace_exit trace;
164 198
165 ret = trace_seq_printf(s, 199 ret = trace_seq_printf(s,
166 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" 200 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
167 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n", 201 "\tsigned:%u;\n"
202 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
203 "\tsigned:%u;\n",
168 SYSCALL_FIELD(int, nr), 204 SYSCALL_FIELD(int, nr),
169 SYSCALL_FIELD(unsigned long, ret)); 205 SYSCALL_FIELD(long, ret));
170 if (!ret) 206 if (!ret)
171 return 0; 207 return 0;
172 208
@@ -176,22 +212,19 @@ int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s)
176int syscall_enter_define_fields(struct ftrace_event_call *call) 212int syscall_enter_define_fields(struct ftrace_event_call *call)
177{ 213{
178 struct syscall_trace_enter trace; 214 struct syscall_trace_enter trace;
179 struct syscall_metadata *meta; 215 struct syscall_metadata *meta = call->data;
180 int ret; 216 int ret;
181 int nr;
182 int i; 217 int i;
183 int offset = offsetof(typeof(trace), args); 218 int offset = offsetof(typeof(trace), args);
184 219
185 nr = syscall_name_to_nr(call->data);
186 meta = syscall_nr_to_meta(nr);
187
188 if (!meta)
189 return 0;
190
191 ret = trace_define_common_fields(call); 220 ret = trace_define_common_fields(call);
192 if (ret) 221 if (ret)
193 return ret; 222 return ret;
194 223
224 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
225 if (ret)
226 return ret;
227
195 for (i = 0; i < meta->nb_args; i++) { 228 for (i = 0; i < meta->nb_args; i++) {
196 ret = trace_define_field(call, meta->types[i], 229 ret = trace_define_field(call, meta->types[i],
197 meta->args[i], offset, 230 meta->args[i], offset,
@@ -212,7 +245,11 @@ int syscall_exit_define_fields(struct ftrace_event_call *call)
212 if (ret) 245 if (ret)
213 return ret; 246 return ret;
214 247
215 ret = trace_define_field(call, SYSCALL_FIELD(unsigned long, ret), 0, 248 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
249 if (ret)
250 return ret;
251
252 ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
216 FILTER_OTHER); 253 FILTER_OTHER);
217 254
218 return ret; 255 return ret;
@@ -239,8 +276,8 @@ void ftrace_syscall_enter(struct pt_regs *regs, long id)
239 276
240 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; 277 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
241 278
242 event = trace_current_buffer_lock_reserve(&buffer, sys_data->enter_id, 279 event = trace_current_buffer_lock_reserve(&buffer,
243 size, 0, 0); 280 sys_data->enter_event->id, size, 0, 0);
244 if (!event) 281 if (!event)
245 return; 282 return;
246 283
@@ -271,8 +308,8 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret)
271 if (!sys_data) 308 if (!sys_data)
272 return; 309 return;
273 310
274 event = trace_current_buffer_lock_reserve(&buffer, sys_data->exit_id, 311 event = trace_current_buffer_lock_reserve(&buffer,
275 sizeof(*entry), 0, 0); 312 sys_data->exit_event->id, sizeof(*entry), 0, 0);
276 if (!event) 313 if (!event)
277 return; 314 return;
278 315
@@ -285,14 +322,12 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret)
285 trace_current_buffer_unlock_commit(buffer, event, 0, 0); 322 trace_current_buffer_unlock_commit(buffer, event, 0, 0);
286} 323}
287 324
288int reg_event_syscall_enter(void *ptr) 325int reg_event_syscall_enter(struct ftrace_event_call *call)
289{ 326{
290 int ret = 0; 327 int ret = 0;
291 int num; 328 int num;
292 char *name;
293 329
294 name = (char *)ptr; 330 num = ((struct syscall_metadata *)call->data)->syscall_nr;
295 num = syscall_name_to_nr(name);
296 if (num < 0 || num >= NR_syscalls) 331 if (num < 0 || num >= NR_syscalls)
297 return -ENOSYS; 332 return -ENOSYS;
298 mutex_lock(&syscall_trace_lock); 333 mutex_lock(&syscall_trace_lock);
@@ -309,13 +344,11 @@ int reg_event_syscall_enter(void *ptr)
309 return ret; 344 return ret;
310} 345}
311 346
312void unreg_event_syscall_enter(void *ptr) 347void unreg_event_syscall_enter(struct ftrace_event_call *call)
313{ 348{
314 int num; 349 int num;
315 char *name;
316 350
317 name = (char *)ptr; 351 num = ((struct syscall_metadata *)call->data)->syscall_nr;
318 num = syscall_name_to_nr(name);
319 if (num < 0 || num >= NR_syscalls) 352 if (num < 0 || num >= NR_syscalls)
320 return; 353 return;
321 mutex_lock(&syscall_trace_lock); 354 mutex_lock(&syscall_trace_lock);
@@ -326,14 +359,12 @@ void unreg_event_syscall_enter(void *ptr)
326 mutex_unlock(&syscall_trace_lock); 359 mutex_unlock(&syscall_trace_lock);
327} 360}
328 361
329int reg_event_syscall_exit(void *ptr) 362int reg_event_syscall_exit(struct ftrace_event_call *call)
330{ 363{
331 int ret = 0; 364 int ret = 0;
332 int num; 365 int num;
333 char *name;
334 366
335 name = (char *)ptr; 367 num = ((struct syscall_metadata *)call->data)->syscall_nr;
336 num = syscall_name_to_nr(name);
337 if (num < 0 || num >= NR_syscalls) 368 if (num < 0 || num >= NR_syscalls)
338 return -ENOSYS; 369 return -ENOSYS;
339 mutex_lock(&syscall_trace_lock); 370 mutex_lock(&syscall_trace_lock);
@@ -350,13 +381,11 @@ int reg_event_syscall_exit(void *ptr)
350 return ret; 381 return ret;
351} 382}
352 383
353void unreg_event_syscall_exit(void *ptr) 384void unreg_event_syscall_exit(struct ftrace_event_call *call)
354{ 385{
355 int num; 386 int num;
356 char *name;
357 387
358 name = (char *)ptr; 388 num = ((struct syscall_metadata *)call->data)->syscall_nr;
359 num = syscall_name_to_nr(name);
360 if (num < 0 || num >= NR_syscalls) 389 if (num < 0 || num >= NR_syscalls)
361 return; 390 return;
362 mutex_lock(&syscall_trace_lock); 391 mutex_lock(&syscall_trace_lock);
@@ -367,13 +396,44 @@ void unreg_event_syscall_exit(void *ptr)
367 mutex_unlock(&syscall_trace_lock); 396 mutex_unlock(&syscall_trace_lock);
368} 397}
369 398
370struct trace_event event_syscall_enter = { 399int init_syscall_trace(struct ftrace_event_call *call)
371 .trace = print_syscall_enter, 400{
372}; 401 int id;
402
403 id = register_ftrace_event(call->event);
404 if (!id)
405 return -ENODEV;
406 call->id = id;
407 INIT_LIST_HEAD(&call->fields);
408 return 0;
409}
410
411int __init init_ftrace_syscalls(void)
412{
413 struct syscall_metadata *meta;
414 unsigned long addr;
415 int i;
416
417 syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
418 NR_syscalls, GFP_KERNEL);
419 if (!syscalls_metadata) {
420 WARN_ON(1);
421 return -ENOMEM;
422 }
423
424 for (i = 0; i < NR_syscalls; i++) {
425 addr = arch_syscall_addr(i);
426 meta = find_syscall_meta(addr);
427 if (!meta)
428 continue;
429
430 meta->syscall_nr = i;
431 syscalls_metadata[i] = meta;
432 }
373 433
374struct trace_event event_syscall_exit = { 434 return 0;
375 .trace = print_syscall_exit, 435}
376}; 436core_initcall(init_ftrace_syscalls);
377 437
378#ifdef CONFIG_EVENT_PROFILE 438#ifdef CONFIG_EVENT_PROFILE
379 439
@@ -387,8 +447,10 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
387 struct syscall_metadata *sys_data; 447 struct syscall_metadata *sys_data;
388 struct syscall_trace_enter *rec; 448 struct syscall_trace_enter *rec;
389 unsigned long flags; 449 unsigned long flags;
450 char *trace_buf;
390 char *raw_data; 451 char *raw_data;
391 int syscall_nr; 452 int syscall_nr;
453 int rctx;
392 int size; 454 int size;
393 int cpu; 455 int cpu;
394 456
@@ -412,41 +474,42 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
412 /* Protect the per cpu buffer, begin the rcu read side */ 474 /* Protect the per cpu buffer, begin the rcu read side */
413 local_irq_save(flags); 475 local_irq_save(flags);
414 476
477 rctx = perf_swevent_get_recursion_context();
478 if (rctx < 0)
479 goto end_recursion;
480
415 cpu = smp_processor_id(); 481 cpu = smp_processor_id();
416 482
417 if (in_nmi()) 483 trace_buf = rcu_dereference(perf_trace_buf);
418 raw_data = rcu_dereference(trace_profile_buf_nmi);
419 else
420 raw_data = rcu_dereference(trace_profile_buf);
421 484
422 if (!raw_data) 485 if (!trace_buf)
423 goto end; 486 goto end;
424 487
425 raw_data = per_cpu_ptr(raw_data, cpu); 488 raw_data = per_cpu_ptr(trace_buf, cpu);
426 489
427 /* zero the dead bytes from align to not leak stack to user */ 490 /* zero the dead bytes from align to not leak stack to user */
428 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; 491 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
429 492
430 rec = (struct syscall_trace_enter *) raw_data; 493 rec = (struct syscall_trace_enter *) raw_data;
431 tracing_generic_entry_update(&rec->ent, 0, 0); 494 tracing_generic_entry_update(&rec->ent, 0, 0);
432 rec->ent.type = sys_data->enter_id; 495 rec->ent.type = sys_data->enter_event->id;
433 rec->nr = syscall_nr; 496 rec->nr = syscall_nr;
434 syscall_get_arguments(current, regs, 0, sys_data->nb_args, 497 syscall_get_arguments(current, regs, 0, sys_data->nb_args,
435 (unsigned long *)&rec->args); 498 (unsigned long *)&rec->args);
436 perf_tp_event(sys_data->enter_id, 0, 1, rec, size); 499 perf_tp_event(sys_data->enter_event->id, 0, 1, rec, size);
437 500
438end: 501end:
502 perf_swevent_put_recursion_context(rctx);
503end_recursion:
439 local_irq_restore(flags); 504 local_irq_restore(flags);
440} 505}
441 506
442int reg_prof_syscall_enter(char *name) 507int prof_sysenter_enable(struct ftrace_event_call *call)
443{ 508{
444 int ret = 0; 509 int ret = 0;
445 int num; 510 int num;
446 511
447 num = syscall_name_to_nr(name); 512 num = ((struct syscall_metadata *)call->data)->syscall_nr;
448 if (num < 0 || num >= NR_syscalls)
449 return -ENOSYS;
450 513
451 mutex_lock(&syscall_trace_lock); 514 mutex_lock(&syscall_trace_lock);
452 if (!sys_prof_refcount_enter) 515 if (!sys_prof_refcount_enter)
@@ -462,13 +525,11 @@ int reg_prof_syscall_enter(char *name)
462 return ret; 525 return ret;
463} 526}
464 527
465void unreg_prof_syscall_enter(char *name) 528void prof_sysenter_disable(struct ftrace_event_call *call)
466{ 529{
467 int num; 530 int num;
468 531
469 num = syscall_name_to_nr(name); 532 num = ((struct syscall_metadata *)call->data)->syscall_nr;
470 if (num < 0 || num >= NR_syscalls)
471 return;
472 533
473 mutex_lock(&syscall_trace_lock); 534 mutex_lock(&syscall_trace_lock);
474 sys_prof_refcount_enter--; 535 sys_prof_refcount_enter--;
@@ -484,7 +545,9 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
484 struct syscall_trace_exit *rec; 545 struct syscall_trace_exit *rec;
485 unsigned long flags; 546 unsigned long flags;
486 int syscall_nr; 547 int syscall_nr;
548 char *trace_buf;
487 char *raw_data; 549 char *raw_data;
550 int rctx;
488 int size; 551 int size;
489 int cpu; 552 int cpu;
490 553
@@ -510,17 +573,19 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
510 573
511 /* Protect the per cpu buffer, begin the rcu read side */ 574 /* Protect the per cpu buffer, begin the rcu read side */
512 local_irq_save(flags); 575 local_irq_save(flags);
576
577 rctx = perf_swevent_get_recursion_context();
578 if (rctx < 0)
579 goto end_recursion;
580
513 cpu = smp_processor_id(); 581 cpu = smp_processor_id();
514 582
515 if (in_nmi()) 583 trace_buf = rcu_dereference(perf_trace_buf);
516 raw_data = rcu_dereference(trace_profile_buf_nmi);
517 else
518 raw_data = rcu_dereference(trace_profile_buf);
519 584
520 if (!raw_data) 585 if (!trace_buf)
521 goto end; 586 goto end;
522 587
523 raw_data = per_cpu_ptr(raw_data, cpu); 588 raw_data = per_cpu_ptr(trace_buf, cpu);
524 589
525 /* zero the dead bytes from align to not leak stack to user */ 590 /* zero the dead bytes from align to not leak stack to user */
526 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; 591 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
@@ -528,24 +593,24 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
528 rec = (struct syscall_trace_exit *)raw_data; 593 rec = (struct syscall_trace_exit *)raw_data;
529 594
530 tracing_generic_entry_update(&rec->ent, 0, 0); 595 tracing_generic_entry_update(&rec->ent, 0, 0);
531 rec->ent.type = sys_data->exit_id; 596 rec->ent.type = sys_data->exit_event->id;
532 rec->nr = syscall_nr; 597 rec->nr = syscall_nr;
533 rec->ret = syscall_get_return_value(current, regs); 598 rec->ret = syscall_get_return_value(current, regs);
534 599
535 perf_tp_event(sys_data->exit_id, 0, 1, rec, size); 600 perf_tp_event(sys_data->exit_event->id, 0, 1, rec, size);
536 601
537end: 602end:
603 perf_swevent_put_recursion_context(rctx);
604end_recursion:
538 local_irq_restore(flags); 605 local_irq_restore(flags);
539} 606}
540 607
541int reg_prof_syscall_exit(char *name) 608int prof_sysexit_enable(struct ftrace_event_call *call)
542{ 609{
543 int ret = 0; 610 int ret = 0;
544 int num; 611 int num;
545 612
546 num = syscall_name_to_nr(name); 613 num = ((struct syscall_metadata *)call->data)->syscall_nr;
547 if (num < 0 || num >= NR_syscalls)
548 return -ENOSYS;
549 614
550 mutex_lock(&syscall_trace_lock); 615 mutex_lock(&syscall_trace_lock);
551 if (!sys_prof_refcount_exit) 616 if (!sys_prof_refcount_exit)
@@ -561,13 +626,11 @@ int reg_prof_syscall_exit(char *name)
561 return ret; 626 return ret;
562} 627}
563 628
564void unreg_prof_syscall_exit(char *name) 629void prof_sysexit_disable(struct ftrace_event_call *call)
565{ 630{
566 int num; 631 int num;
567 632
568 num = syscall_name_to_nr(name); 633 num = ((struct syscall_metadata *)call->data)->syscall_nr;
569 if (num < 0 || num >= NR_syscalls)
570 return;
571 634
572 mutex_lock(&syscall_trace_lock); 635 mutex_lock(&syscall_trace_lock);
573 sys_prof_refcount_exit--; 636 sys_prof_refcount_exit--;