Diffstat (limited to 'kernel/trace')
 kernel/trace/Kconfig                 |   38
 kernel/trace/Makefile                |    2
 kernel/trace/ftrace.c                |  405
 kernel/trace/power-traces.c          |    2
 kernel/trace/ring_buffer.c           |   69
 kernel/trace/ring_buffer_benchmark.c |   85
 kernel/trace/trace.c                 |  316
 kernel/trace/trace.h                 |  107
 kernel/trace/trace_clock.c           |   16
 kernel/trace/trace_entries.h         |   16
 kernel/trace/trace_event_profile.c   |   49
 kernel/trace/trace_events.c          |  228
 kernel/trace/trace_events_filter.c   |  423
 kernel/trace/trace_export.c          |   47
 kernel/trace/trace_functions_graph.c |  169
 kernel/trace/trace_hw_branches.c     |   51
 kernel/trace/trace_irqsoff.c         |    2
 kernel/trace/trace_kprobe.c          | 1533
 kernel/trace/trace_ksym.c            |  545
 kernel/trace/trace_output.c          |   75
 kernel/trace/trace_sched_wakeup.c    |   16
 kernel/trace/trace_selftest.c        |   59
 kernel/trace/trace_stack.c           |   16
 kernel/trace/trace_syscalls.c        |  235
 24 files changed, 3671 insertions(+), 833 deletions(-)
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index b416512ad17f..d006554888dc 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -339,6 +339,27 @@ config POWER_TRACER
 	  power management decisions, specifically the C-state and P-state
 	  behavior.
 
+config KSYM_TRACER
+	bool "Trace read and write access on kernel memory locations"
+	depends on HAVE_HW_BREAKPOINT
+	select TRACING
+	help
+	  This tracer helps find read and write operations on any given kernel
+	  symbol i.e. /proc/kallsyms.
+
+config PROFILE_KSYM_TRACER
+	bool "Profile all kernel memory accesses on 'watched' variables"
+	depends on KSYM_TRACER
+	help
+	  This tracer profiles kernel accesses on variables watched through the
+	  ksym tracer ftrace plugin. Depending upon the hardware, all read
+	  and write operations on kernel variables can be monitored for
+	  accesses.
+
+	  The results will be displayed in:
+	  /debugfs/tracing/profile_ksym
+
+	  Say N if unsure.
+
 config STACK_TRACER
 	bool "Trace max stack"
@@ -428,6 +449,23 @@ config BLK_DEV_IO_TRACE
 
 	  If unsure, say N.
 
+config KPROBE_EVENT
+	depends on KPROBES
+	depends on X86
+	bool "Enable kprobes-based dynamic events"
+	select TRACING
+	default y
+	help
+	  This allows the user to add tracing events (similar to tracepoints) on the fly
+	  via the ftrace interface. See Documentation/trace/kprobetrace.txt
+	  for more details.
+
+	  Those events can be inserted wherever kprobes can probe, and record
+	  various register and memory values.
+
+	  This option is also required by perf-probe subcommand of perf tools. If
+	  you want to use perf tools, this option is strongly recommended.
+
 config DYNAMIC_FTRACE
 	bool "enable/disable ftrace tracepoints dynamically"
 	depends on FUNCTION_TRACER
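
For orientation, the KPROBE_EVENT option above is consumed through the tracing debugfs files described in Documentation/trace/kprobetrace.txt. A minimal userspace sketch of defining and arming one probe follows; the probe name "myprobe" and the /sys/kernel/debug mount point are assumptions, not part of this patch.

/*
 * Hedged sketch: register a kprobe event via the ftrace interface
 * added by CONFIG_KPROBE_EVENT, then enable it.  Paths and the probe
 * name are illustrative assumptions.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int write_str(const char *path, const char *s)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return -1;
	if (write(fd, s, strlen(s)) < 0) {
		close(fd);
		return -1;
	}
	return close(fd);
}

int main(void)
{
	/* "p:" defines a probe point at function entry */
	if (write_str("/sys/kernel/debug/tracing/kprobe_events",
		      "p:myprobe do_sys_open\n"))
		perror("define probe");
	if (write_str("/sys/kernel/debug/tracing/events/kprobes/myprobe/enable",
		      "1\n"))
		perror("enable probe");
	return 0;
}
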
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 26f03ac07c2b..cd9ecd89ec77 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -53,6 +53,8 @@ obj-$(CONFIG_EVENT_TRACING) += trace_export.o
 obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
 obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
 obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
+obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
+obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
 obj-$(CONFIG_EVENT_TRACING) += power-traces.o
 
 libftrace-y := ftrace.o
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 6dc4e5ef7a01..7968762c8167 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -60,6 +60,13 @@ static int last_ftrace_enabled;
 /* Quick disabling of function tracer. */
 int function_trace_stop;
 
+/* List for set_ftrace_pid's pids. */
+LIST_HEAD(ftrace_pids);
+struct ftrace_pid {
+	struct list_head list;
+	struct pid *pid;
+};
+
 /*
  * ftrace_disabled is set when an anomaly is discovered.
  * ftrace_disabled is much stronger than ftrace_enabled.
@@ -78,6 +85,10 @@ ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
 ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
 ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
 
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+static int ftrace_set_func(unsigned long *array, int *idx, char *buffer);
+#endif
+
 static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
 {
 	struct ftrace_ops *op = ftrace_list;
@@ -155,7 +166,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
 	else
 		func = ftrace_list_func;
 
-	if (ftrace_pid_trace) {
+	if (!list_empty(&ftrace_pids)) {
 		set_ftrace_pid_function(func);
 		func = ftrace_pid_func;
 	}
@@ -203,7 +214,7 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
 	if (ftrace_list->next == &ftrace_list_end) {
 		ftrace_func_t func = ftrace_list->func;
 
-		if (ftrace_pid_trace) {
+		if (!list_empty(&ftrace_pids)) {
 			set_ftrace_pid_function(func);
 			func = ftrace_pid_func;
 		}
@@ -231,7 +242,7 @@ static void ftrace_update_pid_func(void)
 	func = __ftrace_trace_function;
 #endif
 
-	if (ftrace_pid_trace) {
+	if (!list_empty(&ftrace_pids)) {
 		set_ftrace_pid_function(func);
 		func = ftrace_pid_func;
 	} else {
@@ -821,8 +832,6 @@ static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
 }
 #endif /* CONFIG_FUNCTION_PROFILER */
 
-/* set when tracing only a pid */
-struct pid *ftrace_pid_trace;
 static struct pid * const ftrace_swapper_pid = &init_struct_pid;
 
 #ifdef CONFIG_DYNAMIC_FTRACE
@@ -1261,12 +1270,34 @@ static int ftrace_update_code(struct module *mod)
 		ftrace_new_addrs = p->newlist;
 		p->flags = 0L;
 
-		/* convert record (i.e, patch mcount-call with NOP) */
-		if (ftrace_code_disable(mod, p)) {
-			p->flags |= FTRACE_FL_CONVERTED;
-			ftrace_update_cnt++;
-		} else
-			ftrace_free_rec(p);
+		/*
+		 * Do the initial record convertion from mcount jump
+		 * to the NOP instructions.
+		 */
+		if (!ftrace_code_disable(mod, p)) {
+			ftrace_free_rec(p);
+			continue;
+		}
+
+		p->flags |= FTRACE_FL_CONVERTED;
+		ftrace_update_cnt++;
+
+		/*
+		 * If the tracing is enabled, go ahead and enable the record.
+		 *
+		 * The reason not to enable the record immediatelly is the
+		 * inherent check of ftrace_make_nop/ftrace_make_call for
+		 * correct previous instructions. Making first the NOP
+		 * conversion puts the module to the correct state, thus
+		 * passing the ftrace_make_call check.
+		 */
+		if (ftrace_start_up) {
+			int failed = __ftrace_replace_code(p, 1);
+			if (failed) {
+				ftrace_bug(failed, p->ip);
+				ftrace_free_rec(p);
+			}
+		}
 	}
 
 	stop = ftrace_now(raw_smp_processor_id());
@@ -1656,60 +1687,6 @@ ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
 	return ret;
 }
 
-enum {
-	MATCH_FULL,
-	MATCH_FRONT_ONLY,
-	MATCH_MIDDLE_ONLY,
-	MATCH_END_ONLY,
-};
-
-/*
- * (static function - no need for kernel doc)
- *
- * Pass in a buffer containing a glob and this function will
- * set search to point to the search part of the buffer and
- * return the type of search it is (see enum above).
- * This does modify buff.
- *
- * Returns enum type.
- * search returns the pointer to use for comparison.
- * not returns 1 if buff started with a '!'
- *     0 otherwise.
- */
-static int
-ftrace_setup_glob(char *buff, int len, char **search, int *not)
-{
-	int type = MATCH_FULL;
-	int i;
-
-	if (buff[0] == '!') {
-		*not = 1;
-		buff++;
-		len--;
-	} else
-		*not = 0;
-
-	*search = buff;
-
-	for (i = 0; i < len; i++) {
-		if (buff[i] == '*') {
-			if (!i) {
-				*search = buff + 1;
-				type = MATCH_END_ONLY;
-			} else {
-				if (type == MATCH_END_ONLY)
-					type = MATCH_MIDDLE_ONLY;
-				else
-					type = MATCH_FRONT_ONLY;
-				buff[i] = 0;
-				break;
-			}
-		}
-	}
-
-	return type;
-}
-
 static int ftrace_match(char *str, char *regex, int len, int type)
 {
 	int matched = 0;
@@ -1747,7 +1724,7 @@ ftrace_match_record(struct dyn_ftrace *rec, char *regex, int len, int type)
 	return ftrace_match(str, regex, len, type);
 }
 
-static void ftrace_match_records(char *buff, int len, int enable)
+static int ftrace_match_records(char *buff, int len, int enable)
 {
 	unsigned int search_len;
 	struct ftrace_page *pg;
@@ -1756,9 +1733,10 @@ static void ftrace_match_records(char *buff, int len, int enable)
 	char *search;
 	int type;
 	int not;
+	int found = 0;
 
 	flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
-	type = ftrace_setup_glob(buff, len, &search, &not);
+	type = filter_parse_regex(buff, len, &search, &not);
 
 	search_len = strlen(search);
 
@@ -1773,6 +1751,7 @@ static void ftrace_match_records(char *buff, int len, int enable)
 				rec->flags &= ~flag;
 			else
 				rec->flags |= flag;
+			found = 1;
 		}
 		/*
 		 * Only enable filtering if we have a function that
@@ -1782,6 +1761,8 @@ static void ftrace_match_records(char *buff, int len, int enable)
 			ftrace_filtered = 1;
 	} while_for_each_ftrace_rec();
 	mutex_unlock(&ftrace_lock);
+
+	return found;
 }
 
 static int
@@ -1803,7 +1784,7 @@ ftrace_match_module_record(struct dyn_ftrace *rec, char *mod,
 	return 1;
 }
 
-static void ftrace_match_module_records(char *buff, char *mod, int enable)
+static int ftrace_match_module_records(char *buff, char *mod, int enable)
 {
 	unsigned search_len = 0;
 	struct ftrace_page *pg;
@@ -1812,6 +1793,7 @@ static void ftrace_match_module_records(char *buff, char *mod, int enable)
 	char *search = buff;
 	unsigned long flag;
 	int not = 0;
+	int found = 0;
 
 	flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
 
@@ -1826,7 +1808,7 @@ static void ftrace_match_module_records(char *buff, char *mod, int enable)
 	}
 
 	if (strlen(buff)) {
-		type = ftrace_setup_glob(buff, strlen(buff), &search, &not);
+		type = filter_parse_regex(buff, strlen(buff), &search, &not);
 		search_len = strlen(search);
 	}
 
@@ -1842,12 +1824,15 @@ static void ftrace_match_module_records(char *buff, char *mod, int enable)
 				rec->flags &= ~flag;
 			else
 				rec->flags |= flag;
+			found = 1;
 		}
 		if (enable && (rec->flags & FTRACE_FL_FILTER))
 			ftrace_filtered = 1;
 
 	} while_for_each_ftrace_rec();
 	mutex_unlock(&ftrace_lock);
+
+	return found;
 }
 
 /*
@@ -1876,8 +1861,9 @@ ftrace_mod_callback(char *func, char *cmd, char *param, int enable)
 	if (!strlen(mod))
 		return -EINVAL;
 
-	ftrace_match_module_records(func, mod, enable);
+	if (ftrace_match_module_records(func, mod, enable))
 		return 0;
+	return -EINVAL;
 }
 
 static struct ftrace_func_command ftrace_mod_cmd = {
@@ -1991,7 +1977,7 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
 	int count = 0;
 	char *search;
 
-	type = ftrace_setup_glob(glob, strlen(glob), &search, &not);
+	type = filter_parse_regex(glob, strlen(glob), &search, &not);
 	len = strlen(search);
 
 	/* we do not support '!' for function probes */
@@ -2068,7 +2054,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
 	else if (glob) {
 		int not;
 
-		type = ftrace_setup_glob(glob, strlen(glob), &search, &not);
+		type = filter_parse_regex(glob, strlen(glob), &search, &not);
 		len = strlen(search);
 
 		/* we do not support '!' for function probes */
@@ -2174,8 +2160,9 @@ static int ftrace_process_regex(char *buff, int len, int enable)
 	func = strsep(&next, ":");
 
 	if (!next) {
-		ftrace_match_records(func, len, enable);
+		if (ftrace_match_records(func, len, enable))
 			return 0;
+		return ret;
 	}
 
 	/* command found */
@@ -2221,10 +2208,9 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
 	    !trace_parser_cont(parser)) {
 		ret = ftrace_process_regex(parser->buffer,
 					   parser->idx, enable);
+		trace_parser_clear(parser);
 		if (ret)
 			goto out_unlock;
-
-		trace_parser_clear(parser);
 	}
 
 	ret = read;
@@ -2312,6 +2298,32 @@ static int __init set_ftrace_filter(char *str)
 }
 __setup("ftrace_filter=", set_ftrace_filter);
 
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata;
+static int __init set_graph_function(char *str)
+{
+	strlcpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE);
+	return 1;
+}
+__setup("ftrace_graph_filter=", set_graph_function);
+
+static void __init set_ftrace_early_graph(char *buf)
+{
+	int ret;
+	char *func;
+
+	while (buf) {
+		func = strsep(&buf, ",");
+		/* we allow only one expression at a time */
+		ret = ftrace_set_func(ftrace_graph_funcs, &ftrace_graph_count,
+				      func);
+		if (ret)
+			printk(KERN_DEBUG "ftrace: function %s not "
+			       "traceable\n", func);
+	}
+}
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
 static void __init set_ftrace_early_filter(char *buf, int enable)
 {
 	char *func;
@@ -2328,6 +2340,10 @@ static void __init set_ftrace_early_filters(void)
 		set_ftrace_early_filter(ftrace_filter_buf, 1);
 	if (ftrace_notrace_buf[0])
 		set_ftrace_early_filter(ftrace_notrace_buf, 0);
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	if (ftrace_graph_buf[0])
+		set_ftrace_early_graph(ftrace_graph_buf);
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 }
 
 static int
@@ -2513,7 +2529,7 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
 		return -ENODEV;
 
 	/* decode regex */
-	type = ftrace_setup_glob(buffer, strlen(buffer), &search, &not);
+	type = filter_parse_regex(buffer, strlen(buffer), &search, &not);
 	if (not)
 		return -EINVAL;
 
@@ -2536,10 +2552,9 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
 				exists = true;
 				break;
 			}
-			if (!exists) {
+			if (!exists)
 				array[(*idx)++] = rec->ip;
 			found = 1;
-			}
 		}
 	} while_for_each_ftrace_rec();
 
@@ -2624,7 +2639,7 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
 	return 0;
 }
 
-static int ftrace_convert_nops(struct module *mod,
+static int ftrace_process_locs(struct module *mod,
 			       unsigned long *start,
 			       unsigned long *end)
 {
@@ -2684,7 +2699,7 @@ static void ftrace_init_module(struct module *mod,
 {
 	if (ftrace_disabled || start == end)
 		return;
-	ftrace_convert_nops(mod, start, end);
+	ftrace_process_locs(mod, start, end);
 }
 
 static int ftrace_module_notify(struct notifier_block *self,
@@ -2745,7 +2760,7 @@ void __init ftrace_init(void)
 
 	last_ftrace_enabled = ftrace_enabled = 1;
 
-	ret = ftrace_convert_nops(NULL,
+	ret = ftrace_process_locs(NULL,
 				  __start_mcount_loc,
 				  __stop_mcount_loc);
 
@@ -2778,23 +2793,6 @@ static inline void ftrace_startup_enable(int command) { }
 # define ftrace_shutdown_sysctl()	do { } while (0)
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
-static ssize_t
-ftrace_pid_read(struct file *file, char __user *ubuf,
-		       size_t cnt, loff_t *ppos)
-{
-	char buf[64];
-	int r;
-
-	if (ftrace_pid_trace == ftrace_swapper_pid)
-		r = sprintf(buf, "swapper tasks\n");
-	else if (ftrace_pid_trace)
-		r = sprintf(buf, "%u\n", pid_vnr(ftrace_pid_trace));
-	else
-		r = sprintf(buf, "no pid\n");
-
-	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
-}
-
 static void clear_ftrace_swapper(void)
 {
 	struct task_struct *p;
@@ -2845,14 +2843,12 @@ static void set_ftrace_pid(struct pid *pid)
 	rcu_read_unlock();
 }
 
-static void clear_ftrace_pid_task(struct pid **pid)
+static void clear_ftrace_pid_task(struct pid *pid)
 {
-	if (*pid == ftrace_swapper_pid)
+	if (pid == ftrace_swapper_pid)
 		clear_ftrace_swapper();
 	else
-		clear_ftrace_pid(*pid);
-
-	*pid = NULL;
+		clear_ftrace_pid(pid);
 }
 
 static void set_ftrace_pid_task(struct pid *pid)
@@ -2863,74 +2859,184 @@ static void set_ftrace_pid_task(struct pid *pid)
 	set_ftrace_pid(pid);
 }
 
-static ssize_t
-ftrace_pid_write(struct file *filp, const char __user *ubuf,
-		 size_t cnt, loff_t *ppos)
+static int ftrace_pid_add(int p)
 {
 	struct pid *pid;
-	char buf[64];
-	long val;
-	int ret;
+	struct ftrace_pid *fpid;
+	int ret = -EINVAL;
 
-	if (cnt >= sizeof(buf))
-		return -EINVAL;
+	mutex_lock(&ftrace_lock);
 
-	if (copy_from_user(&buf, ubuf, cnt))
-		return -EFAULT;
+	if (!p)
+		pid = ftrace_swapper_pid;
+	else
+		pid = find_get_pid(p);
 
-	buf[cnt] = 0;
+	if (!pid)
+		goto out;
 
-	ret = strict_strtol(buf, 10, &val);
-	if (ret < 0)
-		return ret;
+	ret = 0;
 
-	mutex_lock(&ftrace_lock);
-	if (val < 0) {
-		/* disable pid tracing */
-		if (!ftrace_pid_trace)
-			goto out;
+	list_for_each_entry(fpid, &ftrace_pids, list)
+		if (fpid->pid == pid)
+			goto out_put;
 
-		clear_ftrace_pid_task(&ftrace_pid_trace);
+	ret = -ENOMEM;
 
-	} else {
-		/* swapper task is special */
-		if (!val) {
-			pid = ftrace_swapper_pid;
-			if (pid == ftrace_pid_trace)
-				goto out;
-		} else {
-			pid = find_get_pid(val);
+	fpid = kmalloc(sizeof(*fpid), GFP_KERNEL);
+	if (!fpid)
+		goto out_put;
 
-			if (pid == ftrace_pid_trace) {
-				put_pid(pid);
-				goto out;
-			}
-		}
+	list_add(&fpid->list, &ftrace_pids);
+	fpid->pid = pid;
 
-		if (ftrace_pid_trace)
-			clear_ftrace_pid_task(&ftrace_pid_trace);
+	set_ftrace_pid_task(pid);
 
-		if (!pid)
-			goto out;
+	ftrace_update_pid_func();
+	ftrace_startup_enable(0);
+
+	mutex_unlock(&ftrace_lock);
+	return 0;
+
+out_put:
+	if (pid != ftrace_swapper_pid)
+		put_pid(pid);
 
-		ftrace_pid_trace = pid;
+out:
+	mutex_unlock(&ftrace_lock);
+	return ret;
+}
+
+static void ftrace_pid_reset(void)
+{
+	struct ftrace_pid *fpid, *safe;
 
-		set_ftrace_pid_task(ftrace_pid_trace);
+	mutex_lock(&ftrace_lock);
+	list_for_each_entry_safe(fpid, safe, &ftrace_pids, list) {
+		struct pid *pid = fpid->pid;
+
+		clear_ftrace_pid_task(pid);
+
+		list_del(&fpid->list);
+		kfree(fpid);
 	}
 
-	/* update the function call */
 	ftrace_update_pid_func();
 	ftrace_startup_enable(0);
 
- out:
 	mutex_unlock(&ftrace_lock);
+}
 
-	return cnt;
+static void *fpid_start(struct seq_file *m, loff_t *pos)
+{
+	mutex_lock(&ftrace_lock);
+
+	if (list_empty(&ftrace_pids) && (!*pos))
+		return (void *) 1;
+
+	return seq_list_start(&ftrace_pids, *pos);
+}
+
+static void *fpid_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	if (v == (void *)1)
+		return NULL;
+
+	return seq_list_next(v, &ftrace_pids, pos);
+}
+
+static void fpid_stop(struct seq_file *m, void *p)
+{
+	mutex_unlock(&ftrace_lock);
+}
+
+static int fpid_show(struct seq_file *m, void *v)
+{
+	const struct ftrace_pid *fpid = list_entry(v, struct ftrace_pid, list);
+
+	if (v == (void *)1) {
+		seq_printf(m, "no pid\n");
+		return 0;
+	}
+
+	if (fpid->pid == ftrace_swapper_pid)
+		seq_printf(m, "swapper tasks\n");
+	else
+		seq_printf(m, "%u\n", pid_vnr(fpid->pid));
+
+	return 0;
+}
+
+static const struct seq_operations ftrace_pid_sops = {
+	.start = fpid_start,
+	.next = fpid_next,
+	.stop = fpid_stop,
+	.show = fpid_show,
+};
+
+static int
+ftrace_pid_open(struct inode *inode, struct file *file)
+{
+	int ret = 0;
+
+	if ((file->f_mode & FMODE_WRITE) &&
+	    (file->f_flags & O_TRUNC))
+		ftrace_pid_reset();
+
+	if (file->f_mode & FMODE_READ)
+		ret = seq_open(file, &ftrace_pid_sops);
+
+	return ret;
+}
+
+static ssize_t
+ftrace_pid_write(struct file *filp, const char __user *ubuf,
+		   size_t cnt, loff_t *ppos)
+{
+	char buf[64], *tmp;
+	long val;
+	int ret;
+
+	if (cnt >= sizeof(buf))
+		return -EINVAL;
+
+	if (copy_from_user(&buf, ubuf, cnt))
+		return -EFAULT;
+
+	buf[cnt] = 0;
+
+	/*
+	 * Allow "echo > set_ftrace_pid" or "echo -n '' > set_ftrace_pid"
+	 * to clean the filter quietly.
+	 */
+	tmp = strstrip(buf);
+	if (strlen(tmp) == 0)
+		return 1;
+
+	ret = strict_strtol(tmp, 10, &val);
+	if (ret < 0)
+		return ret;
+
+	ret = ftrace_pid_add(val);
+
+	return ret ? ret : cnt;
+}
+
+static int
+ftrace_pid_release(struct inode *inode, struct file *file)
+{
+	if (file->f_mode & FMODE_READ)
+		seq_release(inode, file);
+
+	return 0;
 }
 
 static const struct file_operations ftrace_pid_fops = {
-	.read = ftrace_pid_read,
+	.open = ftrace_pid_open,
 	.write = ftrace_pid_write,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = ftrace_pid_release,
 };
 
 static __init int ftrace_init_debugfs(void)
@@ -3293,4 +3399,3 @@ void ftrace_graph_stop(void)
 	ftrace_stop();
 }
 #endif
-
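
The ftrace.c changes above replace the single ftrace_pid_trace pointer with the ftrace_pids list: each write to set_ftrace_pid appends one pid, opening the file with O_TRUNC runs ftrace_pid_reset(), and reads now go through seq_file, one pid per line. A hedged userspace sketch of that interface, assuming the usual debugfs mount point:

/*
 * Sketch of the new multi-pid semantics; the path is an assumption
 * about where the tracing debugfs is mounted.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

#define PID_FILE "/sys/kernel/debug/tracing/set_ftrace_pid"

int main(void)
{
	char buf[256];
	ssize_t n;
	/* O_TRUNC triggers ftrace_pid_reset(), clearing any old pids */
	int fd = open(PID_FILE, O_WRONLY | O_TRUNC);

	if (fd < 0) {
		perror(PID_FILE);
		return 1;
	}
	write(fd, "1\n", 2);	/* add pid 1; one pid per write() */
	write(fd, "0\n", 2);	/* 0 maps to the swapper tasks */
	close(fd);

	/* read back: the seq_file lists every registered pid */
	fd = open(PID_FILE, O_RDONLY);
	while ((n = read(fd, buf, sizeof(buf) - 1)) > 0) {
		buf[n] = '\0';
		fputs(buf, stdout);
	}
	close(fd);
	return 0;
}
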
diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c
index e06c6e3d56a3..9f4f565b01e6 100644
--- a/kernel/trace/power-traces.c
+++ b/kernel/trace/power-traces.c
@@ -14,7 +14,5 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/power.h>
 
-EXPORT_TRACEPOINT_SYMBOL_GPL(power_start);
-EXPORT_TRACEPOINT_SYMBOL_GPL(power_end);
 EXPORT_TRACEPOINT_SYMBOL_GPL(power_frequency);
 
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 5dd017fea6f5..2326b04c95c4 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -397,18 +397,21 @@ int ring_buffer_print_page_header(struct trace_seq *s)
 	int ret;
 
 	ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t"
-			       "offset:0;\tsize:%u;\n",
-			       (unsigned int)sizeof(field.time_stamp));
+			       "offset:0;\tsize:%u;\tsigned:%u;\n",
+			       (unsigned int)sizeof(field.time_stamp),
+			       (unsigned int)is_signed_type(u64));
 
 	ret = trace_seq_printf(s, "\tfield: local_t commit;\t"
-			       "offset:%u;\tsize:%u;\n",
+			       "offset:%u;\tsize:%u;\tsigned:%u;\n",
 			       (unsigned int)offsetof(typeof(field), commit),
-			       (unsigned int)sizeof(field.commit));
+			       (unsigned int)sizeof(field.commit),
+			       (unsigned int)is_signed_type(long));
 
 	ret = trace_seq_printf(s, "\tfield: char data;\t"
-			       "offset:%u;\tsize:%u;\n",
+			       "offset:%u;\tsize:%u;\tsigned:%u;\n",
 			       (unsigned int)offsetof(typeof(field), data),
-			       (unsigned int)BUF_PAGE_SIZE);
+			       (unsigned int)BUF_PAGE_SIZE,
+			       (unsigned int)is_signed_type(char));
 
 	return ret;
 }
@@ -420,7 +423,7 @@ struct ring_buffer_per_cpu {
 	int cpu;
 	struct ring_buffer *buffer;
 	spinlock_t reader_lock; /* serialize readers */
-	raw_spinlock_t lock;
+	arch_spinlock_t lock;
 	struct lock_class_key lock_key;
 	struct list_head *pages;
 	struct buffer_page *head_page; /* read from head */
@@ -995,7 +998,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
 	cpu_buffer->buffer = buffer;
 	spin_lock_init(&cpu_buffer->reader_lock);
 	lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
-	cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+	cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
 
 	bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
 			    GFP_KERNEL, cpu_to_node(cpu));
@@ -1190,9 +1193,6 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
 	struct list_head *p;
 	unsigned i;
 
-	atomic_inc(&cpu_buffer->record_disabled);
-	synchronize_sched();
-
 	spin_lock_irq(&cpu_buffer->reader_lock);
 	rb_head_page_deactivate(cpu_buffer);
 
@@ -1208,12 +1208,9 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
 		return;
 
 	rb_reset_cpu(cpu_buffer);
-	spin_unlock_irq(&cpu_buffer->reader_lock);
-
 	rb_check_pages(cpu_buffer);
 
-	atomic_dec(&cpu_buffer->record_disabled);
-
+	spin_unlock_irq(&cpu_buffer->reader_lock);
 }
 
 static void
@@ -1224,9 +1221,6 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
 	struct list_head *p;
 	unsigned i;
 
-	atomic_inc(&cpu_buffer->record_disabled);
-	synchronize_sched();
-
 	spin_lock_irq(&cpu_buffer->reader_lock);
 	rb_head_page_deactivate(cpu_buffer);
 
@@ -1239,11 +1233,9 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
 		list_add_tail(&bpage->list, cpu_buffer->pages);
 	}
 	rb_reset_cpu(cpu_buffer);
-	spin_unlock_irq(&cpu_buffer->reader_lock);
-
 	rb_check_pages(cpu_buffer);
 
-	atomic_dec(&cpu_buffer->record_disabled);
+	spin_unlock_irq(&cpu_buffer->reader_lock);
 }
 
 /**
@@ -1251,11 +1243,6 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
  * @buffer: the buffer to resize.
  * @size: the new size.
  *
- * The tracer is responsible for making sure that the buffer is
- * not being used while changing the size.
- * Note: We may be able to change the above requirement by using
- * RCU synchronizations.
- *
  * Minimum size is 2 * BUF_PAGE_SIZE.
 *
  * Returns -1 on failure.
@@ -1287,6 +1274,11 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
 	if (size == buffer_size)
 		return size;
 
+	atomic_inc(&buffer->record_disabled);
+
+	/* Make sure all writers are done with this buffer. */
+	synchronize_sched();
+
 	mutex_lock(&buffer->mutex);
 	get_online_cpus();
 
@@ -1349,6 +1341,8 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
 	put_online_cpus();
 	mutex_unlock(&buffer->mutex);
 
+	atomic_dec(&buffer->record_disabled);
+
 	return size;
 
  free_pages:
@@ -1358,6 +1352,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
 	}
 	put_online_cpus();
 	mutex_unlock(&buffer->mutex);
+	atomic_dec(&buffer->record_disabled);
 	return -ENOMEM;
 
 	/*
@@ -1367,6 +1362,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
  out_fail:
 	put_online_cpus();
 	mutex_unlock(&buffer->mutex);
+	atomic_dec(&buffer->record_disabled);
 	return -1;
 }
 EXPORT_SYMBOL_GPL(ring_buffer_resize);
@@ -1787,9 +1783,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
 static struct ring_buffer_event *
 rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
 	     unsigned long length, unsigned long tail,
-	     struct buffer_page *commit_page,
 	     struct buffer_page *tail_page, u64 *ts)
 {
+	struct buffer_page *commit_page = cpu_buffer->commit_page;
 	struct ring_buffer *buffer = cpu_buffer->buffer;
 	struct buffer_page *next_page;
 	int ret;
@@ -1892,13 +1888,10 @@ static struct ring_buffer_event *
 __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 		  unsigned type, unsigned long length, u64 *ts)
 {
-	struct buffer_page *tail_page, *commit_page;
+	struct buffer_page *tail_page;
 	struct ring_buffer_event *event;
 	unsigned long tail, write;
 
-	commit_page = cpu_buffer->commit_page;
-	/* we just need to protect against interrupts */
-	barrier();
 	tail_page = cpu_buffer->tail_page;
 	write = local_add_return(length, &tail_page->write);
 
@@ -1909,7 +1902,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 	/* See if we shot pass the end of this buffer page */
 	if (write > BUF_PAGE_SIZE)
 		return rb_move_tail(cpu_buffer, length, tail,
-				    commit_page, tail_page, ts);
+				    tail_page, ts);
 
 	/* We reserved something on the buffer */
 
@@ -2834,7 +2827,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	int ret;
 
 	local_irq_save(flags);
-	__raw_spin_lock(&cpu_buffer->lock);
+	arch_spin_lock(&cpu_buffer->lock);
 
  again:
 	/*
@@ -2923,7 +2916,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 		goto again;
 
  out:
-	__raw_spin_unlock(&cpu_buffer->lock);
+	arch_spin_unlock(&cpu_buffer->lock);
 	local_irq_restore(flags);
 
 	return reader;
@@ -3286,9 +3279,9 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
 	synchronize_sched();
 
 	spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
-	__raw_spin_lock(&cpu_buffer->lock);
+	arch_spin_lock(&cpu_buffer->lock);
 	rb_iter_reset(iter);
-	__raw_spin_unlock(&cpu_buffer->lock);
+	arch_spin_unlock(&cpu_buffer->lock);
 	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 
 	return iter;
@@ -3408,11 +3401,11 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
 	if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
 		goto out;
 
-	__raw_spin_lock(&cpu_buffer->lock);
+	arch_spin_lock(&cpu_buffer->lock);
 
 	rb_reset_cpu(cpu_buffer);
 
-	__raw_spin_unlock(&cpu_buffer->lock);
+	arch_spin_unlock(&cpu_buffer->lock);
 
  out:
 	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
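
The new signed:%u page-header fields above are computed with is_signed_type(). A standalone sketch of the underlying trick follows; the macro body shown is an assumption equivalent to the kernel's cast-minus-one test, not copied from this patch.

/* Minimal demonstration of is_signed_type(), compilable in userspace. */
#include <stdio.h>

#define is_signed_type(type)	(((type)(-1)) < 0)

int main(void)
{
	/* mirrors the three page-header fields: u64, local_t (long), char */
	printf("u64  signed:%u\n", (unsigned int)is_signed_type(unsigned long long));
	printf("long signed:%u\n", (unsigned int)is_signed_type(long));
	printf("char signed:%u\n", (unsigned int)is_signed_type(char));
	return 0;
}
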
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index 573d3cc762c3..b2477caf09c2 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -35,6 +35,28 @@ static int disable_reader;
 module_param(disable_reader, uint, 0644);
 MODULE_PARM_DESC(disable_reader, "only run producer");
 
+static int write_iteration = 50;
+module_param(write_iteration, uint, 0644);
+MODULE_PARM_DESC(write_iteration, "# of writes between timestamp readings");
+
+static int producer_nice = 19;
+static int consumer_nice = 19;
+
+static int producer_fifo = -1;
+static int consumer_fifo = -1;
+
+module_param(producer_nice, uint, 0644);
+MODULE_PARM_DESC(producer_nice, "nice prio for producer");
+
+module_param(consumer_nice, uint, 0644);
+MODULE_PARM_DESC(consumer_nice, "nice prio for consumer");
+
+module_param(producer_fifo, uint, 0644);
+MODULE_PARM_DESC(producer_fifo, "fifo prio for producer");
+
+module_param(consumer_fifo, uint, 0644);
+MODULE_PARM_DESC(consumer_fifo, "fifo prio for consumer");
+
 static int read_events;
 
 static int kill_test;
@@ -208,15 +230,18 @@ static void ring_buffer_producer(void)
 	do {
 		struct ring_buffer_event *event;
 		int *entry;
-
-		event = ring_buffer_lock_reserve(buffer, 10);
-		if (!event) {
-			missed++;
-		} else {
-			hit++;
-			entry = ring_buffer_event_data(event);
-			*entry = smp_processor_id();
-			ring_buffer_unlock_commit(buffer, event);
+		int i;
+
+		for (i = 0; i < write_iteration; i++) {
+			event = ring_buffer_lock_reserve(buffer, 10);
+			if (!event) {
+				missed++;
+			} else {
+				hit++;
+				entry = ring_buffer_event_data(event);
+				*entry = smp_processor_id();
+				ring_buffer_unlock_commit(buffer, event);
+			}
 		}
 		do_gettimeofday(&end_tv);
 
@@ -263,6 +288,27 @@ static void ring_buffer_producer(void)
 
 	if (kill_test)
 		trace_printk("ERROR!\n");
+
+	if (!disable_reader) {
+		if (consumer_fifo < 0)
+			trace_printk("Running Consumer at nice: %d\n",
+				     consumer_nice);
+		else
+			trace_printk("Running Consumer at SCHED_FIFO %d\n",
+				     consumer_fifo);
+	}
+	if (producer_fifo < 0)
+		trace_printk("Running Producer at nice: %d\n",
+			     producer_nice);
+	else
+		trace_printk("Running Producer at SCHED_FIFO %d\n",
+			     producer_fifo);
+
+	/* Let the user know that the test is running at low priority */
+	if (producer_fifo < 0 && consumer_fifo < 0 &&
+	    producer_nice == 19 && consumer_nice == 19)
+		trace_printk("WARNING!!! This test is running at lowest priority.\n");
+
 	trace_printk("Time: %lld (usecs)\n", time);
 	trace_printk("Overruns: %lld\n", overruns);
 	if (disable_reader)
@@ -392,6 +438,27 @@ static int __init ring_buffer_benchmark_init(void)
 	if (IS_ERR(producer))
 		goto out_kill;
 
+	/*
+	 * Run them as low-prio background tasks by default:
+	 */
+	if (!disable_reader) {
+		if (consumer_fifo >= 0) {
+			struct sched_param param = {
+				.sched_priority = consumer_fifo
+			};
+			sched_setscheduler(consumer, SCHED_FIFO, &param);
+		} else
+			set_user_nice(consumer, consumer_nice);
+	}
+
+	if (producer_fifo >= 0) {
+		struct sched_param param = {
+			.sched_priority = consumer_fifo
+		};
+		sched_setscheduler(producer, SCHED_FIFO, &param);
+	} else
+		set_user_nice(producer, producer_nice);
+
 	return 0;
 
  out_kill:
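
One detail worth flagging in the benchmark init code above: the producer branch fills .sched_priority from consumer_fifo, which reads like a copy-paste slip (the diff is reproduced as committed). A userspace analog of the presumably intended setup, applying each task's own knob; all names and values here are illustrative:

/* Hedged sketch: set SCHED_FIFO or a nice level on the current task. */
#include <sched.h>
#include <stdio.h>
#include <sys/resource.h>

static int producer_fifo = -1;	/* <0: fall back to a nice level */
static int producer_nice = 19;

int main(void)
{
	if (producer_fifo >= 0) {
		struct sched_param param = {
			.sched_priority = producer_fifo	/* not consumer_fifo */
		};
		if (sched_setscheduler(0, SCHED_FIFO, &param))
			perror("sched_setscheduler");
	} else if (setpriority(PRIO_PROCESS, 0, producer_nice))
		perror("setpriority");
	return 0;
}
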
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index b20d3ec75de9..06ba26747d7e 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -86,17 +86,17 @@ static int dummy_set_flag(u32 old_flags, u32 bit, int set)
86 */ 86 */
87static int tracing_disabled = 1; 87static int tracing_disabled = 1;
88 88
89DEFINE_PER_CPU(local_t, ftrace_cpu_disabled); 89DEFINE_PER_CPU(int, ftrace_cpu_disabled);
90 90
91static inline void ftrace_disable_cpu(void) 91static inline void ftrace_disable_cpu(void)
92{ 92{
93 preempt_disable(); 93 preempt_disable();
94 local_inc(&__get_cpu_var(ftrace_cpu_disabled)); 94 __this_cpu_inc(per_cpu_var(ftrace_cpu_disabled));
95} 95}
96 96
97static inline void ftrace_enable_cpu(void) 97static inline void ftrace_enable_cpu(void)
98{ 98{
99 local_dec(&__get_cpu_var(ftrace_cpu_disabled)); 99 __this_cpu_dec(per_cpu_var(ftrace_cpu_disabled));
100 preempt_enable(); 100 preempt_enable();
101} 101}
102 102
@@ -129,7 +129,7 @@ static int tracing_set_tracer(const char *buf);
129static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata; 129static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
130static char *default_bootup_tracer; 130static char *default_bootup_tracer;
131 131
132static int __init set_ftrace(char *str) 132static int __init set_cmdline_ftrace(char *str)
133{ 133{
134 strncpy(bootup_tracer_buf, str, MAX_TRACER_SIZE); 134 strncpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
135 default_bootup_tracer = bootup_tracer_buf; 135 default_bootup_tracer = bootup_tracer_buf;
@@ -137,7 +137,7 @@ static int __init set_ftrace(char *str)
137 ring_buffer_expanded = 1; 137 ring_buffer_expanded = 1;
138 return 1; 138 return 1;
139} 139}
140__setup("ftrace=", set_ftrace); 140__setup("ftrace=", set_cmdline_ftrace);
141 141
142static int __init set_ftrace_dump_on_oops(char *str) 142static int __init set_ftrace_dump_on_oops(char *str)
143{ 143{
@@ -203,7 +203,7 @@ cycle_t ftrace_now(int cpu)
203 */ 203 */
204static struct trace_array max_tr; 204static struct trace_array max_tr;
205 205
206static DEFINE_PER_CPU(struct trace_array_cpu, max_data); 206static DEFINE_PER_CPU(struct trace_array_cpu, max_tr_data);
207 207
208/* tracer_enabled is used to toggle activation of a tracer */ 208/* tracer_enabled is used to toggle activation of a tracer */
209static int tracer_enabled = 1; 209static int tracer_enabled = 1;
@@ -313,7 +313,6 @@ static const char *trace_options[] = {
313 "bin", 313 "bin",
314 "block", 314 "block",
315 "stacktrace", 315 "stacktrace",
316 "sched-tree",
317 "trace_printk", 316 "trace_printk",
318 "ftrace_preempt", 317 "ftrace_preempt",
319 "branch", 318 "branch",
@@ -493,15 +492,15 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
493 * protected by per_cpu spinlocks. But the action of the swap 492 * protected by per_cpu spinlocks. But the action of the swap
494 * needs its own lock. 493 * needs its own lock.
495 * 494 *
496 * This is defined as a raw_spinlock_t in order to help 495 * This is defined as a arch_spinlock_t in order to help
497 * with performance when lockdep debugging is enabled. 496 * with performance when lockdep debugging is enabled.
498 * 497 *
499 * It is also used in other places outside the update_max_tr 498 * It is also used in other places outside the update_max_tr
500 * so it needs to be defined outside of the 499 * so it needs to be defined outside of the
501 * CONFIG_TRACER_MAX_TRACE. 500 * CONFIG_TRACER_MAX_TRACE.
502 */ 501 */
503static raw_spinlock_t ftrace_max_lock = 502static arch_spinlock_t ftrace_max_lock =
504 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 503 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
505 504
506#ifdef CONFIG_TRACER_MAX_TRACE 505#ifdef CONFIG_TRACER_MAX_TRACE
507unsigned long __read_mostly tracing_max_latency; 506unsigned long __read_mostly tracing_max_latency;
@@ -555,13 +554,13 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
555 return; 554 return;
556 555
557 WARN_ON_ONCE(!irqs_disabled()); 556 WARN_ON_ONCE(!irqs_disabled());
558 __raw_spin_lock(&ftrace_max_lock); 557 arch_spin_lock(&ftrace_max_lock);
559 558
560 tr->buffer = max_tr.buffer; 559 tr->buffer = max_tr.buffer;
561 max_tr.buffer = buf; 560 max_tr.buffer = buf;
562 561
563 __update_max_tr(tr, tsk, cpu); 562 __update_max_tr(tr, tsk, cpu);
564 __raw_spin_unlock(&ftrace_max_lock); 563 arch_spin_unlock(&ftrace_max_lock);
565} 564}
566 565
567/** 566/**
@@ -581,7 +580,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
581 return; 580 return;
582 581
583 WARN_ON_ONCE(!irqs_disabled()); 582 WARN_ON_ONCE(!irqs_disabled());
584 __raw_spin_lock(&ftrace_max_lock); 583 arch_spin_lock(&ftrace_max_lock);
585 584
586 ftrace_disable_cpu(); 585 ftrace_disable_cpu();
587 586
@@ -603,7 +602,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
603 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY); 602 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
604 603
605 __update_max_tr(tr, tsk, cpu); 604 __update_max_tr(tr, tsk, cpu);
606 __raw_spin_unlock(&ftrace_max_lock); 605 arch_spin_unlock(&ftrace_max_lock);
607} 606}
608#endif /* CONFIG_TRACER_MAX_TRACE */ 607#endif /* CONFIG_TRACER_MAX_TRACE */
609 608
@@ -802,7 +801,7 @@ static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
802static unsigned map_cmdline_to_pid[SAVED_CMDLINES]; 801static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
803static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN]; 802static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
804static int cmdline_idx; 803static int cmdline_idx;
805static raw_spinlock_t trace_cmdline_lock = __RAW_SPIN_LOCK_UNLOCKED; 804static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
806 805
807/* temporary disable recording */ 806/* temporary disable recording */
808static atomic_t trace_record_cmdline_disabled __read_mostly; 807static atomic_t trace_record_cmdline_disabled __read_mostly;
@@ -915,7 +914,7 @@ static void trace_save_cmdline(struct task_struct *tsk)
915 * nor do we want to disable interrupts, 914 * nor do we want to disable interrupts,
916 * so if we miss here, then better luck next time. 915 * so if we miss here, then better luck next time.
917 */ 916 */
918 if (!__raw_spin_trylock(&trace_cmdline_lock)) 917 if (!arch_spin_trylock(&trace_cmdline_lock))
919 return; 918 return;
920 919
921 idx = map_pid_to_cmdline[tsk->pid]; 920 idx = map_pid_to_cmdline[tsk->pid];
@@ -940,7 +939,7 @@ static void trace_save_cmdline(struct task_struct *tsk)
940 939
941 memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN); 940 memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
942 941
943 __raw_spin_unlock(&trace_cmdline_lock); 942 arch_spin_unlock(&trace_cmdline_lock);
944} 943}
945 944
946void trace_find_cmdline(int pid, char comm[]) 945void trace_find_cmdline(int pid, char comm[])
@@ -958,14 +957,14 @@ void trace_find_cmdline(int pid, char comm[])
958 } 957 }
959 958
960 preempt_disable(); 959 preempt_disable();
961 __raw_spin_lock(&trace_cmdline_lock); 960 arch_spin_lock(&trace_cmdline_lock);
962 map = map_pid_to_cmdline[pid]; 961 map = map_pid_to_cmdline[pid];
963 if (map != NO_CMDLINE_MAP) 962 if (map != NO_CMDLINE_MAP)
964 strcpy(comm, saved_cmdlines[map]); 963 strcpy(comm, saved_cmdlines[map]);
965 else 964 else
966 strcpy(comm, "<...>"); 965 strcpy(comm, "<...>");
967 966
968 __raw_spin_unlock(&trace_cmdline_lock); 967 arch_spin_unlock(&trace_cmdline_lock);
969 preempt_enable(); 968 preempt_enable();
970} 969}
971 970
@@ -1085,7 +1084,7 @@ trace_function(struct trace_array *tr,
1085 struct ftrace_entry *entry; 1084 struct ftrace_entry *entry;
1086 1085
1087 /* If we are reading the ring buffer, don't trace */ 1086 /* If we are reading the ring buffer, don't trace */
1088 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) 1087 if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled))))
1089 return; 1088 return;
1090 1089
1091 event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry), 1090 event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
@@ -1151,6 +1150,22 @@ void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1151 __ftrace_trace_stack(tr->buffer, flags, skip, pc); 1150 __ftrace_trace_stack(tr->buffer, flags, skip, pc);
1152} 1151}
1153 1152
1153/**
1154 * trace_dump_stack - record a stack back trace in the trace buffer
1155 */
1156void trace_dump_stack(void)
1157{
1158 unsigned long flags;
1159
1160 if (tracing_disabled || tracing_selftest_running)
1161 return;
1162
1163 local_save_flags(flags);
1164
1165 /* skipping 3 traces, seems to get us at the caller of this function */
1166 __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count());
1167}
1168
1154void 1169void
1155ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) 1170ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1156{ 1171{
@@ -1251,8 +1266,8 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
1251 */ 1266 */
1252int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) 1267int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1253{ 1268{
1254 static raw_spinlock_t trace_buf_lock = 1269 static arch_spinlock_t trace_buf_lock =
1255 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 1270 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
1256 static u32 trace_buf[TRACE_BUF_SIZE]; 1271 static u32 trace_buf[TRACE_BUF_SIZE];
1257 1272
1258 struct ftrace_event_call *call = &event_bprint; 1273 struct ftrace_event_call *call = &event_bprint;
@@ -1283,7 +1298,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1283 1298
1284 /* Lockdep uses trace_printk for lock tracing */ 1299 /* Lockdep uses trace_printk for lock tracing */
1285 local_irq_save(flags); 1300 local_irq_save(flags);
1286 __raw_spin_lock(&trace_buf_lock); 1301 arch_spin_lock(&trace_buf_lock);
1287 len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args); 1302 len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
1288 1303
1289 if (len > TRACE_BUF_SIZE || len < 0) 1304 if (len > TRACE_BUF_SIZE || len < 0)
@@ -1304,7 +1319,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1304 ring_buffer_unlock_commit(buffer, event); 1319 ring_buffer_unlock_commit(buffer, event);
1305 1320
1306out_unlock: 1321out_unlock:
1307 __raw_spin_unlock(&trace_buf_lock); 1322 arch_spin_unlock(&trace_buf_lock);
1308 local_irq_restore(flags); 1323 local_irq_restore(flags);
1309 1324
1310out: 1325out:
@@ -1334,7 +1349,7 @@ int trace_array_printk(struct trace_array *tr,
1334int trace_array_vprintk(struct trace_array *tr, 1349int trace_array_vprintk(struct trace_array *tr,
1335 unsigned long ip, const char *fmt, va_list args) 1350 unsigned long ip, const char *fmt, va_list args)
1336{ 1351{
1337 static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED; 1352 static arch_spinlock_t trace_buf_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1338 static char trace_buf[TRACE_BUF_SIZE]; 1353 static char trace_buf[TRACE_BUF_SIZE];
1339 1354
1340 struct ftrace_event_call *call = &event_print; 1355 struct ftrace_event_call *call = &event_print;
@@ -1360,12 +1375,9 @@ int trace_array_vprintk(struct trace_array *tr,
1360 1375
1361 pause_graph_tracing(); 1376 pause_graph_tracing();
1362 raw_local_irq_save(irq_flags); 1377 raw_local_irq_save(irq_flags);
1363 __raw_spin_lock(&trace_buf_lock); 1378 arch_spin_lock(&trace_buf_lock);
1364 len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args); 1379 len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
1365 1380
1366 len = min(len, TRACE_BUF_SIZE-1);
1367 trace_buf[len] = 0;
1368
1369 size = sizeof(*entry) + len + 1; 1381 size = sizeof(*entry) + len + 1;
1370 buffer = tr->buffer; 1382 buffer = tr->buffer;
1371 event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, 1383 event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
@@ -1373,15 +1385,15 @@ int trace_array_vprintk(struct trace_array *tr,
1373 if (!event) 1385 if (!event)
1374 goto out_unlock; 1386 goto out_unlock;
1375 entry = ring_buffer_event_data(event); 1387 entry = ring_buffer_event_data(event);
1376 entry->ip = ip; 1388 entry->ip = ip;
1377 1389
1378 memcpy(&entry->buf, trace_buf, len); 1390 memcpy(&entry->buf, trace_buf, len);
1379 entry->buf[len] = 0; 1391 entry->buf[len] = '\0';
1380 if (!filter_check_discard(call, entry, buffer, event)) 1392 if (!filter_check_discard(call, entry, buffer, event))
1381 ring_buffer_unlock_commit(buffer, event); 1393 ring_buffer_unlock_commit(buffer, event);
1382 1394
1383 out_unlock: 1395 out_unlock:
1384 __raw_spin_unlock(&trace_buf_lock); 1396 arch_spin_unlock(&trace_buf_lock);
1385 raw_local_irq_restore(irq_flags); 1397 raw_local_irq_restore(irq_flags);
1386 unpause_graph_tracing(); 1398 unpause_graph_tracing();
1387 out: 1399 out:
@@ -1515,6 +1527,8 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos)
1515 int i = (int)*pos; 1527 int i = (int)*pos;
1516 void *ent; 1528 void *ent;
1517 1529
1530 WARN_ON_ONCE(iter->leftover);
1531
1518 (*pos)++; 1532 (*pos)++;
1519 1533
1520 /* can't go backwards */ 1534 /* can't go backwards */
@@ -1613,8 +1627,16 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1613 ; 1627 ;
1614 1628
1615 } else { 1629 } else {
1616 l = *pos - 1; 1630 /*
1617 p = s_next(m, p, &l); 1631 * If we overflowed the seq_file before, then we want
1632 * to just reuse the trace_seq buffer again.
1633 */
1634 if (iter->leftover)
1635 p = iter;
1636 else {
1637 l = *pos - 1;
1638 p = s_next(m, p, &l);
1639 }
1618 } 1640 }
1619 1641
1620 trace_event_read_lock(); 1642 trace_event_read_lock();
@@ -1922,6 +1944,7 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter)
1922static int s_show(struct seq_file *m, void *v) 1944static int s_show(struct seq_file *m, void *v)
1923{ 1945{
1924 struct trace_iterator *iter = v; 1946 struct trace_iterator *iter = v;
1947 int ret;
1925 1948
1926 if (iter->ent == NULL) { 1949 if (iter->ent == NULL) {
1927 if (iter->tr) { 1950 if (iter->tr) {
@@ -1941,9 +1964,27 @@ static int s_show(struct seq_file *m, void *v)
1941 if (!(trace_flags & TRACE_ITER_VERBOSE)) 1964 if (!(trace_flags & TRACE_ITER_VERBOSE))
1942 print_func_help_header(m); 1965 print_func_help_header(m);
1943 } 1966 }
1967 } else if (iter->leftover) {
1968 /*
1969 * If we filled the seq_file buffer earlier, we
1970 * want to just show it now.
1971 */
1972 ret = trace_print_seq(m, &iter->seq);
1973
1974 /* ret should this time be zero, but you never know */
1975 iter->leftover = ret;
1976
1944 } else { 1977 } else {
1945 print_trace_line(iter); 1978 print_trace_line(iter);
1946 trace_print_seq(m, &iter->seq); 1979 ret = trace_print_seq(m, &iter->seq);
1980 /*
1981 * If we overflow the seq_file buffer, then it will
1982 * ask us for this data again at start up.
1983 * Use that instead.
1984 * ret is 0 if seq_file write succeeded.
1985 * -1 otherwise.
1986 */
1987 iter->leftover = ret;
1947 } 1988 }
1948 1989
1949 return 0; 1990 return 0;
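The leftover logic above keeps a record from being lost when trace_print_seq() cannot flush the whole trace_seq into the seq_file buffer. Sketched with hypothetical condensed helpers, the handshake is: show() records the overflow, and the next start() replays the buffered trace_seq instead of advancing the iterator:

    /* Hypothetical condensed form of the s_show()/s_start() handshake. */
    static int example_show(struct seq_file *m, struct trace_iterator *iter)
    {
            int ret;

            print_trace_line(iter);
            ret = trace_print_seq(m, &iter->seq); /* 0 on success, -1 on overflow */
            iter->leftover = ret;                 /* remember the overflow */
            return 0;
    }

    static void *example_start(struct trace_iterator *iter, loff_t *pos)
    {
            if (iter->leftover)
                    return iter;  /* replay iter->seq; do not call s_next() */
            /* ... otherwise advance normally via s_next() ... */
            return iter;
    }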
@@ -2253,7 +2294,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2253 mutex_lock(&tracing_cpumask_update_lock); 2294 mutex_lock(&tracing_cpumask_update_lock);
2254 2295
2255 local_irq_disable(); 2296 local_irq_disable();
2256 __raw_spin_lock(&ftrace_max_lock); 2297 arch_spin_lock(&ftrace_max_lock);
2257 for_each_tracing_cpu(cpu) { 2298 for_each_tracing_cpu(cpu) {
2258 /* 2299 /*
2259 * Increase/decrease the disabled counter if we are 2300 * Increase/decrease the disabled counter if we are
@@ -2268,7 +2309,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2268 atomic_dec(&global_trace.data[cpu]->disabled); 2309 atomic_dec(&global_trace.data[cpu]->disabled);
2269 } 2310 }
2270 } 2311 }
2271 __raw_spin_unlock(&ftrace_max_lock); 2312 arch_spin_unlock(&ftrace_max_lock);
2272 local_irq_enable(); 2313 local_irq_enable();
2273 2314
2274 cpumask_copy(tracing_cpumask, tracing_cpumask_new); 2315 cpumask_copy(tracing_cpumask, tracing_cpumask_new);
@@ -2290,67 +2331,49 @@ static const struct file_operations tracing_cpumask_fops = {
2290 .write = tracing_cpumask_write, 2331 .write = tracing_cpumask_write,
2291}; 2332};
2292 2333
2293static ssize_t 2334static int tracing_trace_options_show(struct seq_file *m, void *v)
2294tracing_trace_options_read(struct file *filp, char __user *ubuf,
2295 size_t cnt, loff_t *ppos)
2296{ 2335{
2297 struct tracer_opt *trace_opts; 2336 struct tracer_opt *trace_opts;
2298 u32 tracer_flags; 2337 u32 tracer_flags;
2299 int len = 0;
2300 char *buf;
2301 int r = 0;
2302 int i; 2338 int i;
2303 2339
2304
2305 /* calculate max size */
2306 for (i = 0; trace_options[i]; i++) {
2307 len += strlen(trace_options[i]);
2308 len += 3; /* "no" and newline */
2309 }
2310
2311 mutex_lock(&trace_types_lock); 2340 mutex_lock(&trace_types_lock);
2312 tracer_flags = current_trace->flags->val; 2341 tracer_flags = current_trace->flags->val;
2313 trace_opts = current_trace->flags->opts; 2342 trace_opts = current_trace->flags->opts;
2314 2343
2315 /*
2316 * Increase the size with names of options specific
2317 * of the current tracer.
2318 */
2319 for (i = 0; trace_opts[i].name; i++) {
2320 len += strlen(trace_opts[i].name);
2321 len += 3; /* "no" and newline */
2322 }
2323
2324 /* +1 for \0 */
2325 buf = kmalloc(len + 1, GFP_KERNEL);
2326 if (!buf) {
2327 mutex_unlock(&trace_types_lock);
2328 return -ENOMEM;
2329 }
2330
2331 for (i = 0; trace_options[i]; i++) { 2344 for (i = 0; trace_options[i]; i++) {
2332 if (trace_flags & (1 << i)) 2345 if (trace_flags & (1 << i))
2333 r += sprintf(buf + r, "%s\n", trace_options[i]); 2346 seq_printf(m, "%s\n", trace_options[i]);
2334 else 2347 else
2335 r += sprintf(buf + r, "no%s\n", trace_options[i]); 2348 seq_printf(m, "no%s\n", trace_options[i]);
2336 } 2349 }
2337 2350
2338 for (i = 0; trace_opts[i].name; i++) { 2351 for (i = 0; trace_opts[i].name; i++) {
2339 if (tracer_flags & trace_opts[i].bit) 2352 if (tracer_flags & trace_opts[i].bit)
2340 r += sprintf(buf + r, "%s\n", 2353 seq_printf(m, "%s\n", trace_opts[i].name);
2341 trace_opts[i].name);
2342 else 2354 else
2343 r += sprintf(buf + r, "no%s\n", 2355 seq_printf(m, "no%s\n", trace_opts[i].name);
2344 trace_opts[i].name);
2345 } 2356 }
2346 mutex_unlock(&trace_types_lock); 2357 mutex_unlock(&trace_types_lock);
2347 2358
2348 WARN_ON(r >= len + 1); 2359 return 0;
2360}
2349 2361
2350 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2362static int __set_tracer_option(struct tracer *trace,
2363 struct tracer_flags *tracer_flags,
2364 struct tracer_opt *opts, int neg)
2365{
2366 int ret;
2351 2367
2352 kfree(buf); 2368 ret = trace->set_flag(tracer_flags->val, opts->bit, !neg);
2353 return r; 2369 if (ret)
2370 return ret;
2371
2372 if (neg)
2373 tracer_flags->val &= ~opts->bit;
2374 else
2375 tracer_flags->val |= opts->bit;
2376 return 0;
2354} 2377}
2355 2378
2356/* Try to assign a tracer specific option */ 2379/* Try to assign a tracer specific option */
@@ -2358,33 +2381,17 @@ static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
2358{ 2381{
2359 struct tracer_flags *tracer_flags = trace->flags; 2382 struct tracer_flags *tracer_flags = trace->flags;
2360 struct tracer_opt *opts = NULL; 2383 struct tracer_opt *opts = NULL;
2361 int ret = 0, i = 0; 2384 int i;
2362 int len;
2363 2385
2364 for (i = 0; tracer_flags->opts[i].name; i++) { 2386 for (i = 0; tracer_flags->opts[i].name; i++) {
2365 opts = &tracer_flags->opts[i]; 2387 opts = &tracer_flags->opts[i];
2366 len = strlen(opts->name);
2367 2388
2368 if (strncmp(cmp, opts->name, len) == 0) { 2389 if (strcmp(cmp, opts->name) == 0)
2369 ret = trace->set_flag(tracer_flags->val, 2390 return __set_tracer_option(trace, trace->flags,
2370 opts->bit, !neg); 2391 opts, neg);
2371 break;
2372 }
2373 } 2392 }
2374 /* Not found */
2375 if (!tracer_flags->opts[i].name)
2376 return -EINVAL;
2377
2378 /* Refused to handle */
2379 if (ret)
2380 return ret;
2381
2382 if (neg)
2383 tracer_flags->val &= ~opts->bit;
2384 else
2385 tracer_flags->val |= opts->bit;
2386 2393
2387 return 0; 2394 return -EINVAL;
2388} 2395}
2389 2396
2390static void set_tracer_flags(unsigned int mask, int enabled) 2397static void set_tracer_flags(unsigned int mask, int enabled)
@@ -2404,7 +2411,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2404 size_t cnt, loff_t *ppos) 2411 size_t cnt, loff_t *ppos)
2405{ 2412{
2406 char buf[64]; 2413 char buf[64];
2407 char *cmp = buf; 2414 char *cmp;
2408 int neg = 0; 2415 int neg = 0;
2409 int ret; 2416 int ret;
2410 int i; 2417 int i;
@@ -2416,16 +2423,15 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2416 return -EFAULT; 2423 return -EFAULT;
2417 2424
2418 buf[cnt] = 0; 2425 buf[cnt] = 0;
2426 cmp = strstrip(buf);
2419 2427
2420 if (strncmp(buf, "no", 2) == 0) { 2428 if (strncmp(cmp, "no", 2) == 0) {
2421 neg = 1; 2429 neg = 1;
2422 cmp += 2; 2430 cmp += 2;
2423 } 2431 }
2424 2432
2425 for (i = 0; trace_options[i]; i++) { 2433 for (i = 0; trace_options[i]; i++) {
2426 int len = strlen(trace_options[i]); 2434 if (strcmp(cmp, trace_options[i]) == 0) {
2427
2428 if (strncmp(cmp, trace_options[i], len) == 0) {
2429 set_tracer_flags(1 << i, !neg); 2435 set_tracer_flags(1 << i, !neg);
2430 break; 2436 break;
2431 } 2437 }
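With strstrip() removing the trailing newline, the option name can now be matched with an exact strcmp() instead of the old prefix strncmp(), which accepted any string that merely started with an option name (e.g. "printk-garbage" would toggle "printk"). A self-contained sketch of the parse step, with hypothetical names:

    /* Parse "name" or "noname" from a NUL-terminated user string. */
    static int example_parse_option(char *buf, const char **name_out)
    {
            char *cmp = strstrip(buf);  /* drop leading/trailing whitespace */
            int neg = 0;

            if (strncmp(cmp, "no", 2) == 0) {
                    neg = 1;
                    cmp += 2;
            }
            *name_out = cmp;            /* match with strcmp(), not strncmp() */
            return neg;
    }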
@@ -2445,9 +2451,18 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2445 return cnt; 2451 return cnt;
2446} 2452}
2447 2453
2454static int tracing_trace_options_open(struct inode *inode, struct file *file)
2455{
2456 if (tracing_disabled)
2457 return -ENODEV;
2458 return single_open(file, tracing_trace_options_show, NULL);
2459}
2460
2448static const struct file_operations tracing_iter_fops = { 2461static const struct file_operations tracing_iter_fops = {
2449 .open = tracing_open_generic, 2462 .open = tracing_trace_options_open,
2450 .read = tracing_trace_options_read, 2463 .read = seq_read,
2464 .llseek = seq_lseek,
2465 .release = single_release,
2451 .write = tracing_trace_options_write, 2466 .write = tracing_trace_options_write,
2452}; 2467};
2453 2468
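Converting trace_options from a hand-rolled kmalloc/sprintf read routine to seq_file removes the need to precompute a buffer size and gets short reads and llseek for free. The generic shape of the conversion, assuming a show routine like tracing_trace_options_show() above (example_open and example_fops are illustrative names):

    static int example_open(struct inode *inode, struct file *file)
    {
            if (tracing_disabled)
                    return -ENODEV;
            /* single_open() binds the show callback to one seq_file record */
            return single_open(file, tracing_trace_options_show, NULL);
    }

    static const struct file_operations example_fops = {
            .open    = example_open,
            .read    = seq_read,       /* seq_file does the buffering */
            .llseek  = seq_lseek,
            .release = single_release, /* frees what single_open() allocated */
            .write   = tracing_trace_options_write,
    };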
@@ -2897,6 +2912,10 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
2897 else 2912 else
2898 cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask); 2913 cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask);
2899 2914
2915
2916 if (iter->trace->pipe_close)
2917 iter->trace->pipe_close(iter);
2918
2900 mutex_unlock(&trace_types_lock); 2919 mutex_unlock(&trace_types_lock);
2901 2920
2902 free_cpumask_var(iter->started); 2921 free_cpumask_var(iter->started);
@@ -3103,7 +3122,7 @@ static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
3103 __free_page(spd->pages[idx]); 3122 __free_page(spd->pages[idx]);
3104} 3123}
3105 3124
3106static struct pipe_buf_operations tracing_pipe_buf_ops = { 3125static const struct pipe_buf_operations tracing_pipe_buf_ops = {
3107 .can_merge = 0, 3126 .can_merge = 0,
3108 .map = generic_pipe_buf_map, 3127 .map = generic_pipe_buf_map,
3109 .unmap = generic_pipe_buf_unmap, 3128 .unmap = generic_pipe_buf_unmap,
@@ -3334,7 +3353,6 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
3334 size_t cnt, loff_t *fpos) 3353 size_t cnt, loff_t *fpos)
3335{ 3354{
3336 char *buf; 3355 char *buf;
3337 char *end;
3338 3356
3339 if (tracing_disabled) 3357 if (tracing_disabled)
3340 return -EINVAL; 3358 return -EINVAL;
@@ -3342,7 +3360,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
3342 if (cnt > TRACE_BUF_SIZE) 3360 if (cnt > TRACE_BUF_SIZE)
3343 cnt = TRACE_BUF_SIZE; 3361 cnt = TRACE_BUF_SIZE;
3344 3362
3345 buf = kmalloc(cnt + 1, GFP_KERNEL); 3363 buf = kmalloc(cnt + 2, GFP_KERNEL);
3346 if (buf == NULL) 3364 if (buf == NULL)
3347 return -ENOMEM; 3365 return -ENOMEM;
3348 3366
@@ -3350,35 +3368,31 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
3350 kfree(buf); 3368 kfree(buf);
3351 return -EFAULT; 3369 return -EFAULT;
3352 } 3370 }
3371 if (buf[cnt-1] != '\n') {
3372 buf[cnt] = '\n';
3373 buf[cnt+1] = '\0';
3374 } else
3375 buf[cnt] = '\0';
3353 3376
3354 /* Cut from the first nil or newline. */ 3377 cnt = mark_printk("%s", buf);
3355 buf[cnt] = '\0';
3356 end = strchr(buf, '\n');
3357 if (end)
3358 *end = '\0';
3359
3360 cnt = mark_printk("%s\n", buf);
3361 kfree(buf); 3378 kfree(buf);
3362 *fpos += cnt; 3379 *fpos += cnt;
3363 3380
3364 return cnt; 3381 return cnt;
3365} 3382}
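Rather than cutting the message at the first newline, the write path now guarantees exactly one trailing newline, which is why the allocation grew to cnt + 2: worst case needs room for an appended '\n' plus the NUL. The termination logic in miniature:

    /* buf holds cnt user bytes; it was allocated with kmalloc(cnt + 2, ...) */
    if (buf[cnt - 1] != '\n') {
            buf[cnt]     = '\n';  /* append the missing newline */
            buf[cnt + 1] = '\0';
    } else {
            buf[cnt] = '\0';      /* already newline-terminated */
    }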
3366 3383
3367static ssize_t tracing_clock_read(struct file *filp, char __user *ubuf, 3384static int tracing_clock_show(struct seq_file *m, void *v)
3368 size_t cnt, loff_t *ppos)
3369{ 3385{
3370 char buf[64];
3371 int bufiter = 0;
3372 int i; 3386 int i;
3373 3387
3374 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) 3388 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
3375 bufiter += snprintf(buf + bufiter, sizeof(buf) - bufiter, 3389 seq_printf(m,
3376 "%s%s%s%s", i ? " " : "", 3390 "%s%s%s%s", i ? " " : "",
3377 i == trace_clock_id ? "[" : "", trace_clocks[i].name, 3391 i == trace_clock_id ? "[" : "", trace_clocks[i].name,
3378 i == trace_clock_id ? "]" : ""); 3392 i == trace_clock_id ? "]" : "");
3379 bufiter += snprintf(buf + bufiter, sizeof(buf) - bufiter, "\n"); 3393 seq_putc(m, '\n');
3380 3394
3381 return simple_read_from_buffer(ubuf, cnt, ppos, buf, bufiter); 3395 return 0;
3382} 3396}
3383 3397
3384static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, 3398static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
@@ -3420,6 +3434,13 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
3420 return cnt; 3434 return cnt;
3421} 3435}
3422 3436
3437static int tracing_clock_open(struct inode *inode, struct file *file)
3438{
3439 if (tracing_disabled)
3440 return -ENODEV;
3441 return single_open(file, tracing_clock_show, NULL);
3442}
3443
3423static const struct file_operations tracing_max_lat_fops = { 3444static const struct file_operations tracing_max_lat_fops = {
3424 .open = tracing_open_generic, 3445 .open = tracing_open_generic,
3425 .read = tracing_max_lat_read, 3446 .read = tracing_max_lat_read,
@@ -3458,8 +3479,10 @@ static const struct file_operations tracing_mark_fops = {
3458}; 3479};
3459 3480
3460static const struct file_operations trace_clock_fops = { 3481static const struct file_operations trace_clock_fops = {
3461 .open = tracing_open_generic, 3482 .open = tracing_clock_open,
3462 .read = tracing_clock_read, 3483 .read = seq_read,
3484 .llseek = seq_lseek,
3485 .release = single_release,
3463 .write = tracing_clock_write, 3486 .write = tracing_clock_write,
3464}; 3487};
3465 3488
@@ -3589,7 +3612,7 @@ static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
3589} 3612}
3590 3613
3591/* Pipe buffer operations for a buffer. */ 3614/* Pipe buffer operations for a buffer. */
3592static struct pipe_buf_operations buffer_pipe_buf_ops = { 3615static const struct pipe_buf_operations buffer_pipe_buf_ops = {
3593 .can_merge = 0, 3616 .can_merge = 0,
3594 .map = generic_pipe_buf_map, 3617 .map = generic_pipe_buf_map,
3595 .unmap = generic_pipe_buf_unmap, 3618 .unmap = generic_pipe_buf_unmap,
@@ -3730,7 +3753,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
3730 3753
3731 s = kmalloc(sizeof(*s), GFP_KERNEL); 3754 s = kmalloc(sizeof(*s), GFP_KERNEL);
3732 if (!s) 3755 if (!s)
3733 return ENOMEM; 3756 return -ENOMEM;
3734 3757
3735 trace_seq_init(s); 3758 trace_seq_init(s);
3736 3759
@@ -3920,39 +3943,16 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
3920 if (ret < 0) 3943 if (ret < 0)
3921 return ret; 3944 return ret;
3922 3945
3923 ret = 0; 3946 if (val != 0 && val != 1)
3924 switch (val) { 3947 return -EINVAL;
3925 case 0:
3926 /* do nothing if already cleared */
3927 if (!(topt->flags->val & topt->opt->bit))
3928 break;
3929
3930 mutex_lock(&trace_types_lock);
3931 if (current_trace->set_flag)
3932 ret = current_trace->set_flag(topt->flags->val,
3933 topt->opt->bit, 0);
3934 mutex_unlock(&trace_types_lock);
3935 if (ret)
3936 return ret;
3937 topt->flags->val &= ~topt->opt->bit;
3938 break;
3939 case 1:
3940 /* do nothing if already set */
3941 if (topt->flags->val & topt->opt->bit)
3942 break;
3943 3948
3949 if (!!(topt->flags->val & topt->opt->bit) != val) {
3944 mutex_lock(&trace_types_lock); 3950 mutex_lock(&trace_types_lock);
3945 if (current_trace->set_flag) 3951 ret = __set_tracer_option(current_trace, topt->flags,
3946 ret = current_trace->set_flag(topt->flags->val, 3952 topt->opt, val);
3947 topt->opt->bit, 1);
3948 mutex_unlock(&trace_types_lock); 3953 mutex_unlock(&trace_types_lock);
3949 if (ret) 3954 if (ret)
3950 return ret; 3955 return ret;
3951 topt->flags->val |= topt->opt->bit;
3952 break;
3953
3954 default:
3955 return -EINVAL;
3956 } 3956 }
3957 3957
3958 *ppos += cnt; 3958 *ppos += cnt;
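The rewritten trace_options_write collapses the old 0/1 switch into a single test: "!!(topt->flags->val & topt->opt->bit) != val" normalizes the masked bit to 0 or 1 before comparing, so __set_tracer_option() only runs when the state actually changes. As a standalone illustration (example_needs_update is a hypothetical helper):

    /* Nonzero when the stored bit differs from the requested 0/1 val. */
    static inline int example_needs_update(unsigned long flags,
                                           unsigned long bit,
                                           unsigned long val)
    {
            return !!(flags & bit) != val;  /* !! maps any set bit to 1 */
    }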
@@ -4279,8 +4279,8 @@ trace_printk_seq(struct trace_seq *s)
4279 4279
4280static void __ftrace_dump(bool disable_tracing) 4280static void __ftrace_dump(bool disable_tracing)
4281{ 4281{
4282 static raw_spinlock_t ftrace_dump_lock = 4282 static arch_spinlock_t ftrace_dump_lock =
4283 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 4283 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
4284 /* use static because iter can be a bit big for the stack */ 4284 /* use static because iter can be a bit big for the stack */
4285 static struct trace_iterator iter; 4285 static struct trace_iterator iter;
4286 unsigned int old_userobj; 4286 unsigned int old_userobj;
@@ -4290,7 +4290,7 @@ static void __ftrace_dump(bool disable_tracing)
4290 4290
4291 /* only one dump */ 4291 /* only one dump */
4292 local_irq_save(flags); 4292 local_irq_save(flags);
4293 __raw_spin_lock(&ftrace_dump_lock); 4293 arch_spin_lock(&ftrace_dump_lock);
4294 if (dump_ran) 4294 if (dump_ran)
4295 goto out; 4295 goto out;
4296 4296
@@ -4365,7 +4365,7 @@ static void __ftrace_dump(bool disable_tracing)
4365 } 4365 }
4366 4366
4367 out: 4367 out:
4368 __raw_spin_unlock(&ftrace_dump_lock); 4368 arch_spin_unlock(&ftrace_dump_lock);
4369 local_irq_restore(flags); 4369 local_irq_restore(flags);
4370} 4370}
4371 4371
@@ -4426,7 +4426,7 @@ __init static int tracer_alloc_buffers(void)
4426 /* Allocate the first page for all buffers */ 4426 /* Allocate the first page for all buffers */
4427 for_each_tracing_cpu(i) { 4427 for_each_tracing_cpu(i) {
4428 global_trace.data[i] = &per_cpu(global_trace_cpu, i); 4428 global_trace.data[i] = &per_cpu(global_trace_cpu, i);
4429 max_tr.data[i] = &per_cpu(max_data, i); 4429 max_tr.data[i] = &per_cpu(max_tr_data, i);
4430 } 4430 }
4431 4431
4432 trace_init_cmdlines(); 4432 trace_init_cmdlines();
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 405cb850b75d..4df6a77eb196 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -11,6 +11,7 @@
11#include <linux/ftrace.h> 11#include <linux/ftrace.h>
12#include <trace/boot.h> 12#include <trace/boot.h>
13#include <linux/kmemtrace.h> 13#include <linux/kmemtrace.h>
14#include <linux/hw_breakpoint.h>
14 15
15#include <linux/trace_seq.h> 16#include <linux/trace_seq.h>
16#include <linux/ftrace_event.h> 17#include <linux/ftrace_event.h>
@@ -37,6 +38,7 @@ enum trace_type {
37 TRACE_KMEM_ALLOC, 38 TRACE_KMEM_ALLOC,
38 TRACE_KMEM_FREE, 39 TRACE_KMEM_FREE,
39 TRACE_BLK, 40 TRACE_BLK,
41 TRACE_KSYM,
40 42
41 __TRACE_LAST_TYPE, 43 __TRACE_LAST_TYPE,
42}; 44};
@@ -98,9 +100,32 @@ struct syscall_trace_enter {
98struct syscall_trace_exit { 100struct syscall_trace_exit {
99 struct trace_entry ent; 101 struct trace_entry ent;
100 int nr; 102 int nr;
101 unsigned long ret; 103 long ret;
102}; 104};
103 105
106struct kprobe_trace_entry {
107 struct trace_entry ent;
108 unsigned long ip;
109 int nargs;
110 unsigned long args[];
111};
112
113#define SIZEOF_KPROBE_TRACE_ENTRY(n) \
114 (offsetof(struct kprobe_trace_entry, args) + \
115 (sizeof(unsigned long) * (n)))
116
117struct kretprobe_trace_entry {
118 struct trace_entry ent;
119 unsigned long func;
120 unsigned long ret_ip;
121 int nargs;
122 unsigned long args[];
123};
124
125#define SIZEOF_KRETPROBE_TRACE_ENTRY(n) \
126 (offsetof(struct kretprobe_trace_entry, args) + \
127 (sizeof(unsigned long) * (n)))
128
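The SIZEOF_KPROBE_TRACE_ENTRY() macros size a ring-buffer record that ends in a flexible array member: offsetof() gives the fixed header size and the argument slots are added on top. A sketch of how a caller would reserve such an event — 'buffer', 'type', 'irq_flags' and 'pc' stand in for the caller's state, and n is a hypothetical argument count:

    int n = 3;                               /* probed argument count */
    int size = SIZEOF_KPROBE_TRACE_ENTRY(n); /* header + n * sizeof(unsigned long) */

    event = trace_buffer_lock_reserve(buffer, type, size, irq_flags, pc);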
104/* 129/*
105 * trace_flag_type is an enumeration that holds different 130 * trace_flag_type is an enumeration that holds different
106 * states when a trace occurs. These are: 131 * states when a trace occurs. These are:
@@ -209,6 +234,7 @@ extern void __ftrace_bad_type(void);
209 TRACE_KMEM_ALLOC); \ 234 TRACE_KMEM_ALLOC); \
210 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ 235 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
211 TRACE_KMEM_FREE); \ 236 TRACE_KMEM_FREE); \
237 IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\
212 __ftrace_bad_type(); \ 238 __ftrace_bad_type(); \
213 } while (0) 239 } while (0)
214 240
@@ -246,6 +272,7 @@ struct tracer_flags {
246 * @pipe_open: called when the trace_pipe file is opened 272 * @pipe_open: called when the trace_pipe file is opened
247 * @wait_pipe: override how the user waits for traces on trace_pipe 273 * @wait_pipe: override how the user waits for traces on trace_pipe
248 * @close: called when the trace file is released 274 * @close: called when the trace file is released
275 * @pipe_close: called when the trace_pipe file is released
249 * @read: override the default read callback on trace_pipe 276 * @read: override the default read callback on trace_pipe
250 * @splice_read: override the default splice_read callback on trace_pipe 277 * @splice_read: override the default splice_read callback on trace_pipe
251 * @selftest: selftest to run on boot (see trace_selftest.c) 278 * @selftest: selftest to run on boot (see trace_selftest.c)
@@ -264,6 +291,7 @@ struct tracer {
264 void (*pipe_open)(struct trace_iterator *iter); 291 void (*pipe_open)(struct trace_iterator *iter);
265 void (*wait_pipe)(struct trace_iterator *iter); 292 void (*wait_pipe)(struct trace_iterator *iter);
266 void (*close)(struct trace_iterator *iter); 293 void (*close)(struct trace_iterator *iter);
294 void (*pipe_close)(struct trace_iterator *iter);
267 ssize_t (*read)(struct trace_iterator *iter, 295 ssize_t (*read)(struct trace_iterator *iter,
268 struct file *filp, char __user *ubuf, 296 struct file *filp, char __user *ubuf,
269 size_t cnt, loff_t *ppos); 297 size_t cnt, loff_t *ppos);
@@ -364,6 +392,8 @@ int register_tracer(struct tracer *type);
364void unregister_tracer(struct tracer *type); 392void unregister_tracer(struct tracer *type);
365int is_tracing_stopped(void); 393int is_tracing_stopped(void);
366 394
395extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr);
396
367extern unsigned long nsecs_to_usecs(unsigned long nsecs); 397extern unsigned long nsecs_to_usecs(unsigned long nsecs);
368 398
369#ifdef CONFIG_TRACER_MAX_TRACE 399#ifdef CONFIG_TRACER_MAX_TRACE
@@ -413,7 +443,7 @@ extern int DYN_FTRACE_TEST_NAME(void);
413 443
414extern int ring_buffer_expanded; 444extern int ring_buffer_expanded;
415extern bool tracing_selftest_disabled; 445extern bool tracing_selftest_disabled;
416DECLARE_PER_CPU(local_t, ftrace_cpu_disabled); 446DECLARE_PER_CPU(int, ftrace_cpu_disabled);
417 447
418#ifdef CONFIG_FTRACE_STARTUP_TEST 448#ifdef CONFIG_FTRACE_STARTUP_TEST
419extern int trace_selftest_startup_function(struct tracer *trace, 449extern int trace_selftest_startup_function(struct tracer *trace,
@@ -438,6 +468,8 @@ extern int trace_selftest_startup_branch(struct tracer *trace,
438 struct trace_array *tr); 468 struct trace_array *tr);
439extern int trace_selftest_startup_hw_branches(struct tracer *trace, 469extern int trace_selftest_startup_hw_branches(struct tracer *trace,
440 struct trace_array *tr); 470 struct trace_array *tr);
471extern int trace_selftest_startup_ksym(struct tracer *trace,
472 struct trace_array *tr);
441#endif /* CONFIG_FTRACE_STARTUP_TEST */ 473#endif /* CONFIG_FTRACE_STARTUP_TEST */
442 474
443extern void *head_page(struct trace_array_cpu *data); 475extern void *head_page(struct trace_array_cpu *data);
@@ -483,10 +515,6 @@ static inline int ftrace_graph_addr(unsigned long addr)
483 return 0; 515 return 0;
484} 516}
485#else 517#else
486static inline int ftrace_trace_addr(unsigned long addr)
487{
488 return 1;
489}
490static inline int ftrace_graph_addr(unsigned long addr) 518static inline int ftrace_graph_addr(unsigned long addr)
491{ 519{
492 return 1; 520 return 1;
@@ -500,12 +528,12 @@ print_graph_function(struct trace_iterator *iter)
500} 528}
501#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 529#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
502 530
503extern struct pid *ftrace_pid_trace; 531extern struct list_head ftrace_pids;
504 532
505#ifdef CONFIG_FUNCTION_TRACER 533#ifdef CONFIG_FUNCTION_TRACER
506static inline int ftrace_trace_task(struct task_struct *task) 534static inline int ftrace_trace_task(struct task_struct *task)
507{ 535{
508 if (!ftrace_pid_trace) 536 if (list_empty(&ftrace_pids))
509 return 1; 537 return 1;
510 538
511 return test_tsk_trace_trace(task); 539 return test_tsk_trace_trace(task);
@@ -569,18 +597,17 @@ enum trace_iterator_flags {
569 TRACE_ITER_BIN = 0x40, 597 TRACE_ITER_BIN = 0x40,
570 TRACE_ITER_BLOCK = 0x80, 598 TRACE_ITER_BLOCK = 0x80,
571 TRACE_ITER_STACKTRACE = 0x100, 599 TRACE_ITER_STACKTRACE = 0x100,
572 TRACE_ITER_SCHED_TREE = 0x200, 600 TRACE_ITER_PRINTK = 0x200,
573 TRACE_ITER_PRINTK = 0x400, 601 TRACE_ITER_PREEMPTONLY = 0x400,
574 TRACE_ITER_PREEMPTONLY = 0x800, 602 TRACE_ITER_BRANCH = 0x800,
575 TRACE_ITER_BRANCH = 0x1000, 603 TRACE_ITER_ANNOTATE = 0x1000,
576 TRACE_ITER_ANNOTATE = 0x2000, 604 TRACE_ITER_USERSTACKTRACE = 0x2000,
577 TRACE_ITER_USERSTACKTRACE = 0x4000, 605 TRACE_ITER_SYM_USEROBJ = 0x4000,
578 TRACE_ITER_SYM_USEROBJ = 0x8000, 606 TRACE_ITER_PRINTK_MSGONLY = 0x8000,
579 TRACE_ITER_PRINTK_MSGONLY = 0x10000, 607 TRACE_ITER_CONTEXT_INFO = 0x10000, /* Print pid/cpu/time */
580 TRACE_ITER_CONTEXT_INFO = 0x20000, /* Print pid/cpu/time */ 608 TRACE_ITER_LATENCY_FMT = 0x20000,
581 TRACE_ITER_LATENCY_FMT = 0x40000, 609 TRACE_ITER_SLEEP_TIME = 0x40000,
582 TRACE_ITER_SLEEP_TIME = 0x80000, 610 TRACE_ITER_GRAPH_TIME = 0x80000,
583 TRACE_ITER_GRAPH_TIME = 0x100000,
584}; 611};
585 612
586/* 613/*
@@ -687,7 +714,6 @@ struct event_filter {
687 int n_preds; 714 int n_preds;
688 struct filter_pred **preds; 715 struct filter_pred **preds;
689 char *filter_string; 716 char *filter_string;
690 bool no_reset;
691}; 717};
692 718
693struct event_subsystem { 719struct event_subsystem {
@@ -699,22 +725,40 @@ struct event_subsystem {
699}; 725};
700 726
701struct filter_pred; 727struct filter_pred;
728struct regex;
702 729
703typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event, 730typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event,
704 int val1, int val2); 731 int val1, int val2);
705 732
733typedef int (*regex_match_func)(char *str, struct regex *r, int len);
734
735enum regex_type {
736 MATCH_FULL = 0,
737 MATCH_FRONT_ONLY,
738 MATCH_MIDDLE_ONLY,
739 MATCH_END_ONLY,
740};
741
742struct regex {
743 char pattern[MAX_FILTER_STR_VAL];
744 int len;
745 int field_len;
746 regex_match_func match;
747};
748
706struct filter_pred { 749struct filter_pred {
707 filter_pred_fn_t fn; 750 filter_pred_fn_t fn;
708 u64 val; 751 u64 val;
709 char str_val[MAX_FILTER_STR_VAL]; 752 struct regex regex;
710 int str_len; 753 char *field_name;
711 char *field_name; 754 int offset;
712 int offset; 755 int not;
713 int not; 756 int op;
714 int op; 757 int pop_n;
715 int pop_n;
716}; 758};
717 759
760extern enum regex_type
761filter_parse_regex(char *buff, int len, char **search, int *not);
718extern void print_event_filter(struct ftrace_event_call *call, 762extern void print_event_filter(struct ftrace_event_call *call,
719 struct trace_seq *s); 763 struct trace_seq *s);
720extern int apply_event_filter(struct ftrace_event_call *call, 764extern int apply_event_filter(struct ftrace_event_call *call,
@@ -730,7 +774,8 @@ filter_check_discard(struct ftrace_event_call *call, void *rec,
730 struct ring_buffer *buffer, 774 struct ring_buffer *buffer,
731 struct ring_buffer_event *event) 775 struct ring_buffer_event *event)
732{ 776{
733 if (unlikely(call->filter_active) && !filter_match_preds(call, rec)) { 777 if (unlikely(call->filter_active) &&
778 !filter_match_preds(call->filter, rec)) {
734 ring_buffer_discard_commit(buffer, event); 779 ring_buffer_discard_commit(buffer, event);
735 return 1; 780 return 1;
736 } 781 }
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 20c5f92e28a8..84a3a7ba072a 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -20,6 +20,8 @@
20#include <linux/ktime.h> 20#include <linux/ktime.h>
21#include <linux/trace_clock.h> 21#include <linux/trace_clock.h>
22 22
23#include "trace.h"
24
23/* 25/*
24 * trace_clock_local(): the simplest and least coherent tracing clock. 26 * trace_clock_local(): the simplest and least coherent tracing clock.
25 * 27 *
@@ -28,17 +30,17 @@
28 */ 30 */
29u64 notrace trace_clock_local(void) 31u64 notrace trace_clock_local(void)
30{ 32{
31 unsigned long flags;
32 u64 clock; 33 u64 clock;
34 int resched;
33 35
34 /* 36 /*
35 * sched_clock() is an architecture implemented, fast, scalable, 37 * sched_clock() is an architecture implemented, fast, scalable,
36 * lockless clock. It is not guaranteed to be coherent across 38 * lockless clock. It is not guaranteed to be coherent across
37 * CPUs, nor across CPU idle events. 39 * CPUs, nor across CPU idle events.
38 */ 40 */
39 raw_local_irq_save(flags); 41 resched = ftrace_preempt_disable();
40 clock = sched_clock(); 42 clock = sched_clock();
41 raw_local_irq_restore(flags); 43 ftrace_preempt_enable(resched);
42 44
43 return clock; 45 return clock;
44} 46}
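trace_clock_local() no longer disables interrupts around sched_clock(): keeping the read on one CPU only requires disabling preemption, and ftrace_preempt_disable() additionally guards against recursion when the scheduler itself is being traced. Assuming a context outside the function tracer where the plain notrace variants suffice, the equivalent pattern is:

    u64 example_local_clock(void)
    {
            u64 clock;

            preempt_disable_notrace();  /* stay on one CPU for the read */
            clock = sched_clock();
            preempt_enable_notrace();

            return clock;
    }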
@@ -69,10 +71,10 @@ u64 notrace trace_clock(void)
69/* keep prev_time and lock in the same cacheline. */ 71/* keep prev_time and lock in the same cacheline. */
70static struct { 72static struct {
71 u64 prev_time; 73 u64 prev_time;
72 raw_spinlock_t lock; 74 arch_spinlock_t lock;
73} trace_clock_struct ____cacheline_aligned_in_smp = 75} trace_clock_struct ____cacheline_aligned_in_smp =
74 { 76 {
75 .lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED, 77 .lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED,
76 }; 78 };
77 79
78u64 notrace trace_clock_global(void) 80u64 notrace trace_clock_global(void)
@@ -92,7 +94,7 @@ u64 notrace trace_clock_global(void)
92 if (unlikely(in_nmi())) 94 if (unlikely(in_nmi()))
93 goto out; 95 goto out;
94 96
95 __raw_spin_lock(&trace_clock_struct.lock); 97 arch_spin_lock(&trace_clock_struct.lock);
96 98
97 /* 99 /*
98 * TODO: if this happens often then maybe we should reset 100 * TODO: if this happens often then maybe we should reset
@@ -104,7 +106,7 @@ u64 notrace trace_clock_global(void)
104 106
105 trace_clock_struct.prev_time = now; 107 trace_clock_struct.prev_time = now;
106 108
107 __raw_spin_unlock(&trace_clock_struct.lock); 109 arch_spin_unlock(&trace_clock_struct.lock);
108 110
109 out: 111 out:
110 raw_local_irq_restore(flags); 112 raw_local_irq_restore(flags);
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index ead3d724599d..c16a08f399df 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -364,3 +364,19 @@ FTRACE_ENTRY(kmem_free, kmemtrace_free_entry,
364 F_printk("type:%u call_site:%lx ptr:%p", 364 F_printk("type:%u call_site:%lx ptr:%p",
365 __entry->type_id, __entry->call_site, __entry->ptr) 365 __entry->type_id, __entry->call_site, __entry->ptr)
366); 366);
367
368FTRACE_ENTRY(ksym_trace, ksym_trace_entry,
369
370 TRACE_KSYM,
371
372 F_STRUCT(
373 __field( unsigned long, ip )
374 __field( unsigned char, type )
375 __array( char , cmd, TASK_COMM_LEN )
376 __field( unsigned long, addr )
377 ),
378
379 F_printk("ip: %pF type: %d ksym_name: %pS cmd: %s",
380 (void *)__entry->ip, (unsigned int)__entry->type,
381 (void *)__entry->addr, __entry->cmd)
382);
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index 8d5c171cc998..9e25573242cf 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -8,17 +8,14 @@
8#include <linux/module.h> 8#include <linux/module.h>
9#include "trace.h" 9#include "trace.h"
10 10
11/*
12 * We can't use a size but a type in alloc_percpu()
13 * So let's create a dummy type that matches the desired size
14 */
15typedef struct {char buf[FTRACE_MAX_PROFILE_SIZE];} profile_buf_t;
16 11
17char *trace_profile_buf; 12char *perf_trace_buf;
18EXPORT_SYMBOL_GPL(trace_profile_buf); 13EXPORT_SYMBOL_GPL(perf_trace_buf);
14
15char *perf_trace_buf_nmi;
16EXPORT_SYMBOL_GPL(perf_trace_buf_nmi);
19 17
20char *trace_profile_buf_nmi; 18typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ;
21EXPORT_SYMBOL_GPL(trace_profile_buf_nmi);
22 19
23/* Count the events in use (per event id, not per instance) */ 20/* Count the events in use (per event id, not per instance) */
24static int total_profile_count; 21static int total_profile_count;
@@ -28,24 +25,24 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event)
28 char *buf; 25 char *buf;
29 int ret = -ENOMEM; 26 int ret = -ENOMEM;
30 27
31 if (atomic_inc_return(&event->profile_count)) 28 if (event->profile_count++ > 0)
32 return 0; 29 return 0;
33 30
34 if (!total_profile_count) { 31 if (!total_profile_count) {
35 buf = (char *)alloc_percpu(profile_buf_t); 32 buf = (char *)alloc_percpu(perf_trace_t);
36 if (!buf) 33 if (!buf)
37 goto fail_buf; 34 goto fail_buf;
38 35
39 rcu_assign_pointer(trace_profile_buf, buf); 36 rcu_assign_pointer(perf_trace_buf, buf);
40 37
41 buf = (char *)alloc_percpu(profile_buf_t); 38 buf = (char *)alloc_percpu(perf_trace_t);
42 if (!buf) 39 if (!buf)
43 goto fail_buf_nmi; 40 goto fail_buf_nmi;
44 41
45 rcu_assign_pointer(trace_profile_buf_nmi, buf); 42 rcu_assign_pointer(perf_trace_buf_nmi, buf);
46 } 43 }
47 44
48 ret = event->profile_enable(); 45 ret = event->profile_enable(event);
49 if (!ret) { 46 if (!ret) {
50 total_profile_count++; 47 total_profile_count++;
51 return 0; 48 return 0;
@@ -53,13 +50,13 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event)
53 50
54fail_buf_nmi: 51fail_buf_nmi:
55 if (!total_profile_count) { 52 if (!total_profile_count) {
56 free_percpu(trace_profile_buf_nmi); 53 free_percpu(perf_trace_buf_nmi);
57 free_percpu(trace_profile_buf); 54 free_percpu(perf_trace_buf);
58 trace_profile_buf_nmi = NULL; 55 perf_trace_buf_nmi = NULL;
59 trace_profile_buf = NULL; 56 perf_trace_buf = NULL;
60 } 57 }
61fail_buf: 58fail_buf:
62 atomic_dec(&event->profile_count); 59 event->profile_count--;
63 60
64 return ret; 61 return ret;
65} 62}
@@ -86,17 +83,17 @@ static void ftrace_profile_disable_event(struct ftrace_event_call *event)
86{ 83{
87 char *buf, *nmi_buf; 84 char *buf, *nmi_buf;
88 85
89 if (!atomic_add_negative(-1, &event->profile_count)) 86 if (--event->profile_count > 0)
90 return; 87 return;
91 88
92 event->profile_disable(); 89 event->profile_disable(event);
93 90
94 if (!--total_profile_count) { 91 if (!--total_profile_count) {
95 buf = trace_profile_buf; 92 buf = perf_trace_buf;
96 rcu_assign_pointer(trace_profile_buf, NULL); 93 rcu_assign_pointer(perf_trace_buf, NULL);
97 94
98 nmi_buf = trace_profile_buf_nmi; 95 nmi_buf = perf_trace_buf_nmi;
99 rcu_assign_pointer(trace_profile_buf_nmi, NULL); 96 rcu_assign_pointer(perf_trace_buf_nmi, NULL);
100 97
101 /* 98 /*
102 * Ensure every events in profiling have finished before 99 * Ensure every events in profiling have finished before
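The perf_trace_t typedef exists because alloc_percpu() takes a type, not a byte count; typedef'ing a char array of the desired size lets a raw per-CPU scratch buffer be allocated. The same trick in isolation, with a hypothetical size and error path:

    #define EXAMPLE_BUF_SIZE 2048                    /* hypothetical */
    typedef typeof(char [EXAMPLE_BUF_SIZE]) example_buf_t;

    char *buf = (char *)alloc_percpu(example_buf_t); /* one buffer per CPU */
    if (!buf)
            return -ENOMEM;
    rcu_assign_pointer(perf_trace_buf, buf);         /* publish for readers */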
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index d128f65778e6..189b09baf4fb 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -78,7 +78,7 @@ EXPORT_SYMBOL_GPL(trace_define_field);
78 if (ret) \ 78 if (ret) \
79 return ret; 79 return ret;
80 80
81int trace_define_common_fields(struct ftrace_event_call *call) 81static int trace_define_common_fields(struct ftrace_event_call *call)
82{ 82{
83 int ret; 83 int ret;
84 struct trace_entry ent; 84 struct trace_entry ent;
@@ -91,11 +91,8 @@ int trace_define_common_fields(struct ftrace_event_call *call)
91 91
92 return ret; 92 return ret;
93} 93}
94EXPORT_SYMBOL_GPL(trace_define_common_fields);
95 94
96#ifdef CONFIG_MODULES 95void trace_destroy_fields(struct ftrace_event_call *call)
97
98static void trace_destroy_fields(struct ftrace_event_call *call)
99{ 96{
100 struct ftrace_event_field *field, *next; 97 struct ftrace_event_field *field, *next;
101 98
@@ -107,27 +104,49 @@ static void trace_destroy_fields(struct ftrace_event_call *call)
107 } 104 }
108} 105}
109 106
110#endif /* CONFIG_MODULES */ 107int trace_event_raw_init(struct ftrace_event_call *call)
108{
109 int id;
110
111 id = register_ftrace_event(call->event);
112 if (!id)
113 return -ENODEV;
114 call->id = id;
115 INIT_LIST_HEAD(&call->fields);
116
117 return 0;
118}
119EXPORT_SYMBOL_GPL(trace_event_raw_init);
111 120
112static void ftrace_event_enable_disable(struct ftrace_event_call *call, 121static int ftrace_event_enable_disable(struct ftrace_event_call *call,
113 int enable) 122 int enable)
114{ 123{
124 int ret = 0;
125
115 switch (enable) { 126 switch (enable) {
116 case 0: 127 case 0:
117 if (call->enabled) { 128 if (call->enabled) {
118 call->enabled = 0; 129 call->enabled = 0;
119 tracing_stop_cmdline_record(); 130 tracing_stop_cmdline_record();
120 call->unregfunc(call->data); 131 call->unregfunc(call);
121 } 132 }
122 break; 133 break;
123 case 1: 134 case 1:
124 if (!call->enabled) { 135 if (!call->enabled) {
125 call->enabled = 1;
126 tracing_start_cmdline_record(); 136 tracing_start_cmdline_record();
127 call->regfunc(call->data); 137 ret = call->regfunc(call);
138 if (ret) {
139 tracing_stop_cmdline_record();
140 pr_info("event trace: Could not enable event "
141 "%s\n", call->name);
142 break;
143 }
144 call->enabled = 1;
128 } 145 }
129 break; 146 break;
130 } 147 }
148
149 return ret;
131} 150}
132 151
133static void ftrace_clear_events(void) 152static void ftrace_clear_events(void)
@@ -406,7 +425,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
406 case 0: 425 case 0:
407 case 1: 426 case 1:
408 mutex_lock(&event_mutex); 427 mutex_lock(&event_mutex);
409 ftrace_event_enable_disable(call, val); 428 ret = ftrace_event_enable_disable(call, val);
410 mutex_unlock(&event_mutex); 429 mutex_unlock(&event_mutex);
411 break; 430 break;
412 431
@@ -416,7 +435,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
416 435
417 *ppos += cnt; 436 *ppos += cnt;
418 437
419 return cnt; 438 return ret ? ret : cnt;
420} 439}
421 440
422static ssize_t 441static ssize_t
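ftrace_event_enable_disable() now returns the regfunc() status instead of silently marking the event enabled, and event_enable_write() forwards it with the usual "errno or byte count" idiom:

    mutex_lock(&event_mutex);
    ret = ftrace_event_enable_disable(call, val);
    mutex_unlock(&event_mutex);

    *ppos += cnt;
    return ret ? ret : cnt;  /* errno from regfunc(), else bytes consumed */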
@@ -507,7 +526,7 @@ extern char *__bad_type_size(void);
507#define FIELD(type, name) \ 526#define FIELD(type, name) \
508 sizeof(type) != sizeof(field.name) ? __bad_type_size() : \ 527 sizeof(type) != sizeof(field.name) ? __bad_type_size() : \
509 #type, "common_" #name, offsetof(typeof(field), name), \ 528 #type, "common_" #name, offsetof(typeof(field), name), \
510 sizeof(field.name) 529 sizeof(field.name), is_signed_type(type)
511 530
512static int trace_write_header(struct trace_seq *s) 531static int trace_write_header(struct trace_seq *s)
513{ 532{
@@ -515,17 +534,17 @@ static int trace_write_header(struct trace_seq *s)
515 534
516 /* struct trace_entry */ 535 /* struct trace_entry */
517 return trace_seq_printf(s, 536 return trace_seq_printf(s,
518 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" 537 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
519 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" 538 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
520 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" 539 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
521 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" 540 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
522 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" 541 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
523 "\n", 542 "\n",
524 FIELD(unsigned short, type), 543 FIELD(unsigned short, type),
525 FIELD(unsigned char, flags), 544 FIELD(unsigned char, flags),
526 FIELD(unsigned char, preempt_count), 545 FIELD(unsigned char, preempt_count),
527 FIELD(int, pid), 546 FIELD(int, pid),
528 FIELD(int, lock_depth)); 547 FIELD(int, lock_depth));
529} 548}
530 549
531static ssize_t 550static ssize_t
@@ -878,9 +897,9 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
878 "'%s/filter' entry\n", name); 897 "'%s/filter' entry\n", name);
879 } 898 }
880 899
881 entry = trace_create_file("enable", 0644, system->entry, 900 trace_create_file("enable", 0644, system->entry,
882 (void *)system->name, 901 (void *)system->name,
883 &ftrace_system_enable_fops); 902 &ftrace_system_enable_fops);
884 903
885 return system->entry; 904 return system->entry;
886} 905}
@@ -892,7 +911,6 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
892 const struct file_operations *filter, 911 const struct file_operations *filter,
893 const struct file_operations *format) 912 const struct file_operations *format)
894{ 913{
895 struct dentry *entry;
896 int ret; 914 int ret;
897 915
898 /* 916 /*
@@ -910,55 +928,76 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
910 } 928 }
911 929
912 if (call->regfunc) 930 if (call->regfunc)
913 entry = trace_create_file("enable", 0644, call->dir, call, 931 trace_create_file("enable", 0644, call->dir, call,
914 enable); 932 enable);
915 933
916 if (call->id && call->profile_enable) 934 if (call->id && call->profile_enable)
917 entry = trace_create_file("id", 0444, call->dir, call, 935 trace_create_file("id", 0444, call->dir, call,
918 id); 936 id);
919 937
920 if (call->define_fields) { 938 if (call->define_fields) {
921 ret = call->define_fields(call); 939 ret = trace_define_common_fields(call);
940 if (!ret)
941 ret = call->define_fields(call);
922 if (ret < 0) { 942 if (ret < 0) {
923 pr_warning("Could not initialize trace point" 943 pr_warning("Could not initialize trace point"
924 " events/%s\n", call->name); 944 " events/%s\n", call->name);
925 return ret; 945 return ret;
926 } 946 }
927 entry = trace_create_file("filter", 0644, call->dir, call, 947 trace_create_file("filter", 0644, call->dir, call,
928 filter); 948 filter);
929 } 949 }
930 950
931 /* A trace may not want to export its format */ 951 /* A trace may not want to export its format */
932 if (!call->show_format) 952 if (!call->show_format)
933 return 0; 953 return 0;
934 954
935 entry = trace_create_file("format", 0444, call->dir, call, 955 trace_create_file("format", 0444, call->dir, call,
936 format); 956 format);
937 957
938 return 0; 958 return 0;
939} 959}
940 960
941#define for_each_event(event, start, end) \ 961static int __trace_add_event_call(struct ftrace_event_call *call)
942 for (event = start; \ 962{
943 (unsigned long)event < (unsigned long)end; \ 963 struct dentry *d_events;
944 event++) 964 int ret;
945 965
946#ifdef CONFIG_MODULES 966 if (!call->name)
967 return -EINVAL;
947 968
948static LIST_HEAD(ftrace_module_file_list); 969 if (call->raw_init) {
970 ret = call->raw_init(call);
971 if (ret < 0) {
972 if (ret != -ENOSYS)
973 pr_warning("Could not initialize trace "
974 "events/%s\n", call->name);
975 return ret;
976 }
977 }
949 978
950/* 979 d_events = event_trace_events_dir();
951 * Modules must own their file_operations to keep up with 980 if (!d_events)
952 * reference counting. 981 return -ENOENT;
953 */ 982
954struct ftrace_module_file_ops { 983 ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
955 struct list_head list; 984 &ftrace_enable_fops, &ftrace_event_filter_fops,
956 struct module *mod; 985 &ftrace_event_format_fops);
957 struct file_operations id; 986 if (!ret)
958 struct file_operations enable; 987 list_add(&call->list, &ftrace_events);
959 struct file_operations format; 988
960 struct file_operations filter; 989 return ret;
961}; 990}
991
992/* Add an additional event_call dynamically */
993int trace_add_event_call(struct ftrace_event_call *call)
994{
995 int ret;
996 mutex_lock(&event_mutex);
997 ret = __trace_add_event_call(call);
998 mutex_unlock(&event_mutex);
999 return ret;
1000}
962 1001
963static void remove_subsystem_dir(const char *name) 1002static void remove_subsystem_dir(const char *name)
964{ 1003{
@@ -986,6 +1025,53 @@ static void remove_subsystem_dir(const char *name)
986 } 1025 }
987} 1026}
988 1027
1028/*
1029 * Must be called with both event_mutex and trace_event_mutex held.
1030 */
1031static void __trace_remove_event_call(struct ftrace_event_call *call)
1032{
1033 ftrace_event_enable_disable(call, 0);
1034 if (call->event)
1035 __unregister_ftrace_event(call->event);
1036 debugfs_remove_recursive(call->dir);
1037 list_del(&call->list);
1038 trace_destroy_fields(call);
1039 destroy_preds(call);
1040 remove_subsystem_dir(call->system);
1041}
1042
1043/* Remove an event_call */
1044void trace_remove_event_call(struct ftrace_event_call *call)
1045{
1046 mutex_lock(&event_mutex);
1047 down_write(&trace_event_mutex);
1048 __trace_remove_event_call(call);
1049 up_write(&trace_event_mutex);
1050 mutex_unlock(&event_mutex);
1051}
1052
1053#define for_each_event(event, start, end) \
1054 for (event = start; \
1055 (unsigned long)event < (unsigned long)end; \
1056 event++)
1057
1058#ifdef CONFIG_MODULES
1059
1060static LIST_HEAD(ftrace_module_file_list);
1061
1062/*
1063 * Modules must own their file_operations to keep up with
1064 * reference counting.
1065 */
1066struct ftrace_module_file_ops {
1067 struct list_head list;
1068 struct module *mod;
1069 struct file_operations id;
1070 struct file_operations enable;
1071 struct file_operations format;
1072 struct file_operations filter;
1073};
1074
989static struct ftrace_module_file_ops * 1075static struct ftrace_module_file_ops *
990trace_create_file_ops(struct module *mod) 1076trace_create_file_ops(struct module *mod)
991{ 1077{
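trace_add_event_call() and trace_remove_event_call() are the new entry points that kprobe-based dynamic events use to register and tear down at runtime; both serialize on event_mutex, and removal additionally write-locks trace_event_mutex so no reader sees a half-removed event. A hypothetical dynamic event would use them roughly as:

    /* 'my_call' is a hypothetical, fully initialized ftrace_event_call */
    ret = trace_add_event_call(&my_call);
    if (ret)
            pr_warning("could not register dynamic event %s\n", my_call.name);

    /* ... later, on teardown ... */
    trace_remove_event_call(&my_call);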
@@ -1043,7 +1129,7 @@ static void trace_module_add_events(struct module *mod)
1043 if (!call->name) 1129 if (!call->name)
1044 continue; 1130 continue;
1045 if (call->raw_init) { 1131 if (call->raw_init) {
1046 ret = call->raw_init(); 1132 ret = call->raw_init(call);
1047 if (ret < 0) { 1133 if (ret < 0) {
1048 if (ret != -ENOSYS) 1134 if (ret != -ENOSYS)
1049 pr_warning("Could not initialize trace " 1135 pr_warning("Could not initialize trace "
@@ -1061,10 +1147,11 @@ static void trace_module_add_events(struct module *mod)
1061 return; 1147 return;
1062 } 1148 }
1063 call->mod = mod; 1149 call->mod = mod;
1064 list_add(&call->list, &ftrace_events); 1150 ret = event_create_dir(call, d_events,
1065 event_create_dir(call, d_events, 1151 &file_ops->id, &file_ops->enable,
1066 &file_ops->id, &file_ops->enable, 1152 &file_ops->filter, &file_ops->format);
1067 &file_ops->filter, &file_ops->format); 1153 if (!ret)
1154 list_add(&call->list, &ftrace_events);
1068 } 1155 }
1069} 1156}
1070 1157
@@ -1078,14 +1165,7 @@ static void trace_module_remove_events(struct module *mod)
1078 list_for_each_entry_safe(call, p, &ftrace_events, list) { 1165 list_for_each_entry_safe(call, p, &ftrace_events, list) {
1079 if (call->mod == mod) { 1166 if (call->mod == mod) {
1080 found = true; 1167 found = true;
1081 ftrace_event_enable_disable(call, 0); 1168 __trace_remove_event_call(call);
1082 if (call->event)
1083 __unregister_ftrace_event(call->event);
1084 debugfs_remove_recursive(call->dir);
1085 list_del(&call->list);
1086 trace_destroy_fields(call);
1087 destroy_preds(call);
1088 remove_subsystem_dir(call->system);
1089 } 1169 }
1090 } 1170 }
1091 1171
@@ -1203,7 +1283,7 @@ static __init int event_trace_init(void)
1203 if (!call->name) 1283 if (!call->name)
1204 continue; 1284 continue;
1205 if (call->raw_init) { 1285 if (call->raw_init) {
1206 ret = call->raw_init(); 1286 ret = call->raw_init(call);
1207 if (ret < 0) { 1287 if (ret < 0) {
1208 if (ret != -ENOSYS) 1288 if (ret != -ENOSYS)
1209 pr_warning("Could not initialize trace " 1289 pr_warning("Could not initialize trace "
@@ -1211,10 +1291,12 @@ static __init int event_trace_init(void)
1211 continue; 1291 continue;
1212 } 1292 }
1213 } 1293 }
1214 list_add(&call->list, &ftrace_events); 1294 ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
1215 event_create_dir(call, d_events, &ftrace_event_id_fops, 1295 &ftrace_enable_fops,
1216 &ftrace_enable_fops, &ftrace_event_filter_fops, 1296 &ftrace_event_filter_fops,
1217 &ftrace_event_format_fops); 1297 &ftrace_event_format_fops);
1298 if (!ret)
1299 list_add(&call->list, &ftrace_events);
1218 } 1300 }
1219 1301
1220 while (true) { 1302 while (true) {
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 98a6cc5c64ed..50504cb228de 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -18,11 +18,10 @@
18 * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com> 18 * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
19 */ 19 */
20 20
21#include <linux/debugfs.h>
22#include <linux/uaccess.h>
23#include <linux/module.h> 21#include <linux/module.h>
24#include <linux/ctype.h> 22#include <linux/ctype.h>
25#include <linux/mutex.h> 23#include <linux/mutex.h>
24#include <linux/perf_event.h>
26 25
27#include "trace.h" 26#include "trace.h"
28#include "trace_output.h" 27#include "trace_output.h"
@@ -31,6 +30,7 @@ enum filter_op_ids
31{ 30{
32 OP_OR, 31 OP_OR,
33 OP_AND, 32 OP_AND,
33 OP_GLOB,
34 OP_NE, 34 OP_NE,
35 OP_EQ, 35 OP_EQ,
36 OP_LT, 36 OP_LT,
@@ -48,16 +48,17 @@ struct filter_op {
48}; 48};
49 49
50static struct filter_op filter_ops[] = { 50static struct filter_op filter_ops[] = {
51 { OP_OR, "||", 1 }, 51 { OP_OR, "||", 1 },
52 { OP_AND, "&&", 2 }, 52 { OP_AND, "&&", 2 },
53 { OP_NE, "!=", 4 }, 53 { OP_GLOB, "~", 4 },
54 { OP_EQ, "==", 4 }, 54 { OP_NE, "!=", 4 },
55 { OP_LT, "<", 5 }, 55 { OP_EQ, "==", 4 },
56 { OP_LE, "<=", 5 }, 56 { OP_LT, "<", 5 },
57 { OP_GT, ">", 5 }, 57 { OP_LE, "<=", 5 },
58 { OP_GE, ">=", 5 }, 58 { OP_GT, ">", 5 },
59 { OP_NONE, "OP_NONE", 0 }, 59 { OP_GE, ">=", 5 },
60 { OP_OPEN_PAREN, "(", 0 }, 60 { OP_NONE, "OP_NONE", 0 },
61 { OP_OPEN_PAREN, "(", 0 },
61}; 62};
62 63
63enum { 64enum {
@@ -197,9 +198,9 @@ static int filter_pred_string(struct filter_pred *pred, void *event,
197 char *addr = (char *)(event + pred->offset); 198 char *addr = (char *)(event + pred->offset);
198 int cmp, match; 199 int cmp, match;
199 200
200 cmp = strncmp(addr, pred->str_val, pred->str_len); 201 cmp = pred->regex.match(addr, &pred->regex, pred->regex.field_len);
201 202
202 match = (!cmp) ^ pred->not; 203 match = cmp ^ pred->not;
203 204
204 return match; 205 return match;
205} 206}
@@ -211,9 +212,9 @@ static int filter_pred_pchar(struct filter_pred *pred, void *event,
211 char **addr = (char **)(event + pred->offset); 212 char **addr = (char **)(event + pred->offset);
212 int cmp, match; 213 int cmp, match;
213 214
214 cmp = strncmp(*addr, pred->str_val, pred->str_len); 215 cmp = pred->regex.match(*addr, &pred->regex, pred->regex.field_len);
215 216
216 match = (!cmp) ^ pred->not; 217 match = cmp ^ pred->not;
217 218
218 return match; 219 return match;
219} 220}
@@ -237,9 +238,9 @@ static int filter_pred_strloc(struct filter_pred *pred, void *event,
237 char *addr = (char *)(event + str_loc); 238 char *addr = (char *)(event + str_loc);
238 int cmp, match; 239 int cmp, match;
239 240
240 cmp = strncmp(addr, pred->str_val, str_len); 241 cmp = pred->regex.match(addr, &pred->regex, str_len);
241 242
242 match = (!cmp) ^ pred->not; 243 match = cmp ^ pred->not;
243 244
244 return match; 245 return match;
245} 246}
@@ -250,10 +251,121 @@ static int filter_pred_none(struct filter_pred *pred, void *event,
250 return 0; 251 return 0;
251} 252}
252 253
254/* Basic regex callbacks */
255static int regex_match_full(char *str, struct regex *r, int len)
256{
257 if (strncmp(str, r->pattern, len) == 0)
258 return 1;
259 return 0;
260}
261
262static int regex_match_front(char *str, struct regex *r, int len)
263{
264 if (strncmp(str, r->pattern, len) == 0)
265 return 1;
266 return 0;
267}
268
269static int regex_match_middle(char *str, struct regex *r, int len)
270{
271 if (strstr(str, r->pattern))
272 return 1;
273 return 0;
274}
275
276static int regex_match_end(char *str, struct regex *r, int len)
277{
278 char *ptr = strstr(str, r->pattern);
279
280 if (ptr && (ptr[r->len] == 0))
281 return 1;
282 return 0;
283}
284
285/**
286 * filter_parse_regex - parse a basic regex
287 * @buff: the raw regex
288 * @len: length of the regex
289 * @search: will point to the beginning of the string to compare
290 * @not: tell whether the match will have to be inverted
291 *
292 * The caller passes in a buffer containing a regex; this function
293 * sets *search to point to the search part of the buffer and
294 * returns the type of search it is (see the enum above).
295 * Note that this modifies buff.
296 *
297 * Returns enum type.
298 * search returns the pointer to use for comparison.
299 * not returns 1 if buff started with a '!'
300 * 0 otherwise.
301 */
302enum regex_type filter_parse_regex(char *buff, int len, char **search, int *not)
303{
304 int type = MATCH_FULL;
305 int i;
306
307 if (buff[0] == '!') {
308 *not = 1;
309 buff++;
310 len--;
311 } else
312 *not = 0;
313
314 *search = buff;
315
316 for (i = 0; i < len; i++) {
317 if (buff[i] == '*') {
318 if (!i) {
319 *search = buff + 1;
320 type = MATCH_END_ONLY;
321 } else {
322 if (type == MATCH_END_ONLY)
323 type = MATCH_MIDDLE_ONLY;
324 else
325 type = MATCH_FRONT_ONLY;
326 buff[i] = 0;
327 break;
328 }
329 }
330 }
331
332 return type;
333}
334
335static void filter_build_regex(struct filter_pred *pred)
336{
337 struct regex *r = &pred->regex;
338 char *search;
339 enum regex_type type = MATCH_FULL;
340 int not = 0;
341
342 if (pred->op == OP_GLOB) {
343 type = filter_parse_regex(r->pattern, r->len, &search, &not);
344 r->len = strlen(search);
345 memmove(r->pattern, search, r->len+1);
346 }
347
348 switch (type) {
349 case MATCH_FULL:
350 r->match = regex_match_full;
351 break;
352 case MATCH_FRONT_ONLY:
353 r->match = regex_match_front;
354 break;
355 case MATCH_MIDDLE_ONLY:
356 r->match = regex_match_middle;
357 break;
358 case MATCH_END_ONLY:
359 r->match = regex_match_end;
360 break;
361 }
362
363 pred->not ^= not;
364}
365
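filter_parse_regex() only understands leading/trailing '*' globs, not full regex. How a few hypothetical patterns map onto the match types (and thus onto the regex_match_* callbacks above):

    /* "abc"   -> MATCH_FULL,        search = "abc" */
    /* "abc*"  -> MATCH_FRONT_ONLY,  search = "abc" */
    /* "*abc"  -> MATCH_END_ONLY,    search = "abc" */
    /* "*abc*" -> MATCH_MIDDLE_ONLY, search = "abc" */
    /* "!abc"  -> MATCH_FULL, *not = 1 (inverted)   */

    char buf[] = "*abc*";
    char *search;
    int not;
    enum regex_type type = filter_parse_regex(buf, strlen(buf), &search, &not);
    /* here: type == MATCH_MIDDLE_ONLY, search == buf + 1, not == 0 */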
253/* return 1 if event matches, 0 otherwise (discard) */ 366/* return 1 if event matches, 0 otherwise (discard) */
254int filter_match_preds(struct ftrace_event_call *call, void *rec) 367int filter_match_preds(struct event_filter *filter, void *rec)
255{ 368{
256 struct event_filter *filter = call->filter;
257 int match, top = 0, val1 = 0, val2 = 0; 369 int match, top = 0, val1 = 0, val2 = 0;
258 int stack[MAX_FILTER_PRED]; 370 int stack[MAX_FILTER_PRED];
259 struct filter_pred *pred; 371 struct filter_pred *pred;
@@ -396,7 +508,7 @@ static void filter_clear_pred(struct filter_pred *pred)
396{ 508{
397 kfree(pred->field_name); 509 kfree(pred->field_name);
398 pred->field_name = NULL; 510 pred->field_name = NULL;
399 pred->str_len = 0; 511 pred->regex.len = 0;
400} 512}
401 513
402static int filter_set_pred(struct filter_pred *dest, 514static int filter_set_pred(struct filter_pred *dest,
@@ -426,9 +538,8 @@ static void filter_disable_preds(struct ftrace_event_call *call)
426 filter->preds[i]->fn = filter_pred_none; 538 filter->preds[i]->fn = filter_pred_none;
427} 539}
428 540
429void destroy_preds(struct ftrace_event_call *call) 541static void __free_preds(struct event_filter *filter)
430{ 542{
431 struct event_filter *filter = call->filter;
432 int i; 543 int i;
433 544
434 if (!filter) 545 if (!filter)
@@ -441,21 +552,24 @@ void destroy_preds(struct ftrace_event_call *call)
441 kfree(filter->preds); 552 kfree(filter->preds);
442 kfree(filter->filter_string); 553 kfree(filter->filter_string);
443 kfree(filter); 554 kfree(filter);
555}
556
557void destroy_preds(struct ftrace_event_call *call)
558{
559 __free_preds(call->filter);
444 call->filter = NULL; 560 call->filter = NULL;
561 call->filter_active = 0;
445} 562}
446 563
447static int init_preds(struct ftrace_event_call *call) 564static struct event_filter *__alloc_preds(void)
448{ 565{
449 struct event_filter *filter; 566 struct event_filter *filter;
450 struct filter_pred *pred; 567 struct filter_pred *pred;
451 int i; 568 int i;
452 569
453 if (call->filter) 570 filter = kzalloc(sizeof(*filter), GFP_KERNEL);
454 return 0; 571 if (!filter)
455 572 return ERR_PTR(-ENOMEM);
456 filter = call->filter = kzalloc(sizeof(*filter), GFP_KERNEL);
457 if (!call->filter)
458 return -ENOMEM;
459 573
460 filter->n_preds = 0; 574 filter->n_preds = 0;
461 575
@@ -471,12 +585,24 @@ static int init_preds(struct ftrace_event_call *call)
471 filter->preds[i] = pred; 585 filter->preds[i] = pred;
472 } 586 }
473 587
474 return 0; 588 return filter;
475 589
476oom: 590oom:
477 destroy_preds(call); 591 __free_preds(filter);
592 return ERR_PTR(-ENOMEM);
593}
478 594
479 return -ENOMEM; 595static int init_preds(struct ftrace_event_call *call)
596{
597 if (call->filter)
598 return 0;
599
600 call->filter_active = 0;
601 call->filter = __alloc_preds();
602 if (IS_ERR(call->filter))
603 return PTR_ERR(call->filter);
604
605 return 0;
480} 606}
481 607
482static int init_subsystem_preds(struct event_subsystem *system) 608static int init_subsystem_preds(struct event_subsystem *system)
@@ -499,14 +625,7 @@ static int init_subsystem_preds(struct event_subsystem *system)
499 return 0; 625 return 0;
500} 626}
501 627
502enum { 628static void filter_free_subsystem_preds(struct event_subsystem *system)
503 FILTER_DISABLE_ALL,
504 FILTER_INIT_NO_RESET,
505 FILTER_SKIP_NO_RESET,
506};
507
508static void filter_free_subsystem_preds(struct event_subsystem *system,
509 int flag)
510{ 629{
511 struct ftrace_event_call *call; 630 struct ftrace_event_call *call;
512 631
@@ -517,14 +636,6 @@ static void filter_free_subsystem_preds(struct event_subsystem *system,
517 if (strcmp(call->system, system->name) != 0) 636 if (strcmp(call->system, system->name) != 0)
518 continue; 637 continue;
519 638
520 if (flag == FILTER_INIT_NO_RESET) {
521 call->filter->no_reset = false;
522 continue;
523 }
524
525 if (flag == FILTER_SKIP_NO_RESET && call->filter->no_reset)
526 continue;
527
528 filter_disable_preds(call); 639 filter_disable_preds(call);
529 remove_filter_string(call->filter); 640 remove_filter_string(call->filter);
530 } 641 }
@@ -532,10 +643,10 @@ static void filter_free_subsystem_preds(struct event_subsystem *system,
532 643
533static int filter_add_pred_fn(struct filter_parse_state *ps, 644static int filter_add_pred_fn(struct filter_parse_state *ps,
534 struct ftrace_event_call *call, 645 struct ftrace_event_call *call,
646 struct event_filter *filter,
535 struct filter_pred *pred, 647 struct filter_pred *pred,
536 filter_pred_fn_t fn) 648 filter_pred_fn_t fn)
537{ 649{
538 struct event_filter *filter = call->filter;
539 int idx, err; 650 int idx, err;
540 651
541 if (filter->n_preds == MAX_FILTER_PRED) { 652 if (filter->n_preds == MAX_FILTER_PRED) {
@@ -550,7 +661,6 @@ static int filter_add_pred_fn(struct filter_parse_state *ps,
550 return err; 661 return err;
551 662
552 filter->n_preds++; 663 filter->n_preds++;
553 call->filter_active = 1;
554 664
555 return 0; 665 return 0;
556} 666}
@@ -575,7 +685,10 @@ static bool is_string_field(struct ftrace_event_field *field)
575 685
576static int is_legal_op(struct ftrace_event_field *field, int op) 686static int is_legal_op(struct ftrace_event_field *field, int op)
577{ 687{
578 if (is_string_field(field) && (op != OP_EQ && op != OP_NE)) 688 if (is_string_field(field) &&
689 (op != OP_EQ && op != OP_NE && op != OP_GLOB))
690 return 0;
691 if (!is_string_field(field) && op == OP_GLOB)
579 return 0; 692 return 0;
580 693
581 return 1; 694 return 1;
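
is_legal_op() now admits OP_GLOB for string fields only. Elsewhere in this patch, filter_build_regex() reduces a glob to one of four match types depending on where the '*' sits. A self-contained sketch of that classification (the MATCH_* names mirror the kernel's; the function itself is illustrative):

#include <stdio.h>
#include <string.h>

enum { MATCH_FULL, MATCH_FRONT_ONLY, MATCH_MIDDLE_ONLY, MATCH_END_ONLY };

/* Only a leading and/or trailing '*' is supported, not arbitrary globs. */
static int classify_glob(const char *pat)
{
	size_t len = strlen(pat);
	int head = (len > 0 && pat[0] == '*');
	int tail = (len > 1 && pat[len - 1] == '*');

	if (head && tail)
		return MATCH_MIDDLE_ONLY;	/* "*abc*": substring   */
	if (head)
		return MATCH_END_ONLY;		/* "*abc" : suffix      */
	if (tail)
		return MATCH_FRONT_ONLY;	/* "abc*" : prefix      */
	return MATCH_FULL;			/* "abc"  : exact match */
}

int main(void)
{
	printf("%d %d %d %d\n", classify_glob("abc"), classify_glob("abc*"),
	       classify_glob("*abc"), classify_glob("*abc*"));	/* 0 1 3 2 */
	return 0;
}
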
@@ -626,6 +739,7 @@ static filter_pred_fn_t select_comparison_fn(int op, int field_size,
626 739
627static int filter_add_pred(struct filter_parse_state *ps, 740static int filter_add_pred(struct filter_parse_state *ps,
628 struct ftrace_event_call *call, 741 struct ftrace_event_call *call,
742 struct event_filter *filter,
629 struct filter_pred *pred, 743 struct filter_pred *pred,
630 bool dry_run) 744 bool dry_run)
631{ 745{
@@ -660,21 +774,22 @@ static int filter_add_pred(struct filter_parse_state *ps,
660 } 774 }
661 775
662 if (is_string_field(field)) { 776 if (is_string_field(field)) {
663 pred->str_len = field->size; 777 filter_build_regex(pred);
664 778
665 if (field->filter_type == FILTER_STATIC_STRING) 779 if (field->filter_type == FILTER_STATIC_STRING) {
666 fn = filter_pred_string; 780 fn = filter_pred_string;
667 else if (field->filter_type == FILTER_DYN_STRING) 781 pred->regex.field_len = field->size;
782 } else if (field->filter_type == FILTER_DYN_STRING)
668 fn = filter_pred_strloc; 783 fn = filter_pred_strloc;
669 else { 784 else {
670 fn = filter_pred_pchar; 785 fn = filter_pred_pchar;
671 pred->str_len = strlen(pred->str_val); 786 pred->regex.field_len = strlen(pred->regex.pattern);
672 } 787 }
673 } else { 788 } else {
674 if (field->is_signed) 789 if (field->is_signed)
675 ret = strict_strtoll(pred->str_val, 0, &val); 790 ret = strict_strtoll(pred->regex.pattern, 0, &val);
676 else 791 else
677 ret = strict_strtoull(pred->str_val, 0, &val); 792 ret = strict_strtoull(pred->regex.pattern, 0, &val);
678 if (ret) { 793 if (ret) {
679 parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0); 794 parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0);
680 return -EINVAL; 795 return -EINVAL;
@@ -694,45 +809,7 @@ static int filter_add_pred(struct filter_parse_state *ps,
694 809
695add_pred_fn: 810add_pred_fn:
696 if (!dry_run) 811 if (!dry_run)
697 return filter_add_pred_fn(ps, call, pred, fn); 812 return filter_add_pred_fn(ps, call, filter, pred, fn);
698 return 0;
699}
700
701static int filter_add_subsystem_pred(struct filter_parse_state *ps,
702 struct event_subsystem *system,
703 struct filter_pred *pred,
704 char *filter_string,
705 bool dry_run)
706{
707 struct ftrace_event_call *call;
708 int err = 0;
709 bool fail = true;
710
711 list_for_each_entry(call, &ftrace_events, list) {
712
713 if (!call->define_fields)
714 continue;
715
716 if (strcmp(call->system, system->name))
717 continue;
718
719 if (call->filter->no_reset)
720 continue;
721
722 err = filter_add_pred(ps, call, pred, dry_run);
723 if (err)
724 call->filter->no_reset = true;
725 else
726 fail = false;
727
728 if (!dry_run)
729 replace_filter_string(call->filter, filter_string);
730 }
731
732 if (fail) {
733 parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
734 return err;
735 }
736 return 0; 813 return 0;
737} 814}
738 815
@@ -1045,8 +1122,8 @@ static struct filter_pred *create_pred(int op, char *operand1, char *operand2)
1045 return NULL; 1122 return NULL;
1046 } 1123 }
1047 1124
1048 strcpy(pred->str_val, operand2); 1125 strcpy(pred->regex.pattern, operand2);
1049 pred->str_len = strlen(operand2); 1126 pred->regex.len = strlen(pred->regex.pattern);
1050 1127
1051 pred->op = op; 1128 pred->op = op;
1052 1129
@@ -1090,8 +1167,8 @@ static int check_preds(struct filter_parse_state *ps)
1090 return 0; 1167 return 0;
1091} 1168}
1092 1169
1093static int replace_preds(struct event_subsystem *system, 1170static int replace_preds(struct ftrace_event_call *call,
1094 struct ftrace_event_call *call, 1171 struct event_filter *filter,
1095 struct filter_parse_state *ps, 1172 struct filter_parse_state *ps,
1096 char *filter_string, 1173 char *filter_string,
1097 bool dry_run) 1174 bool dry_run)
@@ -1138,11 +1215,7 @@ static int replace_preds(struct event_subsystem *system,
1138add_pred: 1215add_pred:
1139 if (!pred) 1216 if (!pred)
1140 return -ENOMEM; 1217 return -ENOMEM;
1141 if (call) 1218 err = filter_add_pred(ps, call, filter, pred, dry_run);
1142 err = filter_add_pred(ps, call, pred, false);
1143 else
1144 err = filter_add_subsystem_pred(ps, system, pred,
1145 filter_string, dry_run);
1146 filter_free_pred(pred); 1219 filter_free_pred(pred);
1147 if (err) 1220 if (err)
1148 return err; 1221 return err;
@@ -1153,10 +1226,50 @@ add_pred:
1153 return 0; 1226 return 0;
1154} 1227}
1155 1228
1156int apply_event_filter(struct ftrace_event_call *call, char *filter_string) 1229static int replace_system_preds(struct event_subsystem *system,
1230 struct filter_parse_state *ps,
1231 char *filter_string)
1157{ 1232{
1233 struct ftrace_event_call *call;
1234 bool fail = true;
1158 int err; 1235 int err;
1159 1236
1237 list_for_each_entry(call, &ftrace_events, list) {
1238 struct event_filter *filter = call->filter;
1239
1240 if (!call->define_fields)
1241 continue;
1242
1243 if (strcmp(call->system, system->name) != 0)
1244 continue;
1245
1246 /* try to see if the filter can be applied */
1247 err = replace_preds(call, filter, ps, filter_string, true);
1248 if (err)
1249 continue;
1250
1251 /* really apply the filter */
1252 filter_disable_preds(call);
1253 err = replace_preds(call, filter, ps, filter_string, false);
1254 if (err)
1255 filter_disable_preds(call);
1256 else {
1257 call->filter_active = 1;
1258 replace_filter_string(filter, filter_string);
1259 }
1260 fail = false;
1261 }
1262
1263 if (fail) {
1264 parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
1265 return -EINVAL;
1266 }
1267 return 0;
1268}
1269
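
replace_system_preds() above applies a subsystem filter in two passes: a dry run that only validates the predicates against each event, then a real pass that installs them, skipping events the filter cannot apply to. The same validate-then-commit shape, reduced to a runnable sketch (all names hypothetical):

#include <stdio.h>
#include <errno.h>

struct target { int compatible; };

/* Pass 1 must not mutate state, so an incompatible target is simply
 * skipped; only validated targets see the mutating pass 2. */
static int apply(struct target *t, int dry_run)
{
	if (!t->compatible)
		return -EINVAL;
	if (!dry_run)
		printf("installed\n");
	return 0;
}

static int apply_all(struct target *t, int n)
{
	int i, applied = 0;

	for (i = 0; i < n; i++) {
		if (apply(&t[i], 1))	/* dry run: validate only */
			continue;
		if (!apply(&t[i], 0))	/* really install         */
			applied++;
	}
	return applied ? 0 : -EINVAL;	/* all-miss is an error   */
}

int main(void)
{
	struct target t[] = { { 1 }, { 0 }, { 1 } };

	return apply_all(t, 3) ? 1 : 0;	/* installs two of three  */
}
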
1270int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1271{
1272 int err;
1160 struct filter_parse_state *ps; 1273 struct filter_parse_state *ps;
1161 1274
1162 mutex_lock(&event_mutex); 1275 mutex_lock(&event_mutex);
@@ -1168,8 +1281,7 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1168 if (!strcmp(strstrip(filter_string), "0")) { 1281 if (!strcmp(strstrip(filter_string), "0")) {
1169 filter_disable_preds(call); 1282 filter_disable_preds(call);
1170 remove_filter_string(call->filter); 1283 remove_filter_string(call->filter);
1171 mutex_unlock(&event_mutex); 1284 goto out_unlock;
1172 return 0;
1173 } 1285 }
1174 1286
1175 err = -ENOMEM; 1287 err = -ENOMEM;
@@ -1187,10 +1299,11 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1187 goto out; 1299 goto out;
1188 } 1300 }
1189 1301
1190 err = replace_preds(NULL, call, ps, filter_string, false); 1302 err = replace_preds(call, call->filter, ps, filter_string, false);
1191 if (err) 1303 if (err)
1192 append_filter_err(ps, call->filter); 1304 append_filter_err(ps, call->filter);
1193 1305 else
1306 call->filter_active = 1;
1194out: 1307out:
1195 filter_opstack_clear(ps); 1308 filter_opstack_clear(ps);
1196 postfix_clear(ps); 1309 postfix_clear(ps);
@@ -1205,7 +1318,6 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1205 char *filter_string) 1318 char *filter_string)
1206{ 1319{
1207 int err; 1320 int err;
1208
1209 struct filter_parse_state *ps; 1321 struct filter_parse_state *ps;
1210 1322
1211 mutex_lock(&event_mutex); 1323 mutex_lock(&event_mutex);
@@ -1215,10 +1327,9 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1215 goto out_unlock; 1327 goto out_unlock;
1216 1328
1217 if (!strcmp(strstrip(filter_string), "0")) { 1329 if (!strcmp(strstrip(filter_string), "0")) {
1218 filter_free_subsystem_preds(system, FILTER_DISABLE_ALL); 1330 filter_free_subsystem_preds(system);
1219 remove_filter_string(system->filter); 1331 remove_filter_string(system->filter);
1220 mutex_unlock(&event_mutex); 1332 goto out_unlock;
1221 return 0;
1222 } 1333 }
1223 1334
1224 err = -ENOMEM; 1335 err = -ENOMEM;
@@ -1235,31 +1346,87 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1235 goto out; 1346 goto out;
1236 } 1347 }
1237 1348
1238 filter_free_subsystem_preds(system, FILTER_INIT_NO_RESET); 1349 err = replace_system_preds(system, ps, filter_string);
1239 1350 if (err)
1240 /* try to see the filter can be applied to which events */
1241 err = replace_preds(system, NULL, ps, filter_string, true);
1242 if (err) {
1243 append_filter_err(ps, system->filter); 1351 append_filter_err(ps, system->filter);
1244 goto out; 1352
1353out:
1354 filter_opstack_clear(ps);
1355 postfix_clear(ps);
1356 kfree(ps);
1357out_unlock:
1358 mutex_unlock(&event_mutex);
1359
1360 return err;
1361}
1362
1363#ifdef CONFIG_EVENT_PROFILE
1364
1365void ftrace_profile_free_filter(struct perf_event *event)
1366{
1367 struct event_filter *filter = event->filter;
1368
1369 event->filter = NULL;
1370 __free_preds(filter);
1371}
1372
1373int ftrace_profile_set_filter(struct perf_event *event, int event_id,
1374 char *filter_str)
1375{
1376 int err;
1377 struct event_filter *filter;
1378 struct filter_parse_state *ps;
1379 struct ftrace_event_call *call = NULL;
1380
1381 mutex_lock(&event_mutex);
1382
1383 list_for_each_entry(call, &ftrace_events, list) {
1384 if (call->id == event_id)
1385 break;
1245 } 1386 }
1246 1387
1247 filter_free_subsystem_preds(system, FILTER_SKIP_NO_RESET); 1388 err = -EINVAL;
1389 if (!call)
1390 goto out_unlock;
1248 1391
1249 /* really apply the filter to the events */ 1392 err = -EEXIST;
1250 err = replace_preds(system, NULL, ps, filter_string, false); 1393 if (event->filter)
1251 if (err) { 1394 goto out_unlock;
1252 append_filter_err(ps, system->filter); 1395
1253 filter_free_subsystem_preds(system, 2); 1396 filter = __alloc_preds();
1397 if (IS_ERR(filter)) {
1398 err = PTR_ERR(filter);
1399 goto out_unlock;
1254 } 1400 }
1255 1401
1256out: 1402 err = -ENOMEM;
1403 ps = kzalloc(sizeof(*ps), GFP_KERNEL);
1404 if (!ps)
1405 goto free_preds;
1406
1407 parse_init(ps, filter_ops, filter_str);
1408 err = filter_parse(ps);
1409 if (err)
1410 goto free_ps;
1411
1412 err = replace_preds(call, filter, ps, filter_str, false);
1413 if (!err)
1414 event->filter = filter;
1415
1416free_ps:
1257 filter_opstack_clear(ps); 1417 filter_opstack_clear(ps);
1258 postfix_clear(ps); 1418 postfix_clear(ps);
1259 kfree(ps); 1419 kfree(ps);
1420
1421free_preds:
1422 if (err)
1423 __free_preds(filter);
1424
1260out_unlock: 1425out_unlock:
1261 mutex_unlock(&event_mutex); 1426 mutex_unlock(&event_mutex);
1262 1427
1263 return err; 1428 return err;
1264} 1429}
1265 1430
1431#endif /* CONFIG_EVENT_PROFILE */
1432
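
The refactored __alloc_preds() reports failure through the returned pointer itself, so init_preds() and ftrace_profile_set_filter() check it with IS_ERR()/PTR_ERR() instead of testing for NULL. A minimal user-space sketch of that idiom (the kernel's real helpers live in <linux/err.h>; the MAX_ERRNO encoding below mirrors them but is illustrative):

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>

#define MAX_ERRNO	4095
#define ERR_PTR(err)	((void *)(long)(err))
#define PTR_ERR(ptr)	((long)(ptr))
#define IS_ERR(ptr)	((unsigned long)(ptr) >= (unsigned long)-MAX_ERRNO)

static void *alloc_thing(void)
{
	void *p = malloc(64);

	if (!p)
		return ERR_PTR(-ENOMEM);	/* errno encoded in the pointer */
	return p;
}

int main(void)
{
	void *p = alloc_thing();

	if (IS_ERR(p)) {	/* one return channel covers both outcomes */
		fprintf(stderr, "alloc failed: %ld\n", PTR_ERR(p));
		return 1;
	}
	free(p);
	return 0;
}
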
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 9753fcc61bc5..458e5bfe26d0 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -48,11 +48,11 @@
48struct ____ftrace_##name { \ 48struct ____ftrace_##name { \
49 tstruct \ 49 tstruct \
50}; \ 50}; \
51static void __used ____ftrace_check_##name(void) \ 51static void __always_unused ____ftrace_check_##name(void) \
52{ \ 52{ \
53 struct ____ftrace_##name *__entry = NULL; \ 53 struct ____ftrace_##name *__entry = NULL; \
54 \ 54 \
55 /* force cmpile-time check on F_printk() */ \ 55 /* force compile-time check on F_printk() */ \
56 printk(print); \ 56 printk(print); \
57} 57}
58 58
@@ -66,44 +66,47 @@ static void __used ____ftrace_check_##name(void) \
66#undef __field 66#undef __field
67#define __field(type, item) \ 67#define __field(type, item) \
68 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ 68 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
69 "offset:%zu;\tsize:%zu;\n", \ 69 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
70 offsetof(typeof(field), item), \ 70 offsetof(typeof(field), item), \
71 sizeof(field.item)); \ 71 sizeof(field.item), is_signed_type(type)); \
72 if (!ret) \ 72 if (!ret) \
73 return 0; 73 return 0;
74 74
75#undef __field_desc 75#undef __field_desc
76#define __field_desc(type, container, item) \ 76#define __field_desc(type, container, item) \
77 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ 77 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
78 "offset:%zu;\tsize:%zu;\n", \ 78 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
79 offsetof(typeof(field), container.item), \ 79 offsetof(typeof(field), container.item), \
80 sizeof(field.container.item)); \ 80 sizeof(field.container.item), \
81 is_signed_type(type)); \
81 if (!ret) \ 82 if (!ret) \
82 return 0; 83 return 0;
83 84
84#undef __array 85#undef __array
85#define __array(type, item, len) \ 86#define __array(type, item, len) \
86 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ 87 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
87 "offset:%zu;\tsize:%zu;\n", \ 88 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
88 offsetof(typeof(field), item), \ 89 offsetof(typeof(field), item), \
89 sizeof(field.item)); \ 90 sizeof(field.item), is_signed_type(type)); \
90 if (!ret) \ 91 if (!ret) \
91 return 0; 92 return 0;
92 93
93#undef __array_desc 94#undef __array_desc
94#define __array_desc(type, container, item, len) \ 95#define __array_desc(type, container, item, len) \
95 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ 96 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
96 "offset:%zu;\tsize:%zu;\n", \ 97 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
97 offsetof(typeof(field), container.item), \ 98 offsetof(typeof(field), container.item), \
98 sizeof(field.container.item)); \ 99 sizeof(field.container.item), \
100 is_signed_type(type)); \
99 if (!ret) \ 101 if (!ret) \
100 return 0; 102 return 0;
101 103
102#undef __dynamic_array 104#undef __dynamic_array
103#define __dynamic_array(type, item) \ 105#define __dynamic_array(type, item) \
104 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ 106 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
105 "offset:%zu;\tsize:0;\n", \ 107 "offset:%zu;\tsize:0;\tsigned:%u;\n", \
106 offsetof(typeof(field), item)); \ 108 offsetof(typeof(field), item), \
109 is_signed_type(type)); \
107 if (!ret) \ 110 if (!ret) \
108 return 0; 111 return 0;
109 112
@@ -131,7 +134,6 @@ ftrace_format_##name(struct ftrace_event_call *unused, \
131 134
132#include "trace_entries.h" 135#include "trace_entries.h"
133 136
134
135#undef __field 137#undef __field
136#define __field(type, item) \ 138#define __field(type, item) \
137 ret = trace_define_field(event_call, #type, #item, \ 139 ret = trace_define_field(event_call, #type, #item, \
@@ -182,10 +184,6 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
182 struct struct_name field; \ 184 struct struct_name field; \
183 int ret; \ 185 int ret; \
184 \ 186 \
185 ret = trace_define_common_fields(event_call); \
186 if (ret) \
187 return ret; \
188 \
189 tstruct; \ 187 tstruct; \
190 \ 188 \
191 return ret; \ 189 return ret; \
@@ -193,6 +191,11 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
193 191
194#include "trace_entries.h" 192#include "trace_entries.h"
195 193
194static int ftrace_raw_init_event(struct ftrace_event_call *call)
195{
196 INIT_LIST_HEAD(&call->fields);
197 return 0;
198}
196 199
197#undef __field 200#undef __field
198#define __field(type, item) 201#define __field(type, item)
@@ -211,7 +214,6 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
211 214
212#undef FTRACE_ENTRY 215#undef FTRACE_ENTRY
213#define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \ 216#define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \
214static int ftrace_raw_init_event_##call(void); \
215 \ 217 \
216struct ftrace_event_call __used \ 218struct ftrace_event_call __used \
217__attribute__((__aligned__(4))) \ 219__attribute__((__aligned__(4))) \
@@ -219,14 +221,9 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
219 .name = #call, \ 221 .name = #call, \
220 .id = type, \ 222 .id = type, \
221 .system = __stringify(TRACE_SYSTEM), \ 223 .system = __stringify(TRACE_SYSTEM), \
222 .raw_init = ftrace_raw_init_event_##call, \ 224 .raw_init = ftrace_raw_init_event, \
223 .show_format = ftrace_format_##call, \ 225 .show_format = ftrace_format_##call, \
224 .define_fields = ftrace_define_fields_##call, \ 226 .define_fields = ftrace_define_fields_##call, \
225}; \ 227}; \
226static int ftrace_raw_init_event_##call(void) \
227{ \
228 INIT_LIST_HEAD(&event_##call.fields); \
229 return 0; \
230} \
231 228
232#include "trace_entries.h" 229#include "trace_entries.h"
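
The new signed:%u field in each format description comes from is_signed_type(), which decides signedness at compile time by comparing casted constants: (type)-1 is negative only for signed types. A standalone demonstration of the trick (the macro body here is an approximation of the kernel's, for illustration):

#include <stdio.h>

#define is_signed_type(type)	(((type)(-1)) < (type)1)

int main(void)
{
	printf("int:          %d\n", is_signed_type(int));		/* 1 */
	printf("unsigned int: %d\n", is_signed_type(unsigned int));	/* 0 */
	printf("char:         %d\n", is_signed_type(char));	/* implementation-defined */
	return 0;
}
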
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 45e6c01b2e4d..b1342c5d37cf 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -14,9 +14,20 @@
14#include "trace.h" 14#include "trace.h"
15#include "trace_output.h" 15#include "trace_output.h"
16 16
17struct fgraph_data { 17struct fgraph_cpu_data {
18 pid_t last_pid; 18 pid_t last_pid;
19 int depth; 19 int depth;
20 int ignore;
21};
22
23struct fgraph_data {
24 struct fgraph_cpu_data *cpu_data;
25
26 /* Place to preserve last processed entry. */
27 struct ftrace_graph_ent_entry ent;
28 struct ftrace_graph_ret_entry ret;
29 int failed;
30 int cpu;
20}; 31};
21 32
22#define TRACE_GRAPH_INDENT 2 33#define TRACE_GRAPH_INDENT 2
@@ -176,7 +187,7 @@ static int __trace_graph_entry(struct trace_array *tr,
176 struct ring_buffer *buffer = tr->buffer; 187 struct ring_buffer *buffer = tr->buffer;
177 struct ftrace_graph_ent_entry *entry; 188 struct ftrace_graph_ent_entry *entry;
178 189
179 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) 190 if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled))))
180 return 0; 191 return 0;
181 192
182 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT, 193 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT,
@@ -240,7 +251,7 @@ static void __trace_graph_return(struct trace_array *tr,
240 struct ring_buffer *buffer = tr->buffer; 251 struct ring_buffer *buffer = tr->buffer;
241 struct ftrace_graph_ret_entry *entry; 252 struct ftrace_graph_ret_entry *entry;
242 253
243 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) 254 if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled))))
244 return; 255 return;
245 256
246 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET, 257 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET,
@@ -384,7 +395,7 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data)
384 if (!data) 395 if (!data)
385 return TRACE_TYPE_HANDLED; 396 return TRACE_TYPE_HANDLED;
386 397
387 last_pid = &(per_cpu_ptr(data, cpu)->last_pid); 398 last_pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
388 399
389 if (*last_pid == pid) 400 if (*last_pid == pid)
390 return TRACE_TYPE_HANDLED; 401 return TRACE_TYPE_HANDLED;
@@ -435,26 +446,49 @@ static struct ftrace_graph_ret_entry *
435get_return_for_leaf(struct trace_iterator *iter, 446get_return_for_leaf(struct trace_iterator *iter,
436 struct ftrace_graph_ent_entry *curr) 447 struct ftrace_graph_ent_entry *curr)
437{ 448{
438 struct ring_buffer_iter *ring_iter; 449 struct fgraph_data *data = iter->private;
450 struct ring_buffer_iter *ring_iter = NULL;
439 struct ring_buffer_event *event; 451 struct ring_buffer_event *event;
440 struct ftrace_graph_ret_entry *next; 452 struct ftrace_graph_ret_entry *next;
441 453
442 ring_iter = iter->buffer_iter[iter->cpu]; 454 /*
455 * If the previous output failed to write to the seq buffer,
456 * then we just reuse the data from before.
457 */
458 if (data && data->failed) {
459 curr = &data->ent;
460 next = &data->ret;
461 } else {
443 462
444 /* First peek to compare current entry and the next one */ 463 ring_iter = iter->buffer_iter[iter->cpu];
445 if (ring_iter) 464
446 event = ring_buffer_iter_peek(ring_iter, NULL); 465 /* First peek to compare current entry and the next one */
447 else { 466 if (ring_iter)
448 /* We need to consume the current entry to see the next one */ 467 event = ring_buffer_iter_peek(ring_iter, NULL);
449 ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL); 468 else {
450 event = ring_buffer_peek(iter->tr->buffer, iter->cpu, 469 /*
451 NULL); 470 * We need to consume the current entry to see
452 } 471 * the next one.
472 */
473 ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
474 event = ring_buffer_peek(iter->tr->buffer, iter->cpu,
475 NULL);
476 }
453 477
454 if (!event) 478 if (!event)
455 return NULL; 479 return NULL;
480
481 next = ring_buffer_event_data(event);
456 482
457 next = ring_buffer_event_data(event); 483 if (data) {
484 /*
485 * Save current and next entries for later reference
486 * if the output fails.
487 */
488 data->ent = *curr;
489 data->ret = *next;
490 }
491 }
458 492
459 if (next->ent.type != TRACE_GRAPH_RET) 493 if (next->ent.type != TRACE_GRAPH_RET)
460 return NULL; 494 return NULL;
@@ -640,7 +674,7 @@ print_graph_entry_leaf(struct trace_iterator *iter,
640 674
641 if (data) { 675 if (data) {
642 int cpu = iter->cpu; 676 int cpu = iter->cpu;
643 int *depth = &(per_cpu_ptr(data, cpu)->depth); 677 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
644 678
645 /* 679 /*
646 * Comments display at + 1 to depth. Since 680 * Comments display at + 1 to depth. Since
@@ -688,7 +722,7 @@ print_graph_entry_nested(struct trace_iterator *iter,
688 722
689 if (data) { 723 if (data) {
690 int cpu = iter->cpu; 724 int cpu = iter->cpu;
691 int *depth = &(per_cpu_ptr(data, cpu)->depth); 725 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
692 726
693 *depth = call->depth; 727 *depth = call->depth;
694 } 728 }
@@ -782,19 +816,34 @@ static enum print_line_t
782print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, 816print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
783 struct trace_iterator *iter) 817 struct trace_iterator *iter)
784{ 818{
785 int cpu = iter->cpu; 819 struct fgraph_data *data = iter->private;
786 struct ftrace_graph_ent *call = &field->graph_ent; 820 struct ftrace_graph_ent *call = &field->graph_ent;
787 struct ftrace_graph_ret_entry *leaf_ret; 821 struct ftrace_graph_ret_entry *leaf_ret;
822 static enum print_line_t ret;
823 int cpu = iter->cpu;
788 824
789 if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func)) 825 if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func))
790 return TRACE_TYPE_PARTIAL_LINE; 826 return TRACE_TYPE_PARTIAL_LINE;
791 827
792 leaf_ret = get_return_for_leaf(iter, field); 828 leaf_ret = get_return_for_leaf(iter, field);
793 if (leaf_ret) 829 if (leaf_ret)
794 return print_graph_entry_leaf(iter, field, leaf_ret, s); 830 ret = print_graph_entry_leaf(iter, field, leaf_ret, s);
795 else 831 else
796 return print_graph_entry_nested(iter, field, s, cpu); 832 ret = print_graph_entry_nested(iter, field, s, cpu);
797 833
834 if (data) {
835 /*
836 * If we failed to write our output, then we need to make
837 * note of it. Because we already consumed our entry.
838 */
839 if (s->full) {
840 data->failed = 1;
841 data->cpu = cpu;
842 } else
843 data->failed = 0;
844 }
845
846 return ret;
798} 847}
799 848
800static enum print_line_t 849static enum print_line_t
@@ -810,7 +859,7 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
810 859
811 if (data) { 860 if (data) {
812 int cpu = iter->cpu; 861 int cpu = iter->cpu;
813 int *depth = &(per_cpu_ptr(data, cpu)->depth); 862 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
814 863
815 /* 864 /*
816 * Comments display at + 1 to depth. This is the 865 * Comments display at + 1 to depth. This is the
@@ -873,7 +922,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
873 int i; 922 int i;
874 923
875 if (data) 924 if (data)
876 depth = per_cpu_ptr(data, iter->cpu)->depth; 925 depth = per_cpu_ptr(data->cpu_data, iter->cpu)->depth;
877 926
878 if (print_graph_prologue(iter, s, 0, 0)) 927 if (print_graph_prologue(iter, s, 0, 0))
879 return TRACE_TYPE_PARTIAL_LINE; 928 return TRACE_TYPE_PARTIAL_LINE;
@@ -941,8 +990,33 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
941enum print_line_t 990enum print_line_t
942print_graph_function(struct trace_iterator *iter) 991print_graph_function(struct trace_iterator *iter)
943{ 992{
993 struct ftrace_graph_ent_entry *field;
994 struct fgraph_data *data = iter->private;
944 struct trace_entry *entry = iter->ent; 995 struct trace_entry *entry = iter->ent;
945 struct trace_seq *s = &iter->seq; 996 struct trace_seq *s = &iter->seq;
997 int cpu = iter->cpu;
998 int ret;
999
1000 if (data && per_cpu_ptr(data->cpu_data, cpu)->ignore) {
1001 per_cpu_ptr(data->cpu_data, cpu)->ignore = 0;
1002 return TRACE_TYPE_HANDLED;
1003 }
1004
1005 /*
1006 * If the last output failed, there's a possibility we need
1007 * to print out the missing entry which would never go out.
1008 */
1009 if (data && data->failed) {
1010 field = &data->ent;
1011 iter->cpu = data->cpu;
1012 ret = print_graph_entry(field, s, iter);
1013 if (ret == TRACE_TYPE_HANDLED && iter->cpu != cpu) {
1014 per_cpu_ptr(data->cpu_data, iter->cpu)->ignore = 1;
1015 ret = TRACE_TYPE_NO_CONSUME;
1016 }
1017 iter->cpu = cpu;
1018 return ret;
1019 }
946 1020
947 switch (entry->type) { 1021 switch (entry->type) {
948 case TRACE_GRAPH_ENT: { 1022 case TRACE_GRAPH_ENT: {
@@ -952,7 +1026,7 @@ print_graph_function(struct trace_iterator *iter)
952 * sizeof(struct ftrace_graph_ent_entry) is very small, 1026 * sizeof(struct ftrace_graph_ent_entry) is very small,
953 * it can be safely saved at the stack. 1027 * it can be safely saved at the stack.
954 */ 1028 */
955 struct ftrace_graph_ent_entry *field, saved; 1029 struct ftrace_graph_ent_entry saved;
956 trace_assign_type(field, entry); 1030 trace_assign_type(field, entry);
957 saved = *field; 1031 saved = *field;
958 return print_graph_entry(&saved, s, iter); 1032 return print_graph_entry(&saved, s, iter);
@@ -1030,31 +1104,54 @@ static void print_graph_headers(struct seq_file *s)
1030static void graph_trace_open(struct trace_iterator *iter) 1104static void graph_trace_open(struct trace_iterator *iter)
1031{ 1105{
1032 /* pid and depth on the last trace processed */ 1106 /* pid and depth on the last trace processed */
1033 struct fgraph_data *data = alloc_percpu(struct fgraph_data); 1107 struct fgraph_data *data;
1034 int cpu; 1108 int cpu;
1035 1109
1110 iter->private = NULL;
1111
1112 data = kzalloc(sizeof(*data), GFP_KERNEL);
1036 if (!data) 1113 if (!data)
1037 pr_warning("function graph tracer: not enough memory\n"); 1114 goto out_err;
1038 else 1115
1039 for_each_possible_cpu(cpu) { 1116 data->cpu_data = alloc_percpu(struct fgraph_cpu_data);
1040 pid_t *pid = &(per_cpu_ptr(data, cpu)->last_pid); 1117 if (!data->cpu_data)
1041 int *depth = &(per_cpu_ptr(data, cpu)->depth); 1118 goto out_err_free;
1042 *pid = -1; 1119
1043 *depth = 0; 1120 for_each_possible_cpu(cpu) {
1044 } 1121 pid_t *pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
1122 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
1123 int *ignore = &(per_cpu_ptr(data->cpu_data, cpu)->ignore);
1124 *pid = -1;
1125 *depth = 0;
1126 *ignore = 0;
1127 }
1045 1128
1046 iter->private = data; 1129 iter->private = data;
1130
1131 return;
1132
1133 out_err_free:
1134 kfree(data);
1135 out_err:
1136 pr_warning("function graph tracer: not enough memory\n");
1047} 1137}
1048 1138
1049static void graph_trace_close(struct trace_iterator *iter) 1139static void graph_trace_close(struct trace_iterator *iter)
1050{ 1140{
1051 free_percpu(iter->private); 1141 struct fgraph_data *data = iter->private;
1142
1143 if (data) {
1144 free_percpu(data->cpu_data);
1145 kfree(data);
1146 }
1052} 1147}
1053 1148
1054static struct tracer graph_trace __read_mostly = { 1149static struct tracer graph_trace __read_mostly = {
1055 .name = "function_graph", 1150 .name = "function_graph",
1056 .open = graph_trace_open, 1151 .open = graph_trace_open,
1152 .pipe_open = graph_trace_open,
1057 .close = graph_trace_close, 1153 .close = graph_trace_close,
1154 .pipe_close = graph_trace_close,
1058 .wait_pipe = poll_wait_pipe, 1155 .wait_pipe = poll_wait_pipe,
1059 .init = graph_trace_init, 1156 .init = graph_trace_init,
1060 .reset = graph_trace_reset, 1157 .reset = graph_trace_reset,
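
The heart of the function-graph changes is a stash-and-replay scheme: get_return_for_leaf() may consume a ring-buffer entry before print_graph_entry() knows whether the seq write succeeded, so a failed entry is saved in fgraph_data and replayed on the next call. The pattern in miniature, as a runnable sketch (all names hypothetical):

#include <stdio.h>

struct entry { int seq; };

struct stash {
	struct entry saved;
	int failed;
};

/* Once an entry is consumed from the ring buffer it cannot be re-read,
 * so a failed write must be replayed from a saved copy. */
static int emit(struct stash *st, struct entry *e,
		int (*write_out)(const struct entry *))
{
	if (st->failed)
		e = &st->saved;		/* replay what never went out    */

	st->saved = *e;			/* stash in case the write fails */
	if (write_out(e)) {
		st->failed = 1;		/* output full: retry next call  */
		return -1;
	}
	st->failed = 0;
	return 0;
}

static int full_once;
static int write_out(const struct entry *e)
{
	if (full_once--)
		return 1;		/* simulate a full seq buffer */
	printf("entry %d\n", e->seq);
	return 0;
}

int main(void)
{
	struct stash st = { { 0 }, 0 };
	struct entry e = { 7 };

	full_once = 1;
	emit(&st, &e, write_out);	/* fails; entry 7 is stashed */
	emit(&st, &e, write_out);	/* replays and prints 7      */
	return 0;
}
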
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
index 69543a905cd5..7b97000745f5 100644
--- a/kernel/trace/trace_hw_branches.c
+++ b/kernel/trace/trace_hw_branches.c
@@ -20,10 +20,10 @@
20 20
21#define BTS_BUFFER_SIZE (1 << 13) 21#define BTS_BUFFER_SIZE (1 << 13)
22 22
23static DEFINE_PER_CPU(struct bts_tracer *, tracer); 23static DEFINE_PER_CPU(struct bts_tracer *, hwb_tracer);
24static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], buffer); 24static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], hwb_buffer);
25 25
26#define this_tracer per_cpu(tracer, smp_processor_id()) 26#define this_tracer per_cpu(hwb_tracer, smp_processor_id())
27 27
28static int trace_hw_branches_enabled __read_mostly; 28static int trace_hw_branches_enabled __read_mostly;
29static int trace_hw_branches_suspended __read_mostly; 29static int trace_hw_branches_suspended __read_mostly;
@@ -32,12 +32,13 @@ static struct trace_array *hw_branch_trace __read_mostly;
32 32
33static void bts_trace_init_cpu(int cpu) 33static void bts_trace_init_cpu(int cpu)
34{ 34{
35 per_cpu(tracer, cpu) = 35 per_cpu(hwb_tracer, cpu) =
36 ds_request_bts_cpu(cpu, per_cpu(buffer, cpu), BTS_BUFFER_SIZE, 36 ds_request_bts_cpu(cpu, per_cpu(hwb_buffer, cpu),
37 NULL, (size_t)-1, BTS_KERNEL); 37 BTS_BUFFER_SIZE, NULL, (size_t)-1,
38 BTS_KERNEL);
38 39
39 if (IS_ERR(per_cpu(tracer, cpu))) 40 if (IS_ERR(per_cpu(hwb_tracer, cpu)))
40 per_cpu(tracer, cpu) = NULL; 41 per_cpu(hwb_tracer, cpu) = NULL;
41} 42}
42 43
43static int bts_trace_init(struct trace_array *tr) 44static int bts_trace_init(struct trace_array *tr)
@@ -51,7 +52,7 @@ static int bts_trace_init(struct trace_array *tr)
51 for_each_online_cpu(cpu) { 52 for_each_online_cpu(cpu) {
52 bts_trace_init_cpu(cpu); 53 bts_trace_init_cpu(cpu);
53 54
54 if (likely(per_cpu(tracer, cpu))) 55 if (likely(per_cpu(hwb_tracer, cpu)))
55 trace_hw_branches_enabled = 1; 56 trace_hw_branches_enabled = 1;
56 } 57 }
57 trace_hw_branches_suspended = 0; 58 trace_hw_branches_suspended = 0;
@@ -67,9 +68,9 @@ static void bts_trace_reset(struct trace_array *tr)
67 68
68 get_online_cpus(); 69 get_online_cpus();
69 for_each_online_cpu(cpu) { 70 for_each_online_cpu(cpu) {
70 if (likely(per_cpu(tracer, cpu))) { 71 if (likely(per_cpu(hwb_tracer, cpu))) {
71 ds_release_bts(per_cpu(tracer, cpu)); 72 ds_release_bts(per_cpu(hwb_tracer, cpu));
72 per_cpu(tracer, cpu) = NULL; 73 per_cpu(hwb_tracer, cpu) = NULL;
73 } 74 }
74 } 75 }
75 trace_hw_branches_enabled = 0; 76 trace_hw_branches_enabled = 0;
@@ -83,8 +84,8 @@ static void bts_trace_start(struct trace_array *tr)
83 84
84 get_online_cpus(); 85 get_online_cpus();
85 for_each_online_cpu(cpu) 86 for_each_online_cpu(cpu)
86 if (likely(per_cpu(tracer, cpu))) 87 if (likely(per_cpu(hwb_tracer, cpu)))
87 ds_resume_bts(per_cpu(tracer, cpu)); 88 ds_resume_bts(per_cpu(hwb_tracer, cpu));
88 trace_hw_branches_suspended = 0; 89 trace_hw_branches_suspended = 0;
89 put_online_cpus(); 90 put_online_cpus();
90} 91}
@@ -95,8 +96,8 @@ static void bts_trace_stop(struct trace_array *tr)
95 96
96 get_online_cpus(); 97 get_online_cpus();
97 for_each_online_cpu(cpu) 98 for_each_online_cpu(cpu)
98 if (likely(per_cpu(tracer, cpu))) 99 if (likely(per_cpu(hwb_tracer, cpu)))
99 ds_suspend_bts(per_cpu(tracer, cpu)); 100 ds_suspend_bts(per_cpu(hwb_tracer, cpu));
100 trace_hw_branches_suspended = 1; 101 trace_hw_branches_suspended = 1;
101 put_online_cpus(); 102 put_online_cpus();
102} 103}
@@ -114,16 +115,16 @@ static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb,
114 bts_trace_init_cpu(cpu); 115 bts_trace_init_cpu(cpu);
115 116
116 if (trace_hw_branches_suspended && 117 if (trace_hw_branches_suspended &&
117 likely(per_cpu(tracer, cpu))) 118 likely(per_cpu(hwb_tracer, cpu)))
118 ds_suspend_bts(per_cpu(tracer, cpu)); 119 ds_suspend_bts(per_cpu(hwb_tracer, cpu));
119 } 120 }
120 break; 121 break;
121 122
122 case CPU_DOWN_PREPARE: 123 case CPU_DOWN_PREPARE:
123 /* The notification is sent with interrupts enabled. */ 124 /* The notification is sent with interrupts enabled. */
124 if (likely(per_cpu(tracer, cpu))) { 125 if (likely(per_cpu(hwb_tracer, cpu))) {
125 ds_release_bts(per_cpu(tracer, cpu)); 126 ds_release_bts(per_cpu(hwb_tracer, cpu));
126 per_cpu(tracer, cpu) = NULL; 127 per_cpu(hwb_tracer, cpu) = NULL;
127 } 128 }
128 } 129 }
129 130
@@ -258,8 +259,8 @@ static void trace_bts_prepare(struct trace_iterator *iter)
258 259
259 get_online_cpus(); 260 get_online_cpus();
260 for_each_online_cpu(cpu) 261 for_each_online_cpu(cpu)
261 if (likely(per_cpu(tracer, cpu))) 262 if (likely(per_cpu(hwb_tracer, cpu)))
262 ds_suspend_bts(per_cpu(tracer, cpu)); 263 ds_suspend_bts(per_cpu(hwb_tracer, cpu));
263 /* 264 /*
264 * We need to collect the trace on the respective cpu since ftrace 265 * We need to collect the trace on the respective cpu since ftrace
265 * implicitly adds the record for the current cpu. 266 * implicitly adds the record for the current cpu.
@@ -268,8 +269,8 @@ static void trace_bts_prepare(struct trace_iterator *iter)
268 on_each_cpu(trace_bts_cpu, iter->tr, 1); 269 on_each_cpu(trace_bts_cpu, iter->tr, 1);
269 270
270 for_each_online_cpu(cpu) 271 for_each_online_cpu(cpu)
271 if (likely(per_cpu(tracer, cpu))) 272 if (likely(per_cpu(hwb_tracer, cpu)))
272 ds_resume_bts(per_cpu(tracer, cpu)); 273 ds_resume_bts(per_cpu(hwb_tracer, cpu));
273 put_online_cpus(); 274 put_online_cpus();
274} 275}
275 276
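
The hw_branches rename is mechanical, but the motivation is worth spelling out: in this era, DEFINE_PER_CPU(type, name) emits an ordinary global symbol named per_cpu__name (the per_cpu_var() calls in the function-graph hunk above show the same prefix), so every per-CPU variable in the kernel shares one namespace. Generic names like tracer and buffer invite collisions; the hwb_ prefix keeps them unique. In outline (hypothetical files, shown only to make the failure mode concrete):

/* file1.c */
DEFINE_PER_CPU(struct bts_tracer *, tracer);	/* emits per_cpu__tracer */

/* file2.c */
DEFINE_PER_CPU(int, tracer);	/* also emits per_cpu__tracer:
				 * multiple definition at link time */
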
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 3aa7eaa2114c..2974bc7538c7 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -151,6 +151,8 @@ check_critical_timing(struct trace_array *tr,
151 goto out_unlock; 151 goto out_unlock;
152 152
153 trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); 153 trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
154 /* Skip 5 functions to get to the irq/preempt enable function */
155 __trace_stack(tr, flags, 5, pc);
154 156
155 if (data->critical_sequence != max_sequence) 157 if (data->critical_sequence != max_sequence)
156 goto out_unlock; 158 goto out_unlock;
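
The new __trace_stack() call records a stack trace with the max-latency snapshot, skipping 5 frames so the trace starts at the function that re-enabled irqs/preemption rather than inside the tracer's own plumbing. The same skip idea in self-contained user-space form (glibc's execinfo; dump_stack_skip() is a made-up helper):

#include <execinfo.h>

/* Print a backtrace to stderr, hiding the innermost 'skip' frames so
 * the trace begins at the caller of interest, not in this helper. */
static void dump_stack_skip(int skip)
{
	void *frames[32];
	int n = backtrace(frames, 32);

	if (n > skip)
		backtrace_symbols_fd(frames + skip, n - skip, 2);
}

int main(void)
{
	dump_stack_skip(1);	/* hide dump_stack_skip() itself */
	return 0;
}
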
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
new file mode 100644
index 000000000000..7ecab06547a5
--- /dev/null
+++ b/kernel/trace/trace_kprobe.c
@@ -0,0 +1,1533 @@
1/*
2 * Kprobes-based tracing events
3 *
4 * Created by Masami Hiramatsu <mhiramat@redhat.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#include <linux/module.h>
21#include <linux/uaccess.h>
22#include <linux/kprobes.h>
23#include <linux/seq_file.h>
24#include <linux/slab.h>
25#include <linux/smp.h>
26#include <linux/debugfs.h>
27#include <linux/types.h>
28#include <linux/string.h>
29#include <linux/ctype.h>
30#include <linux/ptrace.h>
31#include <linux/perf_event.h>
32
33#include "trace.h"
34#include "trace_output.h"
35
36#define MAX_TRACE_ARGS 128
37#define MAX_ARGSTR_LEN 63
38#define MAX_EVENT_NAME_LEN 64
39#define KPROBE_EVENT_SYSTEM "kprobes"
40
41/* Reserved field names */
42#define FIELD_STRING_IP "__probe_ip"
43#define FIELD_STRING_NARGS "__probe_nargs"
44#define FIELD_STRING_RETIP "__probe_ret_ip"
45#define FIELD_STRING_FUNC "__probe_func"
46
47const char *reserved_field_names[] = {
48 "common_type",
49 "common_flags",
50 "common_preempt_count",
51 "common_pid",
52 "common_tgid",
53 "common_lock_depth",
54 FIELD_STRING_IP,
55 FIELD_STRING_NARGS,
56 FIELD_STRING_RETIP,
57 FIELD_STRING_FUNC,
58};
59
60struct fetch_func {
61 unsigned long (*func)(struct pt_regs *, void *);
62 void *data;
63};
64
65static __kprobes unsigned long call_fetch(struct fetch_func *f,
66 struct pt_regs *regs)
67{
68 return f->func(regs, f->data);
69}
70
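
A struct fetch_func pairs a handler with opaque data, so call_fetch() can evaluate any kind of probe argument through a single indirect call; each parser branch further down only has to pick a handler and pack its parameter into ->data. The dispatch shape, as a runnable user-space sketch:

#include <stdio.h>

struct fetch_func {
	long (*func)(void *ctx, void *data);
	void *data;
};

static long call_fetch(struct fetch_func *f, void *ctx)
{
	return f->func(ctx, f->data);	/* one call site for every kind */
}

/* Two handler kinds: a literal constant, and an index into the context. */
static long fetch_const(void *ctx, void *data)  { return (long)data; }
static long fetch_offset(void *ctx, void *data) { return ((long *)ctx)[(long)data]; }

int main(void)
{
	long regs[4] = { 10, 20, 30, 40 };
	struct fetch_func a = { fetch_const,  (void *)99 };
	struct fetch_func b = { fetch_offset, (void *)2  };

	printf("%ld %ld\n", call_fetch(&a, regs), call_fetch(&b, regs)); /* 99 30 */
	return 0;
}
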
71/* fetch handlers */
72static __kprobes unsigned long fetch_register(struct pt_regs *regs,
73 void *offset)
74{
75 return regs_get_register(regs, (unsigned int)((unsigned long)offset));
76}
77
78static __kprobes unsigned long fetch_stack(struct pt_regs *regs,
79 void *num)
80{
81 return regs_get_kernel_stack_nth(regs,
82 (unsigned int)((unsigned long)num));
83}
84
85static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr)
86{
87 unsigned long retval;
88
89 if (probe_kernel_address(addr, retval))
90 return 0;
91 return retval;
92}
93
94static __kprobes unsigned long fetch_argument(struct pt_regs *regs, void *num)
95{
96 return regs_get_argument_nth(regs, (unsigned int)((unsigned long)num));
97}
98
99static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs,
100 void *dummy)
101{
102 return regs_return_value(regs);
103}
104
105static __kprobes unsigned long fetch_stack_address(struct pt_regs *regs,
106 void *dummy)
107{
108 return kernel_stack_pointer(regs);
109}
110
111/* Memory fetching by symbol */
112struct symbol_cache {
113 char *symbol;
114 long offset;
115 unsigned long addr;
116};
117
118static unsigned long update_symbol_cache(struct symbol_cache *sc)
119{
120 sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol);
121 if (sc->addr)
122 sc->addr += sc->offset;
123 return sc->addr;
124}
125
126static void free_symbol_cache(struct symbol_cache *sc)
127{
128 kfree(sc->symbol);
129 kfree(sc);
130}
131
132static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
133{
134 struct symbol_cache *sc;
135
136 if (!sym || strlen(sym) == 0)
137 return NULL;
138 sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL);
139 if (!sc)
140 return NULL;
141
142 sc->symbol = kstrdup(sym, GFP_KERNEL);
143 if (!sc->symbol) {
144 kfree(sc);
145 return NULL;
146 }
147 sc->offset = offset;
148
149 update_symbol_cache(sc);
150 return sc;
151}
152
153static __kprobes unsigned long fetch_symbol(struct pt_regs *regs, void *data)
154{
155 struct symbol_cache *sc = data;
156
157 if (sc->addr)
158 return fetch_memory(regs, (void *)sc->addr);
159 else
160 return 0;
161}
162
163/* Special indirect memory access interface */
164struct indirect_fetch_data {
165 struct fetch_func orig;
166 long offset;
167};
168
169static __kprobes unsigned long fetch_indirect(struct pt_regs *regs, void *data)
170{
171 struct indirect_fetch_data *ind = data;
172 unsigned long addr;
173
174 addr = call_fetch(&ind->orig, regs);
175 if (addr) {
176 addr += ind->offset;
177 return fetch_memory(regs, (void *)addr);
178 } else
179 return 0;
180}
181
182static __kprobes void free_indirect_fetch_data(struct indirect_fetch_data *data)
183{
184 if (data->orig.func == fetch_indirect)
185 free_indirect_fetch_data(data->orig.data);
186 else if (data->orig.func == fetch_symbol)
187 free_symbol_cache(data->orig.data);
188 kfree(data);
189}
190
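
fetch_indirect() composes fetchers: the wrapped fetch yields a base address, the offset is added, and the result is dereferenced. This is why the +|-offs(ARG) syntax parsed later can nest to arbitrary depth (e.g. +0(+8(%sp))). A hedged sketch of the composition against ordinary memory (types and names are simplified stand-ins):

#include <stdio.h>
#include <stddef.h>

struct fetch {
	long (*func)(struct fetch *);
	long offset;		/* used by the indirect case   */
	struct fetch *orig;	/* inner fetch to compose with */
	long value;		/* literal for the base case   */
};

static long fetch_value(struct fetch *f)
{
	return f->value;
}

static long fetch_indirect(struct fetch *f)
{
	/* inner fetch yields a base address; offset it, then dereference */
	long addr = f->orig->func(f->orig) + f->offset;

	return *(long *)addr;
}

int main(void)
{
	struct obj { long pad; long payload; } o = { 0, 42 };
	struct fetch base = { fetch_value, 0, NULL, (long)&o };
	struct fetch ind = { fetch_indirect, offsetof(struct obj, payload),
			     &base, 0 };	/* behaves like +8(base) */

	printf("%ld\n", ind.func(&ind));	/* prints 42 */
	return 0;
}
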
191/**
192 * Kprobe event core functions
193 */
194
195struct probe_arg {
196 struct fetch_func fetch;
197 const char *name;
198};
199
200/* Flags for trace_probe */
201#define TP_FLAG_TRACE 1
202#define TP_FLAG_PROFILE 2
203
204struct trace_probe {
205 struct list_head list;
206 struct kretprobe rp; /* Use rp.kp for kprobe use */
207 unsigned long nhit;
208 unsigned int flags; /* For TP_FLAG_* */
209 const char *symbol; /* symbol name */
210 struct ftrace_event_call call;
211 struct trace_event event;
212 unsigned int nr_args;
213 struct probe_arg args[];
214};
215
216#define SIZEOF_TRACE_PROBE(n) \
217 (offsetof(struct trace_probe, args) + \
218 (sizeof(struct probe_arg) * (n)))
219
220static __kprobes int probe_is_return(struct trace_probe *tp)
221{
222 return tp->rp.handler != NULL;
223}
224
225static __kprobes const char *probe_symbol(struct trace_probe *tp)
226{
227 return tp->symbol ? tp->symbol : "unknown";
228}
229
230static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff)
231{
232 int ret = -EINVAL;
233
234 if (ff->func == fetch_argument)
235 ret = snprintf(buf, n, "$arg%lu", (unsigned long)ff->data);
236 else if (ff->func == fetch_register) {
237 const char *name;
238 name = regs_query_register_name((unsigned int)((long)ff->data));
239 ret = snprintf(buf, n, "%%%s", name);
240 } else if (ff->func == fetch_stack)
241 ret = snprintf(buf, n, "$stack%lu", (unsigned long)ff->data);
242 else if (ff->func == fetch_memory)
243 ret = snprintf(buf, n, "@0x%p", ff->data);
244 else if (ff->func == fetch_symbol) {
245 struct symbol_cache *sc = ff->data;
246 if (sc->offset)
247 ret = snprintf(buf, n, "@%s%+ld", sc->symbol,
248 sc->offset);
249 else
250 ret = snprintf(buf, n, "@%s", sc->symbol);
251 } else if (ff->func == fetch_retvalue)
252 ret = snprintf(buf, n, "$retval");
253 else if (ff->func == fetch_stack_address)
254 ret = snprintf(buf, n, "$stack");
255 else if (ff->func == fetch_indirect) {
256 struct indirect_fetch_data *id = ff->data;
257 size_t l = 0;
258 ret = snprintf(buf, n, "%+ld(", id->offset);
259 if (ret >= n)
260 goto end;
261 l += ret;
262 ret = probe_arg_string(buf + l, n - l, &id->orig);
263 if (ret < 0)
264 goto end;
265 l += ret;
266 ret = snprintf(buf + l, n - l, ")");
267 ret += l;
268 }
269end:
270 if (ret >= n)
271 return -ENOSPC;
272 return ret;
273}
274
275static int register_probe_event(struct trace_probe *tp);
276static void unregister_probe_event(struct trace_probe *tp);
277
278static DEFINE_MUTEX(probe_lock);
279static LIST_HEAD(probe_list);
280
281static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
282static int kretprobe_dispatcher(struct kretprobe_instance *ri,
283 struct pt_regs *regs);
284
285/*
286 * Allocate new trace_probe and initialize it (including kprobes).
287 */
288static struct trace_probe *alloc_trace_probe(const char *group,
289 const char *event,
290 void *addr,
291 const char *symbol,
292 unsigned long offs,
293 int nargs, int is_return)
294{
295 struct trace_probe *tp;
296
297 tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL);
298 if (!tp)
299 return ERR_PTR(-ENOMEM);
300
301 if (symbol) {
302 tp->symbol = kstrdup(symbol, GFP_KERNEL);
303 if (!tp->symbol)
304 goto error;
305 tp->rp.kp.symbol_name = tp->symbol;
306 tp->rp.kp.offset = offs;
307 } else
308 tp->rp.kp.addr = addr;
309
310 if (is_return)
311 tp->rp.handler = kretprobe_dispatcher;
312 else
313 tp->rp.kp.pre_handler = kprobe_dispatcher;
314
315 if (!event)
316 goto error;
317 tp->call.name = kstrdup(event, GFP_KERNEL);
318 if (!tp->call.name)
319 goto error;
320
321 if (!group)
322 goto error;
323 tp->call.system = kstrdup(group, GFP_KERNEL);
324 if (!tp->call.system)
325 goto error;
326
327 INIT_LIST_HEAD(&tp->list);
328 return tp;
329error:
330 kfree(tp->call.name);
331 kfree(tp->symbol);
332 kfree(tp);
333 return ERR_PTR(-ENOMEM);
334}
335
336static void free_probe_arg(struct probe_arg *arg)
337{
338 if (arg->fetch.func == fetch_symbol)
339 free_symbol_cache(arg->fetch.data);
340 else if (arg->fetch.func == fetch_indirect)
341 free_indirect_fetch_data(arg->fetch.data);
342 kfree(arg->name);
343}
344
345static void free_trace_probe(struct trace_probe *tp)
346{
347 int i;
348
349 for (i = 0; i < tp->nr_args; i++)
350 free_probe_arg(&tp->args[i]);
351
352 kfree(tp->call.system);
353 kfree(tp->call.name);
354 kfree(tp->symbol);
355 kfree(tp);
356}
357
358static struct trace_probe *find_probe_event(const char *event,
359 const char *group)
360{
361 struct trace_probe *tp;
362
363 list_for_each_entry(tp, &probe_list, list)
364 if (strcmp(tp->call.name, event) == 0 &&
365 strcmp(tp->call.system, group) == 0)
366 return tp;
367 return NULL;
368}
369
 370/* Unregister a trace_probe and probe_event: must be called with probe_lock held */
371static void unregister_trace_probe(struct trace_probe *tp)
372{
373 if (probe_is_return(tp))
374 unregister_kretprobe(&tp->rp);
375 else
376 unregister_kprobe(&tp->rp.kp);
377 list_del(&tp->list);
378 unregister_probe_event(tp);
379}
380
381/* Register a trace_probe and probe_event */
382static int register_trace_probe(struct trace_probe *tp)
383{
384 struct trace_probe *old_tp;
385 int ret;
386
387 mutex_lock(&probe_lock);
388
389 /* register as an event */
390 old_tp = find_probe_event(tp->call.name, tp->call.system);
391 if (old_tp) {
392 /* delete old event */
393 unregister_trace_probe(old_tp);
394 free_trace_probe(old_tp);
395 }
396 ret = register_probe_event(tp);
397 if (ret) {
398 pr_warning("Faild to register probe event(%d)\n", ret);
399 goto end;
400 }
401
402 tp->rp.kp.flags |= KPROBE_FLAG_DISABLED;
403 if (probe_is_return(tp))
404 ret = register_kretprobe(&tp->rp);
405 else
406 ret = register_kprobe(&tp->rp.kp);
407
408 if (ret) {
409 pr_warning("Could not insert probe(%d)\n", ret);
410 if (ret == -EILSEQ) {
411 pr_warning("Probing address(0x%p) is not an "
412 "instruction boundary.\n",
413 tp->rp.kp.addr);
414 ret = -EINVAL;
415 }
416 unregister_probe_event(tp);
417 } else
418 list_add_tail(&tp->list, &probe_list);
419end:
420 mutex_unlock(&probe_lock);
421 return ret;
422}
423
424/* Split symbol and offset. */
425static int split_symbol_offset(char *symbol, unsigned long *offset)
426{
427 char *tmp;
428 int ret;
429
430 if (!offset)
431 return -EINVAL;
432
433 tmp = strchr(symbol, '+');
434 if (tmp) {
 435 /* skip the sign because strict_strtoul doesn't accept '+' */
436 ret = strict_strtoul(tmp + 1, 0, offset);
437 if (ret)
438 return ret;
439 *tmp = '\0';
440 } else
441 *offset = 0;
442 return 0;
443}
444
445#define PARAM_MAX_ARGS 16
446#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long))
447
448static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return)
449{
450 int ret = 0;
451 unsigned long param;
452
453 if (strcmp(arg, "retval") == 0) {
454 if (is_return) {
455 ff->func = fetch_retvalue;
456 ff->data = NULL;
457 } else
458 ret = -EINVAL;
459 } else if (strncmp(arg, "stack", 5) == 0) {
460 if (arg[5] == '\0') {
461 ff->func = fetch_stack_address;
462 ff->data = NULL;
463 } else if (isdigit(arg[5])) {
464 ret = strict_strtoul(arg + 5, 10, &param);
465 if (ret || param > PARAM_MAX_STACK)
466 ret = -EINVAL;
467 else {
468 ff->func = fetch_stack;
469 ff->data = (void *)param;
470 }
471 } else
472 ret = -EINVAL;
473 } else if (strncmp(arg, "arg", 3) == 0 && isdigit(arg[3])) {
474 ret = strict_strtoul(arg + 3, 10, &param);
475 if (ret || param > PARAM_MAX_ARGS)
476 ret = -EINVAL;
477 else {
478 ff->func = fetch_argument;
479 ff->data = (void *)param;
480 }
481 } else
482 ret = -EINVAL;
483 return ret;
484}
485
486/* Recursive argument parser */
487static int __parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
488{
489 int ret = 0;
490 unsigned long param;
491 long offset;
492 char *tmp;
493
494 switch (arg[0]) {
495 case '$':
496 ret = parse_probe_vars(arg + 1, ff, is_return);
497 break;
498 case '%': /* named register */
499 ret = regs_query_register_offset(arg + 1);
500 if (ret >= 0) {
501 ff->func = fetch_register;
502 ff->data = (void *)(unsigned long)ret;
503 ret = 0;
504 }
505 break;
506 case '@': /* memory or symbol */
507 if (isdigit(arg[1])) {
508 ret = strict_strtoul(arg + 1, 0, &param);
509 if (ret)
510 break;
511 ff->func = fetch_memory;
512 ff->data = (void *)param;
513 } else {
514 ret = split_symbol_offset(arg + 1, &offset);
515 if (ret)
516 break;
517 ff->data = alloc_symbol_cache(arg + 1, offset);
518 if (ff->data)
519 ff->func = fetch_symbol;
520 else
521 ret = -EINVAL;
522 }
523 break;
524 case '+': /* indirect memory */
525 case '-':
526 tmp = strchr(arg, '(');
527 if (!tmp) {
528 ret = -EINVAL;
529 break;
530 }
531 *tmp = '\0';
532 ret = strict_strtol(arg + 1, 0, &offset);
533 if (ret)
534 break;
535 if (arg[0] == '-')
536 offset = -offset;
537 arg = tmp + 1;
538 tmp = strrchr(arg, ')');
539 if (tmp) {
540 struct indirect_fetch_data *id;
541 *tmp = '\0';
542 id = kzalloc(sizeof(struct indirect_fetch_data),
543 GFP_KERNEL);
544 if (!id)
545 return -ENOMEM;
546 id->offset = offset;
547 ret = __parse_probe_arg(arg, &id->orig, is_return);
548 if (ret)
549 kfree(id);
550 else {
551 ff->func = fetch_indirect;
552 ff->data = (void *)id;
553 }
554 } else
555 ret = -EINVAL;
556 break;
557 default:
558 /* TODO: support custom handler */
559 ret = -EINVAL;
560 }
561 return ret;
562}
563
564/* String length checking wrapper */
565static int parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
566{
567 if (strlen(arg) > MAX_ARGSTR_LEN) {
568 pr_info("Argument is too long.: %s\n", arg);
569 return -ENOSPC;
570 }
571 return __parse_probe_arg(arg, ff, is_return);
572}
573
574/* Return 1 if name is reserved or already used by another argument */
575static int conflict_field_name(const char *name,
576 struct probe_arg *args, int narg)
577{
578 int i;
579 for (i = 0; i < ARRAY_SIZE(reserved_field_names); i++)
580 if (strcmp(reserved_field_names[i], name) == 0)
581 return 1;
582 for (i = 0; i < narg; i++)
583 if (strcmp(args[i].name, name) == 0)
584 return 1;
585 return 0;
586}
587
588static int create_trace_probe(int argc, char **argv)
589{
590 /*
591 * Argument syntax:
592 * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS]
593 * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS]
594 * Fetch args:
 595 * $argN : fetch the Nth function argument. (N:0-)
 596 * $retval : fetch the return value
 597 * $stack : fetch the stack address
 598 * $stackN : fetch the Nth entry of the stack (N:0-)
599 * @ADDR : fetch memory at ADDR (ADDR should be in kernel)
600 * @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
601 * %REG : fetch register REG
602 * Indirect memory fetch:
603 * +|-offs(ARG) : fetch memory at ARG +|- offs address.
 604 * Argument aliases:
 605 * NAME=FETCHARG : set NAME as an alias of FETCHARG.
606 */
607 struct trace_probe *tp;
608 int i, ret = 0;
609 int is_return = 0, is_delete = 0;
610 char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL;
611 unsigned long offset = 0;
612 void *addr = NULL;
613 char buf[MAX_EVENT_NAME_LEN];
614
615 /* argc must be >= 1 */
616 if (argv[0][0] == 'p')
617 is_return = 0;
618 else if (argv[0][0] == 'r')
619 is_return = 1;
620 else if (argv[0][0] == '-')
621 is_delete = 1;
622 else {
623 pr_info("Probe definition must be started with 'p', 'r' or"
624 " '-'.\n");
625 return -EINVAL;
626 }
627
628 if (argv[0][1] == ':') {
629 event = &argv[0][2];
630 if (strchr(event, '/')) {
631 group = event;
632 event = strchr(group, '/') + 1;
633 event[-1] = '\0';
634 if (strlen(group) == 0) {
635 pr_info("Group name is not specifiled\n");
636 return -EINVAL;
637 }
638 }
639 if (strlen(event) == 0) {
640 pr_info("Event name is not specifiled\n");
641 return -EINVAL;
642 }
643 }
644 if (!group)
645 group = KPROBE_EVENT_SYSTEM;
646
647 if (is_delete) {
648 if (!event) {
649 pr_info("Delete command needs an event name.\n");
650 return -EINVAL;
651 }
652 tp = find_probe_event(event, group);
653 if (!tp) {
654 pr_info("Event %s/%s doesn't exist.\n", group, event);
655 return -ENOENT;
656 }
657 /* delete an event */
658 unregister_trace_probe(tp);
659 free_trace_probe(tp);
660 return 0;
661 }
662
663 if (argc < 2) {
664 pr_info("Probe point is not specified.\n");
665 return -EINVAL;
666 }
667 if (isdigit(argv[1][0])) {
668 if (is_return) {
669 pr_info("Return probe point must be a symbol.\n");
670 return -EINVAL;
671 }
672 /* an address specified */
673 ret = strict_strtoul(&argv[0][2], 0, (unsigned long *)&addr);
674 if (ret) {
675 pr_info("Failed to parse address.\n");
676 return ret;
677 }
678 } else {
679 /* a symbol specified */
680 symbol = argv[1];
681 /* TODO: support .init module functions */
682 ret = split_symbol_offset(symbol, &offset);
683 if (ret) {
684 pr_info("Failed to parse symbol.\n");
685 return ret;
686 }
687 if (offset && is_return) {
688 pr_info("Return probe must be used without offset.\n");
689 return -EINVAL;
690 }
691 }
692 argc -= 2; argv += 2;
693
694 /* setup a probe */
695 if (!event) {
696 /* Make a new event name */
697 if (symbol)
698 snprintf(buf, MAX_EVENT_NAME_LEN, "%c@%s%+ld",
699 is_return ? 'r' : 'p', symbol, offset);
700 else
701 snprintf(buf, MAX_EVENT_NAME_LEN, "%c@0x%p",
702 is_return ? 'r' : 'p', addr);
703 event = buf;
704 }
705 tp = alloc_trace_probe(group, event, addr, symbol, offset, argc,
706 is_return);
707 if (IS_ERR(tp)) {
708 pr_info("Failed to allocate trace_probe.(%d)\n",
709 (int)PTR_ERR(tp));
710 return PTR_ERR(tp);
711 }
712
713 /* parse arguments */
714 ret = 0;
715 for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
716 /* Parse argument name */
717 arg = strchr(argv[i], '=');
718 if (arg)
719 *arg++ = '\0';
720 else
721 arg = argv[i];
722
723 if (conflict_field_name(argv[i], tp->args, i)) {
724 pr_info("Argument%d name '%s' conflicts with "
725 "another field.\n", i, argv[i]);
726 ret = -EINVAL;
727 goto error;
728 }
729
730 tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
731 if (!tp->args[i].name) {
732 pr_info("Failed to allocate argument%d name '%s'.\n",
733 i, argv[i]);
734 ret = -ENOMEM;
735 goto error;
736 }
737
738 /* Parse fetch argument */
739 ret = parse_probe_arg(arg, &tp->args[i].fetch, is_return);
740 if (ret) {
741 pr_info("Parse error at argument%d. (%d)\n", i, ret);
742 kfree(tp->args[i].name);
743 goto error;
744 }
745
746 tp->nr_args++;
747 }
748
749 ret = register_trace_probe(tp);
750 if (ret)
751 goto error;
752 return 0;
753
754error:
755 free_trace_probe(tp);
756 return ret;
757}
758
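
Putting create_trace_probe() to work: definitions in the grammar above are written to the kprobe_events control file, one per line, and a leading '-' deletes an event again. A hedged usage sketch (the event and symbol names are examples, not part of the patch; the path assumes debugfs is mounted at /sys/kernel/debug):

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/debug/tracing/kprobe_events", "a");

	if (!f) {
		perror("kprobe_events");
		return 1;
	}
	/* entry probe on do_sys_open, fetching two function arguments */
	fprintf(f, "p:myprobe do_sys_open dfd=$arg0 flags=$arg2\n");
	/* matching return probe, recording the return value */
	fprintf(f, "r:myretprobe do_sys_open $retval\n");
	/* a '-' line, e.g. "-:myprobe\n", would remove the event again */
	fclose(f);
	return 0;
}
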
759static void cleanup_all_probes(void)
760{
761 struct trace_probe *tp;
762
763 mutex_lock(&probe_lock);
764 /* TODO: Use batch unregistration */
765 while (!list_empty(&probe_list)) {
766 tp = list_entry(probe_list.next, struct trace_probe, list);
767 unregister_trace_probe(tp);
768 free_trace_probe(tp);
769 }
770 mutex_unlock(&probe_lock);
771}
772
773
774/* Probes listing interfaces */
775static void *probes_seq_start(struct seq_file *m, loff_t *pos)
776{
777 mutex_lock(&probe_lock);
778 return seq_list_start(&probe_list, *pos);
779}
780
781static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
782{
783 return seq_list_next(v, &probe_list, pos);
784}
785
786static void probes_seq_stop(struct seq_file *m, void *v)
787{
788 mutex_unlock(&probe_lock);
789}
790
791static int probes_seq_show(struct seq_file *m, void *v)
792{
793 struct trace_probe *tp = v;
794 int i, ret;
795 char buf[MAX_ARGSTR_LEN + 1];
796
797 seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p');
798 seq_printf(m, ":%s/%s", tp->call.system, tp->call.name);
799
800 if (!tp->symbol)
801 seq_printf(m, " 0x%p", tp->rp.kp.addr);
802 else if (tp->rp.kp.offset)
803 seq_printf(m, " %s+%u", probe_symbol(tp), tp->rp.kp.offset);
804 else
805 seq_printf(m, " %s", probe_symbol(tp));
806
807 for (i = 0; i < tp->nr_args; i++) {
808 ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i].fetch);
809 if (ret < 0) {
810 pr_warning("Argument%d decoding error(%d).\n", i, ret);
811 return ret;
812 }
813 seq_printf(m, " %s=%s", tp->args[i].name, buf);
814 }
815 seq_printf(m, "\n");
816 return 0;
817}
818
819static const struct seq_operations probes_seq_op = {
820 .start = probes_seq_start,
821 .next = probes_seq_next,
822 .stop = probes_seq_stop,
823 .show = probes_seq_show
824};
825
826static int probes_open(struct inode *inode, struct file *file)
827{
828 if ((file->f_mode & FMODE_WRITE) &&
829 (file->f_flags & O_TRUNC))
830 cleanup_all_probes();
831
832 return seq_open(file, &probes_seq_op);
833}
834
835static int command_trace_probe(const char *buf)
836{
837 char **argv;
838 int argc = 0, ret = 0;
839
840 argv = argv_split(GFP_KERNEL, buf, &argc);
841 if (!argv)
842 return -ENOMEM;
843
844 if (argc)
845 ret = create_trace_probe(argc, argv);
846
847 argv_free(argv);
848 return ret;
849}
850
851#define WRITE_BUFSIZE 128
852
853static ssize_t probes_write(struct file *file, const char __user *buffer,
854 size_t count, loff_t *ppos)
855{
856 char *kbuf, *tmp;
857 int ret;
858 size_t done;
859 size_t size;
860
861 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
862 if (!kbuf)
863 return -ENOMEM;
864
865 ret = done = 0;
866 while (done < count) {
867 size = count - done;
868 if (size >= WRITE_BUFSIZE)
869 size = WRITE_BUFSIZE - 1;
870 if (copy_from_user(kbuf, buffer + done, size)) {
871 ret = -EFAULT;
872 goto out;
873 }
874 kbuf[size] = '\0';
875 tmp = strchr(kbuf, '\n');
876 if (tmp) {
877 *tmp = '\0';
878 size = tmp - kbuf + 1;
879 } else if (done + size < count) {
880 pr_warning("Line length is too long: "
881 "Should be less than %d.", WRITE_BUFSIZE);
882 ret = -EINVAL;
883 goto out;
884 }
885 done += size;
886 /* Remove comments */
887 tmp = strchr(kbuf, '#');
888 if (tmp)
889 *tmp = '\0';
890
891 ret = command_trace_probe(kbuf);
892 if (ret)
893 goto out;
894 }
895 ret = done;
896out:
897 kfree(kbuf);
898 return ret;
899}
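
For reference, a minimal userspace sketch of driving this interface (a hedged illustration: the debugfs path, the probed symbol, and the fetch-argument spelling are assumptions, not taken from this patch). It exercises the behaviors probes_write() implements above: newline-delimited commands, '#' comments, and probe clearing via a truncating open.

/*
 * Hypothetical userspace sketch: feed probe definitions to the
 * kprobe_events file. Assumes debugfs is mounted at /sys/kernel/debug
 * and that do_sys_open and the %ax fetch argument are valid here.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* O_TRUNC on a writable open runs cleanup_all_probes() first */
	int fd = open("/sys/kernel/debug/tracing/kprobe_events",
		      O_WRONLY | O_TRUNC);
	const char *cmds =
		"p:myprobe do_sys_open dfd=%ax  # entry probe\n"
		"r:myretprobe do_sys_open rv=$retval\n";

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* probes_write() splits this buffer on '\n' and strips '#'
	 * comments before handing each line to command_trace_probe() */
	if (write(fd, cmds, strlen(cmds)) < 0)
		perror("write");
	close(fd);
	return 0;
}
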
900
901static const struct file_operations kprobe_events_ops = {
902 .owner = THIS_MODULE,
903 .open = probes_open,
904 .read = seq_read,
905 .llseek = seq_lseek,
906 .release = seq_release,
907 .write = probes_write,
908};
909
910/* Probes profiling interfaces */
911static int probes_profile_seq_show(struct seq_file *m, void *v)
912{
913 struct trace_probe *tp = v;
914
915 seq_printf(m, " %-44s %15lu %15lu\n", tp->call.name, tp->nhit,
916 tp->rp.kp.nmissed);
917
918 return 0;
919}
920
921static const struct seq_operations profile_seq_op = {
922 .start = probes_seq_start,
923 .next = probes_seq_next,
924 .stop = probes_seq_stop,
925 .show = probes_profile_seq_show
926};
927
928static int profile_open(struct inode *inode, struct file *file)
929{
930 return seq_open(file, &profile_seq_op);
931}
932
933static const struct file_operations kprobe_profile_ops = {
934 .owner = THIS_MODULE,
935 .open = profile_open,
936 .read = seq_read,
937 .llseek = seq_lseek,
938 .release = seq_release,
939};
940
941/* Kprobe handler */
942static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
943{
944 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
945 struct kprobe_trace_entry *entry;
946 struct ring_buffer_event *event;
947 struct ring_buffer *buffer;
948 int size, i, pc;
949 unsigned long irq_flags;
950 struct ftrace_event_call *call = &tp->call;
951
952 tp->nhit++;
953
954 local_save_flags(irq_flags);
955 pc = preempt_count();
956
957 size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
958
959 event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
960 irq_flags, pc);
961 if (!event)
962 return 0;
963
964 entry = ring_buffer_event_data(event);
965 entry->nargs = tp->nr_args;
966 entry->ip = (unsigned long)kp->addr;
967 for (i = 0; i < tp->nr_args; i++)
968 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
969
970 if (!filter_current_check_discard(buffer, call, entry, event))
971 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
972 return 0;
973}
974
975/* Kretprobe handler */
976static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
977 struct pt_regs *regs)
978{
979 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
980 struct kretprobe_trace_entry *entry;
981 struct ring_buffer_event *event;
982 struct ring_buffer *buffer;
983 int size, i, pc;
984 unsigned long irq_flags;
985 struct ftrace_event_call *call = &tp->call;
986
987 local_save_flags(irq_flags);
988 pc = preempt_count();
989
990 size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
991
992 event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
993 irq_flags, pc);
994 if (!event)
995 return 0;
996
997 entry = ring_buffer_event_data(event);
998 entry->nargs = tp->nr_args;
999 entry->func = (unsigned long)tp->rp.kp.addr;
1000 entry->ret_ip = (unsigned long)ri->ret_addr;
1001 for (i = 0; i < tp->nr_args; i++)
1002 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1003
1004 if (!filter_current_check_discard(buffer, call, entry, event))
1005 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
1006
1007 return 0;
1008}
1009
1010/* Event entry printers */
1011enum print_line_t
1012print_kprobe_event(struct trace_iterator *iter, int flags)
1013{
1014 struct kprobe_trace_entry *field;
1015 struct trace_seq *s = &iter->seq;
1016 struct trace_event *event;
1017 struct trace_probe *tp;
1018 int i;
1019
1020 field = (struct kprobe_trace_entry *)iter->ent;
1021 event = ftrace_find_event(field->ent.type);
1022 tp = container_of(event, struct trace_probe, event);
1023
1024 if (!trace_seq_printf(s, "%s: (", tp->call.name))
1025 goto partial;
1026
1027 if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
1028 goto partial;
1029
1030 if (!trace_seq_puts(s, ")"))
1031 goto partial;
1032
1033 for (i = 0; i < field->nargs; i++)
1034 if (!trace_seq_printf(s, " %s=%lx",
1035 tp->args[i].name, field->args[i]))
1036 goto partial;
1037
1038 if (!trace_seq_puts(s, "\n"))
1039 goto partial;
1040
1041 return TRACE_TYPE_HANDLED;
1042partial:
1043 return TRACE_TYPE_PARTIAL_LINE;
1044}
1045
1046enum print_line_t
1047print_kretprobe_event(struct trace_iterator *iter, int flags)
1048{
1049 struct kretprobe_trace_entry *field;
1050 struct trace_seq *s = &iter->seq;
1051 struct trace_event *event;
1052 struct trace_probe *tp;
1053 int i;
1054
1055 field = (struct kretprobe_trace_entry *)iter->ent;
1056 event = ftrace_find_event(field->ent.type);
1057 tp = container_of(event, struct trace_probe, event);
1058
1059 if (!trace_seq_printf(s, "%s: (", tp->call.name))
1060 goto partial;
1061
1062 if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
1063 goto partial;
1064
1065 if (!trace_seq_puts(s, " <- "))
1066 goto partial;
1067
1068 if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
1069 goto partial;
1070
1071 if (!trace_seq_puts(s, ")"))
1072 goto partial;
1073
1074 for (i = 0; i < field->nargs; i++)
1075 if (!trace_seq_printf(s, " %s=%lx",
1076 tp->args[i].name, field->args[i]))
1077 goto partial;
1078
1079 if (!trace_seq_puts(s, "\n"))
1080 goto partial;
1081
1082 return TRACE_TYPE_HANDLED;
1083partial:
1084 return TRACE_TYPE_PARTIAL_LINE;
1085}
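
As a rough, hypothetical illustration (event names, symbols, and values invented), the two printers above render entries shaped like:

	myprobe: (do_sys_open+0x0/0xd6) dfd=3 flags=241
	myretprobe: (sys_open+0x1b/0x1d <- do_sys_open) rv=4
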
1086
1087static int probe_event_enable(struct ftrace_event_call *call)
1088{
1089 struct trace_probe *tp = (struct trace_probe *)call->data;
1090
1091 tp->flags |= TP_FLAG_TRACE;
1092 if (probe_is_return(tp))
1093 return enable_kretprobe(&tp->rp);
1094 else
1095 return enable_kprobe(&tp->rp.kp);
1096}
1097
1098static void probe_event_disable(struct ftrace_event_call *call)
1099{
1100 struct trace_probe *tp = (struct trace_probe *)call->data;
1101
1102 tp->flags &= ~TP_FLAG_TRACE;
1103 if (!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE))) {
1104 if (probe_is_return(tp))
1105 disable_kretprobe(&tp->rp);
1106 else
1107 disable_kprobe(&tp->rp.kp);
1108 }
1109}
1110
1111static int probe_event_raw_init(struct ftrace_event_call *event_call)
1112{
1113 INIT_LIST_HEAD(&event_call->fields);
1114
1115 return 0;
1116}
1117
1118#undef DEFINE_FIELD
1119#define DEFINE_FIELD(type, item, name, is_signed) \
1120 do { \
1121 ret = trace_define_field(event_call, #type, name, \
1122 offsetof(typeof(field), item), \
1123 sizeof(field.item), is_signed, \
1124 FILTER_OTHER); \
1125 if (ret) \
1126 return ret; \
1127 } while (0)
1128
1129static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
1130{
1131 int ret, i;
1132 struct kprobe_trace_entry field;
1133 struct trace_probe *tp = (struct trace_probe *)event_call->data;
1134
1135 DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
1136 DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
1137 /* Set argument names as fields */
1138 for (i = 0; i < tp->nr_args; i++)
1139 DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0);
1140 return 0;
1141}
1142
1143static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
1144{
1145 int ret, i;
1146 struct kretprobe_trace_entry field;
1147 struct trace_probe *tp = (struct trace_probe *)event_call->data;
1148
1149 DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
1150 DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
1151 DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
1152 /* Set argument names as fields */
1153 for (i = 0; i < tp->nr_args; i++)
1154 DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0);
1155 return 0;
1156}
1157
1158static int __probe_event_show_format(struct trace_seq *s,
1159 struct trace_probe *tp, const char *fmt,
1160 const char *arg)
1161{
1162 int i;
1163
1164 /* Show format */
1165 if (!trace_seq_printf(s, "\nprint fmt: \"%s", fmt))
1166 return 0;
1167
1168 for (i = 0; i < tp->nr_args; i++)
1169 if (!trace_seq_printf(s, " %s=%%lx", tp->args[i].name))
1170 return 0;
1171
1172 if (!trace_seq_printf(s, "\", %s", arg))
1173 return 0;
1174
1175 for (i = 0; i < tp->nr_args; i++)
1176 if (!trace_seq_printf(s, ", REC->%s", tp->args[i].name))
1177 return 0;
1178
1179 return trace_seq_puts(s, "\n");
1180}
1181
1182#undef SHOW_FIELD
1183#define SHOW_FIELD(type, item, name) \
1184 do { \
1185 ret = trace_seq_printf(s, "\tfield: " #type " %s;\t" \
1186 "offset:%u;\tsize:%u;\n", name, \
1187 (unsigned int)offsetof(typeof(field), item),\
1188 (unsigned int)sizeof(type)); \
1189 if (!ret) \
1190 return 0; \
1191 } while (0)
1192
1193static int kprobe_event_show_format(struct ftrace_event_call *call,
1194 struct trace_seq *s)
1195{
1196 struct kprobe_trace_entry field __attribute__((unused));
1197 int ret, i;
1198 struct trace_probe *tp = (struct trace_probe *)call->data;
1199
1200 SHOW_FIELD(unsigned long, ip, FIELD_STRING_IP);
1201 SHOW_FIELD(int, nargs, FIELD_STRING_NARGS);
1202
1203 /* Show fields */
1204 for (i = 0; i < tp->nr_args; i++)
1205 SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
1206 trace_seq_puts(s, "\n");
1207
1208 return __probe_event_show_format(s, tp, "(%lx)",
1209 "REC->" FIELD_STRING_IP);
1210}
1211
1212static int kretprobe_event_show_format(struct ftrace_event_call *call,
1213 struct trace_seq *s)
1214{
1215 struct kretprobe_trace_entry field __attribute__((unused));
1216 int ret, i;
1217 struct trace_probe *tp = (struct trace_probe *)call->data;
1218
1219 SHOW_FIELD(unsigned long, func, FIELD_STRING_FUNC);
1220 SHOW_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP);
1221 SHOW_FIELD(int, nargs, FIELD_STRING_NARGS);
1222
1223 /* Show fields */
1224 for (i = 0; i < tp->nr_args; i++)
1225 SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
1226 trace_seq_puts(s, "\n");
1227
1228 return __probe_event_show_format(s, tp, "(%lx <- %lx)",
1229 "REC->" FIELD_STRING_FUNC
1230 ", REC->" FIELD_STRING_RETIP);
1231}
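
For orientation, the format file produced for a return probe by the routines above would look roughly like this (a sketch: the FIELD_STRING_* macros are assumed to expand to func, ret_ip and nargs, the argument name 'rv' is invented, and offsets/sizes are elided):

	field: unsigned long func;	offset:...;	size:...;
	field: unsigned long ret_ip;	offset:...;	size:...;
	field: int nargs;	offset:...;	size:...;
	field: unsigned long rv;	offset:...;	size:...;

	print fmt: "(%lx <- %lx) rv=%lx", REC->func, REC->ret_ip, REC->rv
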
1232
1233#ifdef CONFIG_EVENT_PROFILE
1234
1235/* Kprobe profile handler */
1236static __kprobes int kprobe_profile_func(struct kprobe *kp,
1237 struct pt_regs *regs)
1238{
1239 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1240 struct ftrace_event_call *call = &tp->call;
1241 struct kprobe_trace_entry *entry;
1242 struct trace_entry *ent;
1243 int size, __size, i, pc, __cpu;
1244 unsigned long irq_flags;
1245 char *trace_buf;
1246 char *raw_data;
1247 int rctx;
1248
1249 pc = preempt_count();
1250 __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
1251 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1252 size -= sizeof(u32);
1253 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
1254 "profile buffer not large enough"))
1255 return 0;
1256
1257 /*
1258	 * Protect the non-NMI buffer.
1259	 * This also protects the RCU read side.
1260 */
1261 local_irq_save(irq_flags);
1262
1263 rctx = perf_swevent_get_recursion_context();
1264 if (rctx < 0)
1265 goto end_recursion;
1266
1267 __cpu = smp_processor_id();
1268
1269 if (in_nmi())
1270 trace_buf = rcu_dereference(perf_trace_buf_nmi);
1271 else
1272 trace_buf = rcu_dereference(perf_trace_buf);
1273
1274 if (!trace_buf)
1275 goto end;
1276
1277 raw_data = per_cpu_ptr(trace_buf, __cpu);
1278
1279 /* Zero dead bytes from alignment to avoid buffer leak to userspace */
1280 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
1281 entry = (struct kprobe_trace_entry *)raw_data;
1282 ent = &entry->ent;
1283
1284 tracing_generic_entry_update(ent, irq_flags, pc);
1285 ent->type = call->id;
1286 entry->nargs = tp->nr_args;
1287 entry->ip = (unsigned long)kp->addr;
1288 for (i = 0; i < tp->nr_args; i++)
1289 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1290 perf_tp_event(call->id, entry->ip, 1, entry, size);
1291
1292end:
1293 perf_swevent_put_recursion_context(rctx);
1294end_recursion:
1295 local_irq_restore(irq_flags);
1296
1297 return 0;
1298}
1299
1300/* Kretprobe profile handler */
1301static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
1302 struct pt_regs *regs)
1303{
1304 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1305 struct ftrace_event_call *call = &tp->call;
1306 struct kretprobe_trace_entry *entry;
1307 struct trace_entry *ent;
1308 int size, __size, i, pc, __cpu;
1309 unsigned long irq_flags;
1310 char *trace_buf;
1311 char *raw_data;
1312 int rctx;
1313
1314 pc = preempt_count();
1315 __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
1316 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1317 size -= sizeof(u32);
1318 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
1319 "profile buffer not large enough"))
1320 return 0;
1321
1322 /*
1323 * Protect the non nmi buffer
1324 * This also protects the rcu read side
1325 */
1326 local_irq_save(irq_flags);
1327
1328 rctx = perf_swevent_get_recursion_context();
1329 if (rctx < 0)
1330 goto end_recursion;
1331
1332 __cpu = smp_processor_id();
1333
1334 if (in_nmi())
1335 trace_buf = rcu_dereference(perf_trace_buf_nmi);
1336 else
1337 trace_buf = rcu_dereference(perf_trace_buf);
1338
1339 if (!trace_buf)
1340 goto end;
1341
1342 raw_data = per_cpu_ptr(trace_buf, __cpu);
1343
1344 /* Zero dead bytes from alignment to avoid buffer leak to userspace */
1345 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
1346 entry = (struct kretprobe_trace_entry *)raw_data;
1347 ent = &entry->ent;
1348
1349 tracing_generic_entry_update(ent, irq_flags, pc);
1350 ent->type = call->id;
1351 entry->nargs = tp->nr_args;
1352 entry->func = (unsigned long)tp->rp.kp.addr;
1353 entry->ret_ip = (unsigned long)ri->ret_addr;
1354 for (i = 0; i < tp->nr_args; i++)
1355 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1356 perf_tp_event(call->id, entry->ret_ip, 1, entry, size);
1357
1358end:
1359 perf_swevent_put_recursion_context(rctx);
1360end_recursion:
1361 local_irq_restore(irq_flags);
1362
1363 return 0;
1364}
1365
1366static int probe_profile_enable(struct ftrace_event_call *call)
1367{
1368 struct trace_probe *tp = (struct trace_probe *)call->data;
1369
1370 tp->flags |= TP_FLAG_PROFILE;
1371
1372 if (probe_is_return(tp))
1373 return enable_kretprobe(&tp->rp);
1374 else
1375 return enable_kprobe(&tp->rp.kp);
1376}
1377
1378static void probe_profile_disable(struct ftrace_event_call *call)
1379{
1380 struct trace_probe *tp = (struct trace_probe *)call->data;
1381
1382 tp->flags &= ~TP_FLAG_PROFILE;
1383
1384 if (!(tp->flags & TP_FLAG_TRACE)) {
1385 if (probe_is_return(tp))
1386 disable_kretprobe(&tp->rp);
1387 else
1388 disable_kprobe(&tp->rp.kp);
1389 }
1390}
1391#endif /* CONFIG_EVENT_PROFILE */
1392
1393
1394static __kprobes
1395int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
1396{
1397 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1398
1399 if (tp->flags & TP_FLAG_TRACE)
1400 kprobe_trace_func(kp, regs);
1401#ifdef CONFIG_EVENT_PROFILE
1402 if (tp->flags & TP_FLAG_PROFILE)
1403 kprobe_profile_func(kp, regs);
1404#endif /* CONFIG_EVENT_PROFILE */
1405	return 0;	/* We don't tweak the kernel, so just return 0 */
1406}
1407
1408static __kprobes
1409int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
1410{
1411 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1412
1413 if (tp->flags & TP_FLAG_TRACE)
1414 kretprobe_trace_func(ri, regs);
1415#ifdef CONFIG_EVENT_PROFILE
1416 if (tp->flags & TP_FLAG_PROFILE)
1417 kretprobe_profile_func(ri, regs);
1418#endif /* CONFIG_EVENT_PROFILE */
1419	return 0;	/* We don't tweak the kernel, so just return 0 */
1420}
1421
1422static int register_probe_event(struct trace_probe *tp)
1423{
1424 struct ftrace_event_call *call = &tp->call;
1425 int ret;
1426
1427 /* Initialize ftrace_event_call */
1428 if (probe_is_return(tp)) {
1429 tp->event.trace = print_kretprobe_event;
1430 call->raw_init = probe_event_raw_init;
1431 call->show_format = kretprobe_event_show_format;
1432 call->define_fields = kretprobe_event_define_fields;
1433 } else {
1434 tp->event.trace = print_kprobe_event;
1435 call->raw_init = probe_event_raw_init;
1436 call->show_format = kprobe_event_show_format;
1437 call->define_fields = kprobe_event_define_fields;
1438 }
1439 call->event = &tp->event;
1440 call->id = register_ftrace_event(&tp->event);
1441 if (!call->id)
1442 return -ENODEV;
1443 call->enabled = 0;
1444 call->regfunc = probe_event_enable;
1445 call->unregfunc = probe_event_disable;
1446
1447#ifdef CONFIG_EVENT_PROFILE
1448 call->profile_enable = probe_profile_enable;
1449 call->profile_disable = probe_profile_disable;
1450#endif
1451 call->data = tp;
1452 ret = trace_add_event_call(call);
1453 if (ret) {
1454 pr_info("Failed to register kprobe event: %s\n", call->name);
1455 unregister_ftrace_event(&tp->event);
1456 }
1457 return ret;
1458}
1459
1460static void unregister_probe_event(struct trace_probe *tp)
1461{
1462 /* tp->event is unregistered in trace_remove_event_call() */
1463 trace_remove_event_call(&tp->call);
1464}
1465
1466/* Make a debugfs interface for controlling probe points */
1467static __init int init_kprobe_trace(void)
1468{
1469 struct dentry *d_tracer;
1470 struct dentry *entry;
1471
1472 d_tracer = tracing_init_dentry();
1473 if (!d_tracer)
1474 return 0;
1475
1476 entry = debugfs_create_file("kprobe_events", 0644, d_tracer,
1477 NULL, &kprobe_events_ops);
1478
1479 /* Event list interface */
1480 if (!entry)
1481 pr_warning("Could not create debugfs "
1482 "'kprobe_events' entry\n");
1483
1484 /* Profile interface */
1485 entry = debugfs_create_file("kprobe_profile", 0444, d_tracer,
1486 NULL, &kprobe_profile_ops);
1487
1488 if (!entry)
1489 pr_warning("Could not create debugfs "
1490 "'kprobe_profile' entry\n");
1491 return 0;
1492}
1493fs_initcall(init_kprobe_trace);
1494
1495
1496#ifdef CONFIG_FTRACE_STARTUP_TEST
1497
1498static int kprobe_trace_selftest_target(int a1, int a2, int a3,
1499 int a4, int a5, int a6)
1500{
1501 return a1 + a2 + a3 + a4 + a5 + a6;
1502}
1503
1504static __init int kprobe_trace_self_tests_init(void)
1505{
1506 int ret;
1507 int (*target)(int, int, int, int, int, int);
1508
1509 target = kprobe_trace_selftest_target;
1510
1511 pr_info("Testing kprobe tracing: ");
1512
1513 ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target "
1514 "$arg1 $arg2 $arg3 $arg4 $stack $stack0");
1515 if (WARN_ON_ONCE(ret))
1516 pr_warning("error enabling function entry\n");
1517
1518 ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target "
1519 "$retval");
1520 if (WARN_ON_ONCE(ret))
1521 pr_warning("error enabling function return\n");
1522
1523 ret = target(1, 2, 3, 4, 5, 6);
1524
1525 cleanup_all_probes();
1526
1527 pr_cont("OK\n");
1528 return 0;
1529}
1530
1531late_initcall(kprobe_trace_self_tests_init);
1532
1533#endif
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
new file mode 100644
index 000000000000..faf37fa4408c
--- /dev/null
+++ b/kernel/trace/trace_ksym.c
@@ -0,0 +1,545 @@
1/*
2 * trace_ksym.c - Kernel Symbol Tracer
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright (C) IBM Corporation, 2009
19 */
20
21#include <linux/kallsyms.h>
22#include <linux/uaccess.h>
23#include <linux/debugfs.h>
24#include <linux/ftrace.h>
25#include <linux/module.h>
26#include <linux/fs.h>
27
28#include "trace_output.h"
29#include "trace_stat.h"
30#include "trace.h"
31
32#include <linux/hw_breakpoint.h>
33#include <asm/hw_breakpoint.h>
34
35/*
36 * For now, restrict the number of symbols traced simultaneously to the
37 * number of available hardware breakpoint registers.
38 */
39#define KSYM_TRACER_MAX HBP_NUM
40
41#define KSYM_TRACER_OP_LEN 3 /* rw- */
42
43struct trace_ksym {
44 struct perf_event **ksym_hbp;
45 struct perf_event_attr attr;
46#ifdef CONFIG_PROFILE_KSYM_TRACER
47 unsigned long counter;
48#endif
49 struct hlist_node ksym_hlist;
50};
51
52static struct trace_array *ksym_trace_array;
53
54static unsigned int ksym_filter_entry_count;
55static unsigned int ksym_tracing_enabled;
56
57static HLIST_HEAD(ksym_filter_head);
58
59static DEFINE_MUTEX(ksym_tracer_mutex);
60
61#ifdef CONFIG_PROFILE_KSYM_TRACER
62
63#define MAX_UL_INT 0xffffffff
64
65void ksym_collect_stats(unsigned long hbp_hit_addr)
66{
67 struct hlist_node *node;
68 struct trace_ksym *entry;
69
70 rcu_read_lock();
71 hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
72 if ((entry->attr.bp_addr == hbp_hit_addr) &&
73 (entry->counter <= MAX_UL_INT)) {
74 entry->counter++;
75 break;
76 }
77 }
78 rcu_read_unlock();
79}
80#endif /* CONFIG_PROFILE_KSYM_TRACER */
81
82void ksym_hbp_handler(struct perf_event *hbp, int nmi,
83 struct perf_sample_data *data,
84 struct pt_regs *regs)
85{
86 struct ring_buffer_event *event;
87 struct ksym_trace_entry *entry;
88 struct ring_buffer *buffer;
89 int pc;
90
91 if (!ksym_tracing_enabled)
92 return;
93
94 buffer = ksym_trace_array->buffer;
95
96 pc = preempt_count();
97
98 event = trace_buffer_lock_reserve(buffer, TRACE_KSYM,
99 sizeof(*entry), 0, pc);
100 if (!event)
101 return;
102
103 entry = ring_buffer_event_data(event);
104 entry->ip = instruction_pointer(regs);
105 entry->type = hw_breakpoint_type(hbp);
106 entry->addr = hw_breakpoint_addr(hbp);
107 strlcpy(entry->cmd, current->comm, TASK_COMM_LEN);
108
109#ifdef CONFIG_PROFILE_KSYM_TRACER
110 ksym_collect_stats(hw_breakpoint_addr(hbp));
111#endif /* CONFIG_PROFILE_KSYM_TRACER */
112
113 trace_buffer_unlock_commit(buffer, event, 0, pc);
114}
115
116/* Valid access types are represented as
117 *
118 * rw- : Set Read/Write Access Breakpoint
119 * -w- : Set Write Access Breakpoint
120 * --- : Clear Breakpoints
121 * --x : Set Execution Breakpoint (not available yet)
122 *
123 */
124static int ksym_trace_get_access_type(char *str)
125{
126 int access = 0;
127
128 if (str[0] == 'r')
129 access |= HW_BREAKPOINT_R;
130
131 if (str[1] == 'w')
132 access |= HW_BREAKPOINT_W;
133
134 if (str[2] == 'x')
135 access |= HW_BREAKPOINT_X;
136
137 switch (access) {
138 case HW_BREAKPOINT_R:
139 case HW_BREAKPOINT_W:
140 case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
141 return access;
142 default:
143 return -EINVAL;
144 }
145}
146
147/*
148 * Several kinds of malformed requests are possible, and we attempt to
149 * catch them all. The rules, enumerated:
150 * 1. Kernel symbols containing ':' are not allowed, since ':' is the
151 *    delimiter; i.e. multiple ':' symbols are disallowed. (A possible
152 *    future use is of the form <module>:<ksym_name>:<op>.)
153 * 2. No delimiter symbol ':' in the input string
154 * 3. Spurious operator symbols or symbols not in their respective positions
155 * 4. <ksym_name>:--- i.e. clear breakpoint request when ksym_name not in file
156 * 5. Kernel symbol not a part of /proc/kallsyms
157 * 6. Duplicate requests
158 */
159static int parse_ksym_trace_str(char *input_string, char **ksymname,
160 unsigned long *addr)
161{
162 int ret;
163
164 *ksymname = strsep(&input_string, ":");
165 *addr = kallsyms_lookup_name(*ksymname);
166
167 /* Check for malformed request: (2), (1) and (5) */
168 if ((!input_string) ||
169 (strlen(input_string) != KSYM_TRACER_OP_LEN) ||
170 (*addr == 0))
171		return -EINVAL;
172
173 ret = ksym_trace_get_access_type(input_string);
174
175 return ret;
176}
177
178int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
179{
180 struct trace_ksym *entry;
181 int ret = -ENOMEM;
182
183 if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
184		printk(KERN_ERR "ksym_tracer: Maximum limit (%d) reached; no"
185		       " new tracing requests can be accepted now.\n",
186 KSYM_TRACER_MAX);
187 return -ENOSPC;
188 }
189
190 entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
191 if (!entry)
192 return -ENOMEM;
193
194 hw_breakpoint_init(&entry->attr);
195
196 entry->attr.bp_type = op;
197 entry->attr.bp_addr = addr;
198 entry->attr.bp_len = HW_BREAKPOINT_LEN_4;
199
200 ret = -EAGAIN;
201 entry->ksym_hbp = register_wide_hw_breakpoint(&entry->attr,
202 ksym_hbp_handler);
203
204 if (IS_ERR(entry->ksym_hbp)) {
205 ret = PTR_ERR(entry->ksym_hbp);
206 printk(KERN_INFO "ksym_tracer request failed. Try again"
207		       " later!\n");
208 goto err;
209 }
210
211 hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head);
212 ksym_filter_entry_count++;
213
214 return 0;
215
216err:
217 kfree(entry);
218
219 return ret;
220}
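
A minimal userspace sketch of exercising this path (hypothetical: the symbol pid_max and the debugfs mount point are illustrative assumptions; the kernel must have been built with KSYM_TRACER support):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/kernel/debug/tracing/ksym_trace_filter",
		      O_WRONLY);
	const char *req = "pid_max:rw-\n";	/* <ksym_name>:<op> */

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* parse_ksym_trace_str() splits on ':' and maps "rw-" to
	 * HW_BREAKPOINT_R | HW_BREAKPOINT_W */
	if (write(fd, req, strlen(req)) < 0)
		perror("write");
	close(fd);
	return 0;
}
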
221
222static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
223 size_t count, loff_t *ppos)
224{
225 struct trace_ksym *entry;
226 struct hlist_node *node;
227 struct trace_seq *s;
228 ssize_t cnt = 0;
229 int ret;
230
231 s = kmalloc(sizeof(*s), GFP_KERNEL);
232 if (!s)
233 return -ENOMEM;
234 trace_seq_init(s);
235
236 mutex_lock(&ksym_tracer_mutex);
237
238 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
239 ret = trace_seq_printf(s, "%pS:",
240 (void *)(unsigned long)entry->attr.bp_addr);
241 if (entry->attr.bp_type == HW_BREAKPOINT_R)
242 ret = trace_seq_puts(s, "r--\n");
243 else if (entry->attr.bp_type == HW_BREAKPOINT_W)
244 ret = trace_seq_puts(s, "-w-\n");
245 else if (entry->attr.bp_type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R))
246 ret = trace_seq_puts(s, "rw-\n");
247 WARN_ON_ONCE(!ret);
248 }
249
250 cnt = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
251
252 mutex_unlock(&ksym_tracer_mutex);
253
254 kfree(s);
255
256 return cnt;
257}
258
259static void __ksym_trace_reset(void)
260{
261 struct trace_ksym *entry;
262 struct hlist_node *node, *node1;
263
264 mutex_lock(&ksym_tracer_mutex);
265 hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
266 ksym_hlist) {
267 unregister_wide_hw_breakpoint(entry->ksym_hbp);
268 ksym_filter_entry_count--;
269 hlist_del_rcu(&(entry->ksym_hlist));
270 synchronize_rcu();
271 kfree(entry);
272 }
273 mutex_unlock(&ksym_tracer_mutex);
274}
275
276static ssize_t ksym_trace_filter_write(struct file *file,
277 const char __user *buffer,
278 size_t count, loff_t *ppos)
279{
280 struct trace_ksym *entry;
281 struct hlist_node *node;
282 char *buf, *input_string, *ksymname = NULL;
283 unsigned long ksym_addr = 0;
284 int ret, op, changed = 0;
285
286 buf = kzalloc(count + 1, GFP_KERNEL);
287 if (!buf)
288 return -ENOMEM;
289
290 ret = -EFAULT;
291 if (copy_from_user(buf, buffer, count))
292 goto out;
293
294 buf[count] = '\0';
295 input_string = strstrip(buf);
296
297 /*
298 * Clear all breakpoints if:
299 * 1: echo > ksym_trace_filter
300 * 2: echo 0 > ksym_trace_filter
301 * 3: echo "*:---" > ksym_trace_filter
302 */
303 if (!buf[0] || !strcmp(buf, "0") ||
304 !strcmp(buf, "*:---")) {
305 __ksym_trace_reset();
306 ret = 0;
307 goto out;
308 }
309
310 ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
311 if (ret < 0)
312 goto out;
313
314 mutex_lock(&ksym_tracer_mutex);
315
316 ret = -EINVAL;
317 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
318 if (entry->attr.bp_addr == ksym_addr) {
319 /* Check for malformed request: (6) */
320 if (entry->attr.bp_type != op)
321 changed = 1;
322 else
323 goto out_unlock;
324 break;
325 }
326 }
327 if (changed) {
328 unregister_wide_hw_breakpoint(entry->ksym_hbp);
329 entry->attr.bp_type = op;
330 ret = 0;
331 if (op > 0) {
332 entry->ksym_hbp =
333 register_wide_hw_breakpoint(&entry->attr,
334 ksym_hbp_handler);
335 if (IS_ERR(entry->ksym_hbp))
336 ret = PTR_ERR(entry->ksym_hbp);
337 else
338 goto out_unlock;
339 }
340 /* Error or "symbol:---" case: drop it */
341 ksym_filter_entry_count--;
342 hlist_del_rcu(&(entry->ksym_hlist));
343 synchronize_rcu();
344 kfree(entry);
345 goto out_unlock;
346 } else {
347 /* Check for malformed request: (4) */
348 if (op)
349 ret = process_new_ksym_entry(ksymname, op, ksym_addr);
350 }
351out_unlock:
352 mutex_unlock(&ksym_tracer_mutex);
353out:
354 kfree(buf);
355 return !ret ? count : ret;
356}
357
358static const struct file_operations ksym_tracing_fops = {
359 .open = tracing_open_generic,
360 .read = ksym_trace_filter_read,
361 .write = ksym_trace_filter_write,
362};
363
364static void ksym_trace_reset(struct trace_array *tr)
365{
366 ksym_tracing_enabled = 0;
367 __ksym_trace_reset();
368}
369
370static int ksym_trace_init(struct trace_array *tr)
371{
372 int cpu, ret = 0;
373
374 for_each_online_cpu(cpu)
375 tracing_reset(tr, cpu);
376 ksym_tracing_enabled = 1;
377 ksym_trace_array = tr;
378
379 return ret;
380}
381
382static void ksym_trace_print_header(struct seq_file *m)
383{
384 seq_puts(m,
385 "# TASK-PID CPU# Symbol "
386 "Type Function\n");
387 seq_puts(m,
388 "# | | | "
389 " | |\n");
390}
391
392static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
393{
394 struct trace_entry *entry = iter->ent;
395 struct trace_seq *s = &iter->seq;
396 struct ksym_trace_entry *field;
397 char str[KSYM_SYMBOL_LEN];
398 int ret;
399
400 if (entry->type != TRACE_KSYM)
401 return TRACE_TYPE_UNHANDLED;
402
403 trace_assign_type(field, entry);
404
405 ret = trace_seq_printf(s, "%11s-%-5d [%03d] %pS", field->cmd,
406 entry->pid, iter->cpu, (char *)field->addr);
407 if (!ret)
408 return TRACE_TYPE_PARTIAL_LINE;
409
410 switch (field->type) {
411 case HW_BREAKPOINT_R:
412 ret = trace_seq_printf(s, " R ");
413 break;
414 case HW_BREAKPOINT_W:
415 ret = trace_seq_printf(s, " W ");
416 break;
417 case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
418 ret = trace_seq_printf(s, " RW ");
419 break;
420 default:
421 return TRACE_TYPE_PARTIAL_LINE;
422 }
423
424 if (!ret)
425 return TRACE_TYPE_PARTIAL_LINE;
426
427 sprint_symbol(str, field->ip);
428 ret = trace_seq_printf(s, "%s\n", str);
429 if (!ret)
430 return TRACE_TYPE_PARTIAL_LINE;
431
432 return TRACE_TYPE_HANDLED;
433}
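
A hypothetical rendering of one such entry, matching the header printed by ksym_trace_print_header() (task, symbol, and faulting function all invented):

	       bash-2279  [000] pid_max+0x0/0x4 W do_proc_dointvec+0x45/0x80
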
434
435struct tracer ksym_tracer __read_mostly =
436{
437 .name = "ksym_tracer",
438 .init = ksym_trace_init,
439 .reset = ksym_trace_reset,
440#ifdef CONFIG_FTRACE_SELFTEST
441 .selftest = trace_selftest_startup_ksym,
442#endif
443 .print_header = ksym_trace_print_header,
444 .print_line = ksym_trace_output
445};
446
447__init static int init_ksym_trace(void)
448{
449 struct dentry *d_tracer;
450 struct dentry *entry;
451
452 d_tracer = tracing_init_dentry();
453 ksym_filter_entry_count = 0;
454
455 entry = debugfs_create_file("ksym_trace_filter", 0644, d_tracer,
456 NULL, &ksym_tracing_fops);
457 if (!entry)
458 pr_warning("Could not create debugfs "
459 "'ksym_trace_filter' file\n");
460
461 return register_tracer(&ksym_tracer);
462}
463device_initcall(init_ksym_trace);
464
465
466#ifdef CONFIG_PROFILE_KSYM_TRACER
467static int ksym_tracer_stat_headers(struct seq_file *m)
468{
469 seq_puts(m, " Access Type ");
470 seq_puts(m, " Symbol Counter\n");
471 seq_puts(m, " ----------- ");
472 seq_puts(m, " ------ -------\n");
473 return 0;
474}
475
476static int ksym_tracer_stat_show(struct seq_file *m, void *v)
477{
478 struct hlist_node *stat = v;
479 struct trace_ksym *entry;
480 int access_type = 0;
481 char fn_name[KSYM_NAME_LEN];
482
483 entry = hlist_entry(stat, struct trace_ksym, ksym_hlist);
484
485 access_type = entry->attr.bp_type;
486
487 switch (access_type) {
488 case HW_BREAKPOINT_R:
489 seq_puts(m, " R ");
490 break;
491 case HW_BREAKPOINT_W:
492 seq_puts(m, " W ");
493 break;
494 case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
495 seq_puts(m, " RW ");
496 break;
497 default:
498 seq_puts(m, " NA ");
499 }
500
501 if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0)
502 seq_printf(m, " %-36s", fn_name);
503 else
504 seq_printf(m, " %-36s", "<NA>");
505 seq_printf(m, " %15lu\n", entry->counter);
506
507 return 0;
508}
509
510static void *ksym_tracer_stat_start(struct tracer_stat *trace)
511{
512 return ksym_filter_head.first;
513}
514
515static void *
516ksym_tracer_stat_next(void *v, int idx)
517{
518 struct hlist_node *stat = v;
519
520 return stat->next;
521}
522
523static struct tracer_stat ksym_tracer_stats = {
524 .name = "ksym_tracer",
525 .stat_start = ksym_tracer_stat_start,
526 .stat_next = ksym_tracer_stat_next,
527 .stat_headers = ksym_tracer_stat_headers,
528 .stat_show = ksym_tracer_stat_show
529};
530
531__init static int ksym_tracer_stat_init(void)
532{
533 int ret;
534
535 ret = register_stat_tracer(&ksym_tracer_stats);
536 if (ret) {
537 printk(KERN_WARNING "Warning: could not register "
538 "ksym tracer stats\n");
539 return 1;
540 }
541
542 return 0;
543}
544fs_initcall(ksym_tracer_stat_init);
545#endif /* CONFIG_PROFILE_KSYM_TRACER */
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index b6c12c6a1bcd..8e46b3323cdc 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -23,13 +23,21 @@ static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
23 23
24static int next_event_type = __TRACE_LAST_TYPE + 1; 24static int next_event_type = __TRACE_LAST_TYPE + 1;
25 25
26void trace_print_seq(struct seq_file *m, struct trace_seq *s) 26int trace_print_seq(struct seq_file *m, struct trace_seq *s)
27{ 27{
28 int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len; 28 int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
29 int ret;
30
31 ret = seq_write(m, s->buffer, len);
29 32
30 seq_write(m, s->buffer, len); 33 /*
34 * Only reset this buffer if we successfully wrote to the
35 * seq_file buffer.
36 */
37 if (!ret)
38 trace_seq_init(s);
31 39
32 trace_seq_init(s); 40 return ret;
33} 41}
34 42
35enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter) 43enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter)
@@ -85,7 +93,7 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
85 va_list ap; 93 va_list ap;
86 int ret; 94 int ret;
87 95
88 if (!len) 96 if (s->full || !len)
89 return 0; 97 return 0;
90 98
91 va_start(ap, fmt); 99 va_start(ap, fmt);
@@ -93,8 +101,10 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
93 va_end(ap); 101 va_end(ap);
94 102
95 /* If we can't write it all, don't bother writing anything */ 103 /* If we can't write it all, don't bother writing anything */
96 if (ret >= len) 104 if (ret >= len) {
105 s->full = 1;
97 return 0; 106 return 0;
107 }
98 108
99 s->len += ret; 109 s->len += ret;
100 110
@@ -119,14 +129,16 @@ trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
119 int len = (PAGE_SIZE - 1) - s->len; 129 int len = (PAGE_SIZE - 1) - s->len;
120 int ret; 130 int ret;
121 131
122 if (!len) 132 if (s->full || !len)
123 return 0; 133 return 0;
124 134
125 ret = vsnprintf(s->buffer + s->len, len, fmt, args); 135 ret = vsnprintf(s->buffer + s->len, len, fmt, args);
126 136
127 /* If we can't write it all, don't bother writing anything */ 137 /* If we can't write it all, don't bother writing anything */
128 if (ret >= len) 138 if (ret >= len) {
139 s->full = 1;
129 return 0; 140 return 0;
141 }
130 142
131 s->len += ret; 143 s->len += ret;
132 144
@@ -139,14 +151,16 @@ int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
139 int len = (PAGE_SIZE - 1) - s->len; 151 int len = (PAGE_SIZE - 1) - s->len;
140 int ret; 152 int ret;
141 153
142 if (!len) 154 if (s->full || !len)
143 return 0; 155 return 0;
144 156
145 ret = bstr_printf(s->buffer + s->len, len, fmt, binary); 157 ret = bstr_printf(s->buffer + s->len, len, fmt, binary);
146 158
147 /* If we can't write it all, don't bother writing anything */ 159 /* If we can't write it all, don't bother writing anything */
148 if (ret >= len) 160 if (ret >= len) {
161 s->full = 1;
149 return 0; 162 return 0;
163 }
150 164
151 s->len += ret; 165 s->len += ret;
152 166
@@ -167,8 +181,13 @@ int trace_seq_puts(struct trace_seq *s, const char *str)
167{ 181{
168 int len = strlen(str); 182 int len = strlen(str);
169 183
170 if (len > ((PAGE_SIZE - 1) - s->len)) 184 if (s->full)
185 return 0;
186
187 if (len > ((PAGE_SIZE - 1) - s->len)) {
188 s->full = 1;
171 return 0; 189 return 0;
190 }
172 191
173 memcpy(s->buffer + s->len, str, len); 192 memcpy(s->buffer + s->len, str, len);
174 s->len += len; 193 s->len += len;
@@ -178,9 +197,14 @@ int trace_seq_puts(struct trace_seq *s, const char *str)
178 197
179int trace_seq_putc(struct trace_seq *s, unsigned char c) 198int trace_seq_putc(struct trace_seq *s, unsigned char c)
180{ 199{
181 if (s->len >= (PAGE_SIZE - 1)) 200 if (s->full)
182 return 0; 201 return 0;
183 202
203 if (s->len >= (PAGE_SIZE - 1)) {
204 s->full = 1;
205 return 0;
206 }
207
184 s->buffer[s->len++] = c; 208 s->buffer[s->len++] = c;
185 209
186 return 1; 210 return 1;
@@ -188,9 +212,14 @@ int trace_seq_putc(struct trace_seq *s, unsigned char c)
188 212
189int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len) 213int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len)
190{ 214{
191 if (len > ((PAGE_SIZE - 1) - s->len)) 215 if (s->full)
192 return 0; 216 return 0;
193 217
218 if (len > ((PAGE_SIZE - 1) - s->len)) {
219 s->full = 1;
220 return 0;
221 }
222
194 memcpy(s->buffer + s->len, mem, len); 223 memcpy(s->buffer + s->len, mem, len);
195 s->len += len; 224 s->len += len;
196 225
@@ -203,6 +232,9 @@ int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, size_t len)
203 const unsigned char *data = mem; 232 const unsigned char *data = mem;
204 int i, j; 233 int i, j;
205 234
235 if (s->full)
236 return 0;
237
206#ifdef __BIG_ENDIAN 238#ifdef __BIG_ENDIAN
207 for (i = 0, j = 0; i < len; i++) { 239 for (i = 0, j = 0; i < len; i++) {
208#else 240#else
@@ -220,8 +252,13 @@ void *trace_seq_reserve(struct trace_seq *s, size_t len)
220{ 252{
221 void *ret; 253 void *ret;
222 254
223 if (len > ((PAGE_SIZE - 1) - s->len)) 255 if (s->full)
256 return 0;
257
258 if (len > ((PAGE_SIZE - 1) - s->len)) {
259 s->full = 1;
224 return NULL; 260 return NULL;
261 }
225 262
226 ret = s->buffer + s->len; 263 ret = s->buffer + s->len;
227 s->len += len; 264 s->len += len;
@@ -233,8 +270,14 @@ int trace_seq_path(struct trace_seq *s, struct path *path)
233{ 270{
234 unsigned char *p; 271 unsigned char *p;
235 272
236 if (s->len >= (PAGE_SIZE - 1)) 273 if (s->full)
274 return 0;
275
276 if (s->len >= (PAGE_SIZE - 1)) {
277 s->full = 1;
237 return 0; 278 return 0;
279 }
280
238 p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len); 281 p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
239 if (!IS_ERR(p)) { 282 if (!IS_ERR(p)) {
240 p = mangle_path(s->buffer + s->len, p, "\n"); 283 p = mangle_path(s->buffer + s->len, p, "\n");
@@ -247,6 +290,7 @@ int trace_seq_path(struct trace_seq *s, struct path *path)
247 return 1; 290 return 1;
248 } 291 }
249 292
293 s->full = 1;
250 return 0; 294 return 0;
251} 295}
252 296
@@ -373,6 +417,9 @@ int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
373 unsigned long vmstart = 0; 417 unsigned long vmstart = 0;
374 int ret = 1; 418 int ret = 1;
375 419
420 if (s->full)
421 return 0;
422
376 if (mm) { 423 if (mm) {
377 const struct vm_area_struct *vma; 424 const struct vm_area_struct *vma;
378 425
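
The recurring s->full tests added throughout this file implement a latch: the first overflowing write sets the flag, and every later write becomes a no-op until trace_print_seq() succeeds and trace_seq_init() clears it, so a truncated record is never silently resumed mid-line. A standalone toy sketch of the idea (buffer size and names invented; not the kernel structure):

#include <string.h>

struct toy_seq {
	char		buf[64];
	unsigned int	len;
	int		full;	/* latched on the first overflow */
};

static int toy_seq_puts(struct toy_seq *s, const char *str)
{
	size_t n = strlen(str);

	if (s->full)			/* an earlier write overflowed */
		return 0;
	if (n > sizeof(s->buf) - 1 - s->len) {
		s->full = 1;		/* latch; drop this and later writes */
		return 0;
	}
	memcpy(s->buf + s->len, str, n);
	s->len += n;
	return 1;
}
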
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 26185d727676..0271742abb8d 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -28,8 +28,8 @@ static int wakeup_current_cpu;
28static unsigned wakeup_prio = -1; 28static unsigned wakeup_prio = -1;
29static int wakeup_rt; 29static int wakeup_rt;
30 30
31static raw_spinlock_t wakeup_lock = 31static arch_spinlock_t wakeup_lock =
32 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 32 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
33 33
34static void __wakeup_reset(struct trace_array *tr); 34static void __wakeup_reset(struct trace_array *tr);
35 35
@@ -143,7 +143,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
143 goto out; 143 goto out;
144 144
145 local_irq_save(flags); 145 local_irq_save(flags);
146 __raw_spin_lock(&wakeup_lock); 146 arch_spin_lock(&wakeup_lock);
147 147
148 /* We could race with grabbing wakeup_lock */ 148 /* We could race with grabbing wakeup_lock */
149 if (unlikely(!tracer_enabled || next != wakeup_task)) 149 if (unlikely(!tracer_enabled || next != wakeup_task))
@@ -169,7 +169,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
169 169
170out_unlock: 170out_unlock:
171 __wakeup_reset(wakeup_trace); 171 __wakeup_reset(wakeup_trace);
172 __raw_spin_unlock(&wakeup_lock); 172 arch_spin_unlock(&wakeup_lock);
173 local_irq_restore(flags); 173 local_irq_restore(flags);
174out: 174out:
175 atomic_dec(&wakeup_trace->data[cpu]->disabled); 175 atomic_dec(&wakeup_trace->data[cpu]->disabled);
@@ -193,9 +193,9 @@ static void wakeup_reset(struct trace_array *tr)
193 tracing_reset_online_cpus(tr); 193 tracing_reset_online_cpus(tr);
194 194
195 local_irq_save(flags); 195 local_irq_save(flags);
196 __raw_spin_lock(&wakeup_lock); 196 arch_spin_lock(&wakeup_lock);
197 __wakeup_reset(tr); 197 __wakeup_reset(tr);
198 __raw_spin_unlock(&wakeup_lock); 198 arch_spin_unlock(&wakeup_lock);
199 local_irq_restore(flags); 199 local_irq_restore(flags);
200} 200}
201 201
@@ -225,7 +225,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
225 goto out; 225 goto out;
226 226
227 /* interrupts should be off from try_to_wake_up */ 227 /* interrupts should be off from try_to_wake_up */
228 __raw_spin_lock(&wakeup_lock); 228 arch_spin_lock(&wakeup_lock);
229 229
230 /* check for races. */ 230 /* check for races. */
231 if (!tracer_enabled || p->prio >= wakeup_prio) 231 if (!tracer_enabled || p->prio >= wakeup_prio)
@@ -255,7 +255,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
255 trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); 255 trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
256 256
257out_locked: 257out_locked:
258 __raw_spin_unlock(&wakeup_lock); 258 arch_spin_unlock(&wakeup_lock);
259out: 259out:
260 atomic_dec(&wakeup_trace->data[cpu]->disabled); 260 atomic_dec(&wakeup_trace->data[cpu]->disabled);
261} 261}
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index d2cdbabb4ead..280fea470d67 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -17,6 +17,7 @@ static inline int trace_valid_entry(struct trace_entry *entry)
17 case TRACE_GRAPH_ENT: 17 case TRACE_GRAPH_ENT:
18 case TRACE_GRAPH_RET: 18 case TRACE_GRAPH_RET:
19 case TRACE_HW_BRANCHES: 19 case TRACE_HW_BRANCHES:
20 case TRACE_KSYM:
20 return 1; 21 return 1;
21 } 22 }
22 return 0; 23 return 0;
@@ -66,7 +67,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
66 67
67 /* Don't allow flipping of max traces now */ 68 /* Don't allow flipping of max traces now */
68 local_irq_save(flags); 69 local_irq_save(flags);
69 __raw_spin_lock(&ftrace_max_lock); 70 arch_spin_lock(&ftrace_max_lock);
70 71
71 cnt = ring_buffer_entries(tr->buffer); 72 cnt = ring_buffer_entries(tr->buffer);
72 73
@@ -84,7 +85,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
84 break; 85 break;
85 } 86 }
86 tracing_on(); 87 tracing_on();
87 __raw_spin_unlock(&ftrace_max_lock); 88 arch_spin_unlock(&ftrace_max_lock);
88 local_irq_restore(flags); 89 local_irq_restore(flags);
89 90
90 if (count) 91 if (count)
@@ -808,3 +809,57 @@ trace_selftest_startup_hw_branches(struct tracer *trace,
808 return ret; 809 return ret;
809} 810}
810#endif /* CONFIG_HW_BRANCH_TRACER */ 811#endif /* CONFIG_HW_BRANCH_TRACER */
812
813#ifdef CONFIG_KSYM_TRACER
814static int ksym_selftest_dummy;
815
816int
817trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
818{
819 unsigned long count;
820 int ret;
821
822 /* start the tracing */
823 ret = tracer_init(trace, tr);
824 if (ret) {
825 warn_failed_init_tracer(trace, ret);
826 return ret;
827 }
828
829 ksym_selftest_dummy = 0;
830 /* Register the read-write tracing request */
831
832 ret = process_new_ksym_entry("ksym_selftest_dummy",
833 HW_BREAKPOINT_R | HW_BREAKPOINT_W,
834 (unsigned long)(&ksym_selftest_dummy));
835
836 if (ret < 0) {
837 printk(KERN_CONT "ksym_trace read-write startup test failed\n");
838 goto ret_path;
839 }
840 /* Perform a read and a write operation over the dummy variable to
841 * trigger the tracer
842 */
843 if (ksym_selftest_dummy == 0)
844 ksym_selftest_dummy++;
845
846 /* stop the tracing. */
847 tracing_stop();
848 /* check the trace buffer */
849 ret = trace_test_buffer(tr, &count);
850 trace->reset(tr);
851 tracing_start();
852
853 /* read & write operations - one each is performed on the dummy variable
854 * triggering two entries in the trace buffer
855 */
856 if (!ret && count != 2) {
857		printk(KERN_CONT "Ksym tracer startup test failed\n");
858 ret = -1;
859 }
860
861ret_path:
862 return ret;
863}
864#endif /* CONFIG_KSYM_TRACER */
865
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 8504ac71e4e8..678a5120ee30 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -27,8 +27,8 @@ static struct stack_trace max_stack_trace = {
27}; 27};
28 28
29static unsigned long max_stack_size; 29static unsigned long max_stack_size;
30static raw_spinlock_t max_stack_lock = 30static arch_spinlock_t max_stack_lock =
31 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 31 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
32 32
33static int stack_trace_disabled __read_mostly; 33static int stack_trace_disabled __read_mostly;
34static DEFINE_PER_CPU(int, trace_active); 34static DEFINE_PER_CPU(int, trace_active);
@@ -54,7 +54,7 @@ static inline void check_stack(void)
54 return; 54 return;
55 55
56 local_irq_save(flags); 56 local_irq_save(flags);
57 __raw_spin_lock(&max_stack_lock); 57 arch_spin_lock(&max_stack_lock);
58 58
59 /* a race could have already updated it */ 59 /* a race could have already updated it */
60 if (this_size <= max_stack_size) 60 if (this_size <= max_stack_size)
@@ -103,7 +103,7 @@ static inline void check_stack(void)
103 } 103 }
104 104
105 out: 105 out:
106 __raw_spin_unlock(&max_stack_lock); 106 arch_spin_unlock(&max_stack_lock);
107 local_irq_restore(flags); 107 local_irq_restore(flags);
108} 108}
109 109
@@ -171,9 +171,9 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
171 return ret; 171 return ret;
172 172
173 local_irq_save(flags); 173 local_irq_save(flags);
174 __raw_spin_lock(&max_stack_lock); 174 arch_spin_lock(&max_stack_lock);
175 *ptr = val; 175 *ptr = val;
176 __raw_spin_unlock(&max_stack_lock); 176 arch_spin_unlock(&max_stack_lock);
177 local_irq_restore(flags); 177 local_irq_restore(flags);
178 178
179 return count; 179 return count;
@@ -207,7 +207,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
207static void *t_start(struct seq_file *m, loff_t *pos) 207static void *t_start(struct seq_file *m, loff_t *pos)
208{ 208{
209 local_irq_disable(); 209 local_irq_disable();
210 __raw_spin_lock(&max_stack_lock); 210 arch_spin_lock(&max_stack_lock);
211 211
212 if (*pos == 0) 212 if (*pos == 0)
213 return SEQ_START_TOKEN; 213 return SEQ_START_TOKEN;
@@ -217,7 +217,7 @@ static void *t_start(struct seq_file *m, loff_t *pos)
217 217
218static void t_stop(struct seq_file *m, void *p) 218static void t_stop(struct seq_file *m, void *p)
219{ 219{
220 __raw_spin_unlock(&max_stack_lock); 220 arch_spin_unlock(&max_stack_lock);
221 local_irq_enable(); 221 local_irq_enable();
222} 222}
223 223
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 527e17eae575..75289f372dd2 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -14,6 +14,43 @@ static int sys_refcount_exit;
14static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls); 14static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
15static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls); 15static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
16 16
17extern unsigned long __start_syscalls_metadata[];
18extern unsigned long __stop_syscalls_metadata[];
19
20static struct syscall_metadata **syscalls_metadata;
21
22static struct syscall_metadata *find_syscall_meta(unsigned long syscall)
23{
24 struct syscall_metadata *start;
25 struct syscall_metadata *stop;
26 char str[KSYM_SYMBOL_LEN];
27
28
29 start = (struct syscall_metadata *)__start_syscalls_metadata;
30 stop = (struct syscall_metadata *)__stop_syscalls_metadata;
31 kallsyms_lookup(syscall, NULL, NULL, NULL, str);
32
33 for ( ; start < stop; start++) {
34 /*
35 * Only compare after the "sys" prefix. Archs that use
36 * syscall wrappers may have syscall symbol aliases prefixed
37 * with "SyS" instead of "sys", leading to an unwanted
38 * mismatch.
39 */
40 if (start->name && !strcmp(start->name + 3, str + 3))
41 return start;
42 }
43 return NULL;
44}
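
A standalone sketch of why the comparison above skips the first three characters (illustrative strings; not kernel code): wrapper-using architectures alias sys_foo as SyS_foo, and only the part after the prefix is stable.

#include <stdio.h>
#include <string.h>

int main(void)
{
	const char *meta = "sys_read";	/* name in syscalls_metadata */
	const char *sym  = "SyS_read";	/* name from kallsyms_lookup() */

	/* strcmp("sys_read", "SyS_read") != 0, but comparing past the
	 * three-character prefix matches them up */
	if (!strcmp(meta + 3, sym + 3))
		printf("matched %s with %s\n", meta, sym);
	return 0;
}
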
45
46static struct syscall_metadata *syscall_nr_to_meta(int nr)
47{
48 if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
49 return NULL;
50
51 return syscalls_metadata[nr];
52}
53
17enum print_line_t 54enum print_line_t
18print_syscall_enter(struct trace_iterator *iter, int flags) 55print_syscall_enter(struct trace_iterator *iter, int flags)
19{ 56{
@@ -30,7 +67,7 @@ print_syscall_enter(struct trace_iterator *iter, int flags)
30 if (!entry) 67 if (!entry)
31 goto end; 68 goto end;
32 69
33 if (entry->enter_id != ent->type) { 70 if (entry->enter_event->id != ent->type) {
34 WARN_ON_ONCE(1); 71 WARN_ON_ONCE(1);
35 goto end; 72 goto end;
36 } 73 }
@@ -85,7 +122,7 @@ print_syscall_exit(struct trace_iterator *iter, int flags)
85 return TRACE_TYPE_HANDLED; 122 return TRACE_TYPE_HANDLED;
86 } 123 }
87 124
88 if (entry->exit_id != ent->type) { 125 if (entry->exit_event->id != ent->type) {
89 WARN_ON_ONCE(1); 126 WARN_ON_ONCE(1);
90 return TRACE_TYPE_UNHANDLED; 127 return TRACE_TYPE_UNHANDLED;
91 } 128 }
@@ -103,24 +140,19 @@ extern char *__bad_type_size(void);
103#define SYSCALL_FIELD(type, name) \ 140#define SYSCALL_FIELD(type, name) \
104 sizeof(type) != sizeof(trace.name) ? \ 141 sizeof(type) != sizeof(trace.name) ? \
105 __bad_type_size() : \ 142 __bad_type_size() : \
106 #type, #name, offsetof(typeof(trace), name), sizeof(trace.name) 143 #type, #name, offsetof(typeof(trace), name), \
144 sizeof(trace.name), is_signed_type(type)
107 145
108int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) 146int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
109{ 147{
110 int i; 148 int i;
111 int nr;
112 int ret; 149 int ret;
113 struct syscall_metadata *entry; 150 struct syscall_metadata *entry = call->data;
114 struct syscall_trace_enter trace; 151 struct syscall_trace_enter trace;
115 int offset = offsetof(struct syscall_trace_enter, args); 152 int offset = offsetof(struct syscall_trace_enter, args);
116 153
117 nr = syscall_name_to_nr(call->data); 154 ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
118 entry = syscall_nr_to_meta(nr); 155 "\tsigned:%u;\n",
119
120 if (!entry)
121 return 0;
122
123 ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n",
124 SYSCALL_FIELD(int, nr)); 156 SYSCALL_FIELD(int, nr));
125 if (!ret) 157 if (!ret)
126 return 0; 158 return 0;
@@ -130,8 +162,10 @@ int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
130 entry->args[i]); 162 entry->args[i]);
131 if (!ret) 163 if (!ret)
132 return 0; 164 return 0;
133 ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;\n", offset, 165 ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;"
134 sizeof(unsigned long)); 166 "\tsigned:%u;\n", offset,
167 sizeof(unsigned long),
168 is_signed_type(unsigned long));
135 if (!ret) 169 if (!ret)
136 return 0; 170 return 0;
137 offset += sizeof(unsigned long); 171 offset += sizeof(unsigned long);
@@ -163,8 +197,10 @@ int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s)
163 struct syscall_trace_exit trace; 197 struct syscall_trace_exit trace;
164 198
165 ret = trace_seq_printf(s, 199 ret = trace_seq_printf(s,
166 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" 200 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
167 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n", 201 "\tsigned:%u;\n"
202 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
203 "\tsigned:%u;\n",
168 SYSCALL_FIELD(int, nr), 204 SYSCALL_FIELD(int, nr),
169 SYSCALL_FIELD(long, ret)); 205 SYSCALL_FIELD(long, ret));
170 if (!ret) 206 if (!ret)
@@ -176,19 +212,12 @@ int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s)
176int syscall_enter_define_fields(struct ftrace_event_call *call) 212int syscall_enter_define_fields(struct ftrace_event_call *call)
177{ 213{
178 struct syscall_trace_enter trace; 214 struct syscall_trace_enter trace;
179 struct syscall_metadata *meta; 215 struct syscall_metadata *meta = call->data;
180 int ret; 216 int ret;
181 int nr;
182 int i; 217 int i;
183 int offset = offsetof(typeof(trace), args); 218 int offset = offsetof(typeof(trace), args);
184 219
185 nr = syscall_name_to_nr(call->data); 220 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
186 meta = syscall_nr_to_meta(nr);
187
188 if (!meta)
189 return 0;
190
191 ret = trace_define_common_fields(call);
192 if (ret) 221 if (ret)
193 return ret; 222 return ret;
194 223
@@ -208,11 +237,11 @@ int syscall_exit_define_fields(struct ftrace_event_call *call)
208 struct syscall_trace_exit trace; 237 struct syscall_trace_exit trace;
209 int ret; 238 int ret;
210 239
211 ret = trace_define_common_fields(call); 240 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
212 if (ret) 241 if (ret)
213 return ret; 242 return ret;
214 243
215 ret = trace_define_field(call, SYSCALL_FIELD(long, ret), 0, 244 ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
216 FILTER_OTHER); 245 FILTER_OTHER);
217 246
218 return ret; 247 return ret;
@@ -239,8 +268,8 @@ void ftrace_syscall_enter(struct pt_regs *regs, long id)
239 268
240 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; 269 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
241 270
242 event = trace_current_buffer_lock_reserve(&buffer, sys_data->enter_id, 271 event = trace_current_buffer_lock_reserve(&buffer,
243 size, 0, 0); 272 sys_data->enter_event->id, size, 0, 0);
244 if (!event) 273 if (!event)
245 return; 274 return;
246 275
@@ -271,8 +300,8 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret)
271 if (!sys_data) 300 if (!sys_data)
272 return; 301 return;
273 302
274 event = trace_current_buffer_lock_reserve(&buffer, sys_data->exit_id, 303 event = trace_current_buffer_lock_reserve(&buffer,
275 sizeof(*entry), 0, 0); 304 sys_data->exit_event->id, sizeof(*entry), 0, 0);
276 if (!event) 305 if (!event)
277 return; 306 return;
278 307
@@ -285,23 +314,18 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret)
285 trace_current_buffer_unlock_commit(buffer, event, 0, 0); 314 trace_current_buffer_unlock_commit(buffer, event, 0, 0);
286} 315}
287 316
288int reg_event_syscall_enter(void *ptr) 317int reg_event_syscall_enter(struct ftrace_event_call *call)
289{ 318{
290 int ret = 0; 319 int ret = 0;
291 int num; 320 int num;
292 char *name;
293 321
294 name = (char *)ptr; 322 num = ((struct syscall_metadata *)call->data)->syscall_nr;
295 num = syscall_name_to_nr(name);
296 if (num < 0 || num >= NR_syscalls) 323 if (num < 0 || num >= NR_syscalls)
297 return -ENOSYS; 324 return -ENOSYS;
298 mutex_lock(&syscall_trace_lock); 325 mutex_lock(&syscall_trace_lock);
299 if (!sys_refcount_enter) 326 if (!sys_refcount_enter)
300 ret = register_trace_sys_enter(ftrace_syscall_enter); 327 ret = register_trace_sys_enter(ftrace_syscall_enter);
301 if (ret) { 328 if (!ret) {
302 pr_info("event trace: Could not activate"
303 "syscall entry trace point");
304 } else {
305 set_bit(num, enabled_enter_syscalls); 329 set_bit(num, enabled_enter_syscalls);
306 sys_refcount_enter++; 330 sys_refcount_enter++;
307 } 331 }
@@ -309,13 +333,11 @@ int reg_event_syscall_enter(void *ptr)
309 return ret; 333 return ret;
310} 334}
311 335
312void unreg_event_syscall_enter(void *ptr) 336void unreg_event_syscall_enter(struct ftrace_event_call *call)
313{ 337{
314 int num; 338 int num;
315 char *name;
316 339
317 name = (char *)ptr; 340 num = ((struct syscall_metadata *)call->data)->syscall_nr;
318 num = syscall_name_to_nr(name);
319 if (num < 0 || num >= NR_syscalls) 341 if (num < 0 || num >= NR_syscalls)
320 return; 342 return;
321 mutex_lock(&syscall_trace_lock); 343 mutex_lock(&syscall_trace_lock);
@@ -326,23 +348,18 @@ void unreg_event_syscall_enter(void *ptr)
326 mutex_unlock(&syscall_trace_lock); 348 mutex_unlock(&syscall_trace_lock);
327} 349}
328 350
329int reg_event_syscall_exit(void *ptr) 351int reg_event_syscall_exit(struct ftrace_event_call *call)
330{ 352{
331 int ret = 0; 353 int ret = 0;
332 int num; 354 int num;
333 char *name;
334 355
335 name = (char *)ptr; 356 num = ((struct syscall_metadata *)call->data)->syscall_nr;
336 num = syscall_name_to_nr(name);
337 if (num < 0 || num >= NR_syscalls) 357 if (num < 0 || num >= NR_syscalls)
338 return -ENOSYS; 358 return -ENOSYS;
339 mutex_lock(&syscall_trace_lock); 359 mutex_lock(&syscall_trace_lock);
340 if (!sys_refcount_exit) 360 if (!sys_refcount_exit)
341 ret = register_trace_sys_exit(ftrace_syscall_exit); 361 ret = register_trace_sys_exit(ftrace_syscall_exit);
342 if (ret) { 362 if (!ret) {
343 pr_info("event trace: Could not activate"
344 "syscall exit trace point");
345 } else {
346 set_bit(num, enabled_exit_syscalls); 363 set_bit(num, enabled_exit_syscalls);
347 sys_refcount_exit++; 364 sys_refcount_exit++;
348 } 365 }
@@ -350,13 +367,11 @@ int reg_event_syscall_exit(void *ptr)
350 return ret; 367 return ret;
351} 368}
352 369
353void unreg_event_syscall_exit(void *ptr) 370void unreg_event_syscall_exit(struct ftrace_event_call *call)
354{ 371{
355 int num; 372 int num;
356 char *name;
357 373
358 name = (char *)ptr; 374 num = ((struct syscall_metadata *)call->data)->syscall_nr;
359 num = syscall_name_to_nr(name);
360 if (num < 0 || num >= NR_syscalls) 375 if (num < 0 || num >= NR_syscalls)
361 return; 376 return;
362 mutex_lock(&syscall_trace_lock); 377 mutex_lock(&syscall_trace_lock);
@@ -367,13 +382,44 @@ void unreg_event_syscall_exit(void *ptr)
367 mutex_unlock(&syscall_trace_lock); 382 mutex_unlock(&syscall_trace_lock);
368} 383}
369 384
370struct trace_event event_syscall_enter = { 385int init_syscall_trace(struct ftrace_event_call *call)
371 .trace = print_syscall_enter, 386{
372}; 387 int id;
388
389 id = register_ftrace_event(call->event);
390 if (!id)
391 return -ENODEV;
392 call->id = id;
393 INIT_LIST_HEAD(&call->fields);
394 return 0;
395}
396
397int __init init_ftrace_syscalls(void)
398{
399 struct syscall_metadata *meta;
400 unsigned long addr;
401 int i;
402
403 syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
404 NR_syscalls, GFP_KERNEL);
405 if (!syscalls_metadata) {
406 WARN_ON(1);
407 return -ENOMEM;
408 }
409
410 for (i = 0; i < NR_syscalls; i++) {
411 addr = arch_syscall_addr(i);
412 meta = find_syscall_meta(addr);
413 if (!meta)
414 continue;
415
416 meta->syscall_nr = i;
417 syscalls_metadata[i] = meta;
418 }
373 419
374struct trace_event event_syscall_exit = { 420 return 0;
375 .trace = print_syscall_exit, 421}
376}; 422core_initcall(init_ftrace_syscalls);
377 423
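init_syscall_trace() becomes the per-event init callback (register the output event, remember its id, initialise the field list), replacing the two static trace_event structs, while the new core initcall builds the table that makes the metadata lookups above O(1): for every syscall-table slot it resolves the handler address and matches it against the compiled-in metadata entries. An accessor over that table would presumably look like the following sketch (the helper is hypothetical here, though the file is expected to provide an equivalent):

	static struct syscall_metadata *syscall_nr_to_meta(int nr)
	{
		if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
			return NULL;

		return syscalls_metadata[nr];
	}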
378#ifdef CONFIG_EVENT_PROFILE 424#ifdef CONFIG_EVENT_PROFILE
379 425
@@ -387,8 +433,10 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
387 struct syscall_metadata *sys_data; 433 struct syscall_metadata *sys_data;
388 struct syscall_trace_enter *rec; 434 struct syscall_trace_enter *rec;
389 unsigned long flags; 435 unsigned long flags;
436 char *trace_buf;
390 char *raw_data; 437 char *raw_data;
391 int syscall_nr; 438 int syscall_nr;
439 int rctx;
392 int size; 440 int size;
393 int cpu; 441 int cpu;
394 442
@@ -412,41 +460,42 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
412 /* Protect the per cpu buffer, begin the rcu read side */ 460 /* Protect the per cpu buffer, begin the rcu read side */
413 local_irq_save(flags); 461 local_irq_save(flags);
414 462
463 rctx = perf_swevent_get_recursion_context();
464 if (rctx < 0)
465 goto end_recursion;
466
415 cpu = smp_processor_id(); 467 cpu = smp_processor_id();
416 468
417 if (in_nmi()) 469 trace_buf = rcu_dereference(perf_trace_buf);
418 raw_data = rcu_dereference(trace_profile_buf_nmi);
419 else
420 raw_data = rcu_dereference(trace_profile_buf);
421 470
422 if (!raw_data) 471 if (!trace_buf)
423 goto end; 472 goto end;
424 473
425 raw_data = per_cpu_ptr(raw_data, cpu); 474 raw_data = per_cpu_ptr(trace_buf, cpu);
426 475
427 /* zero the dead bytes from align to not leak stack to user */ 476 /* zero the dead bytes from align to not leak stack to user */
428 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; 477 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
429 478
430 rec = (struct syscall_trace_enter *) raw_data; 479 rec = (struct syscall_trace_enter *) raw_data;
431 tracing_generic_entry_update(&rec->ent, 0, 0); 480 tracing_generic_entry_update(&rec->ent, 0, 0);
432 rec->ent.type = sys_data->enter_id; 481 rec->ent.type = sys_data->enter_event->id;
433 rec->nr = syscall_nr; 482 rec->nr = syscall_nr;
434 syscall_get_arguments(current, regs, 0, sys_data->nb_args, 483 syscall_get_arguments(current, regs, 0, sys_data->nb_args,
435 (unsigned long *)&rec->args); 484 (unsigned long *)&rec->args);
436 perf_tp_event(sys_data->enter_id, 0, 1, rec, size); 485 perf_tp_event(sys_data->enter_event->id, 0, 1, rec, size);
437 486
438end: 487end:
488 perf_swevent_put_recursion_context(rctx);
489end_recursion:
439 local_irq_restore(flags); 490 local_irq_restore(flags);
440} 491}
441 492
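The profile (perf) path loses the in_nmi() split between trace_profile_buf and trace_profile_buf_nmi: a single perf_trace_buf suffices once perf_swevent_get_recursion_context() is taken first, since that reserves a per-recursion-level slot and refuses reentry from the same context (including NMI). Note the unwind order at the labels: the recursion context is released before interrupts are restored, mirroring the acquisition order. The resulting bracket, with the record construction elided:

	local_irq_save(flags);

	rctx = perf_swevent_get_recursion_context();
	if (rctx < 0)
		goto end_recursion;	/* already inside a handler at this level */

	trace_buf = rcu_dereference(perf_trace_buf);
	if (!trace_buf)
		goto end;		/* buffer not allocated: no active users */

	raw_data = per_cpu_ptr(trace_buf, smp_processor_id());

	/* zero the trailing u64 so alignment padding never leaks stack data */
	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;

	/* ... build the record and hand it to perf_tp_event() ... */

end:
	perf_swevent_put_recursion_context(rctx);
end_recursion:
	local_irq_restore(flags);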
442int reg_prof_syscall_enter(char *name) 493int prof_sysenter_enable(struct ftrace_event_call *call)
443{ 494{
444 int ret = 0; 495 int ret = 0;
445 int num; 496 int num;
446 497
447 num = syscall_name_to_nr(name); 498 num = ((struct syscall_metadata *)call->data)->syscall_nr;
448 if (num < 0 || num >= NR_syscalls)
449 return -ENOSYS;
450 499
451 mutex_lock(&syscall_trace_lock); 500 mutex_lock(&syscall_trace_lock);
452 if (!sys_prof_refcount_enter) 501 if (!sys_prof_refcount_enter)
@@ -462,13 +511,11 @@ int reg_prof_syscall_enter(char *name)
462 return ret; 511 return ret;
463} 512}
464 513
465void unreg_prof_syscall_enter(char *name) 514void prof_sysenter_disable(struct ftrace_event_call *call)
466{ 515{
467 int num; 516 int num;
468 517
469 num = syscall_name_to_nr(name); 518 num = ((struct syscall_metadata *)call->data)->syscall_nr;
470 if (num < 0 || num >= NR_syscalls)
471 return;
472 519
473 mutex_lock(&syscall_trace_lock); 520 mutex_lock(&syscall_trace_lock);
474 sys_prof_refcount_enter--; 521 sys_prof_refcount_enter--;
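prof_sysenter_enable()/prof_sysenter_disable() drop not just the name lookup but also the NR_syscalls range check: an event only reaches this point if init_ftrace_syscalls() attached valid metadata to it, so the number can be trusted as-is. A plausible reading of the simplified enable path, assembled from the context lines above (the callback and bitmap names are taken on trust from the surrounding file, and the post-registration error handling is sketched, not quoted):

	int prof_sysenter_enable(struct ftrace_event_call *call)
	{
		int ret = 0;
		int num = ((struct syscall_metadata *)call->data)->syscall_nr;

		mutex_lock(&syscall_trace_lock);
		if (!sys_prof_refcount_enter)
			ret = register_trace_sys_enter(prof_syscall_enter);
		if (!ret) {
			set_bit(num, enabled_prof_enter_syscalls);
			sys_prof_refcount_enter++;
		}
		mutex_unlock(&syscall_trace_lock);
		return ret;
	}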
@@ -484,7 +531,9 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
484 struct syscall_trace_exit *rec; 531 struct syscall_trace_exit *rec;
485 unsigned long flags; 532 unsigned long flags;
486 int syscall_nr; 533 int syscall_nr;
534 char *trace_buf;
487 char *raw_data; 535 char *raw_data;
536 int rctx;
488 int size; 537 int size;
489 int cpu; 538 int cpu;
490 539
@@ -510,17 +559,19 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
510 559
511 /* Protect the per cpu buffer, begin the rcu read side */ 560 /* Protect the per cpu buffer, begin the rcu read side */
512 local_irq_save(flags); 561 local_irq_save(flags);
562
563 rctx = perf_swevent_get_recursion_context();
564 if (rctx < 0)
565 goto end_recursion;
566
513 cpu = smp_processor_id(); 567 cpu = smp_processor_id();
514 568
515 if (in_nmi()) 569 trace_buf = rcu_dereference(perf_trace_buf);
516 raw_data = rcu_dereference(trace_profile_buf_nmi);
517 else
518 raw_data = rcu_dereference(trace_profile_buf);
519 570
520 if (!raw_data) 571 if (!trace_buf)
521 goto end; 572 goto end;
522 573
523 raw_data = per_cpu_ptr(raw_data, cpu); 574 raw_data = per_cpu_ptr(trace_buf, cpu);
524 575
525 /* zero the dead bytes from align to not leak stack to user */ 576 /* zero the dead bytes from align to not leak stack to user */
526 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; 577 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
@@ -528,24 +579,24 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
528 rec = (struct syscall_trace_exit *)raw_data; 579 rec = (struct syscall_trace_exit *)raw_data;
529 580
530 tracing_generic_entry_update(&rec->ent, 0, 0); 581 tracing_generic_entry_update(&rec->ent, 0, 0);
531 rec->ent.type = sys_data->exit_id; 582 rec->ent.type = sys_data->exit_event->id;
532 rec->nr = syscall_nr; 583 rec->nr = syscall_nr;
533 rec->ret = syscall_get_return_value(current, regs); 584 rec->ret = syscall_get_return_value(current, regs);
534 585
535 perf_tp_event(sys_data->exit_id, 0, 1, rec, size); 586 perf_tp_event(sys_data->exit_event->id, 0, 1, rec, size);
536 587
537end: 588end:
589 perf_swevent_put_recursion_context(rctx);
590end_recursion:
538 local_irq_restore(flags); 591 local_irq_restore(flags);
539} 592}
540 593
541int reg_prof_syscall_exit(char *name) 594int prof_sysexit_enable(struct ftrace_event_call *call)
542{ 595{
543 int ret = 0; 596 int ret = 0;
544 int num; 597 int num;
545 598
546 num = syscall_name_to_nr(name); 599 num = ((struct syscall_metadata *)call->data)->syscall_nr;
547 if (num < 0 || num >= NR_syscalls)
548 return -ENOSYS;
549 600
550 mutex_lock(&syscall_trace_lock); 601 mutex_lock(&syscall_trace_lock);
551 if (!sys_prof_refcount_exit) 602 if (!sys_prof_refcount_exit)
@@ -561,13 +612,11 @@ int reg_prof_syscall_exit(char *name)
561 return ret; 612 return ret;
562} 613}
563 614
564void unreg_prof_syscall_exit(char *name) 615void prof_sysexit_disable(struct ftrace_event_call *call)
565{ 616{
566 int num; 617 int num;
567 618
568 num = syscall_name_to_nr(name); 619 num = ((struct syscall_metadata *)call->data)->syscall_nr;
569 if (num < 0 || num >= NR_syscalls)
570 return;
571 620
572 mutex_lock(&syscall_trace_lock); 621 mutex_lock(&syscall_trace_lock);
573 sys_prof_refcount_exit--; 622 sys_prof_refcount_exit--;