-rw-r--r--  Documentation/ftrace.txt             |  16
-rw-r--r--  Documentation/tracers/mmiotrace.txt  |   6
-rw-r--r--  arch/powerpc/include/asm/ftrace.h    |  14
-rw-r--r--  arch/powerpc/include/asm/module.h    |  16
-rw-r--r--  arch/powerpc/kernel/ftrace.c         | 473
-rw-r--r--  arch/powerpc/kernel/idle.c           |   5
-rw-r--r--  arch/powerpc/kernel/module_32.c      |  10
-rw-r--r--  arch/powerpc/kernel/module_64.c      |  13
-rw-r--r--  arch/x86/Kconfig                     |   1
-rw-r--r--  arch/x86/include/asm/ftrace.h        |   1
-rw-r--r--  arch/x86/include/asm/thread_info.h   |  29
-rw-r--r--  arch/x86/kernel/ftrace.c             |  29
-rw-r--r--  arch/x86/kernel/stacktrace.c         |  64
-rw-r--r--  fs/seq_file.c                        |  14
-rw-r--r--  include/linux/ftrace.h               |  10
-rw-r--r--  include/linux/ring_buffer.h          |   1
-rw-r--r--  include/linux/sched.h                |  23
-rw-r--r--  include/linux/seq_file.h             |   1
-rw-r--r--  include/linux/stacktrace.h           |   8
-rw-r--r--  init/main.c                          |   2
-rw-r--r--  kernel/exit.c                        |   1
-rw-r--r--  kernel/fork.c                        |   3
-rw-r--r--  kernel/power/disk.c                  |  13
-rw-r--r--  kernel/power/main.c                  |   5
-rw-r--r--  kernel/sched.c                       |   1
-rw-r--r--  kernel/trace/Kconfig                 |   3
-rw-r--r--  kernel/trace/ftrace.c                | 101
-rw-r--r--  kernel/trace/ring_buffer.c           |  79
-rw-r--r--  kernel/trace/trace.c                 | 182
-rw-r--r--  kernel/trace/trace.h                 |  10
-rw-r--r--  kernel/trace/trace_mmiotrace.c       |  16
-rwxr-xr-x  scripts/recordmcount.pl              |  48
32 files changed, 1047 insertions(+), 151 deletions(-)
diff --git a/Documentation/ftrace.txt b/Documentation/ftrace.txt
index 753f4de4b175..35a78bc6651d 100644
--- a/Documentation/ftrace.txt
+++ b/Documentation/ftrace.txt
@@ -324,7 +324,7 @@ output. To see what is available, simply cat the file:
 
   cat /debug/tracing/trace_options
   print-parent nosym-offset nosym-addr noverbose noraw nohex nobin \
-  noblock nostacktrace nosched-tree
+  noblock nostacktrace nosched-tree nouserstacktrace nosym-userobj
 
 To disable one of the options, echo in the option prepended with "no".
 
@@ -378,6 +378,20 @@ Here are the available options:
   When a trace is recorded, so is the stack of functions.
   This allows for back traces of trace sites.
 
+  userstacktrace - This option changes the trace. It records a
+                   stacktrace of the current userspace thread.
+
+  sym-userobj - when user stacktraces are enabled, look up which object the
+                address belongs to, and print a relative address.
+                This is especially useful when ASLR is on, otherwise you
+                don't get a chance to resolve the address to
+                object/file/line after the app is no longer running.
+
+                The lookup is performed when you read trace, trace_pipe or
+                latency_trace. Example:
+
+                a.out-1623  [000] 40874.465068: /root/a.out[+0x480] <-/root/a.out[+0x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6]
+
   sched-tree - TBD (any users??)
 
 
diff --git a/Documentation/tracers/mmiotrace.txt b/Documentation/tracers/mmiotrace.txt
index 5bbbe2096223..cde23b4a12a1 100644
--- a/Documentation/tracers/mmiotrace.txt
+++ b/Documentation/tracers/mmiotrace.txt
@@ -37,7 +37,7 @@ $ echo mmiotrace > /debug/tracing/current_tracer
 $ cat /debug/tracing/trace_pipe > mydump.txt &
 Start X or whatever.
 $ echo "X is up" > /debug/tracing/trace_marker
-$ echo none > /debug/tracing/current_tracer
+$ echo nop > /debug/tracing/current_tracer
 Check for lost events.
 
 
@@ -66,7 +66,7 @@ which action. It is recommended to place descriptive markers about what you
 do.
 
 Shut down mmiotrace (requires root privileges):
-$ echo none > /debug/tracing/current_tracer
+$ echo nop > /debug/tracing/current_tracer
 The 'cat' process exits. If it does not, kill it by issuing 'fg' command and
 pressing ctrl+c.
 
@@ -81,7 +81,9 @@ are:
 $ cat /debug/tracing/trace_entries
 gives you a number. Approximately double this number and write it back, for
 instance:
+$ echo 0 > /debug/tracing/tracing_enabled
 $ echo 128000 > /debug/tracing/trace_entries
+$ echo 1 > /debug/tracing/tracing_enabled
 Then start again from the top.
 
 If you are doing a trace for a driver project, e.g. Nouveau, you should also
diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h
index b298f7a631e6..e5f2ae8362f7 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -7,7 +7,19 @@
 
 #ifndef __ASSEMBLY__
 extern void _mcount(void);
-#endif
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+        /* relocation of the mcount call site is the same as the address */
+        return addr;
+}
+
+struct dyn_arch_ftrace {
+        struct module *mod;
+};
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* __ASSEMBLY__ */
 
 #endif
 
diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h
index e5f14b13ccf0..08454880a2c0 100644
--- a/arch/powerpc/include/asm/module.h
+++ b/arch/powerpc/include/asm/module.h
@@ -34,11 +34,19 @@ struct mod_arch_specific {
 #ifdef __powerpc64__
         unsigned int stubs_section;     /* Index of stubs section in module */
         unsigned int toc_section;       /* What section is the TOC? */
-#else
+#ifdef CONFIG_DYNAMIC_FTRACE
+        unsigned long toc;
+        unsigned long tramp;
+#endif
+
+#else /* powerpc64 */
         /* Indices of PLT sections within module. */
         unsigned int core_plt_section;
         unsigned int init_plt_section;
+#ifdef CONFIG_DYNAMIC_FTRACE
+        unsigned long tramp;
 #endif
+#endif /* powerpc64 */
 
         /* List of BUG addresses, source line numbers and filenames */
         struct list_head bug_list;
@@ -68,6 +76,12 @@ struct mod_arch_specific {
 # endif /* MODULE */
 #endif
 
+#ifdef CONFIG_DYNAMIC_FTRACE
+#  ifdef MODULE
+        asm(".section .ftrace.tramp,\"ax\",@nobits; .align 3; .previous");
+#  endif /* MODULE */
+#endif
+
 
 struct exception_table_entry;
 void sort_ex_table(struct exception_table_entry *start,
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
index f4b006ed0ab1..3271cd698e4c 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/ftrace.c
@@ -9,22 +9,30 @@
 
 #include <linux/spinlock.h>
 #include <linux/hardirq.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
 #include <linux/ftrace.h>
 #include <linux/percpu.h>
 #include <linux/init.h>
 #include <linux/list.h>
 
 #include <asm/cacheflush.h>
+#include <asm/code-patching.h>
 #include <asm/ftrace.h>
 
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(fmt , ...)       do { } while (0)
+#endif
 
-static unsigned int ftrace_nop = 0x60000000;
+static unsigned int ftrace_nop = PPC_NOP_INSTR;
 
 #ifdef CONFIG_PPC32
 # define GET_ADDR(addr) addr
 #else
 /* PowerPC64's functions are data that points to the functions */
-# define GET_ADDR(addr) *(unsigned long *)addr
+# define GET_ADDR(addr) (*(unsigned long *)addr)
 #endif
 
 
@@ -33,12 +41,12 @@ static unsigned int ftrace_calc_offset(long ip, long addr)
         return (int)(addr - ip);
 }
 
-unsigned char *ftrace_nop_replace(void)
+static unsigned char *ftrace_nop_replace(void)
 {
         return (char *)&ftrace_nop;
 }
 
-unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
+static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
 {
         static unsigned int op;
 
@@ -68,49 +76,434 @@ unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
 # define _ASM_PTR       " .long "
 #endif
 
-int
+static int
 ftrace_modify_code(unsigned long ip, unsigned char *old_code,
                    unsigned char *new_code)
 {
-        unsigned replaced;
-        unsigned old = *(unsigned *)old_code;
-        unsigned new = *(unsigned *)new_code;
-        int faulted = 0;
+        unsigned char replaced[MCOUNT_INSN_SIZE];
 
         /*
          * Note: Due to modules and __init, code can
          * disappear and change, we need to protect against faulting
-         * as well as code changing.
+         * as well as code changing. We do this by using the
+         * probe_kernel_* functions.
          *
          * No real locking needed, this code is run through
-         * kstop_machine.
+         * kstop_machine, or before SMP starts.
          */
-        asm volatile (
-        "1: lwz         %1, 0(%2)\n"
-        "   cmpw        %1, %5\n"
-        "   bne         2f\n"
-        "   stwu        %3, 0(%2)\n"
-        "2:\n"
-        ".section .fixup, \"ax\"\n"
-        "3:     li %0, 1\n"
-        "       b 2b\n"
-        ".previous\n"
-        ".section __ex_table,\"a\"\n"
-        _ASM_ALIGN "\n"
-        _ASM_PTR "1b, 3b\n"
-        ".previous"
-        : "=r"(faulted), "=r"(replaced)
-        : "r"(ip), "r"(new),
-          "0"(faulted), "r"(old)
-        : "memory");
-
-        if (replaced != old && replaced != new)
-                faulted = 2;
-
-        if (!faulted)
-                flush_icache_range(ip, ip + 8);
-
-        return faulted;
+
+        /* read the text we want to modify */
+        if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+                return -EFAULT;
+
+        /* Make sure it is what we expect it to be */
+        if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
+                return -EINVAL;
+
+        /* replace the text with the new text */
+        if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE))
+                return -EPERM;
+
+        flush_icache_range(ip, ip + 8);
+
+        return 0;
+}
+
+/*
+ * Helper functions that are the same for both PPC64 and PPC32.
+ */
+static int test_24bit_addr(unsigned long ip, unsigned long addr)
+{
+        long diff;
+
+        /*
+         * Can we get to addr from ip in 24 bits?
+         * (26 really, since we multiply by 4 for 4 byte alignment)
+         */
+        diff = addr - ip;
+
+        /*
+         * Return true if diff is less than 1 << 25
+         * and greater than -1 << 26.
+         */
+        return (diff < (1 << 25)) && (diff > (-1 << 26));
+}
+
+static int is_bl_op(unsigned int op)
+{
+        return (op & 0xfc000003) == 0x48000001;
+}
+
+static int test_offset(unsigned long offset)
+{
+        return (offset + 0x2000000 > 0x3ffffff) || ((offset & 3) != 0);
+}
+
+static unsigned long find_bl_target(unsigned long ip, unsigned int op)
+{
+        static int offset;
+
+        offset = (op & 0x03fffffc);
+        /* make it signed */
+        if (offset & 0x02000000)
+                offset |= 0xfe000000;
+
+        return ip + (long)offset;
+}
+
+static unsigned int branch_offset(unsigned long offset)
+{
+        /* return "bl ip+offset" */
+        return 0x48000001 | (offset & 0x03fffffc);
+}
+
+#ifdef CONFIG_PPC64
+static int
+__ftrace_make_nop(struct module *mod,
+                  struct dyn_ftrace *rec, unsigned long addr)
+{
+        unsigned char replaced[MCOUNT_INSN_SIZE * 2];
+        unsigned int *op = (unsigned *)&replaced;
+        unsigned char jmp[8];
+        unsigned long *ptr = (unsigned long *)&jmp;
+        unsigned long ip = rec->ip;
+        unsigned long tramp;
+        int offset;
+
+        /* read where this goes */
+        if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+                return -EFAULT;
+
+        /* Make sure that this is still a 24bit jump */
+        if (!is_bl_op(*op)) {
+                printk(KERN_ERR "Not expected bl: opcode is %x\n", *op);
+                return -EINVAL;
+        }
+
+        /* lets find where the pointer goes */
+        tramp = find_bl_target(ip, *op);
+
+        /*
+         * On PPC64 the trampoline looks like:
+         * 0x3d, 0x82, 0x00, 0x00,    addis   r12,r2, <high>
+         * 0x39, 0x8c, 0x00, 0x00,    addi    r12,r12, <low>
+         *   Where the bytes 2,3,6 and 7 make up the 32bit offset
+         *   to the TOC that holds the pointer.
+         *   to jump to.
+         * 0xf8, 0x41, 0x00, 0x28,    std     r2,40(r1)
+         * 0xe9, 0x6c, 0x00, 0x20,    ld      r11,32(r12)
+         *   The actual address is 32 bytes from the offset
+         *   into the TOC.
+         * 0xe8, 0x4c, 0x00, 0x28,    ld      r2,40(r12)
+         */
+
+        DEBUGP("ip:%lx jumps to %lx r2: %lx", ip, tramp, mod->arch.toc);
+
+        /* Find where the trampoline jumps to */
+        if (probe_kernel_read(jmp, (void *)tramp, 8)) {
+                printk(KERN_ERR "Failed to read %lx\n", tramp);
+                return -EFAULT;
+        }
+
+        DEBUGP(" %08x %08x",
+               (unsigned)(*ptr >> 32),
+               (unsigned)*ptr);
+
+        offset = (unsigned)jmp[2] << 24 |
+                (unsigned)jmp[3] << 16 |
+                (unsigned)jmp[6] << 8 |
+                (unsigned)jmp[7];
+
+        DEBUGP(" %x ", offset);
+
+        /* get the address this jumps to */
+        tramp = mod->arch.toc + offset + 32;
+        DEBUGP("toc: %lx", tramp);
+
+        if (probe_kernel_read(jmp, (void *)tramp, 8)) {
+                printk(KERN_ERR "Failed to read %lx\n", tramp);
+                return -EFAULT;
+        }
+
+        DEBUGP(" %08x %08x\n",
+               (unsigned)(*ptr >> 32),
+               (unsigned)*ptr);
+
+        /* This should match what was called */
+        if (*ptr != GET_ADDR(addr)) {
+                printk(KERN_ERR "addr does not match %lx\n", *ptr);
+                return -EINVAL;
+        }
+
+        /*
+         * We want to nop the line, but the next line is
+         *  0xe8, 0x41, 0x00, 0x28   ld r2,40(r1)
+         * This needs to be turned to a nop too.
+         */
+        if (probe_kernel_read(replaced, (void *)(ip+4), MCOUNT_INSN_SIZE))
+                return -EFAULT;
+
+        if (*op != 0xe8410028) {
+                printk(KERN_ERR "Next line is not ld! (%08x)\n", *op);
+                return -EINVAL;
+        }
+
+        /*
+         * Milton Miller pointed out that we can not blindly do nops.
+         * If a task was preempted when calling a trace function,
+         * the nops will remove the way to restore the TOC in r2
+         * and the r2 TOC will get corrupted.
+         */
+
+        /*
+         * Replace:
+         *   bl <tramp>  <==== will be replaced with "b 1f"
+         *   ld r2,40(r1)
+         *   1:
+         */
+        op[0] = 0x48000008;     /* b +8 */
+
+        if (probe_kernel_write((void *)ip, replaced, MCOUNT_INSN_SIZE))
+                return -EPERM;
+
+        return 0;
+}
+
+#else /* !PPC64 */
+static int
+__ftrace_make_nop(struct module *mod,
+                  struct dyn_ftrace *rec, unsigned long addr)
+{
+        unsigned char replaced[MCOUNT_INSN_SIZE];
+        unsigned int *op = (unsigned *)&replaced;
+        unsigned char jmp[8];
+        unsigned int *ptr = (unsigned int *)&jmp;
+        unsigned long ip = rec->ip;
+        unsigned long tramp;
+        int offset;
+
+        if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+                return -EFAULT;
+
+        /* Make sure that this is still a 24bit jump */
+        if (!is_bl_op(*op)) {
+                printk(KERN_ERR "Not expected bl: opcode is %x\n", *op);
+                return -EINVAL;
+        }
+
+        /* lets find where the pointer goes */
+        tramp = find_bl_target(ip, *op);
+
+        /*
+         * On PPC32 the trampoline looks like:
+         *  lis r11,sym@ha
+         *  addi r11,r11,sym@l
+         *  mtctr r11
+         *  bctr
+         */
+
+        DEBUGP("ip:%lx jumps to %lx", ip, tramp);
+
+        /* Find where the trampoline jumps to */
+        if (probe_kernel_read(jmp, (void *)tramp, 8)) {
+                printk(KERN_ERR "Failed to read %lx\n", tramp);
+                return -EFAULT;
+        }
+
+        DEBUGP(" %08x %08x ", ptr[0], ptr[1]);
+
+        tramp = (ptr[1] & 0xffff) |
+                ((ptr[0] & 0xffff) << 16);
+        if (tramp & 0x8000)
+                tramp -= 0x10000;
+
+        DEBUGP(" %x ", tramp);
+
+        if (tramp != addr) {
+                printk(KERN_ERR
+                       "Trampoline location %08lx does not match addr\n",
+                       tramp);
+                return -EINVAL;
+        }
+
+        op[0] = PPC_NOP_INSTR;
+
+        if (probe_kernel_write((void *)ip, replaced, MCOUNT_INSN_SIZE))
+                return -EPERM;
+
+        return 0;
+}
+#endif /* PPC64 */
+
+int ftrace_make_nop(struct module *mod,
+                    struct dyn_ftrace *rec, unsigned long addr)
+{
+        unsigned char *old, *new;
+        unsigned long ip = rec->ip;
+
+        /*
+         * If the calling address is more than 24 bits away,
+         * then we had to use a trampoline to make the call.
+         * Otherwise just update the call site.
+         */
+        if (test_24bit_addr(ip, addr)) {
+                /* within range */
+                old = ftrace_call_replace(ip, addr);
+                new = ftrace_nop_replace();
+                return ftrace_modify_code(ip, old, new);
+        }
+
+        /*
+         * Out of range jumps are called from modules.
+         * We should either already have a pointer to the module
+         * or it has been passed in.
+         */
+        if (!rec->arch.mod) {
+                if (!mod) {
+                        printk(KERN_ERR "No module loaded addr=%lx\n",
+                               addr);
+                        return -EFAULT;
+                }
+                rec->arch.mod = mod;
+        } else if (mod) {
+                if (mod != rec->arch.mod) {
+                        printk(KERN_ERR
+                               "Record mod %p not equal to passed in mod %p\n",
+                               rec->arch.mod, mod);
+                        return -EINVAL;
+                }
+                /* nothing to do if mod == rec->arch.mod */
+        } else
+                mod = rec->arch.mod;
+
+        return __ftrace_make_nop(mod, rec, addr);
+
+}
+
+#ifdef CONFIG_PPC64
+static int
+__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+        unsigned char replaced[MCOUNT_INSN_SIZE * 2];
+        unsigned int *op = (unsigned *)&replaced;
+        unsigned long ip = rec->ip;
+        unsigned long offset;
+
+        /* read where this goes */
+        if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE * 2))
+                return -EFAULT;
+
+        /*
+         * It should be pointing to two nops or
+         *  b +8; ld r2,40(r1)
+         */
+        if (((op[0] != 0x48000008) || (op[1] != 0xe8410028)) &&
+            ((op[0] != PPC_NOP_INSTR) || (op[1] != PPC_NOP_INSTR))) {
+                printk(KERN_ERR "Expected NOPs but have %x %x\n", op[0], op[1]);
+                return -EINVAL;
+        }
+
+        /* If we never set up a trampoline to ftrace_caller, then bail */
+        if (!rec->arch.mod->arch.tramp) {
+                printk(KERN_ERR "No ftrace trampoline\n");
+                return -EINVAL;
+        }
+
+        /* now calculate a jump to the ftrace caller trampoline */
+        offset = rec->arch.mod->arch.tramp - ip;
+
+        if (test_offset(offset)) {
+                printk(KERN_ERR "REL24 %li out of range!\n",
+                       (long int)offset);
+                return -EINVAL;
+        }
+
+        /* Set to "bl addr" */
+        op[0] = branch_offset(offset);
+        /* ld r2,40(r1) */
+        op[1] = 0xe8410028;
+
+        DEBUGP("write to %lx\n", rec->ip);
+
+        if (probe_kernel_write((void *)ip, replaced, MCOUNT_INSN_SIZE * 2))
+                return -EPERM;
+
+        return 0;
+}
+#else
+static int
+__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+        unsigned char replaced[MCOUNT_INSN_SIZE];
+        unsigned int *op = (unsigned *)&replaced;
+        unsigned long ip = rec->ip;
+        unsigned long offset;
+
+        /* read where this goes */
+        if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+                return -EFAULT;
+
+        /* It should be pointing to a nop */
+        if (op[0] != PPC_NOP_INSTR) {
+                printk(KERN_ERR "Expected NOP but have %x\n", op[0]);
+                return -EINVAL;
+        }
+
+        /* If we never set up a trampoline to ftrace_caller, then bail */
+        if (!rec->arch.mod->arch.tramp) {
+                printk(KERN_ERR "No ftrace trampoline\n");
+                return -EINVAL;
+        }
+
+        /* now calculate a jump to the ftrace caller trampoline */
+        offset = rec->arch.mod->arch.tramp - ip;
+
+        if (test_offset(offset)) {
+                printk(KERN_ERR "REL24 %li out of range!\n",
+                       (long int)offset);
+                return -EINVAL;
+        }
+
+        /* Set to "bl addr" */
+        op[0] = branch_offset(offset);
+
+        DEBUGP("write to %lx\n", rec->ip);
+
+        if (probe_kernel_write((void *)ip, replaced, MCOUNT_INSN_SIZE))
+                return -EPERM;
+
+        return 0;
+}
+#endif /* CONFIG_PPC64 */
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+        unsigned char *old, *new;
+        unsigned long ip = rec->ip;
+
+        /*
+         * If the calling address is more than 24 bits away,
+         * then we had to use a trampoline to make the call.
+         * Otherwise just update the call site.
+         */
+        if (test_24bit_addr(ip, addr)) {
+                /* within range */
+                old = ftrace_nop_replace();
+                new = ftrace_call_replace(ip, addr);
+                return ftrace_modify_code(ip, old, new);
+        }
+
+        /*
+         * Out of range jumps are called from modules.
+         * Being that we are converting from nop, it had better
+         * already have a module defined.
+         */
+        if (!rec->arch.mod) {
+                printk(KERN_ERR "No module loaded\n");
+                return -EINVAL;
+        }
+
+        return __ftrace_make_call(rec, addr);
 }
 
 int ftrace_update_ftrace_func(ftrace_func_t func)
@@ -128,10 +521,10 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
 
 int __init ftrace_dyn_arch_init(void *data)
 {
-        /* This is running in kstop_machine */
+        /* caller expects data to be zero */
+        unsigned long *p = data;
 
-        ftrace_mcount_set(data);
+        *p = 0;
 
         return 0;
 }
-
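The branch helpers added above (branch_offset() and find_bl_target()) pack and
unpack the 26-bit signed, word-aligned displacement that lives in bits 2..25
of a PowerPC bl instruction (0x48000001 is the branch opcode with the link bit
set). A stand-alone round-trip check of that arithmetic, as a userspace sketch
(not part of the patch):

#include <assert.h>
#include <stdio.h>

static unsigned int encode_bl(long offset)
{
        /* "bl ip+offset": keep the low 26 bits, word aligned */
        return 0x48000001u | ((unsigned int)offset & 0x03fffffc);
}

static long decode_bl(unsigned int op)
{
        long offset = op & 0x03fffffc;

        if (offset & 0x02000000)        /* sign-extend bit 25 */
                offset -= 0x04000000;
        return offset;
}

int main(void)
{
        long offs[] = { 8, -8, 0x01fffffc, -0x02000000 };
        unsigned int i;

        for (i = 0; i < sizeof(offs) / sizeof(offs[0]); i++)
                assert(decode_bl(encode_bl(offs[i])) == offs[i]);
        printf("bl displacement round-trip ok\n");
        return 0;
}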
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index 31982d05d81a..88d9c1d5e5fb 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -69,10 +69,15 @@ void cpu_idle(void)
                         smp_mb();
                         local_irq_disable();
 
+                        /* Don't trace irqs off for idle */
+                        stop_critical_timings();
+
                         /* check again after disabling irqs */
                         if (!need_resched() && !cpu_should_die())
                                 ppc_md.power_save();
 
+                        start_critical_timings();
+
                         local_irq_enable();
                         set_thread_flag(TIF_POLLING_NRFLAG);
 
diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c
index 2df91a03462a..f832773fc28e 100644
--- a/arch/powerpc/kernel/module_32.c
+++ b/arch/powerpc/kernel/module_32.c
@@ -22,6 +22,7 @@
 #include <linux/fs.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
+#include <linux/ftrace.h>
 #include <linux/cache.h>
 #include <linux/bug.h>
 #include <linux/sort.h>
@@ -53,6 +54,9 @@ static unsigned int count_relocs(const Elf32_Rela *rela, unsigned int num)
                         r_addend = rela[i].r_addend;
                 }
 
+#ifdef CONFIG_DYNAMIC_FTRACE
+        _count_relocs++;        /* add one for ftrace_caller */
+#endif
         return _count_relocs;
 }
 
@@ -306,5 +310,11 @@ int apply_relocate_add(Elf32_Shdr *sechdrs,
                         return -ENOEXEC;
                 }
         }
+#ifdef CONFIG_DYNAMIC_FTRACE
+        module->arch.tramp =
+                do_plt_call(module->module_core,
+                            (unsigned long)ftrace_caller,
+                            sechdrs, module);
+#endif
         return 0;
 }
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 1af2377e4992..8992b031a7b6 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -20,6 +20,7 @@
 #include <linux/moduleloader.h>
 #include <linux/err.h>
 #include <linux/vmalloc.h>
+#include <linux/ftrace.h>
 #include <linux/bug.h>
 #include <asm/module.h>
 #include <asm/firmware.h>
@@ -163,6 +164,11 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr,
                 }
         }
 
+#ifdef CONFIG_DYNAMIC_FTRACE
+        /* make the trampoline to the ftrace_caller */
+        relocs++;
+#endif
+
         DEBUGP("Looks like a total of %lu stubs, max\n", relocs);
         return relocs * sizeof(struct ppc64_stub_entry);
 }
@@ -441,5 +447,12 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
                 }
         }
 
+#ifdef CONFIG_DYNAMIC_FTRACE
+        me->arch.toc = my_r2(sechdrs, me);
+        me->arch.tramp = stub_for_addr(sechdrs,
+                                       (unsigned long)ftrace_caller,
+                                       me);
+#endif
+
         return 0;
 }
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7a146baaa990..e49a4fd718fe 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -36,6 +36,7 @@ config X86
         select HAVE_ARCH_TRACEHOOK
         select HAVE_GENERIC_DMA_COHERENT if X86_32
         select HAVE_EFFICIENT_UNALIGNED_ACCESS
+        select USER_STACKTRACE_SUPPORT
 
 config ARCH_DEFCONFIG
         string
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 2bb43b433e07..754a3e082f94 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -29,7 +29,6 @@ struct dyn_arch_ftrace {
 #endif /* CONFIG_FUNCTION_TRACER */
 
 #ifdef CONFIG_FUNCTION_RET_TRACER
-#define FTRACE_RET_STACK_SIZE 20
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index e90e81ef6ab9..0921b4018c11 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -40,36 +40,8 @@ struct thread_info {
          */
         __u8                    supervisor_stack[0];
 #endif
-
-#ifdef CONFIG_FUNCTION_RET_TRACER
-        /* Index of current stored adress in ret_stack */
-        int             curr_ret_stack;
-        /* Stack of return addresses for return function tracing */
-        struct ftrace_ret_stack ret_stack[FTRACE_RET_STACK_SIZE];
-        /*
-         * Number of functions that haven't been traced
-         * because of depth overrun.
-         */
-        atomic_t        trace_overrun;
-#endif
 };
 
-#ifdef CONFIG_FUNCTION_RET_TRACER
-#define INIT_THREAD_INFO(tsk)                   \
-{                                               \
-        .task           = &tsk,                 \
-        .exec_domain    = &default_exec_domain, \
-        .flags          = 0,                    \
-        .cpu            = 0,                    \
-        .preempt_count  = 1,                    \
-        .addr_limit     = KERNEL_DS,            \
-        .restart_block = {                      \
-                .fn = do_no_restart_syscall,    \
-        },                                      \
-        .curr_ret_stack = -1,                   \
-        .trace_overrun  = ATOMIC_INIT(0)        \
-}
-#else
 #define INIT_THREAD_INFO(tsk)                   \
 {                                               \
         .task           = &tsk,                 \
@@ -82,7 +54,6 @@ struct thread_info {
                 .fn = do_no_restart_syscall,    \
         },                                      \
 }
-#endif
 
 #define init_thread_info        (init_thread_union.thread_info)
 #define init_stack              (init_thread_union.stack)
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 356bb1eb6e9a..bb137f7297ed 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -350,19 +350,21 @@ static int push_return_trace(unsigned long ret, unsigned long long time,
                                 unsigned long func)
 {
         int index;
-        struct thread_info *ti = current_thread_info();
+
+        if (!current->ret_stack)
+                return -EBUSY;
 
         /* The return trace stack is full */
-        if (ti->curr_ret_stack == FTRACE_RET_STACK_SIZE - 1) {
-                atomic_inc(&ti->trace_overrun);
+        if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
+                atomic_inc(&current->trace_overrun);
                 return -EBUSY;
         }
 
-        index = ++ti->curr_ret_stack;
+        index = ++current->curr_ret_stack;
         barrier();
-        ti->ret_stack[index].ret = ret;
-        ti->ret_stack[index].func = func;
-        ti->ret_stack[index].calltime = time;
+        current->ret_stack[index].ret = ret;
+        current->ret_stack[index].func = func;
+        current->ret_stack[index].calltime = time;
 
         return 0;
 }
@@ -373,13 +375,12 @@ static void pop_return_trace(unsigned long *ret, unsigned long long *time,
 {
         int index;
 
-        struct thread_info *ti = current_thread_info();
-        index = ti->curr_ret_stack;
-        *ret = ti->ret_stack[index].ret;
-        *func = ti->ret_stack[index].func;
-        *time = ti->ret_stack[index].calltime;
-        *overrun = atomic_read(&ti->trace_overrun);
-        ti->curr_ret_stack--;
+        index = current->curr_ret_stack;
+        *ret = current->ret_stack[index].ret;
+        *func = current->ret_stack[index].func;
+        *time = current->ret_stack[index].calltime;
+        *overrun = atomic_read(&current->trace_overrun);
+        current->curr_ret_stack--;
 }
 
 /*
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index a03e7f6d90c3..10786af95545 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -6,6 +6,7 @@
 #include <linux/sched.h>
 #include <linux/stacktrace.h>
 #include <linux/module.h>
+#include <linux/uaccess.h>
 #include <asm/stacktrace.h>
 
 static void save_stack_warning(void *data, char *msg)
@@ -83,3 +84,66 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
                 trace->entries[trace->nr_entries++] = ULONG_MAX;
 }
 EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
+
+/* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */
+
+struct stack_frame {
+        const void __user       *next_fp;
+        unsigned long           ret_addr;
+};
+
+static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
+{
+        int ret;
+
+        if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
+                return 0;
+
+        ret = 1;
+        pagefault_disable();
+        if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
+                ret = 0;
+        pagefault_enable();
+
+        return ret;
+}
+
+static inline void __save_stack_trace_user(struct stack_trace *trace)
+{
+        const struct pt_regs *regs = task_pt_regs(current);
+        const void __user *fp = (const void __user *)regs->bp;
+
+        if (trace->nr_entries < trace->max_entries)
+                trace->entries[trace->nr_entries++] = regs->ip;
+
+        while (trace->nr_entries < trace->max_entries) {
+                struct stack_frame frame;
+
+                frame.next_fp = NULL;
+                frame.ret_addr = 0;
+                if (!copy_stack_frame(fp, &frame))
+                        break;
+                if ((unsigned long)fp < regs->sp)
+                        break;
+                if (frame.ret_addr) {
+                        trace->entries[trace->nr_entries++] =
+                                frame.ret_addr;
+                }
+                if (fp == frame.next_fp)
+                        break;
+                fp = frame.next_fp;
+        }
+}
+
+void save_stack_trace_user(struct stack_trace *trace)
+{
+        /*
+         * Trace user stack if we are not a kernel thread
+         */
+        if (current->mm) {
+                __save_stack_trace_user(trace);
+        }
+        if (trace->nr_entries < trace->max_entries)
+                trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
+
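For context, a hypothetical caller showing the shape of the
save_stack_trace_user() API (dump_user_stack() is not in the patch; struct
stack_trace and the ULONG_MAX terminator are as used above):

static void dump_user_stack(void)
{
        unsigned long entries[16];
        struct stack_trace trace = {
                .max_entries    = ARRAY_SIZE(entries),
                .entries        = entries,
        };
        int i;

        save_stack_trace_user(&trace);

        /* entries[] now holds user return addresses; ULONG_MAX marks
         * the end when there was room left for the sentinel. */
        for (i = 0; i < trace.nr_entries; i++) {
                if (entries[i] == ULONG_MAX)
                        break;
                printk(KERN_DEBUG "  0x%lx\n", entries[i]);
        }
}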
diff --git a/fs/seq_file.c b/fs/seq_file.c
index eba2eabcd2b8..f03220d7891b 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -357,7 +357,18 @@ int seq_printf(struct seq_file *m, const char *f, ...)
 }
 EXPORT_SYMBOL(seq_printf);
 
-static char *mangle_path(char *s, char *p, char *esc)
+/**
+ *      mangle_path -   mangle and copy path to buffer beginning
+ *      @s: buffer start
+ *      @p: beginning of path in above buffer
+ *      @esc: set of characters that need escaping
+ *
+ *      Copy the path from @p to @s, replacing each occurrence of character
+ *      from @esc with usual octal escape.
+ *      Returns pointer past last written character in @s, or NULL in case of
+ *      failure.
+ */
+char *mangle_path(char *s, char *p, char *esc)
 {
         while (s <= p) {
                 char c = *p++;
@@ -376,6 +387,7 @@ static char *mangle_path(char *s, char *p, char *esc)
         }
         return NULL;
 }
+EXPORT_SYMBOL_GPL(mangle_path);
 
 /*
  * return the absolute path of 'dentry' residing in mount 'mnt'.
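The calling pattern for the newly exported mangle_path() is easiest to see
next to d_path(): d_path() composes the name at the end of the caller's
buffer, and mangle_path() copies it back to the buffer start, expanding each
character in @esc to an octal escape (a newline becomes \012).
trace_seq_path() in kernel/trace/trace.c below does exactly this; a condensed
sketch, with file assumed to be a valid struct file pointer:

char buf[PATH_MAX];
char *name = d_path(&file->f_path, buf, sizeof(buf));

if (!IS_ERR(name)) {
        /* copy to the front of buf, escaping '\n' as "\012" */
        char *end = mangle_path(buf, name, "\n");

        if (end)
                printk(KERN_DEBUG "%.*s\n", (int)(end - buf), buf);
}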
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index f7ba4ea5e128..7854d87b97b2 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -257,6 +257,7 @@ extern int ftrace_dump_on_oops;
 
 extern void tracing_start(void);
 extern void tracing_stop(void);
+extern void ftrace_off_permanent(void);
 
 extern void
 ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3);
@@ -290,6 +291,7 @@ ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 0)));
 
 static inline void tracing_start(void) { }
 static inline void tracing_stop(void) { }
+static inline void ftrace_off_permanent(void) { }
 static inline int
 ftrace_printk(const char *fmt, ...)
 {
@@ -323,6 +325,8 @@ struct ftrace_retfunc {
 };
 
 #ifdef CONFIG_FUNCTION_RET_TRACER
+#define FTRACE_RETFUNC_DEPTH 50
+#define FTRACE_RETSTACK_ALLOC_SIZE 32
 /* Type of a callback handler of tracing return function */
 typedef void (*trace_function_return_t)(struct ftrace_retfunc *);
 
@@ -330,6 +334,12 @@ extern int register_ftrace_return(trace_function_return_t func);
 /* The current handler in use */
 extern trace_function_return_t ftrace_function_return;
 extern void unregister_ftrace_return(void);
+
+extern void ftrace_retfunc_init_task(struct task_struct *t);
+extern void ftrace_retfunc_exit_task(struct task_struct *t);
+#else
+static inline void ftrace_retfunc_init_task(struct task_struct *t) { }
+static inline void ftrace_retfunc_exit_task(struct task_struct *t) { }
 #endif
 
 #endif /* _LINUX_FTRACE_H */
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index e097c2e6b6dc..3bb87a753fa3 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -122,6 +122,7 @@ void ring_buffer_normalize_time_stamp(int cpu, u64 *ts);
 
 void tracing_on(void);
 void tracing_off(void);
+void tracing_off_permanent(void);
 
 enum ring_buffer_flags {
         RB_FL_OVERWRITE         = 1 << 0,
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c8e0db464206..bee1e93c95ad 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1352,6 +1352,17 @@ struct task_struct {
         unsigned long default_timer_slack_ns;
 
         struct list_head        *scm_work_list;
+#ifdef CONFIG_FUNCTION_RET_TRACER
+        /* Index of current stored address in ret_stack */
+        int curr_ret_stack;
+        /* Stack of return addresses for return function tracing */
+        struct ftrace_ret_stack *ret_stack;
+        /*
+         * Number of functions that haven't been traced
+         * because of depth overrun.
+         */
+        atomic_t trace_overrun;
+#endif
 };
 
 /*
@@ -2006,18 +2017,6 @@ static inline void setup_thread_stack(struct task_struct *p, struct task_struct
 {
         *task_thread_info(p) = *task_thread_info(org);
         task_thread_info(p)->task = p;
-
-#ifdef CONFIG_FUNCTION_RET_TRACER
-        /*
-         * When fork() creates a child process, this function is called.
-         * But the child task may not inherit the return adresses traced
-         * by the return function tracer because it will directly execute
-         * in userspace and will not return to kernel functions its parent
-         * used.
-         */
-        task_thread_info(p)->curr_ret_stack = -1;
-        atomic_set(&task_thread_info(p)->trace_overrun, 0);
-#endif
 }
 
 static inline unsigned long *end_of_stack(struct task_struct *p)
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index dc50bcc282a8..b3dfa72f13b9 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -34,6 +34,7 @@ struct seq_operations {
 
 #define SEQ_SKIP 1
 
+char *mangle_path(char *s, char *p, char *esc);
 int seq_open(struct file *, const struct seq_operations *);
 ssize_t seq_read(struct file *, char __user *, size_t, loff_t *);
 loff_t seq_lseek(struct file *, loff_t, int);
diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h
index b106fd8e0d5c..1a8cecc4f38c 100644
--- a/include/linux/stacktrace.h
+++ b/include/linux/stacktrace.h
@@ -15,9 +15,17 @@ extern void save_stack_trace_tsk(struct task_struct *tsk,
                                 struct stack_trace *trace);
 
 extern void print_stack_trace(struct stack_trace *trace, int spaces);
+
+#ifdef CONFIG_USER_STACKTRACE_SUPPORT
+extern void save_stack_trace_user(struct stack_trace *trace);
+#else
+# define save_stack_trace_user(trace)                   do { } while (0)
+#endif
+
 #else
 # define save_stack_trace(trace)                        do { } while (0)
 # define save_stack_trace_tsk(tsk, trace)               do { } while (0)
+# define save_stack_trace_user(trace)                   do { } while (0)
 # define print_stack_trace(trace, spaces)               do { } while (0)
 #endif
 
diff --git a/init/main.c b/init/main.c
index e810196bf2f2..79213c0785d2 100644
--- a/init/main.c
+++ b/init/main.c
@@ -723,7 +723,7 @@ int do_one_initcall(initcall_t fn)
                 disable_boot_trace();
                 rettime = ktime_get();
                 delta = ktime_sub(rettime, calltime);
-                ret.duration = (unsigned long long) delta.tv64 >> 10;
+                ret.duration = (unsigned long long) ktime_to_ns(delta) >> 10;
                 trace_boot_ret(&ret, fn);
                 printk("initcall %pF returned %d after %Ld usecs\n", fn,
                         ret.result, ret.duration);
diff --git a/kernel/exit.c b/kernel/exit.c
index 35c8ec2ba03a..e5ae36ebe8af 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1127,7 +1127,6 @@ NORET_TYPE void do_exit(long code)
         preempt_disable();
         /* causes final put_task_struct in finish_task_switch(). */
         tsk->state = TASK_DEAD;
-
         schedule();
         BUG();
         /* Avoid "noreturn function does return". */
diff --git a/kernel/fork.c b/kernel/fork.c
index ac62f43ee430..d6e1a3205f62 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -47,6 +47,7 @@
 #include <linux/mount.h>
 #include <linux/audit.h>
 #include <linux/memcontrol.h>
+#include <linux/ftrace.h>
 #include <linux/profile.h>
 #include <linux/rmap.h>
 #include <linux/acct.h>
@@ -139,6 +140,7 @@ void free_task(struct task_struct *tsk)
         prop_local_destroy_single(&tsk->dirties);
         free_thread_info(tsk->stack);
         rt_mutex_debug_task_free(tsk);
+        ftrace_retfunc_exit_task(tsk);
         free_task_struct(tsk);
 }
 EXPORT_SYMBOL(free_task);
@@ -1269,6 +1271,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
         total_forks++;
         spin_unlock(&current->sighand->siglock);
         write_unlock_irq(&tasklist_lock);
+        ftrace_retfunc_init_task(p);
         proc_fork_connector(p);
         cgroup_post_fork(p);
         return p;
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index c9d74083746f..f77d3819ef57 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -22,7 +22,6 @@
 #include <linux/console.h>
 #include <linux/cpu.h>
 #include <linux/freezer.h>
-#include <linux/ftrace.h>
 
 #include "power.h"
 
@@ -257,7 +256,7 @@ static int create_image(int platform_mode)
 
 int hibernation_snapshot(int platform_mode)
 {
-        int error, ftrace_save;
+        int error;
 
         /* Free memory before shutting down devices. */
         error = swsusp_shrink_memory();
@@ -269,7 +268,6 @@ int hibernation_snapshot(int platform_mode)
                 goto Close;
 
         suspend_console();
-        ftrace_save = __ftrace_enabled_save();
         error = device_suspend(PMSG_FREEZE);
         if (error)
                 goto Recover_platform;
@@ -299,7 +297,6 @@ int hibernation_snapshot(int platform_mode)
  Resume_devices:
         device_resume(in_suspend ?
                 (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
-        __ftrace_enabled_restore(ftrace_save);
         resume_console();
  Close:
         platform_end(platform_mode);
@@ -370,11 +367,10 @@ static int resume_target_kernel(void)
 
 int hibernation_restore(int platform_mode)
 {
-        int error, ftrace_save;
+        int error;
 
         pm_prepare_console();
         suspend_console();
-        ftrace_save = __ftrace_enabled_save();
         error = device_suspend(PMSG_QUIESCE);
         if (error)
                 goto Finish;
@@ -389,7 +385,6 @@ int hibernation_restore(int platform_mode)
         platform_restore_cleanup(platform_mode);
         device_resume(PMSG_RECOVER);
  Finish:
-        __ftrace_enabled_restore(ftrace_save);
         resume_console();
         pm_restore_console();
         return error;
@@ -402,7 +397,7 @@ int hibernation_restore(int platform_mode)
 
 int hibernation_platform_enter(void)
 {
-        int error, ftrace_save;
+        int error;
 
         if (!hibernation_ops)
                 return -ENOSYS;
@@ -417,7 +412,6 @@ int hibernation_platform_enter(void)
                 goto Close;
 
         suspend_console();
-        ftrace_save = __ftrace_enabled_save();
         error = device_suspend(PMSG_HIBERNATE);
         if (error) {
                 if (hibernation_ops->recover)
@@ -452,7 +446,6 @@ int hibernation_platform_enter(void)
                 hibernation_ops->finish();
  Resume_devices:
         device_resume(PMSG_RESTORE);
-        __ftrace_enabled_restore(ftrace_save);
         resume_console();
  Close:
         hibernation_ops->end();
diff --git a/kernel/power/main.c b/kernel/power/main.c
index b8f7ce9473e8..613f16941b85 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -22,7 +22,6 @@
 #include <linux/freezer.h>
 #include <linux/vmstat.h>
 #include <linux/syscalls.h>
-#include <linux/ftrace.h>
 
 #include "power.h"
 
@@ -317,7 +316,7 @@ static int suspend_enter(suspend_state_t state)
  */
 int suspend_devices_and_enter(suspend_state_t state)
 {
-        int error, ftrace_save;
+        int error;
 
         if (!suspend_ops)
                 return -ENOSYS;
@@ -328,7 +327,6 @@ int suspend_devices_and_enter(suspend_state_t state)
                 goto Close;
         }
         suspend_console();
-        ftrace_save = __ftrace_enabled_save();
         suspend_test_start();
         error = device_suspend(PMSG_SUSPEND);
         if (error) {
@@ -360,7 +358,6 @@ int suspend_devices_and_enter(suspend_state_t state)
         suspend_test_start();
         device_resume(PMSG_RESUME);
         suspend_test_finish("resume devices");
-        __ftrace_enabled_restore(ftrace_save);
         resume_console();
  Close:
         if (suspend_ops->end)
diff --git a/kernel/sched.c b/kernel/sched.c
index 4de56108c86f..388d9db044ab 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5901,6 +5901,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
          * The idle tasks have their own, simple scheduling class:
          */
         idle->sched_class = &idle_sched_class;
+        ftrace_retfunc_init_task(idle);
 }
 
 /*
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 61e8cca6ff45..9cbf7761f498 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -3,6 +3,9 @@
 #  select HAVE_FUNCTION_TRACER:
 #
 
+config USER_STACKTRACE_SUPPORT
+        bool
+
 config NOP_TRACER
         bool
 
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index f212da486689..53042f118f23 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1498,10 +1498,77 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
 
 #ifdef CONFIG_FUNCTION_RET_TRACER
 
+static atomic_t ftrace_retfunc_active;
+
 /* The callback that hooks the return of a function */
 trace_function_return_t ftrace_function_return =
                         (trace_function_return_t)ftrace_stub;
 
+
+/* Try to assign a return stack array to FTRACE_RETSTACK_ALLOC_SIZE tasks. */
+static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
+{
+        int i;
+        int ret = 0;
+        unsigned long flags;
+        int start = 0, end = FTRACE_RETSTACK_ALLOC_SIZE;
+        struct task_struct *g, *t;
+
+        for (i = 0; i < FTRACE_RETSTACK_ALLOC_SIZE; i++) {
+                ret_stack_list[i] = kmalloc(FTRACE_RETFUNC_DEPTH
+                                        * sizeof(struct ftrace_ret_stack),
+                                        GFP_KERNEL);
+                if (!ret_stack_list[i]) {
+                        start = 0;
+                        end = i;
+                        ret = -ENOMEM;
+                        goto free;
+                }
+        }
+
+        read_lock_irqsave(&tasklist_lock, flags);
+        do_each_thread(g, t) {
+                if (start == end) {
+                        ret = -EAGAIN;
+                        goto unlock;
+                }
+
+                if (t->ret_stack == NULL) {
+                        t->ret_stack = ret_stack_list[start++];
+                        t->curr_ret_stack = -1;
+                        atomic_set(&t->trace_overrun, 0);
+                }
+        } while_each_thread(g, t);
+
+unlock:
+        read_unlock_irqrestore(&tasklist_lock, flags);
+free:
+        for (i = start; i < end; i++)
+                kfree(ret_stack_list[i]);
+        return ret;
+}
+
+/* Allocate a return stack for each task */
+static int start_return_tracing(void)
+{
+        struct ftrace_ret_stack **ret_stack_list;
+        int ret;
+
+        ret_stack_list = kmalloc(FTRACE_RETSTACK_ALLOC_SIZE *
+                                sizeof(struct ftrace_ret_stack *),
+                                GFP_KERNEL);
+
+        if (!ret_stack_list)
+                return -ENOMEM;
+
+        do {
+                ret = alloc_retstack_tasklist(ret_stack_list);
+        } while (ret == -EAGAIN);
+
+        kfree(ret_stack_list);
+        return ret;
+}
+
 int register_ftrace_return(trace_function_return_t func)
 {
         int ret = 0;
@@ -1516,7 +1583,12 @@ int register_ftrace_return(trace_function_return_t func)
                 ret = -EBUSY;
                 goto out;
         }
-
+        atomic_inc(&ftrace_retfunc_active);
+        ret = start_return_tracing();
+        if (ret) {
+                atomic_dec(&ftrace_retfunc_active);
+                goto out;
+        }
         ftrace_tracing_type = FTRACE_TYPE_RETURN;
         ftrace_function_return = func;
         ftrace_startup();
@@ -1530,6 +1602,7 @@ void unregister_ftrace_return(void)
 {
         mutex_lock(&ftrace_sysctl_lock);
 
+        atomic_dec(&ftrace_retfunc_active);
         ftrace_function_return = (trace_function_return_t)ftrace_stub;
         ftrace_shutdown();
         /* Restore normal tracing type */
@@ -1537,6 +1610,32 @@ void unregister_ftrace_return(void)
 
         mutex_unlock(&ftrace_sysctl_lock);
 }
+
+/* Allocate a return stack for newly created task */
+void ftrace_retfunc_init_task(struct task_struct *t)
+{
+        if (atomic_read(&ftrace_retfunc_active)) {
+                t->ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH
+                                * sizeof(struct ftrace_ret_stack),
+                                GFP_KERNEL);
+                if (!t->ret_stack)
+                        return;
+                t->curr_ret_stack = -1;
+                atomic_set(&t->trace_overrun, 0);
+        } else
+                t->ret_stack = NULL;
+}
+
+void ftrace_retfunc_exit_task(struct task_struct *t)
+{
+        struct ftrace_ret_stack *ret_stack = t->ret_stack;
+
+        t->ret_stack = NULL;
+        /* NULL must become visible to IRQs before we free it: */
+        barrier();
+
+        kfree(ret_stack);
+}
 #endif
 
 
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 85ced143c2c4..e206951603c1 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -18,8 +18,46 @@
18 18
19#include "trace.h" 19#include "trace.h"
20 20
21/* Global flag to disable all recording to ring buffers */ 21/*
22static int ring_buffers_off __read_mostly; 22 * A fast way to enable or disable all ring buffers is to
23 * call tracing_on or tracing_off. Turning off the ring buffers
24 * prevents all ring buffers from being recorded to.
25 * Turning this switch on, makes it OK to write to the
26 * ring buffer, if the ring buffer is enabled itself.
27 *
28 * There's three layers that must be on in order to write
29 * to the ring buffer.
30 *
31 * 1) This global flag must be set.
32 * 2) The ring buffer must be enabled for recording.
33 * 3) The per cpu buffer must be enabled for recording.
34 *
35 * In case of an anomaly, this global flag has a bit set that
36 * will permantly disable all ring buffers.
37 */
38
39/*
40 * Global flag to disable all recording to ring buffers
41 * This has two bits: ON, DISABLED
42 *
43 * ON DISABLED
44 * ---- ----------
45 * 0 0 : ring buffers are off
46 * 1 0 : ring buffers are on
47 * X 1 : ring buffers are permanently disabled
48 */
49
50enum {
51 RB_BUFFERS_ON_BIT = 0,
52 RB_BUFFERS_DISABLED_BIT = 1,
53};
54
55enum {
56 RB_BUFFERS_ON = 1 << RB_BUFFERS_ON_BIT,
57 RB_BUFFERS_DISABLED = 1 << RB_BUFFERS_DISABLED_BIT,
58};
59
60static long ring_buffer_flags __read_mostly = RB_BUFFERS_ON;
23 61
24/** 62/**
25 * tracing_on - enable all tracing buffers 63 * tracing_on - enable all tracing buffers
@@ -29,7 +67,7 @@ static int ring_buffers_off __read_mostly;
29 */ 67 */
30void tracing_on(void) 68void tracing_on(void)
31{ 69{
32 ring_buffers_off = 0; 70 set_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
33} 71}
34 72
35/** 73/**
@@ -42,7 +80,18 @@ void tracing_on(void)
42 */ 80 */
43void tracing_off(void) 81void tracing_off(void)
44{ 82{
45 ring_buffers_off = 1; 83 clear_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
84}
85
86/**
87 * tracing_off_permanent - permanently disable ring buffers
88 *
89 * This function, once called, will disable all ring buffers
90 * permanenty.
91 */
92void tracing_off_permanent(void)
93{
94 set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags);
46} 95}
47 96
48#include "trace.h" 97#include "trace.h"
@@ -1185,7 +1234,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
1185 struct ring_buffer_event *event; 1234 struct ring_buffer_event *event;
1186 int cpu, resched; 1235 int cpu, resched;
1187 1236
1188 if (ring_buffers_off) 1237 if (ring_buffer_flags != RB_BUFFERS_ON)
1189 return NULL; 1238 return NULL;
1190 1239
1191 if (atomic_read(&buffer->record_disabled)) 1240 if (atomic_read(&buffer->record_disabled))
@@ -1297,7 +1346,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
1297 int ret = -EBUSY; 1346 int ret = -EBUSY;
1298 int cpu, resched; 1347 int cpu, resched;
1299 1348
1300 if (ring_buffers_off) 1349 if (ring_buffer_flags != RB_BUFFERS_ON)
1301 return -EBUSY; 1350 return -EBUSY;
1302 1351
1303 if (atomic_read(&buffer->record_disabled)) 1352 if (atomic_read(&buffer->record_disabled))
@@ -2178,12 +2227,14 @@ static ssize_t
2178rb_simple_read(struct file *filp, char __user *ubuf, 2227rb_simple_read(struct file *filp, char __user *ubuf,
2179 size_t cnt, loff_t *ppos) 2228 size_t cnt, loff_t *ppos)
2180{ 2229{
2181 int *p = filp->private_data; 2230 long *p = filp->private_data;
2182 char buf[64]; 2231 char buf[64];
2183 int r; 2232 int r;
2184 2233
2185 /* !ring_buffers_off == tracing_on */ 2234 if (test_bit(RB_BUFFERS_DISABLED_BIT, p))
2186 r = sprintf(buf, "%d\n", !*p); 2235 r = sprintf(buf, "permanently disabled\n");
2236 else
2237 r = sprintf(buf, "%d\n", test_bit(RB_BUFFERS_ON_BIT, p));
2187 2238
2188 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2239 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2189} 2240}
@@ -2192,7 +2243,7 @@ static ssize_t
2192rb_simple_write(struct file *filp, const char __user *ubuf, 2243rb_simple_write(struct file *filp, const char __user *ubuf,
2193 size_t cnt, loff_t *ppos) 2244 size_t cnt, loff_t *ppos)
2194{ 2245{
2195 int *p = filp->private_data; 2246 long *p = filp->private_data;
2196 char buf[64]; 2247 char buf[64];
2197 long val; 2248 long val;
2198 int ret; 2249 int ret;
@@ -2209,8 +2260,10 @@ rb_simple_write(struct file *filp, const char __user *ubuf,
2209 if (ret < 0) 2260 if (ret < 0)
2210 return ret; 2261 return ret;
2211 2262
2212 /* !ring_buffers_off == tracing_on */ 2263 if (val)
2213 *p = !val; 2264 set_bit(RB_BUFFERS_ON_BIT, p);
2265 else
2266 clear_bit(RB_BUFFERS_ON_BIT, p);
2214 2267
2215 (*ppos)++; 2268 (*ppos)++;
2216 2269
@@ -2232,7 +2285,7 @@ static __init int rb_init_debugfs(void)
2232 d_tracer = tracing_init_dentry(); 2285 d_tracer = tracing_init_dentry();
2233 2286
2234 entry = debugfs_create_file("tracing_on", 0644, d_tracer, 2287 entry = debugfs_create_file("tracing_on", 0644, d_tracer,
2235 &ring_buffers_off, &rb_simple_fops); 2288 &ring_buffer_flags, &rb_simple_fops);
2236 if (!entry) 2289 if (!entry)
2237 pr_warning("Could not create debugfs 'tracing_on' entry\n"); 2290 pr_warning("Could not create debugfs 'tracing_on' entry\n");
2238 2291
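
The debugfs file keeps its 0/1 write interface, but the read side now distinguishes the permanent state. Expected interaction, following the rb_simple_read()/rb_simple_write() branches above (debugfs mount point as used throughout this tree):

$ echo 0 > /debug/tracing/tracing_on     # clears RB_BUFFERS_ON_BIT
$ cat /debug/tracing/tracing_on
0
$ echo 1 > /debug/tracing/tracing_on     # sets RB_BUFFERS_ON_BIT
$ cat /debug/tracing/tracing_on
1
$ cat /debug/tracing/tracing_on          # after tracing_off_permanent()
permanently disabled
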
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 4ee6f0375222..a45b59e53fbc 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -30,6 +30,7 @@
30#include <linux/gfp.h> 30#include <linux/gfp.h>
31#include <linux/fs.h> 31#include <linux/fs.h>
32#include <linux/kprobes.h> 32#include <linux/kprobes.h>
33#include <linux/seq_file.h>
33#include <linux/writeback.h> 34#include <linux/writeback.h>
34 35
35#include <linux/stacktrace.h> 36#include <linux/stacktrace.h>
@@ -275,6 +276,8 @@ static const char *trace_options[] = {
275 "ftrace_preempt", 276 "ftrace_preempt",
276 "branch", 277 "branch",
277 "annotate", 278 "annotate",
279 "userstacktrace",
280 "sym-userobj",
278 NULL 281 NULL
279}; 282};
280 283
@@ -421,6 +424,28 @@ trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
421 return trace_seq_putmem(s, hex, j); 424 return trace_seq_putmem(s, hex, j);
422} 425}
423 426
427static int
428trace_seq_path(struct trace_seq *s, struct path *path)
429{
430 unsigned char *p;
431
432 if (s->len >= (PAGE_SIZE - 1))
433 return 0;
434 p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
435 if (!IS_ERR(p)) {
436 p = mangle_path(s->buffer + s->len, p, "\n");
437 if (p) {
438 s->len = p - s->buffer;
439 return 1;
440 }
441 } else {
442 s->buffer[s->len++] = '?';
443 return 1;
444 }
445
446 return 0;
447}
448
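
trace_seq_path() leans on a d_path() convention: the name is assembled at the END of the supplied scratch space and a pointer into that space is returned, after which mangle_path() (newly exported from fs/seq_file.c) slides it forward to the trace_seq cursor while escaping the given characters, here '\n'. A userspace sketch of that convention; fake_d_path() is invented for illustration:

#include <stdio.h>
#include <string.h>

/* Like d_path(): build at the end of 'buf', return a pointer into it. */
static char *fake_d_path(const char *name, char *buf, size_t buflen)
{
	size_t len = strlen(name);

	if (len + 1 > buflen)
		return NULL;
	memcpy(buf + buflen - len - 1, name, len + 1);
	return buf + buflen - len - 1;
}

int main(void)
{
	char scratch[64];
	char *p = fake_d_path("/root/a.out", scratch, sizeof(scratch));

	if (p)	/* the result does not start at scratch[0] */
		printf("%s starts %td bytes into the scratch buffer\n",
		       p, p - scratch);
	return 0;
}
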
424static void 449static void
425trace_seq_reset(struct trace_seq *s) 450trace_seq_reset(struct trace_seq *s)
426{ 451{
@@ -661,6 +686,21 @@ static int trace_stop_count;
661static DEFINE_SPINLOCK(tracing_start_lock); 686static DEFINE_SPINLOCK(tracing_start_lock);
662 687
663/** 688/**
689 * ftrace_off_permanent - disable all ftrace code permanently
690 *
691 * This should only be called when a serious anomaly has
692 * been detected. This will turn off function tracing,
693 * the ring buffers, and other tracing utilities. It takes no
694 * locks and can be called from any context.
695 */
696void ftrace_off_permanent(void)
697{
698 tracing_disabled = 1;
699 ftrace_stop();
700 tracing_off_permanent();
701}
702
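
Because it takes no locks, the permanent switch is usable from any context. A hypothetical self-check might use it like this (the prototype's header is assumed from this series; my_selfcheck() is made up):

#include <linux/kernel.h>
#include <linux/ftrace.h>	/* ftrace_off_permanent(), assumed */

/* Hypothetical: on detecting corrupted trace state, shut all
 * tracing down for good rather than write through bad state. */
static void my_selfcheck(int corrupted)
{
	if (corrupted) {
		ftrace_off_permanent();
		printk(KERN_ERR "tracing disabled permanently\n");
	}
}
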
703/**
664 * tracing_start - quick start of the tracer 704 * tracing_start - quick start of the tracer
665 * 705 *
666 * If tracing is enabled but was stopped by tracing_stop, 706 * If tracing is enabled but was stopped by tracing_stop,
@@ -801,6 +841,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
801 841
802 entry->preempt_count = pc & 0xff; 842 entry->preempt_count = pc & 0xff;
803 entry->pid = (tsk) ? tsk->pid : 0; 843 entry->pid = (tsk) ? tsk->pid : 0;
844 entry->tgid = (tsk) ? tsk->tgid : 0;
804 entry->flags = 845 entry->flags =
805#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT 846#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
806 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | 847 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
@@ -918,6 +959,44 @@ void __trace_stack(struct trace_array *tr,
918 ftrace_trace_stack(tr, data, flags, skip, preempt_count()); 959 ftrace_trace_stack(tr, data, flags, skip, preempt_count());
919} 960}
920 961
962static void ftrace_trace_userstack(struct trace_array *tr,
963 struct trace_array_cpu *data,
964 unsigned long flags, int pc)
965{
966 struct ring_buffer_event *event;
967 struct userstack_entry *entry;
968 struct stack_trace trace;
969 unsigned long irq_flags;
970
971 if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
972 return;
973
974 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
975 &irq_flags);
976 if (!event)
977 return;
978 entry = ring_buffer_event_data(event);
979 tracing_generic_entry_update(&entry->ent, flags, pc);
980 entry->ent.type = TRACE_USER_STACK;
981
982 memset(&entry->caller, 0, sizeof(entry->caller));
983
984 trace.nr_entries = 0;
985 trace.max_entries = FTRACE_STACK_ENTRIES;
986 trace.skip = 0;
987 trace.entries = entry->caller;
988
989 save_stack_trace_user(&trace);
990 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
991}
992
993void __trace_userstack(struct trace_array *tr,
994 struct trace_array_cpu *data,
995 unsigned long flags)
996{
997 ftrace_trace_userstack(tr, data, flags, preempt_count());
998}
999
921static void 1000static void
922ftrace_trace_special(void *__tr, void *__data, 1001ftrace_trace_special(void *__tr, void *__data,
923 unsigned long arg1, unsigned long arg2, unsigned long arg3, 1002 unsigned long arg1, unsigned long arg2, unsigned long arg3,
@@ -941,6 +1020,7 @@ ftrace_trace_special(void *__tr, void *__data,
941 entry->arg3 = arg3; 1020 entry->arg3 = arg3;
942 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 1021 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
943 ftrace_trace_stack(tr, data, irq_flags, 4, pc); 1022 ftrace_trace_stack(tr, data, irq_flags, 4, pc);
1023 ftrace_trace_userstack(tr, data, irq_flags, pc);
944 1024
945 trace_wake_up(); 1025 trace_wake_up();
946} 1026}
@@ -979,6 +1059,7 @@ tracing_sched_switch_trace(struct trace_array *tr,
979 entry->next_cpu = task_cpu(next); 1059 entry->next_cpu = task_cpu(next);
980 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 1060 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
981 ftrace_trace_stack(tr, data, flags, 5, pc); 1061 ftrace_trace_stack(tr, data, flags, 5, pc);
1062 ftrace_trace_userstack(tr, data, flags, pc);
982} 1063}
983 1064
984void 1065void
@@ -1008,6 +1089,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
1008 entry->next_cpu = task_cpu(wakee); 1089 entry->next_cpu = task_cpu(wakee);
1009 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 1090 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
1010 ftrace_trace_stack(tr, data, flags, 6, pc); 1091 ftrace_trace_stack(tr, data, flags, 6, pc);
1092 ftrace_trace_userstack(tr, data, flags, pc);
1011 1093
1012 trace_wake_up(); 1094 trace_wake_up();
1013} 1095}
@@ -1387,6 +1469,78 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
1387 return ret; 1469 return ret;
1388} 1470}
1389 1471
1472static inline int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
1473 unsigned long ip, unsigned long sym_flags)
1474{
1475 struct file *file = NULL;
1476 unsigned long vmstart = 0;
1477 int ret = 1;
1478
1479 if (mm) {
1480 const struct vm_area_struct *vma;
1481
1482 down_read(&mm->mmap_sem);
1483 vma = find_vma(mm, ip);
1484 if (vma) {
1485 file = vma->vm_file;
1486 vmstart = vma->vm_start;
1487 }
1488 if (file) {
1489 ret = trace_seq_path(s, &file->f_path);
1490 if (ret)
1491 ret = trace_seq_printf(s, "[+0x%lx]", ip - vmstart);
1492 }
1493 up_read(&mm->mmap_sem);
1494 }
1495 if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file))
1496 ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
1497 return ret;
1498}
1499
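
seq_print_user_ip() is, in effect, the kernel-side version of resolving an address against a process's memory map: find the vma covering the ip, then print the backing object plus the offset from the mapping start. The same lookup can be modeled in userspace against /proc/self/maps; resolve() below is illustrative, not part of the patch:

#include <stdio.h>

/* Map an address to "object[+offset]" via /proc/self/maps. */
static void resolve(unsigned long ip)
{
	char line[512], path[256];
	unsigned long start, end;
	FILE *f = fopen("/proc/self/maps", "r");

	if (!f)
		return;
	while (fgets(line, sizeof(line), f)) {
		path[0] = '\0';
		if (sscanf(line, "%lx-%lx %*s %*s %*s %*s %255s",
			   &start, &end, path) < 2)
			continue;
		if (path[0] && ip >= start && ip < end) {
			printf("%s[+0x%lx]\n", path, ip - start);
			break;
		}
	}
	fclose(f);
}

int main(void)
{
	resolve((unsigned long)(void *)&resolve);	/* our own .text */
	return 0;
}
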
1500static int
1501seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
1502 unsigned long sym_flags)
1503{
1504 struct mm_struct *mm = NULL;
1505 int ret = 1;
1506 unsigned int i;
1507
1508 if (trace_flags & TRACE_ITER_SYM_USEROBJ) {
1509 struct task_struct *task;
1510 /*
1511 * we do the lookup on the thread group leader,
1512 * since individual threads might have already quit!
1513 */
1514 rcu_read_lock();
1515 task = find_task_by_vpid(entry->ent.tgid);
1516 if (task)
1517 mm = get_task_mm(task);
1518 rcu_read_unlock();
1519 }
1520
1521 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1522 unsigned long ip = entry->caller[i];
1523
1524 if (ip == ULONG_MAX || !ret)
1525 break;
1526 if (i && ret)
1527 ret = trace_seq_puts(s, " <- ");
1528 if (!ip) {
1529 if (ret)
1530 ret = trace_seq_puts(s, "??");
1531 continue;
1532 }
1533 if (!ret)
1534 break;
1535 if (ret)
1536 ret = seq_print_user_ip(s, mm, ip, sym_flags);
1537 }
1538
1539 if (mm)
1540 mmput(mm);
1541 return ret;
1542}
1543
1390static void print_lat_help_header(struct seq_file *m) 1544static void print_lat_help_header(struct seq_file *m)
1391{ 1545{
1392 seq_puts(m, "# _------=> CPU# \n"); 1546 seq_puts(m, "# _------=> CPU# \n");
@@ -1702,6 +1856,15 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1702 field->line); 1856 field->line);
1703 break; 1857 break;
1704 } 1858 }
1859 case TRACE_USER_STACK: {
1860 struct userstack_entry *field;
1861
1862 trace_assign_type(field, entry);
1863
1864 seq_print_userip_objs(field, s, sym_flags);
1865 trace_seq_putc(s, '\n');
1866 break;
1867 }
1705 default: 1868 default:
1706 trace_seq_printf(s, "Unknown type %d\n", entry->type); 1869 trace_seq_printf(s, "Unknown type %d\n", entry->type);
1707 } 1870 }
@@ -1853,6 +2016,19 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
1853 field->line); 2016 field->line);
1854 break; 2017 break;
1855 } 2018 }
2019 case TRACE_USER_STACK: {
2020 struct userstack_entry *field;
2021
2022 trace_assign_type(field, entry);
2023
2024 ret = seq_print_userip_objs(field, s, sym_flags);
2025 if (!ret)
2026 return TRACE_TYPE_PARTIAL_LINE;
2027 ret = trace_seq_putc(s, '\n');
2028 if (!ret)
2029 return TRACE_TYPE_PARTIAL_LINE;
2030 break;
2031 }
1856 } 2032 }
1857 return TRACE_TYPE_HANDLED; 2033 return TRACE_TYPE_HANDLED;
1858} 2034}
@@ -1912,6 +2088,7 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
1912 break; 2088 break;
1913 } 2089 }
1914 case TRACE_SPECIAL: 2090 case TRACE_SPECIAL:
2091 case TRACE_USER_STACK:
1915 case TRACE_STACK: { 2092 case TRACE_STACK: {
1916 struct special_entry *field; 2093 struct special_entry *field;
1917 2094
@@ -2000,6 +2177,7 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2000 break; 2177 break;
2001 } 2178 }
2002 case TRACE_SPECIAL: 2179 case TRACE_SPECIAL:
2180 case TRACE_USER_STACK:
2003 case TRACE_STACK: { 2181 case TRACE_STACK: {
2004 struct special_entry *field; 2182 struct special_entry *field;
2005 2183
@@ -2054,6 +2232,7 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2054 break; 2232 break;
2055 } 2233 }
2056 case TRACE_SPECIAL: 2234 case TRACE_SPECIAL:
2235 case TRACE_USER_STACK:
2057 case TRACE_STACK: { 2236 case TRACE_STACK: {
2058 struct special_entry *field; 2237 struct special_entry *field;
2059 2238
@@ -3488,6 +3667,9 @@ void ftrace_dump(void)
3488 atomic_inc(&global_trace.data[cpu]->disabled); 3667 atomic_inc(&global_trace.data[cpu]->disabled);
3489 } 3668 }
3490 3669
3670 /* don't look at user memory in panic mode */
3671 trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
3672
3491 printk(KERN_TRACE "Dumping ftrace buffer:\n"); 3673 printk(KERN_TRACE "Dumping ftrace buffer:\n");
3492 3674
3493 iter.tr = &global_trace; 3675 iter.tr = &global_trace;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 2cb12fd98f6b..28c15c2ebc22 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -26,6 +26,7 @@ enum trace_type {
26 TRACE_BOOT_CALL, 26 TRACE_BOOT_CALL,
27 TRACE_BOOT_RET, 27 TRACE_BOOT_RET,
28 TRACE_FN_RET, 28 TRACE_FN_RET,
29 TRACE_USER_STACK,
29 30
30 __TRACE_LAST_TYPE 31 __TRACE_LAST_TYPE
31}; 32};
@@ -42,6 +43,7 @@ struct trace_entry {
42 unsigned char flags; 43 unsigned char flags;
43 unsigned char preempt_count; 44 unsigned char preempt_count;
44 int pid; 45 int pid;
46 int tgid;
45}; 47};
46 48
47/* 49/*
@@ -99,6 +101,11 @@ struct stack_entry {
99 unsigned long caller[FTRACE_STACK_ENTRIES]; 101 unsigned long caller[FTRACE_STACK_ENTRIES];
100}; 102};
101 103
104struct userstack_entry {
105 struct trace_entry ent;
106 unsigned long caller[FTRACE_STACK_ENTRIES];
107};
108
102/* 109/*
103 * ftrace_printk entry: 110 * ftrace_printk entry:
104 */ 111 */
@@ -240,6 +247,7 @@ extern void __ftrace_bad_type(void);
240 IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \ 247 IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \
241 IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \ 248 IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \
242 IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \ 249 IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \
250 IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
243 IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ 251 IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \
244 IF_ASSIGN(var, ent, struct special_entry, 0); \ 252 IF_ASSIGN(var, ent, struct special_entry, 0); \
245 IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \ 253 IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \
@@ -500,6 +508,8 @@ enum trace_iterator_flags {
500 TRACE_ITER_PREEMPTONLY = 0x800, 508 TRACE_ITER_PREEMPTONLY = 0x800,
501 TRACE_ITER_BRANCH = 0x1000, 509 TRACE_ITER_BRANCH = 0x1000,
502 TRACE_ITER_ANNOTATE = 0x2000, 510 TRACE_ITER_ANNOTATE = 0x2000,
511 TRACE_ITER_USERSTACKTRACE = 0x4000,
512 TRACE_ITER_SYM_USEROBJ = 0x8000
503}; 513};
504 514
505/* 515/*
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index 433d650eda9f..2a98a206acc2 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -18,12 +18,14 @@ struct header_iter {
18 18
19static struct trace_array *mmio_trace_array; 19static struct trace_array *mmio_trace_array;
20static bool overrun_detected; 20static bool overrun_detected;
21static unsigned long prev_overruns;
21 22
22static void mmio_reset_data(struct trace_array *tr) 23static void mmio_reset_data(struct trace_array *tr)
23{ 24{
24 int cpu; 25 int cpu;
25 26
26 overrun_detected = false; 27 overrun_detected = false;
28 prev_overruns = 0;
27 tr->time_start = ftrace_now(tr->cpu); 29 tr->time_start = ftrace_now(tr->cpu);
28 30
29 for_each_online_cpu(cpu) 31 for_each_online_cpu(cpu)
@@ -123,16 +125,12 @@ static void mmio_close(struct trace_iterator *iter)
123 125
124static unsigned long count_overruns(struct trace_iterator *iter) 126static unsigned long count_overruns(struct trace_iterator *iter)
125{ 127{
126 int cpu;
127 unsigned long cnt = 0; 128 unsigned long cnt = 0;
128/* FIXME: */ 129 unsigned long over = ring_buffer_overruns(iter->tr->buffer);
129#if 0 130
130 for_each_online_cpu(cpu) { 131 if (over > prev_overruns)
131 cnt += iter->overrun[cpu]; 132 cnt = over - prev_overruns;
132 iter->overrun[cpu] = 0; 133 prev_overruns = over;
133 }
134#endif
135 (void)cpu;
136 return cnt; 134 return cnt;
137} 135}
138 136
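
ring_buffer_overruns() reports a monotonically increasing total, so count_overruns() now derives the per-read figure as a delta against the value seen on the previous read. A compact model of that logic:

/* 'total' only ever grows (until a reset, which also clears
 * prev_total); the caller wants just the overruns since last read. */
static unsigned long prev_total;

static unsigned long overruns_since_last(unsigned long total)
{
	unsigned long cnt = 0;

	if (total > prev_total)
		cnt = total - prev_total;
	prev_total = total;
	return cnt;
}
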
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index eeac71c87c66..0197e2f6b544 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -130,11 +130,13 @@ my %weak; # List of weak functions
130my %convert; # List of local functions used that needs conversion 130my %convert; # List of local functions used that needs conversion
131 131
132my $type; 132my $type;
133my $nm_regex; # Find the local functions (return function)
133my $section_regex; # Find the start of a section 134my $section_regex; # Find the start of a section
134my $function_regex; # Find the name of a function 135my $function_regex; # Find the name of a function
135 # (return offset and func name) 136 # (return offset and func name)
136my $mcount_regex; # Find the call site to mcount (return offset) 137my $mcount_regex; # Find the call site to mcount (return offset)
137my $alignment; # The .align value to use for $mcount_section 138my $alignment; # The .align value to use for $mcount_section
139my $section_type; # Section header plus possible alignment command
138 140
139if ($arch eq "x86") { 141if ($arch eq "x86") {
140 if ($bits == 64) { 142 if ($bits == 64) {
@@ -144,9 +146,18 @@ if ($arch eq "x86") {
144 } 146 }
145} 147}
146 148
149#
150# We base the defaults on i386; the other archs are free to
151# override them in the if statements below.
152#
153$nm_regex = "^[0-9a-fA-F]+\\s+t\\s+(\\S+)";
154$section_regex = "Disassembly of section\\s+(\\S+):";
155$function_regex = "^([0-9a-fA-F]+)\\s+<(.*?)>:";
156$mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount\$";
157$section_type = '@progbits';
158$type = ".long";
159
147if ($arch eq "x86_64") { 160if ($arch eq "x86_64") {
148 $section_regex = "Disassembly of section\\s+(\\S+):";
149 $function_regex = "^([0-9a-fA-F]+)\\s+<(.*?)>:";
150 $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount([+-]0x[0-9a-zA-Z]+)?\$"; 161 $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount([+-]0x[0-9a-zA-Z]+)?\$";
151 $type = ".quad"; 162 $type = ".quad";
152 $alignment = 8; 163 $alignment = 8;
@@ -158,10 +169,6 @@ if ($arch eq "x86_64") {
158 $cc .= " -m64"; 169 $cc .= " -m64";
159 170
160} elsif ($arch eq "i386") { 171} elsif ($arch eq "i386") {
161 $section_regex = "Disassembly of section\\s+(\\S+):";
162 $function_regex = "^([0-9a-fA-F]+)\\s+<(.*?)>:";
163 $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount\$";
164 $type = ".long";
165 $alignment = 4; 172 $alignment = 4;
166 173
167 # force flags for this arch 174 # force flags for this arch
@@ -170,6 +177,27 @@ if ($arch eq "x86_64") {
170 $objcopy .= " -O elf32-i386"; 177 $objcopy .= " -O elf32-i386";
171 $cc .= " -m32"; 178 $cc .= " -m32";
172 179
180} elsif ($arch eq "sh") {
181 $alignment = 2;
182
183 # force flags for this arch
184 $ld .= " -m shlelf_linux";
185 $objcopy .= " -O elf32-sh-linux";
186 $cc .= " -m32";
187
188} elsif ($arch eq "powerpc") {
189 $nm_regex = "^[0-9a-fA-F]+\\s+t\\s+(\\.?\\S+)";
190 $function_regex = "^([0-9a-fA-F]+)\\s+<(\\.?.*?)>:";
191 $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s\\.?_mcount\$";
192
193 if ($bits == 64) {
194 $type = ".quad";
195 }
196
197} elsif ($arch eq "arm") {
198 $alignment = 2;
199 $section_type = '%progbits';
200
173} else { 201} else {
174 die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD"; 202 die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD";
175} 203}
@@ -239,7 +267,7 @@ if (!$found_version) {
239# 267#
240open (IN, "$nm $inputfile|") || die "error running $nm"; 268open (IN, "$nm $inputfile|") || die "error running $nm";
241while (<IN>) { 269while (<IN>) {
242 if (/^[0-9a-fA-F]+\s+t\s+(\S+)/) { 270 if (/$nm_regex/) {
243 $locals{$1} = 1; 271 $locals{$1} = 1;
244 } elsif (/^[0-9a-fA-F]+\s+([wW])\s+(\S+)/) { 272 } elsif (/^[0-9a-fA-F]+\s+([wW])\s+(\S+)/) {
245 $weak{$2} = $1; 273 $weak{$2} = $1;
@@ -290,8 +318,8 @@ sub update_funcs
290 if (!$opened) { 318 if (!$opened) {
291 open(FILE, ">$mcount_s") || die "can't create $mcount_s\n"; 319 open(FILE, ">$mcount_s") || die "can't create $mcount_s\n";
292 $opened = 1; 320 $opened = 1;
293 print FILE "\t.section $mcount_section,\"a\",\@progbits\n"; 321 print FILE "\t.section $mcount_section,\"a\",$section_type\n";
294 print FILE "\t.align $alignment\n"; 322 print FILE "\t.align $alignment\n" if (defined($alignment));
295 } 323 }
296 printf FILE "\t%s %s + %d\n", $type, $ref_func, $offsets[$i] - $offset; 324 printf FILE "\t%s %s + %d\n", $type, $ref_func, $offsets[$i] - $offset;
297 } 325 }
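
With $section_type and the optional $alignment in place, the stanza this emits looks like the following on i386 (symbol name and offset invented for illustration; arm would use %progbits, 64-bit archs .quad entries, and powerpc omits the .align line since $alignment is left undefined there):

	.section __mcount_loc,"a",@progbits
	.align 4
	.long my_local_func + 18
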