-rw-r--r--  Documentation/ftrace.txt                  16
-rw-r--r--  Documentation/tracers/mmiotrace.txt        6
-rw-r--r--  arch/powerpc/include/asm/ftrace.h         14
-rw-r--r--  arch/powerpc/include/asm/module.h         16
-rw-r--r--  arch/powerpc/kernel/ftrace.c             473
-rw-r--r--  arch/powerpc/kernel/idle.c                 5
-rw-r--r--  arch/powerpc/kernel/module_32.c           10
-rw-r--r--  arch/powerpc/kernel/module_64.c           13
-rw-r--r--  arch/x86/Kconfig                           1
-rw-r--r--  arch/x86/Kconfig.cpu                       1
-rw-r--r--  arch/x86/boot/tty.c                        2
-rw-r--r--  arch/x86/include/asm/ds.h                140
-rw-r--r--  arch/x86/include/asm/ftrace.h              1
-rw-r--r--  arch/x86/include/asm/thread_info.h        29
-rw-r--r--  arch/x86/kernel/Makefile                   2
-rw-r--r--  arch/x86/kernel/cpu/intel.c                3
-rw-r--r--  arch/x86/kernel/ds.c                     752
-rw-r--r--  arch/x86/kernel/ftrace.c                  29
-rw-r--r--  arch/x86/kernel/i387.c                     2
-rw-r--r--  arch/x86/kernel/io_apic.c                 22
-rw-r--r--  arch/x86/kernel/pci-calgary_64.c           2
-rw-r--r--  arch/x86/kernel/ptrace.c                  98
-rw-r--r--  arch/x86/kernel/stacktrace.c              64
-rw-r--r--  arch/x86/kernel/xsave.c                    2
-rw-r--r--  arch/x86/oprofile/op_model_ppro.c          2
-rw-r--r--  arch/x86/xen/mmu.c                        21
-rw-r--r--  fs/seq_file.c                             14
-rw-r--r--  include/linux/ftrace.h                    10
-rw-r--r--  include/linux/ring_buffer.h                1
-rw-r--r--  include/linux/sched.h                     36
-rw-r--r--  include/linux/seq_file.h                   1
-rw-r--r--  include/linux/stacktrace.h                 8
-rw-r--r--  init/main.c                                2
-rw-r--r--  kernel/exit.c                              1
-rw-r--r--  kernel/fork.c                              3
-rw-r--r--  kernel/power/disk.c                       13
-rw-r--r--  kernel/power/main.c                        5
-rw-r--r--  kernel/sched.c                             1
-rw-r--r--  kernel/trace/Kconfig                      14
-rw-r--r--  kernel/trace/Makefile                      1
-rw-r--r--  kernel/trace/ftrace.c                    101
-rw-r--r--  kernel/trace/ring_buffer.c                79
-rw-r--r--  kernel/trace/trace.c                     193
-rw-r--r--  kernel/trace/trace.h                      23
-rw-r--r--  kernel/trace/trace_bts.c                 276
-rw-r--r--  kernel/trace/trace_mmiotrace.c            16
-rwxr-xr-x  scripts/recordmcount.pl                   48
47 files changed, 1864 insertions(+), 708 deletions(-)
diff --git a/Documentation/ftrace.txt b/Documentation/ftrace.txt
index 753f4de4b175..35a78bc6651d 100644
--- a/Documentation/ftrace.txt
+++ b/Documentation/ftrace.txt
@@ -324,7 +324,7 @@ output. To see what is available, simply cat the file:
 
 cat /debug/tracing/trace_options
 print-parent nosym-offset nosym-addr noverbose noraw nohex nobin \
-noblock nostacktrace nosched-tree
+noblock nostacktrace nosched-tree nouserstacktrace nosym-userobj
 
 To disable one of the options, echo in the option prepended with "no".
 
@@ -378,6 +378,20 @@ Here are the available options:
   When a trace is recorded, so is the stack of functions.
   This allows for back traces of trace sites.
 
+  userstacktrace - This option changes the trace. It records a
+                   stacktrace of the current userspace thread.
+
+  sym-userobj - when user stacktraces are enabled, look up which object the
+                address belongs to, and print a relative address.
+                This is especially useful when ASLR is on; otherwise you
+                don't get a chance to resolve the address to
+                object/file/line after the app is no longer running.
+
+                The lookup is performed when you read trace, trace_pipe,
+                or latency_trace. Example:
+
+                a.out-1623  [000] 40874.465068: /root/a.out[+0x480] <-/root/a.out[+0x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6]
+
   sched-tree - TBD (any users??)
 
 
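
What sym-userobj does internally amounts to looking up the vma that maps a given user address and printing the offset into the mapped object. A rough, hypothetical sketch of that idea in C (print_user_sym() is an illustrative name, not something this patch adds; mmap_sem locking and the full-path lookup are omitted):

    /* Sketch: resolve a user address to "object[+offset]". The caller
     * would need to hold the task's mmap_sem. */
    static void print_user_sym(struct task_struct *task, unsigned long addr)
    {
            struct vm_area_struct *vma = find_vma(task->mm, addr);

            if (vma && vma->vm_file)
                    printk(KERN_INFO "%s[+0x%lx]\n",
                           vma->vm_file->f_path.dentry->d_name.name,
                           addr - vma->vm_start);
    }

Printing the offset rather than the absolute address is what makes the trace resolvable after the process has exited, even with ASLR.
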
diff --git a/Documentation/tracers/mmiotrace.txt b/Documentation/tracers/mmiotrace.txt
index 5bbbe2096223..cde23b4a12a1 100644
--- a/Documentation/tracers/mmiotrace.txt
+++ b/Documentation/tracers/mmiotrace.txt
@@ -37,7 +37,7 @@ $ echo mmiotrace > /debug/tracing/current_tracer
 $ cat /debug/tracing/trace_pipe > mydump.txt &
 Start X or whatever.
 $ echo "X is up" > /debug/tracing/trace_marker
-$ echo none > /debug/tracing/current_tracer
+$ echo nop > /debug/tracing/current_tracer
 Check for lost events.
 
 
@@ -66,7 +66,7 @@ which action. It is recommended to place descriptive markers about what you
 do.
 
 Shut down mmiotrace (requires root privileges):
-$ echo none > /debug/tracing/current_tracer
+$ echo nop > /debug/tracing/current_tracer
 The 'cat' process exits. If it does not, kill it by issuing 'fg' command and
 pressing ctrl+c.
 
@@ -81,7 +81,9 @@ are:
 $ cat /debug/tracing/trace_entries
 gives you a number. Approximately double this number and write it back, for
 instance:
+$ echo 0 > /debug/tracing/tracing_enabled
 $ echo 128000 > /debug/tracing/trace_entries
+$ echo 1 > /debug/tracing/tracing_enabled
 Then start again from the top.
 
 If you are doing a trace for a driver project, e.g. Nouveau, you should also
diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h
index b298f7a631e6..e5f2ae8362f7 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -7,7 +7,19 @@
 
 #ifndef __ASSEMBLY__
 extern void _mcount(void);
-#endif
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+        /* relocation of the mcount call site is the same as the address */
+        return addr;
+}
+
+struct dyn_arch_ftrace {
+        struct module *mod;
+};
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* __ASSEMBLY__ */
 
 #endif
 
diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h
index e5f14b13ccf0..08454880a2c0 100644
--- a/arch/powerpc/include/asm/module.h
+++ b/arch/powerpc/include/asm/module.h
@@ -34,11 +34,19 @@ struct mod_arch_specific {
 #ifdef __powerpc64__
         unsigned int stubs_section;     /* Index of stubs section in module */
         unsigned int toc_section;       /* What section is the TOC? */
-#else
+#ifdef CONFIG_DYNAMIC_FTRACE
+        unsigned long toc;
+        unsigned long tramp;
+#endif
+
+#else /* powerpc64 */
         /* Indices of PLT sections within module. */
         unsigned int core_plt_section;
         unsigned int init_plt_section;
+#ifdef CONFIG_DYNAMIC_FTRACE
+        unsigned long tramp;
+#endif
 #endif
+#endif /* powerpc64 */
 
         /* List of BUG addresses, source line numbers and filenames */
         struct list_head bug_list;
@@ -68,6 +76,12 @@ struct mod_arch_specific {
 # endif /* MODULE */
 #endif
 
+#ifdef CONFIG_DYNAMIC_FTRACE
+#    ifdef MODULE
+        asm(".section .ftrace.tramp,\"ax\",@nobits; .align 3; .previous");
+#    endif /* MODULE */
+#endif
+
 
 struct exception_table_entry;
 void sort_ex_table(struct exception_table_entry *start,
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
index f4b006ed0ab1..3271cd698e4c 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/ftrace.c
@@ -9,22 +9,30 @@
 
 #include <linux/spinlock.h>
 #include <linux/hardirq.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
 #include <linux/ftrace.h>
 #include <linux/percpu.h>
 #include <linux/init.h>
 #include <linux/list.h>
 
 #include <asm/cacheflush.h>
+#include <asm/code-patching.h>
 #include <asm/ftrace.h>
 
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(fmt , ...)       do { } while (0)
+#endif
 
-static unsigned int ftrace_nop = 0x60000000;
+static unsigned int ftrace_nop = PPC_NOP_INSTR;
 
 #ifdef CONFIG_PPC32
 # define GET_ADDR(addr) addr
 #else
 /* PowerPC64's functions are data that points to the functions */
-# define GET_ADDR(addr) *(unsigned long *)addr
+# define GET_ADDR(addr) (*(unsigned long *)addr)
 #endif
 
 
@@ -33,12 +41,12 @@ static unsigned int ftrace_calc_offset(long ip, long addr)
         return (int)(addr - ip);
 }
 
-unsigned char *ftrace_nop_replace(void)
+static unsigned char *ftrace_nop_replace(void)
 {
         return (char *)&ftrace_nop;
 }
 
-unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
+static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
 {
         static unsigned int op;
 
@@ -68,49 +76,434 @@ unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
 # define _ASM_PTR       " .long "
 #endif
 
-int
+static int
 ftrace_modify_code(unsigned long ip, unsigned char *old_code,
                    unsigned char *new_code)
 {
-        unsigned replaced;
-        unsigned old = *(unsigned *)old_code;
-        unsigned new = *(unsigned *)new_code;
-        int faulted = 0;
+        unsigned char replaced[MCOUNT_INSN_SIZE];
 
         /*
          * Note: Due to modules and __init, code can
          * disappear and change, we need to protect against faulting
-         * as well as code changing.
+         * as well as code changing. We do this by using the
+         * probe_kernel_* functions.
          *
          * No real locking needed, this code is run through
-         * kstop_machine.
+         * kstop_machine, or before SMP starts.
          */
-        asm volatile (
-                "1: lwz         %1, 0(%2)\n"
-                "       cmpw    %1, %5\n"
-                "       bne     2f\n"
-                "       stwu    %3, 0(%2)\n"
-                "2:\n"
-                ".section .fixup, \"ax\"\n"
-                "3:     li %0, 1\n"
-                "       b 2b\n"
-                ".previous\n"
-                ".section __ex_table,\"a\"\n"
-                _ASM_ALIGN "\n"
-                _ASM_PTR "1b, 3b\n"
-                ".previous"
-                : "=r"(faulted), "=r"(replaced)
-                : "r"(ip), "r"(new),
-                  "0"(faulted), "r"(old)
-                : "memory");
-
-        if (replaced != old && replaced != new)
-                faulted = 2;
-
-        if (!faulted)
-                flush_icache_range(ip, ip + 8);
-
-        return faulted;
+
+        /* read the text we want to modify */
+        if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+                return -EFAULT;
+
+        /* Make sure it is what we expect it to be */
+        if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
+                return -EINVAL;
+
+        /* replace the text with the new text */
+        if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE))
+                return -EPERM;
+
+        flush_icache_range(ip, ip + 8);
+
+        return 0;
+}
+
+/*
+ * Helper functions that are the same for both PPC64 and PPC32.
+ */
+static int test_24bit_addr(unsigned long ip, unsigned long addr)
+{
+        long diff;
+
+        /*
+         * Can we get to addr from ip in 24 bits?
+         *  (26 really, since we multiply by 4 for 4-byte alignment)
+         */
+        diff = addr - ip;
+
+        /*
+         * Return true if diff is less than 1 << 25
+         *  and greater than -1 << 26.
+         */
+        return (diff < (1 << 25)) && (diff > (-1 << 26));
+}
+
+static int is_bl_op(unsigned int op)
+{
+        return (op & 0xfc000003) == 0x48000001;
+}
+
+static int test_offset(unsigned long offset)
+{
+        return (offset + 0x2000000 > 0x3ffffff) || ((offset & 3) != 0);
+}
+
+static unsigned long find_bl_target(unsigned long ip, unsigned int op)
+{
+        static int offset;
+
+        offset = (op & 0x03fffffc);
+        /* make it signed */
+        if (offset & 0x02000000)
+                offset |= 0xfe000000;
+
+        return ip + (long)offset;
+}
+
+static unsigned int branch_offset(unsigned long offset)
+{
+        /* return "bl ip+offset" */
+        return 0x48000001 | (offset & 0x03fffffc);
+}
+
+#ifdef CONFIG_PPC64
+static int
+__ftrace_make_nop(struct module *mod,
+                  struct dyn_ftrace *rec, unsigned long addr)
+{
+        unsigned char replaced[MCOUNT_INSN_SIZE * 2];
+        unsigned int *op = (unsigned *)&replaced;
+        unsigned char jmp[8];
+        unsigned long *ptr = (unsigned long *)&jmp;
+        unsigned long ip = rec->ip;
+        unsigned long tramp;
+        int offset;
+
+        /* read where this goes */
+        if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+                return -EFAULT;
+
+        /* Make sure that this is still a 24bit jump */
+        if (!is_bl_op(*op)) {
+                printk(KERN_ERR "Not expected bl: opcode is %x\n", *op);
+                return -EINVAL;
+        }
+
+        /* let's find where the pointer goes */
+        tramp = find_bl_target(ip, *op);
+
+        /*
+         * On PPC64 the trampoline looks like:
+         *  0x3d, 0x82, 0x00, 0x00,    addis   r12,r2, <high>
+         *  0x39, 0x8c, 0x00, 0x00,    addi    r12,r12, <low>
+         *   Where the bytes 2,3,6 and 7 make up the 32bit offset
+         *   to the TOC that holds the pointer to jump to.
+         *  0xf8, 0x41, 0x00, 0x28,    std     r2,40(r1)
+         *  0xe9, 0x6c, 0x00, 0x20,    ld      r11,32(r12)
+         *   The actual address is 32 bytes from the offset
+         *   into the TOC.
+         *  0xe8, 0x4c, 0x00, 0x28,    ld      r2,40(r12)
+         */
+
+        DEBUGP("ip:%lx jumps to %lx r2: %lx", ip, tramp, mod->arch.toc);
+
+        /* Find where the trampoline jumps to */
+        if (probe_kernel_read(jmp, (void *)tramp, 8)) {
+                printk(KERN_ERR "Failed to read %lx\n", tramp);
+                return -EFAULT;
+        }
+
+        DEBUGP(" %08x %08x",
+               (unsigned)(*ptr >> 32),
+               (unsigned)*ptr);
+
+        offset = (unsigned)jmp[2] << 24 |
+                 (unsigned)jmp[3] << 16 |
+                 (unsigned)jmp[6] << 8 |
+                 (unsigned)jmp[7];
+
+        DEBUGP(" %x ", offset);
+
+        /* get the address this jumps to */
+        tramp = mod->arch.toc + offset + 32;
+        DEBUGP("toc: %lx", tramp);
+
+        if (probe_kernel_read(jmp, (void *)tramp, 8)) {
+                printk(KERN_ERR "Failed to read %lx\n", tramp);
+                return -EFAULT;
+        }
+
+        DEBUGP(" %08x %08x\n",
+               (unsigned)(*ptr >> 32),
+               (unsigned)*ptr);
+
+        /* This should match what was called */
+        if (*ptr != GET_ADDR(addr)) {
+                printk(KERN_ERR "addr does not match %lx\n", *ptr);
+                return -EINVAL;
+        }
+
+        /*
+         * We want to nop the line, but the next line is
+         *  0xe8, 0x41, 0x00, 0x28   ld r2,40(r1)
+         * This needs to be turned to a nop too.
+         */
+        if (probe_kernel_read(replaced, (void *)(ip+4), MCOUNT_INSN_SIZE))
+                return -EFAULT;
+
+        if (*op != 0xe8410028) {
+                printk(KERN_ERR "Next line is not ld! (%08x)\n", *op);
+                return -EINVAL;
+        }
+
+        /*
+         * Milton Miller pointed out that we can not blindly do nops.
+         * If a task was preempted when calling a trace function,
+         * the nops will remove the way to restore the TOC in r2
+         * and the r2 TOC will get corrupted.
+         */
+
+        /*
+         * Replace:
+         *   bl <tramp>  <==== will be replaced with "b 1f"
+         *   ld r2,40(r1)
+         *  1:
+         */
+        op[0] = 0x48000008;     /* b +8 */
+
+        if (probe_kernel_write((void *)ip, replaced, MCOUNT_INSN_SIZE))
+                return -EPERM;
+
+        return 0;
+}
+
+#else /* !PPC64 */
+static int
+__ftrace_make_nop(struct module *mod,
+                  struct dyn_ftrace *rec, unsigned long addr)
+{
+        unsigned char replaced[MCOUNT_INSN_SIZE];
+        unsigned int *op = (unsigned *)&replaced;
+        unsigned char jmp[8];
+        unsigned int *ptr = (unsigned int *)&jmp;
+        unsigned long ip = rec->ip;
+        unsigned long tramp;
+        int offset;
+
+        if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+                return -EFAULT;
+
+        /* Make sure that this is still a 24bit jump */
+        if (!is_bl_op(*op)) {
+                printk(KERN_ERR "Not expected bl: opcode is %x\n", *op);
+                return -EINVAL;
+        }
+
+        /* let's find where the pointer goes */
+        tramp = find_bl_target(ip, *op);
+
+        /*
+         * On PPC32 the trampoline looks like:
+         *  lis r11,sym@ha
+         *  addi r11,r11,sym@l
+         *  mtctr r11
+         *  bctr
+         */
+
+        DEBUGP("ip:%lx jumps to %lx", ip, tramp);
+
+        /* Find where the trampoline jumps to */
+        if (probe_kernel_read(jmp, (void *)tramp, 8)) {
+                printk(KERN_ERR "Failed to read %lx\n", tramp);
+                return -EFAULT;
+        }
+
+        DEBUGP(" %08x %08x ", ptr[0], ptr[1]);
+
+        tramp = (ptr[1] & 0xffff) |
+                ((ptr[0] & 0xffff) << 16);
+        if (tramp & 0x8000)
+                tramp -= 0x10000;
+
+        DEBUGP(" %x ", tramp);
+
+        if (tramp != addr) {
+                printk(KERN_ERR
+                       "Trampoline location %08lx does not match addr\n",
+                       tramp);
+                return -EINVAL;
+        }
+
+        op[0] = PPC_NOP_INSTR;
+
+        if (probe_kernel_write((void *)ip, replaced, MCOUNT_INSN_SIZE))
+                return -EPERM;
+
+        return 0;
+}
+#endif /* PPC64 */
+
+int ftrace_make_nop(struct module *mod,
+                    struct dyn_ftrace *rec, unsigned long addr)
+{
+        unsigned char *old, *new;
+        unsigned long ip = rec->ip;
+
+        /*
+         * If the calling address is more than 24 bits away,
+         * then we had to use a trampoline to make the call.
+         * Otherwise just update the call site.
+         */
+        if (test_24bit_addr(ip, addr)) {
+                /* within range */
+                old = ftrace_call_replace(ip, addr);
+                new = ftrace_nop_replace();
+                return ftrace_modify_code(ip, old, new);
+        }
+
+        /*
+         * Out of range jumps are called from modules.
+         * We should either already have a pointer to the module
+         * or it has been passed in.
+         */
+        if (!rec->arch.mod) {
+                if (!mod) {
+                        printk(KERN_ERR "No module loaded addr=%lx\n",
+                               addr);
+                        return -EFAULT;
+                }
+                rec->arch.mod = mod;
+        } else if (mod) {
+                if (mod != rec->arch.mod) {
+                        printk(KERN_ERR
+                               "Record mod %p not equal to passed in mod %p\n",
+                               rec->arch.mod, mod);
+                        return -EINVAL;
+                }
+                /* nothing to do if mod == rec->arch.mod */
+        } else
+                mod = rec->arch.mod;
+
+        return __ftrace_make_nop(mod, rec, addr);
+
+}
+
+#ifdef CONFIG_PPC64
+static int
+__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+        unsigned char replaced[MCOUNT_INSN_SIZE * 2];
+        unsigned int *op = (unsigned *)&replaced;
+        unsigned long ip = rec->ip;
+        unsigned long offset;
+
+        /* read where this goes */
+        if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE * 2))
+                return -EFAULT;
+
+        /*
+         * It should be pointing to two nops or
+         *  b +8; ld r2,40(r1)
+         */
+        if (((op[0] != 0x48000008) || (op[1] != 0xe8410028)) &&
+            ((op[0] != PPC_NOP_INSTR) || (op[1] != PPC_NOP_INSTR))) {
+                printk(KERN_ERR "Expected NOPs but have %x %x\n", op[0], op[1]);
+                return -EINVAL;
+        }
+
+        /* If we never set up a trampoline to ftrace_caller, then bail */
+        if (!rec->arch.mod->arch.tramp) {
+                printk(KERN_ERR "No ftrace trampoline\n");
+                return -EINVAL;
+        }
+
+        /* now calculate a jump to the ftrace caller trampoline */
+        offset = rec->arch.mod->arch.tramp - ip;
+
+        if (test_offset(offset)) {
+                printk(KERN_ERR "REL24 %li out of range!\n",
+                       (long int)offset);
+                return -EINVAL;
+        }
+
+        /* Set to "bl addr" */
+        op[0] = branch_offset(offset);
+        /* ld r2,40(r1) */
+        op[1] = 0xe8410028;
+
+        DEBUGP("write to %lx\n", rec->ip);
+
+        if (probe_kernel_write((void *)ip, replaced, MCOUNT_INSN_SIZE * 2))
+                return -EPERM;
+
+        return 0;
+}
+#else
+static int
+__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+        unsigned char replaced[MCOUNT_INSN_SIZE];
+        unsigned int *op = (unsigned *)&replaced;
+        unsigned long ip = rec->ip;
+        unsigned long offset;
+
+        /* read where this goes */
+        if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+                return -EFAULT;
+
+        /* It should be pointing to a nop */
+        if (op[0] != PPC_NOP_INSTR) {
+                printk(KERN_ERR "Expected NOP but have %x\n", op[0]);
+                return -EINVAL;
+        }
+
+        /* If we never set up a trampoline to ftrace_caller, then bail */
+        if (!rec->arch.mod->arch.tramp) {
+                printk(KERN_ERR "No ftrace trampoline\n");
+                return -EINVAL;
+        }
+
+        /* now calculate a jump to the ftrace caller trampoline */
+        offset = rec->arch.mod->arch.tramp - ip;
+
+        if (test_offset(offset)) {
+                printk(KERN_ERR "REL24 %li out of range!\n",
+                       (long int)offset);
+                return -EINVAL;
+        }
+
+        /* Set to "bl addr" */
+        op[0] = branch_offset(offset);
+
+        DEBUGP("write to %lx\n", rec->ip);
+
+        if (probe_kernel_write((void *)ip, replaced, MCOUNT_INSN_SIZE))
+                return -EPERM;
+
+        return 0;
+}
+#endif /* CONFIG_PPC64 */
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+        unsigned char *old, *new;
+        unsigned long ip = rec->ip;
+
+        /*
+         * If the calling address is more than 24 bits away,
+         * then we had to use a trampoline to make the call.
+         * Otherwise just update the call site.
+         */
+        if (test_24bit_addr(ip, addr)) {
+                /* within range */
+                old = ftrace_nop_replace();
+                new = ftrace_call_replace(ip, addr);
+                return ftrace_modify_code(ip, old, new);
+        }
+
+        /*
+         * Out of range jumps are called from modules.
+         * Since we are converting from a nop, it had better
+         * already have a module defined.
+         */
+        if (!rec->arch.mod) {
+                printk(KERN_ERR "No module loaded\n");
+                return -EINVAL;
+        }
+
+        return __ftrace_make_call(rec, addr);
 }
 
 int ftrace_update_ftrace_func(ftrace_func_t func)
@@ -128,10 +521,10 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
 
 int __init ftrace_dyn_arch_init(void *data)
 {
-        /* This is running in kstop_machine */
+        /* caller expects data to be zero */
+        unsigned long *p = data;
 
-        ftrace_mcount_set(data);
+        *p = 0;
 
         return 0;
 }
-
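
The helpers added above all revolve around the PowerPC "bl" encoding: primary opcode 18 with a signed, word-aligned 26-bit displacement, which is why the reachable range is (-1 << 26, 1 << 25). As a quick illustration, here is a stand-alone user-space sketch of the same bit manipulation, mirroring is_bl_op(), find_bl_target() and branch_offset(); the sketch_* names are illustrative and not part of the patch (assumes 32-bit int):

    #include <assert.h>

    static int sketch_is_bl_op(unsigned int op)
    {
            /* primary opcode 18, AA=0, LK=1 */
            return (op & 0xfc000003) == 0x48000001;
    }

    static int sketch_bl_offset(unsigned int op)
    {
            int offset = op & 0x03fffffc;   /* 24-bit word offset << 2 */

            if (offset & 0x02000000)        /* sign-extend to 32 bits */
                    offset |= 0xfe000000;
            return offset;
    }

    static unsigned int sketch_make_bl(int offset)
    {
            return 0x48000001 | (offset & 0x03fffffc);
    }

    int main(void)
    {
            unsigned int op = sketch_make_bl(-64);  /* "bl .-64" */

            assert(sketch_is_bl_op(op));
            assert(sketch_bl_offset(op) == -64);
            return 0;
    }
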
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index 31982d05d81a..88d9c1d5e5fb 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -69,10 +69,15 @@ void cpu_idle(void)
                 smp_mb();
                 local_irq_disable();
 
+                /* Don't trace irqs off for idle */
+                stop_critical_timings();
+
                 /* check again after disabling irqs */
                 if (!need_resched() && !cpu_should_die())
                         ppc_md.power_save();
 
+                start_critical_timings();
+
                 local_irq_enable();
                 set_thread_flag(TIF_POLLING_NRFLAG);
 
diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c
index 2df91a03462a..f832773fc28e 100644
--- a/arch/powerpc/kernel/module_32.c
+++ b/arch/powerpc/kernel/module_32.c
@@ -22,6 +22,7 @@
 #include <linux/fs.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
+#include <linux/ftrace.h>
 #include <linux/cache.h>
 #include <linux/bug.h>
 #include <linux/sort.h>
@@ -53,6 +54,9 @@ static unsigned int count_relocs(const Elf32_Rela *rela, unsigned int num)
                         r_addend = rela[i].r_addend;
         }
 
+#ifdef CONFIG_DYNAMIC_FTRACE
+        _count_relocs++;        /* add one for ftrace_caller */
+#endif
         return _count_relocs;
 }
 
@@ -306,5 +310,11 @@ int apply_relocate_add(Elf32_Shdr *sechdrs,
                         return -ENOEXEC;
                 }
         }
+#ifdef CONFIG_DYNAMIC_FTRACE
+        module->arch.tramp =
+                do_plt_call(module->module_core,
+                            (unsigned long)ftrace_caller,
+                            sechdrs, module);
+#endif
         return 0;
 }
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 1af2377e4992..8992b031a7b6 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -20,6 +20,7 @@
 #include <linux/moduleloader.h>
 #include <linux/err.h>
 #include <linux/vmalloc.h>
+#include <linux/ftrace.h>
 #include <linux/bug.h>
 #include <asm/module.h>
 #include <asm/firmware.h>
@@ -163,6 +164,11 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr,
                 }
         }
 
+#ifdef CONFIG_DYNAMIC_FTRACE
+        /* make the trampoline to the ftrace_caller */
+        relocs++;
+#endif
+
         DEBUGP("Looks like a total of %lu stubs, max\n", relocs);
         return relocs * sizeof(struct ppc64_stub_entry);
 }
@@ -441,5 +447,12 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
                 }
         }
 
+#ifdef CONFIG_DYNAMIC_FTRACE
+        me->arch.toc = my_r2(sechdrs, me);
+        me->arch.tramp = stub_for_addr(sechdrs,
+                                       (unsigned long)ftrace_caller,
+                                       me);
+#endif
+
         return 0;
 }
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7a146baaa990..e49a4fd718fe 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -36,6 +36,7 @@ config X86
         select HAVE_ARCH_TRACEHOOK
         select HAVE_GENERIC_DMA_COHERENT if X86_32
         select HAVE_EFFICIENT_UNALIGNED_ACCESS
+        select USER_STACKTRACE_SUPPORT
 
 config ARCH_DEFCONFIG
         string
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index b815664fe370..85a78575956c 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -515,6 +515,7 @@ config CPU_SUP_UMC_32
 config X86_DS
         def_bool X86_PTRACE_BTS
         depends on X86_DEBUGCTLMSR
+        select HAVE_HW_BRANCH_TRACER
 
 config X86_PTRACE_BTS
         bool "Branch Trace Store"
diff --git a/arch/x86/boot/tty.c b/arch/x86/boot/tty.c
index 0be77b39328a..7e8e8b25f5f6 100644
--- a/arch/x86/boot/tty.c
+++ b/arch/x86/boot/tty.c
@@ -74,7 +74,7 @@ static int kbd_pending(void)
 {
         u8 pending;
         asm volatile("int $0x16; setnz %0"
-                     : "=rm" (pending)
+                     : "=qm" (pending)
                      : "a" (0x0100));
         return pending;
 }
diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h
index 72c5a190bf48..99b6c39774a4 100644
--- a/arch/x86/include/asm/ds.h
+++ b/arch/x86/include/asm/ds.h
@@ -7,13 +7,12 @@
  *
  * It manages:
  * - per-thread and per-cpu allocation of BTS and PEBS
- * - buffer memory allocation (optional)
- * - buffer overflow handling
+ * - buffer overflow handling (to be done)
  * - buffer access
  *
  * It assumes:
- * - get_task_struct on all parameter tasks
- * - current is allowed to trace parameter tasks
+ * - get_task_struct on all traced tasks
+ * - current is allowed to trace tasks
  *
  *
  * Copyright (C) 2007-2008 Intel Corporation.
@@ -23,13 +22,21 @@
 #ifndef _ASM_X86_DS_H
 #define _ASM_X86_DS_H
 
-#ifdef CONFIG_X86_DS
 
 #include <linux/types.h>
 #include <linux/init.h>
+#include <linux/err.h>
+
 
+#ifdef CONFIG_X86_DS
 
 struct task_struct;
+struct ds_tracer;
+struct bts_tracer;
+struct pebs_tracer;
+
+typedef void (*bts_ovfl_callback_t)(struct bts_tracer *);
+typedef void (*pebs_ovfl_callback_t)(struct pebs_tracer *);
 
 /*
  * Request BTS or PEBS
@@ -37,60 +44,62 @@ struct task_struct;
  * Due to alignment constraints, the actual buffer may be slightly
  * smaller than the requested or provided buffer.
  *
- * Returns 0 on success; -Eerrno otherwise
+ * Returns a pointer to a tracer structure on success, or
+ * ERR_PTR(errcode) on failure.
+ *
+ * The interrupt threshold is independent of the overflow callback
+ * to allow users to use their own overflow interrupt handling mechanism.
  *
  * task: the task to request recording for;
  *       NULL for per-cpu recording on the current cpu
  * base: the base pointer for the (non-pageable) buffer;
- *       NULL if buffer allocation requested
- * size: the size of the requested or provided buffer
+ * size: the size of the provided buffer in bytes
  * ovfl: pointer to a function to be called on buffer overflow;
  *       NULL if cyclic buffer requested
+ * th: the interrupt threshold in records from the end of the buffer;
+ *     -1 if no interrupt threshold is requested.
  */
-typedef void (*ds_ovfl_callback_t)(struct task_struct *);
-extern int ds_request_bts(struct task_struct *task, void *base, size_t size,
-                          ds_ovfl_callback_t ovfl);
-extern int ds_request_pebs(struct task_struct *task, void *base, size_t size,
-                           ds_ovfl_callback_t ovfl);
+extern struct bts_tracer *ds_request_bts(struct task_struct *task,
+                                         void *base, size_t size,
+                                         bts_ovfl_callback_t ovfl, size_t th);
+extern struct pebs_tracer *ds_request_pebs(struct task_struct *task,
+                                           void *base, size_t size,
+                                           pebs_ovfl_callback_t ovfl,
+                                           size_t th);
 
 /*
  * Release BTS or PEBS resources
 *
- * Frees buffers allocated on ds_request.
- *
  * Returns 0 on success; -Eerrno otherwise
 *
- * task: the task to release resources for;
- *       NULL to release resources for the current cpu
+ * tracer: the tracer handle returned from ds_request_~()
  */
-extern int ds_release_bts(struct task_struct *task);
-extern int ds_release_pebs(struct task_struct *task);
+extern int ds_release_bts(struct bts_tracer *tracer);
+extern int ds_release_pebs(struct pebs_tracer *tracer);
 
 /*
- * Return the (array) index of the write pointer.
+ * Get the (array) index of the write pointer.
  * (assuming an array of BTS/PEBS records)
 *
- * Returns -Eerrno on error
+ * Returns 0 on success; -Eerrno on error
 *
- * task: the task to access;
- *       NULL to access the current cpu
- * pos (out): if not NULL, will hold the result
+ * tracer: the tracer handle returned from ds_request_~()
+ * pos (out): will hold the result
  */
-extern int ds_get_bts_index(struct task_struct *task, size_t *pos);
-extern int ds_get_pebs_index(struct task_struct *task, size_t *pos);
+extern int ds_get_bts_index(struct bts_tracer *tracer, size_t *pos);
+extern int ds_get_pebs_index(struct pebs_tracer *tracer, size_t *pos);
 
 /*
- * Return the (array) index one record beyond the end of the array.
+ * Get the (array) index one record beyond the end of the array.
  * (assuming an array of BTS/PEBS records)
 *
- * Returns -Eerrno on error
+ * Returns 0 on success; -Eerrno on error
 *
- * task: the task to access;
- *       NULL to access the current cpu
- * pos (out): if not NULL, will hold the result
+ * tracer: the tracer handle returned from ds_request_~()
+ * pos (out): will hold the result
  */
-extern int ds_get_bts_end(struct task_struct *task, size_t *pos);
-extern int ds_get_pebs_end(struct task_struct *task, size_t *pos);
+extern int ds_get_bts_end(struct bts_tracer *tracer, size_t *pos);
+extern int ds_get_pebs_end(struct pebs_tracer *tracer, size_t *pos);
 
 /*
  * Provide a pointer to the BTS/PEBS record at parameter index.
@@ -101,14 +110,13 @@ extern int ds_get_pebs_end(struct task_struct *task, size_t *pos);
  *
  * Returns the size of a single record on success; -Eerrno on error
  *
- * task: the task to access;
- *       NULL to access the current cpu
+ * tracer: the tracer handle returned from ds_request_~()
  * index: the index of the requested record
  * record (out): pointer to the requested record
  */
-extern int ds_access_bts(struct task_struct *task,
+extern int ds_access_bts(struct bts_tracer *tracer,
                          size_t index, const void **record);
-extern int ds_access_pebs(struct task_struct *task,
+extern int ds_access_pebs(struct pebs_tracer *tracer,
                           size_t index, const void **record);
 
 /*
@@ -128,38 +136,24 @@ extern int ds_access_pebs(struct task_struct *task,
  *
  * Returns the number of bytes written or -Eerrno.
  *
- * task: the task to access;
- *       NULL to access the current cpu
+ * tracer: the tracer handle returned from ds_request_~()
  * buffer: the buffer to write
  * size: the size of the buffer
  */
-extern int ds_write_bts(struct task_struct *task,
+extern int ds_write_bts(struct bts_tracer *tracer,
                         const void *buffer, size_t size);
-extern int ds_write_pebs(struct task_struct *task,
+extern int ds_write_pebs(struct pebs_tracer *tracer,
                          const void *buffer, size_t size);
 
 /*
- * Same as ds_write_bts/pebs, but omit ownership checks.
- *
- * This is needed to have some other task than the owner of the
- * BTS/PEBS buffer or the parameter task itself write into the
- * respective buffer.
- */
-extern int ds_unchecked_write_bts(struct task_struct *task,
-                                  const void *buffer, size_t size);
-extern int ds_unchecked_write_pebs(struct task_struct *task,
-                                   const void *buffer, size_t size);
-
-/*
  * Reset the write pointer of the BTS/PEBS buffer.
  *
  * Returns 0 on success; -Eerrno on error
  *
- * task: the task to access;
- *       NULL to access the current cpu
+ * tracer: the tracer handle returned from ds_request_~()
  */
-extern int ds_reset_bts(struct task_struct *task);
-extern int ds_reset_pebs(struct task_struct *task);
+extern int ds_reset_bts(struct bts_tracer *tracer);
+extern int ds_reset_pebs(struct pebs_tracer *tracer);
 
 /*
  * Clear the BTS/PEBS buffer and reset the write pointer.
@@ -167,33 +161,30 @@ extern int ds_reset_pebs(struct task_struct *task);
  *
  * Returns 0 on success; -Eerrno on error
  *
- * task: the task to access;
- *       NULL to access the current cpu
+ * tracer: the tracer handle returned from ds_request_~()
  */
-extern int ds_clear_bts(struct task_struct *task);
-extern int ds_clear_pebs(struct task_struct *task);
+extern int ds_clear_bts(struct bts_tracer *tracer);
+extern int ds_clear_pebs(struct pebs_tracer *tracer);
 
 /*
  * Provide the PEBS counter reset value.
  *
  * Returns 0 on success; -Eerrno on error
  *
- * task: the task to access;
- *       NULL to access the current cpu
+ * tracer: the tracer handle returned from ds_request_pebs()
  * value (out): the counter reset value
  */
-extern int ds_get_pebs_reset(struct task_struct *task, u64 *value);
+extern int ds_get_pebs_reset(struct pebs_tracer *tracer, u64 *value);
 
 /*
  * Set the PEBS counter reset value.
  *
  * Returns 0 on success; -Eerrno on error
  *
- * task: the task to access;
- *       NULL to access the current cpu
+ * tracer: the tracer handle returned from ds_request_pebs()
  * value: the new counter reset value
  */
-extern int ds_set_pebs_reset(struct task_struct *task, u64 value);
+extern int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value);
 
 /*
  * Initialization
@@ -206,17 +197,13 @@ extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *);
 /*
  * The DS context - part of struct thread_struct.
  */
+#define MAX_SIZEOF_DS (12 * 8)
+
 struct ds_context {
         /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */
-        unsigned char *ds;
+        unsigned char ds[MAX_SIZEOF_DS];
         /* the owner of the BTS and PEBS configuration, respectively */
-        struct task_struct *owner[2];
-        /* buffer overflow notification function for BTS and PEBS */
-        ds_ovfl_callback_t callback[2];
-        /* the original buffer address */
-        void *buffer[2];
-        /* the number of allocated pages for on-request allocated buffers */
-        unsigned int pages[2];
+        struct ds_tracer *owner[2];
         /* use count */
         unsigned long count;
         /* a pointer to the context location inside the thread_struct
@@ -232,7 +219,8 @@ extern void ds_free(struct ds_context *context);
 
 #else /* CONFIG_X86_DS */
 
-#define ds_init_intel(config) do {} while (0)
+struct cpuinfo_x86;
+static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {}
 
 #endif /* CONFIG_X86_DS */
 #endif /* _ASM_X86_DS_H */
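
To see what the reworked handle-based interface looks like from the caller's side, here is a minimal, hypothetical usage sketch; start_bts_example() and the 4 kB buffer size are illustrative only, while the ds_request_bts()/ERR_PTR() contract comes from the declarations above:

    #include <linux/err.h>
    #include <linux/sched.h>
    #include <linux/slab.h>
    #include <asm/ds.h>

    /* Hypothetical caller: trace the current task's branches into buf. */
    static struct bts_tracer *start_bts_example(void)
    {
            void *buf = kzalloc(4096, GFP_KERNEL);
            struct bts_tracer *tracer;

            if (!buf)
                    return ERR_PTR(-ENOMEM);

            /* no overflow callback (not yet implemented), no threshold */
            tracer = ds_request_bts(current, buf, 4096, NULL, (size_t)-1);
            if (IS_ERR(tracer))
                    kfree(buf);     /* the buffer stays owned by the caller */

            return tracer;
    }

Note the design change visible here: the buffer is now always supplied (and later freed) by the caller, since the new DS layer no longer allocates buffer memory itself.
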
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 2bb43b433e07..754a3e082f94 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -29,7 +29,6 @@ struct dyn_arch_ftrace {
 #endif /* CONFIG_FUNCTION_TRACER */
 
 #ifdef CONFIG_FUNCTION_RET_TRACER
-#define FTRACE_RET_STACK_SIZE 20
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index e90e81ef6ab9..0921b4018c11 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -40,36 +40,8 @@ struct thread_info {
          */
         __u8                    supervisor_stack[0];
 #endif
-
-#ifdef CONFIG_FUNCTION_RET_TRACER
-        /* Index of current stored address in ret_stack */
-        int             curr_ret_stack;
-        /* Stack of return addresses for return function tracing */
-        struct ftrace_ret_stack ret_stack[FTRACE_RET_STACK_SIZE];
-        /*
-         * Number of functions that haven't been traced
-         * because of depth overrun.
-         */
-        atomic_t        trace_overrun;
-#endif
 };
 
-#ifdef CONFIG_FUNCTION_RET_TRACER
-#define INIT_THREAD_INFO(tsk)                   \
-{                                               \
-        .task           = &tsk,                 \
-        .exec_domain    = &default_exec_domain, \
-        .flags          = 0,                    \
-        .cpu            = 0,                    \
-        .preempt_count  = 1,                    \
-        .addr_limit     = KERNEL_DS,            \
-        .restart_block  = {                     \
-                .fn = do_no_restart_syscall,    \
-        },                                      \
-        .curr_ret_stack = -1,                   \
-        .trace_overrun  = ATOMIC_INIT(0)        \
-}
-#else
 #define INIT_THREAD_INFO(tsk)                   \
 {                                               \
         .task           = &tsk,                 \
@@ -82,7 +54,6 @@ struct thread_info {
                 .fn = do_no_restart_syscall,    \
         },                                      \
 }
-#endif
 
 #define init_thread_info        (init_thread_union.thread_info)
 #define init_stack              (init_thread_union.stack)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 1d8ed95da846..af2bc36ca1c4 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -46,7 +46,7 @@ obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
 obj-y                           += process.o
 obj-y                           += i387.o xsave.o
 obj-y                           += ptrace.o
-obj-y                           += ds.o
+obj-$(CONFIG_X86_DS)            += ds.o
 obj-$(CONFIG_X86_32)            += tls.o
 obj-$(CONFIG_IA32_EMULATION)    += tls.o
 obj-y                           += step.o
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index cce0b6118d55..816f27f289b1 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -307,12 +307,11 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
                 set_cpu_cap(c, X86_FEATURE_P4);
         if (c->x86 == 6)
                 set_cpu_cap(c, X86_FEATURE_P3);
+#endif
 
         if (cpu_has_bts)
                 ptrace_bts_init_intel(c);
 
-#endif
-
         detect_extended_topology(c);
         if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) {
                 /*
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index d1a121443bde..19a8c2c0389f 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -7,13 +7,12 @@
  *
  * It manages:
  * - per-thread and per-cpu allocation of BTS and PEBS
- * - buffer memory allocation (optional)
- * - buffer overflow handling
+ * - buffer overflow handling (to be done)
  * - buffer access
  *
  * It assumes:
- * - get_task_struct on all parameter tasks
- * - current is allowed to trace parameter tasks
+ * - get_task_struct on all traced tasks
+ * - current is allowed to trace tasks
  *
  *
  * Copyright (C) 2007-2008 Intel Corporation.
@@ -21,8 +20,6 @@
  */
 
 
-#ifdef CONFIG_X86_DS
-
 #include <asm/ds.h>
 
 #include <linux/errno.h>
@@ -30,6 +27,7 @@
 #include <linux/slab.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
+#include <linux/kernel.h>
 
 
 /*
@@ -46,6 +44,33 @@ struct ds_configuration {
 };
 static struct ds_configuration ds_cfg;
 
+/*
+ * A BTS or PEBS tracer.
+ *
+ * This holds the configuration of the tracer and serves as a handle
+ * to identify tracers.
+ */
+struct ds_tracer {
+        /* the DS context (partially) owned by this tracer */
+        struct ds_context *context;
+        /* the buffer provided on ds_request() and its size in bytes */
+        void *buffer;
+        size_t size;
+};
+
+struct bts_tracer {
+        /* the common DS part */
+        struct ds_tracer ds;
+        /* buffer overflow notification function */
+        bts_ovfl_callback_t ovfl;
+};
+
+struct pebs_tracer {
+        /* the common DS part */
+        struct ds_tracer ds;
+        /* buffer overflow notification function */
+        pebs_ovfl_callback_t ovfl;
+};
 
 /*
  * Debug Store (DS) save area configuration (see Intel64 and IA32
@@ -109,34 +134,13 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
         (*(unsigned long *)base) = value;
 }
 
+#define DS_ALIGNMENT (1 << 3)   /* BTS and PEBS buffer alignment */
 
-/*
- * Locking is done only for allocating BTS or PEBS resources and for
- * guarding context and buffer memory allocation.
- *
- * Most functions require the current task to own the ds context part
- * they are going to access. All the locking is done when validating
- * access to the context.
- */
-static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock);
 
 /*
- * Validate that the current task is allowed to access the BTS/PEBS
- * buffer of the parameter task.
- *
- * Returns 0, if access is granted; -Eerrno, otherwise.
+ * Locking is done only for allocating BTS or PEBS resources.
  */
-static inline int ds_validate_access(struct ds_context *context,
-                                     enum ds_qualifier qual)
-{
-        if (!context)
-                return -EPERM;
-
-        if (context->owner[qual] == current)
-                return 0;
-
-        return -EPERM;
-}
+static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock);
 
 
 /*
@@ -185,96 +189,43 @@ static inline int check_tracer(struct task_struct *task)
  *
  * Contexts are use-counted. They are allocated on first access and
  * deallocated when the last user puts the context.
- *
- * We distinguish between an allocating and a non-allocating get of a
- * context:
- * - the allocating get is used for requesting BTS/PEBS resources. It
- *   requires the caller to hold the global ds_lock.
- * - the non-allocating get is used for all other cases. A
- *   non-existing context indicates an error. It acquires and releases
- *   the ds_lock itself for obtaining the context.
- *
- * A context and its DS configuration are allocated and deallocated
- * together. A context always has a DS configuration of the
- * appropriate size.
  */
 static DEFINE_PER_CPU(struct ds_context *, system_context);
 
 #define this_system_context per_cpu(system_context, smp_processor_id())
 
-/*
- * Returns the pointer to the parameter task's context or to the
- * system-wide context, if task is NULL.
- *
- * Increases the use count of the returned context, if not NULL.
- */
 static inline struct ds_context *ds_get_context(struct task_struct *task)
 {
-        struct ds_context *context;
-
-        spin_lock(&ds_lock);
-
-        context = (task ? task->thread.ds_ctx : this_system_context);
-        if (context)
-                context->count++;
-
-        spin_unlock(&ds_lock);
-
-        return context;
-}
-
-/*
- * Same as ds_get_context, but allocates the context and it's DS
- * structure, if necessary; returns NULL; if out of memory.
- *
- * pre: requires ds_lock to be held
- */
-static inline struct ds_context *ds_alloc_context(struct task_struct *task)
-{
         struct ds_context **p_context =
                 (task ? &task->thread.ds_ctx : &this_system_context);
         struct ds_context *context = *p_context;
+        unsigned long irq;
 
         if (!context) {
-                spin_unlock(&ds_lock);
-
                 context = kzalloc(sizeof(*context), GFP_KERNEL);
-
-                if (!context) {
-                        spin_lock(&ds_lock);
+                if (!context)
                         return NULL;
-                }
 
-                context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL);
-                if (!context->ds) {
-                        kfree(context);
-                        spin_lock(&ds_lock);
-                        return NULL;
-                }
+                spin_lock_irqsave(&ds_lock, irq);
 
-                spin_lock(&ds_lock);
-                /*
-                 * Check for race - another CPU could have allocated
-                 * it meanwhile:
-                 */
                 if (*p_context) {
-                        kfree(context->ds);
                         kfree(context);
-                        return *p_context;
-                }
 
-                *p_context = context;
+                        context = *p_context;
+                } else {
+                        *p_context = context;
 
-                context->this = p_context;
-                context->task = task;
+                        context->this = p_context;
+                        context->task = task;
 
-                if (task)
-                        set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
+                        if (task)
+                                set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
 
-                if (!task || (task == current))
-                        wrmsr(MSR_IA32_DS_AREA, (unsigned long)context->ds, 0);
-
-                get_tracer(task);
+                        if (!task || (task == current))
+                                wrmsrl(MSR_IA32_DS_AREA,
+                                       (unsigned long)context->ds);
+                }
+                spin_unlock_irqrestore(&ds_lock, irq);
         }
 
         context->count++;
@@ -282,16 +233,14 @@ static inline struct ds_context *ds_alloc_context(struct task_struct *task)
         return context;
 }
 
-/*
- * Decreases the use count of the parameter context, if not NULL.
- * Deallocates the context, if the use count reaches zero.
- */
 static inline void ds_put_context(struct ds_context *context)
 {
+        unsigned long irq;
+
         if (!context)
                 return;
 
-        spin_lock(&ds_lock);
+        spin_lock_irqsave(&ds_lock, irq);
 
         if (--context->count)
                 goto out;
@@ -304,352 +253,351 @@ static inline void ds_put_context(struct ds_context *context)
304 if (!context->task || (context->task == current)) 253 if (!context->task || (context->task == current))
305 wrmsrl(MSR_IA32_DS_AREA, 0); 254 wrmsrl(MSR_IA32_DS_AREA, 0);
306 255
307 put_tracer(context->task);
308
309 /* free any leftover buffers from tracers that did not
310 * deallocate them properly. */
311 kfree(context->buffer[ds_bts]);
312 kfree(context->buffer[ds_pebs]);
313 kfree(context->ds);
314 kfree(context); 256 kfree(context);
315 out: 257 out:
316 spin_unlock(&ds_lock); 258 spin_unlock_irqrestore(&ds_lock, irq);
317} 259}
318 260
319 261
320/* 262/*
321 * Handle a buffer overflow 263 * Handle a buffer overflow
322 * 264 *
323 * task: the task whose buffers are overflowing;
324 * NULL for a buffer overflow on the current cpu
325 * context: the ds context 265 * context: the ds context
326 * qual: the buffer type 266 * qual: the buffer type
327 */ 267 */
328static void ds_overflow(struct task_struct *task, struct ds_context *context, 268static void ds_overflow(struct ds_context *context, enum ds_qualifier qual)
329 enum ds_qualifier qual) 269{
330{ 270 switch (qual) {
331 if (!context) 271 case ds_bts: {
332 return; 272 struct bts_tracer *tracer =
333 273 container_of(context->owner[qual],
334 if (context->callback[qual]) 274 struct bts_tracer, ds);
335 (*context->callback[qual])(task); 275 if (tracer->ovfl)
336 276 tracer->ovfl(tracer);
337 /* todo: do some more overflow handling */ 277 }
278 break;
279 case ds_pebs: {
280 struct pebs_tracer *tracer =
281 container_of(context->owner[qual],
282 struct pebs_tracer, ds);
283 if (tracer->ovfl)
284 tracer->ovfl(tracer);
285 }
286 break;
287 }
338} 288}
339 289
340 290
341/* 291static void ds_install_ds_config(struct ds_context *context,
342 * Allocate a non-pageable buffer of the parameter size. 292 enum ds_qualifier qual,
343 * Checks the memory and the locked memory rlimit. 293 void *base, size_t size, size_t ith)
344 *
345 * Returns the buffer, if successful;
346 * NULL, if out of memory or rlimit exceeded.
347 *
348 * size: the requested buffer size in bytes
349 * pages (out): if not NULL, contains the number of pages reserved
350 */
351static inline void *ds_allocate_buffer(size_t size, unsigned int *pages)
352{ 294{
353 unsigned long rlim, vm, pgsz; 295 unsigned long buffer, adj;
354 void *buffer;
355
356 pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
357
358 rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
359 vm = current->mm->total_vm + pgsz;
360 if (rlim < vm)
361 return NULL;
362 296
363 rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; 297 /* adjust the buffer address and size to meet alignment
364 vm = current->mm->locked_vm + pgsz; 298 * constraints:
365 if (rlim < vm) 299 * - buffer is double-word aligned
366 return NULL; 300 * - size is multiple of record size
301 *
302 * We checked the size at the very beginning; we have enough
303 * space to do the adjustment.
304 */
305 buffer = (unsigned long)base;
367 306
368 buffer = kzalloc(size, GFP_KERNEL); 307 adj = ALIGN(buffer, DS_ALIGNMENT) - buffer;
369 if (!buffer) 308 buffer += adj;
370 return NULL; 309 size -= adj;
371 310
372 current->mm->total_vm += pgsz; 311 size /= ds_cfg.sizeof_rec[qual];
373 current->mm->locked_vm += pgsz; 312 size *= ds_cfg.sizeof_rec[qual];
374 313
375 if (pages) 314 ds_set(context->ds, qual, ds_buffer_base, buffer);
376 *pages = pgsz; 315 ds_set(context->ds, qual, ds_index, buffer);
316 ds_set(context->ds, qual, ds_absolute_maximum, buffer + size);
377 317
378 return buffer; 318 /* The value for 'no threshold' is -1, which will set the
319 * threshold outside of the buffer, just like we want it.
320 */
321 ds_set(context->ds, qual,
322 ds_interrupt_threshold, buffer + size - ith);
379} 323}
380 324
381static int ds_request(struct task_struct *task, void *base, size_t size, 325static int ds_request(struct ds_tracer *tracer, enum ds_qualifier qual,
382 ds_ovfl_callback_t ovfl, enum ds_qualifier qual) 326 struct task_struct *task,
327 void *base, size_t size, size_t th)
383{ 328{
384 struct ds_context *context; 329 struct ds_context *context;
385 unsigned long buffer, adj; 330 unsigned long irq;
386 const unsigned long alignment = (1 << 3); 331 int error;
387 int error = 0;
388 332
333 error = -EOPNOTSUPP;
389 if (!ds_cfg.sizeof_ds) 334 if (!ds_cfg.sizeof_ds)
390 return -EOPNOTSUPP; 335 goto out;
336
337 error = -EINVAL;
338 if (!base)
339 goto out;
391 340
392 /* we require some space to do alignment adjustments below */ 341 /* we require some space to do alignment adjustments below */
393 if (size < (alignment + ds_cfg.sizeof_rec[qual])) 342 error = -EINVAL;
394 return -EINVAL; 343 if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual]))
344 goto out;
395 345
396 /* buffer overflow notification is not yet implemented */ 346 if (th != (size_t)-1) {
397 if (ovfl) 347 th *= ds_cfg.sizeof_rec[qual];
398 return -EOPNOTSUPP;
399 348
349 error = -EINVAL;
350 if (size <= th)
351 goto out;
352 }
400 353
401 spin_lock(&ds_lock); 354 tracer->buffer = base;
355 tracer->size = size;
402 356
403 error = -ENOMEM; 357 error = -ENOMEM;
404 context = ds_alloc_context(task); 358 context = ds_get_context(task);
405 if (!context) 359 if (!context)
406 goto out_unlock; 360 goto out;
361 tracer->context = context;
362
363
364 spin_lock_irqsave(&ds_lock, irq);
407 365
408 error = -EPERM; 366 error = -EPERM;
409 if (!check_tracer(task)) 367 if (!check_tracer(task))
410 goto out_unlock; 368 goto out_unlock;
369 get_tracer(task);
411 370
412 error = -EALREADY;
413 if (context->owner[qual] == current)
414 goto out_unlock;
415 error = -EPERM; 371 error = -EPERM;
416 if (context->owner[qual] != NULL) 372 if (context->owner[qual])
417 goto out_unlock; 373 goto out_put_tracer;
418 context->owner[qual] = current; 374 context->owner[qual] = tracer;
419
420 spin_unlock(&ds_lock);
421
422
423 error = -ENOMEM;
424 if (!base) {
425 base = ds_allocate_buffer(size, &context->pages[qual]);
426 if (!base)
427 goto out_release;
428
429 context->buffer[qual] = base;
430 }
431 error = 0;
432
433 context->callback[qual] = ovfl;
434 375
435 /* adjust the buffer address and size to meet alignment 376 spin_unlock_irqrestore(&ds_lock, irq);
436 * constraints:
437 * - buffer is double-word aligned
438 * - size is multiple of record size
439 *
440 * We checked the size at the very beginning; we have enough
441 * space to do the adjustment.
442 */
443 buffer = (unsigned long)base;
444
445 adj = ALIGN(buffer, alignment) - buffer;
446 buffer += adj;
447 size -= adj;
448
449 size /= ds_cfg.sizeof_rec[qual];
450 size *= ds_cfg.sizeof_rec[qual];
451
452 ds_set(context->ds, qual, ds_buffer_base, buffer);
453 ds_set(context->ds, qual, ds_index, buffer);
454 ds_set(context->ds, qual, ds_absolute_maximum, buffer + size);
455 377
456 if (ovfl) {
457 /* todo: select a suitable interrupt threshold */
458 } else
459 ds_set(context->ds, qual,
460 ds_interrupt_threshold, buffer + size + 1);
461 378
462 /* we keep the context until ds_release */ 379 ds_install_ds_config(context, qual, base, size, th);
463 return error;
464 380
465 out_release: 381 return 0;
466 context->owner[qual] = NULL;
467 ds_put_context(context);
468 return error;
469 382
383 out_put_tracer:
384 put_tracer(task);
470 out_unlock: 385 out_unlock:
471 spin_unlock(&ds_lock); 386 spin_unlock_irqrestore(&ds_lock, irq);
472 ds_put_context(context); 387 ds_put_context(context);
388 tracer->context = NULL;
389 out:
473 return error; 390 return error;
474} 391}
475 392
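The reworked ds_request() acquires the context before taking ds_lock (now with spin_lock_irqsave), and each failure branch unwinds exactly what has been acquired so far: put_tracer before the unlock, ds_put_context after it. The shape of that goto-unwind error handling, as a self-contained sketch with stubbed resources (the names are illustrative, not the ds.c API):

    #include <errno.h>
    #include <stdio.h>

    static int acquire_a(void) { return 0; }   /* stubs for illustration */
    static void release_a(void) { }
    static int acquire_b(void) { return -1; }  /* force the error path */

    static int request_pair(void)
    {
        int error;

        error = -ENOMEM;
        if (acquire_a() < 0)
            goto out;

        error = -EPERM;
        if (acquire_b() < 0)
            goto out_release_a;

        return 0;               /* success: caller owns both resources */

    out_release_a:
        release_a();            /* unwind in reverse acquisition order */
    out:
        return error;
    }

    int main(void)
    {
        printf("%d\n", request_pair());  /* prints -EPERM's value */
        return 0;
    }
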
476int ds_request_bts(struct task_struct *task, void *base, size_t size, 393struct bts_tracer *ds_request_bts(struct task_struct *task,
477 ds_ovfl_callback_t ovfl) 394 void *base, size_t size,
395 bts_ovfl_callback_t ovfl, size_t th)
478{ 396{
479 return ds_request(task, base, size, ovfl, ds_bts); 397 struct bts_tracer *tracer;
480} 398 int error;
481 399
482int ds_request_pebs(struct task_struct *task, void *base, size_t size, 400 /* buffer overflow notification is not yet implemented */
483 ds_ovfl_callback_t ovfl) 401 error = -EOPNOTSUPP;
484{ 402 if (ovfl)
485 return ds_request(task, base, size, ovfl, ds_pebs); 403 goto out;
404
405 error = -ENOMEM;
406 tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
407 if (!tracer)
408 goto out;
409 tracer->ovfl = ovfl;
410
411 error = ds_request(&tracer->ds, ds_bts, task, base, size, th);
412 if (error < 0)
413 goto out_tracer;
414
415 return tracer;
416
417 out_tracer:
418 kfree(tracer);
419 out:
420 return ERR_PTR(error);
486} 421}
487 422
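With this change ds_request_bts() no longer returns an int: it returns a tracer handle with any errno folded into the pointer via ERR_PTR(), and callers such as ptrace_bts_config() later in this patch test it with IS_ERR()/PTR_ERR(). A userspace re-creation of that encoding, shown only to illustrate the convention:

    #include <stdio.h>

    #define MAX_ERRNO 4095

    /* Userspace copies of the kernel's ERR_PTR/IS_ERR helpers: errno
     * values occupy the top page of the address space, which no valid
     * kernel pointer can.
     */
    static inline void *ERR_PTR(long error) { return (void *)error; }
    static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
    static inline int IS_ERR(const void *ptr)
    {
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
    }

    int main(void)
    {
        void *tracer = ERR_PTR(-12);    /* -ENOMEM */

        if (IS_ERR(tracer))
            printf("request failed: %ld\n", PTR_ERR(tracer));
        return 0;
    }
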
488static int ds_release(struct task_struct *task, enum ds_qualifier qual) 423struct pebs_tracer *ds_request_pebs(struct task_struct *task,
424 void *base, size_t size,
425 pebs_ovfl_callback_t ovfl, size_t th)
489{ 426{
490 struct ds_context *context; 427 struct pebs_tracer *tracer;
491 int error; 428 int error;
492 429
493 context = ds_get_context(task); 430 /* buffer overflow notification is not yet implemented */
494 error = ds_validate_access(context, qual); 431 error = -EOPNOTSUPP;
495 if (error < 0) 432 if (ovfl)
496 goto out; 433 goto out;
497 434
498 kfree(context->buffer[qual]); 435 error = -ENOMEM;
499 context->buffer[qual] = NULL; 436 tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
437 if (!tracer)
438 goto out;
439 tracer->ovfl = ovfl;
500 440
501 current->mm->total_vm -= context->pages[qual]; 441 error = ds_request(&tracer->ds, ds_pebs, task, base, size, th);
502 current->mm->locked_vm -= context->pages[qual]; 442 if (error < 0)
503 context->pages[qual] = 0; 443 goto out_tracer;
504 context->owner[qual] = NULL;
505 444
506 /* 445 return tracer;
507 * we put the context twice: 446
508 * once for the ds_get_context 447 out_tracer:
509 * once for the corresponding ds_request 448 kfree(tracer);
510 */
511 ds_put_context(context);
512 out: 449 out:
513 ds_put_context(context); 450 return ERR_PTR(error);
514 return error;
515} 451}
516 452
517int ds_release_bts(struct task_struct *task) 453static void ds_release(struct ds_tracer *tracer, enum ds_qualifier qual)
518{ 454{
519 return ds_release(task, ds_bts); 455 BUG_ON(tracer->context->owner[qual] != tracer);
456 tracer->context->owner[qual] = NULL;
457
458 put_tracer(tracer->context->task);
459 ds_put_context(tracer->context);
520} 460}
521 461
522int ds_release_pebs(struct task_struct *task) 462int ds_release_bts(struct bts_tracer *tracer)
523{ 463{
524 return ds_release(task, ds_pebs); 464 if (!tracer)
465 return -EINVAL;
466
467 ds_release(&tracer->ds, ds_bts);
468 kfree(tracer);
469
470 return 0;
525} 471}
526 472
527static int ds_get_index(struct task_struct *task, size_t *pos, 473int ds_release_pebs(struct pebs_tracer *tracer)
528 enum ds_qualifier qual)
529{ 474{
530 struct ds_context *context; 475 if (!tracer)
531 unsigned long base, index; 476 return -EINVAL;
532 int error;
533 477
534 context = ds_get_context(task); 478 ds_release(&tracer->ds, ds_pebs);
535 error = ds_validate_access(context, qual); 479 kfree(tracer);
536 if (error < 0) 480
537 goto out; 481 return 0;
482}
483
484static size_t ds_get_index(struct ds_context *context, enum ds_qualifier qual)
485{
486 unsigned long base, index;
538 487
539 base = ds_get(context->ds, qual, ds_buffer_base); 488 base = ds_get(context->ds, qual, ds_buffer_base);
540 index = ds_get(context->ds, qual, ds_index); 489 index = ds_get(context->ds, qual, ds_index);
541 490
542 error = ((index - base) / ds_cfg.sizeof_rec[qual]); 491 return (index - base) / ds_cfg.sizeof_rec[qual];
543 if (pos)
544 *pos = error;
545 out:
546 ds_put_context(context);
547 return error;
548} 492}
549 493
550int ds_get_bts_index(struct task_struct *task, size_t *pos) 494int ds_get_bts_index(struct bts_tracer *tracer, size_t *pos)
551{ 495{
552 return ds_get_index(task, pos, ds_bts); 496 if (!tracer)
497 return -EINVAL;
498
499 if (!pos)
500 return -EINVAL;
501
502 *pos = ds_get_index(tracer->ds.context, ds_bts);
503
504 return 0;
553} 505}
554 506
555int ds_get_pebs_index(struct task_struct *task, size_t *pos) 507int ds_get_pebs_index(struct pebs_tracer *tracer, size_t *pos)
556{ 508{
557 return ds_get_index(task, pos, ds_pebs); 509 if (!tracer)
510 return -EINVAL;
511
512 if (!pos)
513 return -EINVAL;
514
515 *pos = ds_get_index(tracer->ds.context, ds_pebs);
516
517 return 0;
558} 518}
559 519
560static int ds_get_end(struct task_struct *task, size_t *pos, 520static size_t ds_get_end(struct ds_context *context, enum ds_qualifier qual)
561 enum ds_qualifier qual)
562{ 521{
563 struct ds_context *context; 522 unsigned long base, max;
564 unsigned long base, end;
565 int error;
566
567 context = ds_get_context(task);
568 error = ds_validate_access(context, qual);
569 if (error < 0)
570 goto out;
571 523
572 base = ds_get(context->ds, qual, ds_buffer_base); 524 base = ds_get(context->ds, qual, ds_buffer_base);
573 end = ds_get(context->ds, qual, ds_absolute_maximum); 525 max = ds_get(context->ds, qual, ds_absolute_maximum);
574 526
575 error = ((end - base) / ds_cfg.sizeof_rec[qual]); 527 return (max - base) / ds_cfg.sizeof_rec[qual];
576 if (pos)
577 *pos = error;
578 out:
579 ds_put_context(context);
580 return error;
581} 528}
582 529
583int ds_get_bts_end(struct task_struct *task, size_t *pos) 530int ds_get_bts_end(struct bts_tracer *tracer, size_t *pos)
584{ 531{
585 return ds_get_end(task, pos, ds_bts); 532 if (!tracer)
533 return -EINVAL;
534
535 if (!pos)
536 return -EINVAL;
537
538 *pos = ds_get_end(tracer->ds.context, ds_bts);
539
540 return 0;
586} 541}
587 542
588int ds_get_pebs_end(struct task_struct *task, size_t *pos) 543int ds_get_pebs_end(struct pebs_tracer *tracer, size_t *pos)
589{ 544{
590 return ds_get_end(task, pos, ds_pebs); 545 if (!tracer)
546 return -EINVAL;
547
548 if (!pos)
549 return -EINVAL;
550
551 *pos = ds_get_end(tracer->ds.context, ds_pebs);
552
553 return 0;
591} 554}
592 555
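Both accessors reduce raw DS-area pointers to record counts: the index field gives the number of records written so far, the absolute maximum gives the capacity. A toy computation with made-up addresses and an assumed record size:

    #include <stdio.h>

    int main(void)
    {
        unsigned long base = 0x1000, index = 0x1048, max = 0x10f0;
        unsigned long sizeof_rec = 24;   /* assumed: BTS, 3 * 8 bytes */

        printf("records written: %lu\n", (index - base) / sizeof_rec); /* 3 */
        printf("capacity:        %lu\n", (max - base) / sizeof_rec);   /* 10 */
        return 0;
    }
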
593static int ds_access(struct task_struct *task, size_t index, 556static int ds_access(struct ds_context *context, enum ds_qualifier qual,
594 const void **record, enum ds_qualifier qual) 557 size_t index, const void **record)
595{ 558{
596 struct ds_context *context;
597 unsigned long base, idx; 559 unsigned long base, idx;
598 int error;
599 560
600 if (!record) 561 if (!record)
601 return -EINVAL; 562 return -EINVAL;
602 563
603 context = ds_get_context(task);
604 error = ds_validate_access(context, qual);
605 if (error < 0)
606 goto out;
607
608 base = ds_get(context->ds, qual, ds_buffer_base); 564 base = ds_get(context->ds, qual, ds_buffer_base);
609 idx = base + (index * ds_cfg.sizeof_rec[qual]); 565 idx = base + (index * ds_cfg.sizeof_rec[qual]);
610 566
611 error = -EINVAL;
612 if (idx > ds_get(context->ds, qual, ds_absolute_maximum)) 567 if (idx > ds_get(context->ds, qual, ds_absolute_maximum))
613 goto out; 568 return -EINVAL;
614 569
615 *record = (const void *)idx; 570 *record = (const void *)idx;
616 error = ds_cfg.sizeof_rec[qual]; 571
617 out: 572 return ds_cfg.sizeof_rec[qual];
618 ds_put_context(context);
619 return error;
620} 573}
621 574
622int ds_access_bts(struct task_struct *task, size_t index, const void **record) 575int ds_access_bts(struct bts_tracer *tracer, size_t index,
576 const void **record)
623{ 577{
624 return ds_access(task, index, record, ds_bts); 578 if (!tracer)
579 return -EINVAL;
580
581 return ds_access(tracer->ds.context, ds_bts, index, record);
625} 582}
626 583
627int ds_access_pebs(struct task_struct *task, size_t index, const void **record) 584int ds_access_pebs(struct pebs_tracer *tracer, size_t index,
585 const void **record)
628{ 586{
629 return ds_access(task, index, record, ds_pebs); 587 if (!tracer)
588 return -EINVAL;
589
590 return ds_access(tracer->ds.context, ds_pebs, index, record);
630} 591}
631 592
632static int ds_write(struct task_struct *task, const void *record, size_t size, 593static int ds_write(struct ds_context *context, enum ds_qualifier qual,
633 enum ds_qualifier qual, int force) 594 const void *record, size_t size)
634{ 595{
635 struct ds_context *context; 596 int bytes_written = 0;
636 int error;
637 597
638 if (!record) 598 if (!record)
639 return -EINVAL; 599 return -EINVAL;
640 600
641 error = -EPERM;
642 context = ds_get_context(task);
643 if (!context)
644 goto out;
645
646 if (!force) {
647 error = ds_validate_access(context, qual);
648 if (error < 0)
649 goto out;
650 }
651
652 error = 0;
653 while (size) { 601 while (size) {
654 unsigned long base, index, end, write_end, int_th; 602 unsigned long base, index, end, write_end, int_th;
655 unsigned long write_size, adj_write_size; 603 unsigned long write_size, adj_write_size;
@@ -677,14 +625,14 @@ static int ds_write(struct task_struct *task, const void *record, size_t size,
677 write_end = end; 625 write_end = end;
678 626
679 if (write_end <= index) 627 if (write_end <= index)
680 goto out; 628 break;
681 629
682 write_size = min((unsigned long) size, write_end - index); 630 write_size = min((unsigned long) size, write_end - index);
683 memcpy((void *)index, record, write_size); 631 memcpy((void *)index, record, write_size);
684 632
685 record = (const char *)record + write_size; 633 record = (const char *)record + write_size;
686 size -= write_size; 634 size -= write_size;
687 error += write_size; 635 bytes_written += write_size;
688 636
689 adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; 637 adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
690 adj_write_size *= ds_cfg.sizeof_rec[qual]; 638 adj_write_size *= ds_cfg.sizeof_rec[qual];
@@ -699,47 +647,32 @@ static int ds_write(struct task_struct *task, const void *record, size_t size,
699 ds_set(context->ds, qual, ds_index, index); 647 ds_set(context->ds, qual, ds_index, index);
700 648
701 if (index >= int_th) 649 if (index >= int_th)
702 ds_overflow(task, context, qual); 650 ds_overflow(context, qual);
703 } 651 }
704 652
705 out: 653 return bytes_written;
706 ds_put_context(context);
707 return error;
708} 654}
709 655
710int ds_write_bts(struct task_struct *task, const void *record, size_t size) 656int ds_write_bts(struct bts_tracer *tracer, const void *record, size_t size)
711{ 657{
712 return ds_write(task, record, size, ds_bts, /* force = */ 0); 658 if (!tracer)
713} 659 return -EINVAL;
714 660
715int ds_write_pebs(struct task_struct *task, const void *record, size_t size) 661 return ds_write(tracer->ds.context, ds_bts, record, size);
716{
717 return ds_write(task, record, size, ds_pebs, /* force = */ 0);
718} 662}
719 663
720int ds_unchecked_write_bts(struct task_struct *task, 664int ds_write_pebs(struct pebs_tracer *tracer, const void *record, size_t size)
721 const void *record, size_t size)
722{ 665{
723 return ds_write(task, record, size, ds_bts, /* force = */ 1); 666 if (!tracer)
724} 667 return -EINVAL;
725 668
726int ds_unchecked_write_pebs(struct task_struct *task, 669 return ds_write(tracer->ds.context, ds_pebs, record, size);
727 const void *record, size_t size)
728{
729 return ds_write(task, record, size, ds_pebs, /* force = */ 1);
730} 670}
731 671
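ds_write() copies caller data into the tracing buffer, rounds each chunk down to whole records, wraps the index at the end of the buffer, and now returns a plain byte count instead of the old errno-or-count mixture. A toy circular write, simplified to whole records and assumed sizes:

    #include <stdio.h>
    #include <string.h>

    #define REC   8                     /* assumed record size */
    #define NREC  4                     /* assumed buffer capacity */

    static char buf[REC * NREC];
    static size_t idx;                  /* byte offset of next record */

    static size_t ds_write_toy(const void *rec, size_t size)
    {
        size_t written = 0;

        while (size >= REC) {
            memcpy(buf + idx, rec, REC);
            rec = (const char *)rec + REC;
            size -= REC;
            written += REC;

            idx += REC;
            if (idx >= sizeof(buf))
                idx = 0;                /* wrap, like the ds_index reset */
        }
        return written;
    }

    int main(void)
    {
        char rec[REC] = "record!";
        size_t i;

        for (i = 0; i < 6; i++)         /* 6 records, 4-record buffer */
            ds_write_toy(rec, sizeof(rec));
        printf("next index: %zu\n", idx);  /* 16: wrapped once */
        return 0;
    }
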
732static int ds_reset_or_clear(struct task_struct *task, 672static void ds_reset_or_clear(struct ds_context *context,
733 enum ds_qualifier qual, int clear) 673 enum ds_qualifier qual, int clear)
734{ 674{
735 struct ds_context *context;
736 unsigned long base, end; 675 unsigned long base, end;
737 int error;
738
739 context = ds_get_context(task);
740 error = ds_validate_access(context, qual);
741 if (error < 0)
742 goto out;
743 676
744 base = ds_get(context->ds, qual, ds_buffer_base); 677 base = ds_get(context->ds, qual, ds_buffer_base);
745 end = ds_get(context->ds, qual, ds_absolute_maximum); 678 end = ds_get(context->ds, qual, ds_absolute_maximum);
@@ -748,89 +681,100 @@ static int ds_reset_or_clear(struct task_struct *task,
748 memset((void *)base, 0, end - base); 681 memset((void *)base, 0, end - base);
749 682
750 ds_set(context->ds, qual, ds_index, base); 683 ds_set(context->ds, qual, ds_index, base);
751
752 error = 0;
753 out:
754 ds_put_context(context);
755 return error;
756} 684}
757 685
758int ds_reset_bts(struct task_struct *task) 686int ds_reset_bts(struct bts_tracer *tracer)
759{ 687{
760 return ds_reset_or_clear(task, ds_bts, /* clear = */ 0); 688 if (!tracer)
689 return -EINVAL;
690
691 ds_reset_or_clear(tracer->ds.context, ds_bts, /* clear = */ 0);
692
693 return 0;
761} 694}
762 695
763int ds_reset_pebs(struct task_struct *task) 696int ds_reset_pebs(struct pebs_tracer *tracer)
764{ 697{
765 return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0); 698 if (!tracer)
699 return -EINVAL;
700
701 ds_reset_or_clear(tracer->ds.context, ds_pebs, /* clear = */ 0);
702
703 return 0;
766} 704}
767 705
768int ds_clear_bts(struct task_struct *task) 706int ds_clear_bts(struct bts_tracer *tracer)
769{ 707{
770 return ds_reset_or_clear(task, ds_bts, /* clear = */ 1); 708 if (!tracer)
709 return -EINVAL;
710
711 ds_reset_or_clear(tracer->ds.context, ds_bts, /* clear = */ 1);
712
713 return 0;
771} 714}
772 715
773int ds_clear_pebs(struct task_struct *task) 716int ds_clear_pebs(struct pebs_tracer *tracer)
774{ 717{
775 return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1); 718 if (!tracer)
719 return -EINVAL;
720
721 ds_reset_or_clear(tracer->ds.context, ds_pebs, /* clear = */ 1);
722
723 return 0;
776} 724}
777 725
778int ds_get_pebs_reset(struct task_struct *task, u64 *value) 726int ds_get_pebs_reset(struct pebs_tracer *tracer, u64 *value)
779{ 727{
780 struct ds_context *context; 728 if (!tracer)
781 int error; 729 return -EINVAL;
782 730
783 if (!value) 731 if (!value)
784 return -EINVAL; 732 return -EINVAL;
785 733
786 context = ds_get_context(task); 734 *value = *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8));
787 error = ds_validate_access(context, ds_pebs);
788 if (error < 0)
789 goto out;
790
791 *value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8));
792 735
793 error = 0; 736 return 0;
794 out:
795 ds_put_context(context);
796 return error;
797} 737}
798 738
799int ds_set_pebs_reset(struct task_struct *task, u64 value) 739int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value)
800{ 740{
801 struct ds_context *context; 741 if (!tracer)
802 int error; 742 return -EINVAL;
803
804 context = ds_get_context(task);
805 error = ds_validate_access(context, ds_pebs);
806 if (error < 0)
807 goto out;
808 743
809 *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value; 744 *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value;
810 745
811 error = 0; 746 return 0;
812 out:
813 ds_put_context(context);
814 return error;
815} 747}
816 748
817static const struct ds_configuration ds_cfg_var = { 749static const struct ds_configuration ds_cfg_var = {
818 .sizeof_ds = sizeof(long) * 12, 750 .sizeof_ds = sizeof(long) * 12,
819 .sizeof_field = sizeof(long), 751 .sizeof_field = sizeof(long),
820 .sizeof_rec[ds_bts] = sizeof(long) * 3, 752 .sizeof_rec[ds_bts] = sizeof(long) * 3,
753#ifdef __i386__
821 .sizeof_rec[ds_pebs] = sizeof(long) * 10 754 .sizeof_rec[ds_pebs] = sizeof(long) * 10
755#else
756 .sizeof_rec[ds_pebs] = sizeof(long) * 18
757#endif
822}; 758};
823static const struct ds_configuration ds_cfg_64 = { 759static const struct ds_configuration ds_cfg_64 = {
824 .sizeof_ds = 8 * 12, 760 .sizeof_ds = 8 * 12,
825 .sizeof_field = 8, 761 .sizeof_field = 8,
826 .sizeof_rec[ds_bts] = 8 * 3, 762 .sizeof_rec[ds_bts] = 8 * 3,
763#ifdef __i386__
827 .sizeof_rec[ds_pebs] = 8 * 10 764 .sizeof_rec[ds_pebs] = 8 * 10
765#else
766 .sizeof_rec[ds_pebs] = 8 * 18
767#endif
828}; 768};
829 769
830static inline void 770static inline void
831ds_configure(const struct ds_configuration *cfg) 771ds_configure(const struct ds_configuration *cfg)
832{ 772{
833 ds_cfg = *cfg; 773 ds_cfg = *cfg;
774
775 printk(KERN_INFO "DS available\n");
776
777 BUG_ON(MAX_SIZEOF_DS < ds_cfg.sizeof_ds);
834} 778}
835 779
836void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) 780void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
@@ -838,17 +782,16 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
838 switch (c->x86) { 782 switch (c->x86) {
839 case 0x6: 783 case 0x6:
840 switch (c->x86_model) { 784 switch (c->x86_model) {
785 case 0 ... 0xC:
786 /* sorry, don't know about them */
787 break;
841 case 0xD: 788 case 0xD:
842 case 0xE: /* Pentium M */ 789 case 0xE: /* Pentium M */
843 ds_configure(&ds_cfg_var); 790 ds_configure(&ds_cfg_var);
844 break; 791 break;
845 case 0xF: /* Core2 */ 792 default: /* Core2, Atom, ... */
846 case 0x1C: /* Atom */
847 ds_configure(&ds_cfg_64); 793 ds_configure(&ds_cfg_64);
848 break; 794 break;
849 default:
850 /* sorry, don't know about them */
851 break;
852 } 795 }
853 break; 796 break;
854 case 0xF: 797 case 0xF:
@@ -875,7 +818,8 @@ void ds_free(struct ds_context *context)
875 * is dying. There should not be any user of that context left 818 * is dying. There should not be any user of that context left
876 * to disturb us, anymore. */ 819 * to disturb us, anymore. */
877 unsigned long leftovers = context->count; 820 unsigned long leftovers = context->count;
878 while (leftovers--) 821 while (leftovers--) {
822 put_tracer(context->task);
879 ds_put_context(context); 823 ds_put_context(context);
824 }
880} 825}
881#endif /* CONFIG_X86_DS */
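The model switch in ds_init_intel() now uses GCC's case-range extension (case 0 ... 0xC) and inverts the default: unknown family-6 models get the 64-bit layout instead of no configuration at all, which is how Core2 and Atom are now caught without listing every model number. A minimal demonstration of the dispatch (GCC extension, values illustrative):

    #include <stdio.h>

    static const char *classify(int model)
    {
        switch (model) {
        case 0 ... 0xC:                 /* GCC case-range extension */
            return "unknown";
        case 0xD:
        case 0xE:
            return "Pentium M";
        default:
            return "Core2, Atom, ...";
        }
    }

    int main(void)
    {
        printf("0x1C -> %s\n", classify(0x1C));  /* now hits the default */
        return 0;
    }
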
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 356bb1eb6e9a..bb137f7297ed 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -350,19 +350,21 @@ static int push_return_trace(unsigned long ret, unsigned long long time,
350 unsigned long func) 350 unsigned long func)
351{ 351{
352 int index; 352 int index;
353 struct thread_info *ti = current_thread_info(); 353
354 if (!current->ret_stack)
355 return -EBUSY;
354 356
355 /* The return trace stack is full */ 357 /* The return trace stack is full */
356 if (ti->curr_ret_stack == FTRACE_RET_STACK_SIZE - 1) { 358 if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
357 atomic_inc(&ti->trace_overrun); 359 atomic_inc(&current->trace_overrun);
358 return -EBUSY; 360 return -EBUSY;
359 } 361 }
360 362
361 index = ++ti->curr_ret_stack; 363 index = ++current->curr_ret_stack;
362 barrier(); 364 barrier();
363 ti->ret_stack[index].ret = ret; 365 current->ret_stack[index].ret = ret;
364 ti->ret_stack[index].func = func; 366 current->ret_stack[index].func = func;
365 ti->ret_stack[index].calltime = time; 367 current->ret_stack[index].calltime = time;
366 368
367 return 0; 369 return 0;
368} 370}
@@ -373,13 +375,12 @@ static void pop_return_trace(unsigned long *ret, unsigned long long *time,
373{ 375{
374 int index; 376 int index;
375 377
376 struct thread_info *ti = current_thread_info(); 378 index = current->curr_ret_stack;
377 index = ti->curr_ret_stack; 379 *ret = current->ret_stack[index].ret;
378 *ret = ti->ret_stack[index].ret; 380 *func = current->ret_stack[index].func;
379 *func = ti->ret_stack[index].func; 381 *time = current->ret_stack[index].calltime;
380 *time = ti->ret_stack[index].calltime; 382 *overrun = atomic_read(&current->trace_overrun);
381 *overrun = atomic_read(&ti->trace_overrun); 383 current->curr_ret_stack--;
382 ti->curr_ret_stack--;
383} 384}
384 385
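The return-trace bookkeeping moves from thread_info to task_struct, and push_return_trace() now fails fast with -EBUSY when the task has no ret_stack allocated (for instance, allocation failed at fork) or when the fixed-depth stack is full. A toy model of the push/pop pair over a single task's stack:

    #include <stdio.h>

    #define DEPTH 50                 /* FTRACE_RETFUNC_DEPTH in the patch */

    struct ret_stack_entry { unsigned long ret, func; };

    static struct ret_stack_entry stack[DEPTH];
    static int curr = -1;            /* like task->curr_ret_stack */
    static int have_stack = 1;       /* like task->ret_stack != NULL */

    static int push(unsigned long ret, unsigned long func)
    {
        if (!have_stack)
            return -1;               /* -EBUSY in the kernel */
        if (curr == DEPTH - 1)
            return -1;               /* overrun is counted in the kernel */
        curr++;
        stack[curr].ret = ret;
        stack[curr].func = func;
        return 0;
    }

    static unsigned long pop(void)
    {
        return stack[curr--].ret;
    }

    int main(void)
    {
        push(0x1000, 0x2000);
        printf("return to %#lx\n", pop());   /* 0x1000 */
        return 0;
    }
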
385/* 386/*
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 1f20608d4ca8..b0f61f0dcd0a 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -58,7 +58,7 @@ void __cpuinit mxcsr_feature_mask_init(void)
58 stts(); 58 stts();
59} 59}
60 60
61void __init init_thread_xstate(void) 61void __cpuinit init_thread_xstate(void)
62{ 62{
63 if (!HAVE_HWFP) { 63 if (!HAVE_HWFP) {
64 xstate_size = sizeof(struct i387_soft_struct); 64 xstate_size = sizeof(struct i387_soft_struct);
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index c9513e1ff28d..1fec0f9b1508 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -3608,27 +3608,7 @@ int __init io_apic_get_redir_entries (int ioapic)
3608 3608
3609int __init probe_nr_irqs(void) 3609int __init probe_nr_irqs(void)
3610{ 3610{
3611 int idx; 3611 return NR_IRQS;
3612 int nr = 0;
3613#ifndef CONFIG_XEN
3614 int nr_min = 32;
3615#else
3616 int nr_min = NR_IRQS;
3617#endif
3618
3619 for (idx = 0; idx < nr_ioapics; idx++)
3620 nr += io_apic_get_redir_entries(idx) + 1;
3621
3622 /* double it for hotplug and msi and nmi */
3623 nr <<= 1;
3624
3625 /* something wrong ? */
3626 if (nr < nr_min)
3627 nr = nr_min;
3628 if (WARN_ON(nr > NR_IRQS))
3629 nr = NR_IRQS;
3630
3631 return nr;
3632} 3612}
3633 3613
3634/* -------------------------------------------------------------------------- 3614/* --------------------------------------------------------------------------
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index e1e731d78f38..d28bbdc35e4e 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -1567,7 +1567,7 @@ static int __init calgary_parse_options(char *p)
1567 ++p; 1567 ++p;
1568 if (*p == '\0') 1568 if (*p == '\0')
1569 break; 1569 break;
1570 bridge = simple_strtol(p, &endp, 0); 1570 bridge = simple_strtoul(p, &endp, 0);
1571 if (p == endp) 1571 if (p == endp)
1572 break; 1572 break;
1573 1573
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 0a6d8c12e10d..2c8ec1ba75e6 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -668,14 +668,14 @@ static int ptrace_bts_read_record(struct task_struct *child, size_t index,
668 size_t bts_index, bts_end; 668 size_t bts_index, bts_end;
669 int error; 669 int error;
670 670
671 error = ds_get_bts_end(child, &bts_end); 671 error = ds_get_bts_end(child->bts, &bts_end);
672 if (error < 0) 672 if (error < 0)
673 return error; 673 return error;
674 674
675 if (bts_end <= index) 675 if (bts_end <= index)
676 return -EINVAL; 676 return -EINVAL;
677 677
678 error = ds_get_bts_index(child, &bts_index); 678 error = ds_get_bts_index(child->bts, &bts_index);
679 if (error < 0) 679 if (error < 0)
680 return error; 680 return error;
681 681
@@ -684,7 +684,7 @@ static int ptrace_bts_read_record(struct task_struct *child, size_t index,
684 if (bts_end <= bts_index) 684 if (bts_end <= bts_index)
685 bts_index -= bts_end; 685 bts_index -= bts_end;
686 686
687 error = ds_access_bts(child, bts_index, &bts_record); 687 error = ds_access_bts(child->bts, bts_index, &bts_record);
688 if (error < 0) 688 if (error < 0)
689 return error; 689 return error;
690 690
@@ -705,14 +705,14 @@ static int ptrace_bts_drain(struct task_struct *child,
705 size_t end, i; 705 size_t end, i;
706 int error; 706 int error;
707 707
708 error = ds_get_bts_index(child, &end); 708 error = ds_get_bts_index(child->bts, &end);
709 if (error < 0) 709 if (error < 0)
710 return error; 710 return error;
711 711
712 if (size < (end * sizeof(struct bts_struct))) 712 if (size < (end * sizeof(struct bts_struct)))
713 return -EIO; 713 return -EIO;
714 714
715 error = ds_access_bts(child, 0, (const void **)&raw); 715 error = ds_access_bts(child->bts, 0, (const void **)&raw);
716 if (error < 0) 716 if (error < 0)
717 return error; 717 return error;
718 718
@@ -723,18 +723,13 @@ static int ptrace_bts_drain(struct task_struct *child,
723 return -EFAULT; 723 return -EFAULT;
724 } 724 }
725 725
726 error = ds_clear_bts(child); 726 error = ds_clear_bts(child->bts);
727 if (error < 0) 727 if (error < 0)
728 return error; 728 return error;
729 729
730 return end; 730 return end;
731} 731}
732 732
733static void ptrace_bts_ovfl(struct task_struct *child)
734{
735 send_sig(child->thread.bts_ovfl_signal, child, 0);
736}
737
738static int ptrace_bts_config(struct task_struct *child, 733static int ptrace_bts_config(struct task_struct *child,
739 long cfg_size, 734 long cfg_size,
740 const struct ptrace_bts_config __user *ucfg) 735 const struct ptrace_bts_config __user *ucfg)
@@ -760,23 +755,45 @@ static int ptrace_bts_config(struct task_struct *child,
760 goto errout; 755 goto errout;
761 756
762 if (cfg.flags & PTRACE_BTS_O_ALLOC) { 757 if (cfg.flags & PTRACE_BTS_O_ALLOC) {
763 ds_ovfl_callback_t ovfl = NULL; 758 bts_ovfl_callback_t ovfl = NULL;
764 unsigned int sig = 0; 759 unsigned int sig = 0;
765 760
766 /* we ignore the error in case we were not tracing child */ 761 error = -EINVAL;
767 (void)ds_release_bts(child); 762 if (cfg.size < (10 * bts_cfg.sizeof_bts))
763 goto errout;
768 764
769 if (cfg.flags & PTRACE_BTS_O_SIGNAL) { 765 if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
770 if (!cfg.signal) 766 if (!cfg.signal)
771 goto errout; 767 goto errout;
772 768
769 error = -EOPNOTSUPP;
770 goto errout;
771
773 sig = cfg.signal; 772 sig = cfg.signal;
774 ovfl = ptrace_bts_ovfl;
775 } 773 }
776 774
777 error = ds_request_bts(child, /* base = */ NULL, cfg.size, ovfl); 775 if (child->bts) {
778 if (error < 0) 776 (void)ds_release_bts(child->bts);
777 kfree(child->bts_buffer);
778
779 child->bts = NULL;
780 child->bts_buffer = NULL;
781 }
782
783 error = -ENOMEM;
784 child->bts_buffer = kzalloc(cfg.size, GFP_KERNEL);
785 if (!child->bts_buffer)
786 goto errout;
787
788 child->bts = ds_request_bts(child, child->bts_buffer, cfg.size,
789 ovfl, /* th = */ (size_t)-1);
790 if (IS_ERR(child->bts)) {
791 error = PTR_ERR(child->bts);
792 kfree(child->bts_buffer);
793 child->bts = NULL;
794 child->bts_buffer = NULL;
779 goto errout; 795 goto errout;
796 }
780 797
781 child->thread.bts_ovfl_signal = sig; 798 child->thread.bts_ovfl_signal = sig;
782 } 799 }
@@ -823,15 +840,15 @@ static int ptrace_bts_status(struct task_struct *child,
823 if (cfg_size < sizeof(cfg)) 840 if (cfg_size < sizeof(cfg))
824 return -EIO; 841 return -EIO;
825 842
826 error = ds_get_bts_end(child, &end); 843 error = ds_get_bts_end(child->bts, &end);
827 if (error < 0) 844 if (error < 0)
828 return error; 845 return error;
829 846
830 error = ds_access_bts(child, /* index = */ 0, &base); 847 error = ds_access_bts(child->bts, /* index = */ 0, &base);
831 if (error < 0) 848 if (error < 0)
832 return error; 849 return error;
833 850
834 error = ds_access_bts(child, /* index = */ end, &max); 851 error = ds_access_bts(child->bts, /* index = */ end, &max);
835 if (error < 0) 852 if (error < 0)
836 return error; 853 return error;
837 854
@@ -884,10 +901,7 @@ static int ptrace_bts_write_record(struct task_struct *child,
884 return -EINVAL; 901 return -EINVAL;
885 } 902 }
886 903
887 /* The writing task will be the switched-to task on a context 904 return ds_write_bts(child->bts, bts_record, bts_cfg.sizeof_bts);
888 * switch. It needs to write into the switched-from task's BTS
889 * buffer. */
890 return ds_unchecked_write_bts(child, bts_record, bts_cfg.sizeof_bts);
891} 905}
892 906
893void ptrace_bts_take_timestamp(struct task_struct *tsk, 907void ptrace_bts_take_timestamp(struct task_struct *tsk,
@@ -929,17 +943,16 @@ void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c)
929 switch (c->x86) { 943 switch (c->x86) {
930 case 0x6: 944 case 0x6:
931 switch (c->x86_model) { 945 switch (c->x86_model) {
946 case 0 ... 0xC:
947 /* sorry, don't know about them */
948 break;
932 case 0xD: 949 case 0xD:
933 case 0xE: /* Pentium M */ 950 case 0xE: /* Pentium M */
934 bts_configure(&bts_cfg_pentium_m); 951 bts_configure(&bts_cfg_pentium_m);
935 break; 952 break;
936 case 0xF: /* Core2 */ 953 default: /* Core2, Atom, ... */
937 case 0x1C: /* Atom */
938 bts_configure(&bts_cfg_core2); 954 bts_configure(&bts_cfg_core2);
939 break; 955 break;
940 default:
941 /* sorry, don't know about them */
942 break;
943 } 956 }
944 break; 957 break;
945 case 0xF: 958 case 0xF:
@@ -973,13 +986,17 @@ void ptrace_disable(struct task_struct *child)
973 clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); 986 clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
974#endif 987#endif
975#ifdef CONFIG_X86_PTRACE_BTS 988#ifdef CONFIG_X86_PTRACE_BTS
976 (void)ds_release_bts(child); 989 if (child->bts) {
990 (void)ds_release_bts(child->bts);
991 kfree(child->bts_buffer);
992 child->bts_buffer = NULL;
977 993
978 child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; 994 child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
979 if (!child->thread.debugctlmsr) 995 if (!child->thread.debugctlmsr)
980 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); 996 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
981 997
982 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); 998 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
999 }
983#endif /* CONFIG_X86_PTRACE_BTS */ 1000#endif /* CONFIG_X86_PTRACE_BTS */
984} 1001}
985 1002
@@ -1111,9 +1128,16 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
1111 (child, data, (struct ptrace_bts_config __user *)addr); 1128 (child, data, (struct ptrace_bts_config __user *)addr);
1112 break; 1129 break;
1113 1130
1114 case PTRACE_BTS_SIZE: 1131 case PTRACE_BTS_SIZE: {
1115 ret = ds_get_bts_index(child, /* pos = */ NULL); 1132 size_t size;
1133
1134 ret = ds_get_bts_index(child->bts, &size);
1135 if (ret == 0) {
1136 BUG_ON(size != (int) size);
1137 ret = (int) size;
1138 }
1116 break; 1139 break;
1140 }
1117 1141
1118 case PTRACE_BTS_GET: 1142 case PTRACE_BTS_GET:
1119 ret = ptrace_bts_read_record 1143 ret = ptrace_bts_read_record
@@ -1121,7 +1145,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
1121 break; 1145 break;
1122 1146
1123 case PTRACE_BTS_CLEAR: 1147 case PTRACE_BTS_CLEAR:
1124 ret = ds_clear_bts(child); 1148 ret = ds_clear_bts(child->bts);
1125 break; 1149 break;
1126 1150
1127 case PTRACE_BTS_DRAIN: 1151 case PTRACE_BTS_DRAIN:
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index a03e7f6d90c3..10786af95545 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -6,6 +6,7 @@
6#include <linux/sched.h> 6#include <linux/sched.h>
7#include <linux/stacktrace.h> 7#include <linux/stacktrace.h>
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/uaccess.h>
9#include <asm/stacktrace.h> 10#include <asm/stacktrace.h>
10 11
11static void save_stack_warning(void *data, char *msg) 12static void save_stack_warning(void *data, char *msg)
@@ -83,3 +84,66 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
83 trace->entries[trace->nr_entries++] = ULONG_MAX; 84 trace->entries[trace->nr_entries++] = ULONG_MAX;
84} 85}
85EXPORT_SYMBOL_GPL(save_stack_trace_tsk); 86EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
87
88/* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */
89
90struct stack_frame {
91 const void __user *next_fp;
92 unsigned long ret_addr;
93};
94
95static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
96{
97 int ret;
98
99 if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
100 return 0;
101
102 ret = 1;
103 pagefault_disable();
104 if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
105 ret = 0;
106 pagefault_enable();
107
108 return ret;
109}
110
111static inline void __save_stack_trace_user(struct stack_trace *trace)
112{
113 const struct pt_regs *regs = task_pt_regs(current);
114 const void __user *fp = (const void __user *)regs->bp;
115
116 if (trace->nr_entries < trace->max_entries)
117 trace->entries[trace->nr_entries++] = regs->ip;
118
119 while (trace->nr_entries < trace->max_entries) {
120 struct stack_frame frame;
121
122 frame.next_fp = NULL;
123 frame.ret_addr = 0;
124 if (!copy_stack_frame(fp, &frame))
125 break;
126 if ((unsigned long)fp < regs->sp)
127 break;
128 if (frame.ret_addr) {
129 trace->entries[trace->nr_entries++] =
130 frame.ret_addr;
131 }
132 if (fp == frame.next_fp)
133 break;
134 fp = frame.next_fp;
135 }
136}
137
138void save_stack_trace_user(struct stack_trace *trace)
139{
140 /*
141 * Trace user stack if we are not a kernel thread
142 */
143 if (current->mm) {
144 __save_stack_trace_user(trace);
145 }
146 if (trace->nr_entries < trace->max_entries)
147 trace->entries[trace->nr_entries++] = ULONG_MAX;
148}
149
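The user stacktrace walker reads one struct stack_frame at a time through the saved frame pointer, with pagefault_disable() around the copy so a faulting access fails instead of sleeping; the walk stops when a copy fails, when fp drops below the stack pointer, or when the chain points at itself. A userspace model of the walk over a hand-built frame chain:

    #include <stdio.h>

    struct stack_frame {
        const struct stack_frame *next_fp;
        unsigned long ret_addr;
    };

    int main(void)
    {
        struct stack_frame f3 = { NULL, 0 };          /* outermost frame */
        struct stack_frame f2 = { &f3, 0x400500 };
        struct stack_frame f1 = { &f2, 0x400480 };
        const struct stack_frame *fp = &f1;

        while (fp) {
            if (fp->ret_addr)
                printf("<- %#lx\n", fp->ret_addr);
            if (fp == fp->next_fp)   /* corrupted chain: bail out */
                break;
            fp = fp->next_fp;
        }
        return 0;
    }
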
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index b13acb75e822..15c3e6999182 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -310,7 +310,7 @@ static void __init setup_xstate_init(void)
310/* 310/*
311 * Enable and initialize the xsave feature. 311 * Enable and initialize the xsave feature.
312 */ 312 */
313void __init xsave_cntxt_init(void) 313void __ref xsave_cntxt_init(void)
314{ 314{
315 unsigned int eax, ebx, ecx, edx; 315 unsigned int eax, ebx, ecx, edx;
316 316
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 3f1b81a83e2e..716d26f0e5d4 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -69,7 +69,7 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
69 int i; 69 int i;
70 70
71 if (!reset_value) { 71 if (!reset_value) {
72 reset_value = kmalloc(sizeof(unsigned) * num_counters, 72 reset_value = kmalloc(sizeof(reset_value[0]) * num_counters,
73 GFP_ATOMIC); 73 GFP_ATOMIC);
74 if (!reset_value) 74 if (!reset_value)
75 return; 75 return;
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 688936044dc9..636ef4caa52d 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -661,12 +661,11 @@ void xen_set_pgd(pgd_t *ptr, pgd_t val)
661 * For 64-bit, we must skip the Xen hole in the middle of the address 661 * For 64-bit, we must skip the Xen hole in the middle of the address
662 * space, just after the big x86-64 virtual hole. 662 * space, just after the big x86-64 virtual hole.
663 */ 663 */
664static int xen_pgd_walk(struct mm_struct *mm, 664static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
665 int (*func)(struct mm_struct *mm, struct page *, 665 int (*func)(struct mm_struct *mm, struct page *,
666 enum pt_level), 666 enum pt_level),
667 unsigned long limit) 667 unsigned long limit)
668{ 668{
669 pgd_t *pgd = mm->pgd;
670 int flush = 0; 669 int flush = 0;
671 unsigned hole_low, hole_high; 670 unsigned hole_low, hole_high;
672 unsigned pgdidx_limit, pudidx_limit, pmdidx_limit; 671 unsigned pgdidx_limit, pudidx_limit, pmdidx_limit;
@@ -753,6 +752,14 @@ out:
753 return flush; 752 return flush;
754} 753}
755 754
755static int xen_pgd_walk(struct mm_struct *mm,
756 int (*func)(struct mm_struct *mm, struct page *,
757 enum pt_level),
758 unsigned long limit)
759{
760 return __xen_pgd_walk(mm, mm->pgd, func, limit);
761}
762
756/* If we're using split pte locks, then take the page's lock and 763/* If we're using split pte locks, then take the page's lock and
757 return a pointer to it. Otherwise return NULL. */ 764 return a pointer to it. Otherwise return NULL. */
758static spinlock_t *xen_pte_lock(struct page *page, struct mm_struct *mm) 765static spinlock_t *xen_pte_lock(struct page *page, struct mm_struct *mm)
@@ -854,7 +861,7 @@ static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
854 861
855 xen_mc_batch(); 862 xen_mc_batch();
856 863
857 if (xen_pgd_walk(mm, xen_pin_page, USER_LIMIT)) { 864 if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) {
858 /* re-enable interrupts for flushing */ 865 /* re-enable interrupts for flushing */
859 xen_mc_issue(0); 866 xen_mc_issue(0);
860 867
@@ -998,7 +1005,7 @@ static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd)
998 PT_PMD); 1005 PT_PMD);
999#endif 1006#endif
1000 1007
1001 xen_pgd_walk(mm, xen_unpin_page, USER_LIMIT); 1008 __xen_pgd_walk(mm, pgd, xen_unpin_page, USER_LIMIT);
1002 1009
1003 xen_mc_issue(0); 1010 xen_mc_issue(0);
1004} 1011}
diff --git a/fs/seq_file.c b/fs/seq_file.c
index eba2eabcd2b8..f03220d7891b 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -357,7 +357,18 @@ int seq_printf(struct seq_file *m, const char *f, ...)
357} 357}
358EXPORT_SYMBOL(seq_printf); 358EXPORT_SYMBOL(seq_printf);
359 359
360static char *mangle_path(char *s, char *p, char *esc) 360/**
361 * mangle_path - mangle and copy path to buffer beginning
362 * @s: buffer start
363 * @p: beginning of path in above buffer
364 * @esc: set of characters that need escaping
365 *
 366 * Copy the path from @p to @s, replacing each occurrence of a character from
367 * @esc with usual octal escape.
368 * Returns pointer past last written character in @s, or NULL in case of
369 * failure.
370 */
371char *mangle_path(char *s, char *p, char *esc)
361{ 372{
362 while (s <= p) { 373 while (s <= p) {
363 char c = *p++; 374 char c = *p++;
@@ -376,6 +387,7 @@ static char *mangle_path(char *s, char *p, char *esc)
376 } 387 }
377 return NULL; 388 return NULL;
378} 389}
390EXPORT_SYMBOL_GPL(mangle_path);
379 391
380/* 392/*
381 * return the absolute path of 'dentry' residing in mount 'mnt'. 393 * return the absolute path of 'dentry' residing in mount 'mnt'.
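mangle_path() is exported here because the new sym-userobj trace option needs to print object paths: it copies a path (built right-to-left at the end of a buffer) back to the buffer start, replacing each character from the escape set with a three-digit octal sequence. A standalone copy of the logic, reconstructed from the kernel implementation for illustration:

    #include <stdio.h>
    #include <string.h>

    static char *mangle_path(char *s, char *p, const char *esc)
    {
        while (s <= p) {
            char c = *p++;

            if (!c) {
                return s;
            } else if (!strchr(esc, c)) {
                *s++ = c;
            } else if (s + 4 > p) {
                break;               /* no room left for the escape */
            } else {
                *s++ = '\\';
                *s++ = '0' + ((c & 0300) >> 6);
                *s++ = '0' + ((c & 070) >> 3);
                *s++ = '0' + (c & 07);
            }
        }
        return NULL;
    }

    int main(void)
    {
        char buf[64] = "...../tmp/a b";   /* path starts at offset 5 */
        char *end = mangle_path(buf, buf + 5, " \t\n");

        if (end) {
            *end = '\0';
            printf("%s\n", buf);          /* /tmp/a\040b */
        }
        return 0;
    }
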
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index f7ba4ea5e128..7854d87b97b2 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -257,6 +257,7 @@ extern int ftrace_dump_on_oops;
257 257
258extern void tracing_start(void); 258extern void tracing_start(void);
259extern void tracing_stop(void); 259extern void tracing_stop(void);
260extern void ftrace_off_permanent(void);
260 261
261extern void 262extern void
262ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); 263ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3);
@@ -290,6 +291,7 @@ ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 0)));
290 291
291static inline void tracing_start(void) { } 292static inline void tracing_start(void) { }
292static inline void tracing_stop(void) { } 293static inline void tracing_stop(void) { }
294static inline void ftrace_off_permanent(void) { }
293static inline int 295static inline int
294ftrace_printk(const char *fmt, ...) 296ftrace_printk(const char *fmt, ...)
295{ 297{
@@ -323,6 +325,8 @@ struct ftrace_retfunc {
323}; 325};
324 326
325#ifdef CONFIG_FUNCTION_RET_TRACER 327#ifdef CONFIG_FUNCTION_RET_TRACER
328#define FTRACE_RETFUNC_DEPTH 50
329#define FTRACE_RETSTACK_ALLOC_SIZE 32
326/* Type of a callback handler of tracing return function */ 330/* Type of a callback handler of tracing return function */
327typedef void (*trace_function_return_t)(struct ftrace_retfunc *); 331typedef void (*trace_function_return_t)(struct ftrace_retfunc *);
328 332
@@ -330,6 +334,12 @@ extern int register_ftrace_return(trace_function_return_t func);
330/* The current handler in use */ 334/* The current handler in use */
331extern trace_function_return_t ftrace_function_return; 335extern trace_function_return_t ftrace_function_return;
332extern void unregister_ftrace_return(void); 336extern void unregister_ftrace_return(void);
337
338extern void ftrace_retfunc_init_task(struct task_struct *t);
339extern void ftrace_retfunc_exit_task(struct task_struct *t);
340#else
341static inline void ftrace_retfunc_init_task(struct task_struct *t) { }
342static inline void ftrace_retfunc_exit_task(struct task_struct *t) { }
333#endif 343#endif
334 344
335#endif /* _LINUX_FTRACE_H */ 345#endif /* _LINUX_FTRACE_H */
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index e097c2e6b6dc..3bb87a753fa3 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -122,6 +122,7 @@ void ring_buffer_normalize_time_stamp(int cpu, u64 *ts);
122 122
123void tracing_on(void); 123void tracing_on(void);
124void tracing_off(void); 124void tracing_off(void);
125void tracing_off_permanent(void);
125 126
126enum ring_buffer_flags { 127enum ring_buffer_flags {
127 RB_FL_OVERWRITE = 1 << 0, 128 RB_FL_OVERWRITE = 1 << 0,
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c8e0db464206..d02a0ca70ee9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -96,6 +96,7 @@ struct exec_domain;
96struct futex_pi_state; 96struct futex_pi_state;
97struct robust_list_head; 97struct robust_list_head;
98struct bio; 98struct bio;
99struct bts_tracer;
99 100
100/* 101/*
101 * List of flags we want to share for kernel threads, 102 * List of flags we want to share for kernel threads,
@@ -1161,6 +1162,18 @@ struct task_struct {
1161 struct list_head ptraced; 1162 struct list_head ptraced;
1162 struct list_head ptrace_entry; 1163 struct list_head ptrace_entry;
1163 1164
1165#ifdef CONFIG_X86_PTRACE_BTS
1166 /*
1167 * This is the tracer handle for the ptrace BTS extension.
1168 * This field actually belongs to the ptracer task.
1169 */
1170 struct bts_tracer *bts;
1171 /*
1172 * The buffer to hold the BTS data.
1173 */
1174 void *bts_buffer;
1175#endif /* CONFIG_X86_PTRACE_BTS */
1176
1164 /* PID/PID hash table linkage. */ 1177 /* PID/PID hash table linkage. */
1165 struct pid_link pids[PIDTYPE_MAX]; 1178 struct pid_link pids[PIDTYPE_MAX];
1166 struct list_head thread_group; 1179 struct list_head thread_group;
@@ -1352,6 +1365,17 @@ struct task_struct {
1352 unsigned long default_timer_slack_ns; 1365 unsigned long default_timer_slack_ns;
1353 1366
1354 struct list_head *scm_work_list; 1367 struct list_head *scm_work_list;
1368#ifdef CONFIG_FUNCTION_RET_TRACER
1369 /* Index of current stored address in ret_stack */
1370 int curr_ret_stack;
1371 /* Stack of return addresses for return function tracing */
1372 struct ftrace_ret_stack *ret_stack;
1373 /*
1374 * Number of functions that haven't been traced
1375 * because of depth overrun.
1376 */
1377 atomic_t trace_overrun;
1378#endif
1355}; 1379};
1356 1380
1357/* 1381/*
@@ -2006,18 +2030,6 @@ static inline void setup_thread_stack(struct task_struct *p, struct task_struct
2006{ 2030{
2007 *task_thread_info(p) = *task_thread_info(org); 2031 *task_thread_info(p) = *task_thread_info(org);
2008 task_thread_info(p)->task = p; 2032 task_thread_info(p)->task = p;
2009
2010#ifdef CONFIG_FUNCTION_RET_TRACER
2011 /*
2012 * When fork() creates a child process, this function is called.
2013 * But the child task may not inherit the return addresses traced
2014 * by the return function tracer because it will directly execute
2015 * in userspace and will not return to kernel functions its parent
2016 * used.
2017 */
2018 task_thread_info(p)->curr_ret_stack = -1;
2019 atomic_set(&task_thread_info(p)->trace_overrun, 0);
2020#endif
2021} 2033}
2022 2034
2023static inline unsigned long *end_of_stack(struct task_struct *p) 2035static inline unsigned long *end_of_stack(struct task_struct *p)
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index dc50bcc282a8..b3dfa72f13b9 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -34,6 +34,7 @@ struct seq_operations {
34 34
35#define SEQ_SKIP 1 35#define SEQ_SKIP 1
36 36
37char *mangle_path(char *s, char *p, char *esc);
37int seq_open(struct file *, const struct seq_operations *); 38int seq_open(struct file *, const struct seq_operations *);
38ssize_t seq_read(struct file *, char __user *, size_t, loff_t *); 39ssize_t seq_read(struct file *, char __user *, size_t, loff_t *);
39loff_t seq_lseek(struct file *, loff_t, int); 40loff_t seq_lseek(struct file *, loff_t, int);
diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h
index b106fd8e0d5c..1a8cecc4f38c 100644
--- a/include/linux/stacktrace.h
+++ b/include/linux/stacktrace.h
@@ -15,9 +15,17 @@ extern void save_stack_trace_tsk(struct task_struct *tsk,
15 struct stack_trace *trace); 15 struct stack_trace *trace);
16 16
17extern void print_stack_trace(struct stack_trace *trace, int spaces); 17extern void print_stack_trace(struct stack_trace *trace, int spaces);
18
19#ifdef CONFIG_USER_STACKTRACE_SUPPORT
20extern void save_stack_trace_user(struct stack_trace *trace);
21#else
22# define save_stack_trace_user(trace) do { } while (0)
23#endif
24
18#else 25#else
19# define save_stack_trace(trace) do { } while (0) 26# define save_stack_trace(trace) do { } while (0)
20# define save_stack_trace_tsk(tsk, trace) do { } while (0) 27# define save_stack_trace_tsk(tsk, trace) do { } while (0)
28# define save_stack_trace_user(trace) do { } while (0)
21# define print_stack_trace(trace, spaces) do { } while (0) 29# define print_stack_trace(trace, spaces) do { } while (0)
22#endif 30#endif
23 31
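save_stack_trace_user() gets a real implementation only when the architecture selects CONFIG_USER_STACKTRACE_SUPPORT (x86 does in this series); other configs compile the call away with a no-op macro, so callers need no #ifdefs. The compile-away pattern, with CONFIG_DEMO standing in for the real option:

    #include <stdio.h>

    #ifdef CONFIG_DEMO
    extern void save_stack_trace_user(void);
    #else
    # define save_stack_trace_user() do { } while (0)
    #endif

    int main(void)
    {
        save_stack_trace_user();   /* empty statement without CONFIG_DEMO */
        printf("done\n");
        return 0;
    }
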
diff --git a/init/main.c b/init/main.c
index e810196bf2f2..79213c0785d2 100644
--- a/init/main.c
+++ b/init/main.c
@@ -723,7 +723,7 @@ int do_one_initcall(initcall_t fn)
723 disable_boot_trace(); 723 disable_boot_trace();
724 rettime = ktime_get(); 724 rettime = ktime_get();
725 delta = ktime_sub(rettime, calltime); 725 delta = ktime_sub(rettime, calltime);
726 ret.duration = (unsigned long long) delta.tv64 >> 10; 726 ret.duration = (unsigned long long) ktime_to_ns(delta) >> 10;
727 trace_boot_ret(&ret, fn); 727 trace_boot_ret(&ret, fn);
728 printk("initcall %pF returned %d after %Ld usecs\n", fn, 728 printk("initcall %pF returned %d after %Ld usecs\n", fn,
729 ret.result, ret.duration); 729 ret.result, ret.duration);
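The ktime fix replaces a direct .tv64 access with ktime_to_ns(), which also works when ktime_t is not the 64-bit scalar variant; the >> 10 that follows is a cheap nanoseconds-to-"usecs" approximation (divide by 1024 rather than 1000, about 2.4% low). For example:

    #include <stdio.h>

    int main(void)
    {
        unsigned long long ns = 1000000;           /* 1 ms */

        printf("approx usecs: %llu\n", ns >> 10);  /* 976, vs exact 1000 */
        return 0;
    }
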
diff --git a/kernel/exit.c b/kernel/exit.c
index 35c8ec2ba03a..e5ae36ebe8af 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1127,7 +1127,6 @@ NORET_TYPE void do_exit(long code)
1127 preempt_disable(); 1127 preempt_disable();
1128 /* causes final put_task_struct in finish_task_switch(). */ 1128 /* causes final put_task_struct in finish_task_switch(). */
1129 tsk->state = TASK_DEAD; 1129 tsk->state = TASK_DEAD;
1130
1131 schedule(); 1130 schedule();
1132 BUG(); 1131 BUG();
1133 /* Avoid "noreturn function does return". */ 1132 /* Avoid "noreturn function does return". */
diff --git a/kernel/fork.c b/kernel/fork.c
index ac62f43ee430..d6e1a3205f62 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -47,6 +47,7 @@
47#include <linux/mount.h> 47#include <linux/mount.h>
48#include <linux/audit.h> 48#include <linux/audit.h>
49#include <linux/memcontrol.h> 49#include <linux/memcontrol.h>
50#include <linux/ftrace.h>
50#include <linux/profile.h> 51#include <linux/profile.h>
51#include <linux/rmap.h> 52#include <linux/rmap.h>
52#include <linux/acct.h> 53#include <linux/acct.h>
@@ -139,6 +140,7 @@ void free_task(struct task_struct *tsk)
139 prop_local_destroy_single(&tsk->dirties); 140 prop_local_destroy_single(&tsk->dirties);
140 free_thread_info(tsk->stack); 141 free_thread_info(tsk->stack);
141 rt_mutex_debug_task_free(tsk); 142 rt_mutex_debug_task_free(tsk);
143 ftrace_retfunc_exit_task(tsk);
142 free_task_struct(tsk); 144 free_task_struct(tsk);
143} 145}
144EXPORT_SYMBOL(free_task); 146EXPORT_SYMBOL(free_task);
@@ -1269,6 +1271,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1269 total_forks++; 1271 total_forks++;
1270 spin_unlock(&current->sighand->siglock); 1272 spin_unlock(&current->sighand->siglock);
1271 write_unlock_irq(&tasklist_lock); 1273 write_unlock_irq(&tasklist_lock);
1274 ftrace_retfunc_init_task(p);
1272 proc_fork_connector(p); 1275 proc_fork_connector(p);
1273 cgroup_post_fork(p); 1276 cgroup_post_fork(p);
1274 return p; 1277 return p;
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index c9d74083746f..f77d3819ef57 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -22,7 +22,6 @@
22#include <linux/console.h> 22#include <linux/console.h>
23#include <linux/cpu.h> 23#include <linux/cpu.h>
24#include <linux/freezer.h> 24#include <linux/freezer.h>
25#include <linux/ftrace.h>
26 25
27#include "power.h" 26#include "power.h"
28 27
@@ -257,7 +256,7 @@ static int create_image(int platform_mode)
257 256
258int hibernation_snapshot(int platform_mode) 257int hibernation_snapshot(int platform_mode)
259{ 258{
260 int error, ftrace_save; 259 int error;
261 260
262 /* Free memory before shutting down devices. */ 261 /* Free memory before shutting down devices. */
263 error = swsusp_shrink_memory(); 262 error = swsusp_shrink_memory();
@@ -269,7 +268,6 @@ int hibernation_snapshot(int platform_mode)
269 goto Close; 268 goto Close;
270 269
271 suspend_console(); 270 suspend_console();
272 ftrace_save = __ftrace_enabled_save();
273 error = device_suspend(PMSG_FREEZE); 271 error = device_suspend(PMSG_FREEZE);
274 if (error) 272 if (error)
275 goto Recover_platform; 273 goto Recover_platform;
@@ -299,7 +297,6 @@ int hibernation_snapshot(int platform_mode)
299 Resume_devices: 297 Resume_devices:
300 device_resume(in_suspend ? 298 device_resume(in_suspend ?
301 (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); 299 (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
302 __ftrace_enabled_restore(ftrace_save);
303 resume_console(); 300 resume_console();
304 Close: 301 Close:
305 platform_end(platform_mode); 302 platform_end(platform_mode);
@@ -370,11 +367,10 @@ static int resume_target_kernel(void)
370 367
371int hibernation_restore(int platform_mode) 368int hibernation_restore(int platform_mode)
372{ 369{
373 int error, ftrace_save; 370 int error;
374 371
375 pm_prepare_console(); 372 pm_prepare_console();
376 suspend_console(); 373 suspend_console();
377 ftrace_save = __ftrace_enabled_save();
378 error = device_suspend(PMSG_QUIESCE); 374 error = device_suspend(PMSG_QUIESCE);
379 if (error) 375 if (error)
380 goto Finish; 376 goto Finish;
@@ -389,7 +385,6 @@ int hibernation_restore(int platform_mode)
389 platform_restore_cleanup(platform_mode); 385 platform_restore_cleanup(platform_mode);
390 device_resume(PMSG_RECOVER); 386 device_resume(PMSG_RECOVER);
391 Finish: 387 Finish:
392 __ftrace_enabled_restore(ftrace_save);
393 resume_console(); 388 resume_console();
394 pm_restore_console(); 389 pm_restore_console();
395 return error; 390 return error;
@@ -402,7 +397,7 @@ int hibernation_restore(int platform_mode)
402 397
403int hibernation_platform_enter(void) 398int hibernation_platform_enter(void)
404{ 399{
405 int error, ftrace_save; 400 int error;
406 401
407 if (!hibernation_ops) 402 if (!hibernation_ops)
408 return -ENOSYS; 403 return -ENOSYS;
@@ -417,7 +412,6 @@ int hibernation_platform_enter(void)
417 goto Close; 412 goto Close;
418 413
419 suspend_console(); 414 suspend_console();
420 ftrace_save = __ftrace_enabled_save();
421 error = device_suspend(PMSG_HIBERNATE); 415 error = device_suspend(PMSG_HIBERNATE);
422 if (error) { 416 if (error) {
423 if (hibernation_ops->recover) 417 if (hibernation_ops->recover)
@@ -452,7 +446,6 @@ int hibernation_platform_enter(void)
452 hibernation_ops->finish(); 446 hibernation_ops->finish();
453 Resume_devices: 447 Resume_devices:
454 device_resume(PMSG_RESTORE); 448 device_resume(PMSG_RESTORE);
455 __ftrace_enabled_restore(ftrace_save);
456 resume_console(); 449 resume_console();
457 Close: 450 Close:
458 hibernation_ops->end(); 451 hibernation_ops->end();
diff --git a/kernel/power/main.c b/kernel/power/main.c
index b8f7ce9473e8..613f16941b85 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -22,7 +22,6 @@
22#include <linux/freezer.h> 22#include <linux/freezer.h>
23#include <linux/vmstat.h> 23#include <linux/vmstat.h>
24#include <linux/syscalls.h> 24#include <linux/syscalls.h>
25#include <linux/ftrace.h>
26 25
27#include "power.h" 26#include "power.h"
28 27
@@ -317,7 +316,7 @@ static int suspend_enter(suspend_state_t state)
317 */ 316 */
318int suspend_devices_and_enter(suspend_state_t state) 317int suspend_devices_and_enter(suspend_state_t state)
319{ 318{
320 int error, ftrace_save; 319 int error;
321 320
322 if (!suspend_ops) 321 if (!suspend_ops)
323 return -ENOSYS; 322 return -ENOSYS;
@@ -328,7 +327,6 @@ int suspend_devices_and_enter(suspend_state_t state)
328 goto Close; 327 goto Close;
329 } 328 }
330 suspend_console(); 329 suspend_console();
331 ftrace_save = __ftrace_enabled_save();
332 suspend_test_start(); 330 suspend_test_start();
333 error = device_suspend(PMSG_SUSPEND); 331 error = device_suspend(PMSG_SUSPEND);
334 if (error) { 332 if (error) {
@@ -360,7 +358,6 @@ int suspend_devices_and_enter(suspend_state_t state)
360 suspend_test_start(); 358 suspend_test_start();
361 device_resume(PMSG_RESUME); 359 device_resume(PMSG_RESUME);
362 suspend_test_finish("resume devices"); 360 suspend_test_finish("resume devices");
363 __ftrace_enabled_restore(ftrace_save);
364 resume_console(); 361 resume_console();
365 Close: 362 Close:
366 if (suspend_ops->end) 363 if (suspend_ops->end)
diff --git a/kernel/sched.c b/kernel/sched.c
index 4de56108c86f..388d9db044ab 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5901,6 +5901,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
5901 * The idle tasks have their own, simple scheduling class: 5901 * The idle tasks have their own, simple scheduling class:
5902 */ 5902 */
5903 idle->sched_class = &idle_sched_class; 5903 idle->sched_class = &idle_sched_class;
5904 ftrace_retfunc_init_task(idle);
5904} 5905}
5905 5906
5906/* 5907/*
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 61e8cca6ff45..620feadff67a 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -3,6 +3,9 @@
3# select HAVE_FUNCTION_TRACER: 3# select HAVE_FUNCTION_TRACER:
4# 4#
5 5
6config USER_STACKTRACE_SUPPORT
7 bool
8
6config NOP_TRACER 9config NOP_TRACER
7 bool 10 bool
8 11
@@ -25,6 +28,9 @@ config HAVE_DYNAMIC_FTRACE
25config HAVE_FTRACE_MCOUNT_RECORD 28config HAVE_FTRACE_MCOUNT_RECORD
26 bool 29 bool
27 30
31config HAVE_HW_BRANCH_TRACER
32 bool
33
28config TRACER_MAX_TRACE 34config TRACER_MAX_TRACE
29 bool 35 bool
30 36
@@ -230,6 +236,14 @@ config STACK_TRACER
230 236
231 Say N if unsure. 237 Say N if unsure.
232 238
239config BTS_TRACER
240 depends on HAVE_HW_BRANCH_TRACER
241 bool "Trace branches"
242 select TRACING
243 help
244 This tracer records all branches on the system in a circular
 245 buffer, giving access to the last N branches for each CPU.
246
233config DYNAMIC_FTRACE 247config DYNAMIC_FTRACE
234 bool "enable/disable ftrace tracepoints dynamically" 248 bool "enable/disable ftrace tracepoints dynamically"
235 depends on FUNCTION_TRACER 249 depends on FUNCTION_TRACER
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 1a8c9259dc69..cef4bcb4e822 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -31,5 +31,6 @@ obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
31obj-$(CONFIG_BOOT_TRACER) += trace_boot.o 31obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
32obj-$(CONFIG_FUNCTION_RET_TRACER) += trace_functions_return.o 32obj-$(CONFIG_FUNCTION_RET_TRACER) += trace_functions_return.o
33obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o 33obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
34obj-$(CONFIG_BTS_TRACER) += trace_bts.o
34 35
35libftrace-y := ftrace.o 36libftrace-y := ftrace.o
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index f212da486689..53042f118f23 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1498,10 +1498,77 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
1498 1498
1499#ifdef CONFIG_FUNCTION_RET_TRACER 1499#ifdef CONFIG_FUNCTION_RET_TRACER
1500 1500
1501static atomic_t ftrace_retfunc_active;
1502
1501/* The callback that hooks the return of a function */ 1503/* The callback that hooks the return of a function */
1502trace_function_return_t ftrace_function_return = 1504trace_function_return_t ftrace_function_return =
1503 (trace_function_return_t)ftrace_stub; 1505 (trace_function_return_t)ftrace_stub;
1504 1506
1507
1508/* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */
1509static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
1510{
1511 int i;
1512 int ret = 0;
1513 unsigned long flags;
1514 int start = 0, end = FTRACE_RETSTACK_ALLOC_SIZE;
1515 struct task_struct *g, *t;
1516
1517 for (i = 0; i < FTRACE_RETSTACK_ALLOC_SIZE; i++) {
1518 ret_stack_list[i] = kmalloc(FTRACE_RETFUNC_DEPTH
1519 * sizeof(struct ftrace_ret_stack),
1520 GFP_KERNEL);
1521 if (!ret_stack_list[i]) {
1522 start = 0;
1523 end = i;
1524 ret = -ENOMEM;
1525 goto free;
1526 }
1527 }
1528
1529 read_lock_irqsave(&tasklist_lock, flags);
1530 do_each_thread(g, t) {
1531 if (start == end) {
1532 ret = -EAGAIN;
1533 goto unlock;
1534 }
1535
1536 if (t->ret_stack == NULL) {
1537 t->ret_stack = ret_stack_list[start++];
1538 t->curr_ret_stack = -1;
1539 atomic_set(&t->trace_overrun, 0);
1540 }
1541 } while_each_thread(g, t);
1542
1543unlock:
1544 read_unlock_irqrestore(&tasklist_lock, flags);
1545free:
1546 for (i = start; i < end; i++)
1547 kfree(ret_stack_list[i]);
1548 return ret;
1549}
1550
1551/* Allocate a return stack for each task */
1552static int start_return_tracing(void)
1553{
1554 struct ftrace_ret_stack **ret_stack_list;
1555 int ret;
1556
1557 ret_stack_list = kmalloc(FTRACE_RETSTACK_ALLOC_SIZE *
1558 sizeof(struct ftrace_ret_stack *),
1559 GFP_KERNEL);
1560
1561 if (!ret_stack_list)
1562 return -ENOMEM;
1563
1564 do {
1565 ret = alloc_retstack_tasklist(ret_stack_list);
1566 } while (ret == -EAGAIN);
1567
1568 kfree(ret_stack_list);
1569 return ret;
1570}
1571
1505int register_ftrace_return(trace_function_return_t func) 1572int register_ftrace_return(trace_function_return_t func)
1506{ 1573{
1507 int ret = 0; 1574 int ret = 0;
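
alloc_retstack_tasklist() above cannot sleep in kmalloc() while holding tasklist_lock, so it allocates a batch first, hands the stacks out under the lock, and returns -EAGAIN when tasks forked in the meantime exhaust the batch; start_return_tracing() simply loops until a pass completes. The same batch-and-retry shape as a standalone sketch:

#include <errno.h>
#include <stdlib.h>

#define BATCH 4

/* Give every NULL slot in tasks[] its own buffer, BATCH allocations
 * at a time, without allocating "under the lock".  Returns -EAGAIN
 * when the batch runs out before the table is covered, so the caller
 * loops, mirroring alloc_retstack_tasklist() racing against fork(). */
static int assign_batch(void *tasks[], int ntasks)
{
        void *batch[BATCH];
        int i, next = 0;

        for (i = 0; i < BATCH; i++) {
                batch[i] = malloc(64);
                if (!batch[i]) {
                        while (i--)
                                free(batch[i]);
                        return -ENOMEM;
                }
        }
        /* in the kernel this walk runs under read_lock(&tasklist_lock) */
        for (i = 0; i < ntasks; i++) {
                if (tasks[i])
                        continue;               /* already has one */
                if (next == BATCH)
                        break;                  /* batch exhausted */
                tasks[i] = batch[next++];
        }
        while (next < BATCH)
                free(batch[next++]);            /* return the leftovers */
        return i < ntasks ? -EAGAIN : 0;
}

int main(void)
{
        void *tasks[10] = { 0 };
        int i, ret;

        do {
                ret = assign_batch(tasks, 10);  /* start_return_tracing() loop */
        } while (ret == -EAGAIN);
        for (i = 0; i < 10; i++)
                free(tasks[i]);
        return ret ? 1 : 0;
}
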
@@ -1516,7 +1583,12 @@ int register_ftrace_return(trace_function_return_t func)
1516 ret = -EBUSY; 1583 ret = -EBUSY;
1517 goto out; 1584 goto out;
1518 } 1585 }
1519 1586 atomic_inc(&ftrace_retfunc_active);
1587 ret = start_return_tracing();
1588 if (ret) {
1589 atomic_dec(&ftrace_retfunc_active);
1590 goto out;
1591 }
1520 ftrace_tracing_type = FTRACE_TYPE_RETURN; 1592 ftrace_tracing_type = FTRACE_TYPE_RETURN;
1521 ftrace_function_return = func; 1593 ftrace_function_return = func;
1522 ftrace_startup(); 1594 ftrace_startup();
@@ -1530,6 +1602,7 @@ void unregister_ftrace_return(void)
1530{ 1602{
1531 mutex_lock(&ftrace_sysctl_lock); 1603 mutex_lock(&ftrace_sysctl_lock);
1532 1604
1605 atomic_dec(&ftrace_retfunc_active);
1533 ftrace_function_return = (trace_function_return_t)ftrace_stub; 1606 ftrace_function_return = (trace_function_return_t)ftrace_stub;
1534 ftrace_shutdown(); 1607 ftrace_shutdown();
1535 /* Restore normal tracing type */ 1608 /* Restore normal tracing type */
@@ -1537,6 +1610,32 @@ void unregister_ftrace_return(void)
1537 1610
1538 mutex_unlock(&ftrace_sysctl_lock); 1611 mutex_unlock(&ftrace_sysctl_lock);
1539} 1612}
1613
1614/* Allocate a return stack for newly created task */
1615void ftrace_retfunc_init_task(struct task_struct *t)
1616{
1617 if (atomic_read(&ftrace_retfunc_active)) {
1618 t->ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH
1619 * sizeof(struct ftrace_ret_stack),
1620 GFP_KERNEL);
1621 if (!t->ret_stack)
1622 return;
1623 t->curr_ret_stack = -1;
1624 atomic_set(&t->trace_overrun, 0);
1625 } else
1626 t->ret_stack = NULL;
1627}
1628
1629void ftrace_retfunc_exit_task(struct task_struct *t)
1630{
1631 struct ftrace_ret_stack *ret_stack = t->ret_stack;
1632
1633 t->ret_stack = NULL;
1634 /* NULL must become visible to IRQs before we free it: */
1635 barrier();
1636
1637 kfree(ret_stack);
1638}
1540#endif 1639#endif
1541 1640
1542 1641
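
ftrace_retfunc_exit_task() clears the pointer and forces the store to be emitted before the kfree(), so an mcount callback interrupting the exit path sees either a valid stack or NULL, never freed memory. The same publish-then-free ordering as a standalone sketch:

#include <stdlib.h>

struct task { void *ret_stack; };

/* An asynchronous reader (in the kernel, the tracing hook running
 * from an IRQ) that tolerates NULL. */
static void reader(struct task *t)
{
        void *rs = t->ret_stack;

        if (!rs)
                return;                 /* tracing is off for this task */
        /* ... rs is safe to use here ... */
}

static void exit_task(struct task *t)
{
        void *rs = t->ret_stack;

        t->ret_stack = NULL;
        /* barrier() in the kernel: make the NULL store visible before
         * the free.  A compiler barrier suffices there because the
         * only racing reader is an IRQ on the same CPU. */
        __asm__ __volatile__("" ::: "memory");
        free(rs);
}

int main(void)
{
        struct task t = { malloc(16) };

        reader(&t);
        exit_task(&t);
        reader(&t);                     /* sees NULL and bails out */
        return 0;
}
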
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 85ced143c2c4..e206951603c1 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -18,8 +18,46 @@
18 18
19#include "trace.h" 19#include "trace.h"
20 20
21/* Global flag to disable all recording to ring buffers */ 21/*
22static int ring_buffers_off __read_mostly; 22 * A fast way to enable or disable all ring buffers is to
23 * call tracing_on or tracing_off. Turning off the ring buffers
24 * prevents all ring buffers from being recorded to.
 25 * Turning this switch on makes it OK to write to the
26 * ring buffer, if the ring buffer is enabled itself.
27 *
 28 * There are three layers that must be on in order to write
29 * to the ring buffer.
30 *
31 * 1) This global flag must be set.
32 * 2) The ring buffer must be enabled for recording.
33 * 3) The per cpu buffer must be enabled for recording.
34 *
35 * In case of an anomaly, this global flag has a bit set that
 36 * will permanently disable all ring buffers.
37 */
38
39/*
40 * Global flag to disable all recording to ring buffers
41 * This has two bits: ON, DISABLED
42 *
43 * ON DISABLED
44 * ---- ----------
45 * 0 0 : ring buffers are off
46 * 1 0 : ring buffers are on
47 * X 1 : ring buffers are permanently disabled
48 */
49
50enum {
51 RB_BUFFERS_ON_BIT = 0,
52 RB_BUFFERS_DISABLED_BIT = 1,
53};
54
55enum {
56 RB_BUFFERS_ON = 1 << RB_BUFFERS_ON_BIT,
57 RB_BUFFERS_DISABLED = 1 << RB_BUFFERS_DISABLED_BIT,
58};
59
60static long ring_buffer_flags __read_mostly = RB_BUFFERS_ON;
23 61
24/** 62/**
25 * tracing_on - enable all tracing buffers 63 * tracing_on - enable all tracing buffers
@@ -29,7 +67,7 @@ static int ring_buffers_off __read_mostly;
29 */ 67 */
30void tracing_on(void) 68void tracing_on(void)
31{ 69{
32 ring_buffers_off = 0; 70 set_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
33} 71}
34 72
35/** 73/**
@@ -42,7 +80,18 @@ void tracing_on(void)
42 */ 80 */
43void tracing_off(void) 81void tracing_off(void)
44{ 82{
45 ring_buffers_off = 1; 83 clear_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
84}
85
86/**
87 * tracing_off_permanent - permanently disable ring buffers
88 *
89 * This function, once called, will disable all ring buffers
 90 * permanently.
91 */
92void tracing_off_permanent(void)
93{
94 set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags);
46} 95}
47 96
48#include "trace.h" 97#include "trace.h"
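
Because "on" is now just one bit of a wider word, the writer-side checks compare for equality with RB_BUFFERS_ON: once the DISABLED bit is set, the word can never equal RB_BUFFERS_ON again, no matter how often tracing_on() is called. A compact standalone sketch of that state machine (the kernel's atomic set_bit/clear_bit are simplified to plain operations here):

#include <stdio.h>

enum { RB_ON = 1 << 0, RB_DISABLED = 1 << 1 };

static long flags = RB_ON;

/* Writers check for equality, not just the ON bit. */
static int can_record(void) { return flags == RB_ON; }

int main(void)
{
        printf("%d\n", can_record());   /* 1: recording allowed */
        flags &= ~RB_ON;                /* tracing_off() */
        printf("%d\n", can_record());   /* 0 */
        flags |= RB_ON;                 /* tracing_on() */
        flags |= RB_DISABLED;           /* tracing_off_permanent() */
        printf("%d\n", can_record());   /* 0, and 0 forever */
        return 0;
}
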
@@ -1185,7 +1234,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
1185 struct ring_buffer_event *event; 1234 struct ring_buffer_event *event;
1186 int cpu, resched; 1235 int cpu, resched;
1187 1236
1188 if (ring_buffers_off) 1237 if (ring_buffer_flags != RB_BUFFERS_ON)
1189 return NULL; 1238 return NULL;
1190 1239
1191 if (atomic_read(&buffer->record_disabled)) 1240 if (atomic_read(&buffer->record_disabled))
@@ -1297,7 +1346,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
1297 int ret = -EBUSY; 1346 int ret = -EBUSY;
1298 int cpu, resched; 1347 int cpu, resched;
1299 1348
1300 if (ring_buffers_off) 1349 if (ring_buffer_flags != RB_BUFFERS_ON)
1301 return -EBUSY; 1350 return -EBUSY;
1302 1351
1303 if (atomic_read(&buffer->record_disabled)) 1352 if (atomic_read(&buffer->record_disabled))
@@ -2178,12 +2227,14 @@ static ssize_t
2178rb_simple_read(struct file *filp, char __user *ubuf, 2227rb_simple_read(struct file *filp, char __user *ubuf,
2179 size_t cnt, loff_t *ppos) 2228 size_t cnt, loff_t *ppos)
2180{ 2229{
2181 int *p = filp->private_data; 2230 long *p = filp->private_data;
2182 char buf[64]; 2231 char buf[64];
2183 int r; 2232 int r;
2184 2233
2185 /* !ring_buffers_off == tracing_on */ 2234 if (test_bit(RB_BUFFERS_DISABLED_BIT, p))
2186 r = sprintf(buf, "%d\n", !*p); 2235 r = sprintf(buf, "permanently disabled\n");
2236 else
2237 r = sprintf(buf, "%d\n", test_bit(RB_BUFFERS_ON_BIT, p));
2187 2238
2188 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2239 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2189} 2240}
@@ -2192,7 +2243,7 @@ static ssize_t
2192rb_simple_write(struct file *filp, const char __user *ubuf, 2243rb_simple_write(struct file *filp, const char __user *ubuf,
2193 size_t cnt, loff_t *ppos) 2244 size_t cnt, loff_t *ppos)
2194{ 2245{
2195 int *p = filp->private_data; 2246 long *p = filp->private_data;
2196 char buf[64]; 2247 char buf[64];
2197 long val; 2248 long val;
2198 int ret; 2249 int ret;
@@ -2209,8 +2260,10 @@ rb_simple_write(struct file *filp, const char __user *ubuf,
2209 if (ret < 0) 2260 if (ret < 0)
2210 return ret; 2261 return ret;
2211 2262
2212 /* !ring_buffers_off == tracing_on */ 2263 if (val)
2213 *p = !val; 2264 set_bit(RB_BUFFERS_ON_BIT, p);
2265 else
2266 clear_bit(RB_BUFFERS_ON_BIT, p);
2214 2267
2215 (*ppos)++; 2268 (*ppos)++;
2216 2269
@@ -2232,7 +2285,7 @@ static __init int rb_init_debugfs(void)
2232 d_tracer = tracing_init_dentry(); 2285 d_tracer = tracing_init_dentry();
2233 2286
2234 entry = debugfs_create_file("tracing_on", 0644, d_tracer, 2287 entry = debugfs_create_file("tracing_on", 0644, d_tracer,
2235 &ring_buffers_off, &rb_simple_fops); 2288 &ring_buffer_flags, &rb_simple_fops);
2236 if (!entry) 2289 if (!entry)
2237 pr_warning("Could not create debugfs 'tracing_on' entry\n"); 2290 pr_warning("Could not create debugfs 'tracing_on' entry\n");
2238 2291
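
The debugfs file keeps its 0/1 interface even though the backing variable changed from an inverted int to a flag word. A hedged userspace sketch of driving it; the mount point is an assumption, so adjust it to wherever debugfs lives on your system:

#include <fcntl.h>
#include <unistd.h>

static int set_tracing_on(int on)
{
        int fd = open("/debug/tracing/tracing_on", O_WRONLY);

        if (fd < 0)
                return -1;
        if (write(fd, on ? "1" : "0", 1) != 1) {
                close(fd);
                return -1;
        }
        return close(fd);
}

int main(void)
{
        return set_tracing_on(0) ? 1 : 0;       /* pause recording */
}

Reading the same file back reports "permanently disabled" once the sticky bit is set, per rb_simple_read() above.
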
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 4ee6f0375222..8df8fdd69c95 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -30,6 +30,7 @@
30#include <linux/gfp.h> 30#include <linux/gfp.h>
31#include <linux/fs.h> 31#include <linux/fs.h>
32#include <linux/kprobes.h> 32#include <linux/kprobes.h>
33#include <linux/seq_file.h>
33#include <linux/writeback.h> 34#include <linux/writeback.h>
34 35
35#include <linux/stacktrace.h> 36#include <linux/stacktrace.h>
@@ -275,6 +276,8 @@ static const char *trace_options[] = {
275 "ftrace_preempt", 276 "ftrace_preempt",
276 "branch", 277 "branch",
277 "annotate", 278 "annotate",
279 "userstacktrace",
280 "sym-userobj",
278 NULL 281 NULL
279}; 282};
280 283
@@ -421,6 +424,28 @@ trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
421 return trace_seq_putmem(s, hex, j); 424 return trace_seq_putmem(s, hex, j);
422} 425}
423 426
427static int
428trace_seq_path(struct trace_seq *s, struct path *path)
429{
430 unsigned char *p;
431
432 if (s->len >= (PAGE_SIZE - 1))
433 return 0;
434 p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
435 if (!IS_ERR(p)) {
436 p = mangle_path(s->buffer + s->len, p, "\n");
437 if (p) {
438 s->len = p - s->buffer;
439 return 1;
440 }
441 } else {
442 s->buffer[s->len++] = '?';
443 return 1;
444 }
445
446 return 0;
447}
448
424static void 449static void
425trace_seq_reset(struct trace_seq *s) 450trace_seq_reset(struct trace_seq *s)
426{ 451{
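
trace_seq_path() leans on the seq_file helper mangle_path() so a pathname containing a newline cannot split one trace record across two lines. A standalone approximation of that escaping, modeled on fs/seq_file.c rather than copied from it:

#include <stdio.h>
#include <string.h>

/* Escape bytes listed in 'esc' as \ooo, rewriting in place from src
 * toward dst.  The source string sits at the end of the buffer, as
 * d_path() leaves it, so the escaped copy can grow from the front;
 * dst never overtakes src thanks to the room check. */
static char *mangle(char *dst, char *src, const char *esc)
{
        while (dst <= src) {
                char c = *src++;

                if (!c)
                        return dst;
                if (!strchr(esc, c)) {
                        *dst++ = c;
                } else if (dst + 4 > src) {
                        return NULL;            /* escape won't fit */
                } else {
                        *dst++ = '\\';
                        *dst++ = '0' + ((c >> 6) & 7);
                        *dst++ = '0' + ((c >> 3) & 7);
                        *dst++ = '0' + (c & 7);
                }
        }
        return NULL;
}

int main(void)
{
        char buf[64];
        char *end;

        strcpy(buf + 40, "/tmp/odd\nname");     /* path at buffer end */
        end = mangle(buf, buf + 40, "\n");
        if (end) {
                *end = '\0';
                printf("%s\n", buf);            /* /tmp/odd\012name */
        }
        return 0;
}
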
@@ -661,6 +686,21 @@ static int trace_stop_count;
661static DEFINE_SPINLOCK(tracing_start_lock); 686static DEFINE_SPINLOCK(tracing_start_lock);
662 687
663/** 688/**
689 * ftrace_off_permanent - disable all ftrace code permanently
690 *
 691 * This should only be called when a serious anomaly has
 692 * been detected. This will turn off function tracing,
 693 * ring buffers, and other tracing utilities. It takes no
694 * locks and can be called from any context.
695 */
696void ftrace_off_permanent(void)
697{
698 tracing_disabled = 1;
699 ftrace_stop();
700 tracing_off_permanent();
701}
702
703/**
664 * tracing_start - quick start of the tracer 704 * tracing_start - quick start of the tracer
665 * 705 *
666 * If tracing is enabled but was stopped by tracing_stop, 706 * If tracing is enabled but was stopped by tracing_stop,
@@ -801,6 +841,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
801 841
802 entry->preempt_count = pc & 0xff; 842 entry->preempt_count = pc & 0xff;
803 entry->pid = (tsk) ? tsk->pid : 0; 843 entry->pid = (tsk) ? tsk->pid : 0;
844 entry->tgid = (tsk) ? tsk->tgid : 0;
804 entry->flags = 845 entry->flags =
805#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT 846#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
806 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | 847 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
@@ -918,6 +959,44 @@ void __trace_stack(struct trace_array *tr,
918 ftrace_trace_stack(tr, data, flags, skip, preempt_count()); 959 ftrace_trace_stack(tr, data, flags, skip, preempt_count());
919} 960}
920 961
962static void ftrace_trace_userstack(struct trace_array *tr,
963 struct trace_array_cpu *data,
964 unsigned long flags, int pc)
965{
966 struct ring_buffer_event *event;
967 struct userstack_entry *entry;
968 struct stack_trace trace;
969 unsigned long irq_flags;
970
971 if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
972 return;
973
974 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
975 &irq_flags);
976 if (!event)
977 return;
978 entry = ring_buffer_event_data(event);
979 tracing_generic_entry_update(&entry->ent, flags, pc);
980 entry->ent.type = TRACE_USER_STACK;
981
982 memset(&entry->caller, 0, sizeof(entry->caller));
983
984 trace.nr_entries = 0;
985 trace.max_entries = FTRACE_STACK_ENTRIES;
986 trace.skip = 0;
987 trace.entries = entry->caller;
988
989 save_stack_trace_user(&trace);
990 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
991}
992
993void __trace_userstack(struct trace_array *tr,
994 struct trace_array_cpu *data,
995 unsigned long flags)
996{
997 ftrace_trace_userstack(tr, data, flags, preempt_count());
998}
999
921static void 1000static void
922ftrace_trace_special(void *__tr, void *__data, 1001ftrace_trace_special(void *__tr, void *__data,
923 unsigned long arg1, unsigned long arg2, unsigned long arg3, 1002 unsigned long arg1, unsigned long arg2, unsigned long arg3,
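
The userstack capture above fills a fixed FTRACE_STACK_ENTRIES array: slots the unwinder could not translate keep the zero from the memset, and a ULONG_MAX entry, when present, marks the end of a short trace. The printers added later in this patch walk the array exactly that way; a standalone sketch of that consumption, with an illustrative size and addresses:

#include <stdio.h>
#include <limits.h>

#define ENTRIES 8       /* stand-in for FTRACE_STACK_ENTRIES */

/* Print a captured user stack the way the new printer does: stop at
 * ULONG_MAX, render untranslatable zero slots as "??". */
static void print_user_stack(const unsigned long caller[ENTRIES])
{
        int i;

        for (i = 0; i < ENTRIES; i++) {
                if (caller[i] == ULONG_MAX)
                        break;
                if (i)
                        printf(" <- ");
                if (!caller[i])
                        printf("??");
                else
                        printf("0x%lx", caller[i]);
        }
        printf("\n");
}

int main(void)
{
        unsigned long stack[ENTRIES] = {
                0x8048000, 0x80481f4, 0, 0x8048300, ULONG_MAX,
        };

        print_user_stack(stack);
        return 0;
}
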
@@ -941,6 +1020,7 @@ ftrace_trace_special(void *__tr, void *__data,
941 entry->arg3 = arg3; 1020 entry->arg3 = arg3;
942 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 1021 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
943 ftrace_trace_stack(tr, data, irq_flags, 4, pc); 1022 ftrace_trace_stack(tr, data, irq_flags, 4, pc);
1023 ftrace_trace_userstack(tr, data, irq_flags, pc);
944 1024
945 trace_wake_up(); 1025 trace_wake_up();
946} 1026}
@@ -979,6 +1059,7 @@ tracing_sched_switch_trace(struct trace_array *tr,
979 entry->next_cpu = task_cpu(next); 1059 entry->next_cpu = task_cpu(next);
980 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 1060 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
981 ftrace_trace_stack(tr, data, flags, 5, pc); 1061 ftrace_trace_stack(tr, data, flags, 5, pc);
1062 ftrace_trace_userstack(tr, data, flags, pc);
982} 1063}
983 1064
984void 1065void
@@ -1008,6 +1089,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
1008 entry->next_cpu = task_cpu(wakee); 1089 entry->next_cpu = task_cpu(wakee);
1009 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 1090 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
1010 ftrace_trace_stack(tr, data, flags, 6, pc); 1091 ftrace_trace_stack(tr, data, flags, 6, pc);
1092 ftrace_trace_userstack(tr, data, flags, pc);
1011 1093
1012 trace_wake_up(); 1094 trace_wake_up();
1013} 1095}
@@ -1387,6 +1469,78 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
1387 return ret; 1469 return ret;
1388} 1470}
1389 1471
1472static inline int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
1473 unsigned long ip, unsigned long sym_flags)
1474{
1475 struct file *file = NULL;
1476 unsigned long vmstart = 0;
1477 int ret = 1;
1478
1479 if (mm) {
1480 const struct vm_area_struct *vma;
1481
1482 down_read(&mm->mmap_sem);
1483 vma = find_vma(mm, ip);
1484 if (vma) {
1485 file = vma->vm_file;
1486 vmstart = vma->vm_start;
1487 }
1488 if (file) {
1489 ret = trace_seq_path(s, &file->f_path);
1490 if (ret)
1491 ret = trace_seq_printf(s, "[+0x%lx]", ip - vmstart);
1492 }
1493 up_read(&mm->mmap_sem);
1494 }
1495 if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file))
1496 ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
1497 return ret;
1498}
1499
1500static int
1501seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
1502 unsigned long sym_flags)
1503{
1504 struct mm_struct *mm = NULL;
1505 int ret = 1;
1506 unsigned int i;
1507
1508 if (trace_flags & TRACE_ITER_SYM_USEROBJ) {
1509 struct task_struct *task;
1510 /*
1511 * we do the lookup on the thread group leader,
1512 * since individual threads might have already quit!
1513 */
1514 rcu_read_lock();
1515 task = find_task_by_vpid(entry->ent.tgid);
1516 if (task)
1517 mm = get_task_mm(task);
1518 rcu_read_unlock();
1519 }
1520
1521 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1522 unsigned long ip = entry->caller[i];
1523
1524 if (ip == ULONG_MAX || !ret)
1525 break;
1526 if (i && ret)
1527 ret = trace_seq_puts(s, " <- ");
1528 if (!ip) {
1529 if (ret)
1530 ret = trace_seq_puts(s, "??");
1531 continue;
1532 }
1533 if (!ret)
1534 break;
1535 if (ret)
1536 ret = seq_print_user_ip(s, mm, ip, sym_flags);
1537 }
1538
1539 if (mm)
1540 mmput(mm);
1541 return ret;
1542}
1543
1390static void print_lat_help_header(struct seq_file *m) 1544static void print_lat_help_header(struct seq_file *m)
1391{ 1545{
1392 seq_puts(m, "# _------=> CPU# \n"); 1546 seq_puts(m, "# _------=> CPU# \n");
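
seq_print_user_ip() above maps a user address to its vma under mmap_sem and prints the containing object plus the offset from the mapping start. A hedged userspace analogue that performs the same resolution by scanning /proc/self/maps instead of calling find_vma():

#include <stdio.h>

/* Find the mapping containing 'ip' and print "object[+0xoffset]",
 * with the offset relative to the mapping start, like the [+0x...]
 * format the new printer emits. */
static int resolve(unsigned long ip)
{
        FILE *f = fopen("/proc/self/maps", "r");
        char line[512], path[384];
        unsigned long start, end;

        if (!f)
                return -1;
        while (fgets(line, sizeof(line), f)) {
                path[0] = '\0';
                if (sscanf(line, "%lx-%lx %*s %*s %*s %*s %383[^\n]",
                           &start, &end, path) < 2)
                        continue;
                if (ip >= start && ip < end) {
                        printf("%s[+0x%lx]\n",
                               path[0] ? path : "?", ip - start);
                        fclose(f);
                        return 0;
                }
        }
        fclose(f);
        return -1;
}

int main(void)
{
        /* function-to-object pointer cast: a common GCC extension */
        return resolve((unsigned long)(void *)main) ? 1 : 0;
}
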
@@ -1702,6 +1856,15 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1702 field->line); 1856 field->line);
1703 break; 1857 break;
1704 } 1858 }
1859 case TRACE_USER_STACK: {
1860 struct userstack_entry *field;
1861
1862 trace_assign_type(field, entry);
1863
1864 seq_print_userip_objs(field, s, sym_flags);
1865 trace_seq_putc(s, '\n');
1866 break;
1867 }
1705 default: 1868 default:
1706 trace_seq_printf(s, "Unknown type %d\n", entry->type); 1869 trace_seq_printf(s, "Unknown type %d\n", entry->type);
1707 } 1870 }
@@ -1853,6 +2016,19 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
1853 field->line); 2016 field->line);
1854 break; 2017 break;
1855 } 2018 }
2019 case TRACE_USER_STACK: {
2020 struct userstack_entry *field;
2021
2022 trace_assign_type(field, entry);
2023
2024 ret = seq_print_userip_objs(field, s, sym_flags);
2025 if (!ret)
2026 return TRACE_TYPE_PARTIAL_LINE;
2027 ret = trace_seq_putc(s, '\n');
2028 if (!ret)
2029 return TRACE_TYPE_PARTIAL_LINE;
2030 break;
2031 }
1856 } 2032 }
1857 return TRACE_TYPE_HANDLED; 2033 return TRACE_TYPE_HANDLED;
1858} 2034}
@@ -1912,6 +2088,7 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
1912 break; 2088 break;
1913 } 2089 }
1914 case TRACE_SPECIAL: 2090 case TRACE_SPECIAL:
2091 case TRACE_USER_STACK:
1915 case TRACE_STACK: { 2092 case TRACE_STACK: {
1916 struct special_entry *field; 2093 struct special_entry *field;
1917 2094
@@ -2000,6 +2177,7 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2000 break; 2177 break;
2001 } 2178 }
2002 case TRACE_SPECIAL: 2179 case TRACE_SPECIAL:
2180 case TRACE_USER_STACK:
2003 case TRACE_STACK: { 2181 case TRACE_STACK: {
2004 struct special_entry *field; 2182 struct special_entry *field;
2005 2183
@@ -2054,6 +2232,7 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2054 break; 2232 break;
2055 } 2233 }
2056 case TRACE_SPECIAL: 2234 case TRACE_SPECIAL:
2235 case TRACE_USER_STACK:
2057 case TRACE_STACK: { 2236 case TRACE_STACK: {
2058 struct special_entry *field; 2237 struct special_entry *field;
2059 2238
@@ -2119,7 +2298,9 @@ static int s_show(struct seq_file *m, void *v)
2119 seq_printf(m, "# tracer: %s\n", iter->trace->name); 2298 seq_printf(m, "# tracer: %s\n", iter->trace->name);
2120 seq_puts(m, "#\n"); 2299 seq_puts(m, "#\n");
2121 } 2300 }
2122 if (iter->iter_flags & TRACE_FILE_LAT_FMT) { 2301 if (iter->trace && iter->trace->print_header)
2302 iter->trace->print_header(m);
2303 else if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2123 /* print nothing if the buffers are empty */ 2304 /* print nothing if the buffers are empty */
2124 if (trace_empty(iter)) 2305 if (trace_empty(iter))
2125 return 0; 2306 return 0;
@@ -2171,6 +2352,10 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
2171 iter->trace = current_trace; 2352 iter->trace = current_trace;
2172 iter->pos = -1; 2353 iter->pos = -1;
2173 2354
2355 /* Notify the tracer early; before we stop tracing. */
2356 if (iter->trace && iter->trace->open)
2357 iter->trace->open(iter);
2358
2174 /* Annotate start of buffers if we had overruns */ 2359 /* Annotate start of buffers if we had overruns */
2175 if (ring_buffer_overruns(iter->tr->buffer)) 2360 if (ring_buffer_overruns(iter->tr->buffer))
2176 iter->iter_flags |= TRACE_FILE_ANNOTATE; 2361 iter->iter_flags |= TRACE_FILE_ANNOTATE;
@@ -2196,9 +2381,6 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
2196 /* stop the trace while dumping */ 2381 /* stop the trace while dumping */
2197 tracing_stop(); 2382 tracing_stop();
2198 2383
2199 if (iter->trace && iter->trace->open)
2200 iter->trace->open(iter);
2201
2202 mutex_unlock(&trace_types_lock); 2384 mutex_unlock(&trace_types_lock);
2203 2385
2204 out: 2386 out:
@@ -3488,6 +3670,9 @@ void ftrace_dump(void)
3488 atomic_inc(&global_trace.data[cpu]->disabled); 3670 atomic_inc(&global_trace.data[cpu]->disabled);
3489 } 3671 }
3490 3672
3673 /* don't look at user memory in panic mode */
3674 trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
3675
3491 printk(KERN_TRACE "Dumping ftrace buffer:\n"); 3676 printk(KERN_TRACE "Dumping ftrace buffer:\n");
3492 3677
3493 iter.tr = &global_trace; 3678 iter.tr = &global_trace;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 2cb12fd98f6b..3abd645e8af2 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -26,6 +26,8 @@ enum trace_type {
26 TRACE_BOOT_CALL, 26 TRACE_BOOT_CALL,
27 TRACE_BOOT_RET, 27 TRACE_BOOT_RET,
28 TRACE_FN_RET, 28 TRACE_FN_RET,
29 TRACE_USER_STACK,
30 TRACE_BTS,
29 31
30 __TRACE_LAST_TYPE 32 __TRACE_LAST_TYPE
31}; 33};
@@ -42,6 +44,7 @@ struct trace_entry {
42 unsigned char flags; 44 unsigned char flags;
43 unsigned char preempt_count; 45 unsigned char preempt_count;
44 int pid; 46 int pid;
47 int tgid;
45}; 48};
46 49
47/* 50/*
@@ -99,6 +102,11 @@ struct stack_entry {
99 unsigned long caller[FTRACE_STACK_ENTRIES]; 102 unsigned long caller[FTRACE_STACK_ENTRIES];
100}; 103};
101 104
105struct userstack_entry {
106 struct trace_entry ent;
107 unsigned long caller[FTRACE_STACK_ENTRIES];
108};
109
102/* 110/*
103 * ftrace_printk entry: 111 * ftrace_printk entry:
104 */ 112 */
@@ -146,6 +154,12 @@ struct trace_branch {
146 char correct; 154 char correct;
147}; 155};
148 156
157struct bts_entry {
158 struct trace_entry ent;
159 unsigned long from;
160 unsigned long to;
161};
162
149/* 163/*
150 * trace_flag_type is an enumeration that holds different 164 * trace_flag_type is an enumeration that holds different
151 * states when a trace occurs. These are: 165 * states when a trace occurs. These are:
@@ -240,6 +254,7 @@ extern void __ftrace_bad_type(void);
240 IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \ 254 IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \
241 IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \ 255 IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \
242 IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \ 256 IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \
257 IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
243 IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ 258 IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \
244 IF_ASSIGN(var, ent, struct special_entry, 0); \ 259 IF_ASSIGN(var, ent, struct special_entry, 0); \
245 IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \ 260 IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \
@@ -250,6 +265,7 @@ extern void __ftrace_bad_type(void);
250 IF_ASSIGN(var, ent, struct trace_boot_ret, TRACE_BOOT_RET);\ 265 IF_ASSIGN(var, ent, struct trace_boot_ret, TRACE_BOOT_RET);\
251 IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \ 266 IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \
252 IF_ASSIGN(var, ent, struct ftrace_ret_entry, TRACE_FN_RET);\ 267 IF_ASSIGN(var, ent, struct ftrace_ret_entry, TRACE_FN_RET);\
268 IF_ASSIGN(var, ent, struct bts_entry, TRACE_BTS);\
253 __ftrace_bad_type(); \ 269 __ftrace_bad_type(); \
254 } while (0) 270 } while (0)
255 271
@@ -303,6 +319,7 @@ struct tracer {
303 int (*selftest)(struct tracer *trace, 319 int (*selftest)(struct tracer *trace,
304 struct trace_array *tr); 320 struct trace_array *tr);
305#endif 321#endif
322 void (*print_header)(struct seq_file *m);
306 enum print_line_t (*print_line)(struct trace_iterator *iter); 323 enum print_line_t (*print_line)(struct trace_iterator *iter);
307 /* If you handled the flag setting, return 0 */ 324 /* If you handled the flag setting, return 0 */
308 int (*set_flag)(u32 old_flags, u32 bit, int set); 325 int (*set_flag)(u32 old_flags, u32 bit, int set);
@@ -383,6 +400,10 @@ void trace_function(struct trace_array *tr,
383void 400void
384trace_function_return(struct ftrace_retfunc *trace); 401trace_function_return(struct ftrace_retfunc *trace);
385 402
403void trace_bts(struct trace_array *tr,
404 unsigned long from,
405 unsigned long to);
406
386void tracing_start_cmdline_record(void); 407void tracing_start_cmdline_record(void);
387void tracing_stop_cmdline_record(void); 408void tracing_stop_cmdline_record(void);
388void tracing_sched_switch_assign_trace(struct trace_array *tr); 409void tracing_sched_switch_assign_trace(struct trace_array *tr);
@@ -500,6 +521,8 @@ enum trace_iterator_flags {
500 TRACE_ITER_PREEMPTONLY = 0x800, 521 TRACE_ITER_PREEMPTONLY = 0x800,
501 TRACE_ITER_BRANCH = 0x1000, 522 TRACE_ITER_BRANCH = 0x1000,
502 TRACE_ITER_ANNOTATE = 0x2000, 523 TRACE_ITER_ANNOTATE = 0x2000,
524 TRACE_ITER_USERSTACKTRACE = 0x4000,
525 TRACE_ITER_SYM_USEROBJ = 0x8000
503}; 526};
504 527
505/* 528/*
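
Every record type added here starts with struct trace_entry, so the printers can switch on ent.type and cast to the specific layout; trace_assign_type() wraps that cast with the compile-time checks in the IF_ASSIGN table above. The bare pattern as a standalone sketch:

#include <stdio.h>

enum { T_STACK, T_BTS };

struct entry   { int type; };                   /* common header */
struct bts_rec { struct entry ent; unsigned long from, to; };

static void print_one(struct entry *e)
{
        switch (e->type) {
        case T_BTS: {
                /* header is the first member, so this cast is valid */
                struct bts_rec *b = (struct bts_rec *)e;

                printf("0x%lx -> 0x%lx\n", b->from, b->to);
                break;
        }
        default:
                printf("unknown type %d\n", e->type);
        }
}

int main(void)
{
        struct bts_rec r = { { T_BTS }, 0x1000, 0x2000 };

        print_one(&r.ent);
        return 0;
}
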
diff --git a/kernel/trace/trace_bts.c b/kernel/trace/trace_bts.c
new file mode 100644
index 000000000000..23b76e4690ef
--- /dev/null
+++ b/kernel/trace/trace_bts.c
@@ -0,0 +1,276 @@
1/*
2 * BTS tracer
3 *
4 * Copyright (C) 2008 Markus Metzger <markus.t.metzger@gmail.com>
5 *
6 */
7
8#include <linux/module.h>
9#include <linux/fs.h>
10#include <linux/debugfs.h>
11#include <linux/ftrace.h>
12#include <linux/kallsyms.h>
13
14#include <asm/ds.h>
15
16#include "trace.h"
17
18
19#define SIZEOF_BTS (1 << 13)
20
21static DEFINE_PER_CPU(struct bts_tracer *, tracer);
22static DEFINE_PER_CPU(unsigned char[SIZEOF_BTS], buffer);
23
24#define this_tracer per_cpu(tracer, smp_processor_id())
25#define this_buffer per_cpu(buffer, smp_processor_id())
26
27
28/*
29 * Information to interpret a BTS record.
30 * This will go into an in-kernel BTS interface.
31 */
32static unsigned char sizeof_field;
33static unsigned long debugctl_mask;
34
35#define sizeof_bts (3 * sizeof_field)
36
37static void bts_trace_cpuinit(struct cpuinfo_x86 *c)
38{
39 switch (c->x86) {
40 case 0x6:
41 switch (c->x86_model) {
42 case 0x0 ... 0xC:
43 break;
44 case 0xD:
45 case 0xE: /* Pentium M */
46 sizeof_field = sizeof(long);
47 debugctl_mask = (1<<6)|(1<<7);
48 break;
49 default:
50 sizeof_field = 8;
51 debugctl_mask = (1<<6)|(1<<7);
52 break;
53 }
54 break;
55 case 0xF:
56 switch (c->x86_model) {
57 case 0x0:
58 case 0x1:
59 case 0x2: /* Netburst */
60 sizeof_field = sizeof(long);
61 debugctl_mask = (1<<2)|(1<<3);
62 break;
63 default:
64 /* sorry, don't know about them */
65 break;
66 }
67 break;
68 default:
69 /* sorry, don't know about them */
70 break;
71 }
72}
73
74static inline void bts_enable(void)
75{
76 unsigned long debugctl;
77
78 rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
79 wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl | debugctl_mask);
80}
81
82static inline void bts_disable(void)
83{
84 unsigned long debugctl;
85
86 rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
87 wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl & ~debugctl_mask);
88}
89
90static void bts_trace_reset(struct trace_array *tr)
91{
92 int cpu;
93
94 tr->time_start = ftrace_now(tr->cpu);
95
96 for_each_online_cpu(cpu)
97 tracing_reset(tr, cpu);
98}
99
100static void bts_trace_start_cpu(void *arg)
101{
102 this_tracer =
103 ds_request_bts(/* task = */ NULL, this_buffer, SIZEOF_BTS,
104 /* ovfl = */ NULL, /* th = */ (size_t)-1);
105 if (IS_ERR(this_tracer)) {
106 this_tracer = NULL;
107 return;
108 }
109
110 bts_enable();
111}
112
113static void bts_trace_start(struct trace_array *tr)
114{
115 int cpu;
116
117 bts_trace_reset(tr);
118
119 for_each_cpu_mask(cpu, cpu_possible_map)
120 smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1);
121}
122
123static void bts_trace_stop_cpu(void *arg)
124{
125 if (this_tracer) {
126 bts_disable();
127
128 ds_release_bts(this_tracer);
129 this_tracer = NULL;
130 }
131}
132
133static void bts_trace_stop(struct trace_array *tr)
134{
135 int cpu;
136
137 for_each_cpu_mask(cpu, cpu_possible_map)
138 smp_call_function_single(cpu, bts_trace_stop_cpu, NULL, 1);
139}
140
141static int bts_trace_init(struct trace_array *tr)
142{
143 bts_trace_cpuinit(&boot_cpu_data);
144 bts_trace_reset(tr);
145 bts_trace_start(tr);
146
147 return 0;
148}
149
150static void bts_trace_print_header(struct seq_file *m)
151{
152#ifdef __i386__
153 seq_puts(m, "# CPU# FROM TO FUNCTION\n");
154 seq_puts(m, "# | | | |\n");
155#else
156 seq_puts(m,
157 "# CPU# FROM TO FUNCTION\n");
158 seq_puts(m,
159 "# | | | |\n");
160#endif
161}
162
163static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
164{
165 struct trace_entry *entry = iter->ent;
166 struct trace_seq *seq = &iter->seq;
167 struct bts_entry *it;
168
169 trace_assign_type(it, entry);
170
171 if (entry->type == TRACE_BTS) {
172 int ret;
173#ifdef CONFIG_KALLSYMS
174 char function[KSYM_SYMBOL_LEN];
175 sprint_symbol(function, it->from);
176#else
177 char *function = "<unknown>";
178#endif
179
180 ret = trace_seq_printf(seq, "%4d 0x%lx -> 0x%lx [%s]\n",
181 entry->cpu, it->from, it->to, function);
182 if (!ret)
 183 			return TRACE_TYPE_PARTIAL_LINE;
184 return TRACE_TYPE_HANDLED;
185 }
186 return TRACE_TYPE_UNHANDLED;
187}
188
189void trace_bts(struct trace_array *tr, unsigned long from, unsigned long to)
190{
191 struct ring_buffer_event *event;
192 struct bts_entry *entry;
193 unsigned long irq;
194
195 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), &irq);
196 if (!event)
197 return;
198 entry = ring_buffer_event_data(event);
199 tracing_generic_entry_update(&entry->ent, 0, from);
200 entry->ent.type = TRACE_BTS;
201 entry->ent.cpu = smp_processor_id();
202 entry->from = from;
203 entry->to = to;
204 ring_buffer_unlock_commit(tr->buffer, event, irq);
205}
206
207static void trace_bts_at(struct trace_array *tr, size_t index)
208{
209 const void *raw = NULL;
210 unsigned long from, to;
211 int err;
212
213 err = ds_access_bts(this_tracer, index, &raw);
214 if (err < 0)
215 return;
216
217 from = *(const unsigned long *)raw;
218 to = *(const unsigned long *)((const char *)raw + sizeof_field);
219
220 trace_bts(tr, from, to);
221}
222
223static void trace_bts_cpu(void *arg)
224{
225 struct trace_array *tr = (struct trace_array *) arg;
226 size_t index = 0, end = 0, i;
227 int err;
228
229 if (!this_tracer)
230 return;
231
232 bts_disable();
233
234 err = ds_get_bts_index(this_tracer, &index);
235 if (err < 0)
236 goto out;
237
238 err = ds_get_bts_end(this_tracer, &end);
239 if (err < 0)
240 goto out;
241
242 for (i = index; i < end; i++)
243 trace_bts_at(tr, i);
244
245 for (i = 0; i < index; i++)
246 trace_bts_at(tr, i);
247
248out:
249 bts_enable();
250}
251
252static void trace_bts_prepare(struct trace_iterator *iter)
253{
254 int cpu;
255
256 for_each_cpu_mask(cpu, cpu_possible_map)
257 smp_call_function_single(cpu, trace_bts_cpu, iter->tr, 1);
258}
259
260struct tracer bts_tracer __read_mostly =
261{
262 .name = "bts",
263 .init = bts_trace_init,
264 .reset = bts_trace_stop,
265 .print_header = bts_trace_print_header,
266 .print_line = bts_trace_print_line,
267 .start = bts_trace_start,
268 .stop = bts_trace_stop,
269 .open = trace_bts_prepare
270};
271
272__init static int init_bts_trace(void)
273{
274 return register_tracer(&bts_tracer);
275}
276device_initcall(init_bts_trace);
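
trace_bts_cpu() drains the hardware buffer oldest-first: the write index points at the next slot to be overwritten, so index..end-1 holds the oldest records and 0..index-1 the newest. The traversal in isolation:

#include <stdio.h>

/* Drain a circular buffer oldest-first: 'index' is the next write
 * slot, so index..end-1 is the oldest data, 0..index-1 the newest. */
static void drain(const unsigned long *buf, size_t index, size_t end)
{
        size_t i;

        for (i = index; i < end; i++)
                printf("%lu ", buf[i]);
        for (i = 0; i < index; i++)
                printf("%lu ", buf[i]);
        printf("\n");
}

int main(void)
{
        unsigned long buf[4] = { 5, 6, 3, 4 };  /* wrapped write order */

        drain(buf, 2, 4);                       /* prints 3 4 5 6 */
        return 0;
}
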
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index 433d650eda9f..2a98a206acc2 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -18,12 +18,14 @@ struct header_iter {
18 18
19static struct trace_array *mmio_trace_array; 19static struct trace_array *mmio_trace_array;
20static bool overrun_detected; 20static bool overrun_detected;
21static unsigned long prev_overruns;
21 22
22static void mmio_reset_data(struct trace_array *tr) 23static void mmio_reset_data(struct trace_array *tr)
23{ 24{
24 int cpu; 25 int cpu;
25 26
26 overrun_detected = false; 27 overrun_detected = false;
28 prev_overruns = 0;
27 tr->time_start = ftrace_now(tr->cpu); 29 tr->time_start = ftrace_now(tr->cpu);
28 30
29 for_each_online_cpu(cpu) 31 for_each_online_cpu(cpu)
@@ -123,16 +125,12 @@ static void mmio_close(struct trace_iterator *iter)
123 125
124static unsigned long count_overruns(struct trace_iterator *iter) 126static unsigned long count_overruns(struct trace_iterator *iter)
125{ 127{
126 int cpu;
127 unsigned long cnt = 0; 128 unsigned long cnt = 0;
128/* FIXME: */ 129 unsigned long over = ring_buffer_overruns(iter->tr->buffer);
129#if 0 130
130 for_each_online_cpu(cpu) { 131 if (over > prev_overruns)
131 cnt += iter->overrun[cpu]; 132 cnt = over - prev_overruns;
132 iter->overrun[cpu] = 0; 133 prev_overruns = over;
133 }
134#endif
135 (void)cpu;
136 return cnt; 134 return cnt;
137} 135}
138 136
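
count_overruns() no longer needs per-cpu bookkeeping: ring_buffer_overruns() returns a monotonically growing total, so the tracer just reports the delta since the last read. The bookkeeping in isolation:

#include <stdio.h>

static unsigned long prev_overruns;

/* Report only overruns that happened since the previous call, given
 * a monotonically increasing running total. */
static unsigned long overruns_since_last(unsigned long total)
{
        unsigned long cnt = 0;

        if (total > prev_overruns)
                cnt = total - prev_overruns;
        prev_overruns = total;
        return cnt;
}

int main(void)
{
        printf("%lu\n", overruns_since_last(10));       /* 10 */
        printf("%lu\n", overruns_since_last(10));       /* 0 */
        printf("%lu\n", overruns_since_last(17));       /* 7 */
        return 0;
}
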
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index eeac71c87c66..0197e2f6b544 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -130,11 +130,13 @@ my %weak; # List of weak functions
130my %convert; # List of local functions used that needs conversion 130my %convert; # List of local functions used that needs conversion
131 131
132my $type; 132my $type;
133my $nm_regex; # Find the local functions (return function)
133my $section_regex; # Find the start of a section 134my $section_regex; # Find the start of a section
134my $function_regex; # Find the name of a function 135my $function_regex; # Find the name of a function
135 # (return offset and func name) 136 # (return offset and func name)
136my $mcount_regex; # Find the call site to mcount (return offset) 137my $mcount_regex; # Find the call site to mcount (return offset)
137my $alignment; # The .align value to use for $mcount_section 138my $alignment; # The .align value to use for $mcount_section
139my $section_type; # Section header plus possible alignment command
138 140
139if ($arch eq "x86") { 141if ($arch eq "x86") {
140 if ($bits == 64) { 142 if ($bits == 64) {
@@ -144,9 +146,18 @@ if ($arch eq "x86") {
144 } 146 }
145} 147}
146 148
149#
150# We base the defaults off of i386, the other archs may
151# feel free to change them in the below if statements.
152#
153$nm_regex = "^[0-9a-fA-F]+\\s+t\\s+(\\S+)";
154$section_regex = "Disassembly of section\\s+(\\S+):";
155$function_regex = "^([0-9a-fA-F]+)\\s+<(.*?)>:";
156$mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount\$";
157$section_type = '@progbits';
158$type = ".long";
159
147if ($arch eq "x86_64") { 160if ($arch eq "x86_64") {
148 $section_regex = "Disassembly of section\\s+(\\S+):";
149 $function_regex = "^([0-9a-fA-F]+)\\s+<(.*?)>:";
150 $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount([+-]0x[0-9a-zA-Z]+)?\$"; 161 $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount([+-]0x[0-9a-zA-Z]+)?\$";
151 $type = ".quad"; 162 $type = ".quad";
152 $alignment = 8; 163 $alignment = 8;
@@ -158,10 +169,6 @@ if ($arch eq "x86_64") {
158 $cc .= " -m64"; 169 $cc .= " -m64";
159 170
160} elsif ($arch eq "i386") { 171} elsif ($arch eq "i386") {
161 $section_regex = "Disassembly of section\\s+(\\S+):";
162 $function_regex = "^([0-9a-fA-F]+)\\s+<(.*?)>:";
163 $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount\$";
164 $type = ".long";
165 $alignment = 4; 172 $alignment = 4;
166 173
167 # force flags for this arch 174 # force flags for this arch
@@ -170,6 +177,27 @@ if ($arch eq "x86_64") {
170 $objcopy .= " -O elf32-i386"; 177 $objcopy .= " -O elf32-i386";
171 $cc .= " -m32"; 178 $cc .= " -m32";
172 179
180} elsif ($arch eq "sh") {
181 $alignment = 2;
182
183 # force flags for this arch
184 $ld .= " -m shlelf_linux";
185 $objcopy .= " -O elf32-sh-linux";
186 $cc .= " -m32";
187
188} elsif ($arch eq "powerpc") {
189 $nm_regex = "^[0-9a-fA-F]+\\s+t\\s+(\\.?\\S+)";
190 $function_regex = "^([0-9a-fA-F]+)\\s+<(\\.?.*?)>:";
191 $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s\\.?_mcount\$";
192
193 if ($bits == 64) {
194 $type = ".quad";
195 }
196
197} elsif ($arch eq "arm") {
198 $alignment = 2;
199 $section_type = '%progbits';
200
173} else { 201} else {
174 die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD"; 202 die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD";
175} 203}
@@ -239,7 +267,7 @@ if (!$found_version) {
239# 267#
240open (IN, "$nm $inputfile|") || die "error running $nm"; 268open (IN, "$nm $inputfile|") || die "error running $nm";
241while (<IN>) { 269while (<IN>) {
242 if (/^[0-9a-fA-F]+\s+t\s+(\S+)/) { 270 if (/$nm_regex/) {
243 $locals{$1} = 1; 271 $locals{$1} = 1;
244 } elsif (/^[0-9a-fA-F]+\s+([wW])\s+(\S+)/) { 272 } elsif (/^[0-9a-fA-F]+\s+([wW])\s+(\S+)/) {
245 $weak{$2} = $1; 273 $weak{$2} = $1;
@@ -290,8 +318,8 @@ sub update_funcs
290 if (!$opened) { 318 if (!$opened) {
291 open(FILE, ">$mcount_s") || die "can't create $mcount_s\n"; 319 open(FILE, ">$mcount_s") || die "can't create $mcount_s\n";
292 $opened = 1; 320 $opened = 1;
293 print FILE "\t.section $mcount_section,\"a\",\@progbits\n"; 321 print FILE "\t.section $mcount_section,\"a\",$section_type\n";
294 print FILE "\t.align $alignment\n"; 322 print FILE "\t.align $alignment\n" if (defined($alignment));
295 } 323 }
296 printf FILE "\t%s %s + %d\n", $type, $ref_func, $offsets[$i] - $offset; 324 printf FILE "\t%s %s + %d\n", $type, $ref_func, $offsets[$i] - $offset;
297 } 325 }
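
The restructuring above turns the i386 patterns into shared defaults that each architecture block may override, which is why the powerpc branch only swaps in patterns tolerating the leading '.' on function-descriptor symbols. For illustration only, a rough C rendering of the default $nm_regex; the POSIX ERE below is an approximation of the Perl character classes, with \s and \S narrowed to space and tab:

#include <regex.h>
#include <stdio.h>

int main(void)
{
        regex_t re;
        regmatch_t m[2];
        const char *line = "c01000a0 t do_one_initcall";

        /* approximates "^[0-9a-fA-F]+\\s+t\\s+(\\S+)" */
        if (regcomp(&re, "^[0-9a-fA-F]+[ \t]+t[ \t]+([^ \t]+)",
                    REG_EXTENDED))
                return 1;
        if (!regexec(&re, line, 2, m, 0))
                printf("local symbol: %.*s\n",
                       (int)(m[1].rm_eo - m[1].rm_so), line + m[1].rm_so);
        regfree(&re);
        return 0;
}
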