Diffstat (limited to 'arch/x86/kernel/ftrace.c')
-rw-r--r--	arch/x86/kernel/ftrace.c	265
 1 file changed, 129 insertions(+), 136 deletions(-)
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 231bdd3c5b1c..61df77532120 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -18,6 +18,7 @@
 #include <linux/init.h>
 #include <linux/list.h>
 
+#include <asm/cacheflush.h>
 #include <asm/ftrace.h>
 #include <linux/ftrace.h>
 #include <asm/nops.h>
@@ -26,6 +27,18 @@
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 
+int ftrace_arch_code_modify_prepare(void)
+{
+	set_kernel_text_rw();
+	return 0;
+}
+
+int ftrace_arch_code_modify_post_process(void)
+{
+	set_kernel_text_ro();
+	return 0;
+}
+
 union ftrace_code_union {
 	char code[MCOUNT_INSN_SIZE];
 	struct {
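
The two hooks added above only flip the kernel text mapping between read-write and read-only; the patching itself is driven by the generic ftrace core between those two calls. As a rough user-space analogy (not kernel code: mmap/mprotect stand in for set_kernel_text_rw()/set_kernel_text_ro(), and the patched bytes are made up), the same unprotect, patch, reprotect bracket looks like this:

	/* user-space analogy of bracketing a text patch with protection changes */
	#include <stdio.h>
	#include <string.h>
	#include <sys/mman.h>
	#include <unistd.h>

	int main(void)
	{
		long pagesz = sysconf(_SC_PAGESIZE);
		unsigned char *page = mmap(NULL, pagesz, PROT_READ | PROT_WRITE,
					   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

		if (page == MAP_FAILED)
			return 1;

		memset(page, 0x90, 16);			/* pretend this is code */
		mprotect(page, pagesz, PROT_READ);	/* "text" is normally read-only */

		mprotect(page, pagesz, PROT_READ | PROT_WRITE);	/* prepare: make it RW */
		page[0] = 0xe8;					/* patch one byte */
		mprotect(page, pagesz, PROT_READ);		/* post process: back to RO */

		printf("patched byte: %#x\n", page[0]);
		munmap(page, pagesz);
		return 0;
	}
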
@@ -66,11 +79,11 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
  *
  * 1) Put the instruction pointer into the IP buffer
  *    and the new code into the "code" buffer.
- * 2) Set a flag that says we are modifying code
- * 3) Wait for any running NMIs to finish.
- * 4) Write the code
- * 5) clear the flag.
- * 6) Wait for any running NMIs to finish.
+ * 2) Wait for any running NMIs to finish and set a flag that says
+ *    we are modifying code, it is done in an atomic operation.
+ * 3) Write the code
+ * 4) clear the flag.
+ * 5) Wait for any running NMIs to finish.
  *
  * If an NMI is executed, the first thing it does is to call
  * "ftrace_nmi_enter". This will check if the flag is set to write
@@ -82,9 +95,9 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
  * are the same as what exists.
  */
 
-static atomic_t in_nmi = ATOMIC_INIT(0);
+#define MOD_CODE_WRITE_FLAG (1 << 31)	/* set when NMI should do the write */
+static atomic_t nmi_running = ATOMIC_INIT(0);
 static int mod_code_status;		/* holds return value of text write */
-static int mod_code_write;		/* set when NMI should do the write */
 static void *mod_code_ip;		/* holds the IP to write to */
 static void *mod_code_newcode;		/* holds the text to write to the IP */
 
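
With this change a single atomic_t carries both pieces of state: bit 31 is the "a write is pending, NMIs should do it" flag, and the low bits count NMIs currently in flight, so one atomic_inc_return() in the NMI path registers the NMI and reads the flag at the same time. A plain-C sketch of the encoding (illustrative values only, not the kernel atomic API):

	/* sketch of the nmi_running encoding: flag bit plus NMI count */
	#include <stdio.h>

	#define MOD_CODE_WRITE_FLAG (1u << 31)

	int main(void)
	{
		unsigned int nmi_running = 0;

		nmi_running = MOD_CODE_WRITE_FLAG;	/* writer set the flag, no NMIs yet */
		nmi_running += 3;			/* three NMIs entered since then */

		printf("value: %#x\n", nmi_running);
		printf("write pending: %s\n",
		       (nmi_running & MOD_CODE_WRITE_FLAG) ? "yes" : "no");
		printf("NMIs in flight: %u\n", nmi_running & ~MOD_CODE_WRITE_FLAG);
		return 0;
	}
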
@@ -101,6 +114,20 @@ int ftrace_arch_read_dyn_info(char *buf, int size)
 	return r;
 }
 
+static void clear_mod_flag(void)
+{
+	int old = atomic_read(&nmi_running);
+
+	for (;;) {
+		int new = old & ~MOD_CODE_WRITE_FLAG;
+
+		if (old == new)
+			break;
+
+		old = atomic_cmpxchg(&nmi_running, old, new);
+	}
+}
+
 static void ftrace_mod_code(void)
 {
 	/*
@@ -111,37 +138,52 @@ static void ftrace_mod_code(void)
 	 */
 	mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
 					     MCOUNT_INSN_SIZE);
+
+	/* if we fail, then kill any new writers */
+	if (mod_code_status)
+		clear_mod_flag();
 }
 
 void ftrace_nmi_enter(void)
 {
-	atomic_inc(&in_nmi);
-	/* Must have in_nmi seen before reading write flag */
-	smp_mb();
-	if (mod_code_write) {
+	if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
+		smp_rmb();
 		ftrace_mod_code();
 		atomic_inc(&nmi_update_count);
 	}
+	/* Must have previous changes seen before executions */
+	smp_mb();
 }
 
 void ftrace_nmi_exit(void)
 {
-	/* Finish all executions before clearing in_nmi */
-	smp_wmb();
-	atomic_dec(&in_nmi);
+	/* Finish all executions before clearing nmi_running */
+	smp_mb();
+	atomic_dec(&nmi_running);
+}
+
+static void wait_for_nmi_and_set_mod_flag(void)
+{
+	if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG))
+		return;
+
+	do {
+		cpu_relax();
+	} while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG));
+
+	nmi_wait_count++;
 }
 
 static void wait_for_nmi(void)
 {
-	int waited = 0;
+	if (!atomic_read(&nmi_running))
+		return;
 
-	while (atomic_read(&in_nmi)) {
-		waited = 1;
+	do {
 		cpu_relax();
-	}
+	} while (atomic_read(&nmi_running));
 
-	if (waited)
-		nmi_wait_count++;
+	nmi_wait_count++;
 }
 
 static int
@@ -151,14 +193,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
 	mod_code_newcode = new_code;
 
 	/* The buffers need to be visible before we let NMIs write them */
-	smp_wmb();
-
-	mod_code_write = 1;
-
-	/* Make sure write bit is visible before we wait on NMIs */
 	smp_mb();
 
-	wait_for_nmi();
+	wait_for_nmi_and_set_mod_flag();
 
 	/* Make sure all running NMIs have finished before we write the code */
 	smp_mb();
@@ -166,13 +203,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
 	ftrace_mod_code();
 
 	/* Make sure the write happens before clearing the bit */
-	smp_wmb();
-
-	mod_code_write = 0;
-
-	/* make sure NMIs see the cleared bit */
 	smp_mb();
 
+	clear_mod_flag();
 	wait_for_nmi();
 
 	return mod_code_status;
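
Putting the two sides together: the writer publishes the buffers, waits for NMIs to drain while setting the flag in one cmpxchg, writes, clears the flag, and waits once more; an NMI that sees the flag performs the write itself. Below is a minimal single-threaded model of that ordering, using C11 atomics (sequentially consistent by default) in place of the kernel's atomic_t and explicit barriers; it is illustrative only, not the kernel code path:

	#include <stdatomic.h>
	#include <stdio.h>
	#include <string.h>

	#define MOD_CODE_WRITE_FLAG (1u << 31)

	static atomic_uint nmi_running;
	static unsigned char text[5];		/* stands in for the patched call site */
	static unsigned char newcode[5];

	static void mod_code(void)
	{
		memcpy(text, newcode, sizeof(text));	/* probe_kernel_write() stand-in */
	}

	static void nmi_enter(void)
	{
		/* one atomic op both counts the NMI and checks for a pending write */
		if ((atomic_fetch_add(&nmi_running, 1) + 1) & MOD_CODE_WRITE_FLAG)
			mod_code();		/* the NMI does the write for the writer */
	}

	static void nmi_exit(void)
	{
		atomic_fetch_sub(&nmi_running, 1);
	}

	static int writer_modify(void)
	{
		unsigned int zero = 0;

		/* wait for NMIs to drain and set the flag in the same cmpxchg */
		while (!atomic_compare_exchange_weak(&nmi_running, &zero,
						     MOD_CODE_WRITE_FLAG))
			zero = 0;

		mod_code();

		/* clear the flag but keep any NMI count that arrived meanwhile */
		atomic_fetch_and(&nmi_running, ~MOD_CODE_WRITE_FLAG);

		/* wait for NMIs that may still be executing the old code */
		while (atomic_load(&nmi_running))
			;
		return 0;
	}

	int main(void)
	{
		memset(newcode, 0x90, sizeof(newcode));
		writer_modify();
		nmi_enter();			/* an NMI after the update sees new text */
		nmi_exit();
		printf("first byte now: %#x\n", text[0]);
		return 0;
	}
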
@@ -368,100 +401,8 @@ int ftrace_disable_ftrace_graph_caller(void)
 	return ftrace_mod_jmp(ip, old_offset, new_offset);
 }
 
-#else /* CONFIG_DYNAMIC_FTRACE */
-
-/*
- * These functions are picked from those used on
- * this page for dynamic ftrace. They have been
- * simplified to ignore all traces in NMI context.
- */
-static atomic_t in_nmi;
-
-void ftrace_nmi_enter(void)
-{
-	atomic_inc(&in_nmi);
-}
-
-void ftrace_nmi_exit(void)
-{
-	atomic_dec(&in_nmi);
-}
-
 #endif /* !CONFIG_DYNAMIC_FTRACE */
 
-/* Add a function return address to the trace stack on thread info.*/
-static int push_return_trace(unsigned long ret, unsigned long long time,
-				unsigned long func, int *depth)
-{
-	int index;
-
-	if (!current->ret_stack)
-		return -EBUSY;
-
-	/* The return trace stack is full */
-	if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
-		atomic_inc(&current->trace_overrun);
-		return -EBUSY;
-	}
-
-	index = ++current->curr_ret_stack;
-	barrier();
-	current->ret_stack[index].ret = ret;
-	current->ret_stack[index].func = func;
-	current->ret_stack[index].calltime = time;
-	*depth = index;
-
-	return 0;
-}
-
-/* Retrieve a function return address to the trace stack on thread info.*/
-static void pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
-{
-	int index;
-
-	index = current->curr_ret_stack;
-
-	if (unlikely(index < 0)) {
-		ftrace_graph_stop();
-		WARN_ON(1);
-		/* Might as well panic, otherwise we have no where to go */
-		*ret = (unsigned long)panic;
-		return;
-	}
-
-	*ret = current->ret_stack[index].ret;
-	trace->func = current->ret_stack[index].func;
-	trace->calltime = current->ret_stack[index].calltime;
-	trace->overrun = atomic_read(&current->trace_overrun);
-	trace->depth = index;
-	barrier();
-	current->curr_ret_stack--;
-
-}
-
-/*
- * Send the trace to the ring-buffer.
- * @return the original return address.
- */
-unsigned long ftrace_return_to_handler(void)
-{
-	struct ftrace_graph_ret trace;
-	unsigned long ret;
-
-	pop_return_trace(&trace, &ret);
-	trace.rettime = cpu_clock(raw_smp_processor_id());
-	ftrace_graph_return(&trace);
-
-	if (unlikely(!ret)) {
-		ftrace_graph_stop();
-		WARN_ON(1);
-		/* Might as well panic. What else to do? */
-		ret = (unsigned long)panic;
-	}
-
-	return ret;
-}
-
 /*
  * Hook the return address and push it in the stack of return addrs
  * in current thread info.
@@ -469,14 +410,13 @@ unsigned long ftrace_return_to_handler(void)
 void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 {
 	unsigned long old;
-	unsigned long long calltime;
 	int faulted;
 	struct ftrace_graph_ent trace;
 	unsigned long return_hooker = (unsigned long)
 				&return_to_handler;
 
 	/* Nmi's are currently unsupported */
-	if (unlikely(atomic_read(&in_nmi)))
+	if (unlikely(in_nmi()))
 		return;
 
 	if (unlikely(atomic_read(&current->tracing_graph_pause)))
@@ -512,17 +452,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 		return;
 	}
 
-	if (unlikely(!__kernel_text_address(old))) {
-		ftrace_graph_stop();
-		*parent = old;
-		WARN_ON(1);
-		return;
-	}
-
-	calltime = cpu_clock(raw_smp_processor_id());
-
-	if (push_return_trace(old, calltime,
-				self_addr, &trace.depth) == -EBUSY) {
+	if (ftrace_push_return_trace(old, self_addr, &trace.depth) == -EBUSY) {
 		*parent = old;
 		return;
 	}
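
For context, the hook that remains here saves the real return address on a per-task stack (now via the generic ftrace_push_return_trace()) and redirects the saved slot to return_to_handler; on -EBUSY the call site is left untouched. A toy user-space sketch of that idea, with invented names and addresses:

	#include <stdio.h>

	#define RET_DEPTH 8

	static unsigned long ret_stack[RET_DEPTH];
	static int curr_ret = -1;

	static int push_return_addr(unsigned long ret)
	{
		if (curr_ret >= RET_DEPTH - 1)
			return -1;			/* like -EBUSY: stack full */
		ret_stack[++curr_ret] = ret;
		return 0;
	}

	static void hook_return(unsigned long *parent, unsigned long return_hooker)
	{
		unsigned long old = *parent;		/* the real return address */

		if (push_return_addr(old) == 0)
			*parent = return_hooker;	/* divert the return */
		/* on failure, *parent still points at the original caller */
	}

	int main(void)
	{
		unsigned long fake_stack_slot = 0x1234;	/* pretend saved return address */

		hook_return(&fake_stack_slot, 0xdead);	/* 0xdead stands in for return_to_handler */
		printf("slot now %#lx, saved %#lx\n", fake_stack_slot, ret_stack[curr_ret]);
		return 0;
	}
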
@@ -536,3 +466,66 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 	}
 }
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+#ifdef CONFIG_FTRACE_SYSCALLS
+
+extern unsigned long __start_syscalls_metadata[];
+extern unsigned long __stop_syscalls_metadata[];
+extern unsigned long *sys_call_table;
+
+static struct syscall_metadata **syscalls_metadata;
+
+static struct syscall_metadata *find_syscall_meta(unsigned long *syscall)
+{
+	struct syscall_metadata *start;
+	struct syscall_metadata *stop;
+	char str[KSYM_SYMBOL_LEN];
+
+
+	start = (struct syscall_metadata *)__start_syscalls_metadata;
+	stop = (struct syscall_metadata *)__stop_syscalls_metadata;
+	kallsyms_lookup((unsigned long) syscall, NULL, NULL, NULL, str);
+
+	for ( ; start < stop; start++) {
+		if (start->name && !strcmp(start->name, str))
+			return start;
+	}
+	return NULL;
+}
+
+struct syscall_metadata *syscall_nr_to_meta(int nr)
+{
+	if (!syscalls_metadata || nr >= FTRACE_SYSCALL_MAX || nr < 0)
+		return NULL;
+
+	return syscalls_metadata[nr];
+}
+
+void arch_init_ftrace_syscalls(void)
+{
+	int i;
+	struct syscall_metadata *meta;
+	unsigned long **psys_syscall_table = &sys_call_table;
+	static atomic_t refs;
+
+	if (atomic_inc_return(&refs) != 1)
+		goto end;
+
+	syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
+					FTRACE_SYSCALL_MAX, GFP_KERNEL);
+	if (!syscalls_metadata) {
+		WARN_ON(1);
+		return;
+	}
+
+	for (i = 0; i < FTRACE_SYSCALL_MAX; i++) {
+		meta = find_syscall_meta(psys_syscall_table[i]);
+		syscalls_metadata[i] = meta;
+	}
+	return;
+
+	/* Paranoid: avoid overflow */
+end:
+	atomic_dec(&refs);
+}
+#endif
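
The new syscall bits build, at init time, a table indexed by syscall number: each sys_call_table entry is resolved to a symbol name through kallsyms and matched against the metadata linked into the image, so syscall_nr_to_meta() later reduces to a plain array lookup. A user-space model of that matching step, with invented metadata and names standing in for the kernel structures:

	#include <stdio.h>
	#include <string.h>

	struct syscall_meta {
		const char *name;
		int nb_args;
	};

	/* stands in for the __start/__stop_syscalls_metadata section contents */
	static struct syscall_meta metadata[] = {
		{ "sys_read",  3 },
		{ "sys_write", 3 },
		{ "sys_open",  3 },
	};

	/* stands in for kallsyms_lookup() applied to the sys_call_table entries */
	static const char *syscall_names[] = { "sys_read", "sys_write", "sys_open" };

	#define NR_SYSCALLS (sizeof(syscall_names) / sizeof(syscall_names[0]))

	static struct syscall_meta *table[NR_SYSCALLS];

	static struct syscall_meta *find_meta(const char *name)
	{
		size_t i;

		for (i = 0; i < sizeof(metadata) / sizeof(metadata[0]); i++)
			if (!strcmp(metadata[i].name, name))
				return &metadata[i];
		return NULL;
	}

	int main(void)
	{
		size_t nr;

		for (nr = 0; nr < NR_SYSCALLS; nr++)	/* arch_init_ftrace_syscalls() step */
			table[nr] = find_meta(syscall_names[nr]);

		/* the equivalent of syscall_nr_to_meta(1) */
		printf("syscall 1 is %s with %d args\n", table[1]->name, table[1]->nb_args);
		return 0;
	}
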