Diffstat (limited to 'arch/x86/kernel/ftrace.c')
-rw-r--r--	arch/x86/kernel/ftrace.c	265
1 file changed, 129 insertions(+), 136 deletions(-)
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 231bdd3c5b1c..61df77532120 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -18,6 +18,7 @@
 #include <linux/init.h>
 #include <linux/list.h>
 
+#include <asm/cacheflush.h>
 #include <asm/ftrace.h>
 #include <linux/ftrace.h>
 #include <asm/nops.h>
@@ -26,6 +27,18 @@
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 
+int ftrace_arch_code_modify_prepare(void)
+{
+	set_kernel_text_rw();
+	return 0;
+}
+
+int ftrace_arch_code_modify_post_process(void)
+{
+	set_kernel_text_ro();
+	return 0;
+}
+
 union ftrace_code_union {
 	char code[MCOUNT_INSN_SIZE];
 	struct {
@@ -66,11 +79,11 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
  *
  * 1) Put the instruction pointer into the IP buffer
  *    and the new code into the "code" buffer.
- * 2) Set a flag that says we are modifying code
- * 3) Wait for any running NMIs to finish.
- * 4) Write the code
- * 5) clear the flag.
- * 6) Wait for any running NMIs to finish.
+ * 2) Wait for any running NMIs to finish and set a flag that says
+ *    we are modifying code, it is done in an atomic operation.
+ * 3) Write the code
+ * 4) clear the flag.
+ * 5) Wait for any running NMIs to finish.
  *
  * If an NMI is executed, the first thing it does is to call
  * "ftrace_nmi_enter". This will check if the flag is set to write
@@ -82,9 +95,9 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
  * are the same as what exists.
  */
 
-static atomic_t in_nmi = ATOMIC_INIT(0);
+#define MOD_CODE_WRITE_FLAG (1 << 31)	/* set when NMI should do the write */
+static atomic_t nmi_running = ATOMIC_INIT(0);
 static int mod_code_status;		/* holds return value of text write */
-static int mod_code_write;		/* set when NMI should do the write */
 static void *mod_code_ip;		/* holds the IP to write to */
 static void *mod_code_newcode;	/* holds the text to write to the IP */
 
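The heart of this hunk is that the old mod_code_write flag is folded into the top bit of the NMI reference count, so a single atomic increment in the NMI path both registers the NMI and reports whether a pending text write should be carried out. Below is a minimal user-space sketch of that encoding, using C11 atomics rather than the kernel's atomic_t; the function names and the main() harness are illustrative only, not kernel API.

#include <stdatomic.h>
#include <stdio.h>

/* Top bit: "code modification requested"; low bits: NMIs in flight. */
#define MOD_CODE_WRITE_FLAG (1u << 31)

static atomic_uint nmi_running;

/* NMI entry: one atomic op registers this NMI *and* observes the flag. */
static int nmi_enter(void)
{
	unsigned int counted = atomic_fetch_add(&nmi_running, 1) + 1;

	return counted & MOD_CODE_WRITE_FLAG;	/* non-zero: do the write */
}

static void nmi_exit(void)
{
	atomic_fetch_sub(&nmi_running, 1);
}

int main(void)
{
	printf("flag clear: nmi sees %d\n", !!nmi_enter());
	nmi_exit();

	/* a "writer" raises the flag; the next NMI observes it on entry */
	atomic_fetch_or(&nmi_running, MOD_CODE_WRITE_FLAG);
	printf("flag set:   nmi sees %d\n", !!nmi_enter());
	nmi_exit();
	return 0;
}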
@@ -101,6 +114,20 @@ int ftrace_arch_read_dyn_info(char *buf, int size)
 	return r;
 }
 
+static void clear_mod_flag(void)
+{
+	int old = atomic_read(&nmi_running);
+
+	for (;;) {
+		int new = old & ~MOD_CODE_WRITE_FLAG;
+
+		if (old == new)
+			break;
+
+		old = atomic_cmpxchg(&nmi_running, old, new);
+	}
+}
+
 static void ftrace_mod_code(void)
 {
 	/*
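clear_mod_flag() above cannot simply store zero: the same atomic_t also holds the count of NMIs currently in flight, so the flag has to be stripped with a cmpxchg retry loop that preserves whatever count it races against. A rough user-space equivalent with C11 compare-exchange follows; the names are illustrative, not the kernel API.

#include <stdatomic.h>

#define MOD_CODE_WRITE_FLAG (1u << 31)

static atomic_uint nmi_running;

/* Strip only the write flag; the NMI count in the low bits survives.
 * compare_exchange_weak refreshes 'old' on failure, so the loop simply
 * retries until no NMI raced in between the load and the update. */
static void clear_mod_flag(void)
{
	unsigned int old = atomic_load(&nmi_running);

	while (!atomic_compare_exchange_weak(&nmi_running, &old,
					     old & ~MOD_CODE_WRITE_FLAG))
		;	/* retry with the refreshed value of 'old' */
}

int main(void)
{
	atomic_store(&nmi_running, MOD_CODE_WRITE_FLAG | 3);	/* flag + 3 NMIs */
	clear_mod_flag();
	return atomic_load(&nmi_running) == 3 ? 0 : 1;		/* count preserved */
}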
@@ -111,37 +138,52 @@ static void ftrace_mod_code(void)
 	 */
 	mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
 					     MCOUNT_INSN_SIZE);
+
+	/* if we fail, then kill any new writers */
+	if (mod_code_status)
+		clear_mod_flag();
 }
 
 void ftrace_nmi_enter(void)
 {
-	atomic_inc(&in_nmi);
-	/* Must have in_nmi seen before reading write flag */
-	smp_mb();
-	if (mod_code_write) {
+	if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
+		smp_rmb();
 		ftrace_mod_code();
 		atomic_inc(&nmi_update_count);
 	}
+	/* Must have previous changes seen before executions */
+	smp_mb();
 }
 
 void ftrace_nmi_exit(void)
 {
-	/* Finish all executions before clearing in_nmi */
-	smp_wmb();
-	atomic_dec(&in_nmi);
+	/* Finish all executions before clearing nmi_running */
+	smp_mb();
+	atomic_dec(&nmi_running);
+}
+
+static void wait_for_nmi_and_set_mod_flag(void)
+{
+	if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG))
+		return;
+
+	do {
+		cpu_relax();
+	} while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG));
+
+	nmi_wait_count++;
 }
 
 static void wait_for_nmi(void)
 {
-	int waited = 0;
+	if (!atomic_read(&nmi_running))
+		return;
 
-	while (atomic_read(&in_nmi)) {
-		waited = 1;
+	do {
 		cpu_relax();
-	}
+	} while (atomic_read(&nmi_running));
 
-	if (waited)
-		nmi_wait_count++;
+	nmi_wait_count++;
 }
 
 static int
@@ -151,14 +193,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
 	mod_code_newcode = new_code;
 
 	/* The buffers need to be visible before we let NMIs write them */
-	smp_wmb();
-
-	mod_code_write = 1;
-
-	/* Make sure write bit is visible before we wait on NMIs */
 	smp_mb();
 
-	wait_for_nmi();
+	wait_for_nmi_and_set_mod_flag();
 
 	/* Make sure all running NMIs have finished before we write the code */
 	smp_mb();
@@ -166,13 +203,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
 	ftrace_mod_code();
 
 	/* Make sure the write happens before clearing the bit */
-	smp_wmb();
-
-	mod_code_write = 0;
-
-	/* make sure NMIs see the cleared bit */
 	smp_mb();
 
+	clear_mod_flag();
 	wait_for_nmi();
 
 	return mod_code_status;
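With the flag and the NMI count merged, the writer in do_ftrace_mod_code() needs only one full barrier on each side of the flag transition where it previously needed separate smp_wmb()/smp_mb() pairs around mod_code_write. The sequence after this patch, condensed into a self-contained user-space sketch: C11 fences stand in for smp_mb(), atomic_fetch_and is used as a shortcut for the flag-clearing loop shown earlier, and every name not present in the diff is illustrative.

#include <stdatomic.h>
#include <string.h>

#define MOD_CODE_WRITE_FLAG (1u << 31)

static atomic_uint nmi_running;
static void *mod_ip;			/* where to write */
static unsigned char mod_newcode[5];	/* what to write */
static int mod_status;

static void mod_code(void)
{
	memcpy(mod_ip, mod_newcode, sizeof(mod_newcode));
	mod_status = 0;
}

/* Spin until no NMI is in flight, atomically raising the flag the
 * moment the count hits zero (cmpxchg 0 -> FLAG). */
static void wait_for_nmi_and_set_mod_flag(void)
{
	unsigned int zero = 0;

	while (!atomic_compare_exchange_weak(&nmi_running, &zero,
					     MOD_CODE_WRITE_FLAG))
		zero = 0;	/* reset the expected value and retry */
}

/* After the flag is cleared, wait for any NMI that may still be
 * doing the write on our behalf. */
static void wait_for_nmi(void)
{
	while (atomic_load(&nmi_running) & ~MOD_CODE_WRITE_FLAG)
		;
}

static int do_mod_code(void *ip, const unsigned char *new_code)
{
	mod_ip = ip;
	memcpy(mod_newcode, new_code, sizeof(mod_newcode));
	/* buffers must be visible before any NMI can observe the flag */
	atomic_thread_fence(memory_order_seq_cst);

	wait_for_nmi_and_set_mod_flag();
	mod_code();					/* the write itself */
	atomic_thread_fence(memory_order_seq_cst);	/* write before clearing */

	atomic_fetch_and(&nmi_running, ~MOD_CODE_WRITE_FLAG);
	wait_for_nmi();
	return mod_status;
}

int main(void)
{
	/* sample data: a 5-byte x86 NOP pattern */
	unsigned char target[5], nop5[5] = { 0x0f, 0x1f, 0x44, 0x00, 0x00 };

	return do_mod_code(target, nop5);
}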
@@ -368,100 +401,8 @@ int ftrace_disable_ftrace_graph_caller(void)
 	return ftrace_mod_jmp(ip, old_offset, new_offset);
 }
 
-#else /* CONFIG_DYNAMIC_FTRACE */
-
-/*
- * These functions are picked from those used on
- * this page for dynamic ftrace. They have been
- * simplified to ignore all traces in NMI context.
- */
-static atomic_t in_nmi;
-
-void ftrace_nmi_enter(void)
-{
-	atomic_inc(&in_nmi);
-}
-
-void ftrace_nmi_exit(void)
-{
-	atomic_dec(&in_nmi);
-}
-
 #endif /* !CONFIG_DYNAMIC_FTRACE */
 
-/* Add a function return address to the trace stack on thread info.*/
-static int push_return_trace(unsigned long ret, unsigned long long time,
-				unsigned long func, int *depth)
-{
-	int index;
-
-	if (!current->ret_stack)
-		return -EBUSY;
-
-	/* The return trace stack is full */
-	if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
-		atomic_inc(&current->trace_overrun);
-		return -EBUSY;
-	}
-
-	index = ++current->curr_ret_stack;
-	barrier();
-	current->ret_stack[index].ret = ret;
-	current->ret_stack[index].func = func;
-	current->ret_stack[index].calltime = time;
-	*depth = index;
-
-	return 0;
-}
-
-/* Retrieve a function return address to the trace stack on thread info.*/
-static void pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
-{
-	int index;
-
-	index = current->curr_ret_stack;
-
-	if (unlikely(index < 0)) {
-		ftrace_graph_stop();
-		WARN_ON(1);
-		/* Might as well panic, otherwise we have no where to go */
-		*ret = (unsigned long)panic;
-		return;
-	}
-
-	*ret = current->ret_stack[index].ret;
-	trace->func = current->ret_stack[index].func;
-	trace->calltime = current->ret_stack[index].calltime;
-	trace->overrun = atomic_read(&current->trace_overrun);
-	trace->depth = index;
-	barrier();
-	current->curr_ret_stack--;
-
-}
-
-/*
- * Send the trace to the ring-buffer.
- * @return the original return address.
- */
-unsigned long ftrace_return_to_handler(void)
-{
-	struct ftrace_graph_ret trace;
-	unsigned long ret;
-
-	pop_return_trace(&trace, &ret);
-	trace.rettime = cpu_clock(raw_smp_processor_id());
-	ftrace_graph_return(&trace);
-
-	if (unlikely(!ret)) {
-		ftrace_graph_stop();
-		WARN_ON(1);
-		/* Might as well panic. What else to do? */
-		ret = (unsigned long)panic;
-	}
-
-	return ret;
-}
-
 /*
  * Hook the return address and push it in the stack of return addrs
  * in current thread info.
@@ -469,14 +410,13 @@ unsigned long ftrace_return_to_handler(void)
 void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 {
 	unsigned long old;
-	unsigned long long calltime;
 	int faulted;
 	struct ftrace_graph_ent trace;
 	unsigned long return_hooker = (unsigned long)
 				&return_to_handler;
 
 	/* Nmi's are currently unsupported */
-	if (unlikely(atomic_read(&in_nmi)))
+	if (unlikely(in_nmi()))
 		return;
 
 	if (unlikely(atomic_read(&current->tracing_graph_pause)))
@@ -512,17 +452,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 		return;
 	}
 
-	if (unlikely(!__kernel_text_address(old))) {
-		ftrace_graph_stop();
-		*parent = old;
-		WARN_ON(1);
-		return;
-	}
-
-	calltime = cpu_clock(raw_smp_processor_id());
-
-	if (push_return_trace(old, calltime,
-				self_addr, &trace.depth) == -EBUSY) {
+	if (ftrace_push_return_trace(old, self_addr, &trace.depth) == -EBUSY) {
 		*parent = old;
 		return;
 	}
@@ -536,3 +466,66 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 	}
 }
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+#ifdef CONFIG_FTRACE_SYSCALLS
+
+extern unsigned long __start_syscalls_metadata[];
+extern unsigned long __stop_syscalls_metadata[];
+extern unsigned long *sys_call_table;
+
+static struct syscall_metadata **syscalls_metadata;
+
+static struct syscall_metadata *find_syscall_meta(unsigned long *syscall)
+{
+	struct syscall_metadata *start;
+	struct syscall_metadata *stop;
+	char str[KSYM_SYMBOL_LEN];
+
+
+	start = (struct syscall_metadata *)__start_syscalls_metadata;
+	stop = (struct syscall_metadata *)__stop_syscalls_metadata;
+	kallsyms_lookup((unsigned long) syscall, NULL, NULL, NULL, str);
+
+	for ( ; start < stop; start++) {
+		if (start->name && !strcmp(start->name, str))
+			return start;
+	}
+	return NULL;
+}
+
+struct syscall_metadata *syscall_nr_to_meta(int nr)
+{
+	if (!syscalls_metadata || nr >= FTRACE_SYSCALL_MAX || nr < 0)
+		return NULL;
+
+	return syscalls_metadata[nr];
+}
+
+void arch_init_ftrace_syscalls(void)
+{
+	int i;
+	struct syscall_metadata *meta;
+	unsigned long **psys_syscall_table = &sys_call_table;
+	static atomic_t refs;
+
+	if (atomic_inc_return(&refs) != 1)
+		goto end;
+
+	syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
+					FTRACE_SYSCALL_MAX, GFP_KERNEL);
+	if (!syscalls_metadata) {
+		WARN_ON(1);
+		return;
+	}
+
+	for (i = 0; i < FTRACE_SYSCALL_MAX; i++) {
+		meta = find_syscall_meta(psys_syscall_table[i]);
+		syscalls_metadata[i] = meta;
+	}
+	return;
+
+	/* Paranoid: avoid overflow */
+end:
+	atomic_dec(&refs);
+}
+#endif
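The CONFIG_FTRACE_SYSCALLS block above only builds the syscall-number-to-metadata table; the consumer lives in the generic tracing code. Roughly, a user of the table would look like the sketch below, where only syscall_nr_to_meta() and the ->name field come from the code above; the probe function and the trace_printk() call are purely illustrative.

/* Hypothetical consumer of the table built by arch_init_ftrace_syscalls():
 * resolve a syscall number at entry time and emit its name. */
static void probe_syscall_enter(int syscall_nr)
{
	struct syscall_metadata *meta = syscall_nr_to_meta(syscall_nr);

	if (!meta)	/* table not initialised, or nr out of range */
		return;

	trace_printk("sys_%s()\n", meta->name);
}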