author     Steven Rostedt <rostedt@goodmis.org>    2008-10-30 16:08:32 -0400
committer  Ingo Molnar <mingo@elte.hu>             2008-10-30 16:30:08 -0400
commit     17666f02b118099028522dfc3df00a235700e216
tree       6a7d280b9a6440cc7b8aab54d1e3e970d4ca00b2   /arch/x86/kernel/ftrace.c
parent     7f82f000ed030d1108b4de47d9e2d556092980c6
ftrace: nmi safe code modification
Impact: fix crashes that can occur in NMI handlers if their code is modified

Modifying code is something that needs special care. On SMP boxes, if code
that is being modified is also being executed on another CPU, that CPU will
have undefined results. The dynamic ftrace uses kstop_machine to make the
system act like a uniprocessor system. But this does not address NMIs, which
can still run on other CPUs.

One approach would be to ensure that no code used by NMIs is ever traced.
But NMIs can call notifiers that spread throughout the kernel, so this would
be very hard to maintain, and the chance of missing a function is very high.

The approach this patch takes is to have the NMIs themselves modify the code
while a modification is taking place. This works because writing to code
that is executing on another CPU is not harmful as long as what is written
is the same as what already exists.

Two buffers are used: an IP buffer and a "code" buffer. The steps the
patcher takes are:

 1) Put the instruction pointer into the IP buffer and the new code into
    the "code" buffer.
 2) Set a flag that says we are modifying code.
 3) Wait for any running NMIs to finish.
 4) Write the code.
 5) Clear the flag.
 6) Wait for any running NMIs to finish.

If an NMI executes during this window, it also writes the pending code.
Multiple writes are OK, because what is being written is the same. The
patcher then waits for all running NMIs to finish before moving on to the
next location that must be patched. This is basically the RCU approach to
code modification.

Thanks to Ingo Molnar for suggesting the idea, and to Arjan van de Ven for
his guidance on what is safe and what is not.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
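To make the ordering concrete, here is a minimal, self-contained model of the
same publish-and-wait protocol in user-space C11. The names (patch_site,
nmi_enter, nmi_exit, mod_code, INSN_SIZE) are illustrative stand-ins, not
symbols from the patch; the kernel code in the diff below uses plain ints
with explicit smp_mb()/smp_wmb() barriers, whereas this sketch lets
sequentially consistent C11 atomics provide the equivalent ordering.

#include <stdatomic.h>
#include <string.h>

#define INSN_SIZE 5                       /* stand-in for MCOUNT_INSN_SIZE */

static atomic_int in_nmi;                 /* NMIs currently executing */
static atomic_int mod_code_write;         /* "a write is pending" flag */
static unsigned char *mod_code_ip;        /* IP buffer: where to write */
static unsigned char mod_code_newcode[INSN_SIZE];   /* code buffer */

/* Both the patcher and any NMI may call this; writing identical bytes
 * to the same location from several CPUs is harmless. */
static void mod_code(void)
{
        memcpy(mod_code_ip, mod_code_newcode, INSN_SIZE);
}

/* What an NMI handler does on entry: if a write is pending, help it. */
static void nmi_enter(void)
{
        atomic_fetch_add(&in_nmi, 1);     /* seq_cst stands in for smp_mb() */
        if (atomic_load(&mod_code_write))
                mod_code();
}

static void nmi_exit(void)
{
        atomic_fetch_sub(&in_nmi, 1);     /* copy finishes before in_nmi drops */
}

static void wait_for_nmi(void)
{
        while (atomic_load(&in_nmi))
                ;                         /* cpu_relax() in the kernel */
}

/* Steps 1-6 from the commit message, one patched site at a time. */
static void patch_site(unsigned char *ip, const unsigned char *newcode)
{
        mod_code_ip = ip;                             /* 1) fill the buffers  */
        memcpy(mod_code_newcode, newcode, INSN_SIZE);

        atomic_store(&mod_code_write, 1);             /* 2) announce the write */
        wait_for_nmi();                               /* 3) drain running NMIs */
        mod_code();                                   /* 4) write the code     */
        atomic_store(&mod_code_write, 0);             /* 5) clear the flag     */
        wait_for_nmi();                               /* 6) drain again        */
}

The point the model makes explicit is that both the patcher and any NMI may
end up calling mod_code(), and that is fine: they copy identical bytes to the
same address.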
Diffstat (limited to 'arch/x86/kernel/ftrace.c')
 -rw-r--r--  arch/x86/kernel/ftrace.c | 107
 1 file changed, 106 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 50ea0ac8c9bf..fe5f859130b5 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -56,6 +56,111 @@ unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
         return calc.code;
 }
 
+/*
+ * Modifying code must take extra care. On an SMP machine, if
+ * the code being modified is also being executed on another CPU
+ * that CPU will have undefined results and possibly take a GPF.
+ * We use kstop_machine to stop other CPUs from executing code.
+ * But this does not stop NMIs from happening. We still need
+ * to protect against that. We separate out the modification of
+ * the code to take care of this.
+ *
+ * Two buffers are added: an IP buffer and a "code" buffer.
+ *
+ * 1) Put the instruction pointer into the IP buffer
+ *    and the new code into the "code" buffer.
+ * 2) Set a flag that says we are modifying code.
+ * 3) Wait for any running NMIs to finish.
+ * 4) Write the code.
+ * 5) Clear the flag.
+ * 6) Wait for any running NMIs to finish.
+ *
+ * If an NMI is executed, the first thing it does is to call
+ * "ftrace_nmi_enter". This will check if the flag is set to write
+ * and if it is, it will write what is in the IP and "code" buffers.
+ *
+ * The trick is that it does not matter if more than one CPU writes
+ * the same content to the same code location. Also, if a CPU is
+ * executing that code, it is OK to write to that location as long
+ * as the contents being written match what already exists.
+ */
+
+static atomic_t in_nmi;
+static int mod_code_status;
+static int mod_code_write;
+static void *mod_code_ip;
+static void *mod_code_newcode;
+
+static void ftrace_mod_code(void)
+{
+        /*
+         * Yes, more than one CPU can be writing to mod_code_status
+         * (and the code itself).
+         * But if one were to fail, then they all should, and if one were
+         * to succeed, then they all should.
+         */
+        mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
+                                             MCOUNT_INSN_SIZE);
+
+}
+
+void ftrace_nmi_enter(void)
+{
+        atomic_inc(&in_nmi);
+        /* Must have in_nmi seen before reading write flag */
+        smp_mb();
+        if (mod_code_write)
+                ftrace_mod_code();
+}
+
+void ftrace_nmi_exit(void)
+{
+        /* Finish all executions before clearing in_nmi */
+        smp_wmb();
+        atomic_dec(&in_nmi);
+}
+
+static void wait_for_nmi(void)
+{
+        while (atomic_read(&in_nmi))
+                cpu_relax();
+}
+
+static int
+do_ftrace_mod_code(unsigned long ip, void *new_code)
+{
+        mod_code_ip = (void *)ip;
+        mod_code_newcode = new_code;
+
+        /* The buffers need to be visible before we let NMIs write them */
+        smp_wmb();
+
+        mod_code_write = 1;
+
+        /* Make sure write bit is visible before we wait on NMIs */
+        smp_mb();
+
+        wait_for_nmi();
+
+        /* Make sure all running NMIs have finished before we write the code */
+        smp_mb();
+
+        ftrace_mod_code();
+
+        /* Make sure the write happens before clearing the bit */
+        smp_wmb();
+
+        mod_code_write = 0;
+
+        /* Make sure NMIs see the cleared bit */
+        smp_mb();
+
+        wait_for_nmi();
+
+        return mod_code_status;
+}
+
+
 int
 ftrace_modify_code(unsigned long ip, unsigned char *old_code,
                    unsigned char *new_code)
@@ -81,7 +186,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
                 return -EINVAL;
 
         /* replace the text with the new text */
-        if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE))
+        if (do_ftrace_mod_code(ip, new_code))
                 return -EPERM;
 
         sync_core();
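For context, a hypothetical caller of the patched path might look like the
sketch below. It is not part of this commit: the real call sites live in the
generic ftrace code under kernel/trace/ and run inside kstop_machine(). The
sketch assumes the ftrace_nop_replace() helper elsewhere in this file, which
returns the nop bytes placed at patched mcount sites.

/* Hypothetical illustration only: point the mcount call site at "ip"
 * to the tracer entry "addr". */
static int example_enable_site(unsigned long ip, unsigned long addr)
{
        unsigned char *old, *new;

        old = ftrace_nop_replace();             /* bytes expected at the site */
        new = ftrace_call_replace(ip, addr);    /* "call addr" encoded for ip */

        /* Returns -EINVAL if the site does not contain "old", or -EPERM
         * if the NMI-safe write in do_ftrace_mod_code() fails. */
        return ftrace_modify_code(ip, old, new);
}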