aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSteven Rostedt <rostedt@goodmis.org>2008-10-30 16:08:32 -0400
committerIngo Molnar <mingo@elte.hu>2008-10-30 16:30:08 -0400
commit17666f02b118099028522dfc3df00a235700e216 (patch)
tree6a7d280b9a6440cc7b8aab54d1e3e970d4ca00b2
parent7f82f000ed030d1108b4de47d9e2d556092980c6 (diff)
ftrace: nmi safe code modification
Impact: fix crashes that can occur in NMI handlers, if their code is modified. Modifying code is something that needs special care. On SMP boxes, if code that is being modified is also being executed on another CPU, that CPU will have undefined results. The dynamic ftrace uses kstop_machine to make the system act like a uniprocessor system. But this does not address NMIs, that can still run on other CPUs. One approach to handle this is to make all code that is used by NMIs not be traced. But NMIs can call notifiers that spread throughout the kernel and this will be very hard to maintain, and the chance of missing a function is very high. The approach that this patch takes is to have the NMIs modify the code if the modification is taking place. The way this works is that just writing to code executing on another CPU is not harmful if what is written is the same as what exists. Two buffers are used: an IP buffer and a "code" buffer. The steps that the patcher takes are: 1) Put the instruction pointer into the IP buffer and the new code into the "code" buffer. 2) Set a flag that says we are modifying code. 3) Wait for any running NMIs to finish. 4) Write the code. 5) Clear the flag. 6) Wait for any running NMIs to finish. If an NMI is executed, it will also write the pending code. Multiple writes are OK, because what is being written is the same. Then the patcher must wait for all running NMIs to finish before going to the next line that must be patched. This is basically the RCU approach to code modification. Thanks to Ingo Molnar for suggesting the idea, and to Arjan van de Ven for his guidance on what is safe and what is not. Signed-off-by: Steven Rostedt <srostedt@redhat.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--arch/arm/include/asm/ftrace.h5
-rw-r--r--arch/powerpc/include/asm/ftrace.h5
-rw-r--r--arch/sh/include/asm/ftrace.h5
-rw-r--r--arch/sparc/include/asm/ftrace.h5
-rw-r--r--arch/x86/include/asm/ftrace.h15
-rw-r--r--arch/x86/kernel/ftrace.c107
-rw-r--r--include/linux/hardirq.h15
7 files changed, 154 insertions, 3 deletions
diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h
index 39c8bc1a006a..d4c24a7a9280 100644
--- a/arch/arm/include/asm/ftrace.h
+++ b/arch/arm/include/asm/ftrace.h
@@ -1,6 +1,11 @@
1#ifndef _ASM_ARM_FTRACE 1#ifndef _ASM_ARM_FTRACE
2#define _ASM_ARM_FTRACE 2#define _ASM_ARM_FTRACE
3 3
4#ifndef __ASSEMBLY__
5#define ftrace_nmi_enter() do { } while (0)
6#define ftrace_nmi_exit() do { } while (0)
7#endif
8
4#ifdef CONFIG_FUNCTION_TRACER 9#ifdef CONFIG_FUNCTION_TRACER
5#define MCOUNT_ADDR ((long)(mcount)) 10#define MCOUNT_ADDR ((long)(mcount))
6#define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */ 11#define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */
diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h
index b298f7a631e6..7652755dc000 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -1,6 +1,11 @@
1#ifndef _ASM_POWERPC_FTRACE 1#ifndef _ASM_POWERPC_FTRACE
2#define _ASM_POWERPC_FTRACE 2#define _ASM_POWERPC_FTRACE
3 3
4#ifndef __ASSEMBLY__
5#define ftrace_nmi_enter() do { } while (0)
6#define ftrace_nmi_exit() do { } while (0)
7#endif
8
4#ifdef CONFIG_FUNCTION_TRACER 9#ifdef CONFIG_FUNCTION_TRACER
5#define MCOUNT_ADDR ((long)(_mcount)) 10#define MCOUNT_ADDR ((long)(_mcount))
6#define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */ 11#define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */
diff --git a/arch/sh/include/asm/ftrace.h b/arch/sh/include/asm/ftrace.h
index 3aed362c9463..cdf2cb0b9ffe 100644
--- a/arch/sh/include/asm/ftrace.h
+++ b/arch/sh/include/asm/ftrace.h
@@ -2,6 +2,11 @@
2#define __ASM_SH_FTRACE_H 2#define __ASM_SH_FTRACE_H
3 3
4#ifndef __ASSEMBLY__ 4#ifndef __ASSEMBLY__
5#define ftrace_nmi_enter() do { } while (0)
6#define ftrace_nmi_exit() do { } while (0)
7#endif
8
9#ifndef __ASSEMBLY__
5extern void mcount(void); 10extern void mcount(void);
6#endif 11#endif
7 12
diff --git a/arch/sparc/include/asm/ftrace.h b/arch/sparc/include/asm/ftrace.h
index d27716cd38c1..33a95feeb137 100644
--- a/arch/sparc/include/asm/ftrace.h
+++ b/arch/sparc/include/asm/ftrace.h
@@ -1,6 +1,11 @@
1#ifndef _ASM_SPARC64_FTRACE 1#ifndef _ASM_SPARC64_FTRACE
2#define _ASM_SPARC64_FTRACE 2#define _ASM_SPARC64_FTRACE
3 3
4#ifndef __ASSEMBLY__
5#define ftrace_nmi_enter() do { } while (0)
6#define ftrace_nmi_exit() do { } while (0)
7#endif
8
4#ifdef CONFIG_MCOUNT 9#ifdef CONFIG_MCOUNT
5#define MCOUNT_ADDR ((long)(_mcount)) 10#define MCOUNT_ADDR ((long)(_mcount))
6#define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */ 11#define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 9e8bc29b8b17..f2ed6b704a75 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -17,6 +17,21 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
17 */ 17 */
18 return addr - 1; 18 return addr - 1;
19} 19}
20
21#ifdef CONFIG_DYNAMIC_FTRACE
22extern void ftrace_nmi_enter(void);
23extern void ftrace_nmi_exit(void);
24#else
25#define ftrace_nmi_enter() do { } while (0)
26#define ftrace_nmi_exit() do { } while (0)
27#endif
28#endif
29
30#else /* CONFIG_FUNCTION_TRACER */
31
32#ifndef __ASSEMBLY__
33#define ftrace_nmi_enter() do { } while (0)
34#define ftrace_nmi_exit() do { } while (0)
20#endif 35#endif
21 36
22#endif /* CONFIG_FUNCTION_TRACER */ 37#endif /* CONFIG_FUNCTION_TRACER */
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 50ea0ac8c9bf..fe5f859130b5 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -56,6 +56,111 @@ unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
56 return calc.code; 56 return calc.code;
57} 57}
58 58
59/*
60 * Modifying code must take extra care. On an SMP machine, if
61 * the code being modified is also being executed on another CPU
62 * that CPU will have undefined results and possibly take a GPF.
63 * We use kstop_machine to stop other CPUs from executing code.
64 * But this does not stop NMIs from happening. We still need
65 * to protect against that. We separate out the modification of
66 * the code to take care of this.
67 *
68 * Two buffers are added: An IP buffer and a "code" buffer.
69 *
70 * 1) Put the instruction pointer into the IP buffer
71 * and the new code into the "code" buffer.
72 * 2) Set a flag that says we are modifying code
73 * 3) Wait for any running NMIs to finish.
74 * 4) Write the code
75 * 5) clear the flag.
76 * 6) Wait for any running NMIs to finish.
77 *
78 * If an NMI is executed, the first thing it does is to call
79 * "ftrace_nmi_enter". This will check if the flag is set to write
80 * and if it is, it will write what is in the IP and "code" buffers.
81 *
82 * The trick is, it does not matter if everyone is writing the same
83 * content to the code location. Also, if a CPU is executing code
84 * it is OK to write to that code location if the contents being written
85 * are the same as what exists.
86 */
87
/*
 * Shared state for the NMI-safe code modification protocol:
 *
 *  in_nmi           - incremented by ftrace_nmi_enter() and decremented by
 *                     ftrace_nmi_exit(); wait_for_nmi() spins until it
 *                     reads zero.
 *  mod_code_status  - result of the most recent probe_kernel_write() done
 *                     by ftrace_mod_code().
 *  mod_code_write   - flag: non-zero while a modification is pending, which
 *                     tells ftrace_nmi_enter() to perform the write too.
 *  mod_code_ip      - the "IP buffer": address of the code to overwrite.
 *  mod_code_newcode - the "code buffer": pointer to the replacement bytes.
 */
88static atomic_t in_nmi;
89static int mod_code_status;
90static int mod_code_write;
91static void *mod_code_ip;
92static void *mod_code_newcode;
93
/*
 * ftrace_mod_code - write the pending replacement instruction.
 *
 * Copies MCOUNT_INSN_SIZE bytes from mod_code_newcode to mod_code_ip using
 * probe_kernel_write() and records the result in mod_code_status. It takes
 * no arguments; the target and the new bytes come from the file-scope
 * buffers. It is called both by the patcher (do_ftrace_mod_code()) and by
 * ftrace_nmi_enter() when mod_code_write is set.
 */
94static void ftrace_mod_code(void)
95{
96 /*
97 * Yes, more than one CPU process can be writing to mod_code_status.
98 * (and the code itself)
99 * But if one were to fail, then they all should, and if one were
100 * to succeed, then they all should.
101 */
102 mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
103 MCOUNT_INSN_SIZE);
104
105}
106
/*
 * ftrace_nmi_enter - NMI entry hook for the code modification protocol.
 *
 * Increments in_nmi so that wait_for_nmi() can see an NMI is in progress,
 * issues a full memory barrier, and then, if mod_code_write is set,
 * performs the pending write itself via ftrace_mod_code().
 */
107void ftrace_nmi_enter(void)
108{
109 atomic_inc(&in_nmi);
110 /* Must have in_nmi seen before reading write flag */
111 smp_mb();
112 if (mod_code_write)
113 ftrace_mod_code();
114}
115
/*
 * ftrace_nmi_exit - NMI exit hook for the code modification protocol.
 *
 * Issues a write barrier and then decrements in_nmi, undoing the
 * increment done in ftrace_nmi_enter().
 *
 * NOTE(review): smp_wmb() only orders stores. If the intent is that every
 * access made during the NMI (loads included) completes before in_nmi is
 * seen to drop, a full smp_mb() may be the more appropriate barrier here.
 * TODO confirm against the memory-barrier rules for atomic_dec().
 */
116void ftrace_nmi_exit(void)
117{
118 /* Finish all executions before clearing in_nmi */
119 smp_wmb();
120 atomic_dec(&in_nmi);
121}
122
/*
 * wait_for_nmi - busy-wait until no NMI handler is counted in in_nmi.
 *
 * Spins, calling cpu_relax() on each iteration, until atomic_read(&in_nmi)
 * returns zero. There is no timeout.
 */
123static void wait_for_nmi(void)
124{
125 while (atomic_read(&in_nmi))
126 cpu_relax();
127}
128
/*
 * do_ftrace_mod_code - patch one instruction in a way that tolerates NMIs.
 * @ip:       address of the instruction to overwrite (stored in mod_code_ip)
 * @new_code: pointer to the replacement bytes (stored in mod_code_newcode)
 *
 * Sequence, with the barrier order being significant:
 *   1) publish @ip and @new_code in the shared buffers,
 *   2) set mod_code_write so NMIs entering from now on do the write too,
 *   3) wait for in_nmi to reach zero,
 *   4) do the write via ftrace_mod_code(),
 *   5) clear mod_code_write,
 *   6) wait for in_nmi to reach zero again.
 *
 * Returns mod_code_status, i.e. the result of probe_kernel_write() as last
 * stored by ftrace_mod_code(). That store may have come from this function
 * or from an NMI that ran while the flag was set. The caller in
 * ftrace_modify_code() treats any non-zero value as failure.
 */
129static int
130do_ftrace_mod_code(unsigned long ip, void *new_code)
131{
132 mod_code_ip = (void *)ip;
133 mod_code_newcode = new_code;
134
135 /* The buffers need to be visible before we let NMIs write them */
136 smp_wmb();
137
138 mod_code_write = 1;
139
140 /* Make sure write bit is visible before we wait on NMIs */
141 smp_mb();
142
143 wait_for_nmi();
144
145 /* Make sure all running NMIs have finished before we write the code */
146 smp_mb();
147
148 ftrace_mod_code();
149
150 /* Make sure the write happens before clearing the bit */
151 smp_wmb();
152
153 mod_code_write = 0;
154
155 /* make sure NMIs see the cleared bit */
156 smp_mb();
157
158 wait_for_nmi();
159
160 return mod_code_status;
161}
162
163
59int 164int
60ftrace_modify_code(unsigned long ip, unsigned char *old_code, 165ftrace_modify_code(unsigned long ip, unsigned char *old_code,
61 unsigned char *new_code) 166 unsigned char *new_code)
@@ -81,7 +186,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
81 return -EINVAL; 186 return -EINVAL;
82 187
83 /* replace the text with the new text */ 188 /* replace the text with the new text */
84 if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE)) 189 if (do_ftrace_mod_code(ip, new_code))
85 return -EPERM; 190 return -EPERM;
86 191
87 sync_core(); 192 sync_core();
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 181006cc94a0..0087cb43becf 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -5,6 +5,7 @@
5#include <linux/smp_lock.h> 5#include <linux/smp_lock.h>
6#include <linux/lockdep.h> 6#include <linux/lockdep.h>
7#include <asm/hardirq.h> 7#include <asm/hardirq.h>
8#include <asm/ftrace.h>
8#include <asm/system.h> 9#include <asm/system.h>
9 10
10/* 11/*
@@ -161,7 +162,17 @@ extern void irq_enter(void);
161 */ 162 */
162extern void irq_exit(void); 163extern void irq_exit(void);
163 164
164#define nmi_enter() do { lockdep_off(); __irq_enter(); } while (0) 165#define nmi_enter() \
165#define nmi_exit() do { __irq_exit(); lockdep_on(); } while (0) 166 do { \
167 ftrace_nmi_enter(); \
168 lockdep_off(); \
169 __irq_enter(); \
170 } while (0)
171#define nmi_exit() \
172 do { \
173 __irq_exit(); \
174 lockdep_on(); \
175 ftrace_nmi_exit(); \
176 } while (0)
166 177
167#endif /* LINUX_HARDIRQ_H */ 178#endif /* LINUX_HARDIRQ_H */