aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorSteven Rostedt <srostedt@redhat.com>2008-05-12 15:20:42 -0400
committerThomas Gleixner <tglx@linutronix.de>2008-05-23 14:33:09 -0400
commit3d0833953e1b98b79ddf491dd49229eef9baeac1 (patch)
tree3520cda824bdb58e47ce3e9f43d68249d5cc1a12 /arch/x86
parent6cd8a4bb2f97527a9ceb30bc77ea4e959c6a95e3 (diff)
ftrace: dynamic enabling/disabling of function calls
This patch adds a feature to dynamically replace the ftrace code with jmps, to allow a kernel with ftrace configured to run as fast as it can without it configured. The way this works is: on bootup (if ftrace is enabled), an ftrace function is registered to record the instruction pointer of all places that call the function. Later, if there's still any code to patch, a kthread is awoken (rate limited to at most once a second) that performs a stop_machine, and replaces all the code that was called with a jmp over the call to ftrace. It only replaces what was found the previous time. Typically the system reaches equilibrium quickly after bootup and there's no code patching needed at all. e.g. call ftrace /* 5 bytes */ is replaced with jmp 3f /* jmp is 2 bytes and we jump 3 forward */ 3: When we want to enable ftrace for function tracing, the IP recording is removed, and stop_machine is called again to replace all the locations that were recorded back to the call of ftrace. When it is disabled, we replace the code back to the jmp. Allocation is done by the kthread. If the ftrace recording function is called, and we don't have any record slots available, then we simply skip that call. Once a second a new page (if needed) is allocated for recording new ftrace function calls. A large batch is allocated at boot up to get most of the calls there. Because we do this via stop_machine, we don't have to worry about another CPU executing an ftrace call as we modify it. But we do need to worry about NMIs, so all functions that might be called via NMI must be annotated with notrace_nmi. When this code is configured in, the NMI code will not call notrace. Signed-off-by: Steven Rostedt <srostedt@redhat.com> Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/ftrace.c237
2 files changed, 238 insertions, 0 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 5e618c3b4720..e142091524b0 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -56,6 +56,7 @@ obj-$(CONFIG_X86_MPPARSE) += mpparse.o
56obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi_$(BITS).o 56obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi_$(BITS).o
57obj-$(CONFIG_X86_IO_APIC) += io_apic_$(BITS).o 57obj-$(CONFIG_X86_IO_APIC) += io_apic_$(BITS).o
58obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o 58obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
59obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
59obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o 60obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
60obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o 61obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
61obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o 62obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
new file mode 100644
index 000000000000..5dd58136ef02
--- /dev/null
+++ b/arch/x86/kernel/ftrace.c
@@ -0,0 +1,237 @@
1/*
2 * Code for replacing ftrace calls with jumps.
3 *
4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5 *
6 * Thanks goes to Ingo Molnar, for suggesting the idea.
7 * Mathieu Desnoyers, for suggesting postponing the modifications.
8 * Arjan van de Ven, for keeping me straight, and explaining to me
9 * the dangers of modifying code on the run.
10 */
11
12#include <linux/spinlock.h>
13#include <linux/hardirq.h>
14#include <linux/ftrace.h>
15#include <linux/percpu.h>
16#include <linux/init.h>
17#include <linux/list.h>
18
/*
 * Number of bytes subtracted from a recorded IP to reach the start of the
 * call instruction that precedes it.
 */
#define CALL_BACK 5

/*
 * 16-bit pattern that marks an already-converted call site. Stored
 * little-endian this is the byte pair 0xeb 0x03; per the design this is a
 * 2-byte jmp that skips 3 bytes forward, i.e. over the rest of the call.
 */
#define JMPFWD 0x03eb

/* JMPFWD held in memory so that its address can be passed as code bytes. */
static unsigned short ftrace_jmp = JMPFWD;
24
/*
 * One recorded call site: the dyn_ftrace record handed out to callers plus
 * the outcome of the most recent patch attempt on that site. A non-zero
 * 'failed' makes ftrace_replace_code() skip the site.
 */
struct ftrace_record {
	struct dyn_ftrace rec;
	int failed;	/* last ftrace_modify_code() result, 0 == patched fine */
} __attribute__((packed));
29
/*
 * Header of one page of call-site records. Pages are obtained with
 * get_zeroed_page() and chained through 'next'; 'index' counts the
 * records[] slots already handed out on this page.
 */
struct ftrace_page {
	struct ftrace_page *next;
	int index;
	struct ftrace_record records[];
} __attribute__((packed));

/* How many ftrace_record slots fit in a page after the ftrace_page header. */
#define ENTRIES_PER_PAGE \
	((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct ftrace_record))
38
/*
 * Number of record slots to pre-allocate in ftrace_shutdown_arch_init();
 * the value is an estimate from running different kernels.
 */
#define NR_TO_INIT 10000

/*
 * Address of mcount as a long, used to compute call displacements.
 * NOTE(review): mcount is not declared in this file; presumably it comes
 * from <linux/ftrace.h> -- confirm.
 */
#define MCOUNT_ADDR ((long)(&mcount))
43
/*
 * Image of the 5-byte "call rel32" instruction: one opcode byte (0xe8)
 * followed by a 32-bit displacement. 'code' views the same bytes as a raw
 * array so they can be passed to ftrace_modify_code().
 *
 * The byte fields are unsigned char: 0xe8 does not fit in a signed char,
 * and the consumers of 'code' take unsigned char pointers.
 */
union ftrace_code_union {
	unsigned char code[5];
	struct {
		unsigned char e8;
		int offset;
	} __attribute__((packed));
};
51
/* Head of the record-page list; ftrace_replace_code() walks from here. */
static struct ftrace_page *ftrace_pages_start;
/* Page from which ftrace_alloc_shutdown_node() currently hands out slots. */
static struct ftrace_page *ftrace_pages;
54
55notrace struct dyn_ftrace *ftrace_alloc_shutdown_node(unsigned long ip)
56{
57 struct ftrace_record *rec;
58 unsigned short save;
59
60 ip -= CALL_BACK;
61 save = *(short *)ip;
62
63 /* If this was already converted, skip it */
64 if (save == JMPFWD)
65 return NULL;
66
67 if (ftrace_pages->index == ENTRIES_PER_PAGE) {
68 if (!ftrace_pages->next)
69 return NULL;
70 ftrace_pages = ftrace_pages->next;
71 }
72
73 rec = &ftrace_pages->records[ftrace_pages->index++];
74
75 return &rec->rec;
76}
77
78static int notrace
79ftrace_modify_code(unsigned long ip, unsigned char *old_code,
80 unsigned char *new_code)
81{
82 unsigned short old = *(unsigned short *)old_code;
83 unsigned short new = *(unsigned short *)new_code;
84 unsigned short replaced;
85 int faulted = 0;
86
87 /*
88 * Note: Due to modules and __init, code can
89 * disappear and change, we need to protect against faulting
90 * as well as code changing.
91 *
92 * No real locking needed, this code is run through
93 * kstop_machine.
94 */
95 asm volatile (
96 "1: lock\n"
97 " cmpxchg %w3, (%2)\n"
98 "2:\n"
99 ".section .fixup, \"ax\"\n"
100 " movl $1, %0\n"
101 "3: jmp 2b\n"
102 ".previous\n"
103 _ASM_EXTABLE(1b, 3b)
104 : "=r"(faulted), "=a"(replaced)
105 : "r"(ip), "r"(new), "0"(faulted), "a"(old)
106 : "memory");
107 sync_core();
108
109 if (replaced != old)
110 faulted = 2;
111
112 return faulted;
113}
114
115static int notrace ftrace_calc_offset(long ip)
116{
117 return (int)(MCOUNT_ADDR - ip);
118}
119
120notrace void ftrace_code_disable(struct dyn_ftrace *rec)
121{
122 unsigned long ip;
123 union ftrace_code_union save;
124 struct ftrace_record *r =
125 container_of(rec, struct ftrace_record, rec);
126
127 ip = rec->ip;
128
129 save.e8 = 0xe8;
130 save.offset = ftrace_calc_offset(ip);
131
132 /* move the IP back to the start of the call */
133 ip -= CALL_BACK;
134
135 r->failed = ftrace_modify_code(ip, save.code, (char *)&ftrace_jmp);
136}
137
138static void notrace ftrace_replace_code(int saved)
139{
140 unsigned char *new = NULL, *old = NULL;
141 struct ftrace_record *rec;
142 struct ftrace_page *pg;
143 unsigned long ip;
144 int i;
145
146 if (saved)
147 old = (char *)&ftrace_jmp;
148 else
149 new = (char *)&ftrace_jmp;
150
151 for (pg = ftrace_pages_start; pg; pg = pg->next) {
152 for (i = 0; i < pg->index; i++) {
153 union ftrace_code_union calc;
154 rec = &pg->records[i];
155
156 /* don't modify code that has already faulted */
157 if (rec->failed)
158 continue;
159
160 ip = rec->rec.ip;
161
162 calc.e8 = 0xe8;
163 calc.offset = ftrace_calc_offset(ip);
164
165 if (saved)
166 new = calc.code;
167 else
168 old = calc.code;
169
170 ip -= CALL_BACK;
171
172 rec->failed = ftrace_modify_code(ip, old, new);
173 }
174 }
175
176}
177
178notrace void ftrace_startup_code(void)
179{
180 ftrace_replace_code(1);
181}
182
183notrace void ftrace_shutdown_code(void)
184{
185 ftrace_replace_code(0);
186}
187
188notrace void ftrace_shutdown_replenish(void)
189{
190 if (ftrace_pages->next)
191 return;
192
193 /* allocate another page */
194 ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL);
195}
196
197notrace int ftrace_shutdown_arch_init(void)
198{
199 struct ftrace_page *pg;
200 int cnt;
201 int i;
202
203 /* allocate a few pages */
204 ftrace_pages_start = (void *)get_zeroed_page(GFP_KERNEL);
205 if (!ftrace_pages_start)
206 return -1;
207
208 /*
209 * Allocate a few more pages.
210 *
211 * TODO: have some parser search vmlinux before
212 * final linking to find all calls to ftrace.
213 * Then we can:
214 * a) know how many pages to allocate.
215 * and/or
216 * b) set up the table then.
217 *
218 * The dynamic code is still necessary for
219 * modules.
220 */
221
222 pg = ftrace_pages = ftrace_pages_start;
223
224 cnt = NR_TO_INIT / ENTRIES_PER_PAGE;
225
226 for (i = 0; i < cnt; i++) {
227 pg->next = (void *)get_zeroed_page(GFP_KERNEL);
228
229 /* If we fail, we'll try later anyway */
230 if (!pg->next)
231 break;
232
233 pg = pg->next;
234 }
235
236 return 0;
237}