author     Steven Rostedt <srostedt@redhat.com>    2008-05-12 15:20:42 -0400
committer  Thomas Gleixner <tglx@linutronix.de>    2008-05-23 14:33:09 -0400
commit     3d0833953e1b98b79ddf491dd49229eef9baeac1 (patch)
tree       3520cda824bdb58e47ce3e9f43d68249d5cc1a12 /kernel
parent     6cd8a4bb2f97527a9ceb30bc77ea4e959c6a95e3 (diff)
ftrace: dynamic enabling/disabling of function calls
This patch adds a feature to dynamically replace the ftrace code with jmps, to allow a kernel with ftrace configured to run as fast as it would without it configured.

The way this works is that, on bootup (if ftrace is enabled), an ftrace function is registered to record the instruction pointer of all places that call the function.

Later, if there's still any code to patch, a kthread is awoken (rate limited to at most once a second) that performs a stop_machine and replaces all the code that was called with a jmp over the call to ftrace. It only replaces what was found the previous time. Typically the system reaches equilibrium quickly after bootup and there's no code patching needed at all.

e.g.

      call ftrace  /* 5 bytes */

is replaced with

      jmp 3f  /* jmp is 2 bytes and we jump 3 forward */
  3:

When we want to enable ftrace for function tracing, the IP recording is removed, and stop_machine is called again to replace all the locations that were recorded back to the call of ftrace. When tracing is disabled, we replace the code back to the jmp.

Allocation is done by the kthread. If the ftrace recording function is called and we don't have any record slots available, we simply skip that call. Once a second a new page (if needed) is allocated for recording new ftrace function calls. A large batch is allocated at boot up to get most of the calls there.

Because we do this via stop_machine, we don't have to worry about another CPU executing an ftrace call as we modify it. But we do need to worry about NMIs, so all functions that might be called via NMI must be annotated with notrace_nmi. When this code is configured in, the NMI code will not call notrace.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
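To make the record-then-patch cycle above concrete, here is a small stand-alone user-space C sketch. It is not the kernel code from this patch; names such as record_ip(), patch_recorded() and the fixed-size record pool are invented for illustration. The hot path records previously unseen call sites into a hash (and simply skips the call when no record slot is free), and a periodic batch pass "patches" everything recorded so far, after which nothing is left to do, which is the equilibrium the changelog describes.

/*
 * Simplified, user-space model of the dynamic-ftrace idea (illustration only).
 * Hot path: record unknown call sites into a hash.
 * Batch pass: "patch" everything recorded so far.
 */
#include <stdio.h>
#include <stdbool.h>

#define HASHSIZE 64

struct site {
	unsigned long ip;	/* address of the recorded call site */
	bool patched;		/* has the batch pass handled it yet? */
	struct site *next;
};

static struct site pool[128];	/* pre-allocated records ("large batch") */
static unsigned int pool_used;
static struct site *hash[HASHSIZE];
static int pending;		/* plays the role of ftraced_trigger */

static struct site *find_site(unsigned long ip)
{
	for (struct site *p = hash[ip % HASHSIZE]; p; p = p->next)
		if (p->ip == ip)
			return p;
	return NULL;
}

/* Hot path: called on every "function entry"; records unseen sites only. */
static void record_ip(unsigned long ip)
{
	if (find_site(ip))
		return;		/* already known: nothing to do */
	if (pool_used == sizeof(pool) / sizeof(pool[0]))
		return;		/* no free record slot: simply skip this call */

	struct site *s = &pool[pool_used++];
	s->ip = ip;
	s->next = hash[ip % HASHSIZE];
	hash[ip % HASHSIZE] = s;
	pending = 1;
}

/* Slow path: what the once-a-second kthread would do under stop_machine. */
static void patch_recorded(void)
{
	if (!pending)
		return;
	for (int i = 0; i < HASHSIZE; i++)
		for (struct site *p = hash[i]; p; p = p->next)
			if (!p->patched) {
				p->patched = true;	/* stands in for call -> jmp */
				printf("patched call site at 0x%lx\n", p->ip);
			}
	pending = 0;
}

int main(void)
{
	/* First pass records the sites; the "kthread" then patches them. */
	record_ip(0x1000);
	record_ip(0x2000);
	record_ip(0x1000);	/* duplicate: already in the hash */
	patch_recorded();

	/* Equilibrium: nothing new to record, nothing left to patch. */
	record_ip(0x1000);
	patch_recorded();
	return 0;
}

In the actual patch the batch pass runs from the ftraced kthread under stop_machine() and rewrites the call instructions themselves; the sketch only flips a flag on each record.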
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/trace/Kconfig    17
-rw-r--r--  kernel/trace/ftrace.c  356
2 files changed, 341 insertions, 32 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 6430016b98e8..cad9db1dee02 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -88,3 +88,20 @@ config CONTEXT_SWITCH_TRACER
 	  This tracer gets called from the context switch and records
 	  all switching of tasks.
 
+config DYNAMIC_FTRACE
+	bool "enable/disable ftrace tracepoints dynamically"
+	depends on FTRACE
+	default y
+	help
+	  This option will modify all the calls to ftrace dynamically
+	  (will patch them out of the binary image and replaces them
+	  with a No-Op instruction) as they are called. A table is
+	  created to dynamically enable them again.
+
+	  This way a CONFIG_FTRACE kernel is slightly larger, but otherwise
+	  has native performance as long as no tracing is active.
+
+	  The changes to the code are done by a kernel thread that
+	  wakes up once a second and checks to see if any ftrace calls
+	  were made. If so, it runs stop_machine (stops all CPUS)
+	  and modifies the code to jump over the call to ftrace.
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index b6a80b98a3fb..d1ae2ba25274 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -13,10 +13,19 @@
  * Copyright (C) 2004 William Lee Irwin III
  */
 
-#include <linux/module.h>
+#include <linux/stop_machine.h>
+#include <linux/clocksource.h>
+#include <linux/kallsyms.h>
+#include <linux/kthread.h>
+#include <linux/hardirq.h>
 #include <linux/ftrace.h>
+#include <linux/module.h>
+#include <linux/hash.h>
+#include <linux/list.h>
+
+#include "trace.h"
 
-static DEFINE_SPINLOCK(ftrace_func_lock);
+static DEFINE_SPINLOCK(ftrace_lock);
 static struct ftrace_ops ftrace_list_end __read_mostly =
 {
 	.func = ftrace_stub,
@@ -44,21 +53,21 @@ notrace void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
 }
 
 /**
- * register_ftrace_function - register a function for profiling
- * @ops - ops structure that holds the function for profiling.
- *
- * Register a function to be called by all functions in the
- * kernel.
+ * clear_ftrace_function - reset the ftrace function
  *
- * Note: @ops->func and all the functions it calls must be labeled
- *       with "notrace", otherwise it will go into a
- *       recursive loop.
+ * This NULLs the ftrace function and in essence stops
+ * tracing. There may be lag
  */
-int register_ftrace_function(struct ftrace_ops *ops)
+void clear_ftrace_function(void)
 {
-	unsigned long flags;
+	ftrace_trace_function = ftrace_stub;
+}
+
+static int notrace __register_ftrace_function(struct ftrace_ops *ops)
+{
+	/* Should never be called by interrupts */
+	spin_lock(&ftrace_lock);
 
-	spin_lock_irqsave(&ftrace_func_lock, flags);
 	ops->next = ftrace_list;
 	/*
 	 * We are entering ops into the ftrace_list but another
@@ -68,6 +77,7 @@ int register_ftrace_function(struct ftrace_ops *ops)
 	 */
 	smp_wmb();
 	ftrace_list = ops;
+
 	/*
 	 * For one func, simply call it directly.
 	 * For more than one func, call the chain.
@@ -76,28 +86,22 @@ int register_ftrace_function(struct ftrace_ops *ops)
 		ftrace_trace_function = ops->func;
 	else
 		ftrace_trace_function = ftrace_list_func;
-	spin_unlock_irqrestore(&ftrace_func_lock, flags);
+
+	spin_unlock(&ftrace_lock);
 
 	return 0;
 }
 
-/**
- * unregister_ftrace_function - unresgister a function for profiling.
- * @ops - ops structure that holds the function to unregister
- *
- * Unregister a function that was added to be called by ftrace profiling.
- */
-int unregister_ftrace_function(struct ftrace_ops *ops)
+static int notrace __unregister_ftrace_function(struct ftrace_ops *ops)
 {
-	unsigned long flags;
 	struct ftrace_ops **p;
 	int ret = 0;
 
-	spin_lock_irqsave(&ftrace_func_lock, flags);
+	spin_lock(&ftrace_lock);
 
 	/*
-	 * If we are the only function, then the ftrace pointer is
-	 * pointing directly to that function.
+	 * If we are removing the last function, then simply point
+	 * to the ftrace_stub.
 	 */
 	if (ftrace_list == ops && ops->next == &ftrace_list_end) {
 		ftrace_trace_function = ftrace_stub;
@@ -117,22 +121,310 @@ int unregister_ftrace_function(struct ftrace_ops *ops)
 	*p = (*p)->next;
 
 	/* If we only have one func left, then call that directly */
-	if (ftrace_list->next == &ftrace_list_end)
+	if (ftrace_list == &ftrace_list_end ||
+	    ftrace_list->next == &ftrace_list_end)
 		ftrace_trace_function = ftrace_list->func;
 
  out:
-	spin_unlock_irqrestore(&ftrace_func_lock, flags);
+	spin_unlock(&ftrace_lock);
+
+	return ret;
+}
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+static struct hlist_head ftrace_hash[FTRACE_HASHSIZE];
+
+static DEFINE_PER_CPU(int, ftrace_shutdown_disable_cpu);
+
+static DEFINE_SPINLOCK(ftrace_shutdown_lock);
+static DEFINE_MUTEX(ftraced_lock);
+
+static int ftraced_trigger;
+static int ftraced_suspend;
+
+static int ftrace_record_suspend;
+
+static inline int
+notrace ftrace_ip_in_hash(unsigned long ip, unsigned long key)
+{
+	struct dyn_ftrace *p;
+	struct hlist_node *t;
+	int found = 0;
+
+	hlist_for_each_entry(p, t, &ftrace_hash[key], node) {
+		if (p->ip == ip) {
+			found = 1;
+			break;
+		}
+	}
+
+	return found;
+}
+
+static inline void notrace
+ftrace_add_hash(struct dyn_ftrace *node, unsigned long key)
+{
+	hlist_add_head(&node->node, &ftrace_hash[key]);
+}
+
+static void notrace
+ftrace_record_ip(unsigned long ip, unsigned long parent_ip)
+{
+	struct dyn_ftrace *node;
+	unsigned long flags;
+	unsigned long key;
+	int resched;
+	int atomic;
+
+	resched = need_resched();
+	preempt_disable_notrace();
+
+	/* We simply need to protect against recursion */
+	__get_cpu_var(ftrace_shutdown_disable_cpu)++;
+	if (__get_cpu_var(ftrace_shutdown_disable_cpu) != 1)
+		goto out;
+
+	if (unlikely(ftrace_record_suspend))
+		goto out;
+
+	key = hash_long(ip, FTRACE_HASHBITS);
+
+	WARN_ON_ONCE(key >= FTRACE_HASHSIZE);
+
+	if (ftrace_ip_in_hash(ip, key))
+		goto out;
+
+	atomic = irqs_disabled();
+
+	spin_lock_irqsave(&ftrace_shutdown_lock, flags);
+
+	/* This ip may have hit the hash before the lock */
+	if (ftrace_ip_in_hash(ip, key))
+		goto out_unlock;
+
+	/*
+	 * There's a slight race that the ftraced will update the
+	 * hash and reset here. The arch alloc is responsible
+	 * for seeing if the IP has already changed, and if
+	 * it has, the alloc will fail.
+	 */
+	node = ftrace_alloc_shutdown_node(ip);
+	if (!node)
+		goto out_unlock;
+
+	node->ip = ip;
+
+	ftrace_add_hash(node, key);
+
+	ftraced_trigger = 1;
+
+ out_unlock:
+	spin_unlock_irqrestore(&ftrace_shutdown_lock, flags);
+ out:
+	__get_cpu_var(ftrace_shutdown_disable_cpu)--;
+
+	/* prevent recursion with scheduler */
+	if (resched)
+		preempt_enable_no_resched_notrace();
+	else
+		preempt_enable_notrace();
+}
+
+static struct ftrace_ops ftrace_shutdown_ops __read_mostly =
+{
+	.func = ftrace_record_ip,
+};
+
+
+static int notrace __ftrace_modify_code(void *data)
+{
+	void (*func)(void) = data;
+
+	func();
+	return 0;
+}
+
+static void notrace ftrace_run_startup_code(void)
+{
+	stop_machine_run(__ftrace_modify_code, ftrace_startup_code, NR_CPUS);
+}
+
+static void notrace ftrace_run_shutdown_code(void)
+{
+	stop_machine_run(__ftrace_modify_code, ftrace_shutdown_code, NR_CPUS);
+}
+
+static void notrace ftrace_startup(void)
+{
+	mutex_lock(&ftraced_lock);
+	ftraced_suspend++;
+	if (ftraced_suspend != 1)
+		goto out;
+	__unregister_ftrace_function(&ftrace_shutdown_ops);
+
+	ftrace_run_startup_code();
+ out:
+	mutex_unlock(&ftraced_lock);
+}
+
+static void notrace ftrace_shutdown(void)
+{
+	mutex_lock(&ftraced_lock);
+	ftraced_suspend--;
+	if (ftraced_suspend)
+		goto out;
+
+	ftrace_run_shutdown_code();
+
+	__register_ftrace_function(&ftrace_shutdown_ops);
+ out:
+	mutex_unlock(&ftraced_lock);
+}
+
+static cycle_t ftrace_update_time;
+static unsigned long ftrace_update_cnt;
+unsigned long ftrace_update_tot_cnt;
+
+static int notrace __ftrace_update_code(void *ignore)
+{
+	struct dyn_ftrace *p;
+	struct hlist_head head;
+	struct hlist_node *t;
+	cycle_t start, stop;
+	int i;
+
+	/* Don't be calling ftrace ops now */
+	__unregister_ftrace_function(&ftrace_shutdown_ops);
+
+	start = now(raw_smp_processor_id());
+	ftrace_update_cnt = 0;
+
+	/* No locks needed, the machine is stopped! */
+	for (i = 0; i < FTRACE_HASHSIZE; i++) {
+		if (hlist_empty(&ftrace_hash[i]))
+			continue;
+
+		head = ftrace_hash[i];
+		INIT_HLIST_HEAD(&ftrace_hash[i]);
+
+		/* all CPUS are stopped, we are safe to modify code */
+		hlist_for_each_entry(p, t, &head, node) {
+			ftrace_code_disable(p);
+			ftrace_update_cnt++;
+		}
+
+	}
+
+	stop = now(raw_smp_processor_id());
+	ftrace_update_time = stop - start;
+	ftrace_update_tot_cnt += ftrace_update_cnt;
+
+	__register_ftrace_function(&ftrace_shutdown_ops);
 
 	return 0;
 }
 
+static void notrace ftrace_update_code(void)
+{
+	stop_machine_run(__ftrace_update_code, NULL, NR_CPUS);
+}
+
+static int notrace ftraced(void *ignore)
+{
+	unsigned long usecs;
+
+	set_current_state(TASK_INTERRUPTIBLE);
+
+	while (!kthread_should_stop()) {
+
+		/* check once a second */
+		schedule_timeout(HZ);
+
+		mutex_lock(&ftraced_lock);
+		if (ftraced_trigger && !ftraced_suspend) {
+			ftrace_record_suspend++;
+			ftrace_update_code();
+			usecs = nsecs_to_usecs(ftrace_update_time);
+			if (ftrace_update_tot_cnt > 100000) {
+				ftrace_update_tot_cnt = 0;
+				pr_info("hm, dftrace overflow: %lu change%s"
+					" (%lu total) in %lu usec%s\n",
+					ftrace_update_cnt,
+					ftrace_update_cnt != 1 ? "s" : "",
+					ftrace_update_tot_cnt,
+					usecs, usecs != 1 ? "s" : "");
+				WARN_ON_ONCE(1);
+			}
+			ftraced_trigger = 0;
+			ftrace_record_suspend--;
+		}
+		mutex_unlock(&ftraced_lock);
+
+		ftrace_shutdown_replenish();
+
+		set_current_state(TASK_INTERRUPTIBLE);
+	}
+	__set_current_state(TASK_RUNNING);
+	return 0;
+}
+
+static int __init notrace ftrace_shutdown_init(void)
+{
+	struct task_struct *p;
+	int ret;
+
+	ret = ftrace_shutdown_arch_init();
+	if (ret)
+		return ret;
+
+	p = kthread_run(ftraced, NULL, "ftraced");
+	if (IS_ERR(p))
+		return -1;
+
+	__register_ftrace_function(&ftrace_shutdown_ops);
+
+	return 0;
+}
+
+core_initcall(ftrace_shutdown_init);
+#else
+# define ftrace_startup()	do { } while (0)
+# define ftrace_shutdown()	do { } while (0)
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
 /**
- * clear_ftrace_function - reset the ftrace function
+ * register_ftrace_function - register a function for profiling
+ * @ops - ops structure that holds the function for profiling.
  *
- * This NULLs the ftrace function and in essence stops
- * tracing. There may be lag
+ * Register a function to be called by all functions in the
+ * kernel.
+ *
+ * Note: @ops->func and all the functions it calls must be labeled
+ *       with "notrace", otherwise it will go into a
+ *       recursive loop.
  */
-void clear_ftrace_function(void)
+int register_ftrace_function(struct ftrace_ops *ops)
 {
-	ftrace_trace_function = ftrace_stub;
+	ftrace_startup();
+
+	return __register_ftrace_function(ops);
+}
+
+/**
+ * unregister_ftrace_function - unresgister a function for profiling.
+ * @ops - ops structure that holds the function to unregister
+ *
+ * Unregister a function that was added to be called by ftrace profiling.
+ */
+int unregister_ftrace_function(struct ftrace_ops *ops)
+{
+	int ret;
+
+	ret = __unregister_ftrace_function(ops);
+
+	if (ftrace_list == &ftrace_list_end)
+		ftrace_shutdown();
+
+	return ret;
 }