aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/ftrace.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-10-01 13:28:49 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2012-10-01 13:28:49 -0400
commit7e92daaefa68e5ef1e1732e45231e73adbb724e7 (patch)
tree8e7f8ac9d82654df4c65939c6682f95510e22977 /arch/x86/kernel/ftrace.c
parent7a68294278ae714ce2632a54f0f46916dca64f56 (diff)
parent1d787d37c8ff6612b8151c6dff15bfa7347bcbdf (diff)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf update from Ingo Molnar: "Lots of changes in this cycle as well, with hundreds of commits from over 30 contributors. Most of the activity was on the tooling side. Higher level changes: - New 'perf kvm' analysis tool, from Xiao Guangrong. - New 'perf trace' system-wide tracing tool - uprobes fixes + cleanups from Oleg Nesterov. - Lots of patches to make perf build on Android out of box, from Irina Tirdea - Extend ftrace function tracing utility to be more dynamic for its users. It allows for data passing to the callback functions, as well as reading regs as if a breakpoint were to trigger at function entry. The main goal of this patch series was to allow kprobes to use ftrace as an optimized probe point when a probe is placed on an ftrace nop. With lots of help from Masami Hiramatsu, and going through lots of iterations, we finally came up with a good solution. - Add cpumask for uncore pmu, use it in 'stat', from Yan, Zheng. - Various tracing updates from Steve Rostedt - Clean up and improve 'perf sched' performance by elliminating lots of needless calls to libtraceevent. - Event group parsing support, from Jiri Olsa - UI/gtk refactorings and improvements from Namhyung Kim - Add support for non-tracepoint events in perf script python, from Feng Tang - Add --symbols to 'script', similar to the one in 'report', from Feng Tang. Infrastructure enhancements and fixes: - Convert the trace builtins to use the growing evsel/evlist tracepoint infrastructure, removing several open coded constructs like switch like series of strcmp to dispatch events, etc. Basically what had already been showcased in 'perf sched'. - Add evsel constructor for tracepoints, that uses libtraceevent just to parse the /format events file, use it in a new 'perf test' to make sure the libtraceevent format parsing regressions can be more readily caught. - Some strange errors were happening in some builds, but not on the next, reported by several people, problem was some parser related files, generated during the build, didn't had proper make deps, fix from Eric Sandeen. - Introduce struct and cache information about the environment where a perf.data file was captured, from Namhyung Kim. - Fix handling of unresolved samples when --symbols is used in 'report', from Feng Tang. - Add union member access support to 'probe', from Hyeoncheol Lee. - Fixups to die() removal, from Namhyung Kim. - Render fixes for the TUI, from Namhyung Kim. - Don't enable annotation in non symbolic view, from Namhyung Kim. - Fix pipe mode in 'report', from Namhyung Kim. - Move related stats code from stat to util/, will be used by the 'stat' kvm tool, from Xiao Guangrong. - Remove die()/exit() calls from several tools. - Resolve vdso callchains, from Jiri Olsa - Don't pass const char pointers to basename, so that we can unconditionally use libgen.h and thus avoid ifdef BIONIC lines, from David Ahern - Refactor hist formatting so that it can be reused with the GTK browser, From Namhyung Kim - Fix build for another rbtree.c change, from Adrian Hunter. - Make 'perf diff' command work with evsel hists, from Jiri Olsa. - Use the only field_sep var that is set up: symbol_conf.field_sep, fix from Jiri Olsa. - .gitignore compiled python binaries, from Namhyung Kim. - Get rid of die() in more libtraceevent places, from Namhyung Kim. - Rename libtraceevent 'private' struct member to 'priv' so that it works in C++, from Steven Rostedt - Remove lots of exit()/die() calls from tools so that the main perf exit routine can take place, from David Ahern - Fix x86 build on x86-64, from David Ahern. - {int,str,rb}list fixes from Suzuki K Poulose - perf.data header fixes from Namhyung Kim - Allow user to indicate objdump path, needed in cross environments, from Maciek Borzecki - Fix hardware cache event name generation, fix from Jiri Olsa - Add round trip test for sw, hw and cache event names, catching the problem Jiri fixed, after Jiri's patch, the test passes successfully. - Clean target should do clean for lib/traceevent too, fix from David Ahern - Check the right variable for allocation failure, fix from Namhyung Kim - Set up evsel->tp_format regardless of evsel->name being set already, fix from Namhyung Kim - Oprofile fixes from Robert Richter. - Remove perf_event_attr needless version inflation, from Jiri Olsa - Introduce libtraceevent strerror like error reporting facility, from Namhyung Kim - Add pmu mappings to perf.data header and use event names from cmd line, from Robert Richter - Fix include order for bison/flex-generated C files, from Ben Hutchings - Build fixes and documentation corrections from David Ahern - Assorted cleanups from Robert Richter - Let O= makes handle relative paths, from Steven Rostedt - perf script python fixes, from Feng Tang. - Initial bash completion support, from Frederic Weisbecker - Allow building without libelf, from Namhyung Kim. - Support DWARF CFI based unwind to have callchains when %bp based unwinding is not possible, from Jiri Olsa. - Symbol resolution fixes, while fixing support PPC64 files with an .opt ELF section was the end goal, several fixes for code that handles all architectures and cleanups are included, from Cody Schafer. - Assorted fixes for Documentation and build in 32 bit, from Robert Richter - Cache the libtraceevent event_format associated to each evsel early, so that we avoid relookups, i.e. calling pevent_find_event repeatedly when processing tracepoint events. [ This is to reduce the surface contact with libtraceevents and make clear what is that the perf tools needs from that lib: so far parsing the common and per event fields. ] - Don't stop the build if the audit libraries are not installed, fix from Namhyung Kim. - Fix bfd.h/libbfd detection with recent binutils, from Markus Trippelsdorf. - Improve warning message when libunwind devel packages not present, from Jiri Olsa" * 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (282 commits) perf trace: Add aliases for some syscalls perf probe: Print an enum type variable in "enum variable-name" format when showing accessible variables perf tools: Check libaudit availability for perf-trace builtin perf hists: Add missing period_* fields when collapsing a hist entry perf trace: New tool perf evsel: Export the event_format constructor perf evsel: Introduce rawptr() method perf tools: Use perf_evsel__newtp in the event parser perf evsel: The tracepoint constructor should store sys:name perf evlist: Introduce set_filter() method perf evlist: Renane set_filters method to apply_filters perf test: Add test to check we correctly parse and match syscall open parms perf evsel: Handle endianity in intval method perf evsel: Know if byte swap is needed perf tools: Allow handling a NULL cpu_map as meaning "all cpus" perf evsel: Improve tracepoint constructor setup tools lib traceevent: Fix error path on pevent_parse_event perf test: Fix build failure trace: Move trace event enable from fs_initcall to core_initcall tracing: Add an option for disabling markers ...
Diffstat (limited to 'arch/x86/kernel/ftrace.c')
-rw-r--r--arch/x86/kernel/ftrace.c73
1 files changed, 69 insertions, 4 deletions
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index c3a7cb4bf6e6..1d414029f1d8 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -206,6 +206,21 @@ static int
206ftrace_modify_code(unsigned long ip, unsigned const char *old_code, 206ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
207 unsigned const char *new_code); 207 unsigned const char *new_code);
208 208
209/*
210 * Should never be called:
211 * As it is only called by __ftrace_replace_code() which is called by
212 * ftrace_replace_code() that x86 overrides, and by ftrace_update_code()
213 * which is called to turn mcount into nops or nops into function calls
214 * but not to convert a function from not using regs to one that uses
215 * regs, which ftrace_modify_call() is for.
216 */
217int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
218 unsigned long addr)
219{
220 WARN_ON(1);
221 return -EINVAL;
222}
223
209int ftrace_update_ftrace_func(ftrace_func_t func) 224int ftrace_update_ftrace_func(ftrace_func_t func)
210{ 225{
211 unsigned long ip = (unsigned long)(&ftrace_call); 226 unsigned long ip = (unsigned long)(&ftrace_call);
@@ -220,6 +235,14 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
220 235
221 ret = ftrace_modify_code(ip, old, new); 236 ret = ftrace_modify_code(ip, old, new);
222 237
238 /* Also update the regs callback function */
239 if (!ret) {
240 ip = (unsigned long)(&ftrace_regs_call);
241 memcpy(old, &ftrace_regs_call, MCOUNT_INSN_SIZE);
242 new = ftrace_call_replace(ip, (unsigned long)func);
243 ret = ftrace_modify_code(ip, old, new);
244 }
245
223 atomic_dec(&modifying_ftrace_code); 246 atomic_dec(&modifying_ftrace_code);
224 247
225 return ret; 248 return ret;
@@ -299,6 +322,32 @@ static int add_brk_on_nop(struct dyn_ftrace *rec)
299 return add_break(rec->ip, old); 322 return add_break(rec->ip, old);
300} 323}
301 324
325/*
326 * If the record has the FTRACE_FL_REGS set, that means that it
327 * wants to convert to a callback that saves all regs. If FTRACE_FL_REGS
328 * is not not set, then it wants to convert to the normal callback.
329 */
330static unsigned long get_ftrace_addr(struct dyn_ftrace *rec)
331{
332 if (rec->flags & FTRACE_FL_REGS)
333 return (unsigned long)FTRACE_REGS_ADDR;
334 else
335 return (unsigned long)FTRACE_ADDR;
336}
337
338/*
339 * The FTRACE_FL_REGS_EN is set when the record already points to
340 * a function that saves all the regs. Basically the '_EN' version
341 * represents the current state of the function.
342 */
343static unsigned long get_ftrace_old_addr(struct dyn_ftrace *rec)
344{
345 if (rec->flags & FTRACE_FL_REGS_EN)
346 return (unsigned long)FTRACE_REGS_ADDR;
347 else
348 return (unsigned long)FTRACE_ADDR;
349}
350
302static int add_breakpoints(struct dyn_ftrace *rec, int enable) 351static int add_breakpoints(struct dyn_ftrace *rec, int enable)
303{ 352{
304 unsigned long ftrace_addr; 353 unsigned long ftrace_addr;
@@ -306,7 +355,7 @@ static int add_breakpoints(struct dyn_ftrace *rec, int enable)
306 355
307 ret = ftrace_test_record(rec, enable); 356 ret = ftrace_test_record(rec, enable);
308 357
309 ftrace_addr = (unsigned long)FTRACE_ADDR; 358 ftrace_addr = get_ftrace_addr(rec);
310 359
311 switch (ret) { 360 switch (ret) {
312 case FTRACE_UPDATE_IGNORE: 361 case FTRACE_UPDATE_IGNORE:
@@ -316,6 +365,10 @@ static int add_breakpoints(struct dyn_ftrace *rec, int enable)
316 /* converting nop to call */ 365 /* converting nop to call */
317 return add_brk_on_nop(rec); 366 return add_brk_on_nop(rec);
318 367
368 case FTRACE_UPDATE_MODIFY_CALL_REGS:
369 case FTRACE_UPDATE_MODIFY_CALL:
370 ftrace_addr = get_ftrace_old_addr(rec);
371 /* fall through */
319 case FTRACE_UPDATE_MAKE_NOP: 372 case FTRACE_UPDATE_MAKE_NOP:
320 /* converting a call to a nop */ 373 /* converting a call to a nop */
321 return add_brk_on_call(rec, ftrace_addr); 374 return add_brk_on_call(rec, ftrace_addr);
@@ -360,13 +413,21 @@ static int remove_breakpoint(struct dyn_ftrace *rec)
360 * If not, don't touch the breakpoint, we make just create 413 * If not, don't touch the breakpoint, we make just create
361 * a disaster. 414 * a disaster.
362 */ 415 */
363 ftrace_addr = (unsigned long)FTRACE_ADDR; 416 ftrace_addr = get_ftrace_addr(rec);
417 nop = ftrace_call_replace(ip, ftrace_addr);
418
419 if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) == 0)
420 goto update;
421
422 /* Check both ftrace_addr and ftrace_old_addr */
423 ftrace_addr = get_ftrace_old_addr(rec);
364 nop = ftrace_call_replace(ip, ftrace_addr); 424 nop = ftrace_call_replace(ip, ftrace_addr);
365 425
366 if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) 426 if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0)
367 return -EINVAL; 427 return -EINVAL;
368 } 428 }
369 429
430 update:
370 return probe_kernel_write((void *)ip, &nop[0], 1); 431 return probe_kernel_write((void *)ip, &nop[0], 1);
371} 432}
372 433
@@ -405,12 +466,14 @@ static int add_update(struct dyn_ftrace *rec, int enable)
405 466
406 ret = ftrace_test_record(rec, enable); 467 ret = ftrace_test_record(rec, enable);
407 468
408 ftrace_addr = (unsigned long)FTRACE_ADDR; 469 ftrace_addr = get_ftrace_addr(rec);
409 470
410 switch (ret) { 471 switch (ret) {
411 case FTRACE_UPDATE_IGNORE: 472 case FTRACE_UPDATE_IGNORE:
412 return 0; 473 return 0;
413 474
475 case FTRACE_UPDATE_MODIFY_CALL_REGS:
476 case FTRACE_UPDATE_MODIFY_CALL:
414 case FTRACE_UPDATE_MAKE_CALL: 477 case FTRACE_UPDATE_MAKE_CALL:
415 /* converting nop to call */ 478 /* converting nop to call */
416 return add_update_call(rec, ftrace_addr); 479 return add_update_call(rec, ftrace_addr);
@@ -455,12 +518,14 @@ static int finish_update(struct dyn_ftrace *rec, int enable)
455 518
456 ret = ftrace_update_record(rec, enable); 519 ret = ftrace_update_record(rec, enable);
457 520
458 ftrace_addr = (unsigned long)FTRACE_ADDR; 521 ftrace_addr = get_ftrace_addr(rec);
459 522
460 switch (ret) { 523 switch (ret) {
461 case FTRACE_UPDATE_IGNORE: 524 case FTRACE_UPDATE_IGNORE:
462 return 0; 525 return 0;
463 526
527 case FTRACE_UPDATE_MODIFY_CALL_REGS:
528 case FTRACE_UPDATE_MODIFY_CALL:
464 case FTRACE_UPDATE_MAKE_CALL: 529 case FTRACE_UPDATE_MAKE_CALL:
465 /* converting nop to call */ 530 /* converting nop to call */
466 return finish_update_call(rec, ftrace_addr); 531 return finish_update_call(rec, ftrace_addr);