From 23adec554a7648f99c8acc0caf49c66320cd2b84 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:41 +0200 Subject: x86: add notrace annotations to vsyscall. Add the notrace annotations to the vsyscall functions - there we are not in kernel context yet, so the tracer function cannot (and must not) be called. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/vsyscall_64.c | 3 ++- arch/x86/vdso/vclock_gettime.c | 15 ++++++++------- arch/x86/vdso/vgetcpu.c | 3 ++- 3 files changed, 12 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 61efa2f7d564..4063dfa2a02d 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -42,7 +42,8 @@ #include #include -#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) +#define __vsyscall(nr) \ + __attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace #define __syscall_clobber "r11","cx","memory" /* diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 23476c2ebfc4..5cb8f754c52d 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -23,7 +23,7 @@ #define gtod vdso_vsyscall_gtod_data -static long vdso_fallback_gettime(long clock, struct timespec *ts) +notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) { long ret; asm("syscall" : "=a" (ret) : @@ -31,7 +31,7 @@ static long vdso_fallback_gettime(long clock, struct timespec *ts) return ret; } -static inline long vgetns(void) +notrace static inline long vgetns(void) { long v; cycles_t (*vread)(void); @@ -40,7 +40,7 @@ static inline long vgetns(void) return (v * gtod->clock.mult) >> gtod->clock.shift; } -static noinline int do_realtime(struct timespec *ts) +notrace static noinline int do_realtime(struct timespec *ts) { unsigned long seq, ns; do { @@ -54,7 +54,8 @@ static noinline int 
do_realtime(struct timespec *ts) } /* Copy of the version in kernel/time.c which we cannot directly access */ -static void vset_normalized_timespec(struct timespec *ts, long sec, long nsec) +notrace static void +vset_normalized_timespec(struct timespec *ts, long sec, long nsec) { while (nsec >= NSEC_PER_SEC) { nsec -= NSEC_PER_SEC; @@ -68,7 +69,7 @@ static void vset_normalized_timespec(struct timespec *ts, long sec, long nsec) ts->tv_nsec = nsec; } -static noinline int do_monotonic(struct timespec *ts) +notrace static noinline int do_monotonic(struct timespec *ts) { unsigned long seq, ns, secs; do { @@ -82,7 +83,7 @@ static noinline int do_monotonic(struct timespec *ts) return 0; } -int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) +notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) { if (likely(gtod->sysctl_enabled && gtod->clock.vread)) switch (clock) { @@ -96,7 +97,7 @@ int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) int clock_gettime(clockid_t, struct timespec *) __attribute__((weak, alias("__vdso_clock_gettime"))); -int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) +notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) { long ret; if (likely(gtod->sysctl_enabled && gtod->clock.vread)) { diff --git a/arch/x86/vdso/vgetcpu.c b/arch/x86/vdso/vgetcpu.c index c8097f17f8a9..9fbc6b20026b 100644 --- a/arch/x86/vdso/vgetcpu.c +++ b/arch/x86/vdso/vgetcpu.c @@ -13,7 +13,8 @@ #include #include "vextern.h" -long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused) +notrace long +__vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused) { unsigned int p; -- cgit v1.2.2 From 16444a8a40d4c7b4f6de34af0cae1f76a4f6c901 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 12 May 2008 21:20:42 +0200 Subject: ftrace: add basic support for gcc profiler instrumentation If CONFIG_FTRACE is selected and /proc/sys/kernel/ftrace_enabled is set to a non-zero 
value the ftrace routine will be called every time we enter a kernel function that is not marked with the "notrace" attribute. The ftrace routine will then call a registered function if a function happens to be registered. [ This code has been highly hacked by Steven Rostedt and Ingo Molnar, so don't blame Arnaldo for all of this ;-) ] Update: It is now possible to register more than one ftrace function. If only one ftrace function is registered, that will be the function that ftrace calls directly. If more than one function is registered, then ftrace will call a function that will loop through the functions to call. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig | 1 + arch/x86/kernel/entry_32.S | 27 +++++++++++++++++++++++++++ arch/x86/kernel/entry_64.S | 37 +++++++++++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index fe361ae7ef2f..c742dfeb0dbe 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -23,6 +23,7 @@ config X86 select HAVE_OPROFILE select HAVE_KPROBES select HAVE_KRETPROBES + select HAVE_FTRACE select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) select HAVE_ARCH_KGDB if !X86_VOYAGER diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 2a609dc3271c..f47b9b5440d2 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1109,6 +1109,33 @@ ENDPROC(xen_failsafe_callback) #endif /* CONFIG_XEN */ +#ifdef CONFIG_FTRACE +ENTRY(mcount) + cmpl $ftrace_stub, ftrace_trace_function + jnz trace + +.globl ftrace_stub +ftrace_stub: + ret + + /* taken from glibc */ +trace: + pushl %eax + pushl %ecx + pushl %edx + movl 0xc(%esp), %eax + movl 0x4(%ebp), %edx + + call *ftrace_trace_function + + popl %edx + popl %ecx + popl %eax + + jmp ftrace_stub +END(mcount) +#endif + .section .rodata,"a" #include
"syscall_table_32.S" diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 556a8df522a7..f046e0c64883 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -54,6 +54,43 @@ .code64 +#ifdef CONFIG_FTRACE +ENTRY(mcount) + cmpq $ftrace_stub, ftrace_trace_function + jnz trace +.globl ftrace_stub +ftrace_stub: + retq + +trace: + /* taken from glibc */ + subq $0x38, %rsp + movq %rax, (%rsp) + movq %rcx, 8(%rsp) + movq %rdx, 16(%rsp) + movq %rsi, 24(%rsp) + movq %rdi, 32(%rsp) + movq %r8, 40(%rsp) + movq %r9, 48(%rsp) + + movq 0x38(%rsp), %rdi + movq 8(%rbp), %rsi + + call *ftrace_trace_function + + movq 48(%rsp), %r9 + movq 40(%rsp), %r8 + movq 32(%rsp), %rdi + movq 24(%rsp), %rsi + movq 16(%rsp), %rdx + movq 8(%rsp), %rcx + movq (%rsp), %rax + addq $0x38, %rsp + + jmp ftrace_stub +END(mcount) +#endif + #ifndef CONFIG_PREEMPT #define retint_kernel retint_restore_args #endif -- cgit v1.2.2 From 81d68a96a39844853b37f20cc8282d9b65b78ef3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:42 +0200 Subject: ftrace: trace irq disabled critical timings This patch adds latency tracing for critical timings (how long interrupts are disabled for). "irqsoff" is added to /debugfs/tracing/available_tracers Note: tracing_max_latency also holds the max latency for irqsoff (in usecs). (default to large number so one must start latency tracing) tracing_thresh threshold (in usecs) to always print out if irqs off is detected to be longer than stated here. If irq_thresh is non-zero, then max_irq_latency is ignored. 
Here's an example of a trace with ftrace_enabled = 0 ======= preemption latency trace v1.1.5 on 2.6.24-rc7 Signed-off-by: Ingo Molnar -------------------------------------------------------------------- latency: 100 us, #3/3, CPU#1 | (M:rt VP:0, KP:0, SP:0 HP:0 #P:2) ----------------- | task: swapper-0 (uid:0 nice:0 policy:0 rt_prio:0) ----------------- => started at: _spin_lock_irqsave+0x2a/0xb7 => ended at: _spin_unlock_irqrestore+0x32/0x5f _------=> CPU# / _-----=> irqs-off | / _----=> need-resched || / _---=> hardirq/softirq ||| / _--=> preempt-depth |||| / ||||| delay cmd pid ||||| time | caller \ / ||||| \ | / swapper-0 1d.s3 0us+: _spin_lock_irqsave+0x2a/0xb7 (e1000_update_stats+0x47/0x64c [e1000]) swapper-0 1d.s3 100us : _spin_unlock_irqrestore+0x32/0x5f (e1000_update_stats+0x641/0x64c [e1000]) swapper-0 1d.s3 100us : trace_hardirqs_on_caller+0x75/0x89 (_spin_unlock_irqrestore+0x32/0x5f) vim:ft=help ======= And this is a trace with ftrace_enabled == 1 ======= preemption latency trace v1.1.5 on 2.6.24-rc7 -------------------------------------------------------------------- latency: 102 us, #12/12, CPU#1 | (M:rt VP:0, KP:0, SP:0 HP:0 #P:2) ----------------- | task: swapper-0 (uid:0 nice:0 policy:0 rt_prio:0) ----------------- => started at: _spin_lock_irqsave+0x2a/0xb7 => ended at: _spin_unlock_irqrestore+0x32/0x5f _------=> CPU# / _-----=> irqs-off | / _----=> need-resched || / _---=> hardirq/softirq ||| / _--=> preempt-depth |||| / ||||| delay cmd pid ||||| time | caller \ / ||||| \ | / swapper-0 1dNs3 0us+: _spin_lock_irqsave+0x2a/0xb7 (e1000_update_stats+0x47/0x64c [e1000]) swapper-0 1dNs3 46us : e1000_read_phy_reg+0x16/0x225 [e1000] (e1000_update_stats+0x5e2/0x64c [e1000]) swapper-0 1dNs3 46us : e1000_swfw_sync_acquire+0x10/0x99 [e1000] (e1000_read_phy_reg+0x49/0x225 [e1000]) swapper-0 1dNs3 46us : e1000_get_hw_eeprom_semaphore+0x12/0xa6 [e1000] (e1000_swfw_sync_acquire+0x36/0x99 [e1000]) swapper-0 1dNs3 47us : __const_udelay+0x9/0x47 
(e1000_read_phy_reg+0x116/0x225 [e1000]) swapper-0 1dNs3 47us+: __delay+0x9/0x50 (__const_udelay+0x45/0x47) swapper-0 1dNs3 97us : preempt_schedule+0xc/0x84 (__delay+0x4e/0x50) swapper-0 1dNs3 98us : e1000_swfw_sync_release+0xc/0x55 [e1000] (e1000_read_phy_reg+0x211/0x225 [e1000]) swapper-0 1dNs3 99us+: e1000_put_hw_eeprom_semaphore+0x9/0x35 [e1000] (e1000_swfw_sync_release+0x50/0x55 [e1000]) swapper-0 1dNs3 101us : _spin_unlock_irqrestore+0xe/0x5f (e1000_update_stats+0x641/0x64c [e1000]) swapper-0 1dNs3 102us : _spin_unlock_irqrestore+0x32/0x5f (e1000_update_stats+0x641/0x64c [e1000]) swapper-0 1dNs3 102us : trace_hardirqs_on_caller+0x75/0x89 (_spin_unlock_irqrestore+0x32/0x5f) vim:ft=help ======= Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process_64.c | 3 +++ arch/x86/lib/Makefile | 1 + arch/x86/lib/thunk_32.S | 47 ++++++++++++++++++++++++++++++++++++++++++++ arch/x86/lib/thunk_64.S | 19 ++++++++++++++++-- 4 files changed, 68 insertions(+), 2 deletions(-) create mode 100644 arch/x86/lib/thunk_32.S (limited to 'arch') diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index e2319f39988b..dd349c92f051 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -165,7 +165,10 @@ void cpu_idle(void) */ local_irq_disable(); enter_idle(); + /* Don't trace irqs off for idle */ + stop_critical_timings(); idle(); + start_critical_timings(); /* In many cases the interrupt that ended idle has already called exit_idle. But some idle loops can be woken up without interrupt. 
*/ diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 76f60f52a885..84aa2883fe15 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_SMP) := msr-on-cpu.o lib-y := delay_$(BITS).o +lib-y += thunk_$(BITS).o lib-y += usercopy_$(BITS).o getuser_$(BITS).o putuser_$(BITS).o lib-y += memcpy_$(BITS).o diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S new file mode 100644 index 000000000000..650b11e00ecc --- /dev/null +++ b/arch/x86/lib/thunk_32.S @@ -0,0 +1,47 @@ +/* + * Trampoline to trace irqs off. (otherwise CALLER_ADDR1 might crash) + * Copyright 2008 by Steven Rostedt, Red Hat, Inc + * (inspired by Andi Kleen's thunk_64.S) + * Subject to the GNU public license, v.2. No warranty of any kind. + */ + + #include + +#define ARCH_TRACE_IRQS_ON \ + pushl %eax; \ + pushl %ecx; \ + pushl %edx; \ + call trace_hardirqs_on; \ + popl %edx; \ + popl %ecx; \ + popl %eax; + +#define ARCH_TRACE_IRQS_OFF \ + pushl %eax; \ + pushl %ecx; \ + pushl %edx; \ + call trace_hardirqs_off; \ + popl %edx; \ + popl %ecx; \ + popl %eax; + +#ifdef CONFIG_TRACE_IRQFLAGS + /* put return address in eax (arg1) */ + .macro thunk_ra name,func + .globl \name +\name: + pushl %eax + pushl %ecx + pushl %edx + /* Place EIP in the arg1 */ + movl 3*4(%esp), %eax + call \func + popl %edx + popl %ecx + popl %eax + ret + .endm + + thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller + thunk_ra trace_hardirqs_off_thunk,trace_hardirqs_off_caller +#endif diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S index e009251d4e9f..bf9a7d5a5428 100644 --- a/arch/x86/lib/thunk_64.S +++ b/arch/x86/lib/thunk_64.S @@ -2,6 +2,7 @@ * Save registers before calling assembly functions. This avoids * disturbance of register allocation in some inline assembly constructs. * Copyright 2001,2002 by Andi Kleen, SuSE Labs. + * Added trace_hardirqs callers - Copyright 2007 Steven Rostedt, Red Hat, Inc. * Subject to the GNU public license, v.2. 
No warranty of any kind. */ @@ -42,8 +43,22 @@ #endif #ifdef CONFIG_TRACE_IRQFLAGS - thunk trace_hardirqs_on_thunk,trace_hardirqs_on - thunk trace_hardirqs_off_thunk,trace_hardirqs_off + /* put return address in rdi (arg1) */ + .macro thunk_ra name,func + .globl \name +\name: + CFI_STARTPROC + SAVE_ARGS + /* SAVE_ARGS pushs 9 elements */ + /* the next element would be the rip */ + movq 9*8(%rsp), %rdi + call \func + jmp restore + CFI_ENDPROC + .endm + + thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller + thunk_ra trace_hardirqs_off_thunk,trace_hardirqs_off_caller #endif #ifdef CONFIG_DEBUG_LOCK_ALLOC -- cgit v1.2.2 From 6cd8a4bb2f97527a9ceb30bc77ea4e959c6a95e3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:42 +0200 Subject: ftrace: trace preempt off critical timings Add preempt off timings. A lot of kernel core code is taken from the RT patch latency trace that was written by Ingo Molnar. This adds "preemptoff" and "preemptirqsoff" to /debugfs/tracing/available_tracers Now instead of just tracing irqs off, preemption off can be selected to be recorded. When this is selected, it shares the same files as irqs off timings. One can either trace preemption off, irqs off, or one or the other off. By echoing "preemptoff" into /debugfs/tracing/current_tracer, recording of preempt off only is performed. "irqsoff" will only record the time irqs are disabled, but "preemptirqsoff" will take the total time irqs or preemption are disabled. Runtime switching of these options is now supported by simply echoing in the appropriate trace name into /debugfs/tracing/current_tracer.
Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process_32.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index f8476dfbb60d..a30aa1f2607a 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -185,7 +185,10 @@ void cpu_idle(void) local_irq_disable(); __get_cpu_var(irq_stat).idle_timestamp = jiffies; + /* Don't trace irqs off for idle */ + stop_critical_timings(); idle(); + start_critical_timings(); } tick_nohz_restart_sched_tick(); preempt_enable_no_resched(); -- cgit v1.2.2 From 3d0833953e1b98b79ddf491dd49229eef9baeac1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:42 +0200 Subject: ftrace: dynamic enabling/disabling of function calls This patch adds a feature to dynamically replace the ftrace code with the jmps to allow a kernel with ftrace configured to run as fast as it can without it configured. The way this works, is on bootup (if ftrace is enabled), a ftrace function is registered to record the instruction pointer of all places that call the function. Later, if there's still any code to patch, a kthread is awoken (rate limited to at most once a second) that performs a stop_machine, and replaces all the code that was called with a jmp over the call to ftrace. It only replaces what was found the previous time. Typically the system reaches equilibrium quickly after bootup and there's no code patching needed at all. e.g. call ftrace /* 5 bytes */ is replaced with jmp 3f /* jmp is 2 bytes and we jump 3 forward */ 3: When we want to enable ftrace for function tracing, the IP recording is removed, and stop_machine is called again to replace all the locations of that were recorded back to the call of ftrace. When it is disabled, we replace the code back to the jmp. Allocation is done by the kthread. 
If the ftrace recording function is called, and we don't have any record slots available, then we simply skip that call. Once a second a new page (if needed) is allocated for recording new ftrace function calls. A large batch is allocated at boot up to get most of the calls there. Because we do this via stop_machine, we don't have to worry about another CPU executing a ftrace call as we modify it. But we do need to worry about NMI's so all functions that might be called via nmi must be annotated with notrace_nmi. When this code is configured in, the NMI code will not call notrace. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/Makefile | 1 + arch/x86/kernel/ftrace.c | 237 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 238 insertions(+) create mode 100644 arch/x86/kernel/ftrace.c (limited to 'arch') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 5e618c3b4720..e142091524b0 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -56,6 +56,7 @@ obj-$(CONFIG_X86_MPPARSE) += mpparse.o obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi_$(BITS).o obj-$(CONFIG_X86_IO_APIC) += io_apic_$(BITS).o obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o +obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c new file mode 100644 index 000000000000..5dd58136ef02 --- /dev/null +++ b/arch/x86/kernel/ftrace.c @@ -0,0 +1,237 @@ +/* + * Code for replacing ftrace calls with jumps. + * + * Copyright (C) 2007-2008 Steven Rostedt + * + * Thanks goes to Ingo Molnar, for suggesting the idea. + * Mathieu Desnoyers, for suggesting postponing the modifications. + * Arjan van de Ven, for keeping me straight, and explaining to me + * the dangers of modifying code on the run. 
+ */ + +#include +#include +#include +#include +#include +#include + +#define CALL_BACK 5 + +#define JMPFWD 0x03eb + +static unsigned short ftrace_jmp = JMPFWD; + +struct ftrace_record { + struct dyn_ftrace rec; + int failed; +} __attribute__((packed)); + +struct ftrace_page { + struct ftrace_page *next; + int index; + struct ftrace_record records[]; +} __attribute__((packed)); + +#define ENTRIES_PER_PAGE \ + ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct ftrace_record)) + +/* estimate from running different kernels */ +#define NR_TO_INIT 10000 + +#define MCOUNT_ADDR ((long)(&mcount)) + +union ftrace_code_union { + char code[5]; + struct { + char e8; + int offset; + } __attribute__((packed)); +}; + +static struct ftrace_page *ftrace_pages_start; +static struct ftrace_page *ftrace_pages; + +notrace struct dyn_ftrace *ftrace_alloc_shutdown_node(unsigned long ip) +{ + struct ftrace_record *rec; + unsigned short save; + + ip -= CALL_BACK; + save = *(short *)ip; + + /* If this was already converted, skip it */ + if (save == JMPFWD) + return NULL; + + if (ftrace_pages->index == ENTRIES_PER_PAGE) { + if (!ftrace_pages->next) + return NULL; + ftrace_pages = ftrace_pages->next; + } + + rec = &ftrace_pages->records[ftrace_pages->index++]; + + return &rec->rec; +} + +static int notrace +ftrace_modify_code(unsigned long ip, unsigned char *old_code, + unsigned char *new_code) +{ + unsigned short old = *(unsigned short *)old_code; + unsigned short new = *(unsigned short *)new_code; + unsigned short replaced; + int faulted = 0; + + /* + * Note: Due to modules and __init, code can + * disappear and change, we need to protect against faulting + * as well as code changing. + * + * No real locking needed, this code is run through + * kstop_machine. 
+ */ + asm volatile ( + "1: lock\n" + " cmpxchg %w3, (%2)\n" + "2:\n" + ".section .fixup, \"ax\"\n" + " movl $1, %0\n" + "3: jmp 2b\n" + ".previous\n" + _ASM_EXTABLE(1b, 3b) + : "=r"(faulted), "=a"(replaced) + : "r"(ip), "r"(new), "0"(faulted), "a"(old) + : "memory"); + sync_core(); + + if (replaced != old) + faulted = 2; + + return faulted; +} + +static int notrace ftrace_calc_offset(long ip) +{ + return (int)(MCOUNT_ADDR - ip); +} + +notrace void ftrace_code_disable(struct dyn_ftrace *rec) +{ + unsigned long ip; + union ftrace_code_union save; + struct ftrace_record *r = + container_of(rec, struct ftrace_record, rec); + + ip = rec->ip; + + save.e8 = 0xe8; + save.offset = ftrace_calc_offset(ip); + + /* move the IP back to the start of the call */ + ip -= CALL_BACK; + + r->failed = ftrace_modify_code(ip, save.code, (char *)&ftrace_jmp); +} + +static void notrace ftrace_replace_code(int saved) +{ + unsigned char *new = NULL, *old = NULL; + struct ftrace_record *rec; + struct ftrace_page *pg; + unsigned long ip; + int i; + + if (saved) + old = (char *)&ftrace_jmp; + else + new = (char *)&ftrace_jmp; + + for (pg = ftrace_pages_start; pg; pg = pg->next) { + for (i = 0; i < pg->index; i++) { + union ftrace_code_union calc; + rec = &pg->records[i]; + + /* don't modify code that has already faulted */ + if (rec->failed) + continue; + + ip = rec->rec.ip; + + calc.e8 = 0xe8; + calc.offset = ftrace_calc_offset(ip); + + if (saved) + new = calc.code; + else + old = calc.code; + + ip -= CALL_BACK; + + rec->failed = ftrace_modify_code(ip, old, new); + } + } + +} + +notrace void ftrace_startup_code(void) +{ + ftrace_replace_code(1); +} + +notrace void ftrace_shutdown_code(void) +{ + ftrace_replace_code(0); +} + +notrace void ftrace_shutdown_replenish(void) +{ + if (ftrace_pages->next) + return; + + /* allocate another page */ + ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL); +} + +notrace int ftrace_shutdown_arch_init(void) +{ + struct ftrace_page *pg; + int cnt; + int 
i; + + /* allocate a few pages */ + ftrace_pages_start = (void *)get_zeroed_page(GFP_KERNEL); + if (!ftrace_pages_start) + return -1; + + /* + * Allocate a few more pages. + * + * TODO: have some parser search vmlinux before + * final linking to find all calls to ftrace. + * Then we can: + * a) know how many pages to allocate. + * and/or + * b) set up the table then. + * + * The dynamic code is still necessary for + * modules. + */ + + pg = ftrace_pages = ftrace_pages_start; + + cnt = NR_TO_INIT / ENTRIES_PER_PAGE; + + for (i = 0; i < cnt; i++) { + pg->next = (void *)get_zeroed_page(GFP_KERNEL); + + /* If we fail, we'll try later anyway */ + if (!pg->next) + break; + + pg = pg->next; + } + + return 0; +} -- cgit v1.2.2 From dfa60aba04dae7833d75b2e2be124bb7cfb8239f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:43 +0200 Subject: ftrace: use nops instead of jmp This patch patches the call to mcount with nops instead of a jmp over the mcount call. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/alternative.c | 4 ++-- arch/x86/kernel/ftrace.c | 40 ++++++++++++++++++++++++---------------- 2 files changed, 26 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 65c7857a90dd..de240ba2e288 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -143,7 +143,7 @@ static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = { #ifdef CONFIG_X86_64 extern char __vsyscall_0; -static inline const unsigned char*const * find_nop_table(void) +const unsigned char *const *find_nop_table(void) { return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || boot_cpu_data.x86 < 6 ? 
k8_nops : p6_nops; @@ -162,7 +162,7 @@ static const struct nop { { -1, NULL } }; -static const unsigned char*const * find_nop_table(void) +const unsigned char *const *find_nop_table(void) { const unsigned char *const *noptable = intel_nops; int i; diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 5dd58136ef02..2e060c58b860 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -16,11 +16,12 @@ #include #include -#define CALL_BACK 5 +#include -#define JMPFWD 0x03eb +#define CALL_BACK 5 -static unsigned short ftrace_jmp = JMPFWD; +/* Long is fine, even if it is only 4 bytes ;-) */ +static long *ftrace_nop; struct ftrace_record { struct dyn_ftrace rec; @@ -55,13 +56,13 @@ static struct ftrace_page *ftrace_pages; notrace struct dyn_ftrace *ftrace_alloc_shutdown_node(unsigned long ip) { struct ftrace_record *rec; - unsigned short save; + unsigned long save; ip -= CALL_BACK; - save = *(short *)ip; + save = *(long *)ip; /* If this was already converted, skip it */ - if (save == JMPFWD) + if (save == *ftrace_nop) return NULL; if (ftrace_pages->index == ENTRIES_PER_PAGE) { @@ -79,9 +80,10 @@ static int notrace ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char *new_code) { - unsigned short old = *(unsigned short *)old_code; - unsigned short new = *(unsigned short *)new_code; - unsigned short replaced; + unsigned replaced; + unsigned old = *(unsigned *)old_code; /* 4 bytes */ + unsigned new = *(unsigned *)new_code; /* 4 bytes */ + unsigned char newch = new_code[4]; int faulted = 0; /* @@ -94,7 +96,9 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, */ asm volatile ( "1: lock\n" - " cmpxchg %w3, (%2)\n" + " cmpxchg %3, (%2)\n" + " jnz 2f\n" + " movb %b4, 4(%2)\n" "2:\n" ".section .fixup, \"ax\"\n" " movl $1, %0\n" @@ -102,11 +106,12 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, ".previous\n" _ASM_EXTABLE(1b, 3b) : "=r"(faulted), "=a"(replaced) - : "r"(ip), "r"(new), "0"(faulted), 
"a"(old) + : "r"(ip), "r"(new), "r"(newch), + "0"(faulted), "a"(old) : "memory"); sync_core(); - if (replaced != old) + if (replaced != old && replaced != new) faulted = 2; return faulted; @@ -132,7 +137,7 @@ notrace void ftrace_code_disable(struct dyn_ftrace *rec) /* move the IP back to the start of the call */ ip -= CALL_BACK; - r->failed = ftrace_modify_code(ip, save.code, (char *)&ftrace_jmp); + r->failed = ftrace_modify_code(ip, save.code, (char *)ftrace_nop); } static void notrace ftrace_replace_code(int saved) @@ -144,9 +149,9 @@ static void notrace ftrace_replace_code(int saved) int i; if (saved) - old = (char *)&ftrace_jmp; + old = (char *)ftrace_nop; else - new = (char *)&ftrace_jmp; + new = (char *)ftrace_nop; for (pg = ftrace_pages_start; pg; pg = pg->next) { for (i = 0; i < pg->index; i++) { @@ -194,12 +199,15 @@ notrace void ftrace_shutdown_replenish(void) ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL); } -notrace int ftrace_shutdown_arch_init(void) +notrace int __init ftrace_shutdown_arch_init(void) { + const unsigned char *const *noptable = find_nop_table(); struct ftrace_page *pg; int cnt; int i; + ftrace_nop = (unsigned long *)noptable[CALL_BACK]; + /* allocate a few pages */ ftrace_pages_start = (void *)get_zeroed_page(GFP_KERNEL); if (!ftrace_pages_start) -- cgit v1.2.2 From 3c1720f00bb619302ba19d55986ab565e74d06db Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:43 +0200 Subject: ftrace: move memory management out of arch code This patch moves the memory management of the ftrace records out of the arch code and into the generic code making the arch code simpler. 
Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/ftrace.c | 183 ++++++++--------------------------------------- 1 file changed, 29 insertions(+), 154 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 2e060c58b860..b69795efa226 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -23,25 +23,6 @@ /* Long is fine, even if it is only 4 bytes ;-) */ static long *ftrace_nop; -struct ftrace_record { - struct dyn_ftrace rec; - int failed; -} __attribute__((packed)); - -struct ftrace_page { - struct ftrace_page *next; - int index; - struct ftrace_record records[]; -} __attribute__((packed)); - -#define ENTRIES_PER_PAGE \ - ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct ftrace_record)) - -/* estimate from running different kernels */ -#define NR_TO_INIT 10000 - -#define MCOUNT_ADDR ((long)(&mcount)) - union ftrace_code_union { char code[5]; struct { @@ -50,33 +31,41 @@ union ftrace_code_union { } __attribute__((packed)); }; -static struct ftrace_page *ftrace_pages_start; -static struct ftrace_page *ftrace_pages; - -notrace struct dyn_ftrace *ftrace_alloc_shutdown_node(unsigned long ip) +notrace int ftrace_ip_converted(unsigned long ip) { - struct ftrace_record *rec; unsigned long save; ip -= CALL_BACK; save = *(long *)ip; - /* If this was already converted, skip it */ - if (save == *ftrace_nop) - return NULL; + return save == *ftrace_nop; +} - if (ftrace_pages->index == ENTRIES_PER_PAGE) { - if (!ftrace_pages->next) - return NULL; - ftrace_pages = ftrace_pages->next; - } +static int notrace ftrace_calc_offset(long ip, long addr) +{ + return (int)(addr - ip); +} - rec = &ftrace_pages->records[ftrace_pages->index++]; +notrace unsigned char *ftrace_nop_replace(void) +{ + return (char *)ftrace_nop; +} + +notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) +{ + static union ftrace_code_union calc; - return 
&rec->rec; + calc.e8 = 0xe8; + calc.offset = ftrace_calc_offset(ip, addr); + + /* + * No locking needed, this must be called via kstop_machine + * which in essence is like running on a uniprocessor machine. + */ + return calc.code; } -static int notrace +notrace int ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char *new_code) { @@ -86,6 +75,9 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char newch = new_code[4]; int faulted = 0; + /* move the IP back to the start of the call */ + ip -= CALL_BACK; + /* * Note: Due to modules and __init, code can * disappear and change, we need to protect against faulting @@ -117,129 +109,12 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, return faulted; } -static int notrace ftrace_calc_offset(long ip) -{ - return (int)(MCOUNT_ADDR - ip); -} - -notrace void ftrace_code_disable(struct dyn_ftrace *rec) -{ - unsigned long ip; - union ftrace_code_union save; - struct ftrace_record *r = - container_of(rec, struct ftrace_record, rec); - - ip = rec->ip; - - save.e8 = 0xe8; - save.offset = ftrace_calc_offset(ip); - - /* move the IP back to the start of the call */ - ip -= CALL_BACK; - - r->failed = ftrace_modify_code(ip, save.code, (char *)ftrace_nop); -} - -static void notrace ftrace_replace_code(int saved) -{ - unsigned char *new = NULL, *old = NULL; - struct ftrace_record *rec; - struct ftrace_page *pg; - unsigned long ip; - int i; - - if (saved) - old = (char *)ftrace_nop; - else - new = (char *)ftrace_nop; - - for (pg = ftrace_pages_start; pg; pg = pg->next) { - for (i = 0; i < pg->index; i++) { - union ftrace_code_union calc; - rec = &pg->records[i]; - - /* don't modify code that has already faulted */ - if (rec->failed) - continue; - - ip = rec->rec.ip; - - calc.e8 = 0xe8; - calc.offset = ftrace_calc_offset(ip); - - if (saved) - new = calc.code; - else - old = calc.code; - - ip -= CALL_BACK; - - rec->failed = ftrace_modify_code(ip, old, new); - } - } - -} - 
-notrace void ftrace_startup_code(void) -{ - ftrace_replace_code(1); -} - -notrace void ftrace_shutdown_code(void) -{ - ftrace_replace_code(0); -} - -notrace void ftrace_shutdown_replenish(void) -{ - if (ftrace_pages->next) - return; - - /* allocate another page */ - ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL); -} - -notrace int __init ftrace_shutdown_arch_init(void) +int __init ftrace_dyn_arch_init(void) { const unsigned char *const *noptable = find_nop_table(); - struct ftrace_page *pg; - int cnt; - int i; ftrace_nop = (unsigned long *)noptable[CALL_BACK]; - /* allocate a few pages */ - ftrace_pages_start = (void *)get_zeroed_page(GFP_KERNEL); - if (!ftrace_pages_start) - return -1; - - /* - * Allocate a few more pages. - * - * TODO: have some parser search vmlinux before - * final linking to find all calls to ftrace. - * Then we can: - * a) know how many pages to allocate. - * and/or - * b) set up the table then. - * - * The dynamic code is still necessary for - * modules. - */ - - pg = ftrace_pages = ftrace_pages_start; - - cnt = NR_TO_INIT / ENTRIES_PER_PAGE; - - for (i = 0; i < cnt; i++) { - pg->next = (void *)get_zeroed_page(GFP_KERNEL); - - /* If we fail, we'll try later anyway */ - if (!pg->next) - break; - - pg = pg->next; - } - return 0; } + -- cgit v1.2.2 From d61f82d06672f57fca410da6f7fffd15867db622 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:43 +0200 Subject: ftrace: use dynamic patching for updating mcount calls This patch replaces the indirect call to the mcount function pointer with a direct call that will be patched by the dynamic ftrace routines. On boot up, the mcount function calls the ftrace_stub function. When the dynamic ftrace code is initialized, the ftrace_stub is replaced with a call to the ftrace_record_ip, which records the instruction pointers of the locations that call it. Later, the ftraced daemon will call kstop_machine and patch all the locations to nops.
When a ftrace is enabled, the original calls to mcount will now be set to call ftrace_caller, which will do a direct call to the registered ftrace function. This direct call is also patched when the function that should be called is updated. All patching is performed by a kstop_machine routine to prevent any type of race conditions that are associated with modifying code on the fly. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/entry_32.S | 47 +++++++++++++++++++++++++++++--- arch/x86/kernel/entry_64.S | 67 +++++++++++++++++++++++++++++++++++++++++++++- arch/x86/kernel/ftrace.c | 41 +++++++++++++++++++++++++++- 3 files changed, 150 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index f47b9b5440d2..e6517ce0b824 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1110,10 +1110,50 @@ ENDPROC(xen_failsafe_callback) #endif /* CONFIG_XEN */ #ifdef CONFIG_FTRACE +#ifdef CONFIG_DYNAMIC_FTRACE + +ENTRY(mcount) + pushl %eax + pushl %ecx + pushl %edx + movl 0xc(%esp), %eax + +.globl mcount_call +mcount_call: + call ftrace_stub + + popl %edx + popl %ecx + popl %eax + + ret +END(mcount) + +ENTRY(ftrace_caller) + pushl %eax + pushl %ecx + pushl %edx + movl 0xc(%esp), %eax + movl 0x4(%ebp), %edx + +.globl ftrace_call +ftrace_call: + call ftrace_stub + + popl %edx + popl %ecx + popl %eax + +.globl ftrace_stub +ftrace_stub: + ret +END(ftrace_caller) + +#else /* ! 
CONFIG_DYNAMIC_FTRACE */ + ENTRY(mcount) cmpl $ftrace_stub, ftrace_trace_function jnz trace - .globl ftrace_stub ftrace_stub: ret @@ -1126,7 +1166,7 @@ trace: movl 0xc(%esp), %eax movl 0x4(%ebp), %edx - call *ftrace_trace_function + call *ftrace_trace_function popl %edx popl %ecx @@ -1134,7 +1174,8 @@ trace: jmp ftrace_stub END(mcount) -#endif +#endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* CONFIG_FTRACE */ .section .rodata,"a" #include "syscall_table_32.S" diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index f046e0c64883..fe25e5febca3 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -55,6 +55,70 @@ .code64 #ifdef CONFIG_FTRACE +#ifdef CONFIG_DYNAMIC_FTRACE +ENTRY(mcount) + + subq $0x38, %rsp + movq %rax, (%rsp) + movq %rcx, 8(%rsp) + movq %rdx, 16(%rsp) + movq %rsi, 24(%rsp) + movq %rdi, 32(%rsp) + movq %r8, 40(%rsp) + movq %r9, 48(%rsp) + + movq 0x38(%rsp), %rdi + +.globl mcount_call +mcount_call: + call ftrace_stub + + movq 48(%rsp), %r9 + movq 40(%rsp), %r8 + movq 32(%rsp), %rdi + movq 24(%rsp), %rsi + movq 16(%rsp), %rdx + movq 8(%rsp), %rcx + movq (%rsp), %rax + addq $0x38, %rsp + + retq +END(mcount) + +ENTRY(ftrace_caller) + + /* taken from glibc */ + subq $0x38, %rsp + movq %rax, (%rsp) + movq %rcx, 8(%rsp) + movq %rdx, 16(%rsp) + movq %rsi, 24(%rsp) + movq %rdi, 32(%rsp) + movq %r8, 40(%rsp) + movq %r9, 48(%rsp) + + movq 0x38(%rsp), %rdi + movq 8(%rbp), %rsi + +.globl ftrace_call +ftrace_call: + call ftrace_stub + + movq 48(%rsp), %r9 + movq 40(%rsp), %r8 + movq 32(%rsp), %rdi + movq 24(%rsp), %rsi + movq 16(%rsp), %rdx + movq 8(%rsp), %rcx + movq (%rsp), %rax + addq $0x38, %rsp + +.globl ftrace_stub +ftrace_stub: + retq +END(ftrace_caller) + +#else /* ! 
CONFIG_DYNAMIC_FTRACE */ ENTRY(mcount) cmpq $ftrace_stub, ftrace_trace_function jnz trace @@ -89,7 +153,8 @@ trace: jmp ftrace_stub END(mcount) -#endif +#endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* CONFIG_FTRACE */ #ifndef CONFIG_PREEMPT #define retint_kernel retint_restore_args diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index b69795efa226..9f44623e0072 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -109,10 +109,49 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, return faulted; } -int __init ftrace_dyn_arch_init(void) +notrace int ftrace_update_ftrace_func(ftrace_func_t func) +{ + unsigned long ip = (unsigned long)(&ftrace_call); + unsigned char old[5], *new; + int ret; + + ip += CALL_BACK; + + memcpy(old, &ftrace_call, 5); + new = ftrace_call_replace(ip, (unsigned long)func); + ret = ftrace_modify_code(ip, old, new); + + return ret; +} + +notrace int ftrace_mcount_set(unsigned long *data) +{ + unsigned long ip = (long)(&mcount_call); + unsigned long *addr = data; + unsigned char old[5], *new; + + /* ip is at the location, but modify code will subtact this */ + ip += CALL_BACK; + + /* + * Replace the mcount stub with a pointer to the + * ip recorder function. + */ + memcpy(old, &mcount_call, 5); + new = ftrace_call_replace(ip, *addr); + *addr = ftrace_modify_code(ip, old, new); + + return 0; +} + +int __init ftrace_dyn_arch_init(void *data) { const unsigned char *const *noptable = find_nop_table(); + /* This is running in kstop_machine */ + + ftrace_mcount_set(data); + ftrace_nop = (unsigned long *)noptable[CALL_BACK]; return 0; -- cgit v1.2.2 From f43fdad8627fec2d21df92799b254dceb66c9c3c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:43 +0200 Subject: ftrace: fix kexec disable the tracer while kexec pulls the rug from under the old kernel. 
Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/machine_kexec_32.c | 4 ++++ arch/x86/kernel/machine_kexec_64.c | 4 ++++ 2 files changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index d0b234c9fc31..88923fd7a6fc 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -11,6 +11,8 @@ #include #include #include +#include + #include #include #include @@ -107,6 +109,8 @@ NORET_TYPE void machine_kexec(struct kimage *image) unsigned long page_list[PAGES_NR]; void *control_page; + tracer_disable(); + /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 576a03db4511..1558fdc174f9 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -11,6 +11,8 @@ #include #include #include +#include + #include #include #include @@ -184,6 +186,8 @@ NORET_TYPE void machine_kexec(struct kimage *image) unsigned long page_list[PAGES_NR]; void *control_page; + tracer_disable(); + /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); -- cgit v1.2.2 From a56be3fe2f65f9f776e727bfd382e35db75911d6 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:56 +0200 Subject: ftrace: fix the fault label in updating code The fault label to jump to on fault of updating the code was misplaced preventing the fault from being recorded. 
Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/ftrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 9f44623e0072..498608c015fb 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -93,8 +93,8 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, " movb %b4, 4(%2)\n" "2:\n" ".section .fixup, \"ax\"\n" - " movl $1, %0\n" - "3: jmp 2b\n" + "3: movl $1, %0\n" + " jmp 2b\n" ".previous\n" _ASM_EXTABLE(1b, 3b) : "=r"(faulted), "=a"(replaced) -- cgit v1.2.2 From 8f0f996e80b980fba07d11961d96a5fefb60976a Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:56 +0200 Subject: ftrace: dont write protect kernel text Dynamic ftrace cannot work when the kernel has its text write protected. This patch keeps the kernel from being write protected when dynamic ftrace is in place. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/init_32.c | 4 ++++ arch/x86/mm/init_64.c | 10 ++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index ec30d10154b6..f96eca21ad8f 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -710,6 +710,8 @@ void mark_rodata_ro(void) unsigned long start = PFN_ALIGN(_text); unsigned long size = PFN_ALIGN(_etext) - start; +#ifndef CONFIG_DYNAMIC_FTRACE + /* Dynamic tracing modifies the kernel text section */ set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); printk(KERN_INFO "Write protecting the kernel text: %luk\n", size >> 10); @@ -722,6 +724,8 @@ void mark_rodata_ro(void) printk(KERN_INFO "Testing CPA: write protecting again\n"); set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT); #endif +#endif /* CONFIG_DYNAMIC_FTRACE */ + start += size; size = (unsigned long)__end_rodata - start; 
set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 32ba13b0f818..41824e776b6c 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -766,6 +766,13 @@ EXPORT_SYMBOL_GPL(rodata_test_data); void mark_rodata_ro(void) { unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata); + unsigned long rodata_start = + ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK; + +#ifdef CONFIG_DYNAMIC_FTRACE + /* Dynamic tracing modifies the kernel text section */ + start = rodata_start; +#endif printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", (end - start) >> 10); @@ -775,8 +782,7 @@ void mark_rodata_ro(void) * The rodata section (but not the kernel text!) should also be * not-executable. */ - start = ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK; - set_memory_nx(start, (end - start) >> PAGE_SHIFT); + set_memory_nx(rodata_start, (end - start) >> PAGE_SHIFT); rodata_test(); -- cgit v1.2.2 From 86069782d62e731b4835a0cf8eb7d1d0e17cf306 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:56 +0200 Subject: x86: add a list for custom page fault handlers. Provides kernel modules a way to register custom page fault handlers. On every page fault this will call a list of registered functions. The functions may handle the fault and force do_page_fault() to return immediately. This functionality is similar to the now removed page fault notifiers. Custom page fault handlers are used by debugging and reverse engineering tools. Mmiotrace is one such tool and a patch to add it into the tree will follow. The custom page fault handlers are called earlier in do_page_fault() than the page fault notifiers were. 
Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig.debug | 8 ++++++++ arch/x86/mm/fault.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) (limited to 'arch') diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index ac1e31ba4795..9431a8399844 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -168,6 +168,14 @@ config IOMMU_LEAK Add a simple leak tracer to the IOMMU code. This is useful when you are debugging a buggy device driver that leaks IOMMU mappings. +config PAGE_FAULT_HANDLERS + bool "Custom page fault handlers" + depends on DEBUG_KERNEL + help + Allow the use of custom page fault handlers. A kernel module may + register a function that is called on every page fault. Custom + handlers are used by some debugging and reverse engineering tools. + # # IO delay types: # diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index fd7e1798c75a..343f5c1aacc8 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -49,6 +49,60 @@ #define PF_RSVD (1<<3) #define PF_INSTR (1<<4) +#ifdef CONFIG_PAGE_FAULT_HANDLERS +static HLIST_HEAD(pf_handlers); /* protected by RCU */ +static DEFINE_SPINLOCK(pf_handlers_writer); + +void register_page_fault_handler(struct pf_handler *new_pfh) +{ + unsigned long flags; + spin_lock_irqsave(&pf_handlers_writer, flags); + hlist_add_head_rcu(&new_pfh->hlist, &pf_handlers); + spin_unlock_irqrestore(&pf_handlers_writer, flags); +} +EXPORT_SYMBOL_GPL(register_page_fault_handler); + +/** + * unregister_page_fault_handler: + * The caller must ensure @old_pfh is not in use anymore before freeing it. + * This function does not guarantee it. The list of handlers is protected by + * RCU, so you can do this by e.g. calling synchronize_rcu(). 
+ */ +void unregister_page_fault_handler(struct pf_handler *old_pfh) +{ + unsigned long flags; + spin_lock_irqsave(&pf_handlers_writer, flags); + hlist_del_rcu(&old_pfh->hlist); + spin_unlock_irqrestore(&pf_handlers_writer, flags); +} +EXPORT_SYMBOL_GPL(unregister_page_fault_handler); +#endif + +/* returns non-zero if do_page_fault() should return */ +static int handle_custom_pf(struct pt_regs *regs, unsigned long error_code, + unsigned long address) +{ +#ifdef CONFIG_PAGE_FAULT_HANDLERS + int ret = 0; + struct pf_handler *cur; + struct hlist_node *ncur; + + if (hlist_empty(&pf_handlers)) + return 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(cur, ncur, &pf_handlers, hlist) { + ret = cur->handler(regs, error_code, address); + if (ret) + break; + } + rcu_read_unlock(); + return ret; +#else + return 0; +#endif +} + static inline int notify_page_fault(struct pt_regs *regs) { #ifdef CONFIG_KPROBES @@ -601,6 +655,8 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) if (notify_page_fault(regs)) return; + if (handle_custom_pf(regs, error_code, address)) + return; /* * We fault-in kernel-space virtual memory on-demand. The -- cgit v1.2.2 From 72b59d67f80983f7bb587b086fb4cb1bc95263a4 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:21:01 +0200 Subject: x86_64: fix kernel rodata NX setting Without CONFIG_DYNAMIC_FTRACE, mark_rodata_ro() would mark a wrong number of pages as no-execute. The bug was introduced in the patch "ftrace: dont write protect kernel text". The symptom was machine reboot after a CPU hotplug. 
Signed-off-by: Pekka Paalanen Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/init_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 41824e776b6c..295be1d07b82 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -782,7 +782,7 @@ void mark_rodata_ro(void) * The rodata section (but not the kernel text!) should also be * not-executable. */ - set_memory_nx(rodata_start, (end - start) >> PAGE_SHIFT); + set_memory_nx(rodata_start, (end - rodata_start) >> PAGE_SHIFT); rodata_test(); -- cgit v1.2.2 From 2f1dafe50cc4e58a239fd81bd47f87f32042a1ee Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:21:01 +0200 Subject: x86: fix SMP alternatives: use mutex instead of spinlock, text_poke is sleepable text_poke is sleepable. The original fix by Mathieu Desnoyers . Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/alternative.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index de240ba2e288..2763cb37b553 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include @@ -279,7 +279,7 @@ struct smp_alt_module { struct list_head next; }; static LIST_HEAD(smp_alt_modules); -static DEFINE_SPINLOCK(smp_alt); +static DEFINE_MUTEX(smp_alt); static int smp_mode = 1; /* protected by smp_alt */ void alternatives_smp_module_add(struct module *mod, char *name, @@ -312,12 +312,12 @@ void alternatives_smp_module_add(struct module *mod, char *name, __func__, smp->locks, smp->locks_end, smp->text, smp->text_end, smp->name); - spin_lock(&smp_alt); + mutex_lock(&smp_alt); list_add_tail(&smp->next, &smp_alt_modules); if (boot_cpu_has(X86_FEATURE_UP)) 
alternatives_smp_unlock(smp->locks, smp->locks_end, smp->text, smp->text_end); - spin_unlock(&smp_alt); + mutex_unlock(&smp_alt); } void alternatives_smp_module_del(struct module *mod) @@ -327,17 +327,17 @@ void alternatives_smp_module_del(struct module *mod) if (smp_alt_once || noreplace_smp) return; - spin_lock(&smp_alt); + mutex_lock(&smp_alt); list_for_each_entry(item, &smp_alt_modules, next) { if (mod != item->mod) continue; list_del(&item->next); - spin_unlock(&smp_alt); + mutex_unlock(&smp_alt); DPRINTK("%s: %s\n", __func__, item->name); kfree(item); return; } - spin_unlock(&smp_alt); + mutex_unlock(&smp_alt); } void alternatives_smp_switch(int smp) @@ -359,7 +359,7 @@ void alternatives_smp_switch(int smp) return; BUG_ON(!smp && (num_online_cpus() > 1)); - spin_lock(&smp_alt); + mutex_lock(&smp_alt); /* * Avoid unnecessary switches because it forces JIT based VMs to @@ -383,7 +383,7 @@ void alternatives_smp_switch(int smp) mod->text, mod->text_end); } smp_mode = smp; - spin_unlock(&smp_alt); + mutex_unlock(&smp_alt); } #endif -- cgit v1.2.2 From d05f5f9906740474eb768823004ffcd775b12ca6 Mon Sep 17 00:00:00 2001 From: David Miller Date: Tue, 13 May 2008 22:06:59 -0700 Subject: sparc64: add ftrace support. Signed-off-by: David S. 
Miller Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/sparc64/Kconfig | 1 + arch/sparc64/Kconfig.debug | 2 +- arch/sparc64/kernel/Makefile | 1 + arch/sparc64/kernel/ftrace.c | 99 ++++++++++++++++++++++++++++++++++++++++++++ arch/sparc64/lib/mcount.S | 58 ++++++++++++++++++++++++-- 5 files changed, 156 insertions(+), 5 deletions(-) create mode 100644 arch/sparc64/kernel/ftrace.c (limited to 'arch') diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig index eb36f3b746b8..a480df6e6012 100644 --- a/arch/sparc64/Kconfig +++ b/arch/sparc64/Kconfig @@ -11,6 +11,7 @@ config SPARC config SPARC64 bool default y + select HAVE_FTRACE select HAVE_IDE select HAVE_LMB select HAVE_ARCH_KGDB diff --git a/arch/sparc64/Kconfig.debug b/arch/sparc64/Kconfig.debug index 6a4d28a4076d..d6d32d178fc8 100644 --- a/arch/sparc64/Kconfig.debug +++ b/arch/sparc64/Kconfig.debug @@ -33,7 +33,7 @@ config DEBUG_PAGEALLOC config MCOUNT bool - depends on STACK_DEBUG + depends on STACK_DEBUG || FTRACE default y config FRAME_POINTER diff --git a/arch/sparc64/kernel/Makefile b/arch/sparc64/kernel/Makefile index ec4f5ebb1ca6..418b5782096e 100644 --- a/arch/sparc64/kernel/Makefile +++ b/arch/sparc64/kernel/Makefile @@ -14,6 +14,7 @@ obj-y := process.o setup.o cpu.o idprom.o \ power.o sbus.o sparc64_ksyms.o chmc.o \ visemul.o prom.o of_device.o hvapi.o sstate.o mdesc.o +obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_PCI) += ebus.o pci_common.o \ pci_psycho.o pci_sabre.o pci_schizo.o \ diff --git a/arch/sparc64/kernel/ftrace.c b/arch/sparc64/kernel/ftrace.c new file mode 100644 index 000000000000..f449e6df6c4a --- /dev/null +++ b/arch/sparc64/kernel/ftrace.c @@ -0,0 +1,99 @@ +#include +#include +#include +#include +#include +#include + +static const u32 ftrace_nop = 0x01000000; + +notrace int ftrace_ip_converted(unsigned long ip) +{ + u32 insn = *(u32 *) ip; + + return (insn == ftrace_nop); +} + +notrace unsigned char 
*ftrace_nop_replace(void) +{ + return (char *)&ftrace_nop; +} + +notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) +{ + static u32 call; + s32 off; + + off = ((s32)addr - (s32)ip); + call = 0x40000000 | ((u32)off >> 2); + + return (unsigned char *) &call; +} + +notrace int +ftrace_modify_code(unsigned long ip, unsigned char *old_code, + unsigned char *new_code) +{ + u32 old = *(u32 *)old_code; + u32 new = *(u32 *)new_code; + u32 replaced; + int faulted; + + __asm__ __volatile__( + "1: cas [%[ip]], %[old], %[new]\n" + " flush %[ip]\n" + " mov 0, %[faulted]\n" + "2:\n" + " .section .fixup,#alloc,#execinstr\n" + " .align 4\n" + "3: sethi %%hi(2b), %[faulted]\n" + " jmpl %[faulted] + %%lo(2b), %%g0\n" + " mov 1, %[faulted]\n" + " .previous\n" + " .section __ex_table,\"a\"\n" + " .align 4\n" + " .word 1b, 3b\n" + " .previous\n" + : "=r" (replaced), [faulted] "=r" (faulted) + : [new] "0" (new), [old] "r" (old), [ip] "r" (ip) + : "memory"); + + if (replaced != old && replaced != new) + faulted = 2; + + return faulted; +} + +notrace int ftrace_update_ftrace_func(ftrace_func_t func) +{ + unsigned long ip = (unsigned long)(&ftrace_call); + unsigned char old[4], *new; + + memcpy(old, &ftrace_call, 4); + new = ftrace_call_replace(ip, (unsigned long)func); + return ftrace_modify_code(ip, old, new); +} + +notrace int ftrace_mcount_set(unsigned long *data) +{ + unsigned long ip = (long)(&mcount_call); + unsigned long *addr = data; + unsigned char old[4], *new; + + /* + * Replace the mcount stub with a pointer to the + * ip recorder function. 
+ */ + memcpy(old, &mcount_call, 4); + new = ftrace_call_replace(ip, *addr); + *addr = ftrace_modify_code(ip, old, new); + + return 0; +} + + +int __init ftrace_dyn_arch_init(void *data) +{ + ftrace_mcount_set(data); + return 0; +} diff --git a/arch/sparc64/lib/mcount.S b/arch/sparc64/lib/mcount.S index 9e4534b485c7..7735a7a60533 100644 --- a/arch/sparc64/lib/mcount.S +++ b/arch/sparc64/lib/mcount.S @@ -28,10 +28,13 @@ ovstack: .skip OVSTACKSIZE #endif .text - .align 32 - .globl mcount, _mcount -mcount: + .align 32 + .globl _mcount + .type _mcount,#function + .globl mcount + .type mcount,#function _mcount: +mcount: #ifdef CONFIG_STACK_DEBUG /* * Check whether %sp is dangerously low. @@ -55,6 +58,53 @@ _mcount: or %g3, %lo(panicstring), %o0 call prom_halt nop +1: +#endif +#ifdef CONFIG_FTRACE +#ifdef CONFIG_DYNAMIC_FTRACE + mov %o7, %o0 + .globl mcount_call +mcount_call: + call ftrace_stub + mov %o0, %o7 +#else + sethi %hi(ftrace_trace_function), %g1 + sethi %hi(ftrace_stub), %g2 + ldx [%g1 + %lo(ftrace_trace_function)], %g1 + or %g2, %lo(ftrace_stub), %g2 + cmp %g1, %g2 + be,pn %icc, 1f + mov %i7, %o1 + jmpl %g1, %g0 + mov %o7, %o0 + /* not reached */ +1: #endif -1: retl +#endif + retl nop + .size _mcount,.-_mcount + .size mcount,.-mcount + +#ifdef CONFIG_FTRACE + .globl ftrace_stub + .type ftrace_stub,#function +ftrace_stub: + retl + nop + .size ftrace_stub,.-ftrace_stub +#ifdef CONFIG_DYNAMIC_FTRACE + .globl ftrace_caller + .type ftrace_caller,#function +ftrace_caller: + mov %i7, %o1 + mov %o7, %o0 + .globl ftrace_call +ftrace_call: + call ftrace_stub + mov %o0, %o7 + retl + nop + .size ftrace_caller,.-ftrace_caller +#endif +#endif -- cgit v1.2.2 From 37135677e653537ffc6e7def679443272a1c03c3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 14 May 2008 08:10:31 +0200 Subject: ftrace: fix mcount export bug David S. Miller noticed the following bug: the -pg instrumentation function callback is named differently on each platform. 
On x86 it is mcount, on sparc it is _mcount. So the export does not make sense in kernel/trace/ftrace.c - move it to x86. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/i386_ksyms_32.c | 9 ++++++++- arch/x86/kernel/x8664_ksyms_64.c | 11 +++++++++-- 2 files changed, 17 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c index deb43785e923..29999dbb754c 100644 --- a/arch/x86/kernel/i386_ksyms_32.c +++ b/arch/x86/kernel/i386_ksyms_32.c @@ -1,7 +1,14 @@ +#include #include + #include -#include #include +#include + +#ifdef CONFIG_FTRACE +/* mcount is defined in assembly */ +EXPORT_SYMBOL(mcount); +#endif /* Networking helper routines. */ EXPORT_SYMBOL(csum_partial_copy_generic); diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index f6c05d0410fb..122885bc5f3b 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -1,15 +1,22 @@ /* Exports for assembly files. All C exports should go in the respective C files. */ +#include #include -#include #include +#include + #include -#include #include +#include #include +#ifdef CONFIG_FTRACE +/* mcount is defined in assembly */ +EXPORT_SYMBOL(mcount); +#endif + EXPORT_SYMBOL(kernel_thread); EXPORT_SYMBOL(__get_user_1); -- cgit v1.2.2 From 4e491d14f2506b218d678935c25a7027b79178b1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 14 May 2008 23:49:44 -0400 Subject: ftrace: support for PowerPC This patch adds full support for ftrace for PowerPC (both 64 and 32 bit). This includes dynamic tracing and function filtering. 
Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/powerpc/Kconfig | 3 +- arch/powerpc/kernel/Makefile | 14 +++ arch/powerpc/kernel/entry_32.S | 130 ++++++++++++++++++++++++ arch/powerpc/kernel/entry_64.S | 62 ++++++++++++ arch/powerpc/kernel/ftrace.c | 165 +++++++++++++++++++++++++++++++ arch/powerpc/kernel/io.c | 3 +- arch/powerpc/kernel/irq.c | 6 +- arch/powerpc/kernel/setup_32.c | 11 ++- arch/powerpc/kernel/setup_64.c | 5 + arch/powerpc/platforms/powermac/Makefile | 5 + 10 files changed, 396 insertions(+), 8 deletions(-) create mode 100644 arch/powerpc/kernel/ftrace.c (limited to 'arch') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 3934e2659407..62d034adbd43 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -105,11 +105,12 @@ config ARCH_NO_VIRT_TO_BUS config PPC bool default y + select HAVE_FTRACE select HAVE_IDE - select HAVE_OPROFILE select HAVE_KPROBES select HAVE_KRETPROBES select HAVE_LMB + select HAVE_OPROFILE config EARLY_PRINTK bool diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 2346d271fbfd..f3f5e2641432 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -12,6 +12,18 @@ CFLAGS_prom_init.o += -fPIC CFLAGS_btext.o += -fPIC endif +ifdef CONFIG_FTRACE +# Do not trace early boot code +CFLAGS_REMOVE_cputable.o = -pg +CFLAGS_REMOVE_prom_init.o = -pg + +ifdef CONFIG_DYNAMIC_FTRACE +# dynamic ftrace setup. 
+CFLAGS_REMOVE_ftrace.o = -pg +endif + +endif + obj-y := cputable.o ptrace.o syscalls.o \ irq.o align.o signal_32.o pmc.o vdso.o \ init_task.o process.o systbl.o idle.o \ @@ -78,6 +90,8 @@ obj-$(CONFIG_KEXEC) += machine_kexec.o crash.o \ obj-$(CONFIG_AUDIT) += audit.o obj64-$(CONFIG_AUDIT) += compat_audit.o +obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o + obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o ifneq ($(CONFIG_PPC_INDIRECT_IO),y) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 0c8614d9875c..0e6221889ca9 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -1035,3 +1035,133 @@ machine_check_in_rtas: /* XXX load up BATs and panic */ #endif /* CONFIG_PPC_RTAS */ + +#ifdef CONFIG_FTRACE +#ifdef CONFIG_DYNAMIC_FTRACE +_GLOBAL(mcount) +_GLOBAL(_mcount) + stwu r1,-48(r1) + stw r3, 12(r1) + stw r4, 16(r1) + stw r5, 20(r1) + stw r6, 24(r1) + mflr r3 + stw r7, 28(r1) + mfcr r5 + stw r8, 32(r1) + stw r9, 36(r1) + stw r10,40(r1) + stw r3, 44(r1) + stw r5, 8(r1) + .globl mcount_call +mcount_call: + bl ftrace_stub + nop + lwz r6, 8(r1) + lwz r0, 44(r1) + lwz r3, 12(r1) + mtctr r0 + lwz r4, 16(r1) + mtcr r6 + lwz r5, 20(r1) + lwz r6, 24(r1) + lwz r0, 52(r1) + lwz r7, 28(r1) + lwz r8, 32(r1) + mtlr r0 + lwz r9, 36(r1) + lwz r10,40(r1) + addi r1, r1, 48 + bctr + +_GLOBAL(ftrace_caller) + /* Based off of objdump optput from glibc */ + stwu r1,-48(r1) + stw r3, 12(r1) + stw r4, 16(r1) + stw r5, 20(r1) + stw r6, 24(r1) + mflr r3 + lwz r4, 52(r1) + mfcr r5 + stw r7, 28(r1) + stw r8, 32(r1) + stw r9, 36(r1) + stw r10,40(r1) + stw r3, 44(r1) + stw r5, 8(r1) +.globl ftrace_call +ftrace_call: + bl ftrace_stub + nop + lwz r6, 8(r1) + lwz r0, 44(r1) + lwz r3, 12(r1) + mtctr r0 + lwz r4, 16(r1) + mtcr r6 + lwz r5, 20(r1) + lwz r6, 24(r1) + lwz r0, 52(r1) + lwz r7, 28(r1) + lwz r8, 32(r1) + mtlr r0 + lwz r9, 36(r1) + lwz r10,40(r1) + addi r1, r1, 48 + bctr +#else +_GLOBAL(mcount) +_GLOBAL(_mcount) + stwu r1,-48(r1) + stw r3, 
12(r1) + stw r4, 16(r1) + stw r5, 20(r1) + stw r6, 24(r1) + mflr r3 + lwz r4, 52(r1) + mfcr r5 + stw r7, 28(r1) + stw r8, 32(r1) + stw r9, 36(r1) + stw r10,40(r1) + stw r3, 44(r1) + stw r5, 8(r1) + + LOAD_REG_ADDR(r5, ftrace_trace_function) +#if 0 + mtctr r3 + mr r1, r5 + bctrl +#endif + lwz r5,0(r5) +#if 1 + mtctr r5 + bctrl +#else + bl ftrace_stub +#endif + nop + + lwz r6, 8(r1) + lwz r0, 44(r1) + lwz r3, 12(r1) + mtctr r0 + lwz r4, 16(r1) + mtcr r6 + lwz r5, 20(r1) + lwz r6, 24(r1) + lwz r0, 52(r1) + lwz r7, 28(r1) + lwz r8, 32(r1) + mtlr r0 + lwz r9, 36(r1) + lwz r10,40(r1) + addi r1, r1, 48 + bctr +#endif + +_GLOBAL(ftrace_stub) + blr + +#endif /* CONFIG_MCOUNT */ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index c0db5b769e55..2c4d9e056ead 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -870,3 +870,65 @@ _GLOBAL(enter_prom) ld r0,16(r1) mtlr r0 blr + +#ifdef CONFIG_FTRACE +#ifdef CONFIG_DYNAMIC_FTRACE +_GLOBAL(mcount) +_GLOBAL(_mcount) + /* Taken from output of objdump from lib64/glibc */ + mflr r3 + stdu r1, -112(r1) + std r3, 128(r1) + .globl mcount_call +mcount_call: + bl ftrace_stub + nop + ld r0, 128(r1) + mtlr r0 + addi r1, r1, 112 + blr + +_GLOBAL(ftrace_caller) + /* Taken from output of objdump from lib64/glibc */ + mflr r3 + ld r11, 0(r1) + stdu r1, -112(r1) + std r3, 128(r1) + ld r4, 16(r11) +.globl ftrace_call +ftrace_call: + bl ftrace_stub + nop + ld r0, 128(r1) + mtlr r0 + addi r1, r1, 112 +_GLOBAL(ftrace_stub) + blr +#else +_GLOBAL(mcount) + blr + +_GLOBAL(_mcount) + /* Taken from output of objdump from lib64/glibc */ + mflr r3 + ld r11, 0(r1) + stdu r1, -112(r1) + std r3, 128(r1) + ld r4, 16(r11) + + + LOAD_REG_ADDR(r5,ftrace_trace_function) + ld r5,0(r5) + ld r5,0(r5) + mtctr r5 + bctrl + + nop + ld r0, 128(r1) + mtlr r0 + addi r1, r1, 112 +_GLOBAL(ftrace_stub) + blr + +#endif +#endif diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c new file mode 100644 
index 000000000000..5a4993fefa45 --- /dev/null +++ b/arch/powerpc/kernel/ftrace.c @@ -0,0 +1,165 @@ +/* + * Code for replacing ftrace calls with jumps. + * + * Copyright (C) 2007-2008 Steven Rostedt + * + * Thanks goes out to P.A. Semi, Inc for supplying me with a PPC64 box. + * + */ + +#include +#include +#include +#include +#include +#include + +#include + +#define CALL_BACK 4 + +static unsigned int ftrace_nop = 0x60000000; + +#ifdef CONFIG_PPC32 +# define GET_ADDR(addr) addr +#else +/* PowerPC64's functions are data that points to the functions */ +# define GET_ADDR(addr) *(unsigned long *)addr +#endif + +notrace int ftrace_ip_converted(unsigned long ip) +{ + unsigned int save; + + ip -= CALL_BACK; + save = *(unsigned int *)ip; + + return save == ftrace_nop; +} + +static unsigned int notrace ftrace_calc_offset(long ip, long addr) +{ + return (int)((addr + CALL_BACK) - ip); +} + +notrace unsigned char *ftrace_nop_replace(void) +{ + return (char *)&ftrace_nop; +} + +notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) +{ + static unsigned int op; + + addr = GET_ADDR(addr); + + /* Set to "bl addr" */ + op = 0x48000001 | (ftrace_calc_offset(ip, addr) & 0x03fffffe); + + /* + * No locking needed, this must be called via kstop_machine + * which in essence is like running on a uniprocessor machine. + */ + return (unsigned char *)&op; +} + +#ifdef CONFIG_PPC64 +# define _ASM_ALIGN " .align 3 " +# define _ASM_PTR " .llong " +#else +# define _ASM_ALIGN " .align 2 " +# define _ASM_PTR " .long " +#endif + +notrace int +ftrace_modify_code(unsigned long ip, unsigned char *old_code, + unsigned char *new_code) +{ + unsigned replaced; + unsigned old = *(unsigned *)old_code; + unsigned new = *(unsigned *)new_code; + int faulted = 0; + + /* move the IP back to the start of the call */ + ip -= CALL_BACK; + + /* + * Note: Due to modules and __init, code can + * disappear and change, we need to protect against faulting + * as well as code changing. 
+ * + * No real locking needed, this code is run through + * kstop_machine. + */ + asm volatile ( + "1: lwz %1, 0(%2)\n" + " cmpw %1, %5\n" + " bne 2f\n" + " stwu %3, 0(%2)\n" + "2:\n" + ".section .fixup, \"ax\"\n" + "3: li %0, 1\n" + " b 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + _ASM_ALIGN "\n" + _ASM_PTR "1b, 3b\n" + ".previous" + : "=r"(faulted), "=r"(replaced) + : "r"(ip), "r"(new), + "0"(faulted), "r"(old) + : "memory"); + + if (replaced != old && replaced != new) + faulted = 2; + + if (!faulted) + flush_icache_range(ip, ip + 8); + + return faulted; +} + +notrace int ftrace_update_ftrace_func(ftrace_func_t func) +{ + unsigned long ip = (unsigned long)(&ftrace_call); + unsigned char old[4], *new; + int ret; + + ip += CALL_BACK; + + memcpy(old, &ftrace_call, 4); + new = ftrace_call_replace(ip, (unsigned long)func); + ret = ftrace_modify_code(ip, old, new); + + return ret; +} + +notrace int ftrace_mcount_set(unsigned long *data) +{ + unsigned long ip = (long)(&mcount_call); + unsigned long *addr = data; + unsigned char old[4], *new; + + /* ip is at the location, but modify code will subtact this */ + ip += CALL_BACK; + + /* + * Replace the mcount stub with a pointer to the + * ip recorder function. 
+ */ + memcpy(old, &mcount_call, 4); + new = ftrace_call_replace(ip, *addr); + *addr = ftrace_modify_code(ip, old, new); + + return 0; +} + +int __init ftrace_dyn_arch_init(void *data) +{ + /* This is running in kstop_machine */ + + ftrace_mcount_set(data); + + return 0; +} + diff --git a/arch/powerpc/kernel/io.c b/arch/powerpc/kernel/io.c index e31aca9208eb..1882bf419fa6 100644 --- a/arch/powerpc/kernel/io.c +++ b/arch/powerpc/kernel/io.c @@ -120,7 +120,8 @@ EXPORT_SYMBOL(_outsl_ns); #define IO_CHECK_ALIGN(v,a) ((((unsigned long)(v)) & ((a) - 1)) == 0) -void _memset_io(volatile void __iomem *addr, int c, unsigned long n) +notrace void +_memset_io(volatile void __iomem *addr, int c, unsigned long n) { void *p = (void __force *)addr; u32 lc = c; diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 2f73f705d564..6e01eb0a3315 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -98,7 +98,7 @@ EXPORT_SYMBOL(irq_desc); int distribute_irqs = 1; -static inline unsigned long get_hard_enabled(void) +static inline notrace unsigned long get_hard_enabled(void) { unsigned long enabled; @@ -108,13 +108,13 @@ static inline unsigned long get_hard_enabled(void) return enabled; } -static inline void set_soft_enabled(unsigned long enable) +static inline notrace void set_soft_enabled(unsigned long enable) { __asm__ __volatile__("stb %0,%1(13)" : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled))); } -void raw_local_irq_restore(unsigned long en) +notrace void raw_local_irq_restore(unsigned long en) { /* * get_paca()->soft_enabled = en; diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 5112a4aa801d..22f8e2bacd32 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -47,6 +47,11 @@ #include #endif +#ifdef CONFIG_FTRACE +extern void _mcount(void); +EXPORT_SYMBOL(_mcount); +#endif + extern void bootx_init(unsigned long r4, unsigned long phys); int boot_cpuid; @@ -81,7 +86,7 
@@ int ucache_bsize; * from the address that it was linked at, so we must use RELOC/PTRRELOC * to access static data (including strings). -- paulus */ -unsigned long __init early_init(unsigned long dt_ptr) +notrace unsigned long __init early_init(unsigned long dt_ptr) { unsigned long offset = reloc_offset(); struct cpu_spec *spec; @@ -111,7 +116,7 @@ unsigned long __init early_init(unsigned long dt_ptr) * This is called very early on the boot process, after a minimal * MMU environment has been set up but before MMU_init is called. */ -void __init machine_init(unsigned long dt_ptr, unsigned long phys) +notrace void __init machine_init(unsigned long dt_ptr, unsigned long phys) { /* Enable early debugging if any specified (see udbg.h) */ udbg_early_init(); @@ -133,7 +138,7 @@ void __init machine_init(unsigned long dt_ptr, unsigned long phys) #ifdef CONFIG_BOOKE_WDT /* Checks wdt=x and wdt_period=xx command-line option */ -int __init early_parse_wdt(char *p) +notrace int __init early_parse_wdt(char *p) { if (p && strncmp(p, "0", 1) != 0) booke_wdt_enabled = 1; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 098fd96a394a..277bf18cbbcc 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -85,6 +85,11 @@ struct ppc64_caches ppc64_caches = { }; EXPORT_SYMBOL_GPL(ppc64_caches); +#ifdef CONFIG_FTRACE +extern void _mcount(void); +EXPORT_SYMBOL(_mcount); +#endif + /* * These are used in binfmt_elf.c to put aux entries on the stack * for each elf executable being started. 
diff --git a/arch/powerpc/platforms/powermac/Makefile b/arch/powerpc/platforms/powermac/Makefile index 4d72c8f72159..89774177b209 100644 --- a/arch/powerpc/platforms/powermac/Makefile +++ b/arch/powerpc/platforms/powermac/Makefile @@ -1,5 +1,10 @@ CFLAGS_bootx_init.o += -fPIC +ifdef CONFIG_FTRACE +# Do not trace early boot code +CFLAGS_REMOVE_bootx_init.o = -pg +endif + obj-y += pic.o setup.o time.o feature.o pci.o \ sleep.o low_i2c.o cache.o pfunc_core.o \ pfunc_base.o -- cgit v1.2.2 From 7fa09f24b477ad41b821713eba757b3aa7a2864a Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 14 May 2008 21:30:32 -0400 Subject: ftrace: use the new kbuild CFLAGS_REMOVE for x86/kernel directory This patch removes the Makefile turd and uses the nice CFLAGS_REMOVE macro in the x86/kernel directory. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/Makefile | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index e142091524b0..739d49acd2f1 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -6,6 +6,13 @@ extra-y := head_$(BITS).o head$(BITS).o init_task.o vmlinux.lds CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) +ifdef CONFIG_FTRACE +# Do not profile debug utilities +CFLAGS_REMOVE_tsc_64.o = -pg +CFLAGS_REMOVE_tsc_32.o = -pg +CFLAGS_REMOVE_rtc.o = -pg +endif + # # vsyscalls (which work on the user stack) should have # no stack-protector checks: -- cgit v1.2.2 From 677aa9f77e8de3791b481a0cec6c8b84d1eec626 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 17 May 2008 00:01:36 -0400 Subject: ftrace: add have dynamic ftrace config for archs Now that ftrace is being ported to other architectures, it has become apparent that DYNAMIC_FTRACE is dependent on whether or not that architecture implements dynamic ftrace. FTRACE itself may be ported to an architecture without porting dynamic ftrace. 
This patch adds HAVE_DYNAMIC_FTRACE to allow architectures to port ftrace without having to also port the dynamic aspect as well. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/powerpc/Kconfig | 1 + arch/sparc64/Kconfig | 1 + arch/x86/Kconfig | 1 + 3 files changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 62d034adbd43..a5e9912e2d37 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -105,6 +105,7 @@ config ARCH_NO_VIRT_TO_BUS config PPC bool default y + select HAVE_DYNAMIC_FTRACE select HAVE_FTRACE select HAVE_IDE select HAVE_KPROBES diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig index a480df6e6012..fca9246470b1 100644 --- a/arch/sparc64/Kconfig +++ b/arch/sparc64/Kconfig @@ -11,6 +11,7 @@ config SPARC config SPARC64 bool default y + select HAVE_DYNAMIC_FTRACE select HAVE_FTRACE select HAVE_IDE select HAVE_LMB diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c742dfeb0dbe..fc86c54e791e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -23,6 +23,7 @@ config X86 select HAVE_OPROFILE select HAVE_KPROBES select HAVE_KRETPROBES + select HAVE_DYNAMIC_FTRACE select HAVE_FTRACE select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) select HAVE_ARCH_KGDB if !X86_VOYAGER -- cgit v1.2.2 From ccbfac2923c9febaeaf07a50054027a92b502718 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 22 May 2008 14:31:07 -0400 Subject: ftrace: powerpc clean ups This patch cleans up the ftrace code in PowerPC based on the comments from Michael Ellerman. 
Signed-off-by: Steven Rostedt Cc: Michael Ellerman Cc: proski@gnu.org Cc: a.p.zijlstra@chello.nl Cc: Pekka Paalanen Cc: Steven Rostedt Cc: linuxppc-dev@ozlabs.org Cc: Soeren Sandmann Pedersen Cc: paulus@samba.org Signed-off-by: Thomas Gleixner --- arch/powerpc/kernel/entry_32.S | 11 ++--------- arch/powerpc/kernel/ftrace.c | 8 +++++++- arch/powerpc/kernel/ppc_ksyms.c | 5 +++++ arch/powerpc/kernel/setup_32.c | 5 ----- arch/powerpc/kernel/setup_64.c | 5 ----- 5 files changed, 14 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 0e6221889ca9..3b1dd29d9f91 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -1129,18 +1129,11 @@ _GLOBAL(_mcount) stw r5, 8(r1) LOAD_REG_ADDR(r5, ftrace_trace_function) -#if 0 - mtctr r3 - mr r1, r5 - bctrl -#endif lwz r5,0(r5) -#if 1 + mtctr r5 bctrl -#else - bl ftrace_stub -#endif + nop lwz r6, 8(r1) diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 5a4993fefa45..69ed41223468 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -51,10 +51,16 @@ notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) { static unsigned int op; + /* + * It would be nice to just use create_function_call, but that will + * update the code itself. Here we need to just return the + * instruction that is going to be modified, without modifying the + * code. 
+ */ addr = GET_ADDR(addr); /* Set to "bl addr" */ - op = 0x48000001 | (ftrace_calc_offset(ip, addr) & 0x03fffffe); + op = 0x48000001 | (ftrace_calc_offset(ip, addr) & 0x03fffffc); /* * No locking needed, this must be called via kstop_machine diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index cf6b5a7d8b3f..4300db52662a 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -43,6 +43,7 @@ #include #include #include +#include #ifdef CONFIG_PPC32 extern void transfer_to_handler(void); @@ -68,6 +69,10 @@ EXPORT_SYMBOL(single_step_exception); EXPORT_SYMBOL(sys_sigreturn); #endif +#ifdef CONFIG_FTRACE +EXPORT_SYMBOL(_mcount); +#endif + EXPORT_SYMBOL(strcpy); EXPORT_SYMBOL(strncpy); EXPORT_SYMBOL(strcat); diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 22f8e2bacd32..19e8fcb9cea8 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -47,11 +47,6 @@ #include #endif -#ifdef CONFIG_FTRACE -extern void _mcount(void); -EXPORT_SYMBOL(_mcount); -#endif - extern void bootx_init(unsigned long r4, unsigned long phys); int boot_cpuid; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 277bf18cbbcc..098fd96a394a 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -85,11 +85,6 @@ struct ppc64_caches ppc64_caches = { }; EXPORT_SYMBOL_GPL(ppc64_caches); -#ifdef CONFIG_FTRACE -extern void _mcount(void); -EXPORT_SYMBOL(_mcount); -#endif - /* * These are used in binfmt_elf.c to put aux entries on the stack * for each elf executable being started. -- cgit v1.2.2 From 014c257cce65e9d1cd2d28ec1c89a37c536b151d Mon Sep 17 00:00:00 2001 From: Abhishek Sagar Date: Sat, 31 May 2008 14:23:50 +0530 Subject: ftrace: core support for ARM Core ftrace support for the ARM architecture, which includes support for dynamic function tracing. 
Signed-off-by: Abhishek Sagar Signed-off-by: Ingo Molnar --- arch/arm/Kconfig | 2 + arch/arm/boot/compressed/Makefile | 6 ++ arch/arm/kernel/Makefile | 5 ++ arch/arm/kernel/armksyms.c | 5 ++ arch/arm/kernel/entry-common.S | 47 ++++++++++++++ arch/arm/kernel/ftrace.c | 128 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 193 insertions(+) create mode 100644 arch/arm/kernel/ftrace.c (limited to 'arch') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index b786e68914d4..3845e5c8a34f 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -14,6 +14,8 @@ config ARM select HAVE_OPROFILE select HAVE_KPROBES if (!XIP_KERNEL) select HAVE_KRETPROBES if (HAVE_KPROBES) + select HAVE_FTRACE if (!XIP_KERNEL) + select HAVE_DYNAMIC_FTRACE if (HAVE_FTRACE) help The ARM series is a line of low-power-consumption RISC chip designs licensed by ARM Ltd and targeted at embedded applications and diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index de9d9ee50958..95baac4939e0 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -69,6 +69,12 @@ SEDFLAGS = s/TEXT_START/$(ZTEXTADDR)/;s/BSS_START/$(ZBSSADDR)/ targets := vmlinux vmlinux.lds piggy.gz piggy.o font.o font.c \ head.o misc.o $(OBJS) + +ifeq ($(CONFIG_FTRACE),y) +ORIG_CFLAGS := $(KBUILD_CFLAGS) +KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS)) +endif + EXTRA_CFLAGS := -fpic -fno-builtin EXTRA_AFLAGS := diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index ad455ff5aebe..eb9092ca8008 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -4,6 +4,10 @@ AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) +ifdef CONFIG_DYNAMIC_FTRACE +CFLAGS_REMOVE_ftrace.o = -pg +endif + # Object file lists. 
obj-y := compat.o entry-armv.o entry-common.o irq.o \ @@ -18,6 +22,7 @@ obj-$(CONFIG_ARTHUR) += arthur.o obj-$(CONFIG_ISA_DMA) += dma-isa.o obj-$(CONFIG_PCI) += bios32.o isa.o obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o obj-$(CONFIG_KPROBES) += kprobes.o kprobes-decode.o obj-$(CONFIG_ATAGS_PROC) += atags.o diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index 688b7b1ee416..3b132215cbf8 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -48,6 +48,11 @@ extern void __aeabi_ulcmp(void); extern void fpundefinstr(void); extern void fp_enter(void); +#ifdef CONFIG_FTRACE +extern void mcount(void); +EXPORT_SYMBOL(mcount); +#endif + /* * This has a special calling convention; it doesn't * modify any of the usual registers, except for LR. diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 597ed00a08d8..8f79a4789ed4 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -99,6 +99,53 @@ ENTRY(ret_from_fork) #undef CALL #define CALL(x) .long x +#ifdef CONFIG_FTRACE +#ifdef CONFIG_DYNAMIC_FTRACE +ENTRY(mcount) + stmdb sp!, {r0-r3, lr} + mov r0, lr + + .globl mcount_call +mcount_call: + bl ftrace_stub + ldmia sp!, {r0-r3, pc} + +ENTRY(ftrace_caller) + stmdb sp!, {r0-r3, lr} + ldr r1, [fp, #-4] + mov r0, lr + + .globl ftrace_call +ftrace_call: + bl ftrace_stub + ldmia sp!, {r0-r3, pc} + +#else + +ENTRY(mcount) + stmdb sp!, {r0-r3, lr} + ldr r0, =ftrace_trace_function + ldr r2, [r0] + adr r0, ftrace_stub + cmp r0, r2 + bne trace + ldmia sp!, {r0-r3, pc} + +trace: + ldr r1, [fp, #-4] + mov r0, lr + mov lr, pc + mov pc, r2 + ldmia sp!, {r0-r3, pc} + +#endif /* CONFIG_DYNAMIC_FTRACE */ + + .globl ftrace_stub +ftrace_stub: + mov pc, lr + +#endif /* CONFIG_FTRACE */ + /*============================================================================= * SWI handler 
*----------------------------------------------------------------------------- diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c new file mode 100644 index 000000000000..f4cb4cc3fa0c --- /dev/null +++ b/arch/arm/kernel/ftrace.c @@ -0,0 +1,128 @@ +/* + * Dynamic function tracing support. + * + * Copyright (C) 2008 Abhishek Sagar + * + * For licencing details, see COPYING. + * + * Defines low-level handling of mcount calls when the kernel + * is compiled with the -pg flag. When using dynamic ftrace, the + * mcount call-sites get patched lazily with NOP till they are + * enabled. All code mutation routines here take effect atomically. + */ + +#include +#include + +#define INSN_SIZE 4 +#define PC_OFFSET 8 +#define BL_OPCODE 0xeb000000 +#define BL_OFFSET_MASK 0x00ffffff + +static unsigned long bl_insn; +static const unsigned long NOP = 0xe1a00000; /* mov r0, r0 */ + +/* return true if mcount call site is already patched/no-op'ed */ +int ftrace_ip_converted(unsigned long pc) +{ + unsigned long save; + + pc -= INSN_SIZE; + save = *(unsigned long *)pc; + return save == NOP; +} + +unsigned char *ftrace_nop_replace(void) +{ + return (char *)&NOP; +} + +/* construct a branch (BL) instruction to addr */ +unsigned char *ftrace_call_replace(unsigned long pc, unsigned long addr) +{ + long offset; + + offset = (long)addr - (long)(pc - INSN_SIZE + PC_OFFSET); + if (unlikely(offset < -33554432 || offset > 33554428)) { + /* Can't generate branches that far (from ARM ARM). Ftrace + * doesn't generate branches outside of core kernel text. 
+ */ + WARN_ON_ONCE(1); + return NULL; + } + offset = (offset >> 2) & BL_OFFSET_MASK; + bl_insn = BL_OPCODE | offset; + return (unsigned char *)&bl_insn; +} + +int ftrace_modify_code(unsigned long pc, unsigned char *old_code, + unsigned char *new_code) +{ + unsigned long err = 0, replaced = 0, old, new; + + old = *(unsigned long *)old_code; + new = *(unsigned long *)new_code; + pc -= INSN_SIZE; + + __asm__ __volatile__ ( + "1: ldr %1, [%2] \n" + " cmp %1, %4 \n" + "2: streq %3, [%2] \n" + " cmpne %1, %3 \n" + " movne %0, #2 \n" + "3:\n" + + ".section .fixup, \"ax\"\n" + "4: mov %0, #1 \n" + " b 3b \n" + ".previous\n" + + ".section __ex_table, \"a\"\n" + " .long 1b, 4b \n" + " .long 2b, 4b \n" + ".previous\n" + + : "=r"(err), "=r"(replaced) + : "r"(pc), "r"(new), "r"(old), "0"(err), "1"(replaced) + : "memory"); + + if (!err && (replaced == old)) + flush_icache_range(pc, pc + INSN_SIZE); + + return err; +} + +int ftrace_update_ftrace_func(ftrace_func_t func) +{ + int ret; + unsigned long pc, old; + unsigned char *new; + + pc = (unsigned long)&ftrace_call; + pc += INSN_SIZE; + memcpy(&old, &ftrace_call, INSN_SIZE); + new = ftrace_call_replace(pc, (unsigned long)func); + ret = ftrace_modify_code(pc, (unsigned char *)&old, new); + return ret; +} + +int ftrace_mcount_set(unsigned long *data) +{ + unsigned long pc, old; + unsigned long *addr = data; + unsigned char *new; + + pc = (unsigned long)&mcount_call; + pc += INSN_SIZE; + memcpy(&old, &mcount_call, INSN_SIZE); + new = ftrace_call_replace(pc, *addr); + *addr = ftrace_modify_code(pc, (unsigned char *)&old, new); + return 0; +} + +/* run from kstop_machine */ +int __init ftrace_dyn_arch_init(void *data) +{ + ftrace_mcount_set(data); + return 0; +} -- cgit v1.2.2 From e0773410247f1e5fc6f7c52a4c5f3c6c9873d527 Mon Sep 17 00:00:00 2001 From: Abhishek Sagar Date: Sat, 31 May 2008 14:24:02 +0530 Subject: ftrace: export kretprobe_trampoline for function tracer Follow suit from kprobe implementations on other archs and make 
kretprobe_trampoline non-static. Ftrace implementation (more specifically, kernel/trace/trace.c) requires access to it (see-> http://kerneltrap.org/mailarchive/linux-kernel/2008/5/27/1955234). Signed-off-by: Abhishek Sagar Signed-off-by: Ingo Molnar --- arch/arm/kernel/kprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/kernel/kprobes.c b/arch/arm/kernel/kprobes.c index 5593dd207216..5ee39e10c8d1 100644 --- a/arch/arm/kernel/kprobes.c +++ b/arch/arm/kernel/kprobes.c @@ -274,7 +274,7 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, * for kretprobe handlers which should normally be interested in r0 only * anyway. */ -static void __attribute__((naked)) __kprobes kretprobe_trampoline(void) +void __naked __kprobes kretprobe_trampoline(void) { __asm__ __volatile__ ( "stmdb sp!, {r0 - r11} \n\t" -- cgit v1.2.2 From 1d74f2a0f64b4091e5e91b55ac1b17dff93f4b59 Mon Sep 17 00:00:00 2001 From: Abhishek Sagar Date: Sun, 1 Jun 2008 21:47:42 +0530 Subject: ftrace: remove ftrace_ip_converted() Remove the unneeded function ftrace_ip_converted(). 
Signed-off-by: Abhishek Sagar Signed-off-by: Ingo Molnar --- arch/arm/kernel/ftrace.c | 10 ---------- arch/powerpc/kernel/ftrace.c | 10 ---------- arch/sparc64/kernel/ftrace.c | 7 ------- arch/x86/kernel/ftrace.c | 10 ---------- 4 files changed, 37 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index f4cb4cc3fa0c..22f3d6e309f9 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -22,16 +22,6 @@ static unsigned long bl_insn; static const unsigned long NOP = 0xe1a00000; /* mov r0, r0 */ -/* return true if mcount call site is already patched/no-op'ed */ -int ftrace_ip_converted(unsigned long pc) -{ - unsigned long save; - - pc -= INSN_SIZE; - save = *(unsigned long *)pc; - return save == NOP; -} - unsigned char *ftrace_nop_replace(void) { return (char *)&NOP; diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 69ed41223468..e12c593ab9ca 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -27,16 +27,6 @@ static unsigned int ftrace_nop = 0x60000000; # define GET_ADDR(addr) *(unsigned long *)addr #endif -notrace int ftrace_ip_converted(unsigned long ip) -{ - unsigned int save; - - ip -= CALL_BACK; - save = *(unsigned int *)ip; - - return save == ftrace_nop; -} - static unsigned int notrace ftrace_calc_offset(long ip, long addr) { return (int)((addr + CALL_BACK) - ip); diff --git a/arch/sparc64/kernel/ftrace.c b/arch/sparc64/kernel/ftrace.c index f449e6df6c4a..c17373195b1e 100644 --- a/arch/sparc64/kernel/ftrace.c +++ b/arch/sparc64/kernel/ftrace.c @@ -7,13 +7,6 @@ static const u32 ftrace_nop = 0x01000000; -notrace int ftrace_ip_converted(unsigned long ip) -{ - u32 insn = *(u32 *) ip; - - return (insn == ftrace_nop); -} - notrace unsigned char *ftrace_nop_replace(void) { return (char *)&ftrace_nop; diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 498608c015fb..bc5cf8d46742 100644 --- a/arch/x86/kernel/ftrace.c +++ 
b/arch/x86/kernel/ftrace.c @@ -31,16 +31,6 @@ union ftrace_code_union { } __attribute__((packed)); }; -notrace int ftrace_ip_converted(unsigned long ip) -{ - unsigned long save; - - ip -= CALL_BACK; - save = *(long *)ip; - - return save == *ftrace_nop; -} - static int notrace ftrace_calc_offset(long ip, long addr) { return (int)(addr - ip); -- cgit v1.2.2 From ee4311adf105f4d740f52e3948acc1d81598afcc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 17 Jun 2008 17:43:02 +0200 Subject: ftrace: build fix with gcc 4.3 fix: arch/x86/kernel/ftrace.c: Assembler messages: arch/x86/kernel/ftrace.c:82: Error: bad register name `%sil' make[1]: *** [arch/x86/kernel/ftrace.o] Error 1 Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index bc5cf8d46742..55828149e01e 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -88,7 +88,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, ".previous\n" _ASM_EXTABLE(1b, 3b) : "=r"(faulted), "=a"(replaced) - : "r"(ip), "r"(new), "r"(newch), + : "r"(ip), "r"(new), "c"(newch), "0"(faulted), "a"(old) : "memory"); sync_core(); -- cgit v1.2.2 From 395a59d0f8e86bb39cd700c3d185d30c670bb958 Mon Sep 17 00:00:00 2001 From: Abhishek Sagar Date: Sat, 21 Jun 2008 23:47:27 +0530 Subject: ftrace: store mcount address in rec->ip Record the address of the mcount call-site. Currently all archs except sparc64 record the address of the instruction following the mcount call-site. Some general cleanups are entailed. Storing mcount addresses in rec->ip enables looking them up in the kprobe hash table later on to check if they're kprobe'd. 
Signed-off-by: Abhishek Sagar Cc: davem@davemloft.net Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/arm/kernel/armksyms.c | 10 +++++----- arch/arm/kernel/entry-common.S | 4 ++++ arch/arm/kernel/ftrace.c | 16 +++++++--------- arch/powerpc/kernel/entry_32.S | 4 ++++ arch/powerpc/kernel/entry_64.S | 5 ++++- arch/powerpc/kernel/ftrace.c | 21 +++++++-------------- arch/sparc64/kernel/ftrace.c | 10 ++++++---- arch/sparc64/kernel/sparc64_ksyms.c | 2 +- arch/x86/kernel/entry_32.S | 4 ++++ arch/x86/kernel/entry_64.S | 4 ++++ arch/x86/kernel/ftrace.c | 26 +++++++++----------------- arch/x86/kernel/i386_ksyms_32.c | 2 +- arch/x86/kernel/x8664_ksyms_64.c | 2 +- 13 files changed, 57 insertions(+), 53 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index 3b132215cbf8..cc7b246e9652 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -18,6 +18,7 @@ #include #include #include +#include /* * libgcc functions - functions that are used internally by the @@ -48,11 +49,6 @@ extern void __aeabi_ulcmp(void); extern void fpundefinstr(void); extern void fp_enter(void); -#ifdef CONFIG_FTRACE -extern void mcount(void); -EXPORT_SYMBOL(mcount); -#endif - /* * This has a special calling convention; it doesn't * modify any of the usual registers, except for LR. 
@@ -186,3 +182,7 @@ EXPORT_SYMBOL(_find_next_bit_be); #endif EXPORT_SYMBOL(copy_page); + +#ifdef CONFIG_FTRACE +EXPORT_SYMBOL(mcount); +#endif diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 8f79a4789ed4..84694e88b428 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -9,6 +9,7 @@ */ #include +#include #include #include "entry-header.S" @@ -104,6 +105,7 @@ ENTRY(ret_from_fork) ENTRY(mcount) stmdb sp!, {r0-r3, lr} mov r0, lr + sub r0, r0, #MCOUNT_INSN_SIZE .globl mcount_call mcount_call: @@ -114,6 +116,7 @@ ENTRY(ftrace_caller) stmdb sp!, {r0-r3, lr} ldr r1, [fp, #-4] mov r0, lr + sub r0, r0, #MCOUNT_INSN_SIZE .globl ftrace_call ftrace_call: @@ -134,6 +137,7 @@ ENTRY(mcount) trace: ldr r1, [fp, #-4] mov r0, lr + sub r0, r0, #MCOUNT_INSN_SIZE mov lr, pc mov pc, r2 ldmia sp!, {r0-r3, pc} diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index 22f3d6e309f9..76d50e6091bc 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -12,9 +12,10 @@ */ #include + #include +#include -#define INSN_SIZE 4 #define PC_OFFSET 8 #define BL_OPCODE 0xeb000000 #define BL_OFFSET_MASK 0x00ffffff @@ -32,10 +33,10 @@ unsigned char *ftrace_call_replace(unsigned long pc, unsigned long addr) { long offset; - offset = (long)addr - (long)(pc - INSN_SIZE + PC_OFFSET); + offset = (long)addr - (long)(pc + PC_OFFSET); if (unlikely(offset < -33554432 || offset > 33554428)) { /* Can't generate branches that far (from ARM ARM). Ftrace - * doesn't generate branches outside of core kernel text. + * doesn't generate branches outside of kernel text. 
*/ WARN_ON_ONCE(1); return NULL; @@ -52,7 +53,6 @@ int ftrace_modify_code(unsigned long pc, unsigned char *old_code, old = *(unsigned long *)old_code; new = *(unsigned long *)new_code; - pc -= INSN_SIZE; __asm__ __volatile__ ( "1: ldr %1, [%2] \n" @@ -77,7 +77,7 @@ int ftrace_modify_code(unsigned long pc, unsigned char *old_code, : "memory"); if (!err && (replaced == old)) - flush_icache_range(pc, pc + INSN_SIZE); + flush_icache_range(pc, pc + MCOUNT_INSN_SIZE); return err; } @@ -89,8 +89,7 @@ int ftrace_update_ftrace_func(ftrace_func_t func) unsigned char *new; pc = (unsigned long)&ftrace_call; - pc += INSN_SIZE; - memcpy(&old, &ftrace_call, INSN_SIZE); + memcpy(&old, &ftrace_call, MCOUNT_INSN_SIZE); new = ftrace_call_replace(pc, (unsigned long)func); ret = ftrace_modify_code(pc, (unsigned char *)&old, new); return ret; @@ -103,8 +102,7 @@ int ftrace_mcount_set(unsigned long *data) unsigned char *new; pc = (unsigned long)&mcount_call; - pc += INSN_SIZE; - memcpy(&old, &mcount_call, INSN_SIZE); + memcpy(&old, &mcount_call, MCOUNT_INSN_SIZE); new = ftrace_call_replace(pc, *addr); *addr = ftrace_modify_code(pc, (unsigned char *)&old, new); return 0; diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 3b1dd29d9f91..7231a708af0d 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -30,6 +30,7 @@ #include #include #include +#include #undef SHOW_SYSCALLS #undef SHOW_SYSCALLS_TASK @@ -1053,6 +1054,7 @@ _GLOBAL(_mcount) stw r10,40(r1) stw r3, 44(r1) stw r5, 8(r1) + subi r3, r3, MCOUNT_INSN_SIZE .globl mcount_call mcount_call: bl ftrace_stub @@ -1090,6 +1092,7 @@ _GLOBAL(ftrace_caller) stw r10,40(r1) stw r3, 44(r1) stw r5, 8(r1) + subi r3, r3, MCOUNT_INSN_SIZE .globl ftrace_call ftrace_call: bl ftrace_stub @@ -1128,6 +1131,7 @@ _GLOBAL(_mcount) stw r3, 44(r1) stw r5, 8(r1) + subi r3, r3, MCOUNT_INSN_SIZE LOAD_REG_ADDR(r5, ftrace_trace_function) lwz r5,0(r5) diff --git a/arch/powerpc/kernel/entry_64.S 
b/arch/powerpc/kernel/entry_64.S index 2c4d9e056ead..2f511a969d2c 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -31,6 +31,7 @@ #include #include #include +#include /* * System calls. @@ -879,6 +880,7 @@ _GLOBAL(_mcount) mflr r3 stdu r1, -112(r1) std r3, 128(r1) + subi r3, r3, MCOUNT_INSN_SIZE .globl mcount_call mcount_call: bl ftrace_stub @@ -895,6 +897,7 @@ _GLOBAL(ftrace_caller) stdu r1, -112(r1) std r3, 128(r1) ld r4, 16(r11) + subi r3, r3, MCOUNT_INSN_SIZE .globl ftrace_call ftrace_call: bl ftrace_stub @@ -916,7 +919,7 @@ _GLOBAL(_mcount) std r3, 128(r1) ld r4, 16(r11) - + subi r3, r3, MCOUNT_INSN_SIZE LOAD_REG_ADDR(r5,ftrace_trace_function) ld r5,0(r5) ld r5,0(r5) diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index e12c593ab9ca..3855ceb937b0 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -15,8 +15,8 @@ #include #include +#include -#define CALL_BACK 4 static unsigned int ftrace_nop = 0x60000000; @@ -27,9 +27,10 @@ static unsigned int ftrace_nop = 0x60000000; # define GET_ADDR(addr) *(unsigned long *)addr #endif + static unsigned int notrace ftrace_calc_offset(long ip, long addr) { - return (int)((addr + CALL_BACK) - ip); + return (int)(addr - ip); } notrace unsigned char *ftrace_nop_replace(void) @@ -76,9 +77,6 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned new = *(unsigned *)new_code; int faulted = 0; - /* move the IP back to the start of the call */ - ip -= CALL_BACK; - /* * Note: Due to modules and __init, code can * disappear and change, we need to protect against faulting @@ -118,12 +116,10 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, notrace int ftrace_update_ftrace_func(ftrace_func_t func) { unsigned long ip = (unsigned long)(&ftrace_call); - unsigned char old[4], *new; + unsigned char old[MCOUNT_INSN_SIZE], *new; int ret; - ip += CALL_BACK; - - memcpy(old, &ftrace_call, 4); + memcpy(old, &ftrace_call, 
MCOUNT_INSN_SIZE); new = ftrace_call_replace(ip, (unsigned long)func); ret = ftrace_modify_code(ip, old, new); @@ -134,16 +130,13 @@ notrace int ftrace_mcount_set(unsigned long *data) { unsigned long ip = (long)(&mcount_call); unsigned long *addr = data; - unsigned char old[4], *new; - - /* ip is at the location, but modify code will subtact this */ - ip += CALL_BACK; + unsigned char old[MCOUNT_INSN_SIZE], *new; /* * Replace the mcount stub with a pointer to the * ip recorder function. */ - memcpy(old, &mcount_call, 4); + memcpy(old, &mcount_call, MCOUNT_INSN_SIZE); new = ftrace_call_replace(ip, *addr); *addr = ftrace_modify_code(ip, old, new); diff --git a/arch/sparc64/kernel/ftrace.c b/arch/sparc64/kernel/ftrace.c index c17373195b1e..4298d0aee713 100644 --- a/arch/sparc64/kernel/ftrace.c +++ b/arch/sparc64/kernel/ftrace.c @@ -5,6 +5,8 @@ #include #include +#include + static const u32 ftrace_nop = 0x01000000; notrace unsigned char *ftrace_nop_replace(void) @@ -60,9 +62,9 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, notrace int ftrace_update_ftrace_func(ftrace_func_t func) { unsigned long ip = (unsigned long)(&ftrace_call); - unsigned char old[4], *new; + unsigned char old[MCOUNT_INSN_SIZE], *new; - memcpy(old, &ftrace_call, 4); + memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE); new = ftrace_call_replace(ip, (unsigned long)func); return ftrace_modify_code(ip, old, new); } @@ -71,13 +73,13 @@ notrace int ftrace_mcount_set(unsigned long *data) { unsigned long ip = (long)(&mcount_call); unsigned long *addr = data; - unsigned char old[4], *new; + unsigned char old[MCOUNT_INSN_SIZE], *new; /* * Replace the mcount stub with a pointer to the * ip recorder function. 
*/ - memcpy(old, &mcount_call, 4); + memcpy(old, &mcount_call, MCOUNT_INSN_SIZE); new = ftrace_call_replace(ip, *addr); *addr = ftrace_modify_code(ip, old, new); diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c index 8ac0b99f2c55..b80d982a29c6 100644 --- a/arch/sparc64/kernel/sparc64_ksyms.c +++ b/arch/sparc64/kernel/sparc64_ksyms.c @@ -53,6 +53,7 @@ #include #include #include +#include struct poll { int fd; @@ -112,7 +113,6 @@ EXPORT_SYMBOL(smp_call_function); #endif /* CONFIG_SMP */ #if defined(CONFIG_MCOUNT) -extern void _mcount(void); EXPORT_SYMBOL(_mcount); #endif diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 04ea83ccb979..95e6bbe3665e 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -51,6 +51,7 @@ #include #include #include +#include #include "irq_vectors.h" /* @@ -1118,6 +1119,7 @@ ENTRY(mcount) pushl %ecx pushl %edx movl 0xc(%esp), %eax + subl $MCOUNT_INSN_SIZE, %eax .globl mcount_call mcount_call: @@ -1136,6 +1138,7 @@ ENTRY(ftrace_caller) pushl %edx movl 0xc(%esp), %eax movl 0x4(%ebp), %edx + subl $MCOUNT_INSN_SIZE, %eax .globl ftrace_call ftrace_call: @@ -1166,6 +1169,7 @@ trace: pushl %edx movl 0xc(%esp), %eax movl 0x4(%ebp), %edx + subl $MCOUNT_INSN_SIZE, %eax call *ftrace_trace_function diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index fe25e5febca3..b0f7308f78a6 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -51,6 +51,7 @@ #include #include #include +#include .code64 @@ -68,6 +69,7 @@ ENTRY(mcount) movq %r9, 48(%rsp) movq 0x38(%rsp), %rdi + subq $MCOUNT_INSN_SIZE, %rdi .globl mcount_call mcount_call: @@ -99,6 +101,7 @@ ENTRY(ftrace_caller) movq 0x38(%rsp), %rdi movq 8(%rbp), %rsi + subq $MCOUNT_INSN_SIZE, %rdi .globl ftrace_call ftrace_call: @@ -139,6 +142,7 @@ trace: movq 0x38(%rsp), %rdi movq 8(%rbp), %rsi + subq $MCOUNT_INSN_SIZE, %rdi call *ftrace_trace_function diff --git a/arch/x86/kernel/ftrace.c 
b/arch/x86/kernel/ftrace.c index 55828149e01e..ab115cd15fdf 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -17,20 +17,21 @@ #include #include +#include -#define CALL_BACK 5 /* Long is fine, even if it is only 4 bytes ;-) */ static long *ftrace_nop; union ftrace_code_union { - char code[5]; + char code[MCOUNT_INSN_SIZE]; struct { char e8; int offset; } __attribute__((packed)); }; + static int notrace ftrace_calc_offset(long ip, long addr) { return (int)(addr - ip); @@ -46,7 +47,7 @@ notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) static union ftrace_code_union calc; calc.e8 = 0xe8; - calc.offset = ftrace_calc_offset(ip, addr); + calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr); /* * No locking needed, this must be called via kstop_machine @@ -65,9 +66,6 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char newch = new_code[4]; int faulted = 0; - /* move the IP back to the start of the call */ - ip -= CALL_BACK; - /* * Note: Due to modules and __init, code can * disappear and change, we need to protect against faulting @@ -102,12 +100,10 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, notrace int ftrace_update_ftrace_func(ftrace_func_t func) { unsigned long ip = (unsigned long)(&ftrace_call); - unsigned char old[5], *new; + unsigned char old[MCOUNT_INSN_SIZE], *new; int ret; - ip += CALL_BACK; - - memcpy(old, &ftrace_call, 5); + memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE); new = ftrace_call_replace(ip, (unsigned long)func); ret = ftrace_modify_code(ip, old, new); @@ -118,16 +114,13 @@ notrace int ftrace_mcount_set(unsigned long *data) { unsigned long ip = (long)(&mcount_call); unsigned long *addr = data; - unsigned char old[5], *new; - - /* ip is at the location, but modify code will subtact this */ - ip += CALL_BACK; + unsigned char old[MCOUNT_INSN_SIZE], *new; /* * Replace the mcount stub with a pointer to the * ip recorder function. 
*/ - memcpy(old, &mcount_call, 5); + memcpy(old, &mcount_call, MCOUNT_INSN_SIZE); new = ftrace_call_replace(ip, *addr); *addr = ftrace_modify_code(ip, old, new); @@ -142,8 +135,7 @@ int __init ftrace_dyn_arch_init(void *data) ftrace_mcount_set(data); - ftrace_nop = (unsigned long *)noptable[CALL_BACK]; + ftrace_nop = (unsigned long *)noptable[MCOUNT_INSN_SIZE]; return 0; } - diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c index 29999dbb754c..dd7ebee446af 100644 --- a/arch/x86/kernel/i386_ksyms_32.c +++ b/arch/x86/kernel/i386_ksyms_32.c @@ -1,9 +1,9 @@ -#include #include #include #include #include +#include #ifdef CONFIG_FTRACE /* mcount is defined in assembly */ diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 122885bc5f3b..16ff4bf418d9 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -1,7 +1,6 @@ /* Exports for assembly files. All C exports should go in the respective C files. */ -#include #include #include @@ -11,6 +10,7 @@ #include #include #include +#include #ifdef CONFIG_FTRACE /* mcount is defined in assembly */ -- cgit v1.2.2 From 760378e1497841246ea7e42abad617d8a8ac0bcc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 1 Jul 2008 17:35:06 +0200 Subject: fix "ftrace: store mcount address in rec->ip" Alexander Beregalov reported this build failure: $ make CROSS_COMPILE=sparc64-unknown-linux-gnu- image modules && sudo make modules_install CHK include/linux/version.h CHK include/linux/utsrelease.h CALL scripts/checksyscalls.sh CHK include/linux/compile.h dnsdomainname: Unknown host CC arch/sparc64/kernel/sparc64_ksyms.o arch/sparc64/kernel/sparc64_ksyms.c:116: error: '_mcount' undeclared here (not in a function) cc1: warnings being treated as errors arch/sparc64/kernel/sparc64_ksyms.c:116: error: type defaults to 'int' in declaration of '_mcount' And bisected it back to: | commit 395a59d0f8e86bb39cd700c3d185d30c670bb958 | Author: Abhishek Sagar | 
Date: Sat Jun 21 23:47:27 2008 +0530 | | ftrace: store mcount address in rec->ip The mcount prototype is only available under CONFIG_FTRACE; extend it to CONFIG_MCOUNT as well. Reported-and-bisected-by: Alexander Beregalov Signed-off-by: Ingo Molnar --- arch/sparc64/kernel/sparc64_ksyms.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c index b80d982a29c6..49d3ea50c247 100644 --- a/arch/sparc64/kernel/sparc64_ksyms.c +++ b/arch/sparc64/kernel/sparc64_ksyms.c @@ -112,7 +112,7 @@ EXPORT_SYMBOL(__write_trylock); EXPORT_SYMBOL(smp_call_function); #endif /* CONFIG_SMP */ -#if defined(CONFIG_MCOUNT) +#ifdef CONFIG_MCOUNT EXPORT_SYMBOL(_mcount); #endif -- cgit v1.2.2