Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig                      2
-rw-r--r--  arch/x86/Kconfig.debug                8
-rw-r--r--  arch/x86/kernel/Makefile              8
-rw-r--r--  arch/x86/kernel/alternative.c        22
-rw-r--r--  arch/x86/kernel/entry_32.S           68
-rw-r--r--  arch/x86/kernel/entry_64.S          102
-rw-r--r--  arch/x86/kernel/ftrace.c            149
-rw-r--r--  arch/x86/kernel/i386_ksyms_32.c       9
-rw-r--r--  arch/x86/kernel/machine_kexec_32.c    4
-rw-r--r--  arch/x86/kernel/machine_kexec_64.c    4
-rw-r--r--  arch/x86/kernel/process_32.c          3
-rw-r--r--  arch/x86/kernel/process_64.c          3
-rw-r--r--  arch/x86/kernel/vsyscall_64.c         3
-rw-r--r--  arch/x86/kernel/x8664_ksyms_64.c     11
-rw-r--r--  arch/x86/lib/Makefile                 1
-rw-r--r--  arch/x86/lib/thunk_32.S              47
-rw-r--r--  arch/x86/lib/thunk_64.S              19
-rw-r--r--  arch/x86/mm/fault.c                  56
-rw-r--r--  arch/x86/mm/init_32.c                 4
-rw-r--r--  arch/x86/mm/init_64.c                10
-rw-r--r--  arch/x86/vdso/vclock_gettime.c       15
-rw-r--r--  arch/x86/vdso/vgetcpu.c               3
22 files changed, 524 insertions(+), 27 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 52e18e6d2ba0..b0937c03af3c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -23,6 +23,8 @@ config X86
 	select HAVE_OPROFILE
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
+	select HAVE_DYNAMIC_FTRACE
+	select HAVE_FTRACE
 	select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
 	select HAVE_ARCH_KGDB if !X86_VOYAGER

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 18363374d51a..f395fd537c5c 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -172,6 +172,14 @@ config IOMMU_LEAK
 	  Add a simple leak tracer to the IOMMU code. This is useful when you
 	  are debugging a buggy device driver that leaks IOMMU mappings.

+config PAGE_FAULT_HANDLERS
+	bool "Custom page fault handlers"
+	depends on DEBUG_KERNEL
+	help
+	  Allow the use of custom page fault handlers. A kernel module may
+	  register a function that is called on every page fault. Custom
+	  handlers are used by some debugging and reverse engineering tools.
+
 #
 # IO delay types:
 #
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 5e618c3b4720..739d49acd2f1 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -6,6 +6,13 @@ extra-y := head_$(BITS).o head$(BITS).o init_task.o vmlinux.lds

 CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)

+ifdef CONFIG_FTRACE
+# Do not profile debug utilities
+CFLAGS_REMOVE_tsc_64.o = -pg
+CFLAGS_REMOVE_tsc_32.o = -pg
+CFLAGS_REMOVE_rtc.o = -pg
+endif
+
 #
 # vsyscalls (which work on the user stack) should have
 # no stack-protector checks:
@@ -56,6 +63,7 @@ obj-$(CONFIG_X86_MPPARSE) += mpparse.o
 obj-$(CONFIG_X86_LOCAL_APIC)	+= apic_$(BITS).o nmi_$(BITS).o
 obj-$(CONFIG_X86_IO_APIC)	+= io_apic_$(BITS).o
 obj-$(CONFIG_X86_REBOOTFIXUPS)	+= reboot_fixups_32.o
+obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
 obj-$(CONFIG_KEXEC)		+= machine_kexec_$(BITS).o
 obj-$(CONFIG_KEXEC)		+= relocate_kernel_$(BITS).o crash.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump_$(BITS).o
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 65c7857a90dd..2763cb37b553 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -1,6 +1,6 @@
 #include <linux/module.h>
 #include <linux/sched.h>
-#include <linux/spinlock.h>
+#include <linux/mutex.h>
 #include <linux/list.h>
 #include <linux/kprobes.h>
 #include <linux/mm.h>
@@ -143,7 +143,7 @@ static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = {
 #ifdef CONFIG_X86_64

 extern char __vsyscall_0;
-static inline const unsigned char*const * find_nop_table(void)
+const unsigned char *const *find_nop_table(void)
 {
 	return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
 	       boot_cpu_data.x86 < 6 ? k8_nops : p6_nops;
@@ -162,7 +162,7 @@ static const struct nop {
 	{ -1, NULL }
 };

-static const unsigned char*const * find_nop_table(void)
+const unsigned char *const *find_nop_table(void)
 {
 	const unsigned char *const *noptable = intel_nops;
 	int i;
@@ -279,7 +279,7 @@ struct smp_alt_module {
 	struct list_head next;
 };
 static LIST_HEAD(smp_alt_modules);
-static DEFINE_SPINLOCK(smp_alt);
+static DEFINE_MUTEX(smp_alt);
 static int smp_mode = 1;	/* protected by smp_alt */

 void alternatives_smp_module_add(struct module *mod, char *name,
@@ -312,12 +312,12 @@ void alternatives_smp_module_add(struct module *mod, char *name,
 		__func__, smp->locks, smp->locks_end,
 		smp->text, smp->text_end, smp->name);

-	spin_lock(&smp_alt);
+	mutex_lock(&smp_alt);
 	list_add_tail(&smp->next, &smp_alt_modules);
 	if (boot_cpu_has(X86_FEATURE_UP))
 		alternatives_smp_unlock(smp->locks, smp->locks_end,
 					smp->text, smp->text_end);
-	spin_unlock(&smp_alt);
+	mutex_unlock(&smp_alt);
 }

 void alternatives_smp_module_del(struct module *mod)
@@ -327,17 +327,17 @@ void alternatives_smp_module_del(struct module *mod)
 	if (smp_alt_once || noreplace_smp)
 		return;

-	spin_lock(&smp_alt);
+	mutex_lock(&smp_alt);
 	list_for_each_entry(item, &smp_alt_modules, next) {
 		if (mod != item->mod)
 			continue;
 		list_del(&item->next);
-		spin_unlock(&smp_alt);
+		mutex_unlock(&smp_alt);
 		DPRINTK("%s: %s\n", __func__, item->name);
 		kfree(item);
 		return;
 	}
-	spin_unlock(&smp_alt);
+	mutex_unlock(&smp_alt);
 }

 void alternatives_smp_switch(int smp)
@@ -359,7 +359,7 @@ void alternatives_smp_switch(int smp)
 		return;
 	BUG_ON(!smp && (num_online_cpus() > 1));

-	spin_lock(&smp_alt);
+	mutex_lock(&smp_alt);

 	/*
 	 * Avoid unnecessary switches because it forces JIT based VMs to
@@ -383,7 +383,7 @@ void alternatives_smp_switch(int smp)
 					      mod->text, mod->text_end);
 	}
 	smp_mode = smp;
-	spin_unlock(&smp_alt);
+	mutex_unlock(&smp_alt);
 }

 #endif
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index c778e4fa55a2..04ea83ccb979 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1110,6 +1110,74 @@ ENDPROC(xen_failsafe_callback)

 #endif	/* CONFIG_XEN */

+#ifdef CONFIG_FTRACE
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+ENTRY(mcount)
+	pushl %eax
+	pushl %ecx
+	pushl %edx
+	movl 0xc(%esp), %eax
+
+.globl mcount_call
+mcount_call:
+	call ftrace_stub
+
+	popl %edx
+	popl %ecx
+	popl %eax
+
+	ret
+END(mcount)
+
+ENTRY(ftrace_caller)
+	pushl %eax
+	pushl %ecx
+	pushl %edx
+	movl 0xc(%esp), %eax
+	movl 0x4(%ebp), %edx
+
+.globl ftrace_call
+ftrace_call:
+	call ftrace_stub
+
+	popl %edx
+	popl %ecx
+	popl %eax
+
+.globl ftrace_stub
+ftrace_stub:
+	ret
+END(ftrace_caller)
+
+#else /* ! CONFIG_DYNAMIC_FTRACE */
+
+ENTRY(mcount)
+	cmpl $ftrace_stub, ftrace_trace_function
+	jnz trace
+.globl ftrace_stub
+ftrace_stub:
+	ret
+
+	/* taken from glibc */
+trace:
+	pushl %eax
+	pushl %ecx
+	pushl %edx
+	movl 0xc(%esp), %eax
+	movl 0x4(%ebp), %edx
+
+	call *ftrace_trace_function
+
+	popl %edx
+	popl %ecx
+	popl %eax
+
+	jmp ftrace_stub
+END(mcount)
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* CONFIG_FTRACE */
+
 .section .rodata,"a"
 #include "syscall_table_32.S"

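Both mcount variants above use the same calling convention: the traced function's address goes in %eax (first argument) and its caller's return address in %edx (second argument) before the indirect call through ftrace_trace_function. A tracer callback therefore has a two-argument C shape along these lines; a minimal sketch, where the ftrace_ops registration noted in the comment belongs to the tracer core of this series rather than this diff:

```c
#include <linux/ftrace.h>

/*
 * Sketch of a callback matching the (ip, parent_ip) convention the
 * mcount stubs set up: ip is an address inside the traced function,
 * parent_ip is its caller's return address. It is marked notrace so
 * the tracer cannot recurse into itself.
 */
static void notrace my_trace_func(unsigned long ip, unsigned long parent_ip)
{
	/* record, count, or filter the call here */
}

static struct ftrace_ops my_ops = {
	.func = my_trace_func,
};

/* registered elsewhere with: register_ftrace_function(&my_ops); */
```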
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 556a8df522a7..fe25e5febca3 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -54,6 +54,108 @@

 	.code64

+#ifdef CONFIG_FTRACE
+#ifdef CONFIG_DYNAMIC_FTRACE
+ENTRY(mcount)
+
+	subq $0x38, %rsp
+	movq %rax, (%rsp)
+	movq %rcx, 8(%rsp)
+	movq %rdx, 16(%rsp)
+	movq %rsi, 24(%rsp)
+	movq %rdi, 32(%rsp)
+	movq %r8, 40(%rsp)
+	movq %r9, 48(%rsp)
+
+	movq 0x38(%rsp), %rdi
+
+.globl mcount_call
+mcount_call:
+	call ftrace_stub
+
+	movq 48(%rsp), %r9
+	movq 40(%rsp), %r8
+	movq 32(%rsp), %rdi
+	movq 24(%rsp), %rsi
+	movq 16(%rsp), %rdx
+	movq 8(%rsp), %rcx
+	movq (%rsp), %rax
+	addq $0x38, %rsp
+
+	retq
+END(mcount)
+
+ENTRY(ftrace_caller)
+
+	/* taken from glibc */
+	subq $0x38, %rsp
+	movq %rax, (%rsp)
+	movq %rcx, 8(%rsp)
+	movq %rdx, 16(%rsp)
+	movq %rsi, 24(%rsp)
+	movq %rdi, 32(%rsp)
+	movq %r8, 40(%rsp)
+	movq %r9, 48(%rsp)
+
+	movq 0x38(%rsp), %rdi
+	movq 8(%rbp), %rsi
+
+.globl ftrace_call
+ftrace_call:
+	call ftrace_stub
+
+	movq 48(%rsp), %r9
+	movq 40(%rsp), %r8
+	movq 32(%rsp), %rdi
+	movq 24(%rsp), %rsi
+	movq 16(%rsp), %rdx
+	movq 8(%rsp), %rcx
+	movq (%rsp), %rax
+	addq $0x38, %rsp
+
+.globl ftrace_stub
+ftrace_stub:
+	retq
+END(ftrace_caller)
+
+#else /* ! CONFIG_DYNAMIC_FTRACE */
+ENTRY(mcount)
+	cmpq $ftrace_stub, ftrace_trace_function
+	jnz trace
+.globl ftrace_stub
+ftrace_stub:
+	retq
+
+trace:
+	/* taken from glibc */
+	subq $0x38, %rsp
+	movq %rax, (%rsp)
+	movq %rcx, 8(%rsp)
+	movq %rdx, 16(%rsp)
+	movq %rsi, 24(%rsp)
+	movq %rdi, 32(%rsp)
+	movq %r8, 40(%rsp)
+	movq %r9, 48(%rsp)
+
+	movq 0x38(%rsp), %rdi
+	movq 8(%rbp), %rsi
+
+	call *ftrace_trace_function
+
+	movq 48(%rsp), %r9
+	movq 40(%rsp), %r8
+	movq 32(%rsp), %rdi
+	movq 24(%rsp), %rsi
+	movq 16(%rsp), %rdx
+	movq 8(%rsp), %rcx
+	movq (%rsp), %rax
+	addq $0x38, %rsp
+
+	jmp ftrace_stub
+END(mcount)
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* CONFIG_FTRACE */
+
 #ifndef CONFIG_PREEMPT
 #define retint_kernel retint_restore_args
 #endif
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
new file mode 100644
index 000000000000..bc5cf8d46742
--- /dev/null
+++ b/arch/x86/kernel/ftrace.c
@@ -0,0 +1,149 @@
+/*
+ * Code for replacing ftrace calls with jumps.
+ *
+ * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
+ *
+ * Thanks goes to Ingo Molnar, for suggesting the idea.
+ * Mathieu Desnoyers, for suggesting postponing the modifications.
+ * Arjan van de Ven, for keeping me straight, and explaining to me
+ * the dangers of modifying code on the run.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/hardirq.h>
+#include <linux/ftrace.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <linux/list.h>
+
+#include <asm/alternative.h>
+
+#define CALL_BACK		5
+
+/* Long is fine, even if it is only 4 bytes ;-) */
+static long *ftrace_nop;
+
+union ftrace_code_union {
+	char code[5];
+	struct {
+		char e8;
+		int offset;
+	} __attribute__((packed));
+};
+
+static int notrace ftrace_calc_offset(long ip, long addr)
+{
+	return (int)(addr - ip);
+}
+
+notrace unsigned char *ftrace_nop_replace(void)
+{
+	return (char *)ftrace_nop;
+}
+
+notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
+{
+	static union ftrace_code_union calc;
+
+	calc.e8		= 0xe8;
+	calc.offset	= ftrace_calc_offset(ip, addr);
+
+	/*
+	 * No locking needed, this must be called via kstop_machine
+	 * which in essence is like running on a uniprocessor machine.
+	 */
+	return calc.code;
+}
+
+notrace int
+ftrace_modify_code(unsigned long ip, unsigned char *old_code,
+		   unsigned char *new_code)
+{
+	unsigned replaced;
+	unsigned old = *(unsigned *)old_code; /* 4 bytes */
+	unsigned new = *(unsigned *)new_code; /* 4 bytes */
+	unsigned char newch = new_code[4];
+	int faulted = 0;
+
+	/* move the IP back to the start of the call */
+	ip -= CALL_BACK;
+
+	/*
+	 * Note: Due to modules and __init, code can
+	 *  disappear and change, we need to protect against faulting
+	 *  as well as code changing.
+	 *
+	 * No real locking needed, this code is run through
+	 * kstop_machine.
+	 */
+	asm volatile (
+		"1: lock\n"
+		"   cmpxchg %3, (%2)\n"
+		"   jnz 2f\n"
+		"   movb %b4, 4(%2)\n"
+		"2:\n"
+		".section .fixup, \"ax\"\n"
+		"3:	movl $1, %0\n"
+		"	jmp 2b\n"
+		".previous\n"
+		_ASM_EXTABLE(1b, 3b)
+		: "=r"(faulted), "=a"(replaced)
+		: "r"(ip), "r"(new), "r"(newch),
+		  "0"(faulted), "a"(old)
+		: "memory");
+	sync_core();
+
+	if (replaced != old && replaced != new)
+		faulted = 2;
+
+	return faulted;
+}
+
+notrace int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+	unsigned long ip = (unsigned long)(&ftrace_call);
+	unsigned char old[5], *new;
+	int ret;
+
+	ip += CALL_BACK;
+
+	memcpy(old, &ftrace_call, 5);
+	new = ftrace_call_replace(ip, (unsigned long)func);
+	ret = ftrace_modify_code(ip, old, new);
+
+	return ret;
+}
+
+notrace int ftrace_mcount_set(unsigned long *data)
+{
+	unsigned long ip = (long)(&mcount_call);
+	unsigned long *addr = data;
+	unsigned char old[5], *new;
+
+	/* ip is at the location, but modify code will subtract this */
+	ip += CALL_BACK;
+
+	/*
+	 * Replace the mcount stub with a pointer to the
+	 * ip recorder function.
+	 */
+	memcpy(old, &mcount_call, 5);
+	new = ftrace_call_replace(ip, *addr);
+	*addr = ftrace_modify_code(ip, old, new);
+
+	return 0;
+}
+
+int __init ftrace_dyn_arch_init(void *data)
+{
+	const unsigned char *const *noptable = find_nop_table();
+
+	/* This is running in kstop_machine */
+
+	ftrace_mcount_set(data);
+
+	ftrace_nop = (unsigned long *)noptable[CALL_BACK];
+
+	return 0;
+}
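The 5-byte sequence built by ftrace_call_replace() above is a plain x86 near call: opcode 0xe8 followed by a 32-bit displacement measured from the end of the instruction, which is why CALL_BACK (5) is added to and subtracted from ip around the rewrite. A standalone user-space sketch of the same encoding, using hypothetical addresses purely for illustration:

```c
#include <stdio.h>
#include <string.h>

/*
 * Encode "call target" at address ip, as ftrace_call_replace() does:
 * 0xe8 followed by rel32 = target - (ip + 5), i.e. the offset from
 * the byte after the 5-byte instruction.
 */
static void encode_call(unsigned char buf[5], unsigned long ip,
			unsigned long target)
{
	int rel32 = (int)(target - (ip + 5));

	buf[0] = 0xe8;
	memcpy(buf + 1, &rel32, sizeof(rel32));
}

int main(void)
{
	unsigned char buf[5];
	int i;

	encode_call(buf, 0x1000, 0x2000);	/* hypothetical addresses */
	for (i = 0; i < 5; i++)
		printf("%02x ", buf[i]);	/* prints: e8 fb 0f 00 00 */
	printf("\n");
	return 0;
}
```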
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c
index deb43785e923..29999dbb754c 100644
--- a/arch/x86/kernel/i386_ksyms_32.c
+++ b/arch/x86/kernel/i386_ksyms_32.c
@@ -1,7 +1,14 @@
+#include <linux/ftrace.h>
 #include <linux/module.h>
+
 #include <asm/checksum.h>
-#include <asm/desc.h>
 #include <asm/pgtable.h>
+#include <asm/desc.h>
+
+#ifdef CONFIG_FTRACE
+/* mcount is defined in assembly */
+EXPORT_SYMBOL(mcount);
+#endif

 /* Networking helper routines. */
 EXPORT_SYMBOL(csum_partial_copy_generic);
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index d0b234c9fc31..88923fd7a6fc 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -11,6 +11,8 @@
 #include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/numa.h>
+#include <linux/ftrace.h>
+
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
@@ -107,6 +109,8 @@ NORET_TYPE void machine_kexec(struct kimage *image)
 	unsigned long page_list[PAGES_NR];
 	void *control_page;

+	tracer_disable();
+
 	/* Interrupts aren't acceptable while we reboot */
 	local_irq_disable();

diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 576a03db4511..1558fdc174f9 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -11,6 +11,8 @@
 #include <linux/string.h>
 #include <linux/reboot.h>
 #include <linux/numa.h>
+#include <linux/ftrace.h>
+
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
@@ -184,6 +186,8 @@ NORET_TYPE void machine_kexec(struct kimage *image)
 	unsigned long page_list[PAGES_NR];
 	void *control_page;

+	tracer_disable();
+
 	/* Interrupts aren't acceptable while we reboot */
 	local_irq_disable();

diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 6d5483356e74..61f7481c31dd 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -185,7 +185,10 @@ void cpu_idle(void)

 			local_irq_disable();
 			__get_cpu_var(irq_stat).idle_timestamp = jiffies;
+			/* Don't trace irqs off for idle */
+			stop_critical_timings();
 			idle();
+			start_critical_timings();
 		}
 		tick_nohz_restart_sched_tick();
 		preempt_enable_no_resched();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ac54ff56df80..dc534f40c8d3 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -165,7 +165,10 @@ void cpu_idle(void)
 			 */
 			local_irq_disable();
 			enter_idle();
+			/* Don't trace irqs off for idle */
+			stop_critical_timings();
 			idle();
+			start_critical_timings();
 			/* In many cases the interrupt that ended idle
 			   has already called exit_idle. But some idle
 			   loops can be woken up without interrupt. */
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 61efa2f7d564..4063dfa2a02d 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -42,7 +42,8 @@
 #include <asm/topology.h>
 #include <asm/vgtod.h>

-#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
+#define __vsyscall(nr) \
+		__attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace
 #define __syscall_clobber "r11","cx","memory"

 /*
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index f6c05d0410fb..122885bc5f3b 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -1,15 +1,22 @@
 /* Exports for assembly files.
    All C exports should go in the respective C files. */

+#include <linux/ftrace.h>
 #include <linux/module.h>
-#include <net/checksum.h>
 #include <linux/smp.h>

+#include <net/checksum.h>
+
 #include <asm/processor.h>
-#include <asm/uaccess.h>
 #include <asm/pgtable.h>
+#include <asm/uaccess.h>
 #include <asm/desc.h>

+#ifdef CONFIG_FTRACE
+/* mcount is defined in assembly */
+EXPORT_SYMBOL(mcount);
+#endif
+
 EXPORT_SYMBOL(kernel_thread);

 EXPORT_SYMBOL(__get_user_1);
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 76f60f52a885..84aa2883fe15 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -5,6 +5,7 @@
 obj-$(CONFIG_SMP) := msr-on-cpu.o

 lib-y := delay_$(BITS).o
+lib-y += thunk_$(BITS).o
 lib-y += usercopy_$(BITS).o getuser_$(BITS).o putuser_$(BITS).o
 lib-y += memcpy_$(BITS).o

diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S
new file mode 100644
index 000000000000..650b11e00ecc
--- /dev/null
+++ b/arch/x86/lib/thunk_32.S
@@ -0,0 +1,47 @@
+/*
+ * Trampoline to trace irqs off. (otherwise CALLER_ADDR1 might crash)
+ * Copyright 2008 by Steven Rostedt, Red Hat, Inc
+ *  (inspired by Andi Kleen's thunk_64.S)
+ * Subject to the GNU public license, v.2. No warranty of any kind.
+ */
+
+	#include <linux/linkage.h>
+
+#define ARCH_TRACE_IRQS_ON			\
+	pushl %eax;				\
+	pushl %ecx;				\
+	pushl %edx;				\
+	call trace_hardirqs_on;			\
+	popl %edx;				\
+	popl %ecx;				\
+	popl %eax;
+
+#define ARCH_TRACE_IRQS_OFF			\
+	pushl %eax;				\
+	pushl %ecx;				\
+	pushl %edx;				\
+	call trace_hardirqs_off;		\
+	popl %edx;				\
+	popl %ecx;				\
+	popl %eax;
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+	/* put return address in eax (arg1) */
+	.macro thunk_ra name,func
+	.globl \name
+\name:
+	pushl %eax
+	pushl %ecx
+	pushl %edx
+	/* Place EIP in the arg1 */
+	movl 3*4(%esp), %eax
+	call \func
+	popl %edx
+	popl %ecx
+	popl %eax
+	ret
+	.endm
+
+	thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller
+	thunk_ra trace_hardirqs_off_thunk,trace_hardirqs_off_caller
+#endif
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index e009251d4e9f..bf9a7d5a5428 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S
@@ -2,6 +2,7 @@
  * Save registers before calling assembly functions. This avoids
  * disturbance of register allocation in some inline assembly constructs.
  * Copyright 2001,2002 by Andi Kleen, SuSE Labs.
+ * Added trace_hardirqs callers - Copyright 2007 Steven Rostedt, Red Hat, Inc.
  * Subject to the GNU public license, v.2. No warranty of any kind.
  */

@@ -42,8 +43,22 @@
 #endif

 #ifdef CONFIG_TRACE_IRQFLAGS
-	thunk trace_hardirqs_on_thunk,trace_hardirqs_on
-	thunk trace_hardirqs_off_thunk,trace_hardirqs_off
+	/* put return address in rdi (arg1) */
+	.macro thunk_ra name,func
+	.globl \name
+\name:
+	CFI_STARTPROC
+	SAVE_ARGS
+	/* SAVE_ARGS pushes 9 elements */
+	/* the next element would be the rip */
+	movq 9*8(%rsp), %rdi
+	call \func
+	jmp  restore
+	CFI_ENDPROC
+	.endm
+
+	thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller
+	thunk_ra trace_hardirqs_off_thunk,trace_hardirqs_off_caller
 #endif

 #ifdef CONFIG_DEBUG_LOCK_ALLOC
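The point of the new thunk_ra macro is that trace_hardirqs_{on,off}_caller receive the return address of the code that actually toggled interrupts, so the irqs-off tracer can attribute the event to that site rather than to the thunk itself. The C-level entry points in the tracer core do the equivalent with __builtin_return_address; a sketch of the pattern, with the _caller names taken from this series:

```c
/*
 * Sketch: the C entry point forwards its own caller's address, just as
 * the assembly thunk forwards the saved rip from the stack.
 */
void trace_hardirqs_on(void)
{
	trace_hardirqs_on_caller((unsigned long)__builtin_return_address(0));
}
```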
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 8bcb6f40ccb6..42394b353c6a 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -49,6 +49,60 @@
 #define PF_RSVD		(1<<3)
 #define PF_INSTR	(1<<4)

+#ifdef CONFIG_PAGE_FAULT_HANDLERS
+static HLIST_HEAD(pf_handlers); /* protected by RCU */
+static DEFINE_SPINLOCK(pf_handlers_writer);
+
+void register_page_fault_handler(struct pf_handler *new_pfh)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&pf_handlers_writer, flags);
+	hlist_add_head_rcu(&new_pfh->hlist, &pf_handlers);
+	spin_unlock_irqrestore(&pf_handlers_writer, flags);
+}
+EXPORT_SYMBOL_GPL(register_page_fault_handler);
+
+/**
+ * unregister_page_fault_handler:
+ * The caller must ensure @old_pfh is not in use anymore before freeing it.
+ * This function does not guarantee it. The list of handlers is protected by
+ * RCU, so you can do this by e.g. calling synchronize_rcu().
+ */
+void unregister_page_fault_handler(struct pf_handler *old_pfh)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&pf_handlers_writer, flags);
+	hlist_del_rcu(&old_pfh->hlist);
+	spin_unlock_irqrestore(&pf_handlers_writer, flags);
+}
+EXPORT_SYMBOL_GPL(unregister_page_fault_handler);
+#endif
+
+/* returns non-zero if do_page_fault() should return */
+static int handle_custom_pf(struct pt_regs *regs, unsigned long error_code,
+			    unsigned long address)
+{
+#ifdef CONFIG_PAGE_FAULT_HANDLERS
+	int ret = 0;
+	struct pf_handler *cur;
+	struct hlist_node *ncur;
+
+	if (hlist_empty(&pf_handlers))
+		return 0;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(cur, ncur, &pf_handlers, hlist) {
+		ret = cur->handler(regs, error_code, address);
+		if (ret)
+			break;
+	}
+	rcu_read_unlock();
+	return ret;
+#else
+	return 0;
+#endif
+}
+
 static inline int notify_page_fault(struct pt_regs *regs)
 {
 #ifdef CONFIG_KPROBES
@@ -606,6 +660,8 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)

 	if (notify_page_fault(regs))
 		return;
+	if (handle_custom_pf(regs, error_code, address))
+		return;

 	/*
 	 * We fault-in kernel-space virtual memory on-demand. The
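Taken together with the PAGE_FAULT_HANDLERS Kconfig entry earlier in this diff, the intended use is for a module to embed a struct pf_handler, register it, and return non-zero from its callback to short-circuit do_page_fault(). A hypothetical module using the API; struct pf_handler's definition lives in a header outside this diff, so the include and the .hlist/.handler fields here are inferred from the code above:

```c
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/ptrace.h>
#include <linux/rcupdate.h>
#include <asm/kdebug.h>		/* assumed home of struct pf_handler */

static int my_pf_handler(struct pt_regs *regs, unsigned long error_code,
			 unsigned long address)
{
	printk(KERN_DEBUG "fault at %lx, error_code %lx\n",
	       address, error_code);
	return 0;	/* 0: fall through to the normal fault path */
}

static struct pf_handler my_handler = {
	.handler = my_pf_handler,
};

static int __init my_init(void)
{
	register_page_fault_handler(&my_handler);
	return 0;
}

static void __exit my_exit(void)
{
	unregister_page_fault_handler(&my_handler);
	/* per the kernel-doc above: wait out RCU readers before freeing */
	synchronize_rcu();
}

module_init(my_init);
module_exit(my_exit);
MODULE_LICENSE("GPL");
```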
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index ec30d10154b6..f96eca21ad8f 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -710,6 +710,8 @@ void mark_rodata_ro(void)
 	unsigned long start = PFN_ALIGN(_text);
 	unsigned long size = PFN_ALIGN(_etext) - start;

+#ifndef CONFIG_DYNAMIC_FTRACE
+	/* Dynamic tracing modifies the kernel text section */
 	set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
 	printk(KERN_INFO "Write protecting the kernel text: %luk\n",
 		size >> 10);
@@ -722,6 +724,8 @@ void mark_rodata_ro(void)
 	printk(KERN_INFO "Testing CPA: write protecting again\n");
 	set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
 #endif
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
 	start += size;
 	size = (unsigned long)__end_rodata - start;
 	set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 156e6d7b0e32..a5fd2e06f5c9 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -766,6 +766,13 @@ EXPORT_SYMBOL_GPL(rodata_test_data);
 void mark_rodata_ro(void)
 {
 	unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata);
+	unsigned long rodata_start =
+		((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+	/* Dynamic tracing modifies the kernel text section */
+	start = rodata_start;
+#endif

 	printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
 	       (end - start) >> 10);
@@ -775,8 +782,7 @@ void mark_rodata_ro(void)
 	 * The rodata section (but not the kernel text!) should also be
 	 * not-executable.
 	 */
-	start = ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
-	set_memory_nx(start, (end - start) >> PAGE_SHIFT);
+	set_memory_nx(rodata_start, (end - rodata_start) >> PAGE_SHIFT);

 	rodata_test();

diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index efa2ba7c6005..1ef0f90813d6 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -23,7 +23,7 @@

 #define gtod vdso_vsyscall_gtod_data

-static long vdso_fallback_gettime(long clock, struct timespec *ts)
+notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
 {
 	long ret;
 	asm("syscall" : "=a" (ret) :
@@ -31,7 +31,7 @@ static long vdso_fallback_gettime(long clock, struct timespec *ts)
 	return ret;
 }

-static inline long vgetns(void)
+notrace static inline long vgetns(void)
 {
 	long v;
 	cycles_t (*vread)(void);
@@ -40,7 +40,7 @@ static inline long vgetns(void)
 	return (v * gtod->clock.mult) >> gtod->clock.shift;
 }

-static noinline int do_realtime(struct timespec *ts)
+notrace static noinline int do_realtime(struct timespec *ts)
 {
 	unsigned long seq, ns;
 	do {
@@ -54,7 +54,8 @@ static noinline int do_realtime(struct timespec *ts)
 }

 /* Copy of the version in kernel/time.c which we cannot directly access */
-static void vset_normalized_timespec(struct timespec *ts, long sec, long nsec)
+notrace static void
+vset_normalized_timespec(struct timespec *ts, long sec, long nsec)
 {
 	while (nsec >= NSEC_PER_SEC) {
 		nsec -= NSEC_PER_SEC;
@@ -68,7 +69,7 @@ static void vset_normalized_timespec(struct timespec *ts, long sec, long nsec)
 	ts->tv_nsec = nsec;
 }

-static noinline int do_monotonic(struct timespec *ts)
+notrace static noinline int do_monotonic(struct timespec *ts)
 {
 	unsigned long seq, ns, secs;
 	do {
@@ -82,7 +83,7 @@ static noinline int do_monotonic(struct timespec *ts)
 	return 0;
 }

-int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
+notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
 {
 	if (likely(gtod->sysctl_enabled && gtod->clock.vread))
 		switch (clock) {
@@ -96,7 +97,7 @@ int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
 int clock_gettime(clockid_t, struct timespec *)
 	__attribute__((weak, alias("__vdso_clock_gettime")));

-int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
+notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
 {
 	long ret;
 	if (likely(gtod->sysctl_enabled && gtod->clock.vread)) {
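The notrace annotations matter here because vDSO code executes in user space: with CONFIG_FTRACE the kernel is built with -pg, and an mcount call emitted into a vsyscall would jump to a kernel text address that is meaningless in the user mapping. notrace suppresses that instrumentation per function; in this series it is defined essentially as below (the exact header location is an assumption):

```c
/* suppress mcount/instrumentation calls for the marked function */
#define notrace __attribute__((no_instrument_function))
```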
diff --git a/arch/x86/vdso/vgetcpu.c b/arch/x86/vdso/vgetcpu.c
index c8097f17f8a9..9fbc6b20026b 100644
--- a/arch/x86/vdso/vgetcpu.c
+++ b/arch/x86/vdso/vgetcpu.c
@@ -13,7 +13,8 @@
 #include <asm/vgtod.h>
 #include "vextern.h"

-long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
+notrace long
+__vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
 {
 	unsigned int p;
