aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2008-12-12 06:43:05 -0500
committerIngo Molnar <mingo@elte.hu>2008-12-12 06:43:05 -0500
commit81444a799550214f549caf579cf65a0ca55e70b7 (patch)
tree3288dac0740be2e1e7d1af4ee51d792a6e91edf3 /arch/x86
parenta64d31baed104be25305e9c71585d3ea4ee9a418 (diff)
parentda485e0cb16726797e99a595a399b9fc721b91bc (diff)
Merge branch 'tracing/fastboot' into cpus4096
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/Kconfig.cpu1
-rw-r--r--arch/x86/include/asm/ds.h134
-rw-r--r--arch/x86/include/asm/ftrace.h4
-rw-r--r--arch/x86/kernel/Makefile9
-rw-r--r--arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c4
-rw-r--r--arch/x86/kernel/cpu/intel.c3
-rw-r--r--arch/x86/kernel/ds.c692
-rw-r--r--arch/x86/kernel/dumpstack.c351
-rw-r--r--arch/x86/kernel/dumpstack.h39
-rw-r--r--arch/x86/kernel/dumpstack_32.c307
-rw-r--r--arch/x86/kernel/dumpstack_64.c289
-rw-r--r--arch/x86/kernel/entry_32.S21
-rw-r--r--arch/x86/kernel/entry_64.S78
-rw-r--r--arch/x86/kernel/ftrace.c134
-rw-r--r--arch/x86/kernel/process.c16
-rw-r--r--arch/x86/kernel/process_32.c4
-rw-r--r--arch/x86/kernel/process_64.c4
-rw-r--r--arch/x86/kernel/ptrace.c98
-rw-r--r--arch/x86/kvm/mmu.c2
-rw-r--r--arch/x86/kvm/paging_tmpl.h1
-rw-r--r--arch/x86/kvm/vmx.c4
-rw-r--r--arch/x86/mm/fault.c11
23 files changed, 1081 insertions, 1127 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 352f63df1d80..c7235e643aff 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -29,7 +29,7 @@ config X86
29 select HAVE_FTRACE_MCOUNT_RECORD 29 select HAVE_FTRACE_MCOUNT_RECORD
30 select HAVE_DYNAMIC_FTRACE 30 select HAVE_DYNAMIC_FTRACE
31 select HAVE_FUNCTION_TRACER 31 select HAVE_FUNCTION_TRACER
32 select HAVE_FUNCTION_RET_TRACER if X86_32 32 select HAVE_FUNCTION_GRAPH_TRACER
33 select HAVE_FUNCTION_TRACE_MCOUNT_TEST 33 select HAVE_FUNCTION_TRACE_MCOUNT_TEST
34 select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) 34 select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
35 select HAVE_ARCH_KGDB if !X86_VOYAGER 35 select HAVE_ARCH_KGDB if !X86_VOYAGER
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index b815664fe370..85a78575956c 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -515,6 +515,7 @@ config CPU_SUP_UMC_32
515config X86_DS 515config X86_DS
516 def_bool X86_PTRACE_BTS 516 def_bool X86_PTRACE_BTS
517 depends on X86_DEBUGCTLMSR 517 depends on X86_DEBUGCTLMSR
518 select HAVE_HW_BRANCH_TRACER
518 519
519config X86_PTRACE_BTS 520config X86_PTRACE_BTS
520 bool "Branch Trace Store" 521 bool "Branch Trace Store"
diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h
index a95008457ea4..99b6c39774a4 100644
--- a/arch/x86/include/asm/ds.h
+++ b/arch/x86/include/asm/ds.h
@@ -7,13 +7,12 @@
7 * 7 *
8 * It manages: 8 * It manages:
9 * - per-thread and per-cpu allocation of BTS and PEBS 9 * - per-thread and per-cpu allocation of BTS and PEBS
10 * - buffer memory allocation (optional) 10 * - buffer overflow handling (to be done)
11 * - buffer overflow handling
12 * - buffer access 11 * - buffer access
13 * 12 *
14 * It assumes: 13 * It assumes:
15 * - get_task_struct on all parameter tasks 14 * - get_task_struct on all traced tasks
16 * - current is allowed to trace parameter tasks 15 * - current is allowed to trace tasks
17 * 16 *
18 * 17 *
19 * Copyright (C) 2007-2008 Intel Corporation. 18 * Copyright (C) 2007-2008 Intel Corporation.
@@ -26,11 +25,18 @@
26 25
27#include <linux/types.h> 26#include <linux/types.h>
28#include <linux/init.h> 27#include <linux/init.h>
28#include <linux/err.h>
29 29
30 30
31#ifdef CONFIG_X86_DS 31#ifdef CONFIG_X86_DS
32 32
33struct task_struct; 33struct task_struct;
34struct ds_tracer;
35struct bts_tracer;
36struct pebs_tracer;
37
38typedef void (*bts_ovfl_callback_t)(struct bts_tracer *);
39typedef void (*pebs_ovfl_callback_t)(struct pebs_tracer *);
34 40
35/* 41/*
36 * Request BTS or PEBS 42 * Request BTS or PEBS
@@ -38,60 +44,62 @@ struct task_struct;
38 * Due to alignement constraints, the actual buffer may be slightly 44 * Due to alignement constraints, the actual buffer may be slightly
39 * smaller than the requested or provided buffer. 45 * smaller than the requested or provided buffer.
40 * 46 *
41 * Returns 0 on success; -Eerrno otherwise 47 * Returns a pointer to a tracer structure on success, or
48 * ERR_PTR(errcode) on failure.
49 *
50 * The interrupt threshold is independent from the overflow callback
51 * to allow users to use their own overflow interrupt handling mechanism.
42 * 52 *
43 * task: the task to request recording for; 53 * task: the task to request recording for;
44 * NULL for per-cpu recording on the current cpu 54 * NULL for per-cpu recording on the current cpu
45 * base: the base pointer for the (non-pageable) buffer; 55 * base: the base pointer for the (non-pageable) buffer;
46 * NULL if buffer allocation requested 56 * size: the size of the provided buffer in bytes
47 * size: the size of the requested or provided buffer
48 * ovfl: pointer to a function to be called on buffer overflow; 57 * ovfl: pointer to a function to be called on buffer overflow;
49 * NULL if cyclic buffer requested 58 * NULL if cyclic buffer requested
59 * th: the interrupt threshold in records from the end of the buffer;
60 * -1 if no interrupt threshold is requested.
50 */ 61 */
51typedef void (*ds_ovfl_callback_t)(struct task_struct *); 62extern struct bts_tracer *ds_request_bts(struct task_struct *task,
52extern int ds_request_bts(struct task_struct *task, void *base, size_t size, 63 void *base, size_t size,
53 ds_ovfl_callback_t ovfl); 64 bts_ovfl_callback_t ovfl, size_t th);
54extern int ds_request_pebs(struct task_struct *task, void *base, size_t size, 65extern struct pebs_tracer *ds_request_pebs(struct task_struct *task,
55 ds_ovfl_callback_t ovfl); 66 void *base, size_t size,
67 pebs_ovfl_callback_t ovfl,
68 size_t th);
56 69
57/* 70/*
58 * Release BTS or PEBS resources 71 * Release BTS or PEBS resources
59 * 72 *
60 * Frees buffers allocated on ds_request.
61 *
62 * Returns 0 on success; -Eerrno otherwise 73 * Returns 0 on success; -Eerrno otherwise
63 * 74 *
64 * task: the task to release resources for; 75 * tracer: the tracer handle returned from ds_request_~()
65 * NULL to release resources for the current cpu
66 */ 76 */
67extern int ds_release_bts(struct task_struct *task); 77extern int ds_release_bts(struct bts_tracer *tracer);
68extern int ds_release_pebs(struct task_struct *task); 78extern int ds_release_pebs(struct pebs_tracer *tracer);
69 79
70/* 80/*
71 * Return the (array) index of the write pointer. 81 * Get the (array) index of the write pointer.
72 * (assuming an array of BTS/PEBS records) 82 * (assuming an array of BTS/PEBS records)
73 * 83 *
74 * Returns -Eerrno on error 84 * Returns 0 on success; -Eerrno on error
75 * 85 *
76 * task: the task to access; 86 * tracer: the tracer handle returned from ds_request_~()
77 * NULL to access the current cpu 87 * pos (out): will hold the result
78 * pos (out): if not NULL, will hold the result
79 */ 88 */
80extern int ds_get_bts_index(struct task_struct *task, size_t *pos); 89extern int ds_get_bts_index(struct bts_tracer *tracer, size_t *pos);
81extern int ds_get_pebs_index(struct task_struct *task, size_t *pos); 90extern int ds_get_pebs_index(struct pebs_tracer *tracer, size_t *pos);
82 91
83/* 92/*
84 * Return the (array) index one record beyond the end of the array. 93 * Get the (array) index one record beyond the end of the array.
85 * (assuming an array of BTS/PEBS records) 94 * (assuming an array of BTS/PEBS records)
86 * 95 *
87 * Returns -Eerrno on error 96 * Returns 0 on success; -Eerrno on error
88 * 97 *
89 * task: the task to access; 98 * tracer: the tracer handle returned from ds_request_~()
90 * NULL to access the current cpu 99 * pos (out): will hold the result
91 * pos (out): if not NULL, will hold the result
92 */ 100 */
93extern int ds_get_bts_end(struct task_struct *task, size_t *pos); 101extern int ds_get_bts_end(struct bts_tracer *tracer, size_t *pos);
94extern int ds_get_pebs_end(struct task_struct *task, size_t *pos); 102extern int ds_get_pebs_end(struct pebs_tracer *tracer, size_t *pos);
95 103
96/* 104/*
97 * Provide a pointer to the BTS/PEBS record at parameter index. 105 * Provide a pointer to the BTS/PEBS record at parameter index.
@@ -102,14 +110,13 @@ extern int ds_get_pebs_end(struct task_struct *task, size_t *pos);
102 * 110 *
103 * Returns the size of a single record on success; -Eerrno on error 111 * Returns the size of a single record on success; -Eerrno on error
104 * 112 *
105 * task: the task to access; 113 * tracer: the tracer handle returned from ds_request_~()
106 * NULL to access the current cpu
107 * index: the index of the requested record 114 * index: the index of the requested record
108 * record (out): pointer to the requested record 115 * record (out): pointer to the requested record
109 */ 116 */
110extern int ds_access_bts(struct task_struct *task, 117extern int ds_access_bts(struct bts_tracer *tracer,
111 size_t index, const void **record); 118 size_t index, const void **record);
112extern int ds_access_pebs(struct task_struct *task, 119extern int ds_access_pebs(struct pebs_tracer *tracer,
113 size_t index, const void **record); 120 size_t index, const void **record);
114 121
115/* 122/*
@@ -129,38 +136,24 @@ extern int ds_access_pebs(struct task_struct *task,
129 * 136 *
130 * Returns the number of bytes written or -Eerrno. 137 * Returns the number of bytes written or -Eerrno.
131 * 138 *
132 * task: the task to access; 139 * tracer: the tracer handle returned from ds_request_~()
133 * NULL to access the current cpu
134 * buffer: the buffer to write 140 * buffer: the buffer to write
135 * size: the size of the buffer 141 * size: the size of the buffer
136 */ 142 */
137extern int ds_write_bts(struct task_struct *task, 143extern int ds_write_bts(struct bts_tracer *tracer,
138 const void *buffer, size_t size); 144 const void *buffer, size_t size);
139extern int ds_write_pebs(struct task_struct *task, 145extern int ds_write_pebs(struct pebs_tracer *tracer,
140 const void *buffer, size_t size); 146 const void *buffer, size_t size);
141 147
142/* 148/*
143 * Same as ds_write_bts/pebs, but omit ownership checks.
144 *
145 * This is needed to have some other task than the owner of the
146 * BTS/PEBS buffer or the parameter task itself write into the
147 * respective buffer.
148 */
149extern int ds_unchecked_write_bts(struct task_struct *task,
150 const void *buffer, size_t size);
151extern int ds_unchecked_write_pebs(struct task_struct *task,
152 const void *buffer, size_t size);
153
154/*
155 * Reset the write pointer of the BTS/PEBS buffer. 149 * Reset the write pointer of the BTS/PEBS buffer.
156 * 150 *
157 * Returns 0 on success; -Eerrno on error 151 * Returns 0 on success; -Eerrno on error
158 * 152 *
159 * task: the task to access; 153 * tracer: the tracer handle returned from ds_request_~()
160 * NULL to access the current cpu
161 */ 154 */
162extern int ds_reset_bts(struct task_struct *task); 155extern int ds_reset_bts(struct bts_tracer *tracer);
163extern int ds_reset_pebs(struct task_struct *task); 156extern int ds_reset_pebs(struct pebs_tracer *tracer);
164 157
165/* 158/*
166 * Clear the BTS/PEBS buffer and reset the write pointer. 159 * Clear the BTS/PEBS buffer and reset the write pointer.
@@ -168,33 +161,30 @@ extern int ds_reset_pebs(struct task_struct *task);
168 * 161 *
169 * Returns 0 on success; -Eerrno on error 162 * Returns 0 on success; -Eerrno on error
170 * 163 *
171 * task: the task to access; 164 * tracer: the tracer handle returned from ds_request_~()
172 * NULL to access the current cpu
173 */ 165 */
174extern int ds_clear_bts(struct task_struct *task); 166extern int ds_clear_bts(struct bts_tracer *tracer);
175extern int ds_clear_pebs(struct task_struct *task); 167extern int ds_clear_pebs(struct pebs_tracer *tracer);
176 168
177/* 169/*
178 * Provide the PEBS counter reset value. 170 * Provide the PEBS counter reset value.
179 * 171 *
180 * Returns 0 on success; -Eerrno on error 172 * Returns 0 on success; -Eerrno on error
181 * 173 *
182 * task: the task to access; 174 * tracer: the tracer handle returned from ds_request_pebs()
183 * NULL to access the current cpu
184 * value (out): the counter reset value 175 * value (out): the counter reset value
185 */ 176 */
186extern int ds_get_pebs_reset(struct task_struct *task, u64 *value); 177extern int ds_get_pebs_reset(struct pebs_tracer *tracer, u64 *value);
187 178
188/* 179/*
189 * Set the PEBS counter reset value. 180 * Set the PEBS counter reset value.
190 * 181 *
191 * Returns 0 on success; -Eerrno on error 182 * Returns 0 on success; -Eerrno on error
192 * 183 *
193 * task: the task to access; 184 * tracer: the tracer handle returned from ds_request_pebs()
194 * NULL to access the current cpu
195 * value: the new counter reset value 185 * value: the new counter reset value
196 */ 186 */
197extern int ds_set_pebs_reset(struct task_struct *task, u64 value); 187extern int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value);
198 188
199/* 189/*
200 * Initialization 190 * Initialization
@@ -207,17 +197,13 @@ extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *);
207/* 197/*
208 * The DS context - part of struct thread_struct. 198 * The DS context - part of struct thread_struct.
209 */ 199 */
200#define MAX_SIZEOF_DS (12 * 8)
201
210struct ds_context { 202struct ds_context {
211 /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */ 203 /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */
212 unsigned char *ds; 204 unsigned char ds[MAX_SIZEOF_DS];
213 /* the owner of the BTS and PEBS configuration, respectively */ 205 /* the owner of the BTS and PEBS configuration, respectively */
214 struct task_struct *owner[2]; 206 struct ds_tracer *owner[2];
215 /* buffer overflow notification function for BTS and PEBS */
216 ds_ovfl_callback_t callback[2];
217 /* the original buffer address */
218 void *buffer[2];
219 /* the number of allocated pages for on-request allocated buffers */
220 unsigned int pages[2];
221 /* use count */ 207 /* use count */
222 unsigned long count; 208 unsigned long count;
223 /* a pointer to the context location inside the thread_struct 209 /* a pointer to the context location inside the thread_struct
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 754a3e082f94..7e61b4ceb9a4 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -28,7 +28,7 @@ struct dyn_arch_ftrace {
28#endif /* __ASSEMBLY__ */ 28#endif /* __ASSEMBLY__ */
29#endif /* CONFIG_FUNCTION_TRACER */ 29#endif /* CONFIG_FUNCTION_TRACER */
30 30
31#ifdef CONFIG_FUNCTION_RET_TRACER 31#ifdef CONFIG_FUNCTION_GRAPH_TRACER
32 32
33#ifndef __ASSEMBLY__ 33#ifndef __ASSEMBLY__
34 34
@@ -51,6 +51,6 @@ struct ftrace_ret_stack {
51extern void return_to_handler(void); 51extern void return_to_handler(void);
52 52
53#endif /* __ASSEMBLY__ */ 53#endif /* __ASSEMBLY__ */
54#endif /* CONFIG_FUNCTION_RET_TRACER */ 54#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
55 55
56#endif /* _ASM_X86_FTRACE_H */ 56#endif /* _ASM_X86_FTRACE_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index af2bc36ca1c4..1cad9318d217 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -14,11 +14,6 @@ CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
14CFLAGS_REMOVE_ftrace.o = -pg 14CFLAGS_REMOVE_ftrace.o = -pg
15endif 15endif
16 16
17ifdef CONFIG_FUNCTION_RET_TRACER
18# Don't trace __switch_to() but let it for function tracer
19CFLAGS_REMOVE_process_32.o = -pg
20endif
21
22# 17#
23# vsyscalls (which work on the user stack) should have 18# vsyscalls (which work on the user stack) should have
24# no stack-protector checks: 19# no stack-protector checks:
@@ -30,7 +25,7 @@ CFLAGS_tsc.o := $(nostackp)
30 25
31obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o 26obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
32obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o 27obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
33obj-y += time_$(BITS).o ioport.o ldt.o 28obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o
34obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o 29obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o
35obj-$(CONFIG_X86_VISWS) += visws_quirks.o 30obj-$(CONFIG_X86_VISWS) += visws_quirks.o
36obj-$(CONFIG_X86_32) += probe_roms_32.o 31obj-$(CONFIG_X86_32) += probe_roms_32.o
@@ -70,7 +65,7 @@ obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
70obj-$(CONFIG_X86_IO_APIC) += io_apic.o 65obj-$(CONFIG_X86_IO_APIC) += io_apic.o
71obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o 66obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
72obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o 67obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
73obj-$(CONFIG_FUNCTION_RET_TRACER) += ftrace.o 68obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
74obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o 69obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
75obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o 70obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
76obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o 71obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 8e48c5d4467d..88ea02dcb622 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -33,6 +33,7 @@
33#include <linux/cpufreq.h> 33#include <linux/cpufreq.h>
34#include <linux/compiler.h> 34#include <linux/compiler.h>
35#include <linux/dmi.h> 35#include <linux/dmi.h>
36#include <linux/ftrace.h>
36 37
37#include <linux/acpi.h> 38#include <linux/acpi.h>
38#include <acpi/processor.h> 39#include <acpi/processor.h>
@@ -391,6 +392,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
391 unsigned int next_perf_state = 0; /* Index into perf table */ 392 unsigned int next_perf_state = 0; /* Index into perf table */
392 unsigned int i; 393 unsigned int i;
393 int result = 0; 394 int result = 0;
395 struct power_trace it;
394 396
395 dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu); 397 dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu);
396 398
@@ -427,6 +429,8 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
427 } 429 }
428 } 430 }
429 431
432 trace_power_mark(&it, POWER_PSTATE, next_perf_state);
433
430 switch (data->cpu_feature) { 434 switch (data->cpu_feature) {
431 case SYSTEM_INTEL_MSR_CAPABLE: 435 case SYSTEM_INTEL_MSR_CAPABLE:
432 cmd.type = SYSTEM_INTEL_MSR_CAPABLE; 436 cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index cce0b6118d55..816f27f289b1 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -307,12 +307,11 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
307 set_cpu_cap(c, X86_FEATURE_P4); 307 set_cpu_cap(c, X86_FEATURE_P4);
308 if (c->x86 == 6) 308 if (c->x86 == 6)
309 set_cpu_cap(c, X86_FEATURE_P3); 309 set_cpu_cap(c, X86_FEATURE_P3);
310#endif
310 311
311 if (cpu_has_bts) 312 if (cpu_has_bts)
312 ptrace_bts_init_intel(c); 313 ptrace_bts_init_intel(c);
313 314
314#endif
315
316 detect_extended_topology(c); 315 detect_extended_topology(c);
317 if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { 316 if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) {
318 /* 317 /*
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index a2d1176c38ee..19a8c2c0389f 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -7,13 +7,12 @@
7 * 7 *
8 * It manages: 8 * It manages:
9 * - per-thread and per-cpu allocation of BTS and PEBS 9 * - per-thread and per-cpu allocation of BTS and PEBS
10 * - buffer memory allocation (optional) 10 * - buffer overflow handling (to be done)
11 * - buffer overflow handling
12 * - buffer access 11 * - buffer access
13 * 12 *
14 * It assumes: 13 * It assumes:
15 * - get_task_struct on all parameter tasks 14 * - get_task_struct on all traced tasks
16 * - current is allowed to trace parameter tasks 15 * - current is allowed to trace tasks
17 * 16 *
18 * 17 *
19 * Copyright (C) 2007-2008 Intel Corporation. 18 * Copyright (C) 2007-2008 Intel Corporation.
@@ -28,6 +27,7 @@
28#include <linux/slab.h> 27#include <linux/slab.h>
29#include <linux/sched.h> 28#include <linux/sched.h>
30#include <linux/mm.h> 29#include <linux/mm.h>
30#include <linux/kernel.h>
31 31
32 32
33/* 33/*
@@ -44,6 +44,33 @@ struct ds_configuration {
44}; 44};
45static struct ds_configuration ds_cfg; 45static struct ds_configuration ds_cfg;
46 46
47/*
48 * A BTS or PEBS tracer.
49 *
50 * This holds the configuration of the tracer and serves as a handle
51 * to identify tracers.
52 */
53struct ds_tracer {
54 /* the DS context (partially) owned by this tracer */
55 struct ds_context *context;
56 /* the buffer provided on ds_request() and its size in bytes */
57 void *buffer;
58 size_t size;
59};
60
61struct bts_tracer {
62 /* the common DS part */
63 struct ds_tracer ds;
64 /* buffer overflow notification function */
65 bts_ovfl_callback_t ovfl;
66};
67
68struct pebs_tracer {
69 /* the common DS part */
70 struct ds_tracer ds;
71 /* buffer overflow notification function */
72 pebs_ovfl_callback_t ovfl;
73};
47 74
48/* 75/*
49 * Debug Store (DS) save area configuration (see Intel64 and IA32 76 * Debug Store (DS) save area configuration (see Intel64 and IA32
@@ -107,34 +134,13 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
107 (*(unsigned long *)base) = value; 134 (*(unsigned long *)base) = value;
108} 135}
109 136
137#define DS_ALIGNMENT (1 << 3) /* BTS and PEBS buffer alignment */
110 138
111/*
112 * Locking is done only for allocating BTS or PEBS resources and for
113 * guarding context and buffer memory allocation.
114 *
115 * Most functions require the current task to own the ds context part
116 * they are going to access. All the locking is done when validating
117 * access to the context.
118 */
119static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock);
120 139
121/* 140/*
122 * Validate that the current task is allowed to access the BTS/PEBS 141 * Locking is done only for allocating BTS or PEBS resources.
123 * buffer of the parameter task.
124 *
125 * Returns 0, if access is granted; -Eerrno, otherwise.
126 */ 142 */
127static inline int ds_validate_access(struct ds_context *context, 143static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock);
128 enum ds_qualifier qual)
129{
130 if (!context)
131 return -EPERM;
132
133 if (context->owner[qual] == current)
134 return 0;
135
136 return -EPERM;
137}
138 144
139 145
140/* 146/*
@@ -183,51 +189,13 @@ static inline int check_tracer(struct task_struct *task)
183 * 189 *
184 * Contexts are use-counted. They are allocated on first access and 190 * Contexts are use-counted. They are allocated on first access and
185 * deallocated when the last user puts the context. 191 * deallocated when the last user puts the context.
186 *
187 * We distinguish between an allocating and a non-allocating get of a
188 * context:
189 * - the allocating get is used for requesting BTS/PEBS resources. It
190 * requires the caller to hold the global ds_lock.
191 * - the non-allocating get is used for all other cases. A
192 * non-existing context indicates an error. It acquires and releases
193 * the ds_lock itself for obtaining the context.
194 *
195 * A context and its DS configuration are allocated and deallocated
196 * together. A context always has a DS configuration of the
197 * appropriate size.
198 */ 192 */
199static DEFINE_PER_CPU(struct ds_context *, system_context); 193static DEFINE_PER_CPU(struct ds_context *, system_context);
200 194
201#define this_system_context per_cpu(system_context, smp_processor_id()) 195#define this_system_context per_cpu(system_context, smp_processor_id())
202 196
203/*
204 * Returns the pointer to the parameter task's context or to the
205 * system-wide context, if task is NULL.
206 *
207 * Increases the use count of the returned context, if not NULL.
208 */
209static inline struct ds_context *ds_get_context(struct task_struct *task) 197static inline struct ds_context *ds_get_context(struct task_struct *task)
210{ 198{
211 struct ds_context *context;
212 unsigned long irq;
213
214 spin_lock_irqsave(&ds_lock, irq);
215
216 context = (task ? task->thread.ds_ctx : this_system_context);
217 if (context)
218 context->count++;
219
220 spin_unlock_irqrestore(&ds_lock, irq);
221
222 return context;
223}
224
225/*
226 * Same as ds_get_context, but allocates the context and it's DS
227 * structure, if necessary; returns NULL; if out of memory.
228 */
229static inline struct ds_context *ds_alloc_context(struct task_struct *task)
230{
231 struct ds_context **p_context = 199 struct ds_context **p_context =
232 (task ? &task->thread.ds_ctx : &this_system_context); 200 (task ? &task->thread.ds_ctx : &this_system_context);
233 struct ds_context *context = *p_context; 201 struct ds_context *context = *p_context;
@@ -238,16 +206,9 @@ static inline struct ds_context *ds_alloc_context(struct task_struct *task)
238 if (!context) 206 if (!context)
239 return NULL; 207 return NULL;
240 208
241 context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL);
242 if (!context->ds) {
243 kfree(context);
244 return NULL;
245 }
246
247 spin_lock_irqsave(&ds_lock, irq); 209 spin_lock_irqsave(&ds_lock, irq);
248 210
249 if (*p_context) { 211 if (*p_context) {
250 kfree(context->ds);
251 kfree(context); 212 kfree(context);
252 213
253 context = *p_context; 214 context = *p_context;
@@ -272,10 +233,6 @@ static inline struct ds_context *ds_alloc_context(struct task_struct *task)
272 return context; 233 return context;
273} 234}
274 235
275/*
276 * Decreases the use count of the parameter context, if not NULL.
277 * Deallocates the context, if the use count reaches zero.
278 */
279static inline void ds_put_context(struct ds_context *context) 236static inline void ds_put_context(struct ds_context *context)
280{ 237{
281 unsigned long irq; 238 unsigned long irq;
@@ -296,13 +253,6 @@ static inline void ds_put_context(struct ds_context *context)
296 if (!context->task || (context->task == current)) 253 if (!context->task || (context->task == current))
297 wrmsrl(MSR_IA32_DS_AREA, 0); 254 wrmsrl(MSR_IA32_DS_AREA, 0);
298 255
299 put_tracer(context->task);
300
301 /* free any leftover buffers from tracers that did not
302 * deallocate them properly. */
303 kfree(context->buffer[ds_bts]);
304 kfree(context->buffer[ds_pebs]);
305 kfree(context->ds);
306 kfree(context); 256 kfree(context);
307 out: 257 out:
308 spin_unlock_irqrestore(&ds_lock, irq); 258 spin_unlock_irqrestore(&ds_lock, irq);
@@ -312,345 +262,342 @@ static inline void ds_put_context(struct ds_context *context)
312/* 262/*
313 * Handle a buffer overflow 263 * Handle a buffer overflow
314 * 264 *
315 * task: the task whose buffers are overflowing;
316 * NULL for a buffer overflow on the current cpu
317 * context: the ds context 265 * context: the ds context
318 * qual: the buffer type 266 * qual: the buffer type
319 */ 267 */
320static void ds_overflow(struct task_struct *task, struct ds_context *context, 268static void ds_overflow(struct ds_context *context, enum ds_qualifier qual)
321 enum ds_qualifier qual) 269{
322{ 270 switch (qual) {
323 if (!context) 271 case ds_bts: {
324 return; 272 struct bts_tracer *tracer =
325 273 container_of(context->owner[qual],
326 if (context->callback[qual]) 274 struct bts_tracer, ds);
327 (*context->callback[qual])(task); 275 if (tracer->ovfl)
328 276 tracer->ovfl(tracer);
329 /* todo: do some more overflow handling */ 277 }
278 break;
279 case ds_pebs: {
280 struct pebs_tracer *tracer =
281 container_of(context->owner[qual],
282 struct pebs_tracer, ds);
283 if (tracer->ovfl)
284 tracer->ovfl(tracer);
285 }
286 break;
287 }
330} 288}
331 289
332 290
333/* 291static void ds_install_ds_config(struct ds_context *context,
334 * Allocate a non-pageable buffer of the parameter size. 292 enum ds_qualifier qual,
335 * Checks the memory and the locked memory rlimit. 293 void *base, size_t size, size_t ith)
336 *
337 * Returns the buffer, if successful;
338 * NULL, if out of memory or rlimit exceeded.
339 *
340 * size: the requested buffer size in bytes
341 * pages (out): if not NULL, contains the number of pages reserved
342 */
343static inline void *ds_allocate_buffer(size_t size, unsigned int *pages)
344{ 294{
345 unsigned long rlim, vm, pgsz; 295 unsigned long buffer, adj;
346 void *buffer;
347
348 pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
349
350 rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
351 vm = current->mm->total_vm + pgsz;
352 if (rlim < vm)
353 return NULL;
354 296
355 rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; 297 /* adjust the buffer address and size to meet alignment
356 vm = current->mm->locked_vm + pgsz; 298 * constraints:
357 if (rlim < vm) 299 * - buffer is double-word aligned
358 return NULL; 300 * - size is multiple of record size
301 *
302 * We checked the size at the very beginning; we have enough
303 * space to do the adjustment.
304 */
305 buffer = (unsigned long)base;
359 306
360 buffer = kzalloc(size, GFP_KERNEL); 307 adj = ALIGN(buffer, DS_ALIGNMENT) - buffer;
361 if (!buffer) 308 buffer += adj;
362 return NULL; 309 size -= adj;
363 310
364 current->mm->total_vm += pgsz; 311 size /= ds_cfg.sizeof_rec[qual];
365 current->mm->locked_vm += pgsz; 312 size *= ds_cfg.sizeof_rec[qual];
366 313
367 if (pages) 314 ds_set(context->ds, qual, ds_buffer_base, buffer);
368 *pages = pgsz; 315 ds_set(context->ds, qual, ds_index, buffer);
316 ds_set(context->ds, qual, ds_absolute_maximum, buffer + size);
369 317
370 return buffer; 318 /* The value for 'no threshold' is -1, which will set the
319 * threshold outside of the buffer, just like we want it.
320 */
321 ds_set(context->ds, qual,
322 ds_interrupt_threshold, buffer + size - ith);
371} 323}
372 324
373static int ds_request(struct task_struct *task, void *base, size_t size, 325static int ds_request(struct ds_tracer *tracer, enum ds_qualifier qual,
374 ds_ovfl_callback_t ovfl, enum ds_qualifier qual) 326 struct task_struct *task,
327 void *base, size_t size, size_t th)
375{ 328{
376 struct ds_context *context; 329 struct ds_context *context;
377 unsigned long buffer, adj;
378 const unsigned long alignment = (1 << 3);
379 unsigned long irq; 330 unsigned long irq;
380 int error = 0; 331 int error;
381 332
333 error = -EOPNOTSUPP;
382 if (!ds_cfg.sizeof_ds) 334 if (!ds_cfg.sizeof_ds)
383 return -EOPNOTSUPP; 335 goto out;
336
337 error = -EINVAL;
338 if (!base)
339 goto out;
384 340
385 /* we require some space to do alignment adjustments below */ 341 /* we require some space to do alignment adjustments below */
386 if (size < (alignment + ds_cfg.sizeof_rec[qual])) 342 error = -EINVAL;
387 return -EINVAL; 343 if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual]))
344 goto out;
388 345
389 /* buffer overflow notification is not yet implemented */ 346 if (th != (size_t)-1) {
390 if (ovfl) 347 th *= ds_cfg.sizeof_rec[qual];
391 return -EOPNOTSUPP; 348
349 error = -EINVAL;
350 if (size <= th)
351 goto out;
352 }
392 353
354 tracer->buffer = base;
355 tracer->size = size;
393 356
394 context = ds_alloc_context(task); 357 error = -ENOMEM;
358 context = ds_get_context(task);
395 if (!context) 359 if (!context)
396 return -ENOMEM; 360 goto out;
361 tracer->context = context;
362
397 363
398 spin_lock_irqsave(&ds_lock, irq); 364 spin_lock_irqsave(&ds_lock, irq);
399 365
400 error = -EPERM; 366 error = -EPERM;
401 if (!check_tracer(task)) 367 if (!check_tracer(task))
402 goto out_unlock; 368 goto out_unlock;
403
404 get_tracer(task); 369 get_tracer(task);
405 370
406 error = -EALREADY;
407 if (context->owner[qual] == current)
408 goto out_put_tracer;
409 error = -EPERM; 371 error = -EPERM;
410 if (context->owner[qual] != NULL) 372 if (context->owner[qual])
411 goto out_put_tracer; 373 goto out_put_tracer;
412 context->owner[qual] = current; 374 context->owner[qual] = tracer;
413 375
414 spin_unlock_irqrestore(&ds_lock, irq); 376 spin_unlock_irqrestore(&ds_lock, irq);
415 377
416 378
417 error = -ENOMEM; 379 ds_install_ds_config(context, qual, base, size, th);
418 if (!base) {
419 base = ds_allocate_buffer(size, &context->pages[qual]);
420 if (!base)
421 goto out_release;
422
423 context->buffer[qual] = base;
424 }
425 error = 0;
426 380
427 context->callback[qual] = ovfl; 381 return 0;
428
429 /* adjust the buffer address and size to meet alignment
430 * constraints:
431 * - buffer is double-word aligned
432 * - size is multiple of record size
433 *
434 * We checked the size at the very beginning; we have enough
435 * space to do the adjustment.
436 */
437 buffer = (unsigned long)base;
438
439 adj = ALIGN(buffer, alignment) - buffer;
440 buffer += adj;
441 size -= adj;
442
443 size /= ds_cfg.sizeof_rec[qual];
444 size *= ds_cfg.sizeof_rec[qual];
445
446 ds_set(context->ds, qual, ds_buffer_base, buffer);
447 ds_set(context->ds, qual, ds_index, buffer);
448 ds_set(context->ds, qual, ds_absolute_maximum, buffer + size);
449
450 if (ovfl) {
451 /* todo: select a suitable interrupt threshold */
452 } else
453 ds_set(context->ds, qual,
454 ds_interrupt_threshold, buffer + size + 1);
455
456 /* we keep the context until ds_release */
457 return error;
458
459 out_release:
460 context->owner[qual] = NULL;
461 ds_put_context(context);
462 put_tracer(task);
463 return error;
464 382
465 out_put_tracer: 383 out_put_tracer:
466 spin_unlock_irqrestore(&ds_lock, irq);
467 ds_put_context(context);
468 put_tracer(task); 384 put_tracer(task);
469 return error;
470
471 out_unlock: 385 out_unlock:
472 spin_unlock_irqrestore(&ds_lock, irq); 386 spin_unlock_irqrestore(&ds_lock, irq);
473 ds_put_context(context); 387 ds_put_context(context);
388 tracer->context = NULL;
389 out:
474 return error; 390 return error;
475} 391}
476 392
477int ds_request_bts(struct task_struct *task, void *base, size_t size, 393struct bts_tracer *ds_request_bts(struct task_struct *task,
478 ds_ovfl_callback_t ovfl) 394 void *base, size_t size,
395 bts_ovfl_callback_t ovfl, size_t th)
479{ 396{
480 return ds_request(task, base, size, ovfl, ds_bts); 397 struct bts_tracer *tracer;
481} 398 int error;
482 399
483int ds_request_pebs(struct task_struct *task, void *base, size_t size, 400 /* buffer overflow notification is not yet implemented */
484 ds_ovfl_callback_t ovfl) 401 error = -EOPNOTSUPP;
485{ 402 if (ovfl)
486 return ds_request(task, base, size, ovfl, ds_pebs); 403 goto out;
404
405 error = -ENOMEM;
406 tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
407 if (!tracer)
408 goto out;
409 tracer->ovfl = ovfl;
410
411 error = ds_request(&tracer->ds, ds_bts, task, base, size, th);
412 if (error < 0)
413 goto out_tracer;
414
415 return tracer;
416
417 out_tracer:
418 kfree(tracer);
419 out:
420 return ERR_PTR(error);
487} 421}
488 422
489static int ds_release(struct task_struct *task, enum ds_qualifier qual) 423struct pebs_tracer *ds_request_pebs(struct task_struct *task,
424 void *base, size_t size,
425 pebs_ovfl_callback_t ovfl, size_t th)
490{ 426{
491 struct ds_context *context; 427 struct pebs_tracer *tracer;
492 int error; 428 int error;
493 429
494 context = ds_get_context(task); 430 /* buffer overflow notification is not yet implemented */
495 error = ds_validate_access(context, qual); 431 error = -EOPNOTSUPP;
496 if (error < 0) 432 if (ovfl)
497 goto out; 433 goto out;
498 434
499 kfree(context->buffer[qual]); 435 error = -ENOMEM;
500 context->buffer[qual] = NULL; 436 tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
437 if (!tracer)
438 goto out;
439 tracer->ovfl = ovfl;
501 440
502 current->mm->total_vm -= context->pages[qual]; 441 error = ds_request(&tracer->ds, ds_pebs, task, base, size, th);
503 current->mm->locked_vm -= context->pages[qual]; 442 if (error < 0)
504 context->pages[qual] = 0; 443 goto out_tracer;
505 context->owner[qual] = NULL;
506 444
507 /* 445 return tracer;
508 * we put the context twice: 446
509 * once for the ds_get_context 447 out_tracer:
510 * once for the corresponding ds_request 448 kfree(tracer);
511 */
512 ds_put_context(context);
513 out: 449 out:
514 ds_put_context(context); 450 return ERR_PTR(error);
515 return error;
516} 451}
517 452
518int ds_release_bts(struct task_struct *task) 453static void ds_release(struct ds_tracer *tracer, enum ds_qualifier qual)
519{ 454{
520 return ds_release(task, ds_bts); 455 BUG_ON(tracer->context->owner[qual] != tracer);
456 tracer->context->owner[qual] = NULL;
457
458 put_tracer(tracer->context->task);
459 ds_put_context(tracer->context);
521} 460}
522 461
523int ds_release_pebs(struct task_struct *task) 462int ds_release_bts(struct bts_tracer *tracer)
524{ 463{
525 return ds_release(task, ds_pebs); 464 if (!tracer)
465 return -EINVAL;
466
467 ds_release(&tracer->ds, ds_bts);
468 kfree(tracer);
469
470 return 0;
526} 471}
527 472
528static int ds_get_index(struct task_struct *task, size_t *pos, 473int ds_release_pebs(struct pebs_tracer *tracer)
529 enum ds_qualifier qual)
530{ 474{
531 struct ds_context *context; 475 if (!tracer)
532 unsigned long base, index; 476 return -EINVAL;
533 int error;
534 477
535 context = ds_get_context(task); 478 ds_release(&tracer->ds, ds_pebs);
536 error = ds_validate_access(context, qual); 479 kfree(tracer);
537 if (error < 0) 480
538 goto out; 481 return 0;
482}
483
484static size_t ds_get_index(struct ds_context *context, enum ds_qualifier qual)
485{
486 unsigned long base, index;
539 487
540 base = ds_get(context->ds, qual, ds_buffer_base); 488 base = ds_get(context->ds, qual, ds_buffer_base);
541 index = ds_get(context->ds, qual, ds_index); 489 index = ds_get(context->ds, qual, ds_index);
542 490
543 error = ((index - base) / ds_cfg.sizeof_rec[qual]); 491 return (index - base) / ds_cfg.sizeof_rec[qual];
544 if (pos)
545 *pos = error;
546 out:
547 ds_put_context(context);
548 return error;
549} 492}
550 493
551int ds_get_bts_index(struct task_struct *task, size_t *pos) 494int ds_get_bts_index(struct bts_tracer *tracer, size_t *pos)
552{ 495{
553 return ds_get_index(task, pos, ds_bts); 496 if (!tracer)
497 return -EINVAL;
498
499 if (!pos)
500 return -EINVAL;
501
502 *pos = ds_get_index(tracer->ds.context, ds_bts);
503
504 return 0;
554} 505}
555 506
556int ds_get_pebs_index(struct task_struct *task, size_t *pos) 507int ds_get_pebs_index(struct pebs_tracer *tracer, size_t *pos)
557{ 508{
558 return ds_get_index(task, pos, ds_pebs); 509 if (!tracer)
510 return -EINVAL;
511
512 if (!pos)
513 return -EINVAL;
514
515 *pos = ds_get_index(tracer->ds.context, ds_pebs);
516
517 return 0;
559} 518}
560 519
561static int ds_get_end(struct task_struct *task, size_t *pos, 520static size_t ds_get_end(struct ds_context *context, enum ds_qualifier qual)
562 enum ds_qualifier qual)
563{ 521{
564 struct ds_context *context; 522 unsigned long base, max;
565 unsigned long base, end;
566 int error;
567
568 context = ds_get_context(task);
569 error = ds_validate_access(context, qual);
570 if (error < 0)
571 goto out;
572 523
573 base = ds_get(context->ds, qual, ds_buffer_base); 524 base = ds_get(context->ds, qual, ds_buffer_base);
574 end = ds_get(context->ds, qual, ds_absolute_maximum); 525 max = ds_get(context->ds, qual, ds_absolute_maximum);
575 526
576 error = ((end - base) / ds_cfg.sizeof_rec[qual]); 527 return (max - base) / ds_cfg.sizeof_rec[qual];
577 if (pos)
578 *pos = error;
579 out:
580 ds_put_context(context);
581 return error;
582} 528}
583 529
584int ds_get_bts_end(struct task_struct *task, size_t *pos) 530int ds_get_bts_end(struct bts_tracer *tracer, size_t *pos)
585{ 531{
586 return ds_get_end(task, pos, ds_bts); 532 if (!tracer)
533 return -EINVAL;
534
535 if (!pos)
536 return -EINVAL;
537
538 *pos = ds_get_end(tracer->ds.context, ds_bts);
539
540 return 0;
587} 541}
588 542
589int ds_get_pebs_end(struct task_struct *task, size_t *pos) 543int ds_get_pebs_end(struct pebs_tracer *tracer, size_t *pos)
590{ 544{
591 return ds_get_end(task, pos, ds_pebs); 545 if (!tracer)
546 return -EINVAL;
547
548 if (!pos)
549 return -EINVAL;
550
551 *pos = ds_get_end(tracer->ds.context, ds_pebs);
552
553 return 0;
592} 554}
593 555
594static int ds_access(struct task_struct *task, size_t index, 556static int ds_access(struct ds_context *context, enum ds_qualifier qual,
595 const void **record, enum ds_qualifier qual) 557 size_t index, const void **record)
596{ 558{
597 struct ds_context *context;
598 unsigned long base, idx; 559 unsigned long base, idx;
599 int error;
600 560
601 if (!record) 561 if (!record)
602 return -EINVAL; 562 return -EINVAL;
603 563
604 context = ds_get_context(task);
605 error = ds_validate_access(context, qual);
606 if (error < 0)
607 goto out;
608
609 base = ds_get(context->ds, qual, ds_buffer_base); 564 base = ds_get(context->ds, qual, ds_buffer_base);
610 idx = base + (index * ds_cfg.sizeof_rec[qual]); 565 idx = base + (index * ds_cfg.sizeof_rec[qual]);
611 566
612 error = -EINVAL;
613 if (idx > ds_get(context->ds, qual, ds_absolute_maximum)) 567 if (idx > ds_get(context->ds, qual, ds_absolute_maximum))
614 goto out; 568 return -EINVAL;
615 569
616 *record = (const void *)idx; 570 *record = (const void *)idx;
617 error = ds_cfg.sizeof_rec[qual]; 571
618 out: 572 return ds_cfg.sizeof_rec[qual];
619 ds_put_context(context);
620 return error;
621} 573}
622 574
623int ds_access_bts(struct task_struct *task, size_t index, const void **record) 575int ds_access_bts(struct bts_tracer *tracer, size_t index,
576 const void **record)
624{ 577{
625 return ds_access(task, index, record, ds_bts); 578 if (!tracer)
579 return -EINVAL;
580
581 return ds_access(tracer->ds.context, ds_bts, index, record);
626} 582}
627 583
628int ds_access_pebs(struct task_struct *task, size_t index, const void **record) 584int ds_access_pebs(struct pebs_tracer *tracer, size_t index,
585 const void **record)
629{ 586{
630 return ds_access(task, index, record, ds_pebs); 587 if (!tracer)
588 return -EINVAL;
589
590 return ds_access(tracer->ds.context, ds_pebs, index, record);
631} 591}
632 592
633static int ds_write(struct task_struct *task, const void *record, size_t size, 593static int ds_write(struct ds_context *context, enum ds_qualifier qual,
634 enum ds_qualifier qual, int force) 594 const void *record, size_t size)
635{ 595{
636 struct ds_context *context; 596 int bytes_written = 0;
637 int error;
638 597
639 if (!record) 598 if (!record)
640 return -EINVAL; 599 return -EINVAL;
641 600
642 error = -EPERM;
643 context = ds_get_context(task);
644 if (!context)
645 goto out;
646
647 if (!force) {
648 error = ds_validate_access(context, qual);
649 if (error < 0)
650 goto out;
651 }
652
653 error = 0;
654 while (size) { 601 while (size) {
655 unsigned long base, index, end, write_end, int_th; 602 unsigned long base, index, end, write_end, int_th;
656 unsigned long write_size, adj_write_size; 603 unsigned long write_size, adj_write_size;
@@ -678,14 +625,14 @@ static int ds_write(struct task_struct *task, const void *record, size_t size,
678 write_end = end; 625 write_end = end;
679 626
680 if (write_end <= index) 627 if (write_end <= index)
681 goto out; 628 break;
682 629
683 write_size = min((unsigned long) size, write_end - index); 630 write_size = min((unsigned long) size, write_end - index);
684 memcpy((void *)index, record, write_size); 631 memcpy((void *)index, record, write_size);
685 632
686 record = (const char *)record + write_size; 633 record = (const char *)record + write_size;
687 size -= write_size; 634 size -= write_size;
688 error += write_size; 635 bytes_written += write_size;
689 636
690 adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; 637 adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
691 adj_write_size *= ds_cfg.sizeof_rec[qual]; 638 adj_write_size *= ds_cfg.sizeof_rec[qual];
@@ -700,47 +647,32 @@ static int ds_write(struct task_struct *task, const void *record, size_t size,
700 ds_set(context->ds, qual, ds_index, index); 647 ds_set(context->ds, qual, ds_index, index);
701 648
702 if (index >= int_th) 649 if (index >= int_th)
703 ds_overflow(task, context, qual); 650 ds_overflow(context, qual);
704 } 651 }
705 652
706 out: 653 return bytes_written;
707 ds_put_context(context);
708 return error;
709} 654}
710 655
711int ds_write_bts(struct task_struct *task, const void *record, size_t size) 656int ds_write_bts(struct bts_tracer *tracer, const void *record, size_t size)
712{ 657{
713 return ds_write(task, record, size, ds_bts, /* force = */ 0); 658 if (!tracer)
714} 659 return -EINVAL;
715 660
716int ds_write_pebs(struct task_struct *task, const void *record, size_t size) 661 return ds_write(tracer->ds.context, ds_bts, record, size);
717{
718 return ds_write(task, record, size, ds_pebs, /* force = */ 0);
719} 662}
720 663
721int ds_unchecked_write_bts(struct task_struct *task, 664int ds_write_pebs(struct pebs_tracer *tracer, const void *record, size_t size)
722 const void *record, size_t size)
723{ 665{
724 return ds_write(task, record, size, ds_bts, /* force = */ 1); 666 if (!tracer)
725} 667 return -EINVAL;
726 668
727int ds_unchecked_write_pebs(struct task_struct *task, 669 return ds_write(tracer->ds.context, ds_pebs, record, size);
728 const void *record, size_t size)
729{
730 return ds_write(task, record, size, ds_pebs, /* force = */ 1);
731} 670}
732 671
733static int ds_reset_or_clear(struct task_struct *task, 672static void ds_reset_or_clear(struct ds_context *context,
734 enum ds_qualifier qual, int clear) 673 enum ds_qualifier qual, int clear)
735{ 674{
736 struct ds_context *context;
737 unsigned long base, end; 675 unsigned long base, end;
738 int error;
739
740 context = ds_get_context(task);
741 error = ds_validate_access(context, qual);
742 if (error < 0)
743 goto out;
744 676
745 base = ds_get(context->ds, qual, ds_buffer_base); 677 base = ds_get(context->ds, qual, ds_buffer_base);
746 end = ds_get(context->ds, qual, ds_absolute_maximum); 678 end = ds_get(context->ds, qual, ds_absolute_maximum);
@@ -749,70 +681,69 @@ static int ds_reset_or_clear(struct task_struct *task,
749 memset((void *)base, 0, end - base); 681 memset((void *)base, 0, end - base);
750 682
751 ds_set(context->ds, qual, ds_index, base); 683 ds_set(context->ds, qual, ds_index, base);
752
753 error = 0;
754 out:
755 ds_put_context(context);
756 return error;
757} 684}
758 685
759int ds_reset_bts(struct task_struct *task) 686int ds_reset_bts(struct bts_tracer *tracer)
760{ 687{
761 return ds_reset_or_clear(task, ds_bts, /* clear = */ 0); 688 if (!tracer)
689 return -EINVAL;
690
691 ds_reset_or_clear(tracer->ds.context, ds_bts, /* clear = */ 0);
692
693 return 0;
762} 694}
763 695
764int ds_reset_pebs(struct task_struct *task) 696int ds_reset_pebs(struct pebs_tracer *tracer)
765{ 697{
766 return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0); 698 if (!tracer)
699 return -EINVAL;
700
701 ds_reset_or_clear(tracer->ds.context, ds_pebs, /* clear = */ 0);
702
703 return 0;
767} 704}
768 705
769int ds_clear_bts(struct task_struct *task) 706int ds_clear_bts(struct bts_tracer *tracer)
770{ 707{
771 return ds_reset_or_clear(task, ds_bts, /* clear = */ 1); 708 if (!tracer)
709 return -EINVAL;
710
711 ds_reset_or_clear(tracer->ds.context, ds_bts, /* clear = */ 1);
712
713 return 0;
772} 714}
773 715
774int ds_clear_pebs(struct task_struct *task) 716int ds_clear_pebs(struct pebs_tracer *tracer)
775{ 717{
776 return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1); 718 if (!tracer)
719 return -EINVAL;
720
721 ds_reset_or_clear(tracer->ds.context, ds_pebs, /* clear = */ 1);
722
723 return 0;
777} 724}
778 725
779int ds_get_pebs_reset(struct task_struct *task, u64 *value) 726int ds_get_pebs_reset(struct pebs_tracer *tracer, u64 *value)
780{ 727{
781 struct ds_context *context; 728 if (!tracer)
782 int error; 729 return -EINVAL;
783 730
784 if (!value) 731 if (!value)
785 return -EINVAL; 732 return -EINVAL;
786 733
787 context = ds_get_context(task); 734 *value = *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8));
788 error = ds_validate_access(context, ds_pebs);
789 if (error < 0)
790 goto out;
791 735
792 *value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)); 736 return 0;
793
794 error = 0;
795 out:
796 ds_put_context(context);
797 return error;
798} 737}
799 738
800int ds_set_pebs_reset(struct task_struct *task, u64 value) 739int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value)
801{ 740{
802 struct ds_context *context; 741 if (!tracer)
803 int error; 742 return -EINVAL;
804
805 context = ds_get_context(task);
806 error = ds_validate_access(context, ds_pebs);
807 if (error < 0)
808 goto out;
809 743
810 *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value; 744 *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value;
811 745
812 error = 0; 746 return 0;
813 out:
814 ds_put_context(context);
815 return error;
816} 747}
817 748
818static const struct ds_configuration ds_cfg_var = { 749static const struct ds_configuration ds_cfg_var = {
@@ -840,6 +771,10 @@ static inline void
840ds_configure(const struct ds_configuration *cfg) 771ds_configure(const struct ds_configuration *cfg)
841{ 772{
842 ds_cfg = *cfg; 773 ds_cfg = *cfg;
774
775 printk(KERN_INFO "DS available\n");
776
777 BUG_ON(MAX_SIZEOF_DS < ds_cfg.sizeof_ds);
843} 778}
844 779
845void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) 780void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
@@ -847,17 +782,16 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
847 switch (c->x86) { 782 switch (c->x86) {
848 case 0x6: 783 case 0x6:
849 switch (c->x86_model) { 784 switch (c->x86_model) {
785 case 0 ... 0xC:
786 /* sorry, don't know about them */
787 break;
850 case 0xD: 788 case 0xD:
851 case 0xE: /* Pentium M */ 789 case 0xE: /* Pentium M */
852 ds_configure(&ds_cfg_var); 790 ds_configure(&ds_cfg_var);
853 break; 791 break;
854 case 0xF: /* Core2 */ 792 default: /* Core2, Atom, ... */
855 case 0x1C: /* Atom */
856 ds_configure(&ds_cfg_64); 793 ds_configure(&ds_cfg_64);
857 break; 794 break;
858 default:
859 /* sorry, don't know about them */
860 break;
861 } 795 }
862 break; 796 break;
863 case 0xF: 797 case 0xF:
@@ -884,6 +818,8 @@ void ds_free(struct ds_context *context)
884 * is dying. There should not be any user of that context left 818 * is dying. There should not be any user of that context left
885 * to disturb us, anymore. */ 819 * to disturb us, anymore. */
886 unsigned long leftovers = context->count; 820 unsigned long leftovers = context->count;
887 while (leftovers--) 821 while (leftovers--) {
822 put_tracer(context->task);
888 ds_put_context(context); 823 ds_put_context(context);
824 }
889} 825}
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
new file mode 100644
index 000000000000..6b1f6f6f8661
--- /dev/null
+++ b/arch/x86/kernel/dumpstack.c
@@ -0,0 +1,351 @@
1/*
2 * Copyright (C) 1991, 1992 Linus Torvalds
3 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
4 */
5#include <linux/kallsyms.h>
6#include <linux/kprobes.h>
7#include <linux/uaccess.h>
8#include <linux/utsname.h>
9#include <linux/hardirq.h>
10#include <linux/kdebug.h>
11#include <linux/module.h>
12#include <linux/ptrace.h>
13#include <linux/kexec.h>
14#include <linux/bug.h>
15#include <linux/nmi.h>
16#include <linux/sysfs.h>
17
18#include <asm/stacktrace.h>
19
20#include "dumpstack.h"
21
22int panic_on_unrecovered_nmi;
23unsigned int code_bytes = 64;
24int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
25static int die_counter;
26
27void printk_address(unsigned long address, int reliable)
28{
29 printk(" [<%p>] %s%pS\n", (void *) address,
30 reliable ? "" : "? ", (void *) address);
31}
32
33#ifdef CONFIG_FUNCTION_GRAPH_TRACER
34static void
35print_ftrace_graph_addr(unsigned long addr, void *data,
36 const struct stacktrace_ops *ops,
37 struct thread_info *tinfo, int *graph)
38{
39 struct task_struct *task = tinfo->task;
40 unsigned long ret_addr;
41 int index = task->curr_ret_stack;
42
43 if (addr != (unsigned long)return_to_handler)
44 return;
45
46 if (!task->ret_stack || index < *graph)
47 return;
48
49 index -= *graph;
50 ret_addr = task->ret_stack[index].ret;
51
52 ops->address(data, ret_addr, 1);
53
54 (*graph)++;
55}
56#else
57static inline void
58print_ftrace_graph_addr(unsigned long addr, void *data,
59 const struct stacktrace_ops *ops,
60 struct thread_info *tinfo, int *graph)
61{ }
62#endif
63
64/*
65 * x86-64 can have up to three kernel stacks:
66 * process stack
67 * interrupt stack
68 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
69 */
70
71static inline int valid_stack_ptr(struct thread_info *tinfo,
72 void *p, unsigned int size, void *end)
73{
74 void *t = tinfo;
75 if (end) {
76 if (p < end && p >= (end-THREAD_SIZE))
77 return 1;
78 else
79 return 0;
80 }
81 return p > t && p < t + THREAD_SIZE - size;
82}
83
84unsigned long
85print_context_stack(struct thread_info *tinfo,
86 unsigned long *stack, unsigned long bp,
87 const struct stacktrace_ops *ops, void *data,
88 unsigned long *end, int *graph)
89{
90 struct stack_frame *frame = (struct stack_frame *)bp;
91
92 while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
93 unsigned long addr;
94
95 addr = *stack;
96 if (__kernel_text_address(addr)) {
97 if ((unsigned long) stack == bp + sizeof(long)) {
98 ops->address(data, addr, 1);
99 frame = frame->next_frame;
100 bp = (unsigned long) frame;
101 } else {
102 ops->address(data, addr, bp == 0);
103 }
104 print_ftrace_graph_addr(addr, data, ops, tinfo, graph);
105 }
106 stack++;
107 }
108 return bp;
109}
110
111
112static void
113print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
114{
115 printk(data);
116 print_symbol(msg, symbol);
117 printk("\n");
118}
119
120static void print_trace_warning(void *data, char *msg)
121{
122 printk("%s%s\n", (char *)data, msg);
123}
124
125static int print_trace_stack(void *data, char *name)
126{
127 printk("%s <%s> ", (char *)data, name);
128 return 0;
129}
130
131/*
132 * Print one address/symbol entries per line.
133 */
134static void print_trace_address(void *data, unsigned long addr, int reliable)
135{
136 touch_nmi_watchdog();
137 printk(data);
138 printk_address(addr, reliable);
139}
140
141static const struct stacktrace_ops print_trace_ops = {
142 .warning = print_trace_warning,
143 .warning_symbol = print_trace_warning_symbol,
144 .stack = print_trace_stack,
145 .address = print_trace_address,
146};
147
148void
149show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
150 unsigned long *stack, unsigned long bp, char *log_lvl)
151{
152 printk("%sCall Trace:\n", log_lvl);
153 dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
154}
155
156void show_trace(struct task_struct *task, struct pt_regs *regs,
157 unsigned long *stack, unsigned long bp)
158{
159 show_trace_log_lvl(task, regs, stack, bp, "");
160}
161
162void show_stack(struct task_struct *task, unsigned long *sp)
163{
164 show_stack_log_lvl(task, NULL, sp, 0, "");
165}
166
167/*
168 * The architecture-independent dump_stack generator
169 */
170void dump_stack(void)
171{
172 unsigned long bp = 0;
173 unsigned long stack;
174
175#ifdef CONFIG_FRAME_POINTER
176 if (!bp)
177 get_bp(bp);
178#endif
179
180 printk("Pid: %d, comm: %.20s %s %s %.*s\n",
181 current->pid, current->comm, print_tainted(),
182 init_utsname()->release,
183 (int)strcspn(init_utsname()->version, " "),
184 init_utsname()->version);
185 show_trace(NULL, NULL, &stack, bp);
186}
187EXPORT_SYMBOL(dump_stack);
188
189static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
190static int die_owner = -1;
191static unsigned int die_nest_count;
192
193unsigned __kprobes long oops_begin(void)
194{
195 int cpu;
196 unsigned long flags;
197
198 oops_enter();
199
200 /* racy, but better than risking deadlock. */
201 raw_local_irq_save(flags);
202 cpu = smp_processor_id();
203 if (!__raw_spin_trylock(&die_lock)) {
204 if (cpu == die_owner)
205 /* nested oops. should stop eventually */;
206 else
207 __raw_spin_lock(&die_lock);
208 }
209 die_nest_count++;
210 die_owner = cpu;
211 console_verbose();
212 bust_spinlocks(1);
213 return flags;
214}
215
216void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
217{
218 if (regs && kexec_should_crash(current))
219 crash_kexec(regs);
220
221 bust_spinlocks(0);
222 die_owner = -1;
223 add_taint(TAINT_DIE);
224 die_nest_count--;
225 if (!die_nest_count)
226 /* Nest count reaches zero, release the lock. */
227 __raw_spin_unlock(&die_lock);
228 raw_local_irq_restore(flags);
229 oops_exit();
230
231 if (!signr)
232 return;
233 if (in_interrupt())
234 panic("Fatal exception in interrupt");
235 if (panic_on_oops)
236 panic("Fatal exception");
237 do_exit(signr);
238}
239
240int __kprobes __die(const char *str, struct pt_regs *regs, long err)
241{
242#ifdef CONFIG_X86_32
243 unsigned short ss;
244 unsigned long sp;
245#endif
246 printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
247#ifdef CONFIG_PREEMPT
248 printk("PREEMPT ");
249#endif
250#ifdef CONFIG_SMP
251 printk("SMP ");
252#endif
253#ifdef CONFIG_DEBUG_PAGEALLOC
254 printk("DEBUG_PAGEALLOC");
255#endif
256 printk("\n");
257 sysfs_printk_last_file();
258 if (notify_die(DIE_OOPS, str, regs, err,
259 current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
260 return 1;
261
262 show_registers(regs);
263#ifdef CONFIG_X86_32
264 sp = (unsigned long) (&regs->sp);
265 savesegment(ss, ss);
266 if (user_mode(regs)) {
267 sp = regs->sp;
268 ss = regs->ss & 0xffff;
269 }
270 printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
271 print_symbol("%s", regs->ip);
272 printk(" SS:ESP %04x:%08lx\n", ss, sp);
273#else
274 /* Executive summary in case the oops scrolled away */
275 printk(KERN_ALERT "RIP ");
276 printk_address(regs->ip, 1);
277 printk(" RSP <%016lx>\n", regs->sp);
278#endif
279 return 0;
280}
281
282/*
283 * This is gone through when something in the kernel has done something bad
284 * and is about to be terminated:
285 */
286void die(const char *str, struct pt_regs *regs, long err)
287{
288 unsigned long flags = oops_begin();
289 int sig = SIGSEGV;
290
291 if (!user_mode_vm(regs))
292 report_bug(regs->ip, regs);
293
294 if (__die(str, regs, err))
295 sig = 0;
296 oops_end(flags, regs, sig);
297}
298
299void notrace __kprobes
300die_nmi(char *str, struct pt_regs *regs, int do_panic)
301{
302 unsigned long flags;
303
304 if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
305 return;
306
307 /*
308 * We are in trouble anyway, lets at least try
309 * to get a message out.
310 */
311 flags = oops_begin();
312 printk(KERN_EMERG "%s", str);
313 printk(" on CPU%d, ip %08lx, registers:\n",
314 smp_processor_id(), regs->ip);
315 show_registers(regs);
316 oops_end(flags, regs, 0);
317 if (do_panic || panic_on_oops)
318 panic("Non maskable interrupt");
319 nmi_exit();
320 local_irq_enable();
321 do_exit(SIGBUS);
322}
323
324static int __init oops_setup(char *s)
325{
326 if (!s)
327 return -EINVAL;
328 if (!strcmp(s, "panic"))
329 panic_on_oops = 1;
330 return 0;
331}
332early_param("oops", oops_setup);
333
334static int __init kstack_setup(char *s)
335{
336 if (!s)
337 return -EINVAL;
338 kstack_depth_to_print = simple_strtoul(s, NULL, 0);
339 return 0;
340}
341early_param("kstack", kstack_setup);
342
343static int __init code_bytes_setup(char *s)
344{
345 code_bytes = simple_strtoul(s, NULL, 0);
346 if (code_bytes > 8192)
347 code_bytes = 8192;
348
349 return 1;
350}
351__setup("code_bytes=", code_bytes_setup);
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h
new file mode 100644
index 000000000000..da87590b8698
--- /dev/null
+++ b/arch/x86/kernel/dumpstack.h
@@ -0,0 +1,39 @@
1/*
2 * Copyright (C) 1991, 1992 Linus Torvalds
3 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
4 */
5
6#ifndef DUMPSTACK_H
7#define DUMPSTACK_H
8
9#ifdef CONFIG_X86_32
10#define STACKSLOTS_PER_LINE 8
11#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
12#else
13#define STACKSLOTS_PER_LINE 4
14#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
15#endif
16
17extern unsigned long
18print_context_stack(struct thread_info *tinfo,
19 unsigned long *stack, unsigned long bp,
20 const struct stacktrace_ops *ops, void *data,
21 unsigned long *end, int *graph);
22
23extern void
24show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
25 unsigned long *stack, unsigned long bp, char *log_lvl);
26
27extern void
28show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
29 unsigned long *sp, unsigned long bp, char *log_lvl);
30
31extern unsigned int code_bytes;
32extern int kstack_depth_to_print;
33
34/* The form of the top of the frame on the stack */
35struct stack_frame {
36 struct stack_frame *next_frame;
37 unsigned long return_address;
38};
39#endif
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index b3614752197b..d593cd1f58dc 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -17,69 +17,14 @@
17 17
18#include <asm/stacktrace.h> 18#include <asm/stacktrace.h>
19 19
20#define STACKSLOTS_PER_LINE 8 20#include "dumpstack.h"
21#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
22
23int panic_on_unrecovered_nmi;
24int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
25static unsigned int code_bytes = 64;
26static int die_counter;
27
28void printk_address(unsigned long address, int reliable)
29{
30 printk(" [<%p>] %s%pS\n", (void *) address,
31 reliable ? "" : "? ", (void *) address);
32}
33
34static inline int valid_stack_ptr(struct thread_info *tinfo,
35 void *p, unsigned int size, void *end)
36{
37 void *t = tinfo;
38 if (end) {
39 if (p < end && p >= (end-THREAD_SIZE))
40 return 1;
41 else
42 return 0;
43 }
44 return p > t && p < t + THREAD_SIZE - size;
45}
46
47/* The form of the top of the frame on the stack */
48struct stack_frame {
49 struct stack_frame *next_frame;
50 unsigned long return_address;
51};
52
53static inline unsigned long
54print_context_stack(struct thread_info *tinfo,
55 unsigned long *stack, unsigned long bp,
56 const struct stacktrace_ops *ops, void *data,
57 unsigned long *end)
58{
59 struct stack_frame *frame = (struct stack_frame *)bp;
60
61 while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
62 unsigned long addr;
63
64 addr = *stack;
65 if (__kernel_text_address(addr)) {
66 if ((unsigned long) stack == bp + sizeof(long)) {
67 ops->address(data, addr, 1);
68 frame = frame->next_frame;
69 bp = (unsigned long) frame;
70 } else {
71 ops->address(data, addr, bp == 0);
72 }
73 }
74 stack++;
75 }
76 return bp;
77}
78 21
79void dump_trace(struct task_struct *task, struct pt_regs *regs, 22void dump_trace(struct task_struct *task, struct pt_regs *regs,
80 unsigned long *stack, unsigned long bp, 23 unsigned long *stack, unsigned long bp,
81 const struct stacktrace_ops *ops, void *data) 24 const struct stacktrace_ops *ops, void *data)
82{ 25{
26 int graph = 0;
27
83 if (!task) 28 if (!task)
84 task = current; 29 task = current;
85 30
@@ -107,7 +52,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
107 52
108 context = (struct thread_info *) 53 context = (struct thread_info *)
109 ((unsigned long)stack & (~(THREAD_SIZE - 1))); 54 ((unsigned long)stack & (~(THREAD_SIZE - 1)));
110 bp = print_context_stack(context, stack, bp, ops, data, NULL); 55 bp = print_context_stack(context, stack, bp, ops,
56 data, NULL, &graph);
111 57
112 stack = (unsigned long *)context->previous_esp; 58 stack = (unsigned long *)context->previous_esp;
113 if (!stack) 59 if (!stack)
@@ -119,57 +65,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
119} 65}
120EXPORT_SYMBOL(dump_trace); 66EXPORT_SYMBOL(dump_trace);
121 67
122static void 68void
123print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
124{
125 printk(data);
126 print_symbol(msg, symbol);
127 printk("\n");
128}
129
130static void print_trace_warning(void *data, char *msg)
131{
132 printk("%s%s\n", (char *)data, msg);
133}
134
135static int print_trace_stack(void *data, char *name)
136{
137 printk("%s <%s> ", (char *)data, name);
138 return 0;
139}
140
141/*
142 * Print one address/symbol entries per line.
143 */
144static void print_trace_address(void *data, unsigned long addr, int reliable)
145{
146 touch_nmi_watchdog();
147 printk(data);
148 printk_address(addr, reliable);
149}
150
151static const struct stacktrace_ops print_trace_ops = {
152 .warning = print_trace_warning,
153 .warning_symbol = print_trace_warning_symbol,
154 .stack = print_trace_stack,
155 .address = print_trace_address,
156};
157
158static void
159show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
160 unsigned long *stack, unsigned long bp, char *log_lvl)
161{
162 printk("%sCall Trace:\n", log_lvl);
163 dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
164}
165
166void show_trace(struct task_struct *task, struct pt_regs *regs,
167 unsigned long *stack, unsigned long bp)
168{
169 show_trace_log_lvl(task, regs, stack, bp, "");
170}
171
172static void
173show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, 69show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
174 unsigned long *sp, unsigned long bp, char *log_lvl) 70 unsigned long *sp, unsigned long bp, char *log_lvl)
175{ 71{
@@ -196,33 +92,6 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
196 show_trace_log_lvl(task, regs, sp, bp, log_lvl); 92 show_trace_log_lvl(task, regs, sp, bp, log_lvl);
197} 93}
198 94
199void show_stack(struct task_struct *task, unsigned long *sp)
200{
201 show_stack_log_lvl(task, NULL, sp, 0, "");
202}
203
204/*
205 * The architecture-independent dump_stack generator
206 */
207void dump_stack(void)
208{
209 unsigned long bp = 0;
210 unsigned long stack;
211
212#ifdef CONFIG_FRAME_POINTER
213 if (!bp)
214 get_bp(bp);
215#endif
216
217 printk("Pid: %d, comm: %.20s %s %s %.*s\n",
218 current->pid, current->comm, print_tainted(),
219 init_utsname()->release,
220 (int)strcspn(init_utsname()->version, " "),
221 init_utsname()->version);
222 show_trace(NULL, NULL, &stack, bp);
223}
224
225EXPORT_SYMBOL(dump_stack);
226 95
227void show_registers(struct pt_regs *regs) 96void show_registers(struct pt_regs *regs)
228{ 97{
@@ -283,167 +152,3 @@ int is_valid_bugaddr(unsigned long ip)
283 return ud2 == 0x0b0f; 152 return ud2 == 0x0b0f;
284} 153}
285 154
286static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
287static int die_owner = -1;
288static unsigned int die_nest_count;
289
290unsigned __kprobes long oops_begin(void)
291{
292 unsigned long flags;
293
294 oops_enter();
295
296 if (die_owner != raw_smp_processor_id()) {
297 console_verbose();
298 raw_local_irq_save(flags);
299 __raw_spin_lock(&die_lock);
300 die_owner = smp_processor_id();
301 die_nest_count = 0;
302 bust_spinlocks(1);
303 } else {
304 raw_local_irq_save(flags);
305 }
306 die_nest_count++;
307 return flags;
308}
309
310void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
311{
312 bust_spinlocks(0);
313 die_owner = -1;
314 add_taint(TAINT_DIE);
315 __raw_spin_unlock(&die_lock);
316 raw_local_irq_restore(flags);
317
318 if (!regs)
319 return;
320
321 if (kexec_should_crash(current))
322 crash_kexec(regs);
323 if (in_interrupt())
324 panic("Fatal exception in interrupt");
325 if (panic_on_oops)
326 panic("Fatal exception");
327 oops_exit();
328 do_exit(signr);
329}
330
331int __kprobes __die(const char *str, struct pt_regs *regs, long err)
332{
333 unsigned short ss;
334 unsigned long sp;
335
336 printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
337#ifdef CONFIG_PREEMPT
338 printk("PREEMPT ");
339#endif
340#ifdef CONFIG_SMP
341 printk("SMP ");
342#endif
343#ifdef CONFIG_DEBUG_PAGEALLOC
344 printk("DEBUG_PAGEALLOC");
345#endif
346 printk("\n");
347 sysfs_printk_last_file();
348 if (notify_die(DIE_OOPS, str, regs, err,
349 current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
350 return 1;
351
352 show_registers(regs);
353 /* Executive summary in case the oops scrolled away */
354 sp = (unsigned long) (&regs->sp);
355 savesegment(ss, ss);
356 if (user_mode(regs)) {
357 sp = regs->sp;
358 ss = regs->ss & 0xffff;
359 }
360 printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
361 print_symbol("%s", regs->ip);
362 printk(" SS:ESP %04x:%08lx\n", ss, sp);
363 return 0;
364}
365
366/*
367 * This is gone through when something in the kernel has done something bad
368 * and is about to be terminated:
369 */
370void die(const char *str, struct pt_regs *regs, long err)
371{
372 unsigned long flags = oops_begin();
373
374 if (die_nest_count < 3) {
375 report_bug(regs->ip, regs);
376
377 if (__die(str, regs, err))
378 regs = NULL;
379 } else {
380 printk(KERN_EMERG "Recursive die() failure, output suppressed\n");
381 }
382
383 oops_end(flags, regs, SIGSEGV);
384}
385
386static DEFINE_SPINLOCK(nmi_print_lock);
387
388void notrace __kprobes
389die_nmi(char *str, struct pt_regs *regs, int do_panic)
390{
391 if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
392 return;
393
394 spin_lock(&nmi_print_lock);
395 /*
396 * We are in trouble anyway, lets at least try
397 * to get a message out:
398 */
399 bust_spinlocks(1);
400 printk(KERN_EMERG "%s", str);
401 printk(" on CPU%d, ip %08lx, registers:\n",
402 smp_processor_id(), regs->ip);
403 show_registers(regs);
404 if (do_panic)
405 panic("Non maskable interrupt");
406 console_silent();
407 spin_unlock(&nmi_print_lock);
408
409 /*
410 * If we are in kernel we are probably nested up pretty bad
411 * and might aswell get out now while we still can:
412 */
413 if (!user_mode_vm(regs)) {
414 current->thread.trap_no = 2;
415 crash_kexec(regs);
416 }
417
418 bust_spinlocks(0);
419 do_exit(SIGSEGV);
420}
421
422static int __init oops_setup(char *s)
423{
424 if (!s)
425 return -EINVAL;
426 if (!strcmp(s, "panic"))
427 panic_on_oops = 1;
428 return 0;
429}
430early_param("oops", oops_setup);
431
432static int __init kstack_setup(char *s)
433{
434 if (!s)
435 return -EINVAL;
436 kstack_depth_to_print = simple_strtoul(s, NULL, 0);
437 return 0;
438}
439early_param("kstack", kstack_setup);
440
441static int __init code_bytes_setup(char *s)
442{
443 code_bytes = simple_strtoul(s, NULL, 0);
444 if (code_bytes > 8192)
445 code_bytes = 8192;
446
447 return 1;
448}
449__setup("code_bytes=", code_bytes_setup);
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 96a5db7da8a7..c302d0707048 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -17,19 +17,7 @@
17 17
18#include <asm/stacktrace.h> 18#include <asm/stacktrace.h>
19 19
20#define STACKSLOTS_PER_LINE 4 20#include "dumpstack.h"
21#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
22
23int panic_on_unrecovered_nmi;
24int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
25static unsigned int code_bytes = 64;
26static int die_counter;
27
28void printk_address(unsigned long address, int reliable)
29{
30 printk(" [<%p>] %s%pS\n", (void *) address,
31 reliable ? "" : "? ", (void *) address);
32}
33 21
34static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, 22static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
35 unsigned *usedp, char **idp) 23 unsigned *usedp, char **idp)
@@ -113,51 +101,6 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
113 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack 101 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
114 */ 102 */
115 103
116static inline int valid_stack_ptr(struct thread_info *tinfo,
117 void *p, unsigned int size, void *end)
118{
119 void *t = tinfo;
120 if (end) {
121 if (p < end && p >= (end-THREAD_SIZE))
122 return 1;
123 else
124 return 0;
125 }
126 return p > t && p < t + THREAD_SIZE - size;
127}
128
129/* The form of the top of the frame on the stack */
130struct stack_frame {
131 struct stack_frame *next_frame;
132 unsigned long return_address;
133};
134
135static inline unsigned long
136print_context_stack(struct thread_info *tinfo,
137 unsigned long *stack, unsigned long bp,
138 const struct stacktrace_ops *ops, void *data,
139 unsigned long *end)
140{
141 struct stack_frame *frame = (struct stack_frame *)bp;
142
143 while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
144 unsigned long addr;
145
146 addr = *stack;
147 if (__kernel_text_address(addr)) {
148 if ((unsigned long) stack == bp + sizeof(long)) {
149 ops->address(data, addr, 1);
150 frame = frame->next_frame;
151 bp = (unsigned long) frame;
152 } else {
153 ops->address(data, addr, bp == 0);
154 }
155 }
156 stack++;
157 }
158 return bp;
159}
160
161void dump_trace(struct task_struct *task, struct pt_regs *regs, 104void dump_trace(struct task_struct *task, struct pt_regs *regs,
162 unsigned long *stack, unsigned long bp, 105 unsigned long *stack, unsigned long bp,
163 const struct stacktrace_ops *ops, void *data) 106 const struct stacktrace_ops *ops, void *data)
@@ -166,6 +109,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
166 unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; 109 unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
167 unsigned used = 0; 110 unsigned used = 0;
168 struct thread_info *tinfo; 111 struct thread_info *tinfo;
112 int graph = 0;
169 113
170 if (!task) 114 if (!task)
171 task = current; 115 task = current;
@@ -206,7 +150,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
206 break; 150 break;
207 151
208 bp = print_context_stack(tinfo, stack, bp, ops, 152 bp = print_context_stack(tinfo, stack, bp, ops,
209 data, estack_end); 153 data, estack_end, &graph);
210 ops->stack(data, "<EOE>"); 154 ops->stack(data, "<EOE>");
211 /* 155 /*
212 * We link to the next stack via the 156 * We link to the next stack via the
@@ -225,7 +169,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
225 if (ops->stack(data, "IRQ") < 0) 169 if (ops->stack(data, "IRQ") < 0)
226 break; 170 break;
227 bp = print_context_stack(tinfo, stack, bp, 171 bp = print_context_stack(tinfo, stack, bp,
228 ops, data, irqstack_end); 172 ops, data, irqstack_end, &graph);
229 /* 173 /*
230 * We link to the next stack (which would be 174 * We link to the next stack (which would be
231 * the process stack normally) the last 175 * the process stack normally) the last
@@ -243,62 +187,12 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
243 /* 187 /*
244 * This handles the process stack: 188 * This handles the process stack:
245 */ 189 */
246 bp = print_context_stack(tinfo, stack, bp, ops, data, NULL); 190 bp = print_context_stack(tinfo, stack, bp, ops, data, NULL, &graph);
247 put_cpu(); 191 put_cpu();
248} 192}
249EXPORT_SYMBOL(dump_trace); 193EXPORT_SYMBOL(dump_trace);
250 194
251static void 195void
252print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
253{
254 printk(data);
255 print_symbol(msg, symbol);
256 printk("\n");
257}
258
259static void print_trace_warning(void *data, char *msg)
260{
261 printk("%s%s\n", (char *)data, msg);
262}
263
264static int print_trace_stack(void *data, char *name)
265{
266 printk("%s <%s> ", (char *)data, name);
267 return 0;
268}
269
270/*
271 * Print one address/symbol entries per line.
272 */
273static void print_trace_address(void *data, unsigned long addr, int reliable)
274{
275 touch_nmi_watchdog();
276 printk(data);
277 printk_address(addr, reliable);
278}
279
280static const struct stacktrace_ops print_trace_ops = {
281 .warning = print_trace_warning,
282 .warning_symbol = print_trace_warning_symbol,
283 .stack = print_trace_stack,
284 .address = print_trace_address,
285};
286
287static void
288show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
289 unsigned long *stack, unsigned long bp, char *log_lvl)
290{
291 printk("%sCall Trace:\n", log_lvl);
292 dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
293}
294
295void show_trace(struct task_struct *task, struct pt_regs *regs,
296 unsigned long *stack, unsigned long bp)
297{
298 show_trace_log_lvl(task, regs, stack, bp, "");
299}
300
301static void
302show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, 196show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
303 unsigned long *sp, unsigned long bp, char *log_lvl) 197 unsigned long *sp, unsigned long bp, char *log_lvl)
304{ 198{
@@ -342,33 +236,6 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
342 show_trace_log_lvl(task, regs, sp, bp, log_lvl); 236 show_trace_log_lvl(task, regs, sp, bp, log_lvl);
343} 237}
344 238
345void show_stack(struct task_struct *task, unsigned long *sp)
346{
347 show_stack_log_lvl(task, NULL, sp, 0, "");
348}
349
350/*
351 * The architecture-independent dump_stack generator
352 */
353void dump_stack(void)
354{
355 unsigned long bp = 0;
356 unsigned long stack;
357
358#ifdef CONFIG_FRAME_POINTER
359 if (!bp)
360 get_bp(bp);
361#endif
362
363 printk("Pid: %d, comm: %.20s %s %s %.*s\n",
364 current->pid, current->comm, print_tainted(),
365 init_utsname()->release,
366 (int)strcspn(init_utsname()->version, " "),
367 init_utsname()->version);
368 show_trace(NULL, NULL, &stack, bp);
369}
370EXPORT_SYMBOL(dump_stack);
371
372void show_registers(struct pt_regs *regs) 239void show_registers(struct pt_regs *regs)
373{ 240{
374 int i; 241 int i;
@@ -429,147 +296,3 @@ int is_valid_bugaddr(unsigned long ip)
429 return ud2 == 0x0b0f; 296 return ud2 == 0x0b0f;
430} 297}
431 298
432static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
433static int die_owner = -1;
434static unsigned int die_nest_count;
435
436unsigned __kprobes long oops_begin(void)
437{
438 int cpu;
439 unsigned long flags;
440
441 oops_enter();
442
443 /* racy, but better than risking deadlock. */
444 raw_local_irq_save(flags);
445 cpu = smp_processor_id();
446 if (!__raw_spin_trylock(&die_lock)) {
447 if (cpu == die_owner)
448 /* nested oops. should stop eventually */;
449 else
450 __raw_spin_lock(&die_lock);
451 }
452 die_nest_count++;
453 die_owner = cpu;
454 console_verbose();
455 bust_spinlocks(1);
456 return flags;
457}
458
459void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
460{
461 die_owner = -1;
462 bust_spinlocks(0);
463 die_nest_count--;
464 if (!die_nest_count)
465 /* Nest count reaches zero, release the lock. */
466 __raw_spin_unlock(&die_lock);
467 raw_local_irq_restore(flags);
468 if (!regs) {
469 oops_exit();
470 return;
471 }
472 if (in_interrupt())
473 panic("Fatal exception in interrupt");
474 if (panic_on_oops)
475 panic("Fatal exception");
476 oops_exit();
477 do_exit(signr);
478}
479
480int __kprobes __die(const char *str, struct pt_regs *regs, long err)
481{
482 printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
483#ifdef CONFIG_PREEMPT
484 printk("PREEMPT ");
485#endif
486#ifdef CONFIG_SMP
487 printk("SMP ");
488#endif
489#ifdef CONFIG_DEBUG_PAGEALLOC
490 printk("DEBUG_PAGEALLOC");
491#endif
492 printk("\n");
493 sysfs_printk_last_file();
494 if (notify_die(DIE_OOPS, str, regs, err,
495 current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
496 return 1;
497
498 show_registers(regs);
499 add_taint(TAINT_DIE);
500 /* Executive summary in case the oops scrolled away */
501 printk(KERN_ALERT "RIP ");
502 printk_address(regs->ip, 1);
503 printk(" RSP <%016lx>\n", regs->sp);
504 if (kexec_should_crash(current))
505 crash_kexec(regs);
506 return 0;
507}
508
509void die(const char *str, struct pt_regs *regs, long err)
510{
511 unsigned long flags = oops_begin();
512
513 if (!user_mode(regs))
514 report_bug(regs->ip, regs);
515
516 if (__die(str, regs, err))
517 regs = NULL;
518 oops_end(flags, regs, SIGSEGV);
519}
520
521notrace __kprobes void
522die_nmi(char *str, struct pt_regs *regs, int do_panic)
523{
524 unsigned long flags;
525
526 if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
527 return;
528
529 flags = oops_begin();
530 /*
531 * We are in trouble anyway, lets at least try
532 * to get a message out.
533 */
534 printk(KERN_EMERG "%s", str);
535 printk(" on CPU%d, ip %08lx, registers:\n",
536 smp_processor_id(), regs->ip);
537 show_registers(regs);
538 if (kexec_should_crash(current))
539 crash_kexec(regs);
540 if (do_panic || panic_on_oops)
541 panic("Non maskable interrupt");
542 oops_end(flags, NULL, SIGBUS);
543 nmi_exit();
544 local_irq_enable();
545 do_exit(SIGBUS);
546}
547
548static int __init oops_setup(char *s)
549{
550 if (!s)
551 return -EINVAL;
552 if (!strcmp(s, "panic"))
553 panic_on_oops = 1;
554 return 0;
555}
556early_param("oops", oops_setup);
557
558static int __init kstack_setup(char *s)
559{
560 if (!s)
561 return -EINVAL;
562 kstack_depth_to_print = simple_strtoul(s, NULL, 0);
563 return 0;
564}
565early_param("kstack", kstack_setup);
566
567static int __init code_bytes_setup(char *s)
568{
569 code_bytes = simple_strtoul(s, NULL, 0);
570 if (code_bytes > 8192)
571 code_bytes = 8192;
572
573 return 1;
574}
575__setup("code_bytes=", code_bytes_setup);
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 74defe21ba42..43ceb3f454bf 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1174,6 +1174,11 @@ ftrace_call:
1174 popl %edx 1174 popl %edx
1175 popl %ecx 1175 popl %ecx
1176 popl %eax 1176 popl %eax
1177#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1178.globl ftrace_graph_call
1179ftrace_graph_call:
1180 jmp ftrace_stub
1181#endif
1177 1182
1178.globl ftrace_stub 1183.globl ftrace_stub
1179ftrace_stub: 1184ftrace_stub:
@@ -1188,9 +1193,12 @@ ENTRY(mcount)
1188 1193
1189 cmpl $ftrace_stub, ftrace_trace_function 1194 cmpl $ftrace_stub, ftrace_trace_function
1190 jnz trace 1195 jnz trace
1191#ifdef CONFIG_FUNCTION_RET_TRACER 1196#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1192 cmpl $ftrace_stub, ftrace_function_return 1197 cmpl $ftrace_stub, ftrace_graph_return
1193 jnz ftrace_return_caller 1198 jnz ftrace_graph_caller
1199
1200 cmpl $ftrace_graph_entry_stub, ftrace_graph_entry
1201 jnz ftrace_graph_caller
1194#endif 1202#endif
1195.globl ftrace_stub 1203.globl ftrace_stub
1196ftrace_stub: 1204ftrace_stub:
@@ -1215,8 +1223,8 @@ END(mcount)
1215#endif /* CONFIG_DYNAMIC_FTRACE */ 1223#endif /* CONFIG_DYNAMIC_FTRACE */
1216#endif /* CONFIG_FUNCTION_TRACER */ 1224#endif /* CONFIG_FUNCTION_TRACER */
1217 1225
1218#ifdef CONFIG_FUNCTION_RET_TRACER 1226#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1219ENTRY(ftrace_return_caller) 1227ENTRY(ftrace_graph_caller)
1220 cmpl $0, function_trace_stop 1228 cmpl $0, function_trace_stop
1221 jne ftrace_stub 1229 jne ftrace_stub
1222 1230
@@ -1225,12 +1233,13 @@ ENTRY(ftrace_return_caller)
1225 pushl %edx 1233 pushl %edx
1226 movl 0xc(%esp), %edx 1234 movl 0xc(%esp), %edx
1227 lea 0x4(%ebp), %eax 1235 lea 0x4(%ebp), %eax
1236 subl $MCOUNT_INSN_SIZE, %edx
1228 call prepare_ftrace_return 1237 call prepare_ftrace_return
1229 popl %edx 1238 popl %edx
1230 popl %ecx 1239 popl %ecx
1231 popl %eax 1240 popl %eax
1232 ret 1241 ret
1233END(ftrace_return_caller) 1242END(ftrace_graph_caller)
1234 1243
1235.globl return_to_handler 1244.globl return_to_handler
1236return_to_handler: 1245return_to_handler:
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 08aa6b10933c..54e0bbdccb99 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -98,6 +98,12 @@ ftrace_call:
98 movq (%rsp), %rax 98 movq (%rsp), %rax
99 addq $0x38, %rsp 99 addq $0x38, %rsp
100 100
101#ifdef CONFIG_FUNCTION_GRAPH_TRACER
102.globl ftrace_graph_call
103ftrace_graph_call:
104 jmp ftrace_stub
105#endif
106
101.globl ftrace_stub 107.globl ftrace_stub
102ftrace_stub: 108ftrace_stub:
103 retq 109 retq
@@ -110,6 +116,15 @@ ENTRY(mcount)
110 116
111 cmpq $ftrace_stub, ftrace_trace_function 117 cmpq $ftrace_stub, ftrace_trace_function
112 jnz trace 118 jnz trace
119
120#ifdef CONFIG_FUNCTION_GRAPH_TRACER
121 cmpq $ftrace_stub, ftrace_graph_return
122 jnz ftrace_graph_caller
123
124 cmpq $ftrace_graph_entry_stub, ftrace_graph_entry
125 jnz ftrace_graph_caller
126#endif
127
113.globl ftrace_stub 128.globl ftrace_stub
114ftrace_stub: 129ftrace_stub:
115 retq 130 retq
@@ -145,6 +160,69 @@ END(mcount)
145#endif /* CONFIG_DYNAMIC_FTRACE */ 160#endif /* CONFIG_DYNAMIC_FTRACE */
146#endif /* CONFIG_FUNCTION_TRACER */ 161#endif /* CONFIG_FUNCTION_TRACER */
147 162
163#ifdef CONFIG_FUNCTION_GRAPH_TRACER
164ENTRY(ftrace_graph_caller)
165 cmpl $0, function_trace_stop
166 jne ftrace_stub
167
168 subq $0x38, %rsp
169 movq %rax, (%rsp)
170 movq %rcx, 8(%rsp)
171 movq %rdx, 16(%rsp)
172 movq %rsi, 24(%rsp)
173 movq %rdi, 32(%rsp)
174 movq %r8, 40(%rsp)
175 movq %r9, 48(%rsp)
176
177 leaq 8(%rbp), %rdi
178 movq 0x38(%rsp), %rsi
179 subq $MCOUNT_INSN_SIZE, %rsi
180
181 call prepare_ftrace_return
182
183 movq 48(%rsp), %r9
184 movq 40(%rsp), %r8
185 movq 32(%rsp), %rdi
186 movq 24(%rsp), %rsi
187 movq 16(%rsp), %rdx
188 movq 8(%rsp), %rcx
189 movq (%rsp), %rax
190 addq $0x38, %rsp
191 retq
192END(ftrace_graph_caller)
193
194
195.globl return_to_handler
196return_to_handler:
197 subq $80, %rsp
198
199 movq %rax, (%rsp)
200 movq %rcx, 8(%rsp)
201 movq %rdx, 16(%rsp)
202 movq %rsi, 24(%rsp)
203 movq %rdi, 32(%rsp)
204 movq %r8, 40(%rsp)
205 movq %r9, 48(%rsp)
206 movq %r10, 56(%rsp)
207 movq %r11, 64(%rsp)
208
209 call ftrace_return_to_handler
210
211 movq %rax, 72(%rsp)
212 movq 64(%rsp), %r11
213 movq 56(%rsp), %r10
214 movq 48(%rsp), %r9
215 movq 40(%rsp), %r8
216 movq 32(%rsp), %rdi
217 movq 24(%rsp), %rsi
218 movq 16(%rsp), %rdx
219 movq 8(%rsp), %rcx
220 movq (%rsp), %rax
221 addq $72, %rsp
222 retq
223#endif
224
225
148#ifndef CONFIG_PREEMPT 226#ifndef CONFIG_PREEMPT
149#define retint_kernel retint_restore_args 227#define retint_kernel retint_restore_args
150#endif 228#endif
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index bb137f7297ed..1b43086b097a 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -111,7 +111,6 @@ static void ftrace_mod_code(void)
111 */ 111 */
112 mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode, 112 mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
113 MCOUNT_INSN_SIZE); 113 MCOUNT_INSN_SIZE);
114
115} 114}
116 115
117void ftrace_nmi_enter(void) 116void ftrace_nmi_enter(void)
@@ -323,9 +322,53 @@ int __init ftrace_dyn_arch_init(void *data)
323} 322}
324#endif 323#endif
325 324
326#ifdef CONFIG_FUNCTION_RET_TRACER 325#ifdef CONFIG_FUNCTION_GRAPH_TRACER
326
327#ifdef CONFIG_DYNAMIC_FTRACE
328extern void ftrace_graph_call(void);
329
330static int ftrace_mod_jmp(unsigned long ip,
331 int old_offset, int new_offset)
332{
333 unsigned char code[MCOUNT_INSN_SIZE];
334
335 if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE))
336 return -EFAULT;
337
338 if (code[0] != 0xe9 || old_offset != *(int *)(&code[1]))
339 return -EINVAL;
340
341 *(int *)(&code[1]) = new_offset;
342
343 if (do_ftrace_mod_code(ip, &code))
344 return -EPERM;
345
346 return 0;
347}
348
349int ftrace_enable_ftrace_graph_caller(void)
350{
351 unsigned long ip = (unsigned long)(&ftrace_graph_call);
352 int old_offset, new_offset;
353
354 old_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);
355 new_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);
356
357 return ftrace_mod_jmp(ip, old_offset, new_offset);
358}
359
360int ftrace_disable_ftrace_graph_caller(void)
361{
362 unsigned long ip = (unsigned long)(&ftrace_graph_call);
363 int old_offset, new_offset;
364
365 old_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);
366 new_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);
327 367
328#ifndef CONFIG_DYNAMIC_FTRACE 368 return ftrace_mod_jmp(ip, old_offset, new_offset);
369}
370
371#else /* CONFIG_DYNAMIC_FTRACE */
329 372
330/* 373/*
331 * These functions are picked from those used on 374 * These functions are picked from those used on
@@ -343,11 +386,12 @@ void ftrace_nmi_exit(void)
343{ 386{
344 atomic_dec(&in_nmi); 387 atomic_dec(&in_nmi);
345} 388}
389
346#endif /* !CONFIG_DYNAMIC_FTRACE */ 390#endif /* !CONFIG_DYNAMIC_FTRACE */
347 391
348/* Add a function return address to the trace stack on thread info.*/ 392/* Add a function return address to the trace stack on thread info.*/
349static int push_return_trace(unsigned long ret, unsigned long long time, 393static int push_return_trace(unsigned long ret, unsigned long long time,
350 unsigned long func) 394 unsigned long func, int *depth)
351{ 395{
352 int index; 396 int index;
353 397
@@ -365,22 +409,34 @@ static int push_return_trace(unsigned long ret, unsigned long long time,
365 current->ret_stack[index].ret = ret; 409 current->ret_stack[index].ret = ret;
366 current->ret_stack[index].func = func; 410 current->ret_stack[index].func = func;
367 current->ret_stack[index].calltime = time; 411 current->ret_stack[index].calltime = time;
412 *depth = index;
368 413
369 return 0; 414 return 0;
370} 415}
371 416
372/* Retrieve a function return address to the trace stack on thread info.*/ 417/* Retrieve a function return address to the trace stack on thread info.*/
373static void pop_return_trace(unsigned long *ret, unsigned long long *time, 418static void pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
374 unsigned long *func, unsigned long *overrun)
375{ 419{
376 int index; 420 int index;
377 421
378 index = current->curr_ret_stack; 422 index = current->curr_ret_stack;
423
424 if (unlikely(index < 0)) {
425 ftrace_graph_stop();
426 WARN_ON(1);
427 /* Might as well panic, otherwise we have no where to go */
428 *ret = (unsigned long)panic;
429 return;
430 }
431
379 *ret = current->ret_stack[index].ret; 432 *ret = current->ret_stack[index].ret;
380 *func = current->ret_stack[index].func; 433 trace->func = current->ret_stack[index].func;
381 *time = current->ret_stack[index].calltime; 434 trace->calltime = current->ret_stack[index].calltime;
382 *overrun = atomic_read(&current->trace_overrun); 435 trace->overrun = atomic_read(&current->trace_overrun);
436 trace->depth = index;
437 barrier();
383 current->curr_ret_stack--; 438 current->curr_ret_stack--;
439
384} 440}
385 441
386/* 442/*
@@ -389,13 +445,21 @@ static void pop_return_trace(unsigned long *ret, unsigned long long *time,
389 */ 445 */
390unsigned long ftrace_return_to_handler(void) 446unsigned long ftrace_return_to_handler(void)
391{ 447{
392 struct ftrace_retfunc trace; 448 struct ftrace_graph_ret trace;
393 pop_return_trace(&trace.ret, &trace.calltime, &trace.func, 449 unsigned long ret;
394 &trace.overrun); 450
451 pop_return_trace(&trace, &ret);
395 trace.rettime = cpu_clock(raw_smp_processor_id()); 452 trace.rettime = cpu_clock(raw_smp_processor_id());
396 ftrace_function_return(&trace); 453 ftrace_graph_return(&trace);
454
455 if (unlikely(!ret)) {
456 ftrace_graph_stop();
457 WARN_ON(1);
458 /* Might as well panic. What else to do? */
459 ret = (unsigned long)panic;
460 }
397 461
398 return trace.ret; 462 return ret;
399} 463}
400 464
401/* 465/*
@@ -407,11 +471,15 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
407 unsigned long old; 471 unsigned long old;
408 unsigned long long calltime; 472 unsigned long long calltime;
409 int faulted; 473 int faulted;
474 struct ftrace_graph_ent trace;
410 unsigned long return_hooker = (unsigned long) 475 unsigned long return_hooker = (unsigned long)
411 &return_to_handler; 476 &return_to_handler;
412 477
413 /* Nmi's are currently unsupported */ 478 /* Nmi's are currently unsupported */
414 if (atomic_read(&in_nmi)) 479 if (unlikely(atomic_read(&in_nmi)))
480 return;
481
482 if (unlikely(atomic_read(&current->tracing_graph_pause)))
415 return; 483 return;
416 484
417 /* 485 /*
@@ -420,18 +488,16 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
420 * ignore such a protection. 488 * ignore such a protection.
421 */ 489 */
422 asm volatile( 490 asm volatile(
423 "1: movl (%[parent_old]), %[old]\n" 491 "1: " _ASM_MOV " (%[parent_old]), %[old]\n"
424 "2: movl %[return_hooker], (%[parent_replaced])\n" 492 "2: " _ASM_MOV " %[return_hooker], (%[parent_replaced])\n"
425 " movl $0, %[faulted]\n" 493 " movl $0, %[faulted]\n"
426 494
427 ".section .fixup, \"ax\"\n" 495 ".section .fixup, \"ax\"\n"
428 "3: movl $1, %[faulted]\n" 496 "3: movl $1, %[faulted]\n"
429 ".previous\n" 497 ".previous\n"
430 498
431 ".section __ex_table, \"a\"\n" 499 _ASM_EXTABLE(1b, 3b)
432 " .long 1b, 3b\n" 500 _ASM_EXTABLE(2b, 3b)
433 " .long 2b, 3b\n"
434 ".previous\n"
435 501
436 : [parent_replaced] "=r" (parent), [old] "=r" (old), 502 : [parent_replaced] "=r" (parent), [old] "=r" (old),
437 [faulted] "=r" (faulted) 503 [faulted] "=r" (faulted)
@@ -439,21 +505,33 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
439 : "memory" 505 : "memory"
440 ); 506 );
441 507
442 if (WARN_ON(faulted)) { 508 if (unlikely(faulted)) {
443 unregister_ftrace_return(); 509 ftrace_graph_stop();
510 WARN_ON(1);
444 return; 511 return;
445 } 512 }
446 513
447 if (WARN_ON(!__kernel_text_address(old))) { 514 if (unlikely(!__kernel_text_address(old))) {
448 unregister_ftrace_return(); 515 ftrace_graph_stop();
449 *parent = old; 516 *parent = old;
517 WARN_ON(1);
450 return; 518 return;
451 } 519 }
452 520
453 calltime = cpu_clock(raw_smp_processor_id()); 521 calltime = cpu_clock(raw_smp_processor_id());
454 522
455 if (push_return_trace(old, calltime, self_addr) == -EBUSY) 523 if (push_return_trace(old, calltime,
524 self_addr, &trace.depth) == -EBUSY) {
456 *parent = old; 525 *parent = old;
457} 526 return;
527 }
528
529 trace.func = self_addr;
458 530
459#endif /* CONFIG_FUNCTION_RET_TRACER */ 531 /* Only trace if the calling function expects to */
532 if (!ftrace_graph_entry(&trace)) {
533 current->curr_ret_stack--;
534 *parent = old;
535 }
536}
537#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c622772744d8..c27af49a4ede 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -7,6 +7,7 @@
7#include <linux/module.h> 7#include <linux/module.h>
8#include <linux/pm.h> 8#include <linux/pm.h>
9#include <linux/clockchips.h> 9#include <linux/clockchips.h>
10#include <linux/ftrace.h>
10#include <asm/system.h> 11#include <asm/system.h>
11 12
12unsigned long idle_halt; 13unsigned long idle_halt;
@@ -100,6 +101,9 @@ static inline int hlt_use_halt(void)
100void default_idle(void) 101void default_idle(void)
101{ 102{
102 if (hlt_use_halt()) { 103 if (hlt_use_halt()) {
104 struct power_trace it;
105
106 trace_power_start(&it, POWER_CSTATE, 1);
103 current_thread_info()->status &= ~TS_POLLING; 107 current_thread_info()->status &= ~TS_POLLING;
104 /* 108 /*
105 * TS_POLLING-cleared state must be visible before we 109 * TS_POLLING-cleared state must be visible before we
@@ -112,6 +116,7 @@ void default_idle(void)
112 else 116 else
113 local_irq_enable(); 117 local_irq_enable();
114 current_thread_info()->status |= TS_POLLING; 118 current_thread_info()->status |= TS_POLLING;
119 trace_power_end(&it);
115 } else { 120 } else {
116 local_irq_enable(); 121 local_irq_enable();
117 /* loop is done by the caller */ 122 /* loop is done by the caller */
@@ -154,24 +159,31 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
154 */ 159 */
155void mwait_idle_with_hints(unsigned long ax, unsigned long cx) 160void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
156{ 161{
162 struct power_trace it;
163
164 trace_power_start(&it, POWER_CSTATE, (ax>>4)+1);
157 if (!need_resched()) { 165 if (!need_resched()) {
158 __monitor((void *)&current_thread_info()->flags, 0, 0); 166 __monitor((void *)&current_thread_info()->flags, 0, 0);
159 smp_mb(); 167 smp_mb();
160 if (!need_resched()) 168 if (!need_resched())
161 __mwait(ax, cx); 169 __mwait(ax, cx);
162 } 170 }
171 trace_power_end(&it);
163} 172}
164 173
165/* Default MONITOR/MWAIT with no hints, used for default C1 state */ 174/* Default MONITOR/MWAIT with no hints, used for default C1 state */
166static void mwait_idle(void) 175static void mwait_idle(void)
167{ 176{
177 struct power_trace it;
168 if (!need_resched()) { 178 if (!need_resched()) {
179 trace_power_start(&it, POWER_CSTATE, 1);
169 __monitor((void *)&current_thread_info()->flags, 0, 0); 180 __monitor((void *)&current_thread_info()->flags, 0, 0);
170 smp_mb(); 181 smp_mb();
171 if (!need_resched()) 182 if (!need_resched())
172 __sti_mwait(0, 0); 183 __sti_mwait(0, 0);
173 else 184 else
174 local_irq_enable(); 185 local_irq_enable();
186 trace_power_end(&it);
175 } else 187 } else
176 local_irq_enable(); 188 local_irq_enable();
177} 189}
@@ -183,9 +195,13 @@ static void mwait_idle(void)
183 */ 195 */
184static void poll_idle(void) 196static void poll_idle(void)
185{ 197{
198 struct power_trace it;
199
200 trace_power_start(&it, POWER_CSTATE, 0);
186 local_irq_enable(); 201 local_irq_enable();
187 while (!need_resched()) 202 while (!need_resched())
188 cpu_relax(); 203 cpu_relax();
204 trace_power_end(&it);
189} 205}
190 206
191/* 207/*
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 0a1302fe6d45..24c2276aa453 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -38,6 +38,7 @@
38#include <linux/percpu.h> 38#include <linux/percpu.h>
39#include <linux/prctl.h> 39#include <linux/prctl.h>
40#include <linux/dmi.h> 40#include <linux/dmi.h>
41#include <linux/ftrace.h>
41 42
42#include <asm/uaccess.h> 43#include <asm/uaccess.h>
43#include <asm/pgtable.h> 44#include <asm/pgtable.h>
@@ -548,7 +549,8 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
548 * the task-switch, and shows up in ret_from_fork in entry.S, 549 * the task-switch, and shows up in ret_from_fork in entry.S,
549 * for example. 550 * for example.
550 */ 551 */
551struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) 552__notrace_funcgraph struct task_struct *
553__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
552{ 554{
553 struct thread_struct *prev = &prev_p->thread, 555 struct thread_struct *prev = &prev_p->thread,
554 *next = &next_p->thread; 556 *next = &next_p->thread;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index c958120fb1b6..fbb321d53d34 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -39,6 +39,7 @@
39#include <linux/prctl.h> 39#include <linux/prctl.h>
40#include <linux/uaccess.h> 40#include <linux/uaccess.h>
41#include <linux/io.h> 41#include <linux/io.h>
42#include <linux/ftrace.h>
42 43
43#include <asm/pgtable.h> 44#include <asm/pgtable.h>
44#include <asm/system.h> 45#include <asm/system.h>
@@ -551,8 +552,9 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
551 * - could test fs/gs bitsliced 552 * - could test fs/gs bitsliced
552 * 553 *
553 * Kprobes not supported here. Set the probe on schedule instead. 554 * Kprobes not supported here. Set the probe on schedule instead.
555 * Function graph tracer not supported too.
554 */ 556 */
555struct task_struct * 557__notrace_funcgraph struct task_struct *
556__switch_to(struct task_struct *prev_p, struct task_struct *next_p) 558__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
557{ 559{
558 struct thread_struct *prev = &prev_p->thread; 560 struct thread_struct *prev = &prev_p->thread;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 0a6d8c12e10d..2c8ec1ba75e6 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -668,14 +668,14 @@ static int ptrace_bts_read_record(struct task_struct *child, size_t index,
668 size_t bts_index, bts_end; 668 size_t bts_index, bts_end;
669 int error; 669 int error;
670 670
671 error = ds_get_bts_end(child, &bts_end); 671 error = ds_get_bts_end(child->bts, &bts_end);
672 if (error < 0) 672 if (error < 0)
673 return error; 673 return error;
674 674
675 if (bts_end <= index) 675 if (bts_end <= index)
676 return -EINVAL; 676 return -EINVAL;
677 677
678 error = ds_get_bts_index(child, &bts_index); 678 error = ds_get_bts_index(child->bts, &bts_index);
679 if (error < 0) 679 if (error < 0)
680 return error; 680 return error;
681 681
@@ -684,7 +684,7 @@ static int ptrace_bts_read_record(struct task_struct *child, size_t index,
684 if (bts_end <= bts_index) 684 if (bts_end <= bts_index)
685 bts_index -= bts_end; 685 bts_index -= bts_end;
686 686
687 error = ds_access_bts(child, bts_index, &bts_record); 687 error = ds_access_bts(child->bts, bts_index, &bts_record);
688 if (error < 0) 688 if (error < 0)
689 return error; 689 return error;
690 690
@@ -705,14 +705,14 @@ static int ptrace_bts_drain(struct task_struct *child,
705 size_t end, i; 705 size_t end, i;
706 int error; 706 int error;
707 707
708 error = ds_get_bts_index(child, &end); 708 error = ds_get_bts_index(child->bts, &end);
709 if (error < 0) 709 if (error < 0)
710 return error; 710 return error;
711 711
712 if (size < (end * sizeof(struct bts_struct))) 712 if (size < (end * sizeof(struct bts_struct)))
713 return -EIO; 713 return -EIO;
714 714
715 error = ds_access_bts(child, 0, (const void **)&raw); 715 error = ds_access_bts(child->bts, 0, (const void **)&raw);
716 if (error < 0) 716 if (error < 0)
717 return error; 717 return error;
718 718
@@ -723,18 +723,13 @@ static int ptrace_bts_drain(struct task_struct *child,
723 return -EFAULT; 723 return -EFAULT;
724 } 724 }
725 725
726 error = ds_clear_bts(child); 726 error = ds_clear_bts(child->bts);
727 if (error < 0) 727 if (error < 0)
728 return error; 728 return error;
729 729
730 return end; 730 return end;
731} 731}
732 732
733static void ptrace_bts_ovfl(struct task_struct *child)
734{
735 send_sig(child->thread.bts_ovfl_signal, child, 0);
736}
737
738static int ptrace_bts_config(struct task_struct *child, 733static int ptrace_bts_config(struct task_struct *child,
739 long cfg_size, 734 long cfg_size,
740 const struct ptrace_bts_config __user *ucfg) 735 const struct ptrace_bts_config __user *ucfg)
@@ -760,23 +755,45 @@ static int ptrace_bts_config(struct task_struct *child,
760 goto errout; 755 goto errout;
761 756
762 if (cfg.flags & PTRACE_BTS_O_ALLOC) { 757 if (cfg.flags & PTRACE_BTS_O_ALLOC) {
763 ds_ovfl_callback_t ovfl = NULL; 758 bts_ovfl_callback_t ovfl = NULL;
764 unsigned int sig = 0; 759 unsigned int sig = 0;
765 760
766 /* we ignore the error in case we were not tracing child */ 761 error = -EINVAL;
767 (void)ds_release_bts(child); 762 if (cfg.size < (10 * bts_cfg.sizeof_bts))
763 goto errout;
768 764
769 if (cfg.flags & PTRACE_BTS_O_SIGNAL) { 765 if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
770 if (!cfg.signal) 766 if (!cfg.signal)
771 goto errout; 767 goto errout;
772 768
769 error = -EOPNOTSUPP;
770 goto errout;
771
773 sig = cfg.signal; 772 sig = cfg.signal;
774 ovfl = ptrace_bts_ovfl;
775 } 773 }
776 774
777 error = ds_request_bts(child, /* base = */ NULL, cfg.size, ovfl); 775 if (child->bts) {
778 if (error < 0) 776 (void)ds_release_bts(child->bts);
777 kfree(child->bts_buffer);
778
779 child->bts = NULL;
780 child->bts_buffer = NULL;
781 }
782
783 error = -ENOMEM;
784 child->bts_buffer = kzalloc(cfg.size, GFP_KERNEL);
785 if (!child->bts_buffer)
786 goto errout;
787
788 child->bts = ds_request_bts(child, child->bts_buffer, cfg.size,
789 ovfl, /* th = */ (size_t)-1);
790 if (IS_ERR(child->bts)) {
791 error = PTR_ERR(child->bts);
792 kfree(child->bts_buffer);
793 child->bts = NULL;
794 child->bts_buffer = NULL;
779 goto errout; 795 goto errout;
796 }
780 797
781 child->thread.bts_ovfl_signal = sig; 798 child->thread.bts_ovfl_signal = sig;
782 } 799 }
@@ -823,15 +840,15 @@ static int ptrace_bts_status(struct task_struct *child,
823 if (cfg_size < sizeof(cfg)) 840 if (cfg_size < sizeof(cfg))
824 return -EIO; 841 return -EIO;
825 842
826 error = ds_get_bts_end(child, &end); 843 error = ds_get_bts_end(child->bts, &end);
827 if (error < 0) 844 if (error < 0)
828 return error; 845 return error;
829 846
830 error = ds_access_bts(child, /* index = */ 0, &base); 847 error = ds_access_bts(child->bts, /* index = */ 0, &base);
831 if (error < 0) 848 if (error < 0)
832 return error; 849 return error;
833 850
834 error = ds_access_bts(child, /* index = */ end, &max); 851 error = ds_access_bts(child->bts, /* index = */ end, &max);
835 if (error < 0) 852 if (error < 0)
836 return error; 853 return error;
837 854
@@ -884,10 +901,7 @@ static int ptrace_bts_write_record(struct task_struct *child,
884 return -EINVAL; 901 return -EINVAL;
885 } 902 }
886 903
887 /* The writing task will be the switched-to task on a context 904 return ds_write_bts(child->bts, bts_record, bts_cfg.sizeof_bts);
888 * switch. It needs to write into the switched-from task's BTS
889 * buffer. */
890 return ds_unchecked_write_bts(child, bts_record, bts_cfg.sizeof_bts);
891} 905}
892 906
893void ptrace_bts_take_timestamp(struct task_struct *tsk, 907void ptrace_bts_take_timestamp(struct task_struct *tsk,
@@ -929,17 +943,16 @@ void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c)
929 switch (c->x86) { 943 switch (c->x86) {
930 case 0x6: 944 case 0x6:
931 switch (c->x86_model) { 945 switch (c->x86_model) {
946 case 0 ... 0xC:
947 /* sorry, don't know about them */
948 break;
932 case 0xD: 949 case 0xD:
933 case 0xE: /* Pentium M */ 950 case 0xE: /* Pentium M */
934 bts_configure(&bts_cfg_pentium_m); 951 bts_configure(&bts_cfg_pentium_m);
935 break; 952 break;
936 case 0xF: /* Core2 */ 953 default: /* Core2, Atom, ... */
937 case 0x1C: /* Atom */
938 bts_configure(&bts_cfg_core2); 954 bts_configure(&bts_cfg_core2);
939 break; 955 break;
940 default:
941 /* sorry, don't know about them */
942 break;
943 } 956 }
944 break; 957 break;
945 case 0xF: 958 case 0xF:
@@ -973,13 +986,17 @@ void ptrace_disable(struct task_struct *child)
973 clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); 986 clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
974#endif 987#endif
975#ifdef CONFIG_X86_PTRACE_BTS 988#ifdef CONFIG_X86_PTRACE_BTS
976 (void)ds_release_bts(child); 989 if (child->bts) {
990 (void)ds_release_bts(child->bts);
991 kfree(child->bts_buffer);
992 child->bts_buffer = NULL;
977 993
978 child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; 994 child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
979 if (!child->thread.debugctlmsr) 995 if (!child->thread.debugctlmsr)
980 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); 996 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
981 997
982 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); 998 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
999 }
983#endif /* CONFIG_X86_PTRACE_BTS */ 1000#endif /* CONFIG_X86_PTRACE_BTS */
984} 1001}
985 1002
@@ -1111,9 +1128,16 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
1111 (child, data, (struct ptrace_bts_config __user *)addr); 1128 (child, data, (struct ptrace_bts_config __user *)addr);
1112 break; 1129 break;
1113 1130
1114 case PTRACE_BTS_SIZE: 1131 case PTRACE_BTS_SIZE: {
1115 ret = ds_get_bts_index(child, /* pos = */ NULL); 1132 size_t size;
1133
1134 ret = ds_get_bts_index(child->bts, &size);
1135 if (ret == 0) {
1136 BUG_ON(size != (int) size);
1137 ret = (int) size;
1138 }
1116 break; 1139 break;
1140 }
1117 1141
1118 case PTRACE_BTS_GET: 1142 case PTRACE_BTS_GET:
1119 ret = ptrace_bts_read_record 1143 ret = ptrace_bts_read_record
@@ -1121,7 +1145,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
1121 break; 1145 break;
1122 1146
1123 case PTRACE_BTS_CLEAR: 1147 case PTRACE_BTS_CLEAR:
1124 ret = ds_clear_bts(child); 1148 ret = ds_clear_bts(child->bts);
1125 break; 1149 break;
1126 1150
1127 case PTRACE_BTS_DRAIN: 1151 case PTRACE_BTS_DRAIN:
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index f1983d9477cd..410ddbc1aa2e 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1038,13 +1038,13 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
1038 } 1038 }
1039 1039
1040 rmap_write_protect(vcpu->kvm, sp->gfn); 1040 rmap_write_protect(vcpu->kvm, sp->gfn);
1041 kvm_unlink_unsync_page(vcpu->kvm, sp);
1041 if (vcpu->arch.mmu.sync_page(vcpu, sp)) { 1042 if (vcpu->arch.mmu.sync_page(vcpu, sp)) {
1042 kvm_mmu_zap_page(vcpu->kvm, sp); 1043 kvm_mmu_zap_page(vcpu->kvm, sp);
1043 return 1; 1044 return 1;
1044 } 1045 }
1045 1046
1046 kvm_mmu_flush_tlb(vcpu); 1047 kvm_mmu_flush_tlb(vcpu);
1047 kvm_unlink_unsync_page(vcpu->kvm, sp);
1048 return 0; 1048 return 0;
1049} 1049}
1050 1050
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 613ec9aa674a..84eee43bbe74 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -331,6 +331,7 @@ static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw,
331 r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 2], 331 r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 2],
332 &curr_pte, sizeof(curr_pte)); 332 &curr_pte, sizeof(curr_pte));
333 if (r || curr_pte != gw->ptes[level - 2]) { 333 if (r || curr_pte != gw->ptes[level - 2]) {
334 kvm_mmu_put_page(shadow_page, sptep);
334 kvm_release_pfn_clean(sw->pfn); 335 kvm_release_pfn_clean(sw->pfn);
335 sw->sptep = NULL; 336 sw->sptep = NULL;
336 return 1; 337 return 1;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d06b4dc0e2ea..a4018b01e1f9 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3149,7 +3149,9 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
3149 3149
3150 if (cpu_has_virtual_nmis()) { 3150 if (cpu_has_virtual_nmis()) {
3151 if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { 3151 if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
3152 if (vmx_nmi_enabled(vcpu)) { 3152 if (vcpu->arch.interrupt.pending) {
3153 enable_nmi_window(vcpu);
3154 } else if (vmx_nmi_enabled(vcpu)) {
3153 vcpu->arch.nmi_pending = false; 3155 vcpu->arch.nmi_pending = false;
3154 vcpu->arch.nmi_injected = true; 3156 vcpu->arch.nmi_injected = true;
3155 } else { 3157 } else {
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 4152d3c3b138..21e996a70d68 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -413,6 +413,7 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
413 unsigned long error_code) 413 unsigned long error_code)
414{ 414{
415 unsigned long flags = oops_begin(); 415 unsigned long flags = oops_begin();
416 int sig = SIGKILL;
416 struct task_struct *tsk; 417 struct task_struct *tsk;
417 418
418 printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", 419 printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
@@ -423,8 +424,8 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
423 tsk->thread.trap_no = 14; 424 tsk->thread.trap_no = 14;
424 tsk->thread.error_code = error_code; 425 tsk->thread.error_code = error_code;
425 if (__die("Bad pagetable", regs, error_code)) 426 if (__die("Bad pagetable", regs, error_code))
426 regs = NULL; 427 sig = 0;
427 oops_end(flags, regs, SIGKILL); 428 oops_end(flags, regs, sig);
428} 429}
429#endif 430#endif
430 431
@@ -590,6 +591,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
590 int fault; 591 int fault;
591#ifdef CONFIG_X86_64 592#ifdef CONFIG_X86_64
592 unsigned long flags; 593 unsigned long flags;
594 int sig;
593#endif 595#endif
594 596
595 tsk = current; 597 tsk = current;
@@ -849,11 +851,12 @@ no_context:
849 bust_spinlocks(0); 851 bust_spinlocks(0);
850 do_exit(SIGKILL); 852 do_exit(SIGKILL);
851#else 853#else
854 sig = SIGKILL;
852 if (__die("Oops", regs, error_code)) 855 if (__die("Oops", regs, error_code))
853 regs = NULL; 856 sig = 0;
854 /* Executive summary in case the body of the oops scrolled away */ 857 /* Executive summary in case the body of the oops scrolled away */
855 printk(KERN_EMERG "CR2: %016lx\n", address); 858 printk(KERN_EMERG "CR2: %016lx\n", address);
856 oops_end(flags, regs, SIGKILL); 859 oops_end(flags, regs, sig);
857#endif 860#endif
858 861
859/* 862/*