diff options
Diffstat (limited to 'arch/x86')
32 files changed, 2021 insertions, 1699 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ac22bb7719f7..45c86fb94132 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -29,11 +29,14 @@ config X86 | |||
29 | select HAVE_FTRACE_MCOUNT_RECORD | 29 | select HAVE_FTRACE_MCOUNT_RECORD |
30 | select HAVE_DYNAMIC_FTRACE | 30 | select HAVE_DYNAMIC_FTRACE |
31 | select HAVE_FUNCTION_TRACER | 31 | select HAVE_FUNCTION_TRACER |
32 | select HAVE_FUNCTION_GRAPH_TRACER | ||
33 | select HAVE_FUNCTION_TRACE_MCOUNT_TEST | ||
32 | select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) | 34 | select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) |
33 | select HAVE_ARCH_KGDB if !X86_VOYAGER | 35 | select HAVE_ARCH_KGDB if !X86_VOYAGER |
34 | select HAVE_ARCH_TRACEHOOK | 36 | select HAVE_ARCH_TRACEHOOK |
35 | select HAVE_GENERIC_DMA_COHERENT if X86_32 | 37 | select HAVE_GENERIC_DMA_COHERENT if X86_32 |
36 | select HAVE_EFFICIENT_UNALIGNED_ACCESS | 38 | select HAVE_EFFICIENT_UNALIGNED_ACCESS |
39 | select USER_STACKTRACE_SUPPORT | ||
37 | 40 | ||
38 | config ARCH_DEFCONFIG | 41 | config ARCH_DEFCONFIG |
39 | string | 42 | string |
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index b815664fe370..85a78575956c 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu | |||
@@ -515,6 +515,7 @@ config CPU_SUP_UMC_32 | |||
515 | config X86_DS | 515 | config X86_DS |
516 | def_bool X86_PTRACE_BTS | 516 | def_bool X86_PTRACE_BTS |
517 | depends on X86_DEBUGCTLMSR | 517 | depends on X86_DEBUGCTLMSR |
518 | select HAVE_HW_BRANCH_TRACER | ||
518 | 519 | ||
519 | config X86_PTRACE_BTS | 520 | config X86_PTRACE_BTS |
520 | bool "Branch Trace Store" | 521 | bool "Branch Trace Store" |
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 2a3dfbd5e677..fa013f529b74 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug | |||
@@ -186,14 +186,10 @@ config IOMMU_LEAK | |||
186 | Add a simple leak tracer to the IOMMU code. This is useful when you | 186 | Add a simple leak tracer to the IOMMU code. This is useful when you |
187 | are debugging a buggy device driver that leaks IOMMU mappings. | 187 | are debugging a buggy device driver that leaks IOMMU mappings. |
188 | 188 | ||
189 | config MMIOTRACE_HOOKS | ||
190 | bool | ||
191 | |||
192 | config MMIOTRACE | 189 | config MMIOTRACE |
193 | bool "Memory mapped IO tracing" | 190 | bool "Memory mapped IO tracing" |
194 | depends on DEBUG_KERNEL && PCI | 191 | depends on DEBUG_KERNEL && PCI |
195 | select TRACING | 192 | select TRACING |
196 | select MMIOTRACE_HOOKS | ||
197 | help | 193 | help |
198 | Mmiotrace traces Memory Mapped I/O access and is meant for | 194 | Mmiotrace traces Memory Mapped I/O access and is meant for |
199 | debugging and reverse engineering. It is called from the ioremap | 195 | debugging and reverse engineering. It is called from the ioremap |
diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h index a95008457ea4..ee0ea3a96c11 100644 --- a/arch/x86/include/asm/ds.h +++ b/arch/x86/include/asm/ds.h | |||
@@ -6,14 +6,13 @@ | |||
6 | * precise-event based sampling (PEBS). | 6 | * precise-event based sampling (PEBS). |
7 | * | 7 | * |
8 | * It manages: | 8 | * It manages: |
9 | * - per-thread and per-cpu allocation of BTS and PEBS | 9 | * - DS and BTS hardware configuration |
10 | * - buffer memory allocation (optional) | 10 | * - buffer overflow handling (to be done) |
11 | * - buffer overflow handling | ||
12 | * - buffer access | 11 | * - buffer access |
13 | * | 12 | * |
14 | * It assumes: | 13 | * It does not do: |
15 | * - get_task_struct on all parameter tasks | 14 | * - security checking (is the caller allowed to trace the task) |
16 | * - current is allowed to trace parameter tasks | 15 | * - buffer allocation (memory accounting) |
17 | * | 16 | * |
18 | * | 17 | * |
19 | * Copyright (C) 2007-2008 Intel Corporation. | 18 | * Copyright (C) 2007-2008 Intel Corporation. |
@@ -26,11 +25,51 @@ | |||
26 | 25 | ||
27 | #include <linux/types.h> | 26 | #include <linux/types.h> |
28 | #include <linux/init.h> | 27 | #include <linux/init.h> |
28 | #include <linux/err.h> | ||
29 | 29 | ||
30 | 30 | ||
31 | #ifdef CONFIG_X86_DS | 31 | #ifdef CONFIG_X86_DS |
32 | 32 | ||
33 | struct task_struct; | 33 | struct task_struct; |
34 | struct ds_context; | ||
35 | struct ds_tracer; | ||
36 | struct bts_tracer; | ||
37 | struct pebs_tracer; | ||
38 | |||
39 | typedef void (*bts_ovfl_callback_t)(struct bts_tracer *); | ||
40 | typedef void (*pebs_ovfl_callback_t)(struct pebs_tracer *); | ||
41 | |||
42 | |||
43 | /* | ||
44 | * A list of features plus corresponding macros to talk about them in | ||
45 | * the ds_request function's flags parameter. | ||
46 | * | ||
47 | * We use the enum to index an array of corresponding control bits; | ||
48 | * we use the macro to index a flags bit-vector. | ||
49 | */ | ||
50 | enum ds_feature { | ||
51 | dsf_bts = 0, | ||
52 | dsf_bts_kernel, | ||
53 | #define BTS_KERNEL (1 << dsf_bts_kernel) | ||
54 | /* trace kernel-mode branches */ | ||
55 | |||
56 | dsf_bts_user, | ||
57 | #define BTS_USER (1 << dsf_bts_user) | ||
58 | /* trace user-mode branches */ | ||
59 | |||
60 | dsf_bts_overflow, | ||
61 | dsf_bts_max, | ||
62 | dsf_pebs = dsf_bts_max, | ||
63 | |||
64 | dsf_pebs_max, | ||
65 | dsf_ctl_max = dsf_pebs_max, | ||
66 | dsf_bts_timestamps = dsf_ctl_max, | ||
67 | #define BTS_TIMESTAMPS (1 << dsf_bts_timestamps) | ||
68 | /* add timestamps into BTS trace */ | ||
69 | |||
70 | #define BTS_USER_FLAGS (BTS_KERNEL | BTS_USER | BTS_TIMESTAMPS) | ||
71 | }; | ||
72 | |||
34 | 73 | ||
35 | /* | 74 | /* |
36 | * Request BTS or PEBS | 75 | * Request BTS or PEBS |
@@ -38,163 +77,169 @@ struct task_struct; | |||
38 | * Due to alignment constraints, the actual buffer may be slightly | 77 | * Due to alignment constraints, the actual buffer may be slightly |
39 | * smaller than the requested or provided buffer. | 78 | * smaller than the requested or provided buffer. |
40 | * | 79 | * |
41 | * Returns 0 on success; -Eerrno otherwise | 80 | * Returns a pointer to a tracer structure on success, or |
81 | * ERR_PTR(errcode) on failure. | ||
82 | * | ||
83 | * The interrupt threshold is independent from the overflow callback | ||
84 | * to allow users to use their own overflow interrupt handling mechanism. | ||
42 | * | 85 | * |
43 | * task: the task to request recording for; | 86 | * task: the task to request recording for; |
44 | * NULL for per-cpu recording on the current cpu | 87 | * NULL for per-cpu recording on the current cpu |
45 | * base: the base pointer for the (non-pageable) buffer; | 88 | * base: the base pointer for the (non-pageable) buffer; |
46 | * NULL if buffer allocation requested | 89 | * size: the size of the provided buffer in bytes |
47 | * size: the size of the requested or provided buffer | ||
48 | * ovfl: pointer to a function to be called on buffer overflow; | 90 | * ovfl: pointer to a function to be called on buffer overflow; |
49 | * NULL if cyclic buffer requested | 91 | * NULL if cyclic buffer requested |
92 | * th: the interrupt threshold in records from the end of the buffer; | ||
93 | * -1 if no interrupt threshold is requested. | ||
94 | * flags: a bit-mask of the above flags | ||
50 | */ | 95 | */ |
51 | typedef void (*ds_ovfl_callback_t)(struct task_struct *); | 96 | extern struct bts_tracer *ds_request_bts(struct task_struct *task, |
52 | extern int ds_request_bts(struct task_struct *task, void *base, size_t size, | 97 | void *base, size_t size, |
53 | ds_ovfl_callback_t ovfl); | 98 | bts_ovfl_callback_t ovfl, |
54 | extern int ds_request_pebs(struct task_struct *task, void *base, size_t size, | 99 | size_t th, unsigned int flags); |
55 | ds_ovfl_callback_t ovfl); | 100 | extern struct pebs_tracer *ds_request_pebs(struct task_struct *task, |
101 | void *base, size_t size, | ||
102 | pebs_ovfl_callback_t ovfl, | ||
103 | size_t th, unsigned int flags); | ||
56 | 104 | ||
57 | /* | 105 | /* |
58 | * Release BTS or PEBS resources | 106 | * Release BTS or PEBS resources |
107 | * Suspend and resume BTS or PEBS tracing | ||
59 | * | 108 | * |
60 | * Frees buffers allocated on ds_request. | 109 | * tracer: the tracer handle returned from ds_request_~() |
61 | * | ||
62 | * Returns 0 on success; -Eerrno otherwise | ||
63 | * | ||
64 | * task: the task to release resources for; | ||
65 | * NULL to release resources for the current cpu | ||
66 | */ | 110 | */ |
67 | extern int ds_release_bts(struct task_struct *task); | 111 | extern void ds_release_bts(struct bts_tracer *tracer); |
68 | extern int ds_release_pebs(struct task_struct *task); | 112 | extern void ds_suspend_bts(struct bts_tracer *tracer); |
113 | extern void ds_resume_bts(struct bts_tracer *tracer); | ||
114 | extern void ds_release_pebs(struct pebs_tracer *tracer); | ||
115 | extern void ds_suspend_pebs(struct pebs_tracer *tracer); | ||
116 | extern void ds_resume_pebs(struct pebs_tracer *tracer); | ||
69 | 117 | ||
70 | /* | ||
71 | * Return the (array) index of the write pointer. | ||
72 | * (assuming an array of BTS/PEBS records) | ||
73 | * | ||
74 | * Returns -Eerrno on error | ||
75 | * | ||
76 | * task: the task to access; | ||
77 | * NULL to access the current cpu | ||
78 | * pos (out): if not NULL, will hold the result | ||
79 | */ | ||
80 | extern int ds_get_bts_index(struct task_struct *task, size_t *pos); | ||
81 | extern int ds_get_pebs_index(struct task_struct *task, size_t *pos); | ||
82 | 118 | ||
83 | /* | 119 | /* |
84 | * Return the (array) index one record beyond the end of the array. | 120 | * The raw DS buffer state as it is used for BTS and PEBS recording. |
85 | * (assuming an array of BTS/PEBS records) | ||
86 | * | ||
87 | * Returns -Eerrno on error | ||
88 | * | 121 | * |
89 | * task: the task to access; | 122 | * This is the low-level, arch-dependent interface for working |
90 | * NULL to access the current cpu | 123 | * directly on the raw trace data. |
91 | * pos (out): if not NULL, will hold the result | ||
92 | */ | 124 | */ |
93 | extern int ds_get_bts_end(struct task_struct *task, size_t *pos); | 125 | struct ds_trace { |
94 | extern int ds_get_pebs_end(struct task_struct *task, size_t *pos); | 126 | /* the number of bts/pebs records */ |
127 | size_t n; | ||
128 | /* the size of a bts/pebs record in bytes */ | ||
129 | size_t size; | ||
130 | /* pointers into the raw buffer: | ||
131 | - to the first entry */ | ||
132 | void *begin; | ||
133 | /* - one beyond the last entry */ | ||
134 | void *end; | ||
135 | /* - one beyond the newest entry */ | ||
136 | void *top; | ||
137 | /* - the interrupt threshold */ | ||
138 | void *ith; | ||
139 | /* flags given on ds_request() */ | ||
140 | unsigned int flags; | ||
141 | }; | ||
95 | 142 | ||
96 | /* | 143 | /* |
97 | * Provide a pointer to the BTS/PEBS record at parameter index. | 144 | * An arch-independent view on branch trace data. |
98 | * (assuming an array of BTS/PEBS records) | ||
99 | * | ||
100 | * The pointer points directly into the buffer. The user is | ||
101 | * responsible for copying the record. | ||
102 | * | ||
103 | * Returns the size of a single record on success; -Eerrno on error | ||
104 | * | ||
105 | * task: the task to access; | ||
106 | * NULL to access the current cpu | ||
107 | * index: the index of the requested record | ||
108 | * record (out): pointer to the requested record | ||
109 | */ | 145 | */ |
110 | extern int ds_access_bts(struct task_struct *task, | 146 | enum bts_qualifier { |
111 | size_t index, const void **record); | 147 | bts_invalid, |
112 | extern int ds_access_pebs(struct task_struct *task, | 148 | #define BTS_INVALID bts_invalid |
113 | size_t index, const void **record); | 149 | |
150 | bts_branch, | ||
151 | #define BTS_BRANCH bts_branch | ||
152 | |||
153 | bts_task_arrives, | ||
154 | #define BTS_TASK_ARRIVES bts_task_arrives | ||
155 | |||
156 | bts_task_departs, | ||
157 | #define BTS_TASK_DEPARTS bts_task_departs | ||
158 | |||
159 | bts_qual_bit_size = 4, | ||
160 | bts_qual_max = (1 << bts_qual_bit_size), | ||
161 | }; | ||
162 | |||
163 | struct bts_struct { | ||
164 | __u64 qualifier; | ||
165 | union { | ||
166 | /* BTS_BRANCH */ | ||
167 | struct { | ||
168 | __u64 from; | ||
169 | __u64 to; | ||
170 | } lbr; | ||
171 | /* BTS_TASK_ARRIVES or BTS_TASK_DEPARTS */ | ||
172 | struct { | ||
173 | __u64 jiffies; | ||
174 | pid_t pid; | ||
175 | } timestamp; | ||
176 | } variant; | ||
177 | }; | ||
114 | 178 | ||
115 | /* | ||
116 | * Write one or more BTS/PEBS records at the write pointer index and | ||
117 | * advance the write pointer. | ||
118 | * | ||
119 | * If size is not a multiple of the record size, trailing bytes are | ||
120 | * zeroed out. | ||
121 | * | ||
122 | * May result in one or more overflow notifications. | ||
123 | * | ||
124 | * If called during overflow handling, that is, with index >= | ||
125 | * interrupt threshold, the write will wrap around. | ||
126 | * | ||
127 | * An overflow notification is given if and when the interrupt | ||
128 | * threshold is reached during or after the write. | ||
129 | * | ||
130 | * Returns the number of bytes written or -Eerrno. | ||
131 | * | ||
132 | * task: the task to access; | ||
133 | * NULL to access the current cpu | ||
134 | * buffer: the buffer to write | ||
135 | * size: the size of the buffer | ||
136 | */ | ||
137 | extern int ds_write_bts(struct task_struct *task, | ||
138 | const void *buffer, size_t size); | ||
139 | extern int ds_write_pebs(struct task_struct *task, | ||
140 | const void *buffer, size_t size); | ||
141 | 179 | ||
142 | /* | 180 | /* |
143 | * Same as ds_write_bts/pebs, but omit ownership checks. | 181 | * The BTS state. |
144 | * | 182 | * |
145 | * This is needed to have some other task than the owner of the | 183 | * This gives access to the raw DS state and adds functions to provide |
146 | * BTS/PEBS buffer or the parameter task itself write into the | 184 | * an arch-independent view of the BTS data. |
147 | * respective buffer. | ||
148 | */ | 185 | */ |
149 | extern int ds_unchecked_write_bts(struct task_struct *task, | 186 | struct bts_trace { |
150 | const void *buffer, size_t size); | 187 | struct ds_trace ds; |
151 | extern int ds_unchecked_write_pebs(struct task_struct *task, | 188 | |
152 | const void *buffer, size_t size); | 189 | int (*read)(struct bts_tracer *tracer, const void *at, |
190 | struct bts_struct *out); | ||
191 | int (*write)(struct bts_tracer *tracer, const struct bts_struct *in); | ||
192 | }; | ||
193 | |||
153 | 194 | ||
154 | /* | 195 | /* |
155 | * Reset the write pointer of the BTS/PEBS buffer. | 196 | * The PEBS state. |
156 | * | ||
157 | * Returns 0 on success; -Eerrno on error | ||
158 | * | 197 | * |
159 | * task: the task to access; | 198 | * This gives access to the raw DS state and the PEBS-specific counter |
160 | * NULL to access the current cpu | 199 | * reset value. |
161 | */ | 200 | */ |
162 | extern int ds_reset_bts(struct task_struct *task); | 201 | struct pebs_trace { |
163 | extern int ds_reset_pebs(struct task_struct *task); | 202 | struct ds_trace ds; |
203 | |||
204 | /* the PEBS reset value */ | ||
205 | unsigned long long reset_value; | ||
206 | }; | ||
207 | |||
164 | 208 | ||
165 | /* | 209 | /* |
166 | * Clear the BTS/PEBS buffer and reset the write pointer. | 210 | * Read the BTS or PEBS trace. |
167 | * The entire buffer will be zeroed out. | ||
168 | * | 211 | * |
169 | * Returns 0 on success; -Eerrno on error | 212 | * Returns a view on the trace collected for the parameter tracer. |
170 | * | 213 | * |
171 | * task: the task to access; | 214 | * The view remains valid as long as the traced task is not running or |
172 | * NULL to access the current cpu | 215 | * the tracer is suspended. |
216 | * Writes into the trace buffer are not reflected. | ||
217 | * | ||
218 | * tracer: the tracer handle returned from ds_request_~() | ||
173 | */ | 219 | */ |
174 | extern int ds_clear_bts(struct task_struct *task); | 220 | extern const struct bts_trace *ds_read_bts(struct bts_tracer *tracer); |
175 | extern int ds_clear_pebs(struct task_struct *task); | 221 | extern const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer); |
222 | |||
176 | 223 | ||
177 | /* | 224 | /* |
178 | * Provide the PEBS counter reset value. | 225 | * Reset the write pointer of the BTS/PEBS buffer. |
179 | * | 226 | * |
180 | * Returns 0 on success; -Eerrno on error | 227 | * Returns 0 on success; -Eerrno on error |
181 | * | 228 | * |
182 | * task: the task to access; | 229 | * tracer: the tracer handle returned from ds_request_~() |
183 | * NULL to access the current cpu | ||
184 | * value (out): the counter reset value | ||
185 | */ | 230 | */ |
186 | extern int ds_get_pebs_reset(struct task_struct *task, u64 *value); | 231 | extern int ds_reset_bts(struct bts_tracer *tracer); |
232 | extern int ds_reset_pebs(struct pebs_tracer *tracer); | ||
187 | 233 | ||
188 | /* | 234 | /* |
189 | * Set the PEBS counter reset value. | 235 | * Set the PEBS counter reset value. |
190 | * | 236 | * |
191 | * Returns 0 on success; -Eerrno on error | 237 | * Returns 0 on success; -Eerrno on error |
192 | * | 238 | * |
193 | * task: the task to access; | 239 | * tracer: the tracer handle returned from ds_request_pebs() |
194 | * NULL to access the current cpu | ||
195 | * value: the new counter reset value | 240 | * value: the new counter reset value |
196 | */ | 241 | */ |
197 | extern int ds_set_pebs_reset(struct task_struct *task, u64 value); | 242 | extern int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value); |
198 | 243 | ||
199 | /* | 244 | /* |
200 | * Initialization | 245 | * Initialization |
@@ -202,39 +247,17 @@ extern int ds_set_pebs_reset(struct task_struct *task, u64 value); | |||
202 | struct cpuinfo_x86; | 247 | struct cpuinfo_x86; |
203 | extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *); | 248 | extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *); |
204 | 249 | ||
205 | |||
206 | |||
207 | /* | 250 | /* |
208 | * The DS context - part of struct thread_struct. | 251 | * Context switch work |
209 | */ | 252 | */ |
210 | struct ds_context { | 253 | extern void ds_switch_to(struct task_struct *prev, struct task_struct *next); |
211 | /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */ | ||
212 | unsigned char *ds; | ||
213 | /* the owner of the BTS and PEBS configuration, respectively */ | ||
214 | struct task_struct *owner[2]; | ||
215 | /* buffer overflow notification function for BTS and PEBS */ | ||
216 | ds_ovfl_callback_t callback[2]; | ||
217 | /* the original buffer address */ | ||
218 | void *buffer[2]; | ||
219 | /* the number of allocated pages for on-request allocated buffers */ | ||
220 | unsigned int pages[2]; | ||
221 | /* use count */ | ||
222 | unsigned long count; | ||
223 | /* a pointer to the context location inside the thread_struct | ||
224 | * or the per_cpu context array */ | ||
225 | struct ds_context **this; | ||
226 | /* a pointer to the task owning this context, or NULL, if the | ||
227 | * context is owned by a cpu */ | ||
228 | struct task_struct *task; | ||
229 | }; | ||
230 | |||
231 | /* called by exit_thread() to free leftover contexts */ | ||
232 | extern void ds_free(struct ds_context *context); | ||
233 | 254 | ||
234 | #else /* CONFIG_X86_DS */ | 255 | #else /* CONFIG_X86_DS */ |
235 | 256 | ||
236 | struct cpuinfo_x86; | 257 | struct cpuinfo_x86; |
237 | static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {} | 258 | static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {} |
259 | static inline void ds_switch_to(struct task_struct *prev, | ||
260 | struct task_struct *next) {} | ||
238 | 261 | ||
239 | #endif /* CONFIG_X86_DS */ | 262 | #endif /* CONFIG_X86_DS */ |
240 | #endif /* _ASM_X86_DS_H */ | 263 | #endif /* _ASM_X86_DS_H */ |
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 9e8bc29b8b17..b55b4a7fbefd 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h | |||
@@ -1,6 +1,33 @@ | |||
1 | #ifndef _ASM_X86_FTRACE_H | 1 | #ifndef _ASM_X86_FTRACE_H |
2 | #define _ASM_X86_FTRACE_H | 2 | #define _ASM_X86_FTRACE_H |
3 | 3 | ||
4 | #ifdef __ASSEMBLY__ | ||
5 | |||
6 | .macro MCOUNT_SAVE_FRAME | ||
7 | /* taken from glibc */ | ||
8 | subq $0x38, %rsp | ||
9 | movq %rax, (%rsp) | ||
10 | movq %rcx, 8(%rsp) | ||
11 | movq %rdx, 16(%rsp) | ||
12 | movq %rsi, 24(%rsp) | ||
13 | movq %rdi, 32(%rsp) | ||
14 | movq %r8, 40(%rsp) | ||
15 | movq %r9, 48(%rsp) | ||
16 | .endm | ||
17 | |||
18 | .macro MCOUNT_RESTORE_FRAME | ||
19 | movq 48(%rsp), %r9 | ||
20 | movq 40(%rsp), %r8 | ||
21 | movq 32(%rsp), %rdi | ||
22 | movq 24(%rsp), %rsi | ||
23 | movq 16(%rsp), %rdx | ||
24 | movq 8(%rsp), %rcx | ||
25 | movq (%rsp), %rax | ||
26 | addq $0x38, %rsp | ||
27 | .endm | ||
28 | |||
29 | #endif | ||
30 | |||
4 | #ifdef CONFIG_FUNCTION_TRACER | 31 | #ifdef CONFIG_FUNCTION_TRACER |
5 | #define MCOUNT_ADDR ((long)(mcount)) | 32 | #define MCOUNT_ADDR ((long)(mcount)) |
6 | #define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ | 33 | #define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ |
@@ -17,8 +44,40 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) | |||
17 | */ | 44 | */ |
18 | return addr - 1; | 45 | return addr - 1; |
19 | } | 46 | } |
20 | #endif | ||
21 | 47 | ||
48 | #ifdef CONFIG_DYNAMIC_FTRACE | ||
49 | |||
50 | struct dyn_arch_ftrace { | ||
51 | /* No extra data needed for x86 */ | ||
52 | }; | ||
53 | |||
54 | #endif /* CONFIG_DYNAMIC_FTRACE */ | ||
55 | #endif /* __ASSEMBLY__ */ | ||
22 | #endif /* CONFIG_FUNCTION_TRACER */ | 56 | #endif /* CONFIG_FUNCTION_TRACER */ |
23 | 57 | ||
58 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
59 | |||
60 | #ifndef __ASSEMBLY__ | ||
61 | |||
62 | /* | ||
63 | * Stack of return addresses for functions | ||
64 | * of a thread. | ||
65 | * Used in struct thread_info | ||
66 | */ | ||
67 | struct ftrace_ret_stack { | ||
68 | unsigned long ret; | ||
69 | unsigned long func; | ||
70 | unsigned long long calltime; | ||
71 | }; | ||
72 | |||
73 | /* | ||
74 | * Primary handler of a function return. | ||
75 | * It relies on ftrace_return_to_handler. | ||
76 | * Defined in entry_32/64.S | ||
77 | */ | ||
78 | extern void return_to_handler(void); | ||
79 | |||
80 | #endif /* __ASSEMBLY__ */ | ||
81 | #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ | ||
82 | |||
24 | #endif /* _ASM_X86_FTRACE_H */ | 83 | #endif /* _ASM_X86_FTRACE_H */ |
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 5ca01e383269..aa5914f8e501 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -752,6 +752,19 @@ extern void switch_to_new_gdt(void); | |||
752 | extern void cpu_init(void); | 752 | extern void cpu_init(void); |
753 | extern void init_gdt(int cpu); | 753 | extern void init_gdt(int cpu); |
754 | 754 | ||
755 | static inline unsigned long get_debugctlmsr(void) | ||
756 | { | ||
757 | unsigned long debugctlmsr = 0; | ||
758 | |||
759 | #ifndef CONFIG_X86_DEBUGCTLMSR | ||
760 | if (boot_cpu_data.x86 < 6) | ||
761 | return 0; | ||
762 | #endif | ||
763 | rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); | ||
764 | |||
765 | return debugctlmsr; | ||
766 | } | ||
767 | |||
755 | static inline void update_debugctlmsr(unsigned long debugctlmsr) | 768 | static inline void update_debugctlmsr(unsigned long debugctlmsr) |
756 | { | 769 | { |
757 | #ifndef CONFIG_X86_DEBUGCTLMSR | 770 | #ifndef CONFIG_X86_DEBUGCTLMSR |
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index eefb0594b058..fbf744215911 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h | |||
@@ -6,7 +6,6 @@ | |||
6 | #include <asm/processor-flags.h> | 6 | #include <asm/processor-flags.h> |
7 | 7 | ||
8 | #ifdef __KERNEL__ | 8 | #ifdef __KERNEL__ |
9 | #include <asm/ds.h> /* the DS BTS struct is used for ptrace too */ | ||
10 | #include <asm/segment.h> | 9 | #include <asm/segment.h> |
11 | #endif | 10 | #endif |
12 | 11 | ||
@@ -128,34 +127,6 @@ struct pt_regs { | |||
128 | #endif /* !__i386__ */ | 127 | #endif /* !__i386__ */ |
129 | 128 | ||
130 | 129 | ||
131 | #ifdef CONFIG_X86_PTRACE_BTS | ||
132 | /* a branch trace record entry | ||
133 | * | ||
134 | * In order to unify the interface between various processor versions, | ||
135 | * we use the below data structure for all processors. | ||
136 | */ | ||
137 | enum bts_qualifier { | ||
138 | BTS_INVALID = 0, | ||
139 | BTS_BRANCH, | ||
140 | BTS_TASK_ARRIVES, | ||
141 | BTS_TASK_DEPARTS | ||
142 | }; | ||
143 | |||
144 | struct bts_struct { | ||
145 | __u64 qualifier; | ||
146 | union { | ||
147 | /* BTS_BRANCH */ | ||
148 | struct { | ||
149 | __u64 from_ip; | ||
150 | __u64 to_ip; | ||
151 | } lbr; | ||
152 | /* BTS_TASK_ARRIVES or | ||
153 | BTS_TASK_DEPARTS */ | ||
154 | __u64 jiffies; | ||
155 | } variant; | ||
156 | }; | ||
157 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
158 | |||
159 | #ifdef __KERNEL__ | 130 | #ifdef __KERNEL__ |
160 | 131 | ||
161 | #include <linux/init.h> | 132 | #include <linux/init.h> |
@@ -163,13 +134,6 @@ struct bts_struct { | |||
163 | struct cpuinfo_x86; | 134 | struct cpuinfo_x86; |
164 | struct task_struct; | 135 | struct task_struct; |
165 | 136 | ||
166 | #ifdef CONFIG_X86_PTRACE_BTS | ||
167 | extern void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *); | ||
168 | extern void ptrace_bts_take_timestamp(struct task_struct *, enum bts_qualifier); | ||
169 | #else | ||
170 | #define ptrace_bts_init_intel(config) do {} while (0) | ||
171 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
172 | |||
173 | extern unsigned long profile_pc(struct pt_regs *regs); | 137 | extern unsigned long profile_pc(struct pt_regs *regs); |
174 | 138 | ||
175 | extern unsigned long | 139 | extern unsigned long |
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index e44d379faad2..bf8113d16a33 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h | |||
@@ -20,6 +20,8 @@ | |||
20 | struct task_struct; | 20 | struct task_struct; |
21 | struct exec_domain; | 21 | struct exec_domain; |
22 | #include <asm/processor.h> | 22 | #include <asm/processor.h> |
23 | #include <asm/ftrace.h> | ||
24 | #include <asm/atomic.h> | ||
23 | 25 | ||
24 | struct thread_info { | 26 | struct thread_info { |
25 | struct task_struct *task; /* main task structure */ | 27 | struct task_struct *task; /* main task structure */ |
@@ -91,7 +93,6 @@ struct thread_info { | |||
91 | #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ | 93 | #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ |
92 | #define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ | 94 | #define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ |
93 | #define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ | 95 | #define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ |
94 | #define TIF_BTS_TRACE_TS 27 /* record scheduling event timestamps */ | ||
95 | 96 | ||
96 | #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) | 97 | #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) |
97 | #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) | 98 | #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) |
@@ -113,7 +114,6 @@ struct thread_info { | |||
113 | #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) | 114 | #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) |
114 | #define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) | 115 | #define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) |
115 | #define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) | 116 | #define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) |
116 | #define _TIF_BTS_TRACE_TS (1 << TIF_BTS_TRACE_TS) | ||
117 | 117 | ||
118 | /* work to do in syscall_trace_enter() */ | 118 | /* work to do in syscall_trace_enter() */ |
119 | #define _TIF_WORK_SYSCALL_ENTRY \ | 119 | #define _TIF_WORK_SYSCALL_ENTRY \ |
@@ -139,8 +139,7 @@ struct thread_info { | |||
139 | 139 | ||
140 | /* flags to check in __switch_to() */ | 140 | /* flags to check in __switch_to() */ |
141 | #define _TIF_WORK_CTXSW \ | 141 | #define _TIF_WORK_CTXSW \ |
142 | (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS| \ | 142 | (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_NOTSC) |
143 | _TIF_NOTSC) | ||
144 | 143 | ||
145 | #define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW | 144 | #define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW |
146 | #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) | 145 | #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) |
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index b62a7667828e..1cad9318d217 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -25,7 +25,7 @@ CFLAGS_tsc.o := $(nostackp) | |||
25 | 25 | ||
26 | obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o | 26 | obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o |
27 | obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o | 27 | obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o |
28 | obj-y += time_$(BITS).o ioport.o ldt.o | 28 | obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o |
29 | obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o | 29 | obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o |
30 | obj-$(CONFIG_X86_VISWS) += visws_quirks.o | 30 | obj-$(CONFIG_X86_VISWS) += visws_quirks.o |
31 | obj-$(CONFIG_X86_32) += probe_roms_32.o | 31 | obj-$(CONFIG_X86_32) += probe_roms_32.o |
@@ -65,6 +65,7 @@ obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o | |||
65 | obj-$(CONFIG_X86_IO_APIC) += io_apic.o | 65 | obj-$(CONFIG_X86_IO_APIC) += io_apic.o |
66 | obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o | 66 | obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o |
67 | obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o | 67 | obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o |
68 | obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o | ||
68 | obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o | 69 | obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o |
69 | obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o | 70 | obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o |
70 | obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o | 71 | obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o |
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 16f94879b525..b946ac19753b 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c | |||
@@ -30,6 +30,7 @@ | |||
30 | #include <linux/module.h> | 30 | #include <linux/module.h> |
31 | #include <linux/dmi.h> | 31 | #include <linux/dmi.h> |
32 | #include <linux/dmar.h> | 32 | #include <linux/dmar.h> |
33 | #include <linux/ftrace.h> | ||
33 | 34 | ||
34 | #include <asm/atomic.h> | 35 | #include <asm/atomic.h> |
35 | #include <asm/smp.h> | 36 | #include <asm/smp.h> |
@@ -800,7 +801,7 @@ static void local_apic_timer_interrupt(void) | |||
800 | * [ if a single-CPU system runs an SMP kernel then we call the local | 801 | * [ if a single-CPU system runs an SMP kernel then we call the local |
801 | * interrupt as well. Thus we cannot inline the local irq ... ] | 802 | * interrupt as well. Thus we cannot inline the local irq ... ] |
802 | */ | 803 | */ |
803 | void smp_apic_timer_interrupt(struct pt_regs *regs) | 804 | void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs) |
804 | { | 805 | { |
805 | struct pt_regs *old_regs = set_irq_regs(regs); | 806 | struct pt_regs *old_regs = set_irq_regs(regs); |
806 | 807 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 8e48c5d4467d..88ea02dcb622 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | |||
@@ -33,6 +33,7 @@ | |||
33 | #include <linux/cpufreq.h> | 33 | #include <linux/cpufreq.h> |
34 | #include <linux/compiler.h> | 34 | #include <linux/compiler.h> |
35 | #include <linux/dmi.h> | 35 | #include <linux/dmi.h> |
36 | #include <linux/ftrace.h> | ||
36 | 37 | ||
37 | #include <linux/acpi.h> | 38 | #include <linux/acpi.h> |
38 | #include <acpi/processor.h> | 39 | #include <acpi/processor.h> |
@@ -391,6 +392,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, | |||
391 | unsigned int next_perf_state = 0; /* Index into perf table */ | 392 | unsigned int next_perf_state = 0; /* Index into perf table */ |
392 | unsigned int i; | 393 | unsigned int i; |
393 | int result = 0; | 394 | int result = 0; |
395 | struct power_trace it; | ||
394 | 396 | ||
395 | dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu); | 397 | dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu); |
396 | 398 | ||
@@ -427,6 +429,8 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, | |||
427 | } | 429 | } |
428 | } | 430 | } |
429 | 431 | ||
432 | trace_power_mark(&it, POWER_PSTATE, next_perf_state); | ||
433 | |||
430 | switch (data->cpu_feature) { | 434 | switch (data->cpu_feature) { |
431 | case SYSTEM_INTEL_MSR_CAPABLE: | 435 | case SYSTEM_INTEL_MSR_CAPABLE: |
432 | cmd.type = SYSTEM_INTEL_MSR_CAPABLE; | 436 | cmd.type = SYSTEM_INTEL_MSR_CAPABLE; |
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index ccfd2047630c..8ea6929e974c 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -11,7 +11,6 @@ | |||
11 | #include <asm/pgtable.h> | 11 | #include <asm/pgtable.h> |
12 | #include <asm/msr.h> | 12 | #include <asm/msr.h> |
13 | #include <asm/uaccess.h> | 13 | #include <asm/uaccess.h> |
14 | #include <asm/ptrace.h> | ||
15 | #include <asm/ds.h> | 14 | #include <asm/ds.h> |
16 | #include <asm/bugs.h> | 15 | #include <asm/bugs.h> |
17 | 16 | ||
@@ -326,9 +325,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
326 | set_cpu_cap(c, X86_FEATURE_P3); | 325 | set_cpu_cap(c, X86_FEATURE_P3); |
327 | #endif | 326 | #endif |
328 | 327 | ||
329 | if (cpu_has_bts) | ||
330 | ptrace_bts_init_intel(c); | ||
331 | |||
332 | if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { | 328 | if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { |
333 | /* | 329 | /* |
334 | * let's use the legacy cpuid vector 0x1 and 0x4 for topology | 330 | * let's use the legacy cpuid vector 0x1 and 0x4 for topology |
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index d6938d9351cf..98d271e60e08 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c | |||
@@ -6,14 +6,13 @@ | |||
6 | * precise-event based sampling (PEBS). | 6 | * precise-event based sampling (PEBS). |
7 | * | 7 | * |
8 | * It manages: | 8 | * It manages: |
9 | * - per-thread and per-cpu allocation of BTS and PEBS | 9 | * - DS and BTS hardware configuration |
10 | * - buffer memory allocation (optional) | 10 | * - buffer overflow handling (to be done) |
11 | * - buffer overflow handling | ||
12 | * - buffer access | 11 | * - buffer access |
13 | * | 12 | * |
14 | * It assumes: | 13 | * It does not do: |
15 | * - get_task_struct on all parameter tasks | 14 | * - security checking (is the caller allowed to trace the task) |
16 | * - current is allowed to trace parameter tasks | 15 | * - buffer allocation (memory accounting) |
17 | * | 16 | * |
18 | * | 17 | * |
19 | * Copyright (C) 2007-2008 Intel Corporation. | 18 | * Copyright (C) 2007-2008 Intel Corporation. |
@@ -28,22 +27,69 @@ | |||
28 | #include <linux/slab.h> | 27 | #include <linux/slab.h> |
29 | #include <linux/sched.h> | 28 | #include <linux/sched.h> |
30 | #include <linux/mm.h> | 29 | #include <linux/mm.h> |
30 | #include <linux/kernel.h> | ||
31 | 31 | ||
32 | 32 | ||
33 | /* | 33 | /* |
34 | * The configuration for a particular DS hardware implementation. | 34 | * The configuration for a particular DS hardware implementation. |
35 | */ | 35 | */ |
36 | struct ds_configuration { | 36 | struct ds_configuration { |
37 | /* the size of the DS structure in bytes */ | 37 | /* the name of the configuration */ |
38 | unsigned char sizeof_ds; | 38 | const char *name; |
39 | /* the size of one pointer-typed field in the DS structure in bytes; | 39 | /* the size of one pointer-typed field in the DS structure and |
40 | this covers the first 8 fields related to buffer management. */ | 40 | in the BTS and PEBS buffers in bytes; |
41 | this covers the first 8 DS fields related to buffer management. */ | ||
41 | unsigned char sizeof_field; | 42 | unsigned char sizeof_field; |
42 | /* the size of a BTS/PEBS record in bytes */ | 43 | /* the size of a BTS/PEBS record in bytes */ |
43 | unsigned char sizeof_rec[2]; | 44 | unsigned char sizeof_rec[2]; |
45 | /* a series of bit-masks to control various features indexed | ||
46 | * by enum ds_feature */ | ||
47 | unsigned long ctl[dsf_ctl_max]; | ||
44 | }; | 48 | }; |
45 | static struct ds_configuration ds_cfg; | 49 | static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array); |
46 | 50 | ||
51 | #define ds_cfg per_cpu(ds_cfg_array, smp_processor_id()) | ||
52 | |||
53 | #define MAX_SIZEOF_DS (12 * 8) /* maximal size of a DS configuration */ | ||
54 | #define MAX_SIZEOF_BTS (3 * 8) /* maximal size of a BTS record */ | ||
55 | #define DS_ALIGNMENT (1 << 3) /* BTS and PEBS buffer alignment */ | ||
56 | |||
57 | #define BTS_CONTROL \ | ||
58 | (ds_cfg.ctl[dsf_bts] | ds_cfg.ctl[dsf_bts_kernel] | ds_cfg.ctl[dsf_bts_user] |\ | ||
59 | ds_cfg.ctl[dsf_bts_overflow]) | ||
60 | |||
61 | |||
62 | /* | ||
63 | * A BTS or PEBS tracer. | ||
64 | * | ||
65 | * This holds the configuration of the tracer and serves as a handle | ||
66 | * to identify tracers. | ||
67 | */ | ||
68 | struct ds_tracer { | ||
69 | /* the DS context (partially) owned by this tracer */ | ||
70 | struct ds_context *context; | ||
71 | /* the buffer provided on ds_request() and its size in bytes */ | ||
72 | void *buffer; | ||
73 | size_t size; | ||
74 | }; | ||
75 | |||
76 | struct bts_tracer { | ||
77 | /* the common DS part */ | ||
78 | struct ds_tracer ds; | ||
79 | /* the trace including the DS configuration */ | ||
80 | struct bts_trace trace; | ||
81 | /* buffer overflow notification function */ | ||
82 | bts_ovfl_callback_t ovfl; | ||
83 | }; | ||
84 | |||
85 | struct pebs_tracer { | ||
86 | /* the common DS part */ | ||
87 | struct ds_tracer ds; | ||
88 | /* the trace including the DS configuration */ | ||
89 | struct pebs_trace trace; | ||
90 | /* buffer overflow notification function */ | ||
91 | pebs_ovfl_callback_t ovfl; | ||
92 | }; | ||
47 | 93 | ||
48 | /* | 94 | /* |
49 | * Debug Store (DS) save area configuration (see Intel64 and IA32 | 95 | * Debug Store (DS) save area configuration (see Intel64 and IA32 |
@@ -109,32 +155,9 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual, | |||
109 | 155 | ||
110 | 156 | ||
111 | /* | 157 | /* |
112 | * Locking is done only for allocating BTS or PEBS resources and for | 158 | * Locking is done only for allocating BTS or PEBS resources. |
113 | * guarding context and buffer memory allocation. | ||
114 | * | ||
115 | * Most functions require the current task to own the ds context part | ||
116 | * they are going to access. All the locking is done when validating | ||
117 | * access to the context. | ||
118 | */ | 159 | */ |
119 | static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock); | 160 | static DEFINE_SPINLOCK(ds_lock); |
120 | |||
121 | /* | ||
122 | * Validate that the current task is allowed to access the BTS/PEBS | ||
123 | * buffer of the parameter task. | ||
124 | * | ||
125 | * Returns 0, if access is granted; -Eerrno, otherwise. | ||
126 | */ | ||
127 | static inline int ds_validate_access(struct ds_context *context, | ||
128 | enum ds_qualifier qual) | ||
129 | { | ||
130 | if (!context) | ||
131 | return -EPERM; | ||
132 | |||
133 | if (context->owner[qual] == current) | ||
134 | return 0; | ||
135 | |||
136 | return -EPERM; | ||
137 | } | ||
138 | 161 | ||
139 | 162 | ||
140 | /* | 163 | /* |
@@ -150,27 +173,32 @@ static inline int ds_validate_access(struct ds_context *context, | |||
150 | * >0 number of per-thread tracers | 173 | * >0 number of per-thread tracers |
151 | * <0 number of per-cpu tracers | 174 | * <0 number of per-cpu tracers |
152 | * | 175 | * |
153 | * The below functions to get and put tracers and to check the | ||
154 | * allocation type require the ds_lock to be held by the caller. | ||
155 | * | ||
156 | * Tracers essentially gives the number of ds contexts for a certain | 176 | * Tracers essentially gives the number of ds contexts for a certain |
157 | * type of allocation. | 177 | * type of allocation. |
158 | */ | 178 | */ |
159 | static long tracers; | 179 | static atomic_t tracers = ATOMIC_INIT(0); |
160 | 180 | ||
161 | static inline void get_tracer(struct task_struct *task) | 181 | static inline void get_tracer(struct task_struct *task) |
162 | { | 182 | { |
163 | tracers += (task ? 1 : -1); | 183 | if (task) |
184 | atomic_inc(&tracers); | ||
185 | else | ||
186 | atomic_dec(&tracers); | ||
164 | } | 187 | } |
165 | 188 | ||
166 | static inline void put_tracer(struct task_struct *task) | 189 | static inline void put_tracer(struct task_struct *task) |
167 | { | 190 | { |
168 | tracers -= (task ? 1 : -1); | 191 | if (task) |
192 | atomic_dec(&tracers); | ||
193 | else | ||
194 | atomic_inc(&tracers); | ||
169 | } | 195 | } |
170 | 196 | ||
171 | static inline int check_tracer(struct task_struct *task) | 197 | static inline int check_tracer(struct task_struct *task) |
172 | { | 198 | { |
173 | return (task ? (tracers >= 0) : (tracers <= 0)); | 199 | return task ? |
200 | (atomic_read(&tracers) >= 0) : | ||
201 | (atomic_read(&tracers) <= 0); | ||
174 | } | 202 | } |
175 | 203 | ||
176 | 204 | ||
@@ -183,99 +211,70 @@ static inline int check_tracer(struct task_struct *task) | |||
183 | * | 211 | * |
184 | * Contexts are use-counted. They are allocated on first access and | 212 | * Contexts are use-counted. They are allocated on first access and |
185 | * deallocated when the last user puts the context. | 213 | * deallocated when the last user puts the context. |
186 | * | ||
187 | * We distinguish between an allocating and a non-allocating get of a | ||
188 | * context: | ||
189 | * - the allocating get is used for requesting BTS/PEBS resources. It | ||
190 | * requires the caller to hold the global ds_lock. | ||
191 | * - the non-allocating get is used for all other cases. A | ||
192 | * non-existing context indicates an error. It acquires and releases | ||
193 | * the ds_lock itself for obtaining the context. | ||
194 | * | ||
195 | * A context and its DS configuration are allocated and deallocated | ||
196 | * together. A context always has a DS configuration of the | ||
197 | * appropriate size. | ||
198 | */ | 214 | */ |
199 | static DEFINE_PER_CPU(struct ds_context *, system_context); | 215 | struct ds_context { |
200 | 216 | /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */ | |
201 | #define this_system_context per_cpu(system_context, smp_processor_id()) | 217 | unsigned char ds[MAX_SIZEOF_DS]; |
202 | 218 | /* the owner of the BTS and PEBS configuration, respectively */ | |
203 | /* | 219 | struct bts_tracer *bts_master; |
204 | * Returns the pointer to the parameter task's context or to the | 220 | struct pebs_tracer *pebs_master; |
205 | * system-wide context, if task is NULL. | 221 | /* use count */ |
206 | * | 222 | unsigned long count; |
207 | * Increases the use count of the returned context, if not NULL. | 223 | /* a pointer to the context location inside the thread_struct |
208 | */ | 224 | * or the per_cpu context array */ |
209 | static inline struct ds_context *ds_get_context(struct task_struct *task) | 225 | struct ds_context **this; |
210 | { | 226 | /* a pointer to the task owning this context, or NULL, if the |
211 | struct ds_context *context; | 227 | * context is owned by a cpu */ |
212 | unsigned long irq; | 228 | struct task_struct *task; |
229 | }; | ||
213 | 230 | ||
214 | spin_lock_irqsave(&ds_lock, irq); | 231 | static DEFINE_PER_CPU(struct ds_context *, system_context_array); |
215 | 232 | ||
216 | context = (task ? task->thread.ds_ctx : this_system_context); | 233 | #define system_context per_cpu(system_context_array, smp_processor_id()) |
217 | if (context) | ||
218 | context->count++; | ||
219 | 234 | ||
220 | spin_unlock_irqrestore(&ds_lock, irq); | ||
221 | 235 | ||
222 | return context; | 236 | static inline struct ds_context *ds_get_context(struct task_struct *task) |
223 | } | ||
224 | |||
225 | /* | ||
226 | * Same as ds_get_context, but allocates the context and it's DS | ||
227 | * structure, if necessary; returns NULL; if out of memory. | ||
228 | */ | ||
229 | static inline struct ds_context *ds_alloc_context(struct task_struct *task) | ||
230 | { | 237 | { |
231 | struct ds_context **p_context = | 238 | struct ds_context **p_context = |
232 | (task ? &task->thread.ds_ctx : &this_system_context); | 239 | (task ? &task->thread.ds_ctx : &system_context); |
233 | struct ds_context *context = *p_context; | 240 | struct ds_context *context = NULL; |
241 | struct ds_context *new_context = NULL; | ||
234 | unsigned long irq; | 242 | unsigned long irq; |
235 | 243 | ||
236 | if (!context) { | 244 | /* Chances are small that we already have a context. */ |
237 | context = kzalloc(sizeof(*context), GFP_KERNEL); | 245 | new_context = kzalloc(sizeof(*new_context), GFP_KERNEL); |
238 | if (!context) | 246 | if (!new_context) |
239 | return NULL; | 247 | return NULL; |
240 | |||
241 | context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); | ||
242 | if (!context->ds) { | ||
243 | kfree(context); | ||
244 | return NULL; | ||
245 | } | ||
246 | 248 | ||
247 | spin_lock_irqsave(&ds_lock, irq); | 249 | spin_lock_irqsave(&ds_lock, irq); |
248 | 250 | ||
249 | if (*p_context) { | 251 | context = *p_context; |
250 | kfree(context->ds); | 252 | if (!context) { |
251 | kfree(context); | 253 | context = new_context; |
252 | 254 | ||
253 | context = *p_context; | 255 | context->this = p_context; |
254 | } else { | 256 | context->task = task; |
255 | *p_context = context; | 257 | context->count = 0; |
256 | 258 | ||
257 | context->this = p_context; | 259 | if (task) |
258 | context->task = task; | 260 | set_tsk_thread_flag(task, TIF_DS_AREA_MSR); |
259 | 261 | ||
260 | if (task) | 262 | if (!task || (task == current)) |
261 | set_tsk_thread_flag(task, TIF_DS_AREA_MSR); | 263 | wrmsrl(MSR_IA32_DS_AREA, (unsigned long)context->ds); |
262 | 264 | ||
263 | if (!task || (task == current)) | 265 | *p_context = context; |
264 | wrmsrl(MSR_IA32_DS_AREA, | ||
265 | (unsigned long)context->ds); | ||
266 | } | ||
267 | spin_unlock_irqrestore(&ds_lock, irq); | ||
268 | } | 266 | } |
269 | 267 | ||
270 | context->count++; | 268 | context->count++; |
271 | 269 | ||
270 | spin_unlock_irqrestore(&ds_lock, irq); | ||
271 | |||
272 | if (context != new_context) | ||
273 | kfree(new_context); | ||
274 | |||
272 | return context; | 275 | return context; |
273 | } | 276 | } |
274 | 277 | ||
275 | /* | ||
276 | * Decreases the use count of the parameter context, if not NULL. | ||
277 | * Deallocates the context, if the use count reaches zero. | ||
278 | */ | ||
279 | static inline void ds_put_context(struct ds_context *context) | 278 | static inline void ds_put_context(struct ds_context *context) |
280 | { | 279 | { |
281 | unsigned long irq; | 280 | unsigned long irq; |
@@ -285,8 +284,10 @@ static inline void ds_put_context(struct ds_context *context) | |||
285 | 284 | ||
286 | spin_lock_irqsave(&ds_lock, irq); | 285 | spin_lock_irqsave(&ds_lock, irq); |
287 | 286 | ||
288 | if (--context->count) | 287 | if (--context->count) { |
289 | goto out; | 288 | spin_unlock_irqrestore(&ds_lock, irq); |
289 | return; | ||
290 | } | ||
290 | 291 | ||
291 | *(context->this) = NULL; | 292 | *(context->this) = NULL; |
292 | 293 | ||
@@ -296,135 +297,263 @@ static inline void ds_put_context(struct ds_context *context) | |||
296 | if (!context->task || (context->task == current)) | 297 | if (!context->task || (context->task == current)) |
297 | wrmsrl(MSR_IA32_DS_AREA, 0); | 298 | wrmsrl(MSR_IA32_DS_AREA, 0); |
298 | 299 | ||
299 | put_tracer(context->task); | 300 | spin_unlock_irqrestore(&ds_lock, irq); |
300 | 301 | ||
301 | /* free any leftover buffers from tracers that did not | ||
302 | * deallocate them properly. */ | ||
303 | kfree(context->buffer[ds_bts]); | ||
304 | kfree(context->buffer[ds_pebs]); | ||
305 | kfree(context->ds); | ||
306 | kfree(context); | 302 | kfree(context); |
307 | out: | ||
308 | spin_unlock_irqrestore(&ds_lock, irq); | ||
309 | } | 303 | } |
310 | 304 | ||
311 | 305 | ||
312 | /* | 306 | /* |
313 | * Handle a buffer overflow | 307 | * Call the tracer's callback on a buffer overflow. |
314 | * | 308 | * |
315 | * task: the task whose buffers are overflowing; | ||
316 | * NULL for a buffer overflow on the current cpu | ||
317 | * context: the ds context | 309 | * context: the ds context |
318 | * qual: the buffer type | 310 | * qual: the buffer type |
319 | */ | 311 | */ |
320 | static void ds_overflow(struct task_struct *task, struct ds_context *context, | 312 | static void ds_overflow(struct ds_context *context, enum ds_qualifier qual) |
321 | enum ds_qualifier qual) | ||
322 | { | 313 | { |
323 | if (!context) | 314 | switch (qual) { |
324 | return; | 315 | case ds_bts: |
325 | 316 | if (context->bts_master && | |
326 | if (context->callback[qual]) | 317 | context->bts_master->ovfl) |
327 | (*context->callback[qual])(task); | 318 | context->bts_master->ovfl(context->bts_master); |
328 | 319 | break; | |
329 | /* todo: do some more overflow handling */ | 320 | case ds_pebs: |
321 | if (context->pebs_master && | ||
322 | context->pebs_master->ovfl) | ||
323 | context->pebs_master->ovfl(context->pebs_master); | ||
324 | break; | ||
325 | } | ||
330 | } | 326 | } |
331 | 327 | ||
332 | 328 | ||
333 | /* | 329 | /* |
334 | * Allocate a non-pageable buffer of the parameter size. | 330 | * Write raw data into the BTS or PEBS buffer. |
335 | * Checks the memory and the locked memory rlimit. | ||
336 | * | 331 | * |
337 | * Returns the buffer, if successful; | 332 | * The remainder of any partially written record is zeroed out. |
338 | * NULL, if out of memory or rlimit exceeded. | ||
339 | * | 333 | * |
340 | * size: the requested buffer size in bytes | 334 | * context: the DS context |
341 | * pages (out): if not NULL, contains the number of pages reserved | 335 | * qual: the buffer type |
336 | * record: the data to write | ||
337 | * size: the size of the data | ||
342 | */ | 338 | */ |
343 | static inline void *ds_allocate_buffer(size_t size, unsigned int *pages) | 339 | static int ds_write(struct ds_context *context, enum ds_qualifier qual, |
340 | const void *record, size_t size) | ||
344 | { | 341 | { |
345 | unsigned long rlim, vm, pgsz; | 342 | int bytes_written = 0; |
346 | void *buffer; | ||
347 | 343 | ||
348 | pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; | 344 | if (!record) |
345 | return -EINVAL; | ||
349 | 346 | ||
350 | rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; | 347 | while (size) { |
351 | vm = current->mm->total_vm + pgsz; | 348 | unsigned long base, index, end, write_end, int_th; |
352 | if (rlim < vm) | 349 | unsigned long write_size, adj_write_size; |
353 | return NULL; | ||
354 | 350 | ||
355 | rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; | 351 | /* |
356 | vm = current->mm->locked_vm + pgsz; | 352 | * write as much as possible without producing an |
357 | if (rlim < vm) | 353 | * overflow interrupt. |
358 | return NULL; | 354 | * |
355 | * interrupt_threshold must either be | ||
356 | * - bigger than absolute_maximum or | ||
357 | * - point to a record between buffer_base and absolute_maximum | ||
358 | * | ||
359 | * index points to a valid record. | ||
360 | */ | ||
361 | base = ds_get(context->ds, qual, ds_buffer_base); | ||
362 | index = ds_get(context->ds, qual, ds_index); | ||
363 | end = ds_get(context->ds, qual, ds_absolute_maximum); | ||
364 | int_th = ds_get(context->ds, qual, ds_interrupt_threshold); | ||
359 | 365 | ||
360 | buffer = kzalloc(size, GFP_KERNEL); | 366 | write_end = min(end, int_th); |
361 | if (!buffer) | 367 | |
362 | return NULL; | 368 | /* if we are already beyond the interrupt threshold, |
369 | * we fill the entire buffer */ | ||
370 | if (write_end <= index) | ||
371 | write_end = end; | ||
372 | |||
373 | if (write_end <= index) | ||
374 | break; | ||
375 | |||
376 | write_size = min((unsigned long) size, write_end - index); | ||
377 | memcpy((void *)index, record, write_size); | ||
378 | |||
379 | record = (const char *)record + write_size; | ||
380 | size -= write_size; | ||
381 | bytes_written += write_size; | ||
382 | |||
383 | adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; | ||
384 | adj_write_size *= ds_cfg.sizeof_rec[qual]; | ||
385 | |||
386 | /* zero out trailing bytes */ | ||
387 | memset((char *)index + write_size, 0, | ||
388 | adj_write_size - write_size); | ||
389 | index += adj_write_size; | ||
390 | |||
391 | if (index >= end) | ||
392 | index = base; | ||
393 | ds_set(context->ds, qual, ds_index, index); | ||
394 | |||
395 | if (index >= int_th) | ||
396 | ds_overflow(context, qual); | ||
397 | } | ||
398 | |||
399 | return bytes_written; | ||
400 | } | ||
401 | |||
402 | |||
403 | /* | ||
404 | * Branch Trace Store (BTS) uses the following format. Different | ||
405 | * architectures vary in the size of those fields. | ||
406 | * - source linear address | ||
407 | * - destination linear address | ||
408 | * - flags | ||
409 | * | ||
410 | * Later architectures use 64bit pointers throughout, whereas earlier | ||
411 | * architectures use 32bit pointers in 32bit mode. | ||
412 | * | ||
413 | * We compute the base address for the first 8 fields based on: | ||
414 | * - the field size stored in the DS configuration | ||
415 | * - the relative field position | ||
416 | * | ||
417 | * In order to store additional information in the BTS buffer, we use | ||
418 | * a special source address to indicate that the record requires | ||
419 | * special interpretation. | ||
420 | * | ||
421 | * Netburst indicated via a bit in the flags field whether the branch | ||
422 | * was predicted; this is ignored. | ||
423 | * | ||
424 | * We use two levels of abstraction: | ||
425 | * - the raw data level defined here | ||
426 | * - an arch-independent level defined in ds.h | ||
427 | */ | ||
428 | |||
429 | enum bts_field { | ||
430 | bts_from, | ||
431 | bts_to, | ||
432 | bts_flags, | ||
363 | 433 | ||
364 | current->mm->total_vm += pgsz; | 434 | bts_qual = bts_from, |
365 | current->mm->locked_vm += pgsz; | 435 | bts_jiffies = bts_to, |
436 | bts_pid = bts_flags, | ||
366 | 437 | ||
367 | if (pages) | 438 | bts_qual_mask = (bts_qual_max - 1), |
368 | *pages = pgsz; | 439 | bts_escape = ((unsigned long)-1 & ~bts_qual_mask) |
440 | }; | ||
369 | 441 | ||
370 | return buffer; | 442 | static inline unsigned long bts_get(const char *base, enum bts_field field) |
443 | { | ||
444 | base += (ds_cfg.sizeof_field * field); | ||
445 | return *(unsigned long *)base; | ||
371 | } | 446 | } |
372 | 447 | ||
373 | static int ds_request(struct task_struct *task, void *base, size_t size, | 448 | static inline void bts_set(char *base, enum bts_field field, unsigned long val) |
374 | ds_ovfl_callback_t ovfl, enum ds_qualifier qual) | ||
375 | { | 449 | { |
376 | struct ds_context *context; | 450 | base += (ds_cfg.sizeof_field * field);; |
377 | unsigned long buffer, adj; | 451 | (*(unsigned long *)base) = val; |
378 | const unsigned long alignment = (1 << 3); | 452 | } |
379 | unsigned long irq; | ||
380 | int error = 0; | ||
381 | 453 | ||
382 | if (!ds_cfg.sizeof_ds) | ||
383 | return -EOPNOTSUPP; | ||
384 | 454 | ||
385 | /* we require some space to do alignment adjustments below */ | 455 | /* |
386 | if (size < (alignment + ds_cfg.sizeof_rec[qual])) | 456 | * The raw BTS data is architecture dependent. |
457 | * | ||
458 | * For higher-level users, we give an arch-independent view. | ||
459 | * - ds.h defines struct bts_struct | ||
460 | * - bts_read translates one raw bts record into a bts_struct | ||
461 | * - bts_write translates one bts_struct into the raw format and | ||
462 | * writes it into the top of the parameter tracer's buffer. | ||
463 | * | ||
464 | * return: bytes read/written on success; -Eerrno, otherwise | ||
465 | */ | ||
466 | static int bts_read(struct bts_tracer *tracer, const void *at, | ||
467 | struct bts_struct *out) | ||
468 | { | ||
469 | if (!tracer) | ||
387 | return -EINVAL; | 470 | return -EINVAL; |
388 | 471 | ||
389 | /* buffer overflow notification is not yet implemented */ | 472 | if (at < tracer->trace.ds.begin) |
390 | if (ovfl) | 473 | return -EINVAL; |
391 | return -EOPNOTSUPP; | ||
392 | 474 | ||
475 | if (tracer->trace.ds.end < (at + tracer->trace.ds.size)) | ||
476 | return -EINVAL; | ||
393 | 477 | ||
394 | context = ds_alloc_context(task); | 478 | memset(out, 0, sizeof(*out)); |
395 | if (!context) | 479 | if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) { |
396 | return -ENOMEM; | 480 | out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask); |
481 | out->variant.timestamp.jiffies = bts_get(at, bts_jiffies); | ||
482 | out->variant.timestamp.pid = bts_get(at, bts_pid); | ||
483 | } else { | ||
484 | out->qualifier = bts_branch; | ||
485 | out->variant.lbr.from = bts_get(at, bts_from); | ||
486 | out->variant.lbr.to = bts_get(at, bts_to); | ||
487 | |||
488 | if (!out->variant.lbr.from && !out->variant.lbr.to) | ||
489 | out->qualifier = bts_invalid; | ||
490 | } | ||
397 | 491 | ||
398 | spin_lock_irqsave(&ds_lock, irq); | 492 | return ds_cfg.sizeof_rec[ds_bts]; |
493 | } | ||
399 | 494 | ||
400 | error = -EPERM; | 495 | static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in) |
401 | if (!check_tracer(task)) | 496 | { |
402 | goto out_unlock; | 497 | unsigned char raw[MAX_SIZEOF_BTS]; |
403 | 498 | ||
404 | get_tracer(task); | 499 | if (!tracer) |
500 | return -EINVAL; | ||
405 | 501 | ||
406 | error = -EALREADY; | 502 | if (MAX_SIZEOF_BTS < ds_cfg.sizeof_rec[ds_bts]) |
407 | if (context->owner[qual] == current) | 503 | return -EOVERFLOW; |
408 | goto out_put_tracer; | ||
409 | error = -EPERM; | ||
410 | if (context->owner[qual] != NULL) | ||
411 | goto out_put_tracer; | ||
412 | context->owner[qual] = current; | ||
413 | 504 | ||
414 | spin_unlock_irqrestore(&ds_lock, irq); | 505 | switch (in->qualifier) { |
506 | case bts_invalid: | ||
507 | bts_set(raw, bts_from, 0); | ||
508 | bts_set(raw, bts_to, 0); | ||
509 | bts_set(raw, bts_flags, 0); | ||
510 | break; | ||
511 | case bts_branch: | ||
512 | bts_set(raw, bts_from, in->variant.lbr.from); | ||
513 | bts_set(raw, bts_to, in->variant.lbr.to); | ||
514 | bts_set(raw, bts_flags, 0); | ||
515 | break; | ||
516 | case bts_task_arrives: | ||
517 | case bts_task_departs: | ||
518 | bts_set(raw, bts_qual, (bts_escape | in->qualifier)); | ||
519 | bts_set(raw, bts_jiffies, in->variant.timestamp.jiffies); | ||
520 | bts_set(raw, bts_pid, in->variant.timestamp.pid); | ||
521 | break; | ||
522 | default: | ||
523 | return -EINVAL; | ||
524 | } | ||
415 | 525 | ||
526 | return ds_write(tracer->ds.context, ds_bts, raw, | ||
527 | ds_cfg.sizeof_rec[ds_bts]); | ||
528 | } | ||
416 | 529 | ||
417 | error = -ENOMEM; | ||
418 | if (!base) { | ||
419 | base = ds_allocate_buffer(size, &context->pages[qual]); | ||
420 | if (!base) | ||
421 | goto out_release; | ||
422 | 530 | ||
423 | context->buffer[qual] = base; | 531 | static void ds_write_config(struct ds_context *context, |
424 | } | 532 | struct ds_trace *cfg, enum ds_qualifier qual) |
425 | error = 0; | 533 | { |
534 | unsigned char *ds = context->ds; | ||
535 | |||
536 | ds_set(ds, qual, ds_buffer_base, (unsigned long)cfg->begin); | ||
537 | ds_set(ds, qual, ds_index, (unsigned long)cfg->top); | ||
538 | ds_set(ds, qual, ds_absolute_maximum, (unsigned long)cfg->end); | ||
539 | ds_set(ds, qual, ds_interrupt_threshold, (unsigned long)cfg->ith); | ||
540 | } | ||
541 | |||
542 | static void ds_read_config(struct ds_context *context, | ||
543 | struct ds_trace *cfg, enum ds_qualifier qual) | ||
544 | { | ||
545 | unsigned char *ds = context->ds; | ||
546 | |||
547 | cfg->begin = (void *)ds_get(ds, qual, ds_buffer_base); | ||
548 | cfg->top = (void *)ds_get(ds, qual, ds_index); | ||
549 | cfg->end = (void *)ds_get(ds, qual, ds_absolute_maximum); | ||
550 | cfg->ith = (void *)ds_get(ds, qual, ds_interrupt_threshold); | ||
551 | } | ||
426 | 552 | ||
427 | context->callback[qual] = ovfl; | 553 | static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual, |
554 | void *base, size_t size, size_t ith, | ||
555 | unsigned int flags) { | ||
556 | unsigned long buffer, adj; | ||
428 | 557 | ||
429 | /* adjust the buffer address and size to meet alignment | 558 | /* adjust the buffer address and size to meet alignment |
430 | * constraints: | 559 | * constraints: |
@@ -436,410 +565,383 @@ static int ds_request(struct task_struct *task, void *base, size_t size, | |||
436 | */ | 565 | */ |
437 | buffer = (unsigned long)base; | 566 | buffer = (unsigned long)base; |
438 | 567 | ||
439 | adj = ALIGN(buffer, alignment) - buffer; | 568 | adj = ALIGN(buffer, DS_ALIGNMENT) - buffer; |
440 | buffer += adj; | 569 | buffer += adj; |
441 | size -= adj; | 570 | size -= adj; |
442 | 571 | ||
443 | size /= ds_cfg.sizeof_rec[qual]; | 572 | trace->n = size / ds_cfg.sizeof_rec[qual]; |
444 | size *= ds_cfg.sizeof_rec[qual]; | 573 | trace->size = ds_cfg.sizeof_rec[qual]; |
445 | |||
446 | ds_set(context->ds, qual, ds_buffer_base, buffer); | ||
447 | ds_set(context->ds, qual, ds_index, buffer); | ||
448 | ds_set(context->ds, qual, ds_absolute_maximum, buffer + size); | ||
449 | |||
450 | if (ovfl) { | ||
451 | /* todo: select a suitable interrupt threshold */ | ||
452 | } else | ||
453 | ds_set(context->ds, qual, | ||
454 | ds_interrupt_threshold, buffer + size + 1); | ||
455 | |||
456 | /* we keep the context until ds_release */ | ||
457 | return error; | ||
458 | |||
459 | out_release: | ||
460 | context->owner[qual] = NULL; | ||
461 | ds_put_context(context); | ||
462 | put_tracer(task); | ||
463 | return error; | ||
464 | 574 | ||
465 | out_put_tracer: | 575 | size = (trace->n * trace->size); |
466 | spin_unlock_irqrestore(&ds_lock, irq); | ||
467 | ds_put_context(context); | ||
468 | put_tracer(task); | ||
469 | return error; | ||
470 | 576 | ||
471 | out_unlock: | 577 | trace->begin = (void *)buffer; |
472 | spin_unlock_irqrestore(&ds_lock, irq); | 578 | trace->top = trace->begin; |
473 | ds_put_context(context); | 579 | trace->end = (void *)(buffer + size); |
474 | return error; | 580 | /* The value for 'no threshold' is -1, which will set the |
475 | } | 581 | * threshold outside of the buffer, just like we want it. |
582 | */ | ||
583 | trace->ith = (void *)(buffer + size - ith); | ||
476 | 584 | ||
477 | int ds_request_bts(struct task_struct *task, void *base, size_t size, | 585 | trace->flags = flags; |
478 | ds_ovfl_callback_t ovfl) | ||
479 | { | ||
480 | return ds_request(task, base, size, ovfl, ds_bts); | ||
481 | } | 586 | } |
482 | 587 | ||
483 | int ds_request_pebs(struct task_struct *task, void *base, size_t size, | ||
484 | ds_ovfl_callback_t ovfl) | ||
485 | { | ||
486 | return ds_request(task, base, size, ovfl, ds_pebs); | ||
487 | } | ||
488 | 588 | ||
489 | static int ds_release(struct task_struct *task, enum ds_qualifier qual) | 589 | static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, |
590 | enum ds_qualifier qual, struct task_struct *task, | ||
591 | void *base, size_t size, size_t th, unsigned int flags) | ||
490 | { | 592 | { |
491 | struct ds_context *context; | 593 | struct ds_context *context; |
492 | int error; | 594 | int error; |
493 | 595 | ||
494 | context = ds_get_context(task); | 596 | error = -EINVAL; |
495 | error = ds_validate_access(context, qual); | 597 | if (!base) |
496 | if (error < 0) | ||
497 | goto out; | 598 | goto out; |
498 | 599 | ||
499 | kfree(context->buffer[qual]); | 600 | /* we require some space to do alignment adjustments below */ |
500 | context->buffer[qual] = NULL; | 601 | error = -EINVAL; |
501 | 602 | if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual])) | |
502 | current->mm->total_vm -= context->pages[qual]; | 603 | goto out; |
503 | current->mm->locked_vm -= context->pages[qual]; | ||
504 | context->pages[qual] = 0; | ||
505 | context->owner[qual] = NULL; | ||
506 | |||
507 | /* | ||
508 | * we put the context twice: | ||
509 | * once for the ds_get_context | ||
510 | * once for the corresponding ds_request | ||
511 | */ | ||
512 | ds_put_context(context); | ||
513 | out: | ||
514 | ds_put_context(context); | ||
515 | return error; | ||
516 | } | ||
517 | 604 | ||
518 | int ds_release_bts(struct task_struct *task) | 605 | if (th != (size_t)-1) { |
519 | { | 606 | th *= ds_cfg.sizeof_rec[qual]; |
520 | return ds_release(task, ds_bts); | ||
521 | } | ||
522 | 607 | ||
523 | int ds_release_pebs(struct task_struct *task) | 608 | error = -EINVAL; |
524 | { | 609 | if (size <= th) |
525 | return ds_release(task, ds_pebs); | 610 | goto out; |
526 | } | 611 | } |
527 | 612 | ||
528 | static int ds_get_index(struct task_struct *task, size_t *pos, | 613 | tracer->buffer = base; |
529 | enum ds_qualifier qual) | 614 | tracer->size = size; |
530 | { | ||
531 | struct ds_context *context; | ||
532 | unsigned long base, index; | ||
533 | int error; | ||
534 | 615 | ||
616 | error = -ENOMEM; | ||
535 | context = ds_get_context(task); | 617 | context = ds_get_context(task); |
536 | error = ds_validate_access(context, qual); | 618 | if (!context) |
537 | if (error < 0) | ||
538 | goto out; | 619 | goto out; |
620 | tracer->context = context; | ||
539 | 621 | ||
540 | base = ds_get(context->ds, qual, ds_buffer_base); | 622 | ds_init_ds_trace(trace, qual, base, size, th, flags); |
541 | index = ds_get(context->ds, qual, ds_index); | ||
542 | 623 | ||
543 | error = ((index - base) / ds_cfg.sizeof_rec[qual]); | 624 | error = 0; |
544 | if (pos) | ||
545 | *pos = error; | ||
546 | out: | 625 | out: |
547 | ds_put_context(context); | ||
548 | return error; | 626 | return error; |
549 | } | 627 | } |
550 | 628 | ||
551 | int ds_get_bts_index(struct task_struct *task, size_t *pos) | 629 | struct bts_tracer *ds_request_bts(struct task_struct *task, |
552 | { | 630 | void *base, size_t size, |
553 | return ds_get_index(task, pos, ds_bts); | 631 | bts_ovfl_callback_t ovfl, size_t th, |
554 | } | 632 | unsigned int flags) |
555 | |||
556 | int ds_get_pebs_index(struct task_struct *task, size_t *pos) | ||
557 | { | 633 | { |
558 | return ds_get_index(task, pos, ds_pebs); | 634 | struct bts_tracer *tracer; |
559 | } | 635 | unsigned long irq; |
560 | |||
561 | static int ds_get_end(struct task_struct *task, size_t *pos, | ||
562 | enum ds_qualifier qual) | ||
563 | { | ||
564 | struct ds_context *context; | ||
565 | unsigned long base, end; | ||
566 | int error; | 636 | int error; |
567 | 637 | ||
568 | context = ds_get_context(task); | 638 | error = -EOPNOTSUPP; |
569 | error = ds_validate_access(context, qual); | 639 | if (!ds_cfg.ctl[dsf_bts]) |
570 | if (error < 0) | ||
571 | goto out; | 640 | goto out; |
572 | 641 | ||
573 | base = ds_get(context->ds, qual, ds_buffer_base); | 642 | /* buffer overflow notification is not yet implemented */ |
574 | end = ds_get(context->ds, qual, ds_absolute_maximum); | 643 | error = -EOPNOTSUPP; |
644 | if (ovfl) | ||
645 | goto out; | ||
575 | 646 | ||
576 | error = ((end - base) / ds_cfg.sizeof_rec[qual]); | 647 | error = -ENOMEM; |
577 | if (pos) | 648 | tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); |
578 | *pos = error; | 649 | if (!tracer) |
579 | out: | 650 | goto out; |
580 | ds_put_context(context); | 651 | tracer->ovfl = ovfl; |
581 | return error; | ||
582 | } | ||
583 | 652 | ||
584 | int ds_get_bts_end(struct task_struct *task, size_t *pos) | 653 | error = ds_request(&tracer->ds, &tracer->trace.ds, |
585 | { | 654 | ds_bts, task, base, size, th, flags); |
586 | return ds_get_end(task, pos, ds_bts); | 655 | if (error < 0) |
587 | } | 656 | goto out_tracer; |
588 | 657 | ||
589 | int ds_get_pebs_end(struct task_struct *task, size_t *pos) | ||
590 | { | ||
591 | return ds_get_end(task, pos, ds_pebs); | ||
592 | } | ||
593 | 658 | ||
594 | static int ds_access(struct task_struct *task, size_t index, | 659 | spin_lock_irqsave(&ds_lock, irq); |
595 | const void **record, enum ds_qualifier qual) | ||
596 | { | ||
597 | struct ds_context *context; | ||
598 | unsigned long base, idx; | ||
599 | int error; | ||
600 | 660 | ||
601 | if (!record) | 661 | error = -EPERM; |
602 | return -EINVAL; | 662 | if (!check_tracer(task)) |
663 | goto out_unlock; | ||
664 | get_tracer(task); | ||
603 | 665 | ||
604 | context = ds_get_context(task); | 666 | error = -EPERM; |
605 | error = ds_validate_access(context, qual); | 667 | if (tracer->ds.context->bts_master) |
606 | if (error < 0) | 668 | goto out_put_tracer; |
607 | goto out; | 669 | tracer->ds.context->bts_master = tracer; |
608 | 670 | ||
609 | base = ds_get(context->ds, qual, ds_buffer_base); | 671 | spin_unlock_irqrestore(&ds_lock, irq); |
610 | idx = base + (index * ds_cfg.sizeof_rec[qual]); | ||
611 | 672 | ||
612 | error = -EINVAL; | ||
613 | if (idx > ds_get(context->ds, qual, ds_absolute_maximum)) | ||
614 | goto out; | ||
615 | 673 | ||
616 | *record = (const void *)idx; | 674 | tracer->trace.read = bts_read; |
617 | error = ds_cfg.sizeof_rec[qual]; | 675 | tracer->trace.write = bts_write; |
618 | out: | ||
619 | ds_put_context(context); | ||
620 | return error; | ||
621 | } | ||
622 | 676 | ||
623 | int ds_access_bts(struct task_struct *task, size_t index, const void **record) | 677 | ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts); |
624 | { | 678 | ds_resume_bts(tracer); |
625 | return ds_access(task, index, record, ds_bts); | ||
626 | } | ||
627 | 679 | ||
628 | int ds_access_pebs(struct task_struct *task, size_t index, const void **record) | 680 | return tracer; |
629 | { | 681 | |
630 | return ds_access(task, index, record, ds_pebs); | 682 | out_put_tracer: |
683 | put_tracer(task); | ||
684 | out_unlock: | ||
685 | spin_unlock_irqrestore(&ds_lock, irq); | ||
686 | ds_put_context(tracer->ds.context); | ||
687 | out_tracer: | ||
688 | kfree(tracer); | ||
689 | out: | ||
690 | return ERR_PTR(error); | ||
631 | } | 691 | } |
632 | 692 | ||
633 | static int ds_write(struct task_struct *task, const void *record, size_t size, | 693 | struct pebs_tracer *ds_request_pebs(struct task_struct *task, |
634 | enum ds_qualifier qual, int force) | 694 | void *base, size_t size, |
695 | pebs_ovfl_callback_t ovfl, size_t th, | ||
696 | unsigned int flags) | ||
635 | { | 697 | { |
636 | struct ds_context *context; | 698 | struct pebs_tracer *tracer; |
699 | unsigned long irq; | ||
637 | int error; | 700 | int error; |
638 | 701 | ||
639 | if (!record) | 702 | /* buffer overflow notification is not yet implemented */ |
640 | return -EINVAL; | 703 | error = -EOPNOTSUPP; |
704 | if (ovfl) | ||
705 | goto out; | ||
641 | 706 | ||
642 | error = -EPERM; | 707 | error = -ENOMEM; |
643 | context = ds_get_context(task); | 708 | tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); |
644 | if (!context) | 709 | if (!tracer) |
645 | goto out; | 710 | goto out; |
711 | tracer->ovfl = ovfl; | ||
646 | 712 | ||
647 | if (!force) { | 713 | error = ds_request(&tracer->ds, &tracer->trace.ds, |
648 | error = ds_validate_access(context, qual); | 714 | ds_pebs, task, base, size, th, flags); |
649 | if (error < 0) | 715 | if (error < 0) |
650 | goto out; | 716 | goto out_tracer; |
651 | } | ||
652 | 717 | ||
653 | error = 0; | 718 | spin_lock_irqsave(&ds_lock, irq); |
654 | while (size) { | ||
655 | unsigned long base, index, end, write_end, int_th; | ||
656 | unsigned long write_size, adj_write_size; | ||
657 | 719 | ||
658 | /* | 720 | error = -EPERM; |
659 | * write as much as possible without producing an | 721 | if (!check_tracer(task)) |
660 | * overflow interrupt. | 722 | goto out_unlock; |
661 | * | 723 | get_tracer(task); |
662 | * interrupt_threshold must either be | ||
663 | * - bigger than absolute_maximum or | ||
664 | * - point to a record between buffer_base and absolute_maximum | ||
665 | * | ||
666 | * index points to a valid record. | ||
667 | */ | ||
668 | base = ds_get(context->ds, qual, ds_buffer_base); | ||
669 | index = ds_get(context->ds, qual, ds_index); | ||
670 | end = ds_get(context->ds, qual, ds_absolute_maximum); | ||
671 | int_th = ds_get(context->ds, qual, ds_interrupt_threshold); | ||
672 | 724 | ||
673 | write_end = min(end, int_th); | 725 | error = -EPERM; |
726 | if (tracer->ds.context->pebs_master) | ||
727 | goto out_put_tracer; | ||
728 | tracer->ds.context->pebs_master = tracer; | ||
674 | 729 | ||
675 | /* if we are already beyond the interrupt threshold, | 730 | spin_unlock_irqrestore(&ds_lock, irq); |
676 | * we fill the entire buffer */ | ||
677 | if (write_end <= index) | ||
678 | write_end = end; | ||
679 | 731 | ||
680 | if (write_end <= index) | 732 | ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts); |
681 | goto out; | 733 | ds_resume_pebs(tracer); |
682 | 734 | ||
683 | write_size = min((unsigned long) size, write_end - index); | 735 | return tracer; |
684 | memcpy((void *)index, record, write_size); | ||
685 | 736 | ||
686 | record = (const char *)record + write_size; | 737 | out_put_tracer: |
687 | size -= write_size; | 738 | put_tracer(task); |
688 | error += write_size; | 739 | out_unlock: |
740 | spin_unlock_irqrestore(&ds_lock, irq); | ||
741 | ds_put_context(tracer->ds.context); | ||
742 | out_tracer: | ||
743 | kfree(tracer); | ||
744 | out: | ||
745 | return ERR_PTR(error); | ||
746 | } | ||
689 | 747 | ||
690 | adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; | 748 | void ds_release_bts(struct bts_tracer *tracer) |
691 | adj_write_size *= ds_cfg.sizeof_rec[qual]; | 749 | { |
750 | if (!tracer) | ||
751 | return; | ||
692 | 752 | ||
693 | /* zero out trailing bytes */ | 753 | ds_suspend_bts(tracer); |
694 | memset((char *)index + write_size, 0, | ||
695 | adj_write_size - write_size); | ||
696 | index += adj_write_size; | ||
697 | 754 | ||
698 | if (index >= end) | 755 | WARN_ON_ONCE(tracer->ds.context->bts_master != tracer); |
699 | index = base; | 756 | tracer->ds.context->bts_master = NULL; |
700 | ds_set(context->ds, qual, ds_index, index); | ||
701 | 757 | ||
702 | if (index >= int_th) | 758 | put_tracer(tracer->ds.context->task); |
703 | ds_overflow(task, context, qual); | 759 | ds_put_context(tracer->ds.context); |
704 | } | ||
705 | 760 | ||
706 | out: | 761 | kfree(tracer); |
707 | ds_put_context(context); | ||
708 | return error; | ||
709 | } | 762 | } |
710 | 763 | ||
711 | int ds_write_bts(struct task_struct *task, const void *record, size_t size) | 764 | void ds_suspend_bts(struct bts_tracer *tracer) |
712 | { | 765 | { |
713 | return ds_write(task, record, size, ds_bts, /* force = */ 0); | 766 | struct task_struct *task; |
714 | } | ||
715 | 767 | ||
716 | int ds_write_pebs(struct task_struct *task, const void *record, size_t size) | 768 | if (!tracer) |
717 | { | 769 | return; |
718 | return ds_write(task, record, size, ds_pebs, /* force = */ 0); | ||
719 | } | ||
720 | 770 | ||
721 | int ds_unchecked_write_bts(struct task_struct *task, | 771 | task = tracer->ds.context->task; |
722 | const void *record, size_t size) | ||
723 | { | ||
724 | return ds_write(task, record, size, ds_bts, /* force = */ 1); | ||
725 | } | ||
726 | 772 | ||
727 | int ds_unchecked_write_pebs(struct task_struct *task, | 773 | if (!task || (task == current)) |
728 | const void *record, size_t size) | 774 | update_debugctlmsr(get_debugctlmsr() & ~BTS_CONTROL); |
729 | { | 775 | |
730 | return ds_write(task, record, size, ds_pebs, /* force = */ 1); | 776 | if (task) { |
777 | task->thread.debugctlmsr &= ~BTS_CONTROL; | ||
778 | |||
779 | if (!task->thread.debugctlmsr) | ||
780 | clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR); | ||
781 | } | ||
731 | } | 782 | } |
732 | 783 | ||
733 | static int ds_reset_or_clear(struct task_struct *task, | 784 | void ds_resume_bts(struct bts_tracer *tracer) |
734 | enum ds_qualifier qual, int clear) | ||
735 | { | 785 | { |
736 | struct ds_context *context; | 786 | struct task_struct *task; |
737 | unsigned long base, end; | 787 | unsigned long control; |
738 | int error; | ||
739 | 788 | ||
740 | context = ds_get_context(task); | 789 | if (!tracer) |
741 | error = ds_validate_access(context, qual); | 790 | return; |
742 | if (error < 0) | ||
743 | goto out; | ||
744 | 791 | ||
745 | base = ds_get(context->ds, qual, ds_buffer_base); | 792 | task = tracer->ds.context->task; |
746 | end = ds_get(context->ds, qual, ds_absolute_maximum); | ||
747 | 793 | ||
748 | if (clear) | 794 | control = ds_cfg.ctl[dsf_bts]; |
749 | memset((void *)base, 0, end - base); | 795 | if (!(tracer->trace.ds.flags & BTS_KERNEL)) |
796 | control |= ds_cfg.ctl[dsf_bts_kernel]; | ||
797 | if (!(tracer->trace.ds.flags & BTS_USER)) | ||
798 | control |= ds_cfg.ctl[dsf_bts_user]; | ||
750 | 799 | ||
751 | ds_set(context->ds, qual, ds_index, base); | 800 | if (task) { |
801 | task->thread.debugctlmsr |= control; | ||
802 | set_tsk_thread_flag(task, TIF_DEBUGCTLMSR); | ||
803 | } | ||
752 | 804 | ||
753 | error = 0; | 805 | if (!task || (task == current)) |
754 | out: | 806 | update_debugctlmsr(get_debugctlmsr() | control); |
755 | ds_put_context(context); | ||
756 | return error; | ||
757 | } | 807 | } |
758 | 808 | ||
759 | int ds_reset_bts(struct task_struct *task) | 809 | void ds_release_pebs(struct pebs_tracer *tracer) |
760 | { | 810 | { |
761 | return ds_reset_or_clear(task, ds_bts, /* clear = */ 0); | 811 | if (!tracer) |
812 | return; | ||
813 | |||
814 | ds_suspend_pebs(tracer); | ||
815 | |||
816 | WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer); | ||
817 | tracer->ds.context->pebs_master = NULL; | ||
818 | |||
819 | put_tracer(tracer->ds.context->task); | ||
820 | ds_put_context(tracer->ds.context); | ||
821 | |||
822 | kfree(tracer); | ||
762 | } | 823 | } |
763 | 824 | ||
764 | int ds_reset_pebs(struct task_struct *task) | 825 | void ds_suspend_pebs(struct pebs_tracer *tracer) |
765 | { | 826 | { |
766 | return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0); | 827 | |
767 | } | 828 | } |
768 | 829 | ||
769 | int ds_clear_bts(struct task_struct *task) | 830 | void ds_resume_pebs(struct pebs_tracer *tracer) |
770 | { | 831 | { |
771 | return ds_reset_or_clear(task, ds_bts, /* clear = */ 1); | 832 | |
772 | } | 833 | } |
773 | 834 | ||
774 | int ds_clear_pebs(struct task_struct *task) | 835 | const struct bts_trace *ds_read_bts(struct bts_tracer *tracer) |
775 | { | 836 | { |
776 | return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1); | 837 | if (!tracer) |
838 | return NULL; | ||
839 | |||
840 | ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_bts); | ||
841 | return &tracer->trace; | ||
777 | } | 842 | } |
778 | 843 | ||
779 | int ds_get_pebs_reset(struct task_struct *task, u64 *value) | 844 | const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer) |
780 | { | 845 | { |
781 | struct ds_context *context; | 846 | if (!tracer) |
782 | int error; | 847 | return NULL; |
848 | |||
849 | ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); | ||
850 | tracer->trace.reset_value = | ||
851 | *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)); | ||
783 | 852 | ||
784 | if (!value) | 853 | return &tracer->trace; |
854 | } | ||
855 | |||
856 | int ds_reset_bts(struct bts_tracer *tracer) | ||
857 | { | ||
858 | if (!tracer) | ||
785 | return -EINVAL; | 859 | return -EINVAL; |
786 | 860 | ||
787 | context = ds_get_context(task); | 861 | tracer->trace.ds.top = tracer->trace.ds.begin; |
788 | error = ds_validate_access(context, ds_pebs); | ||
789 | if (error < 0) | ||
790 | goto out; | ||
791 | 862 | ||
792 | *value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)); | 863 | ds_set(tracer->ds.context->ds, ds_bts, ds_index, |
864 | (unsigned long)tracer->trace.ds.top); | ||
793 | 865 | ||
794 | error = 0; | 866 | return 0; |
795 | out: | ||
796 | ds_put_context(context); | ||
797 | return error; | ||
798 | } | 867 | } |
799 | 868 | ||
800 | int ds_set_pebs_reset(struct task_struct *task, u64 value) | 869 | int ds_reset_pebs(struct pebs_tracer *tracer) |
801 | { | 870 | { |
802 | struct ds_context *context; | 871 | if (!tracer) |
803 | int error; | 872 | return -EINVAL; |
804 | 873 | ||
805 | context = ds_get_context(task); | 874 | tracer->trace.ds.top = tracer->trace.ds.begin; |
806 | error = ds_validate_access(context, ds_pebs); | ||
807 | if (error < 0) | ||
808 | goto out; | ||
809 | 875 | ||
810 | *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value; | 876 | ds_set(tracer->ds.context->ds, ds_bts, ds_index, |
877 | (unsigned long)tracer->trace.ds.top); | ||
811 | 878 | ||
812 | error = 0; | 879 | return 0; |
813 | out: | 880 | } |
814 | ds_put_context(context); | 881 | |
815 | return error; | 882 | int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value) |
883 | { | ||
884 | if (!tracer) | ||
885 | return -EINVAL; | ||
886 | |||
887 | *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value; | ||
888 | |||
889 | return 0; | ||
816 | } | 890 | } |
817 | 891 | ||
818 | static const struct ds_configuration ds_cfg_var = { | 892 | static const struct ds_configuration ds_cfg_netburst = { |
819 | .sizeof_ds = sizeof(long) * 12, | 893 | .name = "netburst", |
820 | .sizeof_field = sizeof(long), | 894 | .ctl[dsf_bts] = (1 << 2) | (1 << 3), |
821 | .sizeof_rec[ds_bts] = sizeof(long) * 3, | 895 | .ctl[dsf_bts_kernel] = (1 << 5), |
896 | .ctl[dsf_bts_user] = (1 << 6), | ||
897 | |||
898 | .sizeof_field = sizeof(long), | ||
899 | .sizeof_rec[ds_bts] = sizeof(long) * 3, | ||
822 | #ifdef __i386__ | 900 | #ifdef __i386__ |
823 | .sizeof_rec[ds_pebs] = sizeof(long) * 10 | 901 | .sizeof_rec[ds_pebs] = sizeof(long) * 10, |
824 | #else | 902 | #else |
825 | .sizeof_rec[ds_pebs] = sizeof(long) * 18 | 903 | .sizeof_rec[ds_pebs] = sizeof(long) * 18, |
826 | #endif | 904 | #endif |
827 | }; | 905 | }; |
828 | static const struct ds_configuration ds_cfg_64 = { | 906 | static const struct ds_configuration ds_cfg_pentium_m = { |
829 | .sizeof_ds = 8 * 12, | 907 | .name = "pentium m", |
830 | .sizeof_field = 8, | 908 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), |
831 | .sizeof_rec[ds_bts] = 8 * 3, | 909 | |
910 | .sizeof_field = sizeof(long), | ||
911 | .sizeof_rec[ds_bts] = sizeof(long) * 3, | ||
832 | #ifdef __i386__ | 912 | #ifdef __i386__ |
833 | .sizeof_rec[ds_pebs] = 8 * 10 | 913 | .sizeof_rec[ds_pebs] = sizeof(long) * 10, |
834 | #else | 914 | #else |
835 | .sizeof_rec[ds_pebs] = 8 * 18 | 915 | .sizeof_rec[ds_pebs] = sizeof(long) * 18, |
836 | #endif | 916 | #endif |
837 | }; | 917 | }; |
918 | static const struct ds_configuration ds_cfg_core2 = { | ||
919 | .name = "core 2", | ||
920 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), | ||
921 | .ctl[dsf_bts_kernel] = (1 << 9), | ||
922 | .ctl[dsf_bts_user] = (1 << 10), | ||
923 | |||
924 | .sizeof_field = 8, | ||
925 | .sizeof_rec[ds_bts] = 8 * 3, | ||
926 | .sizeof_rec[ds_pebs] = 8 * 18, | ||
927 | }; | ||
838 | 928 | ||
839 | static inline void | 929 | static void |
840 | ds_configure(const struct ds_configuration *cfg) | 930 | ds_configure(const struct ds_configuration *cfg) |
841 | { | 931 | { |
932 | memset(&ds_cfg, 0, sizeof(ds_cfg)); | ||
842 | ds_cfg = *cfg; | 933 | ds_cfg = *cfg; |
934 | |||
935 | printk(KERN_INFO "[ds] using %s configuration\n", ds_cfg.name); | ||
936 | |||
937 | if (!cpu_has_bts) { | ||
938 | ds_cfg.ctl[dsf_bts] = 0; | ||
939 | printk(KERN_INFO "[ds] bts not available\n"); | ||
940 | } | ||
941 | if (!cpu_has_pebs) | ||
942 | printk(KERN_INFO "[ds] pebs not available\n"); | ||
943 | |||
944 | WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_field)); | ||
843 | } | 945 | } |
844 | 946 | ||
845 | void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | 947 | void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) |
@@ -852,10 +954,10 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | |||
852 | break; | 954 | break; |
853 | case 0xD: | 955 | case 0xD: |
854 | case 0xE: /* Pentium M */ | 956 | case 0xE: /* Pentium M */ |
855 | ds_configure(&ds_cfg_var); | 957 | ds_configure(&ds_cfg_pentium_m); |
856 | break; | 958 | break; |
857 | default: /* Core2, Atom, ... */ | 959 | default: /* Core2, Atom, ... */ |
858 | ds_configure(&ds_cfg_64); | 960 | ds_configure(&ds_cfg_core2); |
859 | break; | 961 | break; |
860 | } | 962 | } |
861 | break; | 963 | break; |
@@ -864,7 +966,7 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | |||
864 | case 0x0: | 966 | case 0x0: |
865 | case 0x1: | 967 | case 0x1: |
866 | case 0x2: /* Netburst */ | 968 | case 0x2: /* Netburst */ |
867 | ds_configure(&ds_cfg_var); | 969 | ds_configure(&ds_cfg_netburst); |
868 | break; | 970 | break; |
869 | default: | 971 | default: |
870 | /* sorry, don't know about them */ | 972 | /* sorry, don't know about them */ |
@@ -877,12 +979,41 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | |||
877 | } | 979 | } |
878 | } | 980 | } |
879 | 981 | ||
880 | void ds_free(struct ds_context *context) | 982 | /* |
881 | { | 983 | * Change the DS configuration from tracing prev to tracing next. |
882 | /* This is called when the task owning the parameter context | 984 | */ |
883 | * is dying. There should not be any user of that context left | 985 | void ds_switch_to(struct task_struct *prev, struct task_struct *next) |
884 | * to disturb us, anymore. */ | 986 | { |
885 | unsigned long leftovers = context->count; | 987 | struct ds_context *prev_ctx = prev->thread.ds_ctx; |
886 | while (leftovers--) | 988 | struct ds_context *next_ctx = next->thread.ds_ctx; |
887 | ds_put_context(context); | 989 | |
990 | if (prev_ctx) { | ||
991 | update_debugctlmsr(0); | ||
992 | |||
993 | if (prev_ctx->bts_master && | ||
994 | (prev_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) { | ||
995 | struct bts_struct ts = { | ||
996 | .qualifier = bts_task_departs, | ||
997 | .variant.timestamp.jiffies = jiffies_64, | ||
998 | .variant.timestamp.pid = prev->pid | ||
999 | }; | ||
1000 | bts_write(prev_ctx->bts_master, &ts); | ||
1001 | } | ||
1002 | } | ||
1003 | |||
1004 | if (next_ctx) { | ||
1005 | if (next_ctx->bts_master && | ||
1006 | (next_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) { | ||
1007 | struct bts_struct ts = { | ||
1008 | .qualifier = bts_task_arrives, | ||
1009 | .variant.timestamp.jiffies = jiffies_64, | ||
1010 | .variant.timestamp.pid = next->pid | ||
1011 | }; | ||
1012 | bts_write(next_ctx->bts_master, &ts); | ||
1013 | } | ||
1014 | |||
1015 | wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds); | ||
1016 | } | ||
1017 | |||
1018 | update_debugctlmsr(next->thread.debugctlmsr); | ||
888 | } | 1019 | } |
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c new file mode 100644 index 000000000000..6b1f6f6f8661 --- /dev/null +++ b/arch/x86/kernel/dumpstack.c | |||
@@ -0,0 +1,351 @@ | |||
1 | /* | ||
2 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
3 | * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs | ||
4 | */ | ||
5 | #include <linux/kallsyms.h> | ||
6 | #include <linux/kprobes.h> | ||
7 | #include <linux/uaccess.h> | ||
8 | #include <linux/utsname.h> | ||
9 | #include <linux/hardirq.h> | ||
10 | #include <linux/kdebug.h> | ||
11 | #include <linux/module.h> | ||
12 | #include <linux/ptrace.h> | ||
13 | #include <linux/kexec.h> | ||
14 | #include <linux/bug.h> | ||
15 | #include <linux/nmi.h> | ||
16 | #include <linux/sysfs.h> | ||
17 | |||
18 | #include <asm/stacktrace.h> | ||
19 | |||
20 | #include "dumpstack.h" | ||
21 | |||
22 | int panic_on_unrecovered_nmi; | ||
23 | unsigned int code_bytes = 64; | ||
24 | int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; | ||
25 | static int die_counter; | ||
26 | |||
27 | void printk_address(unsigned long address, int reliable) | ||
28 | { | ||
29 | printk(" [<%p>] %s%pS\n", (void *) address, | ||
30 | reliable ? "" : "? ", (void *) address); | ||
31 | } | ||
32 | |||
33 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
34 | static void | ||
35 | print_ftrace_graph_addr(unsigned long addr, void *data, | ||
36 | const struct stacktrace_ops *ops, | ||
37 | struct thread_info *tinfo, int *graph) | ||
38 | { | ||
39 | struct task_struct *task = tinfo->task; | ||
40 | unsigned long ret_addr; | ||
41 | int index = task->curr_ret_stack; | ||
42 | |||
43 | if (addr != (unsigned long)return_to_handler) | ||
44 | return; | ||
45 | |||
46 | if (!task->ret_stack || index < *graph) | ||
47 | return; | ||
48 | |||
49 | index -= *graph; | ||
50 | ret_addr = task->ret_stack[index].ret; | ||
51 | |||
52 | ops->address(data, ret_addr, 1); | ||
53 | |||
54 | (*graph)++; | ||
55 | } | ||
56 | #else | ||
57 | static inline void | ||
58 | print_ftrace_graph_addr(unsigned long addr, void *data, | ||
59 | const struct stacktrace_ops *ops, | ||
60 | struct thread_info *tinfo, int *graph) | ||
61 | { } | ||
62 | #endif | ||
63 | |||
64 | /* | ||
65 | * x86-64 can have up to three kernel stacks: | ||
66 | * process stack | ||
67 | * interrupt stack | ||
68 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack | ||
69 | */ | ||
70 | |||
71 | static inline int valid_stack_ptr(struct thread_info *tinfo, | ||
72 | void *p, unsigned int size, void *end) | ||
73 | { | ||
74 | void *t = tinfo; | ||
75 | if (end) { | ||
76 | if (p < end && p >= (end-THREAD_SIZE)) | ||
77 | return 1; | ||
78 | else | ||
79 | return 0; | ||
80 | } | ||
81 | return p > t && p < t + THREAD_SIZE - size; | ||
82 | } | ||
83 | |||
84 | unsigned long | ||
85 | print_context_stack(struct thread_info *tinfo, | ||
86 | unsigned long *stack, unsigned long bp, | ||
87 | const struct stacktrace_ops *ops, void *data, | ||
88 | unsigned long *end, int *graph) | ||
89 | { | ||
90 | struct stack_frame *frame = (struct stack_frame *)bp; | ||
91 | |||
92 | while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { | ||
93 | unsigned long addr; | ||
94 | |||
95 | addr = *stack; | ||
96 | if (__kernel_text_address(addr)) { | ||
97 | if ((unsigned long) stack == bp + sizeof(long)) { | ||
98 | ops->address(data, addr, 1); | ||
99 | frame = frame->next_frame; | ||
100 | bp = (unsigned long) frame; | ||
101 | } else { | ||
102 | ops->address(data, addr, bp == 0); | ||
103 | } | ||
104 | print_ftrace_graph_addr(addr, data, ops, tinfo, graph); | ||
105 | } | ||
106 | stack++; | ||
107 | } | ||
108 | return bp; | ||
109 | } | ||
110 | |||
111 | |||
112 | static void | ||
113 | print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) | ||
114 | { | ||
115 | printk(data); | ||
116 | print_symbol(msg, symbol); | ||
117 | printk("\n"); | ||
118 | } | ||
119 | |||
120 | static void print_trace_warning(void *data, char *msg) | ||
121 | { | ||
122 | printk("%s%s\n", (char *)data, msg); | ||
123 | } | ||
124 | |||
125 | static int print_trace_stack(void *data, char *name) | ||
126 | { | ||
127 | printk("%s <%s> ", (char *)data, name); | ||
128 | return 0; | ||
129 | } | ||
130 | |||
131 | /* | ||
132 | * Print one address/symbol entries per line. | ||
133 | */ | ||
134 | static void print_trace_address(void *data, unsigned long addr, int reliable) | ||
135 | { | ||
136 | touch_nmi_watchdog(); | ||
137 | printk(data); | ||
138 | printk_address(addr, reliable); | ||
139 | } | ||
140 | |||
141 | static const struct stacktrace_ops print_trace_ops = { | ||
142 | .warning = print_trace_warning, | ||
143 | .warning_symbol = print_trace_warning_symbol, | ||
144 | .stack = print_trace_stack, | ||
145 | .address = print_trace_address, | ||
146 | }; | ||
147 | |||
148 | void | ||
149 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
150 | unsigned long *stack, unsigned long bp, char *log_lvl) | ||
151 | { | ||
152 | printk("%sCall Trace:\n", log_lvl); | ||
153 | dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); | ||
154 | } | ||
155 | |||
156 | void show_trace(struct task_struct *task, struct pt_regs *regs, | ||
157 | unsigned long *stack, unsigned long bp) | ||
158 | { | ||
159 | show_trace_log_lvl(task, regs, stack, bp, ""); | ||
160 | } | ||
161 | |||
162 | void show_stack(struct task_struct *task, unsigned long *sp) | ||
163 | { | ||
164 | show_stack_log_lvl(task, NULL, sp, 0, ""); | ||
165 | } | ||
166 | |||
167 | /* | ||
168 | * The architecture-independent dump_stack generator | ||
169 | */ | ||
170 | void dump_stack(void) | ||
171 | { | ||
172 | unsigned long bp = 0; | ||
173 | unsigned long stack; | ||
174 | |||
175 | #ifdef CONFIG_FRAME_POINTER | ||
176 | if (!bp) | ||
177 | get_bp(bp); | ||
178 | #endif | ||
179 | |||
180 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", | ||
181 | current->pid, current->comm, print_tainted(), | ||
182 | init_utsname()->release, | ||
183 | (int)strcspn(init_utsname()->version, " "), | ||
184 | init_utsname()->version); | ||
185 | show_trace(NULL, NULL, &stack, bp); | ||
186 | } | ||
187 | EXPORT_SYMBOL(dump_stack); | ||
188 | |||
189 | static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; | ||
190 | static int die_owner = -1; | ||
191 | static unsigned int die_nest_count; | ||
192 | |||
193 | unsigned __kprobes long oops_begin(void) | ||
194 | { | ||
195 | int cpu; | ||
196 | unsigned long flags; | ||
197 | |||
198 | oops_enter(); | ||
199 | |||
200 | /* racy, but better than risking deadlock. */ | ||
201 | raw_local_irq_save(flags); | ||
202 | cpu = smp_processor_id(); | ||
203 | if (!__raw_spin_trylock(&die_lock)) { | ||
204 | if (cpu == die_owner) | ||
205 | /* nested oops. should stop eventually */; | ||
206 | else | ||
207 | __raw_spin_lock(&die_lock); | ||
208 | } | ||
209 | die_nest_count++; | ||
210 | die_owner = cpu; | ||
211 | console_verbose(); | ||
212 | bust_spinlocks(1); | ||
213 | return flags; | ||
214 | } | ||
215 | |||
216 | void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) | ||
217 | { | ||
218 | if (regs && kexec_should_crash(current)) | ||
219 | crash_kexec(regs); | ||
220 | |||
221 | bust_spinlocks(0); | ||
222 | die_owner = -1; | ||
223 | add_taint(TAINT_DIE); | ||
224 | die_nest_count--; | ||
225 | if (!die_nest_count) | ||
226 | /* Nest count reaches zero, release the lock. */ | ||
227 | __raw_spin_unlock(&die_lock); | ||
228 | raw_local_irq_restore(flags); | ||
229 | oops_exit(); | ||
230 | |||
231 | if (!signr) | ||
232 | return; | ||
233 | if (in_interrupt()) | ||
234 | panic("Fatal exception in interrupt"); | ||
235 | if (panic_on_oops) | ||
236 | panic("Fatal exception"); | ||
237 | do_exit(signr); | ||
238 | } | ||
239 | |||
240 | int __kprobes __die(const char *str, struct pt_regs *regs, long err) | ||
241 | { | ||
242 | #ifdef CONFIG_X86_32 | ||
243 | unsigned short ss; | ||
244 | unsigned long sp; | ||
245 | #endif | ||
246 | printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); | ||
247 | #ifdef CONFIG_PREEMPT | ||
248 | printk("PREEMPT "); | ||
249 | #endif | ||
250 | #ifdef CONFIG_SMP | ||
251 | printk("SMP "); | ||
252 | #endif | ||
253 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
254 | printk("DEBUG_PAGEALLOC"); | ||
255 | #endif | ||
256 | printk("\n"); | ||
257 | sysfs_printk_last_file(); | ||
258 | if (notify_die(DIE_OOPS, str, regs, err, | ||
259 | current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) | ||
260 | return 1; | ||
261 | |||
262 | show_registers(regs); | ||
263 | #ifdef CONFIG_X86_32 | ||
264 | sp = (unsigned long) (&regs->sp); | ||
265 | savesegment(ss, ss); | ||
266 | if (user_mode(regs)) { | ||
267 | sp = regs->sp; | ||
268 | ss = regs->ss & 0xffff; | ||
269 | } | ||
270 | printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); | ||
271 | print_symbol("%s", regs->ip); | ||
272 | printk(" SS:ESP %04x:%08lx\n", ss, sp); | ||
273 | #else | ||
274 | /* Executive summary in case the oops scrolled away */ | ||
275 | printk(KERN_ALERT "RIP "); | ||
276 | printk_address(regs->ip, 1); | ||
277 | printk(" RSP <%016lx>\n", regs->sp); | ||
278 | #endif | ||
279 | return 0; | ||
280 | } | ||
281 | |||
282 | /* | ||
283 | * This is gone through when something in the kernel has done something bad | ||
284 | * and is about to be terminated: | ||
285 | */ | ||
286 | void die(const char *str, struct pt_regs *regs, long err) | ||
287 | { | ||
288 | unsigned long flags = oops_begin(); | ||
289 | int sig = SIGSEGV; | ||
290 | |||
291 | if (!user_mode_vm(regs)) | ||
292 | report_bug(regs->ip, regs); | ||
293 | |||
294 | if (__die(str, regs, err)) | ||
295 | sig = 0; | ||
296 | oops_end(flags, regs, sig); | ||
297 | } | ||
298 | |||
299 | void notrace __kprobes | ||
300 | die_nmi(char *str, struct pt_regs *regs, int do_panic) | ||
301 | { | ||
302 | unsigned long flags; | ||
303 | |||
304 | if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) | ||
305 | return; | ||
306 | |||
307 | /* | ||
308 | * We are in trouble anyway, lets at least try | ||
309 | * to get a message out. | ||
310 | */ | ||
311 | flags = oops_begin(); | ||
312 | printk(KERN_EMERG "%s", str); | ||
313 | printk(" on CPU%d, ip %08lx, registers:\n", | ||
314 | smp_processor_id(), regs->ip); | ||
315 | show_registers(regs); | ||
316 | oops_end(flags, regs, 0); | ||
317 | if (do_panic || panic_on_oops) | ||
318 | panic("Non maskable interrupt"); | ||
319 | nmi_exit(); | ||
320 | local_irq_enable(); | ||
321 | do_exit(SIGBUS); | ||
322 | } | ||
323 | |||
324 | static int __init oops_setup(char *s) | ||
325 | { | ||
326 | if (!s) | ||
327 | return -EINVAL; | ||
328 | if (!strcmp(s, "panic")) | ||
329 | panic_on_oops = 1; | ||
330 | return 0; | ||
331 | } | ||
332 | early_param("oops", oops_setup); | ||
333 | |||
334 | static int __init kstack_setup(char *s) | ||
335 | { | ||
336 | if (!s) | ||
337 | return -EINVAL; | ||
338 | kstack_depth_to_print = simple_strtoul(s, NULL, 0); | ||
339 | return 0; | ||
340 | } | ||
341 | early_param("kstack", kstack_setup); | ||
342 | |||
343 | static int __init code_bytes_setup(char *s) | ||
344 | { | ||
345 | code_bytes = simple_strtoul(s, NULL, 0); | ||
346 | if (code_bytes > 8192) | ||
347 | code_bytes = 8192; | ||
348 | |||
349 | return 1; | ||
350 | } | ||
351 | __setup("code_bytes=", code_bytes_setup); | ||
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h new file mode 100644 index 000000000000..da87590b8698 --- /dev/null +++ b/arch/x86/kernel/dumpstack.h | |||
@@ -0,0 +1,39 @@ | |||
1 | /* | ||
2 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
3 | * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs | ||
4 | */ | ||
5 | |||
6 | #ifndef DUMPSTACK_H | ||
7 | #define DUMPSTACK_H | ||
8 | |||
9 | #ifdef CONFIG_X86_32 | ||
10 | #define STACKSLOTS_PER_LINE 8 | ||
11 | #define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :) | ||
12 | #else | ||
13 | #define STACKSLOTS_PER_LINE 4 | ||
14 | #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) | ||
15 | #endif | ||
16 | |||
17 | extern unsigned long | ||
18 | print_context_stack(struct thread_info *tinfo, | ||
19 | unsigned long *stack, unsigned long bp, | ||
20 | const struct stacktrace_ops *ops, void *data, | ||
21 | unsigned long *end, int *graph); | ||
22 | |||
23 | extern void | ||
24 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
25 | unsigned long *stack, unsigned long bp, char *log_lvl); | ||
26 | |||
27 | extern void | ||
28 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
29 | unsigned long *sp, unsigned long bp, char *log_lvl); | ||
30 | |||
31 | extern unsigned int code_bytes; | ||
32 | extern int kstack_depth_to_print; | ||
33 | |||
34 | /* The form of the top of the frame on the stack */ | ||
35 | struct stack_frame { | ||
36 | struct stack_frame *next_frame; | ||
37 | unsigned long return_address; | ||
38 | }; | ||
39 | #endif | ||
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index b3614752197b..d593cd1f58dc 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c | |||
@@ -17,69 +17,14 @@ | |||
17 | 17 | ||
18 | #include <asm/stacktrace.h> | 18 | #include <asm/stacktrace.h> |
19 | 19 | ||
20 | #define STACKSLOTS_PER_LINE 8 | 20 | #include "dumpstack.h" |
21 | #define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :) | ||
22 | |||
23 | int panic_on_unrecovered_nmi; | ||
24 | int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; | ||
25 | static unsigned int code_bytes = 64; | ||
26 | static int die_counter; | ||
27 | |||
28 | void printk_address(unsigned long address, int reliable) | ||
29 | { | ||
30 | printk(" [<%p>] %s%pS\n", (void *) address, | ||
31 | reliable ? "" : "? ", (void *) address); | ||
32 | } | ||
33 | |||
34 | static inline int valid_stack_ptr(struct thread_info *tinfo, | ||
35 | void *p, unsigned int size, void *end) | ||
36 | { | ||
37 | void *t = tinfo; | ||
38 | if (end) { | ||
39 | if (p < end && p >= (end-THREAD_SIZE)) | ||
40 | return 1; | ||
41 | else | ||
42 | return 0; | ||
43 | } | ||
44 | return p > t && p < t + THREAD_SIZE - size; | ||
45 | } | ||
46 | |||
47 | /* The form of the top of the frame on the stack */ | ||
48 | struct stack_frame { | ||
49 | struct stack_frame *next_frame; | ||
50 | unsigned long return_address; | ||
51 | }; | ||
52 | |||
53 | static inline unsigned long | ||
54 | print_context_stack(struct thread_info *tinfo, | ||
55 | unsigned long *stack, unsigned long bp, | ||
56 | const struct stacktrace_ops *ops, void *data, | ||
57 | unsigned long *end) | ||
58 | { | ||
59 | struct stack_frame *frame = (struct stack_frame *)bp; | ||
60 | |||
61 | while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { | ||
62 | unsigned long addr; | ||
63 | |||
64 | addr = *stack; | ||
65 | if (__kernel_text_address(addr)) { | ||
66 | if ((unsigned long) stack == bp + sizeof(long)) { | ||
67 | ops->address(data, addr, 1); | ||
68 | frame = frame->next_frame; | ||
69 | bp = (unsigned long) frame; | ||
70 | } else { | ||
71 | ops->address(data, addr, bp == 0); | ||
72 | } | ||
73 | } | ||
74 | stack++; | ||
75 | } | ||
76 | return bp; | ||
77 | } | ||
78 | 21 | ||
79 | void dump_trace(struct task_struct *task, struct pt_regs *regs, | 22 | void dump_trace(struct task_struct *task, struct pt_regs *regs, |
80 | unsigned long *stack, unsigned long bp, | 23 | unsigned long *stack, unsigned long bp, |
81 | const struct stacktrace_ops *ops, void *data) | 24 | const struct stacktrace_ops *ops, void *data) |
82 | { | 25 | { |
26 | int graph = 0; | ||
27 | |||
83 | if (!task) | 28 | if (!task) |
84 | task = current; | 29 | task = current; |
85 | 30 | ||
@@ -107,7 +52,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
107 | 52 | ||
108 | context = (struct thread_info *) | 53 | context = (struct thread_info *) |
109 | ((unsigned long)stack & (~(THREAD_SIZE - 1))); | 54 | ((unsigned long)stack & (~(THREAD_SIZE - 1))); |
110 | bp = print_context_stack(context, stack, bp, ops, data, NULL); | 55 | bp = print_context_stack(context, stack, bp, ops, |
56 | data, NULL, &graph); | ||
111 | 57 | ||
112 | stack = (unsigned long *)context->previous_esp; | 58 | stack = (unsigned long *)context->previous_esp; |
113 | if (!stack) | 59 | if (!stack) |
@@ -119,57 +65,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
119 | } | 65 | } |
120 | EXPORT_SYMBOL(dump_trace); | 66 | EXPORT_SYMBOL(dump_trace); |
121 | 67 | ||
122 | static void | 68 | void |
123 | print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) | ||
124 | { | ||
125 | printk(data); | ||
126 | print_symbol(msg, symbol); | ||
127 | printk("\n"); | ||
128 | } | ||
129 | |||
130 | static void print_trace_warning(void *data, char *msg) | ||
131 | { | ||
132 | printk("%s%s\n", (char *)data, msg); | ||
133 | } | ||
134 | |||
135 | static int print_trace_stack(void *data, char *name) | ||
136 | { | ||
137 | printk("%s <%s> ", (char *)data, name); | ||
138 | return 0; | ||
139 | } | ||
140 | |||
141 | /* | ||
142 | * Print one address/symbol entries per line. | ||
143 | */ | ||
144 | static void print_trace_address(void *data, unsigned long addr, int reliable) | ||
145 | { | ||
146 | touch_nmi_watchdog(); | ||
147 | printk(data); | ||
148 | printk_address(addr, reliable); | ||
149 | } | ||
150 | |||
151 | static const struct stacktrace_ops print_trace_ops = { | ||
152 | .warning = print_trace_warning, | ||
153 | .warning_symbol = print_trace_warning_symbol, | ||
154 | .stack = print_trace_stack, | ||
155 | .address = print_trace_address, | ||
156 | }; | ||
157 | |||
158 | static void | ||
159 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
160 | unsigned long *stack, unsigned long bp, char *log_lvl) | ||
161 | { | ||
162 | printk("%sCall Trace:\n", log_lvl); | ||
163 | dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); | ||
164 | } | ||
165 | |||
166 | void show_trace(struct task_struct *task, struct pt_regs *regs, | ||
167 | unsigned long *stack, unsigned long bp) | ||
168 | { | ||
169 | show_trace_log_lvl(task, regs, stack, bp, ""); | ||
170 | } | ||
171 | |||
172 | static void | ||
173 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | 69 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, |
174 | unsigned long *sp, unsigned long bp, char *log_lvl) | 70 | unsigned long *sp, unsigned long bp, char *log_lvl) |
175 | { | 71 | { |
@@ -196,33 +92,6 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
196 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); | 92 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); |
197 | } | 93 | } |
198 | 94 | ||
199 | void show_stack(struct task_struct *task, unsigned long *sp) | ||
200 | { | ||
201 | show_stack_log_lvl(task, NULL, sp, 0, ""); | ||
202 | } | ||
203 | |||
204 | /* | ||
205 | * The architecture-independent dump_stack generator | ||
206 | */ | ||
207 | void dump_stack(void) | ||
208 | { | ||
209 | unsigned long bp = 0; | ||
210 | unsigned long stack; | ||
211 | |||
212 | #ifdef CONFIG_FRAME_POINTER | ||
213 | if (!bp) | ||
214 | get_bp(bp); | ||
215 | #endif | ||
216 | |||
217 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", | ||
218 | current->pid, current->comm, print_tainted(), | ||
219 | init_utsname()->release, | ||
220 | (int)strcspn(init_utsname()->version, " "), | ||
221 | init_utsname()->version); | ||
222 | show_trace(NULL, NULL, &stack, bp); | ||
223 | } | ||
224 | |||
225 | EXPORT_SYMBOL(dump_stack); | ||
226 | 95 | ||
227 | void show_registers(struct pt_regs *regs) | 96 | void show_registers(struct pt_regs *regs) |
228 | { | 97 | { |
@@ -283,167 +152,3 @@ int is_valid_bugaddr(unsigned long ip) | |||
283 | return ud2 == 0x0b0f; | 152 | return ud2 == 0x0b0f; |
284 | } | 153 | } |
285 | 154 | ||
286 | static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; | ||
287 | static int die_owner = -1; | ||
288 | static unsigned int die_nest_count; | ||
289 | |||
290 | unsigned __kprobes long oops_begin(void) | ||
291 | { | ||
292 | unsigned long flags; | ||
293 | |||
294 | oops_enter(); | ||
295 | |||
296 | if (die_owner != raw_smp_processor_id()) { | ||
297 | console_verbose(); | ||
298 | raw_local_irq_save(flags); | ||
299 | __raw_spin_lock(&die_lock); | ||
300 | die_owner = smp_processor_id(); | ||
301 | die_nest_count = 0; | ||
302 | bust_spinlocks(1); | ||
303 | } else { | ||
304 | raw_local_irq_save(flags); | ||
305 | } | ||
306 | die_nest_count++; | ||
307 | return flags; | ||
308 | } | ||
309 | |||
310 | void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) | ||
311 | { | ||
312 | bust_spinlocks(0); | ||
313 | die_owner = -1; | ||
314 | add_taint(TAINT_DIE); | ||
315 | __raw_spin_unlock(&die_lock); | ||
316 | raw_local_irq_restore(flags); | ||
317 | |||
318 | if (!regs) | ||
319 | return; | ||
320 | |||
321 | if (kexec_should_crash(current)) | ||
322 | crash_kexec(regs); | ||
323 | if (in_interrupt()) | ||
324 | panic("Fatal exception in interrupt"); | ||
325 | if (panic_on_oops) | ||
326 | panic("Fatal exception"); | ||
327 | oops_exit(); | ||
328 | do_exit(signr); | ||
329 | } | ||
330 | |||
331 | int __kprobes __die(const char *str, struct pt_regs *regs, long err) | ||
332 | { | ||
333 | unsigned short ss; | ||
334 | unsigned long sp; | ||
335 | |||
336 | printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); | ||
337 | #ifdef CONFIG_PREEMPT | ||
338 | printk("PREEMPT "); | ||
339 | #endif | ||
340 | #ifdef CONFIG_SMP | ||
341 | printk("SMP "); | ||
342 | #endif | ||
343 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
344 | printk("DEBUG_PAGEALLOC"); | ||
345 | #endif | ||
346 | printk("\n"); | ||
347 | sysfs_printk_last_file(); | ||
348 | if (notify_die(DIE_OOPS, str, regs, err, | ||
349 | current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) | ||
350 | return 1; | ||
351 | |||
352 | show_registers(regs); | ||
353 | /* Executive summary in case the oops scrolled away */ | ||
354 | sp = (unsigned long) (&regs->sp); | ||
355 | savesegment(ss, ss); | ||
356 | if (user_mode(regs)) { | ||
357 | sp = regs->sp; | ||
358 | ss = regs->ss & 0xffff; | ||
359 | } | ||
360 | printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); | ||
361 | print_symbol("%s", regs->ip); | ||
362 | printk(" SS:ESP %04x:%08lx\n", ss, sp); | ||
363 | return 0; | ||
364 | } | ||
365 | |||
366 | /* | ||
367 | * This is gone through when something in the kernel has done something bad | ||
368 | * and is about to be terminated: | ||
369 | */ | ||
370 | void die(const char *str, struct pt_regs *regs, long err) | ||
371 | { | ||
372 | unsigned long flags = oops_begin(); | ||
373 | |||
374 | if (die_nest_count < 3) { | ||
375 | report_bug(regs->ip, regs); | ||
376 | |||
377 | if (__die(str, regs, err)) | ||
378 | regs = NULL; | ||
379 | } else { | ||
380 | printk(KERN_EMERG "Recursive die() failure, output suppressed\n"); | ||
381 | } | ||
382 | |||
383 | oops_end(flags, regs, SIGSEGV); | ||
384 | } | ||
385 | |||
386 | static DEFINE_SPINLOCK(nmi_print_lock); | ||
387 | |||
388 | void notrace __kprobes | ||
389 | die_nmi(char *str, struct pt_regs *regs, int do_panic) | ||
390 | { | ||
391 | if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) | ||
392 | return; | ||
393 | |||
394 | spin_lock(&nmi_print_lock); | ||
395 | /* | ||
396 | * We are in trouble anyway, lets at least try | ||
397 | * to get a message out: | ||
398 | */ | ||
399 | bust_spinlocks(1); | ||
400 | printk(KERN_EMERG "%s", str); | ||
401 | printk(" on CPU%d, ip %08lx, registers:\n", | ||
402 | smp_processor_id(), regs->ip); | ||
403 | show_registers(regs); | ||
404 | if (do_panic) | ||
405 | panic("Non maskable interrupt"); | ||
406 | console_silent(); | ||
407 | spin_unlock(&nmi_print_lock); | ||
408 | |||
409 | /* | ||
410 | * If we are in kernel we are probably nested up pretty bad | ||
411 | * and might aswell get out now while we still can: | ||
412 | */ | ||
413 | if (!user_mode_vm(regs)) { | ||
414 | current->thread.trap_no = 2; | ||
415 | crash_kexec(regs); | ||
416 | } | ||
417 | |||
418 | bust_spinlocks(0); | ||
419 | do_exit(SIGSEGV); | ||
420 | } | ||
421 | |||
422 | static int __init oops_setup(char *s) | ||
423 | { | ||
424 | if (!s) | ||
425 | return -EINVAL; | ||
426 | if (!strcmp(s, "panic")) | ||
427 | panic_on_oops = 1; | ||
428 | return 0; | ||
429 | } | ||
430 | early_param("oops", oops_setup); | ||
431 | |||
432 | static int __init kstack_setup(char *s) | ||
433 | { | ||
434 | if (!s) | ||
435 | return -EINVAL; | ||
436 | kstack_depth_to_print = simple_strtoul(s, NULL, 0); | ||
437 | return 0; | ||
438 | } | ||
439 | early_param("kstack", kstack_setup); | ||
440 | |||
441 | static int __init code_bytes_setup(char *s) | ||
442 | { | ||
443 | code_bytes = simple_strtoul(s, NULL, 0); | ||
444 | if (code_bytes > 8192) | ||
445 | code_bytes = 8192; | ||
446 | |||
447 | return 1; | ||
448 | } | ||
449 | __setup("code_bytes=", code_bytes_setup); | ||
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 96a5db7da8a7..c302d0707048 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c | |||
@@ -17,19 +17,7 @@ | |||
17 | 17 | ||
18 | #include <asm/stacktrace.h> | 18 | #include <asm/stacktrace.h> |
19 | 19 | ||
20 | #define STACKSLOTS_PER_LINE 4 | 20 | #include "dumpstack.h" |
21 | #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) | ||
22 | |||
23 | int panic_on_unrecovered_nmi; | ||
24 | int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; | ||
25 | static unsigned int code_bytes = 64; | ||
26 | static int die_counter; | ||
27 | |||
28 | void printk_address(unsigned long address, int reliable) | ||
29 | { | ||
30 | printk(" [<%p>] %s%pS\n", (void *) address, | ||
31 | reliable ? "" : "? ", (void *) address); | ||
32 | } | ||
33 | 21 | ||
34 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | 22 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, |
35 | unsigned *usedp, char **idp) | 23 | unsigned *usedp, char **idp) |
@@ -113,51 +101,6 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | |||
113 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack | 101 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack |
114 | */ | 102 | */ |
115 | 103 | ||
116 | static inline int valid_stack_ptr(struct thread_info *tinfo, | ||
117 | void *p, unsigned int size, void *end) | ||
118 | { | ||
119 | void *t = tinfo; | ||
120 | if (end) { | ||
121 | if (p < end && p >= (end-THREAD_SIZE)) | ||
122 | return 1; | ||
123 | else | ||
124 | return 0; | ||
125 | } | ||
126 | return p > t && p < t + THREAD_SIZE - size; | ||
127 | } | ||
128 | |||
129 | /* The form of the top of the frame on the stack */ | ||
130 | struct stack_frame { | ||
131 | struct stack_frame *next_frame; | ||
132 | unsigned long return_address; | ||
133 | }; | ||
134 | |||
135 | static inline unsigned long | ||
136 | print_context_stack(struct thread_info *tinfo, | ||
137 | unsigned long *stack, unsigned long bp, | ||
138 | const struct stacktrace_ops *ops, void *data, | ||
139 | unsigned long *end) | ||
140 | { | ||
141 | struct stack_frame *frame = (struct stack_frame *)bp; | ||
142 | |||
143 | while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { | ||
144 | unsigned long addr; | ||
145 | |||
146 | addr = *stack; | ||
147 | if (__kernel_text_address(addr)) { | ||
148 | if ((unsigned long) stack == bp + sizeof(long)) { | ||
149 | ops->address(data, addr, 1); | ||
150 | frame = frame->next_frame; | ||
151 | bp = (unsigned long) frame; | ||
152 | } else { | ||
153 | ops->address(data, addr, bp == 0); | ||
154 | } | ||
155 | } | ||
156 | stack++; | ||
157 | } | ||
158 | return bp; | ||
159 | } | ||
160 | |||
161 | void dump_trace(struct task_struct *task, struct pt_regs *regs, | 104 | void dump_trace(struct task_struct *task, struct pt_regs *regs, |
162 | unsigned long *stack, unsigned long bp, | 105 | unsigned long *stack, unsigned long bp, |
163 | const struct stacktrace_ops *ops, void *data) | 106 | const struct stacktrace_ops *ops, void *data) |
@@ -166,6 +109,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
166 | unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; | 109 | unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; |
167 | unsigned used = 0; | 110 | unsigned used = 0; |
168 | struct thread_info *tinfo; | 111 | struct thread_info *tinfo; |
112 | int graph = 0; | ||
169 | 113 | ||
170 | if (!task) | 114 | if (!task) |
171 | task = current; | 115 | task = current; |
@@ -206,7 +150,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
206 | break; | 150 | break; |
207 | 151 | ||
208 | bp = print_context_stack(tinfo, stack, bp, ops, | 152 | bp = print_context_stack(tinfo, stack, bp, ops, |
209 | data, estack_end); | 153 | data, estack_end, &graph); |
210 | ops->stack(data, "<EOE>"); | 154 | ops->stack(data, "<EOE>"); |
211 | /* | 155 | /* |
212 | * We link to the next stack via the | 156 | * We link to the next stack via the |
@@ -225,7 +169,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
225 | if (ops->stack(data, "IRQ") < 0) | 169 | if (ops->stack(data, "IRQ") < 0) |
226 | break; | 170 | break; |
227 | bp = print_context_stack(tinfo, stack, bp, | 171 | bp = print_context_stack(tinfo, stack, bp, |
228 | ops, data, irqstack_end); | 172 | ops, data, irqstack_end, &graph); |
229 | /* | 173 | /* |
230 | * We link to the next stack (which would be | 174 | * We link to the next stack (which would be |
231 | * the process stack normally) the last | 175 | * the process stack normally) the last |
@@ -243,62 +187,12 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
243 | /* | 187 | /* |
244 | * This handles the process stack: | 188 | * This handles the process stack: |
245 | */ | 189 | */ |
246 | bp = print_context_stack(tinfo, stack, bp, ops, data, NULL); | 190 | bp = print_context_stack(tinfo, stack, bp, ops, data, NULL, &graph); |
247 | put_cpu(); | 191 | put_cpu(); |
248 | } | 192 | } |
249 | EXPORT_SYMBOL(dump_trace); | 193 | EXPORT_SYMBOL(dump_trace); |
250 | 194 | ||
251 | static void | 195 | void |
252 | print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) | ||
253 | { | ||
254 | printk(data); | ||
255 | print_symbol(msg, symbol); | ||
256 | printk("\n"); | ||
257 | } | ||
258 | |||
259 | static void print_trace_warning(void *data, char *msg) | ||
260 | { | ||
261 | printk("%s%s\n", (char *)data, msg); | ||
262 | } | ||
263 | |||
264 | static int print_trace_stack(void *data, char *name) | ||
265 | { | ||
266 | printk("%s <%s> ", (char *)data, name); | ||
267 | return 0; | ||
268 | } | ||
269 | |||
270 | /* | ||
271 | * Print one address/symbol entries per line. | ||
272 | */ | ||
273 | static void print_trace_address(void *data, unsigned long addr, int reliable) | ||
274 | { | ||
275 | touch_nmi_watchdog(); | ||
276 | printk(data); | ||
277 | printk_address(addr, reliable); | ||
278 | } | ||
279 | |||
280 | static const struct stacktrace_ops print_trace_ops = { | ||
281 | .warning = print_trace_warning, | ||
282 | .warning_symbol = print_trace_warning_symbol, | ||
283 | .stack = print_trace_stack, | ||
284 | .address = print_trace_address, | ||
285 | }; | ||
286 | |||
287 | static void | ||
288 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
289 | unsigned long *stack, unsigned long bp, char *log_lvl) | ||
290 | { | ||
291 | printk("%sCall Trace:\n", log_lvl); | ||
292 | dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); | ||
293 | } | ||
294 | |||
295 | void show_trace(struct task_struct *task, struct pt_regs *regs, | ||
296 | unsigned long *stack, unsigned long bp) | ||
297 | { | ||
298 | show_trace_log_lvl(task, regs, stack, bp, ""); | ||
299 | } | ||
300 | |||
301 | static void | ||
302 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | 196 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, |
303 | unsigned long *sp, unsigned long bp, char *log_lvl) | 197 | unsigned long *sp, unsigned long bp, char *log_lvl) |
304 | { | 198 | { |
@@ -342,33 +236,6 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
342 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); | 236 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); |
343 | } | 237 | } |
344 | 238 | ||
345 | void show_stack(struct task_struct *task, unsigned long *sp) | ||
346 | { | ||
347 | show_stack_log_lvl(task, NULL, sp, 0, ""); | ||
348 | } | ||
349 | |||
350 | /* | ||
351 | * The architecture-independent dump_stack generator | ||
352 | */ | ||
353 | void dump_stack(void) | ||
354 | { | ||
355 | unsigned long bp = 0; | ||
356 | unsigned long stack; | ||
357 | |||
358 | #ifdef CONFIG_FRAME_POINTER | ||
359 | if (!bp) | ||
360 | get_bp(bp); | ||
361 | #endif | ||
362 | |||
363 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", | ||
364 | current->pid, current->comm, print_tainted(), | ||
365 | init_utsname()->release, | ||
366 | (int)strcspn(init_utsname()->version, " "), | ||
367 | init_utsname()->version); | ||
368 | show_trace(NULL, NULL, &stack, bp); | ||
369 | } | ||
370 | EXPORT_SYMBOL(dump_stack); | ||
371 | |||
372 | void show_registers(struct pt_regs *regs) | 239 | void show_registers(struct pt_regs *regs) |
373 | { | 240 | { |
374 | int i; | 241 | int i; |
@@ -429,147 +296,3 @@ int is_valid_bugaddr(unsigned long ip) | |||
429 | return ud2 == 0x0b0f; | 296 | return ud2 == 0x0b0f; |
430 | } | 297 | } |
431 | 298 | ||
432 | static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; | ||
433 | static int die_owner = -1; | ||
434 | static unsigned int die_nest_count; | ||
435 | |||
436 | unsigned __kprobes long oops_begin(void) | ||
437 | { | ||
438 | int cpu; | ||
439 | unsigned long flags; | ||
440 | |||
441 | oops_enter(); | ||
442 | |||
443 | /* racy, but better than risking deadlock. */ | ||
444 | raw_local_irq_save(flags); | ||
445 | cpu = smp_processor_id(); | ||
446 | if (!__raw_spin_trylock(&die_lock)) { | ||
447 | if (cpu == die_owner) | ||
448 | /* nested oops. should stop eventually */; | ||
449 | else | ||
450 | __raw_spin_lock(&die_lock); | ||
451 | } | ||
452 | die_nest_count++; | ||
453 | die_owner = cpu; | ||
454 | console_verbose(); | ||
455 | bust_spinlocks(1); | ||
456 | return flags; | ||
457 | } | ||
458 | |||
459 | void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) | ||
460 | { | ||
461 | die_owner = -1; | ||
462 | bust_spinlocks(0); | ||
463 | die_nest_count--; | ||
464 | if (!die_nest_count) | ||
465 | /* Nest count reaches zero, release the lock. */ | ||
466 | __raw_spin_unlock(&die_lock); | ||
467 | raw_local_irq_restore(flags); | ||
468 | if (!regs) { | ||
469 | oops_exit(); | ||
470 | return; | ||
471 | } | ||
472 | if (in_interrupt()) | ||
473 | panic("Fatal exception in interrupt"); | ||
474 | if (panic_on_oops) | ||
475 | panic("Fatal exception"); | ||
476 | oops_exit(); | ||
477 | do_exit(signr); | ||
478 | } | ||
479 | |||
480 | int __kprobes __die(const char *str, struct pt_regs *regs, long err) | ||
481 | { | ||
482 | printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); | ||
483 | #ifdef CONFIG_PREEMPT | ||
484 | printk("PREEMPT "); | ||
485 | #endif | ||
486 | #ifdef CONFIG_SMP | ||
487 | printk("SMP "); | ||
488 | #endif | ||
489 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
490 | printk("DEBUG_PAGEALLOC"); | ||
491 | #endif | ||
492 | printk("\n"); | ||
493 | sysfs_printk_last_file(); | ||
494 | if (notify_die(DIE_OOPS, str, regs, err, | ||
495 | current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) | ||
496 | return 1; | ||
497 | |||
498 | show_registers(regs); | ||
499 | add_taint(TAINT_DIE); | ||
500 | /* Executive summary in case the oops scrolled away */ | ||
501 | printk(KERN_ALERT "RIP "); | ||
502 | printk_address(regs->ip, 1); | ||
503 | printk(" RSP <%016lx>\n", regs->sp); | ||
504 | if (kexec_should_crash(current)) | ||
505 | crash_kexec(regs); | ||
506 | return 0; | ||
507 | } | ||
508 | |||
509 | void die(const char *str, struct pt_regs *regs, long err) | ||
510 | { | ||
511 | unsigned long flags = oops_begin(); | ||
512 | |||
513 | if (!user_mode(regs)) | ||
514 | report_bug(regs->ip, regs); | ||
515 | |||
516 | if (__die(str, regs, err)) | ||
517 | regs = NULL; | ||
518 | oops_end(flags, regs, SIGSEGV); | ||
519 | } | ||
520 | |||
521 | notrace __kprobes void | ||
522 | die_nmi(char *str, struct pt_regs *regs, int do_panic) | ||
523 | { | ||
524 | unsigned long flags; | ||
525 | |||
526 | if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) | ||
527 | return; | ||
528 | |||
529 | flags = oops_begin(); | ||
530 | /* | ||
531 | * We are in trouble anyway, lets at least try | ||
532 | * to get a message out. | ||
533 | */ | ||
534 | printk(KERN_EMERG "%s", str); | ||
535 | printk(" on CPU%d, ip %08lx, registers:\n", | ||
536 | smp_processor_id(), regs->ip); | ||
537 | show_registers(regs); | ||
538 | if (kexec_should_crash(current)) | ||
539 | crash_kexec(regs); | ||
540 | if (do_panic || panic_on_oops) | ||
541 | panic("Non maskable interrupt"); | ||
542 | oops_end(flags, NULL, SIGBUS); | ||
543 | nmi_exit(); | ||
544 | local_irq_enable(); | ||
545 | do_exit(SIGBUS); | ||
546 | } | ||
547 | |||
548 | static int __init oops_setup(char *s) | ||
549 | { | ||
550 | if (!s) | ||
551 | return -EINVAL; | ||
552 | if (!strcmp(s, "panic")) | ||
553 | panic_on_oops = 1; | ||
554 | return 0; | ||
555 | } | ||
556 | early_param("oops", oops_setup); | ||
557 | |||
558 | static int __init kstack_setup(char *s) | ||
559 | { | ||
560 | if (!s) | ||
561 | return -EINVAL; | ||
562 | kstack_depth_to_print = simple_strtoul(s, NULL, 0); | ||
563 | return 0; | ||
564 | } | ||
565 | early_param("kstack", kstack_setup); | ||
566 | |||
567 | static int __init code_bytes_setup(char *s) | ||
568 | { | ||
569 | code_bytes = simple_strtoul(s, NULL, 0); | ||
570 | if (code_bytes > 8192) | ||
571 | code_bytes = 8192; | ||
572 | |||
573 | return 1; | ||
574 | } | ||
575 | __setup("code_bytes=", code_bytes_setup); | ||
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 28b597ef9ca1..43ceb3f454bf 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -1157,6 +1157,9 @@ ENTRY(mcount) | |||
1157 | END(mcount) | 1157 | END(mcount) |
1158 | 1158 | ||
1159 | ENTRY(ftrace_caller) | 1159 | ENTRY(ftrace_caller) |
1160 | cmpl $0, function_trace_stop | ||
1161 | jne ftrace_stub | ||
1162 | |||
1160 | pushl %eax | 1163 | pushl %eax |
1161 | pushl %ecx | 1164 | pushl %ecx |
1162 | pushl %edx | 1165 | pushl %edx |
@@ -1171,6 +1174,11 @@ ftrace_call: | |||
1171 | popl %edx | 1174 | popl %edx |
1172 | popl %ecx | 1175 | popl %ecx |
1173 | popl %eax | 1176 | popl %eax |
1177 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
1178 | .globl ftrace_graph_call | ||
1179 | ftrace_graph_call: | ||
1180 | jmp ftrace_stub | ||
1181 | #endif | ||
1174 | 1182 | ||
1175 | .globl ftrace_stub | 1183 | .globl ftrace_stub |
1176 | ftrace_stub: | 1184 | ftrace_stub: |
@@ -1180,8 +1188,18 @@ END(ftrace_caller) | |||
1180 | #else /* ! CONFIG_DYNAMIC_FTRACE */ | 1188 | #else /* ! CONFIG_DYNAMIC_FTRACE */ |
1181 | 1189 | ||
1182 | ENTRY(mcount) | 1190 | ENTRY(mcount) |
1191 | cmpl $0, function_trace_stop | ||
1192 | jne ftrace_stub | ||
1193 | |||
1183 | cmpl $ftrace_stub, ftrace_trace_function | 1194 | cmpl $ftrace_stub, ftrace_trace_function |
1184 | jnz trace | 1195 | jnz trace |
1196 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
1197 | cmpl $ftrace_stub, ftrace_graph_return | ||
1198 | jnz ftrace_graph_caller | ||
1199 | |||
1200 | cmpl $ftrace_graph_entry_stub, ftrace_graph_entry | ||
1201 | jnz ftrace_graph_caller | ||
1202 | #endif | ||
1185 | .globl ftrace_stub | 1203 | .globl ftrace_stub |
1186 | ftrace_stub: | 1204 | ftrace_stub: |
1187 | ret | 1205 | ret |
@@ -1200,12 +1218,43 @@ trace: | |||
1200 | popl %edx | 1218 | popl %edx |
1201 | popl %ecx | 1219 | popl %ecx |
1202 | popl %eax | 1220 | popl %eax |
1203 | |||
1204 | jmp ftrace_stub | 1221 | jmp ftrace_stub |
1205 | END(mcount) | 1222 | END(mcount) |
1206 | #endif /* CONFIG_DYNAMIC_FTRACE */ | 1223 | #endif /* CONFIG_DYNAMIC_FTRACE */ |
1207 | #endif /* CONFIG_FUNCTION_TRACER */ | 1224 | #endif /* CONFIG_FUNCTION_TRACER */ |
1208 | 1225 | ||
1226 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
1227 | ENTRY(ftrace_graph_caller) | ||
1228 | cmpl $0, function_trace_stop | ||
1229 | jne ftrace_stub | ||
1230 | |||
1231 | pushl %eax | ||
1232 | pushl %ecx | ||
1233 | pushl %edx | ||
1234 | movl 0xc(%esp), %edx | ||
1235 | lea 0x4(%ebp), %eax | ||
1236 | subl $MCOUNT_INSN_SIZE, %edx | ||
1237 | call prepare_ftrace_return | ||
1238 | popl %edx | ||
1239 | popl %ecx | ||
1240 | popl %eax | ||
1241 | ret | ||
1242 | END(ftrace_graph_caller) | ||
1243 | |||
1244 | .globl return_to_handler | ||
1245 | return_to_handler: | ||
1246 | pushl $0 | ||
1247 | pushl %eax | ||
1248 | pushl %ecx | ||
1249 | pushl %edx | ||
1250 | call ftrace_return_to_handler | ||
1251 | movl %eax, 0xc(%esp) | ||
1252 | popl %edx | ||
1253 | popl %ecx | ||
1254 | popl %eax | ||
1255 | ret | ||
1256 | #endif | ||
1257 | |||
1209 | .section .rodata,"a" | 1258 | .section .rodata,"a" |
1210 | #include "syscall_table_32.S" | 1259 | #include "syscall_table_32.S" |
1211 | 1260 | ||
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index b86f332c96a6..303dd84d2a98 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -68,16 +68,10 @@ ENTRY(mcount) | |||
68 | END(mcount) | 68 | END(mcount) |
69 | 69 | ||
70 | ENTRY(ftrace_caller) | 70 | ENTRY(ftrace_caller) |
71 | cmpl $0, function_trace_stop | ||
72 | jne ftrace_stub | ||
71 | 73 | ||
72 | /* taken from glibc */ | 74 | MCOUNT_SAVE_FRAME |
73 | subq $0x38, %rsp | ||
74 | movq %rax, (%rsp) | ||
75 | movq %rcx, 8(%rsp) | ||
76 | movq %rdx, 16(%rsp) | ||
77 | movq %rsi, 24(%rsp) | ||
78 | movq %rdi, 32(%rsp) | ||
79 | movq %r8, 40(%rsp) | ||
80 | movq %r9, 48(%rsp) | ||
81 | 75 | ||
82 | movq 0x38(%rsp), %rdi | 76 | movq 0x38(%rsp), %rdi |
83 | movq 8(%rbp), %rsi | 77 | movq 8(%rbp), %rsi |
@@ -87,14 +81,13 @@ ENTRY(ftrace_caller) | |||
87 | ftrace_call: | 81 | ftrace_call: |
88 | call ftrace_stub | 82 | call ftrace_stub |
89 | 83 | ||
90 | movq 48(%rsp), %r9 | 84 | MCOUNT_RESTORE_FRAME |
91 | movq 40(%rsp), %r8 | 85 | |
92 | movq 32(%rsp), %rdi | 86 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
93 | movq 24(%rsp), %rsi | 87 | .globl ftrace_graph_call |
94 | movq 16(%rsp), %rdx | 88 | ftrace_graph_call: |
95 | movq 8(%rsp), %rcx | 89 | jmp ftrace_stub |
96 | movq (%rsp), %rax | 90 | #endif |
97 | addq $0x38, %rsp | ||
98 | 91 | ||
99 | .globl ftrace_stub | 92 | .globl ftrace_stub |
100 | ftrace_stub: | 93 | ftrace_stub: |
@@ -103,15 +96,63 @@ END(ftrace_caller) | |||
103 | 96 | ||
104 | #else /* ! CONFIG_DYNAMIC_FTRACE */ | 97 | #else /* ! CONFIG_DYNAMIC_FTRACE */ |
105 | ENTRY(mcount) | 98 | ENTRY(mcount) |
99 | cmpl $0, function_trace_stop | ||
100 | jne ftrace_stub | ||
101 | |||
106 | cmpq $ftrace_stub, ftrace_trace_function | 102 | cmpq $ftrace_stub, ftrace_trace_function |
107 | jnz trace | 103 | jnz trace |
104 | |||
105 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
106 | cmpq $ftrace_stub, ftrace_graph_return | ||
107 | jnz ftrace_graph_caller | ||
108 | |||
109 | cmpq $ftrace_graph_entry_stub, ftrace_graph_entry | ||
110 | jnz ftrace_graph_caller | ||
111 | #endif | ||
112 | |||
108 | .globl ftrace_stub | 113 | .globl ftrace_stub |
109 | ftrace_stub: | 114 | ftrace_stub: |
110 | retq | 115 | retq |
111 | 116 | ||
112 | trace: | 117 | trace: |
113 | /* taken from glibc */ | 118 | MCOUNT_SAVE_FRAME |
114 | subq $0x38, %rsp | 119 | |
120 | movq 0x38(%rsp), %rdi | ||
121 | movq 8(%rbp), %rsi | ||
122 | subq $MCOUNT_INSN_SIZE, %rdi | ||
123 | |||
124 | call *ftrace_trace_function | ||
125 | |||
126 | MCOUNT_RESTORE_FRAME | ||
127 | |||
128 | jmp ftrace_stub | ||
129 | END(mcount) | ||
130 | #endif /* CONFIG_DYNAMIC_FTRACE */ | ||
131 | #endif /* CONFIG_FUNCTION_TRACER */ | ||
132 | |||
133 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
134 | ENTRY(ftrace_graph_caller) | ||
135 | cmpl $0, function_trace_stop | ||
136 | jne ftrace_stub | ||
137 | |||
138 | MCOUNT_SAVE_FRAME | ||
139 | |||
140 | leaq 8(%rbp), %rdi | ||
141 | movq 0x38(%rsp), %rsi | ||
142 | subq $MCOUNT_INSN_SIZE, %rsi | ||
143 | |||
144 | call prepare_ftrace_return | ||
145 | |||
146 | MCOUNT_RESTORE_FRAME | ||
147 | |||
148 | retq | ||
149 | END(ftrace_graph_caller) | ||
150 | |||
151 | |||
152 | .globl return_to_handler | ||
153 | return_to_handler: | ||
154 | subq $80, %rsp | ||
155 | |||
115 | movq %rax, (%rsp) | 156 | movq %rax, (%rsp) |
116 | movq %rcx, 8(%rsp) | 157 | movq %rcx, 8(%rsp) |
117 | movq %rdx, 16(%rsp) | 158 | movq %rdx, 16(%rsp) |
@@ -119,13 +160,14 @@ trace: | |||
119 | movq %rdi, 32(%rsp) | 160 | movq %rdi, 32(%rsp) |
120 | movq %r8, 40(%rsp) | 161 | movq %r8, 40(%rsp) |
121 | movq %r9, 48(%rsp) | 162 | movq %r9, 48(%rsp) |
163 | movq %r10, 56(%rsp) | ||
164 | movq %r11, 64(%rsp) | ||
122 | 165 | ||
123 | movq 0x38(%rsp), %rdi | 166 | call ftrace_return_to_handler |
124 | movq 8(%rbp), %rsi | ||
125 | subq $MCOUNT_INSN_SIZE, %rdi | ||
126 | |||
127 | call *ftrace_trace_function | ||
128 | 167 | ||
168 | movq %rax, 72(%rsp) | ||
169 | movq 64(%rsp), %r11 | ||
170 | movq 56(%rsp), %r10 | ||
129 | movq 48(%rsp), %r9 | 171 | movq 48(%rsp), %r9 |
130 | movq 40(%rsp), %r8 | 172 | movq 40(%rsp), %r8 |
131 | movq 32(%rsp), %rdi | 173 | movq 32(%rsp), %rdi |
@@ -133,12 +175,10 @@ trace: | |||
133 | movq 16(%rsp), %rdx | 175 | movq 16(%rsp), %rdx |
134 | movq 8(%rsp), %rcx | 176 | movq 8(%rsp), %rcx |
135 | movq (%rsp), %rax | 177 | movq (%rsp), %rax |
136 | addq $0x38, %rsp | 178 | addq $72, %rsp |
179 | retq | ||
180 | #endif | ||
137 | 181 | ||
138 | jmp ftrace_stub | ||
139 | END(mcount) | ||
140 | #endif /* CONFIG_DYNAMIC_FTRACE */ | ||
141 | #endif /* CONFIG_FUNCTION_TRACER */ | ||
142 | 182 | ||
143 | #ifndef CONFIG_PREEMPT | 183 | #ifndef CONFIG_PREEMPT |
144 | #define retint_kernel retint_restore_args | 184 | #define retint_kernel retint_restore_args |
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 50ea0ac8c9bf..1b43086b097a 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c | |||
@@ -14,14 +14,17 @@ | |||
14 | #include <linux/uaccess.h> | 14 | #include <linux/uaccess.h> |
15 | #include <linux/ftrace.h> | 15 | #include <linux/ftrace.h> |
16 | #include <linux/percpu.h> | 16 | #include <linux/percpu.h> |
17 | #include <linux/sched.h> | ||
17 | #include <linux/init.h> | 18 | #include <linux/init.h> |
18 | #include <linux/list.h> | 19 | #include <linux/list.h> |
19 | 20 | ||
20 | #include <asm/ftrace.h> | 21 | #include <asm/ftrace.h> |
22 | #include <linux/ftrace.h> | ||
21 | #include <asm/nops.h> | 23 | #include <asm/nops.h> |
24 | #include <asm/nmi.h> | ||
22 | 25 | ||
23 | 26 | ||
24 | static unsigned char ftrace_nop[MCOUNT_INSN_SIZE]; | 27 | #ifdef CONFIG_DYNAMIC_FTRACE |
25 | 28 | ||
26 | union ftrace_code_union { | 29 | union ftrace_code_union { |
27 | char code[MCOUNT_INSN_SIZE]; | 30 | char code[MCOUNT_INSN_SIZE]; |
@@ -31,18 +34,12 @@ union ftrace_code_union { | |||
31 | } __attribute__((packed)); | 34 | } __attribute__((packed)); |
32 | }; | 35 | }; |
33 | 36 | ||
34 | |||
35 | static int ftrace_calc_offset(long ip, long addr) | 37 | static int ftrace_calc_offset(long ip, long addr) |
36 | { | 38 | { |
37 | return (int)(addr - ip); | 39 | return (int)(addr - ip); |
38 | } | 40 | } |
39 | 41 | ||
40 | unsigned char *ftrace_nop_replace(void) | 42 | static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) |
41 | { | ||
42 | return ftrace_nop; | ||
43 | } | ||
44 | |||
45 | unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) | ||
46 | { | 43 | { |
47 | static union ftrace_code_union calc; | 44 | static union ftrace_code_union calc; |
48 | 45 | ||
@@ -56,7 +53,142 @@ unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) | |||
56 | return calc.code; | 53 | return calc.code; |
57 | } | 54 | } |
58 | 55 | ||
59 | int | 56 | /* |
57 | * Modifying code must take extra care. On an SMP machine, if | ||
58 | * the code being modified is also being executed on another CPU | ||
59 | * that CPU will have undefined results and possibly take a GPF. | ||
60 | * We use kstop_machine to stop other CPUs from executing code. | ||
61 | * But this does not stop NMIs from happening. We still need | ||
62 | * to protect against that. We separate out the modification of | ||
63 | * the code to take care of this. | ||
64 | * | ||
65 | * Two buffers are added: An IP buffer and a "code" buffer. | ||
66 | * | ||
67 | * 1) Put the instruction pointer into the IP buffer | ||
68 | * and the new code into the "code" buffer. | ||
69 | * 2) Set a flag that says we are modifying code | ||
70 | * 3) Wait for any running NMIs to finish. | ||
71 | * 4) Write the code | ||
72 | * 5) clear the flag. | ||
73 | * 6) Wait for any running NMIs to finish. | ||
74 | * | ||
75 | * If an NMI is executed, the first thing it does is to call | ||
76 | * "ftrace_nmi_enter". This will check if the flag is set to write | ||
77 | * and if it is, it will write what is in the IP and "code" buffers. | ||
78 | * | ||
79 | * The trick is, it does not matter if everyone is writing the same | ||
80 | * content to the code location. Also, if a CPU is executing code | ||
81 | * it is OK to write to that code location if the contents being written | ||
82 | * are the same as what exists. | ||
83 | */ | ||
84 | |||
85 | static atomic_t in_nmi = ATOMIC_INIT(0); | ||
86 | static int mod_code_status; /* holds return value of text write */ | ||
87 | static int mod_code_write; /* set when NMI should do the write */ | ||
88 | static void *mod_code_ip; /* holds the IP to write to */ | ||
89 | static void *mod_code_newcode; /* holds the text to write to the IP */ | ||
90 | |||
91 | static unsigned nmi_wait_count; | ||
92 | static atomic_t nmi_update_count = ATOMIC_INIT(0); | ||
93 | |||
94 | int ftrace_arch_read_dyn_info(char *buf, int size) | ||
95 | { | ||
96 | int r; | ||
97 | |||
98 | r = snprintf(buf, size, "%u %u", | ||
99 | nmi_wait_count, | ||
100 | atomic_read(&nmi_update_count)); | ||
101 | return r; | ||
102 | } | ||
103 | |||
104 | static void ftrace_mod_code(void) | ||
105 | { | ||
106 | /* | ||
107 | * Yes, more than one CPU process can be writing to mod_code_status. | ||
108 | * (and the code itself) | ||
109 | * But if one were to fail, then they all should, and if one were | ||
110 | * to succeed, then they all should. | ||
111 | */ | ||
112 | mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode, | ||
113 | MCOUNT_INSN_SIZE); | ||
114 | } | ||
115 | |||
116 | void ftrace_nmi_enter(void) | ||
117 | { | ||
118 | atomic_inc(&in_nmi); | ||
119 | /* Must have in_nmi seen before reading write flag */ | ||
120 | smp_mb(); | ||
121 | if (mod_code_write) { | ||
122 | ftrace_mod_code(); | ||
123 | atomic_inc(&nmi_update_count); | ||
124 | } | ||
125 | } | ||
126 | |||
127 | void ftrace_nmi_exit(void) | ||
128 | { | ||
129 | /* Finish all executions before clearing in_nmi */ | ||
130 | smp_wmb(); | ||
131 | atomic_dec(&in_nmi); | ||
132 | } | ||
133 | |||
134 | static void wait_for_nmi(void) | ||
135 | { | ||
136 | int waited = 0; | ||
137 | |||
138 | while (atomic_read(&in_nmi)) { | ||
139 | waited = 1; | ||
140 | cpu_relax(); | ||
141 | } | ||
142 | |||
143 | if (waited) | ||
144 | nmi_wait_count++; | ||
145 | } | ||
146 | |||
147 | static int | ||
148 | do_ftrace_mod_code(unsigned long ip, void *new_code) | ||
149 | { | ||
150 | mod_code_ip = (void *)ip; | ||
151 | mod_code_newcode = new_code; | ||
152 | |||
153 | /* The buffers need to be visible before we let NMIs write them */ | ||
154 | smp_wmb(); | ||
155 | |||
156 | mod_code_write = 1; | ||
157 | |||
158 | /* Make sure write bit is visible before we wait on NMIs */ | ||
159 | smp_mb(); | ||
160 | |||
161 | wait_for_nmi(); | ||
162 | |||
163 | /* Make sure all running NMIs have finished before we write the code */ | ||
164 | smp_mb(); | ||
165 | |||
166 | ftrace_mod_code(); | ||
167 | |||
168 | /* Make sure the write happens before clearing the bit */ | ||
169 | smp_wmb(); | ||
170 | |||
171 | mod_code_write = 0; | ||
172 | |||
173 | /* make sure NMIs see the cleared bit */ | ||
174 | smp_mb(); | ||
175 | |||
176 | wait_for_nmi(); | ||
177 | |||
178 | return mod_code_status; | ||
179 | } | ||
180 | |||
181 | |||
182 | |||
183 | |||
184 | static unsigned char ftrace_nop[MCOUNT_INSN_SIZE]; | ||
185 | |||
186 | static unsigned char *ftrace_nop_replace(void) | ||
187 | { | ||
188 | return ftrace_nop; | ||
189 | } | ||
190 | |||
191 | static int | ||
60 | ftrace_modify_code(unsigned long ip, unsigned char *old_code, | 192 | ftrace_modify_code(unsigned long ip, unsigned char *old_code, |
61 | unsigned char *new_code) | 193 | unsigned char *new_code) |
62 | { | 194 | { |
@@ -81,7 +213,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, | |||
81 | return -EINVAL; | 213 | return -EINVAL; |
82 | 214 | ||
83 | /* replace the text with the new text */ | 215 | /* replace the text with the new text */ |
84 | if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE)) | 216 | if (do_ftrace_mod_code(ip, new_code)) |
85 | return -EPERM; | 217 | return -EPERM; |
86 | 218 | ||
87 | sync_core(); | 219 | sync_core(); |
@@ -89,6 +221,29 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, | |||
89 | return 0; | 221 | return 0; |
90 | } | 222 | } |
91 | 223 | ||
224 | int ftrace_make_nop(struct module *mod, | ||
225 | struct dyn_ftrace *rec, unsigned long addr) | ||
226 | { | ||
227 | unsigned char *new, *old; | ||
228 | unsigned long ip = rec->ip; | ||
229 | |||
230 | old = ftrace_call_replace(ip, addr); | ||
231 | new = ftrace_nop_replace(); | ||
232 | |||
233 | return ftrace_modify_code(rec->ip, old, new); | ||
234 | } | ||
235 | |||
236 | int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) | ||
237 | { | ||
238 | unsigned char *new, *old; | ||
239 | unsigned long ip = rec->ip; | ||
240 | |||
241 | old = ftrace_nop_replace(); | ||
242 | new = ftrace_call_replace(ip, addr); | ||
243 | |||
244 | return ftrace_modify_code(rec->ip, old, new); | ||
245 | } | ||
246 | |||
92 | int ftrace_update_ftrace_func(ftrace_func_t func) | 247 | int ftrace_update_ftrace_func(ftrace_func_t func) |
93 | { | 248 | { |
94 | unsigned long ip = (unsigned long)(&ftrace_call); | 249 | unsigned long ip = (unsigned long)(&ftrace_call); |
@@ -165,3 +320,218 @@ int __init ftrace_dyn_arch_init(void *data) | |||
165 | 320 | ||
166 | return 0; | 321 | return 0; |
167 | } | 322 | } |
323 | #endif | ||
324 | |||
325 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
326 | |||
327 | #ifdef CONFIG_DYNAMIC_FTRACE | ||
328 | extern void ftrace_graph_call(void); | ||
329 | |||
330 | static int ftrace_mod_jmp(unsigned long ip, | ||
331 | int old_offset, int new_offset) | ||
332 | { | ||
333 | unsigned char code[MCOUNT_INSN_SIZE]; | ||
334 | |||
335 | if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE)) | ||
336 | return -EFAULT; | ||
337 | |||
338 | if (code[0] != 0xe9 || old_offset != *(int *)(&code[1])) | ||
339 | return -EINVAL; | ||
340 | |||
341 | *(int *)(&code[1]) = new_offset; | ||
342 | |||
343 | if (do_ftrace_mod_code(ip, &code)) | ||
344 | return -EPERM; | ||
345 | |||
346 | return 0; | ||
347 | } | ||
348 | |||
349 | int ftrace_enable_ftrace_graph_caller(void) | ||
350 | { | ||
351 | unsigned long ip = (unsigned long)(&ftrace_graph_call); | ||
352 | int old_offset, new_offset; | ||
353 | |||
354 | old_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE); | ||
355 | new_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE); | ||
356 | |||
357 | return ftrace_mod_jmp(ip, old_offset, new_offset); | ||
358 | } | ||
359 | |||
360 | int ftrace_disable_ftrace_graph_caller(void) | ||
361 | { | ||
362 | unsigned long ip = (unsigned long)(&ftrace_graph_call); | ||
363 | int old_offset, new_offset; | ||
364 | |||
365 | old_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE); | ||
366 | new_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE); | ||
367 | |||
368 | return ftrace_mod_jmp(ip, old_offset, new_offset); | ||
369 | } | ||
370 | |||
371 | #else /* CONFIG_DYNAMIC_FTRACE */ | ||
372 | |||
373 | /* | ||
374 | * These functions are picked from those used on | ||
375 | * this page for dynamic ftrace. They have been | ||
376 | * simplified to ignore all traces in NMI context. | ||
377 | */ | ||
378 | static atomic_t in_nmi; | ||
379 | |||
380 | void ftrace_nmi_enter(void) | ||
381 | { | ||
382 | atomic_inc(&in_nmi); | ||
383 | } | ||
384 | |||
385 | void ftrace_nmi_exit(void) | ||
386 | { | ||
387 | atomic_dec(&in_nmi); | ||
388 | } | ||
389 | |||
390 | #endif /* !CONFIG_DYNAMIC_FTRACE */ | ||
391 | |||
392 | /* Add a function return address to the trace stack on thread info.*/ | ||
393 | static int push_return_trace(unsigned long ret, unsigned long long time, | ||
394 | unsigned long func, int *depth) | ||
395 | { | ||
396 | int index; | ||
397 | |||
398 | if (!current->ret_stack) | ||
399 | return -EBUSY; | ||
400 | |||
401 | /* The return trace stack is full */ | ||
402 | if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) { | ||
403 | atomic_inc(&current->trace_overrun); | ||
404 | return -EBUSY; | ||
405 | } | ||
406 | |||
407 | index = ++current->curr_ret_stack; | ||
408 | barrier(); | ||
409 | current->ret_stack[index].ret = ret; | ||
410 | current->ret_stack[index].func = func; | ||
411 | current->ret_stack[index].calltime = time; | ||
412 | *depth = index; | ||
413 | |||
414 | return 0; | ||
415 | } | ||
416 | |||
417 | /* Retrieve a function return address from the trace stack on thread info.*/ | ||
418 | static void pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) | ||
419 | { | ||
420 | int index; | ||
421 | |||
422 | index = current->curr_ret_stack; | ||
423 | |||
424 | if (unlikely(index < 0)) { | ||
425 | ftrace_graph_stop(); | ||
426 | WARN_ON(1); | ||
427 | /* Might as well panic, otherwise we have no where to go */ | ||
428 | *ret = (unsigned long)panic; | ||
429 | return; | ||
430 | } | ||
431 | |||
432 | *ret = current->ret_stack[index].ret; | ||
433 | trace->func = current->ret_stack[index].func; | ||
434 | trace->calltime = current->ret_stack[index].calltime; | ||
435 | trace->overrun = atomic_read(&current->trace_overrun); | ||
436 | trace->depth = index; | ||
437 | barrier(); | ||
438 | current->curr_ret_stack--; | ||
439 | |||
440 | } | ||
441 | |||
442 | /* | ||
443 | * Send the trace to the ring-buffer. | ||
444 | * @return the original return address. | ||
445 | */ | ||
446 | unsigned long ftrace_return_to_handler(void) | ||
447 | { | ||
448 | struct ftrace_graph_ret trace; | ||
449 | unsigned long ret; | ||
450 | |||
451 | pop_return_trace(&trace, &ret); | ||
452 | trace.rettime = cpu_clock(raw_smp_processor_id()); | ||
453 | ftrace_graph_return(&trace); | ||
454 | |||
455 | if (unlikely(!ret)) { | ||
456 | ftrace_graph_stop(); | ||
457 | WARN_ON(1); | ||
458 | /* Might as well panic. What else to do? */ | ||
459 | ret = (unsigned long)panic; | ||
460 | } | ||
461 | |||
462 | return ret; | ||
463 | } | ||
464 | |||
465 | /* | ||
466 | * Hook the return address and push it in the stack of return addrs | ||
467 | * in current thread info. | ||
468 | */ | ||
469 | void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) | ||
470 | { | ||
471 | unsigned long old; | ||
472 | unsigned long long calltime; | ||
473 | int faulted; | ||
474 | struct ftrace_graph_ent trace; | ||
475 | unsigned long return_hooker = (unsigned long) | ||
476 | &return_to_handler; | ||
477 | |||
478 | /* Nmi's are currently unsupported */ | ||
479 | if (unlikely(atomic_read(&in_nmi))) | ||
480 | return; | ||
481 | |||
482 | if (unlikely(atomic_read(&current->tracing_graph_pause))) | ||
483 | return; | ||
484 | |||
485 | /* | ||
486 | * Protect against fault, even if it shouldn't | ||
487 | * happen. This tool is too intrusive to | ||
488 | * ignore such a protection. | ||
489 | */ | ||
490 | asm volatile( | ||
491 | "1: " _ASM_MOV " (%[parent_old]), %[old]\n" | ||
492 | "2: " _ASM_MOV " %[return_hooker], (%[parent_replaced])\n" | ||
493 | " movl $0, %[faulted]\n" | ||
494 | |||
495 | ".section .fixup, \"ax\"\n" | ||
496 | "3: movl $1, %[faulted]\n" | ||
497 | ".previous\n" | ||
498 | |||
499 | _ASM_EXTABLE(1b, 3b) | ||
500 | _ASM_EXTABLE(2b, 3b) | ||
501 | |||
502 | : [parent_replaced] "=r" (parent), [old] "=r" (old), | ||
503 | [faulted] "=r" (faulted) | ||
504 | : [parent_old] "0" (parent), [return_hooker] "r" (return_hooker) | ||
505 | : "memory" | ||
506 | ); | ||
507 | |||
508 | if (unlikely(faulted)) { | ||
509 | ftrace_graph_stop(); | ||
510 | WARN_ON(1); | ||
511 | return; | ||
512 | } | ||
513 | |||
514 | if (unlikely(!__kernel_text_address(old))) { | ||
515 | ftrace_graph_stop(); | ||
516 | *parent = old; | ||
517 | WARN_ON(1); | ||
518 | return; | ||
519 | } | ||
520 | |||
521 | calltime = cpu_clock(raw_smp_processor_id()); | ||
522 | |||
523 | if (push_return_trace(old, calltime, | ||
524 | self_addr, &trace.depth) == -EBUSY) { | ||
525 | *parent = old; | ||
526 | return; | ||
527 | } | ||
528 | |||
529 | trace.func = self_addr; | ||
530 | |||
531 | /* Only trace if the calling function expects to */ | ||
532 | if (!ftrace_graph_entry(&trace)) { | ||
533 | current->curr_ret_stack--; | ||
534 | *parent = old; | ||
535 | } | ||
536 | } | ||
537 | #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ | ||
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 60eb84eb77a0..11c65e811ffe 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/seq_file.h> | 13 | #include <linux/seq_file.h> |
14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
15 | #include <linux/delay.h> | 15 | #include <linux/delay.h> |
16 | #include <linux/ftrace.h> | ||
16 | #include <asm/uaccess.h> | 17 | #include <asm/uaccess.h> |
17 | #include <asm/io_apic.h> | 18 | #include <asm/io_apic.h> |
18 | #include <asm/idle.h> | 19 | #include <asm/idle.h> |
@@ -47,7 +48,7 @@ static inline void stack_overflow_check(struct pt_regs *regs) | |||
47 | * SMP cross-CPU interrupts have their own specific | 48 | * SMP cross-CPU interrupts have their own specific |
48 | * handlers). | 49 | * handlers). |
49 | */ | 50 | */ |
50 | asmlinkage unsigned int do_IRQ(struct pt_regs *regs) | 51 | asmlinkage unsigned int __irq_entry do_IRQ(struct pt_regs *regs) |
51 | { | 52 | { |
52 | struct pt_regs *old_regs = set_irq_regs(regs); | 53 | struct pt_regs *old_regs = set_irq_regs(regs); |
53 | struct irq_desc *desc; | 54 | struct irq_desc *desc; |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 18c70fedba32..cff9a50e389d 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -7,6 +7,7 @@ | |||
7 | #include <linux/module.h> | 7 | #include <linux/module.h> |
8 | #include <linux/pm.h> | 8 | #include <linux/pm.h> |
9 | #include <linux/clockchips.h> | 9 | #include <linux/clockchips.h> |
10 | #include <linux/ftrace.h> | ||
10 | #include <asm/system.h> | 11 | #include <asm/system.h> |
11 | 12 | ||
12 | unsigned long idle_halt; | 13 | unsigned long idle_halt; |
@@ -100,6 +101,9 @@ static inline int hlt_use_halt(void) | |||
100 | void default_idle(void) | 101 | void default_idle(void) |
101 | { | 102 | { |
102 | if (hlt_use_halt()) { | 103 | if (hlt_use_halt()) { |
104 | struct power_trace it; | ||
105 | |||
106 | trace_power_start(&it, POWER_CSTATE, 1); | ||
103 | current_thread_info()->status &= ~TS_POLLING; | 107 | current_thread_info()->status &= ~TS_POLLING; |
104 | /* | 108 | /* |
105 | * TS_POLLING-cleared state must be visible before we | 109 | * TS_POLLING-cleared state must be visible before we |
@@ -112,6 +116,7 @@ void default_idle(void) | |||
112 | else | 116 | else |
113 | local_irq_enable(); | 117 | local_irq_enable(); |
114 | current_thread_info()->status |= TS_POLLING; | 118 | current_thread_info()->status |= TS_POLLING; |
119 | trace_power_end(&it); | ||
115 | } else { | 120 | } else { |
116 | local_irq_enable(); | 121 | local_irq_enable(); |
117 | /* loop is done by the caller */ | 122 | /* loop is done by the caller */ |
@@ -154,24 +159,31 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); | |||
154 | */ | 159 | */ |
155 | void mwait_idle_with_hints(unsigned long ax, unsigned long cx) | 160 | void mwait_idle_with_hints(unsigned long ax, unsigned long cx) |
156 | { | 161 | { |
162 | struct power_trace it; | ||
163 | |||
164 | trace_power_start(&it, POWER_CSTATE, (ax>>4)+1); | ||
157 | if (!need_resched()) { | 165 | if (!need_resched()) { |
158 | __monitor((void *)&current_thread_info()->flags, 0, 0); | 166 | __monitor((void *)&current_thread_info()->flags, 0, 0); |
159 | smp_mb(); | 167 | smp_mb(); |
160 | if (!need_resched()) | 168 | if (!need_resched()) |
161 | __mwait(ax, cx); | 169 | __mwait(ax, cx); |
162 | } | 170 | } |
171 | trace_power_end(&it); | ||
163 | } | 172 | } |
164 | 173 | ||
165 | /* Default MONITOR/MWAIT with no hints, used for default C1 state */ | 174 | /* Default MONITOR/MWAIT with no hints, used for default C1 state */ |
166 | static void mwait_idle(void) | 175 | static void mwait_idle(void) |
167 | { | 176 | { |
177 | struct power_trace it; | ||
168 | if (!need_resched()) { | 178 | if (!need_resched()) { |
179 | trace_power_start(&it, POWER_CSTATE, 1); | ||
169 | __monitor((void *)&current_thread_info()->flags, 0, 0); | 180 | __monitor((void *)&current_thread_info()->flags, 0, 0); |
170 | smp_mb(); | 181 | smp_mb(); |
171 | if (!need_resched()) | 182 | if (!need_resched()) |
172 | __sti_mwait(0, 0); | 183 | __sti_mwait(0, 0); |
173 | else | 184 | else |
174 | local_irq_enable(); | 185 | local_irq_enable(); |
186 | trace_power_end(&it); | ||
175 | } else | 187 | } else |
176 | local_irq_enable(); | 188 | local_irq_enable(); |
177 | } | 189 | } |
@@ -183,9 +195,13 @@ static void mwait_idle(void) | |||
183 | */ | 195 | */ |
184 | static void poll_idle(void) | 196 | static void poll_idle(void) |
185 | { | 197 | { |
198 | struct power_trace it; | ||
199 | |||
200 | trace_power_start(&it, POWER_CSTATE, 0); | ||
186 | local_irq_enable(); | 201 | local_irq_enable(); |
187 | while (!need_resched()) | 202 | while (!need_resched()) |
188 | cpu_relax(); | 203 | cpu_relax(); |
204 | trace_power_end(&it); | ||
189 | } | 205 | } |
190 | 206 | ||
191 | /* | 207 | /* |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 0a1302fe6d45..605eff9a8ac0 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -38,6 +38,7 @@ | |||
38 | #include <linux/percpu.h> | 38 | #include <linux/percpu.h> |
39 | #include <linux/prctl.h> | 39 | #include <linux/prctl.h> |
40 | #include <linux/dmi.h> | 40 | #include <linux/dmi.h> |
41 | #include <linux/ftrace.h> | ||
41 | 42 | ||
42 | #include <asm/uaccess.h> | 43 | #include <asm/uaccess.h> |
43 | #include <asm/pgtable.h> | 44 | #include <asm/pgtable.h> |
@@ -251,11 +252,14 @@ void exit_thread(void) | |||
251 | put_cpu(); | 252 | put_cpu(); |
252 | } | 253 | } |
253 | #ifdef CONFIG_X86_DS | 254 | #ifdef CONFIG_X86_DS |
254 | /* Free any DS contexts that have not been properly released. */ | 255 | /* Free any BTS tracers that have not been properly released. */ |
255 | if (unlikely(current->thread.ds_ctx)) { | 256 | if (unlikely(current->bts)) { |
256 | /* we clear debugctl to make sure DS is not used. */ | 257 | ds_release_bts(current->bts); |
257 | update_debugctlmsr(0); | 258 | current->bts = NULL; |
258 | ds_free(current->thread.ds_ctx); | 259 | |
260 | kfree(current->bts_buffer); | ||
261 | current->bts_buffer = NULL; | ||
262 | current->bts_size = 0; | ||
259 | } | 263 | } |
260 | #endif /* CONFIG_X86_DS */ | 264 | #endif /* CONFIG_X86_DS */ |
261 | } | 265 | } |
@@ -419,48 +423,19 @@ int set_tsc_mode(unsigned int val) | |||
419 | return 0; | 423 | return 0; |
420 | } | 424 | } |
421 | 425 | ||
422 | #ifdef CONFIG_X86_DS | ||
423 | static int update_debugctl(struct thread_struct *prev, | ||
424 | struct thread_struct *next, unsigned long debugctl) | ||
425 | { | ||
426 | unsigned long ds_prev = 0; | ||
427 | unsigned long ds_next = 0; | ||
428 | |||
429 | if (prev->ds_ctx) | ||
430 | ds_prev = (unsigned long)prev->ds_ctx->ds; | ||
431 | if (next->ds_ctx) | ||
432 | ds_next = (unsigned long)next->ds_ctx->ds; | ||
433 | |||
434 | if (ds_next != ds_prev) { | ||
435 | /* we clear debugctl to make sure DS | ||
436 | * is not in use when we change it */ | ||
437 | debugctl = 0; | ||
438 | update_debugctlmsr(0); | ||
439 | wrmsr(MSR_IA32_DS_AREA, ds_next, 0); | ||
440 | } | ||
441 | return debugctl; | ||
442 | } | ||
443 | #else | ||
444 | static int update_debugctl(struct thread_struct *prev, | ||
445 | struct thread_struct *next, unsigned long debugctl) | ||
446 | { | ||
447 | return debugctl; | ||
448 | } | ||
449 | #endif /* CONFIG_X86_DS */ | ||
450 | |||
451 | static noinline void | 426 | static noinline void |
452 | __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | 427 | __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, |
453 | struct tss_struct *tss) | 428 | struct tss_struct *tss) |
454 | { | 429 | { |
455 | struct thread_struct *prev, *next; | 430 | struct thread_struct *prev, *next; |
456 | unsigned long debugctl; | ||
457 | 431 | ||
458 | prev = &prev_p->thread; | 432 | prev = &prev_p->thread; |
459 | next = &next_p->thread; | 433 | next = &next_p->thread; |
460 | 434 | ||
461 | debugctl = update_debugctl(prev, next, prev->debugctlmsr); | 435 | if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) || |
462 | 436 | test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR)) | |
463 | if (next->debugctlmsr != debugctl) | 437 | ds_switch_to(prev_p, next_p); |
438 | else if (next->debugctlmsr != prev->debugctlmsr) | ||
464 | update_debugctlmsr(next->debugctlmsr); | 439 | update_debugctlmsr(next->debugctlmsr); |
465 | 440 | ||
466 | if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { | 441 | if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { |
@@ -482,15 +457,6 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | |||
482 | hard_enable_TSC(); | 457 | hard_enable_TSC(); |
483 | } | 458 | } |
484 | 459 | ||
485 | #ifdef CONFIG_X86_PTRACE_BTS | ||
486 | if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) | ||
487 | ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); | ||
488 | |||
489 | if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) | ||
490 | ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); | ||
491 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
492 | |||
493 | |||
494 | if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { | 460 | if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { |
495 | /* | 461 | /* |
496 | * Disable the bitmap via an invalid offset. We still cache | 462 | * Disable the bitmap via an invalid offset. We still cache |
@@ -548,7 +514,8 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | |||
548 | * the task-switch, and shows up in ret_from_fork in entry.S, | 514 | * the task-switch, and shows up in ret_from_fork in entry.S, |
549 | * for example. | 515 | * for example. |
550 | */ | 516 | */ |
551 | struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | 517 | __notrace_funcgraph struct task_struct * |
518 | __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | ||
552 | { | 519 | { |
553 | struct thread_struct *prev = &prev_p->thread, | 520 | struct thread_struct *prev = &prev_p->thread, |
554 | *next = &next_p->thread; | 521 | *next = &next_p->thread; |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index c958120fb1b6..1cfd2a4bf853 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #include <linux/prctl.h> | 39 | #include <linux/prctl.h> |
40 | #include <linux/uaccess.h> | 40 | #include <linux/uaccess.h> |
41 | #include <linux/io.h> | 41 | #include <linux/io.h> |
42 | #include <linux/ftrace.h> | ||
42 | 43 | ||
43 | #include <asm/pgtable.h> | 44 | #include <asm/pgtable.h> |
44 | #include <asm/system.h> | 45 | #include <asm/system.h> |
@@ -236,11 +237,14 @@ void exit_thread(void) | |||
236 | put_cpu(); | 237 | put_cpu(); |
237 | } | 238 | } |
238 | #ifdef CONFIG_X86_DS | 239 | #ifdef CONFIG_X86_DS |
239 | /* Free any DS contexts that have not been properly released. */ | 240 | /* Free any BTS tracers that have not been properly released. */ |
240 | if (unlikely(t->ds_ctx)) { | 241 | if (unlikely(current->bts)) { |
241 | /* we clear debugctl to make sure DS is not used. */ | 242 | ds_release_bts(current->bts); |
242 | update_debugctlmsr(0); | 243 | current->bts = NULL; |
243 | ds_free(t->ds_ctx); | 244 | |
245 | kfree(current->bts_buffer); | ||
246 | current->bts_buffer = NULL; | ||
247 | current->bts_size = 0; | ||
244 | } | 248 | } |
245 | #endif /* CONFIG_X86_DS */ | 249 | #endif /* CONFIG_X86_DS */ |
246 | } | 250 | } |
@@ -470,35 +474,14 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, | |||
470 | struct tss_struct *tss) | 474 | struct tss_struct *tss) |
471 | { | 475 | { |
472 | struct thread_struct *prev, *next; | 476 | struct thread_struct *prev, *next; |
473 | unsigned long debugctl; | ||
474 | 477 | ||
475 | prev = &prev_p->thread, | 478 | prev = &prev_p->thread, |
476 | next = &next_p->thread; | 479 | next = &next_p->thread; |
477 | 480 | ||
478 | debugctl = prev->debugctlmsr; | 481 | if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) || |
479 | 482 | test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR)) | |
480 | #ifdef CONFIG_X86_DS | 483 | ds_switch_to(prev_p, next_p); |
481 | { | 484 | else if (next->debugctlmsr != prev->debugctlmsr) |
482 | unsigned long ds_prev = 0, ds_next = 0; | ||
483 | |||
484 | if (prev->ds_ctx) | ||
485 | ds_prev = (unsigned long)prev->ds_ctx->ds; | ||
486 | if (next->ds_ctx) | ||
487 | ds_next = (unsigned long)next->ds_ctx->ds; | ||
488 | |||
489 | if (ds_next != ds_prev) { | ||
490 | /* | ||
491 | * We clear debugctl to make sure DS | ||
492 | * is not in use when we change it: | ||
493 | */ | ||
494 | debugctl = 0; | ||
495 | update_debugctlmsr(0); | ||
496 | wrmsrl(MSR_IA32_DS_AREA, ds_next); | ||
497 | } | ||
498 | } | ||
499 | #endif /* CONFIG_X86_DS */ | ||
500 | |||
501 | if (next->debugctlmsr != debugctl) | ||
502 | update_debugctlmsr(next->debugctlmsr); | 485 | update_debugctlmsr(next->debugctlmsr); |
503 | 486 | ||
504 | if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { | 487 | if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { |
@@ -533,14 +516,6 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, | |||
533 | */ | 516 | */ |
534 | memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); | 517 | memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); |
535 | } | 518 | } |
536 | |||
537 | #ifdef CONFIG_X86_PTRACE_BTS | ||
538 | if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) | ||
539 | ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); | ||
540 | |||
541 | if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) | ||
542 | ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); | ||
543 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
544 | } | 519 | } |
545 | 520 | ||
546 | /* | 521 | /* |
@@ -551,8 +526,9 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, | |||
551 | * - could test fs/gs bitsliced | 526 | * - could test fs/gs bitsliced |
552 | * | 527 | * |
553 | * Kprobes not supported here. Set the probe on schedule instead. | 528 | * Kprobes not supported here. Set the probe on schedule instead. |
529 | * Function graph tracer not supported too. | ||
554 | */ | 530 | */ |
555 | struct task_struct * | 531 | __notrace_funcgraph struct task_struct * |
556 | __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | 532 | __switch_to(struct task_struct *prev_p, struct task_struct *next_p) |
557 | { | 533 | { |
558 | struct thread_struct *prev = &prev_p->thread; | 534 | struct thread_struct *prev = &prev_p->thread; |
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 06180dff5b2e..45e9855da2d2 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -581,158 +581,73 @@ static int ioperm_get(struct task_struct *target, | |||
581 | } | 581 | } |
582 | 582 | ||
583 | #ifdef CONFIG_X86_PTRACE_BTS | 583 | #ifdef CONFIG_X86_PTRACE_BTS |
584 | /* | ||
585 | * The configuration for a particular BTS hardware implementation. | ||
586 | */ | ||
587 | struct bts_configuration { | ||
588 | /* the size of a BTS record in bytes; at most BTS_MAX_RECORD_SIZE */ | ||
589 | unsigned char sizeof_bts; | ||
590 | /* the size of a field in the BTS record in bytes */ | ||
591 | unsigned char sizeof_field; | ||
592 | /* a bitmask to enable/disable BTS in DEBUGCTL MSR */ | ||
593 | unsigned long debugctl_mask; | ||
594 | }; | ||
595 | static struct bts_configuration bts_cfg; | ||
596 | |||
597 | #define BTS_MAX_RECORD_SIZE (8 * 3) | ||
598 | |||
599 | |||
600 | /* | ||
601 | * Branch Trace Store (BTS) uses the following format. Different | ||
602 | * architectures vary in the size of those fields. | ||
603 | * - source linear address | ||
604 | * - destination linear address | ||
605 | * - flags | ||
606 | * | ||
607 | * Later architectures use 64bit pointers throughout, whereas earlier | ||
608 | * architectures use 32bit pointers in 32bit mode. | ||
609 | * | ||
610 | * We compute the base address for the first 8 fields based on: | ||
611 | * - the field size stored in the DS configuration | ||
612 | * - the relative field position | ||
613 | * | ||
614 | * In order to store additional information in the BTS buffer, we use | ||
615 | * a special source address to indicate that the record requires | ||
616 | * special interpretation. | ||
617 | * | ||
618 | * Netburst indicated via a bit in the flags field whether the branch | ||
619 | * was predicted; this is ignored. | ||
620 | */ | ||
621 | |||
622 | enum bts_field { | ||
623 | bts_from = 0, | ||
624 | bts_to, | ||
625 | bts_flags, | ||
626 | |||
627 | bts_escape = (unsigned long)-1, | ||
628 | bts_qual = bts_to, | ||
629 | bts_jiffies = bts_flags | ||
630 | }; | ||
631 | |||
632 | static inline unsigned long bts_get(const char *base, enum bts_field field) | ||
633 | { | ||
634 | base += (bts_cfg.sizeof_field * field); | ||
635 | return *(unsigned long *)base; | ||
636 | } | ||
637 | |||
638 | static inline void bts_set(char *base, enum bts_field field, unsigned long val) | ||
639 | { | ||
640 | base += (bts_cfg.sizeof_field * field);; | ||
641 | (*(unsigned long *)base) = val; | ||
642 | } | ||
643 | |||
644 | /* | ||
645 | * Translate a BTS record from the raw format into the bts_struct format | ||
646 | * | ||
647 | * out (out): bts_struct interpretation | ||
648 | * raw: raw BTS record | ||
649 | */ | ||
650 | static void ptrace_bts_translate_record(struct bts_struct *out, const void *raw) | ||
651 | { | ||
652 | memset(out, 0, sizeof(*out)); | ||
653 | if (bts_get(raw, bts_from) == bts_escape) { | ||
654 | out->qualifier = bts_get(raw, bts_qual); | ||
655 | out->variant.jiffies = bts_get(raw, bts_jiffies); | ||
656 | } else { | ||
657 | out->qualifier = BTS_BRANCH; | ||
658 | out->variant.lbr.from_ip = bts_get(raw, bts_from); | ||
659 | out->variant.lbr.to_ip = bts_get(raw, bts_to); | ||
660 | } | ||
661 | } | ||
662 | |||
663 | static int ptrace_bts_read_record(struct task_struct *child, size_t index, | 584 | static int ptrace_bts_read_record(struct task_struct *child, size_t index, |
664 | struct bts_struct __user *out) | 585 | struct bts_struct __user *out) |
665 | { | 586 | { |
666 | struct bts_struct ret; | 587 | const struct bts_trace *trace; |
667 | const void *bts_record; | 588 | struct bts_struct bts; |
668 | size_t bts_index, bts_end; | 589 | const unsigned char *at; |
669 | int error; | 590 | int error; |
670 | 591 | ||
671 | error = ds_get_bts_end(child, &bts_end); | 592 | trace = ds_read_bts(child->bts); |
672 | if (error < 0) | 593 | if (!trace) |
673 | return error; | 594 | return -EPERM; |
674 | 595 | ||
675 | if (bts_end <= index) | 596 | at = trace->ds.top - ((index + 1) * trace->ds.size); |
676 | return -EINVAL; | 597 | if ((void *)at < trace->ds.begin) |
677 | 598 | at += (trace->ds.n * trace->ds.size); | |
678 | error = ds_get_bts_index(child, &bts_index); | ||
679 | if (error < 0) | ||
680 | return error; | ||
681 | 599 | ||
682 | /* translate the ptrace bts index into the ds bts index */ | 600 | if (!trace->read) |
683 | bts_index += bts_end - (index + 1); | 601 | return -EOPNOTSUPP; |
684 | if (bts_end <= bts_index) | ||
685 | bts_index -= bts_end; | ||
686 | 602 | ||
687 | error = ds_access_bts(child, bts_index, &bts_record); | 603 | error = trace->read(child->bts, at, &bts); |
688 | if (error < 0) | 604 | if (error < 0) |
689 | return error; | 605 | return error; |
690 | 606 | ||
691 | ptrace_bts_translate_record(&ret, bts_record); | 607 | if (copy_to_user(out, &bts, sizeof(bts))) |
692 | |||
693 | if (copy_to_user(out, &ret, sizeof(ret))) | ||
694 | return -EFAULT; | 608 | return -EFAULT; |
695 | 609 | ||
696 | return sizeof(ret); | 610 | return sizeof(bts); |
697 | } | 611 | } |
698 | 612 | ||
699 | static int ptrace_bts_drain(struct task_struct *child, | 613 | static int ptrace_bts_drain(struct task_struct *child, |
700 | long size, | 614 | long size, |
701 | struct bts_struct __user *out) | 615 | struct bts_struct __user *out) |
702 | { | 616 | { |
703 | struct bts_struct ret; | 617 | const struct bts_trace *trace; |
704 | const unsigned char *raw; | 618 | const unsigned char *at; |
705 | size_t end, i; | 619 | int error, drained = 0; |
706 | int error; | ||
707 | 620 | ||
708 | error = ds_get_bts_index(child, &end); | 621 | trace = ds_read_bts(child->bts); |
709 | if (error < 0) | 622 | if (!trace) |
710 | return error; | 623 | return -EPERM; |
711 | 624 | ||
712 | if (size < (end * sizeof(struct bts_struct))) | 625 | if (!trace->read) |
626 | return -EOPNOTSUPP; | ||
627 | |||
628 | if (size < (trace->ds.top - trace->ds.begin)) | ||
713 | return -EIO; | 629 | return -EIO; |
714 | 630 | ||
715 | error = ds_access_bts(child, 0, (const void **)&raw); | 631 | for (at = trace->ds.begin; (void *)at < trace->ds.top; |
716 | if (error < 0) | 632 | out++, drained++, at += trace->ds.size) { |
717 | return error; | 633 | struct bts_struct bts; |
634 | int error; | ||
718 | 635 | ||
719 | for (i = 0; i < end; i++, out++, raw += bts_cfg.sizeof_bts) { | 636 | error = trace->read(child->bts, at, &bts); |
720 | ptrace_bts_translate_record(&ret, raw); | 637 | if (error < 0) |
638 | return error; | ||
721 | 639 | ||
722 | if (copy_to_user(out, &ret, sizeof(ret))) | 640 | if (copy_to_user(out, &bts, sizeof(bts))) |
723 | return -EFAULT; | 641 | return -EFAULT; |
724 | } | 642 | } |
725 | 643 | ||
726 | error = ds_clear_bts(child); | 644 | memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); |
645 | |||
646 | error = ds_reset_bts(child->bts); | ||
727 | if (error < 0) | 647 | if (error < 0) |
728 | return error; | 648 | return error; |
729 | 649 | ||
730 | return end; | 650 | return drained; |
731 | } | ||
732 | |||
733 | static void ptrace_bts_ovfl(struct task_struct *child) | ||
734 | { | ||
735 | send_sig(child->thread.bts_ovfl_signal, child, 0); | ||
736 | } | 651 | } |
737 | 652 | ||
738 | static int ptrace_bts_config(struct task_struct *child, | 653 | static int ptrace_bts_config(struct task_struct *child, |
@@ -740,114 +655,89 @@ static int ptrace_bts_config(struct task_struct *child, | |||
740 | const struct ptrace_bts_config __user *ucfg) | 655 | const struct ptrace_bts_config __user *ucfg) |
741 | { | 656 | { |
742 | struct ptrace_bts_config cfg; | 657 | struct ptrace_bts_config cfg; |
743 | int error = 0; | 658 | unsigned int flags = 0; |
744 | |||
745 | error = -EOPNOTSUPP; | ||
746 | if (!bts_cfg.sizeof_bts) | ||
747 | goto errout; | ||
748 | 659 | ||
749 | error = -EIO; | ||
750 | if (cfg_size < sizeof(cfg)) | 660 | if (cfg_size < sizeof(cfg)) |
751 | goto errout; | 661 | return -EIO; |
752 | 662 | ||
753 | error = -EFAULT; | ||
754 | if (copy_from_user(&cfg, ucfg, sizeof(cfg))) | 663 | if (copy_from_user(&cfg, ucfg, sizeof(cfg))) |
755 | goto errout; | 664 | return -EFAULT; |
756 | |||
757 | error = -EINVAL; | ||
758 | if ((cfg.flags & PTRACE_BTS_O_SIGNAL) && | ||
759 | !(cfg.flags & PTRACE_BTS_O_ALLOC)) | ||
760 | goto errout; | ||
761 | 665 | ||
762 | if (cfg.flags & PTRACE_BTS_O_ALLOC) { | 666 | if (child->bts) { |
763 | ds_ovfl_callback_t ovfl = NULL; | 667 | ds_release_bts(child->bts); |
764 | unsigned int sig = 0; | 668 | child->bts = NULL; |
669 | } | ||
765 | 670 | ||
766 | /* we ignore the error in case we were not tracing child */ | 671 | if (cfg.flags & PTRACE_BTS_O_SIGNAL) { |
767 | (void)ds_release_bts(child); | 672 | if (!cfg.signal) |
673 | return -EINVAL; | ||
768 | 674 | ||
769 | if (cfg.flags & PTRACE_BTS_O_SIGNAL) { | 675 | return -EOPNOTSUPP; |
770 | if (!cfg.signal) | ||
771 | goto errout; | ||
772 | 676 | ||
773 | sig = cfg.signal; | 677 | child->thread.bts_ovfl_signal = cfg.signal; |
774 | ovfl = ptrace_bts_ovfl; | 678 | } |
775 | } | ||
776 | 679 | ||
777 | error = ds_request_bts(child, /* base = */ NULL, cfg.size, ovfl); | 680 | if ((cfg.flags & PTRACE_BTS_O_ALLOC) && |
778 | if (error < 0) | 681 | (cfg.size != child->bts_size)) { |
779 | goto errout; | 682 | kfree(child->bts_buffer); |
780 | 683 | ||
781 | child->thread.bts_ovfl_signal = sig; | 684 | child->bts_size = cfg.size; |
685 | child->bts_buffer = kzalloc(cfg.size, GFP_KERNEL); | ||
686 | if (!child->bts_buffer) { | ||
687 | child->bts_size = 0; | ||
688 | return -ENOMEM; | ||
689 | } | ||
782 | } | 690 | } |
783 | 691 | ||
784 | error = -EINVAL; | ||
785 | if (!child->thread.ds_ctx && cfg.flags) | ||
786 | goto errout; | ||
787 | |||
788 | if (cfg.flags & PTRACE_BTS_O_TRACE) | 692 | if (cfg.flags & PTRACE_BTS_O_TRACE) |
789 | child->thread.debugctlmsr |= bts_cfg.debugctl_mask; | 693 | flags |= BTS_USER; |
790 | else | ||
791 | child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; | ||
792 | 694 | ||
793 | if (cfg.flags & PTRACE_BTS_O_SCHED) | 695 | if (cfg.flags & PTRACE_BTS_O_SCHED) |
794 | set_tsk_thread_flag(child, TIF_BTS_TRACE_TS); | 696 | flags |= BTS_TIMESTAMPS; |
795 | else | ||
796 | clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); | ||
797 | 697 | ||
798 | error = sizeof(cfg); | 698 | child->bts = ds_request_bts(child, child->bts_buffer, child->bts_size, |
699 | /* ovfl = */ NULL, /* th = */ (size_t)-1, | ||
700 | flags); | ||
701 | if (IS_ERR(child->bts)) { | ||
702 | int error = PTR_ERR(child->bts); | ||
799 | 703 | ||
800 | out: | 704 | kfree(child->bts_buffer); |
801 | if (child->thread.debugctlmsr) | 705 | child->bts = NULL; |
802 | set_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | 706 | child->bts_buffer = NULL; |
803 | else | 707 | child->bts_size = 0; |
804 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | ||
805 | 708 | ||
806 | return error; | 709 | return error; |
710 | } | ||
807 | 711 | ||
808 | errout: | 712 | return sizeof(cfg); |
809 | child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; | ||
810 | clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); | ||
811 | goto out; | ||
812 | } | 713 | } |
813 | 714 | ||
814 | static int ptrace_bts_status(struct task_struct *child, | 715 | static int ptrace_bts_status(struct task_struct *child, |
815 | long cfg_size, | 716 | long cfg_size, |
816 | struct ptrace_bts_config __user *ucfg) | 717 | struct ptrace_bts_config __user *ucfg) |
817 | { | 718 | { |
719 | const struct bts_trace *trace; | ||
818 | struct ptrace_bts_config cfg; | 720 | struct ptrace_bts_config cfg; |
819 | size_t end; | ||
820 | const void *base, *max; | ||
821 | int error; | ||
822 | 721 | ||
823 | if (cfg_size < sizeof(cfg)) | 722 | if (cfg_size < sizeof(cfg)) |
824 | return -EIO; | 723 | return -EIO; |
825 | 724 | ||
826 | error = ds_get_bts_end(child, &end); | 725 | trace = ds_read_bts(child->bts); |
827 | if (error < 0) | 726 | if (!trace) |
828 | return error; | 727 | return -EPERM; |
829 | |||
830 | error = ds_access_bts(child, /* index = */ 0, &base); | ||
831 | if (error < 0) | ||
832 | return error; | ||
833 | |||
834 | error = ds_access_bts(child, /* index = */ end, &max); | ||
835 | if (error < 0) | ||
836 | return error; | ||
837 | 728 | ||
838 | memset(&cfg, 0, sizeof(cfg)); | 729 | memset(&cfg, 0, sizeof(cfg)); |
839 | cfg.size = (max - base); | 730 | cfg.size = trace->ds.end - trace->ds.begin; |
840 | cfg.signal = child->thread.bts_ovfl_signal; | 731 | cfg.signal = child->thread.bts_ovfl_signal; |
841 | cfg.bts_size = sizeof(struct bts_struct); | 732 | cfg.bts_size = sizeof(struct bts_struct); |
842 | 733 | ||
843 | if (cfg.signal) | 734 | if (cfg.signal) |
844 | cfg.flags |= PTRACE_BTS_O_SIGNAL; | 735 | cfg.flags |= PTRACE_BTS_O_SIGNAL; |
845 | 736 | ||
846 | if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) && | 737 | if (trace->ds.flags & BTS_USER) |
847 | child->thread.debugctlmsr & bts_cfg.debugctl_mask) | ||
848 | cfg.flags |= PTRACE_BTS_O_TRACE; | 738 | cfg.flags |= PTRACE_BTS_O_TRACE; |
849 | 739 | ||
850 | if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS)) | 740 | if (trace->ds.flags & BTS_TIMESTAMPS) |
851 | cfg.flags |= PTRACE_BTS_O_SCHED; | 741 | cfg.flags |= PTRACE_BTS_O_SCHED; |
852 | 742 | ||
853 | if (copy_to_user(ucfg, &cfg, sizeof(cfg))) | 743 | if (copy_to_user(ucfg, &cfg, sizeof(cfg))) |
@@ -856,107 +746,28 @@ static int ptrace_bts_status(struct task_struct *child, | |||
856 | return sizeof(cfg); | 746 | return sizeof(cfg); |
857 | } | 747 | } |
858 | 748 | ||
859 | static int ptrace_bts_write_record(struct task_struct *child, | 749 | static int ptrace_bts_clear(struct task_struct *child) |
860 | const struct bts_struct *in) | ||
861 | { | 750 | { |
862 | unsigned char bts_record[BTS_MAX_RECORD_SIZE]; | 751 | const struct bts_trace *trace; |
863 | 752 | ||
864 | BUG_ON(BTS_MAX_RECORD_SIZE < bts_cfg.sizeof_bts); | 753 | trace = ds_read_bts(child->bts); |
754 | if (!trace) | ||
755 | return -EPERM; | ||
865 | 756 | ||
866 | memset(bts_record, 0, bts_cfg.sizeof_bts); | 757 | memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); |
867 | switch (in->qualifier) { | ||
868 | case BTS_INVALID: | ||
869 | break; | ||
870 | |||
871 | case BTS_BRANCH: | ||
872 | bts_set(bts_record, bts_from, in->variant.lbr.from_ip); | ||
873 | bts_set(bts_record, bts_to, in->variant.lbr.to_ip); | ||
874 | break; | ||
875 | |||
876 | case BTS_TASK_ARRIVES: | ||
877 | case BTS_TASK_DEPARTS: | ||
878 | bts_set(bts_record, bts_from, bts_escape); | ||
879 | bts_set(bts_record, bts_qual, in->qualifier); | ||
880 | bts_set(bts_record, bts_jiffies, in->variant.jiffies); | ||
881 | break; | ||
882 | |||
883 | default: | ||
884 | return -EINVAL; | ||
885 | } | ||
886 | 758 | ||
887 | /* The writing task will be the switched-to task on a context | 759 | return ds_reset_bts(child->bts); |
888 | * switch. It needs to write into the switched-from task's BTS | ||
889 | * buffer. */ | ||
890 | return ds_unchecked_write_bts(child, bts_record, bts_cfg.sizeof_bts); | ||
891 | } | 760 | } |
892 | 761 | ||
893 | void ptrace_bts_take_timestamp(struct task_struct *tsk, | 762 | static int ptrace_bts_size(struct task_struct *child) |
894 | enum bts_qualifier qualifier) | ||
895 | { | 763 | { |
896 | struct bts_struct rec = { | 764 | const struct bts_trace *trace; |
897 | .qualifier = qualifier, | ||
898 | .variant.jiffies = jiffies_64 | ||
899 | }; | ||
900 | |||
901 | ptrace_bts_write_record(tsk, &rec); | ||
902 | } | ||
903 | 765 | ||
904 | static const struct bts_configuration bts_cfg_netburst = { | 766 | trace = ds_read_bts(child->bts); |
905 | .sizeof_bts = sizeof(long) * 3, | 767 | if (!trace) |
906 | .sizeof_field = sizeof(long), | 768 | return -EPERM; |
907 | .debugctl_mask = (1<<2)|(1<<3)|(1<<5) | ||
908 | }; | ||
909 | 769 | ||
910 | static const struct bts_configuration bts_cfg_pentium_m = { | 770 | return (trace->ds.top - trace->ds.begin) / trace->ds.size; |
911 | .sizeof_bts = sizeof(long) * 3, | ||
912 | .sizeof_field = sizeof(long), | ||
913 | .debugctl_mask = (1<<6)|(1<<7) | ||
914 | }; | ||
915 | |||
916 | static const struct bts_configuration bts_cfg_core2 = { | ||
917 | .sizeof_bts = 8 * 3, | ||
918 | .sizeof_field = 8, | ||
919 | .debugctl_mask = (1<<6)|(1<<7)|(1<<9) | ||
920 | }; | ||
921 | |||
922 | static inline void bts_configure(const struct bts_configuration *cfg) | ||
923 | { | ||
924 | bts_cfg = *cfg; | ||
925 | } | ||
926 | |||
927 | void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c) | ||
928 | { | ||
929 | switch (c->x86) { | ||
930 | case 0x6: | ||
931 | switch (c->x86_model) { | ||
932 | case 0 ... 0xC: | ||
933 | /* sorry, don't know about them */ | ||
934 | break; | ||
935 | case 0xD: | ||
936 | case 0xE: /* Pentium M */ | ||
937 | bts_configure(&bts_cfg_pentium_m); | ||
938 | break; | ||
939 | default: /* Core2, Atom, ... */ | ||
940 | bts_configure(&bts_cfg_core2); | ||
941 | break; | ||
942 | } | ||
943 | break; | ||
944 | case 0xF: | ||
945 | switch (c->x86_model) { | ||
946 | case 0x0: | ||
947 | case 0x1: | ||
948 | case 0x2: /* Netburst */ | ||
949 | bts_configure(&bts_cfg_netburst); | ||
950 | break; | ||
951 | default: | ||
952 | /* sorry, don't know about them */ | ||
953 | break; | ||
954 | } | ||
955 | break; | ||
956 | default: | ||
957 | /* sorry, don't know about them */ | ||
958 | break; | ||
959 | } | ||
960 | } | 771 | } |
961 | #endif /* CONFIG_X86_PTRACE_BTS */ | 772 | #endif /* CONFIG_X86_PTRACE_BTS */ |
962 | 773 | ||
@@ -972,13 +783,14 @@ void ptrace_disable(struct task_struct *child) | |||
972 | clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); | 783 | clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); |
973 | #endif | 784 | #endif |
974 | #ifdef CONFIG_X86_PTRACE_BTS | 785 | #ifdef CONFIG_X86_PTRACE_BTS |
975 | (void)ds_release_bts(child); | 786 | if (child->bts) { |
976 | 787 | ds_release_bts(child->bts); | |
977 | child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; | 788 | child->bts = NULL; |
978 | if (!child->thread.debugctlmsr) | ||
979 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | ||
980 | 789 | ||
981 | clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); | 790 | kfree(child->bts_buffer); |
791 | child->bts_buffer = NULL; | ||
792 | child->bts_size = 0; | ||
793 | } | ||
982 | #endif /* CONFIG_X86_PTRACE_BTS */ | 794 | #endif /* CONFIG_X86_PTRACE_BTS */ |
983 | } | 795 | } |
984 | 796 | ||
@@ -1111,7 +923,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) | |||
1111 | break; | 923 | break; |
1112 | 924 | ||
1113 | case PTRACE_BTS_SIZE: | 925 | case PTRACE_BTS_SIZE: |
1114 | ret = ds_get_bts_index(child, /* pos = */ NULL); | 926 | ret = ptrace_bts_size(child); |
1115 | break; | 927 | break; |
1116 | 928 | ||
1117 | case PTRACE_BTS_GET: | 929 | case PTRACE_BTS_GET: |
@@ -1120,7 +932,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) | |||
1120 | break; | 932 | break; |
1121 | 933 | ||
1122 | case PTRACE_BTS_CLEAR: | 934 | case PTRACE_BTS_CLEAR: |
1123 | ret = ds_clear_bts(child); | 935 | ret = ptrace_bts_clear(child); |
1124 | break; | 936 | break; |
1125 | 937 | ||
1126 | case PTRACE_BTS_DRAIN: | 938 | case PTRACE_BTS_DRAIN: |
@@ -1383,6 +1195,14 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, | |||
1383 | 1195 | ||
1384 | case PTRACE_GET_THREAD_AREA: | 1196 | case PTRACE_GET_THREAD_AREA: |
1385 | case PTRACE_SET_THREAD_AREA: | 1197 | case PTRACE_SET_THREAD_AREA: |
1198 | #ifdef CONFIG_X86_PTRACE_BTS | ||
1199 | case PTRACE_BTS_CONFIG: | ||
1200 | case PTRACE_BTS_STATUS: | ||
1201 | case PTRACE_BTS_SIZE: | ||
1202 | case PTRACE_BTS_GET: | ||
1203 | case PTRACE_BTS_CLEAR: | ||
1204 | case PTRACE_BTS_DRAIN: | ||
1205 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
1386 | return arch_ptrace(child, request, addr, data); | 1206 | return arch_ptrace(child, request, addr, data); |
1387 | 1207 | ||
1388 | default: | 1208 | default: |
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index a03e7f6d90c3..10786af95545 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c | |||
@@ -6,6 +6,7 @@ | |||
6 | #include <linux/sched.h> | 6 | #include <linux/sched.h> |
7 | #include <linux/stacktrace.h> | 7 | #include <linux/stacktrace.h> |
8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
9 | #include <linux/uaccess.h> | ||
9 | #include <asm/stacktrace.h> | 10 | #include <asm/stacktrace.h> |
10 | 11 | ||
11 | static void save_stack_warning(void *data, char *msg) | 12 | static void save_stack_warning(void *data, char *msg) |
@@ -83,3 +84,66 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) | |||
83 | trace->entries[trace->nr_entries++] = ULONG_MAX; | 84 | trace->entries[trace->nr_entries++] = ULONG_MAX; |
84 | } | 85 | } |
85 | EXPORT_SYMBOL_GPL(save_stack_trace_tsk); | 86 | EXPORT_SYMBOL_GPL(save_stack_trace_tsk); |
87 | |||
88 | /* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */ | ||
89 | |||
90 | struct stack_frame { | ||
91 | const void __user *next_fp; | ||
92 | unsigned long ret_addr; | ||
93 | }; | ||
94 | |||
95 | static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) | ||
96 | { | ||
97 | int ret; | ||
98 | |||
99 | if (!access_ok(VERIFY_READ, fp, sizeof(*frame))) | ||
100 | return 0; | ||
101 | |||
102 | ret = 1; | ||
103 | pagefault_disable(); | ||
104 | if (__copy_from_user_inatomic(frame, fp, sizeof(*frame))) | ||
105 | ret = 0; | ||
106 | pagefault_enable(); | ||
107 | |||
108 | return ret; | ||
109 | } | ||
110 | |||
111 | static inline void __save_stack_trace_user(struct stack_trace *trace) | ||
112 | { | ||
113 | const struct pt_regs *regs = task_pt_regs(current); | ||
114 | const void __user *fp = (const void __user *)regs->bp; | ||
115 | |||
116 | if (trace->nr_entries < trace->max_entries) | ||
117 | trace->entries[trace->nr_entries++] = regs->ip; | ||
118 | |||
119 | while (trace->nr_entries < trace->max_entries) { | ||
120 | struct stack_frame frame; | ||
121 | |||
122 | frame.next_fp = NULL; | ||
123 | frame.ret_addr = 0; | ||
124 | if (!copy_stack_frame(fp, &frame)) | ||
125 | break; | ||
126 | if ((unsigned long)fp < regs->sp) | ||
127 | break; | ||
128 | if (frame.ret_addr) { | ||
129 | trace->entries[trace->nr_entries++] = | ||
130 | frame.ret_addr; | ||
131 | } | ||
132 | if (fp == frame.next_fp) | ||
133 | break; | ||
134 | fp = frame.next_fp; | ||
135 | } | ||
136 | } | ||
137 | |||
138 | void save_stack_trace_user(struct stack_trace *trace) | ||
139 | { | ||
140 | /* | ||
141 | * Trace user stack if we are not a kernel thread | ||
142 | */ | ||
143 | if (current->mm) { | ||
144 | __save_stack_trace_user(trace); | ||
145 | } | ||
146 | if (trace->nr_entries < trace->max_entries) | ||
147 | trace->entries[trace->nr_entries++] = ULONG_MAX; | ||
148 | } | ||
149 | |||
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index a9b8560adbc2..82c67559dde7 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S | |||
@@ -44,6 +44,7 @@ SECTIONS | |||
44 | SCHED_TEXT | 44 | SCHED_TEXT |
45 | LOCK_TEXT | 45 | LOCK_TEXT |
46 | KPROBES_TEXT | 46 | KPROBES_TEXT |
47 | IRQENTRY_TEXT | ||
47 | *(.fixup) | 48 | *(.fixup) |
48 | *(.gnu.warning) | 49 | *(.gnu.warning) |
49 | _etext = .; /* End of text section */ | 50 | _etext = .; /* End of text section */ |
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 46e05447405b..1a614c0e6bef 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S | |||
@@ -35,6 +35,7 @@ SECTIONS | |||
35 | SCHED_TEXT | 35 | SCHED_TEXT |
36 | LOCK_TEXT | 36 | LOCK_TEXT |
37 | KPROBES_TEXT | 37 | KPROBES_TEXT |
38 | IRQENTRY_TEXT | ||
38 | *(.fixup) | 39 | *(.fixup) |
39 | *(.gnu.warning) | 40 | *(.gnu.warning) |
40 | _etext = .; /* End of text section */ | 41 | _etext = .; /* End of text section */ |
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 0b8b6690a86d..6f3d3d4cd973 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c | |||
@@ -17,6 +17,9 @@ | |||
17 | * want per guest time just set the kernel.vsyscall64 sysctl to 0. | 17 | * want per guest time just set the kernel.vsyscall64 sysctl to 0. |
18 | */ | 18 | */ |
19 | 19 | ||
20 | /* Disable profiling for userspace code: */ | ||
21 | #define DISABLE_BRANCH_PROFILING | ||
22 | |||
20 | #include <linux/time.h> | 23 | #include <linux/time.h> |
21 | #include <linux/init.h> | 24 | #include <linux/init.h> |
22 | #include <linux/kernel.h> | 25 | #include <linux/kernel.h> |
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index fea4565ff576..d8cc96a2738f 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile | |||
@@ -8,9 +8,8 @@ obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o | |||
8 | 8 | ||
9 | obj-$(CONFIG_HIGHMEM) += highmem_32.o | 9 | obj-$(CONFIG_HIGHMEM) += highmem_32.o |
10 | 10 | ||
11 | obj-$(CONFIG_MMIOTRACE_HOOKS) += kmmio.o | ||
12 | obj-$(CONFIG_MMIOTRACE) += mmiotrace.o | 11 | obj-$(CONFIG_MMIOTRACE) += mmiotrace.o |
13 | mmiotrace-y := pf_in.o mmio-mod.o | 12 | mmiotrace-y := kmmio.o pf_in.o mmio-mod.o |
14 | obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o | 13 | obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o |
15 | 14 | ||
16 | obj-$(CONFIG_NUMA) += numa_$(BITS).o | 15 | obj-$(CONFIG_NUMA) += numa_$(BITS).o |
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 31e8730fa246..21e996a70d68 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -53,7 +53,7 @@ | |||
53 | 53 | ||
54 | static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) | 54 | static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) |
55 | { | 55 | { |
56 | #ifdef CONFIG_MMIOTRACE_HOOKS | 56 | #ifdef CONFIG_MMIOTRACE |
57 | if (unlikely(is_kmmio_active())) | 57 | if (unlikely(is_kmmio_active())) |
58 | if (kmmio_handler(regs, addr) == 1) | 58 | if (kmmio_handler(regs, addr) == 1) |
59 | return -1; | 59 | return -1; |
@@ -413,6 +413,7 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, | |||
413 | unsigned long error_code) | 413 | unsigned long error_code) |
414 | { | 414 | { |
415 | unsigned long flags = oops_begin(); | 415 | unsigned long flags = oops_begin(); |
416 | int sig = SIGKILL; | ||
416 | struct task_struct *tsk; | 417 | struct task_struct *tsk; |
417 | 418 | ||
418 | printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", | 419 | printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", |
@@ -423,8 +424,8 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, | |||
423 | tsk->thread.trap_no = 14; | 424 | tsk->thread.trap_no = 14; |
424 | tsk->thread.error_code = error_code; | 425 | tsk->thread.error_code = error_code; |
425 | if (__die("Bad pagetable", regs, error_code)) | 426 | if (__die("Bad pagetable", regs, error_code)) |
426 | regs = NULL; | 427 | sig = 0; |
427 | oops_end(flags, regs, SIGKILL); | 428 | oops_end(flags, regs, sig); |
428 | } | 429 | } |
429 | #endif | 430 | #endif |
430 | 431 | ||
@@ -590,6 +591,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
590 | int fault; | 591 | int fault; |
591 | #ifdef CONFIG_X86_64 | 592 | #ifdef CONFIG_X86_64 |
592 | unsigned long flags; | 593 | unsigned long flags; |
594 | int sig; | ||
593 | #endif | 595 | #endif |
594 | 596 | ||
595 | tsk = current; | 597 | tsk = current; |
@@ -849,11 +851,12 @@ no_context: | |||
849 | bust_spinlocks(0); | 851 | bust_spinlocks(0); |
850 | do_exit(SIGKILL); | 852 | do_exit(SIGKILL); |
851 | #else | 853 | #else |
854 | sig = SIGKILL; | ||
852 | if (__die("Oops", regs, error_code)) | 855 | if (__die("Oops", regs, error_code)) |
853 | regs = NULL; | 856 | sig = 0; |
854 | /* Executive summary in case the body of the oops scrolled away */ | 857 | /* Executive summary in case the body of the oops scrolled away */ |
855 | printk(KERN_EMERG "CR2: %016lx\n", address); | 858 | printk(KERN_EMERG "CR2: %016lx\n", address); |
856 | oops_end(flags, regs, SIGKILL); | 859 | oops_end(flags, regs, sig); |
857 | #endif | 860 | #endif |
858 | 861 | ||
859 | /* | 862 | /* |
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 1ef0f90813d6..d9d35824c56f 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c | |||
@@ -9,6 +9,9 @@ | |||
9 | * Also alternative() doesn't work. | 9 | * Also alternative() doesn't work. |
10 | */ | 10 | */ |
11 | 11 | ||
12 | /* Disable profiling for userspace code: */ | ||
13 | #define DISABLE_BRANCH_PROFILING | ||
14 | |||
12 | #include <linux/kernel.h> | 15 | #include <linux/kernel.h> |
13 | #include <linux/posix-timers.h> | 16 | #include <linux/posix-timers.h> |
14 | #include <linux/time.h> | 17 | #include <linux/time.h> |