diff options
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/Makefile_32 | 1 | ||||
-rw-r--r-- | arch/x86/kernel/Makefile_64 | 1 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/intel.c | 5 | ||||
-rw-r--r-- | arch/x86/kernel/ds.c | 429 | ||||
-rw-r--r-- | arch/x86/kernel/process_32.c | 19 | ||||
-rw-r--r-- | arch/x86/kernel/process_64.c | 26 | ||||
-rw-r--r-- | arch/x86/kernel/ptrace.c | 212 | ||||
-rw-r--r-- | arch/x86/kernel/setup_64.c | 5 | ||||
-rw-r--r-- | arch/x86/kernel/step.c | 18 |
9 files changed, 707 insertions, 9 deletions
diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32 index b2d7aea4c82d..cc2651bcc07f 100644 --- a/arch/x86/kernel/Makefile_32 +++ b/arch/x86/kernel/Makefile_32 | |||
@@ -11,6 +11,7 @@ obj-y := process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \ | |||
11 | quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o io_delay.o rtc.o | 11 | quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o io_delay.o rtc.o |
12 | 12 | ||
13 | obj-y += ptrace.o | 13 | obj-y += ptrace.o |
14 | obj-y += ds.o | ||
14 | obj-y += tls.o | 15 | obj-y += tls.o |
15 | obj-y += step.o | 16 | obj-y += step.o |
16 | obj-$(CONFIG_STACKTRACE) += stacktrace.o | 17 | obj-$(CONFIG_STACKTRACE) += stacktrace.o |
diff --git a/arch/x86/kernel/Makefile_64 b/arch/x86/kernel/Makefile_64 index 19af64e1a3fc..2ec96acf6486 100644 --- a/arch/x86/kernel/Makefile_64 +++ b/arch/x86/kernel/Makefile_64 | |||
@@ -13,6 +13,7 @@ obj-y := process_64.o signal_64.o entry_64.o traps_64.o irq_64.o \ | |||
13 | i8253.o io_delay.o rtc.o | 13 | i8253.o io_delay.o rtc.o |
14 | 14 | ||
15 | obj-y += ptrace.o | 15 | obj-y += ptrace.o |
16 | obj-y += ds.o | ||
16 | obj-y += step.o | 17 | obj-y += step.o |
17 | 18 | ||
18 | obj-$(CONFIG_IA32_EMULATION) += tls.o | 19 | obj-$(CONFIG_IA32_EMULATION) += tls.o |
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 867ff94579be..e4b7e73e9024 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -11,6 +11,8 @@ | |||
11 | #include <asm/pgtable.h> | 11 | #include <asm/pgtable.h> |
12 | #include <asm/msr.h> | 12 | #include <asm/msr.h> |
13 | #include <asm/uaccess.h> | 13 | #include <asm/uaccess.h> |
14 | #include <asm/ptrace.h> | ||
15 | #include <asm/ds.h> | ||
14 | 16 | ||
15 | #include "cpu.h" | 17 | #include "cpu.h" |
16 | 18 | ||
@@ -219,6 +221,9 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
219 | if (!(l1 & (1<<12))) | 221 | if (!(l1 & (1<<12))) |
220 | set_bit(X86_FEATURE_PEBS, c->x86_capability); | 222 | set_bit(X86_FEATURE_PEBS, c->x86_capability); |
221 | } | 223 | } |
224 | |||
225 | if (cpu_has_bts) | ||
226 | ds_init_intel(c); | ||
222 | } | 227 | } |
223 | 228 | ||
224 | static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 * c, unsigned int size) | 229 | static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 * c, unsigned int size) |
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c new file mode 100644 index 000000000000..996a7c4f5963 --- /dev/null +++ b/arch/x86/kernel/ds.c | |||
@@ -0,0 +1,429 @@ | |||
1 | /* | ||
2 | * Debug Store support | ||
3 | * | ||
4 | * This provides a low-level interface to the hardware's Debug Store | ||
5 | * feature that is used for last branch recording (LBR) and | ||
6 | * precise-event based sampling (PEBS). | ||
7 | * | ||
8 | * Different architectures use a different DS layout/pointer size. | ||
9 | * The below functions therefore work on a void*. | ||
10 | * | ||
11 | * | ||
12 | * Since there is no user for PEBS, yet, only LBR (or branch | ||
13 | * trace store, BTS) is supported. | ||
14 | * | ||
15 | * | ||
16 | * Copyright (C) 2007 Intel Corporation. | ||
17 | * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007 | ||
18 | */ | ||
19 | |||
20 | #include <asm/ds.h> | ||
21 | |||
22 | #include <linux/errno.h> | ||
23 | #include <linux/string.h> | ||
24 | #include <linux/slab.h> | ||
25 | |||
26 | |||
27 | /* | ||
28 | * Debug Store (DS) save area configuration (see Intel64 and IA32 | ||
29 | * Architectures Software Developer's Manual, section 18.5) | ||
30 | * | ||
31 | * The DS configuration consists of the following fields; different | ||
32 | * architectures vary in the size of those fields. | ||
33 | * - double-word aligned base linear address of the BTS buffer | ||
34 | * - write pointer into the BTS buffer | ||
35 | * - end linear address of the BTS buffer (one byte beyond the end of | ||
36 | * the buffer) | ||
37 | * - interrupt pointer into BTS buffer | ||
38 | * (interrupt occurs when write pointer passes interrupt pointer) | ||
39 | * - double-word aligned base linear address of the PEBS buffer | ||
40 | * - write pointer into the PEBS buffer | ||
41 | * - end linear address of the PEBS buffer (one byte beyond the end of | ||
42 | * the buffer) | ||
43 | * - interrupt pointer into PEBS buffer | ||
44 | * (interrupt occurs when write pointer passes interrupt pointer) | ||
45 | * - value to which counter is reset following counter overflow | ||
46 | * | ||
47 | * On later architectures, the last branch recording hardware uses | ||
48 | * 64bit pointers even in 32bit mode. | ||
49 | * | ||
50 | * | ||
51 | * Branch Trace Store (BTS) records store information about control | ||
52 | * flow changes. They at least provide the following information: | ||
53 | * - source linear address | ||
54 | * - destination linear address | ||
55 | * | ||
56 | * Netburst supported a predicated bit that had been dropped in later | ||
57 | * architectures. We do not support it. | ||
58 | * | ||
59 | * | ||
60 | * In order to abstract from the actual DS and BTS layout, we describe | ||
61 | * the access to the relevant fields. | ||
62 | * Thanks to Andi Kleen for proposing this design. | ||
63 | * | ||
64 | * The implementation, however, is not as general as it might seem. In | ||
65 | * order to stay somewhat simple and efficient, we assume an | ||
66 | * underlying unsigned type (mostly a pointer type) and we expect the | ||
67 | * field to be at least as big as that type. | ||
68 | */ | ||
69 | |||
/*
 * Marker value for the from_ip field of a BTS record: a record carrying
 * this address is an info record that has to be interpreted (or skipped)
 * instead of being treated as a branch.
 */
#define BTS_ESCAPE_ADDRESS (-1)
75 | |||
/*
 * Describes where a single field lives inside a DS area or BTS record.
 */
struct access_desc {
	unsigned char offset;	/* byte offset of the field from the base */
	unsigned char size;	/* size of the field in bytes */
};
83 | |||
84 | /* | ||
85 | * The configuration for a particular DS/BTS hardware implementation. | ||
86 | */ | ||
87 | struct ds_configuration { | ||
88 | /* the DS configuration */ | ||
89 | unsigned char sizeof_ds; | ||
90 | struct access_desc bts_buffer_base; | ||
91 | struct access_desc bts_index; | ||
92 | struct access_desc bts_absolute_maximum; | ||
93 | struct access_desc bts_interrupt_threshold; | ||
94 | /* the BTS configuration */ | ||
95 | unsigned char sizeof_bts; | ||
96 | struct access_desc from_ip; | ||
97 | struct access_desc to_ip; | ||
98 | /* BTS variants used to store additional information like | ||
99 | timestamps */ | ||
100 | struct access_desc info_type; | ||
101 | struct access_desc info_data; | ||
102 | unsigned long debugctl_mask; | ||
103 | }; | ||
104 | |||
105 | /* | ||
106 | * The global configuration used by the below accessor functions | ||
107 | */ | ||
108 | static struct ds_configuration ds_cfg; | ||
109 | |||
110 | /* | ||
111 | * Accessor functions for some DS and BTS fields using the above | ||
112 | * global ds_cfg. | ||
113 | */ | ||
114 | static inline void *get_bts_buffer_base(char *base) | ||
115 | { | ||
116 | return *(void **)(base + ds_cfg.bts_buffer_base.offset); | ||
117 | } | ||
118 | static inline void set_bts_buffer_base(char *base, void *value) | ||
119 | { | ||
120 | (*(void **)(base + ds_cfg.bts_buffer_base.offset)) = value; | ||
121 | } | ||
122 | static inline void *get_bts_index(char *base) | ||
123 | { | ||
124 | return *(void **)(base + ds_cfg.bts_index.offset); | ||
125 | } | ||
126 | static inline void set_bts_index(char *base, void *value) | ||
127 | { | ||
128 | (*(void **)(base + ds_cfg.bts_index.offset)) = value; | ||
129 | } | ||
130 | static inline void *get_bts_absolute_maximum(char *base) | ||
131 | { | ||
132 | return *(void **)(base + ds_cfg.bts_absolute_maximum.offset); | ||
133 | } | ||
134 | static inline void set_bts_absolute_maximum(char *base, void *value) | ||
135 | { | ||
136 | (*(void **)(base + ds_cfg.bts_absolute_maximum.offset)) = value; | ||
137 | } | ||
138 | static inline void *get_bts_interrupt_threshold(char *base) | ||
139 | { | ||
140 | return *(void **)(base + ds_cfg.bts_interrupt_threshold.offset); | ||
141 | } | ||
142 | static inline void set_bts_interrupt_threshold(char *base, void *value) | ||
143 | { | ||
144 | (*(void **)(base + ds_cfg.bts_interrupt_threshold.offset)) = value; | ||
145 | } | ||
146 | static inline long get_from_ip(char *base) | ||
147 | { | ||
148 | return *(long *)(base + ds_cfg.from_ip.offset); | ||
149 | } | ||
150 | static inline void set_from_ip(char *base, long value) | ||
151 | { | ||
152 | (*(long *)(base + ds_cfg.from_ip.offset)) = value; | ||
153 | } | ||
154 | static inline long get_to_ip(char *base) | ||
155 | { | ||
156 | return *(long *)(base + ds_cfg.to_ip.offset); | ||
157 | } | ||
158 | static inline void set_to_ip(char *base, long value) | ||
159 | { | ||
160 | (*(long *)(base + ds_cfg.to_ip.offset)) = value; | ||
161 | } | ||
162 | static inline unsigned char get_info_type(char *base) | ||
163 | { | ||
164 | return *(unsigned char *)(base + ds_cfg.info_type.offset); | ||
165 | } | ||
166 | static inline void set_info_type(char *base, unsigned char value) | ||
167 | { | ||
168 | (*(unsigned char *)(base + ds_cfg.info_type.offset)) = value; | ||
169 | } | ||
170 | /* | ||
171 | * The info data might overlap with the info type on some architectures. | ||
172 | * We therefore read and write the exact number of bytes. | ||
173 | */ | ||
174 | static inline unsigned long long get_info_data(char *base) | ||
175 | { | ||
176 | unsigned long long value = 0; | ||
177 | memcpy(&value, | ||
178 | base + ds_cfg.info_data.offset, | ||
179 | ds_cfg.info_data.size); | ||
180 | return value; | ||
181 | } | ||
182 | static inline void set_info_data(char *base, unsigned long long value) | ||
183 | { | ||
184 | memcpy(base + ds_cfg.info_data.offset, | ||
185 | &value, | ||
186 | ds_cfg.info_data.size); | ||
187 | } | ||
188 | |||
189 | |||
190 | int ds_allocate(void **dsp, size_t bts_size_in_records) | ||
191 | { | ||
192 | size_t bts_size_in_bytes = 0; | ||
193 | void *bts = 0; | ||
194 | void *ds = 0; | ||
195 | |||
196 | if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) | ||
197 | return -EOPNOTSUPP; | ||
198 | |||
199 | if (bts_size_in_records < 0) | ||
200 | return -EINVAL; | ||
201 | |||
202 | bts_size_in_bytes = | ||
203 | bts_size_in_records * ds_cfg.sizeof_bts; | ||
204 | |||
205 | if (bts_size_in_bytes <= 0) | ||
206 | return -EINVAL; | ||
207 | |||
208 | bts = kzalloc(bts_size_in_bytes, GFP_KERNEL); | ||
209 | |||
210 | if (!bts) | ||
211 | return -ENOMEM; | ||
212 | |||
213 | ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); | ||
214 | |||
215 | if (!ds) { | ||
216 | kfree(bts); | ||
217 | return -ENOMEM; | ||
218 | } | ||
219 | |||
220 | set_bts_buffer_base(ds, bts); | ||
221 | set_bts_index(ds, bts); | ||
222 | set_bts_absolute_maximum(ds, bts + bts_size_in_bytes); | ||
223 | set_bts_interrupt_threshold(ds, bts + bts_size_in_bytes + 1); | ||
224 | |||
225 | *dsp = ds; | ||
226 | return 0; | ||
227 | } | ||
228 | |||
229 | int ds_free(void **dsp) | ||
230 | { | ||
231 | if (*dsp) | ||
232 | kfree(get_bts_buffer_base(*dsp)); | ||
233 | kfree(*dsp); | ||
234 | *dsp = 0; | ||
235 | |||
236 | return 0; | ||
237 | } | ||
238 | |||
239 | int ds_get_bts_size(void *ds) | ||
240 | { | ||
241 | size_t size_in_bytes; | ||
242 | |||
243 | if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) | ||
244 | return -EOPNOTSUPP; | ||
245 | |||
246 | size_in_bytes = | ||
247 | get_bts_absolute_maximum(ds) - | ||
248 | get_bts_buffer_base(ds); | ||
249 | |||
250 | return size_in_bytes / ds_cfg.sizeof_bts; | ||
251 | } | ||
252 | |||
253 | int ds_get_bts_index(void *ds) | ||
254 | { | ||
255 | size_t index_offset_in_bytes; | ||
256 | |||
257 | if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) | ||
258 | return -EOPNOTSUPP; | ||
259 | |||
260 | index_offset_in_bytes = | ||
261 | get_bts_index(ds) - | ||
262 | get_bts_buffer_base(ds); | ||
263 | |||
264 | return index_offset_in_bytes / ds_cfg.sizeof_bts; | ||
265 | } | ||
266 | |||
267 | int ds_read_bts(void *ds, size_t index, struct bts_struct *out) | ||
268 | { | ||
269 | void *bts; | ||
270 | |||
271 | if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) | ||
272 | return -EOPNOTSUPP; | ||
273 | |||
274 | if (index < 0) | ||
275 | return -EINVAL; | ||
276 | |||
277 | if (index >= ds_get_bts_size(ds)) | ||
278 | return -EINVAL; | ||
279 | |||
280 | bts = get_bts_buffer_base(ds); | ||
281 | bts = (char *)bts + (index * ds_cfg.sizeof_bts); | ||
282 | |||
283 | memset(out, 0, sizeof(*out)); | ||
284 | if (get_from_ip(bts) == BTS_ESCAPE_ADDRESS) { | ||
285 | out->qualifier = get_info_type(bts); | ||
286 | out->variant.timestamp = get_info_data(bts); | ||
287 | } else { | ||
288 | out->qualifier = BTS_BRANCH; | ||
289 | out->variant.lbr.from_ip = get_from_ip(bts); | ||
290 | out->variant.lbr.to_ip = get_to_ip(bts); | ||
291 | } | ||
292 | |||
293 | return 0; | ||
294 | } | ||
295 | |||
296 | int ds_write_bts(void *ds, const struct bts_struct *in) | ||
297 | { | ||
298 | void *bts; | ||
299 | |||
300 | if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) | ||
301 | return -EOPNOTSUPP; | ||
302 | |||
303 | if (ds_get_bts_size(ds) <= 0) | ||
304 | return -ENXIO; | ||
305 | |||
306 | bts = get_bts_index(ds); | ||
307 | |||
308 | memset(bts, 0, ds_cfg.sizeof_bts); | ||
309 | switch (in->qualifier) { | ||
310 | case BTS_INVALID: | ||
311 | break; | ||
312 | |||
313 | case BTS_BRANCH: | ||
314 | set_from_ip(bts, in->variant.lbr.from_ip); | ||
315 | set_to_ip(bts, in->variant.lbr.to_ip); | ||
316 | break; | ||
317 | |||
318 | case BTS_TASK_ARRIVES: | ||
319 | case BTS_TASK_DEPARTS: | ||
320 | set_from_ip(bts, BTS_ESCAPE_ADDRESS); | ||
321 | set_info_type(bts, in->qualifier); | ||
322 | set_info_data(bts, in->variant.timestamp); | ||
323 | break; | ||
324 | |||
325 | default: | ||
326 | return -EINVAL; | ||
327 | } | ||
328 | |||
329 | bts = (char *)bts + ds_cfg.sizeof_bts; | ||
330 | if (bts >= get_bts_absolute_maximum(ds)) | ||
331 | bts = get_bts_buffer_base(ds); | ||
332 | set_bts_index(ds, bts); | ||
333 | |||
334 | return 0; | ||
335 | } | ||
336 | |||
337 | unsigned long ds_debugctl_mask(void) | ||
338 | { | ||
339 | return ds_cfg.debugctl_mask; | ||
340 | } | ||
341 | |||
342 | #ifdef __i386__ | ||
343 | static const struct ds_configuration ds_cfg_netburst = { | ||
344 | .sizeof_ds = 9 * 4, | ||
345 | .bts_buffer_base = { 0, 4 }, | ||
346 | .bts_index = { 4, 4 }, | ||
347 | .bts_absolute_maximum = { 8, 4 }, | ||
348 | .bts_interrupt_threshold = { 12, 4 }, | ||
349 | .sizeof_bts = 3 * 4, | ||
350 | .from_ip = { 0, 4 }, | ||
351 | .to_ip = { 4, 4 }, | ||
352 | .info_type = { 4, 1 }, | ||
353 | .info_data = { 5, 7 }, | ||
354 | .debugctl_mask = (1<<2)|(1<<3) | ||
355 | }; | ||
356 | |||
357 | static const struct ds_configuration ds_cfg_pentium_m = { | ||
358 | .sizeof_ds = 9 * 4, | ||
359 | .bts_buffer_base = { 0, 4 }, | ||
360 | .bts_index = { 4, 4 }, | ||
361 | .bts_absolute_maximum = { 8, 4 }, | ||
362 | .bts_interrupt_threshold = { 12, 4 }, | ||
363 | .sizeof_bts = 3 * 4, | ||
364 | .from_ip = { 0, 4 }, | ||
365 | .to_ip = { 4, 4 }, | ||
366 | .info_type = { 4, 1 }, | ||
367 | .info_data = { 5, 7 }, | ||
368 | .debugctl_mask = (1<<6)|(1<<7) | ||
369 | }; | ||
370 | #endif /* __i386__ */ | ||
371 | |||
372 | static const struct ds_configuration ds_cfg_core2 = { | ||
373 | .sizeof_ds = 9 * 8, | ||
374 | .bts_buffer_base = { 0, 8 }, | ||
375 | .bts_index = { 8, 8 }, | ||
376 | .bts_absolute_maximum = { 16, 8 }, | ||
377 | .bts_interrupt_threshold = { 24, 8 }, | ||
378 | .sizeof_bts = 3 * 8, | ||
379 | .from_ip = { 0, 8 }, | ||
380 | .to_ip = { 8, 8 }, | ||
381 | .info_type = { 8, 1 }, | ||
382 | .info_data = { 9, 7 }, | ||
383 | .debugctl_mask = (1<<6)|(1<<7)|(1<<9) | ||
384 | }; | ||
385 | |||
386 | static inline void | ||
387 | ds_configure(const struct ds_configuration *cfg) | ||
388 | { | ||
389 | ds_cfg = *cfg; | ||
390 | } | ||
391 | |||
392 | void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | ||
393 | { | ||
394 | switch (c->x86) { | ||
395 | case 0x6: | ||
396 | switch (c->x86_model) { | ||
397 | #ifdef __i386__ | ||
398 | case 0xD: | ||
399 | case 0xE: /* Pentium M */ | ||
400 | ds_configure(&ds_cfg_pentium_m); | ||
401 | break; | ||
402 | #endif /* _i386_ */ | ||
403 | case 0xF: /* Core2 */ | ||
404 | ds_configure(&ds_cfg_core2); | ||
405 | break; | ||
406 | default: | ||
407 | /* sorry, don't know about them */ | ||
408 | break; | ||
409 | } | ||
410 | break; | ||
411 | case 0xF: | ||
412 | switch (c->x86_model) { | ||
413 | #ifdef __i386__ | ||
414 | case 0x0: | ||
415 | case 0x1: | ||
416 | case 0x2: /* Netburst */ | ||
417 | ds_configure(&ds_cfg_netburst); | ||
418 | break; | ||
419 | #endif /* _i386_ */ | ||
420 | default: | ||
421 | /* sorry, don't know about them */ | ||
422 | break; | ||
423 | } | ||
424 | break; | ||
425 | default: | ||
426 | /* sorry, don't know about them */ | ||
427 | break; | ||
428 | } | ||
429 | } | ||
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 5350763a2d03..2b9db9371060 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -614,11 +614,21 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | |||
614 | struct tss_struct *tss) | 614 | struct tss_struct *tss) |
615 | { | 615 | { |
616 | struct thread_struct *prev, *next; | 616 | struct thread_struct *prev, *next; |
617 | unsigned long debugctl; | ||
617 | 618 | ||
618 | prev = &prev_p->thread; | 619 | prev = &prev_p->thread; |
619 | next = &next_p->thread; | 620 | next = &next_p->thread; |
620 | 621 | ||
621 | if (next->debugctlmsr != prev->debugctlmsr) | 622 | debugctl = prev->debugctlmsr; |
623 | if (next->ds_area_msr != prev->ds_area_msr) { | ||
624 | /* we clear debugctl to make sure DS | ||
625 | * is not in use when we change it */ | ||
626 | debugctl = 0; | ||
627 | wrmsrl(MSR_IA32_DEBUGCTLMSR, 0); | ||
628 | wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0); | ||
629 | } | ||
630 | |||
631 | if (next->debugctlmsr != debugctl) | ||
622 | wrmsr(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr, 0); | 632 | wrmsr(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr, 0); |
623 | 633 | ||
624 | if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { | 634 | if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { |
@@ -642,6 +652,13 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | |||
642 | } | 652 | } |
643 | #endif | 653 | #endif |
644 | 654 | ||
655 | if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) | ||
656 | ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); | ||
657 | |||
658 | if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) | ||
659 | ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); | ||
660 | |||
661 | |||
645 | if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { | 662 | if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { |
646 | /* | 663 | /* |
647 | * Disable the bitmap via an invalid offset. We still cache | 664 | * Disable the bitmap via an invalid offset. We still cache |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 057b5442ffda..843bf0c978a4 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -568,11 +568,21 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, | |||
568 | struct tss_struct *tss) | 568 | struct tss_struct *tss) |
569 | { | 569 | { |
570 | struct thread_struct *prev, *next; | 570 | struct thread_struct *prev, *next; |
571 | unsigned long debugctl; | ||
571 | 572 | ||
572 | prev = &prev_p->thread, | 573 | prev = &prev_p->thread, |
573 | next = &next_p->thread; | 574 | next = &next_p->thread; |
574 | 575 | ||
575 | if (next->debugctlmsr != prev->debugctlmsr) | 576 | debugctl = prev->debugctlmsr; |
577 | if (next->ds_area_msr != prev->ds_area_msr) { | ||
578 | /* we clear debugctl to make sure DS | ||
579 | * is not in use when we change it */ | ||
580 | debugctl = 0; | ||
581 | wrmsrl(MSR_IA32_DEBUGCTLMSR, 0); | ||
582 | wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr); | ||
583 | } | ||
584 | |||
585 | if (next->debugctlmsr != debugctl) | ||
576 | wrmsrl(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr); | 586 | wrmsrl(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr); |
577 | 587 | ||
578 | if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { | 588 | if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { |
@@ -598,6 +608,16 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, | |||
598 | */ | 608 | */ |
599 | memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); | 609 | memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); |
600 | } | 610 | } |
611 | |||
612 | /* | ||
613 | * Last branch recording reconfiguration of trace hardware and | ||
614 | * disentangling of trace data per task. | ||
615 | */ | ||
616 | if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) | ||
617 | ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); | ||
618 | |||
619 | if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) | ||
620 | ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); | ||
601 | } | 621 | } |
602 | 622 | ||
603 | /* | 623 | /* |
@@ -701,8 +721,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
701 | /* | 721 | /* |
702 | * Now maybe reload the debug registers and handle I/O bitmaps | 722 | * Now maybe reload the debug registers and handle I/O bitmaps |
703 | */ | 723 | */ |
704 | if (unlikely((task_thread_info(next_p)->flags & _TIF_WORK_CTXSW)) | 724 | if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT || |
705 | || test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) | 725 | task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV)) |
706 | __switch_to_xtra(prev_p, next_p, tss); | 726 | __switch_to_xtra(prev_p, next_p, tss); |
707 | 727 | ||
708 | /* If the task has used fpu the last 5 timeslices, just do a full | 728 | /* If the task has used fpu the last 5 timeslices, just do a full |
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 3399c1be79b8..8d0dd8b5effe 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -2,6 +2,9 @@ | |||
2 | /* | 2 | /* |
3 | * Pentium III FXSR, SSE support | 3 | * Pentium III FXSR, SSE support |
4 | * Gareth Hughes <gareth@valinux.com>, May 2000 | 4 | * Gareth Hughes <gareth@valinux.com>, May 2000 |
5 | * | ||
6 | * BTS tracing | ||
7 | * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007 | ||
5 | */ | 8 | */ |
6 | 9 | ||
7 | #include <linux/kernel.h> | 10 | #include <linux/kernel.h> |
@@ -26,6 +29,14 @@ | |||
26 | #include <asm/desc.h> | 29 | #include <asm/desc.h> |
27 | #include <asm/prctl.h> | 30 | #include <asm/prctl.h> |
28 | #include <asm/proto.h> | 31 | #include <asm/proto.h> |
32 | #include <asm/ds.h> | ||
33 | |||
34 | |||
/*
 * Upper limit, in BTS records, for the BTS buffer of a single traced
 * task.
 */
#define PTRACE_BTS_BUFFER_MAX 4000
29 | 40 | ||
30 | /* | 41 | /* |
31 | * does not yet catch signals sent when the child dies. | 42 | * does not yet catch signals sent when the child dies. |
@@ -455,6 +466,165 @@ static int ptrace_set_debugreg(struct task_struct *child, | |||
455 | return 0; | 466 | return 0; |
456 | } | 467 | } |
457 | 468 | ||
469 | static int ptrace_bts_max_buffer_size(void) | ||
470 | { | ||
471 | return PTRACE_BTS_BUFFER_MAX; | ||
472 | } | ||
473 | |||
474 | static int ptrace_bts_get_buffer_size(struct task_struct *child) | ||
475 | { | ||
476 | if (!child->thread.ds_area_msr) | ||
477 | return -ENXIO; | ||
478 | |||
479 | return ds_get_bts_size((void *)child->thread.ds_area_msr); | ||
480 | } | ||
481 | |||
482 | static int ptrace_bts_get_index(struct task_struct *child) | ||
483 | { | ||
484 | if (!child->thread.ds_area_msr) | ||
485 | return -ENXIO; | ||
486 | |||
487 | return ds_get_bts_index((void *)child->thread.ds_area_msr); | ||
488 | } | ||
489 | |||
490 | static int ptrace_bts_read_record(struct task_struct *child, | ||
491 | long index, | ||
492 | struct bts_struct __user *out) | ||
493 | { | ||
494 | struct bts_struct ret; | ||
495 | int retval; | ||
496 | |||
497 | if (!child->thread.ds_area_msr) | ||
498 | return -ENXIO; | ||
499 | |||
500 | retval = ds_read_bts((void *)child->thread.ds_area_msr, | ||
501 | index, &ret); | ||
502 | if (retval) | ||
503 | return retval; | ||
504 | |||
505 | if (copy_to_user(out, &ret, sizeof(ret))) | ||
506 | return -EFAULT; | ||
507 | |||
508 | return sizeof(ret); | ||
509 | } | ||
510 | |||
511 | static int ptrace_bts_write_record(struct task_struct *child, | ||
512 | const struct bts_struct *in) | ||
513 | { | ||
514 | int retval; | ||
515 | |||
516 | if (!child->thread.ds_area_msr) | ||
517 | return -ENXIO; | ||
518 | |||
519 | retval = ds_write_bts((void *)child->thread.ds_area_msr, in); | ||
520 | if (retval) | ||
521 | return retval; | ||
522 | |||
523 | return sizeof(*in); | ||
524 | } | ||
525 | |||
526 | static int ptrace_bts_config(struct task_struct *child, | ||
527 | unsigned long options) | ||
528 | { | ||
529 | unsigned long debugctl_mask = ds_debugctl_mask(); | ||
530 | int retval; | ||
531 | |||
532 | retval = ptrace_bts_get_buffer_size(child); | ||
533 | if (retval < 0) | ||
534 | return retval; | ||
535 | if (retval == 0) | ||
536 | return -ENXIO; | ||
537 | |||
538 | if (options & PTRACE_BTS_O_TRACE_TASK) { | ||
539 | child->thread.debugctlmsr |= debugctl_mask; | ||
540 | set_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | ||
541 | } else { | ||
542 | /* there is no way for us to check whether we 'own' | ||
543 | * the respective bits in the DEBUGCTL MSR, we're | ||
544 | * about to clear */ | ||
545 | child->thread.debugctlmsr &= ~debugctl_mask; | ||
546 | |||
547 | if (!child->thread.debugctlmsr) | ||
548 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | ||
549 | } | ||
550 | |||
551 | if (options & PTRACE_BTS_O_TIMESTAMPS) | ||
552 | set_tsk_thread_flag(child, TIF_BTS_TRACE_TS); | ||
553 | else | ||
554 | clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); | ||
555 | |||
556 | return 0; | ||
557 | } | ||
558 | |||
559 | static int ptrace_bts_status(struct task_struct *child) | ||
560 | { | ||
561 | unsigned long debugctl_mask = ds_debugctl_mask(); | ||
562 | int retval, status = 0; | ||
563 | |||
564 | retval = ptrace_bts_get_buffer_size(child); | ||
565 | if (retval < 0) | ||
566 | return retval; | ||
567 | if (retval == 0) | ||
568 | return -ENXIO; | ||
569 | |||
570 | if (ptrace_bts_get_buffer_size(child) <= 0) | ||
571 | return -ENXIO; | ||
572 | |||
573 | if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) && | ||
574 | child->thread.debugctlmsr & debugctl_mask) | ||
575 | status |= PTRACE_BTS_O_TRACE_TASK; | ||
576 | if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS)) | ||
577 | status |= PTRACE_BTS_O_TIMESTAMPS; | ||
578 | |||
579 | return status; | ||
580 | } | ||
581 | |||
582 | static int ptrace_bts_allocate_bts(struct task_struct *child, | ||
583 | int size_in_records) | ||
584 | { | ||
585 | int retval = 0; | ||
586 | void *ds; | ||
587 | |||
588 | if (size_in_records < 0) | ||
589 | return -EINVAL; | ||
590 | |||
591 | if (size_in_records > ptrace_bts_max_buffer_size()) | ||
592 | return -EINVAL; | ||
593 | |||
594 | if (size_in_records == 0) { | ||
595 | ptrace_bts_config(child, /* options = */ 0); | ||
596 | } else { | ||
597 | retval = ds_allocate(&ds, size_in_records); | ||
598 | if (retval) | ||
599 | return retval; | ||
600 | } | ||
601 | |||
602 | if (child->thread.ds_area_msr) | ||
603 | ds_free((void **)&child->thread.ds_area_msr); | ||
604 | |||
605 | child->thread.ds_area_msr = (unsigned long)ds; | ||
606 | if (child->thread.ds_area_msr) | ||
607 | set_tsk_thread_flag(child, TIF_DS_AREA_MSR); | ||
608 | else | ||
609 | clear_tsk_thread_flag(child, TIF_DS_AREA_MSR); | ||
610 | |||
611 | return retval; | ||
612 | } | ||
613 | |||
614 | void ptrace_bts_take_timestamp(struct task_struct *tsk, | ||
615 | enum bts_qualifier qualifier) | ||
616 | { | ||
617 | struct bts_struct rec = { | ||
618 | .qualifier = qualifier, | ||
619 | .variant.timestamp = sched_clock() | ||
620 | }; | ||
621 | |||
622 | if (ptrace_bts_get_buffer_size(tsk) <= 0) | ||
623 | return; | ||
624 | |||
625 | ptrace_bts_write_record(tsk, &rec); | ||
626 | } | ||
627 | |||
458 | /* | 628 | /* |
459 | * Called by kernel/ptrace.c when detaching.. | 629 | * Called by kernel/ptrace.c when detaching.. |
460 | * | 630 | * |
@@ -466,6 +636,11 @@ void ptrace_disable(struct task_struct *child) | |||
466 | #ifdef TIF_SYSCALL_EMU | 636 | #ifdef TIF_SYSCALL_EMU |
467 | clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); | 637 | clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); |
468 | #endif | 638 | #endif |
639 | ptrace_bts_config(child, /* options = */ 0); | ||
640 | if (child->thread.ds_area_msr) { | ||
641 | ds_free((void **)&child->thread.ds_area_msr); | ||
642 | clear_tsk_thread_flag(child, TIF_DS_AREA_MSR); | ||
643 | } | ||
469 | } | 644 | } |
470 | 645 | ||
471 | long arch_ptrace(struct task_struct *child, long request, long addr, long data) | 646 | long arch_ptrace(struct task_struct *child, long request, long addr, long data) |
@@ -626,6 +801,36 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) | |||
626 | break; | 801 | break; |
627 | #endif | 802 | #endif |
628 | 803 | ||
804 | case PTRACE_BTS_MAX_BUFFER_SIZE: | ||
805 | ret = ptrace_bts_max_buffer_size(); | ||
806 | break; | ||
807 | |||
808 | case PTRACE_BTS_ALLOCATE_BUFFER: | ||
809 | ret = ptrace_bts_allocate_bts(child, data); | ||
810 | break; | ||
811 | |||
812 | case PTRACE_BTS_GET_BUFFER_SIZE: | ||
813 | ret = ptrace_bts_get_buffer_size(child); | ||
814 | break; | ||
815 | |||
816 | case PTRACE_BTS_GET_INDEX: | ||
817 | ret = ptrace_bts_get_index(child); | ||
818 | break; | ||
819 | |||
820 | case PTRACE_BTS_READ_RECORD: | ||
821 | ret = ptrace_bts_read_record | ||
822 | (child, data, | ||
823 | (struct bts_struct __user *) addr); | ||
824 | break; | ||
825 | |||
826 | case PTRACE_BTS_CONFIG: | ||
827 | ret = ptrace_bts_config(child, data); | ||
828 | break; | ||
829 | |||
830 | case PTRACE_BTS_STATUS: | ||
831 | ret = ptrace_bts_status(child); | ||
832 | break; | ||
833 | |||
629 | default: | 834 | default: |
630 | ret = ptrace_request(child, request, addr, data); | 835 | ret = ptrace_request(child, request, addr, data); |
631 | break; | 836 | break; |
@@ -809,6 +1014,13 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data) | |||
809 | case PTRACE_SETOPTIONS: | 1014 | case PTRACE_SETOPTIONS: |
810 | case PTRACE_SET_THREAD_AREA: | 1015 | case PTRACE_SET_THREAD_AREA: |
811 | case PTRACE_GET_THREAD_AREA: | 1016 | case PTRACE_GET_THREAD_AREA: |
1017 | case PTRACE_BTS_MAX_BUFFER_SIZE: | ||
1018 | case PTRACE_BTS_ALLOCATE_BUFFER: | ||
1019 | case PTRACE_BTS_GET_BUFFER_SIZE: | ||
1020 | case PTRACE_BTS_GET_INDEX: | ||
1021 | case PTRACE_BTS_READ_RECORD: | ||
1022 | case PTRACE_BTS_CONFIG: | ||
1023 | case PTRACE_BTS_STATUS: | ||
812 | return sys_ptrace(request, pid, addr, data); | 1024 | return sys_ptrace(request, pid, addr, data); |
813 | 1025 | ||
814 | default: | 1026 | default: |
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index ce4d6b52ce36..f2b131ef844e 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c | |||
@@ -60,6 +60,7 @@ | |||
60 | #include <asm/dmi.h> | 60 | #include <asm/dmi.h> |
61 | #include <asm/cacheflush.h> | 61 | #include <asm/cacheflush.h> |
62 | #include <asm/mce.h> | 62 | #include <asm/mce.h> |
63 | #include <asm/ds.h> | ||
63 | 64 | ||
64 | /* | 65 | /* |
65 | * Machine setup.. | 66 | * Machine setup.. |
@@ -823,6 +824,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
823 | set_cpu_cap(c, X86_FEATURE_PEBS); | 824 | set_cpu_cap(c, X86_FEATURE_PEBS); |
824 | } | 825 | } |
825 | 826 | ||
827 | |||
828 | if (cpu_has_bts) | ||
829 | ds_init_intel(c); | ||
830 | |||
826 | n = c->extended_cpuid_level; | 831 | n = c->extended_cpuid_level; |
827 | if (n >= 0x80000008) { | 832 | if (n >= 0x80000008) { |
828 | unsigned eax = cpuid_eax(0x80000008); | 833 | unsigned eax = cpuid_eax(0x80000008); |
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index f55c003f5b63..21ea22fda5fc 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c | |||
@@ -169,9 +169,14 @@ static void enable_step(struct task_struct *child, bool block) | |||
169 | */ | 169 | */ |
170 | if (enable_single_step(child) && block) { | 170 | if (enable_single_step(child) && block) { |
171 | set_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | 171 | set_tsk_thread_flag(child, TIF_DEBUGCTLMSR); |
172 | write_debugctlmsr(child, DEBUGCTLMSR_BTF); | 172 | write_debugctlmsr(child, |
173 | } else if (test_and_clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR)) { | 173 | child->thread.debugctlmsr | DEBUGCTLMSR_BTF); |
174 | write_debugctlmsr(child, 0); | 174 | } else { |
175 | write_debugctlmsr(child, | ||
176 | child->thread.debugctlmsr & ~TIF_DEBUGCTLMSR); | ||
177 | |||
178 | if (!child->thread.debugctlmsr) | ||
179 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | ||
175 | } | 180 | } |
176 | } | 181 | } |
177 | 182 | ||
@@ -190,8 +195,11 @@ void user_disable_single_step(struct task_struct *child) | |||
190 | /* | 195 | /* |
191 | * Make sure block stepping (BTF) is disabled. | 196 | * Make sure block stepping (BTF) is disabled. |
192 | */ | 197 | */ |
193 | if (test_and_clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR)) | 198 | write_debugctlmsr(child, |
194 | write_debugctlmsr(child, 0); | 199 | child->thread.debugctlmsr & ~TIF_DEBUGCTLMSR); |
200 | |||
201 | if (!child->thread.debugctlmsr) | ||
202 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | ||
195 | 203 | ||
196 | /* Always clear TIF_SINGLESTEP... */ | 204 | /* Always clear TIF_SINGLESTEP... */ |
197 | clear_tsk_thread_flag(child, TIF_SINGLESTEP); | 205 | clear_tsk_thread_flag(child, TIF_SINGLESTEP); |