aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
authorMarkus Metzger <markus.t.metzger@intel.com>2008-01-30 07:31:09 -0500
committerIngo Molnar <mingo@elte.hu>2008-01-30 07:31:09 -0500
commiteee3af4a2c83a97fff107ddc445d9df6fded9ce4 (patch)
treea7e9179b82b4df9e4cf6e810c54309324589395b /arch/x86/kernel
parent7796931f542518092d1fd2fb7cf1f1d96e0cd4b5 (diff)
x86, ptrace: support for branch trace store(BTS)
Resend using different mail client Changes to the last version: - split implementation into two layers: ds/bts and ptrace - renamed TIF's - save/restore ds save area msr in __switch_to_xtra() - make block-stepping only look at BTF bit Signed-off-by: Markus Metzger <markus.t.metzger@intel.com> Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com> Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/Makefile_321
-rw-r--r--arch/x86/kernel/Makefile_641
-rw-r--r--arch/x86/kernel/cpu/intel.c5
-rw-r--r--arch/x86/kernel/ds.c429
-rw-r--r--arch/x86/kernel/process_32.c19
-rw-r--r--arch/x86/kernel/process_64.c26
-rw-r--r--arch/x86/kernel/ptrace.c212
-rw-r--r--arch/x86/kernel/setup_64.c5
-rw-r--r--arch/x86/kernel/step.c18
9 files changed, 707 insertions, 9 deletions
diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32
index b2d7aea4c82d..cc2651bcc07f 100644
--- a/arch/x86/kernel/Makefile_32
+++ b/arch/x86/kernel/Makefile_32
@@ -11,6 +11,7 @@ obj-y := process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \
11 quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o io_delay.o rtc.o 11 quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o io_delay.o rtc.o
12 12
13obj-y += ptrace.o 13obj-y += ptrace.o
14obj-y += ds.o
14obj-y += tls.o 15obj-y += tls.o
15obj-y += step.o 16obj-y += step.o
16obj-$(CONFIG_STACKTRACE) += stacktrace.o 17obj-$(CONFIG_STACKTRACE) += stacktrace.o
diff --git a/arch/x86/kernel/Makefile_64 b/arch/x86/kernel/Makefile_64
index 19af64e1a3fc..2ec96acf6486 100644
--- a/arch/x86/kernel/Makefile_64
+++ b/arch/x86/kernel/Makefile_64
@@ -13,6 +13,7 @@ obj-y := process_64.o signal_64.o entry_64.o traps_64.o irq_64.o \
13 i8253.o io_delay.o rtc.o 13 i8253.o io_delay.o rtc.o
14 14
15obj-y += ptrace.o 15obj-y += ptrace.o
16obj-y += ds.o
16obj-y += step.o 17obj-y += step.o
17 18
18obj-$(CONFIG_IA32_EMULATION) += tls.o 19obj-$(CONFIG_IA32_EMULATION) += tls.o
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 867ff94579be..e4b7e73e9024 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -11,6 +11,8 @@
11#include <asm/pgtable.h> 11#include <asm/pgtable.h>
12#include <asm/msr.h> 12#include <asm/msr.h>
13#include <asm/uaccess.h> 13#include <asm/uaccess.h>
14#include <asm/ptrace.h>
15#include <asm/ds.h>
14 16
15#include "cpu.h" 17#include "cpu.h"
16 18
@@ -219,6 +221,9 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
219 if (!(l1 & (1<<12))) 221 if (!(l1 & (1<<12)))
220 set_bit(X86_FEATURE_PEBS, c->x86_capability); 222 set_bit(X86_FEATURE_PEBS, c->x86_capability);
221 } 223 }
224
225 if (cpu_has_bts)
226 ds_init_intel(c);
222} 227}
223 228
224static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 * c, unsigned int size) 229static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 * c, unsigned int size)
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
new file mode 100644
index 000000000000..996a7c4f5963
--- /dev/null
+++ b/arch/x86/kernel/ds.c
@@ -0,0 +1,429 @@
1/*
2 * Debug Store support
3 *
4 * This provides a low-level interface to the hardware's Debug Store
5 * feature that is used for last branch recording (LBR) and
6 * precise-event based sampling (PEBS).
7 *
8 * Different architectures use a different DS layout/pointer size.
9 * The below functions therefore work on a void*.
10 *
11 *
12 * Since there is no user for PEBS, yet, only LBR (or branch
13 * trace store, BTS) is supported.
14 *
15 *
16 * Copyright (C) 2007 Intel Corporation.
17 * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007
18 */
19
20#include <asm/ds.h>
21
22#include <linux/errno.h>
23#include <linux/string.h>
24#include <linux/slab.h>
25
26
27/*
28 * Debug Store (DS) save area configuration (see Intel64 and IA32
29 * Architectures Software Developer's Manual, section 18.5)
30 *
31 * The DS configuration consists of the following fields; different
32 * architetures vary in the size of those fields.
33 * - double-word aligned base linear address of the BTS buffer
34 * - write pointer into the BTS buffer
35 * - end linear address of the BTS buffer (one byte beyond the end of
36 * the buffer)
37 * - interrupt pointer into BTS buffer
38 * (interrupt occurs when write pointer passes interrupt pointer)
39 * - double-word aligned base linear address of the PEBS buffer
40 * - write pointer into the PEBS buffer
41 * - end linear address of the PEBS buffer (one byte beyond the end of
42 * the buffer)
43 * - interrupt pointer into PEBS buffer
44 * (interrupt occurs when write pointer passes interrupt pointer)
45 * - value to which counter is reset following counter overflow
46 *
47 * On later architectures, the last branch recording hardware uses
48 * 64bit pointers even in 32bit mode.
49 *
50 *
51 * Branch Trace Store (BTS) records store information about control
52 * flow changes. They at least provide the following information:
53 * - source linear address
54 * - destination linear address
55 *
56 * Netburst supported a predicated bit that had been dropped in later
57 * architectures. We do not suppor it.
58 *
59 *
60 * In order to abstract from the actual DS and BTS layout, we describe
61 * the access to the relevant fields.
62 * Thanks to Andi Kleen for proposing this design.
63 *
64 * The implementation, however, is not as general as it might seem. In
65 * order to stay somewhat simple and efficient, we assume an
66 * underlying unsigned type (mostly a pointer type) and we expect the
67 * field to be at least as big as that type.
68 */
69
70/*
71 * A special from_ip address to indicate that the BTS record is an
72 * info record that needs to be interpreted or skipped.
73 */
74#define BTS_ESCAPE_ADDRESS (-1)
75
76/*
77 * A field access descriptor
78 */
79struct access_desc {
80 unsigned char offset;
81 unsigned char size;
82};
83
84/*
85 * The configuration for a particular DS/BTS hardware implementation.
86 */
87struct ds_configuration {
88 /* the DS configuration */
89 unsigned char sizeof_ds;
90 struct access_desc bts_buffer_base;
91 struct access_desc bts_index;
92 struct access_desc bts_absolute_maximum;
93 struct access_desc bts_interrupt_threshold;
94 /* the BTS configuration */
95 unsigned char sizeof_bts;
96 struct access_desc from_ip;
97 struct access_desc to_ip;
98 /* BTS variants used to store additional information like
99 timestamps */
100 struct access_desc info_type;
101 struct access_desc info_data;
102 unsigned long debugctl_mask;
103};
104
105/*
106 * The global configuration used by the below accessor functions
107 */
108static struct ds_configuration ds_cfg;
109
110/*
111 * Accessor functions for some DS and BTS fields using the above
112 * global ptrace_bts_cfg.
113 */
114static inline void *get_bts_buffer_base(char *base)
115{
116 return *(void **)(base + ds_cfg.bts_buffer_base.offset);
117}
118static inline void set_bts_buffer_base(char *base, void *value)
119{
120 (*(void **)(base + ds_cfg.bts_buffer_base.offset)) = value;
121}
122static inline void *get_bts_index(char *base)
123{
124 return *(void **)(base + ds_cfg.bts_index.offset);
125}
126static inline void set_bts_index(char *base, void *value)
127{
128 (*(void **)(base + ds_cfg.bts_index.offset)) = value;
129}
130static inline void *get_bts_absolute_maximum(char *base)
131{
132 return *(void **)(base + ds_cfg.bts_absolute_maximum.offset);
133}
134static inline void set_bts_absolute_maximum(char *base, void *value)
135{
136 (*(void **)(base + ds_cfg.bts_absolute_maximum.offset)) = value;
137}
138static inline void *get_bts_interrupt_threshold(char *base)
139{
140 return *(void **)(base + ds_cfg.bts_interrupt_threshold.offset);
141}
142static inline void set_bts_interrupt_threshold(char *base, void *value)
143{
144 (*(void **)(base + ds_cfg.bts_interrupt_threshold.offset)) = value;
145}
146static inline long get_from_ip(char *base)
147{
148 return *(long *)(base + ds_cfg.from_ip.offset);
149}
150static inline void set_from_ip(char *base, long value)
151{
152 (*(long *)(base + ds_cfg.from_ip.offset)) = value;
153}
154static inline long get_to_ip(char *base)
155{
156 return *(long *)(base + ds_cfg.to_ip.offset);
157}
158static inline void set_to_ip(char *base, long value)
159{
160 (*(long *)(base + ds_cfg.to_ip.offset)) = value;
161}
162static inline unsigned char get_info_type(char *base)
163{
164 return *(unsigned char *)(base + ds_cfg.info_type.offset);
165}
166static inline void set_info_type(char *base, unsigned char value)
167{
168 (*(unsigned char *)(base + ds_cfg.info_type.offset)) = value;
169}
170/*
171 * The info data might overlap with the info type on some architectures.
172 * We therefore read and write the exact number of bytes.
173 */
174static inline unsigned long long get_info_data(char *base)
175{
176 unsigned long long value = 0;
177 memcpy(&value,
178 base + ds_cfg.info_data.offset,
179 ds_cfg.info_data.size);
180 return value;
181}
182static inline void set_info_data(char *base, unsigned long long value)
183{
184 memcpy(base + ds_cfg.info_data.offset,
185 &value,
186 ds_cfg.info_data.size);
187}
188
189
190int ds_allocate(void **dsp, size_t bts_size_in_records)
191{
192 size_t bts_size_in_bytes = 0;
193 void *bts = 0;
194 void *ds = 0;
195
196 if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
197 return -EOPNOTSUPP;
198
199 if (bts_size_in_records < 0)
200 return -EINVAL;
201
202 bts_size_in_bytes =
203 bts_size_in_records * ds_cfg.sizeof_bts;
204
205 if (bts_size_in_bytes <= 0)
206 return -EINVAL;
207
208 bts = kzalloc(bts_size_in_bytes, GFP_KERNEL);
209
210 if (!bts)
211 return -ENOMEM;
212
213 ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL);
214
215 if (!ds) {
216 kfree(bts);
217 return -ENOMEM;
218 }
219
220 set_bts_buffer_base(ds, bts);
221 set_bts_index(ds, bts);
222 set_bts_absolute_maximum(ds, bts + bts_size_in_bytes);
223 set_bts_interrupt_threshold(ds, bts + bts_size_in_bytes + 1);
224
225 *dsp = ds;
226 return 0;
227}
228
229int ds_free(void **dsp)
230{
231 if (*dsp)
232 kfree(get_bts_buffer_base(*dsp));
233 kfree(*dsp);
234 *dsp = 0;
235
236 return 0;
237}
238
239int ds_get_bts_size(void *ds)
240{
241 size_t size_in_bytes;
242
243 if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
244 return -EOPNOTSUPP;
245
246 size_in_bytes =
247 get_bts_absolute_maximum(ds) -
248 get_bts_buffer_base(ds);
249
250 return size_in_bytes / ds_cfg.sizeof_bts;
251}
252
253int ds_get_bts_index(void *ds)
254{
255 size_t index_offset_in_bytes;
256
257 if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
258 return -EOPNOTSUPP;
259
260 index_offset_in_bytes =
261 get_bts_index(ds) -
262 get_bts_buffer_base(ds);
263
264 return index_offset_in_bytes / ds_cfg.sizeof_bts;
265}
266
267int ds_read_bts(void *ds, size_t index, struct bts_struct *out)
268{
269 void *bts;
270
271 if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
272 return -EOPNOTSUPP;
273
274 if (index < 0)
275 return -EINVAL;
276
277 if (index >= ds_get_bts_size(ds))
278 return -EINVAL;
279
280 bts = get_bts_buffer_base(ds);
281 bts = (char *)bts + (index * ds_cfg.sizeof_bts);
282
283 memset(out, 0, sizeof(*out));
284 if (get_from_ip(bts) == BTS_ESCAPE_ADDRESS) {
285 out->qualifier = get_info_type(bts);
286 out->variant.timestamp = get_info_data(bts);
287 } else {
288 out->qualifier = BTS_BRANCH;
289 out->variant.lbr.from_ip = get_from_ip(bts);
290 out->variant.lbr.to_ip = get_to_ip(bts);
291 }
292
293 return 0;
294}
295
296int ds_write_bts(void *ds, const struct bts_struct *in)
297{
298 void *bts;
299
300 if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
301 return -EOPNOTSUPP;
302
303 if (ds_get_bts_size(ds) <= 0)
304 return -ENXIO;
305
306 bts = get_bts_index(ds);
307
308 memset(bts, 0, ds_cfg.sizeof_bts);
309 switch (in->qualifier) {
310 case BTS_INVALID:
311 break;
312
313 case BTS_BRANCH:
314 set_from_ip(bts, in->variant.lbr.from_ip);
315 set_to_ip(bts, in->variant.lbr.to_ip);
316 break;
317
318 case BTS_TASK_ARRIVES:
319 case BTS_TASK_DEPARTS:
320 set_from_ip(bts, BTS_ESCAPE_ADDRESS);
321 set_info_type(bts, in->qualifier);
322 set_info_data(bts, in->variant.timestamp);
323 break;
324
325 default:
326 return -EINVAL;
327 }
328
329 bts = (char *)bts + ds_cfg.sizeof_bts;
330 if (bts >= get_bts_absolute_maximum(ds))
331 bts = get_bts_buffer_base(ds);
332 set_bts_index(ds, bts);
333
334 return 0;
335}
336
337unsigned long ds_debugctl_mask(void)
338{
339 return ds_cfg.debugctl_mask;
340}
341
342#ifdef __i386__
343static const struct ds_configuration ds_cfg_netburst = {
344 .sizeof_ds = 9 * 4,
345 .bts_buffer_base = { 0, 4 },
346 .bts_index = { 4, 4 },
347 .bts_absolute_maximum = { 8, 4 },
348 .bts_interrupt_threshold = { 12, 4 },
349 .sizeof_bts = 3 * 4,
350 .from_ip = { 0, 4 },
351 .to_ip = { 4, 4 },
352 .info_type = { 4, 1 },
353 .info_data = { 5, 7 },
354 .debugctl_mask = (1<<2)|(1<<3)
355};
356
357static const struct ds_configuration ds_cfg_pentium_m = {
358 .sizeof_ds = 9 * 4,
359 .bts_buffer_base = { 0, 4 },
360 .bts_index = { 4, 4 },
361 .bts_absolute_maximum = { 8, 4 },
362 .bts_interrupt_threshold = { 12, 4 },
363 .sizeof_bts = 3 * 4,
364 .from_ip = { 0, 4 },
365 .to_ip = { 4, 4 },
366 .info_type = { 4, 1 },
367 .info_data = { 5, 7 },
368 .debugctl_mask = (1<<6)|(1<<7)
369};
370#endif /* _i386_ */
371
372static const struct ds_configuration ds_cfg_core2 = {
373 .sizeof_ds = 9 * 8,
374 .bts_buffer_base = { 0, 8 },
375 .bts_index = { 8, 8 },
376 .bts_absolute_maximum = { 16, 8 },
377 .bts_interrupt_threshold = { 24, 8 },
378 .sizeof_bts = 3 * 8,
379 .from_ip = { 0, 8 },
380 .to_ip = { 8, 8 },
381 .info_type = { 8, 1 },
382 .info_data = { 9, 7 },
383 .debugctl_mask = (1<<6)|(1<<7)|(1<<9)
384};
385
386static inline void
387ds_configure(const struct ds_configuration *cfg)
388{
389 ds_cfg = *cfg;
390}
391
392void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
393{
394 switch (c->x86) {
395 case 0x6:
396 switch (c->x86_model) {
397#ifdef __i386__
398 case 0xD:
399 case 0xE: /* Pentium M */
400 ds_configure(&ds_cfg_pentium_m);
401 break;
402#endif /* _i386_ */
403 case 0xF: /* Core2 */
404 ds_configure(&ds_cfg_core2);
405 break;
406 default:
407 /* sorry, don't know about them */
408 break;
409 }
410 break;
411 case 0xF:
412 switch (c->x86_model) {
413#ifdef __i386__
414 case 0x0:
415 case 0x1:
416 case 0x2: /* Netburst */
417 ds_configure(&ds_cfg_netburst);
418 break;
419#endif /* _i386_ */
420 default:
421 /* sorry, don't know about them */
422 break;
423 }
424 break;
425 default:
426 /* sorry, don't know about them */
427 break;
428 }
429}
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 5350763a2d03..2b9db9371060 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -614,11 +614,21 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
614 struct tss_struct *tss) 614 struct tss_struct *tss)
615{ 615{
616 struct thread_struct *prev, *next; 616 struct thread_struct *prev, *next;
617 unsigned long debugctl;
617 618
618 prev = &prev_p->thread; 619 prev = &prev_p->thread;
619 next = &next_p->thread; 620 next = &next_p->thread;
620 621
621 if (next->debugctlmsr != prev->debugctlmsr) 622 debugctl = prev->debugctlmsr;
623 if (next->ds_area_msr != prev->ds_area_msr) {
624 /* we clear debugctl to make sure DS
625 * is not in use when we change it */
626 debugctl = 0;
627 wrmsrl(MSR_IA32_DEBUGCTLMSR, 0);
628 wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0);
629 }
630
631 if (next->debugctlmsr != debugctl)
622 wrmsr(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr, 0); 632 wrmsr(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr, 0);
623 633
624 if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { 634 if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
@@ -642,6 +652,13 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
642 } 652 }
643#endif 653#endif
644 654
655 if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
656 ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
657
658 if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
659 ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
660
661
645 if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { 662 if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
646 /* 663 /*
647 * Disable the bitmap via an invalid offset. We still cache 664 * Disable the bitmap via an invalid offset. We still cache
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 057b5442ffda..843bf0c978a4 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -568,11 +568,21 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
568 struct tss_struct *tss) 568 struct tss_struct *tss)
569{ 569{
570 struct thread_struct *prev, *next; 570 struct thread_struct *prev, *next;
571 unsigned long debugctl;
571 572
572 prev = &prev_p->thread, 573 prev = &prev_p->thread,
573 next = &next_p->thread; 574 next = &next_p->thread;
574 575
575 if (next->debugctlmsr != prev->debugctlmsr) 576 debugctl = prev->debugctlmsr;
577 if (next->ds_area_msr != prev->ds_area_msr) {
578 /* we clear debugctl to make sure DS
579 * is not in use when we change it */
580 debugctl = 0;
581 wrmsrl(MSR_IA32_DEBUGCTLMSR, 0);
582 wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
583 }
584
585 if (next->debugctlmsr != debugctl)
576 wrmsrl(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr); 586 wrmsrl(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr);
577 587
578 if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { 588 if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
@@ -598,6 +608,16 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
598 */ 608 */
599 memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); 609 memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
600 } 610 }
611
612 /*
613 * Last branch recording recofiguration of trace hardware and
614 * disentangling of trace data per task.
615 */
616 if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
617 ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
618
619 if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
620 ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
601} 621}
602 622
603/* 623/*
@@ -701,8 +721,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
701 /* 721 /*
702 * Now maybe reload the debug registers and handle I/O bitmaps 722 * Now maybe reload the debug registers and handle I/O bitmaps
703 */ 723 */
704 if (unlikely((task_thread_info(next_p)->flags & _TIF_WORK_CTXSW)) 724 if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
705 || test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) 725 task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
706 __switch_to_xtra(prev_p, next_p, tss); 726 __switch_to_xtra(prev_p, next_p, tss);
707 727
708 /* If the task has used fpu the last 5 timeslices, just do a full 728 /* If the task has used fpu the last 5 timeslices, just do a full
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 3399c1be79b8..8d0dd8b5effe 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -2,6 +2,9 @@
2/* 2/*
3 * Pentium III FXSR, SSE support 3 * Pentium III FXSR, SSE support
4 * Gareth Hughes <gareth@valinux.com>, May 2000 4 * Gareth Hughes <gareth@valinux.com>, May 2000
5 *
6 * BTS tracing
7 * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007
5 */ 8 */
6 9
7#include <linux/kernel.h> 10#include <linux/kernel.h>
@@ -26,6 +29,14 @@
26#include <asm/desc.h> 29#include <asm/desc.h>
27#include <asm/prctl.h> 30#include <asm/prctl.h>
28#include <asm/proto.h> 31#include <asm/proto.h>
32#include <asm/ds.h>
33
34
35/*
36 * The maximal size of a BTS buffer per traced task in number of BTS
37 * records.
38 */
39#define PTRACE_BTS_BUFFER_MAX 4000
29 40
30/* 41/*
31 * does not yet catch signals sent when the child dies. 42 * does not yet catch signals sent when the child dies.
@@ -455,6 +466,165 @@ static int ptrace_set_debugreg(struct task_struct *child,
455 return 0; 466 return 0;
456} 467}
457 468
469static int ptrace_bts_max_buffer_size(void)
470{
471 return PTRACE_BTS_BUFFER_MAX;
472}
473
474static int ptrace_bts_get_buffer_size(struct task_struct *child)
475{
476 if (!child->thread.ds_area_msr)
477 return -ENXIO;
478
479 return ds_get_bts_size((void *)child->thread.ds_area_msr);
480}
481
482static int ptrace_bts_get_index(struct task_struct *child)
483{
484 if (!child->thread.ds_area_msr)
485 return -ENXIO;
486
487 return ds_get_bts_index((void *)child->thread.ds_area_msr);
488}
489
490static int ptrace_bts_read_record(struct task_struct *child,
491 long index,
492 struct bts_struct __user *out)
493{
494 struct bts_struct ret;
495 int retval;
496
497 if (!child->thread.ds_area_msr)
498 return -ENXIO;
499
500 retval = ds_read_bts((void *)child->thread.ds_area_msr,
501 index, &ret);
502 if (retval)
503 return retval;
504
505 if (copy_to_user(out, &ret, sizeof(ret)))
506 return -EFAULT;
507
508 return sizeof(ret);
509}
510
511static int ptrace_bts_write_record(struct task_struct *child,
512 const struct bts_struct *in)
513{
514 int retval;
515
516 if (!child->thread.ds_area_msr)
517 return -ENXIO;
518
519 retval = ds_write_bts((void *)child->thread.ds_area_msr, in);
520 if (retval)
521 return retval;
522
523 return sizeof(*in);
524}
525
526static int ptrace_bts_config(struct task_struct *child,
527 unsigned long options)
528{
529 unsigned long debugctl_mask = ds_debugctl_mask();
530 int retval;
531
532 retval = ptrace_bts_get_buffer_size(child);
533 if (retval < 0)
534 return retval;
535 if (retval == 0)
536 return -ENXIO;
537
538 if (options & PTRACE_BTS_O_TRACE_TASK) {
539 child->thread.debugctlmsr |= debugctl_mask;
540 set_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
541 } else {
542 /* there is no way for us to check whether we 'own'
543 * the respective bits in the DEBUGCTL MSR, we're
544 * about to clear */
545 child->thread.debugctlmsr &= ~debugctl_mask;
546
547 if (!child->thread.debugctlmsr)
548 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
549 }
550
551 if (options & PTRACE_BTS_O_TIMESTAMPS)
552 set_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
553 else
554 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
555
556 return 0;
557}
558
559static int ptrace_bts_status(struct task_struct *child)
560{
561 unsigned long debugctl_mask = ds_debugctl_mask();
562 int retval, status = 0;
563
564 retval = ptrace_bts_get_buffer_size(child);
565 if (retval < 0)
566 return retval;
567 if (retval == 0)
568 return -ENXIO;
569
570 if (ptrace_bts_get_buffer_size(child) <= 0)
571 return -ENXIO;
572
573 if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) &&
574 child->thread.debugctlmsr & debugctl_mask)
575 status |= PTRACE_BTS_O_TRACE_TASK;
576 if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS))
577 status |= PTRACE_BTS_O_TIMESTAMPS;
578
579 return status;
580}
581
582static int ptrace_bts_allocate_bts(struct task_struct *child,
583 int size_in_records)
584{
585 int retval = 0;
586 void *ds;
587
588 if (size_in_records < 0)
589 return -EINVAL;
590
591 if (size_in_records > ptrace_bts_max_buffer_size())
592 return -EINVAL;
593
594 if (size_in_records == 0) {
595 ptrace_bts_config(child, /* options = */ 0);
596 } else {
597 retval = ds_allocate(&ds, size_in_records);
598 if (retval)
599 return retval;
600 }
601
602 if (child->thread.ds_area_msr)
603 ds_free((void **)&child->thread.ds_area_msr);
604
605 child->thread.ds_area_msr = (unsigned long)ds;
606 if (child->thread.ds_area_msr)
607 set_tsk_thread_flag(child, TIF_DS_AREA_MSR);
608 else
609 clear_tsk_thread_flag(child, TIF_DS_AREA_MSR);
610
611 return retval;
612}
613
614void ptrace_bts_take_timestamp(struct task_struct *tsk,
615 enum bts_qualifier qualifier)
616{
617 struct bts_struct rec = {
618 .qualifier = qualifier,
619 .variant.timestamp = sched_clock()
620 };
621
622 if (ptrace_bts_get_buffer_size(tsk) <= 0)
623 return;
624
625 ptrace_bts_write_record(tsk, &rec);
626}
627
458/* 628/*
459 * Called by kernel/ptrace.c when detaching.. 629 * Called by kernel/ptrace.c when detaching..
460 * 630 *
@@ -466,6 +636,11 @@ void ptrace_disable(struct task_struct *child)
466#ifdef TIF_SYSCALL_EMU 636#ifdef TIF_SYSCALL_EMU
467 clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); 637 clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
468#endif 638#endif
639 ptrace_bts_config(child, /* options = */ 0);
640 if (child->thread.ds_area_msr) {
641 ds_free((void **)&child->thread.ds_area_msr);
642 clear_tsk_thread_flag(child, TIF_DS_AREA_MSR);
643 }
469} 644}
470 645
471long arch_ptrace(struct task_struct *child, long request, long addr, long data) 646long arch_ptrace(struct task_struct *child, long request, long addr, long data)
@@ -626,6 +801,36 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
626 break; 801 break;
627#endif 802#endif
628 803
804 case PTRACE_BTS_MAX_BUFFER_SIZE:
805 ret = ptrace_bts_max_buffer_size();
806 break;
807
808 case PTRACE_BTS_ALLOCATE_BUFFER:
809 ret = ptrace_bts_allocate_bts(child, data);
810 break;
811
812 case PTRACE_BTS_GET_BUFFER_SIZE:
813 ret = ptrace_bts_get_buffer_size(child);
814 break;
815
816 case PTRACE_BTS_GET_INDEX:
817 ret = ptrace_bts_get_index(child);
818 break;
819
820 case PTRACE_BTS_READ_RECORD:
821 ret = ptrace_bts_read_record
822 (child, data,
823 (struct bts_struct __user *) addr);
824 break;
825
826 case PTRACE_BTS_CONFIG:
827 ret = ptrace_bts_config(child, data);
828 break;
829
830 case PTRACE_BTS_STATUS:
831 ret = ptrace_bts_status(child);
832 break;
833
629 default: 834 default:
630 ret = ptrace_request(child, request, addr, data); 835 ret = ptrace_request(child, request, addr, data);
631 break; 836 break;
@@ -809,6 +1014,13 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
809 case PTRACE_SETOPTIONS: 1014 case PTRACE_SETOPTIONS:
810 case PTRACE_SET_THREAD_AREA: 1015 case PTRACE_SET_THREAD_AREA:
811 case PTRACE_GET_THREAD_AREA: 1016 case PTRACE_GET_THREAD_AREA:
1017 case PTRACE_BTS_MAX_BUFFER_SIZE:
1018 case PTRACE_BTS_ALLOCATE_BUFFER:
1019 case PTRACE_BTS_GET_BUFFER_SIZE:
1020 case PTRACE_BTS_GET_INDEX:
1021 case PTRACE_BTS_READ_RECORD:
1022 case PTRACE_BTS_CONFIG:
1023 case PTRACE_BTS_STATUS:
812 return sys_ptrace(request, pid, addr, data); 1024 return sys_ptrace(request, pid, addr, data);
813 1025
814 default: 1026 default:
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index ce4d6b52ce36..f2b131ef844e 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -60,6 +60,7 @@
60#include <asm/dmi.h> 60#include <asm/dmi.h>
61#include <asm/cacheflush.h> 61#include <asm/cacheflush.h>
62#include <asm/mce.h> 62#include <asm/mce.h>
63#include <asm/ds.h>
63 64
64/* 65/*
65 * Machine setup.. 66 * Machine setup..
@@ -823,6 +824,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
823 set_cpu_cap(c, X86_FEATURE_PEBS); 824 set_cpu_cap(c, X86_FEATURE_PEBS);
824 } 825 }
825 826
827
828 if (cpu_has_bts)
829 ds_init_intel(c);
830
826 n = c->extended_cpuid_level; 831 n = c->extended_cpuid_level;
827 if (n >= 0x80000008) { 832 if (n >= 0x80000008) {
828 unsigned eax = cpuid_eax(0x80000008); 833 unsigned eax = cpuid_eax(0x80000008);
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index f55c003f5b63..21ea22fda5fc 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -169,9 +169,14 @@ static void enable_step(struct task_struct *child, bool block)
169 */ 169 */
170 if (enable_single_step(child) && block) { 170 if (enable_single_step(child) && block) {
171 set_tsk_thread_flag(child, TIF_DEBUGCTLMSR); 171 set_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
172 write_debugctlmsr(child, DEBUGCTLMSR_BTF); 172 write_debugctlmsr(child,
173 } else if (test_and_clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR)) { 173 child->thread.debugctlmsr | DEBUGCTLMSR_BTF);
174 write_debugctlmsr(child, 0); 174 } else {
175 write_debugctlmsr(child,
176 child->thread.debugctlmsr & ~TIF_DEBUGCTLMSR);
177
178 if (!child->thread.debugctlmsr)
179 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
175 } 180 }
176} 181}
177 182
@@ -190,8 +195,11 @@ void user_disable_single_step(struct task_struct *child)
190 /* 195 /*
191 * Make sure block stepping (BTF) is disabled. 196 * Make sure block stepping (BTF) is disabled.
192 */ 197 */
193 if (test_and_clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR)) 198 write_debugctlmsr(child,
194 write_debugctlmsr(child, 0); 199 child->thread.debugctlmsr & ~TIF_DEBUGCTLMSR);
200
201 if (!child->thread.debugctlmsr)
202 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
195 203
196 /* Always clear TIF_SINGLESTEP... */ 204 /* Always clear TIF_SINGLESTEP... */
197 clear_tsk_thread_flag(child, TIF_SINGLESTEP); 205 clear_tsk_thread_flag(child, TIF_SINGLESTEP);