aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMarkus Metzger <markus.t.metzger@intel.com>2008-04-08 05:01:58 -0400
committerIngo Molnar <mingo@elte.hu>2008-05-12 15:27:53 -0400
commit93fa7636dfdc059b25df148f230c0991096afdef (patch)
treecf277bd09091ac69abb5f7fdc21c705b8f186f88
parent492c2e476eac010962850006c49df326919b284c (diff)
x86, ptrace: PEBS support
Polish the ds.h interface and add support for PEBS. Ds.c is meant to be the resource allocator for per-thread and per-cpu BTS and PEBS recording. It is used by ptrace/utrace to provide execution tracing of debugged tasks. It will be used by profilers (e.g. perfmon2). It may be used by kernel debuggers to provide a kernel execution trace. Changes in detail: - guard DS and ptrace by CONFIG macros - separate DS and BTS more clearly - simplify field accesses - add functions to manage PEBS buffers - add simple protection/allocation mechanism - added support for Atom Opens: - buffer overflow handling Currently, only circular buffers are supported. This is all we need for debugging. Profilers would want an overflow notification. This is planned to be added when perfmon2 is made to use the ds.h interface. - utrace intermediate layer Signed-off-by: Markus Metzger <markus.t.metzger@intel.com> Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r--arch/x86/Kconfig.cpu18
-rw-r--r--arch/x86/kernel/cpu/intel.c3
-rw-r--r--arch/x86/kernel/ds.c953
-rw-r--r--arch/x86/kernel/process_32.c25
-rw-r--r--arch/x86/kernel/process_64.c25
-rw-r--r--arch/x86/kernel/ptrace.c444
-rw-r--r--arch/x86/kernel/setup_64.c3
-rw-r--r--include/asm-x86/ds.h258
-rw-r--r--include/asm-x86/processor.h12
-rw-r--r--include/asm-x86/ptrace-abi.h14
-rw-r--r--include/asm-x86/ptrace.h38
11 files changed, 1274 insertions, 519 deletions
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 2ad6301849a1..13f3f8bebfd1 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -415,3 +415,21 @@ config X86_MINIMUM_CPU_FAMILY
415config X86_DEBUGCTLMSR 415config X86_DEBUGCTLMSR
416 def_bool y 416 def_bool y
417 depends on !(M586MMX || M586TSC || M586 || M486 || M386) 417 depends on !(M586MMX || M586TSC || M586 || M486 || M386)
418
419config X86_DS
420 bool "Debug Store support"
421 default y
422 help
423 Add support for Debug Store.
424 This allows the kernel to provide a memory buffer to the hardware
425 to store various profiling and tracing events.
426
427config X86_PTRACE_BTS
428 bool "ptrace interface to Branch Trace Store"
429 default y
430 depends on (X86_DS && X86_DEBUGCTLMSR)
431 help
432 Add a ptrace interface to allow collecting an execution trace
433 of the traced task.
434 This collects control flow changes in a (cyclic) buffer and allows
435 debuggers to fill in the gaps and show an execution trace of the debuggee.
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index fe9224c51d37..cbffa2a25a13 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -222,10 +222,11 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
222 set_cpu_cap(c, X86_FEATURE_BTS); 222 set_cpu_cap(c, X86_FEATURE_BTS);
223 if (!(l1 & (1<<12))) 223 if (!(l1 & (1<<12)))
224 set_cpu_cap(c, X86_FEATURE_PEBS); 224 set_cpu_cap(c, X86_FEATURE_PEBS);
225 ds_init_intel(c);
225 } 226 }
226 227
227 if (cpu_has_bts) 228 if (cpu_has_bts)
228 ds_init_intel(c); 229 ptrace_bts_init_intel(c);
229} 230}
230 231
231static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) 232static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size)
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 11c11b8ec48d..5b32b6d062b4 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -2,26 +2,48 @@
2 * Debug Store support 2 * Debug Store support
3 * 3 *
4 * This provides a low-level interface to the hardware's Debug Store 4 * This provides a low-level interface to the hardware's Debug Store
5 * feature that is used for last branch recording (LBR) and 5 * feature that is used for branch trace store (BTS) and
6 * precise-event based sampling (PEBS). 6 * precise-event based sampling (PEBS).
7 * 7 *
8 * Different architectures use a different DS layout/pointer size. 8 * It manages:
9 * The below functions therefore work on a void*. 9 * - per-thread and per-cpu allocation of BTS and PEBS
10 * - buffer memory allocation (optional)
11 * - buffer overflow handling
12 * - buffer access
10 * 13 *
14 * It assumes:
15 * - get_task_struct on all parameter tasks
16 * - current is allowed to trace parameter tasks
11 * 17 *
12 * Since there is no user for PEBS, yet, only LBR (or branch
13 * trace store, BTS) is supported.
14 * 18 *
15 * 19 * Copyright (C) 2007-2008 Intel Corporation.
16 * Copyright (C) 2007 Intel Corporation. 20 * Markus Metzger <markus.t.metzger@intel.com>, 2007-2008
17 * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007
18 */ 21 */
19 22
23
24#ifdef CONFIG_X86_DS
25
20#include <asm/ds.h> 26#include <asm/ds.h>
21 27
22#include <linux/errno.h> 28#include <linux/errno.h>
23#include <linux/string.h> 29#include <linux/string.h>
24#include <linux/slab.h> 30#include <linux/slab.h>
31#include <linux/sched.h>
32
33
34/*
35 * The configuration for a particular DS hardware implementation.
36 */
37struct ds_configuration {
38 /* the size of the DS structure in bytes */
39 unsigned char sizeof_ds;
40 /* the size of one pointer-typed field in the DS structure in bytes;
41 this covers the first 8 fields related to buffer management. */
42 unsigned char sizeof_field;
43 /* the size of a BTS/PEBS record in bytes */
44 unsigned char sizeof_rec[2];
45};
46static struct ds_configuration ds_cfg;
25 47
26 48
27/* 49/*
@@ -44,378 +66,747 @@
44 * (interrupt occurs when write pointer passes interrupt pointer) 66 * (interrupt occurs when write pointer passes interrupt pointer)
45 * - value to which counter is reset following counter overflow 67 * - value to which counter is reset following counter overflow
46 * 68 *
47 * On later architectures, the last branch recording hardware uses 69 * Later architectures use 64bit pointers throughout, whereas earlier
48 * 64bit pointers even in 32bit mode. 70 * architectures use 32bit pointers in 32bit mode.
49 *
50 *
51 * Branch Trace Store (BTS) records store information about control
52 * flow changes. They at least provide the following information:
53 * - source linear address
54 * - destination linear address
55 * 71 *
56 * Netburst supported a predicated bit that had been dropped in later
57 * architectures. We do not suppor it.
58 * 72 *
73 * We compute the base address for the first 8 fields based on:
74 * - the field size stored in the DS configuration
75 * - the relative field position
76 * - an offset giving the start of the respective region
59 * 77 *
60 * In order to abstract from the actual DS and BTS layout, we describe 78 * This offset is further used to index various arrays holding
61 * the access to the relevant fields. 79 * information for BTS and PEBS at the respective index.
62 * Thanks to Andi Kleen for proposing this design.
63 * 80 *
64 * The implementation, however, is not as general as it might seem. In 81 * On later 32bit processors, we only access the lower 32bit of the
65 * order to stay somewhat simple and efficient, we assume an 82 * 64bit pointer fields. The upper halves will be zeroed out.
66 * underlying unsigned type (mostly a pointer type) and we expect the
67 * field to be at least as big as that type.
68 */ 83 */
69 84
70/* 85enum ds_field {
71 * A special from_ip address to indicate that the BTS record is an 86 ds_buffer_base = 0,
72 * info record that needs to be interpreted or skipped. 87 ds_index,
73 */ 88 ds_absolute_maximum,
74#define BTS_ESCAPE_ADDRESS (-1) 89 ds_interrupt_threshold,
90};
75 91
76/* 92enum ds_qualifier {
77 * A field access descriptor 93 ds_bts = 0,
78 */ 94 ds_pebs
79struct access_desc {
80 unsigned char offset;
81 unsigned char size;
82}; 95};
83 96
97static inline unsigned long ds_get(const unsigned char *base,
98 enum ds_qualifier qual, enum ds_field field)
99{
100 base += (ds_cfg.sizeof_field * (field + (4 * qual)));
101 return *(unsigned long *)base;
102}
103
104static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
105 enum ds_field field, unsigned long value)
106{
107 base += (ds_cfg.sizeof_field * (field + (4 * qual)));
108 (*(unsigned long *)base) = value;
109}
110
111
84/* 112/*
85 * The configuration for a particular DS/BTS hardware implementation. 113 * Locking is done only for allocating BTS or PEBS resources and for
114 * guarding context and buffer memory allocation.
115 *
116 * Most functions require the current task to own the ds context part
117 * they are going to access. All the locking is done when validating
118 * access to the context.
86 */ 119 */
87struct ds_configuration { 120static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock);
88 /* the DS configuration */
89 unsigned char sizeof_ds;
90 struct access_desc bts_buffer_base;
91 struct access_desc bts_index;
92 struct access_desc bts_absolute_maximum;
93 struct access_desc bts_interrupt_threshold;
94 /* the BTS configuration */
95 unsigned char sizeof_bts;
96 struct access_desc from_ip;
97 struct access_desc to_ip;
98 /* BTS variants used to store additional information like
99 timestamps */
100 struct access_desc info_type;
101 struct access_desc info_data;
102 unsigned long debugctl_mask;
103};
104 121
105/* 122/*
106 * The global configuration used by the below accessor functions 123 * Validate that the current task is allowed to access the BTS/PEBS
124 * buffer of the parameter task.
125 *
126 * Returns 0, if access is granted; -Eerrno, otherwise.
107 */ 127 */
108static struct ds_configuration ds_cfg; 128static inline int ds_validate_access(struct ds_context *context,
129 enum ds_qualifier qual)
130{
131 if (!context)
132 return -EPERM;
133
134 if (context->owner[qual] == current)
135 return 0;
136
137 return -EPERM;
138}
139
109 140
110/* 141/*
111 * Accessor functions for some DS and BTS fields using the above 142 * We either support (system-wide) per-cpu or per-thread allocation.
112 * global ptrace_bts_cfg. 143 * We distinguish the two based on the task_struct pointer, where a
144 * NULL pointer indicates per-cpu allocation for the current cpu.
145 *
146 * Allocations are use-counted. As soon as resources are allocated,
147 * further allocations must be of the same type (per-cpu or
148 * per-thread). We model this by counting allocations (i.e. the number
149 * of tracers of a certain type) for one type negatively:
150 * =0 no tracers
151 * >0 number of per-thread tracers
152 * <0 number of per-cpu tracers
153 *
154 * The below functions to get and put tracers and to check the
155 * allocation type require the ds_lock to be held by the caller.
156 *
157 * Tracers essentially gives the number of ds contexts for a certain
158 * type of allocation.
113 */ 159 */
114static inline unsigned long get_bts_buffer_base(char *base) 160static long tracers;
161
162static inline void get_tracer(struct task_struct *task)
115{ 163{
116 return *(unsigned long *)(base + ds_cfg.bts_buffer_base.offset); 164 tracers += (task ? 1 : -1);
117} 165}
118static inline void set_bts_buffer_base(char *base, unsigned long value) 166
167static inline void put_tracer(struct task_struct *task)
119{ 168{
120 (*(unsigned long *)(base + ds_cfg.bts_buffer_base.offset)) = value; 169 tracers -= (task ? 1 : -1);
121} 170}
122static inline unsigned long get_bts_index(char *base) 171
172static inline int check_tracer(struct task_struct *task)
123{ 173{
124 return *(unsigned long *)(base + ds_cfg.bts_index.offset); 174 return (task ? (tracers >= 0) : (tracers <= 0));
125} 175}
126static inline void set_bts_index(char *base, unsigned long value) 176
177
178/*
179 * The DS context is either attached to a thread or to a cpu:
180 * - in the former case, the thread_struct contains a pointer to the
181 * attached context.
182 * - in the latter case, we use a static array of per-cpu context
183 * pointers.
184 *
185 * Contexts are use-counted. They are allocated on first access and
186 * deallocated when the last user puts the context.
187 *
188 * We distinguish between an allocating and a non-allocating get of a
189 * context:
190 * - the allocating get is used for requesting BTS/PEBS resources. It
191 * requires the caller to hold the global ds_lock.
192 * - the non-allocating get is used for all other cases. A
193 * non-existing context indicates an error. It acquires and releases
194 * the ds_lock itself for obtaining the context.
195 *
196 * A context and its DS configuration are allocated and deallocated
197 * together. A context always has a DS configuration of the
198 * appropriate size.
199 */
200static DEFINE_PER_CPU(struct ds_context *, system_context);
201
202#define this_system_context per_cpu(system_context, smp_processor_id())
203
204/*
205 * Returns the pointer to the parameter task's context or to the
206 * system-wide context, if task is NULL.
207 *
208 * Increases the use count of the returned context, if not NULL.
209 */
210static inline struct ds_context *ds_get_context(struct task_struct *task)
127{ 211{
128 (*(unsigned long *)(base + ds_cfg.bts_index.offset)) = value; 212 struct ds_context *context;
213
214 spin_lock(&ds_lock);
215
216 context = (task ? task->thread.ds_ctx : this_system_context);
217 if (context)
218 context->count++;
219
220 spin_unlock(&ds_lock);
221
222 return context;
129} 223}
130static inline unsigned long get_bts_absolute_maximum(char *base) 224
225/*
226 * Same as ds_get_context, but allocates the context and it's DS
227 * structure, if necessary; returns NULL; if out of memory.
228 *
229 * pre: requires ds_lock to be held
230 */
231static inline struct ds_context *ds_alloc_context(struct task_struct *task)
131{ 232{
132 return *(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset); 233 struct ds_context **p_context =
234 (task ? &task->thread.ds_ctx : &this_system_context);
235 struct ds_context *context = *p_context;
236
237 if (!context) {
238 context = kzalloc(sizeof(*context), GFP_KERNEL);
239
240 if (!context)
241 return 0;
242
243 context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL);
244 if (!context->ds) {
245 kfree(context);
246 return 0;
247 }
248
249 *p_context = context;
250
251 context->this = p_context;
252 context->task = task;
253
254 if (task)
255 set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
256
257 if (!task || (task == current))
258 wrmsr(MSR_IA32_DS_AREA, (unsigned long)context->ds, 0);
259
260 get_tracer(task);
261 }
262
263 context->count++;
264
265 return context;
133} 266}
134static inline void set_bts_absolute_maximum(char *base, unsigned long value) 267
268/*
269 * Decreases the use count of the parameter context, if not NULL.
270 * Deallocates the context, if the use count reaches zero.
271 */
272static inline void ds_put_context(struct ds_context *context)
135{ 273{
136 (*(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset)) = value; 274 if (!context)
275 return;
276
277 spin_lock(&ds_lock);
278
279 if (--context->count)
280 goto out;
281
282 *(context->this) = 0;
283
284 if (context->task)
285 clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);
286
287 if (!context->task || (context->task == current))
288 wrmsrl(MSR_IA32_DS_AREA, 0);
289
290 put_tracer(context->task);
291
292 /* free any leftover buffers from tracers that did not
293 * deallocate them properly. */
294 kfree(context->buffer[ds_bts]);
295 kfree(context->buffer[ds_pebs]);
296 kfree(context->ds);
297 kfree(context);
298 out:
299 spin_unlock(&ds_lock);
137} 300}
138static inline unsigned long get_bts_interrupt_threshold(char *base) 301
302
303/*
304 * Handle a buffer overflow
305 *
306 * task: the task whose buffers are overflowing;
307 * NULL for a buffer overflow on the current cpu
308 * context: the ds context
309 * qual: the buffer type
310 */
311static void ds_overflow(struct task_struct *task, struct ds_context *context,
312 enum ds_qualifier qual)
139{ 313{
140 return *(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset); 314 if (!context)
315 return;
316
317 if (context->callback[qual])
318 (*context->callback[qual])(task);
319
320 /* todo: do some more overflow handling */
141} 321}
142static inline void set_bts_interrupt_threshold(char *base, unsigned long value) 322
323
324/*
325 * Allocate a non-pageable buffer of the parameter size.
326 * Checks the memory and the locked memory rlimit.
327 *
328 * Returns the buffer, if successful;
329 * NULL, if out of memory or rlimit exceeded.
330 *
331 * size: the requested buffer size in bytes
332 * pages (out): if not NULL, contains the number of pages reserved
333 */
334static inline void *ds_allocate_buffer(size_t size, unsigned int *pages)
143{ 335{
144 (*(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset)) = value; 336 unsigned long rlim, vm, pgsz;
337 void *buffer;
338
339 pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
340
341 rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
342 vm = current->mm->total_vm + pgsz;
343 if (rlim < vm)
344 return 0;
345
346 rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
347 vm = current->mm->locked_vm + pgsz;
348 if (rlim < vm)
349 return 0;
350
351 buffer = kzalloc(size, GFP_KERNEL);
352 if (!buffer)
353 return 0;
354
355 current->mm->total_vm += pgsz;
356 current->mm->locked_vm += pgsz;
357
358 if (pages)
359 *pages = pgsz;
360
361 return buffer;
145} 362}
146static inline unsigned long get_from_ip(char *base) 363
364static int ds_request(struct task_struct *task, void *base, size_t size,
365 ds_ovfl_callback_t ovfl, enum ds_qualifier qual)
147{ 366{
148 return *(unsigned long *)(base + ds_cfg.from_ip.offset); 367 struct ds_context *context;
368 unsigned long buffer, adj;
369 const unsigned long alignment = (1 << 3);
370 int error = 0;
371
372 if (!ds_cfg.sizeof_ds)
373 return -EOPNOTSUPP;
374
375 /* we require some space to do alignment adjustments below */
376 if (size < (alignment + ds_cfg.sizeof_rec[qual]))
377 return -EINVAL;
378
379 /* buffer overflow notification is not yet implemented */
380 if (ovfl)
381 return -EOPNOTSUPP;
382
383
384 spin_lock(&ds_lock);
385
386 if (!check_tracer(task))
387 return -EPERM;
388
389 error = -ENOMEM;
390 context = ds_alloc_context(task);
391 if (!context)
392 goto out_unlock;
393
394 error = -EALREADY;
395 if (context->owner[qual] == current)
396 goto out_unlock;
397 error = -EPERM;
398 if (context->owner[qual] != 0)
399 goto out_unlock;
400 context->owner[qual] = current;
401
402 spin_unlock(&ds_lock);
403
404
405 error = -ENOMEM;
406 if (!base) {
407 base = ds_allocate_buffer(size, &context->pages[qual]);
408 if (!base)
409 goto out_release;
410
411 context->buffer[qual] = base;
412 }
413 error = 0;
414
415 context->callback[qual] = ovfl;
416
417 /* adjust the buffer address and size to meet alignment
418 * constraints:
419 * - buffer is double-word aligned
420 * - size is multiple of record size
421 *
422 * We checked the size at the very beginning; we have enough
423 * space to do the adjustment.
424 */
425 buffer = (unsigned long)base;
426
427 adj = ALIGN(buffer, alignment) - buffer;
428 buffer += adj;
429 size -= adj;
430
431 size /= ds_cfg.sizeof_rec[qual];
432 size *= ds_cfg.sizeof_rec[qual];
433
434 ds_set(context->ds, qual, ds_buffer_base, buffer);
435 ds_set(context->ds, qual, ds_index, buffer);
436 ds_set(context->ds, qual, ds_absolute_maximum, buffer + size);
437
438 if (ovfl) {
439 /* todo: select a suitable interrupt threshold */
440 } else
441 ds_set(context->ds, qual,
442 ds_interrupt_threshold, buffer + size + 1);
443
444 /* we keep the context until ds_release */
445 return error;
446
447 out_release:
448 context->owner[qual] = 0;
449 ds_put_context(context);
450 return error;
451
452 out_unlock:
453 spin_unlock(&ds_lock);
454 ds_put_context(context);
455 return error;
149} 456}
150static inline void set_from_ip(char *base, unsigned long value) 457
458int ds_request_bts(struct task_struct *task, void *base, size_t size,
459 ds_ovfl_callback_t ovfl)
151{ 460{
152 (*(unsigned long *)(base + ds_cfg.from_ip.offset)) = value; 461 return ds_request(task, base, size, ovfl, ds_bts);
153} 462}
154static inline unsigned long get_to_ip(char *base) 463
464int ds_request_pebs(struct task_struct *task, void *base, size_t size,
465 ds_ovfl_callback_t ovfl)
155{ 466{
156 return *(unsigned long *)(base + ds_cfg.to_ip.offset); 467 return ds_request(task, base, size, ovfl, ds_pebs);
157} 468}
158static inline void set_to_ip(char *base, unsigned long value) 469
470static int ds_release(struct task_struct *task, enum ds_qualifier qual)
159{ 471{
160 (*(unsigned long *)(base + ds_cfg.to_ip.offset)) = value; 472 struct ds_context *context;
473 int error;
474
475 context = ds_get_context(task);
476 error = ds_validate_access(context, qual);
477 if (error < 0)
478 goto out;
479
480 kfree(context->buffer[qual]);
481 context->buffer[qual] = 0;
482
483 current->mm->total_vm -= context->pages[qual];
484 current->mm->locked_vm -= context->pages[qual];
485 context->pages[qual] = 0;
486 context->owner[qual] = 0;
487
488 /*
489 * we put the context twice:
490 * once for the ds_get_context
491 * once for the corresponding ds_request
492 */
493 ds_put_context(context);
494 out:
495 ds_put_context(context);
496 return error;
161} 497}
162static inline unsigned char get_info_type(char *base) 498
499int ds_release_bts(struct task_struct *task)
163{ 500{
164 return *(unsigned char *)(base + ds_cfg.info_type.offset); 501 return ds_release(task, ds_bts);
165} 502}
166static inline void set_info_type(char *base, unsigned char value) 503
504int ds_release_pebs(struct task_struct *task)
167{ 505{
168 (*(unsigned char *)(base + ds_cfg.info_type.offset)) = value; 506 return ds_release(task, ds_pebs);
169} 507}
170static inline unsigned long get_info_data(char *base) 508
509static int ds_get_index(struct task_struct *task, size_t *pos,
510 enum ds_qualifier qual)
171{ 511{
172 return *(unsigned long *)(base + ds_cfg.info_data.offset); 512 struct ds_context *context;
513 unsigned long base, index;
514 int error;
515
516 context = ds_get_context(task);
517 error = ds_validate_access(context, qual);
518 if (error < 0)
519 goto out;
520
521 base = ds_get(context->ds, qual, ds_buffer_base);
522 index = ds_get(context->ds, qual, ds_index);
523
524 error = ((index - base) / ds_cfg.sizeof_rec[qual]);
525 if (pos)
526 *pos = error;
527 out:
528 ds_put_context(context);
529 return error;
173} 530}
174static inline void set_info_data(char *base, unsigned long value) 531
532int ds_get_bts_index(struct task_struct *task, size_t *pos)
175{ 533{
176 (*(unsigned long *)(base + ds_cfg.info_data.offset)) = value; 534 return ds_get_index(task, pos, ds_bts);
177} 535}
178 536
537int ds_get_pebs_index(struct task_struct *task, size_t *pos)
538{
539 return ds_get_index(task, pos, ds_pebs);
540}
179 541
180int ds_allocate(void **dsp, size_t bts_size_in_bytes) 542static int ds_get_end(struct task_struct *task, size_t *pos,
543 enum ds_qualifier qual)
181{ 544{
182 size_t bts_size_in_records; 545 struct ds_context *context;
183 unsigned long bts; 546 unsigned long base, end;
184 void *ds; 547 int error;
548
549 context = ds_get_context(task);
550 error = ds_validate_access(context, qual);
551 if (error < 0)
552 goto out;
553
554 base = ds_get(context->ds, qual, ds_buffer_base);
555 end = ds_get(context->ds, qual, ds_absolute_maximum);
556
557 error = ((end - base) / ds_cfg.sizeof_rec[qual]);
558 if (pos)
559 *pos = error;
560 out:
561 ds_put_context(context);
562 return error;
563}
185 564
186 if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) 565int ds_get_bts_end(struct task_struct *task, size_t *pos)
187 return -EOPNOTSUPP; 566{
567 return ds_get_end(task, pos, ds_bts);
568}
188 569
189 if (bts_size_in_bytes < 0) 570int ds_get_pebs_end(struct task_struct *task, size_t *pos)
190 return -EINVAL; 571{
572 return ds_get_end(task, pos, ds_pebs);
573}
191 574
192 bts_size_in_records = 575static int ds_access(struct task_struct *task, size_t index,
193 bts_size_in_bytes / ds_cfg.sizeof_bts; 576 const void **record, enum ds_qualifier qual)
194 bts_size_in_bytes = 577{
195 bts_size_in_records * ds_cfg.sizeof_bts; 578 struct ds_context *context;
579 unsigned long base, idx;
580 int error;
196 581
197 if (bts_size_in_bytes <= 0) 582 if (!record)
198 return -EINVAL; 583 return -EINVAL;
199 584
200 bts = (unsigned long)kzalloc(bts_size_in_bytes, GFP_KERNEL); 585 context = ds_get_context(task);
586 error = ds_validate_access(context, qual);
587 if (error < 0)
588 goto out;
201 589
202 if (!bts) 590 base = ds_get(context->ds, qual, ds_buffer_base);
203 return -ENOMEM; 591 idx = base + (index * ds_cfg.sizeof_rec[qual]);
204 592
205 ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); 593 error = -EINVAL;
594 if (idx > ds_get(context->ds, qual, ds_absolute_maximum))
595 goto out;
206 596
207 if (!ds) { 597 *record = (const void *)idx;
208 kfree((void *)bts); 598 error = ds_cfg.sizeof_rec[qual];
209 return -ENOMEM; 599 out:
210 } 600 ds_put_context(context);
211 601 return error;
212 set_bts_buffer_base(ds, bts);
213 set_bts_index(ds, bts);
214 set_bts_absolute_maximum(ds, bts + bts_size_in_bytes);
215 set_bts_interrupt_threshold(ds, bts + bts_size_in_bytes + 1);
216
217 *dsp = ds;
218 return 0;
219} 602}
220 603
221int ds_free(void **dsp) 604int ds_access_bts(struct task_struct *task, size_t index, const void **record)
222{ 605{
223 if (*dsp) { 606 return ds_access(task, index, record, ds_bts);
224 kfree((void *)get_bts_buffer_base(*dsp));
225 kfree(*dsp);
226 *dsp = NULL;
227 }
228 return 0;
229} 607}
230 608
231int ds_get_bts_size(void *ds) 609int ds_access_pebs(struct task_struct *task, size_t index, const void **record)
232{ 610{
233 int size_in_bytes; 611 return ds_access(task, index, record, ds_pebs);
234
235 if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
236 return -EOPNOTSUPP;
237
238 if (!ds)
239 return 0;
240
241 size_in_bytes =
242 get_bts_absolute_maximum(ds) -
243 get_bts_buffer_base(ds);
244 return size_in_bytes;
245} 612}
246 613
247int ds_get_bts_end(void *ds) 614static int ds_write(struct task_struct *task, const void *record, size_t size,
615 enum ds_qualifier qual, int force)
248{ 616{
249 int size_in_bytes = ds_get_bts_size(ds); 617 struct ds_context *context;
250 618 int error;
251 if (size_in_bytes <= 0)
252 return size_in_bytes;
253 619
254 return size_in_bytes / ds_cfg.sizeof_bts; 620 if (!record)
255} 621 return -EINVAL;
256 622
257int ds_get_bts_index(void *ds) 623 error = -EPERM;
258{ 624 context = ds_get_context(task);
259 int index_offset_in_bytes; 625 if (!context)
626 goto out;
260 627
261 if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) 628 if (!force) {
262 return -EOPNOTSUPP; 629 error = ds_validate_access(context, qual);
630 if (error < 0)
631 goto out;
632 }
263 633
264 index_offset_in_bytes = 634 error = 0;
265 get_bts_index(ds) - 635 while (size) {
266 get_bts_buffer_base(ds); 636 unsigned long base, index, end, write_end, int_th;
637 unsigned long write_size, adj_write_size;
638
639 /*
640 * write as much as possible without producing an
641 * overflow interrupt.
642 *
643 * interrupt_threshold must either be
644 * - bigger than absolute_maximum or
645 * - point to a record between buffer_base and absolute_maximum
646 *
647 * index points to a valid record.
648 */
649 base = ds_get(context->ds, qual, ds_buffer_base);
650 index = ds_get(context->ds, qual, ds_index);
651 end = ds_get(context->ds, qual, ds_absolute_maximum);
652 int_th = ds_get(context->ds, qual, ds_interrupt_threshold);
653
654 write_end = min(end, int_th);
655
656 /* if we are already beyond the interrupt threshold,
657 * we fill the entire buffer */
658 if (write_end <= index)
659 write_end = end;
660
661 if (write_end <= index)
662 goto out;
663
664 write_size = min((unsigned long) size, write_end - index);
665 memcpy((void *)index, record, write_size);
666
667 record = (const char *)record + write_size;
668 size -= write_size;
669 error += write_size;
670
671 adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
672 adj_write_size *= ds_cfg.sizeof_rec[qual];
673
674 /* zero out trailing bytes */
675 memset((char *)index + write_size, 0,
676 adj_write_size - write_size);
677 index += adj_write_size;
678
679 if (index >= end)
680 index = base;
681 ds_set(context->ds, qual, ds_index, index);
682
683 if (index >= int_th)
684 ds_overflow(task, context, qual);
685 }
267 686
268 return index_offset_in_bytes / ds_cfg.sizeof_bts; 687 out:
688 ds_put_context(context);
689 return error;
269} 690}
270 691
271int ds_set_overflow(void *ds, int method) 692int ds_write_bts(struct task_struct *task, const void *record, size_t size)
272{ 693{
273 switch (method) { 694 return ds_write(task, record, size, ds_bts, /* force = */ 0);
274 case DS_O_SIGNAL:
275 return -EOPNOTSUPP;
276 case DS_O_WRAP:
277 return 0;
278 default:
279 return -EINVAL;
280 }
281} 695}
282 696
283int ds_get_overflow(void *ds) 697int ds_write_pebs(struct task_struct *task, const void *record, size_t size)
284{ 698{
285 return DS_O_WRAP; 699 return ds_write(task, record, size, ds_pebs, /* force = */ 0);
286} 700}
287 701
288int ds_clear(void *ds) 702int ds_unchecked_write_bts(struct task_struct *task,
703 const void *record, size_t size)
289{ 704{
290 int bts_size = ds_get_bts_size(ds); 705 return ds_write(task, record, size, ds_bts, /* force = */ 1);
291 unsigned long bts_base;
292
293 if (bts_size <= 0)
294 return bts_size;
295
296 bts_base = get_bts_buffer_base(ds);
297 memset((void *)bts_base, 0, bts_size);
298
299 set_bts_index(ds, bts_base);
300 return 0;
301} 706}
302 707
303int ds_read_bts(void *ds, int index, struct bts_struct *out) 708int ds_unchecked_write_pebs(struct task_struct *task,
709 const void *record, size_t size)
304{ 710{
305 void *bts; 711 return ds_write(task, record, size, ds_pebs, /* force = */ 1);
712}
306 713
307 if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) 714static int ds_reset_or_clear(struct task_struct *task,
308 return -EOPNOTSUPP; 715 enum ds_qualifier qual, int clear)
716{
717 struct ds_context *context;
718 unsigned long base, end;
719 int error;
309 720
310 if (index < 0) 721 context = ds_get_context(task);
311 return -EINVAL; 722 error = ds_validate_access(context, qual);
723 if (error < 0)
724 goto out;
312 725
313 if (index >= ds_get_bts_size(ds)) 726 base = ds_get(context->ds, qual, ds_buffer_base);
314 return -EINVAL; 727 end = ds_get(context->ds, qual, ds_absolute_maximum);
315 728
316 bts = (void *)(get_bts_buffer_base(ds) + (index * ds_cfg.sizeof_bts)); 729 if (clear)
730 memset((void *)base, 0, end - base);
317 731
318 memset(out, 0, sizeof(*out)); 732 ds_set(context->ds, qual, ds_index, base);
319 if (get_from_ip(bts) == BTS_ESCAPE_ADDRESS) {
320 out->qualifier = get_info_type(bts);
321 out->variant.jiffies = get_info_data(bts);
322 } else {
323 out->qualifier = BTS_BRANCH;
324 out->variant.lbr.from_ip = get_from_ip(bts);
325 out->variant.lbr.to_ip = get_to_ip(bts);
326 }
327 733
328 return sizeof(*out);; 734 error = 0;
735 out:
736 ds_put_context(context);
737 return error;
329} 738}
330 739
331int ds_write_bts(void *ds, const struct bts_struct *in) 740int ds_reset_bts(struct task_struct *task)
332{ 741{
333 unsigned long bts; 742 return ds_reset_or_clear(task, ds_bts, /* clear = */ 0);
334 743}
335 if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
336 return -EOPNOTSUPP;
337
338 if (ds_get_bts_size(ds) <= 0)
339 return -ENXIO;
340 744
341 bts = get_bts_index(ds); 745int ds_reset_pebs(struct task_struct *task)
746{
747 return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0);
748}
342 749
343 memset((void *)bts, 0, ds_cfg.sizeof_bts); 750int ds_clear_bts(struct task_struct *task)
344 switch (in->qualifier) { 751{
345 case BTS_INVALID: 752 return ds_reset_or_clear(task, ds_bts, /* clear = */ 1);
346 break; 753}
347 754
348 case BTS_BRANCH: 755int ds_clear_pebs(struct task_struct *task)
349 set_from_ip((void *)bts, in->variant.lbr.from_ip); 756{
350 set_to_ip((void *)bts, in->variant.lbr.to_ip); 757 return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1);
351 break; 758}
352 759
353 case BTS_TASK_ARRIVES: 760int ds_get_pebs_reset(struct task_struct *task, u64 *value)
354 case BTS_TASK_DEPARTS: 761{
355 set_from_ip((void *)bts, BTS_ESCAPE_ADDRESS); 762 struct ds_context *context;
356 set_info_type((void *)bts, in->qualifier); 763 int error;
357 set_info_data((void *)bts, in->variant.jiffies);
358 break;
359 764
360 default: 765 if (!value)
361 return -EINVAL; 766 return -EINVAL;
362 }
363 767
364 bts = bts + ds_cfg.sizeof_bts; 768 context = ds_get_context(task);
365 if (bts >= get_bts_absolute_maximum(ds)) 769 error = ds_validate_access(context, ds_pebs);
366 bts = get_bts_buffer_base(ds); 770 if (error < 0)
367 set_bts_index(ds, bts); 771 goto out;
368 772
369 return ds_cfg.sizeof_bts; 773 *value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8));
774
775 error = 0;
776 out:
777 ds_put_context(context);
778 return error;
370} 779}
371 780
372unsigned long ds_debugctl_mask(void) 781int ds_set_pebs_reset(struct task_struct *task, u64 value)
373{ 782{
374 return ds_cfg.debugctl_mask; 783 struct ds_context *context;
375} 784 int error;
376 785
377#ifdef __i386__ 786 context = ds_get_context(task);
378static const struct ds_configuration ds_cfg_netburst = { 787 error = ds_validate_access(context, ds_pebs);
379 .sizeof_ds = 9 * 4, 788 if (error < 0)
380 .bts_buffer_base = { 0, 4 }, 789 goto out;
381 .bts_index = { 4, 4 },
382 .bts_absolute_maximum = { 8, 4 },
383 .bts_interrupt_threshold = { 12, 4 },
384 .sizeof_bts = 3 * 4,
385 .from_ip = { 0, 4 },
386 .to_ip = { 4, 4 },
387 .info_type = { 4, 1 },
388 .info_data = { 8, 4 },
389 .debugctl_mask = (1<<2)|(1<<3)
390};
391 790
392static const struct ds_configuration ds_cfg_pentium_m = { 791 *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value;
393 .sizeof_ds = 9 * 4, 792
394 .bts_buffer_base = { 0, 4 }, 793 error = 0;
395 .bts_index = { 4, 4 }, 794 out:
396 .bts_absolute_maximum = { 8, 4 }, 795 ds_put_context(context);
397 .bts_interrupt_threshold = { 12, 4 }, 796 return error;
398 .sizeof_bts = 3 * 4, 797}
399 .from_ip = { 0, 4 }, 798
400 .to_ip = { 4, 4 }, 799static const struct ds_configuration ds_cfg_var = {
401 .info_type = { 4, 1 }, 800 .sizeof_ds = sizeof(long) * 12,
402 .info_data = { 8, 4 }, 801 .sizeof_field = sizeof(long),
403 .debugctl_mask = (1<<6)|(1<<7) 802 .sizeof_rec[ds_bts] = sizeof(long) * 3,
803 .sizeof_rec[ds_pebs] = sizeof(long) * 10
404}; 804};
405#endif /* _i386_ */ 805static const struct ds_configuration ds_cfg_64 = {
406 806 .sizeof_ds = 8 * 12,
407static const struct ds_configuration ds_cfg_core2 = { 807 .sizeof_field = 8,
408 .sizeof_ds = 9 * 8, 808 .sizeof_rec[ds_bts] = 8 * 3,
409 .bts_buffer_base = { 0, 8 }, 809 .sizeof_rec[ds_pebs] = 8 * 10
410 .bts_index = { 8, 8 },
411 .bts_absolute_maximum = { 16, 8 },
412 .bts_interrupt_threshold = { 24, 8 },
413 .sizeof_bts = 3 * 8,
414 .from_ip = { 0, 8 },
415 .to_ip = { 8, 8 },
416 .info_type = { 8, 1 },
417 .info_data = { 16, 8 },
418 .debugctl_mask = (1<<6)|(1<<7)|(1<<9)
419}; 810};
420 811
421static inline void 812static inline void
@@ -429,14 +820,13 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
429 switch (c->x86) { 820 switch (c->x86) {
430 case 0x6: 821 case 0x6:
431 switch (c->x86_model) { 822 switch (c->x86_model) {
432#ifdef __i386__
433 case 0xD: 823 case 0xD:
434 case 0xE: /* Pentium M */ 824 case 0xE: /* Pentium M */
435 ds_configure(&ds_cfg_pentium_m); 825 ds_configure(&ds_cfg_var);
436 break; 826 break;
437#endif /* _i386_ */
438 case 0xF: /* Core2 */ 827 case 0xF: /* Core2 */
439 ds_configure(&ds_cfg_core2); 828 case 0x1C: /* Atom */
829 ds_configure(&ds_cfg_64);
440 break; 830 break;
441 default: 831 default:
442 /* sorry, don't know about them */ 832 /* sorry, don't know about them */
@@ -445,13 +835,11 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
445 break; 835 break;
446 case 0xF: 836 case 0xF:
447 switch (c->x86_model) { 837 switch (c->x86_model) {
448#ifdef __i386__
449 case 0x0: 838 case 0x0:
450 case 0x1: 839 case 0x1:
451 case 0x2: /* Netburst */ 840 case 0x2: /* Netburst */
452 ds_configure(&ds_cfg_netburst); 841 ds_configure(&ds_cfg_var);
453 break; 842 break;
454#endif /* _i386_ */
455 default: 843 default:
456 /* sorry, don't know about them */ 844 /* sorry, don't know about them */
457 break; 845 break;
@@ -462,3 +850,14 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
462 break; 850 break;
463 } 851 }
464} 852}
853
854void ds_free(struct ds_context *context)
855{
856 /* This is called when the task owning the parameter context
857 * is dying. There should not be any user of that context left
858 * to disturb us, anymore. */
859 unsigned long leftovers = context->count;
860 while (leftovers--)
861 ds_put_context(context);
862}
863#endif /* CONFIG_X86_DS */
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index f8476dfbb60d..5cec8a75a4e8 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -316,6 +316,14 @@ void exit_thread(void)
316 tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; 316 tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
317 put_cpu(); 317 put_cpu();
318 } 318 }
319#ifdef CONFIG_X86_DS
320 /* Free any DS contexts that have not been properly released. */
321 if (unlikely(current->thread.ds_ctx)) {
322 /* we clear debugctl to make sure DS is not used. */
323 update_debugctlmsr(0);
324 ds_free(current->thread.ds_ctx);
325 }
326#endif /* CONFIG_X86_DS */
319} 327}
320 328
321void flush_thread(void) 329void flush_thread(void)
@@ -482,18 +490,27 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
482{ 490{
483 struct thread_struct *prev, *next; 491 struct thread_struct *prev, *next;
484 unsigned long debugctl; 492 unsigned long debugctl;
493 unsigned long ds_prev = 0, ds_next = 0;
485 494
486 prev = &prev_p->thread; 495 prev = &prev_p->thread;
487 next = &next_p->thread; 496 next = &next_p->thread;
488 497
489 debugctl = prev->debugctlmsr; 498 debugctl = prev->debugctlmsr;
490 if (next->ds_area_msr != prev->ds_area_msr) { 499
500#ifdef CONFIG_X86_DS
501 if (prev->ds_ctx)
502 ds_prev = (unsigned long)prev->ds_ctx->ds;
503 if (next->ds_ctx)
504 ds_next = (unsigned long)next->ds_ctx->ds;
505
506 if (ds_next != ds_prev) {
491 /* we clear debugctl to make sure DS 507 /* we clear debugctl to make sure DS
492 * is not in use when we change it */ 508 * is not in use when we change it */
493 debugctl = 0; 509 debugctl = 0;
494 update_debugctlmsr(0); 510 update_debugctlmsr(0);
495 wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0); 511 wrmsr(MSR_IA32_DS_AREA, ds_next, 0);
496 } 512 }
513#endif /* CONFIG_X86_DS */
497 514
498 if (next->debugctlmsr != debugctl) 515 if (next->debugctlmsr != debugctl)
499 update_debugctlmsr(next->debugctlmsr); 516 update_debugctlmsr(next->debugctlmsr);
@@ -517,13 +534,13 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
517 hard_enable_TSC(); 534 hard_enable_TSC();
518 } 535 }
519 536
520#ifdef X86_BTS 537#ifdef CONFIG_X86_PTRACE_BTS
521 if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) 538 if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
522 ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); 539 ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
523 540
524 if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) 541 if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
525 ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); 542 ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
526#endif 543#endif /* CONFIG_X86_PTRACE_BTS */
527 544
528 545
529 if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { 546 if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index e2319f39988b..ad213494a22f 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -267,6 +267,14 @@ void exit_thread(void)
267 t->io_bitmap_max = 0; 267 t->io_bitmap_max = 0;
268 put_cpu(); 268 put_cpu();
269 } 269 }
270#ifdef CONFIG_X86_DS
271 /* Free any DS contexts that have not been properly released. */
272 if (unlikely(t->ds_ctx)) {
273 /* we clear debugctl to make sure DS is not used. */
274 update_debugctlmsr(0);
275 ds_free(t->ds_ctx);
276 }
277#endif /* CONFIG_X86_DS */
270} 278}
271 279
272void flush_thread(void) 280void flush_thread(void)
@@ -492,18 +500,27 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
492{ 500{
493 struct thread_struct *prev, *next; 501 struct thread_struct *prev, *next;
494 unsigned long debugctl; 502 unsigned long debugctl;
503 unsigned long ds_prev = 0, ds_next = 0;
495 504
496 prev = &prev_p->thread, 505 prev = &prev_p->thread,
497 next = &next_p->thread; 506 next = &next_p->thread;
498 507
499 debugctl = prev->debugctlmsr; 508 debugctl = prev->debugctlmsr;
500 if (next->ds_area_msr != prev->ds_area_msr) { 509
510#ifdef CONFIG_X86_DS
511 if (prev->ds_ctx)
512 ds_prev = (unsigned long)prev->ds_ctx->ds;
513 if (next->ds_ctx)
514 ds_next = (unsigned long)next->ds_ctx->ds;
515
516 if (ds_next != ds_prev) {
501 /* we clear debugctl to make sure DS 517 /* we clear debugctl to make sure DS
502 * is not in use when we change it */ 518 * is not in use when we change it */
503 debugctl = 0; 519 debugctl = 0;
504 update_debugctlmsr(0); 520 update_debugctlmsr(0);
505 wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr); 521 wrmsrl(MSR_IA32_DS_AREA, ds_next);
506 } 522 }
523#endif /* CONFIG_X86_DS */
507 524
508 if (next->debugctlmsr != debugctl) 525 if (next->debugctlmsr != debugctl)
509 update_debugctlmsr(next->debugctlmsr); 526 update_debugctlmsr(next->debugctlmsr);
@@ -541,13 +558,13 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
541 memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); 558 memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
542 } 559 }
543 560
544#ifdef X86_BTS 561#ifdef CONFIG_X86_PTRACE_BTS
545 if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) 562 if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
546 ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); 563 ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
547 564
548 if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) 565 if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
549 ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); 566 ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
550#endif 567#endif /* CONFIG_X86_PTRACE_BTS */
551} 568}
552 569
553/* 570/*
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index fb03ef380f0e..b7ff783dc5fe 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -554,45 +554,115 @@ static int ptrace_set_debugreg(struct task_struct *child,
554 return 0; 554 return 0;
555} 555}
556 556
557#ifdef X86_BTS 557#ifdef CONFIG_X86_PTRACE_BTS
558/*
559 * The configuration for a particular BTS hardware implementation.
560 */
561struct bts_configuration {
562 /* the size of a BTS record in bytes; at most BTS_MAX_RECORD_SIZE */
563 unsigned char sizeof_bts;
564 /* the size of a field in the BTS record in bytes */
565 unsigned char sizeof_field;
566 /* a bitmask to enable/disable BTS in DEBUGCTL MSR */
567 unsigned long debugctl_mask;
568};
569static struct bts_configuration bts_cfg;
570
571#define BTS_MAX_RECORD_SIZE (8 * 3)
572
573
574/*
575 * Branch Trace Store (BTS) uses the following format. Different
576 * architectures vary in the size of those fields.
577 * - source linear address
578 * - destination linear address
579 * - flags
580 *
581 * Later architectures use 64bit pointers throughout, whereas earlier
582 * architectures use 32bit pointers in 32bit mode.
583 *
584 * We compute the base address for the first 8 fields based on:
585 * - the field size stored in the DS configuration
586 * - the relative field position
587 *
588 * In order to store additional information in the BTS buffer, we use
589 * a special source address to indicate that the record requires
590 * special interpretation.
591 *
592 * Netburst indicated via a bit in the flags field whether the branch
593 * was predicted; this is ignored.
594 */
558 595
559static int ptrace_bts_get_size(struct task_struct *child) 596enum bts_field {
597 bts_from = 0,
598 bts_to,
599 bts_flags,
600
601 bts_escape = (unsigned long)-1,
602 bts_qual = bts_to,
603 bts_jiffies = bts_flags
604};
605
606static inline unsigned long bts_get(const char *base, enum bts_field field)
560{ 607{
561 if (!child->thread.ds_area_msr) 608 base += (bts_cfg.sizeof_field * field);
562 return -ENXIO; 609 return *(unsigned long *)base;
610}
563 611
564 return ds_get_bts_index((void *)child->thread.ds_area_msr); 612static inline void bts_set(char *base, enum bts_field field, unsigned long val)
613{
614 base += (bts_cfg.sizeof_field * field);;
615 (*(unsigned long *)base) = val;
565} 616}
566 617
567static int ptrace_bts_read_record(struct task_struct *child, 618/*
568 long index, 619 * Translate a BTS record from the raw format into the bts_struct format
620 *
621 * out (out): bts_struct interpretation
622 * raw: raw BTS record
623 */
624static void ptrace_bts_translate_record(struct bts_struct *out, const void *raw)
625{
626 memset(out, 0, sizeof(*out));
627 if (bts_get(raw, bts_from) == bts_escape) {
628 out->qualifier = bts_get(raw, bts_qual);
629 out->variant.jiffies = bts_get(raw, bts_jiffies);
630 } else {
631 out->qualifier = BTS_BRANCH;
632 out->variant.lbr.from_ip = bts_get(raw, bts_from);
633 out->variant.lbr.to_ip = bts_get(raw, bts_to);
634 }
635}
636
637static int ptrace_bts_read_record(struct task_struct *child, size_t index,
569 struct bts_struct __user *out) 638 struct bts_struct __user *out)
570{ 639{
571 struct bts_struct ret; 640 struct bts_struct ret;
572 int retval; 641 const void *bts_record;
573 int bts_end; 642 size_t bts_index, bts_end;
574 int bts_index; 643 int error;
575
576 if (!child->thread.ds_area_msr)
577 return -ENXIO;
578 644
579 if (index < 0) 645 error = ds_get_bts_end(child, &bts_end);
580 return -EINVAL; 646 if (error < 0)
647 return error;
581 648
582 bts_end = ds_get_bts_end((void *)child->thread.ds_area_msr);
583 if (bts_end <= index) 649 if (bts_end <= index)
584 return -EINVAL; 650 return -EINVAL;
585 651
652 error = ds_get_bts_index(child, &bts_index);
653 if (error < 0)
654 return error;
655
586 /* translate the ptrace bts index into the ds bts index */ 656 /* translate the ptrace bts index into the ds bts index */
587 bts_index = ds_get_bts_index((void *)child->thread.ds_area_msr); 657 bts_index += bts_end - (index + 1);
588 bts_index -= (index + 1); 658 if (bts_end <= bts_index)
589 if (bts_index < 0) 659 bts_index -= bts_end;
590 bts_index += bts_end;
591 660
592 retval = ds_read_bts((void *)child->thread.ds_area_msr, 661 error = ds_access_bts(child, bts_index, &bts_record);
593 bts_index, &ret); 662 if (error < 0)
594 if (retval < 0) 663 return error;
595 return retval; 664
665 ptrace_bts_translate_record(&ret, bts_record);
596 666
597 if (copy_to_user(out, &ret, sizeof(ret))) 667 if (copy_to_user(out, &ret, sizeof(ret)))
598 return -EFAULT; 668 return -EFAULT;
@@ -600,101 +670,106 @@ static int ptrace_bts_read_record(struct task_struct *child,
600 return sizeof(ret); 670 return sizeof(ret);
601} 671}
602 672
603static int ptrace_bts_clear(struct task_struct *child)
604{
605 if (!child->thread.ds_area_msr)
606 return -ENXIO;
607
608 return ds_clear((void *)child->thread.ds_area_msr);
609}
610
611static int ptrace_bts_drain(struct task_struct *child, 673static int ptrace_bts_drain(struct task_struct *child,
612 long size, 674 long size,
613 struct bts_struct __user *out) 675 struct bts_struct __user *out)
614{ 676{
615 int end, i; 677 struct bts_struct ret;
616 void *ds = (void *)child->thread.ds_area_msr; 678 const unsigned char *raw;
617 679 size_t end, i;
618 if (!ds) 680 int error;
619 return -ENXIO;
620 681
621 end = ds_get_bts_index(ds); 682 error = ds_get_bts_index(child, &end);
622 if (end <= 0) 683 if (error < 0)
623 return end; 684 return error;
624 685
625 if (size < (end * sizeof(struct bts_struct))) 686 if (size < (end * sizeof(struct bts_struct)))
626 return -EIO; 687 return -EIO;
627 688
628 for (i = 0; i < end; i++, out++) { 689 error = ds_access_bts(child, 0, (const void **)&raw);
629 struct bts_struct ret; 690 if (error < 0)
630 int retval; 691 return error;
631 692
632 retval = ds_read_bts(ds, i, &ret); 693 for (i = 0; i < end; i++, out++, raw += bts_cfg.sizeof_bts) {
633 if (retval < 0) 694 ptrace_bts_translate_record(&ret, raw);
634 return retval;
635 695
636 if (copy_to_user(out, &ret, sizeof(ret))) 696 if (copy_to_user(out, &ret, sizeof(ret)))
637 return -EFAULT; 697 return -EFAULT;
638 } 698 }
639 699
640 ds_clear(ds); 700 error = ds_clear_bts(child);
701 if (error < 0)
702 return error;
641 703
642 return end; 704 return end;
643} 705}
644 706
707static void ptrace_bts_ovfl(struct task_struct *child)
708{
709 send_sig(child->thread.bts_ovfl_signal, child, 0);
710}
711
645static int ptrace_bts_config(struct task_struct *child, 712static int ptrace_bts_config(struct task_struct *child,
646 long cfg_size, 713 long cfg_size,
647 const struct ptrace_bts_config __user *ucfg) 714 const struct ptrace_bts_config __user *ucfg)
648{ 715{
649 struct ptrace_bts_config cfg; 716 struct ptrace_bts_config cfg;
650 int bts_size, ret = 0; 717 int error = 0;
651 void *ds; 718
719 error = -EOPNOTSUPP;
720 if (!bts_cfg.sizeof_bts)
721 goto errout;
652 722
723 error = -EIO;
653 if (cfg_size < sizeof(cfg)) 724 if (cfg_size < sizeof(cfg))
654 return -EIO; 725 goto errout;
655 726
727 error = -EFAULT;
656 if (copy_from_user(&cfg, ucfg, sizeof(cfg))) 728 if (copy_from_user(&cfg, ucfg, sizeof(cfg)))
657 return -EFAULT; 729 goto errout;
658 730
659 if ((int)cfg.size < 0) 731 error = -EINVAL;
660 return -EINVAL; 732 if ((cfg.flags & PTRACE_BTS_O_SIGNAL) &&
733 !(cfg.flags & PTRACE_BTS_O_ALLOC))
734 goto errout;
661 735
662 bts_size = 0; 736 if (cfg.flags & PTRACE_BTS_O_ALLOC) {
663 ds = (void *)child->thread.ds_area_msr; 737 ds_ovfl_callback_t ovfl = 0;
664 if (ds) { 738 unsigned int sig = 0;
665 bts_size = ds_get_bts_size(ds);
666 if (bts_size < 0)
667 return bts_size;
668 }
669 cfg.size = PAGE_ALIGN(cfg.size);
670 739
671 if (bts_size != cfg.size) { 740 /* we ignore the error in case we were not tracing child */
672 ret = ptrace_bts_realloc(child, cfg.size, 741 (void)ds_release_bts(child);
673 cfg.flags & PTRACE_BTS_O_CUT_SIZE); 742
674 if (ret < 0) 743 if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
744 if (!cfg.signal)
745 goto errout;
746
747 sig = cfg.signal;
748 ovfl = ptrace_bts_ovfl;
749 }
750
751 error = ds_request_bts(child, /* base = */ 0, cfg.size, ovfl);
752 if (error < 0)
675 goto errout; 753 goto errout;
676 754
677 ds = (void *)child->thread.ds_area_msr; 755 child->thread.bts_ovfl_signal = sig;
678 } 756 }
679 757
680 if (cfg.flags & PTRACE_BTS_O_SIGNAL) 758 error = -EINVAL;
681 ret = ds_set_overflow(ds, DS_O_SIGNAL); 759 if (!child->thread.ds_ctx && cfg.flags)
682 else
683 ret = ds_set_overflow(ds, DS_O_WRAP);
684 if (ret < 0)
685 goto errout; 760 goto errout;
686 761
687 if (cfg.flags & PTRACE_BTS_O_TRACE) 762 if (cfg.flags & PTRACE_BTS_O_TRACE)
688 child->thread.debugctlmsr |= ds_debugctl_mask(); 763 child->thread.debugctlmsr |= bts_cfg.debugctl_mask;
689 else 764 else
690 child->thread.debugctlmsr &= ~ds_debugctl_mask(); 765 child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
691 766
692 if (cfg.flags & PTRACE_BTS_O_SCHED) 767 if (cfg.flags & PTRACE_BTS_O_SCHED)
693 set_tsk_thread_flag(child, TIF_BTS_TRACE_TS); 768 set_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
694 else 769 else
695 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); 770 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
696 771
697 ret = sizeof(cfg); 772 error = sizeof(cfg);
698 773
699out: 774out:
700 if (child->thread.debugctlmsr) 775 if (child->thread.debugctlmsr)
@@ -702,10 +777,10 @@ out:
702 else 777 else
703 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); 778 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
704 779
705 return ret; 780 return error;
706 781
707errout: 782errout:
708 child->thread.debugctlmsr &= ~ds_debugctl_mask(); 783 child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
709 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); 784 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
710 goto out; 785 goto out;
711} 786}
@@ -714,29 +789,40 @@ static int ptrace_bts_status(struct task_struct *child,
714 long cfg_size, 789 long cfg_size,
715 struct ptrace_bts_config __user *ucfg) 790 struct ptrace_bts_config __user *ucfg)
716{ 791{
717 void *ds = (void *)child->thread.ds_area_msr;
718 struct ptrace_bts_config cfg; 792 struct ptrace_bts_config cfg;
793 size_t end;
794 const void *base, *max;
795 int error;
719 796
720 if (cfg_size < sizeof(cfg)) 797 if (cfg_size < sizeof(cfg))
721 return -EIO; 798 return -EIO;
722 799
723 memset(&cfg, 0, sizeof(cfg)); 800 error = ds_get_bts_end(child, &end);
801 if (error < 0)
802 return error;
724 803
725 if (ds) { 804 error = ds_access_bts(child, /* index = */ 0, &base);
726 cfg.size = ds_get_bts_size(ds); 805 if (error < 0)
806 return error;
727 807
728 if (ds_get_overflow(ds) == DS_O_SIGNAL) 808 error = ds_access_bts(child, /* index = */ end, &max);
729 cfg.flags |= PTRACE_BTS_O_SIGNAL; 809 if (error < 0)
810 return error;
730 811
731 if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) && 812 memset(&cfg, 0, sizeof(cfg));
732 child->thread.debugctlmsr & ds_debugctl_mask()) 813 cfg.size = (max - base);
733 cfg.flags |= PTRACE_BTS_O_TRACE; 814 cfg.signal = child->thread.bts_ovfl_signal;
815 cfg.bts_size = sizeof(struct bts_struct);
734 816
735 if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS)) 817 if (cfg.signal)
736 cfg.flags |= PTRACE_BTS_O_SCHED; 818 cfg.flags |= PTRACE_BTS_O_SIGNAL;
737 }
738 819
739 cfg.bts_size = sizeof(struct bts_struct); 820 if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) &&
821 child->thread.debugctlmsr & bts_cfg.debugctl_mask)
822 cfg.flags |= PTRACE_BTS_O_TRACE;
823
824 if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS))
825 cfg.flags |= PTRACE_BTS_O_SCHED;
740 826
741 if (copy_to_user(ucfg, &cfg, sizeof(cfg))) 827 if (copy_to_user(ucfg, &cfg, sizeof(cfg)))
742 return -EFAULT; 828 return -EFAULT;
@@ -744,89 +830,38 @@ static int ptrace_bts_status(struct task_struct *child,
744 return sizeof(cfg); 830 return sizeof(cfg);
745} 831}
746 832
747
748static int ptrace_bts_write_record(struct task_struct *child, 833static int ptrace_bts_write_record(struct task_struct *child,
749 const struct bts_struct *in) 834 const struct bts_struct *in)
750{ 835{
751 int retval; 836 unsigned char bts_record[BTS_MAX_RECORD_SIZE];
752 837
753 if (!child->thread.ds_area_msr) 838 BUG_ON(BTS_MAX_RECORD_SIZE < bts_cfg.sizeof_bts);
754 return -ENXIO;
755 839
756 retval = ds_write_bts((void *)child->thread.ds_area_msr, in); 840 memset(bts_record, 0, bts_cfg.sizeof_bts);
757 if (retval) 841 switch (in->qualifier) {
758 return retval; 842 case BTS_INVALID:
843 break;
759 844
760 return sizeof(*in); 845 case BTS_BRANCH:
761} 846 bts_set(bts_record, bts_from, in->variant.lbr.from_ip);
847 bts_set(bts_record, bts_to, in->variant.lbr.to_ip);
848 break;
762 849
763static int ptrace_bts_realloc(struct task_struct *child, 850 case BTS_TASK_ARRIVES:
764 int size, int reduce_size) 851 case BTS_TASK_DEPARTS:
765{ 852 bts_set(bts_record, bts_from, bts_escape);
766 unsigned long rlim, vm; 853 bts_set(bts_record, bts_qual, in->qualifier);
767 int ret, old_size; 854 bts_set(bts_record, bts_jiffies, in->variant.jiffies);
855 break;
768 856
769 if (size < 0) 857 default:
770 return -EINVAL; 858 return -EINVAL;
771
772 old_size = ds_get_bts_size((void *)child->thread.ds_area_msr);
773 if (old_size < 0)
774 return old_size;
775
776 ret = ds_free((void **)&child->thread.ds_area_msr);
777 if (ret < 0)
778 goto out;
779
780 size >>= PAGE_SHIFT;
781 old_size >>= PAGE_SHIFT;
782
783 current->mm->total_vm -= old_size;
784 current->mm->locked_vm -= old_size;
785
786 if (size == 0)
787 goto out;
788
789 rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
790 vm = current->mm->total_vm + size;
791 if (rlim < vm) {
792 ret = -ENOMEM;
793
794 if (!reduce_size)
795 goto out;
796
797 size = rlim - current->mm->total_vm;
798 if (size <= 0)
799 goto out;
800 } 859 }
801 860
802 rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; 861 /* The writing task will be the switched-to task on a context
803 vm = current->mm->locked_vm + size; 862 * switch. It needs to write into the switched-from task's BTS
804 if (rlim < vm) { 863 * buffer. */
805 ret = -ENOMEM; 864 return ds_unchecked_write_bts(child, bts_record, bts_cfg.sizeof_bts);
806
807 if (!reduce_size)
808 goto out;
809
810 size = rlim - current->mm->locked_vm;
811 if (size <= 0)
812 goto out;
813 }
814
815 ret = ds_allocate((void **)&child->thread.ds_area_msr,
816 size << PAGE_SHIFT);
817 if (ret < 0)
818 goto out;
819
820 current->mm->total_vm += size;
821 current->mm->locked_vm += size;
822
823out:
824 if (child->thread.ds_area_msr)
825 set_tsk_thread_flag(child, TIF_DS_AREA_MSR);
826 else
827 clear_tsk_thread_flag(child, TIF_DS_AREA_MSR);
828
829 return ret;
830} 865}
831 866
832void ptrace_bts_take_timestamp(struct task_struct *tsk, 867void ptrace_bts_take_timestamp(struct task_struct *tsk,
@@ -839,7 +874,66 @@ void ptrace_bts_take_timestamp(struct task_struct *tsk,
839 874
840 ptrace_bts_write_record(tsk, &rec); 875 ptrace_bts_write_record(tsk, &rec);
841} 876}
842#endif /* X86_BTS */ 877
878static const struct bts_configuration bts_cfg_netburst = {
879 .sizeof_bts = sizeof(long) * 3,
880 .sizeof_field = sizeof(long),
881 .debugctl_mask = (1<<2)|(1<<3)|(1<<5)
882};
883
884static const struct bts_configuration bts_cfg_pentium_m = {
885 .sizeof_bts = sizeof(long) * 3,
886 .sizeof_field = sizeof(long),
887 .debugctl_mask = (1<<6)|(1<<7)
888};
889
890static const struct bts_configuration bts_cfg_core2 = {
891 .sizeof_bts = 8 * 3,
892 .sizeof_field = 8,
893 .debugctl_mask = (1<<6)|(1<<7)|(1<<9)
894};
895
896static inline void bts_configure(const struct bts_configuration *cfg)
897{
898 bts_cfg = *cfg;
899}
900
901void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c)
902{
903 switch (c->x86) {
904 case 0x6:
905 switch (c->x86_model) {
906 case 0xD:
907 case 0xE: /* Pentium M */
908 bts_configure(&bts_cfg_pentium_m);
909 break;
910 case 0xF: /* Core2 */
911 case 0x1C: /* Atom */
912 bts_configure(&bts_cfg_core2);
913 break;
914 default:
915 /* sorry, don't know about them */
916 break;
917 }
918 break;
919 case 0xF:
920 switch (c->x86_model) {
921 case 0x0:
922 case 0x1:
923 case 0x2: /* Netburst */
924 bts_configure(&bts_cfg_netburst);
925 break;
926 default:
927 /* sorry, don't know about them */
928 break;
929 }
930 break;
931 default:
932 /* sorry, don't know about them */
933 break;
934 }
935}
936#endif /* CONFIG_X86_PTRACE_BTS */
843 937
844/* 938/*
845 * Called by kernel/ptrace.c when detaching.. 939 * Called by kernel/ptrace.c when detaching..
@@ -852,15 +946,15 @@ void ptrace_disable(struct task_struct *child)
852#ifdef TIF_SYSCALL_EMU 946#ifdef TIF_SYSCALL_EMU
853 clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); 947 clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
854#endif 948#endif
855 if (child->thread.ds_area_msr) { 949#ifdef CONFIG_X86_PTRACE_BTS
856#ifdef X86_BTS 950 (void)ds_release_bts(child);
857 ptrace_bts_realloc(child, 0, 0); 951
858#endif 952 child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
859 child->thread.debugctlmsr &= ~ds_debugctl_mask(); 953 if (!child->thread.debugctlmsr)
860 if (!child->thread.debugctlmsr) 954 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
861 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); 955
862 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); 956 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
863 } 957#endif /* CONFIG_X86_PTRACE_BTS */
864} 958}
865 959
866#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION 960#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
@@ -980,7 +1074,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
980 /* 1074 /*
981 * These bits need more cooking - not enabled yet: 1075 * These bits need more cooking - not enabled yet:
982 */ 1076 */
983#ifdef X86_BTS 1077#ifdef CONFIG_X86_PTRACE_BTS
984 case PTRACE_BTS_CONFIG: 1078 case PTRACE_BTS_CONFIG:
985 ret = ptrace_bts_config 1079 ret = ptrace_bts_config
986 (child, data, (struct ptrace_bts_config __user *)addr); 1080 (child, data, (struct ptrace_bts_config __user *)addr);
@@ -992,7 +1086,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
992 break; 1086 break;
993 1087
994 case PTRACE_BTS_SIZE: 1088 case PTRACE_BTS_SIZE:
995 ret = ptrace_bts_get_size(child); 1089 ret = ds_get_bts_index(child, /* pos = */ 0);
996 break; 1090 break;
997 1091
998 case PTRACE_BTS_GET: 1092 case PTRACE_BTS_GET:
@@ -1001,14 +1095,14 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
1001 break; 1095 break;
1002 1096
1003 case PTRACE_BTS_CLEAR: 1097 case PTRACE_BTS_CLEAR:
1004 ret = ptrace_bts_clear(child); 1098 ret = ds_clear_bts(child);
1005 break; 1099 break;
1006 1100
1007 case PTRACE_BTS_DRAIN: 1101 case PTRACE_BTS_DRAIN:
1008 ret = ptrace_bts_drain 1102 ret = ptrace_bts_drain
1009 (child, data, (struct bts_struct __user *) addr); 1103 (child, data, (struct bts_struct __user *) addr);
1010 break; 1104 break;
1011#endif 1105#endif /* CONFIG_X86_PTRACE_BTS */
1012 1106
1013 default: 1107 default:
1014 ret = ptrace_request(child, request, addr, data); 1108 ret = ptrace_request(child, request, addr, data);
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index f2fc8feb727d..f7aebe13c999 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -920,11 +920,12 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
920 set_cpu_cap(c, X86_FEATURE_BTS); 920 set_cpu_cap(c, X86_FEATURE_BTS);
921 if (!(l1 & (1<<12))) 921 if (!(l1 & (1<<12)))
922 set_cpu_cap(c, X86_FEATURE_PEBS); 922 set_cpu_cap(c, X86_FEATURE_PEBS);
923 ds_init_intel(c);
923 } 924 }
924 925
925 926
926 if (cpu_has_bts) 927 if (cpu_has_bts)
927 ds_init_intel(c); 928 ptrace_bts_init_intel(c);
928 929
929 n = c->extended_cpuid_level; 930 n = c->extended_cpuid_level;
930 if (n >= 0x80000008) { 931 if (n >= 0x80000008) {
diff --git a/include/asm-x86/ds.h b/include/asm-x86/ds.h
index 7881368142fa..72c5a190bf48 100644
--- a/include/asm-x86/ds.h
+++ b/include/asm-x86/ds.h
@@ -2,71 +2,237 @@
2 * Debug Store (DS) support 2 * Debug Store (DS) support
3 * 3 *
4 * This provides a low-level interface to the hardware's Debug Store 4 * This provides a low-level interface to the hardware's Debug Store
5 * feature that is used for last branch recording (LBR) and 5 * feature that is used for branch trace store (BTS) and
6 * precise-event based sampling (PEBS). 6 * precise-event based sampling (PEBS).
7 * 7 *
8 * Different architectures use a different DS layout/pointer size. 8 * It manages:
9 * The below functions therefore work on a void*. 9 * - per-thread and per-cpu allocation of BTS and PEBS
10 * - buffer memory allocation (optional)
11 * - buffer overflow handling
12 * - buffer access
10 * 13 *
14 * It assumes:
15 * - get_task_struct on all parameter tasks
16 * - current is allowed to trace parameter tasks
11 * 17 *
12 * Since there is no user for PEBS, yet, only LBR (or branch
13 * trace store, BTS) is supported.
14 * 18 *
15 * 19 * Copyright (C) 2007-2008 Intel Corporation.
16 * Copyright (C) 2007 Intel Corporation. 20 * Markus Metzger <markus.t.metzger@intel.com>, 2007-2008
17 * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007
18 */ 21 */
19 22
20#ifndef _ASM_X86_DS_H 23#ifndef _ASM_X86_DS_H
21#define _ASM_X86_DS_H 24#define _ASM_X86_DS_H
22 25
26#ifdef CONFIG_X86_DS
27
23#include <linux/types.h> 28#include <linux/types.h>
24#include <linux/init.h> 29#include <linux/init.h>
25 30
26struct cpuinfo_x86;
27 31
32struct task_struct;
28 33
29/* a branch trace record entry 34/*
35 * Request BTS or PEBS
36 *
37 * Due to alignement constraints, the actual buffer may be slightly
38 * smaller than the requested or provided buffer.
30 * 39 *
31 * In order to unify the interface between various processor versions, 40 * Returns 0 on success; -Eerrno otherwise
32 * we use the below data structure for all processors. 41 *
42 * task: the task to request recording for;
43 * NULL for per-cpu recording on the current cpu
44 * base: the base pointer for the (non-pageable) buffer;
45 * NULL if buffer allocation requested
46 * size: the size of the requested or provided buffer
47 * ovfl: pointer to a function to be called on buffer overflow;
48 * NULL if cyclic buffer requested
33 */ 49 */
34enum bts_qualifier { 50typedef void (*ds_ovfl_callback_t)(struct task_struct *);
35 BTS_INVALID = 0, 51extern int ds_request_bts(struct task_struct *task, void *base, size_t size,
36 BTS_BRANCH, 52 ds_ovfl_callback_t ovfl);
37 BTS_TASK_ARRIVES, 53extern int ds_request_pebs(struct task_struct *task, void *base, size_t size,
38 BTS_TASK_DEPARTS 54 ds_ovfl_callback_t ovfl);
39}; 55
56/*
57 * Release BTS or PEBS resources
58 *
59 * Frees buffers allocated on ds_request.
60 *
61 * Returns 0 on success; -Eerrno otherwise
62 *
63 * task: the task to release resources for;
64 * NULL to release resources for the current cpu
65 */
66extern int ds_release_bts(struct task_struct *task);
67extern int ds_release_pebs(struct task_struct *task);
68
69/*
70 * Return the (array) index of the write pointer.
71 * (assuming an array of BTS/PEBS records)
72 *
73 * Returns -Eerrno on error
74 *
75 * task: the task to access;
76 * NULL to access the current cpu
77 * pos (out): if not NULL, will hold the result
78 */
79extern int ds_get_bts_index(struct task_struct *task, size_t *pos);
80extern int ds_get_pebs_index(struct task_struct *task, size_t *pos);
81
82/*
83 * Return the (array) index one record beyond the end of the array.
84 * (assuming an array of BTS/PEBS records)
85 *
86 * Returns -Eerrno on error
87 *
88 * task: the task to access;
89 * NULL to access the current cpu
90 * pos (out): if not NULL, will hold the result
91 */
92extern int ds_get_bts_end(struct task_struct *task, size_t *pos);
93extern int ds_get_pebs_end(struct task_struct *task, size_t *pos);
94
95/*
96 * Provide a pointer to the BTS/PEBS record at parameter index.
97 * (assuming an array of BTS/PEBS records)
98 *
99 * The pointer points directly into the buffer. The user is
100 * responsible for copying the record.
101 *
102 * Returns the size of a single record on success; -Eerrno on error
103 *
104 * task: the task to access;
105 * NULL to access the current cpu
106 * index: the index of the requested record
107 * record (out): pointer to the requested record
108 */
109extern int ds_access_bts(struct task_struct *task,
110 size_t index, const void **record);
111extern int ds_access_pebs(struct task_struct *task,
112 size_t index, const void **record);
113
114/*
115 * Write one or more BTS/PEBS records at the write pointer index and
116 * advance the write pointer.
117 *
118 * If size is not a multiple of the record size, trailing bytes are
119 * zeroed out.
120 *
121 * May result in one or more overflow notifications.
122 *
123 * If called during overflow handling, that is, with index >=
124 * interrupt threshold, the write will wrap around.
125 *
126 * An overflow notification is given if and when the interrupt
127 * threshold is reached during or after the write.
128 *
129 * Returns the number of bytes written or -Eerrno.
130 *
131 * task: the task to access;
132 * NULL to access the current cpu
133 * buffer: the buffer to write
134 * size: the size of the buffer
135 */
136extern int ds_write_bts(struct task_struct *task,
137 const void *buffer, size_t size);
138extern int ds_write_pebs(struct task_struct *task,
139 const void *buffer, size_t size);
140
141/*
142 * Same as ds_write_bts/pebs, but omit ownership checks.
143 *
144 * This is needed to have some other task than the owner of the
145 * BTS/PEBS buffer or the parameter task itself write into the
146 * respective buffer.
147 */
148extern int ds_unchecked_write_bts(struct task_struct *task,
149 const void *buffer, size_t size);
150extern int ds_unchecked_write_pebs(struct task_struct *task,
151 const void *buffer, size_t size);
152
153/*
154 * Reset the write pointer of the BTS/PEBS buffer.
155 *
156 * Returns 0 on success; -Eerrno on error
157 *
158 * task: the task to access;
159 * NULL to access the current cpu
160 */
161extern int ds_reset_bts(struct task_struct *task);
162extern int ds_reset_pebs(struct task_struct *task);
163
164/*
165 * Clear the BTS/PEBS buffer and reset the write pointer.
166 * The entire buffer will be zeroed out.
167 *
168 * Returns 0 on success; -Eerrno on error
169 *
170 * task: the task to access;
171 * NULL to access the current cpu
172 */
173extern int ds_clear_bts(struct task_struct *task);
174extern int ds_clear_pebs(struct task_struct *task);
175
176/*
177 * Provide the PEBS counter reset value.
178 *
179 * Returns 0 on success; -Eerrno on error
180 *
181 * task: the task to access;
182 * NULL to access the current cpu
183 * value (out): the counter reset value
184 */
185extern int ds_get_pebs_reset(struct task_struct *task, u64 *value);
186
187/*
188 * Set the PEBS counter reset value.
189 *
190 * Returns 0 on success; -Eerrno on error
191 *
192 * task: the task to access;
193 * NULL to access the current cpu
194 * value: the new counter reset value
195 */
196extern int ds_set_pebs_reset(struct task_struct *task, u64 value);
197
198/*
199 * Initialization
200 */
201struct cpuinfo_x86;
202extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *);
203
204
40 205
41struct bts_struct { 206/*
42 u64 qualifier; 207 * The DS context - part of struct thread_struct.
43 union { 208 */
44 /* BTS_BRANCH */ 209struct ds_context {
45 struct { 210 /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */
46 u64 from_ip; 211 unsigned char *ds;
47 u64 to_ip; 212 /* the owner of the BTS and PEBS configuration, respectively */
48 } lbr; 213 struct task_struct *owner[2];
49 /* BTS_TASK_ARRIVES or 214 /* buffer overflow notification function for BTS and PEBS */
50 BTS_TASK_DEPARTS */ 215 ds_ovfl_callback_t callback[2];
51 u64 jiffies; 216 /* the original buffer address */
52 } variant; 217 void *buffer[2];
218 /* the number of allocated pages for on-request allocated buffers */
219 unsigned int pages[2];
220 /* use count */
221 unsigned long count;
222 /* a pointer to the context location inside the thread_struct
223 * or the per_cpu context array */
224 struct ds_context **this;
225 /* a pointer to the task owning this context, or NULL, if the
226 * context is owned by a cpu */
227 struct task_struct *task;
53}; 228};
54 229
55/* Overflow handling mechanisms */ 230/* called by exit_thread() to free leftover contexts */
56#define DS_O_SIGNAL 1 /* send overflow signal */ 231extern void ds_free(struct ds_context *context);
57#define DS_O_WRAP 2 /* wrap around */ 232
58 233#else /* CONFIG_X86_DS */
59extern int ds_allocate(void **, size_t); 234
60extern int ds_free(void **); 235#define ds_init_intel(config) do {} while (0)
61extern int ds_get_bts_size(void *);
62extern int ds_get_bts_end(void *);
63extern int ds_get_bts_index(void *);
64extern int ds_set_overflow(void *, int);
65extern int ds_get_overflow(void *);
66extern int ds_clear(void *);
67extern int ds_read_bts(void *, int, struct bts_struct *);
68extern int ds_write_bts(void *, const struct bts_struct *);
69extern unsigned long ds_debugctl_mask(void);
70extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *c);
71 236
237#endif /* CONFIG_X86_DS */
72#endif /* _ASM_X86_DS_H */ 238#endif /* _ASM_X86_DS_H */
diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index 559105220a47..beaccb71628f 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -20,6 +20,7 @@ struct mm_struct;
20#include <asm/msr.h> 20#include <asm/msr.h>
21#include <asm/desc_defs.h> 21#include <asm/desc_defs.h>
22#include <asm/nops.h> 22#include <asm/nops.h>
23#include <asm/ds.h>
23 24
24#include <linux/personality.h> 25#include <linux/personality.h>
25#include <linux/cpumask.h> 26#include <linux/cpumask.h>
@@ -415,9 +416,14 @@ struct thread_struct {
415 unsigned io_bitmap_max; 416 unsigned io_bitmap_max;
416/* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */ 417/* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */
417 unsigned long debugctlmsr; 418 unsigned long debugctlmsr;
418/* Debug Store - if not 0 points to a DS Save Area configuration; 419#ifdef CONFIG_X86_DS
419 * goes into MSR_IA32_DS_AREA */ 420/* Debug Store context; see include/asm-x86/ds.h; goes into MSR_IA32_DS_AREA */
420 unsigned long ds_area_msr; 421 struct ds_context *ds_ctx;
422#endif /* CONFIG_X86_DS */
423#ifdef CONFIG_X86_PTRACE_BTS
424/* the signal to send on a bts buffer overflow */
425 unsigned int bts_ovfl_signal;
426#endif /* CONFIG_X86_PTRACE_BTS */
421}; 427};
422 428
423static inline unsigned long native_get_debugreg(int regno) 429static inline unsigned long native_get_debugreg(int regno)
diff --git a/include/asm-x86/ptrace-abi.h b/include/asm-x86/ptrace-abi.h
index f224eb3c3157..9bcaa75cbcaf 100644
--- a/include/asm-x86/ptrace-abi.h
+++ b/include/asm-x86/ptrace-abi.h
@@ -80,8 +80,9 @@
80 80
81#define PTRACE_SINGLEBLOCK 33 /* resume execution until next branch */ 81#define PTRACE_SINGLEBLOCK 33 /* resume execution until next branch */
82 82
83#ifndef __ASSEMBLY__ 83#ifdef CONFIG_X86_PTRACE_BTS
84 84
85#ifndef __ASSEMBLY__
85#include <asm/types.h> 86#include <asm/types.h>
86 87
87/* configuration/status structure used in PTRACE_BTS_CONFIG and 88/* configuration/status structure used in PTRACE_BTS_CONFIG and
@@ -97,20 +98,20 @@ struct ptrace_bts_config {
97 /* actual size of bts_struct in bytes */ 98 /* actual size of bts_struct in bytes */
98 __u32 bts_size; 99 __u32 bts_size;
99}; 100};
100#endif 101#endif /* __ASSEMBLY__ */
101 102
102#define PTRACE_BTS_O_TRACE 0x1 /* branch trace */ 103#define PTRACE_BTS_O_TRACE 0x1 /* branch trace */
103#define PTRACE_BTS_O_SCHED 0x2 /* scheduling events w/ jiffies */ 104#define PTRACE_BTS_O_SCHED 0x2 /* scheduling events w/ jiffies */
104#define PTRACE_BTS_O_SIGNAL 0x4 /* send SIG<signal> on buffer overflow 105#define PTRACE_BTS_O_SIGNAL 0x4 /* send SIG<signal> on buffer overflow
105 instead of wrapping around */ 106 instead of wrapping around */
106#define PTRACE_BTS_O_CUT_SIZE 0x8 /* cut requested size to max available 107#define PTRACE_BTS_O_ALLOC 0x8 /* (re)allocate buffer */
107 instead of failing */
108 108
109#define PTRACE_BTS_CONFIG 40 109#define PTRACE_BTS_CONFIG 40
110/* Configure branch trace recording. 110/* Configure branch trace recording.
111 ADDR points to a struct ptrace_bts_config. 111 ADDR points to a struct ptrace_bts_config.
112 DATA gives the size of that buffer. 112 DATA gives the size of that buffer.
113 A new buffer is allocated, iff the size changes. 113 A new buffer is allocated, if requested in the flags.
114 An overflow signal may only be requested for new buffers.
114 Returns the number of bytes read. 115 Returns the number of bytes read.
115*/ 116*/
116#define PTRACE_BTS_STATUS 41 117#define PTRACE_BTS_STATUS 41
@@ -119,7 +120,7 @@ struct ptrace_bts_config {
119 Returns the number of bytes written. 120 Returns the number of bytes written.
120*/ 121*/
121#define PTRACE_BTS_SIZE 42 122#define PTRACE_BTS_SIZE 42
122/* Return the number of available BTS records. 123/* Return the number of available BTS records for draining.
123 DATA and ADDR are ignored. 124 DATA and ADDR are ignored.
124*/ 125*/
125#define PTRACE_BTS_GET 43 126#define PTRACE_BTS_GET 43
@@ -139,5 +140,6 @@ struct ptrace_bts_config {
139 BTS records are read from oldest to newest. 140 BTS records are read from oldest to newest.
140 Returns number of BTS records drained. 141 Returns number of BTS records drained.
141*/ 142*/
143#endif /* CONFIG_X86_PTRACE_BTS */
142 144
143#endif 145#endif
diff --git a/include/asm-x86/ptrace.h b/include/asm-x86/ptrace.h
index 9f922b0b95d6..6303701d18e3 100644
--- a/include/asm-x86/ptrace.h
+++ b/include/asm-x86/ptrace.h
@@ -125,14 +125,48 @@ struct pt_regs {
125#endif /* __KERNEL__ */ 125#endif /* __KERNEL__ */
126#endif /* !__i386__ */ 126#endif /* !__i386__ */
127 127
128
129#ifdef CONFIG_X86_PTRACE_BTS
130/* a branch trace record entry
131 *
132 * In order to unify the interface between various processor versions,
133 * we use the below data structure for all processors.
134 */
135enum bts_qualifier {
136 BTS_INVALID = 0,
137 BTS_BRANCH,
138 BTS_TASK_ARRIVES,
139 BTS_TASK_DEPARTS
140};
141
142struct bts_struct {
143 __u64 qualifier;
144 union {
145 /* BTS_BRANCH */
146 struct {
147 __u64 from_ip;
148 __u64 to_ip;
149 } lbr;
150 /* BTS_TASK_ARRIVES or
151 BTS_TASK_DEPARTS */
152 __u64 jiffies;
153 } variant;
154};
155#endif /* CONFIG_X86_PTRACE_BTS */
156
128#ifdef __KERNEL__ 157#ifdef __KERNEL__
129 158
130/* the DS BTS struct is used for ptrace as well */ 159#include <linux/init.h>
131#include <asm/ds.h>
132 160
161struct cpuinfo_x86;
133struct task_struct; 162struct task_struct;
134 163
164#ifdef CONFIG_X86_PTRACE_BTS
165extern void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *);
135extern void ptrace_bts_take_timestamp(struct task_struct *, enum bts_qualifier); 166extern void ptrace_bts_take_timestamp(struct task_struct *, enum bts_qualifier);
167#else
168#define ptrace_bts_init_intel(config) do {} while (0)
169#endif /* CONFIG_X86_PTRACE_BTS */
136 170
137extern unsigned long profile_pc(struct pt_regs *regs); 171extern unsigned long profile_pc(struct pt_regs *regs);
138 172