aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/x86/Kconfig.cpu18
-rw-r--r--arch/x86/kernel/cpu/intel.c3
-rw-r--r--arch/x86/kernel/ds.c954
-rw-r--r--arch/x86/kernel/process_32.c50
-rw-r--r--arch/x86/kernel/process_64.c38
-rw-r--r--arch/x86/kernel/ptrace.c444
-rw-r--r--include/asm-x86/ds.h258
-rw-r--r--include/asm-x86/processor.h12
-rw-r--r--include/asm-x86/ptrace-abi.h14
-rw-r--r--include/asm-x86/ptrace.h38
10 files changed, 1301 insertions, 528 deletions
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index ab77d409fee0..1b29d6a87563 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -477,3 +477,21 @@ config CPU_SUP_UMC_32
477 depends on !64BIT 477 depends on !64BIT
478 help 478 help
479 This enables extended support for UMC processors 479 This enables extended support for UMC processors
480
481config X86_DS
482 bool "Debug Store support"
483 default y
484 help
485 Add support for Debug Store.
486 This allows the kernel to provide a memory buffer to the hardware
487 to store various profiling and tracing events.
488
489config X86_PTRACE_BTS
490 bool "ptrace interface to Branch Trace Store"
491 default y
492 depends on (X86_DS && X86_DEBUGCTLMSR)
493 help
494 Add a ptrace interface to allow collecting an execution trace
495 of the traced task.
496 This collects control flow changes in a (cyclic) buffer and allows
497 debuggers to fill in the gaps and show an execution trace of the debuggee.
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 4a8ac9d4ff50..a66989586a84 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -221,10 +221,11 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
221 set_cpu_cap(c, X86_FEATURE_BTS); 221 set_cpu_cap(c, X86_FEATURE_BTS);
222 if (!(l1 & (1<<12))) 222 if (!(l1 & (1<<12)))
223 set_cpu_cap(c, X86_FEATURE_PEBS); 223 set_cpu_cap(c, X86_FEATURE_PEBS);
224 ds_init_intel(c);
224 } 225 }
225 226
226 if (cpu_has_bts) 227 if (cpu_has_bts)
227 ds_init_intel(c); 228 ptrace_bts_init_intel(c);
228 229
229 /* 230 /*
230 * See if we have a good local APIC by checking for buggy Pentia, 231 * See if we have a good local APIC by checking for buggy Pentia,
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 11c11b8ec48d..ab21c270bfa4 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -2,26 +2,49 @@
2 * Debug Store support 2 * Debug Store support
3 * 3 *
4 * This provides a low-level interface to the hardware's Debug Store 4 * This provides a low-level interface to the hardware's Debug Store
5 * feature that is used for last branch recording (LBR) and 5 * feature that is used for branch trace store (BTS) and
6 * precise-event based sampling (PEBS). 6 * precise-event based sampling (PEBS).
7 * 7 *
8 * Different architectures use a different DS layout/pointer size. 8 * It manages:
9 * The below functions therefore work on a void*. 9 * - per-thread and per-cpu allocation of BTS and PEBS
10 * - buffer memory allocation (optional)
11 * - buffer overflow handling
12 * - buffer access
10 * 13 *
14 * It assumes:
15 * - get_task_struct on all parameter tasks
16 * - current is allowed to trace parameter tasks
11 * 17 *
12 * Since there is no user for PEBS, yet, only LBR (or branch
13 * trace store, BTS) is supported.
14 * 18 *
15 * 19 * Copyright (C) 2007-2008 Intel Corporation.
16 * Copyright (C) 2007 Intel Corporation. 20 * Markus Metzger <markus.t.metzger@intel.com>, 2007-2008
17 * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007
18 */ 21 */
19 22
23
24#ifdef CONFIG_X86_DS
25
20#include <asm/ds.h> 26#include <asm/ds.h>
21 27
22#include <linux/errno.h> 28#include <linux/errno.h>
23#include <linux/string.h> 29#include <linux/string.h>
24#include <linux/slab.h> 30#include <linux/slab.h>
31#include <linux/sched.h>
32#include <linux/mm.h>
33
34
35/*
36 * The configuration for a particular DS hardware implementation.
37 */
38struct ds_configuration {
39 /* the size of the DS structure in bytes */
40 unsigned char sizeof_ds;
41 /* the size of one pointer-typed field in the DS structure in bytes;
42 this covers the first 8 fields related to buffer management. */
43 unsigned char sizeof_field;
44 /* the size of a BTS/PEBS record in bytes */
45 unsigned char sizeof_rec[2];
46};
47static struct ds_configuration ds_cfg;
25 48
26 49
27/* 50/*
@@ -44,378 +67,747 @@
44 * (interrupt occurs when write pointer passes interrupt pointer) 67 * (interrupt occurs when write pointer passes interrupt pointer)
45 * - value to which counter is reset following counter overflow 68 * - value to which counter is reset following counter overflow
46 * 69 *
47 * On later architectures, the last branch recording hardware uses 70 * Later architectures use 64bit pointers throughout, whereas earlier
48 * 64bit pointers even in 32bit mode. 71 * architectures use 32bit pointers in 32bit mode.
49 *
50 *
51 * Branch Trace Store (BTS) records store information about control
52 * flow changes. They at least provide the following information:
53 * - source linear address
54 * - destination linear address
55 * 72 *
56 * Netburst supported a predicated bit that had been dropped in later
57 * architectures. We do not suppor it.
58 * 73 *
74 * We compute the base address for the first 8 fields based on:
75 * - the field size stored in the DS configuration
76 * - the relative field position
77 * - an offset giving the start of the respective region
59 * 78 *
60 * In order to abstract from the actual DS and BTS layout, we describe 79 * This offset is further used to index various arrays holding
61 * the access to the relevant fields. 80 * information for BTS and PEBS at the respective index.
62 * Thanks to Andi Kleen for proposing this design.
63 * 81 *
64 * The implementation, however, is not as general as it might seem. In 82 * On later 32bit processors, we only access the lower 32bit of the
65 * order to stay somewhat simple and efficient, we assume an 83 * 64bit pointer fields. The upper halves will be zeroed out.
66 * underlying unsigned type (mostly a pointer type) and we expect the
67 * field to be at least as big as that type.
68 */ 84 */
69 85
70/* 86enum ds_field {
71 * A special from_ip address to indicate that the BTS record is an 87 ds_buffer_base = 0,
72 * info record that needs to be interpreted or skipped. 88 ds_index,
73 */ 89 ds_absolute_maximum,
74#define BTS_ESCAPE_ADDRESS (-1) 90 ds_interrupt_threshold,
91};
75 92
76/* 93enum ds_qualifier {
77 * A field access descriptor 94 ds_bts = 0,
78 */ 95 ds_pebs
79struct access_desc {
80 unsigned char offset;
81 unsigned char size;
82}; 96};
83 97
98static inline unsigned long ds_get(const unsigned char *base,
99 enum ds_qualifier qual, enum ds_field field)
100{
101 base += (ds_cfg.sizeof_field * (field + (4 * qual)));
102 return *(unsigned long *)base;
103}
104
105static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
106 enum ds_field field, unsigned long value)
107{
108 base += (ds_cfg.sizeof_field * (field + (4 * qual)));
109 (*(unsigned long *)base) = value;
110}
111
112
84/* 113/*
85 * The configuration for a particular DS/BTS hardware implementation. 114 * Locking is done only for allocating BTS or PEBS resources and for
115 * guarding context and buffer memory allocation.
116 *
117 * Most functions require the current task to own the ds context part
118 * they are going to access. All the locking is done when validating
119 * access to the context.
86 */ 120 */
87struct ds_configuration { 121static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock);
88 /* the DS configuration */
89 unsigned char sizeof_ds;
90 struct access_desc bts_buffer_base;
91 struct access_desc bts_index;
92 struct access_desc bts_absolute_maximum;
93 struct access_desc bts_interrupt_threshold;
94 /* the BTS configuration */
95 unsigned char sizeof_bts;
96 struct access_desc from_ip;
97 struct access_desc to_ip;
98 /* BTS variants used to store additional information like
99 timestamps */
100 struct access_desc info_type;
101 struct access_desc info_data;
102 unsigned long debugctl_mask;
103};
104 122
105/* 123/*
106 * The global configuration used by the below accessor functions 124 * Validate that the current task is allowed to access the BTS/PEBS
125 * buffer of the parameter task.
126 *
127 * Returns 0, if access is granted; -Eerrno, otherwise.
107 */ 128 */
108static struct ds_configuration ds_cfg; 129static inline int ds_validate_access(struct ds_context *context,
130 enum ds_qualifier qual)
131{
132 if (!context)
133 return -EPERM;
134
135 if (context->owner[qual] == current)
136 return 0;
137
138 return -EPERM;
139}
140
109 141
110/* 142/*
111 * Accessor functions for some DS and BTS fields using the above 143 * We either support (system-wide) per-cpu or per-thread allocation.
112 * global ptrace_bts_cfg. 144 * We distinguish the two based on the task_struct pointer, where a
145 * NULL pointer indicates per-cpu allocation for the current cpu.
146 *
147 * Allocations are use-counted. As soon as resources are allocated,
148 * further allocations must be of the same type (per-cpu or
149 * per-thread). We model this by counting allocations (i.e. the number
150 * of tracers of a certain type) for one type negatively:
151 * =0 no tracers
152 * >0 number of per-thread tracers
153 * <0 number of per-cpu tracers
154 *
155 * The below functions to get and put tracers and to check the
156 * allocation type require the ds_lock to be held by the caller.
157 *
158 * Tracers essentially gives the number of ds contexts for a certain
159 * type of allocation.
113 */ 160 */
114static inline unsigned long get_bts_buffer_base(char *base) 161static long tracers;
162
163static inline void get_tracer(struct task_struct *task)
115{ 164{
116 return *(unsigned long *)(base + ds_cfg.bts_buffer_base.offset); 165 tracers += (task ? 1 : -1);
117} 166}
118static inline void set_bts_buffer_base(char *base, unsigned long value) 167
168static inline void put_tracer(struct task_struct *task)
119{ 169{
120 (*(unsigned long *)(base + ds_cfg.bts_buffer_base.offset)) = value; 170 tracers -= (task ? 1 : -1);
121} 171}
122static inline unsigned long get_bts_index(char *base) 172
173static inline int check_tracer(struct task_struct *task)
123{ 174{
124 return *(unsigned long *)(base + ds_cfg.bts_index.offset); 175 return (task ? (tracers >= 0) : (tracers <= 0));
125} 176}
126static inline void set_bts_index(char *base, unsigned long value) 177
178
179/*
180 * The DS context is either attached to a thread or to a cpu:
181 * - in the former case, the thread_struct contains a pointer to the
182 * attached context.
183 * - in the latter case, we use a static array of per-cpu context
184 * pointers.
185 *
186 * Contexts are use-counted. They are allocated on first access and
187 * deallocated when the last user puts the context.
188 *
189 * We distinguish between an allocating and a non-allocating get of a
190 * context:
191 * - the allocating get is used for requesting BTS/PEBS resources. It
192 * requires the caller to hold the global ds_lock.
193 * - the non-allocating get is used for all other cases. A
194 * non-existing context indicates an error. It acquires and releases
195 * the ds_lock itself for obtaining the context.
196 *
197 * A context and its DS configuration are allocated and deallocated
198 * together. A context always has a DS configuration of the
199 * appropriate size.
200 */
201static DEFINE_PER_CPU(struct ds_context *, system_context);
202
203#define this_system_context per_cpu(system_context, smp_processor_id())
204
205/*
206 * Returns the pointer to the parameter task's context or to the
207 * system-wide context, if task is NULL.
208 *
209 * Increases the use count of the returned context, if not NULL.
210 */
211static inline struct ds_context *ds_get_context(struct task_struct *task)
127{ 212{
128 (*(unsigned long *)(base + ds_cfg.bts_index.offset)) = value; 213 struct ds_context *context;
214
215 spin_lock(&ds_lock);
216
217 context = (task ? task->thread.ds_ctx : this_system_context);
218 if (context)
219 context->count++;
220
221 spin_unlock(&ds_lock);
222
223 return context;
129} 224}
130static inline unsigned long get_bts_absolute_maximum(char *base) 225
226/*
227 * Same as ds_get_context, but allocates the context and it's DS
228 * structure, if necessary; returns NULL; if out of memory.
229 *
230 * pre: requires ds_lock to be held
231 */
232static inline struct ds_context *ds_alloc_context(struct task_struct *task)
131{ 233{
132 return *(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset); 234 struct ds_context **p_context =
235 (task ? &task->thread.ds_ctx : &this_system_context);
236 struct ds_context *context = *p_context;
237
238 if (!context) {
239 context = kzalloc(sizeof(*context), GFP_KERNEL);
240
241 if (!context)
242 return NULL;
243
244 context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL);
245 if (!context->ds) {
246 kfree(context);
247 return NULL;
248 }
249
250 *p_context = context;
251
252 context->this = p_context;
253 context->task = task;
254
255 if (task)
256 set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
257
258 if (!task || (task == current))
259 wrmsr(MSR_IA32_DS_AREA, (unsigned long)context->ds, 0);
260
261 get_tracer(task);
262 }
263
264 context->count++;
265
266 return context;
133} 267}
134static inline void set_bts_absolute_maximum(char *base, unsigned long value) 268
269/*
270 * Decreases the use count of the parameter context, if not NULL.
271 * Deallocates the context, if the use count reaches zero.
272 */
273static inline void ds_put_context(struct ds_context *context)
135{ 274{
136 (*(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset)) = value; 275 if (!context)
276 return;
277
278 spin_lock(&ds_lock);
279
280 if (--context->count)
281 goto out;
282
283 *(context->this) = NULL;
284
285 if (context->task)
286 clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);
287
288 if (!context->task || (context->task == current))
289 wrmsrl(MSR_IA32_DS_AREA, 0);
290
291 put_tracer(context->task);
292
293 /* free any leftover buffers from tracers that did not
294 * deallocate them properly. */
295 kfree(context->buffer[ds_bts]);
296 kfree(context->buffer[ds_pebs]);
297 kfree(context->ds);
298 kfree(context);
299 out:
300 spin_unlock(&ds_lock);
137} 301}
138static inline unsigned long get_bts_interrupt_threshold(char *base) 302
303
304/*
305 * Handle a buffer overflow
306 *
307 * task: the task whose buffers are overflowing;
308 * NULL for a buffer overflow on the current cpu
309 * context: the ds context
310 * qual: the buffer type
311 */
312static void ds_overflow(struct task_struct *task, struct ds_context *context,
313 enum ds_qualifier qual)
139{ 314{
140 return *(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset); 315 if (!context)
316 return;
317
318 if (context->callback[qual])
319 (*context->callback[qual])(task);
320
321 /* todo: do some more overflow handling */
141} 322}
142static inline void set_bts_interrupt_threshold(char *base, unsigned long value) 323
324
325/*
326 * Allocate a non-pageable buffer of the parameter size.
327 * Checks the memory and the locked memory rlimit.
328 *
329 * Returns the buffer, if successful;
330 * NULL, if out of memory or rlimit exceeded.
331 *
332 * size: the requested buffer size in bytes
333 * pages (out): if not NULL, contains the number of pages reserved
334 */
335static inline void *ds_allocate_buffer(size_t size, unsigned int *pages)
143{ 336{
144 (*(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset)) = value; 337 unsigned long rlim, vm, pgsz;
338 void *buffer;
339
340 pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
341
342 rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
343 vm = current->mm->total_vm + pgsz;
344 if (rlim < vm)
345 return NULL;
346
347 rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
348 vm = current->mm->locked_vm + pgsz;
349 if (rlim < vm)
350 return NULL;
351
352 buffer = kzalloc(size, GFP_KERNEL);
353 if (!buffer)
354 return NULL;
355
356 current->mm->total_vm += pgsz;
357 current->mm->locked_vm += pgsz;
358
359 if (pages)
360 *pages = pgsz;
361
362 return buffer;
145} 363}
146static inline unsigned long get_from_ip(char *base) 364
365static int ds_request(struct task_struct *task, void *base, size_t size,
366 ds_ovfl_callback_t ovfl, enum ds_qualifier qual)
147{ 367{
148 return *(unsigned long *)(base + ds_cfg.from_ip.offset); 368 struct ds_context *context;
369 unsigned long buffer, adj;
370 const unsigned long alignment = (1 << 3);
371 int error = 0;
372
373 if (!ds_cfg.sizeof_ds)
374 return -EOPNOTSUPP;
375
376 /* we require some space to do alignment adjustments below */
377 if (size < (alignment + ds_cfg.sizeof_rec[qual]))
378 return -EINVAL;
379
380 /* buffer overflow notification is not yet implemented */
381 if (ovfl)
382 return -EOPNOTSUPP;
383
384
385 spin_lock(&ds_lock);
386
387 if (!check_tracer(task))
388 return -EPERM;
389
390 error = -ENOMEM;
391 context = ds_alloc_context(task);
392 if (!context)
393 goto out_unlock;
394
395 error = -EALREADY;
396 if (context->owner[qual] == current)
397 goto out_unlock;
398 error = -EPERM;
399 if (context->owner[qual] != NULL)
400 goto out_unlock;
401 context->owner[qual] = current;
402
403 spin_unlock(&ds_lock);
404
405
406 error = -ENOMEM;
407 if (!base) {
408 base = ds_allocate_buffer(size, &context->pages[qual]);
409 if (!base)
410 goto out_release;
411
412 context->buffer[qual] = base;
413 }
414 error = 0;
415
416 context->callback[qual] = ovfl;
417
418 /* adjust the buffer address and size to meet alignment
419 * constraints:
420 * - buffer is double-word aligned
421 * - size is multiple of record size
422 *
423 * We checked the size at the very beginning; we have enough
424 * space to do the adjustment.
425 */
426 buffer = (unsigned long)base;
427
428 adj = ALIGN(buffer, alignment) - buffer;
429 buffer += adj;
430 size -= adj;
431
432 size /= ds_cfg.sizeof_rec[qual];
433 size *= ds_cfg.sizeof_rec[qual];
434
435 ds_set(context->ds, qual, ds_buffer_base, buffer);
436 ds_set(context->ds, qual, ds_index, buffer);
437 ds_set(context->ds, qual, ds_absolute_maximum, buffer + size);
438
439 if (ovfl) {
440 /* todo: select a suitable interrupt threshold */
441 } else
442 ds_set(context->ds, qual,
443 ds_interrupt_threshold, buffer + size + 1);
444
445 /* we keep the context until ds_release */
446 return error;
447
448 out_release:
449 context->owner[qual] = NULL;
450 ds_put_context(context);
451 return error;
452
453 out_unlock:
454 spin_unlock(&ds_lock);
455 ds_put_context(context);
456 return error;
149} 457}
150static inline void set_from_ip(char *base, unsigned long value) 458
459int ds_request_bts(struct task_struct *task, void *base, size_t size,
460 ds_ovfl_callback_t ovfl)
151{ 461{
152 (*(unsigned long *)(base + ds_cfg.from_ip.offset)) = value; 462 return ds_request(task, base, size, ovfl, ds_bts);
153} 463}
154static inline unsigned long get_to_ip(char *base) 464
465int ds_request_pebs(struct task_struct *task, void *base, size_t size,
466 ds_ovfl_callback_t ovfl)
155{ 467{
156 return *(unsigned long *)(base + ds_cfg.to_ip.offset); 468 return ds_request(task, base, size, ovfl, ds_pebs);
157} 469}
158static inline void set_to_ip(char *base, unsigned long value) 470
471static int ds_release(struct task_struct *task, enum ds_qualifier qual)
159{ 472{
160 (*(unsigned long *)(base + ds_cfg.to_ip.offset)) = value; 473 struct ds_context *context;
474 int error;
475
476 context = ds_get_context(task);
477 error = ds_validate_access(context, qual);
478 if (error < 0)
479 goto out;
480
481 kfree(context->buffer[qual]);
482 context->buffer[qual] = 0;
483
484 current->mm->total_vm -= context->pages[qual];
485 current->mm->locked_vm -= context->pages[qual];
486 context->pages[qual] = 0;
487 context->owner[qual] = 0;
488
489 /*
490 * we put the context twice:
491 * once for the ds_get_context
492 * once for the corresponding ds_request
493 */
494 ds_put_context(context);
495 out:
496 ds_put_context(context);
497 return error;
161} 498}
162static inline unsigned char get_info_type(char *base) 499
500int ds_release_bts(struct task_struct *task)
163{ 501{
164 return *(unsigned char *)(base + ds_cfg.info_type.offset); 502 return ds_release(task, ds_bts);
165} 503}
166static inline void set_info_type(char *base, unsigned char value) 504
505int ds_release_pebs(struct task_struct *task)
167{ 506{
168 (*(unsigned char *)(base + ds_cfg.info_type.offset)) = value; 507 return ds_release(task, ds_pebs);
169} 508}
170static inline unsigned long get_info_data(char *base) 509
510static int ds_get_index(struct task_struct *task, size_t *pos,
511 enum ds_qualifier qual)
171{ 512{
172 return *(unsigned long *)(base + ds_cfg.info_data.offset); 513 struct ds_context *context;
514 unsigned long base, index;
515 int error;
516
517 context = ds_get_context(task);
518 error = ds_validate_access(context, qual);
519 if (error < 0)
520 goto out;
521
522 base = ds_get(context->ds, qual, ds_buffer_base);
523 index = ds_get(context->ds, qual, ds_index);
524
525 error = ((index - base) / ds_cfg.sizeof_rec[qual]);
526 if (pos)
527 *pos = error;
528 out:
529 ds_put_context(context);
530 return error;
173} 531}
174static inline void set_info_data(char *base, unsigned long value) 532
533int ds_get_bts_index(struct task_struct *task, size_t *pos)
175{ 534{
176 (*(unsigned long *)(base + ds_cfg.info_data.offset)) = value; 535 return ds_get_index(task, pos, ds_bts);
177} 536}
178 537
538int ds_get_pebs_index(struct task_struct *task, size_t *pos)
539{
540 return ds_get_index(task, pos, ds_pebs);
541}
179 542
180int ds_allocate(void **dsp, size_t bts_size_in_bytes) 543static int ds_get_end(struct task_struct *task, size_t *pos,
544 enum ds_qualifier qual)
181{ 545{
182 size_t bts_size_in_records; 546 struct ds_context *context;
183 unsigned long bts; 547 unsigned long base, end;
184 void *ds; 548 int error;
549
550 context = ds_get_context(task);
551 error = ds_validate_access(context, qual);
552 if (error < 0)
553 goto out;
554
555 base = ds_get(context->ds, qual, ds_buffer_base);
556 end = ds_get(context->ds, qual, ds_absolute_maximum);
557
558 error = ((end - base) / ds_cfg.sizeof_rec[qual]);
559 if (pos)
560 *pos = error;
561 out:
562 ds_put_context(context);
563 return error;
564}
185 565
186 if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) 566int ds_get_bts_end(struct task_struct *task, size_t *pos)
187 return -EOPNOTSUPP; 567{
568 return ds_get_end(task, pos, ds_bts);
569}
188 570
189 if (bts_size_in_bytes < 0) 571int ds_get_pebs_end(struct task_struct *task, size_t *pos)
190 return -EINVAL; 572{
573 return ds_get_end(task, pos, ds_pebs);
574}
191 575
192 bts_size_in_records = 576static int ds_access(struct task_struct *task, size_t index,
193 bts_size_in_bytes / ds_cfg.sizeof_bts; 577 const void **record, enum ds_qualifier qual)
194 bts_size_in_bytes = 578{
195 bts_size_in_records * ds_cfg.sizeof_bts; 579 struct ds_context *context;
580 unsigned long base, idx;
581 int error;
196 582
197 if (bts_size_in_bytes <= 0) 583 if (!record)
198 return -EINVAL; 584 return -EINVAL;
199 585
200 bts = (unsigned long)kzalloc(bts_size_in_bytes, GFP_KERNEL); 586 context = ds_get_context(task);
201 587 error = ds_validate_access(context, qual);
202 if (!bts) 588 if (error < 0)
203 return -ENOMEM; 589 goto out;
204 590
205 ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); 591 base = ds_get(context->ds, qual, ds_buffer_base);
592 idx = base + (index * ds_cfg.sizeof_rec[qual]);
206 593
207 if (!ds) { 594 error = -EINVAL;
208 kfree((void *)bts); 595 if (idx > ds_get(context->ds, qual, ds_absolute_maximum))
209 return -ENOMEM; 596 goto out;
210 }
211
212 set_bts_buffer_base(ds, bts);
213 set_bts_index(ds, bts);
214 set_bts_absolute_maximum(ds, bts + bts_size_in_bytes);
215 set_bts_interrupt_threshold(ds, bts + bts_size_in_bytes + 1);
216 597
217 *dsp = ds; 598 *record = (const void *)idx;
218 return 0; 599 error = ds_cfg.sizeof_rec[qual];
600 out:
601 ds_put_context(context);
602 return error;
219} 603}
220 604
221int ds_free(void **dsp) 605int ds_access_bts(struct task_struct *task, size_t index, const void **record)
222{ 606{
223 if (*dsp) { 607 return ds_access(task, index, record, ds_bts);
224 kfree((void *)get_bts_buffer_base(*dsp));
225 kfree(*dsp);
226 *dsp = NULL;
227 }
228 return 0;
229} 608}
230 609
231int ds_get_bts_size(void *ds) 610int ds_access_pebs(struct task_struct *task, size_t index, const void **record)
232{ 611{
233 int size_in_bytes; 612 return ds_access(task, index, record, ds_pebs);
234
235 if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
236 return -EOPNOTSUPP;
237
238 if (!ds)
239 return 0;
240
241 size_in_bytes =
242 get_bts_absolute_maximum(ds) -
243 get_bts_buffer_base(ds);
244 return size_in_bytes;
245} 613}
246 614
247int ds_get_bts_end(void *ds) 615static int ds_write(struct task_struct *task, const void *record, size_t size,
616 enum ds_qualifier qual, int force)
248{ 617{
249 int size_in_bytes = ds_get_bts_size(ds); 618 struct ds_context *context;
250 619 int error;
251 if (size_in_bytes <= 0)
252 return size_in_bytes;
253 620
254 return size_in_bytes / ds_cfg.sizeof_bts; 621 if (!record)
255} 622 return -EINVAL;
256 623
257int ds_get_bts_index(void *ds) 624 error = -EPERM;
258{ 625 context = ds_get_context(task);
259 int index_offset_in_bytes; 626 if (!context)
627 goto out;
260 628
261 if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) 629 if (!force) {
262 return -EOPNOTSUPP; 630 error = ds_validate_access(context, qual);
631 if (error < 0)
632 goto out;
633 }
263 634
264 index_offset_in_bytes = 635 error = 0;
265 get_bts_index(ds) - 636 while (size) {
266 get_bts_buffer_base(ds); 637 unsigned long base, index, end, write_end, int_th;
638 unsigned long write_size, adj_write_size;
639
640 /*
641 * write as much as possible without producing an
642 * overflow interrupt.
643 *
644 * interrupt_threshold must either be
645 * - bigger than absolute_maximum or
646 * - point to a record between buffer_base and absolute_maximum
647 *
648 * index points to a valid record.
649 */
650 base = ds_get(context->ds, qual, ds_buffer_base);
651 index = ds_get(context->ds, qual, ds_index);
652 end = ds_get(context->ds, qual, ds_absolute_maximum);
653 int_th = ds_get(context->ds, qual, ds_interrupt_threshold);
654
655 write_end = min(end, int_th);
656
657 /* if we are already beyond the interrupt threshold,
658 * we fill the entire buffer */
659 if (write_end <= index)
660 write_end = end;
661
662 if (write_end <= index)
663 goto out;
664
665 write_size = min((unsigned long) size, write_end - index);
666 memcpy((void *)index, record, write_size);
667
668 record = (const char *)record + write_size;
669 size -= write_size;
670 error += write_size;
671
672 adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
673 adj_write_size *= ds_cfg.sizeof_rec[qual];
674
675 /* zero out trailing bytes */
676 memset((char *)index + write_size, 0,
677 adj_write_size - write_size);
678 index += adj_write_size;
679
680 if (index >= end)
681 index = base;
682 ds_set(context->ds, qual, ds_index, index);
683
684 if (index >= int_th)
685 ds_overflow(task, context, qual);
686 }
267 687
268 return index_offset_in_bytes / ds_cfg.sizeof_bts; 688 out:
689 ds_put_context(context);
690 return error;
269} 691}
270 692
271int ds_set_overflow(void *ds, int method) 693int ds_write_bts(struct task_struct *task, const void *record, size_t size)
272{ 694{
273 switch (method) { 695 return ds_write(task, record, size, ds_bts, /* force = */ 0);
274 case DS_O_SIGNAL:
275 return -EOPNOTSUPP;
276 case DS_O_WRAP:
277 return 0;
278 default:
279 return -EINVAL;
280 }
281} 696}
282 697
283int ds_get_overflow(void *ds) 698int ds_write_pebs(struct task_struct *task, const void *record, size_t size)
284{ 699{
285 return DS_O_WRAP; 700 return ds_write(task, record, size, ds_pebs, /* force = */ 0);
286} 701}
287 702
288int ds_clear(void *ds) 703int ds_unchecked_write_bts(struct task_struct *task,
704 const void *record, size_t size)
289{ 705{
290 int bts_size = ds_get_bts_size(ds); 706 return ds_write(task, record, size, ds_bts, /* force = */ 1);
291 unsigned long bts_base;
292
293 if (bts_size <= 0)
294 return bts_size;
295
296 bts_base = get_bts_buffer_base(ds);
297 memset((void *)bts_base, 0, bts_size);
298
299 set_bts_index(ds, bts_base);
300 return 0;
301} 707}
302 708
303int ds_read_bts(void *ds, int index, struct bts_struct *out) 709int ds_unchecked_write_pebs(struct task_struct *task,
710 const void *record, size_t size)
304{ 711{
305 void *bts; 712 return ds_write(task, record, size, ds_pebs, /* force = */ 1);
713}
306 714
307 if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) 715static int ds_reset_or_clear(struct task_struct *task,
308 return -EOPNOTSUPP; 716 enum ds_qualifier qual, int clear)
717{
718 struct ds_context *context;
719 unsigned long base, end;
720 int error;
309 721
310 if (index < 0) 722 context = ds_get_context(task);
311 return -EINVAL; 723 error = ds_validate_access(context, qual);
724 if (error < 0)
725 goto out;
312 726
313 if (index >= ds_get_bts_size(ds)) 727 base = ds_get(context->ds, qual, ds_buffer_base);
314 return -EINVAL; 728 end = ds_get(context->ds, qual, ds_absolute_maximum);
315 729
316 bts = (void *)(get_bts_buffer_base(ds) + (index * ds_cfg.sizeof_bts)); 730 if (clear)
731 memset((void *)base, 0, end - base);
317 732
318 memset(out, 0, sizeof(*out)); 733 ds_set(context->ds, qual, ds_index, base);
319 if (get_from_ip(bts) == BTS_ESCAPE_ADDRESS) {
320 out->qualifier = get_info_type(bts);
321 out->variant.jiffies = get_info_data(bts);
322 } else {
323 out->qualifier = BTS_BRANCH;
324 out->variant.lbr.from_ip = get_from_ip(bts);
325 out->variant.lbr.to_ip = get_to_ip(bts);
326 }
327 734
328 return sizeof(*out);; 735 error = 0;
736 out:
737 ds_put_context(context);
738 return error;
329} 739}
330 740
331int ds_write_bts(void *ds, const struct bts_struct *in) 741int ds_reset_bts(struct task_struct *task)
332{ 742{
333 unsigned long bts; 743 return ds_reset_or_clear(task, ds_bts, /* clear = */ 0);
334 744}
335 if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
336 return -EOPNOTSUPP;
337
338 if (ds_get_bts_size(ds) <= 0)
339 return -ENXIO;
340 745
341 bts = get_bts_index(ds); 746int ds_reset_pebs(struct task_struct *task)
747{
748 return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0);
749}
342 750
343 memset((void *)bts, 0, ds_cfg.sizeof_bts); 751int ds_clear_bts(struct task_struct *task)
344 switch (in->qualifier) { 752{
345 case BTS_INVALID: 753 return ds_reset_or_clear(task, ds_bts, /* clear = */ 1);
346 break; 754}
347 755
348 case BTS_BRANCH: 756int ds_clear_pebs(struct task_struct *task)
349 set_from_ip((void *)bts, in->variant.lbr.from_ip); 757{
350 set_to_ip((void *)bts, in->variant.lbr.to_ip); 758 return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1);
351 break; 759}
352 760
353 case BTS_TASK_ARRIVES: 761int ds_get_pebs_reset(struct task_struct *task, u64 *value)
354 case BTS_TASK_DEPARTS: 762{
355 set_from_ip((void *)bts, BTS_ESCAPE_ADDRESS); 763 struct ds_context *context;
356 set_info_type((void *)bts, in->qualifier); 764 int error;
357 set_info_data((void *)bts, in->variant.jiffies);
358 break;
359 765
360 default: 766 if (!value)
361 return -EINVAL; 767 return -EINVAL;
362 }
363 768
364 bts = bts + ds_cfg.sizeof_bts; 769 context = ds_get_context(task);
365 if (bts >= get_bts_absolute_maximum(ds)) 770 error = ds_validate_access(context, ds_pebs);
366 bts = get_bts_buffer_base(ds); 771 if (error < 0)
367 set_bts_index(ds, bts); 772 goto out;
368 773
369 return ds_cfg.sizeof_bts; 774 *value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8));
775
776 error = 0;
777 out:
778 ds_put_context(context);
779 return error;
370} 780}
371 781
372unsigned long ds_debugctl_mask(void) 782int ds_set_pebs_reset(struct task_struct *task, u64 value)
373{ 783{
374 return ds_cfg.debugctl_mask; 784 struct ds_context *context;
375} 785 int error;
376 786
377#ifdef __i386__ 787 context = ds_get_context(task);
378static const struct ds_configuration ds_cfg_netburst = { 788 error = ds_validate_access(context, ds_pebs);
379 .sizeof_ds = 9 * 4, 789 if (error < 0)
380 .bts_buffer_base = { 0, 4 }, 790 goto out;
381 .bts_index = { 4, 4 },
382 .bts_absolute_maximum = { 8, 4 },
383 .bts_interrupt_threshold = { 12, 4 },
384 .sizeof_bts = 3 * 4,
385 .from_ip = { 0, 4 },
386 .to_ip = { 4, 4 },
387 .info_type = { 4, 1 },
388 .info_data = { 8, 4 },
389 .debugctl_mask = (1<<2)|(1<<3)
390};
391 791
392static const struct ds_configuration ds_cfg_pentium_m = { 792 *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value;
393 .sizeof_ds = 9 * 4, 793
394 .bts_buffer_base = { 0, 4 }, 794 error = 0;
395 .bts_index = { 4, 4 }, 795 out:
396 .bts_absolute_maximum = { 8, 4 }, 796 ds_put_context(context);
397 .bts_interrupt_threshold = { 12, 4 }, 797 return error;
398 .sizeof_bts = 3 * 4, 798}
399 .from_ip = { 0, 4 }, 799
400 .to_ip = { 4, 4 }, 800static const struct ds_configuration ds_cfg_var = {
401 .info_type = { 4, 1 }, 801 .sizeof_ds = sizeof(long) * 12,
402 .info_data = { 8, 4 }, 802 .sizeof_field = sizeof(long),
403 .debugctl_mask = (1<<6)|(1<<7) 803 .sizeof_rec[ds_bts] = sizeof(long) * 3,
804 .sizeof_rec[ds_pebs] = sizeof(long) * 10
404}; 805};
405#endif /* _i386_ */ 806static const struct ds_configuration ds_cfg_64 = {
406 807 .sizeof_ds = 8 * 12,
407static const struct ds_configuration ds_cfg_core2 = { 808 .sizeof_field = 8,
408 .sizeof_ds = 9 * 8, 809 .sizeof_rec[ds_bts] = 8 * 3,
409 .bts_buffer_base = { 0, 8 }, 810 .sizeof_rec[ds_pebs] = 8 * 10
410 .bts_index = { 8, 8 },
411 .bts_absolute_maximum = { 16, 8 },
412 .bts_interrupt_threshold = { 24, 8 },
413 .sizeof_bts = 3 * 8,
414 .from_ip = { 0, 8 },
415 .to_ip = { 8, 8 },
416 .info_type = { 8, 1 },
417 .info_data = { 16, 8 },
418 .debugctl_mask = (1<<6)|(1<<7)|(1<<9)
419}; 811};
420 812
421static inline void 813static inline void
@@ -429,14 +821,13 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
429 switch (c->x86) { 821 switch (c->x86) {
430 case 0x6: 822 case 0x6:
431 switch (c->x86_model) { 823 switch (c->x86_model) {
432#ifdef __i386__
433 case 0xD: 824 case 0xD:
434 case 0xE: /* Pentium M */ 825 case 0xE: /* Pentium M */
435 ds_configure(&ds_cfg_pentium_m); 826 ds_configure(&ds_cfg_var);
436 break; 827 break;
437#endif /* _i386_ */
438 case 0xF: /* Core2 */ 828 case 0xF: /* Core2 */
439 ds_configure(&ds_cfg_core2); 829 case 0x1C: /* Atom */
830 ds_configure(&ds_cfg_64);
440 break; 831 break;
441 default: 832 default:
442 /* sorry, don't know about them */ 833 /* sorry, don't know about them */
@@ -445,13 +836,11 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
445 break; 836 break;
446 case 0xF: 837 case 0xF:
447 switch (c->x86_model) { 838 switch (c->x86_model) {
448#ifdef __i386__
449 case 0x0: 839 case 0x0:
450 case 0x1: 840 case 0x1:
451 case 0x2: /* Netburst */ 841 case 0x2: /* Netburst */
452 ds_configure(&ds_cfg_netburst); 842 ds_configure(&ds_cfg_var);
453 break; 843 break;
454#endif /* _i386_ */
455 default: 844 default:
456 /* sorry, don't know about them */ 845 /* sorry, don't know about them */
457 break; 846 break;
@@ -462,3 +851,14 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
462 break; 851 break;
463 } 852 }
464} 853}
854
855void ds_free(struct ds_context *context)
856{
857 /* This is called when the task owning the parameter context
858 * is dying. There should not be any user of that context left
859 * to disturb us, anymore. */
860 unsigned long leftovers = context->count;
861 while (leftovers--)
862 ds_put_context(context);
863}
864#endif /* CONFIG_X86_DS */
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 2c9abc95e026..1962d27c6b82 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -277,6 +277,14 @@ void exit_thread(void)
277 tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; 277 tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
278 put_cpu(); 278 put_cpu();
279 } 279 }
280#ifdef CONFIG_X86_DS
281 /* Free any DS contexts that have not been properly released. */
282 if (unlikely(current->thread.ds_ctx)) {
283 /* we clear debugctl to make sure DS is not used. */
284 update_debugctlmsr(0);
285 ds_free(current->thread.ds_ctx);
286 }
287#endif /* CONFIG_X86_DS */
280} 288}
281 289
282void flush_thread(void) 290void flush_thread(void)
@@ -438,6 +446,35 @@ int set_tsc_mode(unsigned int val)
438 return 0; 446 return 0;
439} 447}
440 448
449#ifdef CONFIG_X86_DS
450static int update_debugctl(struct thread_struct *prev,
451 struct thread_struct *next, unsigned long debugctl)
452{
453 unsigned long ds_prev = 0;
454 unsigned long ds_next = 0;
455
456 if (prev->ds_ctx)
457 ds_prev = (unsigned long)prev->ds_ctx->ds;
458 if (next->ds_ctx)
459 ds_next = (unsigned long)next->ds_ctx->ds;
460
461 if (ds_next != ds_prev) {
462 /* we clear debugctl to make sure DS
463 * is not in use when we change it */
464 debugctl = 0;
465 update_debugctlmsr(0);
466 wrmsr(MSR_IA32_DS_AREA, ds_next, 0);
467 }
468 return debugctl;
469}
470#else
471static int update_debugctl(struct thread_struct *prev,
472 struct thread_struct *next, unsigned long debugctl)
473{
474 return debugctl;
475}
476#endif /* CONFIG_X86_DS */
477
441static noinline void 478static noinline void
442__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, 479__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
443 struct tss_struct *tss) 480 struct tss_struct *tss)
@@ -448,14 +485,7 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
448 prev = &prev_p->thread; 485 prev = &prev_p->thread;
449 next = &next_p->thread; 486 next = &next_p->thread;
450 487
451 debugctl = prev->debugctlmsr; 488 debugctl = update_debugctl(prev, next, prev->debugctlmsr);
452 if (next->ds_area_msr != prev->ds_area_msr) {
453 /* we clear debugctl to make sure DS
454 * is not in use when we change it */
455 debugctl = 0;
456 update_debugctlmsr(0);
457 wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0);
458 }
459 489
460 if (next->debugctlmsr != debugctl) 490 if (next->debugctlmsr != debugctl)
461 update_debugctlmsr(next->debugctlmsr); 491 update_debugctlmsr(next->debugctlmsr);
@@ -479,13 +509,13 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
479 hard_enable_TSC(); 509 hard_enable_TSC();
480 } 510 }
481 511
482#ifdef X86_BTS 512#ifdef CONFIG_X86_PTRACE_BTS
483 if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) 513 if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
484 ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); 514 ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
485 515
486 if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) 516 if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
487 ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); 517 ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
488#endif 518#endif /* CONFIG_X86_PTRACE_BTS */
489 519
490 520
491 if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { 521 if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 00263c9e6500..08a9df08adba 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -239,6 +239,14 @@ void exit_thread(void)
239 t->io_bitmap_max = 0; 239 t->io_bitmap_max = 0;
240 put_cpu(); 240 put_cpu();
241 } 241 }
242#ifdef CONFIG_X86_DS
243 /* Free any DS contexts that have not been properly released. */
244 if (unlikely(t->ds_ctx)) {
245 /* we clear debugctl to make sure DS is not used. */
246 update_debugctlmsr(0);
247 ds_free(t->ds_ctx);
248 }
249#endif /* CONFIG_X86_DS */
242} 250}
243 251
244void flush_thread(void) 252void flush_thread(void)
@@ -472,13 +480,27 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
472 next = &next_p->thread; 480 next = &next_p->thread;
473 481
474 debugctl = prev->debugctlmsr; 482 debugctl = prev->debugctlmsr;
475 if (next->ds_area_msr != prev->ds_area_msr) { 483
476 /* we clear debugctl to make sure DS 484#ifdef CONFIG_X86_DS
477 * is not in use when we change it */ 485 {
478 debugctl = 0; 486 unsigned long ds_prev = 0, ds_next = 0;
479 update_debugctlmsr(0); 487
480 wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr); 488 if (prev->ds_ctx)
489 ds_prev = (unsigned long)prev->ds_ctx->ds;
490 if (next->ds_ctx)
491 ds_next = (unsigned long)next->ds_ctx->ds;
492
493 if (ds_next != ds_prev) {
494 /*
495 * We clear debugctl to make sure DS
496 * is not in use when we change it:
497 */
498 debugctl = 0;
499 update_debugctlmsr(0);
500 wrmsrl(MSR_IA32_DS_AREA, ds_next);
501 }
481 } 502 }
503#endif /* CONFIG_X86_DS */
482 504
483 if (next->debugctlmsr != debugctl) 505 if (next->debugctlmsr != debugctl)
484 update_debugctlmsr(next->debugctlmsr); 506 update_debugctlmsr(next->debugctlmsr);
@@ -516,13 +538,13 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
516 memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); 538 memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
517 } 539 }
518 540
519#ifdef X86_BTS 541#ifdef CONFIG_X86_PTRACE_BTS
520 if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) 542 if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
521 ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); 543 ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
522 544
523 if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) 545 if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
524 ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); 546 ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
525#endif 547#endif /* CONFIG_X86_PTRACE_BTS */
526} 548}
527 549
528/* 550/*
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index fc3e8dcd9da6..58ce4b50211b 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -554,45 +554,115 @@ static int ptrace_set_debugreg(struct task_struct *child,
554 return 0; 554 return 0;
555} 555}
556 556
557#ifdef X86_BTS 557#ifdef CONFIG_X86_PTRACE_BTS
558/*
559 * The configuration for a particular BTS hardware implementation.
560 */
561struct bts_configuration {
562 /* the size of a BTS record in bytes; at most BTS_MAX_RECORD_SIZE */
563 unsigned char sizeof_bts;
564 /* the size of a field in the BTS record in bytes */
565 unsigned char sizeof_field;
566 /* a bitmask to enable/disable BTS in DEBUGCTL MSR */
567 unsigned long debugctl_mask;
568};
569static struct bts_configuration bts_cfg;
570
571#define BTS_MAX_RECORD_SIZE (8 * 3)
572
573
574/*
575 * Branch Trace Store (BTS) uses the following format. Different
576 * architectures vary in the size of those fields.
577 * - source linear address
578 * - destination linear address
579 * - flags
580 *
581 * Later architectures use 64bit pointers throughout, whereas earlier
582 * architectures use 32bit pointers in 32bit mode.
583 *
584 * We compute the base address for the first 8 fields based on:
585 * - the field size stored in the DS configuration
586 * - the relative field position
587 *
588 * In order to store additional information in the BTS buffer, we use
589 * a special source address to indicate that the record requires
590 * special interpretation.
591 *
592 * Netburst indicated via a bit in the flags field whether the branch
593 * was predicted; this is ignored.
594 */
595
596enum bts_field {
597 bts_from = 0,
598 bts_to,
599 bts_flags,
600
601 bts_escape = (unsigned long)-1,
602 bts_qual = bts_to,
603 bts_jiffies = bts_flags
604};
558 605
559static int ptrace_bts_get_size(struct task_struct *child) 606static inline unsigned long bts_get(const char *base, enum bts_field field)
560{ 607{
561 if (!child->thread.ds_area_msr) 608 base += (bts_cfg.sizeof_field * field);
562 return -ENXIO; 609 return *(unsigned long *)base;
610}
611
612static inline void bts_set(char *base, enum bts_field field, unsigned long val)
613{
614 base += (bts_cfg.sizeof_field * field);;
615 (*(unsigned long *)base) = val;
616}
563 617
564 return ds_get_bts_index((void *)child->thread.ds_area_msr); 618/*
619 * Translate a BTS record from the raw format into the bts_struct format
620 *
621 * out (out): bts_struct interpretation
622 * raw: raw BTS record
623 */
624static void ptrace_bts_translate_record(struct bts_struct *out, const void *raw)
625{
626 memset(out, 0, sizeof(*out));
627 if (bts_get(raw, bts_from) == bts_escape) {
628 out->qualifier = bts_get(raw, bts_qual);
629 out->variant.jiffies = bts_get(raw, bts_jiffies);
630 } else {
631 out->qualifier = BTS_BRANCH;
632 out->variant.lbr.from_ip = bts_get(raw, bts_from);
633 out->variant.lbr.to_ip = bts_get(raw, bts_to);
634 }
565} 635}
566 636
567static int ptrace_bts_read_record(struct task_struct *child, 637static int ptrace_bts_read_record(struct task_struct *child, size_t index,
568 long index,
569 struct bts_struct __user *out) 638 struct bts_struct __user *out)
570{ 639{
571 struct bts_struct ret; 640 struct bts_struct ret;
572 int retval; 641 const void *bts_record;
573 int bts_end; 642 size_t bts_index, bts_end;
574 int bts_index; 643 int error;
575 644
576 if (!child->thread.ds_area_msr) 645 error = ds_get_bts_end(child, &bts_end);
577 return -ENXIO; 646 if (error < 0)
647 return error;
578 648
579 if (index < 0)
580 return -EINVAL;
581
582 bts_end = ds_get_bts_end((void *)child->thread.ds_area_msr);
583 if (bts_end <= index) 649 if (bts_end <= index)
584 return -EINVAL; 650 return -EINVAL;
585 651
652 error = ds_get_bts_index(child, &bts_index);
653 if (error < 0)
654 return error;
655
586 /* translate the ptrace bts index into the ds bts index */ 656 /* translate the ptrace bts index into the ds bts index */
587 bts_index = ds_get_bts_index((void *)child->thread.ds_area_msr); 657 bts_index += bts_end - (index + 1);
588 bts_index -= (index + 1); 658 if (bts_end <= bts_index)
589 if (bts_index < 0) 659 bts_index -= bts_end;
590 bts_index += bts_end; 660
661 error = ds_access_bts(child, bts_index, &bts_record);
662 if (error < 0)
663 return error;
591 664
592 retval = ds_read_bts((void *)child->thread.ds_area_msr, 665 ptrace_bts_translate_record(&ret, bts_record);
593 bts_index, &ret);
594 if (retval < 0)
595 return retval;
596 666
597 if (copy_to_user(out, &ret, sizeof(ret))) 667 if (copy_to_user(out, &ret, sizeof(ret)))
598 return -EFAULT; 668 return -EFAULT;
@@ -600,101 +670,106 @@ static int ptrace_bts_read_record(struct task_struct *child,
600 return sizeof(ret); 670 return sizeof(ret);
601} 671}
602 672
603static int ptrace_bts_clear(struct task_struct *child)
604{
605 if (!child->thread.ds_area_msr)
606 return -ENXIO;
607
608 return ds_clear((void *)child->thread.ds_area_msr);
609}
610
611static int ptrace_bts_drain(struct task_struct *child, 673static int ptrace_bts_drain(struct task_struct *child,
612 long size, 674 long size,
613 struct bts_struct __user *out) 675 struct bts_struct __user *out)
614{ 676{
615 int end, i; 677 struct bts_struct ret;
616 void *ds = (void *)child->thread.ds_area_msr; 678 const unsigned char *raw;
617 679 size_t end, i;
618 if (!ds) 680 int error;
619 return -ENXIO;
620 681
621 end = ds_get_bts_index(ds); 682 error = ds_get_bts_index(child, &end);
622 if (end <= 0) 683 if (error < 0)
623 return end; 684 return error;
624 685
625 if (size < (end * sizeof(struct bts_struct))) 686 if (size < (end * sizeof(struct bts_struct)))
626 return -EIO; 687 return -EIO;
627 688
628 for (i = 0; i < end; i++, out++) { 689 error = ds_access_bts(child, 0, (const void **)&raw);
629 struct bts_struct ret; 690 if (error < 0)
630 int retval; 691 return error;
631 692
632 retval = ds_read_bts(ds, i, &ret); 693 for (i = 0; i < end; i++, out++, raw += bts_cfg.sizeof_bts) {
633 if (retval < 0) 694 ptrace_bts_translate_record(&ret, raw);
634 return retval;
635 695
636 if (copy_to_user(out, &ret, sizeof(ret))) 696 if (copy_to_user(out, &ret, sizeof(ret)))
637 return -EFAULT; 697 return -EFAULT;
638 } 698 }
639 699
640 ds_clear(ds); 700 error = ds_clear_bts(child);
701 if (error < 0)
702 return error;
641 703
642 return end; 704 return end;
643} 705}
644 706
707static void ptrace_bts_ovfl(struct task_struct *child)
708{
709 send_sig(child->thread.bts_ovfl_signal, child, 0);
710}
711
645static int ptrace_bts_config(struct task_struct *child, 712static int ptrace_bts_config(struct task_struct *child,
646 long cfg_size, 713 long cfg_size,
647 const struct ptrace_bts_config __user *ucfg) 714 const struct ptrace_bts_config __user *ucfg)
648{ 715{
649 struct ptrace_bts_config cfg; 716 struct ptrace_bts_config cfg;
650 int bts_size, ret = 0; 717 int error = 0;
651 void *ds; 718
719 error = -EOPNOTSUPP;
720 if (!bts_cfg.sizeof_bts)
721 goto errout;
652 722
723 error = -EIO;
653 if (cfg_size < sizeof(cfg)) 724 if (cfg_size < sizeof(cfg))
654 return -EIO; 725 goto errout;
655 726
727 error = -EFAULT;
656 if (copy_from_user(&cfg, ucfg, sizeof(cfg))) 728 if (copy_from_user(&cfg, ucfg, sizeof(cfg)))
657 return -EFAULT; 729 goto errout;
658 730
659 if ((int)cfg.size < 0) 731 error = -EINVAL;
660 return -EINVAL; 732 if ((cfg.flags & PTRACE_BTS_O_SIGNAL) &&
733 !(cfg.flags & PTRACE_BTS_O_ALLOC))
734 goto errout;
661 735
662 bts_size = 0; 736 if (cfg.flags & PTRACE_BTS_O_ALLOC) {
663 ds = (void *)child->thread.ds_area_msr; 737 ds_ovfl_callback_t ovfl = 0;
664 if (ds) { 738 unsigned int sig = 0;
665 bts_size = ds_get_bts_size(ds); 739
666 if (bts_size < 0) 740 /* we ignore the error in case we were not tracing child */
667 return bts_size; 741 (void)ds_release_bts(child);
668 } 742
669 cfg.size = PAGE_ALIGN(cfg.size); 743 if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
744 if (!cfg.signal)
745 goto errout;
746
747 sig = cfg.signal;
748 ovfl = ptrace_bts_ovfl;
749 }
670 750
671 if (bts_size != cfg.size) { 751 error = ds_request_bts(child, /* base = */ 0, cfg.size, ovfl);
672 ret = ptrace_bts_realloc(child, cfg.size, 752 if (error < 0)
673 cfg.flags & PTRACE_BTS_O_CUT_SIZE);
674 if (ret < 0)
675 goto errout; 753 goto errout;
676 754
677 ds = (void *)child->thread.ds_area_msr; 755 child->thread.bts_ovfl_signal = sig;
678 } 756 }
679 757
680 if (cfg.flags & PTRACE_BTS_O_SIGNAL) 758 error = -EINVAL;
681 ret = ds_set_overflow(ds, DS_O_SIGNAL); 759 if (!child->thread.ds_ctx && cfg.flags)
682 else
683 ret = ds_set_overflow(ds, DS_O_WRAP);
684 if (ret < 0)
685 goto errout; 760 goto errout;
686 761
687 if (cfg.flags & PTRACE_BTS_O_TRACE) 762 if (cfg.flags & PTRACE_BTS_O_TRACE)
688 child->thread.debugctlmsr |= ds_debugctl_mask(); 763 child->thread.debugctlmsr |= bts_cfg.debugctl_mask;
689 else 764 else
690 child->thread.debugctlmsr &= ~ds_debugctl_mask(); 765 child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
691 766
692 if (cfg.flags & PTRACE_BTS_O_SCHED) 767 if (cfg.flags & PTRACE_BTS_O_SCHED)
693 set_tsk_thread_flag(child, TIF_BTS_TRACE_TS); 768 set_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
694 else 769 else
695 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); 770 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
696 771
697 ret = sizeof(cfg); 772 error = sizeof(cfg);
698 773
699out: 774out:
700 if (child->thread.debugctlmsr) 775 if (child->thread.debugctlmsr)
@@ -702,10 +777,10 @@ out:
702 else 777 else
703 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); 778 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
704 779
705 return ret; 780 return error;
706 781
707errout: 782errout:
708 child->thread.debugctlmsr &= ~ds_debugctl_mask(); 783 child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
709 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); 784 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
710 goto out; 785 goto out;
711} 786}
@@ -714,29 +789,40 @@ static int ptrace_bts_status(struct task_struct *child,
714 long cfg_size, 789 long cfg_size,
715 struct ptrace_bts_config __user *ucfg) 790 struct ptrace_bts_config __user *ucfg)
716{ 791{
717 void *ds = (void *)child->thread.ds_area_msr;
718 struct ptrace_bts_config cfg; 792 struct ptrace_bts_config cfg;
793 size_t end;
794 const void *base, *max;
795 int error;
719 796
720 if (cfg_size < sizeof(cfg)) 797 if (cfg_size < sizeof(cfg))
721 return -EIO; 798 return -EIO;
722 799
723 memset(&cfg, 0, sizeof(cfg)); 800 error = ds_get_bts_end(child, &end);
801 if (error < 0)
802 return error;
724 803
725 if (ds) { 804 error = ds_access_bts(child, /* index = */ 0, &base);
726 cfg.size = ds_get_bts_size(ds); 805 if (error < 0)
806 return error;
727 807
728 if (ds_get_overflow(ds) == DS_O_SIGNAL) 808 error = ds_access_bts(child, /* index = */ end, &max);
729 cfg.flags |= PTRACE_BTS_O_SIGNAL; 809 if (error < 0)
810 return error;
730 811
731 if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) && 812 memset(&cfg, 0, sizeof(cfg));
732 child->thread.debugctlmsr & ds_debugctl_mask()) 813 cfg.size = (max - base);
733 cfg.flags |= PTRACE_BTS_O_TRACE; 814 cfg.signal = child->thread.bts_ovfl_signal;
815 cfg.bts_size = sizeof(struct bts_struct);
734 816
735 if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS)) 817 if (cfg.signal)
736 cfg.flags |= PTRACE_BTS_O_SCHED; 818 cfg.flags |= PTRACE_BTS_O_SIGNAL;
737 }
738 819
739 cfg.bts_size = sizeof(struct bts_struct); 820 if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) &&
821 child->thread.debugctlmsr & bts_cfg.debugctl_mask)
822 cfg.flags |= PTRACE_BTS_O_TRACE;
823
824 if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS))
825 cfg.flags |= PTRACE_BTS_O_SCHED;
740 826
741 if (copy_to_user(ucfg, &cfg, sizeof(cfg))) 827 if (copy_to_user(ucfg, &cfg, sizeof(cfg)))
742 return -EFAULT; 828 return -EFAULT;
@@ -744,89 +830,38 @@ static int ptrace_bts_status(struct task_struct *child,
744 return sizeof(cfg); 830 return sizeof(cfg);
745} 831}
746 832
747
748static int ptrace_bts_write_record(struct task_struct *child, 833static int ptrace_bts_write_record(struct task_struct *child,
749 const struct bts_struct *in) 834 const struct bts_struct *in)
750{ 835{
751 int retval; 836 unsigned char bts_record[BTS_MAX_RECORD_SIZE];
752 837
753 if (!child->thread.ds_area_msr) 838 BUG_ON(BTS_MAX_RECORD_SIZE < bts_cfg.sizeof_bts);
754 return -ENXIO;
755 839
756 retval = ds_write_bts((void *)child->thread.ds_area_msr, in); 840 memset(bts_record, 0, bts_cfg.sizeof_bts);
757 if (retval) 841 switch (in->qualifier) {
758 return retval; 842 case BTS_INVALID:
843 break;
759 844
760 return sizeof(*in); 845 case BTS_BRANCH:
761} 846 bts_set(bts_record, bts_from, in->variant.lbr.from_ip);
847 bts_set(bts_record, bts_to, in->variant.lbr.to_ip);
848 break;
762 849
763static int ptrace_bts_realloc(struct task_struct *child, 850 case BTS_TASK_ARRIVES:
764 int size, int reduce_size) 851 case BTS_TASK_DEPARTS:
765{ 852 bts_set(bts_record, bts_from, bts_escape);
766 unsigned long rlim, vm; 853 bts_set(bts_record, bts_qual, in->qualifier);
767 int ret, old_size; 854 bts_set(bts_record, bts_jiffies, in->variant.jiffies);
855 break;
768 856
769 if (size < 0) 857 default:
770 return -EINVAL; 858 return -EINVAL;
771
772 old_size = ds_get_bts_size((void *)child->thread.ds_area_msr);
773 if (old_size < 0)
774 return old_size;
775
776 ret = ds_free((void **)&child->thread.ds_area_msr);
777 if (ret < 0)
778 goto out;
779
780 size >>= PAGE_SHIFT;
781 old_size >>= PAGE_SHIFT;
782
783 current->mm->total_vm -= old_size;
784 current->mm->locked_vm -= old_size;
785
786 if (size == 0)
787 goto out;
788
789 rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
790 vm = current->mm->total_vm + size;
791 if (rlim < vm) {
792 ret = -ENOMEM;
793
794 if (!reduce_size)
795 goto out;
796
797 size = rlim - current->mm->total_vm;
798 if (size <= 0)
799 goto out;
800 } 859 }
801 860
802 rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; 861 /* The writing task will be the switched-to task on a context
803 vm = current->mm->locked_vm + size; 862 * switch. It needs to write into the switched-from task's BTS
804 if (rlim < vm) { 863 * buffer. */
805 ret = -ENOMEM; 864 return ds_unchecked_write_bts(child, bts_record, bts_cfg.sizeof_bts);
806
807 if (!reduce_size)
808 goto out;
809
810 size = rlim - current->mm->locked_vm;
811 if (size <= 0)
812 goto out;
813 }
814
815 ret = ds_allocate((void **)&child->thread.ds_area_msr,
816 size << PAGE_SHIFT);
817 if (ret < 0)
818 goto out;
819
820 current->mm->total_vm += size;
821 current->mm->locked_vm += size;
822
823out:
824 if (child->thread.ds_area_msr)
825 set_tsk_thread_flag(child, TIF_DS_AREA_MSR);
826 else
827 clear_tsk_thread_flag(child, TIF_DS_AREA_MSR);
828
829 return ret;
830} 865}
831 866
832void ptrace_bts_take_timestamp(struct task_struct *tsk, 867void ptrace_bts_take_timestamp(struct task_struct *tsk,
@@ -839,7 +874,66 @@ void ptrace_bts_take_timestamp(struct task_struct *tsk,
839 874
840 ptrace_bts_write_record(tsk, &rec); 875 ptrace_bts_write_record(tsk, &rec);
841} 876}
842#endif /* X86_BTS */ 877
878static const struct bts_configuration bts_cfg_netburst = {
879 .sizeof_bts = sizeof(long) * 3,
880 .sizeof_field = sizeof(long),
881 .debugctl_mask = (1<<2)|(1<<3)|(1<<5)
882};
883
884static const struct bts_configuration bts_cfg_pentium_m = {
885 .sizeof_bts = sizeof(long) * 3,
886 .sizeof_field = sizeof(long),
887 .debugctl_mask = (1<<6)|(1<<7)
888};
889
890static const struct bts_configuration bts_cfg_core2 = {
891 .sizeof_bts = 8 * 3,
892 .sizeof_field = 8,
893 .debugctl_mask = (1<<6)|(1<<7)|(1<<9)
894};
895
896static inline void bts_configure(const struct bts_configuration *cfg)
897{
898 bts_cfg = *cfg;
899}
900
901void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c)
902{
903 switch (c->x86) {
904 case 0x6:
905 switch (c->x86_model) {
906 case 0xD:
907 case 0xE: /* Pentium M */
908 bts_configure(&bts_cfg_pentium_m);
909 break;
910 case 0xF: /* Core2 */
911 case 0x1C: /* Atom */
912 bts_configure(&bts_cfg_core2);
913 break;
914 default:
915 /* sorry, don't know about them */
916 break;
917 }
918 break;
919 case 0xF:
920 switch (c->x86_model) {
921 case 0x0:
922 case 0x1:
923 case 0x2: /* Netburst */
924 bts_configure(&bts_cfg_netburst);
925 break;
926 default:
927 /* sorry, don't know about them */
928 break;
929 }
930 break;
931 default:
932 /* sorry, don't know about them */
933 break;
934 }
935}
936#endif /* CONFIG_X86_PTRACE_BTS */
843 937
844/* 938/*
845 * Called by kernel/ptrace.c when detaching.. 939 * Called by kernel/ptrace.c when detaching..
@@ -852,15 +946,15 @@ void ptrace_disable(struct task_struct *child)
852#ifdef TIF_SYSCALL_EMU 946#ifdef TIF_SYSCALL_EMU
853 clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); 947 clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
854#endif 948#endif
855 if (child->thread.ds_area_msr) { 949#ifdef CONFIG_X86_PTRACE_BTS
856#ifdef X86_BTS 950 (void)ds_release_bts(child);
857 ptrace_bts_realloc(child, 0, 0); 951
858#endif 952 child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
859 child->thread.debugctlmsr &= ~ds_debugctl_mask(); 953 if (!child->thread.debugctlmsr)
860 if (!child->thread.debugctlmsr) 954 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
861 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); 955
862 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); 956 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
863 } 957#endif /* CONFIG_X86_PTRACE_BTS */
864} 958}
865 959
866#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION 960#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
@@ -980,7 +1074,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
980 /* 1074 /*
981 * These bits need more cooking - not enabled yet: 1075 * These bits need more cooking - not enabled yet:
982 */ 1076 */
983#ifdef X86_BTS 1077#ifdef CONFIG_X86_PTRACE_BTS
984 case PTRACE_BTS_CONFIG: 1078 case PTRACE_BTS_CONFIG:
985 ret = ptrace_bts_config 1079 ret = ptrace_bts_config
986 (child, data, (struct ptrace_bts_config __user *)addr); 1080 (child, data, (struct ptrace_bts_config __user *)addr);
@@ -992,7 +1086,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
992 break; 1086 break;
993 1087
994 case PTRACE_BTS_SIZE: 1088 case PTRACE_BTS_SIZE:
995 ret = ptrace_bts_get_size(child); 1089 ret = ds_get_bts_index(child, /* pos = */ 0);
996 break; 1090 break;
997 1091
998 case PTRACE_BTS_GET: 1092 case PTRACE_BTS_GET:
@@ -1001,14 +1095,14 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
1001 break; 1095 break;
1002 1096
1003 case PTRACE_BTS_CLEAR: 1097 case PTRACE_BTS_CLEAR:
1004 ret = ptrace_bts_clear(child); 1098 ret = ds_clear_bts(child);
1005 break; 1099 break;
1006 1100
1007 case PTRACE_BTS_DRAIN: 1101 case PTRACE_BTS_DRAIN:
1008 ret = ptrace_bts_drain 1102 ret = ptrace_bts_drain
1009 (child, data, (struct bts_struct __user *) addr); 1103 (child, data, (struct bts_struct __user *) addr);
1010 break; 1104 break;
1011#endif 1105#endif /* CONFIG_X86_PTRACE_BTS */
1012 1106
1013 default: 1107 default:
1014 ret = ptrace_request(child, request, addr, data); 1108 ret = ptrace_request(child, request, addr, data);
diff --git a/include/asm-x86/ds.h b/include/asm-x86/ds.h
index 6b27c686fa10..c3c953a45b21 100644
--- a/include/asm-x86/ds.h
+++ b/include/asm-x86/ds.h
@@ -2,71 +2,237 @@
2 * Debug Store (DS) support 2 * Debug Store (DS) support
3 * 3 *
4 * This provides a low-level interface to the hardware's Debug Store 4 * This provides a low-level interface to the hardware's Debug Store
5 * feature that is used for last branch recording (LBR) and 5 * feature that is used for branch trace store (BTS) and
6 * precise-event based sampling (PEBS). 6 * precise-event based sampling (PEBS).
7 * 7 *
8 * Different architectures use a different DS layout/pointer size. 8 * It manages:
9 * The below functions therefore work on a void*. 9 * - per-thread and per-cpu allocation of BTS and PEBS
10 * - buffer memory allocation (optional)
11 * - buffer overflow handling
12 * - buffer access
10 * 13 *
14 * It assumes:
15 * - get_task_struct on all parameter tasks
16 * - current is allowed to trace parameter tasks
11 * 17 *
12 * Since there is no user for PEBS, yet, only LBR (or branch
13 * trace store, BTS) is supported.
14 * 18 *
15 * 19 * Copyright (C) 2007-2008 Intel Corporation.
16 * Copyright (C) 2007 Intel Corporation. 20 * Markus Metzger <markus.t.metzger@intel.com>, 2007-2008
17 * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007
18 */ 21 */
19 22
20#ifndef ASM_X86__DS_H 23#ifndef ASM_X86__DS_H
21#define ASM_X86__DS_H 24#define ASM_X86__DS_H
22 25
26#ifdef CONFIG_X86_DS
27
23#include <linux/types.h> 28#include <linux/types.h>
24#include <linux/init.h> 29#include <linux/init.h>
25 30
26struct cpuinfo_x86;
27 31
32struct task_struct;
28 33
29/* a branch trace record entry 34/*
35 * Request BTS or PEBS
36 *
37 * Due to alignement constraints, the actual buffer may be slightly
38 * smaller than the requested or provided buffer.
30 * 39 *
31 * In order to unify the interface between various processor versions, 40 * Returns 0 on success; -Eerrno otherwise
32 * we use the below data structure for all processors. 41 *
42 * task: the task to request recording for;
43 * NULL for per-cpu recording on the current cpu
44 * base: the base pointer for the (non-pageable) buffer;
45 * NULL if buffer allocation requested
46 * size: the size of the requested or provided buffer
47 * ovfl: pointer to a function to be called on buffer overflow;
48 * NULL if cyclic buffer requested
33 */ 49 */
34enum bts_qualifier { 50typedef void (*ds_ovfl_callback_t)(struct task_struct *);
35 BTS_INVALID = 0, 51extern int ds_request_bts(struct task_struct *task, void *base, size_t size,
36 BTS_BRANCH, 52 ds_ovfl_callback_t ovfl);
37 BTS_TASK_ARRIVES, 53extern int ds_request_pebs(struct task_struct *task, void *base, size_t size,
38 BTS_TASK_DEPARTS 54 ds_ovfl_callback_t ovfl);
39}; 55
56/*
57 * Release BTS or PEBS resources
58 *
59 * Frees buffers allocated on ds_request.
60 *
61 * Returns 0 on success; -Eerrno otherwise
62 *
63 * task: the task to release resources for;
64 * NULL to release resources for the current cpu
65 */
66extern int ds_release_bts(struct task_struct *task);
67extern int ds_release_pebs(struct task_struct *task);
68
69/*
70 * Return the (array) index of the write pointer.
71 * (assuming an array of BTS/PEBS records)
72 *
73 * Returns -Eerrno on error
74 *
75 * task: the task to access;
76 * NULL to access the current cpu
77 * pos (out): if not NULL, will hold the result
78 */
79extern int ds_get_bts_index(struct task_struct *task, size_t *pos);
80extern int ds_get_pebs_index(struct task_struct *task, size_t *pos);
81
82/*
83 * Return the (array) index one record beyond the end of the array.
84 * (assuming an array of BTS/PEBS records)
85 *
86 * Returns -Eerrno on error
87 *
88 * task: the task to access;
89 * NULL to access the current cpu
90 * pos (out): if not NULL, will hold the result
91 */
92extern int ds_get_bts_end(struct task_struct *task, size_t *pos);
93extern int ds_get_pebs_end(struct task_struct *task, size_t *pos);
94
95/*
96 * Provide a pointer to the BTS/PEBS record at parameter index.
97 * (assuming an array of BTS/PEBS records)
98 *
99 * The pointer points directly into the buffer. The user is
100 * responsible for copying the record.
101 *
102 * Returns the size of a single record on success; -Eerrno on error
103 *
104 * task: the task to access;
105 * NULL to access the current cpu
106 * index: the index of the requested record
107 * record (out): pointer to the requested record
108 */
109extern int ds_access_bts(struct task_struct *task,
110 size_t index, const void **record);
111extern int ds_access_pebs(struct task_struct *task,
112 size_t index, const void **record);
113
114/*
115 * Write one or more BTS/PEBS records at the write pointer index and
116 * advance the write pointer.
117 *
118 * If size is not a multiple of the record size, trailing bytes are
119 * zeroed out.
120 *
121 * May result in one or more overflow notifications.
122 *
123 * If called during overflow handling, that is, with index >=
124 * interrupt threshold, the write will wrap around.
125 *
126 * An overflow notification is given if and when the interrupt
127 * threshold is reached during or after the write.
128 *
129 * Returns the number of bytes written or -Eerrno.
130 *
131 * task: the task to access;
132 * NULL to access the current cpu
133 * buffer: the buffer to write
134 * size: the size of the buffer
135 */
136extern int ds_write_bts(struct task_struct *task,
137 const void *buffer, size_t size);
138extern int ds_write_pebs(struct task_struct *task,
139 const void *buffer, size_t size);
140
141/*
142 * Same as ds_write_bts/pebs, but omit ownership checks.
143 *
144 * This is needed to have some other task than the owner of the
145 * BTS/PEBS buffer or the parameter task itself write into the
146 * respective buffer.
147 */
148extern int ds_unchecked_write_bts(struct task_struct *task,
149 const void *buffer, size_t size);
150extern int ds_unchecked_write_pebs(struct task_struct *task,
151 const void *buffer, size_t size);
152
153/*
154 * Reset the write pointer of the BTS/PEBS buffer.
155 *
156 * Returns 0 on success; -Eerrno on error
157 *
158 * task: the task to access;
159 * NULL to access the current cpu
160 */
161extern int ds_reset_bts(struct task_struct *task);
162extern int ds_reset_pebs(struct task_struct *task);
163
164/*
165 * Clear the BTS/PEBS buffer and reset the write pointer.
166 * The entire buffer will be zeroed out.
167 *
168 * Returns 0 on success; -Eerrno on error
169 *
170 * task: the task to access;
171 * NULL to access the current cpu
172 */
173extern int ds_clear_bts(struct task_struct *task);
174extern int ds_clear_pebs(struct task_struct *task);
175
176/*
177 * Provide the PEBS counter reset value.
178 *
179 * Returns 0 on success; -Eerrno on error
180 *
181 * task: the task to access;
182 * NULL to access the current cpu
183 * value (out): the counter reset value
184 */
185extern int ds_get_pebs_reset(struct task_struct *task, u64 *value);
186
187/*
188 * Set the PEBS counter reset value.
189 *
190 * Returns 0 on success; -Eerrno on error
191 *
192 * task: the task to access;
193 * NULL to access the current cpu
194 * value: the new counter reset value
195 */
196extern int ds_set_pebs_reset(struct task_struct *task, u64 value);
197
198/*
199 * Initialization
200 */
201struct cpuinfo_x86;
202extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *);
203
204
40 205
41struct bts_struct { 206/*
42 u64 qualifier; 207 * The DS context - part of struct thread_struct.
43 union { 208 */
44 /* BTS_BRANCH */ 209struct ds_context {
45 struct { 210 /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */
46 u64 from_ip; 211 unsigned char *ds;
47 u64 to_ip; 212 /* the owner of the BTS and PEBS configuration, respectively */
48 } lbr; 213 struct task_struct *owner[2];
49 /* BTS_TASK_ARRIVES or 214 /* buffer overflow notification function for BTS and PEBS */
50 BTS_TASK_DEPARTS */ 215 ds_ovfl_callback_t callback[2];
51 u64 jiffies; 216 /* the original buffer address */
52 } variant; 217 void *buffer[2];
218 /* the number of allocated pages for on-request allocated buffers */
219 unsigned int pages[2];
220 /* use count */
221 unsigned long count;
222 /* a pointer to the context location inside the thread_struct
223 * or the per_cpu context array */
224 struct ds_context **this;
225 /* a pointer to the task owning this context, or NULL, if the
226 * context is owned by a cpu */
227 struct task_struct *task;
53}; 228};
54 229
55/* Overflow handling mechanisms */ 230/* called by exit_thread() to free leftover contexts */
56#define DS_O_SIGNAL 1 /* send overflow signal */ 231extern void ds_free(struct ds_context *context);
57#define DS_O_WRAP 2 /* wrap around */ 232
58 233#else /* CONFIG_X86_DS */
59extern int ds_allocate(void **, size_t); 234
60extern int ds_free(void **); 235#define ds_init_intel(config) do {} while (0)
61extern int ds_get_bts_size(void *);
62extern int ds_get_bts_end(void *);
63extern int ds_get_bts_index(void *);
64extern int ds_set_overflow(void *, int);
65extern int ds_get_overflow(void *);
66extern int ds_clear(void *);
67extern int ds_read_bts(void *, int, struct bts_struct *);
68extern int ds_write_bts(void *, const struct bts_struct *);
69extern unsigned long ds_debugctl_mask(void);
70extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *c);
71 236
237#endif /* CONFIG_X86_DS */
72#endif /* ASM_X86__DS_H */ 238#endif /* ASM_X86__DS_H */
diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index 62531ecbde16..c7d35464a4bb 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -20,6 +20,7 @@ struct mm_struct;
20#include <asm/msr.h> 20#include <asm/msr.h>
21#include <asm/desc_defs.h> 21#include <asm/desc_defs.h>
22#include <asm/nops.h> 22#include <asm/nops.h>
23#include <asm/ds.h>
23 24
24#include <linux/personality.h> 25#include <linux/personality.h>
25#include <linux/cpumask.h> 26#include <linux/cpumask.h>
@@ -430,9 +431,14 @@ struct thread_struct {
430 unsigned io_bitmap_max; 431 unsigned io_bitmap_max;
431/* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */ 432/* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */
432 unsigned long debugctlmsr; 433 unsigned long debugctlmsr;
433/* Debug Store - if not 0 points to a DS Save Area configuration; 434#ifdef CONFIG_X86_DS
434 * goes into MSR_IA32_DS_AREA */ 435/* Debug Store context; see include/asm-x86/ds.h; goes into MSR_IA32_DS_AREA */
435 unsigned long ds_area_msr; 436 struct ds_context *ds_ctx;
437#endif /* CONFIG_X86_DS */
438#ifdef CONFIG_X86_PTRACE_BTS
439/* the signal to send on a bts buffer overflow */
440 unsigned int bts_ovfl_signal;
441#endif /* CONFIG_X86_PTRACE_BTS */
436}; 442};
437 443
438static inline unsigned long native_get_debugreg(int regno) 444static inline unsigned long native_get_debugreg(int regno)
diff --git a/include/asm-x86/ptrace-abi.h b/include/asm-x86/ptrace-abi.h
index d0cf3344a586..4298b8882a78 100644
--- a/include/asm-x86/ptrace-abi.h
+++ b/include/asm-x86/ptrace-abi.h
@@ -80,8 +80,9 @@
80 80
81#define PTRACE_SINGLEBLOCK 33 /* resume execution until next branch */ 81#define PTRACE_SINGLEBLOCK 33 /* resume execution until next branch */
82 82
83#ifndef __ASSEMBLY__ 83#ifdef CONFIG_X86_PTRACE_BTS
84 84
85#ifndef __ASSEMBLY__
85#include <asm/types.h> 86#include <asm/types.h>
86 87
87/* configuration/status structure used in PTRACE_BTS_CONFIG and 88/* configuration/status structure used in PTRACE_BTS_CONFIG and
@@ -97,20 +98,20 @@ struct ptrace_bts_config {
97 /* actual size of bts_struct in bytes */ 98 /* actual size of bts_struct in bytes */
98 __u32 bts_size; 99 __u32 bts_size;
99}; 100};
100#endif 101#endif /* __ASSEMBLY__ */
101 102
102#define PTRACE_BTS_O_TRACE 0x1 /* branch trace */ 103#define PTRACE_BTS_O_TRACE 0x1 /* branch trace */
103#define PTRACE_BTS_O_SCHED 0x2 /* scheduling events w/ jiffies */ 104#define PTRACE_BTS_O_SCHED 0x2 /* scheduling events w/ jiffies */
104#define PTRACE_BTS_O_SIGNAL 0x4 /* send SIG<signal> on buffer overflow 105#define PTRACE_BTS_O_SIGNAL 0x4 /* send SIG<signal> on buffer overflow
105 instead of wrapping around */ 106 instead of wrapping around */
106#define PTRACE_BTS_O_CUT_SIZE 0x8 /* cut requested size to max available 107#define PTRACE_BTS_O_ALLOC 0x8 /* (re)allocate buffer */
107 instead of failing */
108 108
109#define PTRACE_BTS_CONFIG 40 109#define PTRACE_BTS_CONFIG 40
110/* Configure branch trace recording. 110/* Configure branch trace recording.
111 ADDR points to a struct ptrace_bts_config. 111 ADDR points to a struct ptrace_bts_config.
112 DATA gives the size of that buffer. 112 DATA gives the size of that buffer.
113 A new buffer is allocated, iff the size changes. 113 A new buffer is allocated, if requested in the flags.
114 An overflow signal may only be requested for new buffers.
114 Returns the number of bytes read. 115 Returns the number of bytes read.
115*/ 116*/
116#define PTRACE_BTS_STATUS 41 117#define PTRACE_BTS_STATUS 41
@@ -119,7 +120,7 @@ struct ptrace_bts_config {
119 Returns the number of bytes written. 120 Returns the number of bytes written.
120*/ 121*/
121#define PTRACE_BTS_SIZE 42 122#define PTRACE_BTS_SIZE 42
122/* Return the number of available BTS records. 123/* Return the number of available BTS records for draining.
123 DATA and ADDR are ignored. 124 DATA and ADDR are ignored.
124*/ 125*/
125#define PTRACE_BTS_GET 43 126#define PTRACE_BTS_GET 43
@@ -139,5 +140,6 @@ struct ptrace_bts_config {
139 BTS records are read from oldest to newest. 140 BTS records are read from oldest to newest.
140 Returns number of BTS records drained. 141 Returns number of BTS records drained.
141*/ 142*/
143#endif /* CONFIG_X86_PTRACE_BTS */
142 144
143#endif /* ASM_X86__PTRACE_ABI_H */ 145#endif /* ASM_X86__PTRACE_ABI_H */
diff --git a/include/asm-x86/ptrace.h b/include/asm-x86/ptrace.h
index 66ff7bd47379..33a034be8b5c 100644
--- a/include/asm-x86/ptrace.h
+++ b/include/asm-x86/ptrace.h
@@ -127,14 +127,48 @@ struct pt_regs {
127#endif /* __KERNEL__ */ 127#endif /* __KERNEL__ */
128#endif /* !__i386__ */ 128#endif /* !__i386__ */
129 129
130
131#ifdef CONFIG_X86_PTRACE_BTS
132/* a branch trace record entry
133 *
134 * In order to unify the interface between various processor versions,
135 * we use the below data structure for all processors.
136 */
137enum bts_qualifier {
138 BTS_INVALID = 0,
139 BTS_BRANCH,
140 BTS_TASK_ARRIVES,
141 BTS_TASK_DEPARTS
142};
143
144struct bts_struct {
145 __u64 qualifier;
146 union {
147 /* BTS_BRANCH */
148 struct {
149 __u64 from_ip;
150 __u64 to_ip;
151 } lbr;
152 /* BTS_TASK_ARRIVES or
153 BTS_TASK_DEPARTS */
154 __u64 jiffies;
155 } variant;
156};
157#endif /* CONFIG_X86_PTRACE_BTS */
158
130#ifdef __KERNEL__ 159#ifdef __KERNEL__
131 160
132/* the DS BTS struct is used for ptrace as well */ 161#include <linux/init.h>
133#include <asm/ds.h>
134 162
163struct cpuinfo_x86;
135struct task_struct; 164struct task_struct;
136 165
166#ifdef CONFIG_X86_PTRACE_BTS
167extern void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *);
137extern void ptrace_bts_take_timestamp(struct task_struct *, enum bts_qualifier); 168extern void ptrace_bts_take_timestamp(struct task_struct *, enum bts_qualifier);
169#else
170#define ptrace_bts_init_intel(config) do {} while (0)
171#endif /* CONFIG_X86_PTRACE_BTS */
138 172
139extern unsigned long profile_pc(struct pt_regs *regs); 173extern unsigned long profile_pc(struct pt_regs *regs);
140 174