aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2008-09-10 02:20:51 -0400
committerIngo Molnar <mingo@elte.hu>2008-09-10 02:20:51 -0400
commit81faaae45701484bd7368336e02f2a846153b22f (patch)
treed7d8dcafe5bb22decb0024537478fb667b0c896b /arch/x86/kernel
parentf69feff720497237ae9dd2f4604921bd3080c421 (diff)
parent3c9339049df5cc3a468c11de6c4101a1ea8c3d83 (diff)
Merge branch 'x86/pebs' into x86/unify-cpu-detect
Conflicts: arch/x86/Kconfig.cpu include/asm-x86/ds.h Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/cpu/intel.c3
-rw-r--r--arch/x86/kernel/ds.c954
-rw-r--r--arch/x86/kernel/process_32.c50
-rw-r--r--arch/x86/kernel/process_64.c38
-rw-r--r--arch/x86/kernel/ptrace.c444
5 files changed, 1018 insertions, 471 deletions
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 4a8ac9d4ff50..a66989586a84 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -221,10 +221,11 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
221 set_cpu_cap(c, X86_FEATURE_BTS); 221 set_cpu_cap(c, X86_FEATURE_BTS);
222 if (!(l1 & (1<<12))) 222 if (!(l1 & (1<<12)))
223 set_cpu_cap(c, X86_FEATURE_PEBS); 223 set_cpu_cap(c, X86_FEATURE_PEBS);
224 ds_init_intel(c);
224 } 225 }
225 226
226 if (cpu_has_bts) 227 if (cpu_has_bts)
227 ds_init_intel(c); 228 ptrace_bts_init_intel(c);
228 229
229 /* 230 /*
230 * See if we have a good local APIC by checking for buggy Pentia, 231 * See if we have a good local APIC by checking for buggy Pentia,
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 11c11b8ec48d..ab21c270bfa4 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -2,26 +2,49 @@
2 * Debug Store support 2 * Debug Store support
3 * 3 *
4 * This provides a low-level interface to the hardware's Debug Store 4 * This provides a low-level interface to the hardware's Debug Store
5 * feature that is used for last branch recording (LBR) and 5 * feature that is used for branch trace store (BTS) and
6 * precise-event based sampling (PEBS). 6 * precise-event based sampling (PEBS).
7 * 7 *
8 * Different architectures use a different DS layout/pointer size. 8 * It manages:
9 * The below functions therefore work on a void*. 9 * - per-thread and per-cpu allocation of BTS and PEBS
10 * - buffer memory allocation (optional)
11 * - buffer overflow handling
12 * - buffer access
10 * 13 *
14 * It assumes:
15 * - get_task_struct on all parameter tasks
16 * - current is allowed to trace parameter tasks
11 * 17 *
12 * Since there is no user for PEBS, yet, only LBR (or branch
13 * trace store, BTS) is supported.
14 * 18 *
15 * 19 * Copyright (C) 2007-2008 Intel Corporation.
16 * Copyright (C) 2007 Intel Corporation. 20 * Markus Metzger <markus.t.metzger@intel.com>, 2007-2008
17 * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007
18 */ 21 */
19 22
23
24#ifdef CONFIG_X86_DS
25
20#include <asm/ds.h> 26#include <asm/ds.h>
21 27
22#include <linux/errno.h> 28#include <linux/errno.h>
23#include <linux/string.h> 29#include <linux/string.h>
24#include <linux/slab.h> 30#include <linux/slab.h>
31#include <linux/sched.h>
32#include <linux/mm.h>
33
34
35/*
36 * The configuration for a particular DS hardware implementation.
37 */
38struct ds_configuration {
39 /* the size of the DS structure in bytes */
40 unsigned char sizeof_ds;
41 /* the size of one pointer-typed field in the DS structure in bytes;
42 this covers the first 8 fields related to buffer management. */
43 unsigned char sizeof_field;
44 /* the size of a BTS/PEBS record in bytes */
45 unsigned char sizeof_rec[2];
46};
47static struct ds_configuration ds_cfg;
25 48
26 49
27/* 50/*
@@ -44,378 +67,747 @@
44 * (interrupt occurs when write pointer passes interrupt pointer) 67 * (interrupt occurs when write pointer passes interrupt pointer)
45 * - value to which counter is reset following counter overflow 68 * - value to which counter is reset following counter overflow
46 * 69 *
47 * On later architectures, the last branch recording hardware uses 70 * Later architectures use 64bit pointers throughout, whereas earlier
48 * 64bit pointers even in 32bit mode. 71 * architectures use 32bit pointers in 32bit mode.
49 *
50 *
51 * Branch Trace Store (BTS) records store information about control
52 * flow changes. They at least provide the following information:
53 * - source linear address
54 * - destination linear address
55 * 72 *
56 * Netburst supported a predicated bit that had been dropped in later
57 * architectures. We do not suppor it.
58 * 73 *
74 * We compute the base address for the first 8 fields based on:
75 * - the field size stored in the DS configuration
76 * - the relative field position
77 * - an offset giving the start of the respective region
59 * 78 *
60 * In order to abstract from the actual DS and BTS layout, we describe 79 * This offset is further used to index various arrays holding
61 * the access to the relevant fields. 80 * information for BTS and PEBS at the respective index.
62 * Thanks to Andi Kleen for proposing this design.
63 * 81 *
64 * The implementation, however, is not as general as it might seem. In 82 * On later 32bit processors, we only access the lower 32bit of the
65 * order to stay somewhat simple and efficient, we assume an 83 * 64bit pointer fields. The upper halves will be zeroed out.
66 * underlying unsigned type (mostly a pointer type) and we expect the
67 * field to be at least as big as that type.
68 */ 84 */
69 85
70/* 86enum ds_field {
71 * A special from_ip address to indicate that the BTS record is an 87 ds_buffer_base = 0,
72 * info record that needs to be interpreted or skipped. 88 ds_index,
73 */ 89 ds_absolute_maximum,
74#define BTS_ESCAPE_ADDRESS (-1) 90 ds_interrupt_threshold,
91};
75 92
76/* 93enum ds_qualifier {
77 * A field access descriptor 94 ds_bts = 0,
78 */ 95 ds_pebs
79struct access_desc {
80 unsigned char offset;
81 unsigned char size;
82}; 96};
83 97
98static inline unsigned long ds_get(const unsigned char *base,
99 enum ds_qualifier qual, enum ds_field field)
100{
101 base += (ds_cfg.sizeof_field * (field + (4 * qual)));
102 return *(unsigned long *)base;
103}
104
105static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
106 enum ds_field field, unsigned long value)
107{
108 base += (ds_cfg.sizeof_field * (field + (4 * qual)));
109 (*(unsigned long *)base) = value;
110}
111
112
84/* 113/*
85 * The configuration for a particular DS/BTS hardware implementation. 114 * Locking is done only for allocating BTS or PEBS resources and for
115 * guarding context and buffer memory allocation.
116 *
117 * Most functions require the current task to own the ds context part
118 * they are going to access. All the locking is done when validating
119 * access to the context.
86 */ 120 */
87struct ds_configuration { 121static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock);
88 /* the DS configuration */
89 unsigned char sizeof_ds;
90 struct access_desc bts_buffer_base;
91 struct access_desc bts_index;
92 struct access_desc bts_absolute_maximum;
93 struct access_desc bts_interrupt_threshold;
94 /* the BTS configuration */
95 unsigned char sizeof_bts;
96 struct access_desc from_ip;
97 struct access_desc to_ip;
98 /* BTS variants used to store additional information like
99 timestamps */
100 struct access_desc info_type;
101 struct access_desc info_data;
102 unsigned long debugctl_mask;
103};
104 122
105/* 123/*
106 * The global configuration used by the below accessor functions 124 * Validate that the current task is allowed to access the BTS/PEBS
125 * buffer of the parameter task.
126 *
127 * Returns 0, if access is granted; -Eerrno, otherwise.
107 */ 128 */
108static struct ds_configuration ds_cfg; 129static inline int ds_validate_access(struct ds_context *context,
130 enum ds_qualifier qual)
131{
132 if (!context)
133 return -EPERM;
134
135 if (context->owner[qual] == current)
136 return 0;
137
138 return -EPERM;
139}
140
109 141
110/* 142/*
111 * Accessor functions for some DS and BTS fields using the above 143 * We either support (system-wide) per-cpu or per-thread allocation.
112 * global ptrace_bts_cfg. 144 * We distinguish the two based on the task_struct pointer, where a
145 * NULL pointer indicates per-cpu allocation for the current cpu.
146 *
147 * Allocations are use-counted. As soon as resources are allocated,
148 * further allocations must be of the same type (per-cpu or
149 * per-thread). We model this by counting allocations (i.e. the number
150 * of tracers of a certain type) for one type negatively:
151 * =0 no tracers
152 * >0 number of per-thread tracers
153 * <0 number of per-cpu tracers
154 *
155 * The below functions to get and put tracers and to check the
156 * allocation type require the ds_lock to be held by the caller.
157 *
158 * Tracers essentially gives the number of ds contexts for a certain
159 * type of allocation.
113 */ 160 */
114static inline unsigned long get_bts_buffer_base(char *base) 161static long tracers;
162
163static inline void get_tracer(struct task_struct *task)
115{ 164{
116 return *(unsigned long *)(base + ds_cfg.bts_buffer_base.offset); 165 tracers += (task ? 1 : -1);
117} 166}
118static inline void set_bts_buffer_base(char *base, unsigned long value) 167
168static inline void put_tracer(struct task_struct *task)
119{ 169{
120 (*(unsigned long *)(base + ds_cfg.bts_buffer_base.offset)) = value; 170 tracers -= (task ? 1 : -1);
121} 171}
122static inline unsigned long get_bts_index(char *base) 172
173static inline int check_tracer(struct task_struct *task)
123{ 174{
124 return *(unsigned long *)(base + ds_cfg.bts_index.offset); 175 return (task ? (tracers >= 0) : (tracers <= 0));
125} 176}
126static inline void set_bts_index(char *base, unsigned long value) 177
178
179/*
180 * The DS context is either attached to a thread or to a cpu:
181 * - in the former case, the thread_struct contains a pointer to the
182 * attached context.
183 * - in the latter case, we use a static array of per-cpu context
184 * pointers.
185 *
186 * Contexts are use-counted. They are allocated on first access and
187 * deallocated when the last user puts the context.
188 *
189 * We distinguish between an allocating and a non-allocating get of a
190 * context:
191 * - the allocating get is used for requesting BTS/PEBS resources. It
192 * requires the caller to hold the global ds_lock.
193 * - the non-allocating get is used for all other cases. A
194 * non-existing context indicates an error. It acquires and releases
195 * the ds_lock itself for obtaining the context.
196 *
197 * A context and its DS configuration are allocated and deallocated
198 * together. A context always has a DS configuration of the
199 * appropriate size.
200 */
201static DEFINE_PER_CPU(struct ds_context *, system_context);
202
203#define this_system_context per_cpu(system_context, smp_processor_id())
204
205/*
206 * Returns the pointer to the parameter task's context or to the
207 * system-wide context, if task is NULL.
208 *
209 * Increases the use count of the returned context, if not NULL.
210 */
211static inline struct ds_context *ds_get_context(struct task_struct *task)
127{ 212{
128 (*(unsigned long *)(base + ds_cfg.bts_index.offset)) = value; 213 struct ds_context *context;
214
215 spin_lock(&ds_lock);
216
217 context = (task ? task->thread.ds_ctx : this_system_context);
218 if (context)
219 context->count++;
220
221 spin_unlock(&ds_lock);
222
223 return context;
129} 224}
130static inline unsigned long get_bts_absolute_maximum(char *base) 225
226/*
227 * Same as ds_get_context, but allocates the context and it's DS
228 * structure, if necessary; returns NULL; if out of memory.
229 *
230 * pre: requires ds_lock to be held
231 */
232static inline struct ds_context *ds_alloc_context(struct task_struct *task)
131{ 233{
132 return *(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset); 234 struct ds_context **p_context =
235 (task ? &task->thread.ds_ctx : &this_system_context);
236 struct ds_context *context = *p_context;
237
238 if (!context) {
239 context = kzalloc(sizeof(*context), GFP_KERNEL);
240
241 if (!context)
242 return NULL;
243
244 context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL);
245 if (!context->ds) {
246 kfree(context);
247 return NULL;
248 }
249
250 *p_context = context;
251
252 context->this = p_context;
253 context->task = task;
254
255 if (task)
256 set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
257
258 if (!task || (task == current))
259 wrmsr(MSR_IA32_DS_AREA, (unsigned long)context->ds, 0);
260
261 get_tracer(task);
262 }
263
264 context->count++;
265
266 return context;
133} 267}
134static inline void set_bts_absolute_maximum(char *base, unsigned long value) 268
269/*
270 * Decreases the use count of the parameter context, if not NULL.
271 * Deallocates the context, if the use count reaches zero.
272 */
273static inline void ds_put_context(struct ds_context *context)
135{ 274{
136 (*(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset)) = value; 275 if (!context)
276 return;
277
278 spin_lock(&ds_lock);
279
280 if (--context->count)
281 goto out;
282
283 *(context->this) = NULL;
284
285 if (context->task)
286 clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);
287
288 if (!context->task || (context->task == current))
289 wrmsrl(MSR_IA32_DS_AREA, 0);
290
291 put_tracer(context->task);
292
293 /* free any leftover buffers from tracers that did not
294 * deallocate them properly. */
295 kfree(context->buffer[ds_bts]);
296 kfree(context->buffer[ds_pebs]);
297 kfree(context->ds);
298 kfree(context);
299 out:
300 spin_unlock(&ds_lock);
137} 301}
138static inline unsigned long get_bts_interrupt_threshold(char *base) 302
303
304/*
305 * Handle a buffer overflow
306 *
307 * task: the task whose buffers are overflowing;
308 * NULL for a buffer overflow on the current cpu
309 * context: the ds context
310 * qual: the buffer type
311 */
312static void ds_overflow(struct task_struct *task, struct ds_context *context,
313 enum ds_qualifier qual)
139{ 314{
140 return *(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset); 315 if (!context)
316 return;
317
318 if (context->callback[qual])
319 (*context->callback[qual])(task);
320
321 /* todo: do some more overflow handling */
141} 322}
142static inline void set_bts_interrupt_threshold(char *base, unsigned long value) 323
324
325/*
326 * Allocate a non-pageable buffer of the parameter size.
327 * Checks the memory and the locked memory rlimit.
328 *
329 * Returns the buffer, if successful;
330 * NULL, if out of memory or rlimit exceeded.
331 *
332 * size: the requested buffer size in bytes
333 * pages (out): if not NULL, contains the number of pages reserved
334 */
335static inline void *ds_allocate_buffer(size_t size, unsigned int *pages)
143{ 336{
144 (*(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset)) = value; 337 unsigned long rlim, vm, pgsz;
338 void *buffer;
339
340 pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
341
342 rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
343 vm = current->mm->total_vm + pgsz;
344 if (rlim < vm)
345 return NULL;
346
347 rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
348 vm = current->mm->locked_vm + pgsz;
349 if (rlim < vm)
350 return NULL;
351
352 buffer = kzalloc(size, GFP_KERNEL);
353 if (!buffer)
354 return NULL;
355
356 current->mm->total_vm += pgsz;
357 current->mm->locked_vm += pgsz;
358
359 if (pages)
360 *pages = pgsz;
361
362 return buffer;
145} 363}
146static inline unsigned long get_from_ip(char *base) 364
365static int ds_request(struct task_struct *task, void *base, size_t size,
366 ds_ovfl_callback_t ovfl, enum ds_qualifier qual)
147{ 367{
148 return *(unsigned long *)(base + ds_cfg.from_ip.offset); 368 struct ds_context *context;
369 unsigned long buffer, adj;
370 const unsigned long alignment = (1 << 3);
371 int error = 0;
372
373 if (!ds_cfg.sizeof_ds)
374 return -EOPNOTSUPP;
375
376 /* we require some space to do alignment adjustments below */
377 if (size < (alignment + ds_cfg.sizeof_rec[qual]))
378 return -EINVAL;
379
380 /* buffer overflow notification is not yet implemented */
381 if (ovfl)
382 return -EOPNOTSUPP;
383
384
385 spin_lock(&ds_lock);
386
387 if (!check_tracer(task))
388 return -EPERM;
389
390 error = -ENOMEM;
391 context = ds_alloc_context(task);
392 if (!context)
393 goto out_unlock;
394
395 error = -EALREADY;
396 if (context->owner[qual] == current)
397 goto out_unlock;
398 error = -EPERM;
399 if (context->owner[qual] != NULL)
400 goto out_unlock;
401 context->owner[qual] = current;
402
403 spin_unlock(&ds_lock);
404
405
406 error = -ENOMEM;
407 if (!base) {
408 base = ds_allocate_buffer(size, &context->pages[qual]);
409 if (!base)
410 goto out_release;
411
412 context->buffer[qual] = base;
413 }
414 error = 0;
415
416 context->callback[qual] = ovfl;
417
418 /* adjust the buffer address and size to meet alignment
419 * constraints:
420 * - buffer is double-word aligned
421 * - size is multiple of record size
422 *
423 * We checked the size at the very beginning; we have enough
424 * space to do the adjustment.
425 */
426 buffer = (unsigned long)base;
427
428 adj = ALIGN(buffer, alignment) - buffer;
429 buffer += adj;
430 size -= adj;
431
432 size /= ds_cfg.sizeof_rec[qual];
433 size *= ds_cfg.sizeof_rec[qual];
434
435 ds_set(context->ds, qual, ds_buffer_base, buffer);
436 ds_set(context->ds, qual, ds_index, buffer);
437 ds_set(context->ds, qual, ds_absolute_maximum, buffer + size);
438
439 if (ovfl) {
440 /* todo: select a suitable interrupt threshold */
441 } else
442 ds_set(context->ds, qual,
443 ds_interrupt_threshold, buffer + size + 1);
444
445 /* we keep the context until ds_release */
446 return error;
447
448 out_release:
449 context->owner[qual] = NULL;
450 ds_put_context(context);
451 return error;
452
453 out_unlock:
454 spin_unlock(&ds_lock);
455 ds_put_context(context);
456 return error;
149} 457}
150static inline void set_from_ip(char *base, unsigned long value) 458
459int ds_request_bts(struct task_struct *task, void *base, size_t size,
460 ds_ovfl_callback_t ovfl)
151{ 461{
152 (*(unsigned long *)(base + ds_cfg.from_ip.offset)) = value; 462 return ds_request(task, base, size, ovfl, ds_bts);
153} 463}
154static inline unsigned long get_to_ip(char *base) 464
465int ds_request_pebs(struct task_struct *task, void *base, size_t size,
466 ds_ovfl_callback_t ovfl)
155{ 467{
156 return *(unsigned long *)(base + ds_cfg.to_ip.offset); 468 return ds_request(task, base, size, ovfl, ds_pebs);
157} 469}
158static inline void set_to_ip(char *base, unsigned long value) 470
471static int ds_release(struct task_struct *task, enum ds_qualifier qual)
159{ 472{
160 (*(unsigned long *)(base + ds_cfg.to_ip.offset)) = value; 473 struct ds_context *context;
474 int error;
475
476 context = ds_get_context(task);
477 error = ds_validate_access(context, qual);
478 if (error < 0)
479 goto out;
480
481 kfree(context->buffer[qual]);
482 context->buffer[qual] = 0;
483
484 current->mm->total_vm -= context->pages[qual];
485 current->mm->locked_vm -= context->pages[qual];
486 context->pages[qual] = 0;
487 context->owner[qual] = 0;
488
489 /*
490 * we put the context twice:
491 * once for the ds_get_context
492 * once for the corresponding ds_request
493 */
494 ds_put_context(context);
495 out:
496 ds_put_context(context);
497 return error;
161} 498}
162static inline unsigned char get_info_type(char *base) 499
500int ds_release_bts(struct task_struct *task)
163{ 501{
164 return *(unsigned char *)(base + ds_cfg.info_type.offset); 502 return ds_release(task, ds_bts);
165} 503}
166static inline void set_info_type(char *base, unsigned char value) 504
505int ds_release_pebs(struct task_struct *task)
167{ 506{
168 (*(unsigned char *)(base + ds_cfg.info_type.offset)) = value; 507 return ds_release(task, ds_pebs);
169} 508}
170static inline unsigned long get_info_data(char *base) 509
510static int ds_get_index(struct task_struct *task, size_t *pos,
511 enum ds_qualifier qual)
171{ 512{
172 return *(unsigned long *)(base + ds_cfg.info_data.offset); 513 struct ds_context *context;
514 unsigned long base, index;
515 int error;
516
517 context = ds_get_context(task);
518 error = ds_validate_access(context, qual);
519 if (error < 0)
520 goto out;
521
522 base = ds_get(context->ds, qual, ds_buffer_base);
523 index = ds_get(context->ds, qual, ds_index);
524
525 error = ((index - base) / ds_cfg.sizeof_rec[qual]);
526 if (pos)
527 *pos = error;
528 out:
529 ds_put_context(context);
530 return error;
173} 531}
174static inline void set_info_data(char *base, unsigned long value) 532
533int ds_get_bts_index(struct task_struct *task, size_t *pos)
175{ 534{
176 (*(unsigned long *)(base + ds_cfg.info_data.offset)) = value; 535 return ds_get_index(task, pos, ds_bts);
177} 536}
178 537
538int ds_get_pebs_index(struct task_struct *task, size_t *pos)
539{
540 return ds_get_index(task, pos, ds_pebs);
541}
179 542
180int ds_allocate(void **dsp, size_t bts_size_in_bytes) 543static int ds_get_end(struct task_struct *task, size_t *pos,
544 enum ds_qualifier qual)
181{ 545{
182 size_t bts_size_in_records; 546 struct ds_context *context;
183 unsigned long bts; 547 unsigned long base, end;
184 void *ds; 548 int error;
549
550 context = ds_get_context(task);
551 error = ds_validate_access(context, qual);
552 if (error < 0)
553 goto out;
554
555 base = ds_get(context->ds, qual, ds_buffer_base);
556 end = ds_get(context->ds, qual, ds_absolute_maximum);
557
558 error = ((end - base) / ds_cfg.sizeof_rec[qual]);
559 if (pos)
560 *pos = error;
561 out:
562 ds_put_context(context);
563 return error;
564}
185 565
186 if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) 566int ds_get_bts_end(struct task_struct *task, size_t *pos)
187 return -EOPNOTSUPP; 567{
568 return ds_get_end(task, pos, ds_bts);
569}
188 570
189 if (bts_size_in_bytes < 0) 571int ds_get_pebs_end(struct task_struct *task, size_t *pos)
190 return -EINVAL; 572{
573 return ds_get_end(task, pos, ds_pebs);
574}
191 575
192 bts_size_in_records = 576static int ds_access(struct task_struct *task, size_t index,
193 bts_size_in_bytes / ds_cfg.sizeof_bts; 577 const void **record, enum ds_qualifier qual)
194 bts_size_in_bytes = 578{
195 bts_size_in_records * ds_cfg.sizeof_bts; 579 struct ds_context *context;
580 unsigned long base, idx;
581 int error;
196 582
197 if (bts_size_in_bytes <= 0) 583 if (!record)
198 return -EINVAL; 584 return -EINVAL;
199 585
200 bts = (unsigned long)kzalloc(bts_size_in_bytes, GFP_KERNEL); 586 context = ds_get_context(task);
201 587 error = ds_validate_access(context, qual);
202 if (!bts) 588 if (error < 0)
203 return -ENOMEM; 589 goto out;
204 590
205 ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); 591 base = ds_get(context->ds, qual, ds_buffer_base);
592 idx = base + (index * ds_cfg.sizeof_rec[qual]);
206 593
207 if (!ds) { 594 error = -EINVAL;
208 kfree((void *)bts); 595 if (idx > ds_get(context->ds, qual, ds_absolute_maximum))
209 return -ENOMEM; 596 goto out;
210 }
211
212 set_bts_buffer_base(ds, bts);
213 set_bts_index(ds, bts);
214 set_bts_absolute_maximum(ds, bts + bts_size_in_bytes);
215 set_bts_interrupt_threshold(ds, bts + bts_size_in_bytes + 1);
216 597
217 *dsp = ds; 598 *record = (const void *)idx;
218 return 0; 599 error = ds_cfg.sizeof_rec[qual];
600 out:
601 ds_put_context(context);
602 return error;
219} 603}
220 604
221int ds_free(void **dsp) 605int ds_access_bts(struct task_struct *task, size_t index, const void **record)
222{ 606{
223 if (*dsp) { 607 return ds_access(task, index, record, ds_bts);
224 kfree((void *)get_bts_buffer_base(*dsp));
225 kfree(*dsp);
226 *dsp = NULL;
227 }
228 return 0;
229} 608}
230 609
231int ds_get_bts_size(void *ds) 610int ds_access_pebs(struct task_struct *task, size_t index, const void **record)
232{ 611{
233 int size_in_bytes; 612 return ds_access(task, index, record, ds_pebs);
234
235 if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
236 return -EOPNOTSUPP;
237
238 if (!ds)
239 return 0;
240
241 size_in_bytes =
242 get_bts_absolute_maximum(ds) -
243 get_bts_buffer_base(ds);
244 return size_in_bytes;
245} 613}
246 614
247int ds_get_bts_end(void *ds) 615static int ds_write(struct task_struct *task, const void *record, size_t size,
616 enum ds_qualifier qual, int force)
248{ 617{
249 int size_in_bytes = ds_get_bts_size(ds); 618 struct ds_context *context;
250 619 int error;
251 if (size_in_bytes <= 0)
252 return size_in_bytes;
253 620
254 return size_in_bytes / ds_cfg.sizeof_bts; 621 if (!record)
255} 622 return -EINVAL;
256 623
257int ds_get_bts_index(void *ds) 624 error = -EPERM;
258{ 625 context = ds_get_context(task);
259 int index_offset_in_bytes; 626 if (!context)
627 goto out;
260 628
261 if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) 629 if (!force) {
262 return -EOPNOTSUPP; 630 error = ds_validate_access(context, qual);
631 if (error < 0)
632 goto out;
633 }
263 634
264 index_offset_in_bytes = 635 error = 0;
265 get_bts_index(ds) - 636 while (size) {
266 get_bts_buffer_base(ds); 637 unsigned long base, index, end, write_end, int_th;
638 unsigned long write_size, adj_write_size;
639
640 /*
641 * write as much as possible without producing an
642 * overflow interrupt.
643 *
644 * interrupt_threshold must either be
645 * - bigger than absolute_maximum or
646 * - point to a record between buffer_base and absolute_maximum
647 *
648 * index points to a valid record.
649 */
650 base = ds_get(context->ds, qual, ds_buffer_base);
651 index = ds_get(context->ds, qual, ds_index);
652 end = ds_get(context->ds, qual, ds_absolute_maximum);
653 int_th = ds_get(context->ds, qual, ds_interrupt_threshold);
654
655 write_end = min(end, int_th);
656
657 /* if we are already beyond the interrupt threshold,
658 * we fill the entire buffer */
659 if (write_end <= index)
660 write_end = end;
661
662 if (write_end <= index)
663 goto out;
664
665 write_size = min((unsigned long) size, write_end - index);
666 memcpy((void *)index, record, write_size);
667
668 record = (const char *)record + write_size;
669 size -= write_size;
670 error += write_size;
671
672 adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
673 adj_write_size *= ds_cfg.sizeof_rec[qual];
674
675 /* zero out trailing bytes */
676 memset((char *)index + write_size, 0,
677 adj_write_size - write_size);
678 index += adj_write_size;
679
680 if (index >= end)
681 index = base;
682 ds_set(context->ds, qual, ds_index, index);
683
684 if (index >= int_th)
685 ds_overflow(task, context, qual);
686 }
267 687
268 return index_offset_in_bytes / ds_cfg.sizeof_bts; 688 out:
689 ds_put_context(context);
690 return error;
269} 691}
270 692
271int ds_set_overflow(void *ds, int method) 693int ds_write_bts(struct task_struct *task, const void *record, size_t size)
272{ 694{
273 switch (method) { 695 return ds_write(task, record, size, ds_bts, /* force = */ 0);
274 case DS_O_SIGNAL:
275 return -EOPNOTSUPP;
276 case DS_O_WRAP:
277 return 0;
278 default:
279 return -EINVAL;
280 }
281} 696}
282 697
283int ds_get_overflow(void *ds) 698int ds_write_pebs(struct task_struct *task, const void *record, size_t size)
284{ 699{
285 return DS_O_WRAP; 700 return ds_write(task, record, size, ds_pebs, /* force = */ 0);
286} 701}
287 702
288int ds_clear(void *ds) 703int ds_unchecked_write_bts(struct task_struct *task,
704 const void *record, size_t size)
289{ 705{
290 int bts_size = ds_get_bts_size(ds); 706 return ds_write(task, record, size, ds_bts, /* force = */ 1);
291 unsigned long bts_base;
292
293 if (bts_size <= 0)
294 return bts_size;
295
296 bts_base = get_bts_buffer_base(ds);
297 memset((void *)bts_base, 0, bts_size);
298
299 set_bts_index(ds, bts_base);
300 return 0;
301} 707}
302 708
303int ds_read_bts(void *ds, int index, struct bts_struct *out) 709int ds_unchecked_write_pebs(struct task_struct *task,
710 const void *record, size_t size)
304{ 711{
305 void *bts; 712 return ds_write(task, record, size, ds_pebs, /* force = */ 1);
713}
306 714
307 if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) 715static int ds_reset_or_clear(struct task_struct *task,
308 return -EOPNOTSUPP; 716 enum ds_qualifier qual, int clear)
717{
718 struct ds_context *context;
719 unsigned long base, end;
720 int error;
309 721
310 if (index < 0) 722 context = ds_get_context(task);
311 return -EINVAL; 723 error = ds_validate_access(context, qual);
724 if (error < 0)
725 goto out;
312 726
313 if (index >= ds_get_bts_size(ds)) 727 base = ds_get(context->ds, qual, ds_buffer_base);
314 return -EINVAL; 728 end = ds_get(context->ds, qual, ds_absolute_maximum);
315 729
316 bts = (void *)(get_bts_buffer_base(ds) + (index * ds_cfg.sizeof_bts)); 730 if (clear)
731 memset((void *)base, 0, end - base);
317 732
318 memset(out, 0, sizeof(*out)); 733 ds_set(context->ds, qual, ds_index, base);
319 if (get_from_ip(bts) == BTS_ESCAPE_ADDRESS) {
320 out->qualifier = get_info_type(bts);
321 out->variant.jiffies = get_info_data(bts);
322 } else {
323 out->qualifier = BTS_BRANCH;
324 out->variant.lbr.from_ip = get_from_ip(bts);
325 out->variant.lbr.to_ip = get_to_ip(bts);
326 }
327 734
328 return sizeof(*out);; 735 error = 0;
736 out:
737 ds_put_context(context);
738 return error;
329} 739}
330 740
331int ds_write_bts(void *ds, const struct bts_struct *in) 741int ds_reset_bts(struct task_struct *task)
332{ 742{
333 unsigned long bts; 743 return ds_reset_or_clear(task, ds_bts, /* clear = */ 0);
334 744}
335 if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
336 return -EOPNOTSUPP;
337
338 if (ds_get_bts_size(ds) <= 0)
339 return -ENXIO;
340 745
341 bts = get_bts_index(ds); 746int ds_reset_pebs(struct task_struct *task)
747{
748 return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0);
749}
342 750
343 memset((void *)bts, 0, ds_cfg.sizeof_bts); 751int ds_clear_bts(struct task_struct *task)
344 switch (in->qualifier) { 752{
345 case BTS_INVALID: 753 return ds_reset_or_clear(task, ds_bts, /* clear = */ 1);
346 break; 754}
347 755
348 case BTS_BRANCH: 756int ds_clear_pebs(struct task_struct *task)
349 set_from_ip((void *)bts, in->variant.lbr.from_ip); 757{
350 set_to_ip((void *)bts, in->variant.lbr.to_ip); 758 return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1);
351 break; 759}
352 760
353 case BTS_TASK_ARRIVES: 761int ds_get_pebs_reset(struct task_struct *task, u64 *value)
354 case BTS_TASK_DEPARTS: 762{
355 set_from_ip((void *)bts, BTS_ESCAPE_ADDRESS); 763 struct ds_context *context;
356 set_info_type((void *)bts, in->qualifier); 764 int error;
357 set_info_data((void *)bts, in->variant.jiffies);
358 break;
359 765
360 default: 766 if (!value)
361 return -EINVAL; 767 return -EINVAL;
362 }
363 768
364 bts = bts + ds_cfg.sizeof_bts; 769 context = ds_get_context(task);
365 if (bts >= get_bts_absolute_maximum(ds)) 770 error = ds_validate_access(context, ds_pebs);
366 bts = get_bts_buffer_base(ds); 771 if (error < 0)
367 set_bts_index(ds, bts); 772 goto out;
368 773
369 return ds_cfg.sizeof_bts; 774 *value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8));
775
776 error = 0;
777 out:
778 ds_put_context(context);
779 return error;
370} 780}
371 781
372unsigned long ds_debugctl_mask(void) 782int ds_set_pebs_reset(struct task_struct *task, u64 value)
373{ 783{
374 return ds_cfg.debugctl_mask; 784 struct ds_context *context;
375} 785 int error;
376 786
377#ifdef __i386__ 787 context = ds_get_context(task);
378static const struct ds_configuration ds_cfg_netburst = { 788 error = ds_validate_access(context, ds_pebs);
379 .sizeof_ds = 9 * 4, 789 if (error < 0)
380 .bts_buffer_base = { 0, 4 }, 790 goto out;
381 .bts_index = { 4, 4 },
382 .bts_absolute_maximum = { 8, 4 },
383 .bts_interrupt_threshold = { 12, 4 },
384 .sizeof_bts = 3 * 4,
385 .from_ip = { 0, 4 },
386 .to_ip = { 4, 4 },
387 .info_type = { 4, 1 },
388 .info_data = { 8, 4 },
389 .debugctl_mask = (1<<2)|(1<<3)
390};
391 791
392static const struct ds_configuration ds_cfg_pentium_m = { 792 *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value;
393 .sizeof_ds = 9 * 4, 793
394 .bts_buffer_base = { 0, 4 }, 794 error = 0;
395 .bts_index = { 4, 4 }, 795 out:
396 .bts_absolute_maximum = { 8, 4 }, 796 ds_put_context(context);
397 .bts_interrupt_threshold = { 12, 4 }, 797 return error;
398 .sizeof_bts = 3 * 4, 798}
399 .from_ip = { 0, 4 }, 799
400 .to_ip = { 4, 4 }, 800static const struct ds_configuration ds_cfg_var = {
401 .info_type = { 4, 1 }, 801 .sizeof_ds = sizeof(long) * 12,
402 .info_data = { 8, 4 }, 802 .sizeof_field = sizeof(long),
403 .debugctl_mask = (1<<6)|(1<<7) 803 .sizeof_rec[ds_bts] = sizeof(long) * 3,
804 .sizeof_rec[ds_pebs] = sizeof(long) * 10
404}; 805};
405#endif /* _i386_ */ 806static const struct ds_configuration ds_cfg_64 = {
406 807 .sizeof_ds = 8 * 12,
407static const struct ds_configuration ds_cfg_core2 = { 808 .sizeof_field = 8,
408 .sizeof_ds = 9 * 8, 809 .sizeof_rec[ds_bts] = 8 * 3,
409 .bts_buffer_base = { 0, 8 }, 810 .sizeof_rec[ds_pebs] = 8 * 10
410 .bts_index = { 8, 8 },
411 .bts_absolute_maximum = { 16, 8 },
412 .bts_interrupt_threshold = { 24, 8 },
413 .sizeof_bts = 3 * 8,
414 .from_ip = { 0, 8 },
415 .to_ip = { 8, 8 },
416 .info_type = { 8, 1 },
417 .info_data = { 16, 8 },
418 .debugctl_mask = (1<<6)|(1<<7)|(1<<9)
419}; 811};
420 812
421static inline void 813static inline void
@@ -429,14 +821,13 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
429 switch (c->x86) { 821 switch (c->x86) {
430 case 0x6: 822 case 0x6:
431 switch (c->x86_model) { 823 switch (c->x86_model) {
432#ifdef __i386__
433 case 0xD: 824 case 0xD:
434 case 0xE: /* Pentium M */ 825 case 0xE: /* Pentium M */
435 ds_configure(&ds_cfg_pentium_m); 826 ds_configure(&ds_cfg_var);
436 break; 827 break;
437#endif /* _i386_ */
438 case 0xF: /* Core2 */ 828 case 0xF: /* Core2 */
439 ds_configure(&ds_cfg_core2); 829 case 0x1C: /* Atom */
830 ds_configure(&ds_cfg_64);
440 break; 831 break;
441 default: 832 default:
442 /* sorry, don't know about them */ 833 /* sorry, don't know about them */
@@ -445,13 +836,11 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
445 break; 836 break;
446 case 0xF: 837 case 0xF:
447 switch (c->x86_model) { 838 switch (c->x86_model) {
448#ifdef __i386__
449 case 0x0: 839 case 0x0:
450 case 0x1: 840 case 0x1:
451 case 0x2: /* Netburst */ 841 case 0x2: /* Netburst */
452 ds_configure(&ds_cfg_netburst); 842 ds_configure(&ds_cfg_var);
453 break; 843 break;
454#endif /* _i386_ */
455 default: 844 default:
456 /* sorry, don't know about them */ 845 /* sorry, don't know about them */
457 break; 846 break;
@@ -462,3 +851,14 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
462 break; 851 break;
463 } 852 }
464} 853}
854
855void ds_free(struct ds_context *context)
856{
857 /* This is called when the task owning the parameter context
858 * is dying. There should not be any user of that context left
859 * to disturb us, anymore. */
860 unsigned long leftovers = context->count;
861 while (leftovers--)
862 ds_put_context(context);
863}
864#endif /* CONFIG_X86_DS */
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 2c9abc95e026..1962d27c6b82 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -277,6 +277,14 @@ void exit_thread(void)
277 tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; 277 tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
278 put_cpu(); 278 put_cpu();
279 } 279 }
280#ifdef CONFIG_X86_DS
281 /* Free any DS contexts that have not been properly released. */
282 if (unlikely(current->thread.ds_ctx)) {
283 /* we clear debugctl to make sure DS is not used. */
284 update_debugctlmsr(0);
285 ds_free(current->thread.ds_ctx);
286 }
287#endif /* CONFIG_X86_DS */
280} 288}
281 289
282void flush_thread(void) 290void flush_thread(void)
@@ -438,6 +446,35 @@ int set_tsc_mode(unsigned int val)
438 return 0; 446 return 0;
439} 447}
440 448
449#ifdef CONFIG_X86_DS
450static int update_debugctl(struct thread_struct *prev,
451 struct thread_struct *next, unsigned long debugctl)
452{
453 unsigned long ds_prev = 0;
454 unsigned long ds_next = 0;
455
456 if (prev->ds_ctx)
457 ds_prev = (unsigned long)prev->ds_ctx->ds;
458 if (next->ds_ctx)
459 ds_next = (unsigned long)next->ds_ctx->ds;
460
461 if (ds_next != ds_prev) {
462 /* we clear debugctl to make sure DS
463 * is not in use when we change it */
464 debugctl = 0;
465 update_debugctlmsr(0);
466 wrmsr(MSR_IA32_DS_AREA, ds_next, 0);
467 }
468 return debugctl;
469}
470#else
471static int update_debugctl(struct thread_struct *prev,
472 struct thread_struct *next, unsigned long debugctl)
473{
474 return debugctl;
475}
476#endif /* CONFIG_X86_DS */
477
441static noinline void 478static noinline void
442__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, 479__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
443 struct tss_struct *tss) 480 struct tss_struct *tss)
@@ -448,14 +485,7 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
448 prev = &prev_p->thread; 485 prev = &prev_p->thread;
449 next = &next_p->thread; 486 next = &next_p->thread;
450 487
451 debugctl = prev->debugctlmsr; 488 debugctl = update_debugctl(prev, next, prev->debugctlmsr);
452 if (next->ds_area_msr != prev->ds_area_msr) {
453 /* we clear debugctl to make sure DS
454 * is not in use when we change it */
455 debugctl = 0;
456 update_debugctlmsr(0);
457 wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0);
458 }
459 489
460 if (next->debugctlmsr != debugctl) 490 if (next->debugctlmsr != debugctl)
461 update_debugctlmsr(next->debugctlmsr); 491 update_debugctlmsr(next->debugctlmsr);
@@ -479,13 +509,13 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
479 hard_enable_TSC(); 509 hard_enable_TSC();
480 } 510 }
481 511
482#ifdef X86_BTS 512#ifdef CONFIG_X86_PTRACE_BTS
483 if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) 513 if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
484 ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); 514 ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
485 515
486 if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) 516 if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
487 ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); 517 ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
488#endif 518#endif /* CONFIG_X86_PTRACE_BTS */
489 519
490 520
491 if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { 521 if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 00263c9e6500..08a9df08adba 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -239,6 +239,14 @@ void exit_thread(void)
239 t->io_bitmap_max = 0; 239 t->io_bitmap_max = 0;
240 put_cpu(); 240 put_cpu();
241 } 241 }
242#ifdef CONFIG_X86_DS
243 /* Free any DS contexts that have not been properly released. */
244 if (unlikely(t->ds_ctx)) {
245 /* we clear debugctl to make sure DS is not used. */
246 update_debugctlmsr(0);
247 ds_free(t->ds_ctx);
248 }
249#endif /* CONFIG_X86_DS */
242} 250}
243 251
244void flush_thread(void) 252void flush_thread(void)
@@ -472,13 +480,27 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
472 next = &next_p->thread; 480 next = &next_p->thread;
473 481
474 debugctl = prev->debugctlmsr; 482 debugctl = prev->debugctlmsr;
475 if (next->ds_area_msr != prev->ds_area_msr) { 483
476 /* we clear debugctl to make sure DS 484#ifdef CONFIG_X86_DS
477 * is not in use when we change it */ 485 {
478 debugctl = 0; 486 unsigned long ds_prev = 0, ds_next = 0;
479 update_debugctlmsr(0); 487
480 wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr); 488 if (prev->ds_ctx)
489 ds_prev = (unsigned long)prev->ds_ctx->ds;
490 if (next->ds_ctx)
491 ds_next = (unsigned long)next->ds_ctx->ds;
492
493 if (ds_next != ds_prev) {
494 /*
495 * We clear debugctl to make sure DS
496 * is not in use when we change it:
497 */
498 debugctl = 0;
499 update_debugctlmsr(0);
500 wrmsrl(MSR_IA32_DS_AREA, ds_next);
501 }
481 } 502 }
503#endif /* CONFIG_X86_DS */
482 504
483 if (next->debugctlmsr != debugctl) 505 if (next->debugctlmsr != debugctl)
484 update_debugctlmsr(next->debugctlmsr); 506 update_debugctlmsr(next->debugctlmsr);
@@ -516,13 +538,13 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
516 memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); 538 memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
517 } 539 }
518 540
519#ifdef X86_BTS 541#ifdef CONFIG_X86_PTRACE_BTS
520 if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) 542 if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
521 ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); 543 ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
522 544
523 if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) 545 if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
524 ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); 546 ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
525#endif 547#endif /* CONFIG_X86_PTRACE_BTS */
526} 548}
527 549
528/* 550/*
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index fc3e8dcd9da6..58ce4b50211b 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -554,45 +554,115 @@ static int ptrace_set_debugreg(struct task_struct *child,
554 return 0; 554 return 0;
555} 555}
556 556
557#ifdef X86_BTS 557#ifdef CONFIG_X86_PTRACE_BTS
558/*
559 * The configuration for a particular BTS hardware implementation.
560 */
561struct bts_configuration {
562 /* the size of a BTS record in bytes; at most BTS_MAX_RECORD_SIZE */
563 unsigned char sizeof_bts;
564 /* the size of a field in the BTS record in bytes */
565 unsigned char sizeof_field;
566 /* a bitmask to enable/disable BTS in DEBUGCTL MSR */
567 unsigned long debugctl_mask;
568};
569static struct bts_configuration bts_cfg;
570
571#define BTS_MAX_RECORD_SIZE (8 * 3)
572
573
574/*
575 * Branch Trace Store (BTS) uses the following format. Different
576 * architectures vary in the size of those fields.
577 * - source linear address
578 * - destination linear address
579 * - flags
580 *
581 * Later architectures use 64bit pointers throughout, whereas earlier
582 * architectures use 32bit pointers in 32bit mode.
583 *
584 * We compute the base address for the first 8 fields based on:
585 * - the field size stored in the DS configuration
586 * - the relative field position
587 *
588 * In order to store additional information in the BTS buffer, we use
589 * a special source address to indicate that the record requires
590 * special interpretation.
591 *
592 * Netburst indicated via a bit in the flags field whether the branch
593 * was predicted; this is ignored.
594 */
595
596enum bts_field {
597 bts_from = 0,
598 bts_to,
599 bts_flags,
600
601 bts_escape = (unsigned long)-1,
602 bts_qual = bts_to,
603 bts_jiffies = bts_flags
604};
558 605
559static int ptrace_bts_get_size(struct task_struct *child) 606static inline unsigned long bts_get(const char *base, enum bts_field field)
560{ 607{
561 if (!child->thread.ds_area_msr) 608 base += (bts_cfg.sizeof_field * field);
562 return -ENXIO; 609 return *(unsigned long *)base;
610}
611
612static inline void bts_set(char *base, enum bts_field field, unsigned long val)
613{
614 base += (bts_cfg.sizeof_field * field);;
615 (*(unsigned long *)base) = val;
616}
563 617
564 return ds_get_bts_index((void *)child->thread.ds_area_msr); 618/*
619 * Translate a BTS record from the raw format into the bts_struct format
620 *
621 * out (out): bts_struct interpretation
622 * raw: raw BTS record
623 */
624static void ptrace_bts_translate_record(struct bts_struct *out, const void *raw)
625{
626 memset(out, 0, sizeof(*out));
627 if (bts_get(raw, bts_from) == bts_escape) {
628 out->qualifier = bts_get(raw, bts_qual);
629 out->variant.jiffies = bts_get(raw, bts_jiffies);
630 } else {
631 out->qualifier = BTS_BRANCH;
632 out->variant.lbr.from_ip = bts_get(raw, bts_from);
633 out->variant.lbr.to_ip = bts_get(raw, bts_to);
634 }
565} 635}
566 636
567static int ptrace_bts_read_record(struct task_struct *child, 637static int ptrace_bts_read_record(struct task_struct *child, size_t index,
568 long index,
569 struct bts_struct __user *out) 638 struct bts_struct __user *out)
570{ 639{
571 struct bts_struct ret; 640 struct bts_struct ret;
572 int retval; 641 const void *bts_record;
573 int bts_end; 642 size_t bts_index, bts_end;
574 int bts_index; 643 int error;
575 644
576 if (!child->thread.ds_area_msr) 645 error = ds_get_bts_end(child, &bts_end);
577 return -ENXIO; 646 if (error < 0)
647 return error;
578 648
579 if (index < 0)
580 return -EINVAL;
581
582 bts_end = ds_get_bts_end((void *)child->thread.ds_area_msr);
583 if (bts_end <= index) 649 if (bts_end <= index)
584 return -EINVAL; 650 return -EINVAL;
585 651
652 error = ds_get_bts_index(child, &bts_index);
653 if (error < 0)
654 return error;
655
586 /* translate the ptrace bts index into the ds bts index */ 656 /* translate the ptrace bts index into the ds bts index */
587 bts_index = ds_get_bts_index((void *)child->thread.ds_area_msr); 657 bts_index += bts_end - (index + 1);
588 bts_index -= (index + 1); 658 if (bts_end <= bts_index)
589 if (bts_index < 0) 659 bts_index -= bts_end;
590 bts_index += bts_end; 660
661 error = ds_access_bts(child, bts_index, &bts_record);
662 if (error < 0)
663 return error;
591 664
592 retval = ds_read_bts((void *)child->thread.ds_area_msr, 665 ptrace_bts_translate_record(&ret, bts_record);
593 bts_index, &ret);
594 if (retval < 0)
595 return retval;
596 666
597 if (copy_to_user(out, &ret, sizeof(ret))) 667 if (copy_to_user(out, &ret, sizeof(ret)))
598 return -EFAULT; 668 return -EFAULT;
@@ -600,101 +670,106 @@ static int ptrace_bts_read_record(struct task_struct *child,
600 return sizeof(ret); 670 return sizeof(ret);
601} 671}
602 672
603static int ptrace_bts_clear(struct task_struct *child)
604{
605 if (!child->thread.ds_area_msr)
606 return -ENXIO;
607
608 return ds_clear((void *)child->thread.ds_area_msr);
609}
610
611static int ptrace_bts_drain(struct task_struct *child, 673static int ptrace_bts_drain(struct task_struct *child,
612 long size, 674 long size,
613 struct bts_struct __user *out) 675 struct bts_struct __user *out)
614{ 676{
615 int end, i; 677 struct bts_struct ret;
616 void *ds = (void *)child->thread.ds_area_msr; 678 const unsigned char *raw;
617 679 size_t end, i;
618 if (!ds) 680 int error;
619 return -ENXIO;
620 681
621 end = ds_get_bts_index(ds); 682 error = ds_get_bts_index(child, &end);
622 if (end <= 0) 683 if (error < 0)
623 return end; 684 return error;
624 685
625 if (size < (end * sizeof(struct bts_struct))) 686 if (size < (end * sizeof(struct bts_struct)))
626 return -EIO; 687 return -EIO;
627 688
628 for (i = 0; i < end; i++, out++) { 689 error = ds_access_bts(child, 0, (const void **)&raw);
629 struct bts_struct ret; 690 if (error < 0)
630 int retval; 691 return error;
631 692
632 retval = ds_read_bts(ds, i, &ret); 693 for (i = 0; i < end; i++, out++, raw += bts_cfg.sizeof_bts) {
633 if (retval < 0) 694 ptrace_bts_translate_record(&ret, raw);
634 return retval;
635 695
636 if (copy_to_user(out, &ret, sizeof(ret))) 696 if (copy_to_user(out, &ret, sizeof(ret)))
637 return -EFAULT; 697 return -EFAULT;
638 } 698 }
639 699
640 ds_clear(ds); 700 error = ds_clear_bts(child);
701 if (error < 0)
702 return error;
641 703
642 return end; 704 return end;
643} 705}
644 706
707static void ptrace_bts_ovfl(struct task_struct *child)
708{
709 send_sig(child->thread.bts_ovfl_signal, child, 0);
710}
711
645static int ptrace_bts_config(struct task_struct *child, 712static int ptrace_bts_config(struct task_struct *child,
646 long cfg_size, 713 long cfg_size,
647 const struct ptrace_bts_config __user *ucfg) 714 const struct ptrace_bts_config __user *ucfg)
648{ 715{
649 struct ptrace_bts_config cfg; 716 struct ptrace_bts_config cfg;
650 int bts_size, ret = 0; 717 int error = 0;
651 void *ds; 718
719 error = -EOPNOTSUPP;
720 if (!bts_cfg.sizeof_bts)
721 goto errout;
652 722
723 error = -EIO;
653 if (cfg_size < sizeof(cfg)) 724 if (cfg_size < sizeof(cfg))
654 return -EIO; 725 goto errout;
655 726
727 error = -EFAULT;
656 if (copy_from_user(&cfg, ucfg, sizeof(cfg))) 728 if (copy_from_user(&cfg, ucfg, sizeof(cfg)))
657 return -EFAULT; 729 goto errout;
658 730
659 if ((int)cfg.size < 0) 731 error = -EINVAL;
660 return -EINVAL; 732 if ((cfg.flags & PTRACE_BTS_O_SIGNAL) &&
733 !(cfg.flags & PTRACE_BTS_O_ALLOC))
734 goto errout;
661 735
662 bts_size = 0; 736 if (cfg.flags & PTRACE_BTS_O_ALLOC) {
663 ds = (void *)child->thread.ds_area_msr; 737 ds_ovfl_callback_t ovfl = 0;
664 if (ds) { 738 unsigned int sig = 0;
665 bts_size = ds_get_bts_size(ds); 739
666 if (bts_size < 0) 740 /* we ignore the error in case we were not tracing child */
667 return bts_size; 741 (void)ds_release_bts(child);
668 } 742
669 cfg.size = PAGE_ALIGN(cfg.size); 743 if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
744 if (!cfg.signal)
745 goto errout;
746
747 sig = cfg.signal;
748 ovfl = ptrace_bts_ovfl;
749 }
670 750
671 if (bts_size != cfg.size) { 751 error = ds_request_bts(child, /* base = */ 0, cfg.size, ovfl);
672 ret = ptrace_bts_realloc(child, cfg.size, 752 if (error < 0)
673 cfg.flags & PTRACE_BTS_O_CUT_SIZE);
674 if (ret < 0)
675 goto errout; 753 goto errout;
676 754
677 ds = (void *)child->thread.ds_area_msr; 755 child->thread.bts_ovfl_signal = sig;
678 } 756 }
679 757
680 if (cfg.flags & PTRACE_BTS_O_SIGNAL) 758 error = -EINVAL;
681 ret = ds_set_overflow(ds, DS_O_SIGNAL); 759 if (!child->thread.ds_ctx && cfg.flags)
682 else
683 ret = ds_set_overflow(ds, DS_O_WRAP);
684 if (ret < 0)
685 goto errout; 760 goto errout;
686 761
687 if (cfg.flags & PTRACE_BTS_O_TRACE) 762 if (cfg.flags & PTRACE_BTS_O_TRACE)
688 child->thread.debugctlmsr |= ds_debugctl_mask(); 763 child->thread.debugctlmsr |= bts_cfg.debugctl_mask;
689 else 764 else
690 child->thread.debugctlmsr &= ~ds_debugctl_mask(); 765 child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
691 766
692 if (cfg.flags & PTRACE_BTS_O_SCHED) 767 if (cfg.flags & PTRACE_BTS_O_SCHED)
693 set_tsk_thread_flag(child, TIF_BTS_TRACE_TS); 768 set_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
694 else 769 else
695 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); 770 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
696 771
697 ret = sizeof(cfg); 772 error = sizeof(cfg);
698 773
699out: 774out:
700 if (child->thread.debugctlmsr) 775 if (child->thread.debugctlmsr)
@@ -702,10 +777,10 @@ out:
702 else 777 else
703 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); 778 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
704 779
705 return ret; 780 return error;
706 781
707errout: 782errout:
708 child->thread.debugctlmsr &= ~ds_debugctl_mask(); 783 child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
709 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); 784 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
710 goto out; 785 goto out;
711} 786}
@@ -714,29 +789,40 @@ static int ptrace_bts_status(struct task_struct *child,
714 long cfg_size, 789 long cfg_size,
715 struct ptrace_bts_config __user *ucfg) 790 struct ptrace_bts_config __user *ucfg)
716{ 791{
717 void *ds = (void *)child->thread.ds_area_msr;
718 struct ptrace_bts_config cfg; 792 struct ptrace_bts_config cfg;
793 size_t end;
794 const void *base, *max;
795 int error;
719 796
720 if (cfg_size < sizeof(cfg)) 797 if (cfg_size < sizeof(cfg))
721 return -EIO; 798 return -EIO;
722 799
723 memset(&cfg, 0, sizeof(cfg)); 800 error = ds_get_bts_end(child, &end);
801 if (error < 0)
802 return error;
724 803
725 if (ds) { 804 error = ds_access_bts(child, /* index = */ 0, &base);
726 cfg.size = ds_get_bts_size(ds); 805 if (error < 0)
806 return error;
727 807
728 if (ds_get_overflow(ds) == DS_O_SIGNAL) 808 error = ds_access_bts(child, /* index = */ end, &max);
729 cfg.flags |= PTRACE_BTS_O_SIGNAL; 809 if (error < 0)
810 return error;
730 811
731 if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) && 812 memset(&cfg, 0, sizeof(cfg));
732 child->thread.debugctlmsr & ds_debugctl_mask()) 813 cfg.size = (max - base);
733 cfg.flags |= PTRACE_BTS_O_TRACE; 814 cfg.signal = child->thread.bts_ovfl_signal;
815 cfg.bts_size = sizeof(struct bts_struct);
734 816
735 if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS)) 817 if (cfg.signal)
736 cfg.flags |= PTRACE_BTS_O_SCHED; 818 cfg.flags |= PTRACE_BTS_O_SIGNAL;
737 }
738 819
739 cfg.bts_size = sizeof(struct bts_struct); 820 if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) &&
821 child->thread.debugctlmsr & bts_cfg.debugctl_mask)
822 cfg.flags |= PTRACE_BTS_O_TRACE;
823
824 if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS))
825 cfg.flags |= PTRACE_BTS_O_SCHED;
740 826
741 if (copy_to_user(ucfg, &cfg, sizeof(cfg))) 827 if (copy_to_user(ucfg, &cfg, sizeof(cfg)))
742 return -EFAULT; 828 return -EFAULT;
@@ -744,89 +830,38 @@ static int ptrace_bts_status(struct task_struct *child,
744 return sizeof(cfg); 830 return sizeof(cfg);
745} 831}
746 832
747
748static int ptrace_bts_write_record(struct task_struct *child, 833static int ptrace_bts_write_record(struct task_struct *child,
749 const struct bts_struct *in) 834 const struct bts_struct *in)
750{ 835{
751 int retval; 836 unsigned char bts_record[BTS_MAX_RECORD_SIZE];
752 837
753 if (!child->thread.ds_area_msr) 838 BUG_ON(BTS_MAX_RECORD_SIZE < bts_cfg.sizeof_bts);
754 return -ENXIO;
755 839
756 retval = ds_write_bts((void *)child->thread.ds_area_msr, in); 840 memset(bts_record, 0, bts_cfg.sizeof_bts);
757 if (retval) 841 switch (in->qualifier) {
758 return retval; 842 case BTS_INVALID:
843 break;
759 844
760 return sizeof(*in); 845 case BTS_BRANCH:
761} 846 bts_set(bts_record, bts_from, in->variant.lbr.from_ip);
847 bts_set(bts_record, bts_to, in->variant.lbr.to_ip);
848 break;
762 849
763static int ptrace_bts_realloc(struct task_struct *child, 850 case BTS_TASK_ARRIVES:
764 int size, int reduce_size) 851 case BTS_TASK_DEPARTS:
765{ 852 bts_set(bts_record, bts_from, bts_escape);
766 unsigned long rlim, vm; 853 bts_set(bts_record, bts_qual, in->qualifier);
767 int ret, old_size; 854 bts_set(bts_record, bts_jiffies, in->variant.jiffies);
855 break;
768 856
769 if (size < 0) 857 default:
770 return -EINVAL; 858 return -EINVAL;
771
772 old_size = ds_get_bts_size((void *)child->thread.ds_area_msr);
773 if (old_size < 0)
774 return old_size;
775
776 ret = ds_free((void **)&child->thread.ds_area_msr);
777 if (ret < 0)
778 goto out;
779
780 size >>= PAGE_SHIFT;
781 old_size >>= PAGE_SHIFT;
782
783 current->mm->total_vm -= old_size;
784 current->mm->locked_vm -= old_size;
785
786 if (size == 0)
787 goto out;
788
789 rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
790 vm = current->mm->total_vm + size;
791 if (rlim < vm) {
792 ret = -ENOMEM;
793
794 if (!reduce_size)
795 goto out;
796
797 size = rlim - current->mm->total_vm;
798 if (size <= 0)
799 goto out;
800 } 859 }
801 860
802 rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; 861 /* The writing task will be the switched-to task on a context
803 vm = current->mm->locked_vm + size; 862 * switch. It needs to write into the switched-from task's BTS
804 if (rlim < vm) { 863 * buffer. */
805 ret = -ENOMEM; 864 return ds_unchecked_write_bts(child, bts_record, bts_cfg.sizeof_bts);
806
807 if (!reduce_size)
808 goto out;
809
810 size = rlim - current->mm->locked_vm;
811 if (size <= 0)
812 goto out;
813 }
814
815 ret = ds_allocate((void **)&child->thread.ds_area_msr,
816 size << PAGE_SHIFT);
817 if (ret < 0)
818 goto out;
819
820 current->mm->total_vm += size;
821 current->mm->locked_vm += size;
822
823out:
824 if (child->thread.ds_area_msr)
825 set_tsk_thread_flag(child, TIF_DS_AREA_MSR);
826 else
827 clear_tsk_thread_flag(child, TIF_DS_AREA_MSR);
828
829 return ret;
830} 865}
831 866
832void ptrace_bts_take_timestamp(struct task_struct *tsk, 867void ptrace_bts_take_timestamp(struct task_struct *tsk,
@@ -839,7 +874,66 @@ void ptrace_bts_take_timestamp(struct task_struct *tsk,
839 874
840 ptrace_bts_write_record(tsk, &rec); 875 ptrace_bts_write_record(tsk, &rec);
841} 876}
842#endif /* X86_BTS */ 877
878static const struct bts_configuration bts_cfg_netburst = {
879 .sizeof_bts = sizeof(long) * 3,
880 .sizeof_field = sizeof(long),
881 .debugctl_mask = (1<<2)|(1<<3)|(1<<5)
882};
883
884static const struct bts_configuration bts_cfg_pentium_m = {
885 .sizeof_bts = sizeof(long) * 3,
886 .sizeof_field = sizeof(long),
887 .debugctl_mask = (1<<6)|(1<<7)
888};
889
890static const struct bts_configuration bts_cfg_core2 = {
891 .sizeof_bts = 8 * 3,
892 .sizeof_field = 8,
893 .debugctl_mask = (1<<6)|(1<<7)|(1<<9)
894};
895
896static inline void bts_configure(const struct bts_configuration *cfg)
897{
898 bts_cfg = *cfg;
899}
900
901void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c)
902{
903 switch (c->x86) {
904 case 0x6:
905 switch (c->x86_model) {
906 case 0xD:
907 case 0xE: /* Pentium M */
908 bts_configure(&bts_cfg_pentium_m);
909 break;
910 case 0xF: /* Core2 */
911 case 0x1C: /* Atom */
912 bts_configure(&bts_cfg_core2);
913 break;
914 default:
915 /* sorry, don't know about them */
916 break;
917 }
918 break;
919 case 0xF:
920 switch (c->x86_model) {
921 case 0x0:
922 case 0x1:
923 case 0x2: /* Netburst */
924 bts_configure(&bts_cfg_netburst);
925 break;
926 default:
927 /* sorry, don't know about them */
928 break;
929 }
930 break;
931 default:
932 /* sorry, don't know about them */
933 break;
934 }
935}
936#endif /* CONFIG_X86_PTRACE_BTS */
843 937
844/* 938/*
845 * Called by kernel/ptrace.c when detaching.. 939 * Called by kernel/ptrace.c when detaching..
@@ -852,15 +946,15 @@ void ptrace_disable(struct task_struct *child)
852#ifdef TIF_SYSCALL_EMU 946#ifdef TIF_SYSCALL_EMU
853 clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); 947 clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
854#endif 948#endif
855 if (child->thread.ds_area_msr) { 949#ifdef CONFIG_X86_PTRACE_BTS
856#ifdef X86_BTS 950 (void)ds_release_bts(child);
857 ptrace_bts_realloc(child, 0, 0); 951
858#endif 952 child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
859 child->thread.debugctlmsr &= ~ds_debugctl_mask(); 953 if (!child->thread.debugctlmsr)
860 if (!child->thread.debugctlmsr) 954 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
861 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); 955
862 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); 956 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
863 } 957#endif /* CONFIG_X86_PTRACE_BTS */
864} 958}
865 959
866#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION 960#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
@@ -980,7 +1074,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
980 /* 1074 /*
981 * These bits need more cooking - not enabled yet: 1075 * These bits need more cooking - not enabled yet:
982 */ 1076 */
983#ifdef X86_BTS 1077#ifdef CONFIG_X86_PTRACE_BTS
984 case PTRACE_BTS_CONFIG: 1078 case PTRACE_BTS_CONFIG:
985 ret = ptrace_bts_config 1079 ret = ptrace_bts_config
986 (child, data, (struct ptrace_bts_config __user *)addr); 1080 (child, data, (struct ptrace_bts_config __user *)addr);
@@ -992,7 +1086,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
992 break; 1086 break;
993 1087
994 case PTRACE_BTS_SIZE: 1088 case PTRACE_BTS_SIZE:
995 ret = ptrace_bts_get_size(child); 1089 ret = ds_get_bts_index(child, /* pos = */ 0);
996 break; 1090 break;
997 1091
998 case PTRACE_BTS_GET: 1092 case PTRACE_BTS_GET:
@@ -1001,14 +1095,14 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
1001 break; 1095 break;
1002 1096
1003 case PTRACE_BTS_CLEAR: 1097 case PTRACE_BTS_CLEAR:
1004 ret = ptrace_bts_clear(child); 1098 ret = ds_clear_bts(child);
1005 break; 1099 break;
1006 1100
1007 case PTRACE_BTS_DRAIN: 1101 case PTRACE_BTS_DRAIN:
1008 ret = ptrace_bts_drain 1102 ret = ptrace_bts_drain
1009 (child, data, (struct bts_struct __user *) addr); 1103 (child, data, (struct bts_struct __user *) addr);
1010 break; 1104 break;
1011#endif 1105#endif /* CONFIG_X86_PTRACE_BTS */
1012 1106
1013 default: 1107 default:
1014 ret = ptrace_request(child, request, addr, data); 1108 ret = ptrace_request(child, request, addr, data);