about | summary | refs | log | tree | commit | diff | stats
path: root/arch/x86/kernel/ds.c
diff options
context:
space:
mode:
author: Markus Metzger <markus.t.metzger@intel.com>, 2008-04-08 05:01:58 -0400
committer: Ingo Molnar <mingo@elte.hu>, 2008-05-12 15:27:53 -0400
commit: 93fa7636dfdc059b25df148f230c0991096afdef (patch)
tree: cf277bd09091ac69abb5f7fdc21c705b8f186f88 /arch/x86/kernel/ds.c
parent: 492c2e476eac010962850006c49df326919b284c (diff)
x86, ptrace: PEBS support
Polish the ds.h interface and add support for PEBS.

Ds.c is meant to be the resource allocator for per-thread and per-cpu BTS and PEBS recording. It is used by ptrace/utrace to provide execution tracing of debugged tasks. It will be used by profilers (e.g. perfmon2). It may be used by kernel debuggers to provide a kernel execution trace.

Changes in detail:
- guard DS and ptrace by CONFIG macros
- separate DS and BTS more clearly
- simplify field accesses
- add functions to manage PEBS buffers
- add simple protection/allocation mechanism
- added support for Atom

Opens:
- buffer overflow handling
  Currently, only circular buffers are supported. This is all we need for debugging. Profilers would want an overflow notification. This is planned to be added when perfmon2 is made to use the ds.h interface.
- utrace intermediate layer

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'arch/x86/kernel/ds.c')
-rw-r--r-- arch/x86/kernel/ds.c 953
1 files changed, 676 insertions, 277 deletions
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 11c11b8ec48d..5b32b6d062b4 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -2,26 +2,48 @@
2 * Debug Store support 2 * Debug Store support
3 * 3 *
4 * This provides a low-level interface to the hardware's Debug Store 4 * This provides a low-level interface to the hardware's Debug Store
5 * feature that is used for last branch recording (LBR) and 5 * feature that is used for branch trace store (BTS) and
6 * precise-event based sampling (PEBS). 6 * precise-event based sampling (PEBS).
7 * 7 *
8 * Different architectures use a different DS layout/pointer size. 8 * It manages:
9 * The below functions therefore work on a void*. 9 * - per-thread and per-cpu allocation of BTS and PEBS
10 * - buffer memory allocation (optional)
11 * - buffer overflow handling
12 * - buffer access
10 * 13 *
14 * It assumes:
15 * - get_task_struct on all parameter tasks
16 * - current is allowed to trace parameter tasks
11 * 17 *
12 * Since there is no user for PEBS, yet, only LBR (or branch
13 * trace store, BTS) is supported.
14 * 18 *
15 * 19 * Copyright (C) 2007-2008 Intel Corporation.
16 * Copyright (C) 2007 Intel Corporation. 20 * Markus Metzger <markus.t.metzger@intel.com>, 2007-2008
17 * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007
18 */ 21 */
19 22
23
24#ifdef CONFIG_X86_DS
25
20#include <asm/ds.h> 26#include <asm/ds.h>
21 27
22#include <linux/errno.h> 28#include <linux/errno.h>
23#include <linux/string.h> 29#include <linux/string.h>
24#include <linux/slab.h> 30#include <linux/slab.h>
31#include <linux/sched.h>
32
33
34/*
35 * The configuration for a particular DS hardware implementation.
36 */
37struct ds_configuration {
38 /* the size of the DS structure in bytes */
39 unsigned char sizeof_ds;
40 /* the size of one pointer-typed field in the DS structure in bytes;
41 this covers the first 8 fields related to buffer management. */
42 unsigned char sizeof_field;
43 /* the size of a BTS/PEBS record in bytes */
44 unsigned char sizeof_rec[2];
45};
46static struct ds_configuration ds_cfg;
25 47
26 48
27/* 49/*
@@ -44,378 +66,747 @@
44 * (interrupt occurs when write pointer passes interrupt pointer) 66 * (interrupt occurs when write pointer passes interrupt pointer)
45 * - value to which counter is reset following counter overflow 67 * - value to which counter is reset following counter overflow
46 * 68 *
47 * On later architectures, the last branch recording hardware uses 69 * Later architectures use 64bit pointers throughout, whereas earlier
48 * 64bit pointers even in 32bit mode. 70 * architectures use 32bit pointers in 32bit mode.
49 *
50 *
51 * Branch Trace Store (BTS) records store information about control
52 * flow changes. They at least provide the following information:
53 * - source linear address
54 * - destination linear address
55 * 71 *
56 * Netburst supported a predicated bit that had been dropped in later
57 * architectures. We do not suppor it.
58 * 72 *
73 * We compute the base address for the first 8 fields based on:
74 * - the field size stored in the DS configuration
75 * - the relative field position
76 * - an offset giving the start of the respective region
59 * 77 *
60 * In order to abstract from the actual DS and BTS layout, we describe 78 * This offset is further used to index various arrays holding
61 * the access to the relevant fields. 79 * information for BTS and PEBS at the respective index.
62 * Thanks to Andi Kleen for proposing this design.
63 * 80 *
64 * The implementation, however, is not as general as it might seem. In 81 * On later 32bit processors, we only access the lower 32bit of the
65 * order to stay somewhat simple and efficient, we assume an 82 * 64bit pointer fields. The upper halves will be zeroed out.
66 * underlying unsigned type (mostly a pointer type) and we expect the
67 * field to be at least as big as that type.
68 */ 83 */
69 84
70/* 85enum ds_field {
71 * A special from_ip address to indicate that the BTS record is an 86 ds_buffer_base = 0,
72 * info record that needs to be interpreted or skipped. 87 ds_index,
73 */ 88 ds_absolute_maximum,
74#define BTS_ESCAPE_ADDRESS (-1) 89 ds_interrupt_threshold,
90};
75 91
76/* 92enum ds_qualifier {
77 * A field access descriptor 93 ds_bts = 0,
78 */ 94 ds_pebs
79struct access_desc {
80 unsigned char offset;
81 unsigned char size;
82}; 95};
83 96
97static inline unsigned long ds_get(const unsigned char *base,
98 enum ds_qualifier qual, enum ds_field field)
99{
100 base += (ds_cfg.sizeof_field * (field + (4 * qual)));
101 return *(unsigned long *)base;
102}
103
104static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
105 enum ds_field field, unsigned long value)
106{
107 base += (ds_cfg.sizeof_field * (field + (4 * qual)));
108 (*(unsigned long *)base) = value;
109}
110
111
84/* 112/*
85 * The configuration for a particular DS/BTS hardware implementation. 113 * Locking is done only for allocating BTS or PEBS resources and for
114 * guarding context and buffer memory allocation.
115 *
116 * Most functions require the current task to own the ds context part
117 * they are going to access. All the locking is done when validating
118 * access to the context.
86 */ 119 */
87struct ds_configuration { 120static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock);
88 /* the DS configuration */
89 unsigned char sizeof_ds;
90 struct access_desc bts_buffer_base;
91 struct access_desc bts_index;
92 struct access_desc bts_absolute_maximum;
93 struct access_desc bts_interrupt_threshold;
94 /* the BTS configuration */
95 unsigned char sizeof_bts;
96 struct access_desc from_ip;
97 struct access_desc to_ip;
98 /* BTS variants used to store additional information like
99 timestamps */
100 struct access_desc info_type;
101 struct access_desc info_data;
102 unsigned long debugctl_mask;
103};
104 121
105/* 122/*
106 * The global configuration used by the below accessor functions 123 * Validate that the current task is allowed to access the BTS/PEBS
124 * buffer of the parameter task.
125 *
126 * Returns 0, if access is granted; -Eerrno, otherwise.
107 */ 127 */
108static struct ds_configuration ds_cfg; 128static inline int ds_validate_access(struct ds_context *context,
129 enum ds_qualifier qual)
130{
131 if (!context)
132 return -EPERM;
133
134 if (context->owner[qual] == current)
135 return 0;
136
137 return -EPERM;
138}
139
109 140
110/* 141/*
111 * Accessor functions for some DS and BTS fields using the above 142 * We either support (system-wide) per-cpu or per-thread allocation.
112 * global ptrace_bts_cfg. 143 * We distinguish the two based on the task_struct pointer, where a
144 * NULL pointer indicates per-cpu allocation for the current cpu.
145 *
146 * Allocations are use-counted. As soon as resources are allocated,
147 * further allocations must be of the same type (per-cpu or
148 * per-thread). We model this by counting allocations (i.e. the number
149 * of tracers of a certain type) for one type negatively:
150 * =0 no tracers
151 * >0 number of per-thread tracers
152 * <0 number of per-cpu tracers
153 *
154 * The below functions to get and put tracers and to check the
155 * allocation type require the ds_lock to be held by the caller.
156 *
157 * Tracers essentially gives the number of ds contexts for a certain
158 * type of allocation.
113 */ 159 */
114static inline unsigned long get_bts_buffer_base(char *base) 160static long tracers;
161
162static inline void get_tracer(struct task_struct *task)
115{ 163{
116 return *(unsigned long *)(base + ds_cfg.bts_buffer_base.offset); 164 tracers += (task ? 1 : -1);
117} 165}
118static inline void set_bts_buffer_base(char *base, unsigned long value) 166
167static inline void put_tracer(struct task_struct *task)
119{ 168{
120 (*(unsigned long *)(base + ds_cfg.bts_buffer_base.offset)) = value; 169 tracers -= (task ? 1 : -1);
121} 170}
122static inline unsigned long get_bts_index(char *base) 171
172static inline int check_tracer(struct task_struct *task)
123{ 173{
124 return *(unsigned long *)(base + ds_cfg.bts_index.offset); 174 return (task ? (tracers >= 0) : (tracers <= 0));
125} 175}
126static inline void set_bts_index(char *base, unsigned long value) 176
177
178/*
179 * The DS context is either attached to a thread or to a cpu:
180 * - in the former case, the thread_struct contains a pointer to the
181 * attached context.
182 * - in the latter case, we use a static array of per-cpu context
183 * pointers.
184 *
185 * Contexts are use-counted. They are allocated on first access and
186 * deallocated when the last user puts the context.
187 *
188 * We distinguish between an allocating and a non-allocating get of a
189 * context:
190 * - the allocating get is used for requesting BTS/PEBS resources. It
191 * requires the caller to hold the global ds_lock.
192 * - the non-allocating get is used for all other cases. A
193 * non-existing context indicates an error. It acquires and releases
194 * the ds_lock itself for obtaining the context.
195 *
196 * A context and its DS configuration are allocated and deallocated
197 * together. A context always has a DS configuration of the
198 * appropriate size.
199 */
200static DEFINE_PER_CPU(struct ds_context *, system_context);
201
202#define this_system_context per_cpu(system_context, smp_processor_id())
203
204/*
205 * Returns the pointer to the parameter task's context or to the
206 * system-wide context, if task is NULL.
207 *
208 * Increases the use count of the returned context, if not NULL.
209 */
210static inline struct ds_context *ds_get_context(struct task_struct *task)
127{ 211{
128 (*(unsigned long *)(base + ds_cfg.bts_index.offset)) = value; 212 struct ds_context *context;
213
214 spin_lock(&ds_lock);
215
216 context = (task ? task->thread.ds_ctx : this_system_context);
217 if (context)
218 context->count++;
219
220 spin_unlock(&ds_lock);
221
222 return context;
129} 223}
130static inline unsigned long get_bts_absolute_maximum(char *base) 224
225/*
226 * Same as ds_get_context, but allocates the context and it's DS
227 * structure, if necessary; returns NULL; if out of memory.
228 *
229 * pre: requires ds_lock to be held
230 */
231static inline struct ds_context *ds_alloc_context(struct task_struct *task)
131{ 232{
132 return *(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset); 233 struct ds_context **p_context =
234 (task ? &task->thread.ds_ctx : &this_system_context);
235 struct ds_context *context = *p_context;
236
237 if (!context) {
238 context = kzalloc(sizeof(*context), GFP_KERNEL);
239
240 if (!context)
241 return 0;
242
243 context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL);
244 if (!context->ds) {
245 kfree(context);
246 return 0;
247 }
248
249 *p_context = context;
250
251 context->this = p_context;
252 context->task = task;
253
254 if (task)
255 set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
256
257 if (!task || (task == current))
258 wrmsr(MSR_IA32_DS_AREA, (unsigned long)context->ds, 0);
259
260 get_tracer(task);
261 }
262
263 context->count++;
264
265 return context;
133} 266}
134static inline void set_bts_absolute_maximum(char *base, unsigned long value) 267
268/*
269 * Decreases the use count of the parameter context, if not NULL.
270 * Deallocates the context, if the use count reaches zero.
271 */
272static inline void ds_put_context(struct ds_context *context)
135{ 273{
136 (*(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset)) = value; 274 if (!context)
275 return;
276
277 spin_lock(&ds_lock);
278
279 if (--context->count)
280 goto out;
281
282 *(context->this) = 0;
283
284 if (context->task)
285 clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);
286
287 if (!context->task || (context->task == current))
288 wrmsrl(MSR_IA32_DS_AREA, 0);
289
290 put_tracer(context->task);
291
292 /* free any leftover buffers from tracers that did not
293 * deallocate them properly. */
294 kfree(context->buffer[ds_bts]);
295 kfree(context->buffer[ds_pebs]);
296 kfree(context->ds);
297 kfree(context);
298 out:
299 spin_unlock(&ds_lock);
137} 300}
138static inline unsigned long get_bts_interrupt_threshold(char *base) 301
302
303/*
304 * Handle a buffer overflow
305 *
306 * task: the task whose buffers are overflowing;
307 * NULL for a buffer overflow on the current cpu
308 * context: the ds context
309 * qual: the buffer type
310 */
311static void ds_overflow(struct task_struct *task, struct ds_context *context,
312 enum ds_qualifier qual)
139{ 313{
140 return *(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset); 314 if (!context)
315 return;
316
317 if (context->callback[qual])
318 (*context->callback[qual])(task);
319
320 /* todo: do some more overflow handling */
141} 321}
142static inline void set_bts_interrupt_threshold(char *base, unsigned long value) 322
323
324/*
325 * Allocate a non-pageable buffer of the parameter size.
326 * Checks the memory and the locked memory rlimit.
327 *
328 * Returns the buffer, if successful;
329 * NULL, if out of memory or rlimit exceeded.
330 *
331 * size: the requested buffer size in bytes
332 * pages (out): if not NULL, contains the number of pages reserved
333 */
334static inline void *ds_allocate_buffer(size_t size, unsigned int *pages)
143{ 335{
144 (*(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset)) = value; 336 unsigned long rlim, vm, pgsz;
337 void *buffer;
338
339 pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
340
341 rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
342 vm = current->mm->total_vm + pgsz;
343 if (rlim < vm)
344 return 0;
345
346 rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
347 vm = current->mm->locked_vm + pgsz;
348 if (rlim < vm)
349 return 0;
350
351 buffer = kzalloc(size, GFP_KERNEL);
352 if (!buffer)
353 return 0;
354
355 current->mm->total_vm += pgsz;
356 current->mm->locked_vm += pgsz;
357
358 if (pages)
359 *pages = pgsz;
360
361 return buffer;
145} 362}
146static inline unsigned long get_from_ip(char *base) 363
364static int ds_request(struct task_struct *task, void *base, size_t size,
365 ds_ovfl_callback_t ovfl, enum ds_qualifier qual)
147{ 366{
148 return *(unsigned long *)(base + ds_cfg.from_ip.offset); 367 struct ds_context *context;
368 unsigned long buffer, adj;
369 const unsigned long alignment = (1 << 3);
370 int error = 0;
371
372 if (!ds_cfg.sizeof_ds)
373 return -EOPNOTSUPP;
374
375 /* we require some space to do alignment adjustments below */
376 if (size < (alignment + ds_cfg.sizeof_rec[qual]))
377 return -EINVAL;
378
379 /* buffer overflow notification is not yet implemented */
380 if (ovfl)
381 return -EOPNOTSUPP;
382
383
384 spin_lock(&ds_lock);
385
386 if (!check_tracer(task))
387 return -EPERM;
388
389 error = -ENOMEM;
390 context = ds_alloc_context(task);
391 if (!context)
392 goto out_unlock;
393
394 error = -EALREADY;
395 if (context->owner[qual] == current)
396 goto out_unlock;
397 error = -EPERM;
398 if (context->owner[qual] != 0)
399 goto out_unlock;
400 context->owner[qual] = current;
401
402 spin_unlock(&ds_lock);
403
404
405 error = -ENOMEM;
406 if (!base) {
407 base = ds_allocate_buffer(size, &context->pages[qual]);
408 if (!base)
409 goto out_release;
410
411 context->buffer[qual] = base;
412 }
413 error = 0;
414
415 context->callback[qual] = ovfl;
416
417 /* adjust the buffer address and size to meet alignment
418 * constraints:
419 * - buffer is double-word aligned
420 * - size is multiple of record size
421 *
422 * We checked the size at the very beginning; we have enough
423 * space to do the adjustment.
424 */
425 buffer = (unsigned long)base;
426
427 adj = ALIGN(buffer, alignment) - buffer;
428 buffer += adj;
429 size -= adj;
430
431 size /= ds_cfg.sizeof_rec[qual];
432 size *= ds_cfg.sizeof_rec[qual];
433
434 ds_set(context->ds, qual, ds_buffer_base, buffer);
435 ds_set(context->ds, qual, ds_index, buffer);
436 ds_set(context->ds, qual, ds_absolute_maximum, buffer + size);
437
438 if (ovfl) {
439 /* todo: select a suitable interrupt threshold */
440 } else
441 ds_set(context->ds, qual,
442 ds_interrupt_threshold, buffer + size + 1);
443
444 /* we keep the context until ds_release */
445 return error;
446
447 out_release:
448 context->owner[qual] = 0;
449 ds_put_context(context);
450 return error;
451
452 out_unlock:
453 spin_unlock(&ds_lock);
454 ds_put_context(context);
455 return error;
149} 456}
150static inline void set_from_ip(char *base, unsigned long value) 457
458int ds_request_bts(struct task_struct *task, void *base, size_t size,
459 ds_ovfl_callback_t ovfl)
151{ 460{
152 (*(unsigned long *)(base + ds_cfg.from_ip.offset)) = value; 461 return ds_request(task, base, size, ovfl, ds_bts);
153} 462}
154static inline unsigned long get_to_ip(char *base) 463
464int ds_request_pebs(struct task_struct *task, void *base, size_t size,
465 ds_ovfl_callback_t ovfl)
155{ 466{
156 return *(unsigned long *)(base + ds_cfg.to_ip.offset); 467 return ds_request(task, base, size, ovfl, ds_pebs);
157} 468}
158static inline void set_to_ip(char *base, unsigned long value) 469
470static int ds_release(struct task_struct *task, enum ds_qualifier qual)
159{ 471{
160 (*(unsigned long *)(base + ds_cfg.to_ip.offset)) = value; 472 struct ds_context *context;
473 int error;
474
475 context = ds_get_context(task);
476 error = ds_validate_access(context, qual);
477 if (error < 0)
478 goto out;
479
480 kfree(context->buffer[qual]);
481 context->buffer[qual] = 0;
482
483 current->mm->total_vm -= context->pages[qual];
484 current->mm->locked_vm -= context->pages[qual];
485 context->pages[qual] = 0;
486 context->owner[qual] = 0;
487
488 /*
489 * we put the context twice:
490 * once for the ds_get_context
491 * once for the corresponding ds_request
492 */
493 ds_put_context(context);
494 out:
495 ds_put_context(context);
496 return error;
161} 497}
162static inline unsigned char get_info_type(char *base) 498
499int ds_release_bts(struct task_struct *task)
163{ 500{
164 return *(unsigned char *)(base + ds_cfg.info_type.offset); 501 return ds_release(task, ds_bts);
165} 502}
166static inline void set_info_type(char *base, unsigned char value) 503
504int ds_release_pebs(struct task_struct *task)
167{ 505{
168 (*(unsigned char *)(base + ds_cfg.info_type.offset)) = value; 506 return ds_release(task, ds_pebs);
169} 507}
170static inline unsigned long get_info_data(char *base) 508
509static int ds_get_index(struct task_struct *task, size_t *pos,
510 enum ds_qualifier qual)
171{ 511{
172 return *(unsigned long *)(base + ds_cfg.info_data.offset); 512 struct ds_context *context;
513 unsigned long base, index;
514 int error;
515
516 context = ds_get_context(task);
517 error = ds_validate_access(context, qual);
518 if (error < 0)
519 goto out;
520
521 base = ds_get(context->ds, qual, ds_buffer_base);
522 index = ds_get(context->ds, qual, ds_index);
523
524 error = ((index - base) / ds_cfg.sizeof_rec[qual]);
525 if (pos)
526 *pos = error;
527 out:
528 ds_put_context(context);
529 return error;
173} 530}
174static inline void set_info_data(char *base, unsigned long value) 531
532int ds_get_bts_index(struct task_struct *task, size_t *pos)
175{ 533{
176 (*(unsigned long *)(base + ds_cfg.info_data.offset)) = value; 534 return ds_get_index(task, pos, ds_bts);
177} 535}
178 536
537int ds_get_pebs_index(struct task_struct *task, size_t *pos)
538{
539 return ds_get_index(task, pos, ds_pebs);
540}
179 541
180int ds_allocate(void **dsp, size_t bts_size_in_bytes) 542static int ds_get_end(struct task_struct *task, size_t *pos,
543 enum ds_qualifier qual)
181{ 544{
182 size_t bts_size_in_records; 545 struct ds_context *context;
183 unsigned long bts; 546 unsigned long base, end;
184 void *ds; 547 int error;
548
549 context = ds_get_context(task);
550 error = ds_validate_access(context, qual);
551 if (error < 0)
552 goto out;
553
554 base = ds_get(context->ds, qual, ds_buffer_base);
555 end = ds_get(context->ds, qual, ds_absolute_maximum);
556
557 error = ((end - base) / ds_cfg.sizeof_rec[qual]);
558 if (pos)
559 *pos = error;
560 out:
561 ds_put_context(context);
562 return error;
563}
185 564
186 if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) 565int ds_get_bts_end(struct task_struct *task, size_t *pos)
187 return -EOPNOTSUPP; 566{
567 return ds_get_end(task, pos, ds_bts);
568}
188 569
189 if (bts_size_in_bytes < 0) 570int ds_get_pebs_end(struct task_struct *task, size_t *pos)
190 return -EINVAL; 571{
572 return ds_get_end(task, pos, ds_pebs);
573}
191 574
192 bts_size_in_records = 575static int ds_access(struct task_struct *task, size_t index,
193 bts_size_in_bytes / ds_cfg.sizeof_bts; 576 const void **record, enum ds_qualifier qual)
194 bts_size_in_bytes = 577{
195 bts_size_in_records * ds_cfg.sizeof_bts; 578 struct ds_context *context;
579 unsigned long base, idx;
580 int error;
196 581
197 if (bts_size_in_bytes <= 0) 582 if (!record)
198 return -EINVAL; 583 return -EINVAL;
199 584
200 bts = (unsigned long)kzalloc(bts_size_in_bytes, GFP_KERNEL); 585 context = ds_get_context(task);
586 error = ds_validate_access(context, qual);
587 if (error < 0)
588 goto out;
201 589
202 if (!bts) 590 base = ds_get(context->ds, qual, ds_buffer_base);
203 return -ENOMEM; 591 idx = base + (index * ds_cfg.sizeof_rec[qual]);
204 592
205 ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); 593 error = -EINVAL;
594 if (idx > ds_get(context->ds, qual, ds_absolute_maximum))
595 goto out;
206 596
207 if (!ds) { 597 *record = (const void *)idx;
208 kfree((void *)bts); 598 error = ds_cfg.sizeof_rec[qual];
209 return -ENOMEM; 599 out:
210 } 600 ds_put_context(context);
211 601 return error;
212 set_bts_buffer_base(ds, bts);
213 set_bts_index(ds, bts);
214 set_bts_absolute_maximum(ds, bts + bts_size_in_bytes);
215 set_bts_interrupt_threshold(ds, bts + bts_size_in_bytes + 1);
216
217 *dsp = ds;
218 return 0;
219} 602}
220 603
221int ds_free(void **dsp) 604int ds_access_bts(struct task_struct *task, size_t index, const void **record)
222{ 605{
223 if (*dsp) { 606 return ds_access(task, index, record, ds_bts);
224 kfree((void *)get_bts_buffer_base(*dsp));
225 kfree(*dsp);
226 *dsp = NULL;
227 }
228 return 0;
229} 607}
230 608
231int ds_get_bts_size(void *ds) 609int ds_access_pebs(struct task_struct *task, size_t index, const void **record)
232{ 610{
233 int size_in_bytes; 611 return ds_access(task, index, record, ds_pebs);
234
235 if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
236 return -EOPNOTSUPP;
237
238 if (!ds)
239 return 0;
240
241 size_in_bytes =
242 get_bts_absolute_maximum(ds) -
243 get_bts_buffer_base(ds);
244 return size_in_bytes;
245} 612}
246 613
247int ds_get_bts_end(void *ds) 614static int ds_write(struct task_struct *task, const void *record, size_t size,
615 enum ds_qualifier qual, int force)
248{ 616{
249 int size_in_bytes = ds_get_bts_size(ds); 617 struct ds_context *context;
250 618 int error;
251 if (size_in_bytes <= 0)
252 return size_in_bytes;
253 619
254 return size_in_bytes / ds_cfg.sizeof_bts; 620 if (!record)
255} 621 return -EINVAL;
256 622
257int ds_get_bts_index(void *ds) 623 error = -EPERM;
258{ 624 context = ds_get_context(task);
259 int index_offset_in_bytes; 625 if (!context)
626 goto out;
260 627
261 if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) 628 if (!force) {
262 return -EOPNOTSUPP; 629 error = ds_validate_access(context, qual);
630 if (error < 0)
631 goto out;
632 }
263 633
264 index_offset_in_bytes = 634 error = 0;
265 get_bts_index(ds) - 635 while (size) {
266 get_bts_buffer_base(ds); 636 unsigned long base, index, end, write_end, int_th;
637 unsigned long write_size, adj_write_size;
638
639 /*
640 * write as much as possible without producing an
641 * overflow interrupt.
642 *
643 * interrupt_threshold must either be
644 * - bigger than absolute_maximum or
645 * - point to a record between buffer_base and absolute_maximum
646 *
647 * index points to a valid record.
648 */
649 base = ds_get(context->ds, qual, ds_buffer_base);
650 index = ds_get(context->ds, qual, ds_index);
651 end = ds_get(context->ds, qual, ds_absolute_maximum);
652 int_th = ds_get(context->ds, qual, ds_interrupt_threshold);
653
654 write_end = min(end, int_th);
655
656 /* if we are already beyond the interrupt threshold,
657 * we fill the entire buffer */
658 if (write_end <= index)
659 write_end = end;
660
661 if (write_end <= index)
662 goto out;
663
664 write_size = min((unsigned long) size, write_end - index);
665 memcpy((void *)index, record, write_size);
666
667 record = (const char *)record + write_size;
668 size -= write_size;
669 error += write_size;
670
671 adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
672 adj_write_size *= ds_cfg.sizeof_rec[qual];
673
674 /* zero out trailing bytes */
675 memset((char *)index + write_size, 0,
676 adj_write_size - write_size);
677 index += adj_write_size;
678
679 if (index >= end)
680 index = base;
681 ds_set(context->ds, qual, ds_index, index);
682
683 if (index >= int_th)
684 ds_overflow(task, context, qual);
685 }
267 686
268 return index_offset_in_bytes / ds_cfg.sizeof_bts; 687 out:
688 ds_put_context(context);
689 return error;
269} 690}
270 691
271int ds_set_overflow(void *ds, int method) 692int ds_write_bts(struct task_struct *task, const void *record, size_t size)
272{ 693{
273 switch (method) { 694 return ds_write(task, record, size, ds_bts, /* force = */ 0);
274 case DS_O_SIGNAL:
275 return -EOPNOTSUPP;
276 case DS_O_WRAP:
277 return 0;
278 default:
279 return -EINVAL;
280 }
281} 695}
282 696
283int ds_get_overflow(void *ds) 697int ds_write_pebs(struct task_struct *task, const void *record, size_t size)
284{ 698{
285 return DS_O_WRAP; 699 return ds_write(task, record, size, ds_pebs, /* force = */ 0);
286} 700}
287 701
288int ds_clear(void *ds) 702int ds_unchecked_write_bts(struct task_struct *task,
703 const void *record, size_t size)
289{ 704{
290 int bts_size = ds_get_bts_size(ds); 705 return ds_write(task, record, size, ds_bts, /* force = */ 1);
291 unsigned long bts_base;
292
293 if (bts_size <= 0)
294 return bts_size;
295
296 bts_base = get_bts_buffer_base(ds);
297 memset((void *)bts_base, 0, bts_size);
298
299 set_bts_index(ds, bts_base);
300 return 0;
301} 706}
302 707
303int ds_read_bts(void *ds, int index, struct bts_struct *out) 708int ds_unchecked_write_pebs(struct task_struct *task,
709 const void *record, size_t size)
304{ 710{
305 void *bts; 711 return ds_write(task, record, size, ds_pebs, /* force = */ 1);
712}
306 713
307 if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) 714static int ds_reset_or_clear(struct task_struct *task,
308 return -EOPNOTSUPP; 715 enum ds_qualifier qual, int clear)
716{
717 struct ds_context *context;
718 unsigned long base, end;
719 int error;
309 720
310 if (index < 0) 721 context = ds_get_context(task);
311 return -EINVAL; 722 error = ds_validate_access(context, qual);
723 if (error < 0)
724 goto out;
312 725
313 if (index >= ds_get_bts_size(ds)) 726 base = ds_get(context->ds, qual, ds_buffer_base);
314 return -EINVAL; 727 end = ds_get(context->ds, qual, ds_absolute_maximum);
315 728
316 bts = (void *)(get_bts_buffer_base(ds) + (index * ds_cfg.sizeof_bts)); 729 if (clear)
730 memset((void *)base, 0, end - base);
317 731
318 memset(out, 0, sizeof(*out)); 732 ds_set(context->ds, qual, ds_index, base);
319 if (get_from_ip(bts) == BTS_ESCAPE_ADDRESS) {
320 out->qualifier = get_info_type(bts);
321 out->variant.jiffies = get_info_data(bts);
322 } else {
323 out->qualifier = BTS_BRANCH;
324 out->variant.lbr.from_ip = get_from_ip(bts);
325 out->variant.lbr.to_ip = get_to_ip(bts);
326 }
327 733
328 return sizeof(*out);; 734 error = 0;
735 out:
736 ds_put_context(context);
737 return error;
329} 738}
330 739
331int ds_write_bts(void *ds, const struct bts_struct *in) 740int ds_reset_bts(struct task_struct *task)
332{ 741{
333 unsigned long bts; 742 return ds_reset_or_clear(task, ds_bts, /* clear = */ 0);
334 743}
335 if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
336 return -EOPNOTSUPP;
337
338 if (ds_get_bts_size(ds) <= 0)
339 return -ENXIO;
340 744
341 bts = get_bts_index(ds); 745int ds_reset_pebs(struct task_struct *task)
746{
747 return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0);
748}
342 749
343 memset((void *)bts, 0, ds_cfg.sizeof_bts); 750int ds_clear_bts(struct task_struct *task)
344 switch (in->qualifier) { 751{
345 case BTS_INVALID: 752 return ds_reset_or_clear(task, ds_bts, /* clear = */ 1);
346 break; 753}
347 754
348 case BTS_BRANCH: 755int ds_clear_pebs(struct task_struct *task)
349 set_from_ip((void *)bts, in->variant.lbr.from_ip); 756{
350 set_to_ip((void *)bts, in->variant.lbr.to_ip); 757 return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1);
351 break; 758}
352 759
353 case BTS_TASK_ARRIVES: 760int ds_get_pebs_reset(struct task_struct *task, u64 *value)
354 case BTS_TASK_DEPARTS: 761{
355 set_from_ip((void *)bts, BTS_ESCAPE_ADDRESS); 762 struct ds_context *context;
356 set_info_type((void *)bts, in->qualifier); 763 int error;
357 set_info_data((void *)bts, in->variant.jiffies);
358 break;
359 764
360 default: 765 if (!value)
361 return -EINVAL; 766 return -EINVAL;
362 }
363 767
364 bts = bts + ds_cfg.sizeof_bts; 768 context = ds_get_context(task);
365 if (bts >= get_bts_absolute_maximum(ds)) 769 error = ds_validate_access(context, ds_pebs);
366 bts = get_bts_buffer_base(ds); 770 if (error < 0)
367 set_bts_index(ds, bts); 771 goto out;
368 772
369 return ds_cfg.sizeof_bts; 773 *value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8));
774
775 error = 0;
776 out:
777 ds_put_context(context);
778 return error;
370} 779}
371 780
372unsigned long ds_debugctl_mask(void) 781int ds_set_pebs_reset(struct task_struct *task, u64 value)
373{ 782{
374 return ds_cfg.debugctl_mask; 783 struct ds_context *context;
375} 784 int error;
376 785
377#ifdef __i386__ 786 context = ds_get_context(task);
378static const struct ds_configuration ds_cfg_netburst = { 787 error = ds_validate_access(context, ds_pebs);
379 .sizeof_ds = 9 * 4, 788 if (error < 0)
380 .bts_buffer_base = { 0, 4 }, 789 goto out;
381 .bts_index = { 4, 4 },
382 .bts_absolute_maximum = { 8, 4 },
383 .bts_interrupt_threshold = { 12, 4 },
384 .sizeof_bts = 3 * 4,
385 .from_ip = { 0, 4 },
386 .to_ip = { 4, 4 },
387 .info_type = { 4, 1 },
388 .info_data = { 8, 4 },
389 .debugctl_mask = (1<<2)|(1<<3)
390};
391 790
392static const struct ds_configuration ds_cfg_pentium_m = { 791 *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value;
393 .sizeof_ds = 9 * 4, 792
394 .bts_buffer_base = { 0, 4 }, 793 error = 0;
395 .bts_index = { 4, 4 }, 794 out:
396 .bts_absolute_maximum = { 8, 4 }, 795 ds_put_context(context);
397 .bts_interrupt_threshold = { 12, 4 }, 796 return error;
398 .sizeof_bts = 3 * 4, 797}
399 .from_ip = { 0, 4 }, 798
400 .to_ip = { 4, 4 }, 799static const struct ds_configuration ds_cfg_var = {
401 .info_type = { 4, 1 }, 800 .sizeof_ds = sizeof(long) * 12,
402 .info_data = { 8, 4 }, 801 .sizeof_field = sizeof(long),
403 .debugctl_mask = (1<<6)|(1<<7) 802 .sizeof_rec[ds_bts] = sizeof(long) * 3,
803 .sizeof_rec[ds_pebs] = sizeof(long) * 10
404}; 804};
405#endif /* _i386_ */ 805static const struct ds_configuration ds_cfg_64 = {
406 806 .sizeof_ds = 8 * 12,
407static const struct ds_configuration ds_cfg_core2 = { 807 .sizeof_field = 8,
408 .sizeof_ds = 9 * 8, 808 .sizeof_rec[ds_bts] = 8 * 3,
409 .bts_buffer_base = { 0, 8 }, 809 .sizeof_rec[ds_pebs] = 8 * 10
410 .bts_index = { 8, 8 },
411 .bts_absolute_maximum = { 16, 8 },
412 .bts_interrupt_threshold = { 24, 8 },
413 .sizeof_bts = 3 * 8,
414 .from_ip = { 0, 8 },
415 .to_ip = { 8, 8 },
416 .info_type = { 8, 1 },
417 .info_data = { 16, 8 },
418 .debugctl_mask = (1<<6)|(1<<7)|(1<<9)
419}; 810};
420 811
421static inline void 812static inline void
@@ -429,14 +820,13 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
429 switch (c->x86) { 820 switch (c->x86) {
430 case 0x6: 821 case 0x6:
431 switch (c->x86_model) { 822 switch (c->x86_model) {
432#ifdef __i386__
433 case 0xD: 823 case 0xD:
434 case 0xE: /* Pentium M */ 824 case 0xE: /* Pentium M */
435 ds_configure(&ds_cfg_pentium_m); 825 ds_configure(&ds_cfg_var);
436 break; 826 break;
437#endif /* _i386_ */
438 case 0xF: /* Core2 */ 827 case 0xF: /* Core2 */
439 ds_configure(&ds_cfg_core2); 828 case 0x1C: /* Atom */
829 ds_configure(&ds_cfg_64);
440 break; 830 break;
441 default: 831 default:
442 /* sorry, don't know about them */ 832 /* sorry, don't know about them */
@@ -445,13 +835,11 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
445 break; 835 break;
446 case 0xF: 836 case 0xF:
447 switch (c->x86_model) { 837 switch (c->x86_model) {
448#ifdef __i386__
449 case 0x0: 838 case 0x0:
450 case 0x1: 839 case 0x1:
451 case 0x2: /* Netburst */ 840 case 0x2: /* Netburst */
452 ds_configure(&ds_cfg_netburst); 841 ds_configure(&ds_cfg_var);
453 break; 842 break;
454#endif /* _i386_ */
455 default: 843 default:
456 /* sorry, don't know about them */ 844 /* sorry, don't know about them */
457 break; 845 break;
@@ -462,3 +850,14 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
462 break; 850 break;
463 } 851 }
464} 852}
853
854void ds_free(struct ds_context *context)
855{
856 /* This is called when the task owning the parameter context
857 * is dying. There should not be any user of that context left
858 * to disturb us, anymore. */
859 unsigned long leftovers = context->count;
860 while (leftovers--)
861 ds_put_context(context);
862}
863#endif /* CONFIG_X86_DS */