Diffstat (limited to 'arch/x86/kernel/ds.c')
-rw-r--r--	arch/x86/kernel/ds.c	857
1 file changed, 528 insertions(+), 329 deletions(-)
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 095306988667..f0583005b75e 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -6,13 +6,13 @@
  * precise-event based sampling (PEBS).
  *
  * It manages:
- * - per-thread and per-cpu allocation of BTS and PEBS
+ * - DS and BTS hardware configuration
  * - buffer overflow handling (to be done)
  * - buffer access
  *
- * It assumes:
- * - get_task_struct on all traced tasks
- * - current is allowed to trace tasks
+ * It does not do:
+ * - security checking (is the caller allowed to trace the task)
+ * - buffer allocation (memory accounting)
  *
  *
  * Copyright (C) 2007-2008 Intel Corporation.
@@ -34,15 +34,30 @@
  * The configuration for a particular DS hardware implementation.
  */
 struct ds_configuration {
-	/* the size of the DS structure in bytes */
-	unsigned char sizeof_ds;
-	/* the size of one pointer-typed field in the DS structure in bytes;
-	   this covers the first 8 fields related to buffer management. */
+	/* the name of the configuration */
+	const char *name;
+	/* the size of one pointer-typed field in the DS structure and
+	   in the BTS and PEBS buffers in bytes;
+	   this covers the first 8 DS fields related to buffer management. */
 	unsigned char sizeof_field;
 	/* the size of a BTS/PEBS record in bytes */
 	unsigned char sizeof_rec[2];
+	/* a series of bit-masks to control various features indexed
+	 * by enum ds_feature */
+	unsigned long ctl[dsf_ctl_max];
 };
-static struct ds_configuration ds_cfg;
+static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array);
+
+#define ds_cfg per_cpu(ds_cfg_array, smp_processor_id())
+
+#define MAX_SIZEOF_DS (12 * 8)	/* maximal size of a DS configuration */
+#define MAX_SIZEOF_BTS (3 * 8)	/* maximal size of a BTS record */
+#define DS_ALIGNMENT (1 << 3)	/* BTS and PEBS buffer alignment */
+
+#define BTS_CONTROL \
+ (ds_cfg.ctl[dsf_bts] | ds_cfg.ctl[dsf_bts_kernel] | ds_cfg.ctl[dsf_bts_user] |\
+  ds_cfg.ctl[dsf_bts_overflow])
+
 
 /*
  * A BTS or PEBS tracer.
@@ -61,6 +76,8 @@ struct ds_tracer {
 struct bts_tracer {
 	/* the common DS part */
 	struct ds_tracer ds;
+	/* the trace including the DS configuration */
+	struct bts_trace trace;
 	/* buffer overflow notification function */
 	bts_ovfl_callback_t ovfl;
 };
@@ -68,6 +85,8 @@ struct bts_tracer {
 struct pebs_tracer {
 	/* the common DS part */
 	struct ds_tracer ds;
+	/* the trace including the DS configuration */
+	struct pebs_trace trace;
 	/* buffer overflow notification function */
 	pebs_ovfl_callback_t ovfl;
 };
@@ -134,13 +153,11 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
 	(*(unsigned long *)base) = value;
 }
 
-#define DS_ALIGNMENT (1 << 3)	/* BTS and PEBS buffer alignment */
-
 
 /*
  * Locking is done only for allocating BTS or PEBS resources.
  */
-static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock);
+static DEFINE_SPINLOCK(ds_lock);
 
 
 /*
@@ -156,27 +173,32 @@ static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock);
  *   >0  number of per-thread tracers
  *   <0  number of per-cpu tracers
  *
- * The below functions to get and put tracers and to check the
- * allocation type require the ds_lock to be held by the caller.
- *
  * Tracers essentially gives the number of ds contexts for a certain
  * type of allocation.
  */
-static long tracers;
+static atomic_t tracers = ATOMIC_INIT(0);
 
 static inline void get_tracer(struct task_struct *task)
 {
-	tracers += (task ? 1 : -1);
+	if (task)
+		atomic_inc(&tracers);
+	else
+		atomic_dec(&tracers);
 }
 
 static inline void put_tracer(struct task_struct *task)
 {
-	tracers -= (task ? 1 : -1);
+	if (task)
+		atomic_dec(&tracers);
+	else
+		atomic_inc(&tracers);
 }
 
 static inline int check_tracer(struct task_struct *task)
 {
-	return (task ? (tracers >= 0) : (tracers <= 0));
+	return task ?
+		(atomic_read(&tracers) >= 0) :
+		(atomic_read(&tracers) <= 0);
 }
 
 
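
[Editor's note: the hunk above swaps a plain signed counter for an atomic_t but keeps the admission rule: per-thread tracers push the count positive, per-cpu tracers push it negative, and a request of the opposite kind is refused while the count sits on the other side of zero. A minimal user-space toy model of that rule (illustrative only, not part of the patch):

	#include <stdio.h>

	static long tracers;	/* >0: per-thread tracers, <0: per-cpu tracers */

	static int check_tracer(int per_thread)
	{
		return per_thread ? (tracers >= 0) : (tracers <= 0);
	}

	static int request(int per_thread)
	{
		if (!check_tracer(per_thread))
			return -1;		/* the kernel returns -EPERM here */
		tracers += per_thread ? 1 : -1;	/* get_tracer() */
		return 0;
	}

	int main(void)
	{
		printf("%d\n", request(1));	/* 0: first per-thread tracer admitted */
		printf("%d\n", request(0));	/* -1: per-cpu tracing now refused */
		return 0;
	}
]
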
@@ -190,14 +212,30 @@ static inline int check_tracer(struct task_struct *task)
  * Contexts are use-counted. They are allocated on first access and
  * deallocated when the last user puts the context.
  */
-static DEFINE_PER_CPU(struct ds_context *, system_context);
+struct ds_context {
+	/* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */
+	unsigned char ds[MAX_SIZEOF_DS];
+	/* the owner of the BTS and PEBS configuration, respectively */
+	struct bts_tracer *bts_master;
+	struct pebs_tracer *pebs_master;
+	/* use count */
+	unsigned long count;
+	/* a pointer to the context location inside the thread_struct
+	 * or the per_cpu context array */
+	struct ds_context **this;
+	/* a pointer to the task owning this context, or NULL, if the
+	 * context is owned by a cpu */
+	struct task_struct *task;
+};
+
+static DEFINE_PER_CPU(struct ds_context *, system_context_array);
 
-#define this_system_context per_cpu(system_context, smp_processor_id())
+#define system_context per_cpu(system_context_array, smp_processor_id())
 
 static inline struct ds_context *ds_get_context(struct task_struct *task)
 {
 	struct ds_context **p_context =
-		(task ? &task->thread.ds_ctx : &this_system_context);
+		(task ? &task->thread.ds_ctx : &system_context);
 	struct ds_context *context = *p_context;
 	unsigned long irq;
 
@@ -225,10 +263,22 @@ static inline struct ds_context *ds_get_context(struct task_struct *task)
 		wrmsrl(MSR_IA32_DS_AREA,
 		       (unsigned long)context->ds);
 		}
+
+		context->count++;
+
+		spin_unlock_irqrestore(&ds_lock, irq);
+	} else {
+		spin_lock_irqsave(&ds_lock, irq);
+
+		context = *p_context;
+		if (context)
+			context->count++;
+
 		spin_unlock_irqrestore(&ds_lock, irq);
-	}
 
-	context->count++;
+		if (!context)
+			context = ds_get_context(task);
+	}
 
 	return context;
 }
@@ -242,8 +292,10 @@ static inline void ds_put_context(struct ds_context *context)
 
 	spin_lock_irqsave(&ds_lock, irq);
 
-	if (--context->count)
-		goto out;
+	if (--context->count) {
+		spin_unlock_irqrestore(&ds_lock, irq);
+		return;
+	}
 
 	*(context->this) = NULL;
 
@@ -253,14 +305,14 @@ static inline void ds_put_context(struct ds_context *context)
 	if (!context->task || (context->task == current))
 		wrmsrl(MSR_IA32_DS_AREA, 0);
 
-	kfree(context);
- out:
 	spin_unlock_irqrestore(&ds_lock, irq);
+
+	kfree(context);
 }
 
 
 /*
- * Handle a buffer overflow
+ * Call the tracer's callback on a buffer overflow.
  *
  * context: the ds context
  * qual: the buffer type
@@ -268,30 +320,244 @@ static inline void ds_put_context(struct ds_context *context)
 static void ds_overflow(struct ds_context *context, enum ds_qualifier qual)
 {
 	switch (qual) {
-	case ds_bts: {
-		struct bts_tracer *tracer =
-			container_of(context->owner[qual],
-				     struct bts_tracer, ds);
-		if (tracer->ovfl)
-			tracer->ovfl(tracer);
-	}
+	case ds_bts:
+		if (context->bts_master &&
+		    context->bts_master->ovfl)
+			context->bts_master->ovfl(context->bts_master);
+		break;
+	case ds_pebs:
+		if (context->pebs_master &&
+		    context->pebs_master->ovfl)
+			context->pebs_master->ovfl(context->pebs_master);
 		break;
-	case ds_pebs: {
-		struct pebs_tracer *tracer =
-			container_of(context->owner[qual],
-				     struct pebs_tracer, ds);
-		if (tracer->ovfl)
-			tracer->ovfl(tracer);
-	}
 	}
+}
+
+
+/*
+ * Write raw data into the BTS or PEBS buffer.
+ *
+ * The remainder of any partially written record is zeroed out.
+ *
+ * context: the DS context
+ * qual: the buffer type
+ * record: the data to write
+ * size: the size of the data
+ */
+static int ds_write(struct ds_context *context, enum ds_qualifier qual,
+		    const void *record, size_t size)
+{
+	int bytes_written = 0;
+
+	if (!record)
+		return -EINVAL;
+
+	while (size) {
+		unsigned long base, index, end, write_end, int_th;
+		unsigned long write_size, adj_write_size;
+
+		/*
+		 * write as much as possible without producing an
+		 * overflow interrupt.
+		 *
+		 * interrupt_threshold must either be
+		 * - bigger than absolute_maximum or
+		 * - point to a record between buffer_base and absolute_maximum
+		 *
+		 * index points to a valid record.
+		 */
+		base = ds_get(context->ds, qual, ds_buffer_base);
+		index = ds_get(context->ds, qual, ds_index);
+		end = ds_get(context->ds, qual, ds_absolute_maximum);
+		int_th = ds_get(context->ds, qual, ds_interrupt_threshold);
+
+		write_end = min(end, int_th);
+
+		/* if we are already beyond the interrupt threshold,
+		 * we fill the entire buffer */
+		if (write_end <= index)
+			write_end = end;
+
+		if (write_end <= index)
+			break;
+
+		write_size = min((unsigned long) size, write_end - index);
+		memcpy((void *)index, record, write_size);
+
+		record = (const char *)record + write_size;
+		size -= write_size;
+		bytes_written += write_size;
+
+		adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
+		adj_write_size *= ds_cfg.sizeof_rec[qual];
+
+		/* zero out trailing bytes */
+		memset((char *)index + write_size, 0,
+		       adj_write_size - write_size);
+		index += adj_write_size;
+
+		if (index >= end)
+			index = base;
+		ds_set(context->ds, qual, ds_index, index);
+
+		if (index >= int_th)
+			ds_overflow(context, qual);
+	}
+
+	return bytes_written;
+}
+
+
+/*
+ * Branch Trace Store (BTS) uses the following format. Different
+ * architectures vary in the size of those fields.
+ * - source linear address
+ * - destination linear address
+ * - flags
+ *
+ * Later architectures use 64bit pointers throughout, whereas earlier
+ * architectures use 32bit pointers in 32bit mode.
+ *
+ * We compute the base address for the first 8 fields based on:
+ * - the field size stored in the DS configuration
+ * - the relative field position
+ *
+ * In order to store additional information in the BTS buffer, we use
+ * a special source address to indicate that the record requires
+ * special interpretation.
+ *
+ * Netburst indicated via a bit in the flags field whether the branch
+ * was predicted; this is ignored.
+ *
+ * We use two levels of abstraction:
+ * - the raw data level defined here
+ * - an arch-independent level defined in ds.h
+ */
+
+enum bts_field {
+	bts_from,
+	bts_to,
+	bts_flags,
+
+	bts_qual = bts_from,
+	bts_jiffies = bts_to,
+	bts_pid = bts_flags,
+
+	bts_qual_mask = (bts_qual_max - 1),
+	bts_escape = ((unsigned long)-1 & ~bts_qual_mask)
+};
+
+static inline unsigned long bts_get(const char *base, enum bts_field field)
+{
+	base += (ds_cfg.sizeof_field * field);
+	return *(unsigned long *)base;
+}
+
+static inline void bts_set(char *base, enum bts_field field, unsigned long val)
+{
+	base += (ds_cfg.sizeof_field * field);
+	(*(unsigned long *)base) = val;
+}
+
+
+/*
+ * The raw BTS data is architecture dependent.
+ *
+ * For higher-level users, we give an arch-independent view.
+ * - ds.h defines struct bts_struct
+ * - bts_read translates one raw bts record into a bts_struct
+ * - bts_write translates one bts_struct into the raw format and
+ *   writes it into the top of the parameter tracer's buffer.
+ *
+ * return: bytes read/written on success; -Eerrno, otherwise
+ */
+static int bts_read(struct bts_tracer *tracer, const void *at,
+		    struct bts_struct *out)
+{
+	if (!tracer)
+		return -EINVAL;
+
+	if (at < tracer->trace.ds.begin)
+		return -EINVAL;
+
+	if (tracer->trace.ds.end < (at + tracer->trace.ds.size))
+		return -EINVAL;
+
+	memset(out, 0, sizeof(*out));
+	if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) {
+		out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask);
+		out->variant.timestamp.jiffies = bts_get(at, bts_jiffies);
+		out->variant.timestamp.pid = bts_get(at, bts_pid);
+	} else {
+		out->qualifier = bts_branch;
+		out->variant.lbr.from = bts_get(at, bts_from);
+		out->variant.lbr.to = bts_get(at, bts_to);
+	}
+
+	return ds_cfg.sizeof_rec[ds_bts];
+}
+
+static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in)
+{
+	unsigned char raw[MAX_SIZEOF_BTS];
+
+	if (!tracer)
+		return -EINVAL;
+
+	if (MAX_SIZEOF_BTS < ds_cfg.sizeof_rec[ds_bts])
+		return -EOVERFLOW;
+
+	switch (in->qualifier) {
+	case bts_invalid:
+		bts_set(raw, bts_from, 0);
+		bts_set(raw, bts_to, 0);
+		bts_set(raw, bts_flags, 0);
+		break;
+	case bts_branch:
+		bts_set(raw, bts_from, in->variant.lbr.from);
+		bts_set(raw, bts_to, in->variant.lbr.to);
+		bts_set(raw, bts_flags, 0);
+		break;
+	case bts_task_arrives:
+	case bts_task_departs:
+		bts_set(raw, bts_qual, (bts_escape | in->qualifier));
+		bts_set(raw, bts_jiffies, in->variant.timestamp.jiffies);
+		bts_set(raw, bts_pid, in->variant.timestamp.pid);
 		break;
+	default:
+		return -EINVAL;
 	}
+
+	return ds_write(tracer->ds.context, ds_bts, raw,
+			ds_cfg.sizeof_rec[ds_bts]);
 }
 
 
-static void ds_install_ds_config(struct ds_context *context,
-				 enum ds_qualifier qual,
-				 void *base, size_t size, size_t ith)
+static void ds_write_config(struct ds_context *context,
+			    struct ds_trace *cfg, enum ds_qualifier qual)
+{
+	unsigned char *ds = context->ds;
+
+	ds_set(ds, qual, ds_buffer_base, (unsigned long)cfg->begin);
+	ds_set(ds, qual, ds_index, (unsigned long)cfg->top);
+	ds_set(ds, qual, ds_absolute_maximum, (unsigned long)cfg->end);
+	ds_set(ds, qual, ds_interrupt_threshold, (unsigned long)cfg->ith);
+}
+
+static void ds_read_config(struct ds_context *context,
+			   struct ds_trace *cfg, enum ds_qualifier qual)
 {
+	unsigned char *ds = context->ds;
+
+	cfg->begin = (void *)ds_get(ds, qual, ds_buffer_base);
+	cfg->top = (void *)ds_get(ds, qual, ds_index);
+	cfg->end = (void *)ds_get(ds, qual, ds_absolute_maximum);
+	cfg->ith = (void *)ds_get(ds, qual, ds_interrupt_threshold);
+}
+
+static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
+			     void *base, size_t size, size_t ith,
+			     unsigned int flags) {
 	unsigned long buffer, adj;
 
 	/* adjust the buffer address and size to meet alignment
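
[Editor's note: to make the raw record layout above concrete: with the core2 configuration (sizeof_field == 8), a branch record is three 8-byte fields. A user-space sketch of the same decoding that bts_get()/bts_read() perform; values and names here are fabricated for illustration, not part of the patch:

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	#define SIZEOF_FIELD 8	/* assumption: core2, 64-bit fields */

	enum bts_field { bts_from, bts_to, bts_flags };

	static uint64_t bts_get(const unsigned char *base, enum bts_field field)
	{
		uint64_t val;

		memcpy(&val, base + (SIZEOF_FIELD * field), sizeof(val));
		return val;
	}

	int main(void)
	{
		unsigned char rec[3 * SIZEOF_FIELD] = { 0 };
		uint64_t from = 0xffffffff81000010ull;	/* fabricated source */
		uint64_t to = 0xffffffff81000040ull;	/* fabricated destination */

		memcpy(rec + SIZEOF_FIELD * bts_from, &from, sizeof(from));
		memcpy(rec + SIZEOF_FIELD * bts_to, &to, sizeof(to));

		printf("branch %#llx -> %#llx\n",
		       (unsigned long long)bts_get(rec, bts_from),
		       (unsigned long long)bts_get(rec, bts_to));
		return 0;
	}
]
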
@@ -308,32 +574,30 @@ static void ds_install_ds_config(struct ds_context *context,
 	buffer += adj;
 	size   -= adj;
 
-	size /= ds_cfg.sizeof_rec[qual];
-	size *= ds_cfg.sizeof_rec[qual];
+	trace->n = size / ds_cfg.sizeof_rec[qual];
+	trace->size = ds_cfg.sizeof_rec[qual];
 
-	ds_set(context->ds, qual, ds_buffer_base, buffer);
-	ds_set(context->ds, qual, ds_index, buffer);
-	ds_set(context->ds, qual, ds_absolute_maximum, buffer + size);
+	size = (trace->n * trace->size);
 
+	trace->begin = (void *)buffer;
+	trace->top = trace->begin;
+	trace->end = (void *)(buffer + size);
 	/* The value for 'no threshold' is -1, which will set the
 	 * threshold outside of the buffer, just like we want it.
 	 */
-	ds_set(context->ds, qual,
-	       ds_interrupt_threshold, buffer + size - ith);
+	trace->ith = (void *)(buffer + size - ith);
+
+	trace->flags = flags;
 }
 
-static int ds_request(struct ds_tracer *tracer, enum ds_qualifier qual,
-		      struct task_struct *task,
-		      void *base, size_t size, size_t th)
+
+static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
+		      enum ds_qualifier qual, struct task_struct *task,
+		      void *base, size_t size, size_t th, unsigned int flags)
 {
 	struct ds_context *context;
-	unsigned long irq;
 	int error;
 
-	error = -EOPNOTSUPP;
-	if (!ds_cfg.sizeof_ds)
-		goto out;
-
 	error = -EINVAL;
 	if (!base)
 		goto out;
@@ -360,43 +624,26 @@ static int ds_request(struct ds_tracer *tracer, enum ds_qualifier qual,
 		goto out;
 	tracer->context = context;
 
+	ds_init_ds_trace(trace, qual, base, size, th, flags);
 
-	spin_lock_irqsave(&ds_lock, irq);
-
-	error = -EPERM;
-	if (!check_tracer(task))
-		goto out_unlock;
-	get_tracer(task);
-
-	error = -EPERM;
-	if (context->owner[qual])
-		goto out_put_tracer;
-	context->owner[qual] = tracer;
-
-	spin_unlock_irqrestore(&ds_lock, irq);
-
-
-	ds_install_ds_config(context, qual, base, size, th);
-
-	return 0;
-
- out_put_tracer:
-	put_tracer(task);
- out_unlock:
-	spin_unlock_irqrestore(&ds_lock, irq);
-	ds_put_context(context);
-	tracer->context = NULL;
+	error = 0;
  out:
 	return error;
 }
 
 struct bts_tracer *ds_request_bts(struct task_struct *task,
 				  void *base, size_t size,
-				  bts_ovfl_callback_t ovfl, size_t th)
+				  bts_ovfl_callback_t ovfl, size_t th,
+				  unsigned int flags)
 {
 	struct bts_tracer *tracer;
+	unsigned long irq;
 	int error;
 
+	error = -EOPNOTSUPP;
+	if (!ds_cfg.ctl[dsf_bts])
+		goto out;
+
 	/* buffer overflow notification is not yet implemented */
 	error = -EOPNOTSUPP;
 	if (ovfl)
@@ -408,12 +655,40 @@ struct bts_tracer *ds_request_bts(struct task_struct *task,
 		goto out;
 	tracer->ovfl = ovfl;
 
-	error = ds_request(&tracer->ds, ds_bts, task, base, size, th);
+	error = ds_request(&tracer->ds, &tracer->trace.ds,
+			   ds_bts, task, base, size, th, flags);
 	if (error < 0)
 		goto out_tracer;
 
+
+	spin_lock_irqsave(&ds_lock, irq);
+
+	error = -EPERM;
+	if (!check_tracer(task))
+		goto out_unlock;
+	get_tracer(task);
+
+	error = -EPERM;
+	if (tracer->ds.context->bts_master)
+		goto out_put_tracer;
+	tracer->ds.context->bts_master = tracer;
+
+	spin_unlock_irqrestore(&ds_lock, irq);
+
+
+	tracer->trace.read = bts_read;
+	tracer->trace.write = bts_write;
+
+	ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
+	ds_resume_bts(tracer);
+
 	return tracer;
 
+ out_put_tracer:
+	put_tracer(task);
+ out_unlock:
+	spin_unlock_irqrestore(&ds_lock, irq);
+	ds_put_context(tracer->ds.context);
  out_tracer:
 	kfree(tracer);
  out:
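
[Editor's note: after this hunk, a kernel-side client owns both the buffer and a tracer handle. A hedged sketch of the intended call sequence, using the flag names (BTS_KERNEL, BTS_USER, BTS_TIMESTAMPS) from the accompanying ds.h changes; error handling abbreviated, names illustrative:

	/* sketch only: assumes the ds.h interface introduced by this patch */
	static struct bts_tracer *tracer;
	static unsigned char bts_buffer[PAGE_SIZE];

	static int start_bts(struct task_struct *task)
	{
		tracer = ds_request_bts(task, bts_buffer, sizeof(bts_buffer),
					/* ovfl = */ NULL,	/* not implemented yet */
					/* th = */ (size_t)-1,	/* no threshold */
					BTS_KERNEL | BTS_USER | BTS_TIMESTAMPS);
		if (IS_ERR(tracer))
			return PTR_ERR(tracer);
		return 0;
	}

	static void stop_bts(void)
	{
		/* records live in [trace->ds.begin, trace->ds.top) */
		const struct bts_trace *trace = ds_read_bts(tracer);

		(void)trace;
		ds_release_bts(tracer);	/* suspends, unlinks, and frees the tracer */
	}
]
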
@@ -422,9 +697,11 @@ struct bts_tracer *ds_request_bts(struct task_struct *task,
 
 struct pebs_tracer *ds_request_pebs(struct task_struct *task,
 				    void *base, size_t size,
-				    pebs_ovfl_callback_t ovfl, size_t th)
+				    pebs_ovfl_callback_t ovfl, size_t th,
+				    unsigned int flags)
 {
 	struct pebs_tracer *tracer;
+	unsigned long irq;
 	int error;
 
 	/* buffer overflow notification is not yet implemented */
@@ -438,300 +715,171 @@ struct pebs_tracer *ds_request_pebs(struct task_struct *task,
 		goto out;
 	tracer->ovfl = ovfl;
 
-	error = ds_request(&tracer->ds, ds_pebs, task, base, size, th);
+	error = ds_request(&tracer->ds, &tracer->trace.ds,
+			   ds_pebs, task, base, size, th, flags);
 	if (error < 0)
 		goto out_tracer;
 
+	spin_lock_irqsave(&ds_lock, irq);
+
+	error = -EPERM;
+	if (!check_tracer(task))
+		goto out_unlock;
+	get_tracer(task);
+
+	error = -EPERM;
+	if (tracer->ds.context->pebs_master)
+		goto out_put_tracer;
+	tracer->ds.context->pebs_master = tracer;
+
+	spin_unlock_irqrestore(&ds_lock, irq);
+
+	ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
+	ds_resume_pebs(tracer);
+
 	return tracer;
 
+ out_put_tracer:
+	put_tracer(task);
+ out_unlock:
+	spin_unlock_irqrestore(&ds_lock, irq);
+	ds_put_context(tracer->ds.context);
  out_tracer:
 	kfree(tracer);
  out:
 	return ERR_PTR(error);
 }
 
-static void ds_release(struct ds_tracer *tracer, enum ds_qualifier qual)
-{
-	WARN_ON_ONCE(tracer->context->owner[qual] != tracer);
-	tracer->context->owner[qual] = NULL;
-
-	put_tracer(tracer->context->task);
-	ds_put_context(tracer->context);
-}
-
-int ds_release_bts(struct bts_tracer *tracer)
+void ds_release_bts(struct bts_tracer *tracer)
 {
 	if (!tracer)
-		return -EINVAL;
+		return;
 
-	ds_release(&tracer->ds, ds_bts);
-	kfree(tracer);
+	ds_suspend_bts(tracer);
 
-	return 0;
-}
+	WARN_ON_ONCE(tracer->ds.context->bts_master != tracer);
+	tracer->ds.context->bts_master = NULL;
 
-int ds_release_pebs(struct pebs_tracer *tracer)
-{
-	if (!tracer)
-		return -EINVAL;
+	put_tracer(tracer->ds.context->task);
+	ds_put_context(tracer->ds.context);
 
-	ds_release(&tracer->ds, ds_pebs);
 	kfree(tracer);
-
-	return 0;
-}
-
-static size_t ds_get_index(struct ds_context *context, enum ds_qualifier qual)
-{
-	unsigned long base, index;
-
-	base = ds_get(context->ds, qual, ds_buffer_base);
-	index = ds_get(context->ds, qual, ds_index);
-
-	return (index - base) / ds_cfg.sizeof_rec[qual];
 }
 
-int ds_get_bts_index(struct bts_tracer *tracer, size_t *pos)
+void ds_suspend_bts(struct bts_tracer *tracer)
 {
-	if (!tracer)
-		return -EINVAL;
+	struct task_struct *task;
 
-	if (!pos)
-		return -EINVAL;
-
-	*pos = ds_get_index(tracer->ds.context, ds_bts);
-
-	return 0;
-}
-
-int ds_get_pebs_index(struct pebs_tracer *tracer, size_t *pos)
-{
 	if (!tracer)
-		return -EINVAL;
+		return;
 
-	if (!pos)
-		return -EINVAL;
+	task = tracer->ds.context->task;
 
-	*pos = ds_get_index(tracer->ds.context, ds_pebs);
+	if (!task || (task == current))
+		update_debugctlmsr(get_debugctlmsr() & ~BTS_CONTROL);
 
-	return 0;
-}
+	if (task) {
+		task->thread.debugctlmsr &= ~BTS_CONTROL;
 
-static size_t ds_get_end(struct ds_context *context, enum ds_qualifier qual)
-{
-	unsigned long base, max;
-
-	base = ds_get(context->ds, qual, ds_buffer_base);
-	max = ds_get(context->ds, qual, ds_absolute_maximum);
-
-	return (max - base) / ds_cfg.sizeof_rec[qual];
+		if (!task->thread.debugctlmsr)
+			clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
+	}
 }
 
-int ds_get_bts_end(struct bts_tracer *tracer, size_t *pos)
+void ds_resume_bts(struct bts_tracer *tracer)
 {
-	if (!tracer)
-		return -EINVAL;
-
-	if (!pos)
-		return -EINVAL;
-
-	*pos = ds_get_end(tracer->ds.context, ds_bts);
-
-	return 0;
-}
+	struct task_struct *task;
+	unsigned long control;
 
-int ds_get_pebs_end(struct pebs_tracer *tracer, size_t *pos)
-{
 	if (!tracer)
-		return -EINVAL;
-
-	if (!pos)
-		return -EINVAL;
-
-	*pos = ds_get_end(tracer->ds.context, ds_pebs);
-
-	return 0;
-}
-
-static int ds_access(struct ds_context *context, enum ds_qualifier qual,
-		     size_t index, const void **record)
-{
-	unsigned long base, idx;
-
-	if (!record)
-		return -EINVAL;
-
-	base = ds_get(context->ds, qual, ds_buffer_base);
-	idx = base + (index * ds_cfg.sizeof_rec[qual]);
-
-	if (idx > ds_get(context->ds, qual, ds_absolute_maximum))
-		return -EINVAL;
+		return;
 
-	*record = (const void *)idx;
+	task = tracer->ds.context->task;
 
-	return ds_cfg.sizeof_rec[qual];
-}
+	control = ds_cfg.ctl[dsf_bts];
+	if (!(tracer->trace.ds.flags & BTS_KERNEL))
+		control |= ds_cfg.ctl[dsf_bts_kernel];
+	if (!(tracer->trace.ds.flags & BTS_USER))
+		control |= ds_cfg.ctl[dsf_bts_user];
 
-int ds_access_bts(struct bts_tracer *tracer, size_t index,
-		  const void **record)
-{
-	if (!tracer)
-		return -EINVAL;
+	if (task) {
+		task->thread.debugctlmsr |= control;
+		set_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
+	}
 
-	return ds_access(tracer->ds.context, ds_bts, index, record);
+	if (!task || (task == current))
+		update_debugctlmsr(get_debugctlmsr() | control);
 }
 
-int ds_access_pebs(struct pebs_tracer *tracer, size_t index,
-		   const void **record)
+void ds_release_pebs(struct pebs_tracer *tracer)
 {
 	if (!tracer)
-		return -EINVAL;
-
-	return ds_access(tracer->ds.context, ds_pebs, index, record);
-}
-
-static int ds_write(struct ds_context *context, enum ds_qualifier qual,
-		    const void *record, size_t size)
-{
-	int bytes_written = 0;
-
-	if (!record)
-		return -EINVAL;
-
-	while (size) {
-		unsigned long base, index, end, write_end, int_th;
-		unsigned long write_size, adj_write_size;
-
-		/*
-		 * write as much as possible without producing an
-		 * overflow interrupt.
-		 *
-		 * interrupt_threshold must either be
-		 * - bigger than absolute_maximum or
-		 * - point to a record between buffer_base and absolute_maximum
-		 *
-		 * index points to a valid record.
-		 */
-		base = ds_get(context->ds, qual, ds_buffer_base);
-		index = ds_get(context->ds, qual, ds_index);
-		end = ds_get(context->ds, qual, ds_absolute_maximum);
-		int_th = ds_get(context->ds, qual, ds_interrupt_threshold);
-
-		write_end = min(end, int_th);
-
-		/* if we are already beyond the interrupt threshold,
-		 * we fill the entire buffer */
-		if (write_end <= index)
-			write_end = end;
-
-		if (write_end <= index)
-			break;
-
-		write_size = min((unsigned long) size, write_end - index);
-		memcpy((void *)index, record, write_size);
-
-		record = (const char *)record + write_size;
-		size -= write_size;
-		bytes_written += write_size;
-
-		adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
-		adj_write_size *= ds_cfg.sizeof_rec[qual];
-
-		/* zero out trailing bytes */
-		memset((char *)index + write_size, 0,
-		       adj_write_size - write_size);
-		index += adj_write_size;
+		return;
 
-		if (index >= end)
-			index = base;
-		ds_set(context->ds, qual, ds_index, index);
+	ds_suspend_pebs(tracer);
 
-		if (index >= int_th)
-			ds_overflow(context, qual);
-	}
+	WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer);
+	tracer->ds.context->pebs_master = NULL;
 
-	return bytes_written;
-}
+	put_tracer(tracer->ds.context->task);
+	ds_put_context(tracer->ds.context);
 
-int ds_write_bts(struct bts_tracer *tracer, const void *record, size_t size)
-{
-	if (!tracer)
-		return -EINVAL;
-
-	return ds_write(tracer->ds.context, ds_bts, record, size);
+	kfree(tracer);
 }
 
-int ds_write_pebs(struct pebs_tracer *tracer, const void *record, size_t size)
+void ds_suspend_pebs(struct pebs_tracer *tracer)
 {
-	if (!tracer)
-		return -EINVAL;
 
-	return ds_write(tracer->ds.context, ds_pebs, record, size);
 }
 
-static void ds_reset_or_clear(struct ds_context *context,
-			      enum ds_qualifier qual, int clear)
+void ds_resume_pebs(struct pebs_tracer *tracer)
 {
-	unsigned long base, end;
-
-	base = ds_get(context->ds, qual, ds_buffer_base);
-	end = ds_get(context->ds, qual, ds_absolute_maximum);
-
-	if (clear)
-		memset((void *)base, 0, end - base);
 
-	ds_set(context->ds, qual, ds_index, base);
 }
 
-int ds_reset_bts(struct bts_tracer *tracer)
+const struct bts_trace *ds_read_bts(struct bts_tracer *tracer)
 {
 	if (!tracer)
-		return -EINVAL;
-
-	ds_reset_or_clear(tracer->ds.context, ds_bts, /* clear = */ 0);
+		return NULL;
 
-	return 0;
+	ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
+	return &tracer->trace;
 }
 
-int ds_reset_pebs(struct pebs_tracer *tracer)
+const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer)
 {
 	if (!tracer)
-		return -EINVAL;
+		return NULL;
 
-	ds_reset_or_clear(tracer->ds.context, ds_pebs, /* clear = */ 0);
+	ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
+	tracer->trace.reset_value =
+		*(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8));
 
-	return 0;
+	return &tracer->trace;
 }
 
-int ds_clear_bts(struct bts_tracer *tracer)
+int ds_reset_bts(struct bts_tracer *tracer)
 {
 	if (!tracer)
 		return -EINVAL;
 
-	ds_reset_or_clear(tracer->ds.context, ds_bts, /* clear = */ 1);
-
-	return 0;
-}
-
-int ds_clear_pebs(struct pebs_tracer *tracer)
-{
-	if (!tracer)
-		return -EINVAL;
+	tracer->trace.ds.top = tracer->trace.ds.begin;
 
-	ds_reset_or_clear(tracer->ds.context, ds_pebs, /* clear = */ 1);
+	ds_set(tracer->ds.context->ds, ds_bts, ds_index,
+	       (unsigned long)tracer->trace.ds.top);
 
 	return 0;
 }
 
-int ds_get_pebs_reset(struct pebs_tracer *tracer, u64 *value)
+int ds_reset_pebs(struct pebs_tracer *tracer)
 {
 	if (!tracer)
 		return -EINVAL;
 
-	if (!value)
-		return -EINVAL;
+	tracer->trace.ds.top = tracer->trace.ds.begin;
 
-	*value = *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8));
+	ds_set(tracer->ds.context->ds, ds_pebs, ds_index,
+	       (unsigned long)tracer->trace.ds.top);
 
 	return 0;
 }
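
[Editor's note: read-out now goes through the trace view rather than the removed ds_access()/ds_get_index() helpers. A hedged sketch of draining the BTS buffer with the new interface (assuming the bts_trace/ds_trace layout from ds.h as changed by this patch; the begin/top/size names come from struct ds_trace):

	/* sketch only: walk the records currently in the buffer */
	static void dump_bts(struct bts_tracer *tracer)
	{
		const struct bts_trace *trace = ds_read_bts(tracer);
		unsigned char *at;

		if (!trace)
			return;

		for (at = trace->ds.begin; (void *)at < trace->ds.top;
		     at += trace->ds.size) {
			struct bts_struct bts;

			if (trace->read(tracer, at, &bts) < 0)
				break;

			if (bts.qualifier == bts_branch)
				printk(KERN_DEBUG "[bts] %lx -> %lx\n",
				       (unsigned long)bts.variant.lbr.from,
				       (unsigned long)bts.variant.lbr.to);
		}
	}
]
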
@@ -746,35 +894,59 @@ int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value)
 	return 0;
 }
 
-static const struct ds_configuration ds_cfg_var = {
-	.sizeof_ds = sizeof(long) * 12,
-	.sizeof_field = sizeof(long),
-	.sizeof_rec[ds_bts] = sizeof(long) * 3,
+static const struct ds_configuration ds_cfg_netburst = {
+	.name = "netburst",
+	.ctl[dsf_bts] = (1 << 2) | (1 << 3),
+	.ctl[dsf_bts_kernel] = (1 << 5),
+	.ctl[dsf_bts_user] = (1 << 6),
+
+	.sizeof_field = sizeof(long),
+	.sizeof_rec[ds_bts] = sizeof(long) * 3,
 #ifdef __i386__
-	.sizeof_rec[ds_pebs] = sizeof(long) * 10
+	.sizeof_rec[ds_pebs] = sizeof(long) * 10,
 #else
-	.sizeof_rec[ds_pebs] = sizeof(long) * 18
+	.sizeof_rec[ds_pebs] = sizeof(long) * 18,
 #endif
 };
-static const struct ds_configuration ds_cfg_64 = {
-	.sizeof_ds = 8 * 12,
-	.sizeof_field = 8,
-	.sizeof_rec[ds_bts] = 8 * 3,
+static const struct ds_configuration ds_cfg_pentium_m = {
+	.name = "pentium m",
+	.ctl[dsf_bts] = (1 << 6) | (1 << 7),
+
+	.sizeof_field = sizeof(long),
+	.sizeof_rec[ds_bts] = sizeof(long) * 3,
 #ifdef __i386__
-	.sizeof_rec[ds_pebs] = 8 * 10
+	.sizeof_rec[ds_pebs] = sizeof(long) * 10,
 #else
-	.sizeof_rec[ds_pebs] = 8 * 18
+	.sizeof_rec[ds_pebs] = sizeof(long) * 18,
 #endif
 };
+static const struct ds_configuration ds_cfg_core2 = {
+	.name = "core 2",
+	.ctl[dsf_bts] = (1 << 6) | (1 << 7),
+	.ctl[dsf_bts_kernel] = (1 << 9),
+	.ctl[dsf_bts_user] = (1 << 10),
+
+	.sizeof_field = 8,
+	.sizeof_rec[ds_bts] = 8 * 3,
+	.sizeof_rec[ds_pebs] = 8 * 18,
+};
 
-static inline void
+static void
 ds_configure(const struct ds_configuration *cfg)
 {
+	memset(&ds_cfg, 0, sizeof(ds_cfg));
 	ds_cfg = *cfg;
 
-	printk(KERN_INFO "DS available\n");
+	printk(KERN_INFO "[ds] using %s configuration\n", ds_cfg.name);
+
+	if (!cpu_has_bts) {
+		ds_cfg.ctl[dsf_bts] = 0;
+		printk(KERN_INFO "[ds] bts not available\n");
+	}
+	if (!cpu_has_pebs)
+		printk(KERN_INFO "[ds] pebs not available\n");
 
-	WARN_ON_ONCE(MAX_SIZEOF_DS < ds_cfg.sizeof_ds);
+	WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_field));
 }
 
 void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
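
[Editor's note: one worked example of how these tables combine in ds_resume_bts() above: the BTS_KERNEL/BTS_USER request flags are inverted into suppression bits, so user-only tracing on core 2 sets the enable bits (1 << 6) | (1 << 7) plus the kernel-suppress bit (1 << 9) in DEBUGCTL. A quick user-space check of the arithmetic, illustrative only:

	#include <stdio.h>

	int main(void)
	{
		unsigned long bts_enable = (1 << 6) | (1 << 7);	/* ds_cfg_core2.ctl[dsf_bts] */
		unsigned long suppress_kernel = (1 << 9);	/* ctl[dsf_bts_kernel] */

		/* caller asked for BTS_USER only, so kernel branches are suppressed */
		unsigned long control = bts_enable | suppress_kernel;

		printf("DEBUGCTL |= %#lx\n", control);	/* prints 0x2c0 */
		return 0;
	}
]
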
@@ -787,10 +959,10 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
 		break;
 	case 0xD:
 	case 0xE: /* Pentium M */
-		ds_configure(&ds_cfg_var);
+		ds_configure(&ds_cfg_pentium_m);
 		break;
 	default: /* Core2, Atom, ... */
-		ds_configure(&ds_cfg_64);
+		ds_configure(&ds_cfg_core2);
 		break;
 	}
 	break;
@@ -799,7 +971,7 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
 	case 0x0:
 	case 0x1:
 	case 0x2: /* Netburst */
-		ds_configure(&ds_cfg_var);
+		ds_configure(&ds_cfg_netburst);
 		break;
 	default:
 		/* sorry, don't know about them */
@@ -812,14 +984,41 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
 	}
 }
 
-void ds_free(struct ds_context *context)
+/*
+ * Change the DS configuration from tracing prev to tracing next.
+ */
+void ds_switch_to(struct task_struct *prev, struct task_struct *next)
 {
-	/* This is called when the task owning the parameter context
-	 * is dying. There should not be any user of that context left
-	 * to disturb us, anymore. */
-	unsigned long leftovers = context->count;
-	while (leftovers--) {
-		put_tracer(context->task);
-		ds_put_context(context);
+	struct ds_context *prev_ctx = prev->thread.ds_ctx;
+	struct ds_context *next_ctx = next->thread.ds_ctx;
+
+	if (prev_ctx) {
+		update_debugctlmsr(0);
+
+		if (prev_ctx->bts_master &&
+		    (prev_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) {
+			struct bts_struct ts = {
+				.qualifier = bts_task_departs,
+				.variant.timestamp.jiffies = jiffies_64,
+				.variant.timestamp.pid = prev->pid
+			};
+			bts_write(prev_ctx->bts_master, &ts);
+		}
+	}
+
+	if (next_ctx) {
+		if (next_ctx->bts_master &&
+		    (next_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) {
+			struct bts_struct ts = {
+				.qualifier = bts_task_arrives,
+				.variant.timestamp.jiffies = jiffies_64,
+				.variant.timestamp.pid = next->pid
+			};
+			bts_write(next_ctx->bts_master, &ts);
+		}
+
+		wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds);
 	}
+
+	update_debugctlmsr(next->thread.debugctlmsr);
 }