author		Markus Metzger <markus.t.metzger@intel.com>	2008-11-25 03:01:25 -0500
committer	Ingo Molnar <mingo@elte.hu>			2008-11-25 11:31:11 -0500
commit		ca0002a179bfa532d009a9272d619732872c49bd (patch)
tree		9a9ca02164dfb2c13afaa38ab67f3f15d8dd5ce8 /arch/x86/kernel
parent		7d55718b0c19ba611241c330f688ee824e9bab79 (diff)
x86, bts: base in-kernel ds interface on handles
Impact: generalize the DS code to shared buffers

Change the in-kernel ds.h interface to identify the tracer via a handle
returned on ds_request_~(). Tracers used to be identified via their
task_struct.

The changes are required to allow DS to be shared between different
tasks, which is needed for perfmon2 and for ftrace.

For ptrace, the handle is stored in the traced task's task_struct. This
should probably go into a (arch-specific) ptrace context some time.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
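[Editorial note: to make the interface change concrete, here is a minimal
sketch of a caller migrating to the handle-based API, modeled on the
ptrace.c hunks below. trace_one_task() and its locals are hypothetical;
the ds_request_bts()/ds_get_bts_index()/ds_release_bts() signatures and
the ERR_PTR() convention are the ones introduced by this patch.]

/* Illustrative only: trace_one_task() is not part of the patch. */
#include <linux/err.h>		/* IS_ERR(), PTR_ERR() */

static int trace_one_task(struct task_struct *task, size_t bufsize)
{
	struct bts_tracer *tracer;	/* the new opaque handle */
	size_t pos;
	int error;

	/* Previously the tracer was identified by the task itself and
	 * ds_request_bts() returned an int error code.  Now it returns
	 * a handle, with ERR_PTR() encoding on failure. */
	tracer = ds_request_bts(task, /* base = */ NULL, bufsize,
				/* ovfl = */ NULL, /* th = */ (size_t)-1);
	if (IS_ERR(tracer))
		return PTR_ERR(tracer);

	/* All further calls take the handle instead of the task. */
	error = ds_get_bts_index(tracer, &pos);
	if (!error)
		printk(KERN_INFO "bts index: %zu\n", pos);

	return ds_release_bts(tracer);	/* drops buffer and handle */
}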
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--	arch/x86/kernel/ds.c		679
-rw-r--r--	arch/x86/kernel/ptrace.c	 73
2 files changed, 380 insertions, 372 deletions
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index d6938d9351cf..96768e9cce99 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -28,6 +28,7 @@
 #include <linux/slab.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
+#include <linux/kernel.h>
 
 
 /*
@@ -44,6 +45,35 @@ struct ds_configuration {
 };
 static struct ds_configuration ds_cfg;
 
+/*
+ * A BTS or PEBS tracer.
+ *
+ * This holds the configuration of the tracer and serves as a handle
+ * to identify tracers.
+ */
+struct ds_tracer {
+	/* the DS context (partially) owned by this tracer */
+	struct ds_context *context;
+	/* the buffer provided on ds_request() and its size in bytes */
+	void *buffer;
+	size_t size;
+	/* the number of allocated pages for on-request allocated buffers */
+	unsigned int pages;
+};
+
+struct bts_tracer {
+	/* the common DS part */
+	struct ds_tracer ds;
+	/* buffer overflow notification function */
+	bts_ovfl_callback_t ovfl;
+};
+
+struct pebs_tracer {
+	/* the common DS part */
+	struct ds_tracer ds;
+	/* buffer overflow notification function */
+	pebs_ovfl_callback_t ovfl;
+};
 
 /*
  * Debug Store (DS) save area configuration (see Intel64 and IA32
@@ -107,35 +137,15 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
 	(*(unsigned long *)base) = value;
 }
 
+#define DS_ALIGNMENT (1 << 3)	/* BTS and PEBS buffer alignment */
+
 
 /*
  * Locking is done only for allocating BTS or PEBS resources and for
  * guarding context and buffer memory allocation.
- *
- * Most functions require the current task to own the ds context part
- * they are going to access. All the locking is done when validating
- * access to the context.
  */
 static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock);
 
-/*
- * Validate that the current task is allowed to access the BTS/PEBS
- * buffer of the parameter task.
- *
- * Returns 0, if access is granted; -Eerrno, otherwise.
- */
-static inline int ds_validate_access(struct ds_context *context,
-				     enum ds_qualifier qual)
-{
-	if (!context)
-		return -EPERM;
-
-	if (context->owner[qual] == current)
-		return 0;
-
-	return -EPERM;
-}
-
 
 /*
  * We either support (system-wide) per-cpu or per-thread allocation.
@@ -183,51 +193,13 @@ static inline int check_tracer(struct task_struct *task)
  *
  * Contexts are use-counted. They are allocated on first access and
  * deallocated when the last user puts the context.
- *
- * We distinguish between an allocating and a non-allocating get of a
- * context:
- * - the allocating get is used for requesting BTS/PEBS resources. It
- *   requires the caller to hold the global ds_lock.
- * - the non-allocating get is used for all other cases. A
- *   non-existing context indicates an error. It acquires and releases
- *   the ds_lock itself for obtaining the context.
- *
- * A context and its DS configuration are allocated and deallocated
- * together. A context always has a DS configuration of the
- * appropriate size.
  */
 static DEFINE_PER_CPU(struct ds_context *, system_context);
 
 #define this_system_context per_cpu(system_context, smp_processor_id())
 
-/*
- * Returns the pointer to the parameter task's context or to the
- * system-wide context, if task is NULL.
- *
- * Increases the use count of the returned context, if not NULL.
- */
 static inline struct ds_context *ds_get_context(struct task_struct *task)
 {
-	struct ds_context *context;
-	unsigned long irq;
-
-	spin_lock_irqsave(&ds_lock, irq);
-
-	context = (task ? task->thread.ds_ctx : this_system_context);
-	if (context)
-		context->count++;
-
-	spin_unlock_irqrestore(&ds_lock, irq);
-
-	return context;
-}
-
-/*
- * Same as ds_get_context, but allocates the context and it's DS
- * structure, if necessary; returns NULL; if out of memory.
- */
-static inline struct ds_context *ds_alloc_context(struct task_struct *task)
-{
 	struct ds_context **p_context =
 		(task ? &task->thread.ds_ctx : &this_system_context);
 	struct ds_context *context = *p_context;
@@ -238,16 +210,9 @@ static inline struct ds_context *ds_alloc_context(struct task_struct *task)
 	if (!context)
 		return NULL;
 
-	context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL);
-	if (!context->ds) {
-		kfree(context);
-		return NULL;
-	}
-
 	spin_lock_irqsave(&ds_lock, irq);
 
 	if (*p_context) {
-		kfree(context->ds);
 		kfree(context);
 
 		context = *p_context;
@@ -272,10 +237,6 @@ static inline struct ds_context *ds_alloc_context(struct task_struct *task)
 	return context;
 }
 
-/*
- * Decreases the use count of the parameter context, if not NULL.
- * Deallocates the context, if the use count reaches zero.
- */
 static inline void ds_put_context(struct ds_context *context)
 {
 	unsigned long irq;
@@ -296,13 +257,6 @@ static inline void ds_put_context(struct ds_context *context)
 	if (!context->task || (context->task == current))
 		wrmsrl(MSR_IA32_DS_AREA, 0);
 
-	put_tracer(context->task);
-
-	/* free any leftover buffers from tracers that did not
-	 * deallocate them properly. */
-	kfree(context->buffer[ds_bts]);
-	kfree(context->buffer[ds_pebs]);
-	kfree(context->ds);
 	kfree(context);
  out:
 	spin_unlock_irqrestore(&ds_lock, irq);
@@ -312,21 +266,29 @@ static inline void ds_put_context(struct ds_context *context)
 /*
  * Handle a buffer overflow
  *
- * task: the task whose buffers are overflowing;
- *       NULL for a buffer overflow on the current cpu
  * context: the ds context
  * qual: the buffer type
  */
-static void ds_overflow(struct task_struct *task, struct ds_context *context,
-			enum ds_qualifier qual)
+static void ds_overflow(struct ds_context *context, enum ds_qualifier qual)
 {
-	if (!context)
-		return;
-
-	if (context->callback[qual])
-		(*context->callback[qual])(task);
-
-	/* todo: do some more overflow handling */
+	switch (qual) {
+	case ds_bts: {
+		struct bts_tracer *tracer =
+			container_of(context->owner[qual],
+				     struct bts_tracer, ds);
+		if (tracer->ovfl)
+			tracer->ovfl(tracer);
+	}
+		break;
+	case ds_pebs: {
+		struct pebs_tracer *tracer =
+			container_of(context->owner[qual],
+				     struct pebs_tracer, ds);
+		if (tracer->ovfl)
+			tracer->ovfl(tracer);
+	}
+		break;
+	}
 }
 
 
@@ -343,23 +305,25 @@ static void ds_overflow(struct task_struct *task, struct ds_context *context,
 static inline void *ds_allocate_buffer(size_t size, unsigned int *pages)
 {
 	unsigned long rlim, vm, pgsz;
-	void *buffer;
+	void *buffer = NULL;
 
 	pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
 
+	down_write(&current->mm->mmap_sem);
+
 	rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
 	vm = current->mm->total_vm + pgsz;
 	if (rlim < vm)
-		return NULL;
+		goto out;
 
 	rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
 	vm = current->mm->locked_vm + pgsz;
 	if (rlim < vm)
-		return NULL;
+		goto out;
 
 	buffer = kzalloc(size, GFP_KERNEL);
 	if (!buffer)
-		return NULL;
+		goto out;
 
 	current->mm->total_vm += pgsz;
 	current->mm->locked_vm += pgsz;
@@ -367,290 +331,337 @@ static inline void *ds_allocate_buffer(size_t size, unsigned int *pages)
 	if (pages)
 		*pages = pgsz;
 
+ out:
+	up_write(&current->mm->mmap_sem);
 	return buffer;
 }
 
-static int ds_request(struct task_struct *task, void *base, size_t size,
-		      ds_ovfl_callback_t ovfl, enum ds_qualifier qual)
+static void ds_install_ds_config(struct ds_context *context,
+				 enum ds_qualifier qual,
+				 void *base, size_t size, size_t ith)
 {
-	struct ds_context *context;
 	unsigned long buffer, adj;
-	const unsigned long alignment = (1 << 3);
+
+	/* adjust the buffer address and size to meet alignment
+	 * constraints:
+	 * - buffer is double-word aligned
+	 * - size is multiple of record size
+	 *
+	 * We checked the size at the very beginning; we have enough
+	 * space to do the adjustment.
+	 */
+	buffer = (unsigned long)base;
+
+	adj = ALIGN(buffer, DS_ALIGNMENT) - buffer;
+	buffer += adj;
+	size -= adj;
+
+	size /= ds_cfg.sizeof_rec[qual];
+	size *= ds_cfg.sizeof_rec[qual];
+
+	ds_set(context->ds, qual, ds_buffer_base, buffer);
+	ds_set(context->ds, qual, ds_index, buffer);
+	ds_set(context->ds, qual, ds_absolute_maximum, buffer + size);
+
+	/* The value for 'no threshold' is -1, which will set the
+	 * threshold outside of the buffer, just like we want it.
+	 */
+	ds_set(context->ds, qual,
+	       ds_interrupt_threshold, buffer + size - ith);
+}
+
+static int ds_request(struct ds_tracer *tracer, enum ds_qualifier qual,
+		      struct task_struct *task,
+		      void *base, size_t size, size_t th)
+{
+	struct ds_context *context;
 	unsigned long irq;
-	int error = 0;
+	int error;
 
+	error = -EOPNOTSUPP;
 	if (!ds_cfg.sizeof_ds)
-		return -EOPNOTSUPP;
+		goto out;
 
 	/* we require some space to do alignment adjustments below */
-	if (size < (alignment + ds_cfg.sizeof_rec[qual]))
-		return -EINVAL;
+	error = -EINVAL;
+	if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual]))
+		goto out;
 
-	/* buffer overflow notification is not yet implemented */
-	if (ovfl)
-		return -EOPNOTSUPP;
+	if (th != (size_t)-1) {
+		th *= ds_cfg.sizeof_rec[qual];
+
+		error = -EINVAL;
+		if (size <= th)
+			goto out;
+	}
+
+	error = -ENOMEM;
+	if (!base) {
+		base = ds_allocate_buffer(size, &tracer->pages);
+		if (!base)
+			goto out;
+	}
 
+	tracer->buffer = base;
+	tracer->size = size;
 
-	context = ds_alloc_context(task);
+	error = -ENOMEM;
+	context = ds_get_context(task);
 	if (!context)
-		return -ENOMEM;
+		goto out;
+	tracer->context = context;
+
 
 	spin_lock_irqsave(&ds_lock, irq);
 
 	error = -EPERM;
 	if (!check_tracer(task))
 		goto out_unlock;
-
 	get_tracer(task);
 
-	error = -EALREADY;
-	if (context->owner[qual] == current)
-		goto out_put_tracer;
 	error = -EPERM;
-	if (context->owner[qual] != NULL)
+	if (context->owner[qual])
 		goto out_put_tracer;
-	context->owner[qual] = current;
+	context->owner[qual] = tracer;
 
 	spin_unlock_irqrestore(&ds_lock, irq);
 
 
-	error = -ENOMEM;
-	if (!base) {
-		base = ds_allocate_buffer(size, &context->pages[qual]);
-		if (!base)
-			goto out_release;
-
-		context->buffer[qual] = base;
-	}
-	error = 0;
+	ds_install_ds_config(context, qual, base, size, th);
 
-	context->callback[qual] = ovfl;
-
-	/* adjust the buffer address and size to meet alignment
-	 * constraints:
-	 * - buffer is double-word aligned
-	 * - size is multiple of record size
-	 *
-	 * We checked the size at the very beginning; we have enough
-	 * space to do the adjustment.
-	 */
-	buffer = (unsigned long)base;
-
-	adj = ALIGN(buffer, alignment) - buffer;
-	buffer += adj;
-	size -= adj;
-
-	size /= ds_cfg.sizeof_rec[qual];
-	size *= ds_cfg.sizeof_rec[qual];
-
-	ds_set(context->ds, qual, ds_buffer_base, buffer);
-	ds_set(context->ds, qual, ds_index, buffer);
-	ds_set(context->ds, qual, ds_absolute_maximum, buffer + size);
-
-	if (ovfl) {
-		/* todo: select a suitable interrupt threshold */
-	} else
-		ds_set(context->ds, qual,
-		       ds_interrupt_threshold, buffer + size + 1);
-
-	/* we keep the context until ds_release */
-	return error;
-
- out_release:
-	context->owner[qual] = NULL;
-	ds_put_context(context);
-	put_tracer(task);
-	return error;
+	return 0;
 
  out_put_tracer:
-	spin_unlock_irqrestore(&ds_lock, irq);
-	ds_put_context(context);
 	put_tracer(task);
-	return error;
-
  out_unlock:
 	spin_unlock_irqrestore(&ds_lock, irq);
 	ds_put_context(context);
+	tracer->context = NULL;
+ out:
 	return error;
 }
 
-int ds_request_bts(struct task_struct *task, void *base, size_t size,
-		   ds_ovfl_callback_t ovfl)
+struct bts_tracer *ds_request_bts(struct task_struct *task,
+				  void *base, size_t size,
+				  bts_ovfl_callback_t ovfl, size_t th)
 {
-	return ds_request(task, base, size, ovfl, ds_bts);
-}
+	struct bts_tracer *tracer;
+	int error;
 
-int ds_request_pebs(struct task_struct *task, void *base, size_t size,
-		    ds_ovfl_callback_t ovfl)
-{
-	return ds_request(task, base, size, ovfl, ds_pebs);
+	/* buffer overflow notification is not yet implemented */
+	error = -EOPNOTSUPP;
+	if (ovfl)
+		goto out;
+
+	error = -ENOMEM;
+	tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
+	if (!tracer)
+		goto out;
+	tracer->ovfl = ovfl;
+
+	error = ds_request(&tracer->ds, ds_bts, task, base, size, th);
+	if (error < 0)
+		goto out_tracer;
+
+	return tracer;
+
+ out_tracer:
+	(void)ds_release_bts(tracer);
+ out:
+	return ERR_PTR(error);
 }
 
-static int ds_release(struct task_struct *task, enum ds_qualifier qual)
+struct pebs_tracer *ds_request_pebs(struct task_struct *task,
+				    void *base, size_t size,
+				    pebs_ovfl_callback_t ovfl, size_t th)
 {
-	struct ds_context *context;
+	struct pebs_tracer *tracer;
 	int error;
 
-	context = ds_get_context(task);
-	error = ds_validate_access(context, qual);
-	if (error < 0)
+	/* buffer overflow notification is not yet implemented */
+	error = -EOPNOTSUPP;
+	if (ovfl)
 		goto out;
 
-	kfree(context->buffer[qual]);
-	context->buffer[qual] = NULL;
+	error = -ENOMEM;
+	tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
+	if (!tracer)
+		goto out;
+	tracer->ovfl = ovfl;
 
-	current->mm->total_vm -= context->pages[qual];
-	current->mm->locked_vm -= context->pages[qual];
-	context->pages[qual] = 0;
-	context->owner[qual] = NULL;
+	error = ds_request(&tracer->ds, ds_pebs, task, base, size, th);
+	if (error < 0)
+		goto out_tracer;
 
-	/*
-	 * we put the context twice:
-	 * once for the ds_get_context
-	 * once for the corresponding ds_request
-	 */
-	ds_put_context(context);
+	return tracer;
+
+ out_tracer:
+	(void)ds_release_pebs(tracer);
  out:
-	ds_put_context(context);
-	return error;
+	return ERR_PTR(error);
+}
+
+static void ds_release(struct ds_tracer *tracer, enum ds_qualifier qual)
+{
+	if (tracer->context) {
+		BUG_ON(tracer->context->owner[qual] != tracer);
+		tracer->context->owner[qual] = NULL;
+
+		put_tracer(tracer->context->task);
+		ds_put_context(tracer->context);
+	}
+
+	if (tracer->pages) {
+		kfree(tracer->buffer);
+
+		down_write(&current->mm->mmap_sem);
+
+		current->mm->total_vm -= tracer->pages;
+		current->mm->locked_vm -= tracer->pages;
+
+		up_write(&current->mm->mmap_sem);
+	}
 }
 
-int ds_release_bts(struct task_struct *task)
+int ds_release_bts(struct bts_tracer *tracer)
 {
-	return ds_release(task, ds_bts);
+	if (!tracer)
+		return -EINVAL;
+
+	ds_release(&tracer->ds, ds_bts);
+	kfree(tracer);
+
+	return 0;
 }
 
-int ds_release_pebs(struct task_struct *task)
+int ds_release_pebs(struct pebs_tracer *tracer)
 {
-	return ds_release(task, ds_pebs);
+	if (!tracer)
+		return -EINVAL;
+
+	ds_release(&tracer->ds, ds_pebs);
+	kfree(tracer);
+
+	return 0;
 }
 
-static int ds_get_index(struct task_struct *task, size_t *pos,
-			enum ds_qualifier qual)
+static size_t ds_get_index(struct ds_context *context, enum ds_qualifier qual)
 {
-	struct ds_context *context;
 	unsigned long base, index;
-	int error;
-
-	context = ds_get_context(task);
-	error = ds_validate_access(context, qual);
-	if (error < 0)
-		goto out;
 
 	base = ds_get(context->ds, qual, ds_buffer_base);
 	index = ds_get(context->ds, qual, ds_index);
 
-	error = ((index - base) / ds_cfg.sizeof_rec[qual]);
-	if (pos)
-		*pos = error;
- out:
-	ds_put_context(context);
-	return error;
+	return (index - base) / ds_cfg.sizeof_rec[qual];
 }
 
-int ds_get_bts_index(struct task_struct *task, size_t *pos)
+int ds_get_bts_index(struct bts_tracer *tracer, size_t *pos)
 {
-	return ds_get_index(task, pos, ds_bts);
+	if (!tracer)
+		return -EINVAL;
+
+	if (!pos)
+		return -EINVAL;
+
+	*pos = ds_get_index(tracer->ds.context, ds_bts);
+
+	return 0;
 }
 
-int ds_get_pebs_index(struct task_struct *task, size_t *pos)
+int ds_get_pebs_index(struct pebs_tracer *tracer, size_t *pos)
 {
-	return ds_get_index(task, pos, ds_pebs);
+	if (!tracer)
+		return -EINVAL;
+
+	if (!pos)
+		return -EINVAL;
+
+	*pos = ds_get_index(tracer->ds.context, ds_pebs);
+
+	return 0;
 }
 
-static int ds_get_end(struct task_struct *task, size_t *pos,
-		      enum ds_qualifier qual)
+static size_t ds_get_end(struct ds_context *context, enum ds_qualifier qual)
 {
-	struct ds_context *context;
-	unsigned long base, end;
-	int error;
-
-	context = ds_get_context(task);
-	error = ds_validate_access(context, qual);
-	if (error < 0)
-		goto out;
+	unsigned long base, max;
 
 	base = ds_get(context->ds, qual, ds_buffer_base);
-	end = ds_get(context->ds, qual, ds_absolute_maximum);
+	max = ds_get(context->ds, qual, ds_absolute_maximum);
 
-	error = ((end - base) / ds_cfg.sizeof_rec[qual]);
-	if (pos)
-		*pos = error;
- out:
-	ds_put_context(context);
-	return error;
+	return (max - base) / ds_cfg.sizeof_rec[qual];
 }
 
-int ds_get_bts_end(struct task_struct *task, size_t *pos)
+int ds_get_bts_end(struct bts_tracer *tracer, size_t *pos)
 {
-	return ds_get_end(task, pos, ds_bts);
+	if (!tracer)
+		return -EINVAL;
+
+	if (!pos)
+		return -EINVAL;
+
+	*pos = ds_get_end(tracer->ds.context, ds_bts);
+
+	return 0;
 }
 
-int ds_get_pebs_end(struct task_struct *task, size_t *pos)
+int ds_get_pebs_end(struct pebs_tracer *tracer, size_t *pos)
 {
-	return ds_get_end(task, pos, ds_pebs);
+	if (!tracer)
+		return -EINVAL;
+
+	if (!pos)
+		return -EINVAL;
+
+	*pos = ds_get_end(tracer->ds.context, ds_pebs);
+
+	return 0;
 }
 
-static int ds_access(struct task_struct *task, size_t index,
-		     const void **record, enum ds_qualifier qual)
+static int ds_access(struct ds_context *context, enum ds_qualifier qual,
+		     size_t index, const void **record)
 {
-	struct ds_context *context;
 	unsigned long base, idx;
-	int error;
 
 	if (!record)
 		return -EINVAL;
 
-	context = ds_get_context(task);
-	error = ds_validate_access(context, qual);
-	if (error < 0)
-		goto out;
-
 	base = ds_get(context->ds, qual, ds_buffer_base);
 	idx = base + (index * ds_cfg.sizeof_rec[qual]);
 
-	error = -EINVAL;
 	if (idx > ds_get(context->ds, qual, ds_absolute_maximum))
-		goto out;
+		return -EINVAL;
 
 	*record = (const void *)idx;
-	error = ds_cfg.sizeof_rec[qual];
- out:
-	ds_put_context(context);
-	return error;
+
+	return ds_cfg.sizeof_rec[qual];
 }
 
-int ds_access_bts(struct task_struct *task, size_t index, const void **record)
+int ds_access_bts(struct bts_tracer *tracer, size_t index,
+		  const void **record)
 {
-	return ds_access(task, index, record, ds_bts);
+	if (!tracer)
+		return -EINVAL;
+
+	return ds_access(tracer->ds.context, ds_bts, index, record);
 }
 
-int ds_access_pebs(struct task_struct *task, size_t index, const void **record)
+int ds_access_pebs(struct pebs_tracer *tracer, size_t index,
+		   const void **record)
 {
-	return ds_access(task, index, record, ds_pebs);
+	if (!tracer)
+		return -EINVAL;
+
+	return ds_access(tracer->ds.context, ds_pebs, index, record);
 }
 
-static int ds_write(struct task_struct *task, const void *record, size_t size,
-		    enum ds_qualifier qual, int force)
+static int ds_write(struct ds_context *context, enum ds_qualifier qual,
+		    const void *record, size_t size)
 {
-	struct ds_context *context;
-	int error;
+	int bytes_written = 0;
 
 	if (!record)
 		return -EINVAL;
 
-	error = -EPERM;
-	context = ds_get_context(task);
-	if (!context)
-		goto out;
-
-	if (!force) {
-		error = ds_validate_access(context, qual);
-		if (error < 0)
-			goto out;
-	}
-
-	error = 0;
 	while (size) {
 		unsigned long base, index, end, write_end, int_th;
 		unsigned long write_size, adj_write_size;
@@ -678,14 +689,14 @@ static int ds_write(struct task_struct *task, const void *record, size_t size,
 		write_end = end;
 
 		if (write_end <= index)
-			goto out;
+			break;
 
 		write_size = min((unsigned long) size, write_end - index);
 		memcpy((void *)index, record, write_size);
 
 		record = (const char *)record + write_size;
 		size -= write_size;
-		error += write_size;
+		bytes_written += write_size;
 
 		adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
 		adj_write_size *= ds_cfg.sizeof_rec[qual];
@@ -700,47 +711,32 @@ static int ds_write(struct task_struct *task, const void *record, size_t size,
 		ds_set(context->ds, qual, ds_index, index);
 
 		if (index >= int_th)
-			ds_overflow(task, context, qual);
+			ds_overflow(context, qual);
 	}
 
- out:
-	ds_put_context(context);
-	return error;
+	return bytes_written;
 }
 
-int ds_write_bts(struct task_struct *task, const void *record, size_t size)
+int ds_write_bts(struct bts_tracer *tracer, const void *record, size_t size)
 {
-	return ds_write(task, record, size, ds_bts, /* force = */ 0);
-}
+	if (!tracer)
+		return -EINVAL;
 
-int ds_write_pebs(struct task_struct *task, const void *record, size_t size)
-{
-	return ds_write(task, record, size, ds_pebs, /* force = */ 0);
+	return ds_write(tracer->ds.context, ds_bts, record, size);
 }
 
-int ds_unchecked_write_bts(struct task_struct *task,
-			   const void *record, size_t size)
+int ds_write_pebs(struct pebs_tracer *tracer, const void *record, size_t size)
 {
-	return ds_write(task, record, size, ds_bts, /* force = */ 1);
-}
+	if (!tracer)
+		return -EINVAL;
 
-int ds_unchecked_write_pebs(struct task_struct *task,
-			    const void *record, size_t size)
-{
-	return ds_write(task, record, size, ds_pebs, /* force = */ 1);
+	return ds_write(tracer->ds.context, ds_pebs, record, size);
 }
 
-static int ds_reset_or_clear(struct task_struct *task,
-			     enum ds_qualifier qual, int clear)
+static void ds_reset_or_clear(struct ds_context *context,
+			      enum ds_qualifier qual, int clear)
 {
-	struct ds_context *context;
 	unsigned long base, end;
-	int error;
-
-	context = ds_get_context(task);
-	error = ds_validate_access(context, qual);
-	if (error < 0)
-		goto out;
 
 	base = ds_get(context->ds, qual, ds_buffer_base);
 	end = ds_get(context->ds, qual, ds_absolute_maximum);
@@ -749,70 +745,69 @@ static int ds_reset_or_clear(struct task_struct *task,
 	memset((void *)base, 0, end - base);
 
 	ds_set(context->ds, qual, ds_index, base);
-
-	error = 0;
- out:
-	ds_put_context(context);
-	return error;
 }
 
-int ds_reset_bts(struct task_struct *task)
+int ds_reset_bts(struct bts_tracer *tracer)
 {
-	return ds_reset_or_clear(task, ds_bts, /* clear = */ 0);
+	if (!tracer)
+		return -EINVAL;
+
+	ds_reset_or_clear(tracer->ds.context, ds_bts, /* clear = */ 0);
+
+	return 0;
 }
 
-int ds_reset_pebs(struct task_struct *task)
+int ds_reset_pebs(struct pebs_tracer *tracer)
 {
-	return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0);
+	if (!tracer)
+		return -EINVAL;
+
+	ds_reset_or_clear(tracer->ds.context, ds_pebs, /* clear = */ 0);
+
+	return 0;
 }
 
-int ds_clear_bts(struct task_struct *task)
+int ds_clear_bts(struct bts_tracer *tracer)
 {
-	return ds_reset_or_clear(task, ds_bts, /* clear = */ 1);
+	if (!tracer)
+		return -EINVAL;
+
+	ds_reset_or_clear(tracer->ds.context, ds_bts, /* clear = */ 1);
+
+	return 0;
 }
 
-int ds_clear_pebs(struct task_struct *task)
+int ds_clear_pebs(struct pebs_tracer *tracer)
 {
-	return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1);
+	if (!tracer)
+		return -EINVAL;
+
+	ds_reset_or_clear(tracer->ds.context, ds_pebs, /* clear = */ 1);
+
+	return 0;
 }
 
-int ds_get_pebs_reset(struct task_struct *task, u64 *value)
+int ds_get_pebs_reset(struct pebs_tracer *tracer, u64 *value)
 {
-	struct ds_context *context;
-	int error;
+	if (!tracer)
+		return -EINVAL;
 
 	if (!value)
 		return -EINVAL;
 
-	context = ds_get_context(task);
-	error = ds_validate_access(context, ds_pebs);
-	if (error < 0)
-		goto out;
-
-	*value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8));
+	*value = *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8));
 
-	error = 0;
- out:
-	ds_put_context(context);
-	return error;
+	return 0;
 }
 
-int ds_set_pebs_reset(struct task_struct *task, u64 value)
+int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value)
 {
-	struct ds_context *context;
-	int error;
-
-	context = ds_get_context(task);
-	error = ds_validate_access(context, ds_pebs);
-	if (error < 0)
-		goto out;
+	if (!tracer)
+		return -EINVAL;
 
-	*(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value;
+	*(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value;
 
-	error = 0;
- out:
-	ds_put_context(context);
-	return error;
+	return 0;
 }
 
 static const struct ds_configuration ds_cfg_var = {
@@ -840,6 +835,10 @@ static inline void
 ds_configure(const struct ds_configuration *cfg)
 {
 	ds_cfg = *cfg;
+
+	printk(KERN_INFO "DS available\n");
+
+	BUG_ON(MAX_SIZEOF_DS < ds_cfg.sizeof_ds);
 }
 
 void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
@@ -883,6 +882,8 @@ void ds_free(struct ds_context *context)
 	 * is dying. There should not be any user of that context left
 	 * to disturb us, anymore. */
 	unsigned long leftovers = context->count;
-	while (leftovers--)
+	while (leftovers--) {
+		put_tracer(context->task);
 		ds_put_context(context);
+	}
 }
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 06180dff5b2e..76adf5b640ff 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -668,14 +668,14 @@ static int ptrace_bts_read_record(struct task_struct *child, size_t index,
 	size_t bts_index, bts_end;
 	int error;
 
-	error = ds_get_bts_end(child, &bts_end);
+	error = ds_get_bts_end(child->bts, &bts_end);
 	if (error < 0)
 		return error;
 
 	if (bts_end <= index)
 		return -EINVAL;
 
-	error = ds_get_bts_index(child, &bts_index);
+	error = ds_get_bts_index(child->bts, &bts_index);
 	if (error < 0)
 		return error;
 
@@ -684,7 +684,7 @@ static int ptrace_bts_read_record(struct task_struct *child, size_t index,
 	if (bts_end <= bts_index)
 		bts_index -= bts_end;
 
-	error = ds_access_bts(child, bts_index, &bts_record);
+	error = ds_access_bts(child->bts, bts_index, &bts_record);
 	if (error < 0)
 		return error;
 
@@ -705,14 +705,14 @@ static int ptrace_bts_drain(struct task_struct *child,
 	size_t end, i;
 	int error;
 
-	error = ds_get_bts_index(child, &end);
+	error = ds_get_bts_index(child->bts, &end);
 	if (error < 0)
 		return error;
 
 	if (size < (end * sizeof(struct bts_struct)))
 		return -EIO;
 
-	error = ds_access_bts(child, 0, (const void **)&raw);
+	error = ds_access_bts(child->bts, 0, (const void **)&raw);
 	if (error < 0)
 		return error;
 
@@ -723,18 +723,13 @@ static int ptrace_bts_drain(struct task_struct *child,
 		return -EFAULT;
 	}
 
-	error = ds_clear_bts(child);
+	error = ds_clear_bts(child->bts);
 	if (error < 0)
 		return error;
 
 	return end;
 }
 
-static void ptrace_bts_ovfl(struct task_struct *child)
-{
-	send_sig(child->thread.bts_ovfl_signal, child, 0);
-}
-
 static int ptrace_bts_config(struct task_struct *child,
 			     long cfg_size,
 			     const struct ptrace_bts_config __user *ucfg)
@@ -760,23 +755,29 @@ static int ptrace_bts_config(struct task_struct *child,
 		goto errout;
 
 	if (cfg.flags & PTRACE_BTS_O_ALLOC) {
-		ds_ovfl_callback_t ovfl = NULL;
+		bts_ovfl_callback_t ovfl = NULL;
 		unsigned int sig = 0;
 
-		/* we ignore the error in case we were not tracing child */
-		(void)ds_release_bts(child);
-
 		if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
 			if (!cfg.signal)
 				goto errout;
 
+			error = -EOPNOTSUPP;
+			goto errout;
+
 			sig = cfg.signal;
-			ovfl = ptrace_bts_ovfl;
 		}
 
-		error = ds_request_bts(child, /* base = */ NULL, cfg.size, ovfl);
-		if (error < 0)
+		if (child->bts)
+			(void)ds_release_bts(child->bts);
+
+		child->bts = ds_request_bts(child, /* base = */ NULL, cfg.size,
+					    ovfl, /* th = */ (size_t)-1);
+		if (IS_ERR(child->bts)) {
+			error = PTR_ERR(child->bts);
+			child->bts = NULL;
 			goto errout;
+		}
 
 		child->thread.bts_ovfl_signal = sig;
 	}
@@ -823,15 +824,15 @@ static int ptrace_bts_status(struct task_struct *child,
 	if (cfg_size < sizeof(cfg))
 		return -EIO;
 
-	error = ds_get_bts_end(child, &end);
+	error = ds_get_bts_end(child->bts, &end);
 	if (error < 0)
 		return error;
 
-	error = ds_access_bts(child, /* index = */ 0, &base);
+	error = ds_access_bts(child->bts, /* index = */ 0, &base);
 	if (error < 0)
 		return error;
 
-	error = ds_access_bts(child, /* index = */ end, &max);
+	error = ds_access_bts(child->bts, /* index = */ end, &max);
 	if (error < 0)
 		return error;
 
@@ -884,10 +885,7 @@ static int ptrace_bts_write_record(struct task_struct *child,
 		return -EINVAL;
 	}
 
-	/* The writing task will be the switched-to task on a context
-	 * switch. It needs to write into the switched-from task's BTS
-	 * buffer. */
-	return ds_unchecked_write_bts(child, bts_record, bts_cfg.sizeof_bts);
+	return ds_write_bts(child->bts, bts_record, bts_cfg.sizeof_bts);
 }
 
 void ptrace_bts_take_timestamp(struct task_struct *tsk,
@@ -972,13 +970,15 @@ void ptrace_disable(struct task_struct *child)
 	clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
 #endif
 #ifdef CONFIG_X86_PTRACE_BTS
-	(void)ds_release_bts(child);
+	if (child->bts) {
+		(void)ds_release_bts(child->bts);
 
 	child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
 	if (!child->thread.debugctlmsr)
 		clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
 
 	clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
+	}
 #endif /* CONFIG_X86_PTRACE_BTS */
 }
 
@@ -1110,9 +1110,16 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 			(child, data, (struct ptrace_bts_config __user *)addr);
 		break;
 
-	case PTRACE_BTS_SIZE:
-		ret = ds_get_bts_index(child, /* pos = */ NULL);
+	case PTRACE_BTS_SIZE: {
+		size_t size;
+
+		ret = ds_get_bts_index(child->bts, &size);
+		if (ret == 0) {
+			BUG_ON(size != (int) size);
+			ret = (int) size;
+		}
 		break;
+	}
 
 	case PTRACE_BTS_GET:
 		ret = ptrace_bts_read_record
@@ -1120,7 +1127,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		break;
 
 	case PTRACE_BTS_CLEAR:
-		ret = ds_clear_bts(child);
+		ret = ds_clear_bts(child->bts);
 		break;
 
 	case PTRACE_BTS_DRAIN: