diff options
author | Frederic Weisbecker <fweisbec@gmail.com> | 2009-09-09 13:22:48 -0400 |
---|---|---|
committer | Frederic Weisbecker <fweisbec@gmail.com> | 2009-11-08 09:34:42 -0500 |
commit | 24f1e32c60c45c89a997c73395b69c8af6f0a84e (patch) | |
tree | 4f30f16e18cb4abbcf96b3b331e6a3f01bfa26e6 /arch/x86/kernel/ptrace.c | |
parent | 2da3e160cb3d226d87b907fab26850d838ed8d7c (diff) |
hw-breakpoints: Rewrite the hw-breakpoints layer on top of perf events
This patch rebases the implementation of the breakpoints API on top of
perf event instances.
Each breakpoint is now a perf event that handles the
register scheduling, thread/cpu attachment, etc.
The new layering is now made as follows:
       ptrace       kgdb      ftrace   perf syscall
          \          |          /         /
           \         |         /         /
                                        /
            Core breakpoint API        /
                                      /
                     |               /
                     |              /
              Breakpoints perf events
                     |
                     |
               Breakpoints PMU ---- Debug Register constraints handling
                                    (Part of core breakpoint API)
                     |
                     |
             Hardware debug registers
Reasons for this rewrite:
- Use the centralized/optimized pmu registers scheduling,
implying an easier arch integration
- More powerful register handling: perf attributes (pinned/flexible
events, exclusive/non-exclusive, tunable period, etc...)
Impact:
- New perf ABI: the hardware breakpoints counters
- Ptrace breakpoints setting remains tricky and still needs some per
thread breakpoints references.
Todo (in order):
- Support breakpoints perf counter events for perf tools (ie: implement
perf_bpcounter_event())
- Support from perf tools
Changes in v2:
- Follow the perf "event" rename
- The ptrace regression has been fixed (ptrace breakpoint perf events
weren't released when a task ended)
- Drop the struct hw_breakpoint and store generic fields in
perf_event_attr.
- Separate core and arch specific headers, drop
asm-generic/hw_breakpoint.h and create linux/hw_breakpoint.h
- Use new generic len/type for breakpoint
- Handle off case: when breakpoints api is not supported by an arch
Changes in v3:
- Fix broken CONFIG_KVM, we need to propagate the breakpoint api
changes to kvm when we exit the guest and restore the bp registers
to the host.
Changes in v4:
- Drop the hw_breakpoint_restore() stub as it is only used by KVM
- EXPORT_SYMBOL_GPL hw_breakpoint_restore() as KVM can be built as a
module
- Restore the breakpoints unconditionally on kvm guest exit:
TIF_DEBUG_THREAD no longer covers every case of running
breakpoints and vcpu->arch.switch_db_regs might not always be
set when the guest used debug registers.
(Waiting for a reliable optimization)
Changes in v5:
- Split-up the asm-generic/hw-breakpoint.h moving to
linux/hw_breakpoint.h into a separate patch
- Optimize the breakpoints restoring while switching from kvm guest
to host. We only want to restore the state if we have active
breakpoints on the host, otherwise we don't care about messed-up
address registers.
- Add asm/hw_breakpoint.h to Kbuild
- Fix bad breakpoint type in trace_selftest.c
Changes in v6:
- Fix wrong header inclusion in trace.h (triggered a build
error with CONFIG_FTRACE_SELFTEST)
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Prasad <prasad@linux.vnet.ibm.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Jan Kiszka <jan.kiszka@web.de>
Cc: Jiri Slaby <jirislaby@gmail.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Diffstat (limited to 'arch/x86/kernel/ptrace.c')
-rw-r--r-- | arch/x86/kernel/ptrace.c | 182 |
1 files changed, 125 insertions, 57 deletions
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 267cb85b479c..e79610d95971 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -22,6 +22,8 @@ | |||
22 | #include <linux/seccomp.h> | 22 | #include <linux/seccomp.h> |
23 | #include <linux/signal.h> | 23 | #include <linux/signal.h> |
24 | #include <linux/workqueue.h> | 24 | #include <linux/workqueue.h> |
25 | #include <linux/perf_event.h> | ||
26 | #include <linux/hw_breakpoint.h> | ||
25 | 27 | ||
26 | #include <asm/uaccess.h> | 28 | #include <asm/uaccess.h> |
27 | #include <asm/pgtable.h> | 29 | #include <asm/pgtable.h> |
@@ -441,54 +443,59 @@ static int genregs_set(struct task_struct *target, | |||
441 | return ret; | 443 | return ret; |
442 | } | 444 | } |
443 | 445 | ||
444 | /* | 446 | static void ptrace_triggered(struct perf_event *bp, void *data) |
445 | * Decode the length and type bits for a particular breakpoint as | ||
446 | * stored in debug register 7. Return the "enabled" status. | ||
447 | */ | ||
448 | static int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, | ||
449 | unsigned *type) | ||
450 | { | ||
451 | int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE); | ||
452 | |||
453 | *len = (bp_info & 0xc) | 0x40; | ||
454 | *type = (bp_info & 0x3) | 0x80; | ||
455 | return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3; | ||
456 | } | ||
457 | |||
458 | static void ptrace_triggered(struct hw_breakpoint *bp, struct pt_regs *regs) | ||
459 | { | 447 | { |
460 | struct thread_struct *thread = &(current->thread); | ||
461 | int i; | 448 | int i; |
449 | struct thread_struct *thread = &(current->thread); | ||
462 | 450 | ||
463 | /* | 451 | /* |
464 | * Store in the virtual DR6 register the fact that the breakpoint | 452 | * Store in the virtual DR6 register the fact that the breakpoint |
465 | * was hit so the thread's debugger will see it. | 453 | * was hit so the thread's debugger will see it. |
466 | */ | 454 | */ |
467 | for (i = 0; i < hbp_kernel_pos; i++) | 455 | for (i = 0; i < HBP_NUM; i++) { |
468 | /* | 456 | if (thread->ptrace_bps[i] == bp) |
469 | * We will check bp->info.address against the address stored in | ||
470 | * thread's hbp structure and not debugreg[i]. This is to ensure | ||
471 | * that the corresponding bit for 'i' in DR7 register is enabled | ||
472 | */ | ||
473 | if (bp->info.address == thread->hbp[i]->info.address) | ||
474 | break; | 457 | break; |
458 | } | ||
475 | 459 | ||
476 | thread->debugreg6 |= (DR_TRAP0 << i); | 460 | thread->debugreg6 |= (DR_TRAP0 << i); |
477 | } | 461 | } |
478 | 462 | ||
479 | /* | 463 | /* |
464 | * Walk through every ptrace breakpoints for this thread and | ||
465 | * build the dr7 value on top of their attributes. | ||
466 | * | ||
467 | */ | ||
468 | static unsigned long ptrace_get_dr7(struct perf_event *bp[]) | ||
469 | { | ||
470 | int i; | ||
471 | int dr7 = 0; | ||
472 | struct arch_hw_breakpoint *info; | ||
473 | |||
474 | for (i = 0; i < HBP_NUM; i++) { | ||
475 | if (bp[i] && !bp[i]->attr.disabled) { | ||
476 | info = counter_arch_bp(bp[i]); | ||
477 | dr7 |= encode_dr7(i, info->len, info->type); | ||
478 | } | ||
479 | } | ||
480 | |||
481 | return dr7; | ||
482 | } | ||
483 | |||
484 | /* | ||
480 | * Handle ptrace writes to debug register 7. | 485 | * Handle ptrace writes to debug register 7. |
481 | */ | 486 | */ |
482 | static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) | 487 | static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) |
483 | { | 488 | { |
484 | struct thread_struct *thread = &(tsk->thread); | 489 | struct thread_struct *thread = &(tsk->thread); |
485 | unsigned long old_dr7 = thread->debugreg7; | 490 | unsigned long old_dr7; |
486 | int i, orig_ret = 0, rc = 0; | 491 | int i, orig_ret = 0, rc = 0; |
487 | int enabled, second_pass = 0; | 492 | int enabled, second_pass = 0; |
488 | unsigned len, type; | 493 | unsigned len, type; |
489 | struct hw_breakpoint *bp; | 494 | int gen_len, gen_type; |
495 | struct perf_event *bp; | ||
490 | 496 | ||
491 | data &= ~DR_CONTROL_RESERVED; | 497 | data &= ~DR_CONTROL_RESERVED; |
498 | old_dr7 = ptrace_get_dr7(thread->ptrace_bps); | ||
492 | restore: | 499 | restore: |
493 | /* | 500 | /* |
494 | * Loop through all the hardware breakpoints, making the | 501 | * Loop through all the hardware breakpoints, making the |
@@ -496,11 +503,12 @@ restore: | |||
496 | */ | 503 | */ |
497 | for (i = 0; i < HBP_NUM; i++) { | 504 | for (i = 0; i < HBP_NUM; i++) { |
498 | enabled = decode_dr7(data, i, &len, &type); | 505 | enabled = decode_dr7(data, i, &len, &type); |
499 | bp = thread->hbp[i]; | 506 | bp = thread->ptrace_bps[i]; |
500 | 507 | ||
501 | if (!enabled) { | 508 | if (!enabled) { |
502 | if (bp) { | 509 | if (bp) { |
503 | /* Don't unregister the breakpoints right-away, | 510 | /* |
511 | * Don't unregister the breakpoints right-away, | ||
504 | * unless all register_user_hw_breakpoint() | 512 | * unless all register_user_hw_breakpoint() |
505 | * requests have succeeded. This prevents | 513 | * requests have succeeded. This prevents |
506 | * any window of opportunity for debug | 514 | * any window of opportunity for debug |
@@ -508,27 +516,45 @@ restore: | |||
508 | */ | 516 | */ |
509 | if (!second_pass) | 517 | if (!second_pass) |
510 | continue; | 518 | continue; |
511 | unregister_user_hw_breakpoint(tsk, bp); | 519 | thread->ptrace_bps[i] = NULL; |
512 | kfree(bp); | 520 | unregister_hw_breakpoint(bp); |
513 | } | 521 | } |
514 | continue; | 522 | continue; |
515 | } | 523 | } |
524 | |||
525 | /* | ||
526 | * We should have at least an inactive breakpoint at this | ||
527 | * slot. It means the user is writing dr7 without having | ||
528 | * written the address register first | ||
529 | */ | ||
516 | if (!bp) { | 530 | if (!bp) { |
517 | rc = -ENOMEM; | 531 | rc = -EINVAL; |
518 | bp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL); | 532 | break; |
519 | if (bp) { | 533 | } |
520 | bp->info.address = thread->debugreg[i]; | 534 | |
521 | bp->triggered = ptrace_triggered; | 535 | rc = arch_bp_generic_fields(len, type, &gen_len, &gen_type); |
522 | bp->info.len = len; | ||
523 | bp->info.type = type; | ||
524 | rc = register_user_hw_breakpoint(tsk, bp); | ||
525 | if (rc) | ||
526 | kfree(bp); | ||
527 | } | ||
528 | } else | ||
529 | rc = modify_user_hw_breakpoint(tsk, bp); | ||
530 | if (rc) | 536 | if (rc) |
531 | break; | 537 | break; |
538 | |||
539 | /* | ||
540 | * This is a temporary thing as bp is unregistered/registered | ||
541 | * to simulate modification | ||
542 | */ | ||
543 | bp = modify_user_hw_breakpoint(bp, bp->attr.bp_addr, gen_len, | ||
544 | gen_type, bp->callback, | ||
545 | tsk, true); | ||
546 | thread->ptrace_bps[i] = NULL; | ||
547 | |||
548 | if (!bp) { /* incorrect bp, or we have a bug in bp API */ | ||
549 | rc = -EINVAL; | ||
550 | break; | ||
551 | } | ||
552 | if (IS_ERR(bp)) { | ||
553 | rc = PTR_ERR(bp); | ||
554 | bp = NULL; | ||
555 | break; | ||
556 | } | ||
557 | thread->ptrace_bps[i] = bp; | ||
532 | } | 558 | } |
533 | /* | 559 | /* |
534 | * Make a second pass to free the remaining unused breakpoints | 560 | * Make a second pass to free the remaining unused breakpoints |
@@ -553,15 +579,63 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) | |||
553 | struct thread_struct *thread = &(tsk->thread); | 579 | struct thread_struct *thread = &(tsk->thread); |
554 | unsigned long val = 0; | 580 | unsigned long val = 0; |
555 | 581 | ||
556 | if (n < HBP_NUM) | 582 | if (n < HBP_NUM) { |
557 | val = thread->debugreg[n]; | 583 | struct perf_event *bp; |
558 | else if (n == 6) | 584 | bp = thread->ptrace_bps[n]; |
585 | if (!bp) | ||
586 | return 0; | ||
587 | val = bp->hw.info.address; | ||
588 | } else if (n == 6) { | ||
559 | val = thread->debugreg6; | 589 | val = thread->debugreg6; |
560 | else if (n == 7) | 590 | } else if (n == 7) { |
561 | val = thread->debugreg7; | 591 | val = ptrace_get_dr7(thread->ptrace_bps); |
592 | } | ||
562 | return val; | 593 | return val; |
563 | } | 594 | } |
564 | 595 | ||
596 | static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, | ||
597 | unsigned long addr) | ||
598 | { | ||
599 | struct perf_event *bp; | ||
600 | struct thread_struct *t = &tsk->thread; | ||
601 | |||
602 | if (!t->ptrace_bps[nr]) { | ||
603 | /* | ||
604 | * Put stub len and type to register (reserve) an inactive but | ||
605 | * correct bp | ||
606 | */ | ||
607 | bp = register_user_hw_breakpoint(addr, HW_BREAKPOINT_LEN_1, | ||
608 | HW_BREAKPOINT_W, | ||
609 | ptrace_triggered, tsk, | ||
610 | false); | ||
611 | } else { | ||
612 | bp = t->ptrace_bps[nr]; | ||
613 | t->ptrace_bps[nr] = NULL; | ||
614 | bp = modify_user_hw_breakpoint(bp, addr, bp->attr.bp_len, | ||
615 | bp->attr.bp_type, | ||
616 | bp->callback, | ||
617 | tsk, | ||
618 | bp->attr.disabled); | ||
619 | } | ||
620 | |||
621 | if (!bp) | ||
622 | return -EIO; | ||
623 | /* | ||
624 | * CHECKME: the previous code returned -EIO if the addr wasn't a | ||
625 | * valid task virtual addr. The new one will return -EINVAL in this | ||
626 | * case. | ||
627 | * -EINVAL may be what we want for in-kernel breakpoints users, but | ||
628 | * -EIO looks better for ptrace, since we refuse a register writing | ||
629 | * for the user. And anyway this is the previous behaviour. | ||
630 | */ | ||
631 | if (IS_ERR(bp)) | ||
632 | return PTR_ERR(bp); | ||
633 | |||
634 | t->ptrace_bps[nr] = bp; | ||
635 | |||
636 | return 0; | ||
637 | } | ||
638 | |||
565 | /* | 639 | /* |
566 | * Handle PTRACE_POKEUSR calls for the debug register area. | 640 | * Handle PTRACE_POKEUSR calls for the debug register area. |
567 | */ | 641 | */ |
@@ -575,19 +649,13 @@ int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val) | |||
575 | return -EIO; | 649 | return -EIO; |
576 | 650 | ||
577 | if (n == 6) { | 651 | if (n == 6) { |
578 | tsk->thread.debugreg6 = val; | 652 | thread->debugreg6 = val; |
579 | goto ret_path; | 653 | goto ret_path; |
580 | } | 654 | } |
581 | if (n < HBP_NUM) { | 655 | if (n < HBP_NUM) { |
582 | if (thread->hbp[n]) { | 656 | rc = ptrace_set_breakpoint_addr(tsk, n, val); |
583 | if (arch_check_va_in_userspace(val, | 657 | if (rc) |
584 | thread->hbp[n]->info.len) == 0) { | 658 | return rc; |
585 | rc = -EIO; | ||
586 | goto ret_path; | ||
587 | } | ||
588 | thread->hbp[n]->info.address = val; | ||
589 | } | ||
590 | thread->debugreg[n] = val; | ||
591 | } | 659 | } |
592 | /* All that's left is DR7 */ | 660 | /* All that's left is DR7 */ |
593 | if (n == 7) | 661 | if (n == 7) |