aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMarkus Metzger <markus.t.metzger@intel.com>2009-04-03 10:43:35 -0400
committerIngo Molnar <mingo@elte.hu>2009-04-07 07:36:13 -0400
commite2b371f00a6f529f6362654239bdec8dcd510760 (patch)
tree36e47cd9fb949fd72893c2d5ddab489fa55b1869
parenta26b89f05d194413c7238e0bea071054f6b5d3c8 (diff)
mm, x86, ptrace, bts: defer branch trace stopping
When a ptraced task is unlinked, we need to stop branch tracing for
that task.

Since the unlink is called with interrupts disabled, and we need
interrupts enabled to stop branch tracing, we defer the work.

Collect all branch tracing related stuff in a branch tracing context.

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: roland@redhat.com
Cc: eranian@googlemail.com
Cc: juan.villacis@intel.com
Cc: ak@linux.jf.intel.com
LKML-Reference: <20090403144550.712401000@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--arch/x86/include/asm/processor.h4
-rw-r--r--arch/x86/kernel/ptrace.c254
-rw-r--r--include/linux/mm.h3
-rw-r--r--include/linux/sched.h9
-rw-r--r--mm/mlock.c13
5 files changed, 179 insertions, 104 deletions
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 34c52370f2fe..2483807e06e7 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -458,10 +458,6 @@ struct thread_struct {
 /* Debug Store context; see include/asm-x86/ds.h; goes into MSR_IA32_DS_AREA */
 	struct ds_context	*ds_ctx;
 #endif /* CONFIG_X86_DS */
-#ifdef CONFIG_X86_PTRACE_BTS
-/* the signal to send on a bts buffer overflow */
-	unsigned int	bts_ovfl_signal;
-#endif /* CONFIG_X86_PTRACE_BTS */
 };
 
 static inline unsigned long native_get_debugreg(int regno)
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index fe9345c967de..7c21d1e8cae7 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -22,6 +22,7 @@
 #include <linux/seccomp.h>
 #include <linux/signal.h>
 #include <linux/ftrace.h>
+#include <linux/workqueue.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -577,17 +578,119 @@ static int ioperm_get(struct task_struct *target,
577} 578}
578 579
579#ifdef CONFIG_X86_PTRACE_BTS 580#ifdef CONFIG_X86_PTRACE_BTS
581/*
582 * A branch trace store context.
583 *
584 * Contexts may only be installed by ptrace_bts_config() and only for
585 * ptraced tasks.
586 *
587 * Contexts are destroyed when the tracee is detached from the tracer.
588 * The actual destruction work requires interrupts enabled, so the
589 * work is deferred and will be scheduled during __ptrace_unlink().
590 *
591 * Contexts hold an additional task_struct reference on the traced
592 * task, as well as a reference on the tracer's mm.
593 *
594 * Ptrace already holds a task_struct for the duration of ptrace operations,
595 * but since destruction is deferred, it may be executed after both
596 * tracer and tracee exited.
597 */
598struct bts_context {
599 /* The branch trace handle. */
600 struct bts_tracer *tracer;
601
602 /* The buffer used to store the branch trace and its size. */
603 void *buffer;
604 unsigned int size;
605
606 /* The mm that paid for the above buffer. */
607 struct mm_struct *mm;
608
609 /* The task this context belongs to. */
610 struct task_struct *task;
611
612 /* The signal to send on a bts buffer overflow. */
613 unsigned int bts_ovfl_signal;
614
615 /* The work struct to destroy a context. */
616 struct work_struct work;
617};
618
619static inline void alloc_bts_buffer(struct bts_context *context,
620 unsigned int size)
621{
622 void *buffer;
623
624 buffer = alloc_locked_buffer(size);
625 if (buffer) {
626 context->buffer = buffer;
627 context->size = size;
628 context->mm = get_task_mm(current);
629 }
630}
631
632static inline void free_bts_buffer(struct bts_context *context)
633{
634 if (!context->buffer)
635 return;
636
637 kfree(context->buffer);
638 context->buffer = NULL;
639
640 refund_locked_buffer_memory(context->mm, context->size);
641 context->size = 0;
642
643 mmput(context->mm);
644 context->mm = NULL;
645}
646
647static void free_bts_context_work(struct work_struct *w)
648{
649 struct bts_context *context;
650
651 context = container_of(w, struct bts_context, work);
652
653 ds_release_bts(context->tracer);
654 put_task_struct(context->task);
655 free_bts_buffer(context);
656 kfree(context);
657}
658
659static inline void free_bts_context(struct bts_context *context)
660{
661 INIT_WORK(&context->work, free_bts_context_work);
662 schedule_work(&context->work);
663}
664
665static inline struct bts_context *alloc_bts_context(struct task_struct *task)
666{
667 struct bts_context *context = kzalloc(sizeof(*context), GFP_KERNEL);
668 if (context) {
669 context->task = task;
670 task->bts = context;
671
672 get_task_struct(task);
673 }
674
675 return context;
676}
677
580static int ptrace_bts_read_record(struct task_struct *child, size_t index, 678static int ptrace_bts_read_record(struct task_struct *child, size_t index,
581 struct bts_struct __user *out) 679 struct bts_struct __user *out)
582{ 680{
681 struct bts_context *context;
583 const struct bts_trace *trace; 682 const struct bts_trace *trace;
584 struct bts_struct bts; 683 struct bts_struct bts;
585 const unsigned char *at; 684 const unsigned char *at;
586 int error; 685 int error;
587 686
588 trace = ds_read_bts(child->bts); 687 context = child->bts;
688 if (!context)
689 return -ESRCH;
690
691 trace = ds_read_bts(context->tracer);
589 if (!trace) 692 if (!trace)
590 return -EPERM; 693 return -ESRCH;
591 694
592 at = trace->ds.top - ((index + 1) * trace->ds.size); 695 at = trace->ds.top - ((index + 1) * trace->ds.size);
593 if ((void *)at < trace->ds.begin) 696 if ((void *)at < trace->ds.begin)
@@ -596,7 +699,7 @@ static int ptrace_bts_read_record(struct task_struct *child, size_t index,
 	if (!trace->read)
 		return -EOPNOTSUPP;
 
-	error = trace->read(child->bts, at, &bts);
+	error = trace->read(context->tracer, at, &bts);
 	if (error < 0)
 		return error;
 
@@ -610,13 +713,18 @@ static int ptrace_bts_drain(struct task_struct *child,
610 long size, 713 long size,
611 struct bts_struct __user *out) 714 struct bts_struct __user *out)
612{ 715{
716 struct bts_context *context;
613 const struct bts_trace *trace; 717 const struct bts_trace *trace;
614 const unsigned char *at; 718 const unsigned char *at;
615 int error, drained = 0; 719 int error, drained = 0;
616 720
617 trace = ds_read_bts(child->bts); 721 context = child->bts;
722 if (!context)
723 return -ESRCH;
724
725 trace = ds_read_bts(context->tracer);
618 if (!trace) 726 if (!trace)
619 return -EPERM; 727 return -ESRCH;
620 728
621 if (!trace->read) 729 if (!trace->read)
622 return -EOPNOTSUPP; 730 return -EOPNOTSUPP;
@@ -627,9 +735,8 @@ static int ptrace_bts_drain(struct task_struct *child,
 	for (at = trace->ds.begin; (void *)at < trace->ds.top;
 	     out++, drained++, at += trace->ds.size) {
 		struct bts_struct bts;
-		int error;
 
-		error = trace->read(child->bts, at, &bts);
+		error = trace->read(context->tracer, at, &bts);
 		if (error < 0)
 			return error;
 
@@ -639,35 +746,18 @@ static int ptrace_bts_drain(struct task_struct *child,
639 746
640 memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); 747 memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
641 748
642 error = ds_reset_bts(child->bts); 749 error = ds_reset_bts(context->tracer);
643 if (error < 0) 750 if (error < 0)
644 return error; 751 return error;
645 752
646 return drained; 753 return drained;
647} 754}
648 755
649static int ptrace_bts_allocate_buffer(struct task_struct *child, size_t size)
650{
651 child->bts_buffer = alloc_locked_buffer(size);
652 if (!child->bts_buffer)
653 return -ENOMEM;
654
655 child->bts_size = size;
656
657 return 0;
658}
659
660static void ptrace_bts_free_buffer(struct task_struct *child)
661{
662 free_locked_buffer(child->bts_buffer, child->bts_size);
663 child->bts_buffer = NULL;
664 child->bts_size = 0;
665}
666
667static int ptrace_bts_config(struct task_struct *child, 756static int ptrace_bts_config(struct task_struct *child,
668 long cfg_size, 757 long cfg_size,
669 const struct ptrace_bts_config __user *ucfg) 758 const struct ptrace_bts_config __user *ucfg)
670{ 759{
760 struct bts_context *context;
671 struct ptrace_bts_config cfg; 761 struct ptrace_bts_config cfg;
672 unsigned int flags = 0; 762 unsigned int flags = 0;
673 763
@@ -677,28 +767,31 @@ static int ptrace_bts_config(struct task_struct *child,
677 if (copy_from_user(&cfg, ucfg, sizeof(cfg))) 767 if (copy_from_user(&cfg, ucfg, sizeof(cfg)))
678 return -EFAULT; 768 return -EFAULT;
679 769
680 if (child->bts) { 770 context = child->bts;
681 ds_release_bts(child->bts); 771 if (!context)
682 child->bts = NULL; 772 context = alloc_bts_context(child);
683 } 773 if (!context)
774 return -ENOMEM;
684 775
685 if (cfg.flags & PTRACE_BTS_O_SIGNAL) { 776 if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
686 if (!cfg.signal) 777 if (!cfg.signal)
687 return -EINVAL; 778 return -EINVAL;
688 779
689 child->thread.bts_ovfl_signal = cfg.signal;
690 return -EOPNOTSUPP; 780 return -EOPNOTSUPP;
781 context->bts_ovfl_signal = cfg.signal;
691 } 782 }
692 783
693 if ((cfg.flags & PTRACE_BTS_O_ALLOC) && 784 ds_release_bts(context->tracer);
694 (cfg.size != child->bts_size)) { 785 context->tracer = NULL;
695 int error;
696 786
697 ptrace_bts_free_buffer(child); 787 if ((cfg.flags & PTRACE_BTS_O_ALLOC) && (cfg.size != context->size)) {
788 free_bts_buffer(context);
789 if (!cfg.size)
790 return 0;
698 791
699 error = ptrace_bts_allocate_buffer(child, cfg.size); 792 alloc_bts_buffer(context, cfg.size);
700 if (error < 0) 793 if (!context->buffer)
701 return error; 794 return -ENOMEM;
702 } 795 }
703 796
704 if (cfg.flags & PTRACE_BTS_O_TRACE) 797 if (cfg.flags & PTRACE_BTS_O_TRACE)
@@ -707,15 +800,13 @@ static int ptrace_bts_config(struct task_struct *child,
707 if (cfg.flags & PTRACE_BTS_O_SCHED) 800 if (cfg.flags & PTRACE_BTS_O_SCHED)
708 flags |= BTS_TIMESTAMPS; 801 flags |= BTS_TIMESTAMPS;
709 802
710 child->bts = ds_request_bts(child, child->bts_buffer, child->bts_size, 803 context->tracer = ds_request_bts(child, context->buffer, context->size,
711 /* ovfl = */ NULL, /* th = */ (size_t)-1, 804 NULL, (size_t)-1, flags);
712 flags); 805 if (unlikely(IS_ERR(context->tracer))) {
713 if (IS_ERR(child->bts)) { 806 int error = PTR_ERR(context->tracer);
714 int error = PTR_ERR(child->bts);
715
716 ptrace_bts_free_buffer(child);
717 child->bts = NULL;
718 807
808 free_bts_buffer(context);
809 context->tracer = NULL;
719 return error; 810 return error;
720 } 811 }
721 812
@@ -726,20 +817,25 @@ static int ptrace_bts_status(struct task_struct *child,
726 long cfg_size, 817 long cfg_size,
727 struct ptrace_bts_config __user *ucfg) 818 struct ptrace_bts_config __user *ucfg)
728{ 819{
820 struct bts_context *context;
729 const struct bts_trace *trace; 821 const struct bts_trace *trace;
730 struct ptrace_bts_config cfg; 822 struct ptrace_bts_config cfg;
731 823
824 context = child->bts;
825 if (!context)
826 return -ESRCH;
827
732 if (cfg_size < sizeof(cfg)) 828 if (cfg_size < sizeof(cfg))
733 return -EIO; 829 return -EIO;
734 830
735 trace = ds_read_bts(child->bts); 831 trace = ds_read_bts(context->tracer);
736 if (!trace) 832 if (!trace)
737 return -EPERM; 833 return -ESRCH;
738 834
739 memset(&cfg, 0, sizeof(cfg)); 835 memset(&cfg, 0, sizeof(cfg));
740 cfg.size = trace->ds.end - trace->ds.begin; 836 cfg.size = trace->ds.end - trace->ds.begin;
741 cfg.signal = child->thread.bts_ovfl_signal; 837 cfg.signal = context->bts_ovfl_signal;
742 cfg.bts_size = sizeof(struct bts_struct); 838 cfg.bts_size = sizeof(struct bts_struct);
743 839
744 if (cfg.signal) 840 if (cfg.signal)
745 cfg.flags |= PTRACE_BTS_O_SIGNAL; 841 cfg.flags |= PTRACE_BTS_O_SIGNAL;
@@ -758,67 +854,56 @@ static int ptrace_bts_status(struct task_struct *child,
758 854
759static int ptrace_bts_clear(struct task_struct *child) 855static int ptrace_bts_clear(struct task_struct *child)
760{ 856{
857 struct bts_context *context;
761 const struct bts_trace *trace; 858 const struct bts_trace *trace;
762 859
763 trace = ds_read_bts(child->bts); 860 context = child->bts;
861 if (!context)
862 return -ESRCH;
863
864 trace = ds_read_bts(context->tracer);
764 if (!trace) 865 if (!trace)
765 return -EPERM; 866 return -ESRCH;
766 867
767 memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); 868 memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
768 869
769 return ds_reset_bts(child->bts); 870 return ds_reset_bts(context->tracer);
770} 871}
771 872
772static int ptrace_bts_size(struct task_struct *child) 873static int ptrace_bts_size(struct task_struct *child)
773{ 874{
875 struct bts_context *context;
774 const struct bts_trace *trace; 876 const struct bts_trace *trace;
775 877
776 trace = ds_read_bts(child->bts); 878 context = child->bts;
879 if (!context)
880 return -ESRCH;
881
882 trace = ds_read_bts(context->tracer);
777 if (!trace) 883 if (!trace)
778 return -EPERM; 884 return -ESRCH;
779 885
780 return (trace->ds.top - trace->ds.begin) / trace->ds.size; 886 return (trace->ds.top - trace->ds.begin) / trace->ds.size;
781} 887}
782 888
783static void ptrace_bts_fork(struct task_struct *tsk) 889static inline void ptrace_bts_fork(struct task_struct *tsk)
784{ 890{
785 tsk->bts = NULL; 891 tsk->bts = NULL;
786 tsk->bts_buffer = NULL;
787 tsk->bts_size = 0;
788 tsk->thread.bts_ovfl_signal = 0;
789} 892}
790 893
791static void ptrace_bts_untrace(struct task_struct *child) 894/*
895 * Called from __ptrace_unlink() after the child has been moved back
896 * to its original parent.
897 */
898static inline void ptrace_bts_untrace(struct task_struct *child)
792{ 899{
793 if (unlikely(child->bts)) { 900 if (unlikely(child->bts)) {
794 ds_release_bts(child->bts); 901 free_bts_context(child->bts);
795 child->bts = NULL; 902 child->bts = NULL;
796
797 /* We cannot update total_vm and locked_vm since
798 child's mm is already gone. But we can reclaim the
799 memory. */
800 kfree(child->bts_buffer);
801 child->bts_buffer = NULL;
802 child->bts_size = 0;
803 } 903 }
804} 904}
805
806static void ptrace_bts_detach(struct task_struct *child)
807{
808 /*
809 * Ptrace_detach() races with ptrace_untrace() in case
810 * the child dies and is reaped by another thread.
811 *
812 * We only do the memory accounting at this point and
813 * leave the buffer deallocation and the bts tracer
814 * release to ptrace_bts_untrace() which will be called
815 * later on with tasklist_lock held.
816 */
817 release_locked_buffer(child->bts_buffer, child->bts_size);
818}
819#else 905#else
820static inline void ptrace_bts_fork(struct task_struct *tsk) {} 906static inline void ptrace_bts_fork(struct task_struct *tsk) {}
821static inline void ptrace_bts_detach(struct task_struct *child) {}
822static inline void ptrace_bts_untrace(struct task_struct *child) {} 907static inline void ptrace_bts_untrace(struct task_struct *child) {}
823#endif /* CONFIG_X86_PTRACE_BTS */ 908#endif /* CONFIG_X86_PTRACE_BTS */
824 909
@@ -843,7 +928,6 @@ void ptrace_disable(struct task_struct *child)
 #ifdef TIF_SYSCALL_EMU
 	clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
 #endif
-	ptrace_bts_detach(child);
 }
 
 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
diff --git a/include/linux/mm.h b/include/linux/mm.h
index bff1f0d475c7..64d8ed2538ae 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -13,6 +13,7 @@
 #include <linux/prio_tree.h>
 #include <linux/debug_locks.h>
 #include <linux/mm_types.h>
+#include <linux/sched.h>
 
 struct mempolicy;
 struct anon_vma;
@@ -1321,6 +1322,6 @@ void vmemmap_populate_print_last(void);
 
 extern void *alloc_locked_buffer(size_t size);
 extern void free_locked_buffer(void *buffer, size_t size);
-extern void release_locked_buffer(void *buffer, size_t size);
+extern void refund_locked_buffer_memory(struct mm_struct *mm, size_t size);
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a5b9a83065fa..52b8cd049c2e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -96,8 +96,8 @@ struct exec_domain;
 struct futex_pi_state;
 struct robust_list_head;
 struct bio;
-struct bts_tracer;
 struct fs_struct;
+struct bts_context;
 
 /*
  * List of flags we want to share for kernel threads,
@@ -1210,12 +1210,7 @@ struct task_struct {
 	 * This is the tracer handle for the ptrace BTS extension.
 	 * This field actually belongs to the ptracer task.
 	 */
-	struct bts_tracer *bts;
-	/*
-	 * The buffer to hold the BTS data.
-	 */
-	void *bts_buffer;
-	size_t bts_size;
+	struct bts_context *bts;
 #endif /* CONFIG_X86_PTRACE_BTS */
 
 	/* PID/PID hash table linkage. */
diff --git a/mm/mlock.c b/mm/mlock.c
index cbe9e0581b75..749383b442c7 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -660,21 +660,20 @@ void *alloc_locked_buffer(size_t size)
 	return buffer;
 }
 
-void release_locked_buffer(void *buffer, size_t size)
+void refund_locked_buffer_memory(struct mm_struct *mm, size_t size)
 {
 	unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
 
-	down_write(&current->mm->mmap_sem);
+	down_write(&mm->mmap_sem);
 
-	current->mm->total_vm -= pgsz;
-	current->mm->locked_vm -= pgsz;
+	mm->total_vm -= pgsz;
+	mm->locked_vm -= pgsz;
 
-	up_write(&current->mm->mmap_sem);
+	up_write(&mm->mmap_sem);
 }
 
 void free_locked_buffer(void *buffer, size_t size)
 {
-	release_locked_buffer(buffer, size);
-
+	refund_locked_buffer_memory(current->mm, size);
 	kfree(buffer);
 }