Diffstat (limited to 'kernel')
-rw-r--r--  kernel/events/core.c                 |   5
-rw-r--r--  kernel/events/hw_breakpoint.c        |   2
-rw-r--r--  kernel/events/uprobes.c              | 466
-rw-r--r--  kernel/kprobes.c                     |   8
-rw-r--r--  kernel/profile.c                     |  24
-rw-r--r--  kernel/ptrace.c                      |   6
-rw-r--r--  kernel/trace/Kconfig                 |  18
-rw-r--r--  kernel/trace/blktrace.c              |   2
-rw-r--r--  kernel/trace/ftrace.c                |  88
-rw-r--r--  kernel/trace/ring_buffer.c           | 108
-rw-r--r--  kernel/trace/trace.c                 | 252
-rw-r--r--  kernel/trace/trace.h                 | 134
-rw-r--r--  kernel/trace/trace_clock.c           |   4
-rw-r--r--  kernel/trace/trace_events.c          |   1
-rw-r--r--  kernel/trace/trace_functions.c       |  61
-rw-r--r--  kernel/trace/trace_functions_graph.c |  68
-rw-r--r--  kernel/trace/trace_probe.h           |   1
-rw-r--r--  kernel/trace/trace_selftest.c        |  21
-rw-r--r--  kernel/trace/trace_syscalls.c        |  18
-rw-r--r--  kernel/trace/trace_uprobe.c          | 217
20 files changed, 992 insertions, 512 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 7b6646a8c067..5c75791d7269 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6171,11 +6171,14 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, | |||
6171 | 6171 | ||
6172 | if (task) { | 6172 | if (task) { |
6173 | event->attach_state = PERF_ATTACH_TASK; | 6173 | event->attach_state = PERF_ATTACH_TASK; |
6174 | |||
6175 | if (attr->type == PERF_TYPE_TRACEPOINT) | ||
6176 | event->hw.tp_target = task; | ||
6174 | #ifdef CONFIG_HAVE_HW_BREAKPOINT | 6177 | #ifdef CONFIG_HAVE_HW_BREAKPOINT |
6175 | /* | 6178 | /* |
6176 | * hw_breakpoint is a bit difficult here.. | 6179 | * hw_breakpoint is a bit difficult here.. |
6177 | */ | 6180 | */ |
6178 | if (attr->type == PERF_TYPE_BREAKPOINT) | 6181 | else if (attr->type == PERF_TYPE_BREAKPOINT) |
6179 | event->hw.bp_target = task; | 6182 | event->hw.bp_target = task; |
6180 | #endif | 6183 | #endif |
6181 | } | 6184 | } |
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index fe8a916507ed..a64f8aeb5c1f 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -676,7 +676,7 @@ int __init init_hw_breakpoint(void) | |||
676 | err_alloc: | 676 | err_alloc: |
677 | for_each_possible_cpu(err_cpu) { | 677 | for_each_possible_cpu(err_cpu) { |
678 | for (i = 0; i < TYPE_MAX; i++) | 678 | for (i = 0; i < TYPE_MAX; i++) |
679 | kfree(per_cpu(nr_task_bp_pinned[i], cpu)); | 679 | kfree(per_cpu(nr_task_bp_pinned[i], err_cpu)); |
680 | if (err_cpu == cpu) | 680 | if (err_cpu == cpu) |
681 | break; | 681 | break; |
682 | } | 682 | } |
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index dea7acfbb071..a567c8c7ef31 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -27,6 +27,7 @@ | |||
27 | #include <linux/pagemap.h> /* read_mapping_page */ | 27 | #include <linux/pagemap.h> /* read_mapping_page */ |
28 | #include <linux/slab.h> | 28 | #include <linux/slab.h> |
29 | #include <linux/sched.h> | 29 | #include <linux/sched.h> |
30 | #include <linux/export.h> | ||
30 | #include <linux/rmap.h> /* anon_vma_prepare */ | 31 | #include <linux/rmap.h> /* anon_vma_prepare */ |
31 | #include <linux/mmu_notifier.h> /* set_pte_at_notify */ | 32 | #include <linux/mmu_notifier.h> /* set_pte_at_notify */ |
32 | #include <linux/swap.h> /* try_to_free_swap */ | 33 | #include <linux/swap.h> /* try_to_free_swap */ |
@@ -41,58 +42,31 @@ | |||
41 | #define MAX_UPROBE_XOL_SLOTS UINSNS_PER_PAGE | 42 | #define MAX_UPROBE_XOL_SLOTS UINSNS_PER_PAGE |
42 | 43 | ||
43 | static struct rb_root uprobes_tree = RB_ROOT; | 44 | static struct rb_root uprobes_tree = RB_ROOT; |
44 | |||
45 | static DEFINE_SPINLOCK(uprobes_treelock); /* serialize rbtree access */ | ||
46 | |||
47 | #define UPROBES_HASH_SZ 13 | ||
48 | |||
49 | /* | 45 | /* |
50 | * We need separate register/unregister and mmap/munmap lock hashes because | 46 | * allows us to skip the uprobe_mmap if there are no uprobe events active |
51 | * of mmap_sem nesting. | 47 | * at this time. Probably a fine grained per inode count is better? |
52 | * | ||
53 | * uprobe_register() needs to install probes on (potentially) all processes | ||
54 | * and thus needs to acquire multiple mmap_sems (consequtively, not | ||
55 | * concurrently), whereas uprobe_mmap() is called while holding mmap_sem | ||
56 | * for the particular process doing the mmap. | ||
57 | * | ||
58 | * uprobe_register()->register_for_each_vma() needs to drop/acquire mmap_sem | ||
59 | * because of lock order against i_mmap_mutex. This means there's a hole in | ||
60 | * the register vma iteration where a mmap() can happen. | ||
61 | * | ||
62 | * Thus uprobe_register() can race with uprobe_mmap() and we can try and | ||
63 | * install a probe where one is already installed. | ||
64 | */ | 48 | */ |
49 | #define no_uprobe_events() RB_EMPTY_ROOT(&uprobes_tree) | ||
65 | 50 | ||
66 | /* serialize (un)register */ | 51 | static DEFINE_SPINLOCK(uprobes_treelock); /* serialize rbtree access */ |
67 | static struct mutex uprobes_mutex[UPROBES_HASH_SZ]; | ||
68 | |||
69 | #define uprobes_hash(v) (&uprobes_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ]) | ||
70 | 52 | ||
53 | #define UPROBES_HASH_SZ 13 | ||
71 | /* serialize uprobe->pending_list */ | 54 | /* serialize uprobe->pending_list */ |
72 | static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ]; | 55 | static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ]; |
73 | #define uprobes_mmap_hash(v) (&uprobes_mmap_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ]) | 56 | #define uprobes_mmap_hash(v) (&uprobes_mmap_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ]) |
74 | 57 | ||
75 | static struct percpu_rw_semaphore dup_mmap_sem; | 58 | static struct percpu_rw_semaphore dup_mmap_sem; |
76 | 59 | ||
77 | /* | ||
78 | * uprobe_events allows us to skip the uprobe_mmap if there are no uprobe | ||
79 | * events active at this time. Probably a fine grained per inode count is | ||
80 | * better? | ||
81 | */ | ||
82 | static atomic_t uprobe_events = ATOMIC_INIT(0); | ||
83 | |||
84 | /* Have a copy of original instruction */ | 60 | /* Have a copy of original instruction */ |
85 | #define UPROBE_COPY_INSN 0 | 61 | #define UPROBE_COPY_INSN 0 |
86 | /* Dont run handlers when first register/ last unregister in progress*/ | ||
87 | #define UPROBE_RUN_HANDLER 1 | ||
88 | /* Can skip singlestep */ | 62 | /* Can skip singlestep */ |
89 | #define UPROBE_SKIP_SSTEP 2 | 63 | #define UPROBE_SKIP_SSTEP 1 |
90 | 64 | ||
91 | struct uprobe { | 65 | struct uprobe { |
92 | struct rb_node rb_node; /* node in the rb tree */ | 66 | struct rb_node rb_node; /* node in the rb tree */ |
93 | atomic_t ref; | 67 | atomic_t ref; |
68 | struct rw_semaphore register_rwsem; | ||
94 | struct rw_semaphore consumer_rwsem; | 69 | struct rw_semaphore consumer_rwsem; |
95 | struct mutex copy_mutex; /* TODO: kill me and UPROBE_COPY_INSN */ | ||
96 | struct list_head pending_list; | 70 | struct list_head pending_list; |
97 | struct uprobe_consumer *consumers; | 71 | struct uprobe_consumer *consumers; |
98 | struct inode *inode; /* Also hold a ref to inode */ | 72 | struct inode *inode; /* Also hold a ref to inode */ |
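The hunk above drops the register-side mutex hash (`uprobes_mutex`/`uprobes_hash()`) but keeps `uprobes_mmap_hash()`: rather than one lock per inode, the inode pointer is hashed into a small fixed pool of `UPROBES_HASH_SZ` mutexes, so unrelated inodes rarely contend and no per-object lock has to be allocated or torn down. Below is a minimal userspace sketch of that lock-bucket idiom using pthreads; the names and the bucket count are illustrative stand-ins, not kernel API.

```c
#include <pthread.h>
#include <stdio.h>

#define LOCK_HASH_SZ 13		/* illustrative; uprobes uses UPROBES_HASH_SZ = 13 */

/* One small, fixed pool of locks shared by every object. */
static pthread_mutex_t bucket_lock[LOCK_HASH_SZ];

/* Same shape as uprobes_mmap_hash(): hash the object's address into a bucket. */
static pthread_mutex_t *object_lock(const void *obj)
{
	return &bucket_lock[(unsigned long)obj % LOCK_HASH_SZ];
}

struct object {
	int payload;
};

static void touch(struct object *obj)
{
	pthread_mutex_t *lock = object_lock(obj);

	pthread_mutex_lock(lock);
	obj->payload++;			/* critical section keyed by the object */
	pthread_mutex_unlock(lock);
}

int main(void)
{
	struct object a = { 0 }, b = { 0 };
	int i;

	for (i = 0; i < LOCK_HASH_SZ; i++)
		pthread_mutex_init(&bucket_lock[i], NULL);

	touch(&a);
	touch(&b);
	printf("a=%d (bucket %lu), b=%d (bucket %lu)\n",
	       a.payload, (unsigned long)&a % LOCK_HASH_SZ,
	       b.payload, (unsigned long)&b % LOCK_HASH_SZ);
	return 0;
}
```

Two inodes can hash to the same bucket; that only costs occasional contention, never correctness, which is why a small prime-sized pool is enough.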
@@ -430,9 +404,6 @@ static struct uprobe *insert_uprobe(struct uprobe *uprobe) | |||
430 | u = __insert_uprobe(uprobe); | 404 | u = __insert_uprobe(uprobe); |
431 | spin_unlock(&uprobes_treelock); | 405 | spin_unlock(&uprobes_treelock); |
432 | 406 | ||
433 | /* For now assume that the instruction need not be single-stepped */ | ||
434 | __set_bit(UPROBE_SKIP_SSTEP, &uprobe->flags); | ||
435 | |||
436 | return u; | 407 | return u; |
437 | } | 408 | } |
438 | 409 | ||
@@ -452,8 +423,10 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset) | |||
452 | 423 | ||
453 | uprobe->inode = igrab(inode); | 424 | uprobe->inode = igrab(inode); |
454 | uprobe->offset = offset; | 425 | uprobe->offset = offset; |
426 | init_rwsem(&uprobe->register_rwsem); | ||
455 | init_rwsem(&uprobe->consumer_rwsem); | 427 | init_rwsem(&uprobe->consumer_rwsem); |
456 | mutex_init(&uprobe->copy_mutex); | 428 | /* For now assume that the instruction need not be single-stepped */ |
429 | __set_bit(UPROBE_SKIP_SSTEP, &uprobe->flags); | ||
457 | 430 | ||
458 | /* add to uprobes_tree, sorted on inode:offset */ | 431 | /* add to uprobes_tree, sorted on inode:offset */ |
459 | cur_uprobe = insert_uprobe(uprobe); | 432 | cur_uprobe = insert_uprobe(uprobe); |
@@ -463,38 +436,17 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset) | |||
463 | kfree(uprobe); | 436 | kfree(uprobe); |
464 | uprobe = cur_uprobe; | 437 | uprobe = cur_uprobe; |
465 | iput(inode); | 438 | iput(inode); |
466 | } else { | ||
467 | atomic_inc(&uprobe_events); | ||
468 | } | 439 | } |
469 | 440 | ||
470 | return uprobe; | 441 | return uprobe; |
471 | } | 442 | } |
472 | 443 | ||
473 | static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs) | 444 | static void consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc) |
474 | { | ||
475 | struct uprobe_consumer *uc; | ||
476 | |||
477 | if (!test_bit(UPROBE_RUN_HANDLER, &uprobe->flags)) | ||
478 | return; | ||
479 | |||
480 | down_read(&uprobe->consumer_rwsem); | ||
481 | for (uc = uprobe->consumers; uc; uc = uc->next) { | ||
482 | if (!uc->filter || uc->filter(uc, current)) | ||
483 | uc->handler(uc, regs); | ||
484 | } | ||
485 | up_read(&uprobe->consumer_rwsem); | ||
486 | } | ||
487 | |||
488 | /* Returns the previous consumer */ | ||
489 | static struct uprobe_consumer * | ||
490 | consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc) | ||
491 | { | 445 | { |
492 | down_write(&uprobe->consumer_rwsem); | 446 | down_write(&uprobe->consumer_rwsem); |
493 | uc->next = uprobe->consumers; | 447 | uc->next = uprobe->consumers; |
494 | uprobe->consumers = uc; | 448 | uprobe->consumers = uc; |
495 | up_write(&uprobe->consumer_rwsem); | 449 | up_write(&uprobe->consumer_rwsem); |
496 | |||
497 | return uc->next; | ||
498 | } | 450 | } |
499 | 451 | ||
500 | /* | 452 | /* |
@@ -588,7 +540,8 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file, | |||
588 | if (test_bit(UPROBE_COPY_INSN, &uprobe->flags)) | 540 | if (test_bit(UPROBE_COPY_INSN, &uprobe->flags)) |
589 | return ret; | 541 | return ret; |
590 | 542 | ||
591 | mutex_lock(&uprobe->copy_mutex); | 543 | /* TODO: move this into _register, until then we abuse this sem. */ |
544 | down_write(&uprobe->consumer_rwsem); | ||
592 | if (test_bit(UPROBE_COPY_INSN, &uprobe->flags)) | 545 | if (test_bit(UPROBE_COPY_INSN, &uprobe->flags)) |
593 | goto out; | 546 | goto out; |
594 | 547 | ||
@@ -612,7 +565,30 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file, | |||
612 | set_bit(UPROBE_COPY_INSN, &uprobe->flags); | 565 | set_bit(UPROBE_COPY_INSN, &uprobe->flags); |
613 | 566 | ||
614 | out: | 567 | out: |
615 | mutex_unlock(&uprobe->copy_mutex); | 568 | up_write(&uprobe->consumer_rwsem); |
569 | |||
570 | return ret; | ||
571 | } | ||
572 | |||
573 | static inline bool consumer_filter(struct uprobe_consumer *uc, | ||
574 | enum uprobe_filter_ctx ctx, struct mm_struct *mm) | ||
575 | { | ||
576 | return !uc->filter || uc->filter(uc, ctx, mm); | ||
577 | } | ||
578 | |||
579 | static bool filter_chain(struct uprobe *uprobe, | ||
580 | enum uprobe_filter_ctx ctx, struct mm_struct *mm) | ||
581 | { | ||
582 | struct uprobe_consumer *uc; | ||
583 | bool ret = false; | ||
584 | |||
585 | down_read(&uprobe->consumer_rwsem); | ||
586 | for (uc = uprobe->consumers; uc; uc = uc->next) { | ||
587 | ret = consumer_filter(uc, ctx, mm); | ||
588 | if (ret) | ||
589 | break; | ||
590 | } | ||
591 | up_read(&uprobe->consumer_rwsem); | ||
616 | 592 | ||
617 | return ret; | 593 | return ret; |
618 | } | 594 | } |
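The new `consumer_filter()`/`filter_chain()` pair above treats a consumer with no `->filter` callback as always interested, and walks the consumer list under `consumer_rwsem`, stopping at the first consumer that wants the probe. Below is a small userspace sketch of that short-circuit walk; the struct and function names are stand-ins, not the kernel's.

```c
#include <stdbool.h>
#include <stdio.h>

/* Stand-in for struct uprobe_consumer: optional filter plus next pointer. */
struct consumer {
	bool (*filter)(struct consumer *c, int ctx);	/* NULL means "always wants it" */
	struct consumer *next;
};

/* Mirrors consumer_filter(): a missing filter counts as a match. */
static bool consumer_wants(struct consumer *c, int ctx)
{
	return !c->filter || c->filter(c, ctx);
}

/* Mirrors filter_chain(): true as soon as any consumer in the list matches. */
static bool chain_wants(struct consumer *head, int ctx)
{
	struct consumer *c;

	for (c = head; c; c = c->next) {
		if (consumer_wants(c, ctx))
			return true;
	}
	return false;
}

static bool only_ctx_one(struct consumer *c, int ctx)
{
	(void)c;
	return ctx == 1;
}

int main(void)
{
	struct consumer picky = { .filter = only_ctx_one, .next = NULL };
	struct consumer greedy = { .filter = NULL, .next = &picky };

	printf("picky alone, ctx 0: %d\n", chain_wants(&picky, 0));	/* 0 */
	printf("with greedy, ctx 0: %d\n", chain_wants(&greedy, 0));	/* 1, short-circuits */
	return 0;
}
```

The kernel version accumulates into `ret` and breaks instead of returning, but the effect is the same: one interested consumer is enough to keep the breakpoint.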
@@ -624,16 +600,6 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, | |||
624 | bool first_uprobe; | 600 | bool first_uprobe; |
625 | int ret; | 601 | int ret; |
626 | 602 | ||
627 | /* | ||
628 | * If probe is being deleted, unregister thread could be done with | ||
629 | * the vma-rmap-walk through. Adding a probe now can be fatal since | ||
630 | * nobody will be able to cleanup. Also we could be from fork or | ||
631 | * mremap path, where the probe might have already been inserted. | ||
632 | * Hence behave as if probe already existed. | ||
633 | */ | ||
634 | if (!uprobe->consumers) | ||
635 | return 0; | ||
636 | |||
637 | ret = prepare_uprobe(uprobe, vma->vm_file, mm, vaddr); | 603 | ret = prepare_uprobe(uprobe, vma->vm_file, mm, vaddr); |
638 | if (ret) | 604 | if (ret) |
639 | return ret; | 605 | return ret; |
@@ -658,14 +624,14 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, | |||
658 | static int | 624 | static int |
659 | remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr) | 625 | remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr) |
660 | { | 626 | { |
661 | /* can happen if uprobe_register() fails */ | ||
662 | if (!test_bit(MMF_HAS_UPROBES, &mm->flags)) | ||
663 | return 0; | ||
664 | |||
665 | set_bit(MMF_RECALC_UPROBES, &mm->flags); | 627 | set_bit(MMF_RECALC_UPROBES, &mm->flags); |
666 | return set_orig_insn(&uprobe->arch, mm, vaddr); | 628 | return set_orig_insn(&uprobe->arch, mm, vaddr); |
667 | } | 629 | } |
668 | 630 | ||
631 | static inline bool uprobe_is_active(struct uprobe *uprobe) | ||
632 | { | ||
633 | return !RB_EMPTY_NODE(&uprobe->rb_node); | ||
634 | } | ||
669 | /* | 635 | /* |
670 | * There could be threads that have already hit the breakpoint. They | 636 | * There could be threads that have already hit the breakpoint. They |
671 | * will recheck the current insn and restart if find_uprobe() fails. | 637 | * will recheck the current insn and restart if find_uprobe() fails. |
@@ -673,12 +639,15 @@ remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vad | |||
673 | */ | 639 | */ |
674 | static void delete_uprobe(struct uprobe *uprobe) | 640 | static void delete_uprobe(struct uprobe *uprobe) |
675 | { | 641 | { |
642 | if (WARN_ON(!uprobe_is_active(uprobe))) | ||
643 | return; | ||
644 | |||
676 | spin_lock(&uprobes_treelock); | 645 | spin_lock(&uprobes_treelock); |
677 | rb_erase(&uprobe->rb_node, &uprobes_tree); | 646 | rb_erase(&uprobe->rb_node, &uprobes_tree); |
678 | spin_unlock(&uprobes_treelock); | 647 | spin_unlock(&uprobes_treelock); |
648 | RB_CLEAR_NODE(&uprobe->rb_node); /* for uprobe_is_active() */ | ||
679 | iput(uprobe->inode); | 649 | iput(uprobe->inode); |
680 | put_uprobe(uprobe); | 650 | put_uprobe(uprobe); |
681 | atomic_dec(&uprobe_events); | ||
682 | } | 651 | } |
683 | 652 | ||
684 | struct map_info { | 653 | struct map_info { |
@@ -764,8 +733,10 @@ build_map_info(struct address_space *mapping, loff_t offset, bool is_register) | |||
764 | return curr; | 733 | return curr; |
765 | } | 734 | } |
766 | 735 | ||
767 | static int register_for_each_vma(struct uprobe *uprobe, bool is_register) | 736 | static int |
737 | register_for_each_vma(struct uprobe *uprobe, struct uprobe_consumer *new) | ||
768 | { | 738 | { |
739 | bool is_register = !!new; | ||
769 | struct map_info *info; | 740 | struct map_info *info; |
770 | int err = 0; | 741 | int err = 0; |
771 | 742 | ||
@@ -794,10 +765,16 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register) | |||
794 | vaddr_to_offset(vma, info->vaddr) != uprobe->offset) | 765 | vaddr_to_offset(vma, info->vaddr) != uprobe->offset) |
795 | goto unlock; | 766 | goto unlock; |
796 | 767 | ||
797 | if (is_register) | 768 | if (is_register) { |
798 | err = install_breakpoint(uprobe, mm, vma, info->vaddr); | 769 | /* consult only the "caller", new consumer. */ |
799 | else | 770 | if (consumer_filter(new, |
800 | err |= remove_breakpoint(uprobe, mm, info->vaddr); | 771 | UPROBE_FILTER_REGISTER, mm)) |
772 | err = install_breakpoint(uprobe, mm, vma, info->vaddr); | ||
773 | } else if (test_bit(MMF_HAS_UPROBES, &mm->flags)) { | ||
774 | if (!filter_chain(uprobe, | ||
775 | UPROBE_FILTER_UNREGISTER, mm)) | ||
776 | err |= remove_breakpoint(uprobe, mm, info->vaddr); | ||
777 | } | ||
801 | 778 | ||
802 | unlock: | 779 | unlock: |
803 | up_write(&mm->mmap_sem); | 780 | up_write(&mm->mmap_sem); |
@@ -810,17 +787,23 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register) | |||
810 | return err; | 787 | return err; |
811 | } | 788 | } |
812 | 789 | ||
813 | static int __uprobe_register(struct uprobe *uprobe) | 790 | static int __uprobe_register(struct uprobe *uprobe, struct uprobe_consumer *uc) |
814 | { | 791 | { |
815 | return register_for_each_vma(uprobe, true); | 792 | consumer_add(uprobe, uc); |
793 | return register_for_each_vma(uprobe, uc); | ||
816 | } | 794 | } |
817 | 795 | ||
818 | static void __uprobe_unregister(struct uprobe *uprobe) | 796 | static void __uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *uc) |
819 | { | 797 | { |
820 | if (!register_for_each_vma(uprobe, false)) | 798 | int err; |
821 | delete_uprobe(uprobe); | 799 | |
800 | if (!consumer_del(uprobe, uc)) /* WARN? */ | ||
801 | return; | ||
822 | 802 | ||
803 | err = register_for_each_vma(uprobe, NULL); | ||
823 | /* TODO : cant unregister? schedule a worker thread */ | 804 | /* TODO : cant unregister? schedule a worker thread */ |
805 | if (!uprobe->consumers && !err) | ||
806 | delete_uprobe(uprobe); | ||
824 | } | 807 | } |
825 | 808 | ||
826 | /* | 809 | /* |
@@ -845,31 +828,59 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer * | |||
845 | struct uprobe *uprobe; | 828 | struct uprobe *uprobe; |
846 | int ret; | 829 | int ret; |
847 | 830 | ||
848 | if (!inode || !uc || uc->next) | 831 | /* Racy, just to catch the obvious mistakes */ |
849 | return -EINVAL; | ||
850 | |||
851 | if (offset > i_size_read(inode)) | 832 | if (offset > i_size_read(inode)) |
852 | return -EINVAL; | 833 | return -EINVAL; |
853 | 834 | ||
854 | ret = 0; | 835 | retry: |
855 | mutex_lock(uprobes_hash(inode)); | ||
856 | uprobe = alloc_uprobe(inode, offset); | 836 | uprobe = alloc_uprobe(inode, offset); |
857 | 837 | if (!uprobe) | |
858 | if (!uprobe) { | 838 | return -ENOMEM; |
859 | ret = -ENOMEM; | 839 | /* |
860 | } else if (!consumer_add(uprobe, uc)) { | 840 | * We can race with uprobe_unregister()->delete_uprobe(). |
861 | ret = __uprobe_register(uprobe); | 841 | * Check uprobe_is_active() and retry if it is false. |
862 | if (ret) { | 842 | */ |
863 | uprobe->consumers = NULL; | 843 | down_write(&uprobe->register_rwsem); |
864 | __uprobe_unregister(uprobe); | 844 | ret = -EAGAIN; |
865 | } else { | 845 | if (likely(uprobe_is_active(uprobe))) { |
866 | set_bit(UPROBE_RUN_HANDLER, &uprobe->flags); | 846 | ret = __uprobe_register(uprobe, uc); |
867 | } | 847 | if (ret) |
848 | __uprobe_unregister(uprobe, uc); | ||
868 | } | 849 | } |
850 | up_write(&uprobe->register_rwsem); | ||
851 | put_uprobe(uprobe); | ||
869 | 852 | ||
870 | mutex_unlock(uprobes_hash(inode)); | 853 | if (unlikely(ret == -EAGAIN)) |
871 | if (uprobe) | 854 | goto retry; |
872 | put_uprobe(uprobe); | 855 | return ret; |
856 | } | ||
857 | EXPORT_SYMBOL_GPL(uprobe_register); | ||
858 | |||
859 | /* | ||
860 | * uprobe_apply - unregister a already registered probe. | ||
861 | * @inode: the file in which the probe has to be removed. | ||
862 | * @offset: offset from the start of the file. | ||
863 | * @uc: consumer which wants to add more or remove some breakpoints | ||
864 | * @add: add or remove the breakpoints | ||
865 | */ | ||
866 | int uprobe_apply(struct inode *inode, loff_t offset, | ||
867 | struct uprobe_consumer *uc, bool add) | ||
868 | { | ||
869 | struct uprobe *uprobe; | ||
870 | struct uprobe_consumer *con; | ||
871 | int ret = -ENOENT; | ||
872 | |||
873 | uprobe = find_uprobe(inode, offset); | ||
874 | if (!uprobe) | ||
875 | return ret; | ||
876 | |||
877 | down_write(&uprobe->register_rwsem); | ||
878 | for (con = uprobe->consumers; con && con != uc ; con = con->next) | ||
879 | ; | ||
880 | if (con) | ||
881 | ret = register_for_each_vma(uprobe, add ? uc : NULL); | ||
882 | up_write(&uprobe->register_rwsem); | ||
883 | put_uprobe(uprobe); | ||
873 | 884 | ||
874 | return ret; | 885 | return ret; |
875 | } | 886 | } |
@@ -884,25 +895,42 @@ void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consume | |||
884 | { | 895 | { |
885 | struct uprobe *uprobe; | 896 | struct uprobe *uprobe; |
886 | 897 | ||
887 | if (!inode || !uc) | ||
888 | return; | ||
889 | |||
890 | uprobe = find_uprobe(inode, offset); | 898 | uprobe = find_uprobe(inode, offset); |
891 | if (!uprobe) | 899 | if (!uprobe) |
892 | return; | 900 | return; |
893 | 901 | ||
894 | mutex_lock(uprobes_hash(inode)); | 902 | down_write(&uprobe->register_rwsem); |
903 | __uprobe_unregister(uprobe, uc); | ||
904 | up_write(&uprobe->register_rwsem); | ||
905 | put_uprobe(uprobe); | ||
906 | } | ||
907 | EXPORT_SYMBOL_GPL(uprobe_unregister); | ||
895 | 908 | ||
896 | if (consumer_del(uprobe, uc)) { | 909 | static int unapply_uprobe(struct uprobe *uprobe, struct mm_struct *mm) |
897 | if (!uprobe->consumers) { | 910 | { |
898 | __uprobe_unregister(uprobe); | 911 | struct vm_area_struct *vma; |
899 | clear_bit(UPROBE_RUN_HANDLER, &uprobe->flags); | 912 | int err = 0; |
900 | } | 913 | |
914 | down_read(&mm->mmap_sem); | ||
915 | for (vma = mm->mmap; vma; vma = vma->vm_next) { | ||
916 | unsigned long vaddr; | ||
917 | loff_t offset; | ||
918 | |||
919 | if (!valid_vma(vma, false) || | ||
920 | vma->vm_file->f_mapping->host != uprobe->inode) | ||
921 | continue; | ||
922 | |||
923 | offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT; | ||
924 | if (uprobe->offset < offset || | ||
925 | uprobe->offset >= offset + vma->vm_end - vma->vm_start) | ||
926 | continue; | ||
927 | |||
928 | vaddr = offset_to_vaddr(vma, uprobe->offset); | ||
929 | err |= remove_breakpoint(uprobe, mm, vaddr); | ||
901 | } | 930 | } |
931 | up_read(&mm->mmap_sem); | ||
902 | 932 | ||
903 | mutex_unlock(uprobes_hash(inode)); | 933 | return err; |
904 | if (uprobe) | ||
905 | put_uprobe(uprobe); | ||
906 | } | 934 | } |
907 | 935 | ||
908 | static struct rb_node * | 936 | static struct rb_node * |
@@ -979,7 +1007,7 @@ int uprobe_mmap(struct vm_area_struct *vma) | |||
979 | struct uprobe *uprobe, *u; | 1007 | struct uprobe *uprobe, *u; |
980 | struct inode *inode; | 1008 | struct inode *inode; |
981 | 1009 | ||
982 | if (!atomic_read(&uprobe_events) || !valid_vma(vma, true)) | 1010 | if (no_uprobe_events() || !valid_vma(vma, true)) |
983 | return 0; | 1011 | return 0; |
984 | 1012 | ||
985 | inode = vma->vm_file->f_mapping->host; | 1013 | inode = vma->vm_file->f_mapping->host; |
@@ -988,9 +1016,14 @@ int uprobe_mmap(struct vm_area_struct *vma) | |||
988 | 1016 | ||
989 | mutex_lock(uprobes_mmap_hash(inode)); | 1017 | mutex_lock(uprobes_mmap_hash(inode)); |
990 | build_probe_list(inode, vma, vma->vm_start, vma->vm_end, &tmp_list); | 1018 | build_probe_list(inode, vma, vma->vm_start, vma->vm_end, &tmp_list); |
991 | 1019 | /* | |
1020 | * We can race with uprobe_unregister(), this uprobe can be already | ||
1021 | * removed. But in this case filter_chain() must return false, all | ||
1022 | * consumers have gone away. | ||
1023 | */ | ||
992 | list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { | 1024 | list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { |
993 | if (!fatal_signal_pending(current)) { | 1025 | if (!fatal_signal_pending(current) && |
1026 | filter_chain(uprobe, UPROBE_FILTER_MMAP, vma->vm_mm)) { | ||
994 | unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset); | 1027 | unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset); |
995 | install_breakpoint(uprobe, vma->vm_mm, vma, vaddr); | 1028 | install_breakpoint(uprobe, vma->vm_mm, vma, vaddr); |
996 | } | 1029 | } |
@@ -1025,7 +1058,7 @@ vma_has_uprobes(struct vm_area_struct *vma, unsigned long start, unsigned long e | |||
1025 | */ | 1058 | */ |
1026 | void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end) | 1059 | void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end) |
1027 | { | 1060 | { |
1028 | if (!atomic_read(&uprobe_events) || !valid_vma(vma, false)) | 1061 | if (no_uprobe_events() || !valid_vma(vma, false)) |
1029 | return; | 1062 | return; |
1030 | 1063 | ||
1031 | if (!atomic_read(&vma->vm_mm->mm_users)) /* called by mmput() ? */ | 1064 | if (!atomic_read(&vma->vm_mm->mm_users)) /* called by mmput() ? */ |
@@ -1042,22 +1075,14 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon | |||
1042 | /* Slot allocation for XOL */ | 1075 | /* Slot allocation for XOL */ |
1043 | static int xol_add_vma(struct xol_area *area) | 1076 | static int xol_add_vma(struct xol_area *area) |
1044 | { | 1077 | { |
1045 | struct mm_struct *mm; | 1078 | struct mm_struct *mm = current->mm; |
1046 | int ret; | 1079 | int ret = -EALREADY; |
1047 | |||
1048 | area->page = alloc_page(GFP_HIGHUSER); | ||
1049 | if (!area->page) | ||
1050 | return -ENOMEM; | ||
1051 | |||
1052 | ret = -EALREADY; | ||
1053 | mm = current->mm; | ||
1054 | 1080 | ||
1055 | down_write(&mm->mmap_sem); | 1081 | down_write(&mm->mmap_sem); |
1056 | if (mm->uprobes_state.xol_area) | 1082 | if (mm->uprobes_state.xol_area) |
1057 | goto fail; | 1083 | goto fail; |
1058 | 1084 | ||
1059 | ret = -ENOMEM; | 1085 | ret = -ENOMEM; |
1060 | |||
1061 | /* Try to map as high as possible, this is only a hint. */ | 1086 | /* Try to map as high as possible, this is only a hint. */ |
1062 | area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0); | 1087 | area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0); |
1063 | if (area->vaddr & ~PAGE_MASK) { | 1088 | if (area->vaddr & ~PAGE_MASK) { |
@@ -1073,54 +1098,53 @@ static int xol_add_vma(struct xol_area *area) | |||
1073 | smp_wmb(); /* pairs with get_xol_area() */ | 1098 | smp_wmb(); /* pairs with get_xol_area() */ |
1074 | mm->uprobes_state.xol_area = area; | 1099 | mm->uprobes_state.xol_area = area; |
1075 | ret = 0; | 1100 | ret = 0; |
1076 | 1101 | fail: | |
1077 | fail: | ||
1078 | up_write(&mm->mmap_sem); | 1102 | up_write(&mm->mmap_sem); |
1079 | if (ret) | ||
1080 | __free_page(area->page); | ||
1081 | 1103 | ||
1082 | return ret; | 1104 | return ret; |
1083 | } | 1105 | } |
1084 | 1106 | ||
1085 | static struct xol_area *get_xol_area(struct mm_struct *mm) | ||
1086 | { | ||
1087 | struct xol_area *area; | ||
1088 | |||
1089 | area = mm->uprobes_state.xol_area; | ||
1090 | smp_read_barrier_depends(); /* pairs with wmb in xol_add_vma() */ | ||
1091 | |||
1092 | return area; | ||
1093 | } | ||
1094 | |||
1095 | /* | 1107 | /* |
1096 | * xol_alloc_area - Allocate process's xol_area. | 1108 | * get_xol_area - Allocate process's xol_area if necessary. |
1097 | * This area will be used for storing instructions for execution out of | 1109 | * This area will be used for storing instructions for execution out of line. |
1098 | * line. | ||
1099 | * | 1110 | * |
1100 | * Returns the allocated area or NULL. | 1111 | * Returns the allocated area or NULL. |
1101 | */ | 1112 | */ |
1102 | static struct xol_area *xol_alloc_area(void) | 1113 | static struct xol_area *get_xol_area(void) |
1103 | { | 1114 | { |
1115 | struct mm_struct *mm = current->mm; | ||
1104 | struct xol_area *area; | 1116 | struct xol_area *area; |
1105 | 1117 | ||
1118 | area = mm->uprobes_state.xol_area; | ||
1119 | if (area) | ||
1120 | goto ret; | ||
1121 | |||
1106 | area = kzalloc(sizeof(*area), GFP_KERNEL); | 1122 | area = kzalloc(sizeof(*area), GFP_KERNEL); |
1107 | if (unlikely(!area)) | 1123 | if (unlikely(!area)) |
1108 | return NULL; | 1124 | goto out; |
1109 | 1125 | ||
1110 | area->bitmap = kzalloc(BITS_TO_LONGS(UINSNS_PER_PAGE) * sizeof(long), GFP_KERNEL); | 1126 | area->bitmap = kzalloc(BITS_TO_LONGS(UINSNS_PER_PAGE) * sizeof(long), GFP_KERNEL); |
1111 | |||
1112 | if (!area->bitmap) | 1127 | if (!area->bitmap) |
1113 | goto fail; | 1128 | goto free_area; |
1129 | |||
1130 | area->page = alloc_page(GFP_HIGHUSER); | ||
1131 | if (!area->page) | ||
1132 | goto free_bitmap; | ||
1114 | 1133 | ||
1115 | init_waitqueue_head(&area->wq); | 1134 | init_waitqueue_head(&area->wq); |
1116 | if (!xol_add_vma(area)) | 1135 | if (!xol_add_vma(area)) |
1117 | return area; | 1136 | return area; |
1118 | 1137 | ||
1119 | fail: | 1138 | __free_page(area->page); |
1139 | free_bitmap: | ||
1120 | kfree(area->bitmap); | 1140 | kfree(area->bitmap); |
1141 | free_area: | ||
1121 | kfree(area); | 1142 | kfree(area); |
1122 | 1143 | out: | |
1123 | return get_xol_area(current->mm); | 1144 | area = mm->uprobes_state.xol_area; |
1145 | ret: | ||
1146 | smp_read_barrier_depends(); /* pairs with wmb in xol_add_vma() */ | ||
1147 | return area; | ||
1124 | } | 1148 | } |
1125 | 1149 | ||
1126 | /* | 1150 | /* |
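`get_xol_area()` above is a lazy, once-per-mm initialization: the area is fully set up first, then published through `mm->uprobes_state.xol_area` behind `smp_wmb()`, and readers pair with `smp_read_barrier_depends()` so they never observe a half-initialized area; a thread that loses the publishing race frees its copy and uses the one already visible. Here is a hedged userspace sketch of the same publish-then-read pattern using C11 atomics. The kernel serializes creators with `mmap_sem` and uses its own barrier primitives rather than a compare-and-swap, so this shows the idiom, not the implementation.

```c
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct area {
	int nslots;		/* stand-in for the bitmap/page the kernel sets up */
};

/* Shared pointer, NULL until an area has been published. */
static _Atomic(struct area *) area_ptr;

static struct area *get_area(void)
{
	/* Acquire pairs with the release in the publish below: seeing the
	 * pointer guarantees seeing its fully initialized contents. */
	struct area *a = atomic_load_explicit(&area_ptr, memory_order_acquire);
	struct area *expected = NULL;

	if (a)
		return a;

	a = malloc(sizeof(*a));
	if (!a)
		return NULL;
	a->nslots = 128;	/* initialize everything first ... */

	/* ... then publish; if another thread won the race, free ours
	 * and use the one that is already visible. */
	if (!atomic_compare_exchange_strong_explicit(&area_ptr, &expected, a,
						     memory_order_acq_rel,
						     memory_order_acquire)) {
		free(a);
		a = expected;
	}
	return a;
}

int main(void)
{
	struct area *a = get_area();

	if (a)
		printf("slots = %d\n", a->nslots);
	return 0;
}
```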
@@ -1186,33 +1210,26 @@ static unsigned long xol_take_insn_slot(struct xol_area *area) | |||
1186 | } | 1210 | } |
1187 | 1211 | ||
1188 | /* | 1212 | /* |
1189 | * xol_get_insn_slot - If was not allocated a slot, then | 1213 | * xol_get_insn_slot - allocate a slot for xol. |
1190 | * allocate a slot. | ||
1191 | * Returns the allocated slot address or 0. | 1214 | * Returns the allocated slot address or 0. |
1192 | */ | 1215 | */ |
1193 | static unsigned long xol_get_insn_slot(struct uprobe *uprobe, unsigned long slot_addr) | 1216 | static unsigned long xol_get_insn_slot(struct uprobe *uprobe) |
1194 | { | 1217 | { |
1195 | struct xol_area *area; | 1218 | struct xol_area *area; |
1196 | unsigned long offset; | 1219 | unsigned long offset; |
1220 | unsigned long xol_vaddr; | ||
1197 | void *vaddr; | 1221 | void *vaddr; |
1198 | 1222 | ||
1199 | area = get_xol_area(current->mm); | 1223 | area = get_xol_area(); |
1200 | if (!area) { | 1224 | if (!area) |
1201 | area = xol_alloc_area(); | 1225 | return 0; |
1202 | if (!area) | ||
1203 | return 0; | ||
1204 | } | ||
1205 | current->utask->xol_vaddr = xol_take_insn_slot(area); | ||
1206 | 1226 | ||
1207 | /* | 1227 | xol_vaddr = xol_take_insn_slot(area); |
1208 | * Initialize the slot if xol_vaddr points to valid | 1228 | if (unlikely(!xol_vaddr)) |
1209 | * instruction slot. | ||
1210 | */ | ||
1211 | if (unlikely(!current->utask->xol_vaddr)) | ||
1212 | return 0; | 1229 | return 0; |
1213 | 1230 | ||
1214 | current->utask->vaddr = slot_addr; | 1231 | /* Initialize the slot */ |
1215 | offset = current->utask->xol_vaddr & ~PAGE_MASK; | 1232 | offset = xol_vaddr & ~PAGE_MASK; |
1216 | vaddr = kmap_atomic(area->page); | 1233 | vaddr = kmap_atomic(area->page); |
1217 | memcpy(vaddr + offset, uprobe->arch.insn, MAX_UINSN_BYTES); | 1234 | memcpy(vaddr + offset, uprobe->arch.insn, MAX_UINSN_BYTES); |
1218 | kunmap_atomic(vaddr); | 1235 | kunmap_atomic(vaddr); |
@@ -1222,7 +1239,7 @@ static unsigned long xol_get_insn_slot(struct uprobe *uprobe, unsigned long slot | |||
1222 | */ | 1239 | */ |
1223 | flush_dcache_page(area->page); | 1240 | flush_dcache_page(area->page); |
1224 | 1241 | ||
1225 | return current->utask->xol_vaddr; | 1242 | return xol_vaddr; |
1226 | } | 1243 | } |
1227 | 1244 | ||
1228 | /* | 1245 | /* |
@@ -1240,8 +1257,7 @@ static void xol_free_insn_slot(struct task_struct *tsk) | |||
1240 | return; | 1257 | return; |
1241 | 1258 | ||
1242 | slot_addr = tsk->utask->xol_vaddr; | 1259 | slot_addr = tsk->utask->xol_vaddr; |
1243 | 1260 | if (unlikely(!slot_addr)) | |
1244 | if (unlikely(!slot_addr || IS_ERR_VALUE(slot_addr))) | ||
1245 | return; | 1261 | return; |
1246 | 1262 | ||
1247 | area = tsk->mm->uprobes_state.xol_area; | 1263 | area = tsk->mm->uprobes_state.xol_area; |
@@ -1303,33 +1319,48 @@ void uprobe_copy_process(struct task_struct *t) | |||
1303 | } | 1319 | } |
1304 | 1320 | ||
1305 | /* | 1321 | /* |
1306 | * Allocate a uprobe_task object for the task. | 1322 | * Allocate a uprobe_task object for the task if if necessary. |
1307 | * Called when the thread hits a breakpoint for the first time. | 1323 | * Called when the thread hits a breakpoint. |
1308 | * | 1324 | * |
1309 | * Returns: | 1325 | * Returns: |
1310 | * - pointer to new uprobe_task on success | 1326 | * - pointer to new uprobe_task on success |
1311 | * - NULL otherwise | 1327 | * - NULL otherwise |
1312 | */ | 1328 | */ |
1313 | static struct uprobe_task *add_utask(void) | 1329 | static struct uprobe_task *get_utask(void) |
1314 | { | 1330 | { |
1315 | struct uprobe_task *utask; | 1331 | if (!current->utask) |
1316 | 1332 | current->utask = kzalloc(sizeof(struct uprobe_task), GFP_KERNEL); | |
1317 | utask = kzalloc(sizeof *utask, GFP_KERNEL); | 1333 | return current->utask; |
1318 | if (unlikely(!utask)) | ||
1319 | return NULL; | ||
1320 | |||
1321 | current->utask = utask; | ||
1322 | return utask; | ||
1323 | } | 1334 | } |
1324 | 1335 | ||
1325 | /* Prepare to single-step probed instruction out of line. */ | 1336 | /* Prepare to single-step probed instruction out of line. */ |
1326 | static int | 1337 | static int |
1327 | pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long vaddr) | 1338 | pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long bp_vaddr) |
1328 | { | 1339 | { |
1329 | if (xol_get_insn_slot(uprobe, vaddr) && !arch_uprobe_pre_xol(&uprobe->arch, regs)) | 1340 | struct uprobe_task *utask; |
1330 | return 0; | 1341 | unsigned long xol_vaddr; |
1342 | int err; | ||
1343 | |||
1344 | utask = get_utask(); | ||
1345 | if (!utask) | ||
1346 | return -ENOMEM; | ||
1347 | |||
1348 | xol_vaddr = xol_get_insn_slot(uprobe); | ||
1349 | if (!xol_vaddr) | ||
1350 | return -ENOMEM; | ||
1351 | |||
1352 | utask->xol_vaddr = xol_vaddr; | ||
1353 | utask->vaddr = bp_vaddr; | ||
1354 | |||
1355 | err = arch_uprobe_pre_xol(&uprobe->arch, regs); | ||
1356 | if (unlikely(err)) { | ||
1357 | xol_free_insn_slot(current); | ||
1358 | return err; | ||
1359 | } | ||
1331 | 1360 | ||
1332 | return -EFAULT; | 1361 | utask->active_uprobe = uprobe; |
1362 | utask->state = UTASK_SSTEP; | ||
1363 | return 0; | ||
1333 | } | 1364 | } |
1334 | 1365 | ||
1335 | /* | 1366 | /* |
@@ -1391,6 +1422,7 @@ static void mmf_recalc_uprobes(struct mm_struct *mm) | |||
1391 | * This is not strictly accurate, we can race with | 1422 | * This is not strictly accurate, we can race with |
1392 | * uprobe_unregister() and see the already removed | 1423 | * uprobe_unregister() and see the already removed |
1393 | * uprobe if delete_uprobe() was not yet called. | 1424 | * uprobe if delete_uprobe() was not yet called. |
1425 | * Or this uprobe can be filtered out. | ||
1394 | */ | 1426 | */ |
1395 | if (vma_has_uprobes(vma, vma->vm_start, vma->vm_end)) | 1427 | if (vma_has_uprobes(vma, vma->vm_start, vma->vm_end)) |
1396 | return; | 1428 | return; |
@@ -1452,13 +1484,33 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp) | |||
1452 | return uprobe; | 1484 | return uprobe; |
1453 | } | 1485 | } |
1454 | 1486 | ||
1487 | static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs) | ||
1488 | { | ||
1489 | struct uprobe_consumer *uc; | ||
1490 | int remove = UPROBE_HANDLER_REMOVE; | ||
1491 | |||
1492 | down_read(&uprobe->register_rwsem); | ||
1493 | for (uc = uprobe->consumers; uc; uc = uc->next) { | ||
1494 | int rc = uc->handler(uc, regs); | ||
1495 | |||
1496 | WARN(rc & ~UPROBE_HANDLER_MASK, | ||
1497 | "bad rc=0x%x from %pf()\n", rc, uc->handler); | ||
1498 | remove &= rc; | ||
1499 | } | ||
1500 | |||
1501 | if (remove && uprobe->consumers) { | ||
1502 | WARN_ON(!uprobe_is_active(uprobe)); | ||
1503 | unapply_uprobe(uprobe, current->mm); | ||
1504 | } | ||
1505 | up_read(&uprobe->register_rwsem); | ||
1506 | } | ||
1507 | |||
1455 | /* | 1508 | /* |
1456 | * Run handler and ask thread to singlestep. | 1509 | * Run handler and ask thread to singlestep. |
1457 | * Ensure all non-fatal signals cannot interrupt thread while it singlesteps. | 1510 | * Ensure all non-fatal signals cannot interrupt thread while it singlesteps. |
1458 | */ | 1511 | */ |
1459 | static void handle_swbp(struct pt_regs *regs) | 1512 | static void handle_swbp(struct pt_regs *regs) |
1460 | { | 1513 | { |
1461 | struct uprobe_task *utask; | ||
1462 | struct uprobe *uprobe; | 1514 | struct uprobe *uprobe; |
1463 | unsigned long bp_vaddr; | 1515 | unsigned long bp_vaddr; |
1464 | int uninitialized_var(is_swbp); | 1516 | int uninitialized_var(is_swbp); |
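The new `handler_chain()` above runs every consumer's `->handler()` under the read side of `register_rwsem`, warns if a handler returns bits outside `UPROBE_HANDLER_MASK`, and ANDs the return values together: the breakpoint is pulled out of the current mm via `unapply_uprobe()` only when every handler voted `UPROBE_HANDLER_REMOVE` and at least one consumer remains. A tiny standalone demo of that unanimous-vote accumulation follows; the constant and names are illustrative, not the kernel's.

```c
#include <stdio.h>

#define HANDLER_REMOVE 1	/* stands in for UPROBE_HANDLER_REMOVE */

struct handler {
	int (*fn)(void);
	struct handler *next;
};

static int keep_probe(void)   { return 0; }
static int remove_probe(void) { return HANDLER_REMOVE; }

/* The breakpoint is removed only if *every* handler votes for removal. */
static int run_chain(struct handler *head)
{
	int remove = HANDLER_REMOVE;
	struct handler *h;

	for (h = head; h; h = h->next)
		remove &= h->fn();

	return remove;
}

int main(void)
{
	struct handler b = { remove_probe, NULL };
	struct handler a = { keep_probe, &b };

	printf("all want removal:  %d\n", run_chain(&b));	/* 1 */
	printf("one wants to keep: %d\n", run_chain(&a));	/* 0 */
	return 0;
}
```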
@@ -1483,6 +1535,10 @@ static void handle_swbp(struct pt_regs *regs) | |||
1483 | } | 1535 | } |
1484 | return; | 1536 | return; |
1485 | } | 1537 | } |
1538 | |||
1539 | /* change it in advance for ->handler() and restart */ | ||
1540 | instruction_pointer_set(regs, bp_vaddr); | ||
1541 | |||
1486 | /* | 1542 | /* |
1487 | * TODO: move copy_insn/etc into _register and remove this hack. | 1543 | * TODO: move copy_insn/etc into _register and remove this hack. |
1488 | * After we hit the bp, _unregister + _register can install the | 1544 | * After we hit the bp, _unregister + _register can install the |
@@ -1490,32 +1546,16 @@ static void handle_swbp(struct pt_regs *regs) | |||
1490 | */ | 1546 | */ |
1491 | smp_rmb(); /* pairs with wmb() in install_breakpoint() */ | 1547 | smp_rmb(); /* pairs with wmb() in install_breakpoint() */ |
1492 | if (unlikely(!test_bit(UPROBE_COPY_INSN, &uprobe->flags))) | 1548 | if (unlikely(!test_bit(UPROBE_COPY_INSN, &uprobe->flags))) |
1493 | goto restart; | 1549 | goto out; |
1494 | |||
1495 | utask = current->utask; | ||
1496 | if (!utask) { | ||
1497 | utask = add_utask(); | ||
1498 | /* Cannot allocate; re-execute the instruction. */ | ||
1499 | if (!utask) | ||
1500 | goto restart; | ||
1501 | } | ||
1502 | 1550 | ||
1503 | handler_chain(uprobe, regs); | 1551 | handler_chain(uprobe, regs); |
1504 | if (can_skip_sstep(uprobe, regs)) | 1552 | if (can_skip_sstep(uprobe, regs)) |
1505 | goto out; | 1553 | goto out; |
1506 | 1554 | ||
1507 | if (!pre_ssout(uprobe, regs, bp_vaddr)) { | 1555 | if (!pre_ssout(uprobe, regs, bp_vaddr)) |
1508 | utask->active_uprobe = uprobe; | ||
1509 | utask->state = UTASK_SSTEP; | ||
1510 | return; | 1556 | return; |
1511 | } | ||
1512 | 1557 | ||
1513 | restart: | 1558 | /* can_skip_sstep() succeeded, or restart if can't singlestep */ |
1514 | /* | ||
1515 | * cannot singlestep; cannot skip instruction; | ||
1516 | * re-execute the instruction. | ||
1517 | */ | ||
1518 | instruction_pointer_set(regs, bp_vaddr); | ||
1519 | out: | 1559 | out: |
1520 | put_uprobe(uprobe); | 1560 | put_uprobe(uprobe); |
1521 | } | 1561 | } |
@@ -1609,10 +1649,8 @@ static int __init init_uprobes(void) | |||
1609 | { | 1649 | { |
1610 | int i; | 1650 | int i; |
1611 | 1651 | ||
1612 | for (i = 0; i < UPROBES_HASH_SZ; i++) { | 1652 | for (i = 0; i < UPROBES_HASH_SZ; i++) |
1613 | mutex_init(&uprobes_mutex[i]); | ||
1614 | mutex_init(&uprobes_mmap_mutex[i]); | 1653 | mutex_init(&uprobes_mmap_mutex[i]); |
1615 | } | ||
1616 | 1654 | ||
1617 | if (percpu_init_rwsem(&dup_mmap_sem)) | 1655 | if (percpu_init_rwsem(&dup_mmap_sem)) |
1618 | return -ENOMEM; | 1656 | return -ENOMEM; |
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 098f396aa409..f423c3ef4a82 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -919,7 +919,7 @@ static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p) | |||
919 | } | 919 | } |
920 | #endif /* CONFIG_OPTPROBES */ | 920 | #endif /* CONFIG_OPTPROBES */ |
921 | 921 | ||
922 | #ifdef KPROBES_CAN_USE_FTRACE | 922 | #ifdef CONFIG_KPROBES_ON_FTRACE |
923 | static struct ftrace_ops kprobe_ftrace_ops __read_mostly = { | 923 | static struct ftrace_ops kprobe_ftrace_ops __read_mostly = { |
924 | .func = kprobe_ftrace_handler, | 924 | .func = kprobe_ftrace_handler, |
925 | .flags = FTRACE_OPS_FL_SAVE_REGS, | 925 | .flags = FTRACE_OPS_FL_SAVE_REGS, |
@@ -964,7 +964,7 @@ static void __kprobes disarm_kprobe_ftrace(struct kprobe *p) | |||
964 | (unsigned long)p->addr, 1, 0); | 964 | (unsigned long)p->addr, 1, 0); |
965 | WARN(ret < 0, "Failed to disarm kprobe-ftrace at %p (%d)\n", p->addr, ret); | 965 | WARN(ret < 0, "Failed to disarm kprobe-ftrace at %p (%d)\n", p->addr, ret); |
966 | } | 966 | } |
967 | #else /* !KPROBES_CAN_USE_FTRACE */ | 967 | #else /* !CONFIG_KPROBES_ON_FTRACE */ |
968 | #define prepare_kprobe(p) arch_prepare_kprobe(p) | 968 | #define prepare_kprobe(p) arch_prepare_kprobe(p) |
969 | #define arm_kprobe_ftrace(p) do {} while (0) | 969 | #define arm_kprobe_ftrace(p) do {} while (0) |
970 | #define disarm_kprobe_ftrace(p) do {} while (0) | 970 | #define disarm_kprobe_ftrace(p) do {} while (0) |
@@ -1414,12 +1414,12 @@ static __kprobes int check_kprobe_address_safe(struct kprobe *p, | |||
1414 | */ | 1414 | */ |
1415 | ftrace_addr = ftrace_location((unsigned long)p->addr); | 1415 | ftrace_addr = ftrace_location((unsigned long)p->addr); |
1416 | if (ftrace_addr) { | 1416 | if (ftrace_addr) { |
1417 | #ifdef KPROBES_CAN_USE_FTRACE | 1417 | #ifdef CONFIG_KPROBES_ON_FTRACE |
1418 | /* Given address is not on the instruction boundary */ | 1418 | /* Given address is not on the instruction boundary */ |
1419 | if ((unsigned long)p->addr != ftrace_addr) | 1419 | if ((unsigned long)p->addr != ftrace_addr) |
1420 | return -EILSEQ; | 1420 | return -EILSEQ; |
1421 | p->flags |= KPROBE_FLAG_FTRACE; | 1421 | p->flags |= KPROBE_FLAG_FTRACE; |
1422 | #else /* !KPROBES_CAN_USE_FTRACE */ | 1422 | #else /* !CONFIG_KPROBES_ON_FTRACE */ |
1423 | return -EINVAL; | 1423 | return -EINVAL; |
1424 | #endif | 1424 | #endif |
1425 | } | 1425 | } |
diff --git a/kernel/profile.c b/kernel/profile.c
index 1f391819c42f..dc3384ee874e 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -37,9 +37,6 @@ struct profile_hit { | |||
37 | #define NR_PROFILE_HIT (PAGE_SIZE/sizeof(struct profile_hit)) | 37 | #define NR_PROFILE_HIT (PAGE_SIZE/sizeof(struct profile_hit)) |
38 | #define NR_PROFILE_GRP (NR_PROFILE_HIT/PROFILE_GRPSZ) | 38 | #define NR_PROFILE_GRP (NR_PROFILE_HIT/PROFILE_GRPSZ) |
39 | 39 | ||
40 | /* Oprofile timer tick hook */ | ||
41 | static int (*timer_hook)(struct pt_regs *) __read_mostly; | ||
42 | |||
43 | static atomic_t *prof_buffer; | 40 | static atomic_t *prof_buffer; |
44 | static unsigned long prof_len, prof_shift; | 41 | static unsigned long prof_len, prof_shift; |
45 | 42 | ||
@@ -208,25 +205,6 @@ int profile_event_unregister(enum profile_type type, struct notifier_block *n) | |||
208 | } | 205 | } |
209 | EXPORT_SYMBOL_GPL(profile_event_unregister); | 206 | EXPORT_SYMBOL_GPL(profile_event_unregister); |
210 | 207 | ||
211 | int register_timer_hook(int (*hook)(struct pt_regs *)) | ||
212 | { | ||
213 | if (timer_hook) | ||
214 | return -EBUSY; | ||
215 | timer_hook = hook; | ||
216 | return 0; | ||
217 | } | ||
218 | EXPORT_SYMBOL_GPL(register_timer_hook); | ||
219 | |||
220 | void unregister_timer_hook(int (*hook)(struct pt_regs *)) | ||
221 | { | ||
222 | WARN_ON(hook != timer_hook); | ||
223 | timer_hook = NULL; | ||
224 | /* make sure all CPUs see the NULL hook */ | ||
225 | synchronize_sched(); /* Allow ongoing interrupts to complete. */ | ||
226 | } | ||
227 | EXPORT_SYMBOL_GPL(unregister_timer_hook); | ||
228 | |||
229 | |||
230 | #ifdef CONFIG_SMP | 208 | #ifdef CONFIG_SMP |
231 | /* | 209 | /* |
232 | * Each cpu has a pair of open-addressed hashtables for pending | 210 | * Each cpu has a pair of open-addressed hashtables for pending |
@@ -436,8 +414,6 @@ void profile_tick(int type) | |||
436 | { | 414 | { |
437 | struct pt_regs *regs = get_irq_regs(); | 415 | struct pt_regs *regs = get_irq_regs(); |
438 | 416 | ||
439 | if (type == CPU_PROFILING && timer_hook) | ||
440 | timer_hook(regs); | ||
441 | if (!user_mode(regs) && prof_cpu_mask != NULL && | 417 | if (!user_mode(regs) && prof_cpu_mask != NULL && |
442 | cpumask_test_cpu(smp_processor_id(), prof_cpu_mask)) | 418 | cpumask_test_cpu(smp_processor_id(), prof_cpu_mask)) |
443 | profile_hit(type, (void *)profile_pc(regs)); | 419 | profile_hit(type, (void *)profile_pc(regs)); |
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 6cbeaae4406d..acbd28424d81 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -712,6 +712,12 @@ static int ptrace_regset(struct task_struct *task, int req, unsigned int type, | |||
712 | kiov->iov_len, kiov->iov_base); | 712 | kiov->iov_len, kiov->iov_base); |
713 | } | 713 | } |
714 | 714 | ||
715 | /* | ||
716 | * This is declared in linux/regset.h and defined in machine-dependent | ||
717 | * code. We put the export here, near the primary machine-neutral use, | ||
718 | * to ensure no machine forgets it. | ||
719 | */ | ||
720 | EXPORT_SYMBOL_GPL(task_user_regset_view); | ||
715 | #endif | 721 | #endif |
716 | 722 | ||
717 | int ptrace_request(struct task_struct *child, long request, | 723 | int ptrace_request(struct task_struct *child, long request, |
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 5d89335a485f..36567564e221 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -39,6 +39,9 @@ config HAVE_DYNAMIC_FTRACE | |||
39 | help | 39 | help |
40 | See Documentation/trace/ftrace-design.txt | 40 | See Documentation/trace/ftrace-design.txt |
41 | 41 | ||
42 | config HAVE_DYNAMIC_FTRACE_WITH_REGS | ||
43 | bool | ||
44 | |||
42 | config HAVE_FTRACE_MCOUNT_RECORD | 45 | config HAVE_FTRACE_MCOUNT_RECORD |
43 | bool | 46 | bool |
44 | help | 47 | help |
@@ -250,6 +253,16 @@ config FTRACE_SYSCALLS | |||
250 | help | 253 | help |
251 | Basic tracer to catch the syscall entry and exit events. | 254 | Basic tracer to catch the syscall entry and exit events. |
252 | 255 | ||
256 | config TRACER_SNAPSHOT | ||
257 | bool "Create a snapshot trace buffer" | ||
258 | select TRACER_MAX_TRACE | ||
259 | help | ||
260 | Allow tracing users to take snapshot of the current buffer using the | ||
261 | ftrace interface, e.g.: | ||
262 | |||
263 | echo 1 > /sys/kernel/debug/tracing/snapshot | ||
264 | cat snapshot | ||
265 | |||
253 | config TRACE_BRANCH_PROFILING | 266 | config TRACE_BRANCH_PROFILING |
254 | bool | 267 | bool |
255 | select GENERIC_TRACER | 268 | select GENERIC_TRACER |
@@ -434,6 +447,11 @@ config DYNAMIC_FTRACE | |||
434 | were made. If so, it runs stop_machine (stops all CPUS) | 447 | were made. If so, it runs stop_machine (stops all CPUS) |
435 | and modifies the code to jump over the call to ftrace. | 448 | and modifies the code to jump over the call to ftrace. |
436 | 449 | ||
450 | config DYNAMIC_FTRACE_WITH_REGS | ||
451 | def_bool y | ||
452 | depends on DYNAMIC_FTRACE | ||
453 | depends on HAVE_DYNAMIC_FTRACE_WITH_REGS | ||
454 | |||
437 | config FUNCTION_PROFILER | 455 | config FUNCTION_PROFILER |
438 | bool "Kernel function profiler" | 456 | bool "Kernel function profiler" |
439 | depends on FUNCTION_TRACER | 457 | depends on FUNCTION_TRACER |
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index c0bd0308741c..71259e2b6b61 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -147,7 +147,7 @@ void __trace_note_message(struct blk_trace *bt, const char *fmt, ...) | |||
147 | return; | 147 | return; |
148 | 148 | ||
149 | local_irq_save(flags); | 149 | local_irq_save(flags); |
150 | buf = per_cpu_ptr(bt->msg_data, smp_processor_id()); | 150 | buf = this_cpu_ptr(bt->msg_data); |
151 | va_start(args, fmt); | 151 | va_start(args, fmt); |
152 | n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args); | 152 | n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args); |
153 | va_end(args); | 153 | va_end(args); |
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 41473b4ad7a4..ce8c3d68292f 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -111,6 +111,26 @@ static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip); | |||
111 | #define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops) | 111 | #define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops) |
112 | #endif | 112 | #endif |
113 | 113 | ||
114 | /* | ||
115 | * Traverse the ftrace_global_list, invoking all entries. The reason that we | ||
116 | * can use rcu_dereference_raw() is that elements removed from this list | ||
117 | * are simply leaked, so there is no need to interact with a grace-period | ||
118 | * mechanism. The rcu_dereference_raw() calls are needed to handle | ||
119 | * concurrent insertions into the ftrace_global_list. | ||
120 | * | ||
121 | * Silly Alpha and silly pointer-speculation compiler optimizations! | ||
122 | */ | ||
123 | #define do_for_each_ftrace_op(op, list) \ | ||
124 | op = rcu_dereference_raw(list); \ | ||
125 | do | ||
126 | |||
127 | /* | ||
128 | * Optimized for just a single item in the list (as that is the normal case). | ||
129 | */ | ||
130 | #define while_for_each_ftrace_op(op) \ | ||
131 | while (likely(op = rcu_dereference_raw((op)->next)) && \ | ||
132 | unlikely((op) != &ftrace_list_end)) | ||
133 | |||
114 | /** | 134 | /** |
115 | * ftrace_nr_registered_ops - return number of ops registered | 135 | * ftrace_nr_registered_ops - return number of ops registered |
116 | * | 136 | * |
@@ -132,29 +152,21 @@ int ftrace_nr_registered_ops(void) | |||
132 | return cnt; | 152 | return cnt; |
133 | } | 153 | } |
134 | 154 | ||
135 | /* | ||
136 | * Traverse the ftrace_global_list, invoking all entries. The reason that we | ||
137 | * can use rcu_dereference_raw() is that elements removed from this list | ||
138 | * are simply leaked, so there is no need to interact with a grace-period | ||
139 | * mechanism. The rcu_dereference_raw() calls are needed to handle | ||
140 | * concurrent insertions into the ftrace_global_list. | ||
141 | * | ||
142 | * Silly Alpha and silly pointer-speculation compiler optimizations! | ||
143 | */ | ||
144 | static void | 155 | static void |
145 | ftrace_global_list_func(unsigned long ip, unsigned long parent_ip, | 156 | ftrace_global_list_func(unsigned long ip, unsigned long parent_ip, |
146 | struct ftrace_ops *op, struct pt_regs *regs) | 157 | struct ftrace_ops *op, struct pt_regs *regs) |
147 | { | 158 | { |
148 | if (unlikely(trace_recursion_test(TRACE_GLOBAL_BIT))) | 159 | int bit; |
160 | |||
161 | bit = trace_test_and_set_recursion(TRACE_GLOBAL_START, TRACE_GLOBAL_MAX); | ||
162 | if (bit < 0) | ||
149 | return; | 163 | return; |
150 | 164 | ||
151 | trace_recursion_set(TRACE_GLOBAL_BIT); | 165 | do_for_each_ftrace_op(op, ftrace_global_list) { |
152 | op = rcu_dereference_raw(ftrace_global_list); /*see above*/ | ||
153 | while (op != &ftrace_list_end) { | ||
154 | op->func(ip, parent_ip, op, regs); | 166 | op->func(ip, parent_ip, op, regs); |
155 | op = rcu_dereference_raw(op->next); /*see above*/ | 167 | } while_for_each_ftrace_op(op); |
156 | }; | 168 | |
157 | trace_recursion_clear(TRACE_GLOBAL_BIT); | 169 | trace_clear_recursion(bit); |
158 | } | 170 | } |
159 | 171 | ||
160 | static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip, | 172 | static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip, |
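The `do_for_each_ftrace_op()`/`while_for_each_ftrace_op()` pair introduced above folds the ops-list walk into one place: callers write `do_for_each_ftrace_op(op, list) { ... } while_for_each_ftrace_op(op);`, and the body runs for the head entry before the first `->next` dereference, which is safe because the list is terminated by `ftrace_list_end`, whose callback is a do-nothing stub. Below is a compact userspace version of the same begin/end macro shape, with the RCU dereferences dropped, so it sketches the control flow rather than the lifetime rules.

```c
#include <stdio.h>

struct op {
	void (*func)(int);
	struct op *next;
};

static void stub_func(int v) { (void)v; }	/* the sentinel's harmless callback */

/* Sentinel terminating the list, like ftrace_list_end. */
static struct op list_end = { stub_func, NULL };
static struct op *op_list = &list_end;

/*
 * Begin/end pair in the shape of do_for_each_ftrace_op() /
 * while_for_each_ftrace_op(); the RCU dereferences are omitted here.
 */
#define do_for_each_op(op, list)	\
	op = (list);			\
	do

#define while_for_each_op(op)		\
	while ((op = (op)->next) && (op) != &list_end)

static void add_op(struct op *new_op)
{
	new_op->next = op_list;
	op_list = new_op;
}

static void print_func(int v)
{
	printf("op called with %d\n", v);
}

int main(void)
{
	struct op a = { print_func, NULL };
	struct op b = { print_func, NULL };
	struct op *op;

	add_op(&a);
	add_op(&b);

	/* Body runs for b, then a; the walk stops at the sentinel. */
	do_for_each_op(op, op_list) {
		op->func(42);
	} while_for_each_op(op);

	return 0;
}
```

When no ops are registered the list head is the sentinel itself, so the body runs once on the stub instead of needing an explicit emptiness check, which is the same property the ftrace list relies on.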
@@ -221,10 +233,24 @@ static void update_global_ops(void) | |||
221 | * registered callers. | 233 | * registered callers. |
222 | */ | 234 | */ |
223 | if (ftrace_global_list == &ftrace_list_end || | 235 | if (ftrace_global_list == &ftrace_list_end || |
224 | ftrace_global_list->next == &ftrace_list_end) | 236 | ftrace_global_list->next == &ftrace_list_end) { |
225 | func = ftrace_global_list->func; | 237 | func = ftrace_global_list->func; |
226 | else | 238 | /* |
239 | * As we are calling the function directly. | ||
240 | * If it does not have recursion protection, | ||
241 | * the function_trace_op needs to be updated | ||
242 | * accordingly. | ||
243 | */ | ||
244 | if (ftrace_global_list->flags & FTRACE_OPS_FL_RECURSION_SAFE) | ||
245 | global_ops.flags |= FTRACE_OPS_FL_RECURSION_SAFE; | ||
246 | else | ||
247 | global_ops.flags &= ~FTRACE_OPS_FL_RECURSION_SAFE; | ||
248 | } else { | ||
227 | func = ftrace_global_list_func; | 249 | func = ftrace_global_list_func; |
250 | /* The list has its own recursion protection. */ | ||
251 | global_ops.flags |= FTRACE_OPS_FL_RECURSION_SAFE; | ||
252 | } | ||
253 | |||
228 | 254 | ||
229 | /* If we filter on pids, update to use the pid function */ | 255 | /* If we filter on pids, update to use the pid function */ |
230 | if (!list_empty(&ftrace_pids)) { | 256 | if (!list_empty(&ftrace_pids)) { |
@@ -337,7 +363,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops) | |||
337 | if ((ops->flags & FL_GLOBAL_CONTROL_MASK) == FL_GLOBAL_CONTROL_MASK) | 363 | if ((ops->flags & FL_GLOBAL_CONTROL_MASK) == FL_GLOBAL_CONTROL_MASK) |
338 | return -EINVAL; | 364 | return -EINVAL; |
339 | 365 | ||
340 | #ifndef ARCH_SUPPORTS_FTRACE_SAVE_REGS | 366 | #ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS |
341 | /* | 367 | /* |
342 | * If the ftrace_ops specifies SAVE_REGS, then it only can be used | 368 | * If the ftrace_ops specifies SAVE_REGS, then it only can be used |
343 | * if the arch supports it, or SAVE_REGS_IF_SUPPORTED is also set. | 369 | * if the arch supports it, or SAVE_REGS_IF_SUPPORTED is also set. |
@@ -4090,14 +4116,11 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip, | |||
4090 | */ | 4116 | */ |
4091 | preempt_disable_notrace(); | 4117 | preempt_disable_notrace(); |
4092 | trace_recursion_set(TRACE_CONTROL_BIT); | 4118 | trace_recursion_set(TRACE_CONTROL_BIT); |
4093 | op = rcu_dereference_raw(ftrace_control_list); | 4119 | do_for_each_ftrace_op(op, ftrace_control_list) { |
4094 | while (op != &ftrace_list_end) { | ||
4095 | if (!ftrace_function_local_disabled(op) && | 4120 | if (!ftrace_function_local_disabled(op) && |
4096 | ftrace_ops_test(op, ip)) | 4121 | ftrace_ops_test(op, ip)) |
4097 | op->func(ip, parent_ip, op, regs); | 4122 | op->func(ip, parent_ip, op, regs); |
4098 | 4123 | } while_for_each_ftrace_op(op); | |
4099 | op = rcu_dereference_raw(op->next); | ||
4100 | }; | ||
4101 | trace_recursion_clear(TRACE_CONTROL_BIT); | 4124 | trace_recursion_clear(TRACE_CONTROL_BIT); |
4102 | preempt_enable_notrace(); | 4125 | preempt_enable_notrace(); |
4103 | } | 4126 | } |
@@ -4112,27 +4135,26 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, | |||
4112 | struct ftrace_ops *ignored, struct pt_regs *regs) | 4135 | struct ftrace_ops *ignored, struct pt_regs *regs) |
4113 | { | 4136 | { |
4114 | struct ftrace_ops *op; | 4137 | struct ftrace_ops *op; |
4138 | int bit; | ||
4115 | 4139 | ||
4116 | if (function_trace_stop) | 4140 | if (function_trace_stop) |
4117 | return; | 4141 | return; |
4118 | 4142 | ||
4119 | if (unlikely(trace_recursion_test(TRACE_INTERNAL_BIT))) | 4143 | bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX); |
4144 | if (bit < 0) | ||
4120 | return; | 4145 | return; |
4121 | 4146 | ||
4122 | trace_recursion_set(TRACE_INTERNAL_BIT); | ||
4123 | /* | 4147 | /* |
4124 | * Some of the ops may be dynamically allocated, | 4148 | * Some of the ops may be dynamically allocated, |
4125 | * they must be freed after a synchronize_sched(). | 4149 | * they must be freed after a synchronize_sched(). |
4126 | */ | 4150 | */ |
4127 | preempt_disable_notrace(); | 4151 | preempt_disable_notrace(); |
4128 | op = rcu_dereference_raw(ftrace_ops_list); | 4152 | do_for_each_ftrace_op(op, ftrace_ops_list) { |
4129 | while (op != &ftrace_list_end) { | ||
4130 | if (ftrace_ops_test(op, ip)) | 4153 | if (ftrace_ops_test(op, ip)) |
4131 | op->func(ip, parent_ip, op, regs); | 4154 | op->func(ip, parent_ip, op, regs); |
4132 | op = rcu_dereference_raw(op->next); | 4155 | } while_for_each_ftrace_op(op); |
4133 | }; | ||
4134 | preempt_enable_notrace(); | 4156 | preempt_enable_notrace(); |
4135 | trace_recursion_clear(TRACE_INTERNAL_BIT); | 4157 | trace_clear_recursion(bit); |
4136 | } | 4158 | } |
4137 | 4159 | ||
4138 | /* | 4160 | /* |
@@ -4143,8 +4165,8 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, | |||
4143 | * Archs are to support both the regs and ftrace_ops at the same time. | 4165 | * Archs are to support both the regs and ftrace_ops at the same time. |
4144 | * If they support ftrace_ops, it is assumed they support regs. | 4166 | * If they support ftrace_ops, it is assumed they support regs. |
4145 | * If call backs want to use regs, they must either check for regs | 4167 | * If call backs want to use regs, they must either check for regs |
4146 | * being NULL, or ARCH_SUPPORTS_FTRACE_SAVE_REGS. | 4168 | * being NULL, or CONFIG_DYNAMIC_FTRACE_WITH_REGS. |
4147 | * Note, ARCH_SUPPORT_SAVE_REGS expects a full regs to be saved. | 4169 | * Note, CONFIG_DYNAMIC_FTRACE_WITH_REGS expects a full regs to be saved. |
4148 | * An architecture can pass partial regs with ftrace_ops and still | 4170 | * An architecture can pass partial regs with ftrace_ops and still |
4149 | * set the ARCH_SUPPORT_FTARCE_OPS. | 4171 | * set the ARCH_SUPPORT_FTARCE_OPS. |
4150 | */ | 4172 | */ |
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index ce8514feedcd..7244acde77b0 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
@@ -3,8 +3,10 @@ | |||
3 | * | 3 | * |
4 | * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com> | 4 | * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com> |
5 | */ | 5 | */ |
6 | #include <linux/ftrace_event.h> | ||
6 | #include <linux/ring_buffer.h> | 7 | #include <linux/ring_buffer.h> |
7 | #include <linux/trace_clock.h> | 8 | #include <linux/trace_clock.h> |
9 | #include <linux/trace_seq.h> | ||
8 | #include <linux/spinlock.h> | 10 | #include <linux/spinlock.h> |
9 | #include <linux/debugfs.h> | 11 | #include <linux/debugfs.h> |
10 | #include <linux/uaccess.h> | 12 | #include <linux/uaccess.h> |
@@ -21,7 +23,6 @@ | |||
21 | #include <linux/fs.h> | 23 | #include <linux/fs.h> |
22 | 24 | ||
23 | #include <asm/local.h> | 25 | #include <asm/local.h> |
24 | #include "trace.h" | ||
25 | 26 | ||
26 | static void update_pages_handler(struct work_struct *work); | 27 | static void update_pages_handler(struct work_struct *work); |
27 | 28 | ||
@@ -2432,41 +2433,76 @@ rb_reserve_next_event(struct ring_buffer *buffer, | |||
2432 | 2433 | ||
2433 | #ifdef CONFIG_TRACING | 2434 | #ifdef CONFIG_TRACING |
2434 | 2435 | ||
2435 | #define TRACE_RECURSIVE_DEPTH 16 | 2436 | /* |
2437 | * The lock and unlock are done within a preempt disable section. | ||
2438 | * The current_context per_cpu variable can only be modified | ||
2439 | * by the current task between lock and unlock. But it can | ||
2440 | * be modified more than once via an interrupt. To pass this | ||
2441 | * information from the lock to the unlock without having to | ||
2442 | * access the 'in_interrupt()' functions again (which do show | ||
2443 | * a bit of overhead in something as critical as function tracing), | ||
2444 | * we use a bitmask trick. | ||
2445 | * | ||
2446 | * bit 0 = NMI context | ||
2447 | * bit 1 = IRQ context | ||
2448 | * bit 2 = SoftIRQ context | ||
2449 | * bit 3 = normal context. | ||
2450 | * | ||
2451 | * This works because this is the order of contexts that can | ||
2452 | * preempt other contexts. A SoftIRQ never preempts an IRQ | ||
2453 | * context. | ||
2454 | * | ||
2455 | * When the context is determined, the corresponding bit is | ||
2456 | * checked and set (if it was set, then a recursion of that context | ||
2457 | * happened). | ||
2458 | * | ||
2459 | * On unlock, we need to clear this bit. To do so, just subtract | ||
2460 | * 1 from the current_context and AND it to itself. | ||
2461 | * | ||
2462 | * (binary) | ||
2463 | * 101 - 1 = 100 | ||
2464 | * 101 & 100 = 100 (clearing bit zero) | ||
2465 | * | ||
2466 | * 1010 - 1 = 1001 | ||
2467 | * 1010 & 1001 = 1000 (clearing bit 1) | ||
2468 | * | ||
2469 | * The least significant bit can be cleared this way, and it | ||
2470 | * just so happens that it is the same bit corresponding to | ||
2471 | * the current context. | ||
2472 | */ | ||
2473 | static DEFINE_PER_CPU(unsigned int, current_context); | ||
2436 | 2474 | ||
2437 | /* Keep this code out of the fast path cache */ | 2475 | static __always_inline int trace_recursive_lock(void) |
2438 | static noinline void trace_recursive_fail(void) | ||
2439 | { | 2476 | { |
2440 | /* Disable all tracing before we do anything else */ | 2477 | unsigned int val = this_cpu_read(current_context); |
2441 | tracing_off_permanent(); | 2478 | int bit; |
2442 | |||
2443 | printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:" | ||
2444 | "HC[%lu]:SC[%lu]:NMI[%lu]\n", | ||
2445 | trace_recursion_buffer(), | ||
2446 | hardirq_count() >> HARDIRQ_SHIFT, | ||
2447 | softirq_count() >> SOFTIRQ_SHIFT, | ||
2448 | in_nmi()); | ||
2449 | |||
2450 | WARN_ON_ONCE(1); | ||
2451 | } | ||
2452 | 2479 | ||
2453 | static inline int trace_recursive_lock(void) | 2480 | if (in_interrupt()) { |
2454 | { | 2481 | if (in_nmi()) |
2455 | trace_recursion_inc(); | 2482 | bit = 0; |
2483 | else if (in_irq()) | ||
2484 | bit = 1; | ||
2485 | else | ||
2486 | bit = 2; | ||
2487 | } else | ||
2488 | bit = 3; | ||
2456 | 2489 | ||
2457 | if (likely(trace_recursion_buffer() < TRACE_RECURSIVE_DEPTH)) | 2490 | if (unlikely(val & (1 << bit))) |
2458 | return 0; | 2491 | return 1; |
2459 | 2492 | ||
2460 | trace_recursive_fail(); | 2493 | val |= (1 << bit); |
2494 | this_cpu_write(current_context, val); | ||
2461 | 2495 | ||
2462 | return -1; | 2496 | return 0; |
2463 | } | 2497 | } |
2464 | 2498 | ||
2465 | static inline void trace_recursive_unlock(void) | 2499 | static __always_inline void trace_recursive_unlock(void) |
2466 | { | 2500 | { |
2467 | WARN_ON_ONCE(!trace_recursion_buffer()); | 2501 | unsigned int val = this_cpu_read(current_context); |
2468 | 2502 | ||
2469 | trace_recursion_dec(); | 2503 | val--; |
2504 | val &= this_cpu_read(current_context); | ||
2505 | this_cpu_write(current_context, val); | ||
2470 | } | 2506 | } |
2471 | 2507 | ||
2472 | #else | 2508 | #else |
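
The comment block above is easier to trust once the arithmetic is tried out. Below is a minimal user-space sketch (not kernel code): the per-cpu current_context and the in_nmi()/in_irq() tests are replaced by a plain integer and an explicit bit argument, purely to show that, because contexts nest in a fixed order, "subtract 1 and AND" always clears the bit of the innermost context.

#include <assert.h>
#include <stdio.h>

static unsigned int current_context;	/* stand-in for the per-cpu variable */

/* bit: 0 = NMI, 1 = IRQ, 2 = SoftIRQ, 3 = normal context */
static int recursive_lock(int bit)
{
	if (current_context & (1U << bit))
		return 1;			/* recursion within the same context */
	current_context |= 1U << bit;
	return 0;
}

static void recursive_unlock(void)
{
	/* clear the lowest set bit, i.e. the context locked most recently */
	current_context = (current_context - 1) & current_context;
}

int main(void)
{
	assert(recursive_lock(3) == 0);	/* normal context sets bit 3 (1000) */
	assert(recursive_lock(1) == 0);	/* a nested IRQ sets bit 1  (1010) */
	assert(recursive_lock(1) == 1);	/* the same IRQ context recursing is caught */
	recursive_unlock();		/* 1010 -> 1000: bit 1 cleared */
	recursive_unlock();		/* 1000 -> 0000: bit 3 cleared */
	printf("final mask: %u\n", current_context);
	return 0;
}

Running it prints a final mask of 0, confirming that each unlock peels off exactly the context that took the lock last.
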
@@ -3067,6 +3103,24 @@ ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu) | |||
3067 | EXPORT_SYMBOL_GPL(ring_buffer_dropped_events_cpu); | 3103 | EXPORT_SYMBOL_GPL(ring_buffer_dropped_events_cpu); |
3068 | 3104 | ||
3069 | /** | 3105 | /** |
3106 | * ring_buffer_read_events_cpu - get the number of events successfully read | ||
3107 | * @buffer: The ring buffer | ||
3108 | * @cpu: The per CPU buffer to get the number of events read | ||
3109 | */ | ||
3110 | unsigned long | ||
3111 | ring_buffer_read_events_cpu(struct ring_buffer *buffer, int cpu) | ||
3112 | { | ||
3113 | struct ring_buffer_per_cpu *cpu_buffer; | ||
3114 | |||
3115 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | ||
3116 | return 0; | ||
3117 | |||
3118 | cpu_buffer = buffer->buffers[cpu]; | ||
3119 | return cpu_buffer->read; | ||
3120 | } | ||
3121 | EXPORT_SYMBOL_GPL(ring_buffer_read_events_cpu); | ||
3122 | |||
3123 | /** | ||
3070 | * ring_buffer_entries - get the number of entries in a buffer | 3124 | * ring_buffer_entries - get the number of entries in a buffer |
3071 | * @buffer: The ring buffer | 3125 | * @buffer: The ring buffer |
3072 | * | 3126 | * |
@@ -3425,7 +3479,7 @@ static void rb_advance_iter(struct ring_buffer_iter *iter) | |||
3425 | /* check for end of page padding */ | 3479 | /* check for end of page padding */ |
3426 | if ((iter->head >= rb_page_size(iter->head_page)) && | 3480 | if ((iter->head >= rb_page_size(iter->head_page)) && |
3427 | (iter->head_page != cpu_buffer->commit_page)) | 3481 | (iter->head_page != cpu_buffer->commit_page)) |
3428 | rb_advance_iter(iter); | 3482 | rb_inc_iter(iter); |
3429 | } | 3483 | } |
3430 | 3484 | ||
3431 | static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer) | 3485 | static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer) |
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3c13e46d7d24..5d520b7bb4c5 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
@@ -249,7 +249,7 @@ static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT; | |||
249 | static struct tracer *trace_types __read_mostly; | 249 | static struct tracer *trace_types __read_mostly; |
250 | 250 | ||
251 | /* current_trace points to the tracer that is currently active */ | 251 | /* current_trace points to the tracer that is currently active */ |
252 | static struct tracer *current_trace __read_mostly; | 252 | static struct tracer *current_trace __read_mostly = &nop_trace; |
253 | 253 | ||
254 | /* | 254 | /* |
255 | * trace_types_lock is used to protect the trace_types list. | 255 | * trace_types_lock is used to protect the trace_types list. |
@@ -709,10 +709,13 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) | |||
709 | return; | 709 | return; |
710 | 710 | ||
711 | WARN_ON_ONCE(!irqs_disabled()); | 711 | WARN_ON_ONCE(!irqs_disabled()); |
712 | if (!current_trace->use_max_tr) { | 712 | |
713 | WARN_ON_ONCE(1); | 713 | if (!current_trace->allocated_snapshot) { |
714 | /* Only the nop tracer should hit this when disabling */ | ||
715 | WARN_ON_ONCE(current_trace != &nop_trace); | ||
714 | return; | 716 | return; |
715 | } | 717 | } |
718 | |||
716 | arch_spin_lock(&ftrace_max_lock); | 719 | arch_spin_lock(&ftrace_max_lock); |
717 | 720 | ||
718 | tr->buffer = max_tr.buffer; | 721 | tr->buffer = max_tr.buffer; |
@@ -739,10 +742,8 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) | |||
739 | return; | 742 | return; |
740 | 743 | ||
741 | WARN_ON_ONCE(!irqs_disabled()); | 744 | WARN_ON_ONCE(!irqs_disabled()); |
742 | if (!current_trace->use_max_tr) { | 745 | if (WARN_ON_ONCE(!current_trace->allocated_snapshot)) |
743 | WARN_ON_ONCE(1); | ||
744 | return; | 746 | return; |
745 | } | ||
746 | 747 | ||
747 | arch_spin_lock(&ftrace_max_lock); | 748 | arch_spin_lock(&ftrace_max_lock); |
748 | 749 | ||
@@ -862,10 +863,13 @@ int register_tracer(struct tracer *type) | |||
862 | 863 | ||
863 | current_trace = type; | 864 | current_trace = type; |
864 | 865 | ||
865 | /* If we expanded the buffers, make sure the max is expanded too */ | 866 | if (type->use_max_tr) { |
866 | if (ring_buffer_expanded && type->use_max_tr) | 867 | /* If we expanded the buffers, make sure the max is expanded too */ |
867 | ring_buffer_resize(max_tr.buffer, trace_buf_size, | 868 | if (ring_buffer_expanded) |
868 | RING_BUFFER_ALL_CPUS); | 869 | ring_buffer_resize(max_tr.buffer, trace_buf_size, |
870 | RING_BUFFER_ALL_CPUS); | ||
871 | type->allocated_snapshot = true; | ||
872 | } | ||
869 | 873 | ||
870 | /* the test is responsible for initializing and enabling */ | 874 | /* the test is responsible for initializing and enabling */ |
871 | pr_info("Testing tracer %s: ", type->name); | 875 | pr_info("Testing tracer %s: ", type->name); |
@@ -881,10 +885,14 @@ int register_tracer(struct tracer *type) | |||
881 | /* Only reset on passing, to avoid touching corrupted buffers */ | 885 | /* Only reset on passing, to avoid touching corrupted buffers */ |
882 | tracing_reset_online_cpus(tr); | 886 | tracing_reset_online_cpus(tr); |
883 | 887 | ||
884 | /* Shrink the max buffer again */ | 888 | if (type->use_max_tr) { |
885 | if (ring_buffer_expanded && type->use_max_tr) | 889 | type->allocated_snapshot = false; |
886 | ring_buffer_resize(max_tr.buffer, 1, | 890 | |
887 | RING_BUFFER_ALL_CPUS); | 891 | /* Shrink the max buffer again */ |
892 | if (ring_buffer_expanded) | ||
893 | ring_buffer_resize(max_tr.buffer, 1, | ||
894 | RING_BUFFER_ALL_CPUS); | ||
895 | } | ||
888 | 896 | ||
889 | printk(KERN_CONT "PASSED\n"); | 897 | printk(KERN_CONT "PASSED\n"); |
890 | } | 898 | } |
@@ -922,6 +930,9 @@ void tracing_reset(struct trace_array *tr, int cpu) | |||
922 | { | 930 | { |
923 | struct ring_buffer *buffer = tr->buffer; | 931 | struct ring_buffer *buffer = tr->buffer; |
924 | 932 | ||
933 | if (!buffer) | ||
934 | return; | ||
935 | |||
925 | ring_buffer_record_disable(buffer); | 936 | ring_buffer_record_disable(buffer); |
926 | 937 | ||
927 | /* Make sure all commits have finished */ | 938 | /* Make sure all commits have finished */ |
@@ -936,6 +947,9 @@ void tracing_reset_online_cpus(struct trace_array *tr) | |||
936 | struct ring_buffer *buffer = tr->buffer; | 947 | struct ring_buffer *buffer = tr->buffer; |
937 | int cpu; | 948 | int cpu; |
938 | 949 | ||
950 | if (!buffer) | ||
951 | return; | ||
952 | |||
939 | ring_buffer_record_disable(buffer); | 953 | ring_buffer_record_disable(buffer); |
940 | 954 | ||
941 | /* Make sure all commits have finished */ | 955 | /* Make sure all commits have finished */ |
@@ -1167,7 +1181,6 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, | |||
1167 | 1181 | ||
1168 | entry->preempt_count = pc & 0xff; | 1182 | entry->preempt_count = pc & 0xff; |
1169 | entry->pid = (tsk) ? tsk->pid : 0; | 1183 | entry->pid = (tsk) ? tsk->pid : 0; |
1170 | entry->padding = 0; | ||
1171 | entry->flags = | 1184 | entry->flags = |
1172 | #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT | 1185 | #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT |
1173 | (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | | 1186 | (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | |
@@ -1335,7 +1348,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer, | |||
1335 | */ | 1348 | */ |
1336 | preempt_disable_notrace(); | 1349 | preempt_disable_notrace(); |
1337 | 1350 | ||
1338 | use_stack = ++__get_cpu_var(ftrace_stack_reserve); | 1351 | use_stack = __this_cpu_inc_return(ftrace_stack_reserve); |
1339 | /* | 1352 | /* |
1340 | * We don't need any atomic variables, just a barrier. | 1353 | * We don't need any atomic variables, just a barrier. |
1341 | * If an interrupt comes in, we don't care, because it would | 1354 | * If an interrupt comes in, we don't care, because it would |
@@ -1389,7 +1402,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer, | |||
1389 | out: | 1402 | out: |
1390 | /* Again, don't let gcc optimize things here */ | 1403 | /* Again, don't let gcc optimize things here */ |
1391 | barrier(); | 1404 | barrier(); |
1392 | __get_cpu_var(ftrace_stack_reserve)--; | 1405 | __this_cpu_dec(ftrace_stack_reserve); |
1393 | preempt_enable_notrace(); | 1406 | preempt_enable_notrace(); |
1394 | 1407 | ||
1395 | } | 1408 | } |
@@ -1517,7 +1530,6 @@ static struct trace_buffer_struct *trace_percpu_nmi_buffer; | |||
1517 | static char *get_trace_buf(void) | 1530 | static char *get_trace_buf(void) |
1518 | { | 1531 | { |
1519 | struct trace_buffer_struct *percpu_buffer; | 1532 | struct trace_buffer_struct *percpu_buffer; |
1520 | struct trace_buffer_struct *buffer; | ||
1521 | 1533 | ||
1522 | /* | 1534 | /* |
1523 | * If we have allocated per cpu buffers, then we do not | 1535 | * If we have allocated per cpu buffers, then we do not |
@@ -1535,9 +1547,7 @@ static char *get_trace_buf(void) | |||
1535 | if (!percpu_buffer) | 1547 | if (!percpu_buffer) |
1536 | return NULL; | 1548 | return NULL; |
1537 | 1549 | ||
1538 | buffer = per_cpu_ptr(percpu_buffer, smp_processor_id()); | 1550 | return this_cpu_ptr(&percpu_buffer->buffer[0]); |
1539 | |||
1540 | return buffer->buffer; | ||
1541 | } | 1551 | } |
1542 | 1552 | ||
1543 | static int alloc_percpu_trace_buffer(void) | 1553 | static int alloc_percpu_trace_buffer(void) |
@@ -1942,21 +1952,27 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu) | |||
1942 | static void *s_start(struct seq_file *m, loff_t *pos) | 1952 | static void *s_start(struct seq_file *m, loff_t *pos) |
1943 | { | 1953 | { |
1944 | struct trace_iterator *iter = m->private; | 1954 | struct trace_iterator *iter = m->private; |
1945 | static struct tracer *old_tracer; | ||
1946 | int cpu_file = iter->cpu_file; | 1955 | int cpu_file = iter->cpu_file; |
1947 | void *p = NULL; | 1956 | void *p = NULL; |
1948 | loff_t l = 0; | 1957 | loff_t l = 0; |
1949 | int cpu; | 1958 | int cpu; |
1950 | 1959 | ||
1951 | /* copy the tracer to avoid using a global lock all around */ | 1960 | /* |
1961 | * copy the tracer to avoid using a global lock all around. | ||
1962 | * iter->trace is a copy of current_trace, the pointer to the | ||
1963 | * name may be used instead of a strcmp(), as iter->trace->name | ||
1964 | * will point to the same string as current_trace->name. | ||
1965 | */ | ||
1952 | mutex_lock(&trace_types_lock); | 1966 | mutex_lock(&trace_types_lock); |
1953 | if (unlikely(old_tracer != current_trace && current_trace)) { | 1967 | if (unlikely(current_trace && iter->trace->name != current_trace->name)) |
1954 | old_tracer = current_trace; | ||
1955 | *iter->trace = *current_trace; | 1968 | *iter->trace = *current_trace; |
1956 | } | ||
1957 | mutex_unlock(&trace_types_lock); | 1969 | mutex_unlock(&trace_types_lock); |
1958 | 1970 | ||
1959 | atomic_inc(&trace_record_cmdline_disabled); | 1971 | if (iter->snapshot && iter->trace->use_max_tr) |
1972 | return ERR_PTR(-EBUSY); | ||
1973 | |||
1974 | if (!iter->snapshot) | ||
1975 | atomic_inc(&trace_record_cmdline_disabled); | ||
1960 | 1976 | ||
1961 | if (*pos != iter->pos) { | 1977 | if (*pos != iter->pos) { |
1962 | iter->ent = NULL; | 1978 | iter->ent = NULL; |
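
The new comment in s_start() relies on a property that is easy to demonstrate in isolation: copying the tracer struct copies the name pointer, not the string, so a pointer comparison is enough to notice that a different tracer was installed. A small user-space sketch under that assumption (the tracer names and structs here are made up for illustration, not the kernel's):

#include <stdio.h>
#include <string.h>

struct tracer { const char *name; };

int main(void)
{
	struct tracer nop = { "nop" }, function = { "function" };
	struct tracer *current_trace = &nop;
	struct tracer iter_trace = *current_trace;	/* like *iter->trace = *current_trace */

	/* same tracer: the copied name pointer still aliases the same string */
	printf("same tracer:    %d\n", iter_trace.name == current_trace->name);

	current_trace = &function;			/* tracer switched elsewhere */

	/* pointer inequality detects the switch, just as strcmp() would */
	printf("tracer changed: %d\n", iter_trace.name != current_trace->name);
	printf("strcmp agrees:  %d\n", strcmp(iter_trace.name, current_trace->name) != 0);
	return 0;
}
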
@@ -1995,7 +2011,11 @@ static void s_stop(struct seq_file *m, void *p) | |||
1995 | { | 2011 | { |
1996 | struct trace_iterator *iter = m->private; | 2012 | struct trace_iterator *iter = m->private; |
1997 | 2013 | ||
1998 | atomic_dec(&trace_record_cmdline_disabled); | 2014 | if (iter->snapshot && iter->trace->use_max_tr) |
2015 | return; | ||
2016 | |||
2017 | if (!iter->snapshot) | ||
2018 | atomic_dec(&trace_record_cmdline_disabled); | ||
1999 | trace_access_unlock(iter->cpu_file); | 2019 | trace_access_unlock(iter->cpu_file); |
2000 | trace_event_read_unlock(); | 2020 | trace_event_read_unlock(); |
2001 | } | 2021 | } |
@@ -2080,8 +2100,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) | |||
2080 | unsigned long total; | 2100 | unsigned long total; |
2081 | const char *name = "preemption"; | 2101 | const char *name = "preemption"; |
2082 | 2102 | ||
2083 | if (type) | 2103 | name = type->name; |
2084 | name = type->name; | ||
2085 | 2104 | ||
2086 | get_total_entries(tr, &total, &entries); | 2105 | get_total_entries(tr, &total, &entries); |
2087 | 2106 | ||
@@ -2430,7 +2449,7 @@ static const struct seq_operations tracer_seq_ops = { | |||
2430 | }; | 2449 | }; |
2431 | 2450 | ||
2432 | static struct trace_iterator * | 2451 | static struct trace_iterator * |
2433 | __tracing_open(struct inode *inode, struct file *file) | 2452 | __tracing_open(struct inode *inode, struct file *file, bool snapshot) |
2434 | { | 2453 | { |
2435 | long cpu_file = (long) inode->i_private; | 2454 | long cpu_file = (long) inode->i_private; |
2436 | struct trace_iterator *iter; | 2455 | struct trace_iterator *iter; |
@@ -2457,16 +2476,16 @@ __tracing_open(struct inode *inode, struct file *file) | |||
2457 | if (!iter->trace) | 2476 | if (!iter->trace) |
2458 | goto fail; | 2477 | goto fail; |
2459 | 2478 | ||
2460 | if (current_trace) | 2479 | *iter->trace = *current_trace; |
2461 | *iter->trace = *current_trace; | ||
2462 | 2480 | ||
2463 | if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL)) | 2481 | if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL)) |
2464 | goto fail; | 2482 | goto fail; |
2465 | 2483 | ||
2466 | if (current_trace && current_trace->print_max) | 2484 | if (current_trace->print_max || snapshot) |
2467 | iter->tr = &max_tr; | 2485 | iter->tr = &max_tr; |
2468 | else | 2486 | else |
2469 | iter->tr = &global_trace; | 2487 | iter->tr = &global_trace; |
2488 | iter->snapshot = snapshot; | ||
2470 | iter->pos = -1; | 2489 | iter->pos = -1; |
2471 | mutex_init(&iter->mutex); | 2490 | mutex_init(&iter->mutex); |
2472 | iter->cpu_file = cpu_file; | 2491 | iter->cpu_file = cpu_file; |
@@ -2483,8 +2502,9 @@ __tracing_open(struct inode *inode, struct file *file) | |||
2483 | if (trace_clocks[trace_clock_id].in_ns) | 2502 | if (trace_clocks[trace_clock_id].in_ns) |
2484 | iter->iter_flags |= TRACE_FILE_TIME_IN_NS; | 2503 | iter->iter_flags |= TRACE_FILE_TIME_IN_NS; |
2485 | 2504 | ||
2486 | /* stop the trace while dumping */ | 2505 | /* stop the trace while dumping if we are not opening "snapshot" */ |
2487 | tracing_stop(); | 2506 | if (!iter->snapshot) |
2507 | tracing_stop(); | ||
2488 | 2508 | ||
2489 | if (iter->cpu_file == TRACE_PIPE_ALL_CPU) { | 2509 | if (iter->cpu_file == TRACE_PIPE_ALL_CPU) { |
2490 | for_each_tracing_cpu(cpu) { | 2510 | for_each_tracing_cpu(cpu) { |
@@ -2547,8 +2567,9 @@ static int tracing_release(struct inode *inode, struct file *file) | |||
2547 | if (iter->trace && iter->trace->close) | 2567 | if (iter->trace && iter->trace->close) |
2548 | iter->trace->close(iter); | 2568 | iter->trace->close(iter); |
2549 | 2569 | ||
2550 | /* reenable tracing if it was previously enabled */ | 2570 | if (!iter->snapshot) |
2551 | tracing_start(); | 2571 | /* reenable tracing if it was previously enabled */ |
2572 | tracing_start(); | ||
2552 | mutex_unlock(&trace_types_lock); | 2573 | mutex_unlock(&trace_types_lock); |
2553 | 2574 | ||
2554 | mutex_destroy(&iter->mutex); | 2575 | mutex_destroy(&iter->mutex); |
@@ -2576,7 +2597,7 @@ static int tracing_open(struct inode *inode, struct file *file) | |||
2576 | } | 2597 | } |
2577 | 2598 | ||
2578 | if (file->f_mode & FMODE_READ) { | 2599 | if (file->f_mode & FMODE_READ) { |
2579 | iter = __tracing_open(inode, file); | 2600 | iter = __tracing_open(inode, file, false); |
2580 | if (IS_ERR(iter)) | 2601 | if (IS_ERR(iter)) |
2581 | ret = PTR_ERR(iter); | 2602 | ret = PTR_ERR(iter); |
2582 | else if (trace_flags & TRACE_ITER_LATENCY_FMT) | 2603 | else if (trace_flags & TRACE_ITER_LATENCY_FMT) |
@@ -3014,10 +3035,7 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf, | |||
3014 | int r; | 3035 | int r; |
3015 | 3036 | ||
3016 | mutex_lock(&trace_types_lock); | 3037 | mutex_lock(&trace_types_lock); |
3017 | if (current_trace) | 3038 | r = sprintf(buf, "%s\n", current_trace->name); |
3018 | r = sprintf(buf, "%s\n", current_trace->name); | ||
3019 | else | ||
3020 | r = sprintf(buf, "\n"); | ||
3021 | mutex_unlock(&trace_types_lock); | 3039 | mutex_unlock(&trace_types_lock); |
3022 | 3040 | ||
3023 | return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); | 3041 | return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); |
@@ -3183,6 +3201,7 @@ static int tracing_set_tracer(const char *buf) | |||
3183 | static struct trace_option_dentry *topts; | 3201 | static struct trace_option_dentry *topts; |
3184 | struct trace_array *tr = &global_trace; | 3202 | struct trace_array *tr = &global_trace; |
3185 | struct tracer *t; | 3203 | struct tracer *t; |
3204 | bool had_max_tr; | ||
3186 | int ret = 0; | 3205 | int ret = 0; |
3187 | 3206 | ||
3188 | mutex_lock(&trace_types_lock); | 3207 | mutex_lock(&trace_types_lock); |
@@ -3207,9 +3226,21 @@ static int tracing_set_tracer(const char *buf) | |||
3207 | goto out; | 3226 | goto out; |
3208 | 3227 | ||
3209 | trace_branch_disable(); | 3228 | trace_branch_disable(); |
3210 | if (current_trace && current_trace->reset) | 3229 | if (current_trace->reset) |
3211 | current_trace->reset(tr); | 3230 | current_trace->reset(tr); |
3212 | if (current_trace && current_trace->use_max_tr) { | 3231 | |
3232 | had_max_tr = current_trace->allocated_snapshot; | ||
3233 | current_trace = &nop_trace; | ||
3234 | |||
3235 | if (had_max_tr && !t->use_max_tr) { | ||
3236 | /* | ||
3237 | * We need to make sure that the update_max_tr sees that | ||
3238 | * current_trace changed to nop_trace to keep it from | ||
3239 | * swapping the buffers after we resize it. | ||
3240 | * The update_max_tr is called with interrupts disabled, | ||
3241 | * so a synchronize_sched() is sufficient. | ||
3242 | */ | ||
3243 | synchronize_sched(); | ||
3213 | /* | 3244 | /* |
3214 | * We don't free the ring buffer. instead, resize it because | 3245 | * We don't free the ring buffer. instead, resize it because |
3215 | * The max_tr ring buffer has some state (e.g. ring->clock) and | 3246 | * The max_tr ring buffer has some state (e.g. ring->clock) and |
@@ -3217,18 +3248,19 @@ static int tracing_set_tracer(const char *buf) | |||
3217 | */ | 3248 | */ |
3218 | ring_buffer_resize(max_tr.buffer, 1, RING_BUFFER_ALL_CPUS); | 3249 | ring_buffer_resize(max_tr.buffer, 1, RING_BUFFER_ALL_CPUS); |
3219 | set_buffer_entries(&max_tr, 1); | 3250 | set_buffer_entries(&max_tr, 1); |
3251 | tracing_reset_online_cpus(&max_tr); | ||
3252 | current_trace->allocated_snapshot = false; | ||
3220 | } | 3253 | } |
3221 | destroy_trace_option_files(topts); | 3254 | destroy_trace_option_files(topts); |
3222 | 3255 | ||
3223 | current_trace = &nop_trace; | ||
3224 | |||
3225 | topts = create_trace_option_files(t); | 3256 | topts = create_trace_option_files(t); |
3226 | if (t->use_max_tr) { | 3257 | if (t->use_max_tr && !had_max_tr) { |
3227 | /* we need to make per cpu buffer sizes equivalent */ | 3258 | /* we need to make per cpu buffer sizes equivalent */ |
3228 | ret = resize_buffer_duplicate_size(&max_tr, &global_trace, | 3259 | ret = resize_buffer_duplicate_size(&max_tr, &global_trace, |
3229 | RING_BUFFER_ALL_CPUS); | 3260 | RING_BUFFER_ALL_CPUS); |
3230 | if (ret < 0) | 3261 | if (ret < 0) |
3231 | goto out; | 3262 | goto out; |
3263 | t->allocated_snapshot = true; | ||
3232 | } | 3264 | } |
3233 | 3265 | ||
3234 | if (t->init) { | 3266 | if (t->init) { |
@@ -3336,8 +3368,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) | |||
3336 | ret = -ENOMEM; | 3368 | ret = -ENOMEM; |
3337 | goto fail; | 3369 | goto fail; |
3338 | } | 3370 | } |
3339 | if (current_trace) | 3371 | *iter->trace = *current_trace; |
3340 | *iter->trace = *current_trace; | ||
3341 | 3372 | ||
3342 | if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) { | 3373 | if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) { |
3343 | ret = -ENOMEM; | 3374 | ret = -ENOMEM; |
@@ -3477,7 +3508,6 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, | |||
3477 | size_t cnt, loff_t *ppos) | 3508 | size_t cnt, loff_t *ppos) |
3478 | { | 3509 | { |
3479 | struct trace_iterator *iter = filp->private_data; | 3510 | struct trace_iterator *iter = filp->private_data; |
3480 | static struct tracer *old_tracer; | ||
3481 | ssize_t sret; | 3511 | ssize_t sret; |
3482 | 3512 | ||
3483 | /* return any leftover data */ | 3513 | /* return any leftover data */ |
@@ -3489,10 +3519,8 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, | |||
3489 | 3519 | ||
3490 | /* copy the tracer to avoid using a global lock all around */ | 3520 | /* copy the tracer to avoid using a global lock all around */ |
3491 | mutex_lock(&trace_types_lock); | 3521 | mutex_lock(&trace_types_lock); |
3492 | if (unlikely(old_tracer != current_trace && current_trace)) { | 3522 | if (unlikely(iter->trace->name != current_trace->name)) |
3493 | old_tracer = current_trace; | ||
3494 | *iter->trace = *current_trace; | 3523 | *iter->trace = *current_trace; |
3495 | } | ||
3496 | mutex_unlock(&trace_types_lock); | 3524 | mutex_unlock(&trace_types_lock); |
3497 | 3525 | ||
3498 | /* | 3526 | /* |
@@ -3648,7 +3676,6 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, | |||
3648 | .ops = &tracing_pipe_buf_ops, | 3676 | .ops = &tracing_pipe_buf_ops, |
3649 | .spd_release = tracing_spd_release_pipe, | 3677 | .spd_release = tracing_spd_release_pipe, |
3650 | }; | 3678 | }; |
3651 | static struct tracer *old_tracer; | ||
3652 | ssize_t ret; | 3679 | ssize_t ret; |
3653 | size_t rem; | 3680 | size_t rem; |
3654 | unsigned int i; | 3681 | unsigned int i; |
@@ -3658,10 +3685,8 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, | |||
3658 | 3685 | ||
3659 | /* copy the tracer to avoid using a global lock all around */ | 3686 | /* copy the tracer to avoid using a global lock all around */ |
3660 | mutex_lock(&trace_types_lock); | 3687 | mutex_lock(&trace_types_lock); |
3661 | if (unlikely(old_tracer != current_trace && current_trace)) { | 3688 | if (unlikely(iter->trace->name != current_trace->name)) |
3662 | old_tracer = current_trace; | ||
3663 | *iter->trace = *current_trace; | 3689 | *iter->trace = *current_trace; |
3664 | } | ||
3665 | mutex_unlock(&trace_types_lock); | 3690 | mutex_unlock(&trace_types_lock); |
3666 | 3691 | ||
3667 | mutex_lock(&iter->mutex); | 3692 | mutex_lock(&iter->mutex); |
@@ -4037,8 +4062,7 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, | |||
4037 | * Reset the buffer so that it doesn't have incomparable timestamps. | 4062 | * Reset the buffer so that it doesn't have incomparable timestamps. |
4038 | */ | 4063 | */ |
4039 | tracing_reset_online_cpus(&global_trace); | 4064 | tracing_reset_online_cpus(&global_trace); |
4040 | if (max_tr.buffer) | 4065 | tracing_reset_online_cpus(&max_tr); |
4041 | tracing_reset_online_cpus(&max_tr); | ||
4042 | 4066 | ||
4043 | mutex_unlock(&trace_types_lock); | 4067 | mutex_unlock(&trace_types_lock); |
4044 | 4068 | ||
@@ -4054,6 +4078,87 @@ static int tracing_clock_open(struct inode *inode, struct file *file) | |||
4054 | return single_open(file, tracing_clock_show, NULL); | 4078 | return single_open(file, tracing_clock_show, NULL); |
4055 | } | 4079 | } |
4056 | 4080 | ||
4081 | #ifdef CONFIG_TRACER_SNAPSHOT | ||
4082 | static int tracing_snapshot_open(struct inode *inode, struct file *file) | ||
4083 | { | ||
4084 | struct trace_iterator *iter; | ||
4085 | int ret = 0; | ||
4086 | |||
4087 | if (file->f_mode & FMODE_READ) { | ||
4088 | iter = __tracing_open(inode, file, true); | ||
4089 | if (IS_ERR(iter)) | ||
4090 | ret = PTR_ERR(iter); | ||
4091 | } | ||
4092 | return ret; | ||
4093 | } | ||
4094 | |||
4095 | static ssize_t | ||
4096 | tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, | ||
4097 | loff_t *ppos) | ||
4098 | { | ||
4099 | unsigned long val; | ||
4100 | int ret; | ||
4101 | |||
4102 | ret = tracing_update_buffers(); | ||
4103 | if (ret < 0) | ||
4104 | return ret; | ||
4105 | |||
4106 | ret = kstrtoul_from_user(ubuf, cnt, 10, &val); | ||
4107 | if (ret) | ||
4108 | return ret; | ||
4109 | |||
4110 | mutex_lock(&trace_types_lock); | ||
4111 | |||
4112 | if (current_trace->use_max_tr) { | ||
4113 | ret = -EBUSY; | ||
4114 | goto out; | ||
4115 | } | ||
4116 | |||
4117 | switch (val) { | ||
4118 | case 0: | ||
4119 | if (current_trace->allocated_snapshot) { | ||
4120 | /* free spare buffer */ | ||
4121 | ring_buffer_resize(max_tr.buffer, 1, | ||
4122 | RING_BUFFER_ALL_CPUS); | ||
4123 | set_buffer_entries(&max_tr, 1); | ||
4124 | tracing_reset_online_cpus(&max_tr); | ||
4125 | current_trace->allocated_snapshot = false; | ||
4126 | } | ||
4127 | break; | ||
4128 | case 1: | ||
4129 | if (!current_trace->allocated_snapshot) { | ||
4130 | /* allocate spare buffer */ | ||
4131 | ret = resize_buffer_duplicate_size(&max_tr, | ||
4132 | &global_trace, RING_BUFFER_ALL_CPUS); | ||
4133 | if (ret < 0) | ||
4134 | break; | ||
4135 | current_trace->allocated_snapshot = true; | ||
4136 | } | ||
4137 | |||
4138 | local_irq_disable(); | ||
4139 | /* Now, we're going to swap */ | ||
4140 | update_max_tr(&global_trace, current, smp_processor_id()); | ||
4141 | local_irq_enable(); | ||
4142 | break; | ||
4143 | default: | ||
4144 | if (current_trace->allocated_snapshot) | ||
4145 | tracing_reset_online_cpus(&max_tr); | ||
4146 | else | ||
4147 | ret = -EINVAL; | ||
4148 | break; | ||
4149 | } | ||
4150 | |||
4151 | if (ret >= 0) { | ||
4152 | *ppos += cnt; | ||
4153 | ret = cnt; | ||
4154 | } | ||
4155 | out: | ||
4156 | mutex_unlock(&trace_types_lock); | ||
4157 | return ret; | ||
4158 | } | ||
4159 | #endif /* CONFIG_TRACER_SNAPSHOT */ | ||
4160 | |||
4161 | |||
4057 | static const struct file_operations tracing_max_lat_fops = { | 4162 | static const struct file_operations tracing_max_lat_fops = { |
4058 | .open = tracing_open_generic, | 4163 | .open = tracing_open_generic, |
4059 | .read = tracing_max_lat_read, | 4164 | .read = tracing_max_lat_read, |
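
Seen from user space, the new snapshot file behaves like a small state machine: writing 1 allocates the spare max_tr buffer (if needed) and swaps it with the live buffer, writing 0 shrinks the spare back to a single entry, and any other value clears an existing snapshot. A rough usage sketch follows; the path assumes the usual debugfs mount point at /sys/kernel/debug, which is an assumption about the local setup rather than anything mandated by this patch.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static int snapshot_ctl(const char *val)
{
	/* assumed path: debugfs mounted at /sys/kernel/debug */
	int fd = open("/sys/kernel/debug/tracing/snapshot", O_WRONLY);

	if (fd < 0) {
		perror("open snapshot");
		return -1;
	}
	if (write(fd, val, 1) != 1)
		perror("write snapshot");
	close(fd);
	return 0;
}

int main(void)
{
	snapshot_ctl("1");	/* allocate the spare buffer and take a snapshot */
	/* ... reproduce the interesting workload here ... */
	snapshot_ctl("0");	/* free the spare buffer again */
	return 0;
}
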
@@ -4110,6 +4215,16 @@ static const struct file_operations trace_clock_fops = { | |||
4110 | .write = tracing_clock_write, | 4215 | .write = tracing_clock_write, |
4111 | }; | 4216 | }; |
4112 | 4217 | ||
4218 | #ifdef CONFIG_TRACER_SNAPSHOT | ||
4219 | static const struct file_operations snapshot_fops = { | ||
4220 | .open = tracing_snapshot_open, | ||
4221 | .read = seq_read, | ||
4222 | .write = tracing_snapshot_write, | ||
4223 | .llseek = tracing_seek, | ||
4224 | .release = tracing_release, | ||
4225 | }; | ||
4226 | #endif /* CONFIG_TRACER_SNAPSHOT */ | ||
4227 | |||
4113 | struct ftrace_buffer_info { | 4228 | struct ftrace_buffer_info { |
4114 | struct trace_array *tr; | 4229 | struct trace_array *tr; |
4115 | void *spare; | 4230 | void *spare; |
@@ -4414,6 +4529,9 @@ tracing_stats_read(struct file *filp, char __user *ubuf, | |||
4414 | cnt = ring_buffer_dropped_events_cpu(tr->buffer, cpu); | 4529 | cnt = ring_buffer_dropped_events_cpu(tr->buffer, cpu); |
4415 | trace_seq_printf(s, "dropped events: %ld\n", cnt); | 4530 | trace_seq_printf(s, "dropped events: %ld\n", cnt); |
4416 | 4531 | ||
4532 | cnt = ring_buffer_read_events_cpu(tr->buffer, cpu); | ||
4533 | trace_seq_printf(s, "read events: %ld\n", cnt); | ||
4534 | |||
4417 | count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len); | 4535 | count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len); |
4418 | 4536 | ||
4419 | kfree(s); | 4537 | kfree(s); |
@@ -4490,7 +4608,7 @@ struct dentry *tracing_init_dentry(void) | |||
4490 | 4608 | ||
4491 | static struct dentry *d_percpu; | 4609 | static struct dentry *d_percpu; |
4492 | 4610 | ||
4493 | struct dentry *tracing_dentry_percpu(void) | 4611 | static struct dentry *tracing_dentry_percpu(void) |
4494 | { | 4612 | { |
4495 | static int once; | 4613 | static int once; |
4496 | struct dentry *d_tracer; | 4614 | struct dentry *d_tracer; |
@@ -4906,6 +5024,11 @@ static __init int tracer_init_debugfs(void) | |||
4906 | &ftrace_update_tot_cnt, &tracing_dyn_info_fops); | 5024 | &ftrace_update_tot_cnt, &tracing_dyn_info_fops); |
4907 | #endif | 5025 | #endif |
4908 | 5026 | ||
5027 | #ifdef CONFIG_TRACER_SNAPSHOT | ||
5028 | trace_create_file("snapshot", 0644, d_tracer, | ||
5029 | (void *) TRACE_PIPE_ALL_CPU, &snapshot_fops); | ||
5030 | #endif | ||
5031 | |||
4909 | create_trace_options_dir(); | 5032 | create_trace_options_dir(); |
4910 | 5033 | ||
4911 | for_each_tracing_cpu(cpu) | 5034 | for_each_tracing_cpu(cpu) |
@@ -5014,6 +5137,7 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode) | |||
5014 | if (disable_tracing) | 5137 | if (disable_tracing) |
5015 | ftrace_kill(); | 5138 | ftrace_kill(); |
5016 | 5139 | ||
5140 | /* Simulate the iterator */ | ||
5017 | trace_init_global_iter(&iter); | 5141 | trace_init_global_iter(&iter); |
5018 | 5142 | ||
5019 | for_each_tracing_cpu(cpu) { | 5143 | for_each_tracing_cpu(cpu) { |
@@ -5025,10 +5149,6 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode) | |||
5025 | /* don't look at user memory in panic mode */ | 5149 | /* don't look at user memory in panic mode */ |
5026 | trace_flags &= ~TRACE_ITER_SYM_USEROBJ; | 5150 | trace_flags &= ~TRACE_ITER_SYM_USEROBJ; |
5027 | 5151 | ||
5028 | /* Simulate the iterator */ | ||
5029 | iter.tr = &global_trace; | ||
5030 | iter.trace = current_trace; | ||
5031 | |||
5032 | switch (oops_dump_mode) { | 5152 | switch (oops_dump_mode) { |
5033 | case DUMP_ALL: | 5153 | case DUMP_ALL: |
5034 | iter.cpu_file = TRACE_PIPE_ALL_CPU; | 5154 | iter.cpu_file = TRACE_PIPE_ALL_CPU; |
@@ -5173,7 +5293,7 @@ __init static int tracer_alloc_buffers(void) | |||
5173 | init_irq_work(&trace_work_wakeup, trace_wake_up); | 5293 | init_irq_work(&trace_work_wakeup, trace_wake_up); |
5174 | 5294 | ||
5175 | register_tracer(&nop_trace); | 5295 | register_tracer(&nop_trace); |
5176 | current_trace = &nop_trace; | 5296 | |
5177 | /* All seems OK, enable tracing */ | 5297 | /* All seems OK, enable tracing */ |
5178 | tracing_disabled = 0; | 5298 | tracing_disabled = 0; |
5179 | 5299 | ||
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index c75d7988902c..57d7e5397d56 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h | |||
@@ -287,20 +287,62 @@ struct tracer { | |||
287 | struct tracer_flags *flags; | 287 | struct tracer_flags *flags; |
288 | bool print_max; | 288 | bool print_max; |
289 | bool use_max_tr; | 289 | bool use_max_tr; |
290 | bool allocated_snapshot; | ||
290 | }; | 291 | }; |
291 | 292 | ||
292 | 293 | ||
293 | /* Only current can touch trace_recursion */ | 294 | /* Only current can touch trace_recursion */ |
294 | #define trace_recursion_inc() do { (current)->trace_recursion++; } while (0) | ||
295 | #define trace_recursion_dec() do { (current)->trace_recursion--; } while (0) | ||
296 | 295 | ||
297 | /* Ring buffer has the 10 LSB bits to count */ | 296 | /* |
298 | #define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff) | 297 | * For function tracing recursion: |
299 | 298 | * The order of these bits is important. |
300 | /* for function tracing recursion */ | 299 | * |
301 | #define TRACE_INTERNAL_BIT (1<<11) | 300 | * When function tracing occurs, the following steps are made: |
302 | #define TRACE_GLOBAL_BIT (1<<12) | 301 | * If arch does not support a ftrace feature: |
303 | #define TRACE_CONTROL_BIT (1<<13) | 302 | * call internal function (uses INTERNAL bits) which calls... |
303 | * If callback is registered to the "global" list, the list | ||
304 | * function is called and recursion checks the GLOBAL bits. | ||
305 | * then this function calls... | ||
306 | * The function callback, which can use the FTRACE bits to | ||
307 | * check for recursion. | ||
308 | * | ||
309 | * Now if the arch does not support a feature, and it calls | ||
310 | * the global list function which calls the ftrace callback | ||
311 | * all three of these steps will do a recursion protection. | ||
312 | * There's no reason to do one if the previous caller already | ||
313 | * did. The recursion that we are protecting against will | ||
314 | * go through the same steps again. | ||
315 | * | ||
316 | * To prevent the multiple recursion checks, if a recursion | ||
317 | * bit is set that is higher than the MAX bit of the current | ||
318 | * check, then we know that the check was made by the previous | ||
319 | * caller, and we can skip the current check. | ||
320 | */ | ||
321 | enum { | ||
322 | TRACE_BUFFER_BIT, | ||
323 | TRACE_BUFFER_NMI_BIT, | ||
324 | TRACE_BUFFER_IRQ_BIT, | ||
325 | TRACE_BUFFER_SIRQ_BIT, | ||
326 | |||
327 | /* Start of function recursion bits */ | ||
328 | TRACE_FTRACE_BIT, | ||
329 | TRACE_FTRACE_NMI_BIT, | ||
330 | TRACE_FTRACE_IRQ_BIT, | ||
331 | TRACE_FTRACE_SIRQ_BIT, | ||
332 | |||
333 | /* GLOBAL_BITs must be greater than FTRACE_BITs */ | ||
334 | TRACE_GLOBAL_BIT, | ||
335 | TRACE_GLOBAL_NMI_BIT, | ||
336 | TRACE_GLOBAL_IRQ_BIT, | ||
337 | TRACE_GLOBAL_SIRQ_BIT, | ||
338 | |||
339 | /* INTERNAL_BITs must be greater than GLOBAL_BITs */ | ||
340 | TRACE_INTERNAL_BIT, | ||
341 | TRACE_INTERNAL_NMI_BIT, | ||
342 | TRACE_INTERNAL_IRQ_BIT, | ||
343 | TRACE_INTERNAL_SIRQ_BIT, | ||
344 | |||
345 | TRACE_CONTROL_BIT, | ||
304 | 346 | ||
305 | /* | 347 | /* |
306 | * Abuse of the trace_recursion. | 348 | * Abuse of the trace_recursion. |
@@ -309,11 +351,77 @@ struct tracer { | |||
309 | * was called in irq context but we have irq tracing off. Since this | 351 | * was called in irq context but we have irq tracing off. Since this |
310 | * can only be modified by current, we can reuse trace_recursion. | 352 | * can only be modified by current, we can reuse trace_recursion. |
311 | */ | 353 | */ |
312 | #define TRACE_IRQ_BIT (1<<13) | 354 | TRACE_IRQ_BIT, |
355 | }; | ||
356 | |||
357 | #define trace_recursion_set(bit) do { (current)->trace_recursion |= (1<<(bit)); } while (0) | ||
358 | #define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(1<<(bit)); } while (0) | ||
359 | #define trace_recursion_test(bit) ((current)->trace_recursion & (1<<(bit))) | ||
360 | |||
361 | #define TRACE_CONTEXT_BITS 4 | ||
362 | |||
363 | #define TRACE_FTRACE_START TRACE_FTRACE_BIT | ||
364 | #define TRACE_FTRACE_MAX ((1 << (TRACE_FTRACE_START + TRACE_CONTEXT_BITS)) - 1) | ||
365 | |||
366 | #define TRACE_GLOBAL_START TRACE_GLOBAL_BIT | ||
367 | #define TRACE_GLOBAL_MAX ((1 << (TRACE_GLOBAL_START + TRACE_CONTEXT_BITS)) - 1) | ||
368 | |||
369 | #define TRACE_LIST_START TRACE_INTERNAL_BIT | ||
370 | #define TRACE_LIST_MAX ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1) | ||
371 | |||
372 | #define TRACE_CONTEXT_MASK TRACE_LIST_MAX | ||
373 | |||
374 | static __always_inline int trace_get_context_bit(void) | ||
375 | { | ||
376 | int bit; | ||
313 | 377 | ||
314 | #define trace_recursion_set(bit) do { (current)->trace_recursion |= (bit); } while (0) | 378 | if (in_interrupt()) { |
315 | #define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(bit); } while (0) | 379 | if (in_nmi()) |
316 | #define trace_recursion_test(bit) ((current)->trace_recursion & (bit)) | 380 | bit = 0; |
381 | |||
382 | else if (in_irq()) | ||
383 | bit = 1; | ||
384 | else | ||
385 | bit = 2; | ||
386 | } else | ||
387 | bit = 3; | ||
388 | |||
389 | return bit; | ||
390 | } | ||
391 | |||
392 | static __always_inline int trace_test_and_set_recursion(int start, int max) | ||
393 | { | ||
394 | unsigned int val = current->trace_recursion; | ||
395 | int bit; | ||
396 | |||
397 | /* A previous recursion check was made */ | ||
398 | if ((val & TRACE_CONTEXT_MASK) > max) | ||
399 | return 0; | ||
400 | |||
401 | bit = trace_get_context_bit() + start; | ||
402 | if (unlikely(val & (1 << bit))) | ||
403 | return -1; | ||
404 | |||
405 | val |= 1 << bit; | ||
406 | current->trace_recursion = val; | ||
407 | barrier(); | ||
408 | |||
409 | return bit; | ||
410 | } | ||
411 | |||
412 | static __always_inline void trace_clear_recursion(int bit) | ||
413 | { | ||
414 | unsigned int val = current->trace_recursion; | ||
415 | |||
416 | if (!bit) | ||
417 | return; | ||
418 | |||
419 | bit = 1 << bit; | ||
420 | val &= ~bit; | ||
421 | |||
422 | barrier(); | ||
423 | current->trace_recursion = val; | ||
424 | } | ||
317 | 425 | ||
318 | #define TRACE_PIPE_ALL_CPU -1 | 426 | #define TRACE_PIPE_ALL_CPU -1 |
319 | 427 | ||
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c index 1bbb1b200cec..aa8f5f48dae6 100644 --- a/kernel/trace/trace_clock.c +++ b/kernel/trace/trace_clock.c | |||
@@ -21,8 +21,6 @@ | |||
21 | #include <linux/ktime.h> | 21 | #include <linux/ktime.h> |
22 | #include <linux/trace_clock.h> | 22 | #include <linux/trace_clock.h> |
23 | 23 | ||
24 | #include "trace.h" | ||
25 | |||
26 | /* | 24 | /* |
27 | * trace_clock_local(): the simplest and least coherent tracing clock. | 25 | * trace_clock_local(): the simplest and least coherent tracing clock. |
28 | * | 26 | * |
@@ -87,7 +85,7 @@ u64 notrace trace_clock_global(void) | |||
87 | local_irq_save(flags); | 85 | local_irq_save(flags); |
88 | 86 | ||
89 | this_cpu = raw_smp_processor_id(); | 87 | this_cpu = raw_smp_processor_id(); |
90 | now = cpu_clock(this_cpu); | 88 | now = sched_clock_cpu(this_cpu); |
91 | /* | 89 | /* |
92 | * If in an NMI context then dont risk lockups and return the | 90 | * If in an NMI context then dont risk lockups and return the |
93 | * cpu_clock() time: | 91 | * cpu_clock() time: |
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 880073d0b946..57e9b284250c 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c | |||
@@ -116,7 +116,6 @@ static int trace_define_common_fields(void) | |||
116 | __common_field(unsigned char, flags); | 116 | __common_field(unsigned char, flags); |
117 | __common_field(unsigned char, preempt_count); | 117 | __common_field(unsigned char, preempt_count); |
118 | __common_field(int, pid); | 118 | __common_field(int, pid); |
119 | __common_field(int, padding); | ||
120 | 119 | ||
121 | return ret; | 120 | return ret; |
122 | } | 121 | } |
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 8e3ad8082ab7..601152523326 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c | |||
@@ -47,34 +47,6 @@ static void function_trace_start(struct trace_array *tr) | |||
47 | tracing_reset_online_cpus(tr); | 47 | tracing_reset_online_cpus(tr); |
48 | } | 48 | } |
49 | 49 | ||
50 | static void | ||
51 | function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip, | ||
52 | struct ftrace_ops *op, struct pt_regs *pt_regs) | ||
53 | { | ||
54 | struct trace_array *tr = func_trace; | ||
55 | struct trace_array_cpu *data; | ||
56 | unsigned long flags; | ||
57 | long disabled; | ||
58 | int cpu; | ||
59 | int pc; | ||
60 | |||
61 | if (unlikely(!ftrace_function_enabled)) | ||
62 | return; | ||
63 | |||
64 | pc = preempt_count(); | ||
65 | preempt_disable_notrace(); | ||
66 | local_save_flags(flags); | ||
67 | cpu = raw_smp_processor_id(); | ||
68 | data = tr->data[cpu]; | ||
69 | disabled = atomic_inc_return(&data->disabled); | ||
70 | |||
71 | if (likely(disabled == 1)) | ||
72 | trace_function(tr, ip, parent_ip, flags, pc); | ||
73 | |||
74 | atomic_dec(&data->disabled); | ||
75 | preempt_enable_notrace(); | ||
76 | } | ||
77 | |||
78 | /* Our option */ | 50 | /* Our option */ |
79 | enum { | 51 | enum { |
80 | TRACE_FUNC_OPT_STACK = 0x1, | 52 | TRACE_FUNC_OPT_STACK = 0x1, |
@@ -85,34 +57,34 @@ static struct tracer_flags func_flags; | |||
85 | static void | 57 | static void |
86 | function_trace_call(unsigned long ip, unsigned long parent_ip, | 58 | function_trace_call(unsigned long ip, unsigned long parent_ip, |
87 | struct ftrace_ops *op, struct pt_regs *pt_regs) | 59 | struct ftrace_ops *op, struct pt_regs *pt_regs) |
88 | |||
89 | { | 60 | { |
90 | struct trace_array *tr = func_trace; | 61 | struct trace_array *tr = func_trace; |
91 | struct trace_array_cpu *data; | 62 | struct trace_array_cpu *data; |
92 | unsigned long flags; | 63 | unsigned long flags; |
93 | long disabled; | 64 | int bit; |
94 | int cpu; | 65 | int cpu; |
95 | int pc; | 66 | int pc; |
96 | 67 | ||
97 | if (unlikely(!ftrace_function_enabled)) | 68 | if (unlikely(!ftrace_function_enabled)) |
98 | return; | 69 | return; |
99 | 70 | ||
100 | /* | 71 | pc = preempt_count(); |
101 | * Need to use raw, since this must be called before the | 72 | preempt_disable_notrace(); |
102 | * recursive protection is performed. | ||
103 | */ | ||
104 | local_irq_save(flags); | ||
105 | cpu = raw_smp_processor_id(); | ||
106 | data = tr->data[cpu]; | ||
107 | disabled = atomic_inc_return(&data->disabled); | ||
108 | 73 | ||
109 | if (likely(disabled == 1)) { | 74 | bit = trace_test_and_set_recursion(TRACE_FTRACE_START, TRACE_FTRACE_MAX); |
110 | pc = preempt_count(); | 75 | if (bit < 0) |
76 | goto out; | ||
77 | |||
78 | cpu = smp_processor_id(); | ||
79 | data = tr->data[cpu]; | ||
80 | if (!atomic_read(&data->disabled)) { | ||
81 | local_save_flags(flags); | ||
111 | trace_function(tr, ip, parent_ip, flags, pc); | 82 | trace_function(tr, ip, parent_ip, flags, pc); |
112 | } | 83 | } |
84 | trace_clear_recursion(bit); | ||
113 | 85 | ||
114 | atomic_dec(&data->disabled); | 86 | out: |
115 | local_irq_restore(flags); | 87 | preempt_enable_notrace(); |
116 | } | 88 | } |
117 | 89 | ||
118 | static void | 90 | static void |
@@ -185,11 +157,6 @@ static void tracing_start_function_trace(void) | |||
185 | { | 157 | { |
186 | ftrace_function_enabled = 0; | 158 | ftrace_function_enabled = 0; |
187 | 159 | ||
188 | if (trace_flags & TRACE_ITER_PREEMPTONLY) | ||
189 | trace_ops.func = function_trace_call_preempt_only; | ||
190 | else | ||
191 | trace_ops.func = function_trace_call; | ||
192 | |||
193 | if (func_flags.val & TRACE_FUNC_OPT_STACK) | 160 | if (func_flags.val & TRACE_FUNC_OPT_STACK) |
194 | register_ftrace_function(&trace_stack_ops); | 161 | register_ftrace_function(&trace_stack_ops); |
195 | else | 162 | else |
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 4edb4b74eb7e..39ada66389cc 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c | |||
@@ -47,6 +47,8 @@ struct fgraph_data { | |||
47 | #define TRACE_GRAPH_PRINT_ABS_TIME 0x20 | 47 | #define TRACE_GRAPH_PRINT_ABS_TIME 0x20 |
48 | #define TRACE_GRAPH_PRINT_IRQS 0x40 | 48 | #define TRACE_GRAPH_PRINT_IRQS 0x40 |
49 | 49 | ||
50 | static unsigned int max_depth; | ||
51 | |||
50 | static struct tracer_opt trace_opts[] = { | 52 | static struct tracer_opt trace_opts[] = { |
51 | /* Display overruns? (for self-debug purpose) */ | 53 | /* Display overruns? (for self-debug purpose) */ |
52 | { TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) }, | 54 | { TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) }, |
@@ -189,10 +191,16 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer) | |||
189 | 191 | ||
190 | ftrace_pop_return_trace(&trace, &ret, frame_pointer); | 192 | ftrace_pop_return_trace(&trace, &ret, frame_pointer); |
191 | trace.rettime = trace_clock_local(); | 193 | trace.rettime = trace_clock_local(); |
192 | ftrace_graph_return(&trace); | ||
193 | barrier(); | 194 | barrier(); |
194 | current->curr_ret_stack--; | 195 | current->curr_ret_stack--; |
195 | 196 | ||
197 | /* | ||
198 | * The trace should run after decrementing the ret counter | ||
199 | * in case an interrupt were to come in. We don't want to | ||
200 | * lose the interrupt if max_depth is set. | ||
201 | */ | ||
202 | ftrace_graph_return(&trace); | ||
203 | |||
196 | if (unlikely(!ret)) { | 204 | if (unlikely(!ret)) { |
197 | ftrace_graph_stop(); | 205 | ftrace_graph_stop(); |
198 | WARN_ON(1); | 206 | WARN_ON(1); |
@@ -250,8 +258,9 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) | |||
250 | return 0; | 258 | return 0; |
251 | 259 | ||
252 | /* trace it when it is-nested-in or is a function enabled. */ | 260 | /* trace it when it is-nested-in or is a function enabled. */ |
253 | if (!(trace->depth || ftrace_graph_addr(trace->func)) || | 261 | if ((!(trace->depth || ftrace_graph_addr(trace->func)) || |
254 | ftrace_graph_ignore_irqs()) | 262 | ftrace_graph_ignore_irqs()) || |
263 | (max_depth && trace->depth >= max_depth)) | ||
255 | return 0; | 264 | return 0; |
256 | 265 | ||
257 | local_irq_save(flags); | 266 | local_irq_save(flags); |
@@ -1457,6 +1466,59 @@ static struct tracer graph_trace __read_mostly = { | |||
1457 | #endif | 1466 | #endif |
1458 | }; | 1467 | }; |
1459 | 1468 | ||
1469 | |||
1470 | static ssize_t | ||
1471 | graph_depth_write(struct file *filp, const char __user *ubuf, size_t cnt, | ||
1472 | loff_t *ppos) | ||
1473 | { | ||
1474 | unsigned long val; | ||
1475 | int ret; | ||
1476 | |||
1477 | ret = kstrtoul_from_user(ubuf, cnt, 10, &val); | ||
1478 | if (ret) | ||
1479 | return ret; | ||
1480 | |||
1481 | max_depth = val; | ||
1482 | |||
1483 | *ppos += cnt; | ||
1484 | |||
1485 | return cnt; | ||
1486 | } | ||
1487 | |||
1488 | static ssize_t | ||
1489 | graph_depth_read(struct file *filp, char __user *ubuf, size_t cnt, | ||
1490 | loff_t *ppos) | ||
1491 | { | ||
1492 | char buf[15]; /* More than enough to hold UINT_MAX + "\n"*/ | ||
1493 | int n; | ||
1494 | |||
1495 | n = sprintf(buf, "%d\n", max_depth); | ||
1496 | |||
1497 | return simple_read_from_buffer(ubuf, cnt, ppos, buf, n); | ||
1498 | } | ||
1499 | |||
1500 | static const struct file_operations graph_depth_fops = { | ||
1501 | .open = tracing_open_generic, | ||
1502 | .write = graph_depth_write, | ||
1503 | .read = graph_depth_read, | ||
1504 | .llseek = generic_file_llseek, | ||
1505 | }; | ||
1506 | |||
1507 | static __init int init_graph_debugfs(void) | ||
1508 | { | ||
1509 | struct dentry *d_tracer; | ||
1510 | |||
1511 | d_tracer = tracing_init_dentry(); | ||
1512 | if (!d_tracer) | ||
1513 | return 0; | ||
1514 | |||
1515 | trace_create_file("max_graph_depth", 0644, d_tracer, | ||
1516 | NULL, &graph_depth_fops); | ||
1517 | |||
1518 | return 0; | ||
1519 | } | ||
1520 | fs_initcall(init_graph_debugfs); | ||
1521 | |||
1460 | static __init int init_graph_trace(void) | 1522 | static __init int init_graph_trace(void) |
1461 | { | 1523 | { |
1462 | max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1); | 1524 | max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1); |
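
The max_graph_depth file added here takes a plain decimal depth: 0 removes the limit, and 1 restricts the function_graph tracer to the outermost calls only, since trace_graph_entry() bails out once trace->depth reaches the limit. A short user-space sketch, again assuming debugfs is mounted at /sys/kernel/debug:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static void set_graph_depth(const char *depth)
{
	/* assumed path: debugfs mounted at /sys/kernel/debug */
	int fd = open("/sys/kernel/debug/tracing/max_graph_depth", O_WRONLY);

	if (fd < 0) {
		perror("open max_graph_depth");
		return;
	}
	if (write(fd, depth, strlen(depth)) < 0)
		perror("write max_graph_depth");
	close(fd);
}

int main(void)
{
	set_graph_depth("1");	/* trace only the outermost functions */
	/* ... enable the function_graph tracer and run the workload ... */
	set_graph_depth("0");	/* back to unlimited depth */
	return 0;
}
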
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 933708677814..5c7e09d10d74 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h | |||
@@ -66,7 +66,6 @@ | |||
66 | #define TP_FLAG_TRACE 1 | 66 | #define TP_FLAG_TRACE 1 |
67 | #define TP_FLAG_PROFILE 2 | 67 | #define TP_FLAG_PROFILE 2 |
68 | #define TP_FLAG_REGISTERED 4 | 68 | #define TP_FLAG_REGISTERED 4 |
69 | #define TP_FLAG_UPROBE 8 | ||
70 | 69 | ||
71 | 70 | ||
72 | /* data_rloc: data relative location, compatible with u32 */ | 71 | /* data_rloc: data relative location, compatible with u32 */ |
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 47623169a815..51c819c12c29 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c | |||
@@ -415,7 +415,8 @@ static void trace_selftest_test_recursion_func(unsigned long ip, | |||
415 | * The ftrace infrastructure should provide the recursion | 415 | * The ftrace infrastructure should provide the recursion |
416 | * protection. If not, this will crash the kernel! | 416 | * protection. If not, this will crash the kernel! |
417 | */ | 417 | */ |
418 | trace_selftest_recursion_cnt++; | 418 | if (trace_selftest_recursion_cnt++ > 10) |
419 | return; | ||
419 | DYN_FTRACE_TEST_NAME(); | 420 | DYN_FTRACE_TEST_NAME(); |
420 | } | 421 | } |
421 | 422 | ||
@@ -452,7 +453,6 @@ trace_selftest_function_recursion(void) | |||
452 | char *func_name; | 453 | char *func_name; |
453 | int len; | 454 | int len; |
454 | int ret; | 455 | int ret; |
455 | int cnt; | ||
456 | 456 | ||
457 | /* The previous test PASSED */ | 457 | /* The previous test PASSED */ |
458 | pr_cont("PASSED\n"); | 458 | pr_cont("PASSED\n"); |
@@ -510,19 +510,10 @@ trace_selftest_function_recursion(void) | |||
510 | 510 | ||
511 | unregister_ftrace_function(&test_recsafe_probe); | 511 | unregister_ftrace_function(&test_recsafe_probe); |
512 | 512 | ||
513 | /* | ||
514 | * If arch supports all ftrace features, and no other task | ||
515 | * was on the list, we should be fine. | ||
516 | */ | ||
517 | if (!ftrace_nr_registered_ops() && !FTRACE_FORCE_LIST_FUNC) | ||
518 | cnt = 2; /* Should have recursed */ | ||
519 | else | ||
520 | cnt = 1; | ||
521 | |||
522 | ret = -1; | 513 | ret = -1; |
523 | if (trace_selftest_recursion_cnt != cnt) { | 514 | if (trace_selftest_recursion_cnt != 2) { |
524 | pr_cont("*callback not called expected %d times (%d)* ", | 515 | pr_cont("*callback not called expected 2 times (%d)* ", |
525 | cnt, trace_selftest_recursion_cnt); | 516 | trace_selftest_recursion_cnt); |
526 | goto out; | 517 | goto out; |
527 | } | 518 | } |
528 | 519 | ||
@@ -568,7 +559,7 @@ trace_selftest_function_regs(void) | |||
568 | int ret; | 559 | int ret; |
569 | int supported = 0; | 560 | int supported = 0; |
570 | 561 | ||
571 | #ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS | 562 | #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS |
572 | supported = 1; | 563 | supported = 1; |
573 | #endif | 564 | #endif |
574 | 565 | ||
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 7609dd6714c2..5329e13e74a1 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c | |||
@@ -77,7 +77,7 @@ static struct syscall_metadata *syscall_nr_to_meta(int nr) | |||
77 | return syscalls_metadata[nr]; | 77 | return syscalls_metadata[nr]; |
78 | } | 78 | } |
79 | 79 | ||
80 | enum print_line_t | 80 | static enum print_line_t |
81 | print_syscall_enter(struct trace_iterator *iter, int flags, | 81 | print_syscall_enter(struct trace_iterator *iter, int flags, |
82 | struct trace_event *event) | 82 | struct trace_event *event) |
83 | { | 83 | { |
@@ -130,7 +130,7 @@ end: | |||
130 | return TRACE_TYPE_HANDLED; | 130 | return TRACE_TYPE_HANDLED; |
131 | } | 131 | } |
132 | 132 | ||
133 | enum print_line_t | 133 | static enum print_line_t |
134 | print_syscall_exit(struct trace_iterator *iter, int flags, | 134 | print_syscall_exit(struct trace_iterator *iter, int flags, |
135 | struct trace_event *event) | 135 | struct trace_event *event) |
136 | { | 136 | { |
@@ -270,7 +270,7 @@ static int syscall_exit_define_fields(struct ftrace_event_call *call) | |||
270 | return ret; | 270 | return ret; |
271 | } | 271 | } |
272 | 272 | ||
273 | void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id) | 273 | static void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id) |
274 | { | 274 | { |
275 | struct syscall_trace_enter *entry; | 275 | struct syscall_trace_enter *entry; |
276 | struct syscall_metadata *sys_data; | 276 | struct syscall_metadata *sys_data; |
@@ -305,7 +305,7 @@ void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id) | |||
305 | trace_current_buffer_unlock_commit(buffer, event, 0, 0); | 305 | trace_current_buffer_unlock_commit(buffer, event, 0, 0); |
306 | } | 306 | } |
307 | 307 | ||
308 | void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret) | 308 | static void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret) |
309 | { | 309 | { |
310 | struct syscall_trace_exit *entry; | 310 | struct syscall_trace_exit *entry; |
311 | struct syscall_metadata *sys_data; | 311 | struct syscall_metadata *sys_data; |
@@ -337,7 +337,7 @@ void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret) | |||
337 | trace_current_buffer_unlock_commit(buffer, event, 0, 0); | 337 | trace_current_buffer_unlock_commit(buffer, event, 0, 0); |
338 | } | 338 | } |
339 | 339 | ||
340 | int reg_event_syscall_enter(struct ftrace_event_call *call) | 340 | static int reg_event_syscall_enter(struct ftrace_event_call *call) |
341 | { | 341 | { |
342 | int ret = 0; | 342 | int ret = 0; |
343 | int num; | 343 | int num; |
@@ -356,7 +356,7 @@ int reg_event_syscall_enter(struct ftrace_event_call *call) | |||
356 | return ret; | 356 | return ret; |
357 | } | 357 | } |
358 | 358 | ||
359 | void unreg_event_syscall_enter(struct ftrace_event_call *call) | 359 | static void unreg_event_syscall_enter(struct ftrace_event_call *call) |
360 | { | 360 | { |
361 | int num; | 361 | int num; |
362 | 362 | ||
@@ -371,7 +371,7 @@ void unreg_event_syscall_enter(struct ftrace_event_call *call) | |||
371 | mutex_unlock(&syscall_trace_lock); | 371 | mutex_unlock(&syscall_trace_lock); |
372 | } | 372 | } |
373 | 373 | ||
374 | int reg_event_syscall_exit(struct ftrace_event_call *call) | 374 | static int reg_event_syscall_exit(struct ftrace_event_call *call) |
375 | { | 375 | { |
376 | int ret = 0; | 376 | int ret = 0; |
377 | int num; | 377 | int num; |
@@ -390,7 +390,7 @@ int reg_event_syscall_exit(struct ftrace_event_call *call) | |||
390 | return ret; | 390 | return ret; |
391 | } | 391 | } |
392 | 392 | ||
393 | void unreg_event_syscall_exit(struct ftrace_event_call *call) | 393 | static void unreg_event_syscall_exit(struct ftrace_event_call *call) |
394 | { | 394 | { |
395 | int num; | 395 | int num; |
396 | 396 | ||
@@ -459,7 +459,7 @@ unsigned long __init __weak arch_syscall_addr(int nr) | |||
459 | return (unsigned long)sys_call_table[nr]; | 459 | return (unsigned long)sys_call_table[nr]; |
460 | } | 460 | } |
461 | 461 | ||
462 | int __init init_ftrace_syscalls(void) | 462 | static int __init init_ftrace_syscalls(void) |
463 | { | 463 | { |
464 | struct syscall_metadata *meta; | 464 | struct syscall_metadata *meta; |
465 | unsigned long addr; | 465 | unsigned long addr; |
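
The trace_syscalls.c changes above only narrow linkage: each converted function is reached through registered callbacks or local structures, never by name from another file, so external symbols are unnecessary. A small standalone C sketch of that pattern, with register_handler() and fire_tracepoint() as illustrative stand-ins for the real registration and tracepoint calls:

#include <stdio.h>

typedef void (*syscall_handler_t)(long id);

static syscall_handler_t registered_handler;

/* illustrative stand-in for the tracepoint registration call */
static void register_handler(syscall_handler_t h)
{
        registered_handler = h;
}

/* static: no other translation unit needs this symbol, only its address */
static void example_syscall_enter(long id)
{
        printf("enter: syscall %ld\n", id);
}

static void fire_tracepoint(long id)
{
        if (registered_handler)
                registered_handler(id);
}

int main(void)
{
        register_handler(example_syscall_enter);
        fire_tracepoint(42);
        return 0;
}
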
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index c86e6d4f67fb..8dad2a92dee9 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c | |||
@@ -28,20 +28,21 @@ | |||
28 | 28 | ||
29 | #define UPROBE_EVENT_SYSTEM "uprobes" | 29 | #define UPROBE_EVENT_SYSTEM "uprobes" |
30 | 30 | ||
31 | struct trace_uprobe_filter { | ||
32 | rwlock_t rwlock; | ||
33 | int nr_systemwide; | ||
34 | struct list_head perf_events; | ||
35 | }; | ||
36 | |||
31 | /* | 37 | /* |
32 | * uprobe event core functions | 38 | * uprobe event core functions |
33 | */ | 39 | */ |
34 | struct trace_uprobe; | ||
35 | struct uprobe_trace_consumer { | ||
36 | struct uprobe_consumer cons; | ||
37 | struct trace_uprobe *tu; | ||
38 | }; | ||
39 | |||
40 | struct trace_uprobe { | 40 | struct trace_uprobe { |
41 | struct list_head list; | 41 | struct list_head list; |
42 | struct ftrace_event_class class; | 42 | struct ftrace_event_class class; |
43 | struct ftrace_event_call call; | 43 | struct ftrace_event_call call; |
44 | struct uprobe_trace_consumer *consumer; | 44 | struct trace_uprobe_filter filter; |
45 | struct uprobe_consumer consumer; | ||
45 | struct inode *inode; | 46 | struct inode *inode; |
46 | char *filename; | 47 | char *filename; |
47 | unsigned long offset; | 48 | unsigned long offset; |
@@ -64,6 +65,18 @@ static LIST_HEAD(uprobe_list); | |||
64 | 65 | ||
65 | static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs); | 66 | static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs); |
66 | 67 | ||
68 | static inline void init_trace_uprobe_filter(struct trace_uprobe_filter *filter) | ||
69 | { | ||
70 | rwlock_init(&filter->rwlock); | ||
71 | filter->nr_systemwide = 0; | ||
72 | INIT_LIST_HEAD(&filter->perf_events); | ||
73 | } | ||
74 | |||
75 | static inline bool uprobe_filter_is_empty(struct trace_uprobe_filter *filter) | ||
76 | { | ||
77 | return !filter->nr_systemwide && list_empty(&filter->perf_events); | ||
78 | } | ||
79 | |||
67 | /* | 80 | /* |
68 | * Allocate new trace_uprobe and initialize it (including uprobes). | 81 | * Allocate new trace_uprobe and initialize it (including uprobes). |
69 | */ | 82 | */ |
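
For reference, a userspace model of the filter state introduced above, assuming pthread primitives and a hand-rolled singly linked list in place of rwlock_t and struct list_head; the layout and helper names mirror the hunk but are otherwise illustrative:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct perf_event_stub {                /* stand-in for struct perf_event */
        struct perf_event_stub *next;
};

struct uprobe_filter_model {
        pthread_rwlock_t rwlock;
        int nr_systemwide;
        struct perf_event_stub *perf_events;    /* list stand-in */
};

static void filter_init(struct uprobe_filter_model *f)
{
        pthread_rwlock_init(&f->rwlock, NULL);
        f->nr_systemwide = 0;
        f->perf_events = NULL;
}

/* "empty" means neither a system-wide nor a per-task attachment exists */
static bool filter_is_empty(const struct uprobe_filter_model *f)
{
        return !f->nr_systemwide && !f->perf_events;
}

int main(void)
{
        struct uprobe_filter_model f;

        filter_init(&f);
        printf("empty: %d\n", filter_is_empty(&f));     /* prints 1 */
        f.nr_systemwide++;
        printf("empty: %d\n", filter_is_empty(&f));     /* prints 0 */
        return 0;
}
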
@@ -92,6 +105,8 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs) | |||
92 | goto error; | 105 | goto error; |
93 | 106 | ||
94 | INIT_LIST_HEAD(&tu->list); | 107 | INIT_LIST_HEAD(&tu->list); |
108 | tu->consumer.handler = uprobe_dispatcher; | ||
109 | init_trace_uprobe_filter(&tu->filter); | ||
95 | return tu; | 110 | return tu; |
96 | 111 | ||
97 | error: | 112 | error: |
@@ -253,12 +268,18 @@ static int create_trace_uprobe(int argc, char **argv) | |||
253 | if (ret) | 268 | if (ret) |
254 | goto fail_address_parse; | 269 | goto fail_address_parse; |
255 | 270 | ||
271 | inode = igrab(path.dentry->d_inode); | ||
272 | path_put(&path); | ||
273 | |||
274 | if (!inode || !S_ISREG(inode->i_mode)) { | ||
275 | ret = -EINVAL; | ||
276 | goto fail_address_parse; | ||
277 | } | ||
278 | |||
256 | ret = kstrtoul(arg, 0, &offset); | 279 | ret = kstrtoul(arg, 0, &offset); |
257 | if (ret) | 280 | if (ret) |
258 | goto fail_address_parse; | 281 | goto fail_address_parse; |
259 | 282 | ||
260 | inode = igrab(path.dentry->d_inode); | ||
261 | |||
262 | argc -= 2; | 283 | argc -= 2; |
263 | argv += 2; | 284 | argv += 2; |
264 | 285 | ||
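
The reordering above grabs and validates the inode (regular files only) before the offset string is parsed, so a bad path and a bad offset both funnel into the same fail_address_parse path. A hedged userspace analogue, using stat(2) and strtoul() as stand-ins for igrab() and kstrtoul():

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>

static int parse_probe_spec(const char *path, const char *offs_str,
                            unsigned long *offset)
{
        struct stat st;
        char *end;

        /* target must exist and be a regular file before parsing the offset */
        if (stat(path, &st) || !S_ISREG(st.st_mode))
                return -EINVAL;         /* "Failed to parse address or file." */

        errno = 0;
        *offset = strtoul(offs_str, &end, 0);
        if (errno || *end != '\0')
                return -EINVAL;

        return 0;
}

int main(int argc, char **argv)
{
        unsigned long offset;

        if (argc != 3 || parse_probe_spec(argv[1], argv[2], &offset)) {
                fprintf(stderr, "usage: %s <regular-file> <offset>\n", argv[0]);
                return 1;
        }
        printf("probing %s at 0x%lx\n", argv[1], offset);
        return 0;
}
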
@@ -356,7 +377,7 @@ fail_address_parse: | |||
356 | if (inode) | 377 | if (inode) |
357 | iput(inode); | 378 | iput(inode); |
358 | 379 | ||
359 | pr_info("Failed to parse address.\n"); | 380 | pr_info("Failed to parse address or file.\n"); |
360 | 381 | ||
361 | return ret; | 382 | return ret; |
362 | } | 383 | } |
@@ -465,7 +486,7 @@ static const struct file_operations uprobe_profile_ops = { | |||
465 | }; | 486 | }; |
466 | 487 | ||
467 | /* uprobe handler */ | 488 | /* uprobe handler */ |
468 | static void uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs) | 489 | static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs) |
469 | { | 490 | { |
470 | struct uprobe_trace_entry_head *entry; | 491 | struct uprobe_trace_entry_head *entry; |
471 | struct ring_buffer_event *event; | 492 | struct ring_buffer_event *event; |
@@ -475,8 +496,6 @@ static void uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs) | |||
475 | unsigned long irq_flags; | 496 | unsigned long irq_flags; |
476 | struct ftrace_event_call *call = &tu->call; | 497 | struct ftrace_event_call *call = &tu->call; |
477 | 498 | ||
478 | tu->nhit++; | ||
479 | |||
480 | local_save_flags(irq_flags); | 499 | local_save_flags(irq_flags); |
481 | pc = preempt_count(); | 500 | pc = preempt_count(); |
482 | 501 | ||
@@ -485,16 +504,18 @@ static void uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs) | |||
485 | event = trace_current_buffer_lock_reserve(&buffer, call->event.type, | 504 | event = trace_current_buffer_lock_reserve(&buffer, call->event.type, |
486 | size, irq_flags, pc); | 505 | size, irq_flags, pc); |
487 | if (!event) | 506 | if (!event) |
488 | return; | 507 | return 0; |
489 | 508 | ||
490 | entry = ring_buffer_event_data(event); | 509 | entry = ring_buffer_event_data(event); |
491 | entry->ip = uprobe_get_swbp_addr(task_pt_regs(current)); | 510 | entry->ip = instruction_pointer(task_pt_regs(current)); |
492 | data = (u8 *)&entry[1]; | 511 | data = (u8 *)&entry[1]; |
493 | for (i = 0; i < tu->nr_args; i++) | 512 | for (i = 0; i < tu->nr_args; i++) |
494 | call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset); | 513 | call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset); |
495 | 514 | ||
496 | if (!filter_current_check_discard(buffer, call, entry, event)) | 515 | if (!filter_current_check_discard(buffer, call, entry, event)) |
497 | trace_buffer_unlock_commit(buffer, event, irq_flags, pc); | 516 | trace_buffer_unlock_commit(buffer, event, irq_flags, pc); |
517 | |||
518 | return 0; | ||
498 | } | 519 | } |
499 | 520 | ||
500 | /* Event entry printers */ | 521 | /* Event entry printers */ |
@@ -533,42 +554,43 @@ partial: | |||
533 | return TRACE_TYPE_PARTIAL_LINE; | 554 | return TRACE_TYPE_PARTIAL_LINE; |
534 | } | 555 | } |
535 | 556 | ||
536 | static int probe_event_enable(struct trace_uprobe *tu, int flag) | 557 | static inline bool is_trace_uprobe_enabled(struct trace_uprobe *tu) |
537 | { | 558 | { |
538 | struct uprobe_trace_consumer *utc; | 559 | return tu->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE); |
539 | int ret = 0; | 560 | } |
540 | 561 | ||
541 | if (!tu->inode || tu->consumer) | 562 | typedef bool (*filter_func_t)(struct uprobe_consumer *self, |
542 | return -EINTR; | 563 | enum uprobe_filter_ctx ctx, |
564 | struct mm_struct *mm); | ||
543 | 565 | ||
544 | utc = kzalloc(sizeof(struct uprobe_trace_consumer), GFP_KERNEL); | 566 | static int |
545 | if (!utc) | 567 | probe_event_enable(struct trace_uprobe *tu, int flag, filter_func_t filter) |
568 | { | ||
569 | int ret = 0; | ||
570 | |||
571 | if (is_trace_uprobe_enabled(tu)) | ||
546 | return -EINTR; | 572 | return -EINTR; |
547 | 573 | ||
548 | utc->cons.handler = uprobe_dispatcher; | 574 | WARN_ON(!uprobe_filter_is_empty(&tu->filter)); |
549 | utc->cons.filter = NULL; | ||
550 | ret = uprobe_register(tu->inode, tu->offset, &utc->cons); | ||
551 | if (ret) { | ||
552 | kfree(utc); | ||
553 | return ret; | ||
554 | } | ||
555 | 575 | ||
556 | tu->flags |= flag; | 576 | tu->flags |= flag; |
557 | utc->tu = tu; | 577 | tu->consumer.filter = filter; |
558 | tu->consumer = utc; | 578 | ret = uprobe_register(tu->inode, tu->offset, &tu->consumer); |
579 | if (ret) | ||
580 | tu->flags &= ~flag; | ||
559 | 581 | ||
560 | return 0; | 582 | return ret; |
561 | } | 583 | } |
562 | 584 | ||
563 | static void probe_event_disable(struct trace_uprobe *tu, int flag) | 585 | static void probe_event_disable(struct trace_uprobe *tu, int flag) |
564 | { | 586 | { |
565 | if (!tu->inode || !tu->consumer) | 587 | if (!is_trace_uprobe_enabled(tu)) |
566 | return; | 588 | return; |
567 | 589 | ||
568 | uprobe_unregister(tu->inode, tu->offset, &tu->consumer->cons); | 590 | WARN_ON(!uprobe_filter_is_empty(&tu->filter)); |
591 | |||
592 | uprobe_unregister(tu->inode, tu->offset, &tu->consumer); | ||
569 | tu->flags &= ~flag; | 593 | tu->flags &= ~flag; |
570 | kfree(tu->consumer); | ||
571 | tu->consumer = NULL; | ||
572 | } | 594 | } |
573 | 595 | ||
574 | static int uprobe_event_define_fields(struct ftrace_event_call *event_call) | 596 | static int uprobe_event_define_fields(struct ftrace_event_call *event_call) |
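
The rewritten enable/disable pair above keeps the TP_FLAG_* bookkeeping in step with the actual registration: the flag is set first and rolled back if uprobe_register() fails. A compact userspace sketch of that rollback pattern, with fake_register() and the FLAG_* values standing in for the real API:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

#define FLAG_TRACE      0x1
#define FLAG_PROFILE    0x2

struct probe_model {
        unsigned int flags;
        bool registered;        /* stand-in for uprobe_register() state */
};

static int fake_register(struct probe_model *p, bool should_fail)
{
        if (should_fail)
                return -ENOMEM;
        p->registered = true;
        return 0;
}

static bool probe_is_enabled(const struct probe_model *p)
{
        return p->flags & (FLAG_TRACE | FLAG_PROFILE);
}

static int probe_enable(struct probe_model *p, unsigned int flag, bool fail)
{
        int ret;

        if (probe_is_enabled(p))
                return -EINTR;          /* already enabled in some mode */

        p->flags |= flag;
        ret = fake_register(p, fail);
        if (ret)
                p->flags &= ~flag;      /* roll back on failure */
        return ret;
}

int main(void)
{
        struct probe_model p = { 0 };

        printf("enable (failing register): %d, enabled=%d\n",
               probe_enable(&p, FLAG_TRACE, true), probe_is_enabled(&p));
        printf("enable (working register): %d, enabled=%d\n",
               probe_enable(&p, FLAG_TRACE, false), probe_is_enabled(&p));
        return 0;
}
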
@@ -642,8 +664,96 @@ static int set_print_fmt(struct trace_uprobe *tu) | |||
642 | } | 664 | } |
643 | 665 | ||
644 | #ifdef CONFIG_PERF_EVENTS | 666 | #ifdef CONFIG_PERF_EVENTS |
667 | static bool | ||
668 | __uprobe_perf_filter(struct trace_uprobe_filter *filter, struct mm_struct *mm) | ||
669 | { | ||
670 | struct perf_event *event; | ||
671 | |||
672 | if (filter->nr_systemwide) | ||
673 | return true; | ||
674 | |||
675 | list_for_each_entry(event, &filter->perf_events, hw.tp_list) { | ||
676 | if (event->hw.tp_target->mm == mm) | ||
677 | return true; | ||
678 | } | ||
679 | |||
680 | return false; | ||
681 | } | ||
682 | |||
683 | static inline bool | ||
684 | uprobe_filter_event(struct trace_uprobe *tu, struct perf_event *event) | ||
685 | { | ||
686 | return __uprobe_perf_filter(&tu->filter, event->hw.tp_target->mm); | ||
687 | } | ||
688 | |||
689 | static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event) | ||
690 | { | ||
691 | bool done; | ||
692 | |||
693 | write_lock(&tu->filter.rwlock); | ||
694 | if (event->hw.tp_target) { | ||
695 | /* | ||
696 | * event->parent != NULL means copy_process(), we can avoid | ||
697 | * uprobe_apply(). current->mm must be probed and we can rely | ||
698 | * on dup_mmap() which preserves the already installed bp's. | ||
699 | * | ||
700 | * attr.enable_on_exec means that exec/mmap will install the | ||
701 | * breakpoints we need. | ||
702 | */ | ||
703 | done = tu->filter.nr_systemwide || | ||
704 | event->parent || event->attr.enable_on_exec || | ||
705 | uprobe_filter_event(tu, event); | ||
706 | list_add(&event->hw.tp_list, &tu->filter.perf_events); | ||
707 | } else { | ||
708 | done = tu->filter.nr_systemwide; | ||
709 | tu->filter.nr_systemwide++; | ||
710 | } | ||
711 | write_unlock(&tu->filter.rwlock); | ||
712 | |||
713 | if (!done) | ||
714 | uprobe_apply(tu->inode, tu->offset, &tu->consumer, true); | ||
715 | |||
716 | return 0; | ||
717 | } | ||
718 | |||
719 | static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event) | ||
720 | { | ||
721 | bool done; | ||
722 | |||
723 | write_lock(&tu->filter.rwlock); | ||
724 | if (event->hw.tp_target) { | ||
725 | list_del(&event->hw.tp_list); | ||
726 | done = tu->filter.nr_systemwide || | ||
727 | (event->hw.tp_target->flags & PF_EXITING) || | ||
728 | uprobe_filter_event(tu, event); | ||
729 | } else { | ||
730 | tu->filter.nr_systemwide--; | ||
731 | done = tu->filter.nr_systemwide; | ||
732 | } | ||
733 | write_unlock(&tu->filter.rwlock); | ||
734 | |||
735 | if (!done) | ||
736 | uprobe_apply(tu->inode, tu->offset, &tu->consumer, false); | ||
737 | |||
738 | return 0; | ||
739 | } | ||
740 | |||
741 | static bool uprobe_perf_filter(struct uprobe_consumer *uc, | ||
742 | enum uprobe_filter_ctx ctx, struct mm_struct *mm) | ||
743 | { | ||
744 | struct trace_uprobe *tu; | ||
745 | int ret; | ||
746 | |||
747 | tu = container_of(uc, struct trace_uprobe, consumer); | ||
748 | read_lock(&tu->filter.rwlock); | ||
749 | ret = __uprobe_perf_filter(&tu->filter, mm); | ||
750 | read_unlock(&tu->filter.rwlock); | ||
751 | |||
752 | return ret; | ||
753 | } | ||
754 | |||
645 | /* uprobe profile handler */ | 755 | /* uprobe profile handler */ |
646 | static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs) | 756 | static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs) |
647 | { | 757 | { |
648 | struct ftrace_event_call *call = &tu->call; | 758 | struct ftrace_event_call *call = &tu->call; |
649 | struct uprobe_trace_entry_head *entry; | 759 | struct uprobe_trace_entry_head *entry; |
@@ -652,11 +762,14 @@ static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs) | |||
652 | int size, __size, i; | 762 | int size, __size, i; |
653 | int rctx; | 763 | int rctx; |
654 | 764 | ||
765 | if (!uprobe_perf_filter(&tu->consumer, 0, current->mm)) | ||
766 | return UPROBE_HANDLER_REMOVE; | ||
767 | |||
655 | __size = sizeof(*entry) + tu->size; | 768 | __size = sizeof(*entry) + tu->size; |
656 | size = ALIGN(__size + sizeof(u32), sizeof(u64)); | 769 | size = ALIGN(__size + sizeof(u32), sizeof(u64)); |
657 | size -= sizeof(u32); | 770 | size -= sizeof(u32); |
658 | if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough")) | 771 | if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough")) |
659 | return; | 772 | return 0; |
660 | 773 | ||
661 | preempt_disable(); | 774 | preempt_disable(); |
662 | 775 | ||
@@ -664,7 +777,7 @@ static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs) | |||
664 | if (!entry) | 777 | if (!entry) |
665 | goto out; | 778 | goto out; |
666 | 779 | ||
667 | entry->ip = uprobe_get_swbp_addr(task_pt_regs(current)); | 780 | entry->ip = instruction_pointer(task_pt_regs(current)); |
668 | data = (u8 *)&entry[1]; | 781 | data = (u8 *)&entry[1]; |
669 | for (i = 0; i < tu->nr_args; i++) | 782 | for (i = 0; i < tu->nr_args; i++) |
670 | call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset); | 783 | call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset); |
@@ -674,6 +787,7 @@ static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs) | |||
674 | 787 | ||
675 | out: | 788 | out: |
676 | preempt_enable(); | 789 | preempt_enable(); |
790 | return 0; | ||
677 | } | 791 | } |
678 | #endif /* CONFIG_PERF_EVENTS */ | 792 | #endif /* CONFIG_PERF_EVENTS */ |
679 | 793 | ||
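
The perf path above arms or disarms breakpoints only when the filter decision actually changes: a probe stays live for an mm while a system-wide counter is attached or some attached per-task event targets that mm. A userspace model of the open-side bookkeeping, assuming a pthread rwlock, a hand-rolled list, and printf() in place of uprobe_apply(); the close side is symmetric and omitted for brevity:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct event_model {                    /* stand-in for struct perf_event */
        int target_mm;                  /* ~ hw.tp_target->mm; 0 means system-wide */
        struct event_model *next;
};

struct filter_model {
        pthread_rwlock_t rwlock;
        int nr_systemwide;
        struct event_model *events;     /* ~ filter->perf_events */
};

/* lockless core, like __uprobe_perf_filter(): callers take the lock */
static bool filter_match(struct filter_model *f, int mm)
{
        struct event_model *e;

        if (f->nr_systemwide)
                return true;
        for (e = f->events; e; e = e->next)
                if (e->target_mm == mm)
                        return true;
        return false;
}

static void filter_open(struct filter_model *f, struct event_model *e)
{
        bool already;

        pthread_rwlock_wrlock(&f->rwlock);
        if (e->target_mm) {             /* per-task event */
                already = filter_match(f, e->target_mm);
                e->next = f->events;
                f->events = e;
        } else {                        /* system-wide event */
                already = f->nr_systemwide != 0;
                f->nr_systemwide++;
        }
        pthread_rwlock_unlock(&f->rwlock);

        if (!already)                   /* ~ uprobe_apply(inode, offset, consumer, true) */
                printf("arm breakpoints for %s\n",
                       e->target_mm ? "one mm" : "every mm");
}

int main(void)
{
        struct filter_model f = { .nr_systemwide = 0, .events = NULL };
        struct event_model per_task = { .target_mm = 7 };
        struct event_model systemwide = { .target_mm = 0 };

        pthread_rwlock_init(&f.rwlock, NULL);
        filter_open(&f, &per_task);     /* arms: mm 7 was not covered yet */
        filter_open(&f, &systemwide);   /* arms again: now every mm is covered */
        /* single-threaded demo, so no read lock needed here */
        printf("mm 9 passes the filter: %d\n", filter_match(&f, 9));    /* 1 */
        return 0;
}
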
@@ -684,7 +798,7 @@ int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type, | |||
684 | 798 | ||
685 | switch (type) { | 799 | switch (type) { |
686 | case TRACE_REG_REGISTER: | 800 | case TRACE_REG_REGISTER: |
687 | return probe_event_enable(tu, TP_FLAG_TRACE); | 801 | return probe_event_enable(tu, TP_FLAG_TRACE, NULL); |
688 | 802 | ||
689 | case TRACE_REG_UNREGISTER: | 803 | case TRACE_REG_UNREGISTER: |
690 | probe_event_disable(tu, TP_FLAG_TRACE); | 804 | probe_event_disable(tu, TP_FLAG_TRACE); |
@@ -692,11 +806,18 @@ int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type, | |||
692 | 806 | ||
693 | #ifdef CONFIG_PERF_EVENTS | 807 | #ifdef CONFIG_PERF_EVENTS |
694 | case TRACE_REG_PERF_REGISTER: | 808 | case TRACE_REG_PERF_REGISTER: |
695 | return probe_event_enable(tu, TP_FLAG_PROFILE); | 809 | return probe_event_enable(tu, TP_FLAG_PROFILE, uprobe_perf_filter); |
696 | 810 | ||
697 | case TRACE_REG_PERF_UNREGISTER: | 811 | case TRACE_REG_PERF_UNREGISTER: |
698 | probe_event_disable(tu, TP_FLAG_PROFILE); | 812 | probe_event_disable(tu, TP_FLAG_PROFILE); |
699 | return 0; | 813 | return 0; |
814 | |||
815 | case TRACE_REG_PERF_OPEN: | ||
816 | return uprobe_perf_open(tu, data); | ||
817 | |||
818 | case TRACE_REG_PERF_CLOSE: | ||
819 | return uprobe_perf_close(tu, data); | ||
820 | |||
700 | #endif | 821 | #endif |
701 | default: | 822 | default: |
702 | return 0; | 823 | return 0; |
@@ -706,22 +827,20 @@ int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type, | |||
706 | 827 | ||
707 | static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs) | 828 | static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs) |
708 | { | 829 | { |
709 | struct uprobe_trace_consumer *utc; | ||
710 | struct trace_uprobe *tu; | 830 | struct trace_uprobe *tu; |
831 | int ret = 0; | ||
711 | 832 | ||
712 | utc = container_of(con, struct uprobe_trace_consumer, cons); | 833 | tu = container_of(con, struct trace_uprobe, consumer); |
713 | tu = utc->tu; | 834 | tu->nhit++; |
714 | if (!tu || tu->consumer != utc) | ||
715 | return 0; | ||
716 | 835 | ||
717 | if (tu->flags & TP_FLAG_TRACE) | 836 | if (tu->flags & TP_FLAG_TRACE) |
718 | uprobe_trace_func(tu, regs); | 837 | ret |= uprobe_trace_func(tu, regs); |
719 | 838 | ||
720 | #ifdef CONFIG_PERF_EVENTS | 839 | #ifdef CONFIG_PERF_EVENTS |
721 | if (tu->flags & TP_FLAG_PROFILE) | 840 | if (tu->flags & TP_FLAG_PROFILE) |
722 | uprobe_perf_func(tu, regs); | 841 | ret |= uprobe_perf_func(tu, regs); |
723 | #endif | 842 | #endif |
724 | return 0; | 843 | return ret; |
725 | } | 844 | } |
726 | 845 | ||
727 | static struct trace_event_functions uprobe_funcs = { | 846 | static struct trace_event_functions uprobe_funcs = { |
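
With uprobe_trace_func() and uprobe_perf_func() now returning int, the dispatcher above OR-s their results, so a filter-driven UPROBE_HANDLER_REMOVE from the perf path still reaches the uprobe layer when the trace path also ran and returned 0. A small illustrative model (userspace, fabricated handler names):

#include <stdio.h>

#define HANDLER_REMOVE  1       /* stand-in for UPROBE_HANDLER_REMOVE */

static int trace_handler(void)
{
        return 0;                       /* tracing never asks for removal */
}

static int perf_handler(int mm_is_filtered_in)
{
        return mm_is_filtered_in ? 0 : HANDLER_REMOVE;
}

static int dispatcher(int trace_on, int perf_on, int mm_is_filtered_in)
{
        int ret = 0;

        if (trace_on)
                ret |= trace_handler();
        if (perf_on)
                ret |= perf_handler(mm_is_filtered_in);
        return ret;
}

int main(void)
{
        /* perf-only consumer hit in a task whose mm is filtered out */
        printf("dispatcher -> %d (1 == remove breakpoint)\n",
               dispatcher(0, 1, 0));
        return 0;
}
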