author		Namhyung Kim <namhyung.kim@lge.com>	2013-07-03 03:40:28 -0400
committer	Steven Rostedt <rostedt@goodmis.org>	2014-01-02 16:17:44 -0500
commit		dcad1a204f72624796ae83359403898d10393b9c (patch)
tree		801d8e11c1e2ffef8c8de48980a01a08a5ddd194 /kernel/trace
parent		a4734145a4771ffa0cd5ef283a5cfd03b30bedf3 (diff)
tracing/uprobes: Fetch args before reserving a ring buffer
Fetching from user space should be done in a non-atomic context. So use a
per-cpu buffer and copy its content to the ring buffer atomically. Note
that we can migrate while accessing user memory, thus use a per-cpu mutex
to protect concurrent accesses.

This is needed since we'll be able to fetch args from user memory which
can be swapped out. Before this, uprobes could fetch args only from
registers, which are saved in kernel space.

While at it, use __get_data_size() and store_trace_args() to reduce code
duplication. And add struct uprobe_cpu_buffer and its helpers as
suggested by Oleg.

Reviewed-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: zhangwei(Jovi) <jovi.zhangwei@huawei.com>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
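To make the locking rationale concrete, here is a minimal, hypothetical sketch of the pattern the patch adopts (per-cpu scratch buffers guarded by per-cpu mutexes). The example_* names are illustrative only; the patch's actual helpers, uprobe_buffer_get()/uprobe_buffer_put(), appear in the diff below.

/* Hypothetical sketch of the per-cpu buffer pattern (kernel context assumed). */
#include <linux/percpu.h>
#include <linux/mutex.h>
#include <linux/smp.h>

struct example_cpu_buffer {
        struct mutex mutex;
        void *buf;      /* scratch page, filled while sleeping is still allowed */
};

/* Allocated with alloc_percpu() at enable time, one buffer per possible CPU. */
static struct example_cpu_buffer __percpu *example_buffers;

/*
 * Called from a context that may fault (fetching uprobe args can trigger
 * copy_from_user() and thus sleep): take the buffer of the current CPU,
 * but under its mutex, because the task may migrate to another CPU while
 * it sleeps and another task could land on this CPU's buffer.
 */
static struct example_cpu_buffer *example_buffer_get(void)
{
        struct example_cpu_buffer *ucb;

        ucb = per_cpu_ptr(example_buffers, raw_smp_processor_id());
        mutex_lock(&ucb->mutex);
        return ucb;
}

static void example_buffer_put(struct example_cpu_buffer *ucb)
{
        mutex_unlock(&ucb->mutex);
}

The handler fills the scratch buffer first and only then reserves the ring-buffer event and memcpy()s the prepared data into it, so no page fault can occur inside the atomic reservation.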
Diffstat (limited to 'kernel/trace')
-rw-r--r--	kernel/trace/trace_uprobe.c	146
1 file changed, 132 insertions(+), 14 deletions(-)
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 8bfd29a8d713..794e8bc171f3 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -652,21 +652,117 @@ static const struct file_operations uprobe_profile_ops = {
         .release        = seq_release,
 };
 
+struct uprobe_cpu_buffer {
+        struct mutex mutex;
+        void *buf;
+};
+static struct uprobe_cpu_buffer __percpu *uprobe_cpu_buffer;
+static int uprobe_buffer_refcnt;
+
+static int uprobe_buffer_init(void)
+{
+        int cpu, err_cpu;
+
+        uprobe_cpu_buffer = alloc_percpu(struct uprobe_cpu_buffer);
+        if (uprobe_cpu_buffer == NULL)
+                return -ENOMEM;
+
+        for_each_possible_cpu(cpu) {
+                struct page *p = alloc_pages_node(cpu_to_node(cpu),
+                                                  GFP_KERNEL, 0);
+                if (p == NULL) {
+                        err_cpu = cpu;
+                        goto err;
+                }
+                per_cpu_ptr(uprobe_cpu_buffer, cpu)->buf = page_address(p);
+                mutex_init(&per_cpu_ptr(uprobe_cpu_buffer, cpu)->mutex);
+        }
+
+        return 0;
+
+err:
+        for_each_possible_cpu(cpu) {
+                if (cpu == err_cpu)
+                        break;
+                free_page((unsigned long)per_cpu_ptr(uprobe_cpu_buffer, cpu)->buf);
+        }
+
+        free_percpu(uprobe_cpu_buffer);
+        return -ENOMEM;
+}
+
+static int uprobe_buffer_enable(void)
+{
+        int ret = 0;
+
+        BUG_ON(!mutex_is_locked(&event_mutex));
+
+        if (uprobe_buffer_refcnt++ == 0) {
+                ret = uprobe_buffer_init();
+                if (ret < 0)
+                        uprobe_buffer_refcnt--;
+        }
+
+        return ret;
+}
+
+static void uprobe_buffer_disable(void)
+{
+        BUG_ON(!mutex_is_locked(&event_mutex));
+
+        if (--uprobe_buffer_refcnt == 0) {
+                free_percpu(uprobe_cpu_buffer);
+                uprobe_cpu_buffer = NULL;
+        }
+}
+
+static struct uprobe_cpu_buffer *uprobe_buffer_get(void)
+{
+        struct uprobe_cpu_buffer *ucb;
+        int cpu;
+
+        cpu = raw_smp_processor_id();
+        ucb = per_cpu_ptr(uprobe_cpu_buffer, cpu);
+
+        /*
+         * Use per-cpu buffers for fastest access, but we might migrate
+         * so the mutex makes sure we have sole access to it.
+         */
+        mutex_lock(&ucb->mutex);
+
+        return ucb;
+}
+
+static void uprobe_buffer_put(struct uprobe_cpu_buffer *ucb)
+{
+        mutex_unlock(&ucb->mutex);
+}
+
 static void uprobe_trace_print(struct trace_uprobe *tu,
                                 unsigned long func, struct pt_regs *regs)
 {
         struct uprobe_trace_entry_head *entry;
         struct ring_buffer_event *event;
         struct ring_buffer *buffer;
+        struct uprobe_cpu_buffer *ucb;
         void *data;
-        int size, i;
+        int size, dsize, esize;
         struct ftrace_event_call *call = &tu->tp.call;
 
-        size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
+        dsize = __get_data_size(&tu->tp, regs);
+        esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
+
+        if (WARN_ON_ONCE(!uprobe_cpu_buffer || tu->tp.size + dsize > PAGE_SIZE))
+                return;
+
+        ucb = uprobe_buffer_get();
+        store_trace_args(esize, &tu->tp, regs, ucb->buf, dsize);
+
+        size = esize + tu->tp.size + dsize;
         event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
-                                                  size + tu->tp.size, 0, 0);
+                                                  size, 0, 0);
         if (!event)
-                return;
+                goto out;
 
         entry = ring_buffer_event_data(event);
         if (is_ret_probe(tu)) {
@@ -678,13 +774,13 @@ static void uprobe_trace_print(struct trace_uprobe *tu,
                 data = DATAOF_TRACE_ENTRY(entry, false);
         }
 
-        for (i = 0; i < tu->tp.nr_args; i++) {
-                call_fetch(&tu->tp.args[i].fetch, regs,
-                           data + tu->tp.args[i].offset);
-        }
+        memcpy(data, ucb->buf, tu->tp.size + dsize);
 
         if (!call_filter_check_discard(call, entry, buffer, event))
                 trace_buffer_unlock_commit(buffer, event, 0, 0);
+
+out:
+        uprobe_buffer_put(ucb);
 }
 
 /* uprobe handler */
@@ -752,6 +848,10 @@ probe_event_enable(struct trace_uprobe *tu, int flag, filter_func_t filter)
         if (trace_probe_is_enabled(&tu->tp))
                 return -EINTR;
 
+        ret = uprobe_buffer_enable();
+        if (ret < 0)
+                return ret;
+
         WARN_ON(!uprobe_filter_is_empty(&tu->filter));
 
         tu->tp.flags |= flag;
@@ -772,6 +872,8 @@ static void probe_event_disable(struct trace_uprobe *tu, int flag)
 
         uprobe_unregister(tu->inode, tu->offset, &tu->consumer);
         tu->tp.flags &= ~flag;
+
+        uprobe_buffer_disable();
 }
 
 static int uprobe_event_define_fields(struct ftrace_event_call *event_call)
@@ -898,11 +1000,24 @@ static void uprobe_perf_print(struct trace_uprobe *tu,
         struct ftrace_event_call *call = &tu->tp.call;
         struct uprobe_trace_entry_head *entry;
         struct hlist_head *head;
+        struct uprobe_cpu_buffer *ucb;
         void *data;
-        int size, rctx, i;
+        int size, dsize, esize;
+        int rctx;
+
+        dsize = __get_data_size(&tu->tp, regs);
+        esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
 
-        size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
-        size = ALIGN(size + tu->tp.size + sizeof(u32), sizeof(u64)) - sizeof(u32);
+        if (WARN_ON_ONCE(!uprobe_cpu_buffer))
+                return;
+
+        size = esize + tu->tp.size + dsize;
+        size = ALIGN(size + sizeof(u32), sizeof(u64)) - sizeof(u32);
+        if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough"))
+                return;
+
+        ucb = uprobe_buffer_get();
+        store_trace_args(esize, &tu->tp, regs, ucb->buf, dsize);
 
         preempt_disable();
         head = this_cpu_ptr(call->perf_events);
@@ -922,15 +1037,18 @@ static void uprobe_perf_print(struct trace_uprobe *tu,
                 data = DATAOF_TRACE_ENTRY(entry, false);
         }
 
-        for (i = 0; i < tu->tp.nr_args; i++) {
-                struct probe_arg *parg = &tu->tp.args[i];
+        memcpy(data, ucb->buf, tu->tp.size + dsize);
+
+        if (size - esize > tu->tp.size + dsize) {
+                int len = tu->tp.size + dsize;
 
-                call_fetch(&parg->fetch, regs, data + parg->offset);
+                memset(data + len, 0, size - esize - len);
         }
 
         perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);
 out:
         preempt_enable();
+        uprobe_buffer_put(ucb);
 }
 
 /* uprobe profile handler */