about | summary | refs | log | tree | commit | diff | stats
path: root/kernel
diff options
context:
space:
mode:
author: Ingo Molnar <mingo@kernel.org> 2014-06-05 05:18:03 -0400
committer: Ingo Molnar <mingo@kernel.org> 2014-06-05 06:26:27 -0400
commit: c56d34064b6eb9f9cde9e35bbfe16eedf3d81f94 (patch)
tree: 9877ef9e1b238e14a1878f10d51ea55fbca5f619 /kernel
parent: b13fa91421213a8d1fd05086050f05e994f3b72d (diff)
parent: a03b1e1c372b60183b8141cdd161316429fab5ac (diff)
Merge branch 'perf/uprobes' into perf/core
These bits from Oleg are fully cooked, ship them to Linus. Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/events/uprobes.c 35
-rw-r--r-- kernel/trace/trace_uprobe.c 46
2 files changed, 47 insertions, 34 deletions
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index d1edc5e6fd03..3b02c72938a8 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -127,7 +127,7 @@ struct xol_area {
127 */ 127 */
128static bool valid_vma(struct vm_area_struct *vma, bool is_register) 128static bool valid_vma(struct vm_area_struct *vma, bool is_register)
129{ 129{
130 vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_SHARED; 130 vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_MAYSHARE;
131 131
132 if (is_register) 132 if (is_register)
133 flags |= VM_WRITE; 133 flags |= VM_WRITE;
@@ -279,18 +279,13 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t
279 * supported by that architecture then we need to modify is_trap_at_addr and 279 * supported by that architecture then we need to modify is_trap_at_addr and
280 * uprobe_write_opcode accordingly. This would never be a problem for archs 280 * uprobe_write_opcode accordingly. This would never be a problem for archs
281 * that have fixed length instructions. 281 * that have fixed length instructions.
282 */ 282 *
283
284/*
285 * uprobe_write_opcode - write the opcode at a given virtual address. 283 * uprobe_write_opcode - write the opcode at a given virtual address.
286 * @mm: the probed process address space. 284 * @mm: the probed process address space.
287 * @vaddr: the virtual address to store the opcode. 285 * @vaddr: the virtual address to store the opcode.
288 * @opcode: opcode to be written at @vaddr. 286 * @opcode: opcode to be written at @vaddr.
289 * 287 *
290 * Called with mm->mmap_sem held (for read and with a reference to 288 * Called with mm->mmap_sem held for write.
291 * mm).
292 *
293 * For mm @mm, write the opcode at @vaddr.
294 * Return 0 (success) or a negative errno. 289 * Return 0 (success) or a negative errno.
295 */ 290 */
296int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, 291int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr,
@@ -310,21 +305,25 @@ retry:
310 if (ret <= 0) 305 if (ret <= 0)
311 goto put_old; 306 goto put_old;
312 307
308 ret = anon_vma_prepare(vma);
309 if (ret)
310 goto put_old;
311
313 ret = -ENOMEM; 312 ret = -ENOMEM;
314 new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr); 313 new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr);
315 if (!new_page) 314 if (!new_page)
316 goto put_old; 315 goto put_old;
317 316
318 __SetPageUptodate(new_page); 317 if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))
318 goto put_new;
319 319
320 __SetPageUptodate(new_page);
320 copy_highpage(new_page, old_page); 321 copy_highpage(new_page, old_page);
321 copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE); 322 copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
322 323
323 ret = anon_vma_prepare(vma);
324 if (ret)
325 goto put_new;
326
327 ret = __replace_page(vma, vaddr, old_page, new_page); 324 ret = __replace_page(vma, vaddr, old_page, new_page);
325 if (ret)
326 mem_cgroup_uncharge_page(new_page);
328 327
329put_new: 328put_new:
330 page_cache_release(new_page); 329 page_cache_release(new_page);
@@ -1352,6 +1351,16 @@ unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs)
1352 return instruction_pointer(regs) - UPROBE_SWBP_INSN_SIZE; 1351 return instruction_pointer(regs) - UPROBE_SWBP_INSN_SIZE;
1353} 1352}
1354 1353
1354unsigned long uprobe_get_trap_addr(struct pt_regs *regs)
1355{
1356 struct uprobe_task *utask = current->utask;
1357
1358 if (unlikely(utask && utask->active_uprobe))
1359 return utask->vaddr;
1360
1361 return instruction_pointer(regs);
1362}
1363
1355/* 1364/*
1356 * Called with no locks held. 1365 * Called with no locks held.
1357 * Called in context of a exiting or a exec-ing thread. 1366 * Called in context of a exiting or a exec-ing thread.
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index c082a7441345..5a7f1a6b3b8b 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -1009,56 +1009,60 @@ uprobe_filter_event(struct trace_uprobe *tu, struct perf_event *event)
1009 return __uprobe_perf_filter(&tu->filter, event->hw.tp_target->mm); 1009 return __uprobe_perf_filter(&tu->filter, event->hw.tp_target->mm);
1010} 1010}
1011 1011
1012static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event) 1012static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event)
1013{ 1013{
1014 bool done; 1014 bool done;
1015 1015
1016 write_lock(&tu->filter.rwlock); 1016 write_lock(&tu->filter.rwlock);
1017 if (event->hw.tp_target) { 1017 if (event->hw.tp_target) {
1018 /* 1018 list_del(&event->hw.tp_list);
1019 * event->parent != NULL means copy_process(), we can avoid
1020 * uprobe_apply(). current->mm must be probed and we can rely
1021 * on dup_mmap() which preserves the already installed bp's.
1022 *
1023 * attr.enable_on_exec means that exec/mmap will install the
1024 * breakpoints we need.
1025 */
1026 done = tu->filter.nr_systemwide || 1019 done = tu->filter.nr_systemwide ||
1027 event->parent || event->attr.enable_on_exec || 1020 (event->hw.tp_target->flags & PF_EXITING) ||
1028 uprobe_filter_event(tu, event); 1021 uprobe_filter_event(tu, event);
1029 list_add(&event->hw.tp_list, &tu->filter.perf_events);
1030 } else { 1022 } else {
1023 tu->filter.nr_systemwide--;
1031 done = tu->filter.nr_systemwide; 1024 done = tu->filter.nr_systemwide;
1032 tu->filter.nr_systemwide++;
1033 } 1025 }
1034 write_unlock(&tu->filter.rwlock); 1026 write_unlock(&tu->filter.rwlock);
1035 1027
1036 if (!done) 1028 if (!done)
1037 uprobe_apply(tu->inode, tu->offset, &tu->consumer, true); 1029 return uprobe_apply(tu->inode, tu->offset, &tu->consumer, false);
1038 1030
1039 return 0; 1031 return 0;
1040} 1032}
1041 1033
1042static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event) 1034static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event)
1043{ 1035{
1044 bool done; 1036 bool done;
1037 int err;
1045 1038
1046 write_lock(&tu->filter.rwlock); 1039 write_lock(&tu->filter.rwlock);
1047 if (event->hw.tp_target) { 1040 if (event->hw.tp_target) {
1048 list_del(&event->hw.tp_list); 1041 /*
1042 * event->parent != NULL means copy_process(), we can avoid
1043 * uprobe_apply(). current->mm must be probed and we can rely
1044 * on dup_mmap() which preserves the already installed bp's.
1045 *
1046 * attr.enable_on_exec means that exec/mmap will install the
1047 * breakpoints we need.
1048 */
1049 done = tu->filter.nr_systemwide || 1049 done = tu->filter.nr_systemwide ||
1050 (event->hw.tp_target->flags & PF_EXITING) || 1050 event->parent || event->attr.enable_on_exec ||
1051 uprobe_filter_event(tu, event); 1051 uprobe_filter_event(tu, event);
1052 list_add(&event->hw.tp_list, &tu->filter.perf_events);
1052 } else { 1053 } else {
1053 tu->filter.nr_systemwide--;
1054 done = tu->filter.nr_systemwide; 1054 done = tu->filter.nr_systemwide;
1055 tu->filter.nr_systemwide++;
1055 } 1056 }
1056 write_unlock(&tu->filter.rwlock); 1057 write_unlock(&tu->filter.rwlock);
1057 1058
1058 if (!done) 1059 err = 0;
1059 uprobe_apply(tu->inode, tu->offset, &tu->consumer, false); 1060 if (!done) {
1060 1061 err = uprobe_apply(tu->inode, tu->offset, &tu->consumer, true);
1061 return 0; 1062 if (err)
1063 uprobe_perf_close(tu, event);
1064 }
1065 return err;
1062} 1066}
1063 1067
1064static bool uprobe_perf_filter(struct uprobe_consumer *uc, 1068static bool uprobe_perf_filter(struct uprobe_consumer *uc,