diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-04-30 10:41:01 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-04-30 10:41:01 -0400 |
| commit | e0972916e8fe943f342b0dd1c9d43dbf5bc261c2 (patch) | |
| tree | 690c436f1f9b839c4ba34d17ab3efa63b97a2dce /kernel | |
| parent | 1f889ec62c3f0d8913f3c32f9aff2a1e15099346 (diff) | |
| parent | 5ac2b5c2721501a8f5c5e1cd4116cbc31ace6886 (diff) | |
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar:
"Features:
- Add "uretprobes" - an optimization to uprobes, like kretprobes are
an optimization to kprobes. "perf probe -x file sym%return" now
works like kretprobes. By Oleg Nesterov.
- Introduce per core aggregation in 'perf stat', from Stephane
Eranian.
- Add memory profiling via PEBS, from Stephane Eranian.
- Event group view for 'annotate' in --stdio, --tui and --gtk, from
Namhyung Kim.
- Add support for AMD NB and L2I "uncore" counters, by Jacob Shin.
- Add Ivy Bridge-EP uncore support, by Zheng Yan
- IBM zEnterprise EC12 oprofile support patchlet from Robert Richter.
- Add perf test entries for checking breakpoint overflow signal
handler issues, from Jiri Olsa.
- Add perf test entry for checking number of EXIT events, from
Namhyung Kim.
- Add perf test entries for checking --cpu in record and stat, from
Jiri Olsa.
- Introduce perf stat --repeat forever, from Frederik Deweerdt.
- Add --no-demangle to report/top, from Namhyung Kim.
- PowerPC fixes plus a couple of cleanups/optimizations in uprobes
and trace_uprobes, by Oleg Nesterov.
Various fixes and refactorings:
- Fix dependency of the python binding wrt libtraceevent, from
Naohiro Aota.
- Simplify some perf_evlist methods and to allow 'stat' to share code
with 'record' and 'trace', by Arnaldo Carvalho de Melo.
- Remove dead code in related to libtraceevent integration, from
Namhyung Kim.
- Revert "perf sched: Handle PERF_RECORD_EXIT events" to get 'perf
sched lat' back working, by Arnaldo Carvalho de Melo
- We don't use Newt anymore, just plain libslang, by Arnaldo Carvalho
de Melo.
- Kill a bunch of die() calls, from Namhyung Kim.
- Fix build on non-glibc systems due to libio.h absence, from Cody P
Schafer.
- Remove some perf_session and tracing dead code, from David Ahern.
- Honor parallel jobs, fix from Borislav Petkov.
- Introduce tools/lib/lk library, initially just removing duplication
among tools/perf and tools/vm, from Borislav Petkov.
... and many more I missed to list, see the shortlog and git log for
more details."
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (136 commits)
perf/x86/intel/P4: Robistify P4 PMU types
perf/x86/amd: Fix AMD NB and L2I "uncore" support
perf/x86/amd: Remove old-style NB counter support from perf_event_amd.c
perf/x86: Check all MSRs before passing hw check
perf/x86/amd: Add support for AMD NB and L2I "uncore" counters
perf/x86/intel: Add Ivy Bridge-EP uncore support
perf/x86/intel: Fix SNB-EP CBO and PCU uncore PMU filter management
perf/x86: Avoid kfree() in CPU_{STARTING,DYING}
uprobes/perf: Avoid perf_trace_buf_prepare/submit if ->perf_events is empty
uprobes/tracing: Don't pass addr=ip to perf_trace_buf_submit()
uprobes/tracing: Change create_trace_uprobe() to support uretprobes
uprobes/tracing: Make seq_printf() code uretprobe-friendly
uprobes/tracing: Make register_uprobe_event() paths uretprobe-friendly
uprobes/tracing: Make uprobe_{trace,perf}_print() uretprobe-friendly
uprobes/tracing: Introduce is_ret_probe() and uretprobe_dispatcher()
uprobes/tracing: Introduce uprobe_{trace,perf}_print() helpers
uprobes/tracing: Generalize struct uprobe_trace_entry_head
uprobes/tracing: Kill the pointless local_save_flags/preempt_count calls
uprobes/tracing: Kill the pointless seq_print_ip_sym() call
uprobes/tracing: Kill the pointless task_pt_regs() calls
...
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/events/core.c | 30 | ||||
| -rw-r--r-- | kernel/events/uprobes.c | 300 | ||||
| -rw-r--r-- | kernel/trace/trace.h | 5 | ||||
| -rw-r--r-- | kernel/trace/trace_uprobe.c | 203 | ||||
| -rw-r--r-- | kernel/watchdog.c | 5 |
5 files changed, 432 insertions, 111 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c index dce6e13cf9d7..3820e3cefbae 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
| @@ -37,6 +37,7 @@ | |||
| 37 | #include <linux/ftrace_event.h> | 37 | #include <linux/ftrace_event.h> |
| 38 | #include <linux/hw_breakpoint.h> | 38 | #include <linux/hw_breakpoint.h> |
| 39 | #include <linux/mm_types.h> | 39 | #include <linux/mm_types.h> |
| 40 | #include <linux/cgroup.h> | ||
| 40 | 41 | ||
| 41 | #include "internal.h" | 42 | #include "internal.h" |
| 42 | 43 | ||
| @@ -234,6 +235,20 @@ static void perf_ctx_unlock(struct perf_cpu_context *cpuctx, | |||
| 234 | #ifdef CONFIG_CGROUP_PERF | 235 | #ifdef CONFIG_CGROUP_PERF |
| 235 | 236 | ||
| 236 | /* | 237 | /* |
| 238 | * perf_cgroup_info keeps track of time_enabled for a cgroup. | ||
| 239 | * This is a per-cpu dynamically allocated data structure. | ||
| 240 | */ | ||
| 241 | struct perf_cgroup_info { | ||
| 242 | u64 time; | ||
| 243 | u64 timestamp; | ||
| 244 | }; | ||
| 245 | |||
| 246 | struct perf_cgroup { | ||
| 247 | struct cgroup_subsys_state css; | ||
| 248 | struct perf_cgroup_info __percpu *info; | ||
| 249 | }; | ||
| 250 | |||
| 251 | /* | ||
| 237 | * Must ensure cgroup is pinned (css_get) before calling | 252 | * Must ensure cgroup is pinned (css_get) before calling |
| 238 | * this function. In other words, we cannot call this function | 253 | * this function. In other words, we cannot call this function |
| 239 | * if there is no cgroup event for the current CPU context. | 254 | * if there is no cgroup event for the current CPU context. |
| @@ -976,9 +991,15 @@ static void perf_event__header_size(struct perf_event *event) | |||
| 976 | if (sample_type & PERF_SAMPLE_PERIOD) | 991 | if (sample_type & PERF_SAMPLE_PERIOD) |
| 977 | size += sizeof(data->period); | 992 | size += sizeof(data->period); |
| 978 | 993 | ||
| 994 | if (sample_type & PERF_SAMPLE_WEIGHT) | ||
| 995 | size += sizeof(data->weight); | ||
| 996 | |||
| 979 | if (sample_type & PERF_SAMPLE_READ) | 997 | if (sample_type & PERF_SAMPLE_READ) |
| 980 | size += event->read_size; | 998 | size += event->read_size; |
| 981 | 999 | ||
| 1000 | if (sample_type & PERF_SAMPLE_DATA_SRC) | ||
| 1001 | size += sizeof(data->data_src.val); | ||
| 1002 | |||
| 982 | event->header_size = size; | 1003 | event->header_size = size; |
| 983 | } | 1004 | } |
| 984 | 1005 | ||
| @@ -4193,6 +4214,12 @@ void perf_output_sample(struct perf_output_handle *handle, | |||
| 4193 | perf_output_sample_ustack(handle, | 4214 | perf_output_sample_ustack(handle, |
| 4194 | data->stack_user_size, | 4215 | data->stack_user_size, |
| 4195 | data->regs_user.regs); | 4216 | data->regs_user.regs); |
| 4217 | |||
| 4218 | if (sample_type & PERF_SAMPLE_WEIGHT) | ||
| 4219 | perf_output_put(handle, data->weight); | ||
| 4220 | |||
| 4221 | if (sample_type & PERF_SAMPLE_DATA_SRC) | ||
| 4222 | perf_output_put(handle, data->data_src.val); | ||
| 4196 | } | 4223 | } |
| 4197 | 4224 | ||
| 4198 | void perf_prepare_sample(struct perf_event_header *header, | 4225 | void perf_prepare_sample(struct perf_event_header *header, |
| @@ -4782,6 +4809,9 @@ got_name: | |||
| 4782 | mmap_event->file_name = name; | 4809 | mmap_event->file_name = name; |
| 4783 | mmap_event->file_size = size; | 4810 | mmap_event->file_size = size; |
| 4784 | 4811 | ||
| 4812 | if (!(vma->vm_flags & VM_EXEC)) | ||
| 4813 | mmap_event->event_id.header.misc |= PERF_RECORD_MISC_MMAP_DATA; | ||
| 4814 | |||
| 4785 | mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; | 4815 | mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; |
| 4786 | 4816 | ||
| 4787 | rcu_read_lock(); | 4817 | rcu_read_lock(); |
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index a567c8c7ef31..f3569747d629 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c | |||
| @@ -75,6 +75,15 @@ struct uprobe { | |||
| 75 | struct arch_uprobe arch; | 75 | struct arch_uprobe arch; |
| 76 | }; | 76 | }; |
| 77 | 77 | ||
| 78 | struct return_instance { | ||
| 79 | struct uprobe *uprobe; | ||
| 80 | unsigned long func; | ||
| 81 | unsigned long orig_ret_vaddr; /* original return address */ | ||
| 82 | bool chained; /* true, if instance is nested */ | ||
| 83 | |||
| 84 | struct return_instance *next; /* keep as stack */ | ||
| 85 | }; | ||
| 86 | |||
| 78 | /* | 87 | /* |
| 79 | * valid_vma: Verify if the specified vma is an executable vma | 88 | * valid_vma: Verify if the specified vma is an executable vma |
| 80 | * Relax restrictions while unregistering: vm_flags might have | 89 | * Relax restrictions while unregistering: vm_flags might have |
| @@ -173,10 +182,31 @@ bool __weak is_swbp_insn(uprobe_opcode_t *insn) | |||
| 173 | return *insn == UPROBE_SWBP_INSN; | 182 | return *insn == UPROBE_SWBP_INSN; |
| 174 | } | 183 | } |
| 175 | 184 | ||
| 176 | static void copy_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t *opcode) | 185 | /** |
| 186 | * is_trap_insn - check if instruction is breakpoint instruction. | ||
| 187 | * @insn: instruction to be checked. | ||
| 188 | * Default implementation of is_trap_insn | ||
| 189 | * Returns true if @insn is a breakpoint instruction. | ||
| 190 | * | ||
| 191 | * This function is needed for the case where an architecture has multiple | ||
| 192 | * trap instructions (like powerpc). | ||
| 193 | */ | ||
| 194 | bool __weak is_trap_insn(uprobe_opcode_t *insn) | ||
| 195 | { | ||
| 196 | return is_swbp_insn(insn); | ||
| 197 | } | ||
| 198 | |||
| 199 | static void copy_from_page(struct page *page, unsigned long vaddr, void *dst, int len) | ||
| 177 | { | 200 | { |
| 178 | void *kaddr = kmap_atomic(page); | 201 | void *kaddr = kmap_atomic(page); |
| 179 | memcpy(opcode, kaddr + (vaddr & ~PAGE_MASK), UPROBE_SWBP_INSN_SIZE); | 202 | memcpy(dst, kaddr + (vaddr & ~PAGE_MASK), len); |
| 203 | kunmap_atomic(kaddr); | ||
| 204 | } | ||
| 205 | |||
| 206 | static void copy_to_page(struct page *page, unsigned long vaddr, const void *src, int len) | ||
| 207 | { | ||
| 208 | void *kaddr = kmap_atomic(page); | ||
| 209 | memcpy(kaddr + (vaddr & ~PAGE_MASK), src, len); | ||
| 180 | kunmap_atomic(kaddr); | 210 | kunmap_atomic(kaddr); |
| 181 | } | 211 | } |
| 182 | 212 | ||
| @@ -185,7 +215,16 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t | |||
| 185 | uprobe_opcode_t old_opcode; | 215 | uprobe_opcode_t old_opcode; |
| 186 | bool is_swbp; | 216 | bool is_swbp; |
| 187 | 217 | ||
| 188 | copy_opcode(page, vaddr, &old_opcode); | 218 | /* |
| 219 | * Note: We only check if the old_opcode is UPROBE_SWBP_INSN here. | ||
| 220 | * We do not check if it is any other 'trap variant' which could | ||
| 221 | * be conditional trap instruction such as the one powerpc supports. | ||
| 222 | * | ||
| 223 | * The logic is that we do not care if the underlying instruction | ||
| 224 | * is a trap variant; uprobes always wins over any other (gdb) | ||
| 225 | * breakpoint. | ||
| 226 | */ | ||
| 227 | copy_from_page(page, vaddr, &old_opcode, UPROBE_SWBP_INSN_SIZE); | ||
| 189 | is_swbp = is_swbp_insn(&old_opcode); | 228 | is_swbp = is_swbp_insn(&old_opcode); |
| 190 | 229 | ||
| 191 | if (is_swbp_insn(new_opcode)) { | 230 | if (is_swbp_insn(new_opcode)) { |
| @@ -204,7 +243,7 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t | |||
| 204 | * Expect the breakpoint instruction to be the smallest size instruction for | 243 | * Expect the breakpoint instruction to be the smallest size instruction for |
| 205 | * the architecture. If an arch has variable length instruction and the | 244 | * the architecture. If an arch has variable length instruction and the |
| 206 | * breakpoint instruction is not of the smallest length instruction | 245 | * breakpoint instruction is not of the smallest length instruction |
| 207 | * supported by that architecture then we need to modify is_swbp_at_addr and | 246 | * supported by that architecture then we need to modify is_trap_at_addr and |
| 208 | * write_opcode accordingly. This would never be a problem for archs that | 247 | * write_opcode accordingly. This would never be a problem for archs that |
| 209 | * have fixed length instructions. | 248 | * have fixed length instructions. |
| 210 | */ | 249 | */ |
| @@ -225,7 +264,6 @@ static int write_opcode(struct mm_struct *mm, unsigned long vaddr, | |||
| 225 | uprobe_opcode_t opcode) | 264 | uprobe_opcode_t opcode) |
| 226 | { | 265 | { |
| 227 | struct page *old_page, *new_page; | 266 | struct page *old_page, *new_page; |
| 228 | void *vaddr_old, *vaddr_new; | ||
| 229 | struct vm_area_struct *vma; | 267 | struct vm_area_struct *vma; |
| 230 | int ret; | 268 | int ret; |
| 231 | 269 | ||
| @@ -246,15 +284,8 @@ retry: | |||
| 246 | 284 | ||
| 247 | __SetPageUptodate(new_page); | 285 | __SetPageUptodate(new_page); |
| 248 | 286 | ||
| 249 | /* copy the page now that we've got it stable */ | 287 | copy_highpage(new_page, old_page); |
| 250 | vaddr_old = kmap_atomic(old_page); | 288 | copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE); |
| 251 | vaddr_new = kmap_atomic(new_page); | ||
| 252 | |||
| 253 | memcpy(vaddr_new, vaddr_old, PAGE_SIZE); | ||
| 254 | memcpy(vaddr_new + (vaddr & ~PAGE_MASK), &opcode, UPROBE_SWBP_INSN_SIZE); | ||
| 255 | |||
| 256 | kunmap_atomic(vaddr_new); | ||
| 257 | kunmap_atomic(vaddr_old); | ||
| 258 | 289 | ||
| 259 | ret = anon_vma_prepare(vma); | 290 | ret = anon_vma_prepare(vma); |
| 260 | if (ret) | 291 | if (ret) |
| @@ -477,30 +508,18 @@ __copy_insn(struct address_space *mapping, struct file *filp, char *insn, | |||
| 477 | unsigned long nbytes, loff_t offset) | 508 | unsigned long nbytes, loff_t offset) |
| 478 | { | 509 | { |
| 479 | struct page *page; | 510 | struct page *page; |
| 480 | void *vaddr; | ||
| 481 | unsigned long off; | ||
| 482 | pgoff_t idx; | ||
| 483 | |||
| 484 | if (!filp) | ||
| 485 | return -EINVAL; | ||
| 486 | 511 | ||
| 487 | if (!mapping->a_ops->readpage) | 512 | if (!mapping->a_ops->readpage) |
| 488 | return -EIO; | 513 | return -EIO; |
| 489 | |||
| 490 | idx = offset >> PAGE_CACHE_SHIFT; | ||
| 491 | off = offset & ~PAGE_MASK; | ||
| 492 | |||
| 493 | /* | 514 | /* |
| 494 | * Ensure that the page that has the original instruction is | 515 | * Ensure that the page that has the original instruction is |
| 495 | * populated and in page-cache. | 516 | * populated and in page-cache. |
| 496 | */ | 517 | */ |
| 497 | page = read_mapping_page(mapping, idx, filp); | 518 | page = read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT, filp); |
| 498 | if (IS_ERR(page)) | 519 | if (IS_ERR(page)) |
| 499 | return PTR_ERR(page); | 520 | return PTR_ERR(page); |
| 500 | 521 | ||
| 501 | vaddr = kmap_atomic(page); | 522 | copy_from_page(page, offset, insn, nbytes); |
| 502 | memcpy(insn, vaddr + off, nbytes); | ||
| 503 | kunmap_atomic(vaddr); | ||
| 504 | page_cache_release(page); | 523 | page_cache_release(page); |
| 505 | 524 | ||
| 506 | return 0; | 525 | return 0; |
| @@ -550,7 +569,7 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file, | |||
| 550 | goto out; | 569 | goto out; |
| 551 | 570 | ||
| 552 | ret = -ENOTSUPP; | 571 | ret = -ENOTSUPP; |
| 553 | if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn)) | 572 | if (is_trap_insn((uprobe_opcode_t *)uprobe->arch.insn)) |
| 554 | goto out; | 573 | goto out; |
| 555 | 574 | ||
| 556 | ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, vaddr); | 575 | ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, vaddr); |
| @@ -758,7 +777,7 @@ register_for_each_vma(struct uprobe *uprobe, struct uprobe_consumer *new) | |||
| 758 | down_write(&mm->mmap_sem); | 777 | down_write(&mm->mmap_sem); |
| 759 | vma = find_vma(mm, info->vaddr); | 778 | vma = find_vma(mm, info->vaddr); |
| 760 | if (!vma || !valid_vma(vma, is_register) || | 779 | if (!vma || !valid_vma(vma, is_register) || |
| 761 | vma->vm_file->f_mapping->host != uprobe->inode) | 780 | file_inode(vma->vm_file) != uprobe->inode) |
| 762 | goto unlock; | 781 | goto unlock; |
| 763 | 782 | ||
| 764 | if (vma->vm_start > info->vaddr || | 783 | if (vma->vm_start > info->vaddr || |
| @@ -828,6 +847,10 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer * | |||
| 828 | struct uprobe *uprobe; | 847 | struct uprobe *uprobe; |
| 829 | int ret; | 848 | int ret; |
| 830 | 849 | ||
| 850 | /* Uprobe must have at least one set consumer */ | ||
| 851 | if (!uc->handler && !uc->ret_handler) | ||
| 852 | return -EINVAL; | ||
| 853 | |||
| 831 | /* Racy, just to catch the obvious mistakes */ | 854 | /* Racy, just to catch the obvious mistakes */ |
| 832 | if (offset > i_size_read(inode)) | 855 | if (offset > i_size_read(inode)) |
| 833 | return -EINVAL; | 856 | return -EINVAL; |
| @@ -917,7 +940,7 @@ static int unapply_uprobe(struct uprobe *uprobe, struct mm_struct *mm) | |||
| 917 | loff_t offset; | 940 | loff_t offset; |
| 918 | 941 | ||
| 919 | if (!valid_vma(vma, false) || | 942 | if (!valid_vma(vma, false) || |
| 920 | vma->vm_file->f_mapping->host != uprobe->inode) | 943 | file_inode(vma->vm_file) != uprobe->inode) |
| 921 | continue; | 944 | continue; |
| 922 | 945 | ||
| 923 | offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT; | 946 | offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT; |
| @@ -1010,7 +1033,7 @@ int uprobe_mmap(struct vm_area_struct *vma) | |||
| 1010 | if (no_uprobe_events() || !valid_vma(vma, true)) | 1033 | if (no_uprobe_events() || !valid_vma(vma, true)) |
| 1011 | return 0; | 1034 | return 0; |
| 1012 | 1035 | ||
| 1013 | inode = vma->vm_file->f_mapping->host; | 1036 | inode = file_inode(vma->vm_file); |
| 1014 | if (!inode) | 1037 | if (!inode) |
| 1015 | return 0; | 1038 | return 0; |
| 1016 | 1039 | ||
| @@ -1041,7 +1064,7 @@ vma_has_uprobes(struct vm_area_struct *vma, unsigned long start, unsigned long e | |||
| 1041 | struct inode *inode; | 1064 | struct inode *inode; |
| 1042 | struct rb_node *n; | 1065 | struct rb_node *n; |
| 1043 | 1066 | ||
| 1044 | inode = vma->vm_file->f_mapping->host; | 1067 | inode = file_inode(vma->vm_file); |
| 1045 | 1068 | ||
| 1046 | min = vaddr_to_offset(vma, start); | 1069 | min = vaddr_to_offset(vma, start); |
| 1047 | max = min + (end - start) - 1; | 1070 | max = min + (end - start) - 1; |
| @@ -1114,6 +1137,7 @@ static struct xol_area *get_xol_area(void) | |||
| 1114 | { | 1137 | { |
| 1115 | struct mm_struct *mm = current->mm; | 1138 | struct mm_struct *mm = current->mm; |
| 1116 | struct xol_area *area; | 1139 | struct xol_area *area; |
| 1140 | uprobe_opcode_t insn = UPROBE_SWBP_INSN; | ||
| 1117 | 1141 | ||
| 1118 | area = mm->uprobes_state.xol_area; | 1142 | area = mm->uprobes_state.xol_area; |
| 1119 | if (area) | 1143 | if (area) |
| @@ -1131,7 +1155,12 @@ static struct xol_area *get_xol_area(void) | |||
| 1131 | if (!area->page) | 1155 | if (!area->page) |
| 1132 | goto free_bitmap; | 1156 | goto free_bitmap; |
| 1133 | 1157 | ||
| 1158 | /* allocate first slot of task's xol_area for the return probes */ | ||
| 1159 | set_bit(0, area->bitmap); | ||
| 1160 | copy_to_page(area->page, 0, &insn, UPROBE_SWBP_INSN_SIZE); | ||
| 1161 | atomic_set(&area->slot_count, 1); | ||
| 1134 | init_waitqueue_head(&area->wq); | 1162 | init_waitqueue_head(&area->wq); |
| 1163 | |||
| 1135 | if (!xol_add_vma(area)) | 1164 | if (!xol_add_vma(area)) |
| 1136 | return area; | 1165 | return area; |
| 1137 | 1166 | ||
| @@ -1216,9 +1245,7 @@ static unsigned long xol_take_insn_slot(struct xol_area *area) | |||
| 1216 | static unsigned long xol_get_insn_slot(struct uprobe *uprobe) | 1245 | static unsigned long xol_get_insn_slot(struct uprobe *uprobe) |
| 1217 | { | 1246 | { |
| 1218 | struct xol_area *area; | 1247 | struct xol_area *area; |
| 1219 | unsigned long offset; | ||
| 1220 | unsigned long xol_vaddr; | 1248 | unsigned long xol_vaddr; |
| 1221 | void *vaddr; | ||
| 1222 | 1249 | ||
| 1223 | area = get_xol_area(); | 1250 | area = get_xol_area(); |
| 1224 | if (!area) | 1251 | if (!area) |
| @@ -1229,10 +1256,7 @@ static unsigned long xol_get_insn_slot(struct uprobe *uprobe) | |||
| 1229 | return 0; | 1256 | return 0; |
| 1230 | 1257 | ||
| 1231 | /* Initialize the slot */ | 1258 | /* Initialize the slot */ |
| 1232 | offset = xol_vaddr & ~PAGE_MASK; | 1259 | copy_to_page(area->page, xol_vaddr, uprobe->arch.insn, MAX_UINSN_BYTES); |
| 1233 | vaddr = kmap_atomic(area->page); | ||
| 1234 | memcpy(vaddr + offset, uprobe->arch.insn, MAX_UINSN_BYTES); | ||
| 1235 | kunmap_atomic(vaddr); | ||
| 1236 | /* | 1260 | /* |
| 1237 | * We probably need flush_icache_user_range() but it needs vma. | 1261 | * We probably need flush_icache_user_range() but it needs vma. |
| 1238 | * This should work on supported architectures too. | 1262 | * This should work on supported architectures too. |
| @@ -1298,6 +1322,7 @@ unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs) | |||
| 1298 | void uprobe_free_utask(struct task_struct *t) | 1322 | void uprobe_free_utask(struct task_struct *t) |
| 1299 | { | 1323 | { |
| 1300 | struct uprobe_task *utask = t->utask; | 1324 | struct uprobe_task *utask = t->utask; |
| 1325 | struct return_instance *ri, *tmp; | ||
| 1301 | 1326 | ||
| 1302 | if (!utask) | 1327 | if (!utask) |
| 1303 | return; | 1328 | return; |
| @@ -1305,6 +1330,15 @@ void uprobe_free_utask(struct task_struct *t) | |||
| 1305 | if (utask->active_uprobe) | 1330 | if (utask->active_uprobe) |
| 1306 | put_uprobe(utask->active_uprobe); | 1331 | put_uprobe(utask->active_uprobe); |
| 1307 | 1332 | ||
| 1333 | ri = utask->return_instances; | ||
| 1334 | while (ri) { | ||
| 1335 | tmp = ri; | ||
| 1336 | ri = ri->next; | ||
| 1337 | |||
| 1338 | put_uprobe(tmp->uprobe); | ||
| 1339 | kfree(tmp); | ||
| 1340 | } | ||
| 1341 | |||
| 1308 | xol_free_insn_slot(t); | 1342 | xol_free_insn_slot(t); |
| 1309 | kfree(utask); | 1343 | kfree(utask); |
| 1310 | t->utask = NULL; | 1344 | t->utask = NULL; |
| @@ -1333,6 +1367,93 @@ static struct uprobe_task *get_utask(void) | |||
| 1333 | return current->utask; | 1367 | return current->utask; |
| 1334 | } | 1368 | } |
| 1335 | 1369 | ||
| 1370 | /* | ||
| 1371 | * Current area->vaddr notion assume the trampoline address is always | ||
| 1372 | * equal area->vaddr. | ||
| 1373 | * | ||
| 1374 | * Returns -1 in case the xol_area is not allocated. | ||
| 1375 | */ | ||
| 1376 | static unsigned long get_trampoline_vaddr(void) | ||
| 1377 | { | ||
| 1378 | struct xol_area *area; | ||
| 1379 | unsigned long trampoline_vaddr = -1; | ||
| 1380 | |||
| 1381 | area = current->mm->uprobes_state.xol_area; | ||
| 1382 | smp_read_barrier_depends(); | ||
| 1383 | if (area) | ||
| 1384 | trampoline_vaddr = area->vaddr; | ||
| 1385 | |||
| 1386 | return trampoline_vaddr; | ||
| 1387 | } | ||
| 1388 | |||
| 1389 | static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs) | ||
| 1390 | { | ||
| 1391 | struct return_instance *ri; | ||
| 1392 | struct uprobe_task *utask; | ||
| 1393 | unsigned long orig_ret_vaddr, trampoline_vaddr; | ||
| 1394 | bool chained = false; | ||
| 1395 | |||
| 1396 | if (!get_xol_area()) | ||
| 1397 | return; | ||
| 1398 | |||
| 1399 | utask = get_utask(); | ||
| 1400 | if (!utask) | ||
| 1401 | return; | ||
| 1402 | |||
| 1403 | if (utask->depth >= MAX_URETPROBE_DEPTH) { | ||
| 1404 | printk_ratelimited(KERN_INFO "uprobe: omit uretprobe due to" | ||
| 1405 | " nestedness limit pid/tgid=%d/%d\n", | ||
| 1406 | current->pid, current->tgid); | ||
| 1407 | return; | ||
| 1408 | } | ||
| 1409 | |||
| 1410 | ri = kzalloc(sizeof(struct return_instance), GFP_KERNEL); | ||
| 1411 | if (!ri) | ||
| 1412 | goto fail; | ||
| 1413 | |||
| 1414 | trampoline_vaddr = get_trampoline_vaddr(); | ||
| 1415 | orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs); | ||
| 1416 | if (orig_ret_vaddr == -1) | ||
| 1417 | goto fail; | ||
| 1418 | |||
| 1419 | /* | ||
| 1420 | * We don't want to keep trampoline address in stack, rather keep the | ||
| 1421 | * original return address of first caller thru all the consequent | ||
| 1422 | * instances. This also makes breakpoint unwrapping easier. | ||
| 1423 | */ | ||
| 1424 | if (orig_ret_vaddr == trampoline_vaddr) { | ||
| 1425 | if (!utask->return_instances) { | ||
| 1426 | /* | ||
| 1427 | * This situation is not possible. Likely we have an | ||
| 1428 | * attack from user-space. | ||
| 1429 | */ | ||
| 1430 | pr_warn("uprobe: unable to set uretprobe pid/tgid=%d/%d\n", | ||
| 1431 | current->pid, current->tgid); | ||
| 1432 | goto fail; | ||
| 1433 | } | ||
| 1434 | |||
| 1435 | chained = true; | ||
| 1436 | orig_ret_vaddr = utask->return_instances->orig_ret_vaddr; | ||
| 1437 | } | ||
| 1438 | |||
| 1439 | atomic_inc(&uprobe->ref); | ||
| 1440 | ri->uprobe = uprobe; | ||
| 1441 | ri->func = instruction_pointer(regs); | ||
| 1442 | ri->orig_ret_vaddr = orig_ret_vaddr; | ||
| 1443 | ri->chained = chained; | ||
| 1444 | |||
| 1445 | utask->depth++; | ||
| 1446 | |||
| 1447 | /* add instance to the stack */ | ||
| 1448 | ri->next = utask->return_instances; | ||
| 1449 | utask->return_instances = ri; | ||
| 1450 | |||
| 1451 | return; | ||
| 1452 | |||
| 1453 | fail: | ||
| 1454 | kfree(ri); | ||
| 1455 | } | ||
| 1456 | |||
| 1336 | /* Prepare to single-step probed instruction out of line. */ | 1457 | /* Prepare to single-step probed instruction out of line. */ |
| 1337 | static int | 1458 | static int |
| 1338 | pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long bp_vaddr) | 1459 | pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long bp_vaddr) |
| @@ -1431,7 +1552,7 @@ static void mmf_recalc_uprobes(struct mm_struct *mm) | |||
| 1431 | clear_bit(MMF_HAS_UPROBES, &mm->flags); | 1552 | clear_bit(MMF_HAS_UPROBES, &mm->flags); |
| 1432 | } | 1553 | } |
| 1433 | 1554 | ||
| 1434 | static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr) | 1555 | static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr) |
| 1435 | { | 1556 | { |
| 1436 | struct page *page; | 1557 | struct page *page; |
| 1437 | uprobe_opcode_t opcode; | 1558 | uprobe_opcode_t opcode; |
| @@ -1449,10 +1570,11 @@ static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr) | |||
| 1449 | if (result < 0) | 1570 | if (result < 0) |
| 1450 | return result; | 1571 | return result; |
| 1451 | 1572 | ||
| 1452 | copy_opcode(page, vaddr, &opcode); | 1573 | copy_from_page(page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE); |
| 1453 | put_page(page); | 1574 | put_page(page); |
| 1454 | out: | 1575 | out: |
| 1455 | return is_swbp_insn(&opcode); | 1576 | /* This needs to return true for any variant of the trap insn */ |
| 1577 | return is_trap_insn(&opcode); | ||
| 1456 | } | 1578 | } |
| 1457 | 1579 | ||
| 1458 | static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp) | 1580 | static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp) |
| @@ -1465,14 +1587,14 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp) | |||
| 1465 | vma = find_vma(mm, bp_vaddr); | 1587 | vma = find_vma(mm, bp_vaddr); |
| 1466 | if (vma && vma->vm_start <= bp_vaddr) { | 1588 | if (vma && vma->vm_start <= bp_vaddr) { |
| 1467 | if (valid_vma(vma, false)) { | 1589 | if (valid_vma(vma, false)) { |
| 1468 | struct inode *inode = vma->vm_file->f_mapping->host; | 1590 | struct inode *inode = file_inode(vma->vm_file); |
| 1469 | loff_t offset = vaddr_to_offset(vma, bp_vaddr); | 1591 | loff_t offset = vaddr_to_offset(vma, bp_vaddr); |
| 1470 | 1592 | ||
| 1471 | uprobe = find_uprobe(inode, offset); | 1593 | uprobe = find_uprobe(inode, offset); |
| 1472 | } | 1594 | } |
| 1473 | 1595 | ||
| 1474 | if (!uprobe) | 1596 | if (!uprobe) |
| 1475 | *is_swbp = is_swbp_at_addr(mm, bp_vaddr); | 1597 | *is_swbp = is_trap_at_addr(mm, bp_vaddr); |
| 1476 | } else { | 1598 | } else { |
| 1477 | *is_swbp = -EFAULT; | 1599 | *is_swbp = -EFAULT; |
| 1478 | } | 1600 | } |
| @@ -1488,16 +1610,27 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs) | |||
| 1488 | { | 1610 | { |
| 1489 | struct uprobe_consumer *uc; | 1611 | struct uprobe_consumer *uc; |
| 1490 | int remove = UPROBE_HANDLER_REMOVE; | 1612 | int remove = UPROBE_HANDLER_REMOVE; |
| 1613 | bool need_prep = false; /* prepare return uprobe, when needed */ | ||
| 1491 | 1614 | ||
| 1492 | down_read(&uprobe->register_rwsem); | 1615 | down_read(&uprobe->register_rwsem); |
| 1493 | for (uc = uprobe->consumers; uc; uc = uc->next) { | 1616 | for (uc = uprobe->consumers; uc; uc = uc->next) { |
| 1494 | int rc = uc->handler(uc, regs); | 1617 | int rc = 0; |
| 1618 | |||
| 1619 | if (uc->handler) { | ||
| 1620 | rc = uc->handler(uc, regs); | ||
| 1621 | WARN(rc & ~UPROBE_HANDLER_MASK, | ||
| 1622 | "bad rc=0x%x from %pf()\n", rc, uc->handler); | ||
| 1623 | } | ||
| 1624 | |||
| 1625 | if (uc->ret_handler) | ||
| 1626 | need_prep = true; | ||
| 1495 | 1627 | ||
| 1496 | WARN(rc & ~UPROBE_HANDLER_MASK, | ||
| 1497 | "bad rc=0x%x from %pf()\n", rc, uc->handler); | ||
| 1498 | remove &= rc; | 1628 | remove &= rc; |
| 1499 | } | 1629 | } |
| 1500 | 1630 | ||
| 1631 | if (need_prep && !remove) | ||
| 1632 | prepare_uretprobe(uprobe, regs); /* put bp at return */ | ||
| 1633 | |||
| 1501 | if (remove && uprobe->consumers) { | 1634 | if (remove && uprobe->consumers) { |
| 1502 | WARN_ON(!uprobe_is_active(uprobe)); | 1635 | WARN_ON(!uprobe_is_active(uprobe)); |
| 1503 | unapply_uprobe(uprobe, current->mm); | 1636 | unapply_uprobe(uprobe, current->mm); |
| @@ -1505,6 +1638,64 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs) | |||
| 1505 | up_read(&uprobe->register_rwsem); | 1638 | up_read(&uprobe->register_rwsem); |
| 1506 | } | 1639 | } |
| 1507 | 1640 | ||
| 1641 | static void | ||
| 1642 | handle_uretprobe_chain(struct return_instance *ri, struct pt_regs *regs) | ||
| 1643 | { | ||
| 1644 | struct uprobe *uprobe = ri->uprobe; | ||
| 1645 | struct uprobe_consumer *uc; | ||
| 1646 | |||
| 1647 | down_read(&uprobe->register_rwsem); | ||
| 1648 | for (uc = uprobe->consumers; uc; uc = uc->next) { | ||
| 1649 | if (uc->ret_handler) | ||
| 1650 | uc->ret_handler(uc, ri->func, regs); | ||
| 1651 | } | ||
| 1652 | up_read(&uprobe->register_rwsem); | ||
| 1653 | } | ||
| 1654 | |||
| 1655 | static bool handle_trampoline(struct pt_regs *regs) | ||
| 1656 | { | ||
| 1657 | struct uprobe_task *utask; | ||
| 1658 | struct return_instance *ri, *tmp; | ||
| 1659 | bool chained; | ||
| 1660 | |||
| 1661 | utask = current->utask; | ||
| 1662 | if (!utask) | ||
| 1663 | return false; | ||
| 1664 | |||
| 1665 | ri = utask->return_instances; | ||
| 1666 | if (!ri) | ||
| 1667 | return false; | ||
| 1668 | |||
| 1669 | /* | ||
| 1670 | * TODO: we should throw out return_instance's invalidated by | ||
| 1671 | * longjmp(), currently we assume that the probed function always | ||
| 1672 | * returns. | ||
| 1673 | */ | ||
| 1674 | instruction_pointer_set(regs, ri->orig_ret_vaddr); | ||
| 1675 | |||
| 1676 | for (;;) { | ||
| 1677 | handle_uretprobe_chain(ri, regs); | ||
| 1678 | |||
| 1679 | chained = ri->chained; | ||
| 1680 | put_uprobe(ri->uprobe); | ||
| 1681 | |||
| 1682 | tmp = ri; | ||
| 1683 | ri = ri->next; | ||
| 1684 | kfree(tmp); | ||
| 1685 | |||
| 1686 | if (!chained) | ||
| 1687 | break; | ||
| 1688 | |||
| 1689 | utask->depth--; | ||
| 1690 | |||
| 1691 | BUG_ON(!ri); | ||
| 1692 | } | ||
| 1693 | |||
| 1694 | utask->return_instances = ri; | ||
| 1695 | |||
| 1696 | return true; | ||
| 1697 | } | ||
| 1698 | |||
| 1508 | /* | 1699 | /* |
| 1509 | * Run handler and ask thread to singlestep. | 1700 | * Run handler and ask thread to singlestep. |
| 1510 | * Ensure all non-fatal signals cannot interrupt thread while it singlesteps. | 1701 | * Ensure all non-fatal signals cannot interrupt thread while it singlesteps. |
| @@ -1516,8 +1707,15 @@ static void handle_swbp(struct pt_regs *regs) | |||
| 1516 | int uninitialized_var(is_swbp); | 1707 | int uninitialized_var(is_swbp); |
| 1517 | 1708 | ||
| 1518 | bp_vaddr = uprobe_get_swbp_addr(regs); | 1709 | bp_vaddr = uprobe_get_swbp_addr(regs); |
| 1519 | uprobe = find_active_uprobe(bp_vaddr, &is_swbp); | 1710 | if (bp_vaddr == get_trampoline_vaddr()) { |
| 1711 | if (handle_trampoline(regs)) | ||
| 1712 | return; | ||
| 1713 | |||
| 1714 | pr_warn("uprobe: unable to handle uretprobe pid/tgid=%d/%d\n", | ||
| 1715 | current->pid, current->tgid); | ||
| 1716 | } | ||
| 1520 | 1717 | ||
| 1718 | uprobe = find_active_uprobe(bp_vaddr, &is_swbp); | ||
| 1521 | if (!uprobe) { | 1719 | if (!uprobe) { |
| 1522 | if (is_swbp > 0) { | 1720 | if (is_swbp > 0) { |
| 1523 | /* No matching uprobe; signal SIGTRAP. */ | 1721 | /* No matching uprobe; signal SIGTRAP. */ |
| @@ -1616,7 +1814,11 @@ void uprobe_notify_resume(struct pt_regs *regs) | |||
| 1616 | */ | 1814 | */ |
| 1617 | int uprobe_pre_sstep_notifier(struct pt_regs *regs) | 1815 | int uprobe_pre_sstep_notifier(struct pt_regs *regs) |
| 1618 | { | 1816 | { |
| 1619 | if (!current->mm || !test_bit(MMF_HAS_UPROBES, ¤t->mm->flags)) | 1817 | if (!current->mm) |
| 1818 | return 0; | ||
| 1819 | |||
| 1820 | if (!test_bit(MMF_HAS_UPROBES, ¤t->mm->flags) && | ||
| 1821 | (!current->utask || !current->utask->return_instances)) | ||
| 1620 | return 0; | 1822 | return 0; |
| 1621 | 1823 | ||
| 1622 | set_thread_flag(TIF_UPROBE); | 1824 | set_thread_flag(TIF_UPROBE); |
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 9e014582e763..711ca7d3e7f1 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h | |||
| @@ -109,11 +109,6 @@ struct kretprobe_trace_entry_head { | |||
| 109 | unsigned long ret_ip; | 109 | unsigned long ret_ip; |
| 110 | }; | 110 | }; |
| 111 | 111 | ||
| 112 | struct uprobe_trace_entry_head { | ||
| 113 | struct trace_entry ent; | ||
| 114 | unsigned long ip; | ||
| 115 | }; | ||
| 116 | |||
| 117 | /* | 112 | /* |
| 118 | * trace_flag_type is an enumeration that holds different | 113 | * trace_flag_type is an enumeration that holds different |
| 119 | * states when a trace occurs. These are: | 114 | * states when a trace occurs. These are: |
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 8dad2a92dee9..32494fb0ee64 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c | |||
| @@ -28,6 +28,18 @@ | |||
| 28 | 28 | ||
| 29 | #define UPROBE_EVENT_SYSTEM "uprobes" | 29 | #define UPROBE_EVENT_SYSTEM "uprobes" |
| 30 | 30 | ||
| 31 | struct uprobe_trace_entry_head { | ||
| 32 | struct trace_entry ent; | ||
| 33 | unsigned long vaddr[]; | ||
| 34 | }; | ||
| 35 | |||
| 36 | #define SIZEOF_TRACE_ENTRY(is_return) \ | ||
| 37 | (sizeof(struct uprobe_trace_entry_head) + \ | ||
| 38 | sizeof(unsigned long) * (is_return ? 2 : 1)) | ||
| 39 | |||
| 40 | #define DATAOF_TRACE_ENTRY(entry, is_return) \ | ||
| 41 | ((void*)(entry) + SIZEOF_TRACE_ENTRY(is_return)) | ||
| 42 | |||
| 31 | struct trace_uprobe_filter { | 43 | struct trace_uprobe_filter { |
| 32 | rwlock_t rwlock; | 44 | rwlock_t rwlock; |
| 33 | int nr_systemwide; | 45 | int nr_systemwide; |
| @@ -64,6 +76,8 @@ static DEFINE_MUTEX(uprobe_lock); | |||
| 64 | static LIST_HEAD(uprobe_list); | 76 | static LIST_HEAD(uprobe_list); |
| 65 | 77 | ||
| 66 | static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs); | 78 | static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs); |
| 79 | static int uretprobe_dispatcher(struct uprobe_consumer *con, | ||
| 80 | unsigned long func, struct pt_regs *regs); | ||
| 67 | 81 | ||
| 68 | static inline void init_trace_uprobe_filter(struct trace_uprobe_filter *filter) | 82 | static inline void init_trace_uprobe_filter(struct trace_uprobe_filter *filter) |
| 69 | { | 83 | { |
| @@ -77,11 +91,16 @@ static inline bool uprobe_filter_is_empty(struct trace_uprobe_filter *filter) | |||
| 77 | return !filter->nr_systemwide && list_empty(&filter->perf_events); | 91 | return !filter->nr_systemwide && list_empty(&filter->perf_events); |
| 78 | } | 92 | } |
| 79 | 93 | ||
| 94 | static inline bool is_ret_probe(struct trace_uprobe *tu) | ||
| 95 | { | ||
| 96 | return tu->consumer.ret_handler != NULL; | ||
| 97 | } | ||
| 98 | |||
| 80 | /* | 99 | /* |
| 81 | * Allocate new trace_uprobe and initialize it (including uprobes). | 100 | * Allocate new trace_uprobe and initialize it (including uprobes). |
| 82 | */ | 101 | */ |
| 83 | static struct trace_uprobe * | 102 | static struct trace_uprobe * |
| 84 | alloc_trace_uprobe(const char *group, const char *event, int nargs) | 103 | alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret) |
| 85 | { | 104 | { |
| 86 | struct trace_uprobe *tu; | 105 | struct trace_uprobe *tu; |
| 87 | 106 | ||
| @@ -106,6 +125,8 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs) | |||
| 106 | 125 | ||
| 107 | INIT_LIST_HEAD(&tu->list); | 126 | INIT_LIST_HEAD(&tu->list); |
| 108 | tu->consumer.handler = uprobe_dispatcher; | 127 | tu->consumer.handler = uprobe_dispatcher; |
| 128 | if (is_ret) | ||
| 129 | tu->consumer.ret_handler = uretprobe_dispatcher; | ||
| 109 | init_trace_uprobe_filter(&tu->filter); | 130 | init_trace_uprobe_filter(&tu->filter); |
| 110 | return tu; | 131 | return tu; |
| 111 | 132 | ||
| @@ -180,7 +201,7 @@ end: | |||
| 180 | 201 | ||
| 181 | /* | 202 | /* |
| 182 | * Argument syntax: | 203 | * Argument syntax: |
| 183 | * - Add uprobe: p[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS] | 204 | * - Add uprobe: p|r[:[GRP/]EVENT] PATH:SYMBOL [FETCHARGS] |
| 184 | * | 205 | * |
| 185 | * - Remove uprobe: -:[GRP/]EVENT | 206 | * - Remove uprobe: -:[GRP/]EVENT |
| 186 | */ | 207 | */ |
| @@ -192,20 +213,23 @@ static int create_trace_uprobe(int argc, char **argv) | |||
| 192 | char buf[MAX_EVENT_NAME_LEN]; | 213 | char buf[MAX_EVENT_NAME_LEN]; |
| 193 | struct path path; | 214 | struct path path; |
| 194 | unsigned long offset; | 215 | unsigned long offset; |
| 195 | bool is_delete; | 216 | bool is_delete, is_return; |
| 196 | int i, ret; | 217 | int i, ret; |
| 197 | 218 | ||
| 198 | inode = NULL; | 219 | inode = NULL; |
| 199 | ret = 0; | 220 | ret = 0; |
| 200 | is_delete = false; | 221 | is_delete = false; |
| 222 | is_return = false; | ||
| 201 | event = NULL; | 223 | event = NULL; |
| 202 | group = NULL; | 224 | group = NULL; |
| 203 | 225 | ||
| 204 | /* argc must be >= 1 */ | 226 | /* argc must be >= 1 */ |
| 205 | if (argv[0][0] == '-') | 227 | if (argv[0][0] == '-') |
| 206 | is_delete = true; | 228 | is_delete = true; |
| 229 | else if (argv[0][0] == 'r') | ||
| 230 | is_return = true; | ||
| 207 | else if (argv[0][0] != 'p') { | 231 | else if (argv[0][0] != 'p') { |
| 208 | pr_info("Probe definition must be started with 'p' or '-'.\n"); | 232 | pr_info("Probe definition must be started with 'p', 'r' or '-'.\n"); |
| 209 | return -EINVAL; | 233 | return -EINVAL; |
| 210 | } | 234 | } |
| 211 | 235 | ||
| @@ -303,7 +327,7 @@ static int create_trace_uprobe(int argc, char **argv) | |||
| 303 | kfree(tail); | 327 | kfree(tail); |
| 304 | } | 328 | } |
| 305 | 329 | ||
| 306 | tu = alloc_trace_uprobe(group, event, argc); | 330 | tu = alloc_trace_uprobe(group, event, argc, is_return); |
| 307 | if (IS_ERR(tu)) { | 331 | if (IS_ERR(tu)) { |
| 308 | pr_info("Failed to allocate trace_uprobe.(%d)\n", (int)PTR_ERR(tu)); | 332 | pr_info("Failed to allocate trace_uprobe.(%d)\n", (int)PTR_ERR(tu)); |
| 309 | ret = PTR_ERR(tu); | 333 | ret = PTR_ERR(tu); |
| @@ -414,9 +438,10 @@ static void probes_seq_stop(struct seq_file *m, void *v) | |||
| 414 | static int probes_seq_show(struct seq_file *m, void *v) | 438 | static int probes_seq_show(struct seq_file *m, void *v) |
| 415 | { | 439 | { |
| 416 | struct trace_uprobe *tu = v; | 440 | struct trace_uprobe *tu = v; |
| 441 | char c = is_ret_probe(tu) ? 'r' : 'p'; | ||
| 417 | int i; | 442 | int i; |
| 418 | 443 | ||
| 419 | seq_printf(m, "p:%s/%s", tu->call.class->system, tu->call.name); | 444 | seq_printf(m, "%c:%s/%s", c, tu->call.class->system, tu->call.name); |
| 420 | seq_printf(m, " %s:0x%p", tu->filename, (void *)tu->offset); | 445 | seq_printf(m, " %s:0x%p", tu->filename, (void *)tu->offset); |
| 421 | 446 | ||
| 422 | for (i = 0; i < tu->nr_args; i++) | 447 | for (i = 0; i < tu->nr_args; i++) |
| @@ -485,65 +510,81 @@ static const struct file_operations uprobe_profile_ops = { | |||
| 485 | .release = seq_release, | 510 | .release = seq_release, |
| 486 | }; | 511 | }; |
| 487 | 512 | ||
| 488 | /* uprobe handler */ | 513 | static void uprobe_trace_print(struct trace_uprobe *tu, |
| 489 | static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs) | 514 | unsigned long func, struct pt_regs *regs) |
| 490 | { | 515 | { |
| 491 | struct uprobe_trace_entry_head *entry; | 516 | struct uprobe_trace_entry_head *entry; |
| 492 | struct ring_buffer_event *event; | 517 | struct ring_buffer_event *event; |
| 493 | struct ring_buffer *buffer; | 518 | struct ring_buffer *buffer; |
| 494 | u8 *data; | 519 | void *data; |
| 495 | int size, i, pc; | 520 | int size, i; |
| 496 | unsigned long irq_flags; | ||
| 497 | struct ftrace_event_call *call = &tu->call; | 521 | struct ftrace_event_call *call = &tu->call; |
| 498 | 522 | ||
| 499 | local_save_flags(irq_flags); | 523 | size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); |
| 500 | pc = preempt_count(); | ||
| 501 | |||
| 502 | size = sizeof(*entry) + tu->size; | ||
| 503 | |||
| 504 | event = trace_current_buffer_lock_reserve(&buffer, call->event.type, | 524 | event = trace_current_buffer_lock_reserve(&buffer, call->event.type, |
| 505 | size, irq_flags, pc); | 525 | size + tu->size, 0, 0); |
| 506 | if (!event) | 526 | if (!event) |
| 507 | return 0; | 527 | return; |
| 508 | 528 | ||
| 509 | entry = ring_buffer_event_data(event); | 529 | entry = ring_buffer_event_data(event); |
| 510 | entry->ip = instruction_pointer(task_pt_regs(current)); | 530 | if (is_ret_probe(tu)) { |
| 511 | data = (u8 *)&entry[1]; | 531 | entry->vaddr[0] = func; |
| 532 | entry->vaddr[1] = instruction_pointer(regs); | ||
| 533 | data = DATAOF_TRACE_ENTRY(entry, true); | ||
| 534 | } else { | ||
| 535 | entry->vaddr[0] = instruction_pointer(regs); | ||
| 536 | data = DATAOF_TRACE_ENTRY(entry, false); | ||
| 537 | } | ||
| 538 | |||
| 512 | for (i = 0; i < tu->nr_args; i++) | 539 | for (i = 0; i < tu->nr_args; i++) |
| 513 | call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset); | 540 | call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset); |
| 514 | 541 | ||
| 515 | if (!filter_current_check_discard(buffer, call, entry, event)) | 542 | if (!filter_current_check_discard(buffer, call, entry, event)) |
| 516 | trace_buffer_unlock_commit(buffer, event, irq_flags, pc); | 543 | trace_buffer_unlock_commit(buffer, event, 0, 0); |
| 544 | } | ||
| 517 | 545 | ||
| 546 | /* uprobe handler */ | ||
| 547 | static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs) | ||
| 548 | { | ||
| 549 | if (!is_ret_probe(tu)) | ||
| 550 | uprobe_trace_print(tu, 0, regs); | ||
| 518 | return 0; | 551 | return 0; |
| 519 | } | 552 | } |
| 520 | 553 | ||
| 554 | static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func, | ||
| 555 | struct pt_regs *regs) | ||
| 556 | { | ||
| 557 | uprobe_trace_print(tu, func, regs); | ||
| 558 | } | ||
| 559 | |||
| 521 | /* Event entry printers */ | 560 | /* Event entry printers */ |
| 522 | static enum print_line_t | 561 | static enum print_line_t |
| 523 | print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *event) | 562 | print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *event) |
| 524 | { | 563 | { |
| 525 | struct uprobe_trace_entry_head *field; | 564 | struct uprobe_trace_entry_head *entry; |
| 526 | struct trace_seq *s = &iter->seq; | 565 | struct trace_seq *s = &iter->seq; |
| 527 | struct trace_uprobe *tu; | 566 | struct trace_uprobe *tu; |
| 528 | u8 *data; | 567 | u8 *data; |
| 529 | int i; | 568 | int i; |
| 530 | 569 | ||
| 531 | field = (struct uprobe_trace_entry_head *)iter->ent; | 570 | entry = (struct uprobe_trace_entry_head *)iter->ent; |
| 532 | tu = container_of(event, struct trace_uprobe, call.event); | 571 | tu = container_of(event, struct trace_uprobe, call.event); |
| 533 | 572 | ||
| 534 | if (!trace_seq_printf(s, "%s: (", tu->call.name)) | 573 | if (is_ret_probe(tu)) { |
| 535 | goto partial; | 574 | if (!trace_seq_printf(s, "%s: (0x%lx <- 0x%lx)", tu->call.name, |
| 536 | 575 | entry->vaddr[1], entry->vaddr[0])) | |
| 537 | if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET)) | 576 | goto partial; |
| 538 | goto partial; | 577 | data = DATAOF_TRACE_ENTRY(entry, true); |
| 539 | 578 | } else { | |
| 540 | if (!trace_seq_puts(s, ")")) | 579 | if (!trace_seq_printf(s, "%s: (0x%lx)", tu->call.name, |
| 541 | goto partial; | 580 | entry->vaddr[0])) |
| 581 | goto partial; | ||
| 582 | data = DATAOF_TRACE_ENTRY(entry, false); | ||
| 583 | } | ||
| 542 | 584 | ||
| 543 | data = (u8 *)&field[1]; | ||
| 544 | for (i = 0; i < tu->nr_args; i++) { | 585 | for (i = 0; i < tu->nr_args; i++) { |
| 545 | if (!tu->args[i].type->print(s, tu->args[i].name, | 586 | if (!tu->args[i].type->print(s, tu->args[i].name, |
| 546 | data + tu->args[i].offset, field)) | 587 | data + tu->args[i].offset, entry)) |
| 547 | goto partial; | 588 | goto partial; |
| 548 | } | 589 | } |
| 549 | 590 | ||
| @@ -595,16 +636,23 @@ static void probe_event_disable(struct trace_uprobe *tu, int flag) | |||
| 595 | 636 | ||
| 596 | static int uprobe_event_define_fields(struct ftrace_event_call *event_call) | 637 | static int uprobe_event_define_fields(struct ftrace_event_call *event_call) |
| 597 | { | 638 | { |
| 598 | int ret, i; | 639 | int ret, i, size; |
| 599 | struct uprobe_trace_entry_head field; | 640 | struct uprobe_trace_entry_head field; |
| 600 | struct trace_uprobe *tu = (struct trace_uprobe *)event_call->data; | 641 | struct trace_uprobe *tu = event_call->data; |
| 601 | 642 | ||
| 602 | DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0); | 643 | if (is_ret_probe(tu)) { |
| 644 | DEFINE_FIELD(unsigned long, vaddr[0], FIELD_STRING_FUNC, 0); | ||
| 645 | DEFINE_FIELD(unsigned long, vaddr[1], FIELD_STRING_RETIP, 0); | ||
| 646 | size = SIZEOF_TRACE_ENTRY(true); | ||
| 647 | } else { | ||
| 648 | DEFINE_FIELD(unsigned long, vaddr[0], FIELD_STRING_IP, 0); | ||
| 649 | size = SIZEOF_TRACE_ENTRY(false); | ||
| 650 | } | ||
| 603 | /* Set argument names as fields */ | 651 | /* Set argument names as fields */ |
| 604 | for (i = 0; i < tu->nr_args; i++) { | 652 | for (i = 0; i < tu->nr_args; i++) { |
| 605 | ret = trace_define_field(event_call, tu->args[i].type->fmttype, | 653 | ret = trace_define_field(event_call, tu->args[i].type->fmttype, |
| 606 | tu->args[i].name, | 654 | tu->args[i].name, |
| 607 | sizeof(field) + tu->args[i].offset, | 655 | size + tu->args[i].offset, |
| 608 | tu->args[i].type->size, | 656 | tu->args[i].type->size, |
| 609 | tu->args[i].type->is_signed, | 657 | tu->args[i].type->is_signed, |
| 610 | FILTER_OTHER); | 658 | FILTER_OTHER); |
| @@ -622,8 +670,13 @@ static int __set_print_fmt(struct trace_uprobe *tu, char *buf, int len) | |||
| 622 | int i; | 670 | int i; |
| 623 | int pos = 0; | 671 | int pos = 0; |
| 624 | 672 | ||
| 625 | fmt = "(%lx)"; | 673 | if (is_ret_probe(tu)) { |
| 626 | arg = "REC->" FIELD_STRING_IP; | 674 | fmt = "(%lx <- %lx)"; |
| 675 | arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP; | ||
| 676 | } else { | ||
| 677 | fmt = "(%lx)"; | ||
| 678 | arg = "REC->" FIELD_STRING_IP; | ||
| 679 | } | ||
| 627 | 680 | ||
| 628 | /* When len=0, we just calculate the needed length */ | 681 | /* When len=0, we just calculate the needed length */ |
| 629 | 682 | ||
| @@ -752,49 +805,68 @@ static bool uprobe_perf_filter(struct uprobe_consumer *uc, | |||
| 752 | return ret; | 805 | return ret; |
| 753 | } | 806 | } |
| 754 | 807 | ||
| 755 | /* uprobe profile handler */ | 808 | static void uprobe_perf_print(struct trace_uprobe *tu, |
| 756 | static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs) | 809 | unsigned long func, struct pt_regs *regs) |
| 757 | { | 810 | { |
| 758 | struct ftrace_event_call *call = &tu->call; | 811 | struct ftrace_event_call *call = &tu->call; |
| 759 | struct uprobe_trace_entry_head *entry; | 812 | struct uprobe_trace_entry_head *entry; |
| 760 | struct hlist_head *head; | 813 | struct hlist_head *head; |
| 761 | u8 *data; | 814 | void *data; |
| 762 | int size, __size, i; | 815 | int size, rctx, i; |
| 763 | int rctx; | ||
| 764 | 816 | ||
| 765 | if (!uprobe_perf_filter(&tu->consumer, 0, current->mm)) | 817 | size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); |
| 766 | return UPROBE_HANDLER_REMOVE; | 818 | size = ALIGN(size + tu->size + sizeof(u32), sizeof(u64)) - sizeof(u32); |
| 767 | |||
| 768 | __size = sizeof(*entry) + tu->size; | ||
| 769 | size = ALIGN(__size + sizeof(u32), sizeof(u64)); | ||
| 770 | size -= sizeof(u32); | ||
| 771 | if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough")) | 819 | if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough")) |
| 772 | return 0; | 820 | return; |
| 773 | 821 | ||
| 774 | preempt_disable(); | 822 | preempt_disable(); |
| 823 | head = this_cpu_ptr(call->perf_events); | ||
| 824 | if (hlist_empty(head)) | ||
| 825 | goto out; | ||
| 775 | 826 | ||
| 776 | entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); | 827 | entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); |
| 777 | if (!entry) | 828 | if (!entry) |
| 778 | goto out; | 829 | goto out; |
| 779 | 830 | ||
| 780 | entry->ip = instruction_pointer(task_pt_regs(current)); | 831 | if (is_ret_probe(tu)) { |
| 781 | data = (u8 *)&entry[1]; | 832 | entry->vaddr[0] = func; |
| 833 | entry->vaddr[1] = instruction_pointer(regs); | ||
| 834 | data = DATAOF_TRACE_ENTRY(entry, true); | ||
| 835 | } else { | ||
| 836 | entry->vaddr[0] = instruction_pointer(regs); | ||
| 837 | data = DATAOF_TRACE_ENTRY(entry, false); | ||
| 838 | } | ||
| 839 | |||
| 782 | for (i = 0; i < tu->nr_args; i++) | 840 | for (i = 0; i < tu->nr_args; i++) |
| 783 | call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset); | 841 | call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset); |
| 784 | 842 | ||
| 785 | head = this_cpu_ptr(call->perf_events); | 843 | perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL); |
| 786 | perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head, NULL); | ||
| 787 | |||
| 788 | out: | 844 | out: |
| 789 | preempt_enable(); | 845 | preempt_enable(); |
| 846 | } | ||
| 847 | |||
| 848 | /* uprobe profile handler */ | ||
| 849 | static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs) | ||
| 850 | { | ||
| 851 | if (!uprobe_perf_filter(&tu->consumer, 0, current->mm)) | ||
| 852 | return UPROBE_HANDLER_REMOVE; | ||
| 853 | |||
| 854 | if (!is_ret_probe(tu)) | ||
| 855 | uprobe_perf_print(tu, 0, regs); | ||
| 790 | return 0; | 856 | return 0; |
| 791 | } | 857 | } |
| 858 | |||
| 859 | static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func, | ||
| 860 | struct pt_regs *regs) | ||
| 861 | { | ||
| 862 | uprobe_perf_print(tu, func, regs); | ||
| 863 | } | ||
| 792 | #endif /* CONFIG_PERF_EVENTS */ | 864 | #endif /* CONFIG_PERF_EVENTS */ |
| 793 | 865 | ||
| 794 | static | 866 | static |
| 795 | int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type, void *data) | 867 | int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type, void *data) |
| 796 | { | 868 | { |
| 797 | struct trace_uprobe *tu = (struct trace_uprobe *)event->data; | 869 | struct trace_uprobe *tu = event->data; |
| 798 | 870 | ||
| 799 | switch (type) { | 871 | switch (type) { |
| 800 | case TRACE_REG_REGISTER: | 872 | case TRACE_REG_REGISTER: |
| @@ -843,6 +915,23 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs) | |||
| 843 | return ret; | 915 | return ret; |
| 844 | } | 916 | } |
| 845 | 917 | ||
| 918 | static int uretprobe_dispatcher(struct uprobe_consumer *con, | ||
| 919 | unsigned long func, struct pt_regs *regs) | ||
| 920 | { | ||
| 921 | struct trace_uprobe *tu; | ||
| 922 | |||
| 923 | tu = container_of(con, struct trace_uprobe, consumer); | ||
| 924 | |||
| 925 | if (tu->flags & TP_FLAG_TRACE) | ||
| 926 | uretprobe_trace_func(tu, func, regs); | ||
| 927 | |||
| 928 | #ifdef CONFIG_PERF_EVENTS | ||
| 929 | if (tu->flags & TP_FLAG_PROFILE) | ||
| 930 | uretprobe_perf_func(tu, func, regs); | ||
| 931 | #endif | ||
| 932 | return 0; | ||
| 933 | } | ||
| 934 | |||
| 846 | static struct trace_event_functions uprobe_funcs = { | 935 | static struct trace_event_functions uprobe_funcs = { |
| 847 | .trace = print_uprobe_event | 936 | .trace = print_uprobe_event |
| 848 | }; | 937 | }; |
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 4a944676358e..05039e348f07 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
| @@ -517,6 +517,11 @@ int proc_dowatchdog(struct ctl_table *table, int write, | |||
| 517 | return ret; | 517 | return ret; |
| 518 | 518 | ||
| 519 | set_sample_period(); | 519 | set_sample_period(); |
| 520 | /* | ||
| 521 | * Watchdog threads shouldn't be enabled if they are | ||
| 522 | * disabled. The 'watchdog_disabled' variable check in | ||
| 523 | * watchdog_*_all_cpus() function takes care of this. | ||
| 524 | */ | ||
| 520 | if (watchdog_enabled && watchdog_thresh) | 525 | if (watchdog_enabled && watchdog_thresh) |
| 521 | watchdog_enable_all_cpus(); | 526 | watchdog_enable_all_cpus(); |
| 522 | else | 527 | else |
