author	Linus Torvalds <torvalds@linux-foundation.org>	2013-04-30 10:41:01 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-04-30 10:41:01 -0400
commit	e0972916e8fe943f342b0dd1c9d43dbf5bc261c2 (patch)
tree	690c436f1f9b839c4ba34d17ab3efa63b97a2dce /kernel
parent	1f889ec62c3f0d8913f3c32f9aff2a1e15099346 (diff)
parent	5ac2b5c2721501a8f5c5e1cd4116cbc31ace6886 (diff)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar:
 "Features:

   - Add "uretprobes" - an optimization to uprobes, like kretprobes are
     an optimization to kprobes.  "perf probe -x file sym%return" now
     works like kretprobes.  By Oleg Nesterov.

   - Introduce per core aggregation in 'perf stat', from Stephane
     Eranian.

   - Add memory profiling via PEBS, from Stephane Eranian.

   - Event group view for 'annotate' in --stdio, --tui and --gtk, from
     Namhyung Kim.

   - Add support for AMD NB and L2I "uncore" counters, by Jacob Shin.

   - Add Ivy Bridge-EP uncore support, by Zheng Yan.

   - IBM zEnterprise EC12 oprofile support patchlet from Robert Richter.

   - Add perf test entries for checking breakpoint overflow signal
     handler issues, from Jiri Olsa.

   - Add perf test entry for checking number of EXIT events, from
     Namhyung Kim.

   - Add perf test entries for checking --cpu in record and stat, from
     Jiri Olsa.

   - Introduce perf stat --repeat forever, from Frederik Deweerdt.

   - Add --no-demangle to report/top, from Namhyung Kim.

   - PowerPC fixes plus a couple of cleanups/optimizations in uprobes
     and trace_uprobes, by Oleg Nesterov.

  Various fixes and refactorings:

   - Fix dependency of the python binding wrt libtraceevent, from
     Naohiro Aota.

   - Simplify some perf_evlist methods and to allow 'stat' to share
     code with 'record' and 'trace', by Arnaldo Carvalho de Melo.

   - Remove dead code related to libtraceevent integration, from
     Namhyung Kim.

   - Revert "perf sched: Handle PERF_RECORD_EXIT events" to get 'perf
     sched lat' back working, by Arnaldo Carvalho de Melo.

   - We don't use Newt anymore, just plain libslang, by Arnaldo
     Carvalho de Melo.

   - Kill a bunch of die() calls, from Namhyung Kim.

   - Fix build on non-glibc systems due to libio.h absence, from
     Cody P Schafer.

   - Remove some perf_session and tracing dead code, from David Ahern.

   - Honor parallel jobs, fix from Borislav Petkov.

   - Introduce tools/lib/lk library, initially just removing
     duplication among tools/perf and tools/vm, from Borislav Petkov.

  ... and many more I missed to list, see the shortlog and git log for
  more details."

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (136 commits)
  perf/x86/intel/P4: Robistify P4 PMU types
  perf/x86/amd: Fix AMD NB and L2I "uncore" support
  perf/x86/amd: Remove old-style NB counter support from perf_event_amd.c
  perf/x86: Check all MSRs before passing hw check
  perf/x86/amd: Add support for AMD NB and L2I "uncore" counters
  perf/x86/intel: Add Ivy Bridge-EP uncore support
  perf/x86/intel: Fix SNB-EP CBO and PCU uncore PMU filter management
  perf/x86: Avoid kfree() in CPU_{STARTING,DYING}
  uprobes/perf: Avoid perf_trace_buf_prepare/submit if ->perf_events is empty
  uprobes/tracing: Don't pass addr=ip to perf_trace_buf_submit()
  uprobes/tracing: Change create_trace_uprobe() to support uretprobes
  uprobes/tracing: Make seq_printf() code uretprobe-friendly
  uprobes/tracing: Make register_uprobe_event() paths uretprobe-friendly
  uprobes/tracing: Make uprobe_{trace,perf}_print() uretprobe-friendly
  uprobes/tracing: Introduce is_ret_probe() and uretprobe_dispatcher()
  uprobes/tracing: Introduce uprobe_{trace,perf}_print() helpers
  uprobes/tracing: Generalize struct uprobe_trace_entry_head
  uprobes/tracing: Kill the pointless local_save_flags/preempt_count calls
  uprobes/tracing: Kill the pointless seq_print_ip_sym() call
  uprobes/tracing: Kill the pointless task_pt_regs() calls
  ...
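A note for readers of the uprobes hunks below: uretprobes extend the kernel-internal uprobe_consumer API with an optional ret_handler callback alongside the existing handler, and uprobe_register() now insists that at least one of the two is set. A minimal sketch of an in-kernel client, assuming kernel-module context; the handler names and bodies here are placeholders, not part of this merge:

	#include <linux/uprobes.h>

	static int my_entry_handler(struct uprobe_consumer *uc, struct pt_regs *regs)
	{
		return 0;			/* 0: keep the probe installed */
	}

	/* called when the probed function returns; func is its entry address */
	static int my_ret_handler(struct uprobe_consumer *uc,
				  unsigned long func, struct pt_regs *regs)
	{
		return 0;
	}

	static struct uprobe_consumer my_consumer = {
		.handler	= my_entry_handler,	/* optional now */
		.ret_handler	= my_ret_handler,	/* new with uretprobes */
	};

	/* inode + file offset identify the probed instruction */
	static int my_attach(struct inode *inode, loff_t offset)
	{
		return uprobe_register(inode, offset, &my_consumer);
	}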
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/events/core.c	30
-rw-r--r--	kernel/events/uprobes.c	300
-rw-r--r--	kernel/trace/trace.h	5
-rw-r--r--	kernel/trace/trace_uprobe.c	203
-rw-r--r--	kernel/watchdog.c	5
5 files changed, 432 insertions, 111 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index dce6e13cf9d7..3820e3cefbae 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -37,6 +37,7 @@
 #include <linux/ftrace_event.h>
 #include <linux/hw_breakpoint.h>
 #include <linux/mm_types.h>
+#include <linux/cgroup.h>
 
 #include "internal.h"
 
@@ -234,6 +235,20 @@ static void perf_ctx_unlock(struct perf_cpu_context *cpuctx,
 #ifdef CONFIG_CGROUP_PERF
 
 /*
+ * perf_cgroup_info keeps track of time_enabled for a cgroup.
+ * This is a per-cpu dynamically allocated data structure.
+ */
+struct perf_cgroup_info {
+	u64				time;
+	u64				timestamp;
+};
+
+struct perf_cgroup {
+	struct cgroup_subsys_state	css;
+	struct perf_cgroup_info __percpu *info;
+};
+
+/*
  * Must ensure cgroup is pinned (css_get) before calling
  * this function. In other words, we cannot call this function
  * if there is no cgroup event for the current CPU context.
@@ -976,9 +991,15 @@ static void perf_event__header_size(struct perf_event *event)
 	if (sample_type & PERF_SAMPLE_PERIOD)
 		size += sizeof(data->period);
 
+	if (sample_type & PERF_SAMPLE_WEIGHT)
+		size += sizeof(data->weight);
+
 	if (sample_type & PERF_SAMPLE_READ)
 		size += event->read_size;
 
+	if (sample_type & PERF_SAMPLE_DATA_SRC)
+		size += sizeof(data->data_src.val);
+
 	event->header_size = size;
 }
 
@@ -4193,6 +4214,12 @@ void perf_output_sample(struct perf_output_handle *handle,
 		perf_output_sample_ustack(handle,
 					  data->stack_user_size,
 					  data->regs_user.regs);
+
+	if (sample_type & PERF_SAMPLE_WEIGHT)
+		perf_output_put(handle, data->weight);
+
+	if (sample_type & PERF_SAMPLE_DATA_SRC)
+		perf_output_put(handle, data->data_src.val);
 }
 
 void perf_prepare_sample(struct perf_event_header *header,
@@ -4782,6 +4809,9 @@ got_name:
 	mmap_event->file_name = name;
 	mmap_event->file_size = size;
 
+	if (!(vma->vm_flags & VM_EXEC))
+		mmap_event->event_id.header.misc |= PERF_RECORD_MISC_MMAP_DATA;
+
 	mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size;
 
 	rcu_read_lock();
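The two new sample fields above are requested from userspace through perf_event_attr.sample_type. A hedged userspace sketch, assuming a uapi perf_event.h new enough to define the two new bits:

	#include <linux/perf_event.h>
	#include <string.h>

	static struct perf_event_attr attr;

	static void request_mem_profiling_fields(void)
	{
		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		/* ask for the new weight and data-source sample fields */
		attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_WEIGHT |
				   PERF_SAMPLE_DATA_SRC;
	}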
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index a567c8c7ef31..f3569747d629 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -75,6 +75,15 @@ struct uprobe {
 	struct arch_uprobe	arch;
 };
 
+struct return_instance {
+	struct uprobe		*uprobe;
+	unsigned long		func;
+	unsigned long		orig_ret_vaddr; /* original return address */
+	bool			chained;	/* true, if instance is nested */
+
+	struct return_instance	*next;		/* keep as stack */
+};
+
 /*
  * valid_vma: Verify if the specified vma is an executable vma
  * Relax restrictions while unregistering: vm_flags might have
@@ -173,10 +182,31 @@ bool __weak is_swbp_insn(uprobe_opcode_t *insn)
 	return *insn == UPROBE_SWBP_INSN;
 }
 
-static void copy_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t *opcode)
+/**
+ * is_trap_insn - check if instruction is breakpoint instruction.
+ * @insn: instruction to be checked.
+ * Default implementation of is_trap_insn
+ * Returns true if @insn is a breakpoint instruction.
+ *
+ * This function is needed for the case where an architecture has multiple
+ * trap instructions (like powerpc).
+ */
+bool __weak is_trap_insn(uprobe_opcode_t *insn)
+{
+	return is_swbp_insn(insn);
+}
+
+static void copy_from_page(struct page *page, unsigned long vaddr, void *dst, int len)
 {
 	void *kaddr = kmap_atomic(page);
-	memcpy(opcode, kaddr + (vaddr & ~PAGE_MASK), UPROBE_SWBP_INSN_SIZE);
+	memcpy(dst, kaddr + (vaddr & ~PAGE_MASK), len);
+	kunmap_atomic(kaddr);
+}
+
+static void copy_to_page(struct page *page, unsigned long vaddr, const void *src, int len)
+{
+	void *kaddr = kmap_atomic(page);
+	memcpy(kaddr + (vaddr & ~PAGE_MASK), src, len);
 	kunmap_atomic(kaddr);
 }
 
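The __weak is_trap_insn() default above just defers to is_swbp_insn(); an architecture with several trap encodings is expected to override it. A hypothetical override, with invented opcode macros purely for illustration:

	/* arch override sketch: accept either of two hypothetical trap encodings */
	bool is_trap_insn(uprobe_opcode_t *insn)
	{
		return *insn == MY_ARCH_TRAP_A || *insn == MY_ARCH_TRAP_B;
	}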
@@ -185,7 +215,16 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t
 	uprobe_opcode_t old_opcode;
 	bool is_swbp;
 
-	copy_opcode(page, vaddr, &old_opcode);
+	/*
+	 * Note: We only check if the old_opcode is UPROBE_SWBP_INSN here.
+	 * We do not check if it is any other 'trap variant' which could
+	 * be conditional trap instruction such as the one powerpc supports.
+	 *
+	 * The logic is that we do not care if the underlying instruction
+	 * is a trap variant; uprobes always wins over any other (gdb)
+	 * breakpoint.
+	 */
+	copy_from_page(page, vaddr, &old_opcode, UPROBE_SWBP_INSN_SIZE);
 	is_swbp = is_swbp_insn(&old_opcode);
 
 	if (is_swbp_insn(new_opcode)) {
@@ -204,7 +243,7 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t
  * Expect the breakpoint instruction to be the smallest size instruction for
  * the architecture. If an arch has variable length instruction and the
  * breakpoint instruction is not of the smallest length instruction
- * supported by that architecture then we need to modify is_swbp_at_addr and
+ * supported by that architecture then we need to modify is_trap_at_addr and
  * write_opcode accordingly. This would never be a problem for archs that
  * have fixed length instructions.
  */
@@ -225,7 +264,6 @@ static int write_opcode(struct mm_struct *mm, unsigned long vaddr,
 			uprobe_opcode_t opcode)
 {
 	struct page *old_page, *new_page;
-	void *vaddr_old, *vaddr_new;
 	struct vm_area_struct *vma;
 	int ret;
 
@@ -246,15 +284,8 @@ retry:
 
 	__SetPageUptodate(new_page);
 
-	/* copy the page now that we've got it stable */
-	vaddr_old = kmap_atomic(old_page);
-	vaddr_new = kmap_atomic(new_page);
-
-	memcpy(vaddr_new, vaddr_old, PAGE_SIZE);
-	memcpy(vaddr_new + (vaddr & ~PAGE_MASK), &opcode, UPROBE_SWBP_INSN_SIZE);
-
-	kunmap_atomic(vaddr_new);
-	kunmap_atomic(vaddr_old);
+	copy_highpage(new_page, old_page);
+	copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
 
 	ret = anon_vma_prepare(vma);
 	if (ret)
@@ -477,30 +508,18 @@ __copy_insn(struct address_space *mapping, struct file *filp, char *insn,
 			unsigned long nbytes, loff_t offset)
 {
 	struct page *page;
-	void *vaddr;
-	unsigned long off;
-	pgoff_t idx;
-
-	if (!filp)
-		return -EINVAL;
 
 	if (!mapping->a_ops->readpage)
 		return -EIO;
-
-	idx = offset >> PAGE_CACHE_SHIFT;
-	off = offset & ~PAGE_MASK;
-
 	/*
 	 * Ensure that the page that has the original instruction is
 	 * populated and in page-cache.
 	 */
-	page = read_mapping_page(mapping, idx, filp);
+	page = read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT, filp);
 	if (IS_ERR(page))
 		return PTR_ERR(page);
 
-	vaddr = kmap_atomic(page);
-	memcpy(insn, vaddr + off, nbytes);
-	kunmap_atomic(vaddr);
+	copy_from_page(page, offset, insn, nbytes);
 	page_cache_release(page);
 
 	return 0;
@@ -550,7 +569,7 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file,
 		goto out;
 
 	ret = -ENOTSUPP;
-	if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn))
+	if (is_trap_insn((uprobe_opcode_t *)uprobe->arch.insn))
 		goto out;
 
 	ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, vaddr);
@@ -758,7 +777,7 @@ register_for_each_vma(struct uprobe *uprobe, struct uprobe_consumer *new)
 		down_write(&mm->mmap_sem);
 		vma = find_vma(mm, info->vaddr);
 		if (!vma || !valid_vma(vma, is_register) ||
-		    vma->vm_file->f_mapping->host != uprobe->inode)
+		    file_inode(vma->vm_file) != uprobe->inode)
 			goto unlock;
 
 		if (vma->vm_start > info->vaddr ||
@@ -828,6 +847,10 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *
 	struct uprobe *uprobe;
 	int ret;
 
+	/* Uprobe must have at least one set consumer */
+	if (!uc->handler && !uc->ret_handler)
+		return -EINVAL;
+
 	/* Racy, just to catch the obvious mistakes */
 	if (offset > i_size_read(inode))
 		return -EINVAL;
@@ -917,7 +940,7 @@ static int unapply_uprobe(struct uprobe *uprobe, struct mm_struct *mm)
 		loff_t offset;
 
 		if (!valid_vma(vma, false) ||
-		    vma->vm_file->f_mapping->host != uprobe->inode)
+		    file_inode(vma->vm_file) != uprobe->inode)
 			continue;
 
 		offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
@@ -1010,7 +1033,7 @@ int uprobe_mmap(struct vm_area_struct *vma)
 	if (no_uprobe_events() || !valid_vma(vma, true))
 		return 0;
 
-	inode = vma->vm_file->f_mapping->host;
+	inode = file_inode(vma->vm_file);
 	if (!inode)
 		return 0;
 
@@ -1041,7 +1064,7 @@ vma_has_uprobes(struct vm_area_struct *vma, unsigned long start, unsigned long e
 	struct inode *inode;
 	struct rb_node *n;
 
-	inode = vma->vm_file->f_mapping->host;
+	inode = file_inode(vma->vm_file);
 
 	min = vaddr_to_offset(vma, start);
 	max = min + (end - start) - 1;
@@ -1114,6 +1137,7 @@ static struct xol_area *get_xol_area(void)
 {
 	struct mm_struct *mm = current->mm;
 	struct xol_area *area;
+	uprobe_opcode_t insn = UPROBE_SWBP_INSN;
 
 	area = mm->uprobes_state.xol_area;
 	if (area)
@@ -1131,7 +1155,12 @@ static struct xol_area *get_xol_area(void)
 	if (!area->page)
 		goto free_bitmap;
 
+	/* allocate first slot of task's xol_area for the return probes */
+	set_bit(0, area->bitmap);
+	copy_to_page(area->page, 0, &insn, UPROBE_SWBP_INSN_SIZE);
+	atomic_set(&area->slot_count, 1);
 	init_waitqueue_head(&area->wq);
+
 	if (!xol_add_vma(area))
 		return area;
 
@@ -1216,9 +1245,7 @@ static unsigned long xol_take_insn_slot(struct xol_area *area)
 static unsigned long xol_get_insn_slot(struct uprobe *uprobe)
 {
 	struct xol_area *area;
-	unsigned long offset;
 	unsigned long xol_vaddr;
-	void *vaddr;
 
 	area = get_xol_area();
 	if (!area)
@@ -1229,10 +1256,7 @@ static unsigned long xol_get_insn_slot(struct uprobe *uprobe)
 		return 0;
 
 	/* Initialize the slot */
-	offset = xol_vaddr & ~PAGE_MASK;
-	vaddr = kmap_atomic(area->page);
-	memcpy(vaddr + offset, uprobe->arch.insn, MAX_UINSN_BYTES);
-	kunmap_atomic(vaddr);
+	copy_to_page(area->page, xol_vaddr, uprobe->arch.insn, MAX_UINSN_BYTES);
 	/*
 	 * We probably need flush_icache_user_range() but it needs vma.
 	 * This should work on supported architectures too.
@@ -1298,6 +1322,7 @@ unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs)
 void uprobe_free_utask(struct task_struct *t)
 {
 	struct uprobe_task *utask = t->utask;
+	struct return_instance *ri, *tmp;
 
 	if (!utask)
 		return;
@@ -1305,6 +1330,15 @@ void uprobe_free_utask(struct task_struct *t)
 	if (utask->active_uprobe)
 		put_uprobe(utask->active_uprobe);
 
+	ri = utask->return_instances;
+	while (ri) {
+		tmp = ri;
+		ri = ri->next;
+
+		put_uprobe(tmp->uprobe);
+		kfree(tmp);
+	}
+
 	xol_free_insn_slot(t);
 	kfree(utask);
 	t->utask = NULL;
@@ -1333,6 +1367,93 @@ static struct uprobe_task *get_utask(void)
 	return current->utask;
 }
 
+/*
+ * The current area->vaddr notion assumes the trampoline address is always
+ * equal to area->vaddr.
+ *
+ * Returns -1 in case the xol_area is not allocated.
+ */
+static unsigned long get_trampoline_vaddr(void)
+{
+	struct xol_area *area;
+	unsigned long trampoline_vaddr = -1;
+
+	area = current->mm->uprobes_state.xol_area;
+	smp_read_barrier_depends();
+	if (area)
+		trampoline_vaddr = area->vaddr;
+
+	return trampoline_vaddr;
+}
+
+static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
+{
+	struct return_instance *ri;
+	struct uprobe_task *utask;
+	unsigned long orig_ret_vaddr, trampoline_vaddr;
+	bool chained = false;
+
+	if (!get_xol_area())
+		return;
+
+	utask = get_utask();
+	if (!utask)
+		return;
+
+	if (utask->depth >= MAX_URETPROBE_DEPTH) {
+		printk_ratelimited(KERN_INFO "uprobe: omit uretprobe due to"
+				" nestedness limit pid/tgid=%d/%d\n",
+				current->pid, current->tgid);
+		return;
+	}
+
+	ri = kzalloc(sizeof(struct return_instance), GFP_KERNEL);
+	if (!ri)
+		goto fail;
+
+	trampoline_vaddr = get_trampoline_vaddr();
+	orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs);
+	if (orig_ret_vaddr == -1)
+		goto fail;
+
+	/*
+	 * We don't want to keep the trampoline address on the stack, rather
+	 * keep the original return address of the first caller through all
+	 * the consequent instances. This also makes breakpoint unwrapping
+	 * easier.
+	 */
+	if (orig_ret_vaddr == trampoline_vaddr) {
+		if (!utask->return_instances) {
+			/*
+			 * This situation is not possible. Likely we have an
+			 * attack from user-space.
+			 */
+			pr_warn("uprobe: unable to set uretprobe pid/tgid=%d/%d\n",
+				current->pid, current->tgid);
+			goto fail;
+		}
+
+		chained = true;
+		orig_ret_vaddr = utask->return_instances->orig_ret_vaddr;
+	}
+
+	atomic_inc(&uprobe->ref);
+	ri->uprobe = uprobe;
+	ri->func = instruction_pointer(regs);
+	ri->orig_ret_vaddr = orig_ret_vaddr;
+	ri->chained = chained;
+
+	utask->depth++;
+
+	/* add instance to the stack */
+	ri->next = utask->return_instances;
+	utask->return_instances = ri;
+
+	return;
+
+ fail:
+	kfree(ri);
+}
+
 /* Prepare to single-step probed instruction out of line. */
 static int
 pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long bp_vaddr)
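prepare_uretprobe() leans on the per-architecture helper arch_uretprobe_hijack_return_addr(), which swaps the user-visible return address for the trampoline address and hands back the original. A simplified, x86-flavoured sketch of the idea; this is not the exact source added by this series (the real code also handles 32-bit tasks and partially-failed copies):

	static unsigned long
	hijack_return_addr_sketch(unsigned long trampoline_vaddr,
				  struct pt_regs *regs)
	{
		unsigned long orig_ret_vaddr = 0;
		const int rasize = sizeof(unsigned long);

		/* on x86 the return address sits at the top of the user stack */
		if (copy_from_user(&orig_ret_vaddr, (void __user *)regs->sp, rasize))
			return -1;

		/* already points at the trampoline: the caller chains instances */
		if (orig_ret_vaddr == trampoline_vaddr)
			return orig_ret_vaddr;

		if (copy_to_user((void __user *)regs->sp, &trampoline_vaddr, rasize))
			return -1;

		return orig_ret_vaddr;
	}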
@@ -1431,7 +1552,7 @@ static void mmf_recalc_uprobes(struct mm_struct *mm)
 		clear_bit(MMF_HAS_UPROBES, &mm->flags);
 }
 
-static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr)
+static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr)
 {
 	struct page *page;
 	uprobe_opcode_t opcode;
@@ -1449,10 +1570,11 @@ static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr)
 	if (result < 0)
 		return result;
 
-	copy_opcode(page, vaddr, &opcode);
+	copy_from_page(page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
 	put_page(page);
  out:
-	return is_swbp_insn(&opcode);
+	/* This needs to return true for any variant of the trap insn */
+	return is_trap_insn(&opcode);
 }
 
 static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
@@ -1465,14 +1587,14 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
 	vma = find_vma(mm, bp_vaddr);
 	if (vma && vma->vm_start <= bp_vaddr) {
 		if (valid_vma(vma, false)) {
-			struct inode *inode = vma->vm_file->f_mapping->host;
+			struct inode *inode = file_inode(vma->vm_file);
 			loff_t offset = vaddr_to_offset(vma, bp_vaddr);
 
 			uprobe = find_uprobe(inode, offset);
 		}
 
 		if (!uprobe)
-			*is_swbp = is_swbp_at_addr(mm, bp_vaddr);
+			*is_swbp = is_trap_at_addr(mm, bp_vaddr);
 	} else {
 		*is_swbp = -EFAULT;
 	}
@@ -1488,16 +1610,27 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
 {
 	struct uprobe_consumer *uc;
 	int remove = UPROBE_HANDLER_REMOVE;
+	bool need_prep = false; /* prepare return uprobe, when needed */
 
 	down_read(&uprobe->register_rwsem);
 	for (uc = uprobe->consumers; uc; uc = uc->next) {
-		int rc = uc->handler(uc, regs);
+		int rc = 0;
+
+		if (uc->handler) {
+			rc = uc->handler(uc, regs);
+			WARN(rc & ~UPROBE_HANDLER_MASK,
+				"bad rc=0x%x from %pf()\n", rc, uc->handler);
+		}
+
+		if (uc->ret_handler)
+			need_prep = true;
 
-		WARN(rc & ~UPROBE_HANDLER_MASK,
-			"bad rc=0x%x from %pf()\n", rc, uc->handler);
 		remove &= rc;
 	}
 
+	if (need_prep && !remove)
+		prepare_uretprobe(uprobe, regs); /* put bp at return */
+
 	if (remove && uprobe->consumers) {
 		WARN_ON(!uprobe_is_active(uprobe));
 		unapply_uprobe(uprobe, current->mm);
@@ -1505,6 +1638,64 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
 	up_read(&uprobe->register_rwsem);
 }
 
+static void
+handle_uretprobe_chain(struct return_instance *ri, struct pt_regs *regs)
+{
+	struct uprobe *uprobe = ri->uprobe;
+	struct uprobe_consumer *uc;
+
+	down_read(&uprobe->register_rwsem);
+	for (uc = uprobe->consumers; uc; uc = uc->next) {
+		if (uc->ret_handler)
+			uc->ret_handler(uc, ri->func, regs);
+	}
+	up_read(&uprobe->register_rwsem);
+}
+
+static bool handle_trampoline(struct pt_regs *regs)
+{
+	struct uprobe_task *utask;
+	struct return_instance *ri, *tmp;
+	bool chained;
+
+	utask = current->utask;
+	if (!utask)
+		return false;
+
+	ri = utask->return_instances;
+	if (!ri)
+		return false;
+
+	/*
+	 * TODO: we should throw out return_instance's invalidated by
+	 * longjmp(), currently we assume that the probed function always
+	 * returns.
+	 */
+	instruction_pointer_set(regs, ri->orig_ret_vaddr);
+
+	for (;;) {
+		handle_uretprobe_chain(ri, regs);
+
+		chained = ri->chained;
+		put_uprobe(ri->uprobe);
+
+		tmp = ri;
+		ri = ri->next;
+		kfree(tmp);
+
+		if (!chained)
+			break;
+
+		utask->depth--;
+
+		BUG_ON(!ri);
+	}
+
+	utask->return_instances = ri;
+
+	return true;
+}
+
 /*
  * Run handler and ask thread to singlestep.
  * Ensure all non-fatal signals cannot interrupt thread while it singlesteps.
@@ -1516,8 +1707,15 @@ static void handle_swbp(struct pt_regs *regs)
 	int uninitialized_var(is_swbp);
 
 	bp_vaddr = uprobe_get_swbp_addr(regs);
-	uprobe = find_active_uprobe(bp_vaddr, &is_swbp);
+	if (bp_vaddr == get_trampoline_vaddr()) {
+		if (handle_trampoline(regs))
+			return;
+
+		pr_warn("uprobe: unable to handle uretprobe pid/tgid=%d/%d\n",
+				current->pid, current->tgid);
+	}
 
+	uprobe = find_active_uprobe(bp_vaddr, &is_swbp);
 	if (!uprobe) {
 		if (is_swbp > 0) {
 			/* No matching uprobe; signal SIGTRAP. */
@@ -1616,7 +1814,11 @@ void uprobe_notify_resume(struct pt_regs *regs)
  */
 int uprobe_pre_sstep_notifier(struct pt_regs *regs)
 {
-	if (!current->mm || !test_bit(MMF_HAS_UPROBES, &current->mm->flags))
+	if (!current->mm)
+		return 0;
+
+	if (!test_bit(MMF_HAS_UPROBES, &current->mm->flags) &&
+	    (!current->utask || !current->utask->return_instances))
 		return 0;
 
 	set_thread_flag(TIF_UPROBE);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 9e014582e763..711ca7d3e7f1 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -109,11 +109,6 @@ struct kretprobe_trace_entry_head {
 	unsigned long		ret_ip;
 };
 
-struct uprobe_trace_entry_head {
-	struct trace_entry	ent;
-	unsigned long		ip;
-};
-
 /*
  * trace_flag_type is an enumeration that holds different
  * states when a trace occurs. These are:
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 8dad2a92dee9..32494fb0ee64 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -28,6 +28,18 @@
 
 #define UPROBE_EVENT_SYSTEM	"uprobes"
 
+struct uprobe_trace_entry_head {
+	struct trace_entry	ent;
+	unsigned long		vaddr[];
+};
+
+#define SIZEOF_TRACE_ENTRY(is_return)			\
+	(sizeof(struct uprobe_trace_entry_head) +	\
+	 sizeof(unsigned long) * (is_return ? 2 : 1))
+
+#define DATAOF_TRACE_ENTRY(entry, is_return)		\
+	((void*)(entry) + SIZEOF_TRACE_ENTRY(is_return))
+
 struct trace_uprobe_filter {
 	rwlock_t		rwlock;
 	int			nr_systemwide;
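To see what the two macros compute, here is a small userspace illustration with a stand-in for struct trace_entry (the real definition lives elsewhere in the tracing code; field layout and sizes below are only illustrative):

	#include <stdio.h>

	struct trace_entry {		/* stand-in, fields approximate */
		unsigned short	type;
		unsigned char	flags;
		unsigned char	preempt_count;
		int		pid;
	};

	struct uprobe_trace_entry_head {
		struct trace_entry	ent;
		unsigned long		vaddr[];
	};

	#define SIZEOF_TRACE_ENTRY(is_return)			\
		(sizeof(struct uprobe_trace_entry_head) +	\
		 sizeof(unsigned long) * (is_return ? 2 : 1))

	int main(void)
	{
		/* 'p' probes store one slot (ip), 'r' probes two (func, ret ip) */
		printf("p entry: %zu bytes\n", SIZEOF_TRACE_ENTRY(0));
		printf("r entry: %zu bytes\n", SIZEOF_TRACE_ENTRY(1));
		return 0;
	}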
@@ -64,6 +76,8 @@ static DEFINE_MUTEX(uprobe_lock);
 static LIST_HEAD(uprobe_list);
 
 static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs);
+static int uretprobe_dispatcher(struct uprobe_consumer *con,
+				unsigned long func, struct pt_regs *regs);
 
 static inline void init_trace_uprobe_filter(struct trace_uprobe_filter *filter)
 {
@@ -77,11 +91,16 @@ static inline bool uprobe_filter_is_empty(struct trace_uprobe_filter *filter)
 	return !filter->nr_systemwide && list_empty(&filter->perf_events);
 }
 
+static inline bool is_ret_probe(struct trace_uprobe *tu)
+{
+	return tu->consumer.ret_handler != NULL;
+}
+
 /*
  * Allocate new trace_uprobe and initialize it (including uprobes).
  */
 static struct trace_uprobe *
-alloc_trace_uprobe(const char *group, const char *event, int nargs)
+alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret)
 {
 	struct trace_uprobe *tu;
 
@@ -106,6 +125,8 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs)
 
 	INIT_LIST_HEAD(&tu->list);
 	tu->consumer.handler = uprobe_dispatcher;
+	if (is_ret)
+		tu->consumer.ret_handler = uretprobe_dispatcher;
 	init_trace_uprobe_filter(&tu->filter);
 	return tu;
 
@@ -180,7 +201,7 @@ end:
 
 /*
  * Argument syntax:
- *  - Add uprobe: p[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS]
+ *  - Add uprobe: p|r[:[GRP/]EVENT] PATH:SYMBOL [FETCHARGS]
  *
  *  - Remove uprobe: -:[GRP/]EVENT
  */
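Instantiating the extended syntax through tracefs might look like this; the event name, path and offset are made up, only the new 'r' prefix is the point:

	echo 'r:myretprobe /bin/bash:0x4245c0' >> /sys/kernel/debug/tracing/uprobe_events
	echo '-:myretprobe' >> /sys/kernel/debug/tracing/uprobe_events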
@@ -192,20 +213,23 @@ static int create_trace_uprobe(int argc, char **argv)
 	char buf[MAX_EVENT_NAME_LEN];
 	struct path path;
 	unsigned long offset;
-	bool is_delete;
+	bool is_delete, is_return;
 	int i, ret;
 
 	inode = NULL;
 	ret = 0;
 	is_delete = false;
+	is_return = false;
 	event = NULL;
 	group = NULL;
 
 	/* argc must be >= 1 */
 	if (argv[0][0] == '-')
 		is_delete = true;
+	else if (argv[0][0] == 'r')
+		is_return = true;
 	else if (argv[0][0] != 'p') {
-		pr_info("Probe definition must be started with 'p' or '-'.\n");
+		pr_info("Probe definition must be started with 'p', 'r' or '-'.\n");
 		return -EINVAL;
 	}
 
@@ -303,7 +327,7 @@ static int create_trace_uprobe(int argc, char **argv)
 		kfree(tail);
 	}
 
-	tu = alloc_trace_uprobe(group, event, argc);
+	tu = alloc_trace_uprobe(group, event, argc, is_return);
 	if (IS_ERR(tu)) {
 		pr_info("Failed to allocate trace_uprobe.(%d)\n", (int)PTR_ERR(tu));
 		ret = PTR_ERR(tu);
@@ -414,9 +438,10 @@ static void probes_seq_stop(struct seq_file *m, void *v)
 static int probes_seq_show(struct seq_file *m, void *v)
 {
 	struct trace_uprobe *tu = v;
+	char c = is_ret_probe(tu) ? 'r' : 'p';
 	int i;
 
-	seq_printf(m, "p:%s/%s", tu->call.class->system, tu->call.name);
+	seq_printf(m, "%c:%s/%s", c, tu->call.class->system, tu->call.name);
 	seq_printf(m, " %s:0x%p", tu->filename, (void *)tu->offset);
 
 	for (i = 0; i < tu->nr_args; i++)
@@ -485,65 +510,81 @@ static const struct file_operations uprobe_profile_ops = {
 	.release	= seq_release,
 };
 
-/* uprobe handler */
-static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
+static void uprobe_trace_print(struct trace_uprobe *tu,
+				unsigned long func, struct pt_regs *regs)
 {
 	struct uprobe_trace_entry_head *entry;
 	struct ring_buffer_event *event;
 	struct ring_buffer *buffer;
-	u8 *data;
-	int size, i, pc;
-	unsigned long irq_flags;
+	void *data;
+	int size, i;
 	struct ftrace_event_call *call = &tu->call;
 
-	local_save_flags(irq_flags);
-	pc = preempt_count();
-
-	size = sizeof(*entry) + tu->size;
-
+	size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
 	event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
-						  size, irq_flags, pc);
+						  size + tu->size, 0, 0);
 	if (!event)
-		return 0;
+		return;
 
 	entry = ring_buffer_event_data(event);
-	entry->ip = instruction_pointer(task_pt_regs(current));
-	data = (u8 *)&entry[1];
+	if (is_ret_probe(tu)) {
+		entry->vaddr[0] = func;
+		entry->vaddr[1] = instruction_pointer(regs);
+		data = DATAOF_TRACE_ENTRY(entry, true);
+	} else {
+		entry->vaddr[0] = instruction_pointer(regs);
+		data = DATAOF_TRACE_ENTRY(entry, false);
+	}
+
 	for (i = 0; i < tu->nr_args; i++)
 		call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
 
 	if (!filter_current_check_discard(buffer, call, entry, event))
-		trace_buffer_unlock_commit(buffer, event, irq_flags, pc);
+		trace_buffer_unlock_commit(buffer, event, 0, 0);
+}
 
+/* uprobe handler */
+static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
+{
+	if (!is_ret_probe(tu))
+		uprobe_trace_print(tu, 0, regs);
 	return 0;
 }
 
+static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func,
+				struct pt_regs *regs)
+{
+	uprobe_trace_print(tu, func, regs);
+}
+
 /* Event entry printers */
 static enum print_line_t
 print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *event)
 {
-	struct uprobe_trace_entry_head *field;
+	struct uprobe_trace_entry_head *entry;
 	struct trace_seq *s = &iter->seq;
 	struct trace_uprobe *tu;
 	u8 *data;
 	int i;
 
-	field = (struct uprobe_trace_entry_head *)iter->ent;
+	entry = (struct uprobe_trace_entry_head *)iter->ent;
 	tu = container_of(event, struct trace_uprobe, call.event);
 
-	if (!trace_seq_printf(s, "%s: (", tu->call.name))
-		goto partial;
-
-	if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
-		goto partial;
-
-	if (!trace_seq_puts(s, ")"))
-		goto partial;
+	if (is_ret_probe(tu)) {
+		if (!trace_seq_printf(s, "%s: (0x%lx <- 0x%lx)", tu->call.name,
+					entry->vaddr[1], entry->vaddr[0]))
+			goto partial;
+		data = DATAOF_TRACE_ENTRY(entry, true);
+	} else {
+		if (!trace_seq_printf(s, "%s: (0x%lx)", tu->call.name,
+					entry->vaddr[0]))
+			goto partial;
+		data = DATAOF_TRACE_ENTRY(entry, false);
+	}
 
-	data = (u8 *)&field[1];
 	for (i = 0; i < tu->nr_args; i++) {
 		if (!tu->args[i].type->print(s, tu->args[i].name,
-					     data + tu->args[i].offset, field))
+					     data + tu->args[i].offset, entry))
 			goto partial;
 	}
 
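With the printers above, a return-probe event is rendered as "EVENT: (0xRETADDR <- 0xFUNC)", return site on the left and probed function on the right, while an entry probe keeps the plain "(0xIP)" form. For example (addresses invented):

	myretprobe: (0x400733 <- 0x4006fd)
	myprobe: (0x4006fd)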
@@ -595,16 +636,23 @@ static void probe_event_disable(struct trace_uprobe *tu, int flag)
 
 static int uprobe_event_define_fields(struct ftrace_event_call *event_call)
 {
-	int ret, i;
+	int ret, i, size;
 	struct uprobe_trace_entry_head field;
-	struct trace_uprobe *tu = (struct trace_uprobe *)event_call->data;
+	struct trace_uprobe *tu = event_call->data;
 
-	DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
+	if (is_ret_probe(tu)) {
+		DEFINE_FIELD(unsigned long, vaddr[0], FIELD_STRING_FUNC, 0);
+		DEFINE_FIELD(unsigned long, vaddr[1], FIELD_STRING_RETIP, 0);
+		size = SIZEOF_TRACE_ENTRY(true);
+	} else {
+		DEFINE_FIELD(unsigned long, vaddr[0], FIELD_STRING_IP, 0);
+		size = SIZEOF_TRACE_ENTRY(false);
+	}
 	/* Set argument names as fields */
 	for (i = 0; i < tu->nr_args; i++) {
 		ret = trace_define_field(event_call, tu->args[i].type->fmttype,
 					 tu->args[i].name,
-					 sizeof(field) + tu->args[i].offset,
+					 size + tu->args[i].offset,
 					 tu->args[i].type->size,
 					 tu->args[i].type->is_signed,
 					 FILTER_OTHER);
@@ -622,8 +670,13 @@ static int __set_print_fmt(struct trace_uprobe *tu, char *buf, int len)
 	int i;
 	int pos = 0;
 
-	fmt = "(%lx)";
-	arg = "REC->" FIELD_STRING_IP;
+	if (is_ret_probe(tu)) {
+		fmt = "(%lx <- %lx)";
+		arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
+	} else {
+		fmt = "(%lx)";
+		arg = "REC->" FIELD_STRING_IP;
+	}
 
 	/* When len=0, we just calculate the needed length */
 
@@ -752,49 +805,68 @@ static bool uprobe_perf_filter(struct uprobe_consumer *uc,
 	return ret;
 }
 
-/* uprobe profile handler */
-static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
+static void uprobe_perf_print(struct trace_uprobe *tu,
+				unsigned long func, struct pt_regs *regs)
 {
 	struct ftrace_event_call *call = &tu->call;
 	struct uprobe_trace_entry_head *entry;
 	struct hlist_head *head;
-	u8 *data;
-	int size, __size, i;
-	int rctx;
+	void *data;
+	int size, rctx, i;
 
-	if (!uprobe_perf_filter(&tu->consumer, 0, current->mm))
-		return UPROBE_HANDLER_REMOVE;
-
-	__size = sizeof(*entry) + tu->size;
-	size = ALIGN(__size + sizeof(u32), sizeof(u64));
-	size -= sizeof(u32);
+	size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
+	size = ALIGN(size + tu->size + sizeof(u32), sizeof(u64)) - sizeof(u32);
 	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough"))
-		return 0;
+		return;
 
 	preempt_disable();
+	head = this_cpu_ptr(call->perf_events);
+	if (hlist_empty(head))
+		goto out;
 
 	entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
 	if (!entry)
 		goto out;
 
-	entry->ip = instruction_pointer(task_pt_regs(current));
-	data = (u8 *)&entry[1];
+	if (is_ret_probe(tu)) {
+		entry->vaddr[0] = func;
+		entry->vaddr[1] = instruction_pointer(regs);
+		data = DATAOF_TRACE_ENTRY(entry, true);
+	} else {
+		entry->vaddr[0] = instruction_pointer(regs);
+		data = DATAOF_TRACE_ENTRY(entry, false);
+	}
+
 	for (i = 0; i < tu->nr_args; i++)
 		call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
 
-	head = this_cpu_ptr(call->perf_events);
-	perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head, NULL);
-
+	perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);
 out:
 	preempt_enable();
+}
+
+/* uprobe profile handler */
+static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
+{
+	if (!uprobe_perf_filter(&tu->consumer, 0, current->mm))
+		return UPROBE_HANDLER_REMOVE;
+
+	if (!is_ret_probe(tu))
+		uprobe_perf_print(tu, 0, regs);
 	return 0;
 }
 
+static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func,
+				struct pt_regs *regs)
+{
+	uprobe_perf_print(tu, func, regs);
+}
 #endif /* CONFIG_PERF_EVENTS */
 
 static
 int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type, void *data)
 {
-	struct trace_uprobe *tu = (struct trace_uprobe *)event->data;
+	struct trace_uprobe *tu = event->data;
 
 	switch (type) {
 	case TRACE_REG_REGISTER:
@@ -843,6 +915,23 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
 	return ret;
 }
 
+static int uretprobe_dispatcher(struct uprobe_consumer *con,
+				unsigned long func, struct pt_regs *regs)
+{
+	struct trace_uprobe *tu;
+
+	tu = container_of(con, struct trace_uprobe, consumer);
+
+	if (tu->flags & TP_FLAG_TRACE)
+		uretprobe_trace_func(tu, func, regs);
+
+#ifdef CONFIG_PERF_EVENTS
+	if (tu->flags & TP_FLAG_PROFILE)
+		uretprobe_perf_func(tu, func, regs);
+#endif
+	return 0;
+}
+
 static struct trace_event_functions uprobe_funcs = {
 	.trace		= print_uprobe_event
 };
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 4a944676358e..05039e348f07 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -517,6 +517,11 @@ int proc_dowatchdog(struct ctl_table *table, int write,
 		return ret;
 
 	set_sample_period();
+	/*
+	 * Watchdog threads shouldn't be enabled if they are
+	 * disabled. The 'watchdog_disabled' variable check in
+	 * watchdog_*_all_cpus() function takes care of this.
+	 */
 	if (watchdog_enabled && watchdog_thresh)
 		watchdog_enable_all_cpus();
 	else