aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile2
-rw-r--r--kernel/events/callchain.c38
-rw-r--r--kernel/events/core.c214
-rw-r--r--kernel/events/internal.h82
-rw-r--r--kernel/events/ring_buffer.c10
-rw-r--r--kernel/events/uprobes.c248
-rw-r--r--kernel/fork.c6
-rw-r--r--kernel/kprobes.c247
-rw-r--r--kernel/trace/Kconfig10
-rw-r--r--kernel/trace/Makefile8
-rw-r--r--kernel/trace/ftrace.c322
-rw-r--r--kernel/trace/ring_buffer.c4
-rw-r--r--kernel/trace/trace.c12
-rw-r--r--kernel/trace/trace.h3
-rw-r--r--kernel/trace/trace_event_perf.c3
-rw-r--r--kernel/trace/trace_events.c116
-rw-r--r--kernel/trace/trace_events_filter.c2
-rw-r--r--kernel/trace/trace_functions.c14
-rw-r--r--kernel/trace/trace_functions_graph.c5
-rw-r--r--kernel/trace/trace_irqsoff.c5
-rw-r--r--kernel/trace/trace_sched_wakeup.c5
-rw-r--r--kernel/trace/trace_selftest.c304
-rw-r--r--kernel/trace/trace_stack.c4
-rw-r--r--kernel/trace/trace_syscalls.c2
24 files changed, 1259 insertions, 407 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index e5602d32acb3..5404911eaee9 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -97,7 +97,7 @@ obj-$(CONFIG_COMPAT_BINFMT_ELF) += elfcore.o
97obj-$(CONFIG_BINFMT_ELF_FDPIC) += elfcore.o 97obj-$(CONFIG_BINFMT_ELF_FDPIC) += elfcore.o
98obj-$(CONFIG_FUNCTION_TRACER) += trace/ 98obj-$(CONFIG_FUNCTION_TRACER) += trace/
99obj-$(CONFIG_TRACING) += trace/ 99obj-$(CONFIG_TRACING) += trace/
100obj-$(CONFIG_X86_DS) += trace/ 100obj-$(CONFIG_TRACE_CLOCK) += trace/
101obj-$(CONFIG_RING_BUFFER) += trace/ 101obj-$(CONFIG_RING_BUFFER) += trace/
102obj-$(CONFIG_TRACEPOINTS) += trace/ 102obj-$(CONFIG_TRACEPOINTS) += trace/
103obj-$(CONFIG_IRQ_WORK) += irq_work.o 103obj-$(CONFIG_IRQ_WORK) += irq_work.o
diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index 98d4597f43d6..c77206184b8b 100644
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -159,6 +159,11 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs)
159 int rctx; 159 int rctx;
160 struct perf_callchain_entry *entry; 160 struct perf_callchain_entry *entry;
161 161
162 int kernel = !event->attr.exclude_callchain_kernel;
163 int user = !event->attr.exclude_callchain_user;
164
165 if (!kernel && !user)
166 return NULL;
162 167
163 entry = get_callchain_entry(&rctx); 168 entry = get_callchain_entry(&rctx);
164 if (rctx == -1) 169 if (rctx == -1)
@@ -169,24 +174,29 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs)
169 174
170 entry->nr = 0; 175 entry->nr = 0;
171 176
172 if (!user_mode(regs)) { 177 if (kernel && !user_mode(regs)) {
173 perf_callchain_store(entry, PERF_CONTEXT_KERNEL); 178 perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
174 perf_callchain_kernel(entry, regs); 179 perf_callchain_kernel(entry, regs);
175 if (current->mm)
176 regs = task_pt_regs(current);
177 else
178 regs = NULL;
179 } 180 }
180 181
181 if (regs) { 182 if (user) {
182 /* 183 if (!user_mode(regs)) {
183 * Disallow cross-task user callchains. 184 if (current->mm)
184 */ 185 regs = task_pt_regs(current);
185 if (event->ctx->task && event->ctx->task != current) 186 else
186 goto exit_put; 187 regs = NULL;
187 188 }
188 perf_callchain_store(entry, PERF_CONTEXT_USER); 189
189 perf_callchain_user(entry, regs); 190 if (regs) {
191 /*
192 * Disallow cross-task user callchains.
193 */
194 if (event->ctx->task && event->ctx->task != current)
195 goto exit_put;
196
197 perf_callchain_store(entry, PERF_CONTEXT_USER);
198 perf_callchain_user(entry, regs);
199 }
190 } 200 }
191 201
192exit_put: 202exit_put:
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 7fee567153f0..7b9df353ba1b 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -36,6 +36,7 @@
36#include <linux/perf_event.h> 36#include <linux/perf_event.h>
37#include <linux/ftrace_event.h> 37#include <linux/ftrace_event.h>
38#include <linux/hw_breakpoint.h> 38#include <linux/hw_breakpoint.h>
39#include <linux/mm_types.h>
39 40
40#include "internal.h" 41#include "internal.h"
41 42
@@ -3764,6 +3765,132 @@ int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
3764} 3765}
3765EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks); 3766EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
3766 3767
3768static void
3769perf_output_sample_regs(struct perf_output_handle *handle,
3770 struct pt_regs *regs, u64 mask)
3771{
3772 int bit;
3773
3774 for_each_set_bit(bit, (const unsigned long *) &mask,
3775 sizeof(mask) * BITS_PER_BYTE) {
3776 u64 val;
3777
3778 val = perf_reg_value(regs, bit);
3779 perf_output_put(handle, val);
3780 }
3781}
3782
3783static void perf_sample_regs_user(struct perf_regs_user *regs_user,
3784 struct pt_regs *regs)
3785{
3786 if (!user_mode(regs)) {
3787 if (current->mm)
3788 regs = task_pt_regs(current);
3789 else
3790 regs = NULL;
3791 }
3792
3793 if (regs) {
3794 regs_user->regs = regs;
3795 regs_user->abi = perf_reg_abi(current);
3796 }
3797}
3798
3799/*
3800 * Get remaining task size from user stack pointer.
3801 *
3802 * It'd be better to take stack vma map and limit this more
3803 * precisly, but there's no way to get it safely under interrupt,
3804 * so using TASK_SIZE as limit.
3805 */
3806static u64 perf_ustack_task_size(struct pt_regs *regs)
3807{
3808 unsigned long addr = perf_user_stack_pointer(regs);
3809
3810 if (!addr || addr >= TASK_SIZE)
3811 return 0;
3812
3813 return TASK_SIZE - addr;
3814}
3815
3816static u16
3817perf_sample_ustack_size(u16 stack_size, u16 header_size,
3818 struct pt_regs *regs)
3819{
3820 u64 task_size;
3821
3822 /* No regs, no stack pointer, no dump. */
3823 if (!regs)
3824 return 0;
3825
3826 /*
3827 * Check if we fit in with the requested stack size into the:
3828 * - TASK_SIZE
3829 * If we don't, we limit the size to the TASK_SIZE.
3830 *
3831 * - remaining sample size
3832 * If we don't, we customize the stack size to
3833 * fit in to the remaining sample size.
3834 */
3835
3836 task_size = min((u64) USHRT_MAX, perf_ustack_task_size(regs));
3837 stack_size = min(stack_size, (u16) task_size);
3838
3839 /* Current header size plus static size and dynamic size. */
3840 header_size += 2 * sizeof(u64);
3841
3842 /* Do we fit in with the current stack dump size? */
3843 if ((u16) (header_size + stack_size) < header_size) {
3844 /*
3845 * If we overflow the maximum size for the sample,
3846 * we customize the stack dump size to fit in.
3847 */
3848 stack_size = USHRT_MAX - header_size - sizeof(u64);
3849 stack_size = round_up(stack_size, sizeof(u64));
3850 }
3851
3852 return stack_size;
3853}
3854
3855static void
3856perf_output_sample_ustack(struct perf_output_handle *handle, u64 dump_size,
3857 struct pt_regs *regs)
3858{
3859 /* Case of a kernel thread, nothing to dump */
3860 if (!regs) {
3861 u64 size = 0;
3862 perf_output_put(handle, size);
3863 } else {
3864 unsigned long sp;
3865 unsigned int rem;
3866 u64 dyn_size;
3867
3868 /*
3869 * We dump:
3870 * static size
3871 * - the size requested by user or the best one we can fit
3872 * in to the sample max size
3873 * data
3874 * - user stack dump data
3875 * dynamic size
3876 * - the actual dumped size
3877 */
3878
3879 /* Static size. */
3880 perf_output_put(handle, dump_size);
3881
3882 /* Data. */
3883 sp = perf_user_stack_pointer(regs);
3884 rem = __output_copy_user(handle, (void *) sp, dump_size);
3885 dyn_size = dump_size - rem;
3886
3887 perf_output_skip(handle, rem);
3888
3889 /* Dynamic size. */
3890 perf_output_put(handle, dyn_size);
3891 }
3892}
3893
3767static void __perf_event_header__init_id(struct perf_event_header *header, 3894static void __perf_event_header__init_id(struct perf_event_header *header,
3768 struct perf_sample_data *data, 3895 struct perf_sample_data *data,
3769 struct perf_event *event) 3896 struct perf_event *event)
@@ -4024,6 +4151,28 @@ void perf_output_sample(struct perf_output_handle *handle,
4024 perf_output_put(handle, nr); 4151 perf_output_put(handle, nr);
4025 } 4152 }
4026 } 4153 }
4154
4155 if (sample_type & PERF_SAMPLE_REGS_USER) {
4156 u64 abi = data->regs_user.abi;
4157
4158 /*
4159 * If there are no regs to dump, notice it through
4160 * first u64 being zero (PERF_SAMPLE_REGS_ABI_NONE).
4161 */
4162 perf_output_put(handle, abi);
4163
4164 if (abi) {
4165 u64 mask = event->attr.sample_regs_user;
4166 perf_output_sample_regs(handle,
4167 data->regs_user.regs,
4168 mask);
4169 }
4170 }
4171
4172 if (sample_type & PERF_SAMPLE_STACK_USER)
4173 perf_output_sample_ustack(handle,
4174 data->stack_user_size,
4175 data->regs_user.regs);
4027} 4176}
4028 4177
4029void perf_prepare_sample(struct perf_event_header *header, 4178void perf_prepare_sample(struct perf_event_header *header,
@@ -4075,6 +4224,49 @@ void perf_prepare_sample(struct perf_event_header *header,
4075 } 4224 }
4076 header->size += size; 4225 header->size += size;
4077 } 4226 }
4227
4228 if (sample_type & PERF_SAMPLE_REGS_USER) {
4229 /* regs dump ABI info */
4230 int size = sizeof(u64);
4231
4232 perf_sample_regs_user(&data->regs_user, regs);
4233
4234 if (data->regs_user.regs) {
4235 u64 mask = event->attr.sample_regs_user;
4236 size += hweight64(mask) * sizeof(u64);
4237 }
4238
4239 header->size += size;
4240 }
4241
4242 if (sample_type & PERF_SAMPLE_STACK_USER) {
4243 /*
4244 * Either we need PERF_SAMPLE_STACK_USER bit to be allways
4245 * processed as the last one or have additional check added
4246 * in case new sample type is added, because we could eat
4247 * up the rest of the sample size.
4248 */
4249 struct perf_regs_user *uregs = &data->regs_user;
4250 u16 stack_size = event->attr.sample_stack_user;
4251 u16 size = sizeof(u64);
4252
4253 if (!uregs->abi)
4254 perf_sample_regs_user(uregs, regs);
4255
4256 stack_size = perf_sample_ustack_size(stack_size, header->size,
4257 uregs->regs);
4258
4259 /*
4260 * If there is something to dump, add space for the dump
4261 * itself and for the field that tells the dynamic size,
4262 * which is how many have been actually dumped.
4263 */
4264 if (stack_size)
4265 size += sizeof(u64) + stack_size;
4266
4267 data->stack_user_size = stack_size;
4268 header->size += size;
4269 }
4078} 4270}
4079 4271
4080static void perf_event_output(struct perf_event *event, 4272static void perf_event_output(struct perf_event *event,
@@ -6151,6 +6343,28 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
6151 attr->branch_sample_type = mask; 6343 attr->branch_sample_type = mask;
6152 } 6344 }
6153 } 6345 }
6346
6347 if (attr->sample_type & PERF_SAMPLE_REGS_USER) {
6348 ret = perf_reg_validate(attr->sample_regs_user);
6349 if (ret)
6350 return ret;
6351 }
6352
6353 if (attr->sample_type & PERF_SAMPLE_STACK_USER) {
6354 if (!arch_perf_have_user_stack_dump())
6355 return -ENOSYS;
6356
6357 /*
6358 * We have __u32 type for the size, but so far
6359 * we can only use __u16 as maximum due to the
6360 * __u16 sample size limit.
6361 */
6362 if (attr->sample_stack_user >= USHRT_MAX)
6363 ret = -EINVAL;
6364 else if (!IS_ALIGNED(attr->sample_stack_user, sizeof(u64)))
6365 ret = -EINVAL;
6366 }
6367
6154out: 6368out:
6155 return ret; 6369 return ret;
6156 6370
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index a096c19f2c2a..d56a64c99a8b 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -2,6 +2,7 @@
2#define _KERNEL_EVENTS_INTERNAL_H 2#define _KERNEL_EVENTS_INTERNAL_H
3 3
4#include <linux/hardirq.h> 4#include <linux/hardirq.h>
5#include <linux/uaccess.h>
5 6
6/* Buffer handling */ 7/* Buffer handling */
7 8
@@ -76,30 +77,53 @@ static inline unsigned long perf_data_size(struct ring_buffer *rb)
76 return rb->nr_pages << (PAGE_SHIFT + page_order(rb)); 77 return rb->nr_pages << (PAGE_SHIFT + page_order(rb));
77} 78}
78 79
79static inline void 80#define DEFINE_OUTPUT_COPY(func_name, memcpy_func) \
80__output_copy(struct perf_output_handle *handle, 81static inline unsigned int \
81 const void *buf, unsigned int len) 82func_name(struct perf_output_handle *handle, \
83 const void *buf, unsigned int len) \
84{ \
85 unsigned long size, written; \
86 \
87 do { \
88 size = min_t(unsigned long, handle->size, len); \
89 \
90 written = memcpy_func(handle->addr, buf, size); \
91 \
92 len -= written; \
93 handle->addr += written; \
94 buf += written; \
95 handle->size -= written; \
96 if (!handle->size) { \
97 struct ring_buffer *rb = handle->rb; \
98 \
99 handle->page++; \
100 handle->page &= rb->nr_pages - 1; \
101 handle->addr = rb->data_pages[handle->page]; \
102 handle->size = PAGE_SIZE << page_order(rb); \
103 } \
104 } while (len && written == size); \
105 \
106 return len; \
107}
108
109static inline int memcpy_common(void *dst, const void *src, size_t n)
82{ 110{
83 do { 111 memcpy(dst, src, n);
84 unsigned long size = min_t(unsigned long, handle->size, len); 112 return n;
85
86 memcpy(handle->addr, buf, size);
87
88 len -= size;
89 handle->addr += size;
90 buf += size;
91 handle->size -= size;
92 if (!handle->size) {
93 struct ring_buffer *rb = handle->rb;
94
95 handle->page++;
96 handle->page &= rb->nr_pages - 1;
97 handle->addr = rb->data_pages[handle->page];
98 handle->size = PAGE_SIZE << page_order(rb);
99 }
100 } while (len);
101} 113}
102 114
115DEFINE_OUTPUT_COPY(__output_copy, memcpy_common)
116
117#define MEMCPY_SKIP(dst, src, n) (n)
118
119DEFINE_OUTPUT_COPY(__output_skip, MEMCPY_SKIP)
120
121#ifndef arch_perf_out_copy_user
122#define arch_perf_out_copy_user __copy_from_user_inatomic
123#endif
124
125DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user)
126
103/* Callchain handling */ 127/* Callchain handling */
104extern struct perf_callchain_entry * 128extern struct perf_callchain_entry *
105perf_callchain(struct perf_event *event, struct pt_regs *regs); 129perf_callchain(struct perf_event *event, struct pt_regs *regs);
@@ -134,4 +158,20 @@ static inline void put_recursion_context(int *recursion, int rctx)
134 recursion[rctx]--; 158 recursion[rctx]--;
135} 159}
136 160
161#ifdef CONFIG_HAVE_PERF_USER_STACK_DUMP
162static inline bool arch_perf_have_user_stack_dump(void)
163{
164 return true;
165}
166
167#define perf_user_stack_pointer(regs) user_stack_pointer(regs)
168#else
169static inline bool arch_perf_have_user_stack_dump(void)
170{
171 return false;
172}
173
174#define perf_user_stack_pointer(regs) 0
175#endif /* CONFIG_HAVE_PERF_USER_STACK_DUMP */
176
137#endif /* _KERNEL_EVENTS_INTERNAL_H */ 177#endif /* _KERNEL_EVENTS_INTERNAL_H */
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 6ddaba43fb7a..23cb34ff3973 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -182,10 +182,16 @@ out:
182 return -ENOSPC; 182 return -ENOSPC;
183} 183}
184 184
185void perf_output_copy(struct perf_output_handle *handle, 185unsigned int perf_output_copy(struct perf_output_handle *handle,
186 const void *buf, unsigned int len) 186 const void *buf, unsigned int len)
187{ 187{
188 __output_copy(handle, buf, len); 188 return __output_copy(handle, buf, len);
189}
190
191unsigned int perf_output_skip(struct perf_output_handle *handle,
192 unsigned int len)
193{
194 return __output_skip(handle, NULL, len);
189} 195}
190 196
191void perf_output_end(struct perf_output_handle *handle) 197void perf_output_end(struct perf_output_handle *handle)
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index c08a22d02f72..912ef48d28ab 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -280,12 +280,10 @@ static int read_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_
280 if (ret <= 0) 280 if (ret <= 0)
281 return ret; 281 return ret;
282 282
283 lock_page(page);
284 vaddr_new = kmap_atomic(page); 283 vaddr_new = kmap_atomic(page);
285 vaddr &= ~PAGE_MASK; 284 vaddr &= ~PAGE_MASK;
286 memcpy(opcode, vaddr_new + vaddr, UPROBE_SWBP_INSN_SIZE); 285 memcpy(opcode, vaddr_new + vaddr, UPROBE_SWBP_INSN_SIZE);
287 kunmap_atomic(vaddr_new); 286 kunmap_atomic(vaddr_new);
288 unlock_page(page);
289 287
290 put_page(page); 288 put_page(page);
291 289
@@ -334,7 +332,7 @@ int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned
334 */ 332 */
335 result = is_swbp_at_addr(mm, vaddr); 333 result = is_swbp_at_addr(mm, vaddr);
336 if (result == 1) 334 if (result == 1)
337 return -EEXIST; 335 return 0;
338 336
339 if (result) 337 if (result)
340 return result; 338 return result;
@@ -347,24 +345,22 @@ int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned
347 * @mm: the probed process address space. 345 * @mm: the probed process address space.
348 * @auprobe: arch specific probepoint information. 346 * @auprobe: arch specific probepoint information.
349 * @vaddr: the virtual address to insert the opcode. 347 * @vaddr: the virtual address to insert the opcode.
350 * @verify: if true, verify existance of breakpoint instruction.
351 * 348 *
352 * For mm @mm, restore the original opcode (opcode) at @vaddr. 349 * For mm @mm, restore the original opcode (opcode) at @vaddr.
353 * Return 0 (success) or a negative errno. 350 * Return 0 (success) or a negative errno.
354 */ 351 */
355int __weak 352int __weak
356set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr, bool verify) 353set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr)
357{ 354{
358 if (verify) { 355 int result;
359 int result; 356
357 result = is_swbp_at_addr(mm, vaddr);
358 if (!result)
359 return -EINVAL;
360 360
361 result = is_swbp_at_addr(mm, vaddr); 361 if (result != 1)
362 if (!result) 362 return result;
363 return -EINVAL;
364 363
365 if (result != 1)
366 return result;
367 }
368 return write_opcode(auprobe, mm, vaddr, *(uprobe_opcode_t *)auprobe->insn); 364 return write_opcode(auprobe, mm, vaddr, *(uprobe_opcode_t *)auprobe->insn);
369} 365}
370 366
@@ -415,11 +411,10 @@ static struct uprobe *__find_uprobe(struct inode *inode, loff_t offset)
415static struct uprobe *find_uprobe(struct inode *inode, loff_t offset) 411static struct uprobe *find_uprobe(struct inode *inode, loff_t offset)
416{ 412{
417 struct uprobe *uprobe; 413 struct uprobe *uprobe;
418 unsigned long flags;
419 414
420 spin_lock_irqsave(&uprobes_treelock, flags); 415 spin_lock(&uprobes_treelock);
421 uprobe = __find_uprobe(inode, offset); 416 uprobe = __find_uprobe(inode, offset);
422 spin_unlock_irqrestore(&uprobes_treelock, flags); 417 spin_unlock(&uprobes_treelock);
423 418
424 return uprobe; 419 return uprobe;
425} 420}
@@ -466,12 +461,11 @@ static struct uprobe *__insert_uprobe(struct uprobe *uprobe)
466 */ 461 */
467static struct uprobe *insert_uprobe(struct uprobe *uprobe) 462static struct uprobe *insert_uprobe(struct uprobe *uprobe)
468{ 463{
469 unsigned long flags;
470 struct uprobe *u; 464 struct uprobe *u;
471 465
472 spin_lock_irqsave(&uprobes_treelock, flags); 466 spin_lock(&uprobes_treelock);
473 u = __insert_uprobe(uprobe); 467 u = __insert_uprobe(uprobe);
474 spin_unlock_irqrestore(&uprobes_treelock, flags); 468 spin_unlock(&uprobes_treelock);
475 469
476 /* For now assume that the instruction need not be single-stepped */ 470 /* For now assume that the instruction need not be single-stepped */
477 uprobe->flags |= UPROBE_SKIP_SSTEP; 471 uprobe->flags |= UPROBE_SKIP_SSTEP;
@@ -649,6 +643,7 @@ static int
649install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, 643install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
650 struct vm_area_struct *vma, unsigned long vaddr) 644 struct vm_area_struct *vma, unsigned long vaddr)
651{ 645{
646 bool first_uprobe;
652 int ret; 647 int ret;
653 648
654 /* 649 /*
@@ -659,7 +654,7 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
659 * Hence behave as if probe already existed. 654 * Hence behave as if probe already existed.
660 */ 655 */
661 if (!uprobe->consumers) 656 if (!uprobe->consumers)
662 return -EEXIST; 657 return 0;
663 658
664 if (!(uprobe->flags & UPROBE_COPY_INSN)) { 659 if (!(uprobe->flags & UPROBE_COPY_INSN)) {
665 ret = copy_insn(uprobe, vma->vm_file); 660 ret = copy_insn(uprobe, vma->vm_file);
@@ -681,17 +676,18 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
681 } 676 }
682 677
683 /* 678 /*
684 * Ideally, should be updating the probe count after the breakpoint 679 * set MMF_HAS_UPROBES in advance for uprobe_pre_sstep_notifier(),
685 * has been successfully inserted. However a thread could hit the 680 * the task can hit this breakpoint right after __replace_page().
686 * breakpoint we just inserted even before the probe count is
687 * incremented. If this is the first breakpoint placed, breakpoint
688 * notifier might ignore uprobes and pass the trap to the thread.
689 * Hence increment before and decrement on failure.
690 */ 681 */
691 atomic_inc(&mm->uprobes_state.count); 682 first_uprobe = !test_bit(MMF_HAS_UPROBES, &mm->flags);
683 if (first_uprobe)
684 set_bit(MMF_HAS_UPROBES, &mm->flags);
685
692 ret = set_swbp(&uprobe->arch, mm, vaddr); 686 ret = set_swbp(&uprobe->arch, mm, vaddr);
693 if (ret) 687 if (!ret)
694 atomic_dec(&mm->uprobes_state.count); 688 clear_bit(MMF_RECALC_UPROBES, &mm->flags);
689 else if (first_uprobe)
690 clear_bit(MMF_HAS_UPROBES, &mm->flags);
695 691
696 return ret; 692 return ret;
697} 693}
@@ -699,8 +695,12 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
699static void 695static void
700remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr) 696remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr)
701{ 697{
702 if (!set_orig_insn(&uprobe->arch, mm, vaddr, true)) 698 /* can happen if uprobe_register() fails */
703 atomic_dec(&mm->uprobes_state.count); 699 if (!test_bit(MMF_HAS_UPROBES, &mm->flags))
700 return;
701
702 set_bit(MMF_RECALC_UPROBES, &mm->flags);
703 set_orig_insn(&uprobe->arch, mm, vaddr);
704} 704}
705 705
706/* 706/*
@@ -710,11 +710,9 @@ remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vad
710 */ 710 */
711static void delete_uprobe(struct uprobe *uprobe) 711static void delete_uprobe(struct uprobe *uprobe)
712{ 712{
713 unsigned long flags; 713 spin_lock(&uprobes_treelock);
714
715 spin_lock_irqsave(&uprobes_treelock, flags);
716 rb_erase(&uprobe->rb_node, &uprobes_tree); 714 rb_erase(&uprobe->rb_node, &uprobes_tree);
717 spin_unlock_irqrestore(&uprobes_treelock, flags); 715 spin_unlock(&uprobes_treelock);
718 iput(uprobe->inode); 716 iput(uprobe->inode);
719 put_uprobe(uprobe); 717 put_uprobe(uprobe);
720 atomic_dec(&uprobe_events); 718 atomic_dec(&uprobe_events);
@@ -831,17 +829,11 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
831 vaddr_to_offset(vma, info->vaddr) != uprobe->offset) 829 vaddr_to_offset(vma, info->vaddr) != uprobe->offset)
832 goto unlock; 830 goto unlock;
833 831
834 if (is_register) { 832 if (is_register)
835 err = install_breakpoint(uprobe, mm, vma, info->vaddr); 833 err = install_breakpoint(uprobe, mm, vma, info->vaddr);
836 /* 834 else
837 * We can race against uprobe_mmap(), see the
838 * comment near uprobe_hash().
839 */
840 if (err == -EEXIST)
841 err = 0;
842 } else {
843 remove_breakpoint(uprobe, mm, info->vaddr); 835 remove_breakpoint(uprobe, mm, info->vaddr);
844 } 836
845 unlock: 837 unlock:
846 up_write(&mm->mmap_sem); 838 up_write(&mm->mmap_sem);
847 free: 839 free:
@@ -908,7 +900,8 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *
908 } 900 }
909 901
910 mutex_unlock(uprobes_hash(inode)); 902 mutex_unlock(uprobes_hash(inode));
911 put_uprobe(uprobe); 903 if (uprobe)
904 put_uprobe(uprobe);
912 905
913 return ret; 906 return ret;
914} 907}
@@ -978,7 +971,6 @@ static void build_probe_list(struct inode *inode,
978 struct list_head *head) 971 struct list_head *head)
979{ 972{
980 loff_t min, max; 973 loff_t min, max;
981 unsigned long flags;
982 struct rb_node *n, *t; 974 struct rb_node *n, *t;
983 struct uprobe *u; 975 struct uprobe *u;
984 976
@@ -986,7 +978,7 @@ static void build_probe_list(struct inode *inode,
986 min = vaddr_to_offset(vma, start); 978 min = vaddr_to_offset(vma, start);
987 max = min + (end - start) - 1; 979 max = min + (end - start) - 1;
988 980
989 spin_lock_irqsave(&uprobes_treelock, flags); 981 spin_lock(&uprobes_treelock);
990 n = find_node_in_range(inode, min, max); 982 n = find_node_in_range(inode, min, max);
991 if (n) { 983 if (n) {
992 for (t = n; t; t = rb_prev(t)) { 984 for (t = n; t; t = rb_prev(t)) {
@@ -1004,27 +996,20 @@ static void build_probe_list(struct inode *inode,
1004 atomic_inc(&u->ref); 996 atomic_inc(&u->ref);
1005 } 997 }
1006 } 998 }
1007 spin_unlock_irqrestore(&uprobes_treelock, flags); 999 spin_unlock(&uprobes_treelock);
1008} 1000}
1009 1001
1010/* 1002/*
1011 * Called from mmap_region. 1003 * Called from mmap_region/vma_adjust with mm->mmap_sem acquired.
1012 * called with mm->mmap_sem acquired.
1013 * 1004 *
1014 * Return -ve no if we fail to insert probes and we cannot 1005 * Currently we ignore all errors and always return 0, the callers
1015 * bail-out. 1006 * can't handle the failure anyway.
1016 * Return 0 otherwise. i.e:
1017 *
1018 * - successful insertion of probes
1019 * - (or) no possible probes to be inserted.
1020 * - (or) insertion of probes failed but we can bail-out.
1021 */ 1007 */
1022int uprobe_mmap(struct vm_area_struct *vma) 1008int uprobe_mmap(struct vm_area_struct *vma)
1023{ 1009{
1024 struct list_head tmp_list; 1010 struct list_head tmp_list;
1025 struct uprobe *uprobe, *u; 1011 struct uprobe *uprobe, *u;
1026 struct inode *inode; 1012 struct inode *inode;
1027 int ret, count;
1028 1013
1029 if (!atomic_read(&uprobe_events) || !valid_vma(vma, true)) 1014 if (!atomic_read(&uprobe_events) || !valid_vma(vma, true))
1030 return 0; 1015 return 0;
@@ -1036,44 +1021,35 @@ int uprobe_mmap(struct vm_area_struct *vma)
1036 mutex_lock(uprobes_mmap_hash(inode)); 1021 mutex_lock(uprobes_mmap_hash(inode));
1037 build_probe_list(inode, vma, vma->vm_start, vma->vm_end, &tmp_list); 1022 build_probe_list(inode, vma, vma->vm_start, vma->vm_end, &tmp_list);
1038 1023
1039 ret = 0;
1040 count = 0;
1041
1042 list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { 1024 list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) {
1043 if (!ret) { 1025 if (!fatal_signal_pending(current)) {
1044 unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset); 1026 unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset);
1045 1027 install_breakpoint(uprobe, vma->vm_mm, vma, vaddr);
1046 ret = install_breakpoint(uprobe, vma->vm_mm, vma, vaddr);
1047 /*
1048 * We can race against uprobe_register(), see the
1049 * comment near uprobe_hash().
1050 */
1051 if (ret == -EEXIST) {
1052 ret = 0;
1053
1054 if (!is_swbp_at_addr(vma->vm_mm, vaddr))
1055 continue;
1056
1057 /*
1058 * Unable to insert a breakpoint, but
1059 * breakpoint lies underneath. Increment the
1060 * probe count.
1061 */
1062 atomic_inc(&vma->vm_mm->uprobes_state.count);
1063 }
1064
1065 if (!ret)
1066 count++;
1067 } 1028 }
1068 put_uprobe(uprobe); 1029 put_uprobe(uprobe);
1069 } 1030 }
1070
1071 mutex_unlock(uprobes_mmap_hash(inode)); 1031 mutex_unlock(uprobes_mmap_hash(inode));
1072 1032
1073 if (ret) 1033 return 0;
1074 atomic_sub(count, &vma->vm_mm->uprobes_state.count); 1034}
1075 1035
1076 return ret; 1036static bool
1037vma_has_uprobes(struct vm_area_struct *vma, unsigned long start, unsigned long end)
1038{
1039 loff_t min, max;
1040 struct inode *inode;
1041 struct rb_node *n;
1042
1043 inode = vma->vm_file->f_mapping->host;
1044
1045 min = vaddr_to_offset(vma, start);
1046 max = min + (end - start) - 1;
1047
1048 spin_lock(&uprobes_treelock);
1049 n = find_node_in_range(inode, min, max);
1050 spin_unlock(&uprobes_treelock);
1051
1052 return !!n;
1077} 1053}
1078 1054
1079/* 1055/*
@@ -1081,37 +1057,18 @@ int uprobe_mmap(struct vm_area_struct *vma)
1081 */ 1057 */
1082void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end) 1058void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end)
1083{ 1059{
1084 struct list_head tmp_list;
1085 struct uprobe *uprobe, *u;
1086 struct inode *inode;
1087
1088 if (!atomic_read(&uprobe_events) || !valid_vma(vma, false)) 1060 if (!atomic_read(&uprobe_events) || !valid_vma(vma, false))
1089 return; 1061 return;
1090 1062
1091 if (!atomic_read(&vma->vm_mm->mm_users)) /* called by mmput() ? */ 1063 if (!atomic_read(&vma->vm_mm->mm_users)) /* called by mmput() ? */
1092 return; 1064 return;
1093 1065
1094 if (!atomic_read(&vma->vm_mm->uprobes_state.count)) 1066 if (!test_bit(MMF_HAS_UPROBES, &vma->vm_mm->flags) ||
1095 return; 1067 test_bit(MMF_RECALC_UPROBES, &vma->vm_mm->flags))
1096
1097 inode = vma->vm_file->f_mapping->host;
1098 if (!inode)
1099 return; 1068 return;
1100 1069
1101 mutex_lock(uprobes_mmap_hash(inode)); 1070 if (vma_has_uprobes(vma, start, end))
1102 build_probe_list(inode, vma, start, end, &tmp_list); 1071 set_bit(MMF_RECALC_UPROBES, &vma->vm_mm->flags);
1103
1104 list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) {
1105 unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset);
1106 /*
1107 * An unregister could have removed the probe before
1108 * unmap. So check before we decrement the count.
1109 */
1110 if (is_swbp_at_addr(vma->vm_mm, vaddr) == 1)
1111 atomic_dec(&vma->vm_mm->uprobes_state.count);
1112 put_uprobe(uprobe);
1113 }
1114 mutex_unlock(uprobes_mmap_hash(inode));
1115} 1072}
1116 1073
1117/* Slot allocation for XOL */ 1074/* Slot allocation for XOL */
@@ -1213,13 +1170,15 @@ void uprobe_clear_state(struct mm_struct *mm)
1213 kfree(area); 1170 kfree(area);
1214} 1171}
1215 1172
1216/* 1173void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm)
1217 * uprobe_reset_state - Free the area allocated for slots.
1218 */
1219void uprobe_reset_state(struct mm_struct *mm)
1220{ 1174{
1221 mm->uprobes_state.xol_area = NULL; 1175 newmm->uprobes_state.xol_area = NULL;
1222 atomic_set(&mm->uprobes_state.count, 0); 1176
1177 if (test_bit(MMF_HAS_UPROBES, &oldmm->flags)) {
1178 set_bit(MMF_HAS_UPROBES, &newmm->flags);
1179 /* unconditionally, dup_mmap() skips VM_DONTCOPY vmas */
1180 set_bit(MMF_RECALC_UPROBES, &newmm->flags);
1181 }
1223} 1182}
1224 1183
1225/* 1184/*
@@ -1437,6 +1396,25 @@ static bool can_skip_sstep(struct uprobe *uprobe, struct pt_regs *regs)
1437 return false; 1396 return false;
1438} 1397}
1439 1398
1399static void mmf_recalc_uprobes(struct mm_struct *mm)
1400{
1401 struct vm_area_struct *vma;
1402
1403 for (vma = mm->mmap; vma; vma = vma->vm_next) {
1404 if (!valid_vma(vma, false))
1405 continue;
1406 /*
1407 * This is not strictly accurate, we can race with
1408 * uprobe_unregister() and see the already removed
1409 * uprobe if delete_uprobe() was not yet called.
1410 */
1411 if (vma_has_uprobes(vma, vma->vm_start, vma->vm_end))
1412 return;
1413 }
1414
1415 clear_bit(MMF_HAS_UPROBES, &mm->flags);
1416}
1417
1440static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp) 1418static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
1441{ 1419{
1442 struct mm_struct *mm = current->mm; 1420 struct mm_struct *mm = current->mm;
@@ -1458,11 +1436,24 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
1458 } else { 1436 } else {
1459 *is_swbp = -EFAULT; 1437 *is_swbp = -EFAULT;
1460 } 1438 }
1439
1440 if (!uprobe && test_and_clear_bit(MMF_RECALC_UPROBES, &mm->flags))
1441 mmf_recalc_uprobes(mm);
1461 up_read(&mm->mmap_sem); 1442 up_read(&mm->mmap_sem);
1462 1443
1463 return uprobe; 1444 return uprobe;
1464} 1445}
1465 1446
1447void __weak arch_uprobe_enable_step(struct arch_uprobe *arch)
1448{
1449 user_enable_single_step(current);
1450}
1451
1452void __weak arch_uprobe_disable_step(struct arch_uprobe *arch)
1453{
1454 user_disable_single_step(current);
1455}
1456
1466/* 1457/*
1467 * Run handler and ask thread to singlestep. 1458 * Run handler and ask thread to singlestep.
1468 * Ensure all non-fatal signals cannot interrupt thread while it singlesteps. 1459 * Ensure all non-fatal signals cannot interrupt thread while it singlesteps.
@@ -1509,7 +1500,7 @@ static void handle_swbp(struct pt_regs *regs)
1509 1500
1510 utask->state = UTASK_SSTEP; 1501 utask->state = UTASK_SSTEP;
1511 if (!pre_ssout(uprobe, regs, bp_vaddr)) { 1502 if (!pre_ssout(uprobe, regs, bp_vaddr)) {
1512 user_enable_single_step(current); 1503 arch_uprobe_enable_step(&uprobe->arch);
1513 return; 1504 return;
1514 } 1505 }
1515 1506
@@ -1518,17 +1509,15 @@ cleanup_ret:
1518 utask->active_uprobe = NULL; 1509 utask->active_uprobe = NULL;
1519 utask->state = UTASK_RUNNING; 1510 utask->state = UTASK_RUNNING;
1520 } 1511 }
1521 if (uprobe) { 1512 if (!(uprobe->flags & UPROBE_SKIP_SSTEP))
1522 if (!(uprobe->flags & UPROBE_SKIP_SSTEP))
1523 1513
1524 /* 1514 /*
1525 * cannot singlestep; cannot skip instruction; 1515 * cannot singlestep; cannot skip instruction;
1526 * re-execute the instruction. 1516 * re-execute the instruction.
1527 */ 1517 */
1528 instruction_pointer_set(regs, bp_vaddr); 1518 instruction_pointer_set(regs, bp_vaddr);
1529 1519
1530 put_uprobe(uprobe); 1520 put_uprobe(uprobe);
1531 }
1532} 1521}
1533 1522
1534/* 1523/*
@@ -1547,10 +1536,10 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
1547 else 1536 else
1548 WARN_ON_ONCE(1); 1537 WARN_ON_ONCE(1);
1549 1538
1539 arch_uprobe_disable_step(&uprobe->arch);
1550 put_uprobe(uprobe); 1540 put_uprobe(uprobe);
1551 utask->active_uprobe = NULL; 1541 utask->active_uprobe = NULL;
1552 utask->state = UTASK_RUNNING; 1542 utask->state = UTASK_RUNNING;
1553 user_disable_single_step(current);
1554 xol_free_insn_slot(current); 1543 xol_free_insn_slot(current);
1555 1544
1556 spin_lock_irq(&current->sighand->siglock); 1545 spin_lock_irq(&current->sighand->siglock);
@@ -1589,8 +1578,7 @@ int uprobe_pre_sstep_notifier(struct pt_regs *regs)
1589{ 1578{
1590 struct uprobe_task *utask; 1579 struct uprobe_task *utask;
1591 1580
1592 if (!current->mm || !atomic_read(&current->mm->uprobes_state.count)) 1581 if (!current->mm || !test_bit(MMF_HAS_UPROBES, &current->mm->flags))
1593 /* task is currently not uprobed */
1594 return 0; 1582 return 0;
1595 1583
1596 utask = current->utask; 1584 utask = current->utask;
diff --git a/kernel/fork.c b/kernel/fork.c
index 2c8857e12855..2343c9eaaaf4 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -353,6 +353,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
353 353
354 down_write(&oldmm->mmap_sem); 354 down_write(&oldmm->mmap_sem);
355 flush_cache_dup_mm(oldmm); 355 flush_cache_dup_mm(oldmm);
356 uprobe_dup_mmap(oldmm, mm);
356 /* 357 /*
357 * Not linked in yet - no deadlock potential: 358 * Not linked in yet - no deadlock potential:
358 */ 359 */
@@ -454,9 +455,6 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
454 455
455 if (retval) 456 if (retval)
456 goto out; 457 goto out;
457
458 if (file)
459 uprobe_mmap(tmp);
460 } 458 }
461 /* a new mm has just been created */ 459 /* a new mm has just been created */
462 arch_dup_mmap(oldmm, mm); 460 arch_dup_mmap(oldmm, mm);
@@ -839,8 +837,6 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
839#ifdef CONFIG_TRANSPARENT_HUGEPAGE 837#ifdef CONFIG_TRANSPARENT_HUGEPAGE
840 mm->pmd_huge_pte = NULL; 838 mm->pmd_huge_pte = NULL;
841#endif 839#endif
842 uprobe_reset_state(mm);
843
844 if (!mm_init(mm, tsk)) 840 if (!mm_init(mm, tsk))
845 goto fail_nomem; 841 goto fail_nomem;
846 842
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index c62b8546cc90..098f396aa409 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -561,9 +561,9 @@ static __kprobes void kprobe_optimizer(struct work_struct *work)
561{ 561{
562 LIST_HEAD(free_list); 562 LIST_HEAD(free_list);
563 563
564 mutex_lock(&kprobe_mutex);
564 /* Lock modules while optimizing kprobes */ 565 /* Lock modules while optimizing kprobes */
565 mutex_lock(&module_mutex); 566 mutex_lock(&module_mutex);
566 mutex_lock(&kprobe_mutex);
567 567
568 /* 568 /*
569 * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed) 569 * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
@@ -586,8 +586,8 @@ static __kprobes void kprobe_optimizer(struct work_struct *work)
586 /* Step 4: Free cleaned kprobes after quiescence period */ 586 /* Step 4: Free cleaned kprobes after quiescence period */
587 do_free_cleaned_kprobes(&free_list); 587 do_free_cleaned_kprobes(&free_list);
588 588
589 mutex_unlock(&kprobe_mutex);
590 mutex_unlock(&module_mutex); 589 mutex_unlock(&module_mutex);
590 mutex_unlock(&kprobe_mutex);
591 591
592 /* Step 5: Kick optimizer again if needed */ 592 /* Step 5: Kick optimizer again if needed */
593 if (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list)) 593 if (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list))
@@ -759,20 +759,32 @@ static __kprobes void try_to_optimize_kprobe(struct kprobe *p)
759 struct kprobe *ap; 759 struct kprobe *ap;
760 struct optimized_kprobe *op; 760 struct optimized_kprobe *op;
761 761
762 /* Impossible to optimize ftrace-based kprobe */
763 if (kprobe_ftrace(p))
764 return;
765
766 /* For preparing optimization, jump_label_text_reserved() is called */
767 jump_label_lock();
768 mutex_lock(&text_mutex);
769
762 ap = alloc_aggr_kprobe(p); 770 ap = alloc_aggr_kprobe(p);
763 if (!ap) 771 if (!ap)
764 return; 772 goto out;
765 773
766 op = container_of(ap, struct optimized_kprobe, kp); 774 op = container_of(ap, struct optimized_kprobe, kp);
767 if (!arch_prepared_optinsn(&op->optinsn)) { 775 if (!arch_prepared_optinsn(&op->optinsn)) {
768 /* If failed to setup optimizing, fallback to kprobe */ 776 /* If failed to setup optimizing, fallback to kprobe */
769 arch_remove_optimized_kprobe(op); 777 arch_remove_optimized_kprobe(op);
770 kfree(op); 778 kfree(op);
771 return; 779 goto out;
772 } 780 }
773 781
774 init_aggr_kprobe(ap, p); 782 init_aggr_kprobe(ap, p);
775 optimize_kprobe(ap); 783 optimize_kprobe(ap); /* This just kicks optimizer thread */
784
785out:
786 mutex_unlock(&text_mutex);
787 jump_label_unlock();
776} 788}
777 789
778#ifdef CONFIG_SYSCTL 790#ifdef CONFIG_SYSCTL
@@ -907,9 +919,64 @@ static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
907} 919}
908#endif /* CONFIG_OPTPROBES */ 920#endif /* CONFIG_OPTPROBES */
909 921
922#ifdef KPROBES_CAN_USE_FTRACE
923static struct ftrace_ops kprobe_ftrace_ops __read_mostly = {
924 .func = kprobe_ftrace_handler,
925 .flags = FTRACE_OPS_FL_SAVE_REGS,
926};
927static int kprobe_ftrace_enabled;
928
929/* Must ensure p->addr is really on ftrace */
930static int __kprobes prepare_kprobe(struct kprobe *p)
931{
932 if (!kprobe_ftrace(p))
933 return arch_prepare_kprobe(p);
934
935 return arch_prepare_kprobe_ftrace(p);
936}
937
938/* Caller must lock kprobe_mutex */
939static void __kprobes arm_kprobe_ftrace(struct kprobe *p)
940{
941 int ret;
942
943 ret = ftrace_set_filter_ip(&kprobe_ftrace_ops,
944 (unsigned long)p->addr, 0, 0);
945 WARN(ret < 0, "Failed to arm kprobe-ftrace at %p (%d)\n", p->addr, ret);
946 kprobe_ftrace_enabled++;
947 if (kprobe_ftrace_enabled == 1) {
948 ret = register_ftrace_function(&kprobe_ftrace_ops);
949 WARN(ret < 0, "Failed to init kprobe-ftrace (%d)\n", ret);
950 }
951}
952
953/* Caller must lock kprobe_mutex */
954static void __kprobes disarm_kprobe_ftrace(struct kprobe *p)
955{
956 int ret;
957
958 kprobe_ftrace_enabled--;
959 if (kprobe_ftrace_enabled == 0) {
960 ret = unregister_ftrace_function(&kprobe_ftrace_ops);
961 WARN(ret < 0, "Failed to init kprobe-ftrace (%d)\n", ret);
962 }
963 ret = ftrace_set_filter_ip(&kprobe_ftrace_ops,
964 (unsigned long)p->addr, 1, 0);
965 WARN(ret < 0, "Failed to disarm kprobe-ftrace at %p (%d)\n", p->addr, ret);
966}
967#else /* !KPROBES_CAN_USE_FTRACE */
968#define prepare_kprobe(p) arch_prepare_kprobe(p)
969#define arm_kprobe_ftrace(p) do {} while (0)
970#define disarm_kprobe_ftrace(p) do {} while (0)
971#endif
972
910/* Arm a kprobe with text_mutex */ 973/* Arm a kprobe with text_mutex */
911static void __kprobes arm_kprobe(struct kprobe *kp) 974static void __kprobes arm_kprobe(struct kprobe *kp)
912{ 975{
976 if (unlikely(kprobe_ftrace(kp))) {
977 arm_kprobe_ftrace(kp);
978 return;
979 }
913 /* 980 /*
914 * Here, since __arm_kprobe() doesn't use stop_machine(), 981 * Here, since __arm_kprobe() doesn't use stop_machine(),
915 * this doesn't cause deadlock on text_mutex. So, we don't 982 * this doesn't cause deadlock on text_mutex. So, we don't
@@ -921,11 +988,15 @@ static void __kprobes arm_kprobe(struct kprobe *kp)
921} 988}
922 989
923/* Disarm a kprobe with text_mutex */ 990/* Disarm a kprobe with text_mutex */
924static void __kprobes disarm_kprobe(struct kprobe *kp) 991static void __kprobes disarm_kprobe(struct kprobe *kp, bool reopt)
925{ 992{
993 if (unlikely(kprobe_ftrace(kp))) {
994 disarm_kprobe_ftrace(kp);
995 return;
996 }
926 /* Ditto */ 997 /* Ditto */
927 mutex_lock(&text_mutex); 998 mutex_lock(&text_mutex);
928 __disarm_kprobe(kp, true); 999 __disarm_kprobe(kp, reopt);
929 mutex_unlock(&text_mutex); 1000 mutex_unlock(&text_mutex);
930} 1001}
931 1002
@@ -1144,12 +1215,6 @@ static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p)
1144 if (p->post_handler && !ap->post_handler) 1215 if (p->post_handler && !ap->post_handler)
1145 ap->post_handler = aggr_post_handler; 1216 ap->post_handler = aggr_post_handler;
1146 1217
1147 if (kprobe_disabled(ap) && !kprobe_disabled(p)) {
1148 ap->flags &= ~KPROBE_FLAG_DISABLED;
1149 if (!kprobes_all_disarmed)
1150 /* Arm the breakpoint again. */
1151 __arm_kprobe(ap);
1152 }
1153 return 0; 1218 return 0;
1154} 1219}
1155 1220
@@ -1189,11 +1254,22 @@ static int __kprobes register_aggr_kprobe(struct kprobe *orig_p,
1189 int ret = 0; 1254 int ret = 0;
1190 struct kprobe *ap = orig_p; 1255 struct kprobe *ap = orig_p;
1191 1256
1257 /* For preparing optimization, jump_label_text_reserved() is called */
1258 jump_label_lock();
1259 /*
1260 * Get online CPUs to avoid text_mutex deadlock with stop machine,
1261 * which is invoked by unoptimize_kprobe() in add_new_kprobe()
1262 */
1263 get_online_cpus();
1264 mutex_lock(&text_mutex);
1265
1192 if (!kprobe_aggrprobe(orig_p)) { 1266 if (!kprobe_aggrprobe(orig_p)) {
1193 /* If orig_p is not an aggr_kprobe, create new aggr_kprobe. */ 1267 /* If orig_p is not an aggr_kprobe, create new aggr_kprobe. */
1194 ap = alloc_aggr_kprobe(orig_p); 1268 ap = alloc_aggr_kprobe(orig_p);
1195 if (!ap) 1269 if (!ap) {
1196 return -ENOMEM; 1270 ret = -ENOMEM;
1271 goto out;
1272 }
1197 init_aggr_kprobe(ap, orig_p); 1273 init_aggr_kprobe(ap, orig_p);
1198 } else if (kprobe_unused(ap)) 1274 } else if (kprobe_unused(ap))
1199 /* This probe is going to die. Rescue it */ 1275 /* This probe is going to die. Rescue it */
@@ -1213,7 +1289,7 @@ static int __kprobes register_aggr_kprobe(struct kprobe *orig_p,
1213 * free aggr_probe. It will be used next time, or 1289 * free aggr_probe. It will be used next time, or
1214 * freed by unregister_kprobe. 1290 * freed by unregister_kprobe.
1215 */ 1291 */
1216 return ret; 1292 goto out;
1217 1293
1218 /* Prepare optimized instructions if possible. */ 1294 /* Prepare optimized instructions if possible. */
1219 prepare_optimized_kprobe(ap); 1295 prepare_optimized_kprobe(ap);
@@ -1228,7 +1304,20 @@ static int __kprobes register_aggr_kprobe(struct kprobe *orig_p,
1228 1304
1229 /* Copy ap's insn slot to p */ 1305 /* Copy ap's insn slot to p */
1230 copy_kprobe(ap, p); 1306 copy_kprobe(ap, p);
1231 return add_new_kprobe(ap, p); 1307 ret = add_new_kprobe(ap, p);
1308
1309out:
1310 mutex_unlock(&text_mutex);
1311 put_online_cpus();
1312 jump_label_unlock();
1313
1314 if (ret == 0 && kprobe_disabled(ap) && !kprobe_disabled(p)) {
1315 ap->flags &= ~KPROBE_FLAG_DISABLED;
1316 if (!kprobes_all_disarmed)
1317 /* Arm the breakpoint again. */
1318 arm_kprobe(ap);
1319 }
1320 return ret;
1232} 1321}
1233 1322
1234static int __kprobes in_kprobes_functions(unsigned long addr) 1323static int __kprobes in_kprobes_functions(unsigned long addr)
@@ -1313,71 +1402,96 @@ static inline int check_kprobe_rereg(struct kprobe *p)
1313 return ret; 1402 return ret;
1314} 1403}
1315 1404
1316int __kprobes register_kprobe(struct kprobe *p) 1405static __kprobes int check_kprobe_address_safe(struct kprobe *p,
1406 struct module **probed_mod)
1317{ 1407{
1318 int ret = 0; 1408 int ret = 0;
1319 struct kprobe *old_p; 1409 unsigned long ftrace_addr;
1320 struct module *probed_mod;
1321 kprobe_opcode_t *addr;
1322
1323 addr = kprobe_addr(p);
1324 if (IS_ERR(addr))
1325 return PTR_ERR(addr);
1326 p->addr = addr;
1327 1410
1328 ret = check_kprobe_rereg(p); 1411 /*
1329 if (ret) 1412 * If the address is located on a ftrace nop, set the
1330 return ret; 1413 * breakpoint to the following instruction.
1414 */
1415 ftrace_addr = ftrace_location((unsigned long)p->addr);
1416 if (ftrace_addr) {
1417#ifdef KPROBES_CAN_USE_FTRACE
1418 /* Given address is not on the instruction boundary */
1419 if ((unsigned long)p->addr != ftrace_addr)
1420 return -EILSEQ;
1421 p->flags |= KPROBE_FLAG_FTRACE;
1422#else /* !KPROBES_CAN_USE_FTRACE */
1423 return -EINVAL;
1424#endif
1425 }
1331 1426
1332 jump_label_lock(); 1427 jump_label_lock();
1333 preempt_disable(); 1428 preempt_disable();
1429
1430 /* Ensure it is not in reserved area nor out of text */
1334 if (!kernel_text_address((unsigned long) p->addr) || 1431 if (!kernel_text_address((unsigned long) p->addr) ||
1335 in_kprobes_functions((unsigned long) p->addr) || 1432 in_kprobes_functions((unsigned long) p->addr) ||
1336 ftrace_text_reserved(p->addr, p->addr) ||
1337 jump_label_text_reserved(p->addr, p->addr)) { 1433 jump_label_text_reserved(p->addr, p->addr)) {
1338 ret = -EINVAL; 1434 ret = -EINVAL;
1339 goto cannot_probe; 1435 goto out;
1340 } 1436 }
1341 1437
1342 /* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */ 1438 /* Check if we are probing a module */
1343 p->flags &= KPROBE_FLAG_DISABLED; 1439 *probed_mod = __module_text_address((unsigned long) p->addr);
1344 1440 if (*probed_mod) {
1345 /*
1346 * Check if are we probing a module.
1347 */
1348 probed_mod = __module_text_address((unsigned long) p->addr);
1349 if (probed_mod) {
1350 /* Return -ENOENT if fail. */
1351 ret = -ENOENT;
1352 /* 1441 /*
1353 * We must hold a refcount of the probed module while updating 1442 * We must hold a refcount of the probed module while updating
1354 * its code to prohibit unexpected unloading. 1443 * its code to prohibit unexpected unloading.
1355 */ 1444 */
1356 if (unlikely(!try_module_get(probed_mod))) 1445 if (unlikely(!try_module_get(*probed_mod))) {
1357 goto cannot_probe; 1446 ret = -ENOENT;
1447 goto out;
1448 }
1358 1449
1359 /* 1450 /*
1360 * If the module freed .init.text, we couldn't insert 1451 * If the module freed .init.text, we couldn't insert
1361 * kprobes in there. 1452 * kprobes in there.
1362 */ 1453 */
1363 if (within_module_init((unsigned long)p->addr, probed_mod) && 1454 if (within_module_init((unsigned long)p->addr, *probed_mod) &&
1364 probed_mod->state != MODULE_STATE_COMING) { 1455 (*probed_mod)->state != MODULE_STATE_COMING) {
1365 module_put(probed_mod); 1456 module_put(*probed_mod);
1366 goto cannot_probe; 1457 *probed_mod = NULL;
1458 ret = -ENOENT;
1367 } 1459 }
1368 /* ret will be updated by following code */
1369 } 1460 }
1461out:
1370 preempt_enable(); 1462 preempt_enable();
1371 jump_label_unlock(); 1463 jump_label_unlock();
1372 1464
1465 return ret;
1466}
1467
1468int __kprobes register_kprobe(struct kprobe *p)
1469{
1470 int ret;
1471 struct kprobe *old_p;
1472 struct module *probed_mod;
1473 kprobe_opcode_t *addr;
1474
1475 /* Adjust probe address from symbol */
1476 addr = kprobe_addr(p);
1477 if (IS_ERR(addr))
1478 return PTR_ERR(addr);
1479 p->addr = addr;
1480
1481 ret = check_kprobe_rereg(p);
1482 if (ret)
1483 return ret;
1484
1485 /* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */
1486 p->flags &= KPROBE_FLAG_DISABLED;
1373 p->nmissed = 0; 1487 p->nmissed = 0;
1374 INIT_LIST_HEAD(&p->list); 1488 INIT_LIST_HEAD(&p->list);
1375 mutex_lock(&kprobe_mutex);
1376 1489
1377 jump_label_lock(); /* needed to call jump_label_text_reserved() */ 1490 ret = check_kprobe_address_safe(p, &probed_mod);
1491 if (ret)
1492 return ret;
1378 1493
1379 get_online_cpus(); /* For avoiding text_mutex deadlock. */ 1494 mutex_lock(&kprobe_mutex);
1380 mutex_lock(&text_mutex);
1381 1495
1382 old_p = get_kprobe(p->addr); 1496 old_p = get_kprobe(p->addr);
1383 if (old_p) { 1497 if (old_p) {
@@ -1386,7 +1500,9 @@ int __kprobes register_kprobe(struct kprobe *p)
1386 goto out; 1500 goto out;
1387 } 1501 }
1388 1502
1389 ret = arch_prepare_kprobe(p); 1503 mutex_lock(&text_mutex); /* Avoiding text modification */
1504 ret = prepare_kprobe(p);
1505 mutex_unlock(&text_mutex);
1390 if (ret) 1506 if (ret)
1391 goto out; 1507 goto out;
1392 1508
@@ -1395,26 +1511,18 @@ int __kprobes register_kprobe(struct kprobe *p)
1395 &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]); 1511 &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);
1396 1512
1397 if (!kprobes_all_disarmed && !kprobe_disabled(p)) 1513 if (!kprobes_all_disarmed && !kprobe_disabled(p))
1398 __arm_kprobe(p); 1514 arm_kprobe(p);
1399 1515
1400 /* Try to optimize kprobe */ 1516 /* Try to optimize kprobe */
1401 try_to_optimize_kprobe(p); 1517 try_to_optimize_kprobe(p);
1402 1518
1403out: 1519out:
1404 mutex_unlock(&text_mutex);
1405 put_online_cpus();
1406 jump_label_unlock();
1407 mutex_unlock(&kprobe_mutex); 1520 mutex_unlock(&kprobe_mutex);
1408 1521
1409 if (probed_mod) 1522 if (probed_mod)
1410 module_put(probed_mod); 1523 module_put(probed_mod);
1411 1524
1412 return ret; 1525 return ret;
1413
1414cannot_probe:
1415 preempt_enable();
1416 jump_label_unlock();
1417 return ret;
1418} 1526}
1419EXPORT_SYMBOL_GPL(register_kprobe); 1527EXPORT_SYMBOL_GPL(register_kprobe);
1420 1528
@@ -1451,7 +1559,7 @@ static struct kprobe *__kprobes __disable_kprobe(struct kprobe *p)
1451 1559
1452 /* Try to disarm and disable this/parent probe */ 1560 /* Try to disarm and disable this/parent probe */
1453 if (p == orig_p || aggr_kprobe_disabled(orig_p)) { 1561 if (p == orig_p || aggr_kprobe_disabled(orig_p)) {
1454 disarm_kprobe(orig_p); 1562 disarm_kprobe(orig_p, true);
1455 orig_p->flags |= KPROBE_FLAG_DISABLED; 1563 orig_p->flags |= KPROBE_FLAG_DISABLED;
1456 } 1564 }
1457 } 1565 }
@@ -2049,10 +2157,11 @@ static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p,
2049 2157
2050 if (!pp) 2158 if (!pp)
2051 pp = p; 2159 pp = p;
2052 seq_printf(pi, "%s%s%s\n", 2160 seq_printf(pi, "%s%s%s%s\n",
2053 (kprobe_gone(p) ? "[GONE]" : ""), 2161 (kprobe_gone(p) ? "[GONE]" : ""),
2054 ((kprobe_disabled(p) && !kprobe_gone(p)) ? "[DISABLED]" : ""), 2162 ((kprobe_disabled(p) && !kprobe_gone(p)) ? "[DISABLED]" : ""),
2055 (kprobe_optimized(pp) ? "[OPTIMIZED]" : "")); 2163 (kprobe_optimized(pp) ? "[OPTIMIZED]" : ""),
2164 (kprobe_ftrace(pp) ? "[FTRACE]" : ""));
2056} 2165}
2057 2166
2058static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos) 2167static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos)
@@ -2131,14 +2240,12 @@ static void __kprobes arm_all_kprobes(void)
2131 goto already_enabled; 2240 goto already_enabled;
2132 2241
2133 /* Arming kprobes doesn't optimize kprobe itself */ 2242 /* Arming kprobes doesn't optimize kprobe itself */
2134 mutex_lock(&text_mutex);
2135 for (i = 0; i < KPROBE_TABLE_SIZE; i++) { 2243 for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
2136 head = &kprobe_table[i]; 2244 head = &kprobe_table[i];
2137 hlist_for_each_entry_rcu(p, node, head, hlist) 2245 hlist_for_each_entry_rcu(p, node, head, hlist)
2138 if (!kprobe_disabled(p)) 2246 if (!kprobe_disabled(p))
2139 __arm_kprobe(p); 2247 arm_kprobe(p);
2140 } 2248 }
2141 mutex_unlock(&text_mutex);
2142 2249
2143 kprobes_all_disarmed = false; 2250 kprobes_all_disarmed = false;
2144 printk(KERN_INFO "Kprobes globally enabled\n"); 2251 printk(KERN_INFO "Kprobes globally enabled\n");
@@ -2166,15 +2273,13 @@ static void __kprobes disarm_all_kprobes(void)
2166 kprobes_all_disarmed = true; 2273 kprobes_all_disarmed = true;
2167 printk(KERN_INFO "Kprobes globally disabled\n"); 2274 printk(KERN_INFO "Kprobes globally disabled\n");
2168 2275
2169 mutex_lock(&text_mutex);
2170 for (i = 0; i < KPROBE_TABLE_SIZE; i++) { 2276 for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
2171 head = &kprobe_table[i]; 2277 head = &kprobe_table[i];
2172 hlist_for_each_entry_rcu(p, node, head, hlist) { 2278 hlist_for_each_entry_rcu(p, node, head, hlist) {
2173 if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p)) 2279 if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p))
2174 __disarm_kprobe(p, false); 2280 disarm_kprobe(p, false);
2175 } 2281 }
2176 } 2282 }
2177 mutex_unlock(&text_mutex);
2178 mutex_unlock(&kprobe_mutex); 2283 mutex_unlock(&kprobe_mutex);
2179 2284
2180 /* Wait for disarming all kprobes by optimizer */ 2285 /* Wait for disarming all kprobes by optimizer */
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 8c4c07071cc5..4cea4f41c1d9 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -49,6 +49,11 @@ config HAVE_SYSCALL_TRACEPOINTS
49 help 49 help
50 See Documentation/trace/ftrace-design.txt 50 See Documentation/trace/ftrace-design.txt
51 51
52config HAVE_FENTRY
53 bool
54 help
55 Arch supports the gcc options -pg with -mfentry
56
52config HAVE_C_RECORDMCOUNT 57config HAVE_C_RECORDMCOUNT
53 bool 58 bool
54 help 59 help
@@ -57,8 +62,12 @@ config HAVE_C_RECORDMCOUNT
57config TRACER_MAX_TRACE 62config TRACER_MAX_TRACE
58 bool 63 bool
59 64
65config TRACE_CLOCK
66 bool
67
60config RING_BUFFER 68config RING_BUFFER
61 bool 69 bool
70 select TRACE_CLOCK
62 71
63config FTRACE_NMI_ENTER 72config FTRACE_NMI_ENTER
64 bool 73 bool
@@ -109,6 +118,7 @@ config TRACING
109 select NOP_TRACER 118 select NOP_TRACER
110 select BINARY_PRINTF 119 select BINARY_PRINTF
111 select EVENT_TRACING 120 select EVENT_TRACING
121 select TRACE_CLOCK
112 122
113config GENERIC_TRACER 123config GENERIC_TRACER
114 bool 124 bool
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index b831087c8200..d7e2068e4b71 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -5,10 +5,12 @@ ifdef CONFIG_FUNCTION_TRACER
5ORIG_CFLAGS := $(KBUILD_CFLAGS) 5ORIG_CFLAGS := $(KBUILD_CFLAGS)
6KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS)) 6KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS))
7 7
8ifdef CONFIG_FTRACE_SELFTEST
8# selftest needs instrumentation 9# selftest needs instrumentation
9CFLAGS_trace_selftest_dynamic.o = -pg 10CFLAGS_trace_selftest_dynamic.o = -pg
10obj-y += trace_selftest_dynamic.o 11obj-y += trace_selftest_dynamic.o
11endif 12endif
13endif
12 14
13# If unlikely tracing is enabled, do not trace these files 15# If unlikely tracing is enabled, do not trace these files
14ifdef CONFIG_TRACING_BRANCHES 16ifdef CONFIG_TRACING_BRANCHES
@@ -17,11 +19,7 @@ endif
17 19
18CFLAGS_trace_events_filter.o := -I$(src) 20CFLAGS_trace_events_filter.o := -I$(src)
19 21
20# 22obj-$(CONFIG_TRACE_CLOCK) += trace_clock.o
21# Make the trace clocks available generally: it's infrastructure
22# relied on by ptrace for example:
23#
24obj-y += trace_clock.o
25 23
26obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o 24obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
27obj-$(CONFIG_RING_BUFFER) += ring_buffer.o 25obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index b4f20fba09fc..9dcf15d38380 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -64,12 +64,20 @@
64 64
65#define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_CONTROL) 65#define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_CONTROL)
66 66
67static struct ftrace_ops ftrace_list_end __read_mostly = {
68 .func = ftrace_stub,
69 .flags = FTRACE_OPS_FL_RECURSION_SAFE,
70};
71
67/* ftrace_enabled is a method to turn ftrace on or off */ 72/* ftrace_enabled is a method to turn ftrace on or off */
68int ftrace_enabled __read_mostly; 73int ftrace_enabled __read_mostly;
69static int last_ftrace_enabled; 74static int last_ftrace_enabled;
70 75
71/* Quick disabling of function tracer. */ 76/* Quick disabling of function tracer. */
72int function_trace_stop; 77int function_trace_stop __read_mostly;
78
79/* Current function tracing op */
80struct ftrace_ops *function_trace_op __read_mostly = &ftrace_list_end;
73 81
74/* List for set_ftrace_pid's pids. */ 82/* List for set_ftrace_pid's pids. */
75LIST_HEAD(ftrace_pids); 83LIST_HEAD(ftrace_pids);
@@ -86,22 +94,43 @@ static int ftrace_disabled __read_mostly;
86 94
87static DEFINE_MUTEX(ftrace_lock); 95static DEFINE_MUTEX(ftrace_lock);
88 96
89static struct ftrace_ops ftrace_list_end __read_mostly = {
90 .func = ftrace_stub,
91};
92
93static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end; 97static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end;
94static struct ftrace_ops *ftrace_control_list __read_mostly = &ftrace_list_end; 98static struct ftrace_ops *ftrace_control_list __read_mostly = &ftrace_list_end;
95static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end; 99static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end;
96ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; 100ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
97static ftrace_func_t __ftrace_trace_function_delay __read_mostly = ftrace_stub;
98ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
99ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; 101ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
100static struct ftrace_ops global_ops; 102static struct ftrace_ops global_ops;
101static struct ftrace_ops control_ops; 103static struct ftrace_ops control_ops;
102 104
103static void 105#if ARCH_SUPPORTS_FTRACE_OPS
104ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip); 106static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
107 struct ftrace_ops *op, struct pt_regs *regs);
108#else
109/* See comment below, where ftrace_ops_list_func is defined */
110static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip);
111#define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops)
112#endif
113
114/**
115 * ftrace_nr_registered_ops - return number of ops registered
116 *
117 * Returns the number of ftrace_ops registered and tracing functions
118 */
119int ftrace_nr_registered_ops(void)
120{
121 struct ftrace_ops *ops;
122 int cnt = 0;
123
124 mutex_lock(&ftrace_lock);
125
126 for (ops = ftrace_ops_list;
127 ops != &ftrace_list_end; ops = ops->next)
128 cnt++;
129
130 mutex_unlock(&ftrace_lock);
131
132 return cnt;
133}
105 134
106/* 135/*
107 * Traverse the ftrace_global_list, invoking all entries. The reason that we 136 * Traverse the ftrace_global_list, invoking all entries. The reason that we
@@ -112,29 +141,29 @@ ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip);
112 * 141 *
113 * Silly Alpha and silly pointer-speculation compiler optimizations! 142 * Silly Alpha and silly pointer-speculation compiler optimizations!
114 */ 143 */
115static void ftrace_global_list_func(unsigned long ip, 144static void
116 unsigned long parent_ip) 145ftrace_global_list_func(unsigned long ip, unsigned long parent_ip,
146 struct ftrace_ops *op, struct pt_regs *regs)
117{ 147{
118 struct ftrace_ops *op;
119
120 if (unlikely(trace_recursion_test(TRACE_GLOBAL_BIT))) 148 if (unlikely(trace_recursion_test(TRACE_GLOBAL_BIT)))
121 return; 149 return;
122 150
123 trace_recursion_set(TRACE_GLOBAL_BIT); 151 trace_recursion_set(TRACE_GLOBAL_BIT);
124 op = rcu_dereference_raw(ftrace_global_list); /*see above*/ 152 op = rcu_dereference_raw(ftrace_global_list); /*see above*/
125 while (op != &ftrace_list_end) { 153 while (op != &ftrace_list_end) {
126 op->func(ip, parent_ip); 154 op->func(ip, parent_ip, op, regs);
127 op = rcu_dereference_raw(op->next); /*see above*/ 155 op = rcu_dereference_raw(op->next); /*see above*/
128 }; 156 };
129 trace_recursion_clear(TRACE_GLOBAL_BIT); 157 trace_recursion_clear(TRACE_GLOBAL_BIT);
130} 158}
131 159
132static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip) 160static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
161 struct ftrace_ops *op, struct pt_regs *regs)
133{ 162{
134 if (!test_tsk_trace_trace(current)) 163 if (!test_tsk_trace_trace(current))
135 return; 164 return;
136 165
137 ftrace_pid_function(ip, parent_ip); 166 ftrace_pid_function(ip, parent_ip, op, regs);
138} 167}
139 168
140static void set_ftrace_pid_function(ftrace_func_t func) 169static void set_ftrace_pid_function(ftrace_func_t func)
@@ -153,25 +182,9 @@ static void set_ftrace_pid_function(ftrace_func_t func)
153void clear_ftrace_function(void) 182void clear_ftrace_function(void)
154{ 183{
155 ftrace_trace_function = ftrace_stub; 184 ftrace_trace_function = ftrace_stub;
156 __ftrace_trace_function = ftrace_stub;
157 __ftrace_trace_function_delay = ftrace_stub;
158 ftrace_pid_function = ftrace_stub; 185 ftrace_pid_function = ftrace_stub;
159} 186}
160 187
161#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
162/*
163 * For those archs that do not test ftrace_trace_stop in their
164 * mcount call site, we need to do it from C.
165 */
166static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip)
167{
168 if (function_trace_stop)
169 return;
170
171 __ftrace_trace_function(ip, parent_ip);
172}
173#endif
174
175static void control_ops_disable_all(struct ftrace_ops *ops) 188static void control_ops_disable_all(struct ftrace_ops *ops)
176{ 189{
177 int cpu; 190 int cpu;
@@ -230,28 +243,27 @@ static void update_ftrace_function(void)
230 243
231 /* 244 /*
232 * If we are at the end of the list and this ops is 245 * If we are at the end of the list and this ops is
233 * not dynamic, then have the mcount trampoline call 246 * recursion safe and not dynamic and the arch supports passing ops,
234 * the function directly 247 * then have the mcount trampoline call the function directly.
235 */ 248 */
236 if (ftrace_ops_list == &ftrace_list_end || 249 if (ftrace_ops_list == &ftrace_list_end ||
237 (ftrace_ops_list->next == &ftrace_list_end && 250 (ftrace_ops_list->next == &ftrace_list_end &&
238 !(ftrace_ops_list->flags & FTRACE_OPS_FL_DYNAMIC))) 251 !(ftrace_ops_list->flags & FTRACE_OPS_FL_DYNAMIC) &&
252 (ftrace_ops_list->flags & FTRACE_OPS_FL_RECURSION_SAFE) &&
253 !FTRACE_FORCE_LIST_FUNC)) {
254 /* Set the ftrace_ops that the arch callback uses */
255 if (ftrace_ops_list == &global_ops)
256 function_trace_op = ftrace_global_list;
257 else
258 function_trace_op = ftrace_ops_list;
239 func = ftrace_ops_list->func; 259 func = ftrace_ops_list->func;
240 else 260 } else {
261 /* Just use the default ftrace_ops */
262 function_trace_op = &ftrace_list_end;
241 func = ftrace_ops_list_func; 263 func = ftrace_ops_list_func;
264 }
242 265
243#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
244 ftrace_trace_function = func; 266 ftrace_trace_function = func;
245#else
246#ifdef CONFIG_DYNAMIC_FTRACE
247 /* do not update till all functions have been modified */
248 __ftrace_trace_function_delay = func;
249#else
250 __ftrace_trace_function = func;
251#endif
252 ftrace_trace_function =
253 (func == ftrace_stub) ? func : ftrace_test_stop_func;
254#endif
255} 267}
256 268
257static void add_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops) 269static void add_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops)
@@ -325,6 +337,20 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
325 if ((ops->flags & FL_GLOBAL_CONTROL_MASK) == FL_GLOBAL_CONTROL_MASK) 337 if ((ops->flags & FL_GLOBAL_CONTROL_MASK) == FL_GLOBAL_CONTROL_MASK)
326 return -EINVAL; 338 return -EINVAL;
327 339
340#ifndef ARCH_SUPPORTS_FTRACE_SAVE_REGS
341 /*
342 * If the ftrace_ops specifies SAVE_REGS, then it only can be used
343 * if the arch supports it, or SAVE_REGS_IF_SUPPORTED is also set.
344 * Setting SAVE_REGS_IF_SUPPORTED makes SAVE_REGS irrelevant.
345 */
346 if (ops->flags & FTRACE_OPS_FL_SAVE_REGS &&
347 !(ops->flags & FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED))
348 return -EINVAL;
349
350 if (ops->flags & FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED)
351 ops->flags |= FTRACE_OPS_FL_SAVE_REGS;
352#endif
353
328 if (!core_kernel_data((unsigned long)ops)) 354 if (!core_kernel_data((unsigned long)ops))
329 ops->flags |= FTRACE_OPS_FL_DYNAMIC; 355 ops->flags |= FTRACE_OPS_FL_DYNAMIC;
330 356
@@ -773,7 +799,8 @@ ftrace_profile_alloc(struct ftrace_profile_stat *stat, unsigned long ip)
773} 799}
774 800
775static void 801static void
776function_profile_call(unsigned long ip, unsigned long parent_ip) 802function_profile_call(unsigned long ip, unsigned long parent_ip,
803 struct ftrace_ops *ops, struct pt_regs *regs)
777{ 804{
778 struct ftrace_profile_stat *stat; 805 struct ftrace_profile_stat *stat;
779 struct ftrace_profile *rec; 806 struct ftrace_profile *rec;
@@ -803,7 +830,7 @@ function_profile_call(unsigned long ip, unsigned long parent_ip)
803#ifdef CONFIG_FUNCTION_GRAPH_TRACER 830#ifdef CONFIG_FUNCTION_GRAPH_TRACER
804static int profile_graph_entry(struct ftrace_graph_ent *trace) 831static int profile_graph_entry(struct ftrace_graph_ent *trace)
805{ 832{
806 function_profile_call(trace->func, 0); 833 function_profile_call(trace->func, 0, NULL, NULL);
807 return 1; 834 return 1;
808} 835}
809 836
@@ -863,6 +890,7 @@ static void unregister_ftrace_profiler(void)
863#else 890#else
864static struct ftrace_ops ftrace_profile_ops __read_mostly = { 891static struct ftrace_ops ftrace_profile_ops __read_mostly = {
865 .func = function_profile_call, 892 .func = function_profile_call,
893 .flags = FTRACE_OPS_FL_RECURSION_SAFE,
866}; 894};
867 895
868static int register_ftrace_profiler(void) 896static int register_ftrace_profiler(void)
@@ -1045,6 +1073,7 @@ static struct ftrace_ops global_ops = {
1045 .func = ftrace_stub, 1073 .func = ftrace_stub,
1046 .notrace_hash = EMPTY_HASH, 1074 .notrace_hash = EMPTY_HASH,
1047 .filter_hash = EMPTY_HASH, 1075 .filter_hash = EMPTY_HASH,
1076 .flags = FTRACE_OPS_FL_RECURSION_SAFE,
1048}; 1077};
1049 1078
1050static DEFINE_MUTEX(ftrace_regex_lock); 1079static DEFINE_MUTEX(ftrace_regex_lock);
@@ -1525,6 +1554,12 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
1525 rec->flags++; 1554 rec->flags++;
1526 if (FTRACE_WARN_ON((rec->flags & ~FTRACE_FL_MASK) == FTRACE_REF_MAX)) 1555 if (FTRACE_WARN_ON((rec->flags & ~FTRACE_FL_MASK) == FTRACE_REF_MAX))
1527 return; 1556 return;
1557 /*
1558 * If any ops wants regs saved for this function
1559 * then all ops will get saved regs.
1560 */
1561 if (ops->flags & FTRACE_OPS_FL_SAVE_REGS)
1562 rec->flags |= FTRACE_FL_REGS;
1528 } else { 1563 } else {
1529 if (FTRACE_WARN_ON((rec->flags & ~FTRACE_FL_MASK) == 0)) 1564 if (FTRACE_WARN_ON((rec->flags & ~FTRACE_FL_MASK) == 0))
1530 return; 1565 return;
@@ -1616,18 +1651,59 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update)
1616 if (enable && (rec->flags & ~FTRACE_FL_MASK)) 1651 if (enable && (rec->flags & ~FTRACE_FL_MASK))
1617 flag = FTRACE_FL_ENABLED; 1652 flag = FTRACE_FL_ENABLED;
1618 1653
1654 /*
1655 * If enabling and the REGS flag does not match the REGS_EN, then
1656 * do not ignore this record. Set flags to fail the compare against
1657 * ENABLED.
1658 */
1659 if (flag &&
1660 (!(rec->flags & FTRACE_FL_REGS) != !(rec->flags & FTRACE_FL_REGS_EN)))
1661 flag |= FTRACE_FL_REGS;
1662
1619 /* If the state of this record hasn't changed, then do nothing */ 1663 /* If the state of this record hasn't changed, then do nothing */
1620 if ((rec->flags & FTRACE_FL_ENABLED) == flag) 1664 if ((rec->flags & FTRACE_FL_ENABLED) == flag)
1621 return FTRACE_UPDATE_IGNORE; 1665 return FTRACE_UPDATE_IGNORE;
1622 1666
1623 if (flag) { 1667 if (flag) {
1624 if (update) 1668 /* Save off if rec is being enabled (for return value) */
1669 flag ^= rec->flags & FTRACE_FL_ENABLED;
1670
1671 if (update) {
1625 rec->flags |= FTRACE_FL_ENABLED; 1672 rec->flags |= FTRACE_FL_ENABLED;
1626 return FTRACE_UPDATE_MAKE_CALL; 1673 if (flag & FTRACE_FL_REGS) {
1674 if (rec->flags & FTRACE_FL_REGS)
1675 rec->flags |= FTRACE_FL_REGS_EN;
1676 else
1677 rec->flags &= ~FTRACE_FL_REGS_EN;
1678 }
1679 }
1680
1681 /*
1682 * If this record is being updated from a nop, then
1683 * return UPDATE_MAKE_CALL.
1684 * Otherwise, if the EN flag is set, then return
1685 * UPDATE_MODIFY_CALL_REGS to tell the caller to convert
1686 * from the non-save regs, to a save regs function.
1687 * Otherwise,
1688 * return UPDATE_MODIFY_CALL to tell the caller to convert
1689 * from the save regs, to a non-save regs function.
1690 */
1691 if (flag & FTRACE_FL_ENABLED)
1692 return FTRACE_UPDATE_MAKE_CALL;
1693 else if (rec->flags & FTRACE_FL_REGS_EN)
1694 return FTRACE_UPDATE_MODIFY_CALL_REGS;
1695 else
1696 return FTRACE_UPDATE_MODIFY_CALL;
1627 } 1697 }
1628 1698
1629 if (update) 1699 if (update) {
1630 rec->flags &= ~FTRACE_FL_ENABLED; 1700 /* If there's no more users, clear all flags */
1701 if (!(rec->flags & ~FTRACE_FL_MASK))
1702 rec->flags = 0;
1703 else
1704 /* Just disable the record (keep REGS state) */
1705 rec->flags &= ~FTRACE_FL_ENABLED;
1706 }
1631 1707
1632 return FTRACE_UPDATE_MAKE_NOP; 1708 return FTRACE_UPDATE_MAKE_NOP;
1633} 1709}
@@ -1662,13 +1738,17 @@ int ftrace_test_record(struct dyn_ftrace *rec, int enable)
1662static int 1738static int
1663__ftrace_replace_code(struct dyn_ftrace *rec, int enable) 1739__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
1664{ 1740{
1741 unsigned long ftrace_old_addr;
1665 unsigned long ftrace_addr; 1742 unsigned long ftrace_addr;
1666 int ret; 1743 int ret;
1667 1744
1668 ftrace_addr = (unsigned long)FTRACE_ADDR;
1669
1670 ret = ftrace_update_record(rec, enable); 1745 ret = ftrace_update_record(rec, enable);
1671 1746
1747 if (rec->flags & FTRACE_FL_REGS)
1748 ftrace_addr = (unsigned long)FTRACE_REGS_ADDR;
1749 else
1750 ftrace_addr = (unsigned long)FTRACE_ADDR;
1751
1672 switch (ret) { 1752 switch (ret) {
1673 case FTRACE_UPDATE_IGNORE: 1753 case FTRACE_UPDATE_IGNORE:
1674 return 0; 1754 return 0;
@@ -1678,6 +1758,15 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
1678 1758
1679 case FTRACE_UPDATE_MAKE_NOP: 1759 case FTRACE_UPDATE_MAKE_NOP:
1680 return ftrace_make_nop(NULL, rec, ftrace_addr); 1760 return ftrace_make_nop(NULL, rec, ftrace_addr);
1761
1762 case FTRACE_UPDATE_MODIFY_CALL_REGS:
1763 case FTRACE_UPDATE_MODIFY_CALL:
1764 if (rec->flags & FTRACE_FL_REGS)
1765 ftrace_old_addr = (unsigned long)FTRACE_ADDR;
1766 else
1767 ftrace_old_addr = (unsigned long)FTRACE_REGS_ADDR;
1768
1769 return ftrace_modify_call(rec, ftrace_old_addr, ftrace_addr);
1681 } 1770 }
1682 1771
1683 return -1; /* unknow ftrace bug */ 1772 return -1; /* unknow ftrace bug */
@@ -1882,16 +1971,6 @@ static void ftrace_run_update_code(int command)
1882 */ 1971 */
1883 arch_ftrace_update_code(command); 1972 arch_ftrace_update_code(command);
1884 1973
1885#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
1886 /*
1887 * For archs that call ftrace_test_stop_func(), we must
1888 * wait till after we update all the function callers
1889 * before we update the callback. This keeps different
1890 * ops that record different functions from corrupting
1891 * each other.
1892 */
1893 __ftrace_trace_function = __ftrace_trace_function_delay;
1894#endif
1895 function_trace_stop--; 1974 function_trace_stop--;
1896 1975
1897 ret = ftrace_arch_code_modify_post_process(); 1976 ret = ftrace_arch_code_modify_post_process();
@@ -2441,8 +2520,9 @@ static int t_show(struct seq_file *m, void *v)
2441 2520
2442 seq_printf(m, "%ps", (void *)rec->ip); 2521 seq_printf(m, "%ps", (void *)rec->ip);
2443 if (iter->flags & FTRACE_ITER_ENABLED) 2522 if (iter->flags & FTRACE_ITER_ENABLED)
2444 seq_printf(m, " (%ld)", 2523 seq_printf(m, " (%ld)%s",
2445 rec->flags & ~FTRACE_FL_MASK); 2524 rec->flags & ~FTRACE_FL_MASK,
2525 rec->flags & FTRACE_FL_REGS ? " R" : "");
2446 seq_printf(m, "\n"); 2526 seq_printf(m, "\n");
2447 2527
2448 return 0; 2528 return 0;
@@ -2790,8 +2870,8 @@ static int __init ftrace_mod_cmd_init(void)
2790} 2870}
2791device_initcall(ftrace_mod_cmd_init); 2871device_initcall(ftrace_mod_cmd_init);
2792 2872
2793static void 2873static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip,
2794function_trace_probe_call(unsigned long ip, unsigned long parent_ip) 2874 struct ftrace_ops *op, struct pt_regs *pt_regs)
2795{ 2875{
2796 struct ftrace_func_probe *entry; 2876 struct ftrace_func_probe *entry;
2797 struct hlist_head *hhd; 2877 struct hlist_head *hhd;
@@ -3162,8 +3242,27 @@ ftrace_notrace_write(struct file *file, const char __user *ubuf,
3162} 3242}
3163 3243
3164static int 3244static int
3165ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, 3245ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove)
3166 int reset, int enable) 3246{
3247 struct ftrace_func_entry *entry;
3248
3249 if (!ftrace_location(ip))
3250 return -EINVAL;
3251
3252 if (remove) {
3253 entry = ftrace_lookup_ip(hash, ip);
3254 if (!entry)
3255 return -ENOENT;
3256 free_hash_entry(hash, entry);
3257 return 0;
3258 }
3259
3260 return add_hash_entry(hash, ip);
3261}
3262
3263static int
3264ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len,
3265 unsigned long ip, int remove, int reset, int enable)
3167{ 3266{
3168 struct ftrace_hash **orig_hash; 3267 struct ftrace_hash **orig_hash;
3169 struct ftrace_hash *hash; 3268 struct ftrace_hash *hash;
@@ -3192,6 +3291,11 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
3192 ret = -EINVAL; 3291 ret = -EINVAL;
3193 goto out_regex_unlock; 3292 goto out_regex_unlock;
3194 } 3293 }
3294 if (ip) {
3295 ret = ftrace_match_addr(hash, ip, remove);
3296 if (ret < 0)
3297 goto out_regex_unlock;
3298 }
3195 3299
3196 mutex_lock(&ftrace_lock); 3300 mutex_lock(&ftrace_lock);
3197 ret = ftrace_hash_move(ops, enable, orig_hash, hash); 3301 ret = ftrace_hash_move(ops, enable, orig_hash, hash);
@@ -3208,6 +3312,37 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
3208 return ret; 3312 return ret;
3209} 3313}
3210 3314
3315static int
3316ftrace_set_addr(struct ftrace_ops *ops, unsigned long ip, int remove,
3317 int reset, int enable)
3318{
3319 return ftrace_set_hash(ops, 0, 0, ip, remove, reset, enable);
3320}
3321
3322/**
3323 * ftrace_set_filter_ip - set a function to filter on in ftrace by address
3324 * @ops - the ops to set the filter with
3325 * @ip - the address to add to or remove from the filter.
3326 * @remove - non zero to remove the ip from the filter
3327 * @reset - non zero to reset all filters before applying this filter.
3328 *
3329 * Filters denote which functions should be enabled when tracing is enabled
3330 * If @ip is NULL, it failes to update filter.
3331 */
3332int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip,
3333 int remove, int reset)
3334{
3335 return ftrace_set_addr(ops, ip, remove, reset, 1);
3336}
3337EXPORT_SYMBOL_GPL(ftrace_set_filter_ip);
3338
3339static int
3340ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
3341 int reset, int enable)
3342{
3343 return ftrace_set_hash(ops, buf, len, 0, 0, reset, enable);
3344}
3345
3211/** 3346/**
3212 * ftrace_set_filter - set a function to filter on in ftrace 3347 * ftrace_set_filter - set a function to filter on in ftrace
3213 * @ops - the ops to set the filter with 3348 * @ops - the ops to set the filter with
@@ -3912,6 +4047,7 @@ void __init ftrace_init(void)
3912 4047
3913static struct ftrace_ops global_ops = { 4048static struct ftrace_ops global_ops = {
3914 .func = ftrace_stub, 4049 .func = ftrace_stub,
4050 .flags = FTRACE_OPS_FL_RECURSION_SAFE,
3915}; 4051};
3916 4052
3917static int __init ftrace_nodyn_init(void) 4053static int __init ftrace_nodyn_init(void)
@@ -3942,10 +4078,9 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip)
3942#endif /* CONFIG_DYNAMIC_FTRACE */ 4078#endif /* CONFIG_DYNAMIC_FTRACE */
3943 4079
3944static void 4080static void
3945ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip) 4081ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip,
4082 struct ftrace_ops *op, struct pt_regs *regs)
3946{ 4083{
3947 struct ftrace_ops *op;
3948
3949 if (unlikely(trace_recursion_test(TRACE_CONTROL_BIT))) 4084 if (unlikely(trace_recursion_test(TRACE_CONTROL_BIT)))
3950 return; 4085 return;
3951 4086
@@ -3959,7 +4094,7 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip)
3959 while (op != &ftrace_list_end) { 4094 while (op != &ftrace_list_end) {
3960 if (!ftrace_function_local_disabled(op) && 4095 if (!ftrace_function_local_disabled(op) &&
3961 ftrace_ops_test(op, ip)) 4096 ftrace_ops_test(op, ip))
3962 op->func(ip, parent_ip); 4097 op->func(ip, parent_ip, op, regs);
3963 4098
3964 op = rcu_dereference_raw(op->next); 4099 op = rcu_dereference_raw(op->next);
3965 }; 4100 };
@@ -3969,13 +4104,18 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip)
3969 4104
3970static struct ftrace_ops control_ops = { 4105static struct ftrace_ops control_ops = {
3971 .func = ftrace_ops_control_func, 4106 .func = ftrace_ops_control_func,
4107 .flags = FTRACE_OPS_FL_RECURSION_SAFE,
3972}; 4108};
3973 4109
3974static void 4110static inline void
3975ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip) 4111__ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
4112 struct ftrace_ops *ignored, struct pt_regs *regs)
3976{ 4113{
3977 struct ftrace_ops *op; 4114 struct ftrace_ops *op;
3978 4115
4116 if (function_trace_stop)
4117 return;
4118
3979 if (unlikely(trace_recursion_test(TRACE_INTERNAL_BIT))) 4119 if (unlikely(trace_recursion_test(TRACE_INTERNAL_BIT)))
3980 return; 4120 return;
3981 4121
@@ -3988,13 +4128,39 @@ ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip)
3988 op = rcu_dereference_raw(ftrace_ops_list); 4128 op = rcu_dereference_raw(ftrace_ops_list);
3989 while (op != &ftrace_list_end) { 4129 while (op != &ftrace_list_end) {
3990 if (ftrace_ops_test(op, ip)) 4130 if (ftrace_ops_test(op, ip))
3991 op->func(ip, parent_ip); 4131 op->func(ip, parent_ip, op, regs);
3992 op = rcu_dereference_raw(op->next); 4132 op = rcu_dereference_raw(op->next);
3993 }; 4133 };
3994 preempt_enable_notrace(); 4134 preempt_enable_notrace();
3995 trace_recursion_clear(TRACE_INTERNAL_BIT); 4135 trace_recursion_clear(TRACE_INTERNAL_BIT);
3996} 4136}
3997 4137
4138/*
4139 * Some archs only support passing ip and parent_ip. Even though
4140 * the list function ignores the op parameter, we do not want any
4141 * C side effects, where a function is called without the caller
4142 * sending a third parameter.
4143 * Archs are to support both the regs and ftrace_ops at the same time.
4144 * If they support ftrace_ops, it is assumed they support regs.
4145 * If call backs want to use regs, they must either check for regs
4146 * being NULL, or ARCH_SUPPORTS_FTRACE_SAVE_REGS.
4147 * Note, ARCH_SUPPORT_SAVE_REGS expects a full regs to be saved.
4148 * An architecture can pass partial regs with ftrace_ops and still
4149 * set the ARCH_SUPPORT_FTARCE_OPS.
4150 */
4151#if ARCH_SUPPORTS_FTRACE_OPS
4152static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
4153 struct ftrace_ops *op, struct pt_regs *regs)
4154{
4155 __ftrace_ops_list_func(ip, parent_ip, NULL, regs);
4156}
4157#else
4158static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip)
4159{
4160 __ftrace_ops_list_func(ip, parent_ip, NULL, NULL);
4161}
4162#endif
4163
3998static void clear_ftrace_swapper(void) 4164static void clear_ftrace_swapper(void)
3999{ 4165{
4000 struct task_struct *p; 4166 struct task_struct *p;
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 49491fa7daa2..b32ed0e385a5 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2816,7 +2816,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_enable);
2816 * to the buffer after this will fail and return NULL. 2816 * to the buffer after this will fail and return NULL.
2817 * 2817 *
2818 * This is different than ring_buffer_record_disable() as 2818 * This is different than ring_buffer_record_disable() as
2819 * it works like an on/off switch, where as the disable() verison 2819 * it works like an on/off switch, where as the disable() version
2820 * must be paired with a enable(). 2820 * must be paired with a enable().
2821 */ 2821 */
2822void ring_buffer_record_off(struct ring_buffer *buffer) 2822void ring_buffer_record_off(struct ring_buffer *buffer)
@@ -2839,7 +2839,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_off);
2839 * ring_buffer_record_off(). 2839 * ring_buffer_record_off().
2840 * 2840 *
2841 * This is different than ring_buffer_record_enable() as 2841 * This is different than ring_buffer_record_enable() as
2842 * it works like an on/off switch, where as the enable() verison 2842 * it works like an on/off switch, where as the enable() version
2843 * must be paired with a disable(). 2843 * must be paired with a disable().
2844 */ 2844 */
2845void ring_buffer_record_on(struct ring_buffer *buffer) 2845void ring_buffer_record_on(struct ring_buffer *buffer)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 5c38c81496ce..1ec5c1dab629 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -328,7 +328,7 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
328unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | 328unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
329 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME | 329 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
330 TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | 330 TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
331 TRACE_ITER_IRQ_INFO; 331 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS;
332 332
333static int trace_stop_count; 333static int trace_stop_count;
334static DEFINE_RAW_SPINLOCK(tracing_start_lock); 334static DEFINE_RAW_SPINLOCK(tracing_start_lock);
@@ -426,15 +426,15 @@ __setup("trace_buf_size=", set_buf_size);
426 426
427static int __init set_tracing_thresh(char *str) 427static int __init set_tracing_thresh(char *str)
428{ 428{
429 unsigned long threshhold; 429 unsigned long threshold;
430 int ret; 430 int ret;
431 431
432 if (!str) 432 if (!str)
433 return 0; 433 return 0;
434 ret = strict_strtoul(str, 0, &threshhold); 434 ret = strict_strtoul(str, 0, &threshold);
435 if (ret < 0) 435 if (ret < 0)
436 return 0; 436 return 0;
437 tracing_thresh = threshhold * 1000; 437 tracing_thresh = threshold * 1000;
438 return 1; 438 return 1;
439} 439}
440__setup("tracing_thresh=", set_tracing_thresh); 440__setup("tracing_thresh=", set_tracing_thresh);
@@ -470,6 +470,7 @@ static const char *trace_options[] = {
470 "overwrite", 470 "overwrite",
471 "disable_on_free", 471 "disable_on_free",
472 "irq-info", 472 "irq-info",
473 "markers",
473 NULL 474 NULL
474}; 475};
475 476
@@ -3886,6 +3887,9 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
3886 if (tracing_disabled) 3887 if (tracing_disabled)
3887 return -EINVAL; 3888 return -EINVAL;
3888 3889
3890 if (!(trace_flags & TRACE_ITER_MARKERS))
3891 return -EINVAL;
3892
3889 if (cnt > TRACE_BUF_SIZE) 3893 if (cnt > TRACE_BUF_SIZE)
3890 cnt = TRACE_BUF_SIZE; 3894 cnt = TRACE_BUF_SIZE;
3891 3895
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 55e1f7f0db12..63a2da0b9a6e 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -472,11 +472,11 @@ extern void trace_find_cmdline(int pid, char comm[]);
472 472
473#ifdef CONFIG_DYNAMIC_FTRACE 473#ifdef CONFIG_DYNAMIC_FTRACE
474extern unsigned long ftrace_update_tot_cnt; 474extern unsigned long ftrace_update_tot_cnt;
475#endif
475#define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func 476#define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func
476extern int DYN_FTRACE_TEST_NAME(void); 477extern int DYN_FTRACE_TEST_NAME(void);
477#define DYN_FTRACE_TEST_NAME2 trace_selftest_dynamic_test_func2 478#define DYN_FTRACE_TEST_NAME2 trace_selftest_dynamic_test_func2
478extern int DYN_FTRACE_TEST_NAME2(void); 479extern int DYN_FTRACE_TEST_NAME2(void);
479#endif
480 480
481extern int ring_buffer_expanded; 481extern int ring_buffer_expanded;
482extern bool tracing_selftest_disabled; 482extern bool tracing_selftest_disabled;
@@ -680,6 +680,7 @@ enum trace_iterator_flags {
680 TRACE_ITER_OVERWRITE = 0x200000, 680 TRACE_ITER_OVERWRITE = 0x200000,
681 TRACE_ITER_STOP_ON_FREE = 0x400000, 681 TRACE_ITER_STOP_ON_FREE = 0x400000,
682 TRACE_ITER_IRQ_INFO = 0x800000, 682 TRACE_ITER_IRQ_INFO = 0x800000,
683 TRACE_ITER_MARKERS = 0x1000000,
683}; 684};
684 685
685/* 686/*
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 8a6d2ee2086c..84b1e045faba 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -258,7 +258,8 @@ EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
258 258
259#ifdef CONFIG_FUNCTION_TRACER 259#ifdef CONFIG_FUNCTION_TRACER
260static void 260static void
261perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip) 261perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip,
262 struct ftrace_ops *ops, struct pt_regs *pt_regs)
262{ 263{
263 struct ftrace_entry *entry; 264 struct ftrace_entry *entry;
264 struct hlist_head *head; 265 struct hlist_head *head;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 29111da1d100..d608d09d08c0 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1199,6 +1199,31 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
1199 return 0; 1199 return 0;
1200} 1200}
1201 1201
1202static void event_remove(struct ftrace_event_call *call)
1203{
1204 ftrace_event_enable_disable(call, 0);
1205 if (call->event.funcs)
1206 __unregister_ftrace_event(&call->event);
1207 list_del(&call->list);
1208}
1209
1210static int event_init(struct ftrace_event_call *call)
1211{
1212 int ret = 0;
1213
1214 if (WARN_ON(!call->name))
1215 return -EINVAL;
1216
1217 if (call->class->raw_init) {
1218 ret = call->class->raw_init(call);
1219 if (ret < 0 && ret != -ENOSYS)
1220 pr_warn("Could not initialize trace events/%s\n",
1221 call->name);
1222 }
1223
1224 return ret;
1225}
1226
1202static int 1227static int
1203__trace_add_event_call(struct ftrace_event_call *call, struct module *mod, 1228__trace_add_event_call(struct ftrace_event_call *call, struct module *mod,
1204 const struct file_operations *id, 1229 const struct file_operations *id,
@@ -1209,19 +1234,9 @@ __trace_add_event_call(struct ftrace_event_call *call, struct module *mod,
1209 struct dentry *d_events; 1234 struct dentry *d_events;
1210 int ret; 1235 int ret;
1211 1236
1212 /* The linker may leave blanks */ 1237 ret = event_init(call);
1213 if (!call->name) 1238 if (ret < 0)
1214 return -EINVAL; 1239 return ret;
1215
1216 if (call->class->raw_init) {
1217 ret = call->class->raw_init(call);
1218 if (ret < 0) {
1219 if (ret != -ENOSYS)
1220 pr_warning("Could not initialize trace events/%s\n",
1221 call->name);
1222 return ret;
1223 }
1224 }
1225 1240
1226 d_events = event_trace_events_dir(); 1241 d_events = event_trace_events_dir();
1227 if (!d_events) 1242 if (!d_events)
@@ -1272,13 +1287,10 @@ static void remove_subsystem_dir(const char *name)
1272 */ 1287 */
1273static void __trace_remove_event_call(struct ftrace_event_call *call) 1288static void __trace_remove_event_call(struct ftrace_event_call *call)
1274{ 1289{
1275 ftrace_event_enable_disable(call, 0); 1290 event_remove(call);
1276 if (call->event.funcs)
1277 __unregister_ftrace_event(&call->event);
1278 debugfs_remove_recursive(call->dir);
1279 list_del(&call->list);
1280 trace_destroy_fields(call); 1291 trace_destroy_fields(call);
1281 destroy_preds(call); 1292 destroy_preds(call);
1293 debugfs_remove_recursive(call->dir);
1282 remove_subsystem_dir(call->class->system); 1294 remove_subsystem_dir(call->class->system);
1283} 1295}
1284 1296
@@ -1450,15 +1462,43 @@ static __init int setup_trace_event(char *str)
1450} 1462}
1451__setup("trace_event=", setup_trace_event); 1463__setup("trace_event=", setup_trace_event);
1452 1464
1465static __init int event_trace_enable(void)
1466{
1467 struct ftrace_event_call **iter, *call;
1468 char *buf = bootup_event_buf;
1469 char *token;
1470 int ret;
1471
1472 for_each_event(iter, __start_ftrace_events, __stop_ftrace_events) {
1473
1474 call = *iter;
1475 ret = event_init(call);
1476 if (!ret)
1477 list_add(&call->list, &ftrace_events);
1478 }
1479
1480 while (true) {
1481 token = strsep(&buf, ",");
1482
1483 if (!token)
1484 break;
1485 if (!*token)
1486 continue;
1487
1488 ret = ftrace_set_clr_event(token, 1);
1489 if (ret)
1490 pr_warn("Failed to enable trace event: %s\n", token);
1491 }
1492 return 0;
1493}
1494
1453static __init int event_trace_init(void) 1495static __init int event_trace_init(void)
1454{ 1496{
1455 struct ftrace_event_call **call; 1497 struct ftrace_event_call *call;
1456 struct dentry *d_tracer; 1498 struct dentry *d_tracer;
1457 struct dentry *entry; 1499 struct dentry *entry;
1458 struct dentry *d_events; 1500 struct dentry *d_events;
1459 int ret; 1501 int ret;
1460 char *buf = bootup_event_buf;
1461 char *token;
1462 1502
1463 d_tracer = tracing_init_dentry(); 1503 d_tracer = tracing_init_dentry();
1464 if (!d_tracer) 1504 if (!d_tracer)
@@ -1497,24 +1537,19 @@ static __init int event_trace_init(void)
1497 if (trace_define_common_fields()) 1537 if (trace_define_common_fields())
1498 pr_warning("tracing: Failed to allocate common fields"); 1538 pr_warning("tracing: Failed to allocate common fields");
1499 1539
1500 for_each_event(call, __start_ftrace_events, __stop_ftrace_events) { 1540 /*
1501 __trace_add_event_call(*call, NULL, &ftrace_event_id_fops, 1541 * Early initialization already enabled ftrace event.
1542 * Now it's only necessary to create the event directory.
1543 */
1544 list_for_each_entry(call, &ftrace_events, list) {
1545
1546 ret = event_create_dir(call, d_events,
1547 &ftrace_event_id_fops,
1502 &ftrace_enable_fops, 1548 &ftrace_enable_fops,
1503 &ftrace_event_filter_fops, 1549 &ftrace_event_filter_fops,
1504 &ftrace_event_format_fops); 1550 &ftrace_event_format_fops);
1505 } 1551 if (ret < 0)
1506 1552 event_remove(call);
1507 while (true) {
1508 token = strsep(&buf, ",");
1509
1510 if (!token)
1511 break;
1512 if (!*token)
1513 continue;
1514
1515 ret = ftrace_set_clr_event(token, 1);
1516 if (ret)
1517 pr_warning("Failed to enable trace event: %s\n", token);
1518 } 1553 }
1519 1554
1520 ret = register_module_notifier(&trace_module_nb); 1555 ret = register_module_notifier(&trace_module_nb);
@@ -1523,6 +1558,7 @@ static __init int event_trace_init(void)
1523 1558
1524 return 0; 1559 return 0;
1525} 1560}
1561core_initcall(event_trace_enable);
1526fs_initcall(event_trace_init); 1562fs_initcall(event_trace_init);
1527 1563
1528#ifdef CONFIG_FTRACE_STARTUP_TEST 1564#ifdef CONFIG_FTRACE_STARTUP_TEST
@@ -1646,9 +1682,11 @@ static __init void event_trace_self_tests(void)
1646 event_test_stuff(); 1682 event_test_stuff();
1647 1683
1648 ret = __ftrace_set_clr_event(NULL, system->name, NULL, 0); 1684 ret = __ftrace_set_clr_event(NULL, system->name, NULL, 0);
1649 if (WARN_ON_ONCE(ret)) 1685 if (WARN_ON_ONCE(ret)) {
1650 pr_warning("error disabling system %s\n", 1686 pr_warning("error disabling system %s\n",
1651 system->name); 1687 system->name);
1688 continue;
1689 }
1652 1690
1653 pr_cont("OK\n"); 1691 pr_cont("OK\n");
1654 } 1692 }
@@ -1681,7 +1719,8 @@ static __init void event_trace_self_tests(void)
1681static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable); 1719static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable);
1682 1720
1683static void 1721static void
1684function_test_events_call(unsigned long ip, unsigned long parent_ip) 1722function_test_events_call(unsigned long ip, unsigned long parent_ip,
1723 struct ftrace_ops *op, struct pt_regs *pt_regs)
1685{ 1724{
1686 struct ring_buffer_event *event; 1725 struct ring_buffer_event *event;
1687 struct ring_buffer *buffer; 1726 struct ring_buffer *buffer;
@@ -1720,6 +1759,7 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip)
1720static struct ftrace_ops trace_ops __initdata = 1759static struct ftrace_ops trace_ops __initdata =
1721{ 1760{
1722 .func = function_test_events_call, 1761 .func = function_test_events_call,
1762 .flags = FTRACE_OPS_FL_RECURSION_SAFE,
1723}; 1763};
1724 1764
1725static __init void event_trace_self_test_with_function(void) 1765static __init void event_trace_self_test_with_function(void)
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 431dba8b7542..c154797a7ff7 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -2002,7 +2002,7 @@ static int ftrace_function_set_regexp(struct ftrace_ops *ops, int filter,
2002static int __ftrace_function_set_filter(int filter, char *buf, int len, 2002static int __ftrace_function_set_filter(int filter, char *buf, int len,
2003 struct function_filter_data *data) 2003 struct function_filter_data *data)
2004{ 2004{
2005 int i, re_cnt, ret; 2005 int i, re_cnt, ret = -EINVAL;
2006 int *reset; 2006 int *reset;
2007 char **re; 2007 char **re;
2008 2008
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index a426f410c060..483162a9f908 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -49,7 +49,8 @@ static void function_trace_start(struct trace_array *tr)
49} 49}
50 50
51static void 51static void
52function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip) 52function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip,
53 struct ftrace_ops *op, struct pt_regs *pt_regs)
53{ 54{
54 struct trace_array *tr = func_trace; 55 struct trace_array *tr = func_trace;
55 struct trace_array_cpu *data; 56 struct trace_array_cpu *data;
@@ -84,7 +85,9 @@ enum {
84static struct tracer_flags func_flags; 85static struct tracer_flags func_flags;
85 86
86static void 87static void
87function_trace_call(unsigned long ip, unsigned long parent_ip) 88function_trace_call(unsigned long ip, unsigned long parent_ip,
89 struct ftrace_ops *op, struct pt_regs *pt_regs)
90
88{ 91{
89 struct trace_array *tr = func_trace; 92 struct trace_array *tr = func_trace;
90 struct trace_array_cpu *data; 93 struct trace_array_cpu *data;
@@ -121,7 +124,8 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
121} 124}
122 125
123static void 126static void
124function_stack_trace_call(unsigned long ip, unsigned long parent_ip) 127function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
128 struct ftrace_ops *op, struct pt_regs *pt_regs)
125{ 129{
126 struct trace_array *tr = func_trace; 130 struct trace_array *tr = func_trace;
127 struct trace_array_cpu *data; 131 struct trace_array_cpu *data;
@@ -164,13 +168,13 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip)
164static struct ftrace_ops trace_ops __read_mostly = 168static struct ftrace_ops trace_ops __read_mostly =
165{ 169{
166 .func = function_trace_call, 170 .func = function_trace_call,
167 .flags = FTRACE_OPS_FL_GLOBAL, 171 .flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE,
168}; 172};
169 173
170static struct ftrace_ops trace_stack_ops __read_mostly = 174static struct ftrace_ops trace_stack_ops __read_mostly =
171{ 175{
172 .func = function_stack_trace_call, 176 .func = function_stack_trace_call,
173 .flags = FTRACE_OPS_FL_GLOBAL, 177 .flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE,
174}; 178};
175 179
176static struct tracer_opt func_opts[] = { 180static struct tracer_opt func_opts[] = {
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index ce27c8ba8d31..99b4378393d5 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -143,7 +143,7 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
143 return; 143 return;
144 } 144 }
145 145
146#ifdef CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST 146#if defined(CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST) && !defined(CC_USING_FENTRY)
147 /* 147 /*
148 * The arch may choose to record the frame pointer used 148 * The arch may choose to record the frame pointer used
149 * and check it here to make sure that it is what we expect it 149 * and check it here to make sure that it is what we expect it
@@ -154,6 +154,9 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
154 * 154 *
155 * Currently, x86_32 with optimize for size (-Os) makes the latest 155 * Currently, x86_32 with optimize for size (-Os) makes the latest
156 * gcc do the above. 156 * gcc do the above.
157 *
158 * Note, -mfentry does not use frame pointers, and this test
159 * is not needed if CC_USING_FENTRY is set.
157 */ 160 */
158 if (unlikely(current->ret_stack[index].fp != frame_pointer)) { 161 if (unlikely(current->ret_stack[index].fp != frame_pointer)) {
159 ftrace_graph_stop(); 162 ftrace_graph_stop();
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 99d20e920368..d98ee8283b29 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -136,7 +136,8 @@ static int func_prolog_dec(struct trace_array *tr,
136 * irqsoff uses its own tracer function to keep the overhead down: 136 * irqsoff uses its own tracer function to keep the overhead down:
137 */ 137 */
138static void 138static void
139irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip) 139irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip,
140 struct ftrace_ops *op, struct pt_regs *pt_regs)
140{ 141{
141 struct trace_array *tr = irqsoff_trace; 142 struct trace_array *tr = irqsoff_trace;
142 struct trace_array_cpu *data; 143 struct trace_array_cpu *data;
@@ -153,7 +154,7 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
153static struct ftrace_ops trace_ops __read_mostly = 154static struct ftrace_ops trace_ops __read_mostly =
154{ 155{
155 .func = irqsoff_tracer_call, 156 .func = irqsoff_tracer_call,
156 .flags = FTRACE_OPS_FL_GLOBAL, 157 .flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE,
157}; 158};
158#endif /* CONFIG_FUNCTION_TRACER */ 159#endif /* CONFIG_FUNCTION_TRACER */
159 160
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index ff791ea48b57..02170c00c413 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -108,7 +108,8 @@ out_enable:
108 * wakeup uses its own tracer function to keep the overhead down: 108 * wakeup uses its own tracer function to keep the overhead down:
109 */ 109 */
110static void 110static void
111wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) 111wakeup_tracer_call(unsigned long ip, unsigned long parent_ip,
112 struct ftrace_ops *op, struct pt_regs *pt_regs)
112{ 113{
113 struct trace_array *tr = wakeup_trace; 114 struct trace_array *tr = wakeup_trace;
114 struct trace_array_cpu *data; 115 struct trace_array_cpu *data;
@@ -129,7 +130,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
129static struct ftrace_ops trace_ops __read_mostly = 130static struct ftrace_ops trace_ops __read_mostly =
130{ 131{
131 .func = wakeup_tracer_call, 132 .func = wakeup_tracer_call,
132 .flags = FTRACE_OPS_FL_GLOBAL, 133 .flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE,
133}; 134};
134#endif /* CONFIG_FUNCTION_TRACER */ 135#endif /* CONFIG_FUNCTION_TRACER */
135 136
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 288541f977fb..2c00a691a540 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -103,54 +103,67 @@ static inline void warn_failed_init_tracer(struct tracer *trace, int init_ret)
103 103
104static int trace_selftest_test_probe1_cnt; 104static int trace_selftest_test_probe1_cnt;
105static void trace_selftest_test_probe1_func(unsigned long ip, 105static void trace_selftest_test_probe1_func(unsigned long ip,
106 unsigned long pip) 106 unsigned long pip,
107 struct ftrace_ops *op,
108 struct pt_regs *pt_regs)
107{ 109{
108 trace_selftest_test_probe1_cnt++; 110 trace_selftest_test_probe1_cnt++;
109} 111}
110 112
111static int trace_selftest_test_probe2_cnt; 113static int trace_selftest_test_probe2_cnt;
112static void trace_selftest_test_probe2_func(unsigned long ip, 114static void trace_selftest_test_probe2_func(unsigned long ip,
113 unsigned long pip) 115 unsigned long pip,
116 struct ftrace_ops *op,
117 struct pt_regs *pt_regs)
114{ 118{
115 trace_selftest_test_probe2_cnt++; 119 trace_selftest_test_probe2_cnt++;
116} 120}
117 121
118static int trace_selftest_test_probe3_cnt; 122static int trace_selftest_test_probe3_cnt;
119static void trace_selftest_test_probe3_func(unsigned long ip, 123static void trace_selftest_test_probe3_func(unsigned long ip,
120 unsigned long pip) 124 unsigned long pip,
125 struct ftrace_ops *op,
126 struct pt_regs *pt_regs)
121{ 127{
122 trace_selftest_test_probe3_cnt++; 128 trace_selftest_test_probe3_cnt++;
123} 129}
124 130
125static int trace_selftest_test_global_cnt; 131static int trace_selftest_test_global_cnt;
126static void trace_selftest_test_global_func(unsigned long ip, 132static void trace_selftest_test_global_func(unsigned long ip,
127 unsigned long pip) 133 unsigned long pip,
134 struct ftrace_ops *op,
135 struct pt_regs *pt_regs)
128{ 136{
129 trace_selftest_test_global_cnt++; 137 trace_selftest_test_global_cnt++;
130} 138}
131 139
132static int trace_selftest_test_dyn_cnt; 140static int trace_selftest_test_dyn_cnt;
133static void trace_selftest_test_dyn_func(unsigned long ip, 141static void trace_selftest_test_dyn_func(unsigned long ip,
134 unsigned long pip) 142 unsigned long pip,
143 struct ftrace_ops *op,
144 struct pt_regs *pt_regs)
135{ 145{
136 trace_selftest_test_dyn_cnt++; 146 trace_selftest_test_dyn_cnt++;
137} 147}
138 148
139static struct ftrace_ops test_probe1 = { 149static struct ftrace_ops test_probe1 = {
140 .func = trace_selftest_test_probe1_func, 150 .func = trace_selftest_test_probe1_func,
151 .flags = FTRACE_OPS_FL_RECURSION_SAFE,
141}; 152};
142 153
143static struct ftrace_ops test_probe2 = { 154static struct ftrace_ops test_probe2 = {
144 .func = trace_selftest_test_probe2_func, 155 .func = trace_selftest_test_probe2_func,
156 .flags = FTRACE_OPS_FL_RECURSION_SAFE,
145}; 157};
146 158
147static struct ftrace_ops test_probe3 = { 159static struct ftrace_ops test_probe3 = {
148 .func = trace_selftest_test_probe3_func, 160 .func = trace_selftest_test_probe3_func,
161 .flags = FTRACE_OPS_FL_RECURSION_SAFE,
149}; 162};
150 163
151static struct ftrace_ops test_global = { 164static struct ftrace_ops test_global = {
152 .func = trace_selftest_test_global_func, 165 .func = trace_selftest_test_global_func,
153 .flags = FTRACE_OPS_FL_GLOBAL, 166 .flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE,
154}; 167};
155 168
156static void print_counts(void) 169static void print_counts(void)
@@ -393,10 +406,253 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
393 406
394 return ret; 407 return ret;
395} 408}
409
/* Shared invocation counter for the two recursion selftest callbacks. */
static int trace_selftest_recursion_cnt;

/*
 * ftrace callback used by test_rec_probe.
 *
 * Counts the invocation and then calls the traced test function again
 * from inside the callback. None of the arguments are used.
 */
static void trace_selftest_test_recursion_func(unsigned long ip,
					       unsigned long pip,
					       struct ftrace_ops *op,
					       struct pt_regs *pt_regs)
{
	/*
	 * The ops that uses this callback is registered without the
	 * recursion safe flag, so the ftrace infrastructure is expected
	 * to keep the call below from re-entering this callback.
	 * If it does not, this will crash the kernel!
	 */
	++trace_selftest_recursion_cnt;
	DYN_FTRACE_TEST_NAME();
}
424
425static void trace_selftest_test_recursion_safe_func(unsigned long ip,
426 unsigned long pip,
427 struct ftrace_ops *op,
428 struct pt_regs *pt_regs)
429{
430 /*
431 * We said we would provide our own recursion. By calling
432 * this function again, we should recurse back into this function
433 * and count again. But this only happens if the arch supports
434 * all of ftrace features and nothing else is using the function
435 * tracing utility.
436 */
437 if (trace_selftest_recursion_cnt++)
438 return;
439 DYN_FTRACE_TEST_NAME();
440}
441
442static struct ftrace_ops test_rec_probe = {
443 .func = trace_selftest_test_recursion_func,
444};
445
446static struct ftrace_ops test_recsafe_probe = {
447 .func = trace_selftest_test_recursion_safe_func,
448 .flags = FTRACE_OPS_FL_RECURSION_SAFE,
449};
450
451static int
452trace_selftest_function_recursion(void)
453{
454 int save_ftrace_enabled = ftrace_enabled;
455 int save_tracer_enabled = tracer_enabled;
456 char *func_name;
457 int len;
458 int ret;
459 int cnt;
460
461 /* The previous test PASSED */
462 pr_cont("PASSED\n");
463 pr_info("Testing ftrace recursion: ");
464
465
466 /* enable tracing, and record the filter function */
467 ftrace_enabled = 1;
468 tracer_enabled = 1;
469
470 /* Handle PPC64 '.' name */
471 func_name = "*" __stringify(DYN_FTRACE_TEST_NAME);
472 len = strlen(func_name);
473
474 ret = ftrace_set_filter(&test_rec_probe, func_name, len, 1);
475 if (ret) {
476 pr_cont("*Could not set filter* ");
477 goto out;
478 }
479
480 ret = register_ftrace_function(&test_rec_probe);
481 if (ret) {
482 pr_cont("*could not register callback* ");
483 goto out;
484 }
485
486 DYN_FTRACE_TEST_NAME();
487
488 unregister_ftrace_function(&test_rec_probe);
489
490 ret = -1;
491 if (trace_selftest_recursion_cnt != 1) {
492 pr_cont("*callback not called once (%d)* ",
493 trace_selftest_recursion_cnt);
494 goto out;
495 }
496
497 trace_selftest_recursion_cnt = 1;
498
499 pr_cont("PASSED\n");
500 pr_info("Testing ftrace recursion safe: ");
501
502 ret = ftrace_set_filter(&test_recsafe_probe, func_name, len, 1);
503 if (ret) {
504 pr_cont("*Could not set filter* ");
505 goto out;
506 }
507
508 ret = register_ftrace_function(&test_recsafe_probe);
509 if (ret) {
510 pr_cont("*could not register callback* ");
511 goto out;
512 }
513
514 DYN_FTRACE_TEST_NAME();
515
516 unregister_ftrace_function(&test_recsafe_probe);
517
518 /*
519 * If arch supports all ftrace features, and no other task
520 * was on the list, we should be fine.
521 */
522 if (!ftrace_nr_registered_ops() && !FTRACE_FORCE_LIST_FUNC)
523 cnt = 2; /* Should have recursed */
524 else
525 cnt = 1;
526
527 ret = -1;
528 if (trace_selftest_recursion_cnt != cnt) {
529 pr_cont("*callback not called expected %d times (%d)* ",
530 cnt, trace_selftest_recursion_cnt);
531 goto out;
532 }
533
534 ret = 0;
535out:
536 ftrace_enabled = save_ftrace_enabled;
537 tracer_enabled = save_tracer_enabled;
538
539 return ret;
540}
396#else 541#else
397# define trace_selftest_startup_dynamic_tracing(trace, tr, func) ({ 0; }) 542# define trace_selftest_startup_dynamic_tracing(trace, tr, func) ({ 0; })
543# define trace_selftest_function_recursion() ({ 0; })
398#endif /* CONFIG_DYNAMIC_FTRACE */ 544#endif /* CONFIG_DYNAMIC_FTRACE */
399 545
/* Result recorded by trace_selftest_test_regs_func(). */
static enum {
	TRACE_SELFTEST_REGS_START,	/* initial value: nothing recorded yet */
	TRACE_SELFTEST_REGS_FOUND,	/* callback was handed non-NULL pt_regs */
	TRACE_SELFTEST_REGS_NOT_FOUND,	/* callback was handed NULL pt_regs */
} trace_selftest_regs_stat;

/*
 * ftrace callback for the save-regs selftest: records whether a
 * pt_regs pointer was passed in. The other arguments are unused.
 */
static void trace_selftest_test_regs_func(unsigned long ip,
					  unsigned long pip,
					  struct ftrace_ops *op,
					  struct pt_regs *pt_regs)
{
	trace_selftest_regs_stat = pt_regs ? TRACE_SELFTEST_REGS_FOUND
					   : TRACE_SELFTEST_REGS_NOT_FOUND;
}
562
563static struct ftrace_ops test_regs_probe = {
564 .func = trace_selftest_test_regs_func,
565 .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_SAVE_REGS,
566};
567
568static int
569trace_selftest_function_regs(void)
570{
571 int save_ftrace_enabled = ftrace_enabled;
572 int save_tracer_enabled = tracer_enabled;
573 char *func_name;
574 int len;
575 int ret;
576 int supported = 0;
577
578#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS
579 supported = 1;
580#endif
581
582 /* The previous test PASSED */
583 pr_cont("PASSED\n");
584 pr_info("Testing ftrace regs%s: ",
585 !supported ? "(no arch support)" : "");
586
587 /* enable tracing, and record the filter function */
588 ftrace_enabled = 1;
589 tracer_enabled = 1;
590
591 /* Handle PPC64 '.' name */
592 func_name = "*" __stringify(DYN_FTRACE_TEST_NAME);
593 len = strlen(func_name);
594
595 ret = ftrace_set_filter(&test_regs_probe, func_name, len, 1);
596 /*
597 * If DYNAMIC_FTRACE is not set, then we just trace all functions.
598 * This test really doesn't care.
599 */
600 if (ret && ret != -ENODEV) {
601 pr_cont("*Could not set filter* ");
602 goto out;
603 }
604
605 ret = register_ftrace_function(&test_regs_probe);
606 /*
607 * Now if the arch does not support passing regs, then this should
608 * have failed.
609 */
610 if (!supported) {
611 if (!ret) {
612 pr_cont("*registered save-regs without arch support* ");
613 goto out;
614 }
615 test_regs_probe.flags |= FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED;
616 ret = register_ftrace_function(&test_regs_probe);
617 }
618 if (ret) {
619 pr_cont("*could not register callback* ");
620 goto out;
621 }
622
623
624 DYN_FTRACE_TEST_NAME();
625
626 unregister_ftrace_function(&test_regs_probe);
627
628 ret = -1;
629
630 switch (trace_selftest_regs_stat) {
631 case TRACE_SELFTEST_REGS_START:
632 pr_cont("*callback never called* ");
633 goto out;
634
635 case TRACE_SELFTEST_REGS_FOUND:
636 if (supported)
637 break;
638 pr_cont("*callback received regs without arch support* ");
639 goto out;
640
641 case TRACE_SELFTEST_REGS_NOT_FOUND:
642 if (!supported)
643 break;
644 pr_cont("*callback received NULL regs* ");
645 goto out;
646 }
647
648 ret = 0;
649out:
650 ftrace_enabled = save_ftrace_enabled;
651 tracer_enabled = save_tracer_enabled;
652
653 return ret;
654}
655
400/* 656/*
401 * Simple verification test of ftrace function tracer. 657 * Simple verification test of ftrace function tracer.
402 * Enable ftrace, sleep 1/10 second, and then read the trace 658 * Enable ftrace, sleep 1/10 second, and then read the trace
@@ -442,7 +698,14 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
442 698
443 ret = trace_selftest_startup_dynamic_tracing(trace, tr, 699 ret = trace_selftest_startup_dynamic_tracing(trace, tr,
444 DYN_FTRACE_TEST_NAME); 700 DYN_FTRACE_TEST_NAME);
701 if (ret)
702 goto out;
445 703
704 ret = trace_selftest_function_recursion();
705 if (ret)
706 goto out;
707
708 ret = trace_selftest_function_regs();
446 out: 709 out:
447 ftrace_enabled = save_ftrace_enabled; 710 ftrace_enabled = save_ftrace_enabled;
448 tracer_enabled = save_tracer_enabled; 711 tracer_enabled = save_tracer_enabled;
@@ -778,6 +1041,8 @@ static int trace_wakeup_test_thread(void *data)
778 set_current_state(TASK_INTERRUPTIBLE); 1041 set_current_state(TASK_INTERRUPTIBLE);
779 schedule(); 1042 schedule();
780 1043
1044 complete(x);
1045
781 /* we are awake, now wait to disappear */ 1046 /* we are awake, now wait to disappear */
782 while (!kthread_should_stop()) { 1047 while (!kthread_should_stop()) {
783 /* 1048 /*
@@ -821,24 +1086,21 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
821 /* reset the max latency */ 1086 /* reset the max latency */
822 tracing_max_latency = 0; 1087 tracing_max_latency = 0;
823 1088
824 /* sleep to let the RT thread sleep too */ 1089 while (p->on_rq) {
825 msleep(100); 1090 /*
1091 * Sleep to make sure the RT thread is asleep too.
1092 * On virtual machines we can't rely on timings,
1093 * but we want to make sure this test still works.
1094 */
1095 msleep(100);
1096 }
826 1097
827 /* 1098 init_completion(&isrt);
828 * Yes this is slightly racy. It is possible that for some
829 * strange reason that the RT thread we created, did not
830 * call schedule for 100ms after doing the completion,
831 * and we do a wakeup on a task that already is awake.
832 * But that is extremely unlikely, and the worst thing that
833 * happens in such a case, is that we disable tracing.
834 * Honestly, if this race does happen something is horrible
835 * wrong with the system.
836 */
837 1099
838 wake_up_process(p); 1100 wake_up_process(p);
839 1101
840 /* give a little time to let the thread wake up */ 1102 /* Wait for the task to wake up */
841 msleep(100); 1103 wait_for_completion(&isrt);
842 1104
843 /* stop the tracing. */ 1105 /* stop the tracing. */
844 tracing_stop(); 1106 tracing_stop();
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index d4545f49242e..0c1b165778e5 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -111,7 +111,8 @@ static inline void check_stack(void)
111} 111}
112 112
113static void 113static void
114stack_trace_call(unsigned long ip, unsigned long parent_ip) 114stack_trace_call(unsigned long ip, unsigned long parent_ip,
115 struct ftrace_ops *op, struct pt_regs *pt_regs)
115{ 116{
116 int cpu; 117 int cpu;
117 118
@@ -136,6 +137,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip)
136static struct ftrace_ops trace_ops __read_mostly = 137static struct ftrace_ops trace_ops __read_mostly =
137{ 138{
138 .func = stack_trace_call, 139 .func = stack_trace_call,
140 .flags = FTRACE_OPS_FL_RECURSION_SAFE,
139}; 141};
140 142
141static ssize_t 143static ssize_t
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 6b245f64c8dd..2485a7d09b11 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -487,7 +487,7 @@ int __init init_ftrace_syscalls(void)
487 487
488 return 0; 488 return 0;
489} 489}
490core_initcall(init_ftrace_syscalls); 490early_initcall(init_ftrace_syscalls);
491 491
492#ifdef CONFIG_PERF_EVENTS 492#ifdef CONFIG_PERF_EVENTS
493 493