author	Linus Torvalds <torvalds@linux-foundation.org>	2014-06-12 22:18:49 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-06-12 22:18:49 -0400
commit	3737a12761636ebde0f09ef49daebb8eed18cc8a (patch)
tree	965057f4bccd97049f8c0140f8670c5d4278ca3e
parent	c29deef32e3699e40da3e9e82267610de04e6b54 (diff)
parent	82b897782d10fcc4930c9d4a15b175348fdd2871 (diff)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull more perf updates from Ingo Molnar:
 "A second round of perf updates:

   - wide reaching kprobes sanitization and robustization, with the hope
     of fixing all 'probe this function crashes the kernel' bugs, by
     Masami Hiramatsu.

   - uprobes updates from Oleg Nesterov: tmpfs support, corner case
     fixes and robustization work.

   - perf tooling updates and fixes from Jiri Olsa, Namhyung Kim,
     Arnaldo et al:

        * Add support to accumulate hist periods (Namhyung Kim)

        * various fixes, refactorings and enhancements"

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (101 commits)
  perf: Differentiate exec() and non-exec() comm events
  perf: Fix perf_event_comm() vs. exec() assumption
  uprobes/x86: Rename arch_uprobe->def to ->defparam, minor comment updates
  perf/documentation: Add description for conditional branch filter
  perf/x86: Add conditional branch filtering support
  perf/tool: Add conditional branch filter 'cond' to perf record
  perf: Add new conditional branch filter 'PERF_SAMPLE_BRANCH_COND'
  uprobes: Teach copy_insn() to support tmpfs
  uprobes: Shift ->readpage check from __copy_insn() to uprobe_register()
  perf/x86: Use common PMU interrupt disabled code
  perf/ARM: Use common PMU interrupt disabled code
  perf: Disable sampled events if no PMU interrupt
  perf: Fix use after free in perf_remove_from_context()
  perf tools: Fix 'make help' message error
  perf record: Fix poll return value propagation
  perf tools: Move elide bool into perf_hpp_fmt struct
  perf tools: Remove elide setup for SORT_MODE__MEMORY mode
  perf tools: Fix "==" into "=" in ui_browser__warning assignment
  perf tools: Allow overriding sysfs and proc finding with env var
  perf tools: Consider header files outside perf directory in tags target
  ...
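
Of the features listed above, the new PERF_SAMPLE_BRANCH_COND filter is visible to userspace through perf_event_attr.branch_sample_type (include/uapi/linux/perf_event.h is touched in the diffstat below). The following is a minimal, hypothetical userspace sketch of requesting conditional-branch-only sampling; the helper name and the sample period are illustrative and not part of this series:

/* Sketch only: request a cycles counter whose branch stack records just
 * conditional branches, using the filter bit added by this series. */
#define _GNU_SOURCE
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>

static int open_cond_branch_sampler(pid_t pid)	/* hypothetical helper */
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_period = 100000;		/* illustrative value */
	attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
	/* New in this series: keep only conditional branches */
	attr.branch_sample_type = PERF_SAMPLE_BRANCH_COND |
				  PERF_SAMPLE_BRANCH_USER;
	attr.disabled = 1;
	attr.exclude_kernel = 1;

	return syscall(__NR_perf_event_open, &attr, pid, -1, -1, 0);
}

The tooling counterpart is the 'cond' keyword added to perf record's -j/--branch-filter option ("perf/tool: Add conditional branch filter 'cond' to perf record" in the shortlog above).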
-rw-r--r--Documentation/kprobes.txt16
-rw-r--r--arch/arm/kernel/perf_event.c2
-rw-r--r--arch/arm/kernel/perf_event_cpu.c8
-rw-r--r--arch/x86/include/asm/asm.h7
-rw-r--r--arch/x86/include/asm/kprobes.h2
-rw-r--r--arch/x86/include/asm/traps.h3
-rw-r--r--arch/x86/include/asm/uprobes.h10
-rw-r--r--arch/x86/kernel/alternative.c3
-rw-r--r--arch/x86/kernel/apic/hw_nmi.c3
-rw-r--r--arch/x86/kernel/cpu/common.c4
-rw-r--r--arch/x86/kernel/cpu/perf_event.c21
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd_ibs.c3
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_lbr.c5
-rw-r--r--arch/x86/kernel/dumpstack.c9
-rw-r--r--arch/x86/kernel/entry_32.S33
-rw-r--r--arch/x86/kernel/entry_64.S21
-rw-r--r--arch/x86/kernel/hw_breakpoint.c5
-rw-r--r--arch/x86/kernel/kprobes/core.c128
-rw-r--r--arch/x86/kernel/kprobes/ftrace.c17
-rw-r--r--arch/x86/kernel/kprobes/opt.c32
-rw-r--r--arch/x86/kernel/kvm.c4
-rw-r--r--arch/x86/kernel/nmi.c18
-rw-r--r--arch/x86/kernel/paravirt.c6
-rw-r--r--arch/x86/kernel/process_64.c7
-rw-r--r--arch/x86/kernel/traps.c145
-rw-r--r--arch/x86/kernel/uprobes.c505
-rw-r--r--arch/x86/lib/thunk_32.S3
-rw-r--r--arch/x86/lib/thunk_64.S3
-rw-r--r--arch/x86/mm/fault.c29
-rw-r--r--fs/exec.c7
-rw-r--r--include/asm-generic/vmlinux.lds.h10
-rw-r--r--include/linux/compiler.h2
-rw-r--r--include/linux/kprobes.h21
-rw-r--r--include/linux/perf_event.h18
-rw-r--r--include/linux/sched.h6
-rw-r--r--include/linux/uprobes.h4
-rw-r--r--include/uapi/linux/perf_event.h12
-rw-r--r--kernel/events/core.c43
-rw-r--r--kernel/events/uprobes.c52
-rw-r--r--kernel/kprobes.c392
-rw-r--r--kernel/notifier.c22
-rw-r--r--kernel/sched/core.c7
-rw-r--r--kernel/trace/trace_event_perf.c5
-rw-r--r--kernel/trace/trace_kprobe.c71
-rw-r--r--kernel/trace/trace_probe.c65
-rw-r--r--kernel/trace/trace_probe.h15
-rw-r--r--kernel/trace/trace_uprobe.c66
-rw-r--r--tools/lib/api/fs/fs.c43
-rw-r--r--tools/perf/Documentation/perf-record.txt3
-rw-r--r--tools/perf/Documentation/perf-report.txt7
-rw-r--r--tools/perf/Documentation/perf-top.txt8
-rw-r--r--tools/perf/Makefile.perf14
-rw-r--r--tools/perf/builtin-annotate.c5
-rw-r--r--tools/perf/builtin-diff.c2
-rw-r--r--tools/perf/builtin-record.c7
-rw-r--r--tools/perf/builtin-report.c210
-rw-r--r--tools/perf/builtin-sched.c2
-rw-r--r--tools/perf/builtin-top.c90
-rw-r--r--tools/perf/config/Makefile3
-rw-r--r--tools/perf/perf.c8
-rw-r--r--tools/perf/tests/builtin-test.c4
-rw-r--r--tools/perf/tests/hists_common.c52
-rw-r--r--tools/perf/tests/hists_common.h32
-rw-r--r--tools/perf/tests/hists_cumulate.c726
-rw-r--r--tools/perf/tests/hists_filter.c39
-rw-r--r--tools/perf/tests/hists_link.c36
-rw-r--r--tools/perf/tests/hists_output.c31
-rw-r--r--tools/perf/tests/tests.h1
-rw-r--r--tools/perf/ui/browser.c2
-rw-r--r--tools/perf/ui/browsers/hists.c73
-rw-r--r--tools/perf/ui/gtk/hists.c33
-rw-r--r--tools/perf/ui/hist.c119
-rw-r--r--tools/perf/ui/stdio/hist.c8
-rw-r--r--tools/perf/util/callchain.c45
-rw-r--r--tools/perf/util/callchain.h11
-rw-r--r--tools/perf/util/hist.c481
-rw-r--r--tools/perf/util/hist.h57
-rw-r--r--tools/perf/util/sort.c107
-rw-r--r--tools/perf/util/sort.h20
-rw-r--r--tools/perf/util/symbol.c11
-rw-r--r--tools/perf/util/symbol.h1
81 files changed, 2979 insertions, 1182 deletions
diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt
index 0cfb00fd86ff..4bbeca8483ed 100644
--- a/Documentation/kprobes.txt
+++ b/Documentation/kprobes.txt
@@ -22,8 +22,9 @@ Appendix B: The kprobes sysctl interface
22 22
23Kprobes enables you to dynamically break into any kernel routine and 23Kprobes enables you to dynamically break into any kernel routine and
24collect debugging and performance information non-disruptively. You 24collect debugging and performance information non-disruptively. You
25can trap at almost any kernel code address, specifying a handler 25can trap at almost any kernel code address(*), specifying a handler
26routine to be invoked when the breakpoint is hit. 26routine to be invoked when the breakpoint is hit.
27(*: some parts of the kernel code can not be trapped, see 1.5 Blacklist)
27 28
28There are currently three types of probes: kprobes, jprobes, and 29There are currently three types of probes: kprobes, jprobes, and
29kretprobes (also called return probes). A kprobe can be inserted 30kretprobes (also called return probes). A kprobe can be inserted
@@ -273,6 +274,19 @@ using one of the following techniques:
273 or 274 or
274- Execute 'sysctl -w debug.kprobes_optimization=n' 275- Execute 'sysctl -w debug.kprobes_optimization=n'
275 276
2771.5 Blacklist
278
279Kprobes can probe most of the kernel except itself. This means
280that there are some functions where kprobes cannot probe. Probing
281(trapping) such functions can cause a recursive trap (e.g. double
282fault) or the nested probe handler may never be called.
283Kprobes manages such functions as a blacklist.
284If you want to add a function into the blacklist, you just need
285to (1) include linux/kprobes.h and (2) use NOKPROBE_SYMBOL() macro
286to specify a blacklisted function.
287Kprobes checks the given probe address against the blacklist and
288rejects registering it, if the given address is in the blacklist.
289
2762. Architectures Supported 2902. Architectures Supported
277 291
278Kprobes, jprobes, and return probes are implemented on the following 292Kprobes, jprobes, and return probes are implemented on the following
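
The blacklist section added to kprobes.txt above describes the NOKPROBE_SYMBOL() interface in prose. As a hedged illustration (the function below is hypothetical and not part of this patch), marking a function non-probeable looks like this:

/* Hypothetical example: a handler that must never be hit by a kprobe,
 * e.g. because it runs in NMI context or while single-stepping. */
#include <linux/kprobes.h>

static int my_fragile_handler(struct pt_regs *regs)
{
	/* ... work that a recursive breakpoint would corrupt ... */
	return 0;
}
/* (1) linux/kprobes.h is included, (2) NOKPROBE_SYMBOL() places the
 * function on the blacklist that register_kprobe() checks against. */
NOKPROBE_SYMBOL(my_fragile_handler);

This is the same pattern the rest of the series applies, e.g. NOKPROBE_SYMBOL(arch_trigger_all_cpu_backtrace_handler) in arch/x86/kernel/apic/hw_nmi.c further down.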
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index a6bc431cde70..4238bcba9d60 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -410,7 +410,7 @@ __hw_perf_event_init(struct perf_event *event)
410 */ 410 */
411 hwc->config_base |= (unsigned long)mapping; 411 hwc->config_base |= (unsigned long)mapping;
412 412
413 if (!hwc->sample_period) { 413 if (!is_sampling_event(event)) {
414 /* 414 /*
415 * For non-sampling runs, limit the sample_period to half 415 * For non-sampling runs, limit the sample_period to half
416 * of the counter width. That way, the new counter value 416 * of the counter width. That way, the new counter value
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index a71ae1523620..af9e35e8836f 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -126,8 +126,8 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
126 126
127 irqs = min(pmu_device->num_resources, num_possible_cpus()); 127 irqs = min(pmu_device->num_resources, num_possible_cpus());
128 if (irqs < 1) { 128 if (irqs < 1) {
129 pr_err("no irqs for PMUs defined\n"); 129 printk_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n");
130 return -ENODEV; 130 return 0;
131 } 131 }
132 132
133 irq = platform_get_irq(pmu_device, 0); 133 irq = platform_get_irq(pmu_device, 0);
@@ -191,6 +191,10 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
191 /* Ensure the PMU has sane values out of reset. */ 191 /* Ensure the PMU has sane values out of reset. */
192 if (cpu_pmu->reset) 192 if (cpu_pmu->reset)
193 on_each_cpu(cpu_pmu->reset, cpu_pmu, 1); 193 on_each_cpu(cpu_pmu->reset, cpu_pmu, 1);
194
195 /* If no interrupts available, set the corresponding capability flag */
196 if (!platform_get_irq(cpu_pmu->plat_device, 0))
197 cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
194} 198}
195 199
196/* 200/*
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 4582e8e1cd1a..7730c1c5c83a 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -57,6 +57,12 @@
57 .long (from) - . ; \ 57 .long (from) - . ; \
58 .long (to) - . + 0x7ffffff0 ; \ 58 .long (to) - . + 0x7ffffff0 ; \
59 .popsection 59 .popsection
60
61# define _ASM_NOKPROBE(entry) \
62 .pushsection "_kprobe_blacklist","aw" ; \
63 _ASM_ALIGN ; \
64 _ASM_PTR (entry); \
65 .popsection
60#else 66#else
61# define _ASM_EXTABLE(from,to) \ 67# define _ASM_EXTABLE(from,to) \
62 " .pushsection \"__ex_table\",\"a\"\n" \ 68 " .pushsection \"__ex_table\",\"a\"\n" \
@@ -71,6 +77,7 @@
71 " .long (" #from ") - .\n" \ 77 " .long (" #from ") - .\n" \
72 " .long (" #to ") - . + 0x7ffffff0\n" \ 78 " .long (" #to ") - . + 0x7ffffff0\n" \
73 " .popsection\n" 79 " .popsection\n"
80/* For C file, we already have NOKPROBE_SYMBOL macro */
74#endif 81#endif
75 82
76#endif /* _ASM_X86_ASM_H */ 83#endif /* _ASM_X86_ASM_H */
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 9454c167629f..53cdfb2857ab 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -116,4 +116,6 @@ struct kprobe_ctlblk {
116extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); 116extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
117extern int kprobe_exceptions_notify(struct notifier_block *self, 117extern int kprobe_exceptions_notify(struct notifier_block *self,
118 unsigned long val, void *data); 118 unsigned long val, void *data);
119extern int kprobe_int3_handler(struct pt_regs *regs);
120extern int kprobe_debug_handler(struct pt_regs *regs);
119#endif /* _ASM_X86_KPROBES_H */ 121#endif /* _ASM_X86_KPROBES_H */
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 8ba18842c48e..bc8352e7010a 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -68,7 +68,7 @@ dotraplinkage void do_segment_not_present(struct pt_regs *, long);
68dotraplinkage void do_stack_segment(struct pt_regs *, long); 68dotraplinkage void do_stack_segment(struct pt_regs *, long);
69#ifdef CONFIG_X86_64 69#ifdef CONFIG_X86_64
70dotraplinkage void do_double_fault(struct pt_regs *, long); 70dotraplinkage void do_double_fault(struct pt_regs *, long);
71asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *); 71asmlinkage struct pt_regs *sync_regs(struct pt_regs *);
72#endif 72#endif
73dotraplinkage void do_general_protection(struct pt_regs *, long); 73dotraplinkage void do_general_protection(struct pt_regs *, long);
74dotraplinkage void do_page_fault(struct pt_regs *, unsigned long); 74dotraplinkage void do_page_fault(struct pt_regs *, unsigned long);
@@ -103,7 +103,6 @@ static inline int get_si_code(unsigned long condition)
103 103
104extern int panic_on_unrecovered_nmi; 104extern int panic_on_unrecovered_nmi;
105 105
106void math_error(struct pt_regs *, int, int);
107void math_emulate(struct math_emu_info *); 106void math_emulate(struct math_emu_info *);
108#ifndef CONFIG_X86_32 107#ifndef CONFIG_X86_32
109asmlinkage void smp_thermal_interrupt(void); 108asmlinkage void smp_thermal_interrupt(void);
diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
index 93bee7b93854..74f4c2ff6427 100644
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -41,18 +41,18 @@ struct arch_uprobe {
41 u8 ixol[MAX_UINSN_BYTES]; 41 u8 ixol[MAX_UINSN_BYTES];
42 }; 42 };
43 43
44 u16 fixups;
45 const struct uprobe_xol_ops *ops; 44 const struct uprobe_xol_ops *ops;
46 45
47 union { 46 union {
48#ifdef CONFIG_X86_64
49 unsigned long rip_rela_target_address;
50#endif
51 struct { 47 struct {
52 s32 offs; 48 s32 offs;
53 u8 ilen; 49 u8 ilen;
54 u8 opc1; 50 u8 opc1;
55 } branch; 51 } branch;
52 struct {
53 u8 fixups;
54 u8 ilen;
55 } defparam;
56 }; 56 };
57}; 57};
58 58
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index df94598ad05a..703130f469ec 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -5,7 +5,6 @@
5#include <linux/mutex.h> 5#include <linux/mutex.h>
6#include <linux/list.h> 6#include <linux/list.h>
7#include <linux/stringify.h> 7#include <linux/stringify.h>
8#include <linux/kprobes.h>
9#include <linux/mm.h> 8#include <linux/mm.h>
10#include <linux/vmalloc.h> 9#include <linux/vmalloc.h>
11#include <linux/memory.h> 10#include <linux/memory.h>
@@ -551,7 +550,7 @@ void *__init_or_module text_poke_early(void *addr, const void *opcode,
551 * 550 *
552 * Note: Must be called under text_mutex. 551 * Note: Must be called under text_mutex.
553 */ 552 */
554void *__kprobes text_poke(void *addr, const void *opcode, size_t len) 553void *text_poke(void *addr, const void *opcode, size_t len)
555{ 554{
556 unsigned long flags; 555 unsigned long flags;
557 char *vaddr; 556 char *vaddr;
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index eab67047dec3..c3fcb5de5083 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -60,7 +60,7 @@ void arch_trigger_all_cpu_backtrace(void)
60 smp_mb__after_atomic(); 60 smp_mb__after_atomic();
61} 61}
62 62
63static int __kprobes 63static int
64arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs) 64arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs)
65{ 65{
66 int cpu; 66 int cpu;
@@ -80,6 +80,7 @@ arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs)
80 80
81 return NMI_DONE; 81 return NMI_DONE;
82} 82}
83NOKPROBE_SYMBOL(arch_trigger_all_cpu_backtrace_handler);
83 84
84static int __init register_trigger_all_cpu_backtrace(void) 85static int __init register_trigger_all_cpu_backtrace(void)
85{ 86{
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 2cbbf88d8f2c..ef1b93f18ed1 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -8,6 +8,7 @@
8#include <linux/delay.h> 8#include <linux/delay.h>
9#include <linux/sched.h> 9#include <linux/sched.h>
10#include <linux/init.h> 10#include <linux/init.h>
11#include <linux/kprobes.h>
11#include <linux/kgdb.h> 12#include <linux/kgdb.h>
12#include <linux/smp.h> 13#include <linux/smp.h>
13#include <linux/io.h> 14#include <linux/io.h>
@@ -1193,6 +1194,7 @@ int is_debug_stack(unsigned long addr)
1193 (addr <= __get_cpu_var(debug_stack_addr) && 1194 (addr <= __get_cpu_var(debug_stack_addr) &&
1194 addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ)); 1195 addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ));
1195} 1196}
1197NOKPROBE_SYMBOL(is_debug_stack);
1196 1198
1197DEFINE_PER_CPU(u32, debug_idt_ctr); 1199DEFINE_PER_CPU(u32, debug_idt_ctr);
1198 1200
@@ -1201,6 +1203,7 @@ void debug_stack_set_zero(void)
1201 this_cpu_inc(debug_idt_ctr); 1203 this_cpu_inc(debug_idt_ctr);
1202 load_current_idt(); 1204 load_current_idt();
1203} 1205}
1206NOKPROBE_SYMBOL(debug_stack_set_zero);
1204 1207
1205void debug_stack_reset(void) 1208void debug_stack_reset(void)
1206{ 1209{
@@ -1209,6 +1212,7 @@ void debug_stack_reset(void)
1209 if (this_cpu_dec_return(debug_idt_ctr) == 0) 1212 if (this_cpu_dec_return(debug_idt_ctr) == 0)
1210 load_current_idt(); 1213 load_current_idt();
1211} 1214}
1215NOKPROBE_SYMBOL(debug_stack_reset);
1212 1216
1213#else /* CONFIG_X86_64 */ 1217#else /* CONFIG_X86_64 */
1214 1218
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 89f3b7c1af20..2bdfbff8a4f6 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -303,15 +303,6 @@ int x86_setup_perfctr(struct perf_event *event)
303 hwc->sample_period = x86_pmu.max_period; 303 hwc->sample_period = x86_pmu.max_period;
304 hwc->last_period = hwc->sample_period; 304 hwc->last_period = hwc->sample_period;
305 local64_set(&hwc->period_left, hwc->sample_period); 305 local64_set(&hwc->period_left, hwc->sample_period);
306 } else {
307 /*
308 * If we have a PMU initialized but no APIC
309 * interrupts, we cannot sample hardware
310 * events (user-space has to fall back and
311 * sample via a hrtimer based software event):
312 */
313 if (!x86_pmu.apic)
314 return -EOPNOTSUPP;
315 } 306 }
316 307
317 if (attr->type == PERF_TYPE_RAW) 308 if (attr->type == PERF_TYPE_RAW)
@@ -1293,7 +1284,7 @@ void perf_events_lapic_init(void)
1293 apic_write(APIC_LVTPC, APIC_DM_NMI); 1284 apic_write(APIC_LVTPC, APIC_DM_NMI);
1294} 1285}
1295 1286
1296static int __kprobes 1287static int
1297perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs) 1288perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
1298{ 1289{
1299 u64 start_clock; 1290 u64 start_clock;
@@ -1311,6 +1302,7 @@ perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
1311 1302
1312 return ret; 1303 return ret;
1313} 1304}
1305NOKPROBE_SYMBOL(perf_event_nmi_handler);
1314 1306
1315struct event_constraint emptyconstraint; 1307struct event_constraint emptyconstraint;
1316struct event_constraint unconstrained; 1308struct event_constraint unconstrained;
@@ -1366,6 +1358,15 @@ static void __init pmu_check_apic(void)
1366 x86_pmu.apic = 0; 1358 x86_pmu.apic = 0;
1367 pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n"); 1359 pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
1368 pr_info("no hardware sampling interrupt available.\n"); 1360 pr_info("no hardware sampling interrupt available.\n");
1361
1362 /*
1363 * If we have a PMU initialized but no APIC
1364 * interrupts, we cannot sample hardware
1365 * events (user-space has to fall back and
1366 * sample via a hrtimer based software event):
1367 */
1368 pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
1369
1369} 1370}
1370 1371
1371static struct attribute_group x86_pmu_format_group = { 1372static struct attribute_group x86_pmu_format_group = {
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index 4c36bbe3173a..cbb1be3ed9e4 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -593,7 +593,7 @@ out:
593 return 1; 593 return 1;
594} 594}
595 595
596static int __kprobes 596static int
597perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs) 597perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
598{ 598{
599 int handled = 0; 599 int handled = 0;
@@ -606,6 +606,7 @@ perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
606 606
607 return handled; 607 return handled;
608} 608}
609NOKPROBE_SYMBOL(perf_ibs_nmi_handler);
609 610
610static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name) 611static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
611{ 612{
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index d82d155aca8c..9dd2459a4c73 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -384,6 +384,9 @@ static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
384 if (br_type & PERF_SAMPLE_BRANCH_NO_TX) 384 if (br_type & PERF_SAMPLE_BRANCH_NO_TX)
385 mask |= X86_BR_NO_TX; 385 mask |= X86_BR_NO_TX;
386 386
387 if (br_type & PERF_SAMPLE_BRANCH_COND)
388 mask |= X86_BR_JCC;
389
387 /* 390 /*
388 * stash actual user request into reg, it may 391 * stash actual user request into reg, it may
389 * be used by fixup code for some CPU 392 * be used by fixup code for some CPU
@@ -678,6 +681,7 @@ static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
678 * NHM/WSM erratum: must include IND_JMP to capture IND_CALL 681 * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
679 */ 682 */
680 [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP, 683 [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP,
684 [PERF_SAMPLE_BRANCH_COND] = LBR_JCC,
681}; 685};
682 686
683static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { 687static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
@@ -689,6 +693,7 @@ static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
689 [PERF_SAMPLE_BRANCH_ANY_CALL] = LBR_REL_CALL | LBR_IND_CALL 693 [PERF_SAMPLE_BRANCH_ANY_CALL] = LBR_REL_CALL | LBR_IND_CALL
690 | LBR_FAR, 694 | LBR_FAR,
691 [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL, 695 [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL,
696 [PERF_SAMPLE_BRANCH_COND] = LBR_JCC,
692}; 697};
693 698
694/* core */ 699/* core */
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index d9c12d3022a7..b74ebc7c4402 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -200,7 +200,7 @@ static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
200static int die_owner = -1; 200static int die_owner = -1;
201static unsigned int die_nest_count; 201static unsigned int die_nest_count;
202 202
203unsigned __kprobes long oops_begin(void) 203unsigned long oops_begin(void)
204{ 204{
205 int cpu; 205 int cpu;
206 unsigned long flags; 206 unsigned long flags;
@@ -223,8 +223,9 @@ unsigned __kprobes long oops_begin(void)
223 return flags; 223 return flags;
224} 224}
225EXPORT_SYMBOL_GPL(oops_begin); 225EXPORT_SYMBOL_GPL(oops_begin);
226NOKPROBE_SYMBOL(oops_begin);
226 227
227void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) 228void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
228{ 229{
229 if (regs && kexec_should_crash(current)) 230 if (regs && kexec_should_crash(current))
230 crash_kexec(regs); 231 crash_kexec(regs);
@@ -247,8 +248,9 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
247 panic("Fatal exception"); 248 panic("Fatal exception");
248 do_exit(signr); 249 do_exit(signr);
249} 250}
251NOKPROBE_SYMBOL(oops_end);
250 252
251int __kprobes __die(const char *str, struct pt_regs *regs, long err) 253int __die(const char *str, struct pt_regs *regs, long err)
252{ 254{
253#ifdef CONFIG_X86_32 255#ifdef CONFIG_X86_32
254 unsigned short ss; 256 unsigned short ss;
@@ -291,6 +293,7 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
291#endif 293#endif
292 return 0; 294 return 0;
293} 295}
296NOKPROBE_SYMBOL(__die);
294 297
295/* 298/*
296 * This is gone through when something in the kernel has done something bad 299 * This is gone through when something in the kernel has done something bad
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 98313ffaae6a..f0da82b8e634 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -315,10 +315,6 @@ ENTRY(ret_from_kernel_thread)
315ENDPROC(ret_from_kernel_thread) 315ENDPROC(ret_from_kernel_thread)
316 316
317/* 317/*
318 * Interrupt exit functions should be protected against kprobes
319 */
320 .pushsection .kprobes.text, "ax"
321/*
322 * Return to user mode is not as complex as all this looks, 318 * Return to user mode is not as complex as all this looks,
323 * but we want the default path for a system call return to 319 * but we want the default path for a system call return to
324 * go as quickly as possible which is why some of this is 320 * go as quickly as possible which is why some of this is
@@ -372,10 +368,6 @@ need_resched:
372END(resume_kernel) 368END(resume_kernel)
373#endif 369#endif
374 CFI_ENDPROC 370 CFI_ENDPROC
375/*
376 * End of kprobes section
377 */
378 .popsection
379 371
380/* SYSENTER_RETURN points to after the "sysenter" instruction in 372/* SYSENTER_RETURN points to after the "sysenter" instruction in
381 the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ 373 the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */
@@ -495,10 +487,6 @@ sysexit_audit:
495 PTGS_TO_GS_EX 487 PTGS_TO_GS_EX
496ENDPROC(ia32_sysenter_target) 488ENDPROC(ia32_sysenter_target)
497 489
498/*
499 * syscall stub including irq exit should be protected against kprobes
500 */
501 .pushsection .kprobes.text, "ax"
502 # system call handler stub 490 # system call handler stub
503ENTRY(system_call) 491ENTRY(system_call)
504 RING0_INT_FRAME # can't unwind into user space anyway 492 RING0_INT_FRAME # can't unwind into user space anyway
@@ -690,10 +678,6 @@ syscall_badsys:
690 jmp resume_userspace 678 jmp resume_userspace
691END(syscall_badsys) 679END(syscall_badsys)
692 CFI_ENDPROC 680 CFI_ENDPROC
693/*
694 * End of kprobes section
695 */
696 .popsection
697 681
698.macro FIXUP_ESPFIX_STACK 682.macro FIXUP_ESPFIX_STACK
699/* 683/*
@@ -784,10 +768,6 @@ common_interrupt:
784ENDPROC(common_interrupt) 768ENDPROC(common_interrupt)
785 CFI_ENDPROC 769 CFI_ENDPROC
786 770
787/*
788 * Irq entries should be protected against kprobes
789 */
790 .pushsection .kprobes.text, "ax"
791#define BUILD_INTERRUPT3(name, nr, fn) \ 771#define BUILD_INTERRUPT3(name, nr, fn) \
792ENTRY(name) \ 772ENTRY(name) \
793 RING0_INT_FRAME; \ 773 RING0_INT_FRAME; \
@@ -964,10 +944,6 @@ ENTRY(spurious_interrupt_bug)
964 jmp error_code 944 jmp error_code
965 CFI_ENDPROC 945 CFI_ENDPROC
966END(spurious_interrupt_bug) 946END(spurious_interrupt_bug)
967/*
968 * End of kprobes section
969 */
970 .popsection
971 947
972#ifdef CONFIG_XEN 948#ifdef CONFIG_XEN
973/* Xen doesn't set %esp to be precisely what the normal sysenter 949/* Xen doesn't set %esp to be precisely what the normal sysenter
@@ -1242,11 +1218,6 @@ return_to_handler:
1242 jmp *%ecx 1218 jmp *%ecx
1243#endif 1219#endif
1244 1220
1245/*
1246 * Some functions should be protected against kprobes
1247 */
1248 .pushsection .kprobes.text, "ax"
1249
1250#ifdef CONFIG_TRACING 1221#ifdef CONFIG_TRACING
1251ENTRY(trace_page_fault) 1222ENTRY(trace_page_fault)
1252 RING0_EC_FRAME 1223 RING0_EC_FRAME
@@ -1460,7 +1431,3 @@ ENTRY(async_page_fault)
1460END(async_page_fault) 1431END(async_page_fault)
1461#endif 1432#endif
1462 1433
1463/*
1464 * End of kprobes section
1465 */
1466 .popsection
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 48a2644a082a..b25ca969edd2 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -284,8 +284,6 @@ ENDPROC(native_usergs_sysret64)
284 TRACE_IRQS_OFF 284 TRACE_IRQS_OFF
285 .endm 285 .endm
286 286
287/* save complete stack frame */
288 .pushsection .kprobes.text, "ax"
289ENTRY(save_paranoid) 287ENTRY(save_paranoid)
290 XCPT_FRAME 1 RDI+8 288 XCPT_FRAME 1 RDI+8
291 cld 289 cld
@@ -314,7 +312,6 @@ ENTRY(save_paranoid)
3141: ret 3121: ret
315 CFI_ENDPROC 313 CFI_ENDPROC
316END(save_paranoid) 314END(save_paranoid)
317 .popsection
318 315
319/* 316/*
320 * A newly forked process directly context switches into this address. 317 * A newly forked process directly context switches into this address.
@@ -772,10 +769,6 @@ END(interrupt)
772 call \func 769 call \func
773 .endm 770 .endm
774 771
775/*
776 * Interrupt entry/exit should be protected against kprobes
777 */
778 .pushsection .kprobes.text, "ax"
779 /* 772 /*
780 * The interrupt stubs push (~vector+0x80) onto the stack and 773 * The interrupt stubs push (~vector+0x80) onto the stack and
781 * then jump to common_interrupt. 774 * then jump to common_interrupt.
@@ -983,11 +976,6 @@ END(__do_double_fault)
983#endif 976#endif
984 977
985/* 978/*
986 * End of kprobes section
987 */
988 .popsection
989
990/*
991 * APIC interrupts. 979 * APIC interrupts.
992 */ 980 */
993.macro apicinterrupt3 num sym do_sym 981.macro apicinterrupt3 num sym do_sym
@@ -1321,11 +1309,6 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
1321 hyperv_callback_vector hyperv_vector_handler 1309 hyperv_callback_vector hyperv_vector_handler
1322#endif /* CONFIG_HYPERV */ 1310#endif /* CONFIG_HYPERV */
1323 1311
1324/*
1325 * Some functions should be protected against kprobes
1326 */
1327 .pushsection .kprobes.text, "ax"
1328
1329idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK 1312idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
1330idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK 1313idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
1331idtentry stack_segment do_stack_segment has_error_code=1 paranoid=1 1314idtentry stack_segment do_stack_segment has_error_code=1 paranoid=1
@@ -1742,7 +1725,3 @@ ENTRY(ignore_sysret)
1742 CFI_ENDPROC 1725 CFI_ENDPROC
1743END(ignore_sysret) 1726END(ignore_sysret)
1744 1727
1745/*
1746 * End of kprobes section
1747 */
1748 .popsection
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index a67b47c31314..5f9cf20cdb68 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -32,7 +32,6 @@
32#include <linux/irqflags.h> 32#include <linux/irqflags.h>
33#include <linux/notifier.h> 33#include <linux/notifier.h>
34#include <linux/kallsyms.h> 34#include <linux/kallsyms.h>
35#include <linux/kprobes.h>
36#include <linux/percpu.h> 35#include <linux/percpu.h>
37#include <linux/kdebug.h> 36#include <linux/kdebug.h>
38#include <linux/kernel.h> 37#include <linux/kernel.h>
@@ -424,7 +423,7 @@ EXPORT_SYMBOL_GPL(hw_breakpoint_restore);
424 * NOTIFY_STOP returned for all other cases 423 * NOTIFY_STOP returned for all other cases
425 * 424 *
426 */ 425 */
427static int __kprobes hw_breakpoint_handler(struct die_args *args) 426static int hw_breakpoint_handler(struct die_args *args)
428{ 427{
429 int i, cpu, rc = NOTIFY_STOP; 428 int i, cpu, rc = NOTIFY_STOP;
430 struct perf_event *bp; 429 struct perf_event *bp;
@@ -511,7 +510,7 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
511/* 510/*
512 * Handle debug exception notifications. 511 * Handle debug exception notifications.
513 */ 512 */
514int __kprobes hw_breakpoint_exceptions_notify( 513int hw_breakpoint_exceptions_notify(
515 struct notifier_block *unused, unsigned long val, void *data) 514 struct notifier_block *unused, unsigned long val, void *data)
516{ 515{
517 if (val != DIE_DEBUG) 516 if (val != DIE_DEBUG)
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 61b17dc2c277..7596df664901 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -112,7 +112,8 @@ struct kretprobe_blackpoint kretprobe_blacklist[] = {
112 112
113const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); 113const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);
114 114
115static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op) 115static nokprobe_inline void
116__synthesize_relative_insn(void *from, void *to, u8 op)
116{ 117{
117 struct __arch_relative_insn { 118 struct __arch_relative_insn {
118 u8 op; 119 u8 op;
@@ -125,21 +126,23 @@ static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op)
125} 126}
126 127
127/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/ 128/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/
128void __kprobes synthesize_reljump(void *from, void *to) 129void synthesize_reljump(void *from, void *to)
129{ 130{
130 __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE); 131 __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE);
131} 132}
133NOKPROBE_SYMBOL(synthesize_reljump);
132 134
133/* Insert a call instruction at address 'from', which calls address 'to'.*/ 135/* Insert a call instruction at address 'from', which calls address 'to'.*/
134void __kprobes synthesize_relcall(void *from, void *to) 136void synthesize_relcall(void *from, void *to)
135{ 137{
136 __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE); 138 __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE);
137} 139}
140NOKPROBE_SYMBOL(synthesize_relcall);
138 141
139/* 142/*
140 * Skip the prefixes of the instruction. 143 * Skip the prefixes of the instruction.
141 */ 144 */
142static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn) 145static kprobe_opcode_t *skip_prefixes(kprobe_opcode_t *insn)
143{ 146{
144 insn_attr_t attr; 147 insn_attr_t attr;
145 148
@@ -154,12 +157,13 @@ static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn)
154#endif 157#endif
155 return insn; 158 return insn;
156} 159}
160NOKPROBE_SYMBOL(skip_prefixes);
157 161
158/* 162/*
159 * Returns non-zero if opcode is boostable. 163 * Returns non-zero if opcode is boostable.
160 * RIP relative instructions are adjusted at copying time in 64 bits mode 164 * RIP relative instructions are adjusted at copying time in 64 bits mode
161 */ 165 */
162int __kprobes can_boost(kprobe_opcode_t *opcodes) 166int can_boost(kprobe_opcode_t *opcodes)
163{ 167{
164 kprobe_opcode_t opcode; 168 kprobe_opcode_t opcode;
165 kprobe_opcode_t *orig_opcodes = opcodes; 169 kprobe_opcode_t *orig_opcodes = opcodes;
@@ -260,7 +264,7 @@ unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long add
260} 264}
261 265
262/* Check if paddr is at an instruction boundary */ 266/* Check if paddr is at an instruction boundary */
263static int __kprobes can_probe(unsigned long paddr) 267static int can_probe(unsigned long paddr)
264{ 268{
265 unsigned long addr, __addr, offset = 0; 269 unsigned long addr, __addr, offset = 0;
266 struct insn insn; 270 struct insn insn;
@@ -299,7 +303,7 @@ static int __kprobes can_probe(unsigned long paddr)
299/* 303/*
300 * Returns non-zero if opcode modifies the interrupt flag. 304 * Returns non-zero if opcode modifies the interrupt flag.
301 */ 305 */
302static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) 306static int is_IF_modifier(kprobe_opcode_t *insn)
303{ 307{
304 /* Skip prefixes */ 308 /* Skip prefixes */
305 insn = skip_prefixes(insn); 309 insn = skip_prefixes(insn);
@@ -322,7 +326,7 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
322 * If not, return null. 326 * If not, return null.
323 * Only applicable to 64-bit x86. 327 * Only applicable to 64-bit x86.
324 */ 328 */
325int __kprobes __copy_instruction(u8 *dest, u8 *src) 329int __copy_instruction(u8 *dest, u8 *src)
326{ 330{
327 struct insn insn; 331 struct insn insn;
328 kprobe_opcode_t buf[MAX_INSN_SIZE]; 332 kprobe_opcode_t buf[MAX_INSN_SIZE];
@@ -365,7 +369,7 @@ int __kprobes __copy_instruction(u8 *dest, u8 *src)
365 return insn.length; 369 return insn.length;
366} 370}
367 371
368static int __kprobes arch_copy_kprobe(struct kprobe *p) 372static int arch_copy_kprobe(struct kprobe *p)
369{ 373{
370 int ret; 374 int ret;
371 375
@@ -392,7 +396,7 @@ static int __kprobes arch_copy_kprobe(struct kprobe *p)
392 return 0; 396 return 0;
393} 397}
394 398
395int __kprobes arch_prepare_kprobe(struct kprobe *p) 399int arch_prepare_kprobe(struct kprobe *p)
396{ 400{
397 if (alternatives_text_reserved(p->addr, p->addr)) 401 if (alternatives_text_reserved(p->addr, p->addr))
398 return -EINVAL; 402 return -EINVAL;
@@ -407,17 +411,17 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
407 return arch_copy_kprobe(p); 411 return arch_copy_kprobe(p);
408} 412}
409 413
410void __kprobes arch_arm_kprobe(struct kprobe *p) 414void arch_arm_kprobe(struct kprobe *p)
411{ 415{
412 text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1); 416 text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1);
413} 417}
414 418
415void __kprobes arch_disarm_kprobe(struct kprobe *p) 419void arch_disarm_kprobe(struct kprobe *p)
416{ 420{
417 text_poke(p->addr, &p->opcode, 1); 421 text_poke(p->addr, &p->opcode, 1);
418} 422}
419 423
420void __kprobes arch_remove_kprobe(struct kprobe *p) 424void arch_remove_kprobe(struct kprobe *p)
421{ 425{
422 if (p->ainsn.insn) { 426 if (p->ainsn.insn) {
423 free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1)); 427 free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1));
@@ -425,7 +429,8 @@ void __kprobes arch_remove_kprobe(struct kprobe *p)
425 } 429 }
426} 430}
427 431
428static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) 432static nokprobe_inline void
433save_previous_kprobe(struct kprobe_ctlblk *kcb)
429{ 434{
430 kcb->prev_kprobe.kp = kprobe_running(); 435 kcb->prev_kprobe.kp = kprobe_running();
431 kcb->prev_kprobe.status = kcb->kprobe_status; 436 kcb->prev_kprobe.status = kcb->kprobe_status;
@@ -433,7 +438,8 @@ static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
433 kcb->prev_kprobe.saved_flags = kcb->kprobe_saved_flags; 438 kcb->prev_kprobe.saved_flags = kcb->kprobe_saved_flags;
434} 439}
435 440
436static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) 441static nokprobe_inline void
442restore_previous_kprobe(struct kprobe_ctlblk *kcb)
437{ 443{
438 __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); 444 __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
439 kcb->kprobe_status = kcb->prev_kprobe.status; 445 kcb->kprobe_status = kcb->prev_kprobe.status;
@@ -441,8 +447,9 @@ static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
441 kcb->kprobe_saved_flags = kcb->prev_kprobe.saved_flags; 447 kcb->kprobe_saved_flags = kcb->prev_kprobe.saved_flags;
442} 448}
443 449
444static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, 450static nokprobe_inline void
445 struct kprobe_ctlblk *kcb) 451set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
452 struct kprobe_ctlblk *kcb)
446{ 453{
447 __this_cpu_write(current_kprobe, p); 454 __this_cpu_write(current_kprobe, p);
448 kcb->kprobe_saved_flags = kcb->kprobe_old_flags 455 kcb->kprobe_saved_flags = kcb->kprobe_old_flags
@@ -451,7 +458,7 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
451 kcb->kprobe_saved_flags &= ~X86_EFLAGS_IF; 458 kcb->kprobe_saved_flags &= ~X86_EFLAGS_IF;
452} 459}
453 460
454static void __kprobes clear_btf(void) 461static nokprobe_inline void clear_btf(void)
455{ 462{
456 if (test_thread_flag(TIF_BLOCKSTEP)) { 463 if (test_thread_flag(TIF_BLOCKSTEP)) {
457 unsigned long debugctl = get_debugctlmsr(); 464 unsigned long debugctl = get_debugctlmsr();
@@ -461,7 +468,7 @@ static void __kprobes clear_btf(void)
461 } 468 }
462} 469}
463 470
464static void __kprobes restore_btf(void) 471static nokprobe_inline void restore_btf(void)
465{ 472{
466 if (test_thread_flag(TIF_BLOCKSTEP)) { 473 if (test_thread_flag(TIF_BLOCKSTEP)) {
467 unsigned long debugctl = get_debugctlmsr(); 474 unsigned long debugctl = get_debugctlmsr();
@@ -471,8 +478,7 @@ static void __kprobes restore_btf(void)
471 } 478 }
472} 479}
473 480
474void __kprobes 481void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
475arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
476{ 482{
477 unsigned long *sara = stack_addr(regs); 483 unsigned long *sara = stack_addr(regs);
478 484
@@ -481,9 +487,10 @@ arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
481 /* Replace the return addr with trampoline addr */ 487 /* Replace the return addr with trampoline addr */
482 *sara = (unsigned long) &kretprobe_trampoline; 488 *sara = (unsigned long) &kretprobe_trampoline;
483} 489}
490NOKPROBE_SYMBOL(arch_prepare_kretprobe);
484 491
485static void __kprobes 492static void setup_singlestep(struct kprobe *p, struct pt_regs *regs,
486setup_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb, int reenter) 493 struct kprobe_ctlblk *kcb, int reenter)
487{ 494{
488 if (setup_detour_execution(p, regs, reenter)) 495 if (setup_detour_execution(p, regs, reenter))
489 return; 496 return;
@@ -519,22 +526,24 @@ setup_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *k
519 else 526 else
520 regs->ip = (unsigned long)p->ainsn.insn; 527 regs->ip = (unsigned long)p->ainsn.insn;
521} 528}
529NOKPROBE_SYMBOL(setup_singlestep);
522 530
523/* 531/*
524 * We have reentered the kprobe_handler(), since another probe was hit while 532 * We have reentered the kprobe_handler(), since another probe was hit while
525 * within the handler. We save the original kprobes variables and just single 533 * within the handler. We save the original kprobes variables and just single
526 * step on the instruction of the new probe without calling any user handlers. 534 * step on the instruction of the new probe without calling any user handlers.
527 */ 535 */
528static int __kprobes 536static int reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
529reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) 537 struct kprobe_ctlblk *kcb)
530{ 538{
531 switch (kcb->kprobe_status) { 539 switch (kcb->kprobe_status) {
532 case KPROBE_HIT_SSDONE: 540 case KPROBE_HIT_SSDONE:
533 case KPROBE_HIT_ACTIVE: 541 case KPROBE_HIT_ACTIVE:
542 case KPROBE_HIT_SS:
534 kprobes_inc_nmissed_count(p); 543 kprobes_inc_nmissed_count(p);
535 setup_singlestep(p, regs, kcb, 1); 544 setup_singlestep(p, regs, kcb, 1);
536 break; 545 break;
537 case KPROBE_HIT_SS: 546 case KPROBE_REENTER:
538 /* A probe has been hit in the codepath leading up to, or just 547 /* A probe has been hit in the codepath leading up to, or just
539 * after, single-stepping of a probed instruction. This entire 548 * after, single-stepping of a probed instruction. This entire
540 * codepath should strictly reside in .kprobes.text section. 549 * codepath should strictly reside in .kprobes.text section.
@@ -553,12 +562,13 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb
553 562
554 return 1; 563 return 1;
555} 564}
565NOKPROBE_SYMBOL(reenter_kprobe);
556 566
557/* 567/*
558 * Interrupts are disabled on entry as trap3 is an interrupt gate and they 568 * Interrupts are disabled on entry as trap3 is an interrupt gate and they
559 * remain disabled throughout this function. 569 * remain disabled throughout this function.
560 */ 570 */
561static int __kprobes kprobe_handler(struct pt_regs *regs) 571int kprobe_int3_handler(struct pt_regs *regs)
562{ 572{
563 kprobe_opcode_t *addr; 573 kprobe_opcode_t *addr;
564 struct kprobe *p; 574 struct kprobe *p;
@@ -621,12 +631,13 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
621 preempt_enable_no_resched(); 631 preempt_enable_no_resched();
622 return 0; 632 return 0;
623} 633}
634NOKPROBE_SYMBOL(kprobe_int3_handler);
624 635
625/* 636/*
626 * When a retprobed function returns, this code saves registers and 637 * When a retprobed function returns, this code saves registers and
627 * calls trampoline_handler() runs, which calls the kretprobe's handler. 638 * calls trampoline_handler() runs, which calls the kretprobe's handler.
628 */ 639 */
629static void __used __kprobes kretprobe_trampoline_holder(void) 640static void __used kretprobe_trampoline_holder(void)
630{ 641{
631 asm volatile ( 642 asm volatile (
632 ".global kretprobe_trampoline\n" 643 ".global kretprobe_trampoline\n"
@@ -657,11 +668,13 @@ static void __used __kprobes kretprobe_trampoline_holder(void)
657#endif 668#endif
658 " ret\n"); 669 " ret\n");
659} 670}
671NOKPROBE_SYMBOL(kretprobe_trampoline_holder);
672NOKPROBE_SYMBOL(kretprobe_trampoline);
660 673
661/* 674/*
662 * Called from kretprobe_trampoline 675 * Called from kretprobe_trampoline
663 */ 676 */
664__visible __used __kprobes void *trampoline_handler(struct pt_regs *regs) 677__visible __used void *trampoline_handler(struct pt_regs *regs)
665{ 678{
666 struct kretprobe_instance *ri = NULL; 679 struct kretprobe_instance *ri = NULL;
667 struct hlist_head *head, empty_rp; 680 struct hlist_head *head, empty_rp;
@@ -747,6 +760,7 @@ __visible __used __kprobes void *trampoline_handler(struct pt_regs *regs)
747 } 760 }
748 return (void *)orig_ret_address; 761 return (void *)orig_ret_address;
749} 762}
763NOKPROBE_SYMBOL(trampoline_handler);
750 764
751/* 765/*
752 * Called after single-stepping. p->addr is the address of the 766 * Called after single-stepping. p->addr is the address of the
@@ -775,8 +789,8 @@ __visible __used __kprobes void *trampoline_handler(struct pt_regs *regs)
775 * jump instruction after the copied instruction, that jumps to the next 789 * jump instruction after the copied instruction, that jumps to the next
776 * instruction after the probepoint. 790 * instruction after the probepoint.
777 */ 791 */
778static void __kprobes 792static void resume_execution(struct kprobe *p, struct pt_regs *regs,
779resume_execution(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) 793 struct kprobe_ctlblk *kcb)
780{ 794{
781 unsigned long *tos = stack_addr(regs); 795 unsigned long *tos = stack_addr(regs);
782 unsigned long copy_ip = (unsigned long)p->ainsn.insn; 796 unsigned long copy_ip = (unsigned long)p->ainsn.insn;
@@ -851,12 +865,13 @@ resume_execution(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *k
851no_change: 865no_change:
852 restore_btf(); 866 restore_btf();
853} 867}
868NOKPROBE_SYMBOL(resume_execution);
854 869
855/* 870/*
856 * Interrupts are disabled on entry as trap1 is an interrupt gate and they 871 * Interrupts are disabled on entry as trap1 is an interrupt gate and they
857 * remain disabled throughout this function. 872 * remain disabled throughout this function.
858 */ 873 */
859static int __kprobes post_kprobe_handler(struct pt_regs *regs) 874int kprobe_debug_handler(struct pt_regs *regs)
860{ 875{
861 struct kprobe *cur = kprobe_running(); 876 struct kprobe *cur = kprobe_running();
862 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 877 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
@@ -891,8 +906,9 @@ out:
891 906
892 return 1; 907 return 1;
893} 908}
909NOKPROBE_SYMBOL(kprobe_debug_handler);
894 910
895int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) 911int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
896{ 912{
897 struct kprobe *cur = kprobe_running(); 913 struct kprobe *cur = kprobe_running();
898 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 914 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
@@ -949,12 +965,13 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
949 965
950 return 0; 966 return 0;
951} 967}
968NOKPROBE_SYMBOL(kprobe_fault_handler);
952 969
953/* 970/*
954 * Wrapper routine for handling exceptions. 971 * Wrapper routine for handling exceptions.
955 */ 972 */
956int __kprobes 973int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val,
957kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data) 974 void *data)
958{ 975{
959 struct die_args *args = data; 976 struct die_args *args = data;
960 int ret = NOTIFY_DONE; 977 int ret = NOTIFY_DONE;
@@ -962,22 +979,7 @@ kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *d
962 if (args->regs && user_mode_vm(args->regs)) 979 if (args->regs && user_mode_vm(args->regs))
963 return ret; 980 return ret;
964 981
965 switch (val) { 982 if (val == DIE_GPF) {
966 case DIE_INT3:
967 if (kprobe_handler(args->regs))
968 ret = NOTIFY_STOP;
969 break;
970 case DIE_DEBUG:
971 if (post_kprobe_handler(args->regs)) {
972 /*
973 * Reset the BS bit in dr6 (pointed by args->err) to
974 * denote completion of processing
975 */
976 (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
977 ret = NOTIFY_STOP;
978 }
979 break;
980 case DIE_GPF:
981 /* 983 /*
982 * To be potentially processing a kprobe fault and to 984 * To be potentially processing a kprobe fault and to
983 * trust the result from kprobe_running(), we have 985 * trust the result from kprobe_running(), we have
@@ -986,14 +988,12 @@ kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *d
986 if (!preemptible() && kprobe_running() && 988 if (!preemptible() && kprobe_running() &&
987 kprobe_fault_handler(args->regs, args->trapnr)) 989 kprobe_fault_handler(args->regs, args->trapnr))
988 ret = NOTIFY_STOP; 990 ret = NOTIFY_STOP;
989 break;
990 default:
991 break;
992 } 991 }
993 return ret; 992 return ret;
994} 993}
994NOKPROBE_SYMBOL(kprobe_exceptions_notify);
995 995
996int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) 996int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
997{ 997{
998 struct jprobe *jp = container_of(p, struct jprobe, kp); 998 struct jprobe *jp = container_of(p, struct jprobe, kp);
999 unsigned long addr; 999 unsigned long addr;
@@ -1017,8 +1017,9 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
1017 regs->ip = (unsigned long)(jp->entry); 1017 regs->ip = (unsigned long)(jp->entry);
1018 return 1; 1018 return 1;
1019} 1019}
1020NOKPROBE_SYMBOL(setjmp_pre_handler);
1020 1021
1021void __kprobes jprobe_return(void) 1022void jprobe_return(void)
1022{ 1023{
1023 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 1024 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
1024 1025
@@ -1034,8 +1035,10 @@ void __kprobes jprobe_return(void)
1034 " nop \n"::"b" 1035 " nop \n"::"b"
1035 (kcb->jprobe_saved_sp):"memory"); 1036 (kcb->jprobe_saved_sp):"memory");
1036} 1037}
1038NOKPROBE_SYMBOL(jprobe_return);
1039NOKPROBE_SYMBOL(jprobe_return_end);
1037 1040
1038int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) 1041int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
1039{ 1042{
1040 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 1043 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
1041 u8 *addr = (u8 *) (regs->ip - 1); 1044 u8 *addr = (u8 *) (regs->ip - 1);
@@ -1063,13 +1066,22 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
1063 } 1066 }
1064 return 0; 1067 return 0;
1065} 1068}
1069NOKPROBE_SYMBOL(longjmp_break_handler);
1070
1071bool arch_within_kprobe_blacklist(unsigned long addr)
1072{
1073 return (addr >= (unsigned long)__kprobes_text_start &&
1074 addr < (unsigned long)__kprobes_text_end) ||
1075 (addr >= (unsigned long)__entry_text_start &&
1076 addr < (unsigned long)__entry_text_end);
1077}
1066 1078
1067int __init arch_init_kprobes(void) 1079int __init arch_init_kprobes(void)
1068{ 1080{
1069 return 0; 1081 return 0;
1070} 1082}
1071 1083
1072int __kprobes arch_trampoline_kprobe(struct kprobe *p) 1084int arch_trampoline_kprobe(struct kprobe *p)
1073{ 1085{
1074 return 0; 1086 return 0;
1075} 1087}
diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c
index 23ef5c556f06..717b02a22e67 100644
--- a/arch/x86/kernel/kprobes/ftrace.c
+++ b/arch/x86/kernel/kprobes/ftrace.c
@@ -25,8 +25,9 @@
25 25
26#include "common.h" 26#include "common.h"
27 27
28static int __skip_singlestep(struct kprobe *p, struct pt_regs *regs, 28static nokprobe_inline
29 struct kprobe_ctlblk *kcb) 29int __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
30 struct kprobe_ctlblk *kcb)
30{ 31{
31 /* 32 /*
32 * Emulate singlestep (and also recover regs->ip) 33 * Emulate singlestep (and also recover regs->ip)
@@ -41,18 +42,19 @@ static int __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
41 return 1; 42 return 1;
42} 43}
43 44
44int __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs, 45int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
45 struct kprobe_ctlblk *kcb) 46 struct kprobe_ctlblk *kcb)
46{ 47{
47 if (kprobe_ftrace(p)) 48 if (kprobe_ftrace(p))
48 return __skip_singlestep(p, regs, kcb); 49 return __skip_singlestep(p, regs, kcb);
49 else 50 else
50 return 0; 51 return 0;
51} 52}
53NOKPROBE_SYMBOL(skip_singlestep);
52 54
53/* Ftrace callback handler for kprobes */ 55/* Ftrace callback handler for kprobes */
54void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, 56void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
55 struct ftrace_ops *ops, struct pt_regs *regs) 57 struct ftrace_ops *ops, struct pt_regs *regs)
56{ 58{
57 struct kprobe *p; 59 struct kprobe *p;
58 struct kprobe_ctlblk *kcb; 60 struct kprobe_ctlblk *kcb;
@@ -84,8 +86,9 @@ void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
84end: 86end:
85 local_irq_restore(flags); 87 local_irq_restore(flags);
86} 88}
89NOKPROBE_SYMBOL(kprobe_ftrace_handler);
87 90
88int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p) 91int arch_prepare_kprobe_ftrace(struct kprobe *p)
89{ 92{
90 p->ainsn.insn = NULL; 93 p->ainsn.insn = NULL;
91 p->ainsn.boostable = -1; 94 p->ainsn.boostable = -1;
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 898160b42e43..f304773285ae 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -77,7 +77,7 @@ found:
77} 77}
78 78
79/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */ 79/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
80static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val) 80static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
81{ 81{
82#ifdef CONFIG_X86_64 82#ifdef CONFIG_X86_64
83 *addr++ = 0x48; 83 *addr++ = 0x48;
@@ -138,7 +138,8 @@ asm (
138#define INT3_SIZE sizeof(kprobe_opcode_t) 138#define INT3_SIZE sizeof(kprobe_opcode_t)
139 139
140/* Optimized kprobe call back function: called from optinsn */ 140/* Optimized kprobe call back function: called from optinsn */
141static void __kprobes optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) 141static void
142optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
142{ 143{
143 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 144 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
144 unsigned long flags; 145 unsigned long flags;
@@ -168,8 +169,9 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op, struct pt_
168 } 169 }
169 local_irq_restore(flags); 170 local_irq_restore(flags);
170} 171}
172NOKPROBE_SYMBOL(optimized_callback);
171 173
172static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src) 174static int copy_optimized_instructions(u8 *dest, u8 *src)
173{ 175{
174 int len = 0, ret; 176 int len = 0, ret;
175 177
@@ -189,7 +191,7 @@ static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
189} 191}
190 192
191/* Check whether insn is indirect jump */ 193/* Check whether insn is indirect jump */
192static int __kprobes insn_is_indirect_jump(struct insn *insn) 194static int insn_is_indirect_jump(struct insn *insn)
193{ 195{
194 return ((insn->opcode.bytes[0] == 0xff && 196 return ((insn->opcode.bytes[0] == 0xff &&
195 (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */ 197 (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
@@ -224,7 +226,7 @@ static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
224} 226}
225 227
226/* Decode whole function to ensure any instructions don't jump into target */ 228/* Decode whole function to ensure any instructions don't jump into target */
227static int __kprobes can_optimize(unsigned long paddr) 229static int can_optimize(unsigned long paddr)
228{ 230{
229 unsigned long addr, size = 0, offset = 0; 231 unsigned long addr, size = 0, offset = 0;
230 struct insn insn; 232 struct insn insn;
@@ -275,7 +277,7 @@ static int __kprobes can_optimize(unsigned long paddr)
275} 277}
276 278
277/* Check optimized_kprobe can actually be optimized. */ 279/* Check optimized_kprobe can actually be optimized. */
278int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op) 280int arch_check_optimized_kprobe(struct optimized_kprobe *op)
279{ 281{
280 int i; 282 int i;
281 struct kprobe *p; 283 struct kprobe *p;
@@ -290,15 +292,15 @@ int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op)
290} 292}
291 293
292/* Check the addr is within the optimized instructions. */ 294/* Check the addr is within the optimized instructions. */
293int __kprobes 295int arch_within_optimized_kprobe(struct optimized_kprobe *op,
294arch_within_optimized_kprobe(struct optimized_kprobe *op, unsigned long addr) 296 unsigned long addr)
295{ 297{
296 return ((unsigned long)op->kp.addr <= addr && 298 return ((unsigned long)op->kp.addr <= addr &&
297 (unsigned long)op->kp.addr + op->optinsn.size > addr); 299 (unsigned long)op->kp.addr + op->optinsn.size > addr);
298} 300}
299 301
300/* Free optimized instruction slot */ 302/* Free optimized instruction slot */
301static __kprobes 303static
302void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty) 304void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
303{ 305{
304 if (op->optinsn.insn) { 306 if (op->optinsn.insn) {
@@ -308,7 +310,7 @@ void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
308 } 310 }
309} 311}
310 312
311void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op) 313void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
312{ 314{
313 __arch_remove_optimized_kprobe(op, 1); 315 __arch_remove_optimized_kprobe(op, 1);
314} 316}
@@ -318,7 +320,7 @@ void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op)
318 * Target instructions MUST be relocatable (checked inside) 320 * Target instructions MUST be relocatable (checked inside)
319 * This is called when new aggr(opt)probe is allocated or reused. 321 * This is called when new aggr(opt)probe is allocated or reused.
320 */ 322 */
321int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op) 323int arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
322{ 324{
323 u8 *buf; 325 u8 *buf;
324 int ret; 326 int ret;
@@ -372,7 +374,7 @@ int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
372 * Replace breakpoints (int3) with relative jumps. 374 * Replace breakpoints (int3) with relative jumps.
373 * Caller must call with locking kprobe_mutex and text_mutex. 375 * Caller must call with locking kprobe_mutex and text_mutex.
374 */ 376 */
375void __kprobes arch_optimize_kprobes(struct list_head *oplist) 377void arch_optimize_kprobes(struct list_head *oplist)
376{ 378{
377 struct optimized_kprobe *op, *tmp; 379 struct optimized_kprobe *op, *tmp;
378 u8 insn_buf[RELATIVEJUMP_SIZE]; 380 u8 insn_buf[RELATIVEJUMP_SIZE];
@@ -398,7 +400,7 @@ void __kprobes arch_optimize_kprobes(struct list_head *oplist)
398} 400}
399 401
400/* Replace a relative jump with a breakpoint (int3). */ 402/* Replace a relative jump with a breakpoint (int3). */
401void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op) 403void arch_unoptimize_kprobe(struct optimized_kprobe *op)
402{ 404{
403 u8 insn_buf[RELATIVEJUMP_SIZE]; 405 u8 insn_buf[RELATIVEJUMP_SIZE];
404 406
@@ -424,8 +426,7 @@ extern void arch_unoptimize_kprobes(struct list_head *oplist,
424 } 426 }
425} 427}
426 428
427int __kprobes 429int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
428setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
429{ 430{
430 struct optimized_kprobe *op; 431 struct optimized_kprobe *op;
431 432
@@ -441,3 +442,4 @@ setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
441 } 442 }
442 return 0; 443 return 0;
443} 444}
445NOKPROBE_SYMBOL(setup_detour_execution);
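The opt.c changes above are typical of this series: the __kprobes function attribute, which worked by placing the function in the .kprobes.text section, is replaced by an explicit NOKPROBE_SYMBOL() annotation after the definition, so the function stays in its normal text section. A minimal sketch of the two styles, using a made-up handler name (kernel context assumed):

/* before: the attribute moves the whole function into .kprobes.text */
static int __kprobes my_handler(struct pt_regs *regs)
{
        return 0;
}

/* after: ordinary function, blacklisted by symbol */
static int my_handler(struct pt_regs *regs)
{
        return 0;
}
NOKPROBE_SYMBOL(my_handler);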
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 7e97371387fd..3dd8e2c4d74a 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -251,8 +251,9 @@ u32 kvm_read_and_reset_pf_reason(void)
251 return reason; 251 return reason;
252} 252}
253EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason); 253EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason);
254NOKPROBE_SYMBOL(kvm_read_and_reset_pf_reason);
254 255
255dotraplinkage void __kprobes 256dotraplinkage void
256do_async_page_fault(struct pt_regs *regs, unsigned long error_code) 257do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
257{ 258{
258 enum ctx_state prev_state; 259 enum ctx_state prev_state;
@@ -276,6 +277,7 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
276 break; 277 break;
277 } 278 }
278} 279}
280NOKPROBE_SYMBOL(do_async_page_fault);
279 281
280static void __init paravirt_ops_setup(void) 282static void __init paravirt_ops_setup(void)
281{ 283{
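do_async_page_fault() sits on the page-fault path that the kprobes code itself can take, so it must stay unprobeable; kvm_read_and_reset_pf_reason() is called from it and gets the same treatment. NOKPROBE_SYMBOL() does not change code generation at all: it only records the symbol's address in a section that the kprobes core folds into its blacklist at boot. Roughly like the following (the exact macro lives in include/linux/kprobes.h and may differ in detail):

#define NOKPROBE_SYMBOL(fname)                                          \
        static unsigned long __used                                     \
                __attribute__((section("_kprobe_blacklist")))           \
                _kbl_addr_##fname = (unsigned long)fname;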
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index b4872b999a71..c3e985d1751c 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -110,7 +110,7 @@ static void nmi_max_handler(struct irq_work *w)
110 a->handler, whole_msecs, decimal_msecs); 110 a->handler, whole_msecs, decimal_msecs);
111} 111}
112 112
113static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) 113static int nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
114{ 114{
115 struct nmi_desc *desc = nmi_to_desc(type); 115 struct nmi_desc *desc = nmi_to_desc(type);
116 struct nmiaction *a; 116 struct nmiaction *a;
@@ -146,6 +146,7 @@ static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2
146 /* return total number of NMI events handled */ 146 /* return total number of NMI events handled */
147 return handled; 147 return handled;
148} 148}
149NOKPROBE_SYMBOL(nmi_handle);
149 150
150int __register_nmi_handler(unsigned int type, struct nmiaction *action) 151int __register_nmi_handler(unsigned int type, struct nmiaction *action)
151{ 152{
@@ -208,7 +209,7 @@ void unregister_nmi_handler(unsigned int type, const char *name)
208} 209}
209EXPORT_SYMBOL_GPL(unregister_nmi_handler); 210EXPORT_SYMBOL_GPL(unregister_nmi_handler);
210 211
211static __kprobes void 212static void
212pci_serr_error(unsigned char reason, struct pt_regs *regs) 213pci_serr_error(unsigned char reason, struct pt_regs *regs)
213{ 214{
214 /* check to see if anyone registered against these types of errors */ 215 /* check to see if anyone registered against these types of errors */
@@ -238,8 +239,9 @@ pci_serr_error(unsigned char reason, struct pt_regs *regs)
238 reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR; 239 reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR;
239 outb(reason, NMI_REASON_PORT); 240 outb(reason, NMI_REASON_PORT);
240} 241}
242NOKPROBE_SYMBOL(pci_serr_error);
241 243
242static __kprobes void 244static void
243io_check_error(unsigned char reason, struct pt_regs *regs) 245io_check_error(unsigned char reason, struct pt_regs *regs)
244{ 246{
245 unsigned long i; 247 unsigned long i;
@@ -269,8 +271,9 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
269 reason &= ~NMI_REASON_CLEAR_IOCHK; 271 reason &= ~NMI_REASON_CLEAR_IOCHK;
270 outb(reason, NMI_REASON_PORT); 272 outb(reason, NMI_REASON_PORT);
271} 273}
274NOKPROBE_SYMBOL(io_check_error);
272 275
273static __kprobes void 276static void
274unknown_nmi_error(unsigned char reason, struct pt_regs *regs) 277unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
275{ 278{
276 int handled; 279 int handled;
@@ -298,11 +301,12 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
298 301
299 pr_emerg("Dazed and confused, but trying to continue\n"); 302 pr_emerg("Dazed and confused, but trying to continue\n");
300} 303}
304NOKPROBE_SYMBOL(unknown_nmi_error);
301 305
302static DEFINE_PER_CPU(bool, swallow_nmi); 306static DEFINE_PER_CPU(bool, swallow_nmi);
303static DEFINE_PER_CPU(unsigned long, last_nmi_rip); 307static DEFINE_PER_CPU(unsigned long, last_nmi_rip);
304 308
305static __kprobes void default_do_nmi(struct pt_regs *regs) 309static void default_do_nmi(struct pt_regs *regs)
306{ 310{
307 unsigned char reason = 0; 311 unsigned char reason = 0;
308 int handled; 312 int handled;
@@ -401,6 +405,7 @@ static __kprobes void default_do_nmi(struct pt_regs *regs)
401 else 405 else
402 unknown_nmi_error(reason, regs); 406 unknown_nmi_error(reason, regs);
403} 407}
408NOKPROBE_SYMBOL(default_do_nmi);
404 409
405/* 410/*
406 * NMIs can hit breakpoints which will cause it to lose its 411 * NMIs can hit breakpoints which will cause it to lose its
@@ -520,7 +525,7 @@ static inline void nmi_nesting_postprocess(void)
520} 525}
521#endif 526#endif
522 527
523dotraplinkage notrace __kprobes void 528dotraplinkage notrace void
524do_nmi(struct pt_regs *regs, long error_code) 529do_nmi(struct pt_regs *regs, long error_code)
525{ 530{
526 nmi_nesting_preprocess(regs); 531 nmi_nesting_preprocess(regs);
@@ -537,6 +542,7 @@ do_nmi(struct pt_regs *regs, long error_code)
537 /* On i386, may loop back to preprocess */ 542 /* On i386, may loop back to preprocess */
538 nmi_nesting_postprocess(); 543 nmi_nesting_postprocess();
539} 544}
545NOKPROBE_SYMBOL(do_nmi);
540 546
541void stop_nmi(void) 547void stop_nmi(void)
542{ 548{
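Everything reachable from do_nmi() is blacklisted here because a kprobe breakpoint taken inside an NMI would re-enter the int3/debug machinery while the NMI nesting logic is already in flight. The handlers dispatched by nmi_handle() are unaffected; they are still registered through the usual API. Illustrative only, with made-up names:

static int my_nmi_handler(unsigned int cmd, struct pt_regs *regs)
{
        /* return NMI_HANDLED if this NMI was ours, NMI_DONE otherwise */
        return NMI_DONE;
}

/* registered via: register_nmi_handler(NMI_LOCAL, my_nmi_handler, 0, "my_nmi"); */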
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 1b10af835c31..548d25f00c90 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -23,6 +23,7 @@
23#include <linux/efi.h> 23#include <linux/efi.h>
24#include <linux/bcd.h> 24#include <linux/bcd.h>
25#include <linux/highmem.h> 25#include <linux/highmem.h>
26#include <linux/kprobes.h>
26 27
27#include <asm/bug.h> 28#include <asm/bug.h>
28#include <asm/paravirt.h> 29#include <asm/paravirt.h>
@@ -389,6 +390,11 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
389 .end_context_switch = paravirt_nop, 390 .end_context_switch = paravirt_nop,
390}; 391};
391 392
393/* At this point, native_get/set_debugreg has real function entries */
394NOKPROBE_SYMBOL(native_get_debugreg);
395NOKPROBE_SYMBOL(native_set_debugreg);
396NOKPROBE_SYMBOL(native_load_idt);
397
392struct pv_apic_ops pv_apic_ops = { 398struct pv_apic_ops pv_apic_ops = {
393#ifdef CONFIG_X86_LOCAL_APIC 399#ifdef CONFIG_X86_LOCAL_APIC
394 .startup_ipi_hook = paravirt_nop, 400 .startup_ipi_hook = paravirt_nop,
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 898d077617a9..ca5b02d405c3 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -413,12 +413,11 @@ void set_personality_ia32(bool x32)
413 set_thread_flag(TIF_ADDR32); 413 set_thread_flag(TIF_ADDR32);
414 414
415 /* Mark the associated mm as containing 32-bit tasks. */ 415 /* Mark the associated mm as containing 32-bit tasks. */
416 if (current->mm)
417 current->mm->context.ia32_compat = 1;
418
419 if (x32) { 416 if (x32) {
420 clear_thread_flag(TIF_IA32); 417 clear_thread_flag(TIF_IA32);
421 set_thread_flag(TIF_X32); 418 set_thread_flag(TIF_X32);
419 if (current->mm)
420 current->mm->context.ia32_compat = TIF_X32;
422 current->personality &= ~READ_IMPLIES_EXEC; 421 current->personality &= ~READ_IMPLIES_EXEC;
423 /* is_compat_task() uses the presence of the x32 422 /* is_compat_task() uses the presence of the x32
424 syscall bit flag to determine compat status */ 423 syscall bit flag to determine compat status */
@@ -426,6 +425,8 @@ void set_personality_ia32(bool x32)
426 } else { 425 } else {
427 set_thread_flag(TIF_IA32); 426 set_thread_flag(TIF_IA32);
428 clear_thread_flag(TIF_X32); 427 clear_thread_flag(TIF_X32);
428 if (current->mm)
429 current->mm->context.ia32_compat = TIF_IA32;
429 current->personality |= force_personality32; 430 current->personality |= force_personality32;
430 /* Prepare the first "return" to user space */ 431 /* Prepare the first "return" to user space */
431 current_thread_info()->status |= TS_COMPAT; 432 current_thread_info()->status |= TS_COMPAT;
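mm->context.ia32_compat used to be set to 1 for any compat task, unconditionally. It now records which personality was chosen, and only inside the branch that sets it, turning the field into a three-way marker rather than a boolean. Roughly:

/*
 * context.ia32_compat == 0         native 64-bit mm
 * context.ia32_compat == TIF_X32   x32 mm (64-bit instruction set)
 * context.ia32_compat == TIF_IA32  ia32 compat mm
 *
 * Consumed by is_64bit_mm() in the uprobes changes further down,
 * which treats only the TIF_IA32 case as 32-bit for insn decoding.
 */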
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index f73b5d435bdc..c6eb418c5627 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -23,6 +23,7 @@
23#include <linux/kernel.h> 23#include <linux/kernel.h>
24#include <linux/module.h> 24#include <linux/module.h>
25#include <linux/ptrace.h> 25#include <linux/ptrace.h>
26#include <linux/uprobes.h>
26#include <linux/string.h> 27#include <linux/string.h>
27#include <linux/delay.h> 28#include <linux/delay.h>
28#include <linux/errno.h> 29#include <linux/errno.h>
@@ -106,7 +107,7 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
106 preempt_count_dec(); 107 preempt_count_dec();
107} 108}
108 109
109static int __kprobes 110static nokprobe_inline int
110do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, 111do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
111 struct pt_regs *regs, long error_code) 112 struct pt_regs *regs, long error_code)
112{ 113{
@@ -136,7 +137,38 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
136 return -1; 137 return -1;
137} 138}
138 139
139static void __kprobes 140static siginfo_t *fill_trap_info(struct pt_regs *regs, int signr, int trapnr,
141 siginfo_t *info)
142{
143 unsigned long siaddr;
144 int sicode;
145
146 switch (trapnr) {
147 default:
148 return SEND_SIG_PRIV;
149
150 case X86_TRAP_DE:
151 sicode = FPE_INTDIV;
152 siaddr = uprobe_get_trap_addr(regs);
153 break;
154 case X86_TRAP_UD:
155 sicode = ILL_ILLOPN;
156 siaddr = uprobe_get_trap_addr(regs);
157 break;
158 case X86_TRAP_AC:
159 sicode = BUS_ADRALN;
160 siaddr = 0;
161 break;
162 }
163
164 info->si_signo = signr;
165 info->si_errno = 0;
166 info->si_code = sicode;
167 info->si_addr = (void __user *)siaddr;
168 return info;
169}
170
171static void
140do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, 172do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
141 long error_code, siginfo_t *info) 173 long error_code, siginfo_t *info)
142{ 174{
@@ -168,60 +200,43 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
168 } 200 }
169#endif 201#endif
170 202
171 if (info) 203 force_sig_info(signr, info ?: SEND_SIG_PRIV, tsk);
172 force_sig_info(signr, info, tsk);
173 else
174 force_sig(signr, tsk);
175} 204}
205NOKPROBE_SYMBOL(do_trap);
176 206
177#define DO_ERROR(trapnr, signr, str, name) \ 207static void do_error_trap(struct pt_regs *regs, long error_code, char *str,
178dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ 208 unsigned long trapnr, int signr)
179{ \ 209{
180 enum ctx_state prev_state; \ 210 enum ctx_state prev_state = exception_enter();
181 \ 211 siginfo_t info;
182 prev_state = exception_enter(); \ 212
183 if (notify_die(DIE_TRAP, str, regs, error_code, \ 213 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) !=
184 trapnr, signr) == NOTIFY_STOP) { \ 214 NOTIFY_STOP) {
185 exception_exit(prev_state); \ 215 conditional_sti(regs);
186 return; \ 216 do_trap(trapnr, signr, str, regs, error_code,
187 } \ 217 fill_trap_info(regs, signr, trapnr, &info));
188 conditional_sti(regs); \ 218 }
189 do_trap(trapnr, signr, str, regs, error_code, NULL); \ 219
190 exception_exit(prev_state); \ 220 exception_exit(prev_state);
191} 221}
192 222
193#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ 223#define DO_ERROR(trapnr, signr, str, name) \
194dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ 224dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \
195{ \ 225{ \
196 siginfo_t info; \ 226 do_error_trap(regs, error_code, str, trapnr, signr); \
197 enum ctx_state prev_state; \
198 \
199 info.si_signo = signr; \
200 info.si_errno = 0; \
201 info.si_code = sicode; \
202 info.si_addr = (void __user *)siaddr; \
203 prev_state = exception_enter(); \
204 if (notify_die(DIE_TRAP, str, regs, error_code, \
205 trapnr, signr) == NOTIFY_STOP) { \
206 exception_exit(prev_state); \
207 return; \
208 } \
209 conditional_sti(regs); \
210 do_trap(trapnr, signr, str, regs, error_code, &info); \
211 exception_exit(prev_state); \
212} 227}
213 228
214DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip ) 229DO_ERROR(X86_TRAP_DE, SIGFPE, "divide error", divide_error)
215DO_ERROR (X86_TRAP_OF, SIGSEGV, "overflow", overflow ) 230DO_ERROR(X86_TRAP_OF, SIGSEGV, "overflow", overflow)
216DO_ERROR (X86_TRAP_BR, SIGSEGV, "bounds", bounds ) 231DO_ERROR(X86_TRAP_BR, SIGSEGV, "bounds", bounds)
217DO_ERROR_INFO(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip ) 232DO_ERROR(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op)
218DO_ERROR (X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun ) 233DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun",coprocessor_segment_overrun)
219DO_ERROR (X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS ) 234DO_ERROR(X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS)
220DO_ERROR (X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present ) 235DO_ERROR(X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present)
221#ifdef CONFIG_X86_32 236#ifdef CONFIG_X86_32
222DO_ERROR (X86_TRAP_SS, SIGBUS, "stack segment", stack_segment ) 237DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment)
223#endif 238#endif
224DO_ERROR_INFO(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0 ) 239DO_ERROR(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check)
225 240
226#ifdef CONFIG_X86_64 241#ifdef CONFIG_X86_64
227/* Runs on IST stack */ 242/* Runs on IST stack */
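The DO_ERROR_INFO flavor disappears: the siginfo details each instance used to hard-code are now computed at trap time by fill_trap_info(), which returns SEND_SIG_PRIV for traps that carry no extra information (preserving the old force_sig() behavior through do_trap()), and both macro flavors collapse into a single DO_ERROR wrapper around do_error_trap(). For example, the divide-error line now expands to roughly:

dotraplinkage void do_divide_error(struct pt_regs *regs, long error_code)
{
        do_error_trap(regs, error_code, "divide error", X86_TRAP_DE, SIGFPE);
}

Note that the #DE and #UD cases report their fault address through uprobe_get_trap_addr() rather than raw regs->ip, for the reason sketched after the traps.c diff below.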
@@ -263,7 +278,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
263} 278}
264#endif 279#endif
265 280
266dotraplinkage void __kprobes 281dotraplinkage void
267do_general_protection(struct pt_regs *regs, long error_code) 282do_general_protection(struct pt_regs *regs, long error_code)
268{ 283{
269 struct task_struct *tsk; 284 struct task_struct *tsk;
@@ -305,13 +320,14 @@ do_general_protection(struct pt_regs *regs, long error_code)
305 pr_cont("\n"); 320 pr_cont("\n");
306 } 321 }
307 322
308 force_sig(SIGSEGV, tsk); 323 force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
309exit: 324exit:
310 exception_exit(prev_state); 325 exception_exit(prev_state);
311} 326}
327NOKPROBE_SYMBOL(do_general_protection);
312 328
313/* May run on IST stack. */ 329/* May run on IST stack. */
314dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code) 330dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
315{ 331{
316 enum ctx_state prev_state; 332 enum ctx_state prev_state;
317 333
@@ -327,13 +343,18 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co
327 if (poke_int3_handler(regs)) 343 if (poke_int3_handler(regs))
328 return; 344 return;
329 345
330 prev_state = exception_enter();
331#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP 346#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
332 if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, 347 if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
333 SIGTRAP) == NOTIFY_STOP) 348 SIGTRAP) == NOTIFY_STOP)
334 goto exit; 349 goto exit;
335#endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ 350#endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */
336 351
352#ifdef CONFIG_KPROBES
353 if (kprobe_int3_handler(regs))
354 return;
355#endif
356 prev_state = exception_enter();
357
337 if (notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, 358 if (notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
338 SIGTRAP) == NOTIFY_STOP) 359 SIGTRAP) == NOTIFY_STOP)
339 goto exit; 360 goto exit;
@@ -350,6 +371,7 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co
350exit: 371exit:
351 exception_exit(prev_state); 372 exception_exit(prev_state);
352} 373}
374NOKPROBE_SYMBOL(do_int3);
353 375
354#ifdef CONFIG_X86_64 376#ifdef CONFIG_X86_64
355/* 377/*
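do_int3() now calls kprobe_int3_handler() directly (under CONFIG_KPROBES) before entering the exception-tracking context, instead of reaching kprobes through the notify_die() chain; do_debug() below gets the same direct call to kprobe_debug_handler(). The resulting #BP dispatch order, as a sketch:

/*
 * do_int3() after this change:
 *
 *   poke_int3_handler(regs)       text_poke fixups, checked first
 *   kgdb_ll_trap(...)             only with CONFIG_KGDB_LOW_LEVEL_TRAP
 *   kprobe_int3_handler(regs)     kprobes consumed it: return immediately
 *   exception_enter()             context tracking starts only past kprobes
 *   notify_die(DIE_INT3, ...)     remaining users of the die chain
 */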
@@ -357,7 +379,7 @@ exit:
357 * for scheduling or signal handling. The actual stack switch is done in 379 * for scheduling or signal handling. The actual stack switch is done in
358 * entry.S 380 * entry.S
359 */ 381 */
360asmlinkage __visible __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) 382asmlinkage __visible struct pt_regs *sync_regs(struct pt_regs *eregs)
361{ 383{
362 struct pt_regs *regs = eregs; 384 struct pt_regs *regs = eregs;
363 /* Did already sync */ 385 /* Did already sync */
@@ -376,6 +398,7 @@ asmlinkage __visible __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
376 *regs = *eregs; 398 *regs = *eregs;
377 return regs; 399 return regs;
378} 400}
401NOKPROBE_SYMBOL(sync_regs);
379#endif 402#endif
380 403
381/* 404/*
@@ -402,7 +425,7 @@ asmlinkage __visible __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
402 * 425 *
403 * May run on IST stack. 426 * May run on IST stack.
404 */ 427 */
405dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) 428dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
406{ 429{
407 struct task_struct *tsk = current; 430 struct task_struct *tsk = current;
408 enum ctx_state prev_state; 431 enum ctx_state prev_state;
@@ -410,8 +433,6 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
410 unsigned long dr6; 433 unsigned long dr6;
411 int si_code; 434 int si_code;
412 435
413 prev_state = exception_enter();
414
415 get_debugreg(dr6, 6); 436 get_debugreg(dr6, 6);
416 437
417 /* Filter out all the reserved bits which are preset to 1 */ 438 /* Filter out all the reserved bits which are preset to 1 */
@@ -440,6 +461,12 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
440 /* Store the virtualized DR6 value */ 461 /* Store the virtualized DR6 value */
441 tsk->thread.debugreg6 = dr6; 462 tsk->thread.debugreg6 = dr6;
442 463
464#ifdef CONFIG_KPROBES
465 if (kprobe_debug_handler(regs))
466 goto exit;
467#endif
468 prev_state = exception_enter();
469
443 if (notify_die(DIE_DEBUG, "debug", regs, (long)&dr6, error_code, 470 if (notify_die(DIE_DEBUG, "debug", regs, (long)&dr6, error_code,
444 SIGTRAP) == NOTIFY_STOP) 471 SIGTRAP) == NOTIFY_STOP)
445 goto exit; 472 goto exit;
@@ -482,13 +509,14 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
482exit: 509exit:
483 exception_exit(prev_state); 510 exception_exit(prev_state);
484} 511}
512NOKPROBE_SYMBOL(do_debug);
485 513
486/* 514/*
487 * Note that we play around with the 'TS' bit in an attempt to get 515 * Note that we play around with the 'TS' bit in an attempt to get
488 * the correct behaviour even in the presence of the asynchronous 516 * the correct behaviour even in the presence of the asynchronous
489 * IRQ13 behaviour 517 * IRQ13 behaviour
490 */ 518 */
491void math_error(struct pt_regs *regs, int error_code, int trapnr) 519static void math_error(struct pt_regs *regs, int error_code, int trapnr)
492{ 520{
493 struct task_struct *task = current; 521 struct task_struct *task = current;
494 siginfo_t info; 522 siginfo_t info;
@@ -518,7 +546,7 @@ void math_error(struct pt_regs *regs, int error_code, int trapnr)
518 task->thread.error_code = error_code; 546 task->thread.error_code = error_code;
519 info.si_signo = SIGFPE; 547 info.si_signo = SIGFPE;
520 info.si_errno = 0; 548 info.si_errno = 0;
521 info.si_addr = (void __user *)regs->ip; 549 info.si_addr = (void __user *)uprobe_get_trap_addr(regs);
522 if (trapnr == X86_TRAP_MF) { 550 if (trapnr == X86_TRAP_MF) {
523 unsigned short cwd, swd; 551 unsigned short cwd, swd;
524 /* 552 /*
@@ -645,7 +673,7 @@ void math_state_restore(void)
645 */ 673 */
646 if (unlikely(restore_fpu_checking(tsk))) { 674 if (unlikely(restore_fpu_checking(tsk))) {
647 drop_init_fpu(tsk); 675 drop_init_fpu(tsk);
648 force_sig(SIGSEGV, tsk); 676 force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
649 return; 677 return;
650 } 678 }
651 679
@@ -653,7 +681,7 @@ void math_state_restore(void)
653} 681}
654EXPORT_SYMBOL_GPL(math_state_restore); 682EXPORT_SYMBOL_GPL(math_state_restore);
655 683
656dotraplinkage void __kprobes 684dotraplinkage void
657do_device_not_available(struct pt_regs *regs, long error_code) 685do_device_not_available(struct pt_regs *regs, long error_code)
658{ 686{
659 enum ctx_state prev_state; 687 enum ctx_state prev_state;
@@ -679,6 +707,7 @@ do_device_not_available(struct pt_regs *regs, long error_code)
679#endif 707#endif
680 exception_exit(prev_state); 708 exception_exit(prev_state);
681} 709}
710NOKPROBE_SYMBOL(do_device_not_available);
682 711
683#ifdef CONFIG_X86_32 712#ifdef CONFIG_X86_32
684dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) 713dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
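math_error() (now static) and fill_trap_info() report the faulting address through uprobe_get_trap_addr() rather than regs->ip, so a trap raised while single-stepping a uprobe copy out of line is attributed to the original probed address instead of the XOL slot. The helper is added in the uprobes core elsewhere in this series; presumably it looks something like:

unsigned long uprobe_get_trap_addr(struct pt_regs *regs)
{
        struct uprobe_task *utask = current->utask;

        if (unlikely(utask && utask->active_uprobe))
                return utask->vaddr;    /* the original probed address */

        return instruction_pointer(regs);
}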
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index ace22916ade3..5d1cbfe4ae58 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -32,20 +32,20 @@
32 32
33/* Post-execution fixups. */ 33/* Post-execution fixups. */
34 34
35/* No fixup needed */
36#define UPROBE_FIX_NONE 0x0
37
38/* Adjust IP back to vicinity of actual insn */ 35/* Adjust IP back to vicinity of actual insn */
39#define UPROBE_FIX_IP 0x1 36#define UPROBE_FIX_IP 0x01
40 37
41/* Adjust the return address of a call insn */ 38/* Adjust the return address of a call insn */
42#define UPROBE_FIX_CALL 0x2 39#define UPROBE_FIX_CALL 0x02
43 40
44/* Instruction will modify TF, don't change it */ 41/* Instruction will modify TF, don't change it */
45#define UPROBE_FIX_SETF 0x4 42#define UPROBE_FIX_SETF 0x04
46 43
47#define UPROBE_FIX_RIP_AX 0x8000 44#define UPROBE_FIX_RIP_SI 0x08
48#define UPROBE_FIX_RIP_CX 0x4000 45#define UPROBE_FIX_RIP_DI 0x10
46#define UPROBE_FIX_RIP_BX 0x20
47#define UPROBE_FIX_RIP_MASK \
48 (UPROBE_FIX_RIP_SI | UPROBE_FIX_RIP_DI | UPROBE_FIX_RIP_BX)
49 49
50#define UPROBE_TRAP_NR UINT_MAX 50#define UPROBE_TRAP_NR UINT_MAX
51 51
@@ -67,6 +67,7 @@
67 * to keep gcc from statically optimizing it out, as variable_test_bit makes 67 * to keep gcc from statically optimizing it out, as variable_test_bit makes
68 * some versions of gcc to think only *(unsigned long*) is used. 68 * some versions of gcc to think only *(unsigned long*) is used.
69 */ 69 */
70#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
70static volatile u32 good_insns_32[256 / 32] = { 71static volatile u32 good_insns_32[256 / 32] = {
71 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 72 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
72 /* ---------------------------------------------- */ 73 /* ---------------------------------------------- */
@@ -89,33 +90,12 @@ static volatile u32 good_insns_32[256 / 32] = {
89 /* ---------------------------------------------- */ 90 /* ---------------------------------------------- */
90 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 91 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
91}; 92};
93#else
94#define good_insns_32 NULL
95#endif
92 96
93/* Using this for both 64-bit and 32-bit apps */
94static volatile u32 good_2byte_insns[256 / 32] = {
95 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
96 /* ---------------------------------------------- */
97 W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */
98 W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */
99 W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */
100 W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */
101 W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
102 W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
103 W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */
104 W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */
105 W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
106 W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
107 W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */
108 W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
109 W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */
110 W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
111 W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */
112 W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* f0 */
113 /* ---------------------------------------------- */
114 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
115};
116
117#ifdef CONFIG_X86_64
118/* Good-instruction tables for 64-bit apps */ 97/* Good-instruction tables for 64-bit apps */
98#if defined(CONFIG_X86_64)
119static volatile u32 good_insns_64[256 / 32] = { 99static volatile u32 good_insns_64[256 / 32] = {
120 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 100 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
121 /* ---------------------------------------------- */ 101 /* ---------------------------------------------- */
@@ -138,7 +118,33 @@ static volatile u32 good_insns_64[256 / 32] = {
138 /* ---------------------------------------------- */ 118 /* ---------------------------------------------- */
139 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 119 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
140}; 120};
121#else
122#define good_insns_64 NULL
141#endif 123#endif
124
125/* Using this for both 64-bit and 32-bit apps */
126static volatile u32 good_2byte_insns[256 / 32] = {
127 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
128 /* ---------------------------------------------- */
129 W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */
130 W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */
131 W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */
132 W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */
133 W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
134 W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
135 W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */
136 W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */
137 W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
138 W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
139 W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */
140 W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
141 W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */
142 W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
143 W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */
144 W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* f0 */
145 /* ---------------------------------------------- */
146 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
147};
142#undef W 148#undef W
143 149
144/* 150/*
@@ -209,16 +215,25 @@ static bool is_prefix_bad(struct insn *insn)
209 return false; 215 return false;
210} 216}
211 217
212static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn) 218static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool x86_64)
213{ 219{
214 insn_init(insn, auprobe->insn, false); 220 u32 volatile *good_insns;
221
222 insn_init(insn, auprobe->insn, x86_64);
223 /* has the side-effect of processing the entire instruction */
224 insn_get_length(insn);
225 if (WARN_ON_ONCE(!insn_complete(insn)))
226 return -ENOEXEC;
215 227
216 /* Skip good instruction prefixes; reject "bad" ones. */
217 insn_get_opcode(insn);
218 if (is_prefix_bad(insn)) 228 if (is_prefix_bad(insn))
219 return -ENOTSUPP; 229 return -ENOTSUPP;
220 230
221 if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_32)) 231 if (x86_64)
232 good_insns = good_insns_64;
233 else
234 good_insns = good_insns_32;
235
236 if (test_bit(OPCODE1(insn), (unsigned long *)good_insns))
222 return 0; 237 return 0;
223 238
224 if (insn->opcode.nbytes == 2) { 239 if (insn->opcode.nbytes == 2) {
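validate_insn_32bits() and validate_insn_64bits() merge into uprobe_init_insn(), which decodes the whole instruction once (insn_get_length() forces a full decode), rejects bad prefixes, and then tests the opcode against whichever good-instruction bitmap matches the probed mm. The #if/#else NULL stand-ins added above keep both table names valid even in configs where one mode cannot occur. On the caller side, later in this diff, the lookup ends up as:

ret = uprobe_init_insn(auprobe, &insn, is_64bit_mm(mm));
if (ret)
        return ret;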
@@ -230,14 +245,18 @@ static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn)
230} 245}
231 246
232#ifdef CONFIG_X86_64 247#ifdef CONFIG_X86_64
248static inline bool is_64bit_mm(struct mm_struct *mm)
249{
250 return !config_enabled(CONFIG_IA32_EMULATION) ||
251 !(mm->context.ia32_compat == TIF_IA32);
252}
233/* 253/*
234 * If arch_uprobe->insn doesn't use rip-relative addressing, return 254 * If arch_uprobe->insn doesn't use rip-relative addressing, return
235 * immediately. Otherwise, rewrite the instruction so that it accesses 255 * immediately. Otherwise, rewrite the instruction so that it accesses
236 * its memory operand indirectly through a scratch register. Set 256 * its memory operand indirectly through a scratch register. Set
237 * arch_uprobe->fixups and arch_uprobe->rip_rela_target_address 257 * defparam->fixups accordingly. (The contents of the scratch register
238 * accordingly. (The contents of the scratch register will be saved 258 * will be saved before we single-step the modified instruction,
239 * before we single-step the modified instruction, and restored 259 * and restored afterward).
240 * afterward.)
241 * 260 *
242 * We do this because a rip-relative instruction can access only a 261 * We do this because a rip-relative instruction can access only a
243 * relatively small area (+/- 2 GB from the instruction), and the XOL 262 * relatively small area (+/- 2 GB from the instruction), and the XOL
@@ -248,164 +267,192 @@ static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn)
248 * 267 *
249 * Some useful facts about rip-relative instructions: 268 * Some useful facts about rip-relative instructions:
250 * 269 *
251 * - There's always a modrm byte. 270 * - There's always a modrm byte with bit layout "00 reg 101".
252 * - There's never a SIB byte. 271 * - There's never a SIB byte.
253 * - The displacement is always 4 bytes. 272 * - The displacement is always 4 bytes.
273 * - REX.B=1 bit in REX prefix, which normally extends r/m field,
274 * has no effect on rip-relative mode. It doesn't make modrm byte
275 * with r/m=101 refer to register 1101 = R13.
254 */ 276 */
255static void 277static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
256handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn)
257{ 278{
258 u8 *cursor; 279 u8 *cursor;
259 u8 reg; 280 u8 reg;
281 u8 reg2;
260 282
261 if (!insn_rip_relative(insn)) 283 if (!insn_rip_relative(insn))
262 return; 284 return;
263 285
264 /* 286 /*
265 * insn_rip_relative() would have decoded rex_prefix, modrm. 287 * insn_rip_relative() would have decoded rex_prefix, vex_prefix, modrm.
266 * Clear REX.b bit (extension of MODRM.rm field): 288 * Clear REX.b bit (extension of MODRM.rm field):
267 * we want to encode rax/rcx, not r8/r9. 289 * we want to encode low numbered reg, not r8+.
268 */ 290 */
269 if (insn->rex_prefix.nbytes) { 291 if (insn->rex_prefix.nbytes) {
270 cursor = auprobe->insn + insn_offset_rex_prefix(insn); 292 cursor = auprobe->insn + insn_offset_rex_prefix(insn);
271 *cursor &= 0xfe; /* Clearing REX.B bit */ 293 /* REX byte has 0100wrxb layout, clearing REX.b bit */
294 *cursor &= 0xfe;
295 }
296 /*
297 * Similar treatment for VEX3 prefix.
298 * TODO: add XOP/EVEX treatment when insn decoder supports them
299 */
300 if (insn->vex_prefix.nbytes == 3) {
301 /*
302 * vex2: c5 rvvvvLpp (has no b bit)
303 * vex3/xop: c4/8f rxbmmmmm wvvvvLpp
304 * evex: 62 rxbR00mm wvvvv1pp zllBVaaa
305 * (evex will need setting of both b and x since
306 * in non-sib encoding evex.x is 4th bit of MODRM.rm)
307 * Setting VEX3.b (setting because it has inverted meaning):
308 */
309 cursor = auprobe->insn + insn_offset_vex_prefix(insn) + 1;
310 *cursor |= 0x20;
272 } 311 }
273 312
274 /* 313 /*
314 * Convert from rip-relative addressing to register-relative addressing
315 * via a scratch register.
316 *
317 * This is tricky since there are insns with modrm byte
318 * which also use registers not encoded in modrm byte:
319 * [i]div/[i]mul: implicitly use dx:ax
320 * shift ops: implicitly use cx
321 * cmpxchg: implicitly uses ax
322 * cmpxchg8/16b: implicitly uses dx:ax and bx:cx
323 * Encoding: 0f c7/1 modrm
324 * The code below thinks that reg=1 (cx), chooses si as scratch.
325 * mulx: implicitly uses dx: mulx r/m,r1,r2 does r1:r2 = dx * r/m.
326 * First appeared in Haswell (BMI2 insn). It is vex-encoded.
327 * Example where none of bx,cx,dx can be used as scratch reg:
328 * c4 e2 63 f6 0d disp32 mulx disp32(%rip),%ebx,%ecx
329 * [v]pcmpistri: implicitly uses cx, xmm0
330 * [v]pcmpistrm: implicitly uses xmm0
331 * [v]pcmpestri: implicitly uses ax, dx, cx, xmm0
332 * [v]pcmpestrm: implicitly uses ax, dx, xmm0
333 * Evil SSE4.2 string comparison ops from hell.
334 * maskmovq/[v]maskmovdqu: implicitly uses (ds:rdi) as destination.
335 * Encoding: 0f f7 modrm, 66 0f f7 modrm, vex-encoded: c5 f9 f7 modrm.
336 * Store op1, byte-masked by op2 msb's in each byte, to (ds:rdi).
337 * AMD says it has no 3-operand form (vex.vvvv must be 1111)
338 * and that it can have only register operands, not mem
339 * (its modrm byte must have mode=11).
340 * If these restrictions will ever be lifted,
341 * we'll need code to prevent selection of di as scratch reg!
342 *
343 * Summary: I don't know any insns with modrm byte which
344 * use SI register implicitly. DI register is used only
345 * by one insn (maskmovq) and BX register is used
346 * only by one too (cmpxchg8b).
347 * BP is stack-segment based (may be a problem?).
348 * AX, DX, CX are off-limits (many implicit users).
349 * SP is unusable (it's stack pointer - think about "pop mem";
350 * also, rsp+disp32 needs sib encoding -> insn length change).
351 */
352
353 reg = MODRM_REG(insn); /* Fetch modrm.reg */
354 reg2 = 0xff; /* Fetch vex.vvvv */
355 if (insn->vex_prefix.nbytes == 2)
356 reg2 = insn->vex_prefix.bytes[1];
357 else if (insn->vex_prefix.nbytes == 3)
358 reg2 = insn->vex_prefix.bytes[2];
359 /*
360	 * TODO: add XOP, EVEX vvvv reading.
361 *
362 * vex.vvvv field is in bits 6-3, bits are inverted.
363 * But in 32-bit mode, high-order bit may be ignored.
364 * Therefore, let's consider only 3 low-order bits.
365 */
366 reg2 = ((reg2 >> 3) & 0x7) ^ 0x7;
367 /*
368 * Register numbering is ax,cx,dx,bx, sp,bp,si,di, r8..r15.
369 *
370 * Choose scratch reg. Order is important: must not select bx
371 * if we can use si (cmpxchg8b case!)
372 */
373 if (reg != 6 && reg2 != 6) {
374 reg2 = 6;
375 auprobe->defparam.fixups |= UPROBE_FIX_RIP_SI;
376 } else if (reg != 7 && reg2 != 7) {
377 reg2 = 7;
378 auprobe->defparam.fixups |= UPROBE_FIX_RIP_DI;
379 /* TODO (paranoia): force maskmovq to not use di */
380 } else {
381 reg2 = 3;
382 auprobe->defparam.fixups |= UPROBE_FIX_RIP_BX;
383 }
384 /*
275 * Point cursor at the modrm byte. The next 4 bytes are the 385 * Point cursor at the modrm byte. The next 4 bytes are the
276 * displacement. Beyond the displacement, for some instructions, 386 * displacement. Beyond the displacement, for some instructions,
277 * is the immediate operand. 387 * is the immediate operand.
278 */ 388 */
279 cursor = auprobe->insn + insn_offset_modrm(insn); 389 cursor = auprobe->insn + insn_offset_modrm(insn);
280 insn_get_length(insn);
281
282 /* 390 /*
283 * Convert from rip-relative addressing to indirect addressing 391 * Change modrm from "00 reg 101" to "10 reg reg2". Example:
284 * via a scratch register. Change the r/m field from 0x5 (%rip) 392 * 89 05 disp32 mov %eax,disp32(%rip) becomes
285 * to 0x0 (%rax) or 0x1 (%rcx), and squeeze out the offset field. 393 * 89 86 disp32 mov %eax,disp32(%rsi)
286 */ 394 */
287 reg = MODRM_REG(insn); 395 *cursor = 0x80 | (reg << 3) | reg2;
288 if (reg == 0) { 396}
289 /*
290 * The register operand (if any) is either the A register
291 * (%rax, %eax, etc.) or (if the 0x4 bit is set in the
292 * REX prefix) %r8. In any case, we know the C register
293 * is NOT the register operand, so we use %rcx (register
294 * #1) for the scratch register.
295 */
296 auprobe->fixups = UPROBE_FIX_RIP_CX;
297 /* Change modrm from 00 000 101 to 00 000 001. */
298 *cursor = 0x1;
299 } else {
300 /* Use %rax (register #0) for the scratch register. */
301 auprobe->fixups = UPROBE_FIX_RIP_AX;
302 /* Change modrm from 00 xxx 101 to 00 xxx 000 */
303 *cursor = (reg << 3);
304 }
305
306 /* Target address = address of next instruction + (signed) offset */
307 auprobe->rip_rela_target_address = (long)insn->length + insn->displacement.value;
308 397
309 /* Displacement field is gone; slide immediate field (if any) over. */ 398static inline unsigned long *
310 if (insn->immediate.nbytes) { 399scratch_reg(struct arch_uprobe *auprobe, struct pt_regs *regs)
311 cursor++; 400{
312 memmove(cursor, cursor + insn->displacement.nbytes, insn->immediate.nbytes); 401 if (auprobe->defparam.fixups & UPROBE_FIX_RIP_SI)
313 } 402 return &regs->si;
403 if (auprobe->defparam.fixups & UPROBE_FIX_RIP_DI)
404 return &regs->di;
405 return &regs->bx;
314} 406}
315 407
316/* 408/*
317 * If we're emulating a rip-relative instruction, save the contents 409 * If we're emulating a rip-relative instruction, save the contents
318 * of the scratch register and store the target address in that register. 410 * of the scratch register and store the target address in that register.
319 */ 411 */
320static void 412static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
321pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
322 struct arch_uprobe_task *autask)
323{
324 if (auprobe->fixups & UPROBE_FIX_RIP_AX) {
325 autask->saved_scratch_register = regs->ax;
326 regs->ax = current->utask->vaddr;
327 regs->ax += auprobe->rip_rela_target_address;
328 } else if (auprobe->fixups & UPROBE_FIX_RIP_CX) {
329 autask->saved_scratch_register = regs->cx;
330 regs->cx = current->utask->vaddr;
331 regs->cx += auprobe->rip_rela_target_address;
332 }
333}
334
335static void
336handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction)
337{ 413{
338 if (auprobe->fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) { 414 if (auprobe->defparam.fixups & UPROBE_FIX_RIP_MASK) {
339 struct arch_uprobe_task *autask; 415 struct uprobe_task *utask = current->utask;
340 416 unsigned long *sr = scratch_reg(auprobe, regs);
341 autask = &current->utask->autask;
342 if (auprobe->fixups & UPROBE_FIX_RIP_AX)
343 regs->ax = autask->saved_scratch_register;
344 else
345 regs->cx = autask->saved_scratch_register;
346 417
347 /* 418 utask->autask.saved_scratch_register = *sr;
348 * The original instruction includes a displacement, and so 419 *sr = utask->vaddr + auprobe->defparam.ilen;
349 * is 4 bytes longer than what we've just single-stepped.
350 * Caller may need to apply other fixups to handle stuff
351 * like "jmpq *...(%rip)" and "callq *...(%rip)".
352 */
353 if (correction)
354 *correction += 4;
355 } 420 }
356} 421}
357 422
358static int validate_insn_64bits(struct arch_uprobe *auprobe, struct insn *insn) 423static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
359{ 424{
360 insn_init(insn, auprobe->insn, true); 425 if (auprobe->defparam.fixups & UPROBE_FIX_RIP_MASK) {
361 426 struct uprobe_task *utask = current->utask;
362 /* Skip good instruction prefixes; reject "bad" ones. */ 427 unsigned long *sr = scratch_reg(auprobe, regs);
363 insn_get_opcode(insn);
364 if (is_prefix_bad(insn))
365 return -ENOTSUPP;
366 428
367 if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_64)) 429 *sr = utask->autask.saved_scratch_register;
368 return 0;
369
370 if (insn->opcode.nbytes == 2) {
371 if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns))
372 return 0;
373 } 430 }
374 return -ENOTSUPP;
375} 431}
376 432#else /* 32-bit: */
377static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) 433static inline bool is_64bit_mm(struct mm_struct *mm)
378{ 434{
379 if (mm->context.ia32_compat) 435 return false;
380 return validate_insn_32bits(auprobe, insn);
381 return validate_insn_64bits(auprobe, insn);
382} 436}
383#else /* 32-bit: */
384/* 437/*
385 * No RIP-relative addressing on 32-bit 438 * No RIP-relative addressing on 32-bit
386 */ 439 */
387static void handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn) 440static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
388{ 441{
389} 442}
390static void pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs, 443static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
391 struct arch_uprobe_task *autask)
392{ 444{
393} 445}
394static void handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, 446static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
395 long *correction)
396{ 447{
397} 448}
398
399static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
400{
401 return validate_insn_32bits(auprobe, insn);
402}
403#endif /* CONFIG_X86_64 */ 449#endif /* CONFIG_X86_64 */
404 450
405struct uprobe_xol_ops { 451struct uprobe_xol_ops {
406 bool (*emulate)(struct arch_uprobe *, struct pt_regs *); 452 bool (*emulate)(struct arch_uprobe *, struct pt_regs *);
407 int (*pre_xol)(struct arch_uprobe *, struct pt_regs *); 453 int (*pre_xol)(struct arch_uprobe *, struct pt_regs *);
408 int (*post_xol)(struct arch_uprobe *, struct pt_regs *); 454 int (*post_xol)(struct arch_uprobe *, struct pt_regs *);
455 void (*abort)(struct arch_uprobe *, struct pt_regs *);
409}; 456};
410 457
411static inline int sizeof_long(void) 458static inline int sizeof_long(void)
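To make the rewrite concrete: a rip-relative instruction keeps its length, since modrm "00 reg 101" plus disp32 simply becomes "10 reg reg2" plus disp32; only the base register changes, and the 4-byte displacement is reinterpreted as an offset from the scratch register. A worked example, assuming a non-VEX instruction whose modrm reg field is not 6, so si is picked:

/*
 * Original instruction in the probed task:
 *   8b 05 44 33 22 11      mov 0x11223344(%rip),%eax
 *
 * Copy written to the XOL slot:
 *   8b 86 44 33 22 11      mov 0x11223344(%rsi),%eax
 *
 * riprel_pre_xol() saves %rsi and loads it with
 * utask->vaddr + defparam.ilen, i.e. the address of the insn
 * following the original one, which is exactly what %rip would
 * have held, so the effective address is unchanged.
 * riprel_post_xol() (or ->abort) restores %rsi afterwards.
 */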
@@ -415,50 +462,67 @@ static inline int sizeof_long(void)
415 462
416static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) 463static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
417{ 464{
418 pre_xol_rip_insn(auprobe, regs, &current->utask->autask); 465 riprel_pre_xol(auprobe, regs);
419 return 0; 466 return 0;
420} 467}
421 468
422/* 469static int push_ret_address(struct pt_regs *regs, unsigned long ip)
423 * Adjust the return address pushed by a call insn executed out of line.
424 */
425static int adjust_ret_addr(unsigned long sp, long correction)
426{ 470{
427 int rasize = sizeof_long(); 471 unsigned long new_sp = regs->sp - sizeof_long();
428 long ra;
429
430 if (copy_from_user(&ra, (void __user *)sp, rasize))
431 return -EFAULT;
432 472
433 ra += correction; 473 if (copy_to_user((void __user *)new_sp, &ip, sizeof_long()))
434 if (copy_to_user((void __user *)sp, &ra, rasize))
435 return -EFAULT; 474 return -EFAULT;
436 475
476 regs->sp = new_sp;
437 return 0; 477 return 0;
438} 478}
439 479
480/*
481 * We have to fix things up as follows:
482 *
483 * Typically, the new ip is relative to the copied instruction. We need
484 * to make it relative to the original instruction (FIX_IP). Exceptions
485 * are return instructions and absolute or indirect jump or call instructions.
486 *
487 * If the single-stepped instruction was a call, the return address that
488 * is atop the stack is the address following the copied instruction. We
489 * need to make it the address following the original instruction (FIX_CALL).
490 *
491 * If the original instruction was a rip-relative instruction such as
492 * "movl %edx,0xnnnn(%rip)", we have instead executed an equivalent
493 * instruction using a scratch register -- e.g., "movl %edx,0xnnnn(%rsi)".
494 * We need to restore the contents of the scratch register
495 * (FIX_RIP_reg).
496 */
440static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) 497static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
441{ 498{
442 struct uprobe_task *utask = current->utask; 499 struct uprobe_task *utask = current->utask;
443 long correction = (long)(utask->vaddr - utask->xol_vaddr);
444 500
445 handle_riprel_post_xol(auprobe, regs, &correction); 501 riprel_post_xol(auprobe, regs);
446 if (auprobe->fixups & UPROBE_FIX_IP) 502 if (auprobe->defparam.fixups & UPROBE_FIX_IP) {
503 long correction = utask->vaddr - utask->xol_vaddr;
447 regs->ip += correction; 504 regs->ip += correction;
448 505 } else if (auprobe->defparam.fixups & UPROBE_FIX_CALL) {
449 if (auprobe->fixups & UPROBE_FIX_CALL) { 506 regs->sp += sizeof_long(); /* Pop incorrect return address */
450 if (adjust_ret_addr(regs->sp, correction)) { 507 if (push_ret_address(regs, utask->vaddr + auprobe->defparam.ilen))
451 regs->sp += sizeof_long();
452 return -ERESTART; 508 return -ERESTART;
453 }
454 } 509 }
510 /* popf; tell the caller to not touch TF */
511 if (auprobe->defparam.fixups & UPROBE_FIX_SETF)
512 utask->autask.saved_tf = true;
455 513
456 return 0; 514 return 0;
457} 515}
458 516
517static void default_abort_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
518{
519 riprel_post_xol(auprobe, regs);
520}
521
459static struct uprobe_xol_ops default_xol_ops = { 522static struct uprobe_xol_ops default_xol_ops = {
460 .pre_xol = default_pre_xol_op, 523 .pre_xol = default_pre_xol_op,
461 .post_xol = default_post_xol_op, 524 .post_xol = default_post_xol_op,
525 .abort = default_abort_op,
462}; 526};
463 527
464static bool branch_is_call(struct arch_uprobe *auprobe) 528static bool branch_is_call(struct arch_uprobe *auprobe)
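The FIX_CALL fixup changes strategy: instead of patching the return address the copied call pushed (the old adjust_ret_addr() plus correction), default_post_xol_op() discards it and pushes the right one with the new push_ret_address() helper, which branch_emulate_op() below reuses. With illustrative numbers:

/*
 * A 5-byte "call" probed at vaddr 0x400500, XOL slot at 0x7f0000001000.
 *
 * After the single step, the top of stack holds 0x7f0000001005
 * (xol_vaddr + 5).  default_post_xol_op() then does:
 *
 *   regs->sp += sizeof_long();                     discard the wrong address
 *   push_ret_address(regs, utask->vaddr + ilen);   push 0x400505 instead
 *
 * If that push faults, -ERESTART makes the core re-execute the
 * probed instruction from scratch.
 */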
@@ -520,7 +584,6 @@ static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
520 unsigned long offs = (long)auprobe->branch.offs; 584 unsigned long offs = (long)auprobe->branch.offs;
521 585
522 if (branch_is_call(auprobe)) { 586 if (branch_is_call(auprobe)) {
523 unsigned long new_sp = regs->sp - sizeof_long();
524 /* 587 /*
525 * If it fails we execute this (mangled, see the comment in 588 * If it fails we execute this (mangled, see the comment in
526 * branch_clear_offset) insn out-of-line. In the likely case 589 * branch_clear_offset) insn out-of-line. In the likely case
@@ -530,9 +593,8 @@ static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
530 * 593 *
531 * But there is corner case, see the comment in ->post_xol(). 594 * But there is corner case, see the comment in ->post_xol().
532 */ 595 */
533 if (copy_to_user((void __user *)new_sp, &new_ip, sizeof_long())) 596 if (push_ret_address(regs, new_ip))
534 return false; 597 return false;
535 regs->sp = new_sp;
536 } else if (!check_jmp_cond(auprobe, regs)) { 598 } else if (!check_jmp_cond(auprobe, regs)) {
537 offs = 0; 599 offs = 0;
538 } 600 }
@@ -583,11 +645,7 @@ static struct uprobe_xol_ops branch_xol_ops = {
583static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) 645static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
584{ 646{
585 u8 opc1 = OPCODE1(insn); 647 u8 opc1 = OPCODE1(insn);
586 648 int i;
587 /* has the side-effect of processing the entire instruction */
588 insn_get_length(insn);
589 if (WARN_ON_ONCE(!insn_complete(insn)))
590 return -ENOEXEC;
591 649
592 switch (opc1) { 650 switch (opc1) {
593 case 0xeb: /* jmp 8 */ 651 case 0xeb: /* jmp 8 */
@@ -612,6 +670,16 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
612 return -ENOSYS; 670 return -ENOSYS;
613 } 671 }
614 672
673 /*
674 * 16-bit overrides such as CALLW (66 e8 nn nn) are not supported.
675 * Intel and AMD behavior differ in 64-bit mode: Intel ignores 66 prefix.
676 * No one uses these insns, reject any branch insns with such prefix.
677 */
678 for (i = 0; i < insn->prefixes.nbytes; i++) {
679 if (insn->prefixes.bytes[i] == 0x66)
680 return -ENOTSUPP;
681 }
682
615 auprobe->branch.opc1 = opc1; 683 auprobe->branch.opc1 = opc1;
616 auprobe->branch.ilen = insn->length; 684 auprobe->branch.ilen = insn->length;
617 auprobe->branch.offs = insn->immediate.value; 685 auprobe->branch.offs = insn->immediate.value;
@@ -630,10 +698,10 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
630int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr) 698int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr)
631{ 699{
632 struct insn insn; 700 struct insn insn;
633 bool fix_ip = true, fix_call = false; 701 u8 fix_ip_or_call = UPROBE_FIX_IP;
634 int ret; 702 int ret;
635 703
636 ret = validate_insn_bits(auprobe, mm, &insn); 704 ret = uprobe_init_insn(auprobe, &insn, is_64bit_mm(mm));
637 if (ret) 705 if (ret)
638 return ret; 706 return ret;
639 707
@@ -642,44 +710,39 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
642 return ret; 710 return ret;
643 711
644 /* 712 /*
645 * Figure out which fixups arch_uprobe_post_xol() will need to perform, 713 * Figure out which fixups default_post_xol_op() will need to perform,
646 * and annotate arch_uprobe->fixups accordingly. To start with, ->fixups 714 * and annotate defparam->fixups accordingly.
647 * is either zero or it reflects rip-related fixups.
648 */ 715 */
649 switch (OPCODE1(&insn)) { 716 switch (OPCODE1(&insn)) {
650 case 0x9d: /* popf */ 717 case 0x9d: /* popf */
651 auprobe->fixups |= UPROBE_FIX_SETF; 718 auprobe->defparam.fixups |= UPROBE_FIX_SETF;
652 break; 719 break;
653 case 0xc3: /* ret or lret -- ip is correct */ 720 case 0xc3: /* ret or lret -- ip is correct */
654 case 0xcb: 721 case 0xcb:
655 case 0xc2: 722 case 0xc2:
656 case 0xca: 723 case 0xca:
657 fix_ip = false; 724 case 0xea: /* jmp absolute -- ip is correct */
725 fix_ip_or_call = 0;
658 break; 726 break;
659 case 0x9a: /* call absolute - Fix return addr, not ip */ 727 case 0x9a: /* call absolute - Fix return addr, not ip */
660 fix_call = true; 728 fix_ip_or_call = UPROBE_FIX_CALL;
661 fix_ip = false;
662 break;
663 case 0xea: /* jmp absolute -- ip is correct */
664 fix_ip = false;
665 break; 729 break;
666 case 0xff: 730 case 0xff:
667 insn_get_modrm(&insn);
668 switch (MODRM_REG(&insn)) { 731 switch (MODRM_REG(&insn)) {
669 case 2: case 3: /* call or lcall, indirect */ 732 case 2: case 3: /* call or lcall, indirect */
670 fix_call = true; 733 fix_ip_or_call = UPROBE_FIX_CALL;
734 break;
671 case 4: case 5: /* jmp or ljmp, indirect */ 735 case 4: case 5: /* jmp or ljmp, indirect */
672 fix_ip = false; 736 fix_ip_or_call = 0;
737 break;
673 } 738 }
674 /* fall through */ 739 /* fall through */
675 default: 740 default:
676 handle_riprel_insn(auprobe, &insn); 741 riprel_analyze(auprobe, &insn);
677 } 742 }
678 743
679 if (fix_ip) 744 auprobe->defparam.ilen = insn.length;
680 auprobe->fixups |= UPROBE_FIX_IP; 745 auprobe->defparam.fixups |= fix_ip_or_call;
681 if (fix_call)
682 auprobe->fixups |= UPROBE_FIX_CALL;
683 746
684 auprobe->ops = &default_xol_ops; 747 auprobe->ops = &default_xol_ops;
685 return 0; 748 return 0;
@@ -694,6 +757,12 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
694{ 757{
695 struct uprobe_task *utask = current->utask; 758 struct uprobe_task *utask = current->utask;
696 759
760 if (auprobe->ops->pre_xol) {
761 int err = auprobe->ops->pre_xol(auprobe, regs);
762 if (err)
763 return err;
764 }
765
697 regs->ip = utask->xol_vaddr; 766 regs->ip = utask->xol_vaddr;
698 utask->autask.saved_trap_nr = current->thread.trap_nr; 767 utask->autask.saved_trap_nr = current->thread.trap_nr;
699 current->thread.trap_nr = UPROBE_TRAP_NR; 768 current->thread.trap_nr = UPROBE_TRAP_NR;
@@ -703,8 +772,6 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
703 if (test_tsk_thread_flag(current, TIF_BLOCKSTEP)) 772 if (test_tsk_thread_flag(current, TIF_BLOCKSTEP))
704 set_task_blockstep(current, false); 773 set_task_blockstep(current, false);
705 774
706 if (auprobe->ops->pre_xol)
707 return auprobe->ops->pre_xol(auprobe, regs);
708 return 0; 775 return 0;
709} 776}
710 777
@@ -732,56 +799,42 @@ bool arch_uprobe_xol_was_trapped(struct task_struct *t)
732 * single-step, we single-stepped a copy of the instruction. 799 * single-step, we single-stepped a copy of the instruction.
733 * 800 *
734 * This function prepares to resume execution after the single-step. 801 * This function prepares to resume execution after the single-step.
735 * We have to fix things up as follows:
736 *
737 * Typically, the new ip is relative to the copied instruction. We need
738 * to make it relative to the original instruction (FIX_IP). Exceptions
739 * are return instructions and absolute or indirect jump or call instructions.
740 *
741 * If the single-stepped instruction was a call, the return address that
742 * is atop the stack is the address following the copied instruction. We
743 * need to make it the address following the original instruction (FIX_CALL).
744 *
745 * If the original instruction was a rip-relative instruction such as
746 * "movl %edx,0xnnnn(%rip)", we have instead executed an equivalent
747 * instruction using a scratch register -- e.g., "movl %edx,(%rax)".
748 * We need to restore the contents of the scratch register and adjust
749 * the ip, keeping in mind that the instruction we executed is 4 bytes
750 * shorter than the original instruction (since we squeezed out the offset
751 * field). (FIX_RIP_AX or FIX_RIP_CX)
752 */ 802 */
753int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) 803int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
754{ 804{
755 struct uprobe_task *utask = current->utask; 805 struct uprobe_task *utask = current->utask;
806 bool send_sigtrap = utask->autask.saved_tf;
807 int err = 0;
756 808
757 WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR); 809 WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR);
810 current->thread.trap_nr = utask->autask.saved_trap_nr;
758 811
759 if (auprobe->ops->post_xol) { 812 if (auprobe->ops->post_xol) {
760 int err = auprobe->ops->post_xol(auprobe, regs); 813 err = auprobe->ops->post_xol(auprobe, regs);
761 if (err) { 814 if (err) {
762 arch_uprobe_abort_xol(auprobe, regs);
763 /* 815 /*
764 * Restart the probed insn. ->post_xol() must ensure 816 * Restore ->ip for restart or post mortem analysis.
765 * this is really possible if it returns -ERESTART. 817 * ->post_xol() must not return -ERESTART unless this
818 * is really possible.
766 */ 819 */
820 regs->ip = utask->vaddr;
767 if (err == -ERESTART) 821 if (err == -ERESTART)
768 return 0; 822 err = 0;
769 return err; 823 send_sigtrap = false;
770 } 824 }
771 } 825 }
772
773 current->thread.trap_nr = utask->autask.saved_trap_nr;
774 /* 826 /*
775 * arch_uprobe_pre_xol() doesn't save the state of TIF_BLOCKSTEP 827 * arch_uprobe_pre_xol() doesn't save the state of TIF_BLOCKSTEP
776 * so we can get an extra SIGTRAP if we do not clear TF. We need 828 * so we can get an extra SIGTRAP if we do not clear TF. We need
777 * to examine the opcode to make it right. 829 * to examine the opcode to make it right.
778 */ 830 */
779 if (utask->autask.saved_tf) 831 if (send_sigtrap)
780 send_sig(SIGTRAP, current, 0); 832 send_sig(SIGTRAP, current, 0);
781 else if (!(auprobe->fixups & UPROBE_FIX_SETF)) 833
834 if (!utask->autask.saved_tf)
782 regs->flags &= ~X86_EFLAGS_TF; 835 regs->flags &= ~X86_EFLAGS_TF;
783 836
784 return 0; 837 return err;
785} 838}
786 839
787/* callback routine for handling exceptions. */ 840/* callback routine for handling exceptions. */
@@ -815,18 +868,18 @@ int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val,
815 868
816/* 869/*
817 * This function gets called when XOL instruction either gets trapped or 870 * This function gets called when XOL instruction either gets trapped or
818 * the thread has a fatal signal, or if arch_uprobe_post_xol() failed. 871 * the thread has a fatal signal. Reset the instruction pointer to its
819 * Reset the instruction pointer to its probed address for the potential 872 * probed address for the potential restart or for post mortem analysis.
820 * restart or for post mortem analysis.
821 */ 873 */
822void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) 874void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
823{ 875{
824 struct uprobe_task *utask = current->utask; 876 struct uprobe_task *utask = current->utask;
825 877
826 current->thread.trap_nr = utask->autask.saved_trap_nr; 878 if (auprobe->ops->abort)
827 handle_riprel_post_xol(auprobe, regs, NULL); 879 auprobe->ops->abort(auprobe, regs);
828 instruction_pointer_set(regs, utask->vaddr);
829 880
881 current->thread.trap_nr = utask->autask.saved_trap_nr;
882 regs->ip = utask->vaddr;
830 /* clear TF if it was set by us in arch_uprobe_pre_xol() */ 883 /* clear TF if it was set by us in arch_uprobe_pre_xol() */
831 if (!utask->autask.saved_tf) 884 if (!utask->autask.saved_tf)
832 regs->flags &= ~X86_EFLAGS_TF; 885 regs->flags &= ~X86_EFLAGS_TF;
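
The uprobes.c hunks above route pre_xol/post_xol/abort through auprobe->ops and make arch_uprobe_post_xol() restore regs->ip itself on failure, treating -ERESTART as "restart the probed insn" and suppressing the trailing SIGTRAP. Below is a minimal user-space model of that dispatch; every name in it (uprobe_ops_sketch, finish_xol, regs_sketch, ...) is invented for illustration and only the control flow mirrors the diff.

#include <stdbool.h>
#include <stdio.h>

#define ERESTART 85

struct regs_sketch { unsigned long ip; };

struct uprobe_ops_sketch {                 /* stands in for the ops table the diff dispatches through */
	int  (*pre_xol)(struct regs_sketch *);
	int  (*post_xol)(struct regs_sketch *);
	void (*abort)(struct regs_sketch *);
};

static int post_xol_sketch(struct regs_sketch *r)
{
	return -ERESTART;                  /* ask the core to restart the probed instruction */
}

static const struct uprobe_ops_sketch default_ops = { .post_xol = post_xol_sketch };

/* core-side logic modelled on the new arch_uprobe_post_xol(): on any
 * ->post_xol() failure restore ip to the probed address; -ERESTART is
 * not an error for the caller and suppresses the trailing SIGTRAP. */
static int finish_xol(const struct uprobe_ops_sketch *ops,
		      struct regs_sketch *regs, unsigned long probed_vaddr)
{
	int err = 0;
	bool send_sigtrap = true;

	if (ops->post_xol) {
		err = ops->post_xol(regs);
		if (err) {
			regs->ip = probed_vaddr;
			if (err == -ERESTART)
				err = 0;
			send_sigtrap = false;
		}
	}
	if (send_sigtrap)
		printf("would send SIGTRAP\n");
	return err;
}

int main(void)
{
	struct regs_sketch regs = { .ip = 0x1000 };
	int ret = finish_xol(&default_ops, &regs, 0x400123);

	printf("resuming at 0x%lx, err=%d\n", regs.ip, ret);
	return ret;
}
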
diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S
index 2930ae05d773..28f85c916712 100644
--- a/arch/x86/lib/thunk_32.S
+++ b/arch/x86/lib/thunk_32.S
@@ -4,8 +4,8 @@
4 * (inspired by Andi Kleen's thunk_64.S) 4 * (inspired by Andi Kleen's thunk_64.S)
5 * Subject to the GNU public license, v.2. No warranty of any kind. 5 * Subject to the GNU public license, v.2. No warranty of any kind.
6 */ 6 */
7
8 #include <linux/linkage.h> 7 #include <linux/linkage.h>
8 #include <asm/asm.h>
9 9
10#ifdef CONFIG_TRACE_IRQFLAGS 10#ifdef CONFIG_TRACE_IRQFLAGS
11 /* put return address in eax (arg1) */ 11 /* put return address in eax (arg1) */
@@ -22,6 +22,7 @@
22 popl %ecx 22 popl %ecx
23 popl %eax 23 popl %eax
24 ret 24 ret
25 _ASM_NOKPROBE(\name)
25 .endm 26 .endm
26 27
27 thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller 28 thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index a63efd6bb6a5..92d9feaff42b 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S
@@ -8,6 +8,7 @@
8#include <linux/linkage.h> 8#include <linux/linkage.h>
9#include <asm/dwarf2.h> 9#include <asm/dwarf2.h>
10#include <asm/calling.h> 10#include <asm/calling.h>
11#include <asm/asm.h>
11 12
12 /* rdi: arg1 ... normal C conventions. rax is saved/restored. */ 13 /* rdi: arg1 ... normal C conventions. rax is saved/restored. */
13 .macro THUNK name, func, put_ret_addr_in_rdi=0 14 .macro THUNK name, func, put_ret_addr_in_rdi=0
@@ -25,6 +26,7 @@
25 call \func 26 call \func
26 jmp restore 27 jmp restore
27 CFI_ENDPROC 28 CFI_ENDPROC
29 _ASM_NOKPROBE(\name)
28 .endm 30 .endm
29 31
30#ifdef CONFIG_TRACE_IRQFLAGS 32#ifdef CONFIG_TRACE_IRQFLAGS
@@ -43,3 +45,4 @@ restore:
43 RESTORE_ARGS 45 RESTORE_ARGS
44 ret 46 ret
45 CFI_ENDPROC 47 CFI_ENDPROC
48 _ASM_NOKPROBE(restore)
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 858b47b5221b..36642793e315 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -8,7 +8,7 @@
8#include <linux/kdebug.h> /* oops_begin/end, ... */ 8#include <linux/kdebug.h> /* oops_begin/end, ... */
9#include <linux/module.h> /* search_exception_table */ 9#include <linux/module.h> /* search_exception_table */
10#include <linux/bootmem.h> /* max_low_pfn */ 10#include <linux/bootmem.h> /* max_low_pfn */
11#include <linux/kprobes.h> /* __kprobes, ... */ 11#include <linux/kprobes.h> /* NOKPROBE_SYMBOL, ... */
12#include <linux/mmiotrace.h> /* kmmio_handler, ... */ 12#include <linux/mmiotrace.h> /* kmmio_handler, ... */
13#include <linux/perf_event.h> /* perf_sw_event */ 13#include <linux/perf_event.h> /* perf_sw_event */
14#include <linux/hugetlb.h> /* hstate_index_to_shift */ 14#include <linux/hugetlb.h> /* hstate_index_to_shift */
@@ -46,7 +46,7 @@ enum x86_pf_error_code {
46 * Returns 0 if mmiotrace is disabled, or if the fault is not 46 * Returns 0 if mmiotrace is disabled, or if the fault is not
47 * handled by mmiotrace: 47 * handled by mmiotrace:
48 */ 48 */
49static inline int __kprobes 49static nokprobe_inline int
50kmmio_fault(struct pt_regs *regs, unsigned long addr) 50kmmio_fault(struct pt_regs *regs, unsigned long addr)
51{ 51{
52 if (unlikely(is_kmmio_active())) 52 if (unlikely(is_kmmio_active()))
@@ -55,7 +55,7 @@ kmmio_fault(struct pt_regs *regs, unsigned long addr)
55 return 0; 55 return 0;
56} 56}
57 57
58static inline int __kprobes kprobes_fault(struct pt_regs *regs) 58static nokprobe_inline int kprobes_fault(struct pt_regs *regs)
59{ 59{
60 int ret = 0; 60 int ret = 0;
61 61
@@ -262,7 +262,7 @@ void vmalloc_sync_all(void)
262 * 262 *
263 * Handle a fault on the vmalloc or module mapping area 263 * Handle a fault on the vmalloc or module mapping area
264 */ 264 */
265static noinline __kprobes int vmalloc_fault(unsigned long address) 265static noinline int vmalloc_fault(unsigned long address)
266{ 266{
267 unsigned long pgd_paddr; 267 unsigned long pgd_paddr;
268 pmd_t *pmd_k; 268 pmd_t *pmd_k;
@@ -292,6 +292,7 @@ static noinline __kprobes int vmalloc_fault(unsigned long address)
292 292
293 return 0; 293 return 0;
294} 294}
295NOKPROBE_SYMBOL(vmalloc_fault);
295 296
296/* 297/*
297 * Did it hit the DOS screen memory VA from vm86 mode? 298 * Did it hit the DOS screen memory VA from vm86 mode?
@@ -359,7 +360,7 @@ void vmalloc_sync_all(void)
359 * 360 *
360 * This assumes no large pages in there. 361 * This assumes no large pages in there.
361 */ 362 */
362static noinline __kprobes int vmalloc_fault(unsigned long address) 363static noinline int vmalloc_fault(unsigned long address)
363{ 364{
364 pgd_t *pgd, *pgd_ref; 365 pgd_t *pgd, *pgd_ref;
365 pud_t *pud, *pud_ref; 366 pud_t *pud, *pud_ref;
@@ -426,6 +427,7 @@ static noinline __kprobes int vmalloc_fault(unsigned long address)
426 427
427 return 0; 428 return 0;
428} 429}
430NOKPROBE_SYMBOL(vmalloc_fault);
429 431
430#ifdef CONFIG_CPU_SUP_AMD 432#ifdef CONFIG_CPU_SUP_AMD
431static const char errata93_warning[] = 433static const char errata93_warning[] =
@@ -928,7 +930,7 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte)
928 * There are no security implications to leaving a stale TLB when 930 * There are no security implications to leaving a stale TLB when
929 * increasing the permissions on a page. 931 * increasing the permissions on a page.
930 */ 932 */
931static noinline __kprobes int 933static noinline int
932spurious_fault(unsigned long error_code, unsigned long address) 934spurious_fault(unsigned long error_code, unsigned long address)
933{ 935{
934 pgd_t *pgd; 936 pgd_t *pgd;
@@ -976,6 +978,7 @@ spurious_fault(unsigned long error_code, unsigned long address)
976 978
977 return ret; 979 return ret;
978} 980}
981NOKPROBE_SYMBOL(spurious_fault);
979 982
980int show_unhandled_signals = 1; 983int show_unhandled_signals = 1;
981 984
@@ -1031,7 +1034,7 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs)
1031 * {,trace_}do_page_fault() have notrace on. Having this an actual function 1034 * {,trace_}do_page_fault() have notrace on. Having this an actual function
1032 * guarantees there's a function trace entry. 1035 * guarantees there's a function trace entry.
1033 */ 1036 */
1034static void __kprobes noinline 1037static noinline void
1035__do_page_fault(struct pt_regs *regs, unsigned long error_code, 1038__do_page_fault(struct pt_regs *regs, unsigned long error_code,
1036 unsigned long address) 1039 unsigned long address)
1037{ 1040{
@@ -1254,8 +1257,9 @@ good_area:
1254 1257
1255 up_read(&mm->mmap_sem); 1258 up_read(&mm->mmap_sem);
1256} 1259}
1260NOKPROBE_SYMBOL(__do_page_fault);
1257 1261
1258dotraplinkage void __kprobes notrace 1262dotraplinkage void notrace
1259do_page_fault(struct pt_regs *regs, unsigned long error_code) 1263do_page_fault(struct pt_regs *regs, unsigned long error_code)
1260{ 1264{
1261 unsigned long address = read_cr2(); /* Get the faulting address */ 1265 unsigned long address = read_cr2(); /* Get the faulting address */
@@ -1273,10 +1277,12 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
1273 __do_page_fault(regs, error_code, address); 1277 __do_page_fault(regs, error_code, address);
1274 exception_exit(prev_state); 1278 exception_exit(prev_state);
1275} 1279}
1280NOKPROBE_SYMBOL(do_page_fault);
1276 1281
1277#ifdef CONFIG_TRACING 1282#ifdef CONFIG_TRACING
1278static void trace_page_fault_entries(unsigned long address, struct pt_regs *regs, 1283static nokprobe_inline void
1279 unsigned long error_code) 1284trace_page_fault_entries(unsigned long address, struct pt_regs *regs,
1285 unsigned long error_code)
1280{ 1286{
1281 if (user_mode(regs)) 1287 if (user_mode(regs))
1282 trace_page_fault_user(address, regs, error_code); 1288 trace_page_fault_user(address, regs, error_code);
@@ -1284,7 +1290,7 @@ static void trace_page_fault_entries(unsigned long address, struct pt_regs *regs
1284 trace_page_fault_kernel(address, regs, error_code); 1290 trace_page_fault_kernel(address, regs, error_code);
1285} 1291}
1286 1292
1287dotraplinkage void __kprobes notrace 1293dotraplinkage void notrace
1288trace_do_page_fault(struct pt_regs *regs, unsigned long error_code) 1294trace_do_page_fault(struct pt_regs *regs, unsigned long error_code)
1289{ 1295{
1290 /* 1296 /*
@@ -1301,4 +1307,5 @@ trace_do_page_fault(struct pt_regs *regs, unsigned long error_code)
1301 __do_page_fault(regs, error_code, address); 1307 __do_page_fault(regs, error_code, address);
1302 exception_exit(prev_state); 1308 exception_exit(prev_state);
1303} 1309}
1310NOKPROBE_SYMBOL(trace_do_page_fault);
1304#endif /* CONFIG_TRACING */ 1311#endif /* CONFIG_TRACING */
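
The fault.c conversion above replaces __kprobes (which moved whole functions into .kprobes.text) with NOKPROBE_SYMBOL() after each definition, and uses nokprobe_inline for helpers that must stay inlined. A minimal sketch of the same pattern on made-up functions; only the two annotations come from this series, everything else is illustrative.

#include <linux/errno.h>
#include <linux/kprobes.h>

/* must never be instrumented, but should still inline on !CONFIG_KPROBES */
static nokprobe_inline bool addr_is_suspect(unsigned long addr)
{
	return addr == 0;
}

static noinline int handle_suspect_fault(unsigned long addr)
{
	return addr_is_suspect(addr) ? -EFAULT : 0;
}
NOKPROBE_SYMBOL(handle_suspect_fault);	/* records &handle_suspect_fault in _kprobe_blacklist */
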
diff --git a/fs/exec.c b/fs/exec.c
index 238b7aa26f68..a3d33fe592d6 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1046,13 +1046,13 @@ EXPORT_SYMBOL_GPL(get_task_comm);
1046 * so that a new one can be started 1046 * so that a new one can be started
1047 */ 1047 */
1048 1048
1049void set_task_comm(struct task_struct *tsk, const char *buf) 1049void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec)
1050{ 1050{
1051 task_lock(tsk); 1051 task_lock(tsk);
1052 trace_task_rename(tsk, buf); 1052 trace_task_rename(tsk, buf);
1053 strlcpy(tsk->comm, buf, sizeof(tsk->comm)); 1053 strlcpy(tsk->comm, buf, sizeof(tsk->comm));
1054 task_unlock(tsk); 1054 task_unlock(tsk);
1055 perf_event_comm(tsk); 1055 perf_event_comm(tsk, exec);
1056} 1056}
1057 1057
1058int flush_old_exec(struct linux_binprm * bprm) 1058int flush_old_exec(struct linux_binprm * bprm)
@@ -1110,7 +1110,8 @@ void setup_new_exec(struct linux_binprm * bprm)
1110 else 1110 else
1111 set_dumpable(current->mm, suid_dumpable); 1111 set_dumpable(current->mm, suid_dumpable);
1112 1112
1113 set_task_comm(current, kbasename(bprm->filename)); 1113 perf_event_exec();
1114 __set_task_comm(current, kbasename(bprm->filename), true);
1114 1115
1115 /* Set the new mm task size. We have to do that late because it may 1116 /* Set the new mm task size. We have to do that late because it may
1116 * depend on TIF_32BIT which is only updated in flush_thread() on 1117 * depend on TIF_32BIT which is only updated in flush_thread() on
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index d647637cd699..471ba48c7ae4 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -109,6 +109,15 @@
109#define BRANCH_PROFILE() 109#define BRANCH_PROFILE()
110#endif 110#endif
111 111
112#ifdef CONFIG_KPROBES
113#define KPROBE_BLACKLIST() . = ALIGN(8); \
114 VMLINUX_SYMBOL(__start_kprobe_blacklist) = .; \
115 *(_kprobe_blacklist) \
116 VMLINUX_SYMBOL(__stop_kprobe_blacklist) = .;
117#else
118#define KPROBE_BLACKLIST()
119#endif
120
112#ifdef CONFIG_EVENT_TRACING 121#ifdef CONFIG_EVENT_TRACING
113#define FTRACE_EVENTS() . = ALIGN(8); \ 122#define FTRACE_EVENTS() . = ALIGN(8); \
114 VMLINUX_SYMBOL(__start_ftrace_events) = .; \ 123 VMLINUX_SYMBOL(__start_ftrace_events) = .; \
@@ -478,6 +487,7 @@
478 *(.init.rodata) \ 487 *(.init.rodata) \
479 FTRACE_EVENTS() \ 488 FTRACE_EVENTS() \
480 TRACE_SYSCALLS() \ 489 TRACE_SYSCALLS() \
490 KPROBE_BLACKLIST() \
481 MEM_DISCARD(init.rodata) \ 491 MEM_DISCARD(init.rodata) \
482 CLK_OF_TABLES() \ 492 CLK_OF_TABLES() \
483 RESERVEDMEM_OF_TABLES() \ 493 RESERVEDMEM_OF_TABLES() \
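
KPROBE_BLACKLIST() gathers every _kprobe_blacklist entry (one unsigned long per NOKPROBE_SYMBOL() use) into init rodata between __start_kprobe_blacklist and __stop_kprobe_blacklist. A hedged sketch of how boot code could walk that array; the helper name and the printout are made up, only the section bounds come from this hunk:

#include <linux/init.h>
#include <linux/kernel.h>

extern unsigned long __start_kprobe_blacklist[];
extern unsigned long __stop_kprobe_blacklist[];

static void __init dump_kprobe_blacklist_sketch(void)
{
	unsigned long *iter;

	/* each entry holds the address of a NOKPROBE_SYMBOL()-marked function */
	for (iter = __start_kprobe_blacklist; iter < __stop_kprobe_blacklist; iter++)
		pr_info("kprobe blacklist: %pS\n", (void *)*iter);
}
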
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 64fdfe1cfcf0..d5ad7b1118fc 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -383,7 +383,9 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
383/* Ignore/forbid kprobes attach on very low level functions marked by this attribute: */ 383/* Ignore/forbid kprobes attach on very low level functions marked by this attribute: */
384#ifdef CONFIG_KPROBES 384#ifdef CONFIG_KPROBES
385# define __kprobes __attribute__((__section__(".kprobes.text"))) 385# define __kprobes __attribute__((__section__(".kprobes.text")))
386# define nokprobe_inline __always_inline
386#else 387#else
387# define __kprobes 388# define __kprobes
389# define nokprobe_inline inline
388#endif 390#endif
389#endif /* __LINUX_COMPILER_H */ 391#endif /* __LINUX_COMPILER_H */
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 7bd2ad01e39c..f7296e57d614 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -205,10 +205,10 @@ struct kretprobe_blackpoint {
205 void *addr; 205 void *addr;
206}; 206};
207 207
208struct kprobe_blackpoint { 208struct kprobe_blacklist_entry {
209 const char *name; 209 struct list_head list;
210 unsigned long start_addr; 210 unsigned long start_addr;
211 unsigned long range; 211 unsigned long end_addr;
212}; 212};
213 213
214#ifdef CONFIG_KPROBES 214#ifdef CONFIG_KPROBES
@@ -265,6 +265,7 @@ extern void arch_disarm_kprobe(struct kprobe *p);
265extern int arch_init_kprobes(void); 265extern int arch_init_kprobes(void);
266extern void show_registers(struct pt_regs *regs); 266extern void show_registers(struct pt_regs *regs);
267extern void kprobes_inc_nmissed_count(struct kprobe *p); 267extern void kprobes_inc_nmissed_count(struct kprobe *p);
268extern bool arch_within_kprobe_blacklist(unsigned long addr);
268 269
269struct kprobe_insn_cache { 270struct kprobe_insn_cache {
270 struct mutex mutex; 271 struct mutex mutex;
@@ -476,4 +477,18 @@ static inline int enable_jprobe(struct jprobe *jp)
476 return enable_kprobe(&jp->kp); 477 return enable_kprobe(&jp->kp);
477} 478}
478 479
480#ifdef CONFIG_KPROBES
481/*
482 * Blacklist generating macro. Specify functions which are not probed
483 * by using this macro.
484 */
485#define __NOKPROBE_SYMBOL(fname) \
486static unsigned long __used \
487 __attribute__((section("_kprobe_blacklist"))) \
488 _kbl_addr_##fname = (unsigned long)fname;
489#define NOKPROBE_SYMBOL(fname) __NOKPROBE_SYMBOL(fname)
490#else
491#define NOKPROBE_SYMBOL(fname)
492#endif
493
479#endif /* _LINUX_KPROBES_H */ 494#endif /* _LINUX_KPROBES_H */
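
NOKPROBE_SYMBOL(fname) drops the address of fname into the _kprobe_blacklist section, and struct kprobe_blacklist_entry now carries a start/end range per blacklisted function. A short sketch of the range check register_kprobe() can then make; within_blacklist_sketch is a hypothetical name, the struct fields are taken from the hunk above:

#include <linux/kprobes.h>
#include <linux/list.h>

static bool within_blacklist_sketch(unsigned long addr, struct list_head *blacklist)
{
	struct kprobe_blacklist_entry *ent;

	list_for_each_entry(ent, blacklist, list)
		if (addr >= ent->start_addr && addr < ent->end_addr)
			return true;	/* inside a NOKPROBE_SYMBOL()-marked function */
	return false;
}
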
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index a9209118d80f..707617a8c0f6 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -167,6 +167,11 @@ struct perf_event;
167#define PERF_EVENT_TXN 0x1 167#define PERF_EVENT_TXN 0x1
168 168
169/** 169/**
170 * pmu::capabilities flags
171 */
172#define PERF_PMU_CAP_NO_INTERRUPT 0x01
173
174/**
170 * struct pmu - generic performance monitoring unit 175 * struct pmu - generic performance monitoring unit
171 */ 176 */
172struct pmu { 177struct pmu {
@@ -178,6 +183,11 @@ struct pmu {
178 const char *name; 183 const char *name;
179 int type; 184 int type;
180 185
186 /*
187 * various common per-pmu feature flags
188 */
189 int capabilities;
190
181 int * __percpu pmu_disable_count; 191 int * __percpu pmu_disable_count;
182 struct perf_cpu_context * __percpu pmu_cpu_context; 192 struct perf_cpu_context * __percpu pmu_cpu_context;
183 int task_ctx_nr; 193 int task_ctx_nr;
@@ -696,7 +706,8 @@ extern struct perf_guest_info_callbacks *perf_guest_cbs;
696extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); 706extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
697extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); 707extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
698 708
699extern void perf_event_comm(struct task_struct *tsk); 709extern void perf_event_exec(void);
710extern void perf_event_comm(struct task_struct *tsk, bool exec);
700extern void perf_event_fork(struct task_struct *tsk); 711extern void perf_event_fork(struct task_struct *tsk);
701 712
702/* Callchains */ 713/* Callchains */
@@ -773,7 +784,7 @@ extern void perf_event_enable(struct perf_event *event);
773extern void perf_event_disable(struct perf_event *event); 784extern void perf_event_disable(struct perf_event *event);
774extern int __perf_event_disable(void *info); 785extern int __perf_event_disable(void *info);
775extern void perf_event_task_tick(void); 786extern void perf_event_task_tick(void);
776#else 787#else /* !CONFIG_PERF_EVENTS: */
777static inline void 788static inline void
778perf_event_task_sched_in(struct task_struct *prev, 789perf_event_task_sched_in(struct task_struct *prev,
779 struct task_struct *task) { } 790 struct task_struct *task) { }
@@ -803,7 +814,8 @@ static inline int perf_unregister_guest_info_callbacks
803(struct perf_guest_info_callbacks *callbacks) { return 0; } 814(struct perf_guest_info_callbacks *callbacks) { return 0; }
804 815
805static inline void perf_event_mmap(struct vm_area_struct *vma) { } 816static inline void perf_event_mmap(struct vm_area_struct *vma) { }
806static inline void perf_event_comm(struct task_struct *tsk) { } 817static inline void perf_event_exec(void) { }
818static inline void perf_event_comm(struct task_struct *tsk, bool exec) { }
807static inline void perf_event_fork(struct task_struct *tsk) { } 819static inline void perf_event_fork(struct task_struct *tsk) { }
808static inline void perf_event_init(void) { } 820static inline void perf_event_init(void) { }
809static inline int perf_swevent_get_recursion_context(void) { return -1; } 821static inline int perf_swevent_get_recursion_context(void) { return -1; }
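
PERF_PMU_CAP_NO_INTERRUPT lets a PMU driver declare up front that it has no sampling interrupt, so the core (see the perf_event_open() hunk further down) rejects sampling events with -ENOTSUPP instead of each driver rolling its own check. A trimmed, hypothetical driver sketch; apart from the struct pmu fields and perf_pmu_register(), every name here is invented:

#include <linux/errno.h>
#include <linux/perf_event.h>

static int nosample_event_init(struct perf_event *event)
{
	return event->attr.type == PERF_TYPE_SOFTWARE ? 0 : -ENOENT;
}

static int  nosample_add(struct perf_event *event, int flags)   { return 0; }
static void nosample_del(struct perf_event *event, int flags)   { }
static void nosample_start(struct perf_event *event, int flags) { }
static void nosample_stop(struct perf_event *event, int flags)  { }
static void nosample_read(struct perf_event *event)             { }

static struct pmu nosample_pmu = {
	.capabilities	= PERF_PMU_CAP_NO_INTERRUPT,	/* counting only, no sampling IRQ */
	.event_init	= nosample_event_init,
	.add		= nosample_add,
	.del		= nosample_del,
	.start		= nosample_start,
	.stop		= nosample_stop,
	.read		= nosample_read,
};

/* registration, e.g. from a module init path:
 *	perf_pmu_register(&nosample_pmu, "nosample", -1);
 */
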
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ea74596014a2..b8a98427f964 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2421,7 +2421,11 @@ extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, i
2421struct task_struct *fork_idle(int); 2421struct task_struct *fork_idle(int);
2422extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); 2422extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
2423 2423
2424extern void set_task_comm(struct task_struct *tsk, const char *from); 2424extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec);
2425static inline void set_task_comm(struct task_struct *tsk, const char *from)
2426{
2427 __set_task_comm(tsk, from, false);
2428}
2425extern char *get_task_comm(char *to, struct task_struct *tsk); 2429extern char *get_task_comm(char *to, struct task_struct *tsk);
2426 2430
2427#ifdef CONFIG_SMP 2431#ifdef CONFIG_SMP
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index c52f827ba6ce..4f844c6b03ee 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -103,6 +103,7 @@ extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, u
103extern bool __weak is_swbp_insn(uprobe_opcode_t *insn); 103extern bool __weak is_swbp_insn(uprobe_opcode_t *insn);
104extern bool __weak is_trap_insn(uprobe_opcode_t *insn); 104extern bool __weak is_trap_insn(uprobe_opcode_t *insn);
105extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs); 105extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs);
106extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs);
106extern int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t); 107extern int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t);
107extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); 108extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
108extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool); 109extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool);
@@ -133,6 +134,9 @@ extern void __weak arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
133#else /* !CONFIG_UPROBES */ 134#else /* !CONFIG_UPROBES */
134struct uprobes_state { 135struct uprobes_state {
135}; 136};
137
138#define uprobe_get_trap_addr(regs) instruction_pointer(regs)
139
136static inline int 140static inline int
137uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) 141uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
138{ 142{
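
uprobe_get_trap_addr() returns the probed virtual address while the current task is single-stepping a uprobe out of line, and falls back to instruction_pointer() otherwise (or unconditionally on !CONFIG_UPROBES). A hedged sketch of the intended use when an arch trap path fills siginfo; the surrounding function is hypothetical:

#include <linux/ptrace.h>
#include <linux/signal.h>
#include <linux/uprobes.h>

/* sketch: report the probed address, not the XOL slot, to user space */
static void fill_fault_addr_sketch(struct pt_regs *regs, siginfo_t *info)
{
	info->si_addr = (void __user *)uprobe_get_trap_addr(regs);
}
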
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index e3fc8f09d110..5312fae47218 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -163,8 +163,9 @@ enum perf_branch_sample_type {
163 PERF_SAMPLE_BRANCH_ABORT_TX = 1U << 7, /* transaction aborts */ 163 PERF_SAMPLE_BRANCH_ABORT_TX = 1U << 7, /* transaction aborts */
164 PERF_SAMPLE_BRANCH_IN_TX = 1U << 8, /* in transaction */ 164 PERF_SAMPLE_BRANCH_IN_TX = 1U << 8, /* in transaction */
165 PERF_SAMPLE_BRANCH_NO_TX = 1U << 9, /* not in transaction */ 165 PERF_SAMPLE_BRANCH_NO_TX = 1U << 9, /* not in transaction */
166 PERF_SAMPLE_BRANCH_COND = 1U << 10, /* conditional branches */
166 167
167 PERF_SAMPLE_BRANCH_MAX = 1U << 10, /* non-ABI */ 168 PERF_SAMPLE_BRANCH_MAX = 1U << 11, /* non-ABI */
168}; 169};
169 170
170#define PERF_SAMPLE_BRANCH_PLM_ALL \ 171#define PERF_SAMPLE_BRANCH_PLM_ALL \
@@ -301,8 +302,8 @@ struct perf_event_attr {
301 exclude_callchain_kernel : 1, /* exclude kernel callchains */ 302 exclude_callchain_kernel : 1, /* exclude kernel callchains */
302 exclude_callchain_user : 1, /* exclude user callchains */ 303 exclude_callchain_user : 1, /* exclude user callchains */
303 mmap2 : 1, /* include mmap with inode data */ 304 mmap2 : 1, /* include mmap with inode data */
304 305 comm_exec : 1, /* flag comm events that are due to an exec */
305 __reserved_1 : 40; 306 __reserved_1 : 39;
306 307
307 union { 308 union {
308 __u32 wakeup_events; /* wakeup every n events */ 309 __u32 wakeup_events; /* wakeup every n events */
@@ -501,7 +502,12 @@ struct perf_event_mmap_page {
501#define PERF_RECORD_MISC_GUEST_KERNEL (4 << 0) 502#define PERF_RECORD_MISC_GUEST_KERNEL (4 << 0)
502#define PERF_RECORD_MISC_GUEST_USER (5 << 0) 503#define PERF_RECORD_MISC_GUEST_USER (5 << 0)
503 504
505/*
506 * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on
507 * different events so can reuse the same bit position.
508 */
504#define PERF_RECORD_MISC_MMAP_DATA (1 << 13) 509#define PERF_RECORD_MISC_MMAP_DATA (1 << 13)
510#define PERF_RECORD_MISC_COMM_EXEC (1 << 13)
505/* 511/*
506 * Indicates that the content of PERF_SAMPLE_IP points to 512 * Indicates that the content of PERF_SAMPLE_IP points to
507 * the actual instruction that triggered the event. See also 513 * the actual instruction that triggered the event. See also
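
Two ABI additions show up in this header: PERF_SAMPLE_BRANCH_COND to ask the branch stack for conditional branches only, and attr.comm_exec plus PERF_RECORD_MISC_COMM_EXEC so tools can tell an exec()-driven comm change from a prctl() rename. A minimal user-space sketch (it assumes headers from a kernel carrying this series; the wrapper name is invented):

#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int open_cond_branch_sampler(pid_t pid)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size		= sizeof(attr);
	attr.type		= PERF_TYPE_HARDWARE;
	attr.config		= PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_period	= 100000;
	attr.sample_type	= PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
	attr.branch_sample_type	= PERF_SAMPLE_BRANCH_USER |
				  PERF_SAMPLE_BRANCH_COND;	/* new filter bit */
	attr.comm		= 1;
	attr.comm_exec		= 1;	/* new: flag exec()-generated comm events */
	/* records whose header.misc carries PERF_RECORD_MISC_COMM_EXEC came
	 * from exec(), not from a prctl(PR_SET_NAME) rename */

	return syscall(__NR_perf_event_open, &attr, pid, -1, -1, 0);
}
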
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 24d35cc38e42..5fa58e4cffac 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2974,6 +2974,22 @@ out:
2974 local_irq_restore(flags); 2974 local_irq_restore(flags);
2975} 2975}
2976 2976
2977void perf_event_exec(void)
2978{
2979 struct perf_event_context *ctx;
2980 int ctxn;
2981
2982 rcu_read_lock();
2983 for_each_task_context_nr(ctxn) {
2984 ctx = current->perf_event_ctxp[ctxn];
2985 if (!ctx)
2986 continue;
2987
2988 perf_event_enable_on_exec(ctx);
2989 }
2990 rcu_read_unlock();
2991}
2992
2977/* 2993/*
2978 * Cross CPU call to read the hardware event 2994 * Cross CPU call to read the hardware event
2979 */ 2995 */
@@ -5075,21 +5091,9 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
5075 NULL); 5091 NULL);
5076} 5092}
5077 5093
5078void perf_event_comm(struct task_struct *task) 5094void perf_event_comm(struct task_struct *task, bool exec)
5079{ 5095{
5080 struct perf_comm_event comm_event; 5096 struct perf_comm_event comm_event;
5081 struct perf_event_context *ctx;
5082 int ctxn;
5083
5084 rcu_read_lock();
5085 for_each_task_context_nr(ctxn) {
5086 ctx = task->perf_event_ctxp[ctxn];
5087 if (!ctx)
5088 continue;
5089
5090 perf_event_enable_on_exec(ctx);
5091 }
5092 rcu_read_unlock();
5093 5097
5094 if (!atomic_read(&nr_comm_events)) 5098 if (!atomic_read(&nr_comm_events))
5095 return; 5099 return;
@@ -5101,7 +5105,7 @@ void perf_event_comm(struct task_struct *task)
5101 .event_id = { 5105 .event_id = {
5102 .header = { 5106 .header = {
5103 .type = PERF_RECORD_COMM, 5107 .type = PERF_RECORD_COMM,
5104 .misc = 0, 5108 .misc = exec ? PERF_RECORD_MISC_COMM_EXEC : 0,
5105 /* .size */ 5109 /* .size */
5106 }, 5110 },
5107 /* .pid */ 5111 /* .pid */
@@ -7122,6 +7126,13 @@ SYSCALL_DEFINE5(perf_event_open,
7122 } 7126 }
7123 } 7127 }
7124 7128
7129 if (is_sampling_event(event)) {
7130 if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) {
7131 err = -ENOTSUPP;
7132 goto err_alloc;
7133 }
7134 }
7135
7125 account_event(event); 7136 account_event(event);
7126 7137
7127 /* 7138 /*
@@ -7433,7 +7444,7 @@ __perf_event_exit_task(struct perf_event *child_event,
7433 7444
7434static void perf_event_exit_task_context(struct task_struct *child, int ctxn) 7445static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
7435{ 7446{
7436 struct perf_event *child_event; 7447 struct perf_event *child_event, *next;
7437 struct perf_event_context *child_ctx; 7448 struct perf_event_context *child_ctx;
7438 unsigned long flags; 7449 unsigned long flags;
7439 7450
@@ -7487,7 +7498,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
7487 */ 7498 */
7488 mutex_lock(&child_ctx->mutex); 7499 mutex_lock(&child_ctx->mutex);
7489 7500
7490 list_for_each_entry_rcu(child_event, &child_ctx->event_list, event_entry) 7501 list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry)
7491 __perf_event_exit_task(child_event, child_ctx, child); 7502 __perf_event_exit_task(child_event, child_ctx, child);
7492 7503
7493 mutex_unlock(&child_ctx->mutex); 7504 mutex_unlock(&child_ctx->mutex);
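
The last core.c hunk fixes a use-after-free: __perf_event_exit_task() can free or unlink child_event, so the walk over event_list must cache the next element before the loop body runs. The same pattern in isolation, on a generic, made-up struct item:

#include <linux/list.h>
#include <linux/slab.h>

struct item {
	struct list_head entry;
};

static void release_all(struct list_head *head)
{
	struct item *it, *next;

	/* the _safe variant reads the next pointer up front, so freeing 'it'
	 * is fine; list_for_each_entry_rcu() here would touch freed memory,
	 * which is exactly the bug the hunk above removes */
	list_for_each_entry_safe(it, next, head, entry) {
		list_del(&it->entry);
		kfree(it);
	}
}
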
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index adcd76a96839..c445e392e93f 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -36,6 +36,7 @@
36#include "../../mm/internal.h" /* munlock_vma_page */ 36#include "../../mm/internal.h" /* munlock_vma_page */
37#include <linux/percpu-rwsem.h> 37#include <linux/percpu-rwsem.h>
38#include <linux/task_work.h> 38#include <linux/task_work.h>
39#include <linux/shmem_fs.h>
39 40
40#include <linux/uprobes.h> 41#include <linux/uprobes.h>
41 42
@@ -127,7 +128,7 @@ struct xol_area {
127 */ 128 */
128static bool valid_vma(struct vm_area_struct *vma, bool is_register) 129static bool valid_vma(struct vm_area_struct *vma, bool is_register)
129{ 130{
130 vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_SHARED; 131 vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_MAYSHARE;
131 132
132 if (is_register) 133 if (is_register)
133 flags |= VM_WRITE; 134 flags |= VM_WRITE;
@@ -279,18 +280,13 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t
279 * supported by that architecture then we need to modify is_trap_at_addr and 280 * supported by that architecture then we need to modify is_trap_at_addr and
280 * uprobe_write_opcode accordingly. This would never be a problem for archs 281 * uprobe_write_opcode accordingly. This would never be a problem for archs
281 * that have fixed length instructions. 282 * that have fixed length instructions.
282 */ 283 *
283
284/*
285 * uprobe_write_opcode - write the opcode at a given virtual address. 284 * uprobe_write_opcode - write the opcode at a given virtual address.
286 * @mm: the probed process address space. 285 * @mm: the probed process address space.
287 * @vaddr: the virtual address to store the opcode. 286 * @vaddr: the virtual address to store the opcode.
288 * @opcode: opcode to be written at @vaddr. 287 * @opcode: opcode to be written at @vaddr.
289 * 288 *
290 * Called with mm->mmap_sem held (for read and with a reference to 289 * Called with mm->mmap_sem held for write.
291 * mm).
292 *
293 * For mm @mm, write the opcode at @vaddr.
294 * Return 0 (success) or a negative errno. 290 * Return 0 (success) or a negative errno.
295 */ 291 */
296int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, 292int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr,
@@ -310,21 +306,25 @@ retry:
310 if (ret <= 0) 306 if (ret <= 0)
311 goto put_old; 307 goto put_old;
312 308
309 ret = anon_vma_prepare(vma);
310 if (ret)
311 goto put_old;
312
313 ret = -ENOMEM; 313 ret = -ENOMEM;
314 new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr); 314 new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr);
315 if (!new_page) 315 if (!new_page)
316 goto put_old; 316 goto put_old;
317 317
318 __SetPageUptodate(new_page); 318 if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))
319 goto put_new;
319 320
321 __SetPageUptodate(new_page);
320 copy_highpage(new_page, old_page); 322 copy_highpage(new_page, old_page);
321 copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE); 323 copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
322 324
323 ret = anon_vma_prepare(vma);
324 if (ret)
325 goto put_new;
326
327 ret = __replace_page(vma, vaddr, old_page, new_page); 325 ret = __replace_page(vma, vaddr, old_page, new_page);
326 if (ret)
327 mem_cgroup_uncharge_page(new_page);
328 328
329put_new: 329put_new:
330 page_cache_release(new_page); 330 page_cache_release(new_page);
@@ -537,14 +537,15 @@ static int __copy_insn(struct address_space *mapping, struct file *filp,
537 void *insn, int nbytes, loff_t offset) 537 void *insn, int nbytes, loff_t offset)
538{ 538{
539 struct page *page; 539 struct page *page;
540
541 if (!mapping->a_ops->readpage)
542 return -EIO;
543 /* 540 /*
544 * Ensure that the page that has the original instruction is 541 * Ensure that the page that has the original instruction is populated
545 * populated and in page-cache. 542 * and in page-cache. If ->readpage == NULL it must be shmem_mapping(),
543 * see uprobe_register().
546 */ 544 */
547 page = read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT, filp); 545 if (mapping->a_ops->readpage)
546 page = read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT, filp);
547 else
548 page = shmem_read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT);
548 if (IS_ERR(page)) 549 if (IS_ERR(page))
549 return PTR_ERR(page); 550 return PTR_ERR(page);
550 551
@@ -880,6 +881,9 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *
880 if (!uc->handler && !uc->ret_handler) 881 if (!uc->handler && !uc->ret_handler)
881 return -EINVAL; 882 return -EINVAL;
882 883
884 /* copy_insn() uses read_mapping_page() or shmem_read_mapping_page() */
885 if (!inode->i_mapping->a_ops->readpage && !shmem_mapping(inode->i_mapping))
886 return -EIO;
883 /* Racy, just to catch the obvious mistakes */ 887 /* Racy, just to catch the obvious mistakes */
884 if (offset > i_size_read(inode)) 888 if (offset > i_size_read(inode))
885 return -EINVAL; 889 return -EINVAL;
@@ -1361,6 +1365,16 @@ unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs)
1361 return instruction_pointer(regs) - UPROBE_SWBP_INSN_SIZE; 1365 return instruction_pointer(regs) - UPROBE_SWBP_INSN_SIZE;
1362} 1366}
1363 1367
1368unsigned long uprobe_get_trap_addr(struct pt_regs *regs)
1369{
1370 struct uprobe_task *utask = current->utask;
1371
1372 if (unlikely(utask && utask->active_uprobe))
1373 return utask->vaddr;
1374
1375 return instruction_pointer(regs);
1376}
1377
1364/* 1378/*
1365 * Called with no locks held. 1379 * Called with no locks held.
1366 * Called in context of a exiting or a exec-ing thread. 1380 * Called in context of a exiting or a exec-ing thread.
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index ceeadfcabb76..3214289df5a7 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -86,21 +86,8 @@ static raw_spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
86 return &(kretprobe_table_locks[hash].lock); 86 return &(kretprobe_table_locks[hash].lock);
87} 87}
88 88
89/* 89/* Blacklist -- list of struct kprobe_blacklist_entry */
90 * Normally, functions that we'd want to prohibit kprobes in, are marked 90static LIST_HEAD(kprobe_blacklist);
91 * __kprobes. But, there are cases where such functions already belong to
92 * a different section (__sched for preempt_schedule)
93 *
94 * For such cases, we now have a blacklist
95 */
96static struct kprobe_blackpoint kprobe_blacklist[] = {
97 {"preempt_schedule",},
98 {"native_get_debugreg",},
99 {"irq_entries_start",},
100 {"common_interrupt",},
101 {"mcount",}, /* mcount can be called from everywhere */
102 {NULL} /* Terminator */
103};
104 91
105#ifdef __ARCH_WANT_KPROBES_INSN_SLOT 92#ifdef __ARCH_WANT_KPROBES_INSN_SLOT
106/* 93/*
@@ -151,13 +138,13 @@ struct kprobe_insn_cache kprobe_insn_slots = {
151 .insn_size = MAX_INSN_SIZE, 138 .insn_size = MAX_INSN_SIZE,
152 .nr_garbage = 0, 139 .nr_garbage = 0,
153}; 140};
154static int __kprobes collect_garbage_slots(struct kprobe_insn_cache *c); 141static int collect_garbage_slots(struct kprobe_insn_cache *c);
155 142
156/** 143/**
157 * __get_insn_slot() - Find a slot on an executable page for an instruction. 144 * __get_insn_slot() - Find a slot on an executable page for an instruction.
158 * We allocate an executable page if there's no room on existing ones. 145 * We allocate an executable page if there's no room on existing ones.
159 */ 146 */
160kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c) 147kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c)
161{ 148{
162 struct kprobe_insn_page *kip; 149 struct kprobe_insn_page *kip;
163 kprobe_opcode_t *slot = NULL; 150 kprobe_opcode_t *slot = NULL;
@@ -214,7 +201,7 @@ out:
214} 201}
215 202
216/* Return 1 if all garbages are collected, otherwise 0. */ 203/* Return 1 if all garbages are collected, otherwise 0. */
217static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx) 204static int collect_one_slot(struct kprobe_insn_page *kip, int idx)
218{ 205{
219 kip->slot_used[idx] = SLOT_CLEAN; 206 kip->slot_used[idx] = SLOT_CLEAN;
220 kip->nused--; 207 kip->nused--;
@@ -235,7 +222,7 @@ static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx)
235 return 0; 222 return 0;
236} 223}
237 224
238static int __kprobes collect_garbage_slots(struct kprobe_insn_cache *c) 225static int collect_garbage_slots(struct kprobe_insn_cache *c)
239{ 226{
240 struct kprobe_insn_page *kip, *next; 227 struct kprobe_insn_page *kip, *next;
241 228
@@ -257,8 +244,8 @@ static int __kprobes collect_garbage_slots(struct kprobe_insn_cache *c)
257 return 0; 244 return 0;
258} 245}
259 246
260void __kprobes __free_insn_slot(struct kprobe_insn_cache *c, 247void __free_insn_slot(struct kprobe_insn_cache *c,
261 kprobe_opcode_t *slot, int dirty) 248 kprobe_opcode_t *slot, int dirty)
262{ 249{
263 struct kprobe_insn_page *kip; 250 struct kprobe_insn_page *kip;
264 251
@@ -314,7 +301,7 @@ static inline void reset_kprobe_instance(void)
314 * OR 301 * OR
315 * - with preemption disabled - from arch/xxx/kernel/kprobes.c 302 * - with preemption disabled - from arch/xxx/kernel/kprobes.c
316 */ 303 */
317struct kprobe __kprobes *get_kprobe(void *addr) 304struct kprobe *get_kprobe(void *addr)
318{ 305{
319 struct hlist_head *head; 306 struct hlist_head *head;
320 struct kprobe *p; 307 struct kprobe *p;
@@ -327,8 +314,9 @@ struct kprobe __kprobes *get_kprobe(void *addr)
327 314
328 return NULL; 315 return NULL;
329} 316}
317NOKPROBE_SYMBOL(get_kprobe);
330 318
331static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs); 319static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs);
332 320
333/* Return true if the kprobe is an aggregator */ 321/* Return true if the kprobe is an aggregator */
334static inline int kprobe_aggrprobe(struct kprobe *p) 322static inline int kprobe_aggrprobe(struct kprobe *p)
@@ -360,7 +348,7 @@ static bool kprobes_allow_optimization;
360 * Call all pre_handler on the list, but ignores its return value. 348 * Call all pre_handler on the list, but ignores its return value.
361 * This must be called from arch-dep optimized caller. 349 * This must be called from arch-dep optimized caller.
362 */ 350 */
363void __kprobes opt_pre_handler(struct kprobe *p, struct pt_regs *regs) 351void opt_pre_handler(struct kprobe *p, struct pt_regs *regs)
364{ 352{
365 struct kprobe *kp; 353 struct kprobe *kp;
366 354
@@ -372,9 +360,10 @@ void __kprobes opt_pre_handler(struct kprobe *p, struct pt_regs *regs)
372 reset_kprobe_instance(); 360 reset_kprobe_instance();
373 } 361 }
374} 362}
363NOKPROBE_SYMBOL(opt_pre_handler);
375 364
376/* Free optimized instructions and optimized_kprobe */ 365/* Free optimized instructions and optimized_kprobe */
377static __kprobes void free_aggr_kprobe(struct kprobe *p) 366static void free_aggr_kprobe(struct kprobe *p)
378{ 367{
379 struct optimized_kprobe *op; 368 struct optimized_kprobe *op;
380 369
@@ -412,7 +401,7 @@ static inline int kprobe_disarmed(struct kprobe *p)
412} 401}
413 402
414/* Return true(!0) if the probe is queued on (un)optimizing lists */ 403/* Return true(!0) if the probe is queued on (un)optimizing lists */
415static int __kprobes kprobe_queued(struct kprobe *p) 404static int kprobe_queued(struct kprobe *p)
416{ 405{
417 struct optimized_kprobe *op; 406 struct optimized_kprobe *op;
418 407
@@ -428,7 +417,7 @@ static int __kprobes kprobe_queued(struct kprobe *p)
428 * Return an optimized kprobe whose optimizing code replaces 417 * Return an optimized kprobe whose optimizing code replaces
429 * instructions including addr (exclude breakpoint). 418 * instructions including addr (exclude breakpoint).
430 */ 419 */
431static struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr) 420static struct kprobe *get_optimized_kprobe(unsigned long addr)
432{ 421{
433 int i; 422 int i;
434 struct kprobe *p = NULL; 423 struct kprobe *p = NULL;
@@ -460,7 +449,7 @@ static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer);
460 * Optimize (replace a breakpoint with a jump) kprobes listed on 449 * Optimize (replace a breakpoint with a jump) kprobes listed on
461 * optimizing_list. 450 * optimizing_list.
462 */ 451 */
463static __kprobes void do_optimize_kprobes(void) 452static void do_optimize_kprobes(void)
464{ 453{
465 /* Optimization never be done when disarmed */ 454 /* Optimization never be done when disarmed */
466 if (kprobes_all_disarmed || !kprobes_allow_optimization || 455 if (kprobes_all_disarmed || !kprobes_allow_optimization ||
@@ -488,7 +477,7 @@ static __kprobes void do_optimize_kprobes(void)
488 * Unoptimize (replace a jump with a breakpoint and remove the breakpoint 477 * Unoptimize (replace a jump with a breakpoint and remove the breakpoint
489 * if need) kprobes listed on unoptimizing_list. 478 * if need) kprobes listed on unoptimizing_list.
490 */ 479 */
491static __kprobes void do_unoptimize_kprobes(void) 480static void do_unoptimize_kprobes(void)
492{ 481{
493 struct optimized_kprobe *op, *tmp; 482 struct optimized_kprobe *op, *tmp;
494 483
@@ -520,7 +509,7 @@ static __kprobes void do_unoptimize_kprobes(void)
520} 509}
521 510
522/* Reclaim all kprobes on the free_list */ 511/* Reclaim all kprobes on the free_list */
523static __kprobes void do_free_cleaned_kprobes(void) 512static void do_free_cleaned_kprobes(void)
524{ 513{
525 struct optimized_kprobe *op, *tmp; 514 struct optimized_kprobe *op, *tmp;
526 515
@@ -532,13 +521,13 @@ static __kprobes void do_free_cleaned_kprobes(void)
532} 521}
533 522
534/* Start optimizer after OPTIMIZE_DELAY passed */ 523/* Start optimizer after OPTIMIZE_DELAY passed */
535static __kprobes void kick_kprobe_optimizer(void) 524static void kick_kprobe_optimizer(void)
536{ 525{
537 schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY); 526 schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY);
538} 527}
539 528
540/* Kprobe jump optimizer */ 529/* Kprobe jump optimizer */
541static __kprobes void kprobe_optimizer(struct work_struct *work) 530static void kprobe_optimizer(struct work_struct *work)
542{ 531{
543 mutex_lock(&kprobe_mutex); 532 mutex_lock(&kprobe_mutex);
544 /* Lock modules while optimizing kprobes */ 533 /* Lock modules while optimizing kprobes */
@@ -574,7 +563,7 @@ static __kprobes void kprobe_optimizer(struct work_struct *work)
574} 563}
575 564
576/* Wait for completing optimization and unoptimization */ 565/* Wait for completing optimization and unoptimization */
577static __kprobes void wait_for_kprobe_optimizer(void) 566static void wait_for_kprobe_optimizer(void)
578{ 567{
579 mutex_lock(&kprobe_mutex); 568 mutex_lock(&kprobe_mutex);
580 569
@@ -593,7 +582,7 @@ static __kprobes void wait_for_kprobe_optimizer(void)
593} 582}
594 583
595/* Optimize kprobe if p is ready to be optimized */ 584/* Optimize kprobe if p is ready to be optimized */
596static __kprobes void optimize_kprobe(struct kprobe *p) 585static void optimize_kprobe(struct kprobe *p)
597{ 586{
598 struct optimized_kprobe *op; 587 struct optimized_kprobe *op;
599 588
@@ -627,7 +616,7 @@ static __kprobes void optimize_kprobe(struct kprobe *p)
627} 616}
628 617
629/* Short cut to direct unoptimizing */ 618/* Short cut to direct unoptimizing */
630static __kprobes void force_unoptimize_kprobe(struct optimized_kprobe *op) 619static void force_unoptimize_kprobe(struct optimized_kprobe *op)
631{ 620{
632 get_online_cpus(); 621 get_online_cpus();
633 arch_unoptimize_kprobe(op); 622 arch_unoptimize_kprobe(op);
@@ -637,7 +626,7 @@ static __kprobes void force_unoptimize_kprobe(struct optimized_kprobe *op)
637} 626}
638 627
639/* Unoptimize a kprobe if p is optimized */ 628/* Unoptimize a kprobe if p is optimized */
640static __kprobes void unoptimize_kprobe(struct kprobe *p, bool force) 629static void unoptimize_kprobe(struct kprobe *p, bool force)
641{ 630{
642 struct optimized_kprobe *op; 631 struct optimized_kprobe *op;
643 632
@@ -697,7 +686,7 @@ static void reuse_unused_kprobe(struct kprobe *ap)
697} 686}
698 687
699/* Remove optimized instructions */ 688/* Remove optimized instructions */
700static void __kprobes kill_optimized_kprobe(struct kprobe *p) 689static void kill_optimized_kprobe(struct kprobe *p)
701{ 690{
702 struct optimized_kprobe *op; 691 struct optimized_kprobe *op;
703 692
@@ -723,7 +712,7 @@ static void __kprobes kill_optimized_kprobe(struct kprobe *p)
723} 712}
724 713
725/* Try to prepare optimized instructions */ 714/* Try to prepare optimized instructions */
726static __kprobes void prepare_optimized_kprobe(struct kprobe *p) 715static void prepare_optimized_kprobe(struct kprobe *p)
727{ 716{
728 struct optimized_kprobe *op; 717 struct optimized_kprobe *op;
729 718
@@ -732,7 +721,7 @@ static __kprobes void prepare_optimized_kprobe(struct kprobe *p)
732} 721}
733 722
734/* Allocate new optimized_kprobe and try to prepare optimized instructions */ 723/* Allocate new optimized_kprobe and try to prepare optimized instructions */
735static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p) 724static struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
736{ 725{
737 struct optimized_kprobe *op; 726 struct optimized_kprobe *op;
738 727
@@ -747,13 +736,13 @@ static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
747 return &op->kp; 736 return &op->kp;
748} 737}
749 738
750static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p); 739static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p);
751 740
752/* 741/*
753 * Prepare an optimized_kprobe and optimize it 742 * Prepare an optimized_kprobe and optimize it
754 * NOTE: p must be a normal registered kprobe 743 * NOTE: p must be a normal registered kprobe
755 */ 744 */
756static __kprobes void try_to_optimize_kprobe(struct kprobe *p) 745static void try_to_optimize_kprobe(struct kprobe *p)
757{ 746{
758 struct kprobe *ap; 747 struct kprobe *ap;
759 struct optimized_kprobe *op; 748 struct optimized_kprobe *op;
@@ -787,7 +776,7 @@ out:
787} 776}
788 777
789#ifdef CONFIG_SYSCTL 778#ifdef CONFIG_SYSCTL
790static void __kprobes optimize_all_kprobes(void) 779static void optimize_all_kprobes(void)
791{ 780{
792 struct hlist_head *head; 781 struct hlist_head *head;
793 struct kprobe *p; 782 struct kprobe *p;
@@ -810,7 +799,7 @@ out:
810 mutex_unlock(&kprobe_mutex); 799 mutex_unlock(&kprobe_mutex);
811} 800}
812 801
813static void __kprobes unoptimize_all_kprobes(void) 802static void unoptimize_all_kprobes(void)
814{ 803{
815 struct hlist_head *head; 804 struct hlist_head *head;
816 struct kprobe *p; 805 struct kprobe *p;
@@ -861,7 +850,7 @@ int proc_kprobes_optimization_handler(struct ctl_table *table, int write,
861#endif /* CONFIG_SYSCTL */ 850#endif /* CONFIG_SYSCTL */
862 851
863/* Put a breakpoint for a probe. Must be called with text_mutex locked */ 852/* Put a breakpoint for a probe. Must be called with text_mutex locked */
864static void __kprobes __arm_kprobe(struct kprobe *p) 853static void __arm_kprobe(struct kprobe *p)
865{ 854{
866 struct kprobe *_p; 855 struct kprobe *_p;
867 856
@@ -876,7 +865,7 @@ static void __kprobes __arm_kprobe(struct kprobe *p)
876} 865}
877 866
878/* Remove the breakpoint of a probe. Must be called with text_mutex locked */ 867/* Remove the breakpoint of a probe. Must be called with text_mutex locked */
879static void __kprobes __disarm_kprobe(struct kprobe *p, bool reopt) 868static void __disarm_kprobe(struct kprobe *p, bool reopt)
880{ 869{
881 struct kprobe *_p; 870 struct kprobe *_p;
882 871
@@ -911,13 +900,13 @@ static void reuse_unused_kprobe(struct kprobe *ap)
911 BUG_ON(kprobe_unused(ap)); 900 BUG_ON(kprobe_unused(ap));
912} 901}
913 902
914static __kprobes void free_aggr_kprobe(struct kprobe *p) 903static void free_aggr_kprobe(struct kprobe *p)
915{ 904{
916 arch_remove_kprobe(p); 905 arch_remove_kprobe(p);
917 kfree(p); 906 kfree(p);
918} 907}
919 908
920static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p) 909static struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
921{ 910{
922 return kzalloc(sizeof(struct kprobe), GFP_KERNEL); 911 return kzalloc(sizeof(struct kprobe), GFP_KERNEL);
923} 912}
@@ -931,7 +920,7 @@ static struct ftrace_ops kprobe_ftrace_ops __read_mostly = {
931static int kprobe_ftrace_enabled; 920static int kprobe_ftrace_enabled;
932 921
933/* Must ensure p->addr is really on ftrace */ 922/* Must ensure p->addr is really on ftrace */
934static int __kprobes prepare_kprobe(struct kprobe *p) 923static int prepare_kprobe(struct kprobe *p)
935{ 924{
936 if (!kprobe_ftrace(p)) 925 if (!kprobe_ftrace(p))
937 return arch_prepare_kprobe(p); 926 return arch_prepare_kprobe(p);
@@ -940,7 +929,7 @@ static int __kprobes prepare_kprobe(struct kprobe *p)
940} 929}
941 930
942/* Caller must lock kprobe_mutex */ 931/* Caller must lock kprobe_mutex */
943static void __kprobes arm_kprobe_ftrace(struct kprobe *p) 932static void arm_kprobe_ftrace(struct kprobe *p)
944{ 933{
945 int ret; 934 int ret;
946 935
@@ -955,7 +944,7 @@ static void __kprobes arm_kprobe_ftrace(struct kprobe *p)
955} 944}
956 945
957/* Caller must lock kprobe_mutex */ 946/* Caller must lock kprobe_mutex */
958static void __kprobes disarm_kprobe_ftrace(struct kprobe *p) 947static void disarm_kprobe_ftrace(struct kprobe *p)
959{ 948{
960 int ret; 949 int ret;
961 950
@@ -975,7 +964,7 @@ static void __kprobes disarm_kprobe_ftrace(struct kprobe *p)
975#endif 964#endif
976 965
977/* Arm a kprobe with text_mutex */ 966/* Arm a kprobe with text_mutex */
978static void __kprobes arm_kprobe(struct kprobe *kp) 967static void arm_kprobe(struct kprobe *kp)
979{ 968{
980 if (unlikely(kprobe_ftrace(kp))) { 969 if (unlikely(kprobe_ftrace(kp))) {
981 arm_kprobe_ftrace(kp); 970 arm_kprobe_ftrace(kp);
@@ -992,7 +981,7 @@ static void __kprobes arm_kprobe(struct kprobe *kp)
992} 981}
993 982
994/* Disarm a kprobe with text_mutex */ 983/* Disarm a kprobe with text_mutex */
995static void __kprobes disarm_kprobe(struct kprobe *kp, bool reopt) 984static void disarm_kprobe(struct kprobe *kp, bool reopt)
996{ 985{
997 if (unlikely(kprobe_ftrace(kp))) { 986 if (unlikely(kprobe_ftrace(kp))) {
998 disarm_kprobe_ftrace(kp); 987 disarm_kprobe_ftrace(kp);
@@ -1008,7 +997,7 @@ static void __kprobes disarm_kprobe(struct kprobe *kp, bool reopt)
1008 * Aggregate handlers for multiple kprobes support - these handlers 997 * Aggregate handlers for multiple kprobes support - these handlers
1009 * take care of invoking the individual kprobe handlers on p->list 998 * take care of invoking the individual kprobe handlers on p->list
1010 */ 999 */
1011static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs) 1000static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
1012{ 1001{
1013 struct kprobe *kp; 1002 struct kprobe *kp;
1014 1003
@@ -1022,9 +1011,10 @@ static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
1022 } 1011 }
1023 return 0; 1012 return 0;
1024} 1013}
1014NOKPROBE_SYMBOL(aggr_pre_handler);
1025 1015
1026static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs, 1016static void aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
1027 unsigned long flags) 1017 unsigned long flags)
1028{ 1018{
1029 struct kprobe *kp; 1019 struct kprobe *kp;
1030 1020
@@ -1036,9 +1026,10 @@ static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
1036 } 1026 }
1037 } 1027 }
1038} 1028}
1029NOKPROBE_SYMBOL(aggr_post_handler);
1039 1030
1040static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, 1031static int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
1041 int trapnr) 1032 int trapnr)
1042{ 1033{
1043 struct kprobe *cur = __this_cpu_read(kprobe_instance); 1034 struct kprobe *cur = __this_cpu_read(kprobe_instance);
1044 1035
@@ -1052,8 +1043,9 @@ static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
1052 } 1043 }
1053 return 0; 1044 return 0;
1054} 1045}
1046NOKPROBE_SYMBOL(aggr_fault_handler);
1055 1047
1056static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs) 1048static int aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
1057{ 1049{
1058 struct kprobe *cur = __this_cpu_read(kprobe_instance); 1050 struct kprobe *cur = __this_cpu_read(kprobe_instance);
1059 int ret = 0; 1051 int ret = 0;
@@ -1065,9 +1057,10 @@ static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
1065 reset_kprobe_instance(); 1057 reset_kprobe_instance();
1066 return ret; 1058 return ret;
1067} 1059}
1060NOKPROBE_SYMBOL(aggr_break_handler);
1068 1061
1069/* Walks the list and increments nmissed count for multiprobe case */ 1062/* Walks the list and increments nmissed count for multiprobe case */
1070void __kprobes kprobes_inc_nmissed_count(struct kprobe *p) 1063void kprobes_inc_nmissed_count(struct kprobe *p)
1071{ 1064{
1072 struct kprobe *kp; 1065 struct kprobe *kp;
1073 if (!kprobe_aggrprobe(p)) { 1066 if (!kprobe_aggrprobe(p)) {
@@ -1078,9 +1071,10 @@ void __kprobes kprobes_inc_nmissed_count(struct kprobe *p)
1078 } 1071 }
1079 return; 1072 return;
1080} 1073}
1074NOKPROBE_SYMBOL(kprobes_inc_nmissed_count);
1081 1075
1082void __kprobes recycle_rp_inst(struct kretprobe_instance *ri, 1076void recycle_rp_inst(struct kretprobe_instance *ri,
1083 struct hlist_head *head) 1077 struct hlist_head *head)
1084{ 1078{
1085 struct kretprobe *rp = ri->rp; 1079 struct kretprobe *rp = ri->rp;
1086 1080
@@ -1095,8 +1089,9 @@ void __kprobes recycle_rp_inst(struct kretprobe_instance *ri,
1095 /* Unregistering */ 1089 /* Unregistering */
1096 hlist_add_head(&ri->hlist, head); 1090 hlist_add_head(&ri->hlist, head);
1097} 1091}
1092NOKPROBE_SYMBOL(recycle_rp_inst);
1098 1093
1099void __kprobes kretprobe_hash_lock(struct task_struct *tsk, 1094void kretprobe_hash_lock(struct task_struct *tsk,
1100 struct hlist_head **head, unsigned long *flags) 1095 struct hlist_head **head, unsigned long *flags)
1101__acquires(hlist_lock) 1096__acquires(hlist_lock)
1102{ 1097{
@@ -1107,17 +1102,19 @@ __acquires(hlist_lock)
1107 hlist_lock = kretprobe_table_lock_ptr(hash); 1102 hlist_lock = kretprobe_table_lock_ptr(hash);
1108 raw_spin_lock_irqsave(hlist_lock, *flags); 1103 raw_spin_lock_irqsave(hlist_lock, *flags);
1109} 1104}
1105NOKPROBE_SYMBOL(kretprobe_hash_lock);
1110 1106
1111static void __kprobes kretprobe_table_lock(unsigned long hash, 1107static void kretprobe_table_lock(unsigned long hash,
1112 unsigned long *flags) 1108 unsigned long *flags)
1113__acquires(hlist_lock) 1109__acquires(hlist_lock)
1114{ 1110{
1115 raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); 1111 raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
1116 raw_spin_lock_irqsave(hlist_lock, *flags); 1112 raw_spin_lock_irqsave(hlist_lock, *flags);
1117} 1113}
1114NOKPROBE_SYMBOL(kretprobe_table_lock);
1118 1115
1119void __kprobes kretprobe_hash_unlock(struct task_struct *tsk, 1116void kretprobe_hash_unlock(struct task_struct *tsk,
1120 unsigned long *flags) 1117 unsigned long *flags)
1121__releases(hlist_lock) 1118__releases(hlist_lock)
1122{ 1119{
1123 unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); 1120 unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
@@ -1126,14 +1123,16 @@ __releases(hlist_lock)
1126 hlist_lock = kretprobe_table_lock_ptr(hash); 1123 hlist_lock = kretprobe_table_lock_ptr(hash);
1127 raw_spin_unlock_irqrestore(hlist_lock, *flags); 1124 raw_spin_unlock_irqrestore(hlist_lock, *flags);
1128} 1125}
1126NOKPROBE_SYMBOL(kretprobe_hash_unlock);
1129 1127
1130static void __kprobes kretprobe_table_unlock(unsigned long hash, 1128static void kretprobe_table_unlock(unsigned long hash,
1131 unsigned long *flags) 1129 unsigned long *flags)
1132__releases(hlist_lock) 1130__releases(hlist_lock)
1133{ 1131{
1134 raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); 1132 raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
1135 raw_spin_unlock_irqrestore(hlist_lock, *flags); 1133 raw_spin_unlock_irqrestore(hlist_lock, *flags);
1136} 1134}
1135NOKPROBE_SYMBOL(kretprobe_table_unlock);
1137 1136
1138/* 1137/*
1139 * This function is called from finish_task_switch when task tk becomes dead, 1138 * This function is called from finish_task_switch when task tk becomes dead,
@@ -1141,7 +1140,7 @@ __releases(hlist_lock)
1141 * with this task. These left over instances represent probed functions 1140 * with this task. These left over instances represent probed functions
1142 * that have been called but will never return. 1141 * that have been called but will never return.
1143 */ 1142 */
1144void __kprobes kprobe_flush_task(struct task_struct *tk) 1143void kprobe_flush_task(struct task_struct *tk)
1145{ 1144{
1146 struct kretprobe_instance *ri; 1145 struct kretprobe_instance *ri;
1147 struct hlist_head *head, empty_rp; 1146 struct hlist_head *head, empty_rp;
@@ -1166,6 +1165,7 @@ void __kprobes kprobe_flush_task(struct task_struct *tk)
1166 kfree(ri); 1165 kfree(ri);
1167 } 1166 }
1168} 1167}
1168NOKPROBE_SYMBOL(kprobe_flush_task);
1169 1169
1170static inline void free_rp_inst(struct kretprobe *rp) 1170static inline void free_rp_inst(struct kretprobe *rp)
1171{ 1171{
@@ -1178,7 +1178,7 @@ static inline void free_rp_inst(struct kretprobe *rp)
1178 } 1178 }
1179} 1179}
1180 1180
1181static void __kprobes cleanup_rp_inst(struct kretprobe *rp) 1181static void cleanup_rp_inst(struct kretprobe *rp)
1182{ 1182{
1183 unsigned long flags, hash; 1183 unsigned long flags, hash;
1184 struct kretprobe_instance *ri; 1184 struct kretprobe_instance *ri;
@@ -1197,12 +1197,13 @@ static void __kprobes cleanup_rp_inst(struct kretprobe *rp)
1197 } 1197 }
1198 free_rp_inst(rp); 1198 free_rp_inst(rp);
1199} 1199}
1200NOKPROBE_SYMBOL(cleanup_rp_inst);
1200 1201
1201/* 1202/*
1202* Add the new probe to ap->list. Fail if this is the 1203* Add the new probe to ap->list. Fail if this is the
1203* second jprobe at the address - two jprobes can't coexist 1204* second jprobe at the address - two jprobes can't coexist
1204*/ 1205*/
1205static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p) 1206static int add_new_kprobe(struct kprobe *ap, struct kprobe *p)
1206{ 1207{
1207 BUG_ON(kprobe_gone(ap) || kprobe_gone(p)); 1208 BUG_ON(kprobe_gone(ap) || kprobe_gone(p));
1208 1209
@@ -1226,7 +1227,7 @@ static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p)
1226 * Fill in the required fields of the "manager kprobe". Replace the 1227 * Fill in the required fields of the "manager kprobe". Replace the
1227 * earlier kprobe in the hlist with the manager kprobe 1228 * earlier kprobe in the hlist with the manager kprobe
1228 */ 1229 */
1229static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p) 1230static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
1230{ 1231{
1231 /* Copy p's insn slot to ap */ 1232 /* Copy p's insn slot to ap */
1232 copy_kprobe(p, ap); 1233 copy_kprobe(p, ap);
@@ -1252,8 +1253,7 @@ static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
1252 * This is the second or subsequent kprobe at the address - handle 1253 * This is the second or subsequent kprobe at the address - handle
1253 * the intricacies 1254 * the intricacies
1254 */ 1255 */
1255static int __kprobes register_aggr_kprobe(struct kprobe *orig_p, 1256static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p)
1256 struct kprobe *p)
1257{ 1257{
1258 int ret = 0; 1258 int ret = 0;
1259 struct kprobe *ap = orig_p; 1259 struct kprobe *ap = orig_p;
@@ -1324,25 +1324,29 @@ out:
1324 return ret; 1324 return ret;
1325} 1325}
1326 1326
1327static int __kprobes in_kprobes_functions(unsigned long addr) 1327bool __weak arch_within_kprobe_blacklist(unsigned long addr)
1328{ 1328{
1329 struct kprobe_blackpoint *kb; 1329 /* The __kprobes marked functions and entry code must not be probed */
1330 return addr >= (unsigned long)__kprobes_text_start &&
1331 addr < (unsigned long)__kprobes_text_end;
1332}
1330 1333
1331 if (addr >= (unsigned long)__kprobes_text_start && 1334static bool within_kprobe_blacklist(unsigned long addr)
1332 addr < (unsigned long)__kprobes_text_end) 1335{
1333 return -EINVAL; 1336 struct kprobe_blacklist_entry *ent;
1337
1338 if (arch_within_kprobe_blacklist(addr))
1339 return true;
1334 /* 1340 /*
1335 * If there exists a kprobe_blacklist, verify and 1341 * If there exists a kprobe_blacklist, verify and
1336 * fail any probe registration in the prohibited area 1342 * fail any probe registration in the prohibited area
1337 */ 1343 */
1338 for (kb = kprobe_blacklist; kb->name != NULL; kb++) { 1344 list_for_each_entry(ent, &kprobe_blacklist, list) {
1339 if (kb->start_addr) { 1345 if (addr >= ent->start_addr && addr < ent->end_addr)
1340 if (addr >= kb->start_addr && 1346 return true;
1341 addr < (kb->start_addr + kb->range))
1342 return -EINVAL;
1343 }
1344 } 1347 }
1345 return 0; 1348
1349 return false;
1346} 1350}
1347 1351
1348/* 1352/*
@@ -1351,7 +1355,7 @@ static int __kprobes in_kprobes_functions(unsigned long addr)
1351 * This returns encoded errors if it fails to look up symbol or invalid 1355 * This returns encoded errors if it fails to look up symbol or invalid
1352 * combination of parameters. 1356 * combination of parameters.
1353 */ 1357 */
1354static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p) 1358static kprobe_opcode_t *kprobe_addr(struct kprobe *p)
1355{ 1359{
1356 kprobe_opcode_t *addr = p->addr; 1360 kprobe_opcode_t *addr = p->addr;
1357 1361
@@ -1374,7 +1378,7 @@ invalid:
1374} 1378}
1375 1379
1376/* Check passed kprobe is valid and return kprobe in kprobe_table. */ 1380/* Check passed kprobe is valid and return kprobe in kprobe_table. */
1377static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p) 1381static struct kprobe *__get_valid_kprobe(struct kprobe *p)
1378{ 1382{
1379 struct kprobe *ap, *list_p; 1383 struct kprobe *ap, *list_p;
1380 1384
@@ -1406,8 +1410,8 @@ static inline int check_kprobe_rereg(struct kprobe *p)
1406 return ret; 1410 return ret;
1407} 1411}
1408 1412
1409static __kprobes int check_kprobe_address_safe(struct kprobe *p, 1413static int check_kprobe_address_safe(struct kprobe *p,
1410 struct module **probed_mod) 1414 struct module **probed_mod)
1411{ 1415{
1412 int ret = 0; 1416 int ret = 0;
1413 unsigned long ftrace_addr; 1417 unsigned long ftrace_addr;
@@ -1433,7 +1437,7 @@ static __kprobes int check_kprobe_address_safe(struct kprobe *p,
1433 1437
1434 /* Ensure it is not in reserved area nor out of text */ 1438 /* Ensure it is not in reserved area nor out of text */
1435 if (!kernel_text_address((unsigned long) p->addr) || 1439 if (!kernel_text_address((unsigned long) p->addr) ||
1436 in_kprobes_functions((unsigned long) p->addr) || 1440 within_kprobe_blacklist((unsigned long) p->addr) ||
1437 jump_label_text_reserved(p->addr, p->addr)) { 1441 jump_label_text_reserved(p->addr, p->addr)) {
1438 ret = -EINVAL; 1442 ret = -EINVAL;
1439 goto out; 1443 goto out;
@@ -1469,7 +1473,7 @@ out:
1469 return ret; 1473 return ret;
1470} 1474}
1471 1475
1472int __kprobes register_kprobe(struct kprobe *p) 1476int register_kprobe(struct kprobe *p)
1473{ 1477{
1474 int ret; 1478 int ret;
1475 struct kprobe *old_p; 1479 struct kprobe *old_p;
@@ -1531,7 +1535,7 @@ out:
1531EXPORT_SYMBOL_GPL(register_kprobe); 1535EXPORT_SYMBOL_GPL(register_kprobe);
1532 1536
1533/* Check if all probes on the aggrprobe are disabled */ 1537/* Check if all probes on the aggrprobe are disabled */
1534static int __kprobes aggr_kprobe_disabled(struct kprobe *ap) 1538static int aggr_kprobe_disabled(struct kprobe *ap)
1535{ 1539{
1536 struct kprobe *kp; 1540 struct kprobe *kp;
1537 1541
@@ -1547,7 +1551,7 @@ static int __kprobes aggr_kprobe_disabled(struct kprobe *ap)
1547} 1551}
1548 1552
1549/* Disable one kprobe: Make sure called under kprobe_mutex is locked */ 1553/* Disable one kprobe: Make sure called under kprobe_mutex is locked */
1550static struct kprobe *__kprobes __disable_kprobe(struct kprobe *p) 1554static struct kprobe *__disable_kprobe(struct kprobe *p)
1551{ 1555{
1552 struct kprobe *orig_p; 1556 struct kprobe *orig_p;
1553 1557
@@ -1574,7 +1578,7 @@ static struct kprobe *__kprobes __disable_kprobe(struct kprobe *p)
1574/* 1578/*
1575 * Unregister a kprobe without a scheduler synchronization. 1579 * Unregister a kprobe without a scheduler synchronization.
1576 */ 1580 */
1577static int __kprobes __unregister_kprobe_top(struct kprobe *p) 1581static int __unregister_kprobe_top(struct kprobe *p)
1578{ 1582{
1579 struct kprobe *ap, *list_p; 1583 struct kprobe *ap, *list_p;
1580 1584
@@ -1631,7 +1635,7 @@ disarmed:
1631 return 0; 1635 return 0;
1632} 1636}
1633 1637
1634static void __kprobes __unregister_kprobe_bottom(struct kprobe *p) 1638static void __unregister_kprobe_bottom(struct kprobe *p)
1635{ 1639{
1636 struct kprobe *ap; 1640 struct kprobe *ap;
1637 1641
@@ -1647,7 +1651,7 @@ static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
1647 /* Otherwise, do nothing. */ 1651 /* Otherwise, do nothing. */
1648} 1652}
1649 1653
1650int __kprobes register_kprobes(struct kprobe **kps, int num) 1654int register_kprobes(struct kprobe **kps, int num)
1651{ 1655{
1652 int i, ret = 0; 1656 int i, ret = 0;
1653 1657
@@ -1665,13 +1669,13 @@ int __kprobes register_kprobes(struct kprobe **kps, int num)
1665} 1669}
1666EXPORT_SYMBOL_GPL(register_kprobes); 1670EXPORT_SYMBOL_GPL(register_kprobes);
1667 1671
1668void __kprobes unregister_kprobe(struct kprobe *p) 1672void unregister_kprobe(struct kprobe *p)
1669{ 1673{
1670 unregister_kprobes(&p, 1); 1674 unregister_kprobes(&p, 1);
1671} 1675}
1672EXPORT_SYMBOL_GPL(unregister_kprobe); 1676EXPORT_SYMBOL_GPL(unregister_kprobe);
1673 1677
1674void __kprobes unregister_kprobes(struct kprobe **kps, int num) 1678void unregister_kprobes(struct kprobe **kps, int num)
1675{ 1679{
1676 int i; 1680 int i;
1677 1681
@@ -1700,7 +1704,7 @@ unsigned long __weak arch_deref_entry_point(void *entry)
1700 return (unsigned long)entry; 1704 return (unsigned long)entry;
1701} 1705}
1702 1706
1703int __kprobes register_jprobes(struct jprobe **jps, int num) 1707int register_jprobes(struct jprobe **jps, int num)
1704{ 1708{
1705 struct jprobe *jp; 1709 struct jprobe *jp;
1706 int ret = 0, i; 1710 int ret = 0, i;
@@ -1731,19 +1735,19 @@ int __kprobes register_jprobes(struct jprobe **jps, int num)
1731} 1735}
1732EXPORT_SYMBOL_GPL(register_jprobes); 1736EXPORT_SYMBOL_GPL(register_jprobes);
1733 1737
1734int __kprobes register_jprobe(struct jprobe *jp) 1738int register_jprobe(struct jprobe *jp)
1735{ 1739{
1736 return register_jprobes(&jp, 1); 1740 return register_jprobes(&jp, 1);
1737} 1741}
1738EXPORT_SYMBOL_GPL(register_jprobe); 1742EXPORT_SYMBOL_GPL(register_jprobe);
1739 1743
1740void __kprobes unregister_jprobe(struct jprobe *jp) 1744void unregister_jprobe(struct jprobe *jp)
1741{ 1745{
1742 unregister_jprobes(&jp, 1); 1746 unregister_jprobes(&jp, 1);
1743} 1747}
1744EXPORT_SYMBOL_GPL(unregister_jprobe); 1748EXPORT_SYMBOL_GPL(unregister_jprobe);
1745 1749
1746void __kprobes unregister_jprobes(struct jprobe **jps, int num) 1750void unregister_jprobes(struct jprobe **jps, int num)
1747{ 1751{
1748 int i; 1752 int i;
1749 1753
@@ -1768,8 +1772,7 @@ EXPORT_SYMBOL_GPL(unregister_jprobes);
1768 * This kprobe pre_handler is registered with every kretprobe. When probe 1772 * This kprobe pre_handler is registered with every kretprobe. When probe
1769 * hits it will set up the return probe. 1773 * hits it will set up the return probe.
1770 */ 1774 */
1771static int __kprobes pre_handler_kretprobe(struct kprobe *p, 1775static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
1772 struct pt_regs *regs)
1773{ 1776{
1774 struct kretprobe *rp = container_of(p, struct kretprobe, kp); 1777 struct kretprobe *rp = container_of(p, struct kretprobe, kp);
1775 unsigned long hash, flags = 0; 1778 unsigned long hash, flags = 0;
@@ -1807,8 +1810,9 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p,
1807 } 1810 }
1808 return 0; 1811 return 0;
1809} 1812}
1813NOKPROBE_SYMBOL(pre_handler_kretprobe);
1810 1814
1811int __kprobes register_kretprobe(struct kretprobe *rp) 1815int register_kretprobe(struct kretprobe *rp)
1812{ 1816{
1813 int ret = 0; 1817 int ret = 0;
1814 struct kretprobe_instance *inst; 1818 struct kretprobe_instance *inst;
@@ -1861,7 +1865,7 @@ int __kprobes register_kretprobe(struct kretprobe *rp)
1861} 1865}
1862EXPORT_SYMBOL_GPL(register_kretprobe); 1866EXPORT_SYMBOL_GPL(register_kretprobe);
1863 1867
1864int __kprobes register_kretprobes(struct kretprobe **rps, int num) 1868int register_kretprobes(struct kretprobe **rps, int num)
1865{ 1869{
1866 int ret = 0, i; 1870 int ret = 0, i;
1867 1871
@@ -1879,13 +1883,13 @@ int __kprobes register_kretprobes(struct kretprobe **rps, int num)
1879} 1883}
1880EXPORT_SYMBOL_GPL(register_kretprobes); 1884EXPORT_SYMBOL_GPL(register_kretprobes);
1881 1885
1882void __kprobes unregister_kretprobe(struct kretprobe *rp) 1886void unregister_kretprobe(struct kretprobe *rp)
1883{ 1887{
1884 unregister_kretprobes(&rp, 1); 1888 unregister_kretprobes(&rp, 1);
1885} 1889}
1886EXPORT_SYMBOL_GPL(unregister_kretprobe); 1890EXPORT_SYMBOL_GPL(unregister_kretprobe);
1887 1891
1888void __kprobes unregister_kretprobes(struct kretprobe **rps, int num) 1892void unregister_kretprobes(struct kretprobe **rps, int num)
1889{ 1893{
1890 int i; 1894 int i;
1891 1895
@@ -1908,38 +1912,38 @@ void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
1908EXPORT_SYMBOL_GPL(unregister_kretprobes); 1912EXPORT_SYMBOL_GPL(unregister_kretprobes);
1909 1913
1910#else /* CONFIG_KRETPROBES */ 1914#else /* CONFIG_KRETPROBES */
1911int __kprobes register_kretprobe(struct kretprobe *rp) 1915int register_kretprobe(struct kretprobe *rp)
1912{ 1916{
1913 return -ENOSYS; 1917 return -ENOSYS;
1914} 1918}
1915EXPORT_SYMBOL_GPL(register_kretprobe); 1919EXPORT_SYMBOL_GPL(register_kretprobe);
1916 1920
1917int __kprobes register_kretprobes(struct kretprobe **rps, int num) 1921int register_kretprobes(struct kretprobe **rps, int num)
1918{ 1922{
1919 return -ENOSYS; 1923 return -ENOSYS;
1920} 1924}
1921EXPORT_SYMBOL_GPL(register_kretprobes); 1925EXPORT_SYMBOL_GPL(register_kretprobes);
1922 1926
1923void __kprobes unregister_kretprobe(struct kretprobe *rp) 1927void unregister_kretprobe(struct kretprobe *rp)
1924{ 1928{
1925} 1929}
1926EXPORT_SYMBOL_GPL(unregister_kretprobe); 1930EXPORT_SYMBOL_GPL(unregister_kretprobe);
1927 1931
1928void __kprobes unregister_kretprobes(struct kretprobe **rps, int num) 1932void unregister_kretprobes(struct kretprobe **rps, int num)
1929{ 1933{
1930} 1934}
1931EXPORT_SYMBOL_GPL(unregister_kretprobes); 1935EXPORT_SYMBOL_GPL(unregister_kretprobes);
1932 1936
1933static int __kprobes pre_handler_kretprobe(struct kprobe *p, 1937static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
1934 struct pt_regs *regs)
1935{ 1938{
1936 return 0; 1939 return 0;
1937} 1940}
1941NOKPROBE_SYMBOL(pre_handler_kretprobe);
1938 1942
1939#endif /* CONFIG_KRETPROBES */ 1943#endif /* CONFIG_KRETPROBES */
1940 1944
1941/* Set the kprobe gone and remove its instruction buffer. */ 1945/* Set the kprobe gone and remove its instruction buffer. */
1942static void __kprobes kill_kprobe(struct kprobe *p) 1946static void kill_kprobe(struct kprobe *p)
1943{ 1947{
1944 struct kprobe *kp; 1948 struct kprobe *kp;
1945 1949
@@ -1963,7 +1967,7 @@ static void __kprobes kill_kprobe(struct kprobe *p)
1963} 1967}
1964 1968
1965/* Disable one kprobe */ 1969/* Disable one kprobe */
1966int __kprobes disable_kprobe(struct kprobe *kp) 1970int disable_kprobe(struct kprobe *kp)
1967{ 1971{
1968 int ret = 0; 1972 int ret = 0;
1969 1973
@@ -1979,7 +1983,7 @@ int __kprobes disable_kprobe(struct kprobe *kp)
1979EXPORT_SYMBOL_GPL(disable_kprobe); 1983EXPORT_SYMBOL_GPL(disable_kprobe);
1980 1984
1981/* Enable one kprobe */ 1985/* Enable one kprobe */
1982int __kprobes enable_kprobe(struct kprobe *kp) 1986int enable_kprobe(struct kprobe *kp)
1983{ 1987{
1984 int ret = 0; 1988 int ret = 0;
1985 struct kprobe *p; 1989 struct kprobe *p;
@@ -2012,16 +2016,49 @@ out:
2012} 2016}
2013EXPORT_SYMBOL_GPL(enable_kprobe); 2017EXPORT_SYMBOL_GPL(enable_kprobe);
2014 2018
2015void __kprobes dump_kprobe(struct kprobe *kp) 2019void dump_kprobe(struct kprobe *kp)
2016{ 2020{
2017 printk(KERN_WARNING "Dumping kprobe:\n"); 2021 printk(KERN_WARNING "Dumping kprobe:\n");
2018 printk(KERN_WARNING "Name: %s\nAddress: %p\nOffset: %x\n", 2022 printk(KERN_WARNING "Name: %s\nAddress: %p\nOffset: %x\n",
2019 kp->symbol_name, kp->addr, kp->offset); 2023 kp->symbol_name, kp->addr, kp->offset);
2020} 2024}
2025NOKPROBE_SYMBOL(dump_kprobe);
2026
2027/*
2028 * Lookup and populate the kprobe_blacklist.
2029 *
2030 * Unlike the kretprobe blacklist, we'll need to determine
2031 * the range of addresses that belong to the said functions,
2032 * since a kprobe need not necessarily be at the beginning
2033 * of a function.
2034 */
2035static int __init populate_kprobe_blacklist(unsigned long *start,
2036 unsigned long *end)
2037{
2038 unsigned long *iter;
2039 struct kprobe_blacklist_entry *ent;
2040 unsigned long offset = 0, size = 0;
2041
2042 for (iter = start; iter < end; iter++) {
2043 if (!kallsyms_lookup_size_offset(*iter, &size, &offset)) {
2044 pr_err("Failed to find blacklist %p\n", (void *)*iter);
2045 continue;
2046 }
2047
2048 ent = kmalloc(sizeof(*ent), GFP_KERNEL);
2049 if (!ent)
2050 return -ENOMEM;
2051 ent->start_addr = *iter;
2052 ent->end_addr = *iter + size;
2053 INIT_LIST_HEAD(&ent->list);
2054 list_add_tail(&ent->list, &kprobe_blacklist);
2055 }
2056 return 0;
2057}
2021 2058
2022/* Module notifier call back, checking kprobes on the module */ 2059/* Module notifier call back, checking kprobes on the module */
2023static int __kprobes kprobes_module_callback(struct notifier_block *nb, 2060static int kprobes_module_callback(struct notifier_block *nb,
2024 unsigned long val, void *data) 2061 unsigned long val, void *data)
2025{ 2062{
2026 struct module *mod = data; 2063 struct module *mod = data;
2027 struct hlist_head *head; 2064 struct hlist_head *head;
@@ -2062,14 +2099,13 @@ static struct notifier_block kprobe_module_nb = {
2062 .priority = 0 2099 .priority = 0
2063}; 2100};
2064 2101
2102/* Markers of _kprobe_blacklist section */
2103extern unsigned long __start_kprobe_blacklist[];
2104extern unsigned long __stop_kprobe_blacklist[];
2105
2065static int __init init_kprobes(void) 2106static int __init init_kprobes(void)
2066{ 2107{
2067 int i, err = 0; 2108 int i, err = 0;
2068 unsigned long offset = 0, size = 0;
2069 char *modname, namebuf[KSYM_NAME_LEN];
2070 const char *symbol_name;
2071 void *addr;
2072 struct kprobe_blackpoint *kb;
2073 2109
2074 /* FIXME allocate the probe table, currently defined statically */ 2110 /* FIXME allocate the probe table, currently defined statically */
2075 /* initialize all list heads */ 2111 /* initialize all list heads */
@@ -2079,26 +2115,11 @@ static int __init init_kprobes(void)
2079 raw_spin_lock_init(&(kretprobe_table_locks[i].lock)); 2115 raw_spin_lock_init(&(kretprobe_table_locks[i].lock));
2080 } 2116 }
2081 2117
2082 /* 2118 err = populate_kprobe_blacklist(__start_kprobe_blacklist,
2083 * Lookup and populate the kprobe_blacklist. 2119 __stop_kprobe_blacklist);
2084 * 2120 if (err) {
2085 * Unlike the kretprobe blacklist, we'll need to determine 2121 pr_err("kprobes: failed to populate blacklist: %d\n", err);
2086 * the range of addresses that belong to the said functions, 2122 pr_err("Please take care of using kprobes.\n");
2087 * since a kprobe need not necessarily be at the beginning
2088 * of a function.
2089 */
2090 for (kb = kprobe_blacklist; kb->name != NULL; kb++) {
2091 kprobe_lookup_name(kb->name, addr);
2092 if (!addr)
2093 continue;
2094
2095 kb->start_addr = (unsigned long)addr;
2096 symbol_name = kallsyms_lookup(kb->start_addr,
2097 &size, &offset, &modname, namebuf);
2098 if (!symbol_name)
2099 kb->range = 0;
2100 else
2101 kb->range = size;
2102 } 2123 }
2103 2124
2104 if (kretprobe_blacklist_size) { 2125 if (kretprobe_blacklist_size) {
@@ -2138,7 +2159,7 @@ static int __init init_kprobes(void)
2138} 2159}
2139 2160
2140#ifdef CONFIG_DEBUG_FS 2161#ifdef CONFIG_DEBUG_FS
2141static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p, 2162static void report_probe(struct seq_file *pi, struct kprobe *p,
2142 const char *sym, int offset, char *modname, struct kprobe *pp) 2163 const char *sym, int offset, char *modname, struct kprobe *pp)
2143{ 2164{
2144 char *kprobe_type; 2165 char *kprobe_type;
@@ -2167,12 +2188,12 @@ static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p,
2167 (kprobe_ftrace(pp) ? "[FTRACE]" : "")); 2188 (kprobe_ftrace(pp) ? "[FTRACE]" : ""));
2168} 2189}
2169 2190
2170static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos) 2191static void *kprobe_seq_start(struct seq_file *f, loff_t *pos)
2171{ 2192{
2172 return (*pos < KPROBE_TABLE_SIZE) ? pos : NULL; 2193 return (*pos < KPROBE_TABLE_SIZE) ? pos : NULL;
2173} 2194}
2174 2195
2175static void __kprobes *kprobe_seq_next(struct seq_file *f, void *v, loff_t *pos) 2196static void *kprobe_seq_next(struct seq_file *f, void *v, loff_t *pos)
2176{ 2197{
2177 (*pos)++; 2198 (*pos)++;
2178 if (*pos >= KPROBE_TABLE_SIZE) 2199 if (*pos >= KPROBE_TABLE_SIZE)
@@ -2180,12 +2201,12 @@ static void __kprobes *kprobe_seq_next(struct seq_file *f, void *v, loff_t *pos)
2180 return pos; 2201 return pos;
2181} 2202}
2182 2203
2183static void __kprobes kprobe_seq_stop(struct seq_file *f, void *v) 2204static void kprobe_seq_stop(struct seq_file *f, void *v)
2184{ 2205{
2185 /* Nothing to do */ 2206 /* Nothing to do */
2186} 2207}
2187 2208
2188static int __kprobes show_kprobe_addr(struct seq_file *pi, void *v) 2209static int show_kprobe_addr(struct seq_file *pi, void *v)
2189{ 2210{
2190 struct hlist_head *head; 2211 struct hlist_head *head;
2191 struct kprobe *p, *kp; 2212 struct kprobe *p, *kp;
@@ -2216,7 +2237,7 @@ static const struct seq_operations kprobes_seq_ops = {
2216 .show = show_kprobe_addr 2237 .show = show_kprobe_addr
2217}; 2238};
2218 2239
2219static int __kprobes kprobes_open(struct inode *inode, struct file *filp) 2240static int kprobes_open(struct inode *inode, struct file *filp)
2220{ 2241{
2221 return seq_open(filp, &kprobes_seq_ops); 2242 return seq_open(filp, &kprobes_seq_ops);
2222} 2243}
@@ -2228,7 +2249,47 @@ static const struct file_operations debugfs_kprobes_operations = {
2228 .release = seq_release, 2249 .release = seq_release,
2229}; 2250};
2230 2251
2231static void __kprobes arm_all_kprobes(void) 2252/* kprobes/blacklist -- shows which functions can not be probed */
2253static void *kprobe_blacklist_seq_start(struct seq_file *m, loff_t *pos)
2254{
2255 return seq_list_start(&kprobe_blacklist, *pos);
2256}
2257
2258static void *kprobe_blacklist_seq_next(struct seq_file *m, void *v, loff_t *pos)
2259{
2260 return seq_list_next(v, &kprobe_blacklist, pos);
2261}
2262
2263static int kprobe_blacklist_seq_show(struct seq_file *m, void *v)
2264{
2265 struct kprobe_blacklist_entry *ent =
2266 list_entry(v, struct kprobe_blacklist_entry, list);
2267
2268 seq_printf(m, "0x%p-0x%p\t%ps\n", (void *)ent->start_addr,
2269 (void *)ent->end_addr, (void *)ent->start_addr);
2270 return 0;
2271}
2272
2273static const struct seq_operations kprobe_blacklist_seq_ops = {
2274 .start = kprobe_blacklist_seq_start,
2275 .next = kprobe_blacklist_seq_next,
2276 .stop = kprobe_seq_stop, /* Reuse void function */
2277 .show = kprobe_blacklist_seq_show,
2278};
2279
2280static int kprobe_blacklist_open(struct inode *inode, struct file *filp)
2281{
2282 return seq_open(filp, &kprobe_blacklist_seq_ops);
2283}
2284
2285static const struct file_operations debugfs_kprobe_blacklist_ops = {
2286 .open = kprobe_blacklist_open,
2287 .read = seq_read,
2288 .llseek = seq_lseek,
2289 .release = seq_release,
2290};
2291
2292static void arm_all_kprobes(void)
2232{ 2293{
2233 struct hlist_head *head; 2294 struct hlist_head *head;
2234 struct kprobe *p; 2295 struct kprobe *p;
@@ -2256,7 +2317,7 @@ already_enabled:
2256 return; 2317 return;
2257} 2318}
2258 2319
2259static void __kprobes disarm_all_kprobes(void) 2320static void disarm_all_kprobes(void)
2260{ 2321{
2261 struct hlist_head *head; 2322 struct hlist_head *head;
2262 struct kprobe *p; 2323 struct kprobe *p;
@@ -2340,7 +2401,7 @@ static const struct file_operations fops_kp = {
2340 .llseek = default_llseek, 2401 .llseek = default_llseek,
2341}; 2402};
2342 2403
2343static int __kprobes debugfs_kprobe_init(void) 2404static int __init debugfs_kprobe_init(void)
2344{ 2405{
2345 struct dentry *dir, *file; 2406 struct dentry *dir, *file;
2346 unsigned int value = 1; 2407 unsigned int value = 1;
@@ -2351,19 +2412,24 @@ static int __kprobes debugfs_kprobe_init(void)
2351 2412
2352 file = debugfs_create_file("list", 0444, dir, NULL, 2413 file = debugfs_create_file("list", 0444, dir, NULL,
2353 &debugfs_kprobes_operations); 2414 &debugfs_kprobes_operations);
2354 if (!file) { 2415 if (!file)
2355 debugfs_remove(dir); 2416 goto error;
2356 return -ENOMEM;
2357 }
2358 2417
2359 file = debugfs_create_file("enabled", 0600, dir, 2418 file = debugfs_create_file("enabled", 0600, dir,
2360 &value, &fops_kp); 2419 &value, &fops_kp);
2361 if (!file) { 2420 if (!file)
2362 debugfs_remove(dir); 2421 goto error;
2363 return -ENOMEM; 2422
2364 } 2423 file = debugfs_create_file("blacklist", 0444, dir, NULL,
2424 &debugfs_kprobe_blacklist_ops);
2425 if (!file)
2426 goto error;
2365 2427
2366 return 0; 2428 return 0;
2429
2430error:
2431 debugfs_remove(dir);
2432 return -ENOMEM;
2367} 2433}
2368 2434
2369late_initcall(debugfs_kprobe_init); 2435late_initcall(debugfs_kprobe_init);
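Taken together, the kprobes.c changes drop the __kprobes function attribute in favour of NOKPROBE_SYMBOL() annotations, collect those addresses in the _kprobe_blacklist section bounded by __start_kprobe_blacklist/__stop_kprobe_blacklist, build the kprobe_blacklist list from it in populate_kprobe_blacklist(), and reject registrations that fall inside it. A rough sketch of the new annotation style in client code is shown below; my_pre_handler and my_kp are illustrative names, not part of the patch.

#include <linux/kprobes.h>

static int my_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
	/* Runs from the breakpoint path, so it must not be probed itself. */
	pr_info("kprobe hit at %p\n", p->addr);
	return 0;
}
/* New style: annotate after the definition instead of tagging it __kprobes. */
NOKPROBE_SYMBOL(my_pre_handler);

static struct kprobe my_kp = {
	.symbol_name	= "do_sys_open",	/* illustrative probe target */
	.pre_handler	= my_pre_handler,
};
/* register_kprobe(&my_kp) then arms the probe as before. */

With the debugfs file added above, the prohibited ranges should be readable as "start-end<TAB>symbol" lines from <debugfs>/kprobes/blacklist, per kprobe_blacklist_seq_show().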
diff --git a/kernel/notifier.c b/kernel/notifier.c
index db4c8b08a50c..4803da6eab62 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -71,9 +71,9 @@ static int notifier_chain_unregister(struct notifier_block **nl,
71 * @returns: notifier_call_chain returns the value returned by the 71 * @returns: notifier_call_chain returns the value returned by the
72 * last notifier function called. 72 * last notifier function called.
73 */ 73 */
74static int __kprobes notifier_call_chain(struct notifier_block **nl, 74static int notifier_call_chain(struct notifier_block **nl,
75 unsigned long val, void *v, 75 unsigned long val, void *v,
76 int nr_to_call, int *nr_calls) 76 int nr_to_call, int *nr_calls)
77{ 77{
78 int ret = NOTIFY_DONE; 78 int ret = NOTIFY_DONE;
79 struct notifier_block *nb, *next_nb; 79 struct notifier_block *nb, *next_nb;
@@ -102,6 +102,7 @@ static int __kprobes notifier_call_chain(struct notifier_block **nl,
102 } 102 }
103 return ret; 103 return ret;
104} 104}
105NOKPROBE_SYMBOL(notifier_call_chain);
105 106
106/* 107/*
107 * Atomic notifier chain routines. Registration and unregistration 108 * Atomic notifier chain routines. Registration and unregistration
@@ -172,9 +173,9 @@ EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister);
172 * Otherwise the return value is the return value 173 * Otherwise the return value is the return value
173 * of the last notifier function called. 174 * of the last notifier function called.
174 */ 175 */
175int __kprobes __atomic_notifier_call_chain(struct atomic_notifier_head *nh, 176int __atomic_notifier_call_chain(struct atomic_notifier_head *nh,
176 unsigned long val, void *v, 177 unsigned long val, void *v,
177 int nr_to_call, int *nr_calls) 178 int nr_to_call, int *nr_calls)
178{ 179{
179 int ret; 180 int ret;
180 181
@@ -184,13 +185,15 @@ int __kprobes __atomic_notifier_call_chain(struct atomic_notifier_head *nh,
184 return ret; 185 return ret;
185} 186}
186EXPORT_SYMBOL_GPL(__atomic_notifier_call_chain); 187EXPORT_SYMBOL_GPL(__atomic_notifier_call_chain);
188NOKPROBE_SYMBOL(__atomic_notifier_call_chain);
187 189
188int __kprobes atomic_notifier_call_chain(struct atomic_notifier_head *nh, 190int atomic_notifier_call_chain(struct atomic_notifier_head *nh,
189 unsigned long val, void *v) 191 unsigned long val, void *v)
190{ 192{
191 return __atomic_notifier_call_chain(nh, val, v, -1, NULL); 193 return __atomic_notifier_call_chain(nh, val, v, -1, NULL);
192} 194}
193EXPORT_SYMBOL_GPL(atomic_notifier_call_chain); 195EXPORT_SYMBOL_GPL(atomic_notifier_call_chain);
196NOKPROBE_SYMBOL(atomic_notifier_call_chain);
194 197
195/* 198/*
196 * Blocking notifier chain routines. All access to the chain is 199 * Blocking notifier chain routines. All access to the chain is
@@ -527,7 +530,7 @@ EXPORT_SYMBOL_GPL(srcu_init_notifier_head);
527 530
528static ATOMIC_NOTIFIER_HEAD(die_chain); 531static ATOMIC_NOTIFIER_HEAD(die_chain);
529 532
530int notrace __kprobes notify_die(enum die_val val, const char *str, 533int notrace notify_die(enum die_val val, const char *str,
531 struct pt_regs *regs, long err, int trap, int sig) 534 struct pt_regs *regs, long err, int trap, int sig)
532{ 535{
533 struct die_args args = { 536 struct die_args args = {
@@ -540,6 +543,7 @@ int notrace __kprobes notify_die(enum die_val val, const char *str,
540 }; 543 };
541 return atomic_notifier_call_chain(&die_chain, val, &args); 544 return atomic_notifier_call_chain(&die_chain, val, &args);
542} 545}
546NOKPROBE_SYMBOL(notify_die);
543 547
544int register_die_notifier(struct notifier_block *nb) 548int register_die_notifier(struct notifier_block *nb)
545{ 549{
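The notifier-chain changes follow the same pattern: the atomic call-chain helpers and notify_die() stay off-limits to probes, but the marking now lives next to their definitions as NOKPROBE_SYMBOL(). A hedged sketch of a die notifier that relies on this path (names are illustrative; DIE_OOPS is an arch-defined die_val):

#include <linux/kdebug.h>
#include <linux/notifier.h>

static int my_die_handler(struct notifier_block *nb, unsigned long val,
			  void *data)
{
	struct die_args *args = data;

	if (val == DIE_OOPS)
		pr_emerg("oops trapped: %s\n", args->str);
	return NOTIFY_DONE;
}

static struct notifier_block my_die_nb = {
	.notifier_call = my_die_handler,
};
/* register_die_notifier(&my_die_nb) hooks into the die_chain shown above. */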
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c6b98793d647..4f611561ba4c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2527,7 +2527,7 @@ notrace unsigned long get_parent_ip(unsigned long addr)
2527#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \ 2527#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
2528 defined(CONFIG_PREEMPT_TRACER)) 2528 defined(CONFIG_PREEMPT_TRACER))
2529 2529
2530void __kprobes preempt_count_add(int val) 2530void preempt_count_add(int val)
2531{ 2531{
2532#ifdef CONFIG_DEBUG_PREEMPT 2532#ifdef CONFIG_DEBUG_PREEMPT
2533 /* 2533 /*
@@ -2553,8 +2553,9 @@ void __kprobes preempt_count_add(int val)
2553 } 2553 }
2554} 2554}
2555EXPORT_SYMBOL(preempt_count_add); 2555EXPORT_SYMBOL(preempt_count_add);
2556NOKPROBE_SYMBOL(preempt_count_add);
2556 2557
2557void __kprobes preempt_count_sub(int val) 2558void preempt_count_sub(int val)
2558{ 2559{
2559#ifdef CONFIG_DEBUG_PREEMPT 2560#ifdef CONFIG_DEBUG_PREEMPT
2560 /* 2561 /*
@@ -2575,6 +2576,7 @@ void __kprobes preempt_count_sub(int val)
2575 __preempt_count_sub(val); 2576 __preempt_count_sub(val);
2576} 2577}
2577EXPORT_SYMBOL(preempt_count_sub); 2578EXPORT_SYMBOL(preempt_count_sub);
2579NOKPROBE_SYMBOL(preempt_count_sub);
2578 2580
2579#endif 2581#endif
2580 2582
@@ -2857,6 +2859,7 @@ asmlinkage __visible void __sched notrace preempt_schedule(void)
2857 barrier(); 2859 barrier();
2858 } while (need_resched()); 2860 } while (need_resched());
2859} 2861}
2862NOKPROBE_SYMBOL(preempt_schedule);
2860EXPORT_SYMBOL(preempt_schedule); 2863EXPORT_SYMBOL(preempt_schedule);
2861#endif /* CONFIG_PREEMPT */ 2864#endif /* CONFIG_PREEMPT */
2862 2865
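The scheduler hunks show that NOKPROBE_SYMBOL() simply sits alongside the existing EXPORT_SYMBOL() line; the attribute moves out of the function signature without affecting linkage or inlining. A minimal sketch of the pairing, with my_fastpath as an illustrative name:

void my_fastpath(int val)
{
	/* work that may run under the int3/debug exception path */
}
EXPORT_SYMBOL(my_fastpath);
NOKPROBE_SYMBOL(my_fastpath);	/* annotation sits next to the export */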
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index c894614de14d..5d12bb407b44 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -248,8 +248,8 @@ void perf_trace_del(struct perf_event *p_event, int flags)
248 tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event); 248 tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event);
249} 249}
250 250
251__kprobes void *perf_trace_buf_prepare(int size, unsigned short type, 251void *perf_trace_buf_prepare(int size, unsigned short type,
252 struct pt_regs *regs, int *rctxp) 252 struct pt_regs *regs, int *rctxp)
253{ 253{
254 struct trace_entry *entry; 254 struct trace_entry *entry;
255 unsigned long flags; 255 unsigned long flags;
@@ -281,6 +281,7 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
281 return raw_data; 281 return raw_data;
282} 282}
283EXPORT_SYMBOL_GPL(perf_trace_buf_prepare); 283EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
284NOKPROBE_SYMBOL(perf_trace_buf_prepare);
284 285
285#ifdef CONFIG_FUNCTION_TRACER 286#ifdef CONFIG_FUNCTION_TRACER
286static void 287static void
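perf_trace_buf_prepare() keeps its export but is now blacklisted via NOKPROBE_SYMBOL(), since it is called from the kprobe and uprobe perf handlers. Its use, as seen later in trace_kprobe.c, roughly follows the shape below; the wrapper function and its parameters are illustrative and error handling is trimmed.

static void my_perf_func(struct ftrace_event_call *call, struct pt_regs *regs,
			 struct hlist_head *head, int size)
{
	void *entry;
	int rctx;

	entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
	if (!entry)
		return;

	/* ... fill probe arguments after the common entry header ... */
	perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);
}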
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index ef2fba1f46b5..282f6e4e5539 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -40,27 +40,27 @@ struct trace_kprobe {
40 (sizeof(struct probe_arg) * (n))) 40 (sizeof(struct probe_arg) * (n)))
41 41
42 42
43static __kprobes bool trace_kprobe_is_return(struct trace_kprobe *tk) 43static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk)
44{ 44{
45 return tk->rp.handler != NULL; 45 return tk->rp.handler != NULL;
46} 46}
47 47
48static __kprobes const char *trace_kprobe_symbol(struct trace_kprobe *tk) 48static nokprobe_inline const char *trace_kprobe_symbol(struct trace_kprobe *tk)
49{ 49{
50 return tk->symbol ? tk->symbol : "unknown"; 50 return tk->symbol ? tk->symbol : "unknown";
51} 51}
52 52
53static __kprobes unsigned long trace_kprobe_offset(struct trace_kprobe *tk) 53static nokprobe_inline unsigned long trace_kprobe_offset(struct trace_kprobe *tk)
54{ 54{
55 return tk->rp.kp.offset; 55 return tk->rp.kp.offset;
56} 56}
57 57
58static __kprobes bool trace_kprobe_has_gone(struct trace_kprobe *tk) 58static nokprobe_inline bool trace_kprobe_has_gone(struct trace_kprobe *tk)
59{ 59{
60 return !!(kprobe_gone(&tk->rp.kp)); 60 return !!(kprobe_gone(&tk->rp.kp));
61} 61}
62 62
63static __kprobes bool trace_kprobe_within_module(struct trace_kprobe *tk, 63static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk,
64 struct module *mod) 64 struct module *mod)
65{ 65{
66 int len = strlen(mod->name); 66 int len = strlen(mod->name);
@@ -68,7 +68,7 @@ static __kprobes bool trace_kprobe_within_module(struct trace_kprobe *tk,
68 return strncmp(mod->name, name, len) == 0 && name[len] == ':'; 68 return strncmp(mod->name, name, len) == 0 && name[len] == ':';
69} 69}
70 70
71static __kprobes bool trace_kprobe_is_on_module(struct trace_kprobe *tk) 71static nokprobe_inline bool trace_kprobe_is_on_module(struct trace_kprobe *tk)
72{ 72{
73 return !!strchr(trace_kprobe_symbol(tk), ':'); 73 return !!strchr(trace_kprobe_symbol(tk), ':');
74} 74}
@@ -132,19 +132,21 @@ struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
132 * Kprobes-specific fetch functions 132 * Kprobes-specific fetch functions
133 */ 133 */
134#define DEFINE_FETCH_stack(type) \ 134#define DEFINE_FETCH_stack(type) \
135static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\ 135static void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs, \
136 void *offset, void *dest) \ 136 void *offset, void *dest) \
137{ \ 137{ \
138 *(type *)dest = (type)regs_get_kernel_stack_nth(regs, \ 138 *(type *)dest = (type)regs_get_kernel_stack_nth(regs, \
139 (unsigned int)((unsigned long)offset)); \ 139 (unsigned int)((unsigned long)offset)); \
140} 140} \
141NOKPROBE_SYMBOL(FETCH_FUNC_NAME(stack, type));
142
141DEFINE_BASIC_FETCH_FUNCS(stack) 143DEFINE_BASIC_FETCH_FUNCS(stack)
142/* No string on the stack entry */ 144/* No string on the stack entry */
143#define fetch_stack_string NULL 145#define fetch_stack_string NULL
144#define fetch_stack_string_size NULL 146#define fetch_stack_string_size NULL
145 147
146#define DEFINE_FETCH_memory(type) \ 148#define DEFINE_FETCH_memory(type) \
147static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\ 149static void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs, \
148 void *addr, void *dest) \ 150 void *addr, void *dest) \
149{ \ 151{ \
150 type retval; \ 152 type retval; \
@@ -152,14 +154,16 @@ static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\
152 *(type *)dest = 0; \ 154 *(type *)dest = 0; \
153 else \ 155 else \
154 *(type *)dest = retval; \ 156 *(type *)dest = retval; \
155} 157} \
158NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, type));
159
156DEFINE_BASIC_FETCH_FUNCS(memory) 160DEFINE_BASIC_FETCH_FUNCS(memory)
157/* 161/*
158 * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max 162 * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
159 * length and relative data location. 163 * length and relative data location.
160 */ 164 */
161static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs, 165static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
162 void *addr, void *dest) 166 void *addr, void *dest)
163{ 167{
164 long ret; 168 long ret;
165 int maxlen = get_rloc_len(*(u32 *)dest); 169 int maxlen = get_rloc_len(*(u32 *)dest);
@@ -193,10 +197,11 @@ static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
193 get_rloc_offs(*(u32 *)dest)); 197 get_rloc_offs(*(u32 *)dest));
194 } 198 }
195} 199}
200NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string));
196 201
197/* Return the length of string -- including null terminal byte */ 202/* Return the length of string -- including null terminal byte */
198static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs, 203static void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
199 void *addr, void *dest) 204 void *addr, void *dest)
200{ 205{
201 mm_segment_t old_fs; 206 mm_segment_t old_fs;
202 int ret, len = 0; 207 int ret, len = 0;
@@ -219,17 +224,19 @@ static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
219 else 224 else
220 *(u32 *)dest = len; 225 *(u32 *)dest = len;
221} 226}
227NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string_size));
222 228
223#define DEFINE_FETCH_symbol(type) \ 229#define DEFINE_FETCH_symbol(type) \
224__kprobes void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs, \ 230void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs, void *data, void *dest)\
225 void *data, void *dest) \
226{ \ 231{ \
227 struct symbol_cache *sc = data; \ 232 struct symbol_cache *sc = data; \
228 if (sc->addr) \ 233 if (sc->addr) \
229 fetch_memory_##type(regs, (void *)sc->addr, dest); \ 234 fetch_memory_##type(regs, (void *)sc->addr, dest); \
230 else \ 235 else \
231 *(type *)dest = 0; \ 236 *(type *)dest = 0; \
232} 237} \
238NOKPROBE_SYMBOL(FETCH_FUNC_NAME(symbol, type));
239
233DEFINE_BASIC_FETCH_FUNCS(symbol) 240DEFINE_BASIC_FETCH_FUNCS(symbol)
234DEFINE_FETCH_symbol(string) 241DEFINE_FETCH_symbol(string)
235DEFINE_FETCH_symbol(string_size) 242DEFINE_FETCH_symbol(string_size)
@@ -907,7 +914,7 @@ static const struct file_operations kprobe_profile_ops = {
907}; 914};
908 915
909/* Kprobe handler */ 916/* Kprobe handler */
910static __kprobes void 917static nokprobe_inline void
911__kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs, 918__kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs,
912 struct ftrace_event_file *ftrace_file) 919 struct ftrace_event_file *ftrace_file)
913{ 920{
@@ -943,7 +950,7 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs,
943 entry, irq_flags, pc, regs); 950 entry, irq_flags, pc, regs);
944} 951}
945 952
946static __kprobes void 953static void
947kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs) 954kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs)
948{ 955{
949 struct event_file_link *link; 956 struct event_file_link *link;
@@ -951,9 +958,10 @@ kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs)
951 list_for_each_entry_rcu(link, &tk->tp.files, list) 958 list_for_each_entry_rcu(link, &tk->tp.files, list)
952 __kprobe_trace_func(tk, regs, link->file); 959 __kprobe_trace_func(tk, regs, link->file);
953} 960}
961NOKPROBE_SYMBOL(kprobe_trace_func);
954 962
955/* Kretprobe handler */ 963/* Kretprobe handler */
956static __kprobes void 964static nokprobe_inline void
957__kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, 965__kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
958 struct pt_regs *regs, 966 struct pt_regs *regs,
959 struct ftrace_event_file *ftrace_file) 967 struct ftrace_event_file *ftrace_file)
@@ -991,7 +999,7 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
991 entry, irq_flags, pc, regs); 999 entry, irq_flags, pc, regs);
992} 1000}
993 1001
994static __kprobes void 1002static void
995kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, 1003kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
996 struct pt_regs *regs) 1004 struct pt_regs *regs)
997{ 1005{
@@ -1000,6 +1008,7 @@ kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
1000 list_for_each_entry_rcu(link, &tk->tp.files, list) 1008 list_for_each_entry_rcu(link, &tk->tp.files, list)
1001 __kretprobe_trace_func(tk, ri, regs, link->file); 1009 __kretprobe_trace_func(tk, ri, regs, link->file);
1002} 1010}
1011NOKPROBE_SYMBOL(kretprobe_trace_func);
1003 1012
1004/* Event entry printers */ 1013/* Event entry printers */
1005static enum print_line_t 1014static enum print_line_t
@@ -1131,7 +1140,7 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
1131#ifdef CONFIG_PERF_EVENTS 1140#ifdef CONFIG_PERF_EVENTS
1132 1141
1133/* Kprobe profile handler */ 1142/* Kprobe profile handler */
1134static __kprobes void 1143static void
1135kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) 1144kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
1136{ 1145{
1137 struct ftrace_event_call *call = &tk->tp.call; 1146 struct ftrace_event_call *call = &tk->tp.call;
@@ -1158,9 +1167,10 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
1158 store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); 1167 store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
1159 perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL); 1168 perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);
1160} 1169}
1170NOKPROBE_SYMBOL(kprobe_perf_func);
1161 1171
1162/* Kretprobe profile handler */ 1172/* Kretprobe profile handler */
1163static __kprobes void 1173static void
1164kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, 1174kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
1165 struct pt_regs *regs) 1175 struct pt_regs *regs)
1166{ 1176{
@@ -1188,6 +1198,7 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
1188 store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); 1198 store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
1189 perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL); 1199 perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);
1190} 1200}
1201NOKPROBE_SYMBOL(kretprobe_perf_func);
1191#endif /* CONFIG_PERF_EVENTS */ 1202#endif /* CONFIG_PERF_EVENTS */
1192 1203
1193/* 1204/*
@@ -1196,9 +1207,8 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
1196 * kprobe_trace_self_tests_init() does enable_trace_probe/disable_trace_probe 1207 * kprobe_trace_self_tests_init() does enable_trace_probe/disable_trace_probe
1197 * lockless, but we can't race with this __init function. 1208 * lockless, but we can't race with this __init function.
1198 */ 1209 */
1199static __kprobes 1210static int kprobe_register(struct ftrace_event_call *event,
1200int kprobe_register(struct ftrace_event_call *event, 1211 enum trace_reg type, void *data)
1201 enum trace_reg type, void *data)
1202{ 1212{
1203 struct trace_kprobe *tk = (struct trace_kprobe *)event->data; 1213 struct trace_kprobe *tk = (struct trace_kprobe *)event->data;
1204 struct ftrace_event_file *file = data; 1214 struct ftrace_event_file *file = data;
@@ -1224,8 +1234,7 @@ int kprobe_register(struct ftrace_event_call *event,
1224 return 0; 1234 return 0;
1225} 1235}
1226 1236
1227static __kprobes 1237static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
1228int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
1229{ 1238{
1230 struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp); 1239 struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp);
1231 1240
@@ -1239,9 +1248,10 @@ int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
1239#endif 1248#endif
1240 return 0; /* We don't tweek kernel, so just return 0 */ 1249 return 0; /* We don't tweek kernel, so just return 0 */
1241} 1250}
1251NOKPROBE_SYMBOL(kprobe_dispatcher);
1242 1252
1243static __kprobes 1253static int
1244int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs) 1254kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
1245{ 1255{
1246 struct trace_kprobe *tk = container_of(ri->rp, struct trace_kprobe, rp); 1256 struct trace_kprobe *tk = container_of(ri->rp, struct trace_kprobe, rp);
1247 1257
@@ -1255,6 +1265,7 @@ int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
1255#endif 1265#endif
1256 return 0; /* We don't tweek kernel, so just return 0 */ 1266 return 0; /* We don't tweek kernel, so just return 0 */
1257} 1267}
1268NOKPROBE_SYMBOL(kretprobe_dispatcher);
1258 1269
1259static struct trace_event_functions kretprobe_funcs = { 1270static struct trace_event_functions kretprobe_funcs = {
1260 .trace = print_kretprobe_event 1271 .trace = print_kretprobe_event
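Throughout trace_kprobe.c, small helpers switch from __kprobes to nokprobe_inline, while out-of-line functions get NOKPROBE_SYMBOL() after their definitions. The distinction matters because an always-inlined helper has no symbol of its own to blacklist; it inherits protection from its non-probeable callers. A simplified view of what the two annotations amount to, assuming CONFIG_KPROBES=y (the authoritative definitions live in include/linux/kprobes.h):

/* Simplified sketch, not the verbatim header text: */
#define nokprobe_inline	__always_inline

#define NOKPROBE_SYMBOL(fname)					\
static unsigned long __used					\
	__attribute__((section("_kprobe_blacklist")))		\
	_kbl_addr_##fname = (unsigned long)fname;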
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 8364a421b4df..d4b9fc22cd27 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -37,13 +37,13 @@ const char *reserved_field_names[] = {
37 37
38/* Printing in basic type function template */ 38/* Printing in basic type function template */
39#define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt) \ 39#define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt) \
40__kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, \ 40int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, const char *name, \
41 const char *name, \ 41 void *data, void *ent) \
42 void *data, void *ent) \
43{ \ 42{ \
44 return trace_seq_printf(s, " %s=" fmt, name, *(type *)data); \ 43 return trace_seq_printf(s, " %s=" fmt, name, *(type *)data); \
45} \ 44} \
46const char PRINT_TYPE_FMT_NAME(type)[] = fmt; 45const char PRINT_TYPE_FMT_NAME(type)[] = fmt; \
46NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(type));
47 47
48DEFINE_BASIC_PRINT_TYPE_FUNC(u8 , "0x%x") 48DEFINE_BASIC_PRINT_TYPE_FUNC(u8 , "0x%x")
49DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "0x%x") 49DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "0x%x")
@@ -55,9 +55,8 @@ DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%d")
55DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%Ld") 55DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%Ld")
56 56
57/* Print type function for string type */ 57/* Print type function for string type */
58__kprobes int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, 58int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, const char *name,
59 const char *name, 59 void *data, void *ent)
60 void *data, void *ent)
61{ 60{
62 int len = *(u32 *)data >> 16; 61 int len = *(u32 *)data >> 16;
63 62
@@ -67,6 +66,7 @@ __kprobes int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s,
67 return trace_seq_printf(s, " %s=\"%s\"", name, 66 return trace_seq_printf(s, " %s=\"%s\"", name,
68 (const char *)get_loc_data(data, ent)); 67 (const char *)get_loc_data(data, ent));
69} 68}
69NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(string));
70 70
71const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\""; 71const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\"";
72 72
@@ -81,23 +81,24 @@ const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\"";
81 81
82/* Data fetch function templates */ 82/* Data fetch function templates */
83#define DEFINE_FETCH_reg(type) \ 83#define DEFINE_FETCH_reg(type) \
84__kprobes void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, \ 84void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, void *offset, void *dest) \
85 void *offset, void *dest) \
86{ \ 85{ \
87 *(type *)dest = (type)regs_get_register(regs, \ 86 *(type *)dest = (type)regs_get_register(regs, \
88 (unsigned int)((unsigned long)offset)); \ 87 (unsigned int)((unsigned long)offset)); \
89} 88} \
89NOKPROBE_SYMBOL(FETCH_FUNC_NAME(reg, type));
90DEFINE_BASIC_FETCH_FUNCS(reg) 90DEFINE_BASIC_FETCH_FUNCS(reg)
91/* No string on the register */ 91/* No string on the register */
92#define fetch_reg_string NULL 92#define fetch_reg_string NULL
93#define fetch_reg_string_size NULL 93#define fetch_reg_string_size NULL
94 94
95#define DEFINE_FETCH_retval(type) \ 95#define DEFINE_FETCH_retval(type) \
96__kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs, \ 96void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs, \
97 void *dummy, void *dest) \ 97 void *dummy, void *dest) \
98{ \ 98{ \
99 *(type *)dest = (type)regs_return_value(regs); \ 99 *(type *)dest = (type)regs_return_value(regs); \
100} 100} \
101NOKPROBE_SYMBOL(FETCH_FUNC_NAME(retval, type));
101DEFINE_BASIC_FETCH_FUNCS(retval) 102DEFINE_BASIC_FETCH_FUNCS(retval)
102/* No string on the retval */ 103/* No string on the retval */
103#define fetch_retval_string NULL 104#define fetch_retval_string NULL
@@ -112,8 +113,8 @@ struct deref_fetch_param {
112}; 113};
113 114
114#define DEFINE_FETCH_deref(type) \ 115#define DEFINE_FETCH_deref(type) \
115__kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs, \ 116void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs, \
116 void *data, void *dest) \ 117 void *data, void *dest) \
117{ \ 118{ \
118 struct deref_fetch_param *dprm = data; \ 119 struct deref_fetch_param *dprm = data; \
119 unsigned long addr; \ 120 unsigned long addr; \
@@ -123,12 +124,13 @@ __kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs, \
123 dprm->fetch(regs, (void *)addr, dest); \ 124 dprm->fetch(regs, (void *)addr, dest); \
124 } else \ 125 } else \
125 *(type *)dest = 0; \ 126 *(type *)dest = 0; \
126} 127} \
128NOKPROBE_SYMBOL(FETCH_FUNC_NAME(deref, type));
127DEFINE_BASIC_FETCH_FUNCS(deref) 129DEFINE_BASIC_FETCH_FUNCS(deref)
128DEFINE_FETCH_deref(string) 130DEFINE_FETCH_deref(string)
129 131
130__kprobes void FETCH_FUNC_NAME(deref, string_size)(struct pt_regs *regs, 132void FETCH_FUNC_NAME(deref, string_size)(struct pt_regs *regs,
131 void *data, void *dest) 133 void *data, void *dest)
132{ 134{
133 struct deref_fetch_param *dprm = data; 135 struct deref_fetch_param *dprm = data;
134 unsigned long addr; 136 unsigned long addr;
@@ -140,16 +142,18 @@ __kprobes void FETCH_FUNC_NAME(deref, string_size)(struct pt_regs *regs,
140 } else 142 } else
141 *(string_size *)dest = 0; 143 *(string_size *)dest = 0;
142} 144}
145NOKPROBE_SYMBOL(FETCH_FUNC_NAME(deref, string_size));
143 146
144static __kprobes void update_deref_fetch_param(struct deref_fetch_param *data) 147static void update_deref_fetch_param(struct deref_fetch_param *data)
145{ 148{
146 if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) 149 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
147 update_deref_fetch_param(data->orig.data); 150 update_deref_fetch_param(data->orig.data);
148 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn)) 151 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
149 update_symbol_cache(data->orig.data); 152 update_symbol_cache(data->orig.data);
150} 153}
154NOKPROBE_SYMBOL(update_deref_fetch_param);
151 155
152static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data) 156static void free_deref_fetch_param(struct deref_fetch_param *data)
153{ 157{
154 if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) 158 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
155 free_deref_fetch_param(data->orig.data); 159 free_deref_fetch_param(data->orig.data);
@@ -157,6 +161,7 @@ static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
157 free_symbol_cache(data->orig.data); 161 free_symbol_cache(data->orig.data);
158 kfree(data); 162 kfree(data);
159} 163}
164NOKPROBE_SYMBOL(free_deref_fetch_param);
160 165
161/* Bitfield fetch function */ 166/* Bitfield fetch function */
162struct bitfield_fetch_param { 167struct bitfield_fetch_param {
@@ -166,8 +171,8 @@ struct bitfield_fetch_param {
166}; 171};
167 172
168#define DEFINE_FETCH_bitfield(type) \ 173#define DEFINE_FETCH_bitfield(type) \
169__kprobes void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs, \ 174void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs, \
170 void *data, void *dest) \ 175 void *data, void *dest) \
171{ \ 176{ \
172 struct bitfield_fetch_param *bprm = data; \ 177 struct bitfield_fetch_param *bprm = data; \
173 type buf = 0; \ 178 type buf = 0; \
@@ -177,13 +182,13 @@ __kprobes void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs, \
177 buf >>= bprm->low_shift; \ 182 buf >>= bprm->low_shift; \
178 } \ 183 } \
179 *(type *)dest = buf; \ 184 *(type *)dest = buf; \
180} 185} \
181 186NOKPROBE_SYMBOL(FETCH_FUNC_NAME(bitfield, type));
182DEFINE_BASIC_FETCH_FUNCS(bitfield) 187DEFINE_BASIC_FETCH_FUNCS(bitfield)
183#define fetch_bitfield_string NULL 188#define fetch_bitfield_string NULL
184#define fetch_bitfield_string_size NULL 189#define fetch_bitfield_string_size NULL
185 190
186static __kprobes void 191static void
187update_bitfield_fetch_param(struct bitfield_fetch_param *data) 192update_bitfield_fetch_param(struct bitfield_fetch_param *data)
188{ 193{
189 /* 194 /*
@@ -196,7 +201,7 @@ update_bitfield_fetch_param(struct bitfield_fetch_param *data)
196 update_symbol_cache(data->orig.data); 201 update_symbol_cache(data->orig.data);
197} 202}
198 203
199static __kprobes void 204static void
200free_bitfield_fetch_param(struct bitfield_fetch_param *data) 205free_bitfield_fetch_param(struct bitfield_fetch_param *data)
201{ 206{
202 /* 207 /*
@@ -255,17 +260,17 @@ fail:
255} 260}
256 261
257/* Special function : only accept unsigned long */ 262/* Special function : only accept unsigned long */
258static __kprobes void fetch_kernel_stack_address(struct pt_regs *regs, 263static void fetch_kernel_stack_address(struct pt_regs *regs, void *dummy, void *dest)
259 void *dummy, void *dest)
260{ 264{
261 *(unsigned long *)dest = kernel_stack_pointer(regs); 265 *(unsigned long *)dest = kernel_stack_pointer(regs);
262} 266}
267NOKPROBE_SYMBOL(fetch_kernel_stack_address);
263 268
264static __kprobes void fetch_user_stack_address(struct pt_regs *regs, 269static void fetch_user_stack_address(struct pt_regs *regs, void *dummy, void *dest)
265 void *dummy, void *dest)
266{ 270{
267 *(unsigned long *)dest = user_stack_pointer(regs); 271 *(unsigned long *)dest = user_stack_pointer(regs);
268} 272}
273NOKPROBE_SYMBOL(fetch_user_stack_address);
269 274
270static fetch_func_t get_fetch_size_function(const struct fetch_type *type, 275static fetch_func_t get_fetch_size_function(const struct fetch_type *type,
271 fetch_func_t orig_fn, 276 fetch_func_t orig_fn,
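Because the fetch functions are generated by the DEFINE_FETCH_* macros, the NOKPROBE_SYMBOL() line is folded into each macro body so that every expansion blacklists itself. Expanding, say, DEFINE_FETCH_reg(u32) by hand (with FETCH_FUNC_NAME resolving to fetch_reg_u32) would yield roughly:

void fetch_reg_u32(struct pt_regs *regs, void *offset, void *dest)
{
	*(u32 *)dest = (u32)regs_get_register(regs,
			(unsigned int)((unsigned long)offset));
}
NOKPROBE_SYMBOL(fetch_reg_u32);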
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index fb1ab5dfbd42..4f815fbce16d 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -81,13 +81,13 @@
81 */ 81 */
82#define convert_rloc_to_loc(dl, offs) ((u32)(dl) + (offs)) 82#define convert_rloc_to_loc(dl, offs) ((u32)(dl) + (offs))
83 83
84static inline void *get_rloc_data(u32 *dl) 84static nokprobe_inline void *get_rloc_data(u32 *dl)
85{ 85{
86 return (u8 *)dl + get_rloc_offs(*dl); 86 return (u8 *)dl + get_rloc_offs(*dl);
87} 87}
88 88
89/* For data_loc conversion */ 89/* For data_loc conversion */
90static inline void *get_loc_data(u32 *dl, void *ent) 90static nokprobe_inline void *get_loc_data(u32 *dl, void *ent)
91{ 91{
92 return (u8 *)ent + get_rloc_offs(*dl); 92 return (u8 *)ent + get_rloc_offs(*dl);
93} 93}
@@ -136,9 +136,8 @@ typedef u32 string_size;
136 136
137/* Printing in basic type function template */ 137/* Printing in basic type function template */
138#define DECLARE_BASIC_PRINT_TYPE_FUNC(type) \ 138#define DECLARE_BASIC_PRINT_TYPE_FUNC(type) \
139__kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, \ 139int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, const char *name, \
140 const char *name, \ 140 void *data, void *ent); \
141 void *data, void *ent); \
142extern const char PRINT_TYPE_FMT_NAME(type)[] 141extern const char PRINT_TYPE_FMT_NAME(type)[]
143 142
144DECLARE_BASIC_PRINT_TYPE_FUNC(u8); 143DECLARE_BASIC_PRINT_TYPE_FUNC(u8);
@@ -303,7 +302,7 @@ static inline bool trace_probe_is_registered(struct trace_probe *tp)
303 return !!(tp->flags & TP_FLAG_REGISTERED); 302 return !!(tp->flags & TP_FLAG_REGISTERED);
304} 303}
305 304
306static inline __kprobes void call_fetch(struct fetch_param *fprm, 305static nokprobe_inline void call_fetch(struct fetch_param *fprm,
307 struct pt_regs *regs, void *dest) 306 struct pt_regs *regs, void *dest)
308{ 307{
309 return fprm->fn(regs, fprm->data, dest); 308 return fprm->fn(regs, fprm->data, dest);
@@ -351,7 +350,7 @@ extern ssize_t traceprobe_probes_write(struct file *file,
351extern int traceprobe_command(const char *buf, int (*createfn)(int, char**)); 350extern int traceprobe_command(const char *buf, int (*createfn)(int, char**));
352 351
353/* Sum up total data length for dynamic arrays (strings) */ 352/* Sum up total data length for dynamic arrays (strings) */
354static inline __kprobes int 353static nokprobe_inline int
355__get_data_size(struct trace_probe *tp, struct pt_regs *regs) 354__get_data_size(struct trace_probe *tp, struct pt_regs *regs)
356{ 355{
357 int i, ret = 0; 356 int i, ret = 0;
@@ -367,7 +366,7 @@ __get_data_size(struct trace_probe *tp, struct pt_regs *regs)
367} 366}
368 367
369/* Store the value of each argument */ 368/* Store the value of each argument */
370static inline __kprobes void 369static nokprobe_inline void
371store_trace_args(int ent_size, struct trace_probe *tp, struct pt_regs *regs, 370store_trace_args(int ent_size, struct trace_probe *tp, struct pt_regs *regs,
372 u8 *data, int maxlen) 371 u8 *data, int maxlen)
373{ 372{
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index c082a7441345..04fdb5de823c 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -108,8 +108,8 @@ static unsigned long get_user_stack_nth(struct pt_regs *regs, unsigned int n)
108 * Uprobes-specific fetch functions 108 * Uprobes-specific fetch functions
109 */ 109 */
110#define DEFINE_FETCH_stack(type) \ 110#define DEFINE_FETCH_stack(type) \
111static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\ 111static void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs, \
112 void *offset, void *dest) \ 112 void *offset, void *dest) \
113{ \ 113{ \
114 *(type *)dest = (type)get_user_stack_nth(regs, \ 114 *(type *)dest = (type)get_user_stack_nth(regs, \
115 ((unsigned long)offset)); \ 115 ((unsigned long)offset)); \
@@ -120,8 +120,8 @@ DEFINE_BASIC_FETCH_FUNCS(stack)
120#define fetch_stack_string_size NULL 120#define fetch_stack_string_size NULL
121 121
122#define DEFINE_FETCH_memory(type) \ 122#define DEFINE_FETCH_memory(type) \
123static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\ 123static void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs, \
124 void *addr, void *dest) \ 124 void *addr, void *dest) \
125{ \ 125{ \
126 type retval; \ 126 type retval; \
127 void __user *vaddr = (void __force __user *) addr; \ 127 void __user *vaddr = (void __force __user *) addr; \
@@ -136,8 +136,8 @@ DEFINE_BASIC_FETCH_FUNCS(memory)
136 * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max 136 * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
137 * length and relative data location. 137 * length and relative data location.
138 */ 138 */
139static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs, 139static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
140 void *addr, void *dest) 140 void *addr, void *dest)
141{ 141{
142 long ret; 142 long ret;
143 u32 rloc = *(u32 *)dest; 143 u32 rloc = *(u32 *)dest;
@@ -158,8 +158,8 @@ static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
158 } 158 }
159} 159}
160 160
161static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs, 161static void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
162 void *addr, void *dest) 162 void *addr, void *dest)
163{ 163{
164 int len; 164 int len;
165 void __user *vaddr = (void __force __user *) addr; 165 void __user *vaddr = (void __force __user *) addr;
@@ -184,8 +184,8 @@ static unsigned long translate_user_vaddr(void *file_offset)
184} 184}
185 185
186#define DEFINE_FETCH_file_offset(type) \ 186#define DEFINE_FETCH_file_offset(type) \
187static __kprobes void FETCH_FUNC_NAME(file_offset, type)(struct pt_regs *regs,\ 187static void FETCH_FUNC_NAME(file_offset, type)(struct pt_regs *regs, \
188 void *offset, void *dest) \ 188 void *offset, void *dest)\
189{ \ 189{ \
190 void *vaddr = (void *)translate_user_vaddr(offset); \ 190 void *vaddr = (void *)translate_user_vaddr(offset); \
191 \ 191 \
@@ -1009,56 +1009,60 @@ uprobe_filter_event(struct trace_uprobe *tu, struct perf_event *event)
1009 return __uprobe_perf_filter(&tu->filter, event->hw.tp_target->mm); 1009 return __uprobe_perf_filter(&tu->filter, event->hw.tp_target->mm);
1010} 1010}
1011 1011
1012static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event) 1012static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event)
1013{ 1013{
1014 bool done; 1014 bool done;
1015 1015
1016 write_lock(&tu->filter.rwlock); 1016 write_lock(&tu->filter.rwlock);
1017 if (event->hw.tp_target) { 1017 if (event->hw.tp_target) {
1018 /* 1018 list_del(&event->hw.tp_list);
1019 * event->parent != NULL means copy_process(), we can avoid
1020 * uprobe_apply(). current->mm must be probed and we can rely
1021 * on dup_mmap() which preserves the already installed bp's.
1022 *
1023 * attr.enable_on_exec means that exec/mmap will install the
1024 * breakpoints we need.
1025 */
1026 done = tu->filter.nr_systemwide || 1019 done = tu->filter.nr_systemwide ||
1027 event->parent || event->attr.enable_on_exec || 1020 (event->hw.tp_target->flags & PF_EXITING) ||
1028 uprobe_filter_event(tu, event); 1021 uprobe_filter_event(tu, event);
1029 list_add(&event->hw.tp_list, &tu->filter.perf_events);
1030 } else { 1022 } else {
1023 tu->filter.nr_systemwide--;
1031 done = tu->filter.nr_systemwide; 1024 done = tu->filter.nr_systemwide;
1032 tu->filter.nr_systemwide++;
1033 } 1025 }
1034 write_unlock(&tu->filter.rwlock); 1026 write_unlock(&tu->filter.rwlock);
1035 1027
1036 if (!done) 1028 if (!done)
1037 uprobe_apply(tu->inode, tu->offset, &tu->consumer, true); 1029 return uprobe_apply(tu->inode, tu->offset, &tu->consumer, false);
1038 1030
1039 return 0; 1031 return 0;
1040} 1032}
1041 1033
1042static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event) 1034static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event)
1043{ 1035{
1044 bool done; 1036 bool done;
1037 int err;
1045 1038
1046 write_lock(&tu->filter.rwlock); 1039 write_lock(&tu->filter.rwlock);
1047 if (event->hw.tp_target) { 1040 if (event->hw.tp_target) {
1048 list_del(&event->hw.tp_list); 1041 /*
1042 * event->parent != NULL means copy_process(), we can avoid
1043 * uprobe_apply(). current->mm must be probed and we can rely
1044 * on dup_mmap() which preserves the already installed bp's.
1045 *
1046 * attr.enable_on_exec means that exec/mmap will install the
1047 * breakpoints we need.
1048 */
1049 done = tu->filter.nr_systemwide || 1049 done = tu->filter.nr_systemwide ||
1050 (event->hw.tp_target->flags & PF_EXITING) || 1050 event->parent || event->attr.enable_on_exec ||
1051 uprobe_filter_event(tu, event); 1051 uprobe_filter_event(tu, event);
1052 list_add(&event->hw.tp_list, &tu->filter.perf_events);
1052 } else { 1053 } else {
1053 tu->filter.nr_systemwide--;
1054 done = tu->filter.nr_systemwide; 1054 done = tu->filter.nr_systemwide;
1055 tu->filter.nr_systemwide++;
1055 } 1056 }
1056 write_unlock(&tu->filter.rwlock); 1057 write_unlock(&tu->filter.rwlock);
1057 1058
1058 if (!done) 1059 err = 0;
1059 uprobe_apply(tu->inode, tu->offset, &tu->consumer, false); 1060 if (!done) {
1060 1061 err = uprobe_apply(tu->inode, tu->offset, &tu->consumer, true);
1061 return 0; 1062 if (err)
1063 uprobe_perf_close(tu, event);
1064 }
1065 return err;
1062} 1066}
1063 1067
1064static bool uprobe_perf_filter(struct uprobe_consumer *uc, 1068static bool uprobe_perf_filter(struct uprobe_consumer *uc,
diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c
index 5b5eb788996e..c1b49c36a951 100644
--- a/tools/lib/api/fs/fs.c
+++ b/tools/lib/api/fs/fs.c
@@ -1,8 +1,10 @@
1/* TODO merge/factor in debugfs.c here */ 1/* TODO merge/factor in debugfs.c here */
2 2
3#include <ctype.h>
3#include <errno.h> 4#include <errno.h>
4#include <stdbool.h> 5#include <stdbool.h>
5#include <stdio.h> 6#include <stdio.h>
7#include <stdlib.h>
6#include <string.h> 8#include <string.h>
7#include <sys/vfs.h> 9#include <sys/vfs.h>
8 10
@@ -96,12 +98,51 @@ static bool fs__check_mounts(struct fs *fs)
96 return false; 98 return false;
97} 99}
98 100
101static void mem_toupper(char *f, size_t len)
102{
103 while (len) {
104 *f = toupper(*f);
105 f++;
106 len--;
107 }
108}
109
110/*
111 * Check for "NAME_PATH" environment variable to override fs location (for
112 * testing). This matches the recommendation in Documentation/sysfs-rules.txt
113 * for SYSFS_PATH.
114 */
115static bool fs__env_override(struct fs *fs)
116{
117 char *override_path;
118 size_t name_len = strlen(fs->name);
119 /* name + "_PATH" + '\0' */
120 char upper_name[name_len + 5 + 1];
121 memcpy(upper_name, fs->name, name_len);
122 mem_toupper(upper_name, name_len);
123 strcpy(&upper_name[name_len], "_PATH");
124
125 override_path = getenv(upper_name);
126 if (!override_path)
127 return false;
128
129 fs->found = true;
130 strncpy(fs->path, override_path, sizeof(fs->path));
131 return true;
132}
133
99static const char *fs__get_mountpoint(struct fs *fs) 134static const char *fs__get_mountpoint(struct fs *fs)
100{ 135{
136 if (fs__env_override(fs))
137 return fs->path;
138
101 if (fs__check_mounts(fs)) 139 if (fs__check_mounts(fs))
102 return fs->path; 140 return fs->path;
103 141
104 return fs__read_mounts(fs) ? fs->path : NULL; 142 if (fs__read_mounts(fs))
143 return fs->path;
144
145 return NULL;
105} 146}
106 147
107static const char *fs__mountpoint(int idx) 148static const char *fs__mountpoint(int idx)
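A usage sketch for the override added above, assuming the fs table in this file registers entries named "sysfs" and "procfs" (so the derived variables become SYSFS_PATH and PROCFS_PATH); the paths and the perf invocation are illustrative only:

    # point the tools at a fake sysfs/procfs tree for testing
    $ SYSFS_PATH=/tmp/fake-sys PROCFS_PATH=/tmp/fake-proc perf list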
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index c71b0f36d9e8..d460049cae8e 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -184,9 +184,10 @@ following filters are defined:
184 - in_tx: only when the target is in a hardware transaction 184 - in_tx: only when the target is in a hardware transaction
185 - no_tx: only when the target is not in a hardware transaction 185 - no_tx: only when the target is not in a hardware transaction
186 - abort_tx: only when the target is a hardware transaction abort 186 - abort_tx: only when the target is a hardware transaction abort
187 - cond: conditional branches
187 188
188+ 189+
189The option requires at least one branch type among any, any_call, any_ret, ind_call. 190The option requires at least one branch type among any, any_call, any_ret, ind_call, cond.
190The privilege levels may be omitted, in which case, the privilege levels of the associated 191The privilege levels may be omitted, in which case, the privilege levels of the associated
191event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege 192event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege
192levels are subject to permissions. When sampling on multiple events, branch stack sampling 193levels are subject to permissions. When sampling on multiple events, branch stack sampling
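A hedged example of the new filter in use (the workload name is illustrative, and -j is the short form of the --branch-filter option described above):

    # sample only conditional branches executed at the user level
    $ perf record -j cond,u -- ./my_workload

The recorded branch samples can then be inspected with perf report as usual.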
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index a1b5185402d5..cefdf430d1b4 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -111,7 +111,7 @@ OPTIONS
111--fields=:: 111--fields=::
112 Specify output field - multiple keys can be specified in CSV format. 112 Specify output field - multiple keys can be specified in CSV format.
113 Following fields are available: 113 Following fields are available:
114 overhead, overhead_sys, overhead_us, sample and period. 114 overhead, overhead_sys, overhead_us, overhead_children, sample and period.
115 Also it can contain any sort key(s). 115 Also it can contain any sort key(s).
116 116
117 By default, every sort keys not specified in -F will be appended 117 By default, every sort keys not specified in -F will be appended
@@ -163,6 +163,11 @@ OPTIONS
163 163
164 Default: fractal,0.5,callee,function. 164 Default: fractal,0.5,callee,function.
165 165
166--children::
 167	Accumulate callchains of children to the parent entry so that they can
 168	show up in the output. The output will have a new "Children" column
 169	and will be sorted on it. It requires callchains to be recorded.
170
166--max-stack:: 171--max-stack::
167 Set the stack depth limit when parsing the callchain, anything 172 Set the stack depth limit when parsing the callchain, anything
168 beyond the specified depth will be ignored. This is a trade-off 173 beyond the specified depth will be ignored. This is a trade-off
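A brief usage sketch for the new --children option (the workload name is illustrative); it assumes callchains were captured at record time, as the text above requires:

    $ perf record -g -- ./my_workload
    $ perf report --children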
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index dcfa54c851e9..180ae02137a5 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -119,7 +119,7 @@ Default is to monitor all CPUS.
119--fields=:: 119--fields=::
120 Specify output field - multiple keys can be specified in CSV format. 120 Specify output field - multiple keys can be specified in CSV format.
121 Following fields are available: 121 Following fields are available:
122 overhead, overhead_sys, overhead_us, sample and period. 122 overhead, overhead_sys, overhead_us, overhead_children, sample and period.
123 Also it can contain any sort key(s). 123 Also it can contain any sort key(s).
124 124
125 By default, every sort keys not specified in --field will be appended 125 By default, every sort keys not specified in --field will be appended
@@ -161,6 +161,12 @@ Default is to monitor all CPUS.
161 Setup and enable call-graph (stack chain/backtrace) recording, 161 Setup and enable call-graph (stack chain/backtrace) recording,
162 implies -g. 162 implies -g.
163 163
164--children::
 165	Accumulate callchains of children to the parent entry so that they can
 166	show up in the output. The output will have a new "Children" column
 167	and will be sorted on it. It requires the -g/--call-graph option to
 168	be enabled.
169
164--max-stack:: 170--max-stack::
165 Set the stack depth limit when parsing the callchain, anything 171 Set the stack depth limit when parsing the callchain, anything
166 beyond the specified depth will be ignored. This is a trade-off 172 beyond the specified depth will be ignored. This is a trade-off
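The corresponding live-mode sketch; per the text above, --children only takes effect when call-graph recording is enabled:

    $ perf top -g --children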
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 02f0a4dd1a80..ae20edfcc3f7 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -400,6 +400,7 @@ LIB_OBJS += $(OUTPUT)tests/hists_common.o
400LIB_OBJS += $(OUTPUT)tests/hists_link.o 400LIB_OBJS += $(OUTPUT)tests/hists_link.o
401LIB_OBJS += $(OUTPUT)tests/hists_filter.o 401LIB_OBJS += $(OUTPUT)tests/hists_filter.o
402LIB_OBJS += $(OUTPUT)tests/hists_output.o 402LIB_OBJS += $(OUTPUT)tests/hists_output.o
403LIB_OBJS += $(OUTPUT)tests/hists_cumulate.o
403LIB_OBJS += $(OUTPUT)tests/python-use.o 404LIB_OBJS += $(OUTPUT)tests/python-use.o
404LIB_OBJS += $(OUTPUT)tests/bp_signal.o 405LIB_OBJS += $(OUTPUT)tests/bp_signal.o
405LIB_OBJS += $(OUTPUT)tests/bp_signal_overflow.o 406LIB_OBJS += $(OUTPUT)tests/bp_signal_overflow.o
@@ -788,8 +789,8 @@ help:
788 @echo '' 789 @echo ''
789 @echo 'Perf install targets:' 790 @echo 'Perf install targets:'
790 @echo ' NOTE: documentation build requires asciidoc, xmlto packages to be installed' 791 @echo ' NOTE: documentation build requires asciidoc, xmlto packages to be installed'
791 @echo ' HINT: use "make prefix=<path> <install target>" to install to a particular' 792 @echo ' HINT: use "prefix" or "DESTDIR" to install to a particular'
792 @echo ' path like make prefix=/usr/local install install-doc' 793 @echo ' path like "make prefix=/usr/local install install-doc"'
793 @echo ' install - install compiled binaries' 794 @echo ' install - install compiled binaries'
794 @echo ' install-doc - install *all* documentation' 795 @echo ' install-doc - install *all* documentation'
795 @echo ' install-man - install manpage documentation' 796 @echo ' install-man - install manpage documentation'
@@ -814,17 +815,20 @@ INSTALL_DOC_TARGETS += quick-install-doc quick-install-man quick-install-html
814$(DOC_TARGETS): 815$(DOC_TARGETS):
815 $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:doc=all) 816 $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:doc=all)
816 817
818TAG_FOLDERS= . ../lib/traceevent ../lib/api ../lib/symbol
819TAG_FILES= ../../include/uapi/linux/perf_event.h
820
817TAGS: 821TAGS:
818 $(RM) TAGS 822 $(RM) TAGS
819 $(FIND) . -name '*.[hcS]' -print | xargs etags -a 823 $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs etags -a $(TAG_FILES)
820 824
821tags: 825tags:
822 $(RM) tags 826 $(RM) tags
823 $(FIND) . -name '*.[hcS]' -print | xargs ctags -a 827 $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs ctags -a $(TAG_FILES)
824 828
825cscope: 829cscope:
826 $(RM) cscope* 830 $(RM) cscope*
827 $(FIND) . -name '*.[hcS]' -print | xargs cscope -b 831 $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs cscope -b $(TAG_FILES)
828 832
829### Detect prefix changes 833### Detect prefix changes
830TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):\ 834TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):\
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index d30d2c2e2a7a..1ec429fef2be 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -65,12 +65,13 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
65 return 0; 65 return 0;
66 } 66 }
67 67
68 he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL, 1, 1, 0); 68 he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL, 1, 1, 0,
69 true);
69 if (he == NULL) 70 if (he == NULL)
70 return -ENOMEM; 71 return -ENOMEM;
71 72
72 ret = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); 73 ret = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
73 hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); 74 hists__inc_nr_samples(&evsel->hists, true);
74 return ret; 75 return ret;
75} 76}
76 77
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 8bff543acaab..9a5a035cb426 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -315,7 +315,7 @@ static int hists__add_entry(struct hists *hists,
315 u64 weight, u64 transaction) 315 u64 weight, u64 transaction)
316{ 316{
317 if (__hists__add_entry(hists, al, NULL, NULL, NULL, period, weight, 317 if (__hists__add_entry(hists, al, NULL, NULL, NULL, period, weight,
318 transaction) != NULL) 318 transaction, true) != NULL)
319 return 0; 319 return 0;
320 return -ENOMEM; 320 return -ENOMEM;
321} 321}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index e4c85b8f46c2..378b85b731a7 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -454,7 +454,11 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
454 if (done) 454 if (done)
455 break; 455 break;
456 err = poll(rec->evlist->pollfd, rec->evlist->nr_fds, -1); 456 err = poll(rec->evlist->pollfd, rec->evlist->nr_fds, -1);
457 if (err < 0 && errno == EINTR) 457 /*
458 * Propagate error, only if there's any. Ignore positive
459 * number of returned events and interrupt error.
460 */
461 if (err > 0 || (err < 0 && errno == EINTR))
458 err = 0; 462 err = 0;
459 waking++; 463 waking++;
460 } 464 }
@@ -544,6 +548,7 @@ static const struct branch_mode branch_modes[] = {
544 BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX), 548 BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX),
545 BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX), 549 BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
546 BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX), 550 BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
551 BRANCH_OPT("cond", PERF_SAMPLE_BRANCH_COND),
547 BRANCH_END 552 BRANCH_END
548}; 553};
549 554
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index bc0eec1ce4be..21d830bafff3 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -72,6 +72,10 @@ static int report__config(const char *var, const char *value, void *cb)
72 rep->min_percent = strtof(value, NULL); 72 rep->min_percent = strtof(value, NULL);
73 return 0; 73 return 0;
74 } 74 }
75 if (!strcmp(var, "report.children")) {
76 symbol_conf.cumulate_callchain = perf_config_bool(var, value);
77 return 0;
78 }
75 79
76 return perf_default_config(var, value, cb); 80 return perf_default_config(var, value, cb);
77} 81}
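For reference, report.children is looked up through perf_config(), which follows the usual git-config style section.key naming, so the same behaviour could presumably be enabled persistently with a sketch like this (not part of the patch itself):

    $ cat >> ~/.perfconfig << 'EOF'
    [report]
    	children = true
    EOF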
@@ -85,156 +89,52 @@ static void report__inc_stats(struct report *rep, struct hist_entry *he)
85 */ 89 */
86 if (he->stat.nr_events == 1) 90 if (he->stat.nr_events == 1)
87 rep->nr_entries++; 91 rep->nr_entries++;
88
89 /*
90 * Only counts number of samples at this stage as it's more
91 * natural to do it here and non-sample events are also
92 * counted in perf_session_deliver_event(). The dump_trace
93 * requires this info is ready before going to the output tree.
94 */
95 hists__inc_nr_events(he->hists, PERF_RECORD_SAMPLE);
96 if (!he->filtered)
97 he->hists->stats.nr_non_filtered_samples++;
98} 92}
99 93
100static int report__add_mem_hist_entry(struct report *rep, struct addr_location *al, 94static int hist_iter__report_callback(struct hist_entry_iter *iter,
101 struct perf_sample *sample, struct perf_evsel *evsel) 95 struct addr_location *al, bool single,
96 void *arg)
102{ 97{
103 struct symbol *parent = NULL; 98 int err = 0;
104 struct hist_entry *he; 99 struct report *rep = arg;
105 struct mem_info *mi, *mx; 100 struct hist_entry *he = iter->he;
106 uint64_t cost; 101 struct perf_evsel *evsel = iter->evsel;
107 int err = sample__resolve_callchain(sample, &parent, evsel, al, rep->max_stack); 102 struct mem_info *mi;
108 103 struct branch_info *bi;
109 if (err)
110 return err;
111 104
112 mi = sample__resolve_mem(sample, al); 105 report__inc_stats(rep, he);
113 if (!mi)
114 return -ENOMEM;
115 106
116 if (rep->hide_unresolved && !al->sym) 107 if (!ui__has_annotation())
117 return 0; 108 return 0;
118 109
119 cost = sample->weight; 110 if (sort__mode == SORT_MODE__BRANCH) {
120 if (!cost) 111 bi = he->branch_info;
121 cost = 1; 112 err = addr_map_symbol__inc_samples(&bi->from, evsel->idx);
122
123 /*
124 * must pass period=weight in order to get the correct
125 * sorting from hists__collapse_resort() which is solely
126 * based on periods. We want sorting be done on nr_events * weight
127 * and this is indirectly achieved by passing period=weight here
128 * and the he_stat__add_period() function.
129 */
130 he = __hists__add_entry(&evsel->hists, al, parent, NULL, mi,
131 cost, cost, 0);
132 if (!he)
133 return -ENOMEM;
134
135 if (ui__has_annotation()) {
136 err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
137 if (err)
138 goto out;
139
140 mx = he->mem_info;
141 err = addr_map_symbol__inc_samples(&mx->daddr, evsel->idx);
142 if (err) 113 if (err)
143 goto out; 114 goto out;
144 }
145
146 report__inc_stats(rep, he);
147
148 err = hist_entry__append_callchain(he, sample);
149out:
150 return err;
151}
152
153static int report__add_branch_hist_entry(struct report *rep, struct addr_location *al,
154 struct perf_sample *sample, struct perf_evsel *evsel)
155{
156 struct symbol *parent = NULL;
157 unsigned i;
158 struct hist_entry *he;
159 struct branch_info *bi, *bx;
160 int err = sample__resolve_callchain(sample, &parent, evsel, al, rep->max_stack);
161 115
162 if (err) 116 err = addr_map_symbol__inc_samples(&bi->to, evsel->idx);
163 return err;
164
165 bi = sample__resolve_bstack(sample, al);
166 if (!bi)
167 return -ENOMEM;
168
169 for (i = 0; i < sample->branch_stack->nr; i++) {
170 if (rep->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym))
171 continue;
172 117
173 err = -ENOMEM; 118 } else if (rep->mem_mode) {
174 119 mi = he->mem_info;
175 /* overwrite the 'al' to branch-to info */ 120 err = addr_map_symbol__inc_samples(&mi->daddr, evsel->idx);
176 al->map = bi[i].to.map; 121 if (err)
177 al->sym = bi[i].to.sym;
178 al->addr = bi[i].to.addr;
179 /*
180 * The report shows the percentage of total branches captured
181 * and not events sampled. Thus we use a pseudo period of 1.
182 */
183 he = __hists__add_entry(&evsel->hists, al, parent, &bi[i], NULL,
184 1, 1, 0);
185 if (he) {
186 if (ui__has_annotation()) {
187 bx = he->branch_info;
188 err = addr_map_symbol__inc_samples(&bx->from,
189 evsel->idx);
190 if (err)
191 goto out;
192
193 err = addr_map_symbol__inc_samples(&bx->to,
194 evsel->idx);
195 if (err)
196 goto out;
197 }
198 report__inc_stats(rep, he);
199 } else
200 goto out; 122 goto out;
201 }
202 err = 0;
203out:
204 free(bi);
205 return err;
206}
207
208static int report__add_hist_entry(struct report *rep, struct perf_evsel *evsel,
209 struct addr_location *al, struct perf_sample *sample)
210{
211 struct symbol *parent = NULL;
212 struct hist_entry *he;
213 int err = sample__resolve_callchain(sample, &parent, evsel, al, rep->max_stack);
214
215 if (err)
216 return err;
217 123
218 he = __hists__add_entry(&evsel->hists, al, parent, NULL, NULL,
219 sample->period, sample->weight,
220 sample->transaction);
221 if (he == NULL)
222 return -ENOMEM;
223
224 err = hist_entry__append_callchain(he, sample);
225 if (err)
226 goto out;
227
228 if (ui__has_annotation())
229 err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); 124 err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
230 125
231 report__inc_stats(rep, he); 126 } else if (symbol_conf.cumulate_callchain) {
127 if (single)
128 err = hist_entry__inc_addr_samples(he, evsel->idx,
129 al->addr);
130 } else {
131 err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
132 }
232 133
233out: 134out:
234 return err; 135 return err;
235} 136}
236 137
237
238static int process_sample_event(struct perf_tool *tool, 138static int process_sample_event(struct perf_tool *tool,
239 union perf_event *event, 139 union perf_event *event,
240 struct perf_sample *sample, 140 struct perf_sample *sample,
@@ -243,6 +143,10 @@ static int process_sample_event(struct perf_tool *tool,
243{ 143{
244 struct report *rep = container_of(tool, struct report, tool); 144 struct report *rep = container_of(tool, struct report, tool);
245 struct addr_location al; 145 struct addr_location al;
146 struct hist_entry_iter iter = {
147 .hide_unresolved = rep->hide_unresolved,
148 .add_entry_cb = hist_iter__report_callback,
149 };
246 int ret; 150 int ret;
247 151
248 if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) { 152 if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
@@ -257,22 +161,23 @@ static int process_sample_event(struct perf_tool *tool,
257 if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap)) 161 if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap))
258 return 0; 162 return 0;
259 163
260 if (sort__mode == SORT_MODE__BRANCH) { 164 if (sort__mode == SORT_MODE__BRANCH)
261 ret = report__add_branch_hist_entry(rep, &al, sample, evsel); 165 iter.ops = &hist_iter_branch;
262 if (ret < 0) 166 else if (rep->mem_mode)
263 pr_debug("problem adding lbr entry, skipping event\n"); 167 iter.ops = &hist_iter_mem;
264 } else if (rep->mem_mode == 1) { 168 else if (symbol_conf.cumulate_callchain)
265 ret = report__add_mem_hist_entry(rep, &al, sample, evsel); 169 iter.ops = &hist_iter_cumulative;
266 if (ret < 0) 170 else
267 pr_debug("problem adding mem entry, skipping event\n"); 171 iter.ops = &hist_iter_normal;
268 } else { 172
269 if (al.map != NULL) 173 if (al.map != NULL)
270 al.map->dso->hit = 1; 174 al.map->dso->hit = 1;
175
176 ret = hist_entry_iter__add(&iter, &al, evsel, sample, rep->max_stack,
177 rep);
178 if (ret < 0)
179 pr_debug("problem adding hist entry, skipping event\n");
271 180
272 ret = report__add_hist_entry(rep, evsel, &al, sample);
273 if (ret < 0)
274 pr_debug("problem incrementing symbol period, skipping event\n");
275 }
276 return ret; 181 return ret;
277} 182}
278 183
@@ -329,6 +234,14 @@ static int report__setup_sample_type(struct report *rep)
329 } 234 }
330 } 235 }
331 236
237 if (symbol_conf.cumulate_callchain) {
238 /* Silently ignore if callchain is missing */
239 if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) {
240 symbol_conf.cumulate_callchain = false;
241 perf_hpp__cancel_cumulate();
242 }
243 }
244
332 if (sort__mode == SORT_MODE__BRANCH) { 245 if (sort__mode == SORT_MODE__BRANCH) {
333 if (!is_pipe && 246 if (!is_pipe &&
334 !(sample_type & PERF_SAMPLE_BRANCH_STACK)) { 247 !(sample_type & PERF_SAMPLE_BRANCH_STACK)) {
@@ -712,6 +625,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
712 OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order", 625 OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order",
713 "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). " 626 "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). "
714 "Default: fractal,0.5,callee,function", &report_parse_callchain_opt, callchain_default_opt), 627 "Default: fractal,0.5,callee,function", &report_parse_callchain_opt, callchain_default_opt),
628 OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
629 "Accumulate callchains of children and show total overhead as well"),
715 OPT_INTEGER(0, "max-stack", &report.max_stack, 630 OPT_INTEGER(0, "max-stack", &report.max_stack,
716 "Set the maximum stack depth when parsing the callchain, " 631 "Set the maximum stack depth when parsing the callchain, "
717 "anything beyond the specified depth will be ignored. " 632 "anything beyond the specified depth will be ignored. "
@@ -804,8 +719,10 @@ repeat:
804 has_br_stack = perf_header__has_feat(&session->header, 719 has_br_stack = perf_header__has_feat(&session->header,
805 HEADER_BRANCH_STACK); 720 HEADER_BRANCH_STACK);
806 721
807 if (branch_mode == -1 && has_br_stack) 722 if (branch_mode == -1 && has_br_stack) {
808 sort__mode = SORT_MODE__BRANCH; 723 sort__mode = SORT_MODE__BRANCH;
724 symbol_conf.cumulate_callchain = false;
725 }
809 726
810 if (report.mem_mode) { 727 if (report.mem_mode) {
811 if (sort__mode == SORT_MODE__BRANCH) { 728 if (sort__mode == SORT_MODE__BRANCH) {
@@ -813,6 +730,7 @@ repeat:
813 goto error; 730 goto error;
814 } 731 }
815 sort__mode = SORT_MODE__MEMORY; 732 sort__mode = SORT_MODE__MEMORY;
733 symbol_conf.cumulate_callchain = false;
816 } 734 }
817 735
818 if (setup_sorting() < 0) { 736 if (setup_sorting() < 0) {
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index d7176830b9b2..c38d06c04775 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1428,7 +1428,7 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_
1428 int err = 0; 1428 int err = 0;
1429 1429
1430 evsel->hists.stats.total_period += sample->period; 1430 evsel->hists.stats.total_period += sample->period;
1431 hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); 1431 hists__inc_nr_samples(&evsel->hists, true);
1432 1432
1433 if (evsel->handler != NULL) { 1433 if (evsel->handler != NULL) {
1434 tracepoint_handler f = evsel->handler; 1434 tracepoint_handler f = evsel->handler;
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 5b389ce4cd15..377971dc89a3 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -196,6 +196,12 @@ static void perf_top__record_precise_ip(struct perf_top *top,
196 196
197 pthread_mutex_unlock(&notes->lock); 197 pthread_mutex_unlock(&notes->lock);
198 198
199 /*
200 * This function is now called with he->hists->lock held.
201 * Release it before going to sleep.
202 */
203 pthread_mutex_unlock(&he->hists->lock);
204
199 if (err == -ERANGE && !he->ms.map->erange_warned) 205 if (err == -ERANGE && !he->ms.map->erange_warned)
200 ui__warn_map_erange(he->ms.map, sym, ip); 206 ui__warn_map_erange(he->ms.map, sym, ip);
201 else if (err == -ENOMEM) { 207 else if (err == -ENOMEM) {
@@ -203,6 +209,8 @@ static void perf_top__record_precise_ip(struct perf_top *top,
203 sym->name); 209 sym->name);
204 sleep(1); 210 sleep(1);
205 } 211 }
212
213 pthread_mutex_lock(&he->hists->lock);
206} 214}
207 215
208static void perf_top__show_details(struct perf_top *top) 216static void perf_top__show_details(struct perf_top *top)
@@ -238,27 +246,6 @@ out_unlock:
238 pthread_mutex_unlock(&notes->lock); 246 pthread_mutex_unlock(&notes->lock);
239} 247}
240 248
241static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel,
242 struct addr_location *al,
243 struct perf_sample *sample)
244{
245 struct hist_entry *he;
246
247 pthread_mutex_lock(&evsel->hists.lock);
248 he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL,
249 sample->period, sample->weight,
250 sample->transaction);
251 pthread_mutex_unlock(&evsel->hists.lock);
252 if (he == NULL)
253 return NULL;
254
255 hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
256 if (!he->filtered)
257 evsel->hists.stats.nr_non_filtered_samples++;
258
259 return he;
260}
261
262static void perf_top__print_sym_table(struct perf_top *top) 249static void perf_top__print_sym_table(struct perf_top *top)
263{ 250{
264 char bf[160]; 251 char bf[160];
@@ -662,6 +649,26 @@ static int symbol_filter(struct map *map __maybe_unused, struct symbol *sym)
662 return 0; 649 return 0;
663} 650}
664 651
652static int hist_iter__top_callback(struct hist_entry_iter *iter,
653 struct addr_location *al, bool single,
654 void *arg)
655{
656 struct perf_top *top = arg;
657 struct hist_entry *he = iter->he;
658 struct perf_evsel *evsel = iter->evsel;
659
660 if (sort__has_sym && single) {
661 u64 ip = al->addr;
662
663 if (al->map)
664 ip = al->map->unmap_ip(al->map, ip);
665
666 perf_top__record_precise_ip(top, he, evsel->idx, ip);
667 }
668
669 return 0;
670}
671
665static void perf_event__process_sample(struct perf_tool *tool, 672static void perf_event__process_sample(struct perf_tool *tool,
666 const union perf_event *event, 673 const union perf_event *event,
667 struct perf_evsel *evsel, 674 struct perf_evsel *evsel,
@@ -669,8 +676,6 @@ static void perf_event__process_sample(struct perf_tool *tool,
669 struct machine *machine) 676 struct machine *machine)
670{ 677{
671 struct perf_top *top = container_of(tool, struct perf_top, tool); 678 struct perf_top *top = container_of(tool, struct perf_top, tool);
672 struct symbol *parent = NULL;
673 u64 ip = sample->ip;
674 struct addr_location al; 679 struct addr_location al;
675 int err; 680 int err;
676 681
@@ -745,25 +750,23 @@ static void perf_event__process_sample(struct perf_tool *tool,
745 } 750 }
746 751
747 if (al.sym == NULL || !al.sym->ignore) { 752 if (al.sym == NULL || !al.sym->ignore) {
748 struct hist_entry *he; 753 struct hist_entry_iter iter = {
754 .add_entry_cb = hist_iter__top_callback,
755 };
749 756
750 err = sample__resolve_callchain(sample, &parent, evsel, &al, 757 if (symbol_conf.cumulate_callchain)
751 top->max_stack); 758 iter.ops = &hist_iter_cumulative;
752 if (err) 759 else
753 return; 760 iter.ops = &hist_iter_normal;
754 761
755 he = perf_evsel__add_hist_entry(evsel, &al, sample); 762 pthread_mutex_lock(&evsel->hists.lock);
756 if (he == NULL) {
757 pr_err("Problem incrementing symbol period, skipping event\n");
758 return;
759 }
760 763
761 err = hist_entry__append_callchain(he, sample); 764 err = hist_entry_iter__add(&iter, &al, evsel, sample,
762 if (err) 765 top->max_stack, top);
763 return; 766 if (err < 0)
767 pr_err("Problem incrementing symbol period, skipping event\n");
764 768
765 if (sort__has_sym) 769 pthread_mutex_unlock(&evsel->hists.lock);
766 perf_top__record_precise_ip(top, he, evsel->idx, ip);
767 } 770 }
768 771
769 return; 772 return;
@@ -1001,6 +1004,10 @@ static int perf_top_config(const char *var, const char *value, void *cb)
1001 1004
1002 if (!strcmp(var, "top.call-graph")) 1005 if (!strcmp(var, "top.call-graph"))
1003 return record_parse_callchain(value, &top->record_opts); 1006 return record_parse_callchain(value, &top->record_opts);
1007 if (!strcmp(var, "top.children")) {
1008 symbol_conf.cumulate_callchain = perf_config_bool(var, value);
1009 return 0;
1010 }
1004 1011
1005 return perf_default_config(var, value, cb); 1012 return perf_default_config(var, value, cb);
1006} 1013}
@@ -1095,6 +1102,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
1095 OPT_CALLBACK(0, "call-graph", &top.record_opts, 1102 OPT_CALLBACK(0, "call-graph", &top.record_opts,
1096 "mode[,dump_size]", record_callchain_help, 1103 "mode[,dump_size]", record_callchain_help,
1097 &parse_callchain_opt), 1104 &parse_callchain_opt),
1105 OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
1106 "Accumulate callchains of children and show total overhead as well"),
1098 OPT_INTEGER(0, "max-stack", &top.max_stack, 1107 OPT_INTEGER(0, "max-stack", &top.max_stack,
1099 "Set the maximum stack depth when parsing the callchain. " 1108 "Set the maximum stack depth when parsing the callchain. "
1100 "Default: " __stringify(PERF_MAX_STACK_DEPTH)), 1109 "Default: " __stringify(PERF_MAX_STACK_DEPTH)),
@@ -1200,6 +1209,11 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
1200 1209
1201 top.sym_evsel = perf_evlist__first(top.evlist); 1210 top.sym_evsel = perf_evlist__first(top.evlist);
1202 1211
1212 if (!symbol_conf.use_callchain) {
1213 symbol_conf.cumulate_callchain = false;
1214 perf_hpp__cancel_cumulate();
1215 }
1216
1203 symbol_conf.priv_size = sizeof(struct annotation); 1217 symbol_conf.priv_size = sizeof(struct annotation);
1204 1218
1205 symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL); 1219 symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
index 729bbdf5cec7..4f100b54ba8b 100644
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@@ -447,6 +447,7 @@ else
447 ifneq ($(feature-libperl), 1) 447 ifneq ($(feature-libperl), 1)
448 CFLAGS += -DNO_LIBPERL 448 CFLAGS += -DNO_LIBPERL
449 NO_LIBPERL := 1 449 NO_LIBPERL := 1
450 msg := $(warning Missing perl devel files. Disabling perl scripting support, consider installing perl-ExtUtils-Embed);
450 else 451 else
451 LDFLAGS += $(PERL_EMBED_LDFLAGS) 452 LDFLAGS += $(PERL_EMBED_LDFLAGS)
452 EXTLIBS += $(PERL_EMBED_LIBADD) 453 EXTLIBS += $(PERL_EMBED_LIBADD)
@@ -599,7 +600,7 @@ endif
599 600
600# Make the path relative to DESTDIR, not to prefix 601# Make the path relative to DESTDIR, not to prefix
601ifndef DESTDIR 602ifndef DESTDIR
602prefix = $(HOME) 603prefix ?= $(HOME)
603endif 604endif
604bindir_relative = bin 605bindir_relative = bin
605bindir = $(prefix)/$(bindir_relative) 606bindir = $(prefix)/$(bindir_relative)
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 431798a4110d..78f7b920e548 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -481,14 +481,18 @@ int main(int argc, const char **argv)
481 fprintf(stderr, "cannot handle %s internally", cmd); 481 fprintf(stderr, "cannot handle %s internally", cmd);
482 goto out; 482 goto out;
483 } 483 }
484#ifdef HAVE_LIBAUDIT_SUPPORT
485 if (!prefixcmp(cmd, "trace")) { 484 if (!prefixcmp(cmd, "trace")) {
485#ifdef HAVE_LIBAUDIT_SUPPORT
486 set_buildid_dir(); 486 set_buildid_dir();
487 setup_path(); 487 setup_path();
488 argv[0] = "trace"; 488 argv[0] = "trace";
489 return cmd_trace(argc, argv, NULL); 489 return cmd_trace(argc, argv, NULL);
490 } 490#else
491 fprintf(stderr,
492 "trace command not available: missing audit-libs devel package at build time.\n");
493 goto out;
491#endif 494#endif
495 }
492 /* Look for flags.. */ 496 /* Look for flags.. */
493 argv++; 497 argv++;
494 argc--; 498 argc--;
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 831f52cae197..802e3cd50f6f 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -140,6 +140,10 @@ static struct test {
140 .func = test__hists_output, 140 .func = test__hists_output,
141 }, 141 },
142 { 142 {
143 .desc = "Test cumulation of child hist entries",
144 .func = test__hists_cumulate,
145 },
146 {
143 .func = NULL, 147 .func = NULL,
144 }, 148 },
145}; 149};
diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c
index e4e01aadc3be..a62c09134516 100644
--- a/tools/perf/tests/hists_common.c
+++ b/tools/perf/tests/hists_common.c
@@ -12,9 +12,9 @@ static struct {
12 u32 pid; 12 u32 pid;
13 const char *comm; 13 const char *comm;
14} fake_threads[] = { 14} fake_threads[] = {
15 { 100, "perf" }, 15 { FAKE_PID_PERF1, "perf" },
16 { 200, "perf" }, 16 { FAKE_PID_PERF2, "perf" },
17 { 300, "bash" }, 17 { FAKE_PID_BASH, "bash" },
18}; 18};
19 19
20static struct { 20static struct {
@@ -22,15 +22,15 @@ static struct {
22 u64 start; 22 u64 start;
23 const char *filename; 23 const char *filename;
24} fake_mmap_info[] = { 24} fake_mmap_info[] = {
25 { 100, 0x40000, "perf" }, 25 { FAKE_PID_PERF1, FAKE_MAP_PERF, "perf" },
26 { 100, 0x50000, "libc" }, 26 { FAKE_PID_PERF1, FAKE_MAP_LIBC, "libc" },
27 { 100, 0xf0000, "[kernel]" }, 27 { FAKE_PID_PERF1, FAKE_MAP_KERNEL, "[kernel]" },
28 { 200, 0x40000, "perf" }, 28 { FAKE_PID_PERF2, FAKE_MAP_PERF, "perf" },
29 { 200, 0x50000, "libc" }, 29 { FAKE_PID_PERF2, FAKE_MAP_LIBC, "libc" },
30 { 200, 0xf0000, "[kernel]" }, 30 { FAKE_PID_PERF2, FAKE_MAP_KERNEL, "[kernel]" },
31 { 300, 0x40000, "bash" }, 31 { FAKE_PID_BASH, FAKE_MAP_BASH, "bash" },
32 { 300, 0x50000, "libc" }, 32 { FAKE_PID_BASH, FAKE_MAP_LIBC, "libc" },
33 { 300, 0xf0000, "[kernel]" }, 33 { FAKE_PID_BASH, FAKE_MAP_KERNEL, "[kernel]" },
34}; 34};
35 35
36struct fake_sym { 36struct fake_sym {
@@ -40,27 +40,30 @@ struct fake_sym {
40}; 40};
41 41
42static struct fake_sym perf_syms[] = { 42static struct fake_sym perf_syms[] = {
43 { 700, 100, "main" }, 43 { FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "main" },
44 { 800, 100, "run_command" }, 44 { FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "run_command" },
45 { 900, 100, "cmd_record" }, 45 { FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "cmd_record" },
46}; 46};
47 47
48static struct fake_sym bash_syms[] = { 48static struct fake_sym bash_syms[] = {
49 { 700, 100, "main" }, 49 { FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "main" },
50 { 800, 100, "xmalloc" }, 50 { FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "xmalloc" },
51 { 900, 100, "xfree" }, 51 { FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "xfree" },
52}; 52};
53 53
54static struct fake_sym libc_syms[] = { 54static struct fake_sym libc_syms[] = {
55 { 700, 100, "malloc" }, 55 { 700, 100, "malloc" },
56 { 800, 100, "free" }, 56 { 800, 100, "free" },
57 { 900, 100, "realloc" }, 57 { 900, 100, "realloc" },
58 { FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "malloc" },
59 { FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "free" },
60 { FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "realloc" },
58}; 61};
59 62
60static struct fake_sym kernel_syms[] = { 63static struct fake_sym kernel_syms[] = {
61 { 700, 100, "schedule" }, 64 { FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "schedule" },
62 { 800, 100, "page_fault" }, 65 { FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "page_fault" },
63 { 900, 100, "sys_perf_event_open" }, 66 { FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "sys_perf_event_open" },
64}; 67};
65 68
66static struct { 69static struct {
@@ -102,7 +105,7 @@ struct machine *setup_fake_machine(struct machines *machines)
102 .pid = fake_mmap_info[i].pid, 105 .pid = fake_mmap_info[i].pid,
103 .tid = fake_mmap_info[i].pid, 106 .tid = fake_mmap_info[i].pid,
104 .start = fake_mmap_info[i].start, 107 .start = fake_mmap_info[i].start,
105 .len = 0x1000ULL, 108 .len = FAKE_MAP_LENGTH,
106 .pgoff = 0ULL, 109 .pgoff = 0ULL,
107 }, 110 },
108 }; 111 };
@@ -193,10 +196,11 @@ void print_hists_out(struct hists *hists)
193 he = rb_entry(node, struct hist_entry, rb_node); 196 he = rb_entry(node, struct hist_entry, rb_node);
194 197
195 if (!he->filtered) { 198 if (!he->filtered) {
196 pr_info("%2d: entry: %8s:%5d [%-8s] %20s: period = %"PRIu64"\n", 199 pr_info("%2d: entry: %8s:%5d [%-8s] %20s: period = %"PRIu64"/%"PRIu64"\n",
197 i, thread__comm_str(he->thread), he->thread->tid, 200 i, thread__comm_str(he->thread), he->thread->tid,
198 he->ms.map->dso->short_name, 201 he->ms.map->dso->short_name,
199 he->ms.sym->name, he->stat.period); 202 he->ms.sym->name, he->stat.period,
203 he->stat_acc ? he->stat_acc->period : 0);
200 } 204 }
201 205
202 i++; 206 i++;
diff --git a/tools/perf/tests/hists_common.h b/tools/perf/tests/hists_common.h
index 1415ae69d7b6..888254e8665c 100644
--- a/tools/perf/tests/hists_common.h
+++ b/tools/perf/tests/hists_common.h
@@ -4,6 +4,34 @@
4struct machine; 4struct machine;
5struct machines; 5struct machines;
6 6
7#define FAKE_PID_PERF1 100
8#define FAKE_PID_PERF2 200
9#define FAKE_PID_BASH 300
10
11#define FAKE_MAP_PERF 0x400000
12#define FAKE_MAP_BASH 0x400000
13#define FAKE_MAP_LIBC 0x500000
14#define FAKE_MAP_KERNEL 0xf00000
15#define FAKE_MAP_LENGTH 0x100000
16
17#define FAKE_SYM_OFFSET1 700
18#define FAKE_SYM_OFFSET2 800
19#define FAKE_SYM_OFFSET3 900
20#define FAKE_SYM_LENGTH 100
21
22#define FAKE_IP_PERF_MAIN FAKE_MAP_PERF + FAKE_SYM_OFFSET1
23#define FAKE_IP_PERF_RUN_COMMAND FAKE_MAP_PERF + FAKE_SYM_OFFSET2
24#define FAKE_IP_PERF_CMD_RECORD FAKE_MAP_PERF + FAKE_SYM_OFFSET3
25#define FAKE_IP_BASH_MAIN FAKE_MAP_BASH + FAKE_SYM_OFFSET1
26#define FAKE_IP_BASH_XMALLOC FAKE_MAP_BASH + FAKE_SYM_OFFSET2
27#define FAKE_IP_BASH_XFREE FAKE_MAP_BASH + FAKE_SYM_OFFSET3
28#define FAKE_IP_LIBC_MALLOC FAKE_MAP_LIBC + FAKE_SYM_OFFSET1
29#define FAKE_IP_LIBC_FREE FAKE_MAP_LIBC + FAKE_SYM_OFFSET2
30#define FAKE_IP_LIBC_REALLOC FAKE_MAP_LIBC + FAKE_SYM_OFFSET3
31#define FAKE_IP_KERNEL_SCHEDULE FAKE_MAP_KERNEL + FAKE_SYM_OFFSET1
32#define FAKE_IP_KERNEL_PAGE_FAULT FAKE_MAP_KERNEL + FAKE_SYM_OFFSET2
33#define FAKE_IP_KERNEL_SYS_PERF_EVENT_OPEN FAKE_MAP_KERNEL + FAKE_SYM_OFFSET3
34
7/* 35/*
8 * The setup_fake_machine() provides a test environment which consists 36 * The setup_fake_machine() provides a test environment which consists
9 * of 3 processes that have 3 mappings and in turn, have 3 symbols 37 * of 3 processes that have 3 mappings and in turn, have 3 symbols
@@ -13,7 +41,7 @@ struct machines;
13 * ............. ............. ................... 41 * ............. ............. ...................
14 * perf: 100 perf main 42 * perf: 100 perf main
15 * perf: 100 perf run_command 43 * perf: 100 perf run_command
16 * perf: 100 perf comd_record 44 * perf: 100 perf cmd_record
17 * perf: 100 libc malloc 45 * perf: 100 libc malloc
18 * perf: 100 libc free 46 * perf: 100 libc free
19 * perf: 100 libc realloc 47 * perf: 100 libc realloc
@@ -22,7 +50,7 @@ struct machines;
22 * perf: 100 [kernel] sys_perf_event_open 50 * perf: 100 [kernel] sys_perf_event_open
23 * perf: 200 perf main 51 * perf: 200 perf main
24 * perf: 200 perf run_command 52 * perf: 200 perf run_command
25 * perf: 200 perf comd_record 53 * perf: 200 perf cmd_record
26 * perf: 200 libc malloc 54 * perf: 200 libc malloc
27 * perf: 200 libc free 55 * perf: 200 libc free
28 * perf: 200 libc realloc 56 * perf: 200 libc realloc
diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c
new file mode 100644
index 000000000000..0ac240db2e24
--- /dev/null
+++ b/tools/perf/tests/hists_cumulate.c
@@ -0,0 +1,726 @@
1#include "perf.h"
2#include "util/debug.h"
3#include "util/symbol.h"
4#include "util/sort.h"
5#include "util/evsel.h"
6#include "util/evlist.h"
7#include "util/machine.h"
8#include "util/thread.h"
9#include "util/parse-events.h"
10#include "tests/tests.h"
11#include "tests/hists_common.h"
12
13struct sample {
14 u32 pid;
15 u64 ip;
16 struct thread *thread;
17 struct map *map;
18 struct symbol *sym;
19};
20
21/* For the numbers, see hists_common.c */
22static struct sample fake_samples[] = {
23 /* perf [kernel] schedule() */
24 { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, },
25 /* perf [perf] main() */
26 { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, },
27 /* perf [perf] cmd_record() */
28 { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_CMD_RECORD, },
29 /* perf [libc] malloc() */
30 { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, },
31 /* perf [libc] free() */
32 { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_FREE, },
33 /* perf [perf] main() */
34 { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, },
35 /* perf [kernel] page_fault() */
36 { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
37 /* bash [bash] main() */
38 { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_MAIN, },
39 /* bash [bash] xmalloc() */
40 { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XMALLOC, },
41 /* bash [kernel] page_fault() */
42 { .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
43};
44
45/*
46 * Will be casted to struct ip_callchain which has all 64 bit entries
47 * of nr and ips[].
48 */
49static u64 fake_callchains[][10] = {
50 /* schedule => run_command => main */
51 { 3, FAKE_IP_KERNEL_SCHEDULE, FAKE_IP_PERF_RUN_COMMAND, FAKE_IP_PERF_MAIN, },
52 /* main */
53 { 1, FAKE_IP_PERF_MAIN, },
54 /* cmd_record => run_command => main */
55 { 3, FAKE_IP_PERF_CMD_RECORD, FAKE_IP_PERF_RUN_COMMAND, FAKE_IP_PERF_MAIN, },
56 /* malloc => cmd_record => run_command => main */
57 { 4, FAKE_IP_LIBC_MALLOC, FAKE_IP_PERF_CMD_RECORD, FAKE_IP_PERF_RUN_COMMAND,
58 FAKE_IP_PERF_MAIN, },
59 /* free => cmd_record => run_command => main */
60 { 4, FAKE_IP_LIBC_FREE, FAKE_IP_PERF_CMD_RECORD, FAKE_IP_PERF_RUN_COMMAND,
61 FAKE_IP_PERF_MAIN, },
62 /* main */
63 { 1, FAKE_IP_PERF_MAIN, },
64 /* page_fault => sys_perf_event_open => run_command => main */
65 { 4, FAKE_IP_KERNEL_PAGE_FAULT, FAKE_IP_KERNEL_SYS_PERF_EVENT_OPEN,
66 FAKE_IP_PERF_RUN_COMMAND, FAKE_IP_PERF_MAIN, },
67 /* main */
68 { 1, FAKE_IP_BASH_MAIN, },
69 /* xmalloc => malloc => xmalloc => malloc => xmalloc => main */
70 { 6, FAKE_IP_BASH_XMALLOC, FAKE_IP_LIBC_MALLOC, FAKE_IP_BASH_XMALLOC,
71 FAKE_IP_LIBC_MALLOC, FAKE_IP_BASH_XMALLOC, FAKE_IP_BASH_MAIN, },
72 /* page_fault => malloc => main */
73 { 3, FAKE_IP_KERNEL_PAGE_FAULT, FAKE_IP_LIBC_MALLOC, FAKE_IP_BASH_MAIN, },
74};
75
76static int add_hist_entries(struct hists *hists, struct machine *machine)
77{
78 struct addr_location al;
79 struct perf_evsel *evsel = hists_to_evsel(hists);
80 struct perf_sample sample = { .period = 1000, };
81 size_t i;
82
83 for (i = 0; i < ARRAY_SIZE(fake_samples); i++) {
84 const union perf_event event = {
85 .header = {
86 .misc = PERF_RECORD_MISC_USER,
87 },
88 };
89 struct hist_entry_iter iter = {
90 .hide_unresolved = false,
91 };
92
93 if (symbol_conf.cumulate_callchain)
94 iter.ops = &hist_iter_cumulative;
95 else
96 iter.ops = &hist_iter_normal;
97
98 sample.pid = fake_samples[i].pid;
99 sample.tid = fake_samples[i].pid;
100 sample.ip = fake_samples[i].ip;
101 sample.callchain = (struct ip_callchain *)fake_callchains[i];
102
103 if (perf_event__preprocess_sample(&event, machine, &al,
104 &sample) < 0)
105 goto out;
106
107 if (hist_entry_iter__add(&iter, &al, evsel, &sample,
108 PERF_MAX_STACK_DEPTH, NULL) < 0)
109 goto out;
110
111 fake_samples[i].thread = al.thread;
112 fake_samples[i].map = al.map;
113 fake_samples[i].sym = al.sym;
114 }
115
116 return TEST_OK;
117
118out:
119 pr_debug("Not enough memory for adding a hist entry\n");
120 return TEST_FAIL;
121}
122
123static void del_hist_entries(struct hists *hists)
124{
125 struct hist_entry *he;
126 struct rb_root *root_in;
127 struct rb_root *root_out;
128 struct rb_node *node;
129
130 if (sort__need_collapse)
131 root_in = &hists->entries_collapsed;
132 else
133 root_in = hists->entries_in;
134
135 root_out = &hists->entries;
136
137 while (!RB_EMPTY_ROOT(root_out)) {
138 node = rb_first(root_out);
139
140 he = rb_entry(node, struct hist_entry, rb_node);
141 rb_erase(node, root_out);
142 rb_erase(&he->rb_node_in, root_in);
143 hist_entry__free(he);
144 }
145}
146
147typedef int (*test_fn_t)(struct perf_evsel *, struct machine *);
148
149#define COMM(he) (thread__comm_str(he->thread))
150#define DSO(he) (he->ms.map->dso->short_name)
151#define SYM(he) (he->ms.sym->name)
152#define CPU(he) (he->cpu)
153#define PID(he) (he->thread->tid)
154#define DEPTH(he) (he->callchain->max_depth)
155#define CDSO(cl) (cl->ms.map->dso->short_name)
156#define CSYM(cl) (cl->ms.sym->name)
157
158struct result {
159 u64 children;
160 u64 self;
161 const char *comm;
162 const char *dso;
163 const char *sym;
164};
165
166struct callchain_result {
167 u64 nr;
168 struct {
169 const char *dso;
170 const char *sym;
171 } node[10];
172};
173
174static int do_test(struct hists *hists, struct result *expected, size_t nr_expected,
175 struct callchain_result *expected_callchain, size_t nr_callchain)
176{
177 char buf[32];
178 size_t i, c;
179 struct hist_entry *he;
180 struct rb_root *root;
181 struct rb_node *node;
182 struct callchain_node *cnode;
183 struct callchain_list *clist;
184
185 /*
186 * adding and deleting hist entries must be done outside of this
187 * function since TEST_ASSERT_VAL() returns in case of failure.
188 */
189 hists__collapse_resort(hists, NULL);
190 hists__output_resort(hists);
191
192 if (verbose > 2) {
193 pr_info("use callchain: %d, cumulate callchain: %d\n",
194 symbol_conf.use_callchain,
195 symbol_conf.cumulate_callchain);
196 print_hists_out(hists);
197 }
198
199 root = &hists->entries;
200 for (node = rb_first(root), i = 0;
201 node && (he = rb_entry(node, struct hist_entry, rb_node));
202 node = rb_next(node), i++) {
203 scnprintf(buf, sizeof(buf), "Invalid hist entry #%zd", i);
204
205 TEST_ASSERT_VAL("Incorrect number of hist entry",
206 i < nr_expected);
207 TEST_ASSERT_VAL(buf, he->stat.period == expected[i].self &&
208 !strcmp(COMM(he), expected[i].comm) &&
209 !strcmp(DSO(he), expected[i].dso) &&
210 !strcmp(SYM(he), expected[i].sym));
211
212 if (symbol_conf.cumulate_callchain)
213 TEST_ASSERT_VAL(buf, he->stat_acc->period == expected[i].children);
214
215 if (!symbol_conf.use_callchain)
216 continue;
217
218 /* check callchain entries */
219 root = &he->callchain->node.rb_root;
220 cnode = rb_entry(rb_first(root), struct callchain_node, rb_node);
221
222 c = 0;
223 list_for_each_entry(clist, &cnode->val, list) {
224 scnprintf(buf, sizeof(buf), "Invalid callchain entry #%zd/%zd", i, c);
225
226 TEST_ASSERT_VAL("Incorrect number of callchain entry",
227 c < expected_callchain[i].nr);
228 TEST_ASSERT_VAL(buf,
229 !strcmp(CDSO(clist), expected_callchain[i].node[c].dso) &&
230 !strcmp(CSYM(clist), expected_callchain[i].node[c].sym));
231 c++;
232 }
233 /* TODO: handle multiple child nodes properly */
234 TEST_ASSERT_VAL("Incorrect number of callchain entry",
235 c <= expected_callchain[i].nr);
236 }
237 TEST_ASSERT_VAL("Incorrect number of hist entry",
238 i == nr_expected);
239 TEST_ASSERT_VAL("Incorrect number of callchain entry",
240 !symbol_conf.use_callchain || nr_expected == nr_callchain);
241 return 0;
242}
243
244/* NO callchain + NO children */
245static int test1(struct perf_evsel *evsel, struct machine *machine)
246{
247 int err;
248 struct hists *hists = &evsel->hists;
249 /*
250 * expected output:
251 *
252 * Overhead Command Shared Object Symbol
253 * ======== ======= ============= ==============
254 * 20.00% perf perf [.] main
255 * 10.00% bash [kernel] [k] page_fault
256 * 10.00% bash bash [.] main
257 * 10.00% bash bash [.] xmalloc
258 * 10.00% perf [kernel] [k] page_fault
259 * 10.00% perf [kernel] [k] schedule
260 * 10.00% perf libc [.] free
261 * 10.00% perf libc [.] malloc
262 * 10.00% perf perf [.] cmd_record
263 */
264 struct result expected[] = {
265 { 0, 2000, "perf", "perf", "main" },
266 { 0, 1000, "bash", "[kernel]", "page_fault" },
267 { 0, 1000, "bash", "bash", "main" },
268 { 0, 1000, "bash", "bash", "xmalloc" },
269 { 0, 1000, "perf", "[kernel]", "page_fault" },
270 { 0, 1000, "perf", "[kernel]", "schedule" },
271 { 0, 1000, "perf", "libc", "free" },
272 { 0, 1000, "perf", "libc", "malloc" },
273 { 0, 1000, "perf", "perf", "cmd_record" },
274 };
275
276 symbol_conf.use_callchain = false;
277 symbol_conf.cumulate_callchain = false;
278
279 setup_sorting();
280 callchain_register_param(&callchain_param);
281
282 err = add_hist_entries(hists, machine);
283 if (err < 0)
284 goto out;
285
286 err = do_test(hists, expected, ARRAY_SIZE(expected), NULL, 0);
287
288out:
289 del_hist_entries(hists);
290 reset_output_field();
291 return err;
292}
293
294/* callchain + NO children */
295static int test2(struct perf_evsel *evsel, struct machine *machine)
296{
297 int err;
298 struct hists *hists = &evsel->hists;
299 /*
300 * expected output:
301 *
302 * Overhead Command Shared Object Symbol
303 * ======== ======= ============= ==============
304 * 20.00% perf perf [.] main
305 * |
306 * --- main
307 *
308 * 10.00% bash [kernel] [k] page_fault
309 * |
310 * --- page_fault
311 * malloc
312 * main
313 *
314 * 10.00% bash bash [.] main
315 * |
316 * --- main
317 *
318 * 10.00% bash bash [.] xmalloc
319 * |
320 * --- xmalloc
321 * malloc
322 * xmalloc <--- NOTE: there's a cycle
323 * malloc
324 * xmalloc
325 * main
326 *
327 * 10.00% perf [kernel] [k] page_fault
328 * |
329 * --- page_fault
330 * sys_perf_event_open
331 * run_command
332 * main
333 *
334 * 10.00% perf [kernel] [k] schedule
335 * |
336 * --- schedule
337 * run_command
338 * main
339 *
340 * 10.00% perf libc [.] free
341 * |
342 * --- free
343 * cmd_record
344 * run_command
345 * main
346 *
347 * 10.00% perf libc [.] malloc
348 * |
349 * --- malloc
350 * cmd_record
351 * run_command
352 * main
353 *
354 * 10.00% perf perf [.] cmd_record
355 * |
356 * --- cmd_record
357 * run_command
358 * main
359 *
360 */
361 struct result expected[] = {
362 { 0, 2000, "perf", "perf", "main" },
363 { 0, 1000, "bash", "[kernel]", "page_fault" },
364 { 0, 1000, "bash", "bash", "main" },
365 { 0, 1000, "bash", "bash", "xmalloc" },
366 { 0, 1000, "perf", "[kernel]", "page_fault" },
367 { 0, 1000, "perf", "[kernel]", "schedule" },
368 { 0, 1000, "perf", "libc", "free" },
369 { 0, 1000, "perf", "libc", "malloc" },
370 { 0, 1000, "perf", "perf", "cmd_record" },
371 };
372 struct callchain_result expected_callchain[] = {
373 {
374 1, { { "perf", "main" }, },
375 },
376 {
377 3, { { "[kernel]", "page_fault" },
378 { "libc", "malloc" },
379 { "bash", "main" }, },
380 },
381 {
382 1, { { "bash", "main" }, },
383 },
384 {
385 6, { { "bash", "xmalloc" },
386 { "libc", "malloc" },
387 { "bash", "xmalloc" },
388 { "libc", "malloc" },
389 { "bash", "xmalloc" },
390 { "bash", "main" }, },
391 },
392 {
393 4, { { "[kernel]", "page_fault" },
394 { "[kernel]", "sys_perf_event_open" },
395 { "perf", "run_command" },
396 { "perf", "main" }, },
397 },
398 {
399 3, { { "[kernel]", "schedule" },
400 { "perf", "run_command" },
401 { "perf", "main" }, },
402 },
403 {
404 4, { { "libc", "free" },
405 { "perf", "cmd_record" },
406 { "perf", "run_command" },
407 { "perf", "main" }, },
408 },
409 {
410 4, { { "libc", "malloc" },
411 { "perf", "cmd_record" },
412 { "perf", "run_command" },
413 { "perf", "main" }, },
414 },
415 {
416 3, { { "perf", "cmd_record" },
417 { "perf", "run_command" },
418 { "perf", "main" }, },
419 },
420 };
421
422 symbol_conf.use_callchain = true;
423 symbol_conf.cumulate_callchain = false;
424
425 setup_sorting();
426 callchain_register_param(&callchain_param);
427
428 err = add_hist_entries(hists, machine);
429 if (err < 0)
430 goto out;
431
432 err = do_test(hists, expected, ARRAY_SIZE(expected),
433 expected_callchain, ARRAY_SIZE(expected_callchain));
434
435out:
436 del_hist_entries(hists);
437 reset_output_field();
438 return err;
439}
440
441/* NO callchain + children */
442static int test3(struct perf_evsel *evsel, struct machine *machine)
443{
444 int err;
445 struct hists *hists = &evsel->hists;
446 /*
447 * expected output:
448 *
449 * Children Self Command Shared Object Symbol
450 * ======== ======== ======= ============= =======================
451 * 70.00% 20.00% perf perf [.] main
452 * 50.00% 0.00% perf perf [.] run_command
453 * 30.00% 10.00% bash bash [.] main
454 * 30.00% 10.00% perf perf [.] cmd_record
455 * 20.00% 0.00% bash libc [.] malloc
456 * 10.00% 10.00% bash [kernel] [k] page_fault
457 * 10.00% 10.00% perf [kernel] [k] schedule
458 * 10.00% 0.00% perf [kernel] [k] sys_perf_event_open
459 * 10.00% 10.00% perf [kernel] [k] page_fault
460 * 10.00% 10.00% perf libc [.] free
461 * 10.00% 10.00% perf libc [.] malloc
462 * 10.00% 10.00% bash bash [.] xmalloc
463 */
464 struct result expected[] = {
465 { 7000, 2000, "perf", "perf", "main" },
466 { 5000, 0, "perf", "perf", "run_command" },
467 { 3000, 1000, "bash", "bash", "main" },
468 { 3000, 1000, "perf", "perf", "cmd_record" },
469 { 2000, 0, "bash", "libc", "malloc" },
470 { 1000, 1000, "bash", "[kernel]", "page_fault" },
471 { 1000, 1000, "perf", "[kernel]", "schedule" },
472 { 1000, 0, "perf", "[kernel]", "sys_perf_event_open" },
473 { 1000, 1000, "perf", "[kernel]", "page_fault" },
474 { 1000, 1000, "perf", "libc", "free" },
475 { 1000, 1000, "perf", "libc", "malloc" },
476 { 1000, 1000, "bash", "bash", "xmalloc" },
477 };
478
479 symbol_conf.use_callchain = false;
480 symbol_conf.cumulate_callchain = true;
481
482 setup_sorting();
483 callchain_register_param(&callchain_param);
484
485 err = add_hist_entries(hists, machine);
486 if (err < 0)
487 goto out;
488
489 err = do_test(hists, expected, ARRAY_SIZE(expected), NULL, 0);
490
491out:
492 del_hist_entries(hists);
493 reset_output_field();
494 return err;
495}
496
497/* callchain + children */
498static int test4(struct perf_evsel *evsel, struct machine *machine)
499{
500 int err;
501 struct hists *hists = &evsel->hists;
502 /*
503 * expected output:
504 *
505 * Children Self Command Shared Object Symbol
506 * ======== ======== ======= ============= =======================
507 * 70.00% 20.00% perf perf [.] main
508 * |
509 * --- main
510 *
511 * 50.00% 0.00% perf perf [.] run_command
512 * |
513 * --- run_command
514 * main
515 *
516 * 30.00% 10.00% bash bash [.] main
517 * |
518 * --- main
519 *
520 * 30.00% 10.00% perf perf [.] cmd_record
521 * |
522 * --- cmd_record
523 * run_command
524 * main
525 *
526 * 20.00% 0.00% bash libc [.] malloc
527 * |
528 * --- malloc
529 * |
530 * |--50.00%-- xmalloc
531 * | main
532 * --50.00%-- main
533 *
534 * 10.00% 10.00% bash [kernel] [k] page_fault
535 * |
536 * --- page_fault
537 * malloc
538 * main
539 *
540 * 10.00% 10.00% perf [kernel] [k] schedule
541 * |
542 * --- schedule
543 * run_command
544 * main
545 *
546 * 10.00% 0.00% perf [kernel] [k] sys_perf_event_open
547 * |
548 * --- sys_perf_event_open
549 * run_command
550 * main
551 *
552 * 10.00% 10.00% perf [kernel] [k] page_fault
553 * |
554 * --- page_fault
555 * sys_perf_event_open
556 * run_command
557 * main
558 *
559 * 10.00% 10.00% perf libc [.] free
560 * |
561 * --- free
562 * cmd_record
563 * run_command
564 * main
565 *
566 * 10.00% 10.00% perf libc [.] malloc
567 * |
568 * --- malloc
569 * cmd_record
570 * run_command
571 * main
572 *
573 * 10.00% 10.00% bash bash [.] xmalloc
574 * |
575 * --- xmalloc
576 * malloc
577 * xmalloc <--- NOTE: there's a cycle
578 * malloc
579 * xmalloc
580 * main
581 *
582 */
583 struct result expected[] = {
584 { 7000, 2000, "perf", "perf", "main" },
585 { 5000, 0, "perf", "perf", "run_command" },
586 { 3000, 1000, "bash", "bash", "main" },
587 { 3000, 1000, "perf", "perf", "cmd_record" },
588 { 2000, 0, "bash", "libc", "malloc" },
589 { 1000, 1000, "bash", "[kernel]", "page_fault" },
590 { 1000, 1000, "perf", "[kernel]", "schedule" },
591 { 1000, 0, "perf", "[kernel]", "sys_perf_event_open" },
592 { 1000, 1000, "perf", "[kernel]", "page_fault" },
593 { 1000, 1000, "perf", "libc", "free" },
594 { 1000, 1000, "perf", "libc", "malloc" },
595 { 1000, 1000, "bash", "bash", "xmalloc" },
596 };
597 struct callchain_result expected_callchain[] = {
598 {
599 1, { { "perf", "main" }, },
600 },
601 {
602 2, { { "perf", "run_command" },
603 { "perf", "main" }, },
604 },
605 {
606 1, { { "bash", "main" }, },
607 },
608 {
609 3, { { "perf", "cmd_record" },
610 { "perf", "run_command" },
611 { "perf", "main" }, },
612 },
613 {
614 4, { { "libc", "malloc" },
615 { "bash", "xmalloc" },
616 { "bash", "main" },
617 { "bash", "main" }, },
618 },
619 {
620 3, { { "[kernel]", "page_fault" },
621 { "libc", "malloc" },
622 { "bash", "main" }, },
623 },
624 {
625 3, { { "[kernel]", "schedule" },
626 { "perf", "run_command" },
627 { "perf", "main" }, },
628 },
629 {
630 3, { { "[kernel]", "sys_perf_event_open" },
631 { "perf", "run_command" },
632 { "perf", "main" }, },
633 },
634 {
635 4, { { "[kernel]", "page_fault" },
636 { "[kernel]", "sys_perf_event_open" },
637 { "perf", "run_command" },
638 { "perf", "main" }, },
639 },
640 {
641 4, { { "libc", "free" },
642 { "perf", "cmd_record" },
643 { "perf", "run_command" },
644 { "perf", "main" }, },
645 },
646 {
647 4, { { "libc", "malloc" },
648 { "perf", "cmd_record" },
649 { "perf", "run_command" },
650 { "perf", "main" }, },
651 },
652 {
653 6, { { "bash", "xmalloc" },
654 { "libc", "malloc" },
655 { "bash", "xmalloc" },
656 { "libc", "malloc" },
657 { "bash", "xmalloc" },
658 { "bash", "main" }, },
659 },
660 };
661
662 symbol_conf.use_callchain = true;
663 symbol_conf.cumulate_callchain = true;
664
665 setup_sorting();
666 callchain_register_param(&callchain_param);
667
668 err = add_hist_entries(hists, machine);
669 if (err < 0)
670 goto out;
671
672 err = do_test(hists, expected, ARRAY_SIZE(expected),
673 expected_callchain, ARRAY_SIZE(expected_callchain));
674
675out:
676 del_hist_entries(hists);
677 reset_output_field();
678 return err;
679}
680
681int test__hists_cumulate(void)
682{
683 int err = TEST_FAIL;
684 struct machines machines;
685 struct machine *machine;
686 struct perf_evsel *evsel;
687 struct perf_evlist *evlist = perf_evlist__new();
688 size_t i;
689 test_fn_t testcases[] = {
690 test1,
691 test2,
692 test3,
693 test4,
694 };
695
696 TEST_ASSERT_VAL("No memory", evlist);
697
698 err = parse_events(evlist, "cpu-clock");
699 if (err)
700 goto out;
701
702 machines__init(&machines);
703
704 /* setup threads/dso/map/symbols also */
705 machine = setup_fake_machine(&machines);
706 if (!machine)
707 goto out;
708
709 if (verbose > 1)
710 machine__fprintf(machine, stderr);
711
712 evsel = perf_evlist__first(evlist);
713
714 for (i = 0; i < ARRAY_SIZE(testcases); i++) {
715 err = testcases[i](evsel, machine);
716 if (err < 0)
717 break;
718 }
719
720out:
721 /* tear down everything */
722 perf_evlist__delete(evlist);
723 machines__exit(&machines);
724
725 return err;
726}
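
/*
 * Note on the Self/Children numbers asserted in the tests above (illustration
 * only, not part of the patch): as the expected values imply, each fake sample
 * carries a period of 1000.  The Self column sums the periods of samples that
 * hit an entry directly, while Children additionally credits the period, once
 * per sample, to every distinct entry appearing anywhere in that sample's
 * callchain.  For example, perf's main() is the leaf of 2 samples (Self = 2000)
 * and appears in the callchains of 7 of the 10 samples (Children = 7000).
 * The standalone sketch below only demonstrates that accounting rule; the
 * account() helper and table/chain names are made up for the example.
 */
#include <stdio.h>
#include <string.h>

struct acc { const char *sym; unsigned long long self, children; };

static void account(struct acc *tbl, size_t n, const char **chain,
		    size_t len, unsigned long long period)
{
	for (size_t i = 0; i < n; i++) {
		/* Self: the period goes to the sampled (leaf) entry only */
		if (!strcmp(tbl[i].sym, chain[0]))
			tbl[i].self += period;
		/* Children: the period goes to each distinct entry in the chain */
		for (size_t j = 0; j < len; j++) {
			if (!strcmp(tbl[i].sym, chain[j])) {
				tbl[i].children += period;
				break;
			}
		}
	}
}

int main(void)
{
	struct acc tbl[] = { { "main", 0, 0 }, { "run_command", 0, 0 } };
	/* three of the perf samples from the fake data above */
	const char *free_chain[]   = { "free", "cmd_record", "run_command", "main" };
	const char *malloc_chain[] = { "malloc", "cmd_record", "run_command", "main" };
	const char *record_chain[] = { "cmd_record", "run_command", "main" };

	account(tbl, 2, free_chain, 4, 1000);
	account(tbl, 2, malloc_chain, 4, 1000);
	account(tbl, 2, record_chain, 3, 1000);

	for (size_t i = 0; i < 2; i++)
		printf("%-12s self=%llu children=%llu\n",
		       tbl[i].sym, tbl[i].self, tbl[i].children);
	return 0;
}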
diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c
index c5ba924a3581..821f581fd930 100644
--- a/tools/perf/tests/hists_filter.c
+++ b/tools/perf/tests/hists_filter.c
@@ -21,33 +21,33 @@ struct sample {
21/* For the numbers, see hists_common.c */ 21/* For the numbers, see hists_common.c */
22static struct sample fake_samples[] = { 22static struct sample fake_samples[] = {
23 /* perf [kernel] schedule() */ 23 /* perf [kernel] schedule() */
24 { .pid = 100, .ip = 0xf0000 + 700, }, 24 { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, },
25 /* perf [perf] main() */ 25 /* perf [perf] main() */
26 { .pid = 100, .ip = 0x40000 + 700, }, 26 { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, },
27 /* perf [libc] malloc() */ 27 /* perf [libc] malloc() */
28 { .pid = 100, .ip = 0x50000 + 700, }, 28 { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, },
29 /* perf [perf] main() */ 29 /* perf [perf] main() */
30 { .pid = 200, .ip = 0x40000 + 700, }, /* will be merged */ 30 { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, }, /* will be merged */
31 /* perf [perf] cmd_record() */ 31 /* perf [perf] cmd_record() */
32 { .pid = 200, .ip = 0x40000 + 900, }, 32 { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_CMD_RECORD, },
33 /* perf [kernel] page_fault() */ 33 /* perf [kernel] page_fault() */
34 { .pid = 200, .ip = 0xf0000 + 800, }, 34 { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
35 /* bash [bash] main() */ 35 /* bash [bash] main() */
36 { .pid = 300, .ip = 0x40000 + 700, }, 36 { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_MAIN, },
37 /* bash [bash] xmalloc() */ 37 /* bash [bash] xmalloc() */
38 { .pid = 300, .ip = 0x40000 + 800, }, 38 { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XMALLOC, },
39 /* bash [libc] malloc() */ 39 /* bash [libc] malloc() */
40 { .pid = 300, .ip = 0x50000 + 700, }, 40 { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_MALLOC, },
41 /* bash [kernel] page_fault() */ 41 /* bash [kernel] page_fault() */
42 { .pid = 300, .ip = 0xf0000 + 800, }, 42 { .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
43}; 43};
44 44
45static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) 45static int add_hist_entries(struct perf_evlist *evlist,
46 struct machine *machine __maybe_unused)
46{ 47{
47 struct perf_evsel *evsel; 48 struct perf_evsel *evsel;
48 struct addr_location al; 49 struct addr_location al;
49 struct hist_entry *he; 50 struct perf_sample sample = { .period = 100, };
50 struct perf_sample sample = { .cpu = 0, };
51 size_t i; 51 size_t i;
52 52
53 /* 53 /*
@@ -62,6 +62,10 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
62 .misc = PERF_RECORD_MISC_USER, 62 .misc = PERF_RECORD_MISC_USER,
63 }, 63 },
64 }; 64 };
65 struct hist_entry_iter iter = {
66 .ops = &hist_iter_normal,
67 .hide_unresolved = false,
68 };
65 69
66 /* make sure it has no filter at first */ 70 /* make sure it has no filter at first */
67 evsel->hists.thread_filter = NULL; 71 evsel->hists.thread_filter = NULL;
@@ -76,18 +80,13 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
76 &sample) < 0) 80 &sample) < 0)
77 goto out; 81 goto out;
78 82
79 he = __hists__add_entry(&evsel->hists, &al, NULL, 83 if (hist_entry_iter__add(&iter, &al, evsel, &sample,
80 NULL, NULL, 100, 1, 0); 84 PERF_MAX_STACK_DEPTH, NULL) < 0)
81 if (he == NULL)
82 goto out; 85 goto out;
83 86
84 fake_samples[i].thread = al.thread; 87 fake_samples[i].thread = al.thread;
85 fake_samples[i].map = al.map; 88 fake_samples[i].map = al.map;
86 fake_samples[i].sym = al.sym; 89 fake_samples[i].sym = al.sym;
87
88 hists__inc_nr_events(he->hists, PERF_RECORD_SAMPLE);
89 if (!he->filtered)
90 he->hists->stats.nr_non_filtered_samples++;
91 } 90 }
92 } 91 }
93 92
diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c
index 5ffa2c3eb77d..d4b34b0f50a2 100644
--- a/tools/perf/tests/hists_link.c
+++ b/tools/perf/tests/hists_link.c
@@ -21,41 +21,41 @@ struct sample {
21/* For the numbers, see hists_common.c */ 21/* For the numbers, see hists_common.c */
22static struct sample fake_common_samples[] = { 22static struct sample fake_common_samples[] = {
23 /* perf [kernel] schedule() */ 23 /* perf [kernel] schedule() */
24 { .pid = 100, .ip = 0xf0000 + 700, }, 24 { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, },
25 /* perf [perf] main() */ 25 /* perf [perf] main() */
26 { .pid = 200, .ip = 0x40000 + 700, }, 26 { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, },
27 /* perf [perf] cmd_record() */ 27 /* perf [perf] cmd_record() */
28 { .pid = 200, .ip = 0x40000 + 900, }, 28 { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_CMD_RECORD, },
29 /* bash [bash] xmalloc() */ 29 /* bash [bash] xmalloc() */
30 { .pid = 300, .ip = 0x40000 + 800, }, 30 { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XMALLOC, },
31 /* bash [libc] malloc() */ 31 /* bash [libc] malloc() */
32 { .pid = 300, .ip = 0x50000 + 700, }, 32 { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_MALLOC, },
33}; 33};
34 34
35static struct sample fake_samples[][5] = { 35static struct sample fake_samples[][5] = {
36 { 36 {
37 /* perf [perf] run_command() */ 37 /* perf [perf] run_command() */
38 { .pid = 100, .ip = 0x40000 + 800, }, 38 { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_RUN_COMMAND, },
39 /* perf [libc] malloc() */ 39 /* perf [libc] malloc() */
40 { .pid = 100, .ip = 0x50000 + 700, }, 40 { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, },
41 /* perf [kernel] page_fault() */ 41 /* perf [kernel] page_fault() */
42 { .pid = 100, .ip = 0xf0000 + 800, }, 42 { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
43 /* perf [kernel] sys_perf_event_open() */ 43 /* perf [kernel] sys_perf_event_open() */
44 { .pid = 200, .ip = 0xf0000 + 900, }, 44 { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_SYS_PERF_EVENT_OPEN, },
45 /* bash [libc] free() */ 45 /* bash [libc] free() */
46 { .pid = 300, .ip = 0x50000 + 800, }, 46 { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_FREE, },
47 }, 47 },
48 { 48 {
49 /* perf [libc] free() */ 49 /* perf [libc] free() */
50 { .pid = 200, .ip = 0x50000 + 800, }, 50 { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_LIBC_FREE, },
51 /* bash [libc] malloc() */ 51 /* bash [libc] malloc() */
52 { .pid = 300, .ip = 0x50000 + 700, }, /* will be merged */ 52 { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_MALLOC, }, /* will be merged */
53 /* bash [bash] xfee() */ 53 /* bash [bash] xfee() */
54 { .pid = 300, .ip = 0x40000 + 900, }, 54 { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XFREE, },
55 /* bash [libc] realloc() */ 55 /* bash [libc] realloc() */
56 { .pid = 300, .ip = 0x50000 + 900, }, 56 { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_REALLOC, },
57 /* bash [kernel] page_fault() */ 57 /* bash [kernel] page_fault() */
58 { .pid = 300, .ip = 0xf0000 + 800, }, 58 { .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
59 }, 59 },
60}; 60};
61 61
@@ -64,7 +64,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
64 struct perf_evsel *evsel; 64 struct perf_evsel *evsel;
65 struct addr_location al; 65 struct addr_location al;
66 struct hist_entry *he; 66 struct hist_entry *he;
67 struct perf_sample sample = { .cpu = 0, }; 67 struct perf_sample sample = { .period = 1, };
68 size_t i = 0, k; 68 size_t i = 0, k;
69 69
70 /* 70 /*
@@ -88,7 +88,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
88 goto out; 88 goto out;
89 89
90 he = __hists__add_entry(&evsel->hists, &al, NULL, 90 he = __hists__add_entry(&evsel->hists, &al, NULL,
91 NULL, NULL, 1, 1, 0); 91 NULL, NULL, 1, 1, 0, true);
92 if (he == NULL) 92 if (he == NULL)
93 goto out; 93 goto out;
94 94
@@ -112,7 +112,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
112 goto out; 112 goto out;
113 113
114 he = __hists__add_entry(&evsel->hists, &al, NULL, 114 he = __hists__add_entry(&evsel->hists, &al, NULL,
115 NULL, NULL, 1, 1, 0); 115 NULL, NULL, 1, 1, 0, true);
116 if (he == NULL) 116 if (he == NULL)
117 goto out; 117 goto out;
118 118
diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c
index a16850551797..e3bbd6c54c1b 100644
--- a/tools/perf/tests/hists_output.c
+++ b/tools/perf/tests/hists_output.c
@@ -22,31 +22,31 @@ struct sample {
22/* For the numbers, see hists_common.c */ 22/* For the numbers, see hists_common.c */
23static struct sample fake_samples[] = { 23static struct sample fake_samples[] = {
24 /* perf [kernel] schedule() */ 24 /* perf [kernel] schedule() */
25 { .cpu = 0, .pid = 100, .ip = 0xf0000 + 700, }, 25 { .cpu = 0, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, },
26 /* perf [perf] main() */ 26 /* perf [perf] main() */
27 { .cpu = 1, .pid = 100, .ip = 0x40000 + 700, }, 27 { .cpu = 1, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, },
28 /* perf [perf] cmd_record() */ 28 /* perf [perf] cmd_record() */
29 { .cpu = 1, .pid = 100, .ip = 0x40000 + 900, }, 29 { .cpu = 1, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_CMD_RECORD, },
30 /* perf [libc] malloc() */ 30 /* perf [libc] malloc() */
31 { .cpu = 1, .pid = 100, .ip = 0x50000 + 700, }, 31 { .cpu = 1, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, },
32 /* perf [libc] free() */ 32 /* perf [libc] free() */
33 { .cpu = 2, .pid = 100, .ip = 0x50000 + 800, }, 33 { .cpu = 2, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_FREE, },
34 /* perf [perf] main() */ 34 /* perf [perf] main() */
35 { .cpu = 2, .pid = 200, .ip = 0x40000 + 700, }, 35 { .cpu = 2, .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, },
36 /* perf [kernel] page_fault() */ 36 /* perf [kernel] page_fault() */
37 { .cpu = 2, .pid = 200, .ip = 0xf0000 + 800, }, 37 { .cpu = 2, .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
38 /* bash [bash] main() */ 38 /* bash [bash] main() */
39 { .cpu = 3, .pid = 300, .ip = 0x40000 + 700, }, 39 { .cpu = 3, .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_MAIN, },
40 /* bash [bash] xmalloc() */ 40 /* bash [bash] xmalloc() */
41 { .cpu = 0, .pid = 300, .ip = 0x40000 + 800, }, 41 { .cpu = 0, .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XMALLOC, },
42 /* bash [kernel] page_fault() */ 42 /* bash [kernel] page_fault() */
43 { .cpu = 1, .pid = 300, .ip = 0xf0000 + 800, }, 43 { .cpu = 1, .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
44}; 44};
45 45
46static int add_hist_entries(struct hists *hists, struct machine *machine) 46static int add_hist_entries(struct hists *hists, struct machine *machine)
47{ 47{
48 struct addr_location al; 48 struct addr_location al;
49 struct hist_entry *he; 49 struct perf_evsel *evsel = hists_to_evsel(hists);
50 struct perf_sample sample = { .period = 100, }; 50 struct perf_sample sample = { .period = 100, };
51 size_t i; 51 size_t i;
52 52
@@ -56,6 +56,10 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
56 .misc = PERF_RECORD_MISC_USER, 56 .misc = PERF_RECORD_MISC_USER,
57 }, 57 },
58 }; 58 };
59 struct hist_entry_iter iter = {
60 .ops = &hist_iter_normal,
61 .hide_unresolved = false,
62 };
59 63
60 sample.cpu = fake_samples[i].cpu; 64 sample.cpu = fake_samples[i].cpu;
61 sample.pid = fake_samples[i].pid; 65 sample.pid = fake_samples[i].pid;
@@ -66,9 +70,8 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
66 &sample) < 0) 70 &sample) < 0)
67 goto out; 71 goto out;
68 72
69 he = __hists__add_entry(hists, &al, NULL, NULL, NULL, 73 if (hist_entry_iter__add(&iter, &al, evsel, &sample,
70 sample.period, 1, 0); 74 PERF_MAX_STACK_DEPTH, NULL) < 0)
71 if (he == NULL)
72 goto out; 75 goto out;
73 76
74 fake_samples[i].thread = al.thread; 77 fake_samples[i].thread = al.thread;
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index d76c0e2e6635..022bb68fd9c7 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -45,6 +45,7 @@ int test__hists_filter(void);
45int test__mmap_thread_lookup(void); 45int test__mmap_thread_lookup(void);
46int test__thread_mg_share(void); 46int test__thread_mg_share(void);
47int test__hists_output(void); 47int test__hists_output(void);
48int test__hists_cumulate(void);
48 49
49#if defined(__x86_64__) || defined(__i386__) || defined(__arm__) 50#if defined(__x86_64__) || defined(__i386__) || defined(__arm__)
50#ifdef HAVE_DWARF_UNWIND_SUPPORT 51#ifdef HAVE_DWARF_UNWIND_SUPPORT
diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c
index d11541d4d7d7..3ccf6e14f89b 100644
--- a/tools/perf/ui/browser.c
+++ b/tools/perf/ui/browser.c
@@ -194,7 +194,7 @@ int ui_browser__warning(struct ui_browser *browser, int timeout,
194 ui_helpline__vpush(format, args); 194 ui_helpline__vpush(format, args);
195 va_end(args); 195 va_end(args);
196 } else { 196 } else {
197 while ((key == ui__question_window("Warning!", text, 197 while ((key = ui__question_window("Warning!", text,
198 "Press any key...", 198 "Press any key...",
199 timeout)) == K_RESIZE) 199 timeout)) == K_RESIZE)
200 ui_browser__handle_resize(browser); 200 ui_browser__handle_resize(browser);
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 1c331b934ffc..52c03fbbba17 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -37,7 +37,6 @@ static int hists__browser_title(struct hists *hists, char *bf, size_t size,
37static void hist_browser__update_nr_entries(struct hist_browser *hb); 37static void hist_browser__update_nr_entries(struct hist_browser *hb);
38 38
39static struct rb_node *hists__filter_entries(struct rb_node *nd, 39static struct rb_node *hists__filter_entries(struct rb_node *nd,
40 struct hists *hists,
41 float min_pcnt); 40 float min_pcnt);
42 41
43static bool hist_browser__has_filter(struct hist_browser *hb) 42static bool hist_browser__has_filter(struct hist_browser *hb)
@@ -319,7 +318,7 @@ __hist_browser__set_folding(struct hist_browser *browser, bool unfold)
319 struct hists *hists = browser->hists; 318 struct hists *hists = browser->hists;
320 319
321 for (nd = rb_first(&hists->entries); 320 for (nd = rb_first(&hists->entries);
322 (nd = hists__filter_entries(nd, hists, browser->min_pcnt)) != NULL; 321 (nd = hists__filter_entries(nd, browser->min_pcnt)) != NULL;
323 nd = rb_next(nd)) { 322 nd = rb_next(nd)) {
324 struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); 323 struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
325 hist_entry__set_folding(he, unfold); 324 hist_entry__set_folding(he, unfold);
@@ -651,13 +650,36 @@ hist_browser__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused,\
651 __hpp__slsmg_color_printf, true); \ 650 __hpp__slsmg_color_printf, true); \
652} 651}
653 652
653#define __HPP_COLOR_ACC_PERCENT_FN(_type, _field) \
654static u64 __hpp_get_acc_##_field(struct hist_entry *he) \
655{ \
656 return he->stat_acc->_field; \
657} \
658 \
659static int \
660hist_browser__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused,\
661 struct perf_hpp *hpp, \
662 struct hist_entry *he) \
663{ \
664 if (!symbol_conf.cumulate_callchain) { \
665 int ret = scnprintf(hpp->buf, hpp->size, "%8s", "N/A"); \
666 slsmg_printf("%s", hpp->buf); \
667 \
668 return ret; \
669 } \
670 return __hpp__fmt(hpp, he, __hpp_get_acc_##_field, " %6.2f%%", \
671 __hpp__slsmg_color_printf, true); \
672}
673
654__HPP_COLOR_PERCENT_FN(overhead, period) 674__HPP_COLOR_PERCENT_FN(overhead, period)
655__HPP_COLOR_PERCENT_FN(overhead_sys, period_sys) 675__HPP_COLOR_PERCENT_FN(overhead_sys, period_sys)
656__HPP_COLOR_PERCENT_FN(overhead_us, period_us) 676__HPP_COLOR_PERCENT_FN(overhead_us, period_us)
657__HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_guest_sys) 677__HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_guest_sys)
658__HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us) 678__HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us)
679__HPP_COLOR_ACC_PERCENT_FN(overhead_acc, period)
659 680
660#undef __HPP_COLOR_PERCENT_FN 681#undef __HPP_COLOR_PERCENT_FN
682#undef __HPP_COLOR_ACC_PERCENT_FN
661 683
662void hist_browser__init_hpp(void) 684void hist_browser__init_hpp(void)
663{ 685{
@@ -671,6 +693,8 @@ void hist_browser__init_hpp(void)
671 hist_browser__hpp_color_overhead_guest_sys; 693 hist_browser__hpp_color_overhead_guest_sys;
672 perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color = 694 perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color =
673 hist_browser__hpp_color_overhead_guest_us; 695 hist_browser__hpp_color_overhead_guest_us;
696 perf_hpp__format[PERF_HPP__OVERHEAD_ACC].color =
697 hist_browser__hpp_color_overhead_acc;
674} 698}
675 699
676static int hist_browser__show_entry(struct hist_browser *browser, 700static int hist_browser__show_entry(struct hist_browser *browser,
@@ -783,15 +807,12 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser)
783 807
784 for (nd = browser->top; nd; nd = rb_next(nd)) { 808 for (nd = browser->top; nd; nd = rb_next(nd)) {
785 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); 809 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
786 u64 total = hists__total_period(h->hists); 810 float percent;
787 float percent = 0.0;
788 811
789 if (h->filtered) 812 if (h->filtered)
790 continue; 813 continue;
791 814
792 if (total) 815 percent = hist_entry__get_percent_limit(h);
793 percent = h->stat.period * 100.0 / total;
794
795 if (percent < hb->min_pcnt) 816 if (percent < hb->min_pcnt)
796 continue; 817 continue;
797 818
@@ -804,16 +825,11 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser)
804} 825}
805 826
806static struct rb_node *hists__filter_entries(struct rb_node *nd, 827static struct rb_node *hists__filter_entries(struct rb_node *nd,
807 struct hists *hists,
808 float min_pcnt) 828 float min_pcnt)
809{ 829{
810 while (nd != NULL) { 830 while (nd != NULL) {
811 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); 831 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
812 u64 total = hists__total_period(hists); 832 float percent = hist_entry__get_percent_limit(h);
813 float percent = 0.0;
814
815 if (total)
816 percent = h->stat.period * 100.0 / total;
817 833
818 if (!h->filtered && percent >= min_pcnt) 834 if (!h->filtered && percent >= min_pcnt)
819 return nd; 835 return nd;
@@ -825,16 +841,11 @@ static struct rb_node *hists__filter_entries(struct rb_node *nd,
825} 841}
826 842
827static struct rb_node *hists__filter_prev_entries(struct rb_node *nd, 843static struct rb_node *hists__filter_prev_entries(struct rb_node *nd,
828 struct hists *hists,
829 float min_pcnt) 844 float min_pcnt)
830{ 845{
831 while (nd != NULL) { 846 while (nd != NULL) {
832 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); 847 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
833 u64 total = hists__total_period(hists); 848 float percent = hist_entry__get_percent_limit(h);
834 float percent = 0.0;
835
836 if (total)
837 percent = h->stat.period * 100.0 / total;
838 849
839 if (!h->filtered && percent >= min_pcnt) 850 if (!h->filtered && percent >= min_pcnt)
840 return nd; 851 return nd;
@@ -863,14 +874,14 @@ static void ui_browser__hists_seek(struct ui_browser *browser,
863 switch (whence) { 874 switch (whence) {
864 case SEEK_SET: 875 case SEEK_SET:
865 nd = hists__filter_entries(rb_first(browser->entries), 876 nd = hists__filter_entries(rb_first(browser->entries),
866 hb->hists, hb->min_pcnt); 877 hb->min_pcnt);
867 break; 878 break;
868 case SEEK_CUR: 879 case SEEK_CUR:
869 nd = browser->top; 880 nd = browser->top;
870 goto do_offset; 881 goto do_offset;
871 case SEEK_END: 882 case SEEK_END:
872 nd = hists__filter_prev_entries(rb_last(browser->entries), 883 nd = hists__filter_prev_entries(rb_last(browser->entries),
873 hb->hists, hb->min_pcnt); 884 hb->min_pcnt);
874 first = false; 885 first = false;
875 break; 886 break;
876 default: 887 default:
@@ -913,8 +924,7 @@ do_offset:
913 break; 924 break;
914 } 925 }
915 } 926 }
916 nd = hists__filter_entries(rb_next(nd), hb->hists, 927 nd = hists__filter_entries(rb_next(nd), hb->min_pcnt);
917 hb->min_pcnt);
918 if (nd == NULL) 928 if (nd == NULL)
919 break; 929 break;
920 --offset; 930 --offset;
@@ -947,7 +957,7 @@ do_offset:
947 } 957 }
948 } 958 }
949 959
950 nd = hists__filter_prev_entries(rb_prev(nd), hb->hists, 960 nd = hists__filter_prev_entries(rb_prev(nd),
951 hb->min_pcnt); 961 hb->min_pcnt);
952 if (nd == NULL) 962 if (nd == NULL)
953 break; 963 break;
@@ -1126,7 +1136,6 @@ static int hist_browser__fprintf_entry(struct hist_browser *browser,
1126static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp) 1136static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp)
1127{ 1137{
1128 struct rb_node *nd = hists__filter_entries(rb_first(browser->b.entries), 1138 struct rb_node *nd = hists__filter_entries(rb_first(browser->b.entries),
1129 browser->hists,
1130 browser->min_pcnt); 1139 browser->min_pcnt);
1131 int printed = 0; 1140 int printed = 0;
1132 1141
@@ -1134,8 +1143,7 @@ static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp)
1134 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); 1143 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
1135 1144
1136 printed += hist_browser__fprintf_entry(browser, h, fp); 1145 printed += hist_browser__fprintf_entry(browser, h, fp);
1137 nd = hists__filter_entries(rb_next(nd), browser->hists, 1146 nd = hists__filter_entries(rb_next(nd), browser->min_pcnt);
1138 browser->min_pcnt);
1139 } 1147 }
1140 1148
1141 return printed; 1149 return printed;
@@ -1372,8 +1380,7 @@ static void hist_browser__update_nr_entries(struct hist_browser *hb)
1372 return; 1380 return;
1373 } 1381 }
1374 1382
1375 while ((nd = hists__filter_entries(nd, hb->hists, 1383 while ((nd = hists__filter_entries(nd, hb->min_pcnt)) != NULL) {
1376 hb->min_pcnt)) != NULL) {
1377 nr_entries++; 1384 nr_entries++;
1378 nd = rb_next(nd); 1385 nd = rb_next(nd);
1379 } 1386 }
@@ -1699,14 +1706,14 @@ zoom_dso:
1699zoom_out_dso: 1706zoom_out_dso:
1700 ui_helpline__pop(); 1707 ui_helpline__pop();
1701 browser->hists->dso_filter = NULL; 1708 browser->hists->dso_filter = NULL;
1702 sort_dso.elide = false; 1709 perf_hpp__set_elide(HISTC_DSO, false);
1703 } else { 1710 } else {
1704 if (dso == NULL) 1711 if (dso == NULL)
1705 continue; 1712 continue;
1706 ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s DSO\"", 1713 ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s DSO\"",
1707 dso->kernel ? "the Kernel" : dso->short_name); 1714 dso->kernel ? "the Kernel" : dso->short_name);
1708 browser->hists->dso_filter = dso; 1715 browser->hists->dso_filter = dso;
1709 sort_dso.elide = true; 1716 perf_hpp__set_elide(HISTC_DSO, true);
1710 pstack__push(fstack, &browser->hists->dso_filter); 1717 pstack__push(fstack, &browser->hists->dso_filter);
1711 } 1718 }
1712 hists__filter_by_dso(hists); 1719 hists__filter_by_dso(hists);
@@ -1718,13 +1725,13 @@ zoom_thread:
1718zoom_out_thread: 1725zoom_out_thread:
1719 ui_helpline__pop(); 1726 ui_helpline__pop();
1720 browser->hists->thread_filter = NULL; 1727 browser->hists->thread_filter = NULL;
1721 sort_thread.elide = false; 1728 perf_hpp__set_elide(HISTC_THREAD, false);
1722 } else { 1729 } else {
1723 ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s(%d) thread\"", 1730 ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s(%d) thread\"",
1724 thread->comm_set ? thread__comm_str(thread) : "", 1731 thread->comm_set ? thread__comm_str(thread) : "",
1725 thread->tid); 1732 thread->tid);
1726 browser->hists->thread_filter = thread; 1733 browser->hists->thread_filter = thread;
1727 sort_thread.elide = true; 1734 perf_hpp__set_elide(HISTC_THREAD, true);
1728 pstack__push(fstack, &browser->hists->thread_filter); 1735 pstack__push(fstack, &browser->hists->thread_filter);
1729 } 1736 }
1730 hists__filter_by_thread(hists); 1737 hists__filter_by_thread(hists);
diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
index 9d90683914d4..6ca60e482cdc 100644
--- a/tools/perf/ui/gtk/hists.c
+++ b/tools/perf/ui/gtk/hists.c
@@ -47,11 +47,26 @@ static int perf_gtk__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused,
47 __percent_color_snprintf, true); \ 47 __percent_color_snprintf, true); \
48} 48}
49 49
50#define __HPP_COLOR_ACC_PERCENT_FN(_type, _field) \
51static u64 he_get_acc_##_field(struct hist_entry *he) \
52{ \
53 return he->stat_acc->_field; \
54} \
55 \
56static int perf_gtk__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \
57 struct perf_hpp *hpp, \
58 struct hist_entry *he) \
59{ \
60 return __hpp__fmt_acc(hpp, he, he_get_acc_##_field, " %6.2f%%", \
61 __percent_color_snprintf, true); \
62}
63
50__HPP_COLOR_PERCENT_FN(overhead, period) 64__HPP_COLOR_PERCENT_FN(overhead, period)
51__HPP_COLOR_PERCENT_FN(overhead_sys, period_sys) 65__HPP_COLOR_PERCENT_FN(overhead_sys, period_sys)
52__HPP_COLOR_PERCENT_FN(overhead_us, period_us) 66__HPP_COLOR_PERCENT_FN(overhead_us, period_us)
53__HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_guest_sys) 67__HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_guest_sys)
54__HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us) 68__HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us)
69__HPP_COLOR_ACC_PERCENT_FN(overhead_acc, period)
55 70
56#undef __HPP_COLOR_PERCENT_FN 71#undef __HPP_COLOR_PERCENT_FN
57 72
@@ -68,6 +83,8 @@ void perf_gtk__init_hpp(void)
68 perf_gtk__hpp_color_overhead_guest_sys; 83 perf_gtk__hpp_color_overhead_guest_sys;
69 perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color = 84 perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color =
70 perf_gtk__hpp_color_overhead_guest_us; 85 perf_gtk__hpp_color_overhead_guest_us;
86 perf_hpp__format[PERF_HPP__OVERHEAD_ACC].color =
87 perf_gtk__hpp_color_overhead_acc;
71} 88}
72 89
73static void callchain_list__sym_name(struct callchain_list *cl, 90static void callchain_list__sym_name(struct callchain_list *cl,
@@ -181,6 +198,13 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
181 if (perf_hpp__should_skip(fmt)) 198 if (perf_hpp__should_skip(fmt))
182 continue; 199 continue;
183 200
201 /*
202 * XXX no way to determine where the symbol column is..
203 * Just use last column for now.
204 */
205 if (perf_hpp__is_sort_entry(fmt))
206 sym_col = col_idx;
207
184 fmt->header(fmt, &hpp, hists_to_evsel(hists)); 208 fmt->header(fmt, &hpp, hists_to_evsel(hists));
185 209
186 gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view), 210 gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
@@ -209,14 +233,12 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
209 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); 233 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
210 GtkTreeIter iter; 234 GtkTreeIter iter;
211 u64 total = hists__total_period(h->hists); 235 u64 total = hists__total_period(h->hists);
212 float percent = 0.0; 236 float percent;
213 237
214 if (h->filtered) 238 if (h->filtered)
215 continue; 239 continue;
216 240
217 if (total) 241 percent = hist_entry__get_percent_limit(h);
218 percent = h->stat.period * 100.0 / total;
219
220 if (percent < min_pcnt) 242 if (percent < min_pcnt)
221 continue; 243 continue;
222 244
@@ -238,7 +260,8 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
238 260
239 if (symbol_conf.use_callchain && sort__has_sym) { 261 if (symbol_conf.use_callchain && sort__has_sym) {
240 if (callchain_param.mode == CHAIN_GRAPH_REL) 262 if (callchain_param.mode == CHAIN_GRAPH_REL)
241 total = h->stat.period; 263 total = symbol_conf.cumulate_callchain ?
264 h->stat_acc->period : h->stat.period;
242 265
243 perf_gtk__add_callchain(&h->sorted_chain, store, &iter, 266 perf_gtk__add_callchain(&h->sorted_chain, store, &iter,
244 sym_col, total); 267 sym_col, total);
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index 4484f5bd1b14..498adb23c02e 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -104,6 +104,18 @@ int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
104 return ret; 104 return ret;
105} 105}
106 106
107int __hpp__fmt_acc(struct perf_hpp *hpp, struct hist_entry *he,
108 hpp_field_fn get_field, const char *fmt,
109 hpp_snprint_fn print_fn, bool fmt_percent)
110{
111 if (!symbol_conf.cumulate_callchain) {
112 return snprintf(hpp->buf, hpp->size, "%*s",
113 fmt_percent ? 8 : 12, "N/A");
114 }
115
116 return __hpp__fmt(hpp, he, get_field, fmt, print_fn, fmt_percent);
117}
118
107static int field_cmp(u64 field_a, u64 field_b) 119static int field_cmp(u64 field_a, u64 field_b)
108{ 120{
109 if (field_a > field_b) 121 if (field_a > field_b)
@@ -160,6 +172,24 @@ out:
160 return ret; 172 return ret;
161} 173}
162 174
175static int __hpp__sort_acc(struct hist_entry *a, struct hist_entry *b,
176 hpp_field_fn get_field)
177{
178 s64 ret = 0;
179
180 if (symbol_conf.cumulate_callchain) {
181 /*
182 * Put caller above callee when they have equal period.
183 */
184 ret = field_cmp(get_field(a), get_field(b));
185 if (ret)
186 return ret;
187
188 ret = b->callchain->max_depth - a->callchain->max_depth;
189 }
190 return ret;
191}
192
163#define __HPP_HEADER_FN(_type, _str, _min_width, _unit_width) \ 193#define __HPP_HEADER_FN(_type, _str, _min_width, _unit_width) \
164static int hpp__header_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \ 194static int hpp__header_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \
165 struct perf_hpp *hpp, \ 195 struct perf_hpp *hpp, \
@@ -242,6 +272,34 @@ static int64_t hpp__sort_##_type(struct hist_entry *a, struct hist_entry *b) \
242 return __hpp__sort(a, b, he_get_##_field); \ 272 return __hpp__sort(a, b, he_get_##_field); \
243} 273}
244 274
275#define __HPP_COLOR_ACC_PERCENT_FN(_type, _field) \
276static u64 he_get_acc_##_field(struct hist_entry *he) \
277{ \
278 return he->stat_acc->_field; \
279} \
280 \
281static int hpp__color_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \
282 struct perf_hpp *hpp, struct hist_entry *he) \
283{ \
284 return __hpp__fmt_acc(hpp, he, he_get_acc_##_field, " %6.2f%%", \
285 hpp_color_scnprintf, true); \
286}
287
288#define __HPP_ENTRY_ACC_PERCENT_FN(_type, _field) \
289static int hpp__entry_##_type(struct perf_hpp_fmt *_fmt __maybe_unused, \
290 struct perf_hpp *hpp, struct hist_entry *he) \
291{ \
292 const char *fmt = symbol_conf.field_sep ? " %.2f" : " %6.2f%%"; \
293 return __hpp__fmt_acc(hpp, he, he_get_acc_##_field, fmt, \
294 hpp_entry_scnprintf, true); \
295}
296
297#define __HPP_SORT_ACC_FN(_type, _field) \
298static int64_t hpp__sort_##_type(struct hist_entry *a, struct hist_entry *b) \
299{ \
300 return __hpp__sort_acc(a, b, he_get_acc_##_field); \
301}
302
245#define __HPP_ENTRY_RAW_FN(_type, _field) \ 303#define __HPP_ENTRY_RAW_FN(_type, _field) \
246static u64 he_get_raw_##_field(struct hist_entry *he) \ 304static u64 he_get_raw_##_field(struct hist_entry *he) \
247{ \ 305{ \
@@ -270,18 +328,27 @@ __HPP_COLOR_PERCENT_FN(_type, _field) \
270__HPP_ENTRY_PERCENT_FN(_type, _field) \ 328__HPP_ENTRY_PERCENT_FN(_type, _field) \
271__HPP_SORT_FN(_type, _field) 329__HPP_SORT_FN(_type, _field)
272 330
331#define HPP_PERCENT_ACC_FNS(_type, _str, _field, _min_width, _unit_width)\
332__HPP_HEADER_FN(_type, _str, _min_width, _unit_width) \
333__HPP_WIDTH_FN(_type, _min_width, _unit_width) \
334__HPP_COLOR_ACC_PERCENT_FN(_type, _field) \
335__HPP_ENTRY_ACC_PERCENT_FN(_type, _field) \
336__HPP_SORT_ACC_FN(_type, _field)
337
273#define HPP_RAW_FNS(_type, _str, _field, _min_width, _unit_width) \ 338#define HPP_RAW_FNS(_type, _str, _field, _min_width, _unit_width) \
274__HPP_HEADER_FN(_type, _str, _min_width, _unit_width) \ 339__HPP_HEADER_FN(_type, _str, _min_width, _unit_width) \
275__HPP_WIDTH_FN(_type, _min_width, _unit_width) \ 340__HPP_WIDTH_FN(_type, _min_width, _unit_width) \
276__HPP_ENTRY_RAW_FN(_type, _field) \ 341__HPP_ENTRY_RAW_FN(_type, _field) \
277__HPP_SORT_RAW_FN(_type, _field) 342__HPP_SORT_RAW_FN(_type, _field)
278 343
344__HPP_HEADER_FN(overhead_self, "Self", 8, 8)
279 345
280HPP_PERCENT_FNS(overhead, "Overhead", period, 8, 8) 346HPP_PERCENT_FNS(overhead, "Overhead", period, 8, 8)
281HPP_PERCENT_FNS(overhead_sys, "sys", period_sys, 8, 8) 347HPP_PERCENT_FNS(overhead_sys, "sys", period_sys, 8, 8)
282HPP_PERCENT_FNS(overhead_us, "usr", period_us, 8, 8) 348HPP_PERCENT_FNS(overhead_us, "usr", period_us, 8, 8)
283HPP_PERCENT_FNS(overhead_guest_sys, "guest sys", period_guest_sys, 9, 8) 349HPP_PERCENT_FNS(overhead_guest_sys, "guest sys", period_guest_sys, 9, 8)
284HPP_PERCENT_FNS(overhead_guest_us, "guest usr", period_guest_us, 9, 8) 350HPP_PERCENT_FNS(overhead_guest_us, "guest usr", period_guest_us, 9, 8)
351HPP_PERCENT_ACC_FNS(overhead_acc, "Children", period, 8, 8)
285 352
286HPP_RAW_FNS(samples, "Samples", nr_events, 12, 12) 353HPP_RAW_FNS(samples, "Samples", nr_events, 12, 12)
287HPP_RAW_FNS(period, "Period", period, 12, 12) 354HPP_RAW_FNS(period, "Period", period, 12, 12)
@@ -303,6 +370,17 @@ static int64_t hpp__nop_cmp(struct hist_entry *a __maybe_unused,
303 .sort = hpp__sort_ ## _name, \ 370 .sort = hpp__sort_ ## _name, \
304 } 371 }
305 372
373#define HPP__COLOR_ACC_PRINT_FNS(_name) \
374 { \
375 .header = hpp__header_ ## _name, \
376 .width = hpp__width_ ## _name, \
377 .color = hpp__color_ ## _name, \
378 .entry = hpp__entry_ ## _name, \
379 .cmp = hpp__nop_cmp, \
380 .collapse = hpp__nop_cmp, \
381 .sort = hpp__sort_ ## _name, \
382 }
383
306#define HPP__PRINT_FNS(_name) \ 384#define HPP__PRINT_FNS(_name) \
307 { \ 385 { \
308 .header = hpp__header_ ## _name, \ 386 .header = hpp__header_ ## _name, \
@@ -319,6 +397,7 @@ struct perf_hpp_fmt perf_hpp__format[] = {
319 HPP__COLOR_PRINT_FNS(overhead_us), 397 HPP__COLOR_PRINT_FNS(overhead_us),
320 HPP__COLOR_PRINT_FNS(overhead_guest_sys), 398 HPP__COLOR_PRINT_FNS(overhead_guest_sys),
321 HPP__COLOR_PRINT_FNS(overhead_guest_us), 399 HPP__COLOR_PRINT_FNS(overhead_guest_us),
400 HPP__COLOR_ACC_PRINT_FNS(overhead_acc),
322 HPP__PRINT_FNS(samples), 401 HPP__PRINT_FNS(samples),
323 HPP__PRINT_FNS(period) 402 HPP__PRINT_FNS(period)
324}; 403};
@@ -328,16 +407,23 @@ LIST_HEAD(perf_hpp__sort_list);
328 407
329 408
330#undef HPP__COLOR_PRINT_FNS 409#undef HPP__COLOR_PRINT_FNS
410#undef HPP__COLOR_ACC_PRINT_FNS
331#undef HPP__PRINT_FNS 411#undef HPP__PRINT_FNS
332 412
333#undef HPP_PERCENT_FNS 413#undef HPP_PERCENT_FNS
414#undef HPP_PERCENT_ACC_FNS
334#undef HPP_RAW_FNS 415#undef HPP_RAW_FNS
335 416
336#undef __HPP_HEADER_FN 417#undef __HPP_HEADER_FN
337#undef __HPP_WIDTH_FN 418#undef __HPP_WIDTH_FN
338#undef __HPP_COLOR_PERCENT_FN 419#undef __HPP_COLOR_PERCENT_FN
339#undef __HPP_ENTRY_PERCENT_FN 420#undef __HPP_ENTRY_PERCENT_FN
421#undef __HPP_COLOR_ACC_PERCENT_FN
422#undef __HPP_ENTRY_ACC_PERCENT_FN
340#undef __HPP_ENTRY_RAW_FN 423#undef __HPP_ENTRY_RAW_FN
424#undef __HPP_SORT_FN
425#undef __HPP_SORT_ACC_FN
426#undef __HPP_SORT_RAW_FN
341 427
342 428
343void perf_hpp__init(void) 429void perf_hpp__init(void)
@@ -361,6 +447,13 @@ void perf_hpp__init(void)
361 if (field_order) 447 if (field_order)
362 return; 448 return;
363 449
450 if (symbol_conf.cumulate_callchain) {
451 perf_hpp__column_enable(PERF_HPP__OVERHEAD_ACC);
452
453 perf_hpp__format[PERF_HPP__OVERHEAD].header =
454 hpp__header_overhead_self;
455 }
456
364 perf_hpp__column_enable(PERF_HPP__OVERHEAD); 457 perf_hpp__column_enable(PERF_HPP__OVERHEAD);
365 458
366 if (symbol_conf.show_cpu_utilization) { 459 if (symbol_conf.show_cpu_utilization) {
@@ -383,6 +476,12 @@ void perf_hpp__init(void)
383 list = &perf_hpp__format[PERF_HPP__OVERHEAD].sort_list; 476 list = &perf_hpp__format[PERF_HPP__OVERHEAD].sort_list;
384 if (list_empty(list)) 477 if (list_empty(list))
385 list_add(list, &perf_hpp__sort_list); 478 list_add(list, &perf_hpp__sort_list);
479
480 if (symbol_conf.cumulate_callchain) {
481 list = &perf_hpp__format[PERF_HPP__OVERHEAD_ACC].sort_list;
482 if (list_empty(list))
483 list_add(list, &perf_hpp__sort_list);
484 }
386} 485}
387 486
388void perf_hpp__column_register(struct perf_hpp_fmt *format) 487void perf_hpp__column_register(struct perf_hpp_fmt *format)
@@ -390,6 +489,11 @@ void perf_hpp__column_register(struct perf_hpp_fmt *format)
390 list_add_tail(&format->list, &perf_hpp__list); 489 list_add_tail(&format->list, &perf_hpp__list);
391} 490}
392 491
492void perf_hpp__column_unregister(struct perf_hpp_fmt *format)
493{
494 list_del(&format->list);
495}
496
393void perf_hpp__register_sort_field(struct perf_hpp_fmt *format) 497void perf_hpp__register_sort_field(struct perf_hpp_fmt *format)
394{ 498{
395 list_add_tail(&format->sort_list, &perf_hpp__sort_list); 499 list_add_tail(&format->sort_list, &perf_hpp__sort_list);
@@ -401,6 +505,21 @@ void perf_hpp__column_enable(unsigned col)
401 perf_hpp__column_register(&perf_hpp__format[col]); 505 perf_hpp__column_register(&perf_hpp__format[col]);
402} 506}
403 507
508void perf_hpp__column_disable(unsigned col)
509{
510 BUG_ON(col >= PERF_HPP__MAX_INDEX);
511 perf_hpp__column_unregister(&perf_hpp__format[col]);
512}
513
514void perf_hpp__cancel_cumulate(void)
515{
516 if (field_order)
517 return;
518
519 perf_hpp__column_disable(PERF_HPP__OVERHEAD_ACC);
520 perf_hpp__format[PERF_HPP__OVERHEAD].header = hpp__header_overhead;
521}
522
404void perf_hpp__setup_output_field(void) 523void perf_hpp__setup_output_field(void)
405{ 524{
406 struct perf_hpp_fmt *fmt; 525 struct perf_hpp_fmt *fmt;
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index 9f57991025a9..90122abd3721 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -271,7 +271,9 @@ static size_t hist_entry_callchain__fprintf(struct hist_entry *he,
271{ 271{
272 switch (callchain_param.mode) { 272 switch (callchain_param.mode) {
273 case CHAIN_GRAPH_REL: 273 case CHAIN_GRAPH_REL:
274 return callchain__fprintf_graph(fp, &he->sorted_chain, he->stat.period, 274 return callchain__fprintf_graph(fp, &he->sorted_chain,
275 symbol_conf.cumulate_callchain ?
276 he->stat_acc->period : he->stat.period,
275 left_margin); 277 left_margin);
276 break; 278 break;
277 case CHAIN_GRAPH_ABS: 279 case CHAIN_GRAPH_ABS:
@@ -461,12 +463,12 @@ print_entries:
461 463
462 for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { 464 for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
463 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); 465 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
464 float percent = h->stat.period * 100.0 / 466 float percent;
465 hists->stats.total_period;
466 467
467 if (h->filtered) 468 if (h->filtered)
468 continue; 469 continue;
469 470
471 percent = hist_entry__get_percent_limit(h);
470 if (percent < min_pcnt) 472 if (percent < min_pcnt)
471 continue; 473 continue;
472 474
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 9a42382b3921..48b6d3f50012 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -616,7 +616,8 @@ int sample__resolve_callchain(struct perf_sample *sample, struct symbol **parent
616 if (sample->callchain == NULL) 616 if (sample->callchain == NULL)
617 return 0; 617 return 0;
618 618
619 if (symbol_conf.use_callchain || sort__has_parent) { 619 if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain ||
620 sort__has_parent) {
620 return machine__resolve_callchain(al->machine, evsel, al->thread, 621 return machine__resolve_callchain(al->machine, evsel, al->thread,
621 sample, parent, al, max_stack); 622 sample, parent, al, max_stack);
622 } 623 }
@@ -629,3 +630,45 @@ int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *samp
629 return 0; 630 return 0;
630 return callchain_append(he->callchain, &callchain_cursor, sample->period); 631 return callchain_append(he->callchain, &callchain_cursor, sample->period);
631} 632}
633
634int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node,
635 bool hide_unresolved)
636{
637 al->map = node->map;
638 al->sym = node->sym;
639 if (node->map)
640 al->addr = node->map->map_ip(node->map, node->ip);
641 else
642 al->addr = node->ip;
643
644 if (al->sym == NULL) {
645 if (hide_unresolved)
646 return 0;
647 if (al->map == NULL)
648 goto out;
649 }
650
651 if (al->map->groups == &al->machine->kmaps) {
652 if (machine__is_host(al->machine)) {
653 al->cpumode = PERF_RECORD_MISC_KERNEL;
654 al->level = 'k';
655 } else {
656 al->cpumode = PERF_RECORD_MISC_GUEST_KERNEL;
657 al->level = 'g';
658 }
659 } else {
660 if (machine__is_host(al->machine)) {
661 al->cpumode = PERF_RECORD_MISC_USER;
662 al->level = '.';
663 } else if (perf_guest) {
664 al->cpumode = PERF_RECORD_MISC_GUEST_USER;
665 al->level = 'u';
666 } else {
667 al->cpumode = PERF_RECORD_MISC_HYPERVISOR;
668 al->level = 'H';
669 }
670 }
671
672out:
673 return 1;
674}
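
/*
 * Rough sketch (assumption, not code from this patch) of the calling pattern
 * fill_callchain_info() is built for: walk a committed callchain cursor, let
 * fill_callchain_info() turn each node into an addr_location (returning 0 when
 * the node is unresolved and should be skipped), then act on the resolved
 * location.  The surrounding loop, the 'machine'/'thread' variables and the
 * "add a hist entry" step are illustrative placeholders.
 */
	struct callchain_cursor_node *node;

	callchain_cursor_commit(&callchain_cursor);
	while ((node = callchain_cursor_current(&callchain_cursor)) != NULL) {
		struct addr_location al = {
			.machine = machine,
			.thread  = thread,
		};

		if (fill_callchain_info(&al, node, false)) {
			/* al now describes this callchain entry; e.g. add a hist entry */
		}

		callchain_cursor_advance(&callchain_cursor);
	}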
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index bde2b0cc24cf..8f84423a75da 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -162,7 +162,18 @@ int sample__resolve_callchain(struct perf_sample *sample, struct symbol **parent
162 struct perf_evsel *evsel, struct addr_location *al, 162 struct perf_evsel *evsel, struct addr_location *al,
163 int max_stack); 163 int max_stack);
164int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *sample); 164int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *sample);
165int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node,
166 bool hide_unresolved);
165 167
166extern const char record_callchain_help[]; 168extern const char record_callchain_help[];
167int parse_callchain_report_opt(const char *arg); 169int parse_callchain_report_opt(const char *arg);
170
171static inline void callchain_cursor_snapshot(struct callchain_cursor *dest,
172 struct callchain_cursor *src)
173{
174 *dest = *src;
175
176 dest->first = src->curr;
177 dest->nr -= src->pos;
178}
168#endif /* __PERF_CALLCHAIN_H */ 179#endif /* __PERF_CALLCHAIN_H */
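
/*
 * callchain_cursor_snapshot() above copies the cursor and rebases it on the
 * current node (first = src->curr, nr -= src->pos), so the copy covers only
 * the not-yet-consumed tail of the chain.  A minimal usage sketch under that
 * reading -- 'saved' and the append call site are assumptions for
 * illustration, not the actual callers introduced later in this series:
 */
	struct callchain_cursor saved;

	/* remember the remaining (unconsumed) nodes of the shared cursor */
	callchain_cursor_snapshot(&saved, &callchain_cursor);

	/* ... callchain_cursor may be advanced further for other bookkeeping ... */

	/* append only the nodes from the snapshot point onwards */
	callchain_append(he->callchain, &saved, sample->period);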
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index b262b44b7a65..5a0a4b2cadc4 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -4,6 +4,7 @@
4#include "session.h" 4#include "session.h"
5#include "sort.h" 5#include "sort.h"
6#include "evsel.h" 6#include "evsel.h"
7#include "annotate.h"
7#include <math.h> 8#include <math.h>
8 9
9static bool hists__filter_entry_by_dso(struct hists *hists, 10static bool hists__filter_entry_by_dso(struct hists *hists,
@@ -231,6 +232,8 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he)
231 return true; 232 return true;
232 233
233 he_stat__decay(&he->stat); 234 he_stat__decay(&he->stat);
235 if (symbol_conf.cumulate_callchain)
236 he_stat__decay(he->stat_acc);
234 237
235 diff = prev_period - he->stat.period; 238 diff = prev_period - he->stat.period;
236 239
@@ -276,14 +279,31 @@ void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel)
276 * histogram, sorted on item, collects periods 279 * histogram, sorted on item, collects periods
277 */ 280 */
278 281
279static struct hist_entry *hist_entry__new(struct hist_entry *template) 282static struct hist_entry *hist_entry__new(struct hist_entry *template,
283 bool sample_self)
280{ 284{
281 size_t callchain_size = symbol_conf.use_callchain ? sizeof(struct callchain_root) : 0; 285 size_t callchain_size = 0;
282 struct hist_entry *he = zalloc(sizeof(*he) + callchain_size); 286 struct hist_entry *he;
287
288 if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain)
289 callchain_size = sizeof(struct callchain_root);
290
291 he = zalloc(sizeof(*he) + callchain_size);
283 292
284 if (he != NULL) { 293 if (he != NULL) {
285 *he = *template; 294 *he = *template;
286 295
296 if (symbol_conf.cumulate_callchain) {
297 he->stat_acc = malloc(sizeof(he->stat));
298 if (he->stat_acc == NULL) {
299 free(he);
300 return NULL;
301 }
302 memcpy(he->stat_acc, &he->stat, sizeof(he->stat));
303 if (!sample_self)
304 memset(&he->stat, 0, sizeof(he->stat));
305 }
306
287 if (he->ms.map) 307 if (he->ms.map)
288 he->ms.map->referenced = true; 308 he->ms.map->referenced = true;
289 309
@@ -295,6 +315,7 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template)
295 */ 315 */
296 he->branch_info = malloc(sizeof(*he->branch_info)); 316 he->branch_info = malloc(sizeof(*he->branch_info));
297 if (he->branch_info == NULL) { 317 if (he->branch_info == NULL) {
318 free(he->stat_acc);
298 free(he); 319 free(he);
299 return NULL; 320 return NULL;
300 } 321 }
@@ -333,7 +354,8 @@ static u8 symbol__parent_filter(const struct symbol *parent)
333 354
334static struct hist_entry *add_hist_entry(struct hists *hists, 355static struct hist_entry *add_hist_entry(struct hists *hists,
335 struct hist_entry *entry, 356 struct hist_entry *entry,
336 struct addr_location *al) 357 struct addr_location *al,
358 bool sample_self)
337{ 359{
338 struct rb_node **p; 360 struct rb_node **p;
339 struct rb_node *parent = NULL; 361 struct rb_node *parent = NULL;
@@ -357,7 +379,10 @@ static struct hist_entry *add_hist_entry(struct hists *hists,
357 cmp = hist_entry__cmp(he, entry); 379 cmp = hist_entry__cmp(he, entry);
358 380
359 if (!cmp) { 381 if (!cmp) {
360 he_stat__add_period(&he->stat, period, weight); 382 if (sample_self)
383 he_stat__add_period(&he->stat, period, weight);
384 if (symbol_conf.cumulate_callchain)
385 he_stat__add_period(he->stat_acc, period, weight);
361 386
362 /* 387 /*
363 * This mem info was allocated from sample__resolve_mem 388 * This mem info was allocated from sample__resolve_mem
@@ -385,14 +410,17 @@ static struct hist_entry *add_hist_entry(struct hists *hists,
385 p = &(*p)->rb_right; 410 p = &(*p)->rb_right;
386 } 411 }
387 412
388 he = hist_entry__new(entry); 413 he = hist_entry__new(entry, sample_self);
389 if (!he) 414 if (!he)
390 return NULL; 415 return NULL;
391 416
392 rb_link_node(&he->rb_node_in, parent, p); 417 rb_link_node(&he->rb_node_in, parent, p);
393 rb_insert_color(&he->rb_node_in, hists->entries_in); 418 rb_insert_color(&he->rb_node_in, hists->entries_in);
394out: 419out:
395 he_stat__add_cpumode_period(&he->stat, al->cpumode, period); 420 if (sample_self)
421 he_stat__add_cpumode_period(&he->stat, al->cpumode, period);
422 if (symbol_conf.cumulate_callchain)
423 he_stat__add_cpumode_period(he->stat_acc, al->cpumode, period);
396 return he; 424 return he;
397} 425}
398 426
@@ -401,7 +429,8 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
401 struct symbol *sym_parent, 429 struct symbol *sym_parent,
402 struct branch_info *bi, 430 struct branch_info *bi,
403 struct mem_info *mi, 431 struct mem_info *mi,
404 u64 period, u64 weight, u64 transaction) 432 u64 period, u64 weight, u64 transaction,
433 bool sample_self)
405{ 434{
406 struct hist_entry entry = { 435 struct hist_entry entry = {
407 .thread = al->thread, 436 .thread = al->thread,
@@ -426,7 +455,429 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
426 .transaction = transaction, 455 .transaction = transaction,
427 }; 456 };
428 457
429 return add_hist_entry(hists, &entry, al); 458 return add_hist_entry(hists, &entry, al, sample_self);
459}
460
461static int
462iter_next_nop_entry(struct hist_entry_iter *iter __maybe_unused,
463 struct addr_location *al __maybe_unused)
464{
465 return 0;
466}
467
468static int
469iter_add_next_nop_entry(struct hist_entry_iter *iter __maybe_unused,
470 struct addr_location *al __maybe_unused)
471{
472 return 0;
473}
474
475static int
476iter_prepare_mem_entry(struct hist_entry_iter *iter, struct addr_location *al)
477{
478 struct perf_sample *sample = iter->sample;
479 struct mem_info *mi;
480
481 mi = sample__resolve_mem(sample, al);
482 if (mi == NULL)
483 return -ENOMEM;
484
485 iter->priv = mi;
486 return 0;
487}
488
489static int
490iter_add_single_mem_entry(struct hist_entry_iter *iter, struct addr_location *al)
491{
492 u64 cost;
493 struct mem_info *mi = iter->priv;
494 struct hist_entry *he;
495
496 if (mi == NULL)
497 return -EINVAL;
498
499 cost = iter->sample->weight;
500 if (!cost)
501 cost = 1;
502
503 /*
504 * must pass period=weight in order to get the correct
505 * sorting from hists__collapse_resort() which is solely
506 * based on periods. We want sorting be done on nr_events * weight
507 * and this is indirectly achieved by passing period=weight here
508 * and the he_stat__add_period() function.
509 */
510 he = __hists__add_entry(&iter->evsel->hists, al, iter->parent, NULL, mi,
511 cost, cost, 0, true);
512 if (!he)
513 return -ENOMEM;
514
515 iter->he = he;
516 return 0;
517}
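
For illustration of the period=weight trick in the comment above: if each mem sample contributes its weight (cost) as the period, a later period-based sort orders entries by total cost rather than by sample count. The cost arrays and entry names below are invented.

#include <stdio.h>

int main(void)
{
	/* invented costs: entry A is hit 3 times cheaply, B once expensively */
	unsigned long long costs_a[] = { 30, 30, 30 };
	unsigned long long costs_b[] = { 200 };
	unsigned long long period_a = 0, period_b = 0;

	for (int i = 0; i < 3; i++)
		period_a += costs_a[i] ? costs_a[i] : 1;	/* weight 0 counts as 1 */
	period_b += costs_b[0] ? costs_b[0] : 1;

	/* a sort on period now reflects total cost, not number of samples */
	printf("A: period=%llu  B: period=%llu  -> B sorts first\n",
	       period_a, period_b);
	return 0;
}
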
518
519static int
520iter_finish_mem_entry(struct hist_entry_iter *iter,
521 struct addr_location *al __maybe_unused)
522{
523 struct perf_evsel *evsel = iter->evsel;
524 struct hist_entry *he = iter->he;
525 int err = -EINVAL;
526
527 if (he == NULL)
528 goto out;
529
530 hists__inc_nr_samples(&evsel->hists, he->filtered);
531
532 err = hist_entry__append_callchain(he, iter->sample);
533
534out:
535 /*
536 * We don't need to free iter->priv (mem_info) here since
537 * the mem info was either already freed in add_hist_entry() or
538 * passed to a new hist entry by hist_entry__new().
539 */
540 iter->priv = NULL;
541
542 iter->he = NULL;
543 return err;
544}
545
546static int
547iter_prepare_branch_entry(struct hist_entry_iter *iter, struct addr_location *al)
548{
549 struct branch_info *bi;
550 struct perf_sample *sample = iter->sample;
551
552 bi = sample__resolve_bstack(sample, al);
553 if (!bi)
554 return -ENOMEM;
555
556 iter->curr = 0;
557 iter->total = sample->branch_stack->nr;
558
559 iter->priv = bi;
560 return 0;
561}
562
563static int
564iter_add_single_branch_entry(struct hist_entry_iter *iter __maybe_unused,
565 struct addr_location *al __maybe_unused)
566{
567 /* to avoid calling callback function */
568 iter->he = NULL;
569
570 return 0;
571}
572
573static int
574iter_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *al)
575{
576 struct branch_info *bi = iter->priv;
577 int i = iter->curr;
578
579 if (bi == NULL)
580 return 0;
581
582 if (iter->curr >= iter->total)
583 return 0;
584
585 al->map = bi[i].to.map;
586 al->sym = bi[i].to.sym;
587 al->addr = bi[i].to.addr;
588 return 1;
589}
590
591static int
592iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *al)
593{
594 struct branch_info *bi;
595 struct perf_evsel *evsel = iter->evsel;
596 struct hist_entry *he = NULL;
597 int i = iter->curr;
598 int err = 0;
599
600 bi = iter->priv;
601
602 if (iter->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym))
603 goto out;
604
605 /*
606 * The report shows the percentage of total branches captured
607 * and not events sampled. Thus we use a pseudo period of 1.
608 */
609 he = __hists__add_entry(&evsel->hists, al, iter->parent, &bi[i], NULL,
610 1, 1, 0, true);
611 if (he == NULL)
612 return -ENOMEM;
613
614 hists__inc_nr_samples(&evsel->hists, he->filtered);
615
616out:
617 iter->he = he;
618 iter->curr++;
619 return err;
620}
621
622static int
623iter_finish_branch_entry(struct hist_entry_iter *iter,
624 struct addr_location *al __maybe_unused)
625{
626 zfree(&iter->priv);
627 iter->he = NULL;
628
629 return iter->curr >= iter->total ? 0 : -1;
630}
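
For illustration of the pseudo period of 1 noted above: branch percentages become a share of captured branches rather than a share of sampled events. The counts below are invented.

#include <stdio.h>

int main(void)
{
	unsigned long long branches_to_foo = 40, total_branches = 160;

	/* each branch was counted with period 1, so this is a branch share */
	printf("foo: %.1f%% of captured branches\n",
	       branches_to_foo * 100.0 / total_branches);
	return 0;
}
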
631
632static int
633iter_prepare_normal_entry(struct hist_entry_iter *iter __maybe_unused,
634 struct addr_location *al __maybe_unused)
635{
636 return 0;
637}
638
639static int
640iter_add_single_normal_entry(struct hist_entry_iter *iter, struct addr_location *al)
641{
642 struct perf_evsel *evsel = iter->evsel;
643 struct perf_sample *sample = iter->sample;
644 struct hist_entry *he;
645
646 he = __hists__add_entry(&evsel->hists, al, iter->parent, NULL, NULL,
647 sample->period, sample->weight,
648 sample->transaction, true);
649 if (he == NULL)
650 return -ENOMEM;
651
652 iter->he = he;
653 return 0;
654}
655
656static int
657iter_finish_normal_entry(struct hist_entry_iter *iter,
658 struct addr_location *al __maybe_unused)
659{
660 struct hist_entry *he = iter->he;
661 struct perf_evsel *evsel = iter->evsel;
662 struct perf_sample *sample = iter->sample;
663
664 if (he == NULL)
665 return 0;
666
667 iter->he = NULL;
668
669 hists__inc_nr_samples(&evsel->hists, he->filtered);
670
671 return hist_entry__append_callchain(he, sample);
672}
673
674static int
675iter_prepare_cumulative_entry(struct hist_entry_iter *iter __maybe_unused,
676 struct addr_location *al __maybe_unused)
677{
678 struct hist_entry **he_cache;
679
680 callchain_cursor_commit(&callchain_cursor);
681
682 /*
683 * This is for detecting cycles or recursions so that they're
684 * cumulated only one time to prevent entries more than 100%
685 * overhead.
686 */
687 he_cache = malloc(sizeof(*he_cache) * (PERF_MAX_STACK_DEPTH + 1));
688 if (he_cache == NULL)
689 return -ENOMEM;
690
691 iter->priv = he_cache;
692 iter->curr = 0;
693
694 return 0;
695}
696
697static int
698iter_add_single_cumulative_entry(struct hist_entry_iter *iter,
699 struct addr_location *al)
700{
701 struct perf_evsel *evsel = iter->evsel;
702 struct perf_sample *sample = iter->sample;
703 struct hist_entry **he_cache = iter->priv;
704 struct hist_entry *he;
705 int err = 0;
706
707 he = __hists__add_entry(&evsel->hists, al, iter->parent, NULL, NULL,
708 sample->period, sample->weight,
709 sample->transaction, true);
710 if (he == NULL)
711 return -ENOMEM;
712
713 iter->he = he;
714 he_cache[iter->curr++] = he;
715
716 callchain_append(he->callchain, &callchain_cursor, sample->period);
717
718 /*
719 * We need to re-initialize the cursor since callchain_append()
720 * advanced the cursor to the end.
721 */
722 callchain_cursor_commit(&callchain_cursor);
723
724 hists__inc_nr_samples(&evsel->hists, he->filtered);
725
726 return err;
727}
728
729static int
730iter_next_cumulative_entry(struct hist_entry_iter *iter,
731 struct addr_location *al)
732{
733 struct callchain_cursor_node *node;
734
735 node = callchain_cursor_current(&callchain_cursor);
736 if (node == NULL)
737 return 0;
738
739 return fill_callchain_info(al, node, iter->hide_unresolved);
740}
741
742static int
743iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
744 struct addr_location *al)
745{
746 struct perf_evsel *evsel = iter->evsel;
747 struct perf_sample *sample = iter->sample;
748 struct hist_entry **he_cache = iter->priv;
749 struct hist_entry *he;
750 struct hist_entry he_tmp = {
751 .cpu = al->cpu,
752 .thread = al->thread,
753 .comm = thread__comm(al->thread),
754 .ip = al->addr,
755 .ms = {
756 .map = al->map,
757 .sym = al->sym,
758 },
759 .parent = iter->parent,
760 };
761 int i;
762 struct callchain_cursor cursor;
763
764 callchain_cursor_snapshot(&cursor, &callchain_cursor);
765
766 callchain_cursor_advance(&callchain_cursor);
767
768 /*
769 * Check if there's duplicate entries in the callchain.
770 * It's possible that it has cycles or recursive calls.
771 */
772 for (i = 0; i < iter->curr; i++) {
773 if (hist_entry__cmp(he_cache[i], &he_tmp) == 0) {
774 /* to avoid calling callback function */
775 iter->he = NULL;
776 return 0;
777 }
778 }
779
780 he = __hists__add_entry(&evsel->hists, al, iter->parent, NULL, NULL,
781 sample->period, sample->weight,
782 sample->transaction, false);
783 if (he == NULL)
784 return -ENOMEM;
785
786 iter->he = he;
787 he_cache[iter->curr++] = he;
788
789 callchain_append(he->callchain, &cursor, sample->period);
790 return 0;
791}
792
793static int
794iter_finish_cumulative_entry(struct hist_entry_iter *iter,
795 struct addr_location *al __maybe_unused)
796{
797 zfree(&iter->priv);
798 iter->he = NULL;
799
800 return 0;
801}
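
For illustration of the he_cache logic above: within one sample, each distinct function is accumulated at most once, so recursive or cyclic callchains cannot push an entry past 100% overhead. This standalone sketch uses invented function names and a string compare in place of hist_entry__cmp().

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define MAX_DEPTH 8

/* stand-in for the hist_entry__cmp() check against he_cache[] */
static bool seen_before(const char *cache[], int n, const char *func)
{
	for (int i = 0; i < n; i++)
		if (strcmp(cache[i], func) == 0)
			return true;
	return false;
}

int main(void)
{
	/* one sample whose callchain contains direct recursion: a calls a */
	const char *chain[] = { "main", "a", "a", "b" };
	const char *cache[MAX_DEPTH];
	int n = 0;

	for (unsigned int i = 0; i < sizeof(chain) / sizeof(chain[0]); i++) {
		if (seen_before(cache, n, chain[i]))
			continue;	/* already accumulated for this sample */
		cache[n++] = chain[i];
		printf("accumulate period for %s\n", chain[i]);
	}
	return 0;
}
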
802
803const struct hist_iter_ops hist_iter_mem = {
804 .prepare_entry = iter_prepare_mem_entry,
805 .add_single_entry = iter_add_single_mem_entry,
806 .next_entry = iter_next_nop_entry,
807 .add_next_entry = iter_add_next_nop_entry,
808 .finish_entry = iter_finish_mem_entry,
809};
810
811const struct hist_iter_ops hist_iter_branch = {
812 .prepare_entry = iter_prepare_branch_entry,
813 .add_single_entry = iter_add_single_branch_entry,
814 .next_entry = iter_next_branch_entry,
815 .add_next_entry = iter_add_next_branch_entry,
816 .finish_entry = iter_finish_branch_entry,
817};
818
819const struct hist_iter_ops hist_iter_normal = {
820 .prepare_entry = iter_prepare_normal_entry,
821 .add_single_entry = iter_add_single_normal_entry,
822 .next_entry = iter_next_nop_entry,
823 .add_next_entry = iter_add_next_nop_entry,
824 .finish_entry = iter_finish_normal_entry,
825};
826
827const struct hist_iter_ops hist_iter_cumulative = {
828 .prepare_entry = iter_prepare_cumulative_entry,
829 .add_single_entry = iter_add_single_cumulative_entry,
830 .next_entry = iter_next_cumulative_entry,
831 .add_next_entry = iter_add_next_cumulative_entry,
832 .finish_entry = iter_finish_cumulative_entry,
833};
834
835int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
836 struct perf_evsel *evsel, struct perf_sample *sample,
837 int max_stack_depth, void *arg)
838{
839 int err, err2;
840
841 err = sample__resolve_callchain(sample, &iter->parent, evsel, al,
842 max_stack_depth);
843 if (err)
844 return err;
845
846 iter->evsel = evsel;
847 iter->sample = sample;
848
849 err = iter->ops->prepare_entry(iter, al);
850 if (err)
851 goto out;
852
853 err = iter->ops->add_single_entry(iter, al);
854 if (err)
855 goto out;
856
857 if (iter->he && iter->add_entry_cb) {
858 err = iter->add_entry_cb(iter, al, true, arg);
859 if (err)
860 goto out;
861 }
862
863 while (iter->ops->next_entry(iter, al)) {
864 err = iter->ops->add_next_entry(iter, al);
865 if (err)
866 break;
867
868 if (iter->he && iter->add_entry_cb) {
869 err = iter->add_entry_cb(iter, al, false, arg);
870 if (err)
871 goto out;
872 }
873 }
874
875out:
876 err2 = iter->ops->finish_entry(iter, al);
877 if (!err)
878 err = err2;
879
880 return err;
430} 881}
431 882
432int64_t 883int64_t
@@ -469,6 +920,7 @@ void hist_entry__free(struct hist_entry *he)
469{ 920{
470 zfree(&he->branch_info); 921 zfree(&he->branch_info);
471 zfree(&he->mem_info); 922 zfree(&he->mem_info);
923 zfree(&he->stat_acc);
472 free_srcline(he->srcline); 924 free_srcline(he->srcline);
473 free(he); 925 free(he);
474} 926}
@@ -494,6 +946,8 @@ static bool hists__collapse_insert_entry(struct hists *hists __maybe_unused,
494 946
495 if (!cmp) { 947 if (!cmp) {
496 he_stat__add_stat(&iter->stat, &he->stat); 948 he_stat__add_stat(&iter->stat, &he->stat);
949 if (symbol_conf.cumulate_callchain)
950 he_stat__add_stat(iter->stat_acc, he->stat_acc);
497 951
498 if (symbol_conf.use_callchain) { 952 if (symbol_conf.use_callchain) {
499 callchain_cursor_reset(&callchain_cursor); 953 callchain_cursor_reset(&callchain_cursor);
@@ -800,6 +1254,13 @@ void hists__inc_nr_events(struct hists *hists, u32 type)
800 events_stats__inc(&hists->stats, type); 1254 events_stats__inc(&hists->stats, type);
801} 1255}
802 1256
1257void hists__inc_nr_samples(struct hists *hists, bool filtered)
1258{
1259 events_stats__inc(&hists->stats, PERF_RECORD_SAMPLE);
1260 if (!filtered)
1261 hists->stats.nr_non_filtered_samples++;
1262}
1263
803static struct hist_entry *hists__add_dummy_entry(struct hists *hists, 1264static struct hist_entry *hists__add_dummy_entry(struct hists *hists,
804 struct hist_entry *pair) 1265 struct hist_entry *pair)
805{ 1266{
@@ -831,7 +1292,7 @@ static struct hist_entry *hists__add_dummy_entry(struct hists *hists,
831 p = &(*p)->rb_right; 1292 p = &(*p)->rb_right;
832 } 1293 }
833 1294
834 he = hist_entry__new(pair); 1295 he = hist_entry__new(pair, true);
835 if (he) { 1296 if (he) {
836 memset(&he->stat, 0, sizeof(he->stat)); 1297 memset(&he->stat, 0, sizeof(he->stat));
837 he->hists = hists; 1298 he->hists = hists;
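
For illustration of the iterator added to hist.c above: a per-sample-type table of callbacks (prepare, add_single, next, add_next, finish) is driven by one generic loop, as hist_entry_iter__add() does. The demo ops, counters, and output below are invented and the error handling is simplified to the same shape.

#include <stdio.h>

struct iter;

struct iter_ops {
	int (*prepare)(struct iter *);
	int (*add_single)(struct iter *);
	int (*next)(struct iter *);	/* non-zero while entries remain */
	int (*add_next)(struct iter *);
	int (*finish)(struct iter *);
};

struct iter {
	const struct iter_ops *ops;
	int curr, total;
};

static int demo_prepare(struct iter *it)    { it->curr = 0; it->total = 3; return 0; }
static int demo_add_single(struct iter *it) { printf("single entry (%d follow)\n", it->total); return 0; }
static int demo_next(struct iter *it)       { return it->curr < it->total; }
static int demo_add_next(struct iter *it)   { printf("entry %d\n", it->curr++); return 0; }
static int demo_finish(struct iter *it)     { printf("done after %d\n", it->curr); return 0; }

static const struct iter_ops demo_ops = {
	.prepare    = demo_prepare,
	.add_single = demo_add_single,
	.next       = demo_next,
	.add_next   = demo_add_next,
	.finish     = demo_finish,
};

/* same control flow as hist_entry_iter__add(): prepare, one single entry,
 * add_next for every next_entry, and finish unconditionally at the end */
static int iter_add(struct iter *it)
{
	int err, err2;

	err = it->ops->prepare(it);
	if (err)
		goto out;

	err = it->ops->add_single(it);
	if (err)
		goto out;

	while (it->ops->next(it)) {
		err = it->ops->add_next(it);
		if (err)
			break;
	}
out:
	err2 = it->ops->finish(it);
	return err ? err : err2;
}

int main(void)
{
	struct iter it = { .ops = &demo_ops };

	return iter_add(&it);
}
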
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index a8418d19808d..d2bf03575d5f 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -96,12 +96,50 @@ struct hists {
96 u16 col_len[HISTC_NR_COLS]; 96 u16 col_len[HISTC_NR_COLS];
97}; 97};
98 98
99struct hist_entry_iter;
100
101struct hist_iter_ops {
102 int (*prepare_entry)(struct hist_entry_iter *, struct addr_location *);
103 int (*add_single_entry)(struct hist_entry_iter *, struct addr_location *);
104 int (*next_entry)(struct hist_entry_iter *, struct addr_location *);
105 int (*add_next_entry)(struct hist_entry_iter *, struct addr_location *);
106 int (*finish_entry)(struct hist_entry_iter *, struct addr_location *);
107};
108
109struct hist_entry_iter {
110 int total;
111 int curr;
112
113 bool hide_unresolved;
114
115 struct perf_evsel *evsel;
116 struct perf_sample *sample;
117 struct hist_entry *he;
118 struct symbol *parent;
119 void *priv;
120
121 const struct hist_iter_ops *ops;
122 /* user-defined callback function (optional) */
123 int (*add_entry_cb)(struct hist_entry_iter *iter,
124 struct addr_location *al, bool single, void *arg);
125};
126
127extern const struct hist_iter_ops hist_iter_normal;
128extern const struct hist_iter_ops hist_iter_branch;
129extern const struct hist_iter_ops hist_iter_mem;
130extern const struct hist_iter_ops hist_iter_cumulative;
131
99struct hist_entry *__hists__add_entry(struct hists *hists, 132struct hist_entry *__hists__add_entry(struct hists *hists,
100 struct addr_location *al, 133 struct addr_location *al,
101 struct symbol *parent, 134 struct symbol *parent,
102 struct branch_info *bi, 135 struct branch_info *bi,
103 struct mem_info *mi, u64 period, 136 struct mem_info *mi, u64 period,
104 u64 weight, u64 transaction); 137 u64 weight, u64 transaction,
138 bool sample_self);
139int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
140 struct perf_evsel *evsel, struct perf_sample *sample,
141 int max_stack_depth, void *arg);
142
105int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right); 143int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
106int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right); 144int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
107int hist_entry__transaction_len(void); 145int hist_entry__transaction_len(void);
@@ -119,6 +157,7 @@ u64 hists__total_period(struct hists *hists);
119void hists__reset_stats(struct hists *hists); 157void hists__reset_stats(struct hists *hists);
120void hists__inc_stats(struct hists *hists, struct hist_entry *h); 158void hists__inc_stats(struct hists *hists, struct hist_entry *h);
121void hists__inc_nr_events(struct hists *hists, u32 type); 159void hists__inc_nr_events(struct hists *hists, u32 type);
160void hists__inc_nr_samples(struct hists *hists, bool filtered);
122void events_stats__inc(struct events_stats *stats, u32 type); 161void events_stats__inc(struct events_stats *stats, u32 type);
123size_t events_stats__fprintf(struct events_stats *stats, FILE *fp); 162size_t events_stats__fprintf(struct events_stats *stats, FILE *fp);
124 163
@@ -166,6 +205,7 @@ struct perf_hpp_fmt {
166 205
167 struct list_head list; 206 struct list_head list;
168 struct list_head sort_list; 207 struct list_head sort_list;
208 bool elide;
169}; 209};
170 210
171extern struct list_head perf_hpp__list; 211extern struct list_head perf_hpp__list;
@@ -192,6 +232,7 @@ enum {
192 PERF_HPP__OVERHEAD_US, 232 PERF_HPP__OVERHEAD_US,
193 PERF_HPP__OVERHEAD_GUEST_SYS, 233 PERF_HPP__OVERHEAD_GUEST_SYS,
194 PERF_HPP__OVERHEAD_GUEST_US, 234 PERF_HPP__OVERHEAD_GUEST_US,
235 PERF_HPP__OVERHEAD_ACC,
195 PERF_HPP__SAMPLES, 236 PERF_HPP__SAMPLES,
196 PERF_HPP__PERIOD, 237 PERF_HPP__PERIOD,
197 238
@@ -200,7 +241,11 @@ enum {
200 241
201void perf_hpp__init(void); 242void perf_hpp__init(void);
202void perf_hpp__column_register(struct perf_hpp_fmt *format); 243void perf_hpp__column_register(struct perf_hpp_fmt *format);
244void perf_hpp__column_unregister(struct perf_hpp_fmt *format);
203void perf_hpp__column_enable(unsigned col); 245void perf_hpp__column_enable(unsigned col);
246void perf_hpp__column_disable(unsigned col);
247void perf_hpp__cancel_cumulate(void);
248
204void perf_hpp__register_sort_field(struct perf_hpp_fmt *format); 249void perf_hpp__register_sort_field(struct perf_hpp_fmt *format);
205void perf_hpp__setup_output_field(void); 250void perf_hpp__setup_output_field(void);
206void perf_hpp__reset_output_field(void); 251void perf_hpp__reset_output_field(void);
@@ -208,7 +253,12 @@ void perf_hpp__append_sort_keys(void);
208 253
209bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format); 254bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format);
210bool perf_hpp__same_sort_entry(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b); 255bool perf_hpp__same_sort_entry(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b);
211bool perf_hpp__should_skip(struct perf_hpp_fmt *format); 256
257static inline bool perf_hpp__should_skip(struct perf_hpp_fmt *format)
258{
259 return format->elide;
260}
261
212void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists); 262void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists);
213 263
214typedef u64 (*hpp_field_fn)(struct hist_entry *he); 264typedef u64 (*hpp_field_fn)(struct hist_entry *he);
@@ -218,6 +268,9 @@ typedef int (*hpp_snprint_fn)(struct perf_hpp *hpp, const char *fmt, ...);
218int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he, 268int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
219 hpp_field_fn get_field, const char *fmt, 269 hpp_field_fn get_field, const char *fmt,
220 hpp_snprint_fn print_fn, bool fmt_percent); 270 hpp_snprint_fn print_fn, bool fmt_percent);
271int __hpp__fmt_acc(struct perf_hpp *hpp, struct hist_entry *he,
272 hpp_field_fn get_field, const char *fmt,
273 hpp_snprint_fn print_fn, bool fmt_percent);
221 274
222static inline void advance_hpp(struct perf_hpp *hpp, int inc) 275static inline void advance_hpp(struct perf_hpp *hpp, int inc)
223{ 276{
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 901b9bece2ee..45512baaab67 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -1061,6 +1061,7 @@ static struct hpp_dimension hpp_sort_dimensions[] = {
1061 DIM(PERF_HPP__OVERHEAD_US, "overhead_us"), 1061 DIM(PERF_HPP__OVERHEAD_US, "overhead_us"),
1062 DIM(PERF_HPP__OVERHEAD_GUEST_SYS, "overhead_guest_sys"), 1062 DIM(PERF_HPP__OVERHEAD_GUEST_SYS, "overhead_guest_sys"),
1063 DIM(PERF_HPP__OVERHEAD_GUEST_US, "overhead_guest_us"), 1063 DIM(PERF_HPP__OVERHEAD_GUEST_US, "overhead_guest_us"),
1064 DIM(PERF_HPP__OVERHEAD_ACC, "overhead_children"),
1064 DIM(PERF_HPP__SAMPLES, "sample"), 1065 DIM(PERF_HPP__SAMPLES, "sample"),
1065 DIM(PERF_HPP__PERIOD, "period"), 1066 DIM(PERF_HPP__PERIOD, "period"),
1066}; 1067};
@@ -1156,6 +1157,7 @@ __sort_dimension__alloc_hpp(struct sort_dimension *sd)
1156 1157
1157 INIT_LIST_HEAD(&hse->hpp.list); 1158 INIT_LIST_HEAD(&hse->hpp.list);
1158 INIT_LIST_HEAD(&hse->hpp.sort_list); 1159 INIT_LIST_HEAD(&hse->hpp.sort_list);
1160 hse->hpp.elide = false;
1159 1161
1160 return hse; 1162 return hse;
1161} 1163}
@@ -1363,27 +1365,64 @@ static int __setup_sorting(void)
1363 return ret; 1365 return ret;
1364} 1366}
1365 1367
1366bool perf_hpp__should_skip(struct perf_hpp_fmt *format) 1368void perf_hpp__set_elide(int idx, bool elide)
1367{ 1369{
1368 if (perf_hpp__is_sort_entry(format)) { 1370 struct perf_hpp_fmt *fmt;
1369 struct hpp_sort_entry *hse; 1371 struct hpp_sort_entry *hse;
1372
1373 perf_hpp__for_each_format(fmt) {
1374 if (!perf_hpp__is_sort_entry(fmt))
1375 continue;
1370 1376
1371 hse = container_of(format, struct hpp_sort_entry, hpp); 1377 hse = container_of(fmt, struct hpp_sort_entry, hpp);
1372 return hse->se->elide; 1378 if (hse->se->se_width_idx == idx) {
1379 fmt->elide = elide;
1380 break;
1381 }
1373 } 1382 }
1374 return false;
1375} 1383}
1376 1384
1377static void sort_entry__setup_elide(struct sort_entry *se, 1385static bool __get_elide(struct strlist *list, const char *list_name, FILE *fp)
1378 struct strlist *list,
1379 const char *list_name, FILE *fp)
1380{ 1386{
1381 if (list && strlist__nr_entries(list) == 1) { 1387 if (list && strlist__nr_entries(list) == 1) {
1382 if (fp != NULL) 1388 if (fp != NULL)
1383 fprintf(fp, "# %s: %s\n", list_name, 1389 fprintf(fp, "# %s: %s\n", list_name,
1384 strlist__entry(list, 0)->s); 1390 strlist__entry(list, 0)->s);
1385 se->elide = true; 1391 return true;
1386 } 1392 }
1393 return false;
1394}
1395
1396static bool get_elide(int idx, FILE *output)
1397{
1398 switch (idx) {
1399 case HISTC_SYMBOL:
1400 return __get_elide(symbol_conf.sym_list, "symbol", output);
1401 case HISTC_DSO:
1402 return __get_elide(symbol_conf.dso_list, "dso", output);
1403 case HISTC_COMM:
1404 return __get_elide(symbol_conf.comm_list, "comm", output);
1405 default:
1406 break;
1407 }
1408
1409 if (sort__mode != SORT_MODE__BRANCH)
1410 return false;
1411
1412 switch (idx) {
1413 case HISTC_SYMBOL_FROM:
1414 return __get_elide(symbol_conf.sym_from_list, "sym_from", output);
1415 case HISTC_SYMBOL_TO:
1416 return __get_elide(symbol_conf.sym_to_list, "sym_to", output);
1417 case HISTC_DSO_FROM:
1418 return __get_elide(symbol_conf.dso_from_list, "dso_from", output);
1419 case HISTC_DSO_TO:
1420 return __get_elide(symbol_conf.dso_to_list, "dso_to", output);
1421 default:
1422 break;
1423 }
1424
1425 return false;
1387} 1426}
1388 1427
1389void sort__setup_elide(FILE *output) 1428void sort__setup_elide(FILE *output)
@@ -1391,39 +1430,12 @@ void sort__setup_elide(FILE *output)
1391 struct perf_hpp_fmt *fmt; 1430 struct perf_hpp_fmt *fmt;
1392 struct hpp_sort_entry *hse; 1431 struct hpp_sort_entry *hse;
1393 1432
1394 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, 1433 perf_hpp__for_each_format(fmt) {
1395 "dso", output); 1434 if (!perf_hpp__is_sort_entry(fmt))
1396 sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, 1435 continue;
1397 "comm", output); 1436
1398 sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, 1437 hse = container_of(fmt, struct hpp_sort_entry, hpp);
1399 "symbol", output); 1438 fmt->elide = get_elide(hse->se->se_width_idx, output);
1400
1401 if (sort__mode == SORT_MODE__BRANCH) {
1402 sort_entry__setup_elide(&sort_dso_from,
1403 symbol_conf.dso_from_list,
1404 "dso_from", output);
1405 sort_entry__setup_elide(&sort_dso_to,
1406 symbol_conf.dso_to_list,
1407 "dso_to", output);
1408 sort_entry__setup_elide(&sort_sym_from,
1409 symbol_conf.sym_from_list,
1410 "sym_from", output);
1411 sort_entry__setup_elide(&sort_sym_to,
1412 symbol_conf.sym_to_list,
1413 "sym_to", output);
1414 } else if (sort__mode == SORT_MODE__MEMORY) {
1415 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list,
1416 "symbol_daddr", output);
1417 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list,
1418 "dso_daddr", output);
1419 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list,
1420 "mem", output);
1421 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list,
1422 "local_weight", output);
1423 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list,
1424 "tlb", output);
1425 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list,
1426 "snoop", output);
1427 } 1439 }
1428 1440
1429 /* 1441 /*
@@ -1434,8 +1446,7 @@ void sort__setup_elide(FILE *output)
1434 if (!perf_hpp__is_sort_entry(fmt)) 1446 if (!perf_hpp__is_sort_entry(fmt))
1435 continue; 1447 continue;
1436 1448
1437 hse = container_of(fmt, struct hpp_sort_entry, hpp); 1449 if (!fmt->elide)
1438 if (!hse->se->elide)
1439 return; 1450 return;
1440 } 1451 }
1441 1452
@@ -1443,8 +1454,7 @@ void sort__setup_elide(FILE *output)
1443 if (!perf_hpp__is_sort_entry(fmt)) 1454 if (!perf_hpp__is_sort_entry(fmt))
1444 continue; 1455 continue;
1445 1456
1446 hse = container_of(fmt, struct hpp_sort_entry, hpp); 1457 fmt->elide = false;
1447 hse->se->elide = false;
1448 } 1458 }
1449} 1459}
1450 1460
@@ -1581,6 +1591,9 @@ void reset_output_field(void)
1581 sort__has_sym = 0; 1591 sort__has_sym = 0;
1582 sort__has_dso = 0; 1592 sort__has_dso = 0;
1583 1593
1594 field_order = NULL;
1595 sort_order = NULL;
1596
1584 reset_dimensions(); 1597 reset_dimensions();
1585 perf_hpp__reset_output_field(); 1598 perf_hpp__reset_output_field();
1586} 1599}
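
For illustration of the sort.c/hist.h change above: the elide flag now lives on the output format itself, so skipping a column is a plain field read and perf_hpp__set_elide() is a loop over the registered formats. The format table, indices, and column names below are invented.

#include <stdbool.h>
#include <stdio.h>

struct fmt {
	int width_idx;		/* like hse->se->se_width_idx */
	bool elide;		/* like perf_hpp_fmt::elide   */
	const char *name;
};

static struct fmt formats[] = {
	{ 0, false, "comm" },
	{ 1, false, "dso" },
	{ 2, false, "symbol" },
};

#define NR_FORMATS (sizeof(formats) / sizeof(formats[0]))

/* analogous to perf_hpp__set_elide(): find the format by index, set the flag */
static void set_elide(int idx, bool elide)
{
	for (unsigned int i = 0; i < NR_FORMATS; i++) {
		if (formats[i].width_idx == idx) {
			formats[i].elide = elide;
			break;
		}
	}
}

/* analogous to the new inline perf_hpp__should_skip(): just read the flag */
static bool should_skip(const struct fmt *f)
{
	return f->elide;
}

int main(void)
{
	set_elide(1, true);	/* e.g. a single-dso filter elides the dso column */

	for (unsigned int i = 0; i < NR_FORMATS; i++)
		printf("%-8s %s\n", formats[i].name,
		       should_skip(&formats[i]) ? "elided" : "shown");
	return 0;
}
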
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 5f38d925e92f..5bf0098d6b06 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -20,7 +20,7 @@
20 20
21#include "parse-options.h" 21#include "parse-options.h"
22#include "parse-events.h" 22#include "parse-events.h"
23 23#include "hist.h"
24#include "thread.h" 24#include "thread.h"
25 25
26extern regex_t parent_regex; 26extern regex_t parent_regex;
@@ -82,6 +82,7 @@ struct hist_entry {
82 struct list_head head; 82 struct list_head head;
83 } pairs; 83 } pairs;
84 struct he_stat stat; 84 struct he_stat stat;
85 struct he_stat *stat_acc;
85 struct map_symbol ms; 86 struct map_symbol ms;
86 struct thread *thread; 87 struct thread *thread;
87 struct comm *comm; 88 struct comm *comm;
@@ -130,6 +131,21 @@ static inline void hist_entry__add_pair(struct hist_entry *pair,
130 list_add_tail(&pair->pairs.node, &he->pairs.head); 131 list_add_tail(&pair->pairs.node, &he->pairs.head);
131} 132}
132 133
134static inline float hist_entry__get_percent_limit(struct hist_entry *he)
135{
136 u64 period = he->stat.period;
137 u64 total_period = hists__total_period(he->hists);
138
139 if (unlikely(total_period == 0))
140 return 0;
141
142 if (symbol_conf.cumulate_callchain)
143 period = he->stat_acc->period;
144
145 return period * 100.0 / total_period;
146}
147
148
133enum sort_mode { 149enum sort_mode {
134 SORT_MODE__NORMAL, 150 SORT_MODE__NORMAL,
135 SORT_MODE__BRANCH, 151 SORT_MODE__BRANCH,
@@ -186,7 +202,6 @@ struct sort_entry {
186 int (*se_snprintf)(struct hist_entry *he, char *bf, size_t size, 202 int (*se_snprintf)(struct hist_entry *he, char *bf, size_t size,
187 unsigned int width); 203 unsigned int width);
188 u8 se_width_idx; 204 u8 se_width_idx;
189 bool elide;
190}; 205};
191 206
192extern struct sort_entry sort_thread; 207extern struct sort_entry sort_thread;
@@ -197,6 +212,7 @@ int setup_output_field(void);
197void reset_output_field(void); 212void reset_output_field(void);
198extern int sort_dimension__add(const char *); 213extern int sort_dimension__add(const char *);
199void sort__setup_elide(FILE *fp); 214void sort__setup_elide(FILE *fp);
215void perf_hpp__set_elide(int idx, bool elide);
200 216
201int report_parse_ignore_callees_opt(const struct option *opt, const char *arg, int unset); 217int report_parse_ignore_callees_opt(const struct option *opt, const char *arg, int unset);
202 218
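
For illustration of hist_entry__get_percent_limit() above: when cumulate_callchain is enabled, the accumulated period rather than the entry's own period is compared against the total. The figures below are invented.

#include <stdio.h>

static double percent(unsigned long long period, unsigned long long total)
{
	if (total == 0)
		return 0.0;
	return period * 100.0 / total;
}

int main(void)
{
	unsigned long long total = 10000, self = 120, acc = 4300;
	int cumulate = 1;	/* like symbol_conf.cumulate_callchain */

	/* the percent limit is checked against the accumulated share when
	 * cumulation is on, otherwise against the entry's own share */
	printf("limit check uses %.2f%%\n",
	       percent(cumulate ? acc : self, total));
	return 0;
}
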
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 95e249779931..7b9096f29cdb 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -29,11 +29,12 @@ int vmlinux_path__nr_entries;
29char **vmlinux_path; 29char **vmlinux_path;
30 30
31struct symbol_conf symbol_conf = { 31struct symbol_conf symbol_conf = {
32 .use_modules = true, 32 .use_modules = true,
33 .try_vmlinux_path = true, 33 .try_vmlinux_path = true,
34 .annotate_src = true, 34 .annotate_src = true,
35 .demangle = true, 35 .demangle = true,
36 .symfs = "", 36 .cumulate_callchain = true,
37 .symfs = "",
37}; 38};
38 39
39static enum dso_binary_type binary_type_symtab[] = { 40static enum dso_binary_type binary_type_symtab[] = {
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 33ede53fa6b9..615c752dd767 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -109,6 +109,7 @@ struct symbol_conf {
109 show_nr_samples, 109 show_nr_samples,
110 show_total_period, 110 show_total_period,
111 use_callchain, 111 use_callchain,
112 cumulate_callchain,
112 exclude_other, 113 exclude_other,
113 show_cpu_utilization, 114 show_cpu_utilization,
114 initialized, 115 initialized,