path: root/arch
author     Linus Torvalds <torvalds@linux-foundation.org>  2012-05-22 21:18:55 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2012-05-22 21:18:55 -0400
commit     2ff2b289a695807e291e1ed9f639d8a3ba5f4254 (patch)
tree       e4b7f44e5cc1582ba2be8aeba221f4841f4c86a6 /arch
parent     88d6ae8dc33af12fe1c7941b1fae2767374046fd (diff)
parent     73787190d04a34e6da745da893b3ae8bedde418f (diff)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf changes from Ingo Molnar:
 "Lots of changes:

   - (much) improved assembly annotation support in perf report, with
     jump visualization, searching, navigation, visual output
     improvements and more.

   - kernel support for AMD IBS PMU hardware features.  Notably 'perf
     record -e cycles:p' and 'perf top -e cycles:p' should work without
     skid now, like PEBS does on the Intel side, because it takes
     advantage of IBS transparently.

   - the libtracevents library: it is the first step towards unifying
     tracing tooling and perf, and it also gives a tracing library for
     external tools like powertop to rely on.

   - infrastructure: various improvements and refactoring of the UI
     modules and related code

   - infrastructure: cleanup and simplification of the profiling
     targets code (--uid, --pid, --tid, --cpu, --all-cpus, etc.)

   - tons of robustness fixes all around

   - various ftrace updates: speedups, cleanups, robustness
     improvements.

   - typing 'make' in tools/ will now give you a menu of projects to
     build and a short help text to explain what each does.

   - ... and lots of other changes I forgot to list.

  The perf record make bzImage + perf report regression you reported
  should be fixed."

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (166 commits)
  tracing: Remove kernel_lock annotations
  tracing: Fix initial buffer_size_kb state
  ring-buffer: Merge separate resize loops
  perf evsel: Create events initially disabled -- again
  perf tools: Split term type into value type and term type
  perf hists: Fix callchain ip printf format
  perf target: Add uses_mmap field
  ftrace: Remove selecting FRAME_POINTER with FUNCTION_TRACER
  ftrace/x86: Have x86 ftrace use the ftrace_modify_all_code()
  ftrace: Make ftrace_modify_all_code() global for archs to use
  ftrace: Return record ip addr for ftrace_location()
  ftrace: Consolidate ftrace_location() and ftrace_text_reserved()
  ftrace: Speed up search by skipping pages by address
  ftrace: Remove extra helper functions
  ftrace: Sort all function addresses, not just per page
  tracing: change CPU ring buffer state from tracing_cpumask
  tracing: Check return value of tracing_dentry_percpu()
  ring-buffer: Reset head page before running self test
  ring-buffer: Add integrity check at end of iter read
  ring-buffer: Make addition of pages in ring buffer atomic
  ...
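The AMD IBS part of this merge is what makes the skid-free sampling mentioned above work: precise-ip events on the core PMU are handed off to the new ibs_op/ibs_fetch PMUs added in arch/x86/kernel/cpu/perf_event_amd_ibs.c below. The usage documented in that file's new comment block is:

    perf record -a -e cpu-cycles:p ...   # use ibs op counting cycle count
    perf record -a -e r076:p ...         # same as -e cpu-cycles:p
    perf record -a -e r0C1:p ...         # use ibs op counting micro-ops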
Diffstat (limited to 'arch')
-rw-r--r--  arch/alpha/kernel/perf_event.c             |   3
-rw-r--r--  arch/arm/configs/bcmring_defconfig         |   2
-rw-r--r--  arch/arm/kernel/perf_event_v6.c            |   4
-rw-r--r--  arch/arm/kernel/perf_event_v7.c            |   4
-rw-r--r--  arch/arm/kernel/perf_event_xscale.c        |   8
-rw-r--r--  arch/mips/kernel/perf_event_mipsxx.c       |   2
-rw-r--r--  arch/powerpc/configs/chroma_defconfig      |   2
-rw-r--r--  arch/powerpc/configs/gamecube_defconfig    |   2
-rw-r--r--  arch/powerpc/configs/wii_defconfig         |   2
-rw-r--r--  arch/powerpc/perf/core-book3s.c            |   3
-rw-r--r--  arch/powerpc/perf/core-fsl-emb.c           |   3
-rw-r--r--  arch/sh/configs/sh7785lcr_32bit_defconfig  |   2
-rw-r--r--  arch/sparc/configs/sparc64_defconfig       |   2
-rw-r--r--  arch/sparc/kernel/perf_event.c             |   4
-rw-r--r--  arch/x86/Kconfig                           |   1
-rw-r--r--  arch/x86/include/asm/ftrace.h              |   3
-rw-r--r--  arch/x86/include/asm/msr-index.h           |   5
-rw-r--r--  arch/x86/include/asm/perf_event.h          |  12
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c           |   7
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd.c       |  11
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd_ibs.c   | 570
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c     |   4
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c  |   6
-rw-r--r--  arch/x86/kernel/cpu/perf_event_p4.c        |   6
-rw-r--r--  arch/x86/kernel/ftrace.c                   | 500
-rw-r--r--  arch/x86/kernel/nmi.c                      |  10
-rw-r--r--  arch/x86/kernel/traps.c                    |   8
27 files changed, 954 insertions, 232 deletions
diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c
index 0dae252f7a33..d821b17047e0 100644
--- a/arch/alpha/kernel/perf_event.c
+++ b/arch/alpha/kernel/perf_event.c
@@ -824,7 +824,6 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
824 824
825 idx = la_ptr; 825 idx = la_ptr;
826 826
827 perf_sample_data_init(&data, 0);
828 for (j = 0; j < cpuc->n_events; j++) { 827 for (j = 0; j < cpuc->n_events; j++) {
829 if (cpuc->current_idx[j] == idx) 828 if (cpuc->current_idx[j] == idx)
830 break; 829 break;
@@ -848,7 +847,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
848 847
849 hwc = &event->hw; 848 hwc = &event->hw;
850 alpha_perf_event_update(event, hwc, idx, alpha_pmu->pmc_max_period[idx]+1); 849 alpha_perf_event_update(event, hwc, idx, alpha_pmu->pmc_max_period[idx]+1);
851 data.period = event->hw.last_period; 850 perf_sample_data_init(&data, 0, hwc->last_period);
852 851
853 if (alpha_perf_event_set_period(event, hwc, idx)) { 852 if (alpha_perf_event_set_period(event, hwc, idx)) {
854 if (perf_event_overflow(event, &data, regs)) { 853 if (perf_event_overflow(event, &data, regs)) {
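The change above is the template for almost every architecture hunk that follows: perf_sample_data_init() grew a third argument for the sample period, so the separate data.period assignment disappears and the init call moves to the point where the period is known. In pattern form (the same two-line-to-one-line conversion appears in each converted handler):

    /* old two-step initialization */
    perf_sample_data_init(&data, 0);
    data.period = event->hw.last_period;

    /* new single call, period passed at init time */
    perf_sample_data_init(&data, 0, event->hw.last_period);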
diff --git a/arch/arm/configs/bcmring_defconfig b/arch/arm/configs/bcmring_defconfig
index 795374d48f81..9e6a8fe13164 100644
--- a/arch/arm/configs/bcmring_defconfig
+++ b/arch/arm/configs/bcmring_defconfig
@@ -11,7 +11,7 @@ CONFIG_KALLSYMS_EXTRA_PASS=y
11# CONFIG_TIMERFD is not set 11# CONFIG_TIMERFD is not set
12# CONFIG_EVENTFD is not set 12# CONFIG_EVENTFD is not set
13# CONFIG_AIO is not set 13# CONFIG_AIO is not set
14CONFIG_PERF_COUNTERS=y 14CONFIG_PERF_EVENTS=y
15# CONFIG_VM_EVENT_COUNTERS is not set 15# CONFIG_VM_EVENT_COUNTERS is not set
16# CONFIG_SLUB_DEBUG is not set 16# CONFIG_SLUB_DEBUG is not set
17# CONFIG_COMPAT_BRK is not set 17# CONFIG_COMPAT_BRK is not set
diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c
index b78af0cc6ef3..ab627a740fa3 100644
--- a/arch/arm/kernel/perf_event_v6.c
+++ b/arch/arm/kernel/perf_event_v6.c
@@ -489,8 +489,6 @@ armv6pmu_handle_irq(int irq_num,
489 */ 489 */
490 armv6_pmcr_write(pmcr); 490 armv6_pmcr_write(pmcr);
491 491
492 perf_sample_data_init(&data, 0);
493
494 cpuc = &__get_cpu_var(cpu_hw_events); 492 cpuc = &__get_cpu_var(cpu_hw_events);
495 for (idx = 0; idx < cpu_pmu->num_events; ++idx) { 493 for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
496 struct perf_event *event = cpuc->events[idx]; 494 struct perf_event *event = cpuc->events[idx];
@@ -509,7 +507,7 @@ armv6pmu_handle_irq(int irq_num,
509 507
510 hwc = &event->hw; 508 hwc = &event->hw;
511 armpmu_event_update(event, hwc, idx); 509 armpmu_event_update(event, hwc, idx);
512 data.period = event->hw.last_period; 510 perf_sample_data_init(&data, 0, hwc->last_period);
513 if (!armpmu_event_set_period(event, hwc, idx)) 511 if (!armpmu_event_set_period(event, hwc, idx))
514 continue; 512 continue;
515 513
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index 00755d82e2f2..d3c536068162 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -1077,8 +1077,6 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
1077 */ 1077 */
1078 regs = get_irq_regs(); 1078 regs = get_irq_regs();
1079 1079
1080 perf_sample_data_init(&data, 0);
1081
1082 cpuc = &__get_cpu_var(cpu_hw_events); 1080 cpuc = &__get_cpu_var(cpu_hw_events);
1083 for (idx = 0; idx < cpu_pmu->num_events; ++idx) { 1081 for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
1084 struct perf_event *event = cpuc->events[idx]; 1082 struct perf_event *event = cpuc->events[idx];
@@ -1097,7 +1095,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
1097 1095
1098 hwc = &event->hw; 1096 hwc = &event->hw;
1099 armpmu_event_update(event, hwc, idx); 1097 armpmu_event_update(event, hwc, idx);
1100 data.period = event->hw.last_period; 1098 perf_sample_data_init(&data, 0, hwc->last_period);
1101 if (!armpmu_event_set_period(event, hwc, idx)) 1099 if (!armpmu_event_set_period(event, hwc, idx))
1102 continue; 1100 continue;
1103 1101
diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c
index 71a21e6712f5..e34e7254e652 100644
--- a/arch/arm/kernel/perf_event_xscale.c
+++ b/arch/arm/kernel/perf_event_xscale.c
@@ -248,8 +248,6 @@ xscale1pmu_handle_irq(int irq_num, void *dev)
248 248
249 regs = get_irq_regs(); 249 regs = get_irq_regs();
250 250
251 perf_sample_data_init(&data, 0);
252
253 cpuc = &__get_cpu_var(cpu_hw_events); 251 cpuc = &__get_cpu_var(cpu_hw_events);
254 for (idx = 0; idx < cpu_pmu->num_events; ++idx) { 252 for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
255 struct perf_event *event = cpuc->events[idx]; 253 struct perf_event *event = cpuc->events[idx];
@@ -263,7 +261,7 @@ xscale1pmu_handle_irq(int irq_num, void *dev)
263 261
264 hwc = &event->hw; 262 hwc = &event->hw;
265 armpmu_event_update(event, hwc, idx); 263 armpmu_event_update(event, hwc, idx);
266 data.period = event->hw.last_period; 264 perf_sample_data_init(&data, 0, hwc->last_period);
267 if (!armpmu_event_set_period(event, hwc, idx)) 265 if (!armpmu_event_set_period(event, hwc, idx))
268 continue; 266 continue;
269 267
@@ -588,8 +586,6 @@ xscale2pmu_handle_irq(int irq_num, void *dev)
588 586
589 regs = get_irq_regs(); 587 regs = get_irq_regs();
590 588
591 perf_sample_data_init(&data, 0);
592
593 cpuc = &__get_cpu_var(cpu_hw_events); 589 cpuc = &__get_cpu_var(cpu_hw_events);
594 for (idx = 0; idx < cpu_pmu->num_events; ++idx) { 590 for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
595 struct perf_event *event = cpuc->events[idx]; 591 struct perf_event *event = cpuc->events[idx];
@@ -603,7 +599,7 @@ xscale2pmu_handle_irq(int irq_num, void *dev)
603 599
604 hwc = &event->hw; 600 hwc = &event->hw;
605 armpmu_event_update(event, hwc, idx); 601 armpmu_event_update(event, hwc, idx);
606 data.period = event->hw.last_period; 602 perf_sample_data_init(&data, 0, hwc->last_period);
607 if (!armpmu_event_set_period(event, hwc, idx)) 603 if (!armpmu_event_set_period(event, hwc, idx))
608 continue; 604 continue;
609 605
diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index 811084f4e422..ab73fa2fb9b5 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -1325,7 +1325,7 @@ static int mipsxx_pmu_handle_shared_irq(void)
1325 1325
1326 regs = get_irq_regs(); 1326 regs = get_irq_regs();
1327 1327
1328 perf_sample_data_init(&data, 0); 1328 perf_sample_data_init(&data, 0, 0);
1329 1329
1330 switch (counters) { 1330 switch (counters) {
1331#define HANDLE_COUNTER(n) \ 1331#define HANDLE_COUNTER(n) \
diff --git a/arch/powerpc/configs/chroma_defconfig b/arch/powerpc/configs/chroma_defconfig
index f104ccde6b53..b1f9597fe312 100644
--- a/arch/powerpc/configs/chroma_defconfig
+++ b/arch/powerpc/configs/chroma_defconfig
@@ -32,7 +32,7 @@ CONFIG_RD_LZMA=y
32CONFIG_INITRAMFS_COMPRESSION_GZIP=y 32CONFIG_INITRAMFS_COMPRESSION_GZIP=y
33CONFIG_KALLSYMS_ALL=y 33CONFIG_KALLSYMS_ALL=y
34CONFIG_EMBEDDED=y 34CONFIG_EMBEDDED=y
35CONFIG_PERF_COUNTERS=y 35CONFIG_PERF_EVENTS=y
36CONFIG_PROFILING=y 36CONFIG_PROFILING=y
37CONFIG_OPROFILE=y 37CONFIG_OPROFILE=y
38CONFIG_KPROBES=y 38CONFIG_KPROBES=y
diff --git a/arch/powerpc/configs/gamecube_defconfig b/arch/powerpc/configs/gamecube_defconfig
index e74d3a483705..9ef2cc13e1b4 100644
--- a/arch/powerpc/configs/gamecube_defconfig
+++ b/arch/powerpc/configs/gamecube_defconfig
@@ -8,7 +8,7 @@ CONFIG_BLK_DEV_INITRD=y
8# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set 8# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
9CONFIG_EXPERT=y 9CONFIG_EXPERT=y
10# CONFIG_ELF_CORE is not set 10# CONFIG_ELF_CORE is not set
11CONFIG_PERF_COUNTERS=y 11CONFIG_PERF_EVENTS=y
12# CONFIG_VM_EVENT_COUNTERS is not set 12# CONFIG_VM_EVENT_COUNTERS is not set
13CONFIG_SLAB=y 13CONFIG_SLAB=y
14CONFIG_MODULES=y 14CONFIG_MODULES=y
diff --git a/arch/powerpc/configs/wii_defconfig b/arch/powerpc/configs/wii_defconfig
index 175295fbf4f3..1e2b7d062aa4 100644
--- a/arch/powerpc/configs/wii_defconfig
+++ b/arch/powerpc/configs/wii_defconfig
@@ -9,7 +9,7 @@ CONFIG_BLK_DEV_INITRD=y
9# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set 9# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
10CONFIG_EXPERT=y 10CONFIG_EXPERT=y
11# CONFIG_ELF_CORE is not set 11# CONFIG_ELF_CORE is not set
12CONFIG_PERF_COUNTERS=y 12CONFIG_PERF_EVENTS=y
13# CONFIG_VM_EVENT_COUNTERS is not set 13# CONFIG_VM_EVENT_COUNTERS is not set
14CONFIG_SLAB=y 14CONFIG_SLAB=y
15CONFIG_MODULES=y 15CONFIG_MODULES=y
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 02aee03e713c..8f84bcba18da 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -1299,8 +1299,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
1299 if (record) { 1299 if (record) {
1300 struct perf_sample_data data; 1300 struct perf_sample_data data;
1301 1301
1302 perf_sample_data_init(&data, ~0ULL); 1302 perf_sample_data_init(&data, ~0ULL, event->hw.last_period);
1303 data.period = event->hw.last_period;
1304 1303
1305 if (event->attr.sample_type & PERF_SAMPLE_ADDR) 1304 if (event->attr.sample_type & PERF_SAMPLE_ADDR)
1306 perf_get_data_addr(regs, &data.addr); 1305 perf_get_data_addr(regs, &data.addr);
diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c
index 0a6d2a9d569c..106c53354675 100644
--- a/arch/powerpc/perf/core-fsl-emb.c
+++ b/arch/powerpc/perf/core-fsl-emb.c
@@ -613,8 +613,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
613 if (record) { 613 if (record) {
614 struct perf_sample_data data; 614 struct perf_sample_data data;
615 615
616 perf_sample_data_init(&data, 0); 616 perf_sample_data_init(&data, 0, event->hw.last_period);
617 data.period = event->hw.last_period;
618 617
619 if (perf_event_overflow(event, &data, regs)) 618 if (perf_event_overflow(event, &data, regs))
620 fsl_emb_pmu_stop(event, 0); 619 fsl_emb_pmu_stop(event, 0);
diff --git a/arch/sh/configs/sh7785lcr_32bit_defconfig b/arch/sh/configs/sh7785lcr_32bit_defconfig
index 7b9c696ac5e0..9bdcf72ec06a 100644
--- a/arch/sh/configs/sh7785lcr_32bit_defconfig
+++ b/arch/sh/configs/sh7785lcr_32bit_defconfig
@@ -5,7 +5,7 @@ CONFIG_BSD_PROCESS_ACCT=y
5CONFIG_IKCONFIG=y 5CONFIG_IKCONFIG=y
6CONFIG_IKCONFIG_PROC=y 6CONFIG_IKCONFIG_PROC=y
7CONFIG_LOG_BUF_SHIFT=16 7CONFIG_LOG_BUF_SHIFT=16
8CONFIG_PERF_COUNTERS=y 8CONFIG_PERF_EVENTS=y
9# CONFIG_COMPAT_BRK is not set 9# CONFIG_COMPAT_BRK is not set
10CONFIG_SLAB=y 10CONFIG_SLAB=y
11CONFIG_PROFILING=y 11CONFIG_PROFILING=y
diff --git a/arch/sparc/configs/sparc64_defconfig b/arch/sparc/configs/sparc64_defconfig
index 3c1e85807403..9d8521b8c854 100644
--- a/arch/sparc/configs/sparc64_defconfig
+++ b/arch/sparc/configs/sparc64_defconfig
@@ -5,7 +5,7 @@ CONFIG_SYSVIPC=y
5CONFIG_POSIX_MQUEUE=y 5CONFIG_POSIX_MQUEUE=y
6CONFIG_LOG_BUF_SHIFT=18 6CONFIG_LOG_BUF_SHIFT=18
7CONFIG_BLK_DEV_INITRD=y 7CONFIG_BLK_DEV_INITRD=y
8CONFIG_PERF_COUNTERS=y 8CONFIG_PERF_EVENTS=y
9# CONFIG_COMPAT_BRK is not set 9# CONFIG_COMPAT_BRK is not set
10CONFIG_SLAB=y 10CONFIG_SLAB=y
11CONFIG_PROFILING=y 11CONFIG_PROFILING=y
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 28559ce5eeb5..5713957dcb8a 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -1296,8 +1296,6 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
1296 1296
1297 regs = args->regs; 1297 regs = args->regs;
1298 1298
1299 perf_sample_data_init(&data, 0);
1300
1301 cpuc = &__get_cpu_var(cpu_hw_events); 1299 cpuc = &__get_cpu_var(cpu_hw_events);
1302 1300
1303 /* If the PMU has the TOE IRQ enable bits, we need to do a 1301 /* If the PMU has the TOE IRQ enable bits, we need to do a
@@ -1321,7 +1319,7 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
1321 if (val & (1ULL << 31)) 1319 if (val & (1ULL << 31))
1322 continue; 1320 continue;
1323 1321
1324 data.period = event->hw.last_period; 1322 perf_sample_data_init(&data, 0, hwc->last_period);
1325 if (!sparc_perf_event_set_period(event, hwc, idx)) 1323 if (!sparc_perf_event_set_period(event, hwc, idx))
1326 continue; 1324 continue;
1327 1325
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2787fbec7aed..7b383d8da7b9 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -40,7 +40,6 @@ config X86
40 select HAVE_FUNCTION_GRAPH_TRACER 40 select HAVE_FUNCTION_GRAPH_TRACER
41 select HAVE_FUNCTION_GRAPH_FP_TEST 41 select HAVE_FUNCTION_GRAPH_FP_TEST
42 select HAVE_FUNCTION_TRACE_MCOUNT_TEST 42 select HAVE_FUNCTION_TRACE_MCOUNT_TEST
43 select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE
44 select HAVE_SYSCALL_TRACEPOINTS 43 select HAVE_SYSCALL_TRACEPOINTS
45 select HAVE_KVM 44 select HAVE_KVM
46 select HAVE_ARCH_KGDB 45 select HAVE_ARCH_KGDB
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 268c783ab1c0..18d9005d9e4f 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -34,6 +34,7 @@
34 34
35#ifndef __ASSEMBLY__ 35#ifndef __ASSEMBLY__
36extern void mcount(void); 36extern void mcount(void);
37extern int modifying_ftrace_code;
37 38
38static inline unsigned long ftrace_call_adjust(unsigned long addr) 39static inline unsigned long ftrace_call_adjust(unsigned long addr)
39{ 40{
@@ -50,6 +51,8 @@ struct dyn_arch_ftrace {
50 /* No extra data needed for x86 */ 51 /* No extra data needed for x86 */
51}; 52};
52 53
54int ftrace_int3_handler(struct pt_regs *regs);
55
53#endif /* CONFIG_DYNAMIC_FTRACE */ 56#endif /* CONFIG_DYNAMIC_FTRACE */
54#endif /* __ASSEMBLY__ */ 57#endif /* __ASSEMBLY__ */
55#endif /* CONFIG_FUNCTION_TRACER */ 58#endif /* CONFIG_FUNCTION_TRACER */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index ccb805966f68..957ec87385af 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -134,6 +134,8 @@
134#define MSR_AMD64_IBSFETCHCTL 0xc0011030 134#define MSR_AMD64_IBSFETCHCTL 0xc0011030
135#define MSR_AMD64_IBSFETCHLINAD 0xc0011031 135#define MSR_AMD64_IBSFETCHLINAD 0xc0011031
136#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032 136#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032
137#define MSR_AMD64_IBSFETCH_REG_COUNT 3
138#define MSR_AMD64_IBSFETCH_REG_MASK ((1UL<<MSR_AMD64_IBSFETCH_REG_COUNT)-1)
137#define MSR_AMD64_IBSOPCTL 0xc0011033 139#define MSR_AMD64_IBSOPCTL 0xc0011033
138#define MSR_AMD64_IBSOPRIP 0xc0011034 140#define MSR_AMD64_IBSOPRIP 0xc0011034
139#define MSR_AMD64_IBSOPDATA 0xc0011035 141#define MSR_AMD64_IBSOPDATA 0xc0011035
@@ -141,8 +143,11 @@
141#define MSR_AMD64_IBSOPDATA3 0xc0011037 143#define MSR_AMD64_IBSOPDATA3 0xc0011037
142#define MSR_AMD64_IBSDCLINAD 0xc0011038 144#define MSR_AMD64_IBSDCLINAD 0xc0011038
143#define MSR_AMD64_IBSDCPHYSAD 0xc0011039 145#define MSR_AMD64_IBSDCPHYSAD 0xc0011039
146#define MSR_AMD64_IBSOP_REG_COUNT 7
147#define MSR_AMD64_IBSOP_REG_MASK ((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1)
144#define MSR_AMD64_IBSCTL 0xc001103a 148#define MSR_AMD64_IBSCTL 0xc001103a
145#define MSR_AMD64_IBSBRTARGET 0xc001103b 149#define MSR_AMD64_IBSBRTARGET 0xc001103b
150#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
146 151
147/* Fam 15h MSRs */ 152/* Fam 15h MSRs */
148#define MSR_F15H_PERF_CTL 0xc0010200 153#define MSR_F15H_PERF_CTL 0xc0010200
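The new *_REG_COUNT/*_REG_MASK macros describe a contiguous run of IBS MSRs starting at the control register, as a count plus the matching low-bit mask; perf_ibs_handle_irq() further down walks that mask with find_next_bit() to decide which MSRs to copy into a raw sample. A quick stand-alone check of the mask arithmetic (hypothetical user-space snippet, not part of the patch):

    #include <assert.h>
    #include <stdio.h>

    int main(void)
    {
            /* (1UL << count) - 1 sets exactly the low 'count' bits */
            unsigned long fetch_mask = (1UL << 3) - 1;  /* MSR_AMD64_IBSFETCH_REG_MASK */
            unsigned long op_mask    = (1UL << 7) - 1;  /* MSR_AMD64_IBSOP_REG_MASK    */

            assert(fetch_mask == 0x07);  /* IBSFETCHCTL, IBSFETCHLINAD, IBSFETCHPHYSAD */
            assert(op_mask == 0x7f);     /* IBSOPCTL through IBSDCPHYSAD, 7 registers  */
            printf("fetch mask 0x%lx, op mask 0x%lx\n", fetch_mask, op_mask);
            return 0;
    }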
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 2291895b1836..588f52ea810e 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -158,6 +158,7 @@ struct x86_pmu_capability {
158#define IBS_CAPS_OPCNT (1U<<4) 158#define IBS_CAPS_OPCNT (1U<<4)
159#define IBS_CAPS_BRNTRGT (1U<<5) 159#define IBS_CAPS_BRNTRGT (1U<<5)
160#define IBS_CAPS_OPCNTEXT (1U<<6) 160#define IBS_CAPS_OPCNTEXT (1U<<6)
161#define IBS_CAPS_RIPINVALIDCHK (1U<<7)
161 162
162#define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \ 163#define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \
163 | IBS_CAPS_FETCHSAM \ 164 | IBS_CAPS_FETCHSAM \
@@ -170,21 +171,28 @@ struct x86_pmu_capability {
170#define IBSCTL_LVT_OFFSET_VALID (1ULL<<8) 171#define IBSCTL_LVT_OFFSET_VALID (1ULL<<8)
171#define IBSCTL_LVT_OFFSET_MASK 0x0F 172#define IBSCTL_LVT_OFFSET_MASK 0x0F
172 173
173/* IbsFetchCtl bits/masks */ 174/* ibs fetch bits/masks */
174#define IBS_FETCH_RAND_EN (1ULL<<57) 175#define IBS_FETCH_RAND_EN (1ULL<<57)
175#define IBS_FETCH_VAL (1ULL<<49) 176#define IBS_FETCH_VAL (1ULL<<49)
176#define IBS_FETCH_ENABLE (1ULL<<48) 177#define IBS_FETCH_ENABLE (1ULL<<48)
177#define IBS_FETCH_CNT 0xFFFF0000ULL 178#define IBS_FETCH_CNT 0xFFFF0000ULL
178#define IBS_FETCH_MAX_CNT 0x0000FFFFULL 179#define IBS_FETCH_MAX_CNT 0x0000FFFFULL
179 180
180/* IbsOpCtl bits */ 181/* ibs op bits/masks */
182/* lower 4 bits of the current count are ignored: */
183#define IBS_OP_CUR_CNT (0xFFFF0ULL<<32)
181#define IBS_OP_CNT_CTL (1ULL<<19) 184#define IBS_OP_CNT_CTL (1ULL<<19)
182#define IBS_OP_VAL (1ULL<<18) 185#define IBS_OP_VAL (1ULL<<18)
183#define IBS_OP_ENABLE (1ULL<<17) 186#define IBS_OP_ENABLE (1ULL<<17)
184#define IBS_OP_MAX_CNT 0x0000FFFFULL 187#define IBS_OP_MAX_CNT 0x0000FFFFULL
185#define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */ 188#define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */
189#define IBS_RIP_INVALID (1ULL<<38)
186 190
191#ifdef CONFIG_X86_LOCAL_APIC
187extern u32 get_ibs_caps(void); 192extern u32 get_ibs_caps(void);
193#else
194static inline u32 get_ibs_caps(void) { return 0; }
195#endif
188 196
189#ifdef CONFIG_PERF_EVENTS 197#ifdef CONFIG_PERF_EVENTS
190extern void perf_events_lapic_init(void); 198extern void perf_events_lapic_init(void);
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index bb8e03407e18..e049d6da0183 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -484,9 +484,6 @@ static int __x86_pmu_event_init(struct perf_event *event)
484 484
485 /* mark unused */ 485 /* mark unused */
486 event->hw.extra_reg.idx = EXTRA_REG_NONE; 486 event->hw.extra_reg.idx = EXTRA_REG_NONE;
487
488 /* mark not used */
489 event->hw.extra_reg.idx = EXTRA_REG_NONE;
490 event->hw.branch_reg.idx = EXTRA_REG_NONE; 487 event->hw.branch_reg.idx = EXTRA_REG_NONE;
491 488
492 return x86_pmu.hw_config(event); 489 return x86_pmu.hw_config(event);
@@ -1186,8 +1183,6 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
1186 int idx, handled = 0; 1183 int idx, handled = 0;
1187 u64 val; 1184 u64 val;
1188 1185
1189 perf_sample_data_init(&data, 0);
1190
1191 cpuc = &__get_cpu_var(cpu_hw_events); 1186 cpuc = &__get_cpu_var(cpu_hw_events);
1192 1187
1193 /* 1188 /*
@@ -1222,7 +1217,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
1222 * event overflow 1217 * event overflow
1223 */ 1218 */
1224 handled++; 1219 handled++;
1225 data.period = event->hw.last_period; 1220 perf_sample_data_init(&data, 0, event->hw.last_period);
1226 1221
1227 if (!x86_perf_event_set_period(event)) 1222 if (!x86_perf_event_set_period(event))
1228 continue; 1223 continue;
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 95e7fe1c5f0b..65652265fffd 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -134,8 +134,13 @@ static u64 amd_pmu_event_map(int hw_event)
134 134
135static int amd_pmu_hw_config(struct perf_event *event) 135static int amd_pmu_hw_config(struct perf_event *event)
136{ 136{
137 int ret = x86_pmu_hw_config(event); 137 int ret;
138 138
139 /* pass precise event sampling to ibs: */
140 if (event->attr.precise_ip && get_ibs_caps())
141 return -ENOENT;
142
143 ret = x86_pmu_hw_config(event);
139 if (ret) 144 if (ret)
140 return ret; 145 return ret;
141 146
@@ -205,10 +210,8 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
205 * when we come here 210 * when we come here
206 */ 211 */
207 for (i = 0; i < x86_pmu.num_counters; i++) { 212 for (i = 0; i < x86_pmu.num_counters; i++) {
208 if (nb->owners[i] == event) { 213 if (cmpxchg(nb->owners + i, event, NULL) == event)
209 cmpxchg(nb->owners+i, event, NULL);
210 break; 214 break;
211 }
212 } 215 }
213} 216}
214 217
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index 3b8a2d30d14e..da9bcdcd9856 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -9,6 +9,7 @@
9#include <linux/perf_event.h> 9#include <linux/perf_event.h>
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/pci.h> 11#include <linux/pci.h>
12#include <linux/ptrace.h>
12 13
13#include <asm/apic.h> 14#include <asm/apic.h>
14 15
@@ -16,36 +17,591 @@ static u32 ibs_caps;
16 17
17#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) 18#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
18 19
19static struct pmu perf_ibs; 20#include <linux/kprobes.h>
21#include <linux/hardirq.h>
22
23#include <asm/nmi.h>
24
25#define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
26#define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT
27
28enum ibs_states {
29 IBS_ENABLED = 0,
30 IBS_STARTED = 1,
31 IBS_STOPPING = 2,
32
33 IBS_MAX_STATES,
34};
35
36struct cpu_perf_ibs {
37 struct perf_event *event;
38 unsigned long state[BITS_TO_LONGS(IBS_MAX_STATES)];
39};
40
41struct perf_ibs {
42 struct pmu pmu;
43 unsigned int msr;
44 u64 config_mask;
45 u64 cnt_mask;
46 u64 enable_mask;
47 u64 valid_mask;
48 u64 max_period;
49 unsigned long offset_mask[1];
50 int offset_max;
51 struct cpu_perf_ibs __percpu *pcpu;
52 u64 (*get_count)(u64 config);
53};
54
55struct perf_ibs_data {
56 u32 size;
57 union {
58 u32 data[0]; /* data buffer starts here */
59 u32 caps;
60 };
61 u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX];
62};
63
64static int
65perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period)
66{
67 s64 left = local64_read(&hwc->period_left);
68 s64 period = hwc->sample_period;
69 int overflow = 0;
70
71 /*
72 * If we are way outside a reasonable range then just skip forward:
73 */
74 if (unlikely(left <= -period)) {
75 left = period;
76 local64_set(&hwc->period_left, left);
77 hwc->last_period = period;
78 overflow = 1;
79 }
80
81 if (unlikely(left < (s64)min)) {
82 left += period;
83 local64_set(&hwc->period_left, left);
84 hwc->last_period = period;
85 overflow = 1;
86 }
87
88 /*
89 * If the hw period that triggers the sw overflow is too short
90 * we might hit the irq handler. This biases the results.
91 * Thus we shorten the next-to-last period and set the last
92 * period to the max period.
93 */
94 if (left > max) {
95 left -= max;
96 if (left > max)
97 left = max;
98 else if (left < min)
99 left = min;
100 }
101
102 *hw_period = (u64)left;
103
104 return overflow;
105}
106
107static int
108perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width)
109{
110 struct hw_perf_event *hwc = &event->hw;
111 int shift = 64 - width;
112 u64 prev_raw_count;
113 u64 delta;
114
115 /*
116 * Careful: an NMI might modify the previous event value.
117 *
118 * Our tactic to handle this is to first atomically read and
119 * exchange a new raw count - then add that new-prev delta
120 * count to the generic event atomically:
121 */
122 prev_raw_count = local64_read(&hwc->prev_count);
123 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
124 new_raw_count) != prev_raw_count)
125 return 0;
126
127 /*
128 * Now we have the new raw value and have updated the prev
129 * timestamp already. We can now calculate the elapsed delta
130 * (event-)time and add that to the generic event.
131 *
132 * Careful, not all hw sign-extends above the physical width
133 * of the count.
134 */
135 delta = (new_raw_count << shift) - (prev_raw_count << shift);
136 delta >>= shift;
137
138 local64_add(delta, &event->count);
139 local64_sub(delta, &hwc->period_left);
140
141 return 1;
142}
143
144static struct perf_ibs perf_ibs_fetch;
145static struct perf_ibs perf_ibs_op;
146
147static struct perf_ibs *get_ibs_pmu(int type)
148{
149 if (perf_ibs_fetch.pmu.type == type)
150 return &perf_ibs_fetch;
151 if (perf_ibs_op.pmu.type == type)
152 return &perf_ibs_op;
153 return NULL;
154}
155
156/*
157 * Use IBS for precise event sampling:
158 *
159 * perf record -a -e cpu-cycles:p ... # use ibs op counting cycle count
160 * perf record -a -e r076:p ... # same as -e cpu-cycles:p
161 * perf record -a -e r0C1:p ... # use ibs op counting micro-ops
162 *
163 * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl,
164 * MSRC001_1033) is used to select either cycle or micro-ops counting
165 * mode.
166 *
167 * The rip of IBS samples has skid 0. Thus, IBS supports precise
168 * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the
169 * rip is invalid when IBS was not able to record the rip correctly.
170 * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then.
171 *
172 */
173static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
174{
175 switch (event->attr.precise_ip) {
176 case 0:
177 return -ENOENT;
178 case 1:
179 case 2:
180 break;
181 default:
182 return -EOPNOTSUPP;
183 }
184
185 switch (event->attr.type) {
186 case PERF_TYPE_HARDWARE:
187 switch (event->attr.config) {
188 case PERF_COUNT_HW_CPU_CYCLES:
189 *config = 0;
190 return 0;
191 }
192 break;
193 case PERF_TYPE_RAW:
194 switch (event->attr.config) {
195 case 0x0076:
196 *config = 0;
197 return 0;
198 case 0x00C1:
199 *config = IBS_OP_CNT_CTL;
200 return 0;
201 }
202 break;
203 default:
204 return -ENOENT;
205 }
206
207 return -EOPNOTSUPP;
208}
20 209
21static int perf_ibs_init(struct perf_event *event) 210static int perf_ibs_init(struct perf_event *event)
22{ 211{
23 if (perf_ibs.type != event->attr.type) 212 struct hw_perf_event *hwc = &event->hw;
213 struct perf_ibs *perf_ibs;
214 u64 max_cnt, config;
215 int ret;
216
217 perf_ibs = get_ibs_pmu(event->attr.type);
218 if (perf_ibs) {
219 config = event->attr.config;
220 } else {
221 perf_ibs = &perf_ibs_op;
222 ret = perf_ibs_precise_event(event, &config);
223 if (ret)
224 return ret;
225 }
226
227 if (event->pmu != &perf_ibs->pmu)
24 return -ENOENT; 228 return -ENOENT;
229
230 if (config & ~perf_ibs->config_mask)
231 return -EINVAL;
232
233 if (hwc->sample_period) {
234 if (config & perf_ibs->cnt_mask)
235 /* raw max_cnt may not be set */
236 return -EINVAL;
237 if (!event->attr.sample_freq && hwc->sample_period & 0x0f)
238 /*
239 * lower 4 bits can not be set in ibs max cnt,
240 * but allowing it in case we adjust the
241 * sample period to set a frequency.
242 */
243 return -EINVAL;
244 hwc->sample_period &= ~0x0FULL;
245 if (!hwc->sample_period)
246 hwc->sample_period = 0x10;
247 } else {
248 max_cnt = config & perf_ibs->cnt_mask;
249 config &= ~perf_ibs->cnt_mask;
250 event->attr.sample_period = max_cnt << 4;
251 hwc->sample_period = event->attr.sample_period;
252 }
253
254 if (!hwc->sample_period)
255 return -EINVAL;
256
257 /*
258 * If we modify hwc->sample_period, we also need to update
259 * hwc->last_period and hwc->period_left.
260 */
261 hwc->last_period = hwc->sample_period;
262 local64_set(&hwc->period_left, hwc->sample_period);
263
264 hwc->config_base = perf_ibs->msr;
265 hwc->config = config;
266
25 return 0; 267 return 0;
26} 268}
27 269
270static int perf_ibs_set_period(struct perf_ibs *perf_ibs,
271 struct hw_perf_event *hwc, u64 *period)
272{
273 int overflow;
274
275 /* ignore lower 4 bits in min count: */
276 overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period);
277 local64_set(&hwc->prev_count, 0);
278
279 return overflow;
280}
281
282static u64 get_ibs_fetch_count(u64 config)
283{
284 return (config & IBS_FETCH_CNT) >> 12;
285}
286
287static u64 get_ibs_op_count(u64 config)
288{
289 u64 count = 0;
290
291 if (config & IBS_OP_VAL)
292 count += (config & IBS_OP_MAX_CNT) << 4; /* cnt rolled over */
293
294 if (ibs_caps & IBS_CAPS_RDWROPCNT)
295 count += (config & IBS_OP_CUR_CNT) >> 32;
296
297 return count;
298}
299
300static void
301perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
302 u64 *config)
303{
304 u64 count = perf_ibs->get_count(*config);
305
306 /*
307 * Set width to 64 since we do not overflow on max width but
308 * instead on max count. In perf_ibs_set_period() we clear
309 * prev count manually on overflow.
310 */
311 while (!perf_event_try_update(event, count, 64)) {
312 rdmsrl(event->hw.config_base, *config);
313 count = perf_ibs->get_count(*config);
314 }
315}
316
317static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
318 struct hw_perf_event *hwc, u64 config)
319{
320 wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
321}
322
323/*
324 * Erratum #420 Instruction-Based Sampling Engine May Generate
325 * Interrupt that Cannot Be Cleared:
326 *
327 * Must clear counter mask first, then clear the enable bit. See
328 * Revision Guide for AMD Family 10h Processors, Publication #41322.
329 */
330static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs,
331 struct hw_perf_event *hwc, u64 config)
332{
333 config &= ~perf_ibs->cnt_mask;
334 wrmsrl(hwc->config_base, config);
335 config &= ~perf_ibs->enable_mask;
336 wrmsrl(hwc->config_base, config);
337}
338
339/*
340 * We cannot restore the ibs pmu state, so we always needs to update
341 * the event while stopping it and then reset the state when starting
342 * again. Thus, ignoring PERF_EF_RELOAD and PERF_EF_UPDATE flags in
343 * perf_ibs_start()/perf_ibs_stop() and instead always do it.
344 */
345static void perf_ibs_start(struct perf_event *event, int flags)
346{
347 struct hw_perf_event *hwc = &event->hw;
348 struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
349 struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
350 u64 period;
351
352 if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
353 return;
354
355 WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
356 hwc->state = 0;
357
358 perf_ibs_set_period(perf_ibs, hwc, &period);
359 set_bit(IBS_STARTED, pcpu->state);
360 perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
361
362 perf_event_update_userpage(event);
363}
364
365static void perf_ibs_stop(struct perf_event *event, int flags)
366{
367 struct hw_perf_event *hwc = &event->hw;
368 struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
369 struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
370 u64 config;
371 int stopping;
372
373 stopping = test_and_clear_bit(IBS_STARTED, pcpu->state);
374
375 if (!stopping && (hwc->state & PERF_HES_UPTODATE))
376 return;
377
378 rdmsrl(hwc->config_base, config);
379
380 if (stopping) {
381 set_bit(IBS_STOPPING, pcpu->state);
382 perf_ibs_disable_event(perf_ibs, hwc, config);
383 WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
384 hwc->state |= PERF_HES_STOPPED;
385 }
386
387 if (hwc->state & PERF_HES_UPTODATE)
388 return;
389
390 /*
391 * Clear valid bit to not count rollovers on update, rollovers
392 * are only updated in the irq handler.
393 */
394 config &= ~perf_ibs->valid_mask;
395
396 perf_ibs_event_update(perf_ibs, event, &config);
397 hwc->state |= PERF_HES_UPTODATE;
398}
399
28static int perf_ibs_add(struct perf_event *event, int flags) 400static int perf_ibs_add(struct perf_event *event, int flags)
29{ 401{
402 struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
403 struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
404
405 if (test_and_set_bit(IBS_ENABLED, pcpu->state))
406 return -ENOSPC;
407
408 event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
409
410 pcpu->event = event;
411
412 if (flags & PERF_EF_START)
413 perf_ibs_start(event, PERF_EF_RELOAD);
414
30 return 0; 415 return 0;
31} 416}
32 417
33static void perf_ibs_del(struct perf_event *event, int flags) 418static void perf_ibs_del(struct perf_event *event, int flags)
34{ 419{
420 struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
421 struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
422
423 if (!test_and_clear_bit(IBS_ENABLED, pcpu->state))
424 return;
425
426 perf_ibs_stop(event, PERF_EF_UPDATE);
427
428 pcpu->event = NULL;
429
430 perf_event_update_userpage(event);
35} 431}
36 432
37static struct pmu perf_ibs = { 433static void perf_ibs_read(struct perf_event *event) { }
38 .event_init= perf_ibs_init, 434
39 .add= perf_ibs_add, 435static struct perf_ibs perf_ibs_fetch = {
40 .del= perf_ibs_del, 436 .pmu = {
437 .task_ctx_nr = perf_invalid_context,
438
439 .event_init = perf_ibs_init,
440 .add = perf_ibs_add,
441 .del = perf_ibs_del,
442 .start = perf_ibs_start,
443 .stop = perf_ibs_stop,
444 .read = perf_ibs_read,
445 },
446 .msr = MSR_AMD64_IBSFETCHCTL,
447 .config_mask = IBS_FETCH_CONFIG_MASK,
448 .cnt_mask = IBS_FETCH_MAX_CNT,
449 .enable_mask = IBS_FETCH_ENABLE,
450 .valid_mask = IBS_FETCH_VAL,
451 .max_period = IBS_FETCH_MAX_CNT << 4,
452 .offset_mask = { MSR_AMD64_IBSFETCH_REG_MASK },
453 .offset_max = MSR_AMD64_IBSFETCH_REG_COUNT,
454
455 .get_count = get_ibs_fetch_count,
41}; 456};
42 457
458static struct perf_ibs perf_ibs_op = {
459 .pmu = {
460 .task_ctx_nr = perf_invalid_context,
461
462 .event_init = perf_ibs_init,
463 .add = perf_ibs_add,
464 .del = perf_ibs_del,
465 .start = perf_ibs_start,
466 .stop = perf_ibs_stop,
467 .read = perf_ibs_read,
468 },
469 .msr = MSR_AMD64_IBSOPCTL,
470 .config_mask = IBS_OP_CONFIG_MASK,
471 .cnt_mask = IBS_OP_MAX_CNT,
472 .enable_mask = IBS_OP_ENABLE,
473 .valid_mask = IBS_OP_VAL,
474 .max_period = IBS_OP_MAX_CNT << 4,
475 .offset_mask = { MSR_AMD64_IBSOP_REG_MASK },
476 .offset_max = MSR_AMD64_IBSOP_REG_COUNT,
477
478 .get_count = get_ibs_op_count,
479};
480
481static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
482{
483 struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
484 struct perf_event *event = pcpu->event;
485 struct hw_perf_event *hwc = &event->hw;
486 struct perf_sample_data data;
487 struct perf_raw_record raw;
488 struct pt_regs regs;
489 struct perf_ibs_data ibs_data;
490 int offset, size, check_rip, offset_max, throttle = 0;
491 unsigned int msr;
492 u64 *buf, *config, period;
493
494 if (!test_bit(IBS_STARTED, pcpu->state)) {
495 /*
496 * Catch spurious interrupts after stopping IBS: After
497 * disabling IBS there could be still incomming NMIs
498 * with samples that even have the valid bit cleared.
499 * Mark all this NMIs as handled.
500 */
501 return test_and_clear_bit(IBS_STOPPING, pcpu->state) ? 1 : 0;
502 }
503
504 msr = hwc->config_base;
505 buf = ibs_data.regs;
506 rdmsrl(msr, *buf);
507 if (!(*buf++ & perf_ibs->valid_mask))
508 return 0;
509
510 config = &ibs_data.regs[0];
511 perf_ibs_event_update(perf_ibs, event, config);
512 perf_sample_data_init(&data, 0, hwc->last_period);
513 if (!perf_ibs_set_period(perf_ibs, hwc, &period))
514 goto out; /* no sw counter overflow */
515
516 ibs_data.caps = ibs_caps;
517 size = 1;
518 offset = 1;
519 check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK));
520 if (event->attr.sample_type & PERF_SAMPLE_RAW)
521 offset_max = perf_ibs->offset_max;
522 else if (check_rip)
523 offset_max = 2;
524 else
525 offset_max = 1;
526 do {
527 rdmsrl(msr + offset, *buf++);
528 size++;
529 offset = find_next_bit(perf_ibs->offset_mask,
530 perf_ibs->offset_max,
531 offset + 1);
532 } while (offset < offset_max);
533 ibs_data.size = sizeof(u64) * size;
534
535 regs = *iregs;
536 if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
537 regs.flags &= ~PERF_EFLAGS_EXACT;
538 } else {
539 instruction_pointer_set(&regs, ibs_data.regs[1]);
540 regs.flags |= PERF_EFLAGS_EXACT;
541 }
542
543 if (event->attr.sample_type & PERF_SAMPLE_RAW) {
544 raw.size = sizeof(u32) + ibs_data.size;
545 raw.data = ibs_data.data;
546 data.raw = &raw;
547 }
548
549 throttle = perf_event_overflow(event, &data, &regs);
550out:
551 if (throttle)
552 perf_ibs_disable_event(perf_ibs, hwc, *config);
553 else
554 perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
555
556 perf_event_update_userpage(event);
557
558 return 1;
559}
560
561static int __kprobes
562perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
563{
564 int handled = 0;
565
566 handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs);
567 handled += perf_ibs_handle_irq(&perf_ibs_op, regs);
568
569 if (handled)
570 inc_irq_stat(apic_perf_irqs);
571
572 return handled;
573}
574
575static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
576{
577 struct cpu_perf_ibs __percpu *pcpu;
578 int ret;
579
580 pcpu = alloc_percpu(struct cpu_perf_ibs);
581 if (!pcpu)
582 return -ENOMEM;
583
584 perf_ibs->pcpu = pcpu;
585
586 ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
587 if (ret) {
588 perf_ibs->pcpu = NULL;
589 free_percpu(pcpu);
590 }
591
592 return ret;
593}
594
43static __init int perf_event_ibs_init(void) 595static __init int perf_event_ibs_init(void)
44{ 596{
45 if (!ibs_caps) 597 if (!ibs_caps)
46 return -ENODEV; /* ibs not supported by the cpu */ 598 return -ENODEV; /* ibs not supported by the cpu */
47 599
48 perf_pmu_register(&perf_ibs, "ibs", -1); 600 perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
601 if (ibs_caps & IBS_CAPS_OPCNT)
602 perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
603 perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
604 register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
49 printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps); 605 printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps);
50 606
51 return 0; 607 return 0;
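perf_event_set_period() above keeps the software period_left bookkeeping (which may go negative) separate from the value that can actually be programmed into the hardware max-count field, clamping the latter to [min, max] and reporting an overflow whenever the software period has elapsed. A user-space rendition of just that clamping, assuming the 1<<4 minimum that perf_ibs_set_period() passes because the low 4 bits of the count are ignored:

    #include <stdint.h>
    #include <stdio.h>

    static int set_period(int64_t *period_left, int64_t period,
                          int64_t min, int64_t max, uint64_t *hw_period)
    {
            int64_t left = *period_left;
            int overflow = 0;

            if (left <= -period) {          /* way behind: resynchronize */
                    left = period;
                    *period_left = left;
                    overflow = 1;
            }
            if (left < min) {               /* too little left for the hardware */
                    left += period;
                    *period_left = left;
                    overflow = 1;
            }
            if (left > max) {               /* shorten the next-to-last period */
                    left -= max;
                    if (left > max)
                            left = max;
                    else if (left < min)
                            left = min;
            }
            *hw_period = (uint64_t)left;
            return overflow;
    }

    int main(void)
    {
            int64_t left = 250000;          /* pretend period_left */
            uint64_t hw;
            int ovf = set_period(&left, 100000, 1 << 4, 100000, &hw);

            /* 250000 > max: program max (100000) now, no software overflow yet */
            printf("overflow=%d hw_period=%llu left=%lld\n",
                   ovf, (unsigned long long)hw, (long long)left);
            return 0;
    }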
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 26b3e2fef104..166546ec6aef 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1027,8 +1027,6 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
1027 u64 status; 1027 u64 status;
1028 int handled; 1028 int handled;
1029 1029
1030 perf_sample_data_init(&data, 0);
1031
1032 cpuc = &__get_cpu_var(cpu_hw_events); 1030 cpuc = &__get_cpu_var(cpu_hw_events);
1033 1031
1034 /* 1032 /*
@@ -1082,7 +1080,7 @@ again:
1082 if (!intel_pmu_save_and_restart(event)) 1080 if (!intel_pmu_save_and_restart(event))
1083 continue; 1081 continue;
1084 1082
1085 data.period = event->hw.last_period; 1083 perf_sample_data_init(&data, 0, event->hw.last_period);
1086 1084
1087 if (has_branch_stack(event)) 1085 if (has_branch_stack(event))
1088 data.br_stack = &cpuc->lbr_stack; 1086 data.br_stack = &cpuc->lbr_stack;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 7f64df19e7dd..5a3edc27f6e5 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -316,8 +316,7 @@ int intel_pmu_drain_bts_buffer(void)
316 316
317 ds->bts_index = ds->bts_buffer_base; 317 ds->bts_index = ds->bts_buffer_base;
318 318
319 perf_sample_data_init(&data, 0); 319 perf_sample_data_init(&data, 0, event->hw.last_period);
320 data.period = event->hw.last_period;
321 regs.ip = 0; 320 regs.ip = 0;
322 321
323 /* 322 /*
@@ -564,8 +563,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
564 if (!intel_pmu_save_and_restart(event)) 563 if (!intel_pmu_save_and_restart(event))
565 return; 564 return;
566 565
567 perf_sample_data_init(&data, 0); 566 perf_sample_data_init(&data, 0, event->hw.last_period);
568 data.period = event->hw.last_period;
569 567
570 /* 568 /*
571 * We use the interrupt regs as a base because the PEBS record 569 * We use the interrupt regs as a base because the PEBS record
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index a2dfacfd7103..47124a73dd73 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -1005,8 +1005,6 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
1005 int idx, handled = 0; 1005 int idx, handled = 0;
1006 u64 val; 1006 u64 val;
1007 1007
1008 perf_sample_data_init(&data, 0);
1009
1010 cpuc = &__get_cpu_var(cpu_hw_events); 1008 cpuc = &__get_cpu_var(cpu_hw_events);
1011 1009
1012 for (idx = 0; idx < x86_pmu.num_counters; idx++) { 1010 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
@@ -1034,10 +1032,12 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
1034 handled += overflow; 1032 handled += overflow;
1035 1033
1036 /* event overflow for sure */ 1034 /* event overflow for sure */
1037 data.period = event->hw.last_period; 1035 perf_sample_data_init(&data, 0, hwc->last_period);
1038 1036
1039 if (!x86_perf_event_set_period(event)) 1037 if (!x86_perf_event_set_period(event))
1040 continue; 1038 continue;
1039
1040
1041 if (perf_event_overflow(event, &data, regs)) 1041 if (perf_event_overflow(event, &data, regs))
1042 x86_pmu_stop(event, 0); 1042 x86_pmu_stop(event, 0);
1043 } 1043 }
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index c9a281f272fd..32ff36596ab1 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -24,40 +24,21 @@
24#include <trace/syscall.h> 24#include <trace/syscall.h>
25 25
26#include <asm/cacheflush.h> 26#include <asm/cacheflush.h>
27#include <asm/kprobes.h>
27#include <asm/ftrace.h> 28#include <asm/ftrace.h>
28#include <asm/nops.h> 29#include <asm/nops.h>
29#include <asm/nmi.h>
30
31 30
32#ifdef CONFIG_DYNAMIC_FTRACE 31#ifdef CONFIG_DYNAMIC_FTRACE
33 32
34/*
35 * modifying_code is set to notify NMIs that they need to use
36 * memory barriers when entering or exiting. But we don't want
37 * to burden NMIs with unnecessary memory barriers when code
38 * modification is not being done (which is most of the time).
39 *
40 * A mutex is already held when ftrace_arch_code_modify_prepare
41 * and post_process are called. No locks need to be taken here.
42 *
43 * Stop machine will make sure currently running NMIs are done
44 * and new NMIs will see the updated variable before we need
45 * to worry about NMIs doing memory barriers.
46 */
47static int modifying_code __read_mostly;
48static DEFINE_PER_CPU(int, save_modifying_code);
49
50int ftrace_arch_code_modify_prepare(void) 33int ftrace_arch_code_modify_prepare(void)
51{ 34{
52 set_kernel_text_rw(); 35 set_kernel_text_rw();
53 set_all_modules_text_rw(); 36 set_all_modules_text_rw();
54 modifying_code = 1;
55 return 0; 37 return 0;
56} 38}
57 39
58int ftrace_arch_code_modify_post_process(void) 40int ftrace_arch_code_modify_post_process(void)
59{ 41{
60 modifying_code = 0;
61 set_all_modules_text_ro(); 42 set_all_modules_text_ro();
62 set_kernel_text_ro(); 43 set_kernel_text_ro();
63 return 0; 44 return 0;
@@ -90,134 +71,6 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
90 return calc.code; 71 return calc.code;
91} 72}
92 73
93/*
94 * Modifying code must take extra care. On an SMP machine, if
95 * the code being modified is also being executed on another CPU
96 * that CPU will have undefined results and possibly take a GPF.
97 * We use kstop_machine to stop other CPUS from exectuing code.
98 * But this does not stop NMIs from happening. We still need
99 * to protect against that. We separate out the modification of
100 * the code to take care of this.
101 *
102 * Two buffers are added: An IP buffer and a "code" buffer.
103 *
104 * 1) Put the instruction pointer into the IP buffer
105 * and the new code into the "code" buffer.
106 * 2) Wait for any running NMIs to finish and set a flag that says
107 * we are modifying code, it is done in an atomic operation.
108 * 3) Write the code
109 * 4) clear the flag.
110 * 5) Wait for any running NMIs to finish.
111 *
112 * If an NMI is executed, the first thing it does is to call
113 * "ftrace_nmi_enter". This will check if the flag is set to write
114 * and if it is, it will write what is in the IP and "code" buffers.
115 *
116 * The trick is, it does not matter if everyone is writing the same
117 * content to the code location. Also, if a CPU is executing code
118 * it is OK to write to that code location if the contents being written
119 * are the same as what exists.
120 */
121
122#define MOD_CODE_WRITE_FLAG (1 << 31) /* set when NMI should do the write */
123static atomic_t nmi_running = ATOMIC_INIT(0);
124static int mod_code_status; /* holds return value of text write */
125static void *mod_code_ip; /* holds the IP to write to */
126static const void *mod_code_newcode; /* holds the text to write to the IP */
127
128static unsigned nmi_wait_count;
129static atomic_t nmi_update_count = ATOMIC_INIT(0);
130
131int ftrace_arch_read_dyn_info(char *buf, int size)
132{
133 int r;
134
135 r = snprintf(buf, size, "%u %u",
136 nmi_wait_count,
137 atomic_read(&nmi_update_count));
138 return r;
139}
140
141static void clear_mod_flag(void)
142{
143 int old = atomic_read(&nmi_running);
144
145 for (;;) {
146 int new = old & ~MOD_CODE_WRITE_FLAG;
147
148 if (old == new)
149 break;
150
151 old = atomic_cmpxchg(&nmi_running, old, new);
152 }
153}
154
155static void ftrace_mod_code(void)
156{
157 /*
158 * Yes, more than one CPU process can be writing to mod_code_status.
159 * (and the code itself)
160 * But if one were to fail, then they all should, and if one were
161 * to succeed, then they all should.
162 */
163 mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
164 MCOUNT_INSN_SIZE);
165
166 /* if we fail, then kill any new writers */
167 if (mod_code_status)
168 clear_mod_flag();
169}
170
171void ftrace_nmi_enter(void)
172{
173 __this_cpu_write(save_modifying_code, modifying_code);
174
175 if (!__this_cpu_read(save_modifying_code))
176 return;
177
178 if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
179 smp_rmb();
180 ftrace_mod_code();
181 atomic_inc(&nmi_update_count);
182 }
183 /* Must have previous changes seen before executions */
184 smp_mb();
185}
186
187void ftrace_nmi_exit(void)
188{
189 if (!__this_cpu_read(save_modifying_code))
190 return;
191
192 /* Finish all executions before clearing nmi_running */
193 smp_mb();
194 atomic_dec(&nmi_running);
195}
196
197static void wait_for_nmi_and_set_mod_flag(void)
198{
199 if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG))
200 return;
201
202 do {
203 cpu_relax();
204 } while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG));
205
206 nmi_wait_count++;
207}
208
209static void wait_for_nmi(void)
210{
211 if (!atomic_read(&nmi_running))
212 return;
213
214 do {
215 cpu_relax();
216 } while (atomic_read(&nmi_running));
217
218 nmi_wait_count++;
219}
220
221static inline int 74static inline int
222within(unsigned long addr, unsigned long start, unsigned long end) 75within(unsigned long addr, unsigned long start, unsigned long end)
223{ 76{
@@ -238,26 +91,7 @@ do_ftrace_mod_code(unsigned long ip, const void *new_code)
238 if (within(ip, (unsigned long)_text, (unsigned long)_etext)) 91 if (within(ip, (unsigned long)_text, (unsigned long)_etext))
239 ip = (unsigned long)__va(__pa(ip)); 92 ip = (unsigned long)__va(__pa(ip));
240 93
241 mod_code_ip = (void *)ip; 94 return probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE);
242 mod_code_newcode = new_code;
243
244 /* The buffers need to be visible before we let NMIs write them */
245 smp_mb();
246
247 wait_for_nmi_and_set_mod_flag();
248
249 /* Make sure all running NMIs have finished before we write the code */
250 smp_mb();
251
252 ftrace_mod_code();
253
254 /* Make sure the write happens before clearing the bit */
255 smp_mb();
256
257 clear_mod_flag();
258 wait_for_nmi();
259
260 return mod_code_status;
261} 95}
262 96
263static const unsigned char *ftrace_nop_replace(void) 97static const unsigned char *ftrace_nop_replace(void)
@@ -334,6 +168,336 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
334 return ret; 168 return ret;
335} 169}
336 170
171int modifying_ftrace_code __read_mostly;
172
173/*
174 * A breakpoint was added to the code address we are about to
175 * modify, and this is the handle that will just skip over it.
176 * We are either changing a nop into a trace call, or a trace
177 * call to a nop. While the change is taking place, we treat
178 * it just like it was a nop.
179 */
180int ftrace_int3_handler(struct pt_regs *regs)
181{
182 if (WARN_ON_ONCE(!regs))
183 return 0;
184
185 if (!ftrace_location(regs->ip - 1))
186 return 0;
187
188 regs->ip += MCOUNT_INSN_SIZE - 1;
189
190 return 1;
191}
192
193static int ftrace_write(unsigned long ip, const char *val, int size)
194{
195 /*
196 * On x86_64, kernel text mappings are mapped read-only with
197 * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead
198 * of the kernel text mapping to modify the kernel text.
199 *
200 * For 32bit kernels, these mappings are same and we can use
201 * kernel identity mapping to modify code.
202 */
203 if (within(ip, (unsigned long)_text, (unsigned long)_etext))
204 ip = (unsigned long)__va(__pa(ip));
205
206 return probe_kernel_write((void *)ip, val, size);
207}
208
209static int add_break(unsigned long ip, const char *old)
210{
211 unsigned char replaced[MCOUNT_INSN_SIZE];
212 unsigned char brk = BREAKPOINT_INSTRUCTION;
213
214 if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
215 return -EFAULT;
216
217 /* Make sure it is what we expect it to be */
218 if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0)
219 return -EINVAL;
220
221 if (ftrace_write(ip, &brk, 1))
222 return -EPERM;
223
224 return 0;
225}
226
227static int add_brk_on_call(struct dyn_ftrace *rec, unsigned long addr)
228{
229 unsigned const char *old;
230 unsigned long ip = rec->ip;
231
232 old = ftrace_call_replace(ip, addr);
233
234 return add_break(rec->ip, old);
235}
236
237
238static int add_brk_on_nop(struct dyn_ftrace *rec)
239{
240 unsigned const char *old;
241
242 old = ftrace_nop_replace();
243
244 return add_break(rec->ip, old);
245}
246
247static int add_breakpoints(struct dyn_ftrace *rec, int enable)
248{
249 unsigned long ftrace_addr;
250 int ret;
251
252 ret = ftrace_test_record(rec, enable);
253
254 ftrace_addr = (unsigned long)FTRACE_ADDR;
255
256 switch (ret) {
257 case FTRACE_UPDATE_IGNORE:
258 return 0;
259
260 case FTRACE_UPDATE_MAKE_CALL:
261 /* converting nop to call */
262 return add_brk_on_nop(rec);
263
264 case FTRACE_UPDATE_MAKE_NOP:
265 /* converting a call to a nop */
266 return add_brk_on_call(rec, ftrace_addr);
267 }
268 return 0;
269}
270
271/*
272 * On error, we need to remove breakpoints. This needs to
273 * be done caefully. If the address does not currently have a
274 * breakpoint, we know we are done. Otherwise, we look at the
275 * remaining 4 bytes of the instruction. If it matches a nop
276 * we replace the breakpoint with the nop. Otherwise we replace
277 * it with the call instruction.
278 */
279static int remove_breakpoint(struct dyn_ftrace *rec)
280{
281 unsigned char ins[MCOUNT_INSN_SIZE];
282 unsigned char brk = BREAKPOINT_INSTRUCTION;
283 const unsigned char *nop;
284 unsigned long ftrace_addr;
285 unsigned long ip = rec->ip;
286
287 /* If we fail the read, just give up */
288 if (probe_kernel_read(ins, (void *)ip, MCOUNT_INSN_SIZE))
289 return -EFAULT;
290
291 /* If this does not have a breakpoint, we are done */
292 if (ins[0] != brk)
293 return -1;
294
295 nop = ftrace_nop_replace();
296
297 /*
298 * If the last 4 bytes of the instruction do not match
299 * a nop, then we assume that this is a call to ftrace_addr.
300 */
301 if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) {
302 /*
303 * For extra paranoidism, we check if the breakpoint is on
304 * a call that would actually jump to the ftrace_addr.
305 * If not, don't touch the breakpoint, we make just create
306 * a disaster.
307 */
308 ftrace_addr = (unsigned long)FTRACE_ADDR;
309 nop = ftrace_call_replace(ip, ftrace_addr);
310
311 if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0)
312 return -EINVAL;
313 }
314
315 return probe_kernel_write((void *)ip, &nop[0], 1);
316}
317
318static int add_update_code(unsigned long ip, unsigned const char *new)
319{
320 /* skip breakpoint */
321 ip++;
322 new++;
323 if (ftrace_write(ip, new, MCOUNT_INSN_SIZE - 1))
324 return -EPERM;
325 return 0;
326}
327
328static int add_update_call(struct dyn_ftrace *rec, unsigned long addr)
329{
330 unsigned long ip = rec->ip;
331 unsigned const char *new;
332
333 new = ftrace_call_replace(ip, addr);
334 return add_update_code(ip, new);
335}
336
337static int add_update_nop(struct dyn_ftrace *rec)
338{
339 unsigned long ip = rec->ip;
340 unsigned const char *new;
341
342 new = ftrace_nop_replace();
343 return add_update_code(ip, new);
344}
345
346static int add_update(struct dyn_ftrace *rec, int enable)
347{
348 unsigned long ftrace_addr;
349 int ret;
350
351 ret = ftrace_test_record(rec, enable);
352
353 ftrace_addr = (unsigned long)FTRACE_ADDR;
354
355 switch (ret) {
356 case FTRACE_UPDATE_IGNORE:
357 return 0;
358
359 case FTRACE_UPDATE_MAKE_CALL:
360 /* converting nop to call */
361 return add_update_call(rec, ftrace_addr);
362
363 case FTRACE_UPDATE_MAKE_NOP:
364 /* converting a call to a nop */
365 return add_update_nop(rec);
366 }
367
368 return 0;
369}
370
371static int finish_update_call(struct dyn_ftrace *rec, unsigned long addr)
372{
373 unsigned long ip = rec->ip;
374 unsigned const char *new;
375
376 new = ftrace_call_replace(ip, addr);
377
378 if (ftrace_write(ip, new, 1))
379 return -EPERM;
380
381 return 0;
382}
383
384static int finish_update_nop(struct dyn_ftrace *rec)
385{
386 unsigned long ip = rec->ip;
387 unsigned const char *new;
388
389 new = ftrace_nop_replace();
390
391 if (ftrace_write(ip, new, 1))
392 return -EPERM;
393 return 0;
394}
395
396static int finish_update(struct dyn_ftrace *rec, int enable)
397{
398 unsigned long ftrace_addr;
399 int ret;
400
401 ret = ftrace_update_record(rec, enable);
402
403 ftrace_addr = (unsigned long)FTRACE_ADDR;
404
405 switch (ret) {
406 case FTRACE_UPDATE_IGNORE:
407 return 0;
408
409 case FTRACE_UPDATE_MAKE_CALL:
410 /* converting nop to call */
411 return finish_update_call(rec, ftrace_addr);
412
413 case FTRACE_UPDATE_MAKE_NOP:
414 /* converting a call to a nop */
415 return finish_update_nop(rec);
416 }
417
418 return 0;
419}
420
421static void do_sync_core(void *data)
422{
423 sync_core();
424}
425
426static void run_sync(void)
427{
428 int enable_irqs = irqs_disabled();
429
430 /* We may be called with interrupts disbled (on bootup). */
431 if (enable_irqs)
432 local_irq_enable();
433 on_each_cpu(do_sync_core, NULL, 1);
434 if (enable_irqs)
435 local_irq_disable();
436}
437
438void ftrace_replace_code(int enable)
439{
440 struct ftrace_rec_iter *iter;
441 struct dyn_ftrace *rec;
442 const char *report = "adding breakpoints";
443 int count = 0;
444 int ret;
445
446 for_ftrace_rec_iter(iter) {
447 rec = ftrace_rec_iter_record(iter);
448
449 ret = add_breakpoints(rec, enable);
450 if (ret)
451 goto remove_breakpoints;
452 count++;
453 }
454
455 run_sync();
456
457 report = "updating code";
458
459 for_ftrace_rec_iter(iter) {
460 rec = ftrace_rec_iter_record(iter);
461
462 ret = add_update(rec, enable);
463 if (ret)
464 goto remove_breakpoints;
465 }
466
467 run_sync();
468
469 report = "removing breakpoints";
470
471 for_ftrace_rec_iter(iter) {
472 rec = ftrace_rec_iter_record(iter);
473
474 ret = finish_update(rec, enable);
475 if (ret)
476 goto remove_breakpoints;
477 }
478
479 run_sync();
480
481 return;
482
483 remove_breakpoints:
484 ftrace_bug(ret, rec ? rec->ip : 0);
485 printk(KERN_WARNING "Failed on %s (%d):\n", report, count);
486 for_ftrace_rec_iter(iter) {
487 rec = ftrace_rec_iter_record(iter);
488 remove_breakpoint(rec);
489 }
490}
491
492void arch_ftrace_update_code(int command)
493{
494 modifying_ftrace_code++;
495
496 ftrace_modify_all_code(command);
497
498 modifying_ftrace_code--;
499}
500
337int __init ftrace_dyn_arch_init(void *data) 501int __init ftrace_dyn_arch_init(void *data)
338{ 502{
339 /* The return code is retured via data */ 503 /* The return code is retured via data */
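The rewritten ftrace.c above replaces the old NMI hand-shaking with breakpoint-based patching: every mcount call site first gets an int3 on its opcode byte, then the remaining four bytes are rewritten behind the breakpoint, then the breakpoint byte is replaced by the new opcode, with run_sync() (sync_core() on every CPU) between the phases and ftrace_int3_handler() skipping any site that traps mid-update. A toy user-space model of that ordering for a single 5-byte site (illustrative only; the real thing needs the trap handler and cross-CPU serialization shown in the patch):

    #include <stdio.h>
    #include <string.h>

    #define INSN_SIZE 5
    #define BRK 0xcc                        /* int3 opcode */

    static unsigned char text[INSN_SIZE];   /* stand-in for one mcount call site */

    static void sync_all_cpus(void)
    {
            /* models run_sync(): on_each_cpu(do_sync_core, NULL, 1) */
    }

    static void replace_insn(const unsigned char *new_insn)
    {
            text[0] = BRK;                  /* phase 1: add_breakpoints() */
            sync_all_cpus();

            memcpy(text + 1, new_insn + 1,  /* phase 2: add_update()      */
                   INSN_SIZE - 1);
            sync_all_cpus();

            text[0] = new_insn[0];          /* phase 3: finish_update()   */
            sync_all_cpus();
    }

    int main(void)
    {
            unsigned char nop5[INSN_SIZE] = { 0x0f, 0x1f, 0x44, 0x00, 0x00 };
            unsigned char call[INSN_SIZE] = { 0xe8, 0x78, 0x56, 0x34, 0x12 };
            int i;

            memcpy(text, nop5, INSN_SIZE);  /* site starts out as a 5-byte nop   */
            replace_insn(call);             /* nop -> call, like MAKE_CALL above */

            for (i = 0; i < INSN_SIZE; i++)
                    printf("%02x ", text[i]);
            printf("\n");
            return 0;
    }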
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 585be4bd71a5..a1faed5ac6a2 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -84,7 +84,7 @@ __setup("unknown_nmi_panic", setup_unknown_nmi_panic);
84 84
85#define nmi_to_desc(type) (&nmi_desc[type]) 85#define nmi_to_desc(type) (&nmi_desc[type])
86 86
87static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) 87static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
88{ 88{
89 struct nmi_desc *desc = nmi_to_desc(type); 89 struct nmi_desc *desc = nmi_to_desc(type);
90 struct nmiaction *a; 90 struct nmiaction *a;
@@ -166,7 +166,7 @@ void unregister_nmi_handler(unsigned int type, const char *name)
166} 166}
167EXPORT_SYMBOL_GPL(unregister_nmi_handler); 167EXPORT_SYMBOL_GPL(unregister_nmi_handler);
168 168
169static notrace __kprobes void 169static __kprobes void
170pci_serr_error(unsigned char reason, struct pt_regs *regs) 170pci_serr_error(unsigned char reason, struct pt_regs *regs)
171{ 171{
172 /* check to see if anyone registered against these types of errors */ 172 /* check to see if anyone registered against these types of errors */
@@ -197,7 +197,7 @@ pci_serr_error(unsigned char reason, struct pt_regs *regs)
197 outb(reason, NMI_REASON_PORT); 197 outb(reason, NMI_REASON_PORT);
198} 198}
199 199
200static notrace __kprobes void 200static __kprobes void
201io_check_error(unsigned char reason, struct pt_regs *regs) 201io_check_error(unsigned char reason, struct pt_regs *regs)
202{ 202{
203 unsigned long i; 203 unsigned long i;
@@ -228,7 +228,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
228 outb(reason, NMI_REASON_PORT); 228 outb(reason, NMI_REASON_PORT);
229} 229}
230 230
231static notrace __kprobes void 231static __kprobes void
232unknown_nmi_error(unsigned char reason, struct pt_regs *regs) 232unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
233{ 233{
234 int handled; 234 int handled;
@@ -270,7 +270,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
270static DEFINE_PER_CPU(bool, swallow_nmi); 270static DEFINE_PER_CPU(bool, swallow_nmi);
271static DEFINE_PER_CPU(unsigned long, last_nmi_rip); 271static DEFINE_PER_CPU(unsigned long, last_nmi_rip);
272 272
273static notrace __kprobes void default_do_nmi(struct pt_regs *regs) 273static __kprobes void default_do_nmi(struct pt_regs *regs)
274{ 274{
275 unsigned char reason = 0; 275 unsigned char reason = 0;
276 int handled; 276 int handled;
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index ff9281f16029..92d5756d85fc 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -50,6 +50,7 @@
50#include <asm/processor.h> 50#include <asm/processor.h>
51#include <asm/debugreg.h> 51#include <asm/debugreg.h>
52#include <linux/atomic.h> 52#include <linux/atomic.h>
53#include <asm/ftrace.h>
53#include <asm/traps.h> 54#include <asm/traps.h>
54#include <asm/desc.h> 55#include <asm/desc.h>
55#include <asm/i387.h> 56#include <asm/i387.h>
@@ -303,8 +304,13 @@ gp_in_kernel:
303} 304}
304 305
305/* May run on IST stack. */ 306/* May run on IST stack. */
306dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) 307dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code)
307{ 308{
309#ifdef CONFIG_DYNAMIC_FTRACE
310 /* ftrace must be first, everything else may cause a recursive crash */
311 if (unlikely(modifying_ftrace_code) && ftrace_int3_handler(regs))
312 return;
313#endif
308#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP 314#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
309 if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, 315 if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
310 SIGTRAP) == NOTIFY_STOP) 316 SIGTRAP) == NOTIFY_STOP)
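The traps.c hook is the consumer of that patching scheme: while modifying_ftrace_code is non-zero, do_int3() asks ftrace_int3_handler() first, and the handler simply jumps over a trapped call site. The pointer arithmetic, assuming the usual 5-byte MCOUNT_INSN_SIZE on x86 (a sketch of what the handler added in this merge does):

    /* the 1-byte int3 at address A has already executed, so regs->ip == A + 1 */
    if (ftrace_location(regs->ip - 1))          /* was A a patched mcount site?   */
            regs->ip += MCOUNT_INSN_SIZE - 1;   /* resume at A + 5, past the slot */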