aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2010-10-21 15:54:49 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2010-10-21 15:54:49 -0400
commit5d70f79b5ef6ea2de4f72a37b2d96e2601e40a22 (patch)
treea0d6de0930ba83ecf4629c2e2e261f5eaa2d8f33
parent888a6f77e0418b049f83d37547c209b904d30af4 (diff)
parent750ed158bf6c782d2813da1bca2c824365a0b777 (diff)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (163 commits) tracing: Fix compile issue for trace_sched_wakeup.c [S390] hardirq: remove pointless header file includes [IA64] Move local_softirq_pending() definition perf, powerpc: Fix power_pmu_event_init to not use event->ctx ftrace: Remove recursion between recordmcount and scripts/mod/empty jump_label: Add COND_STMT(), reducer wrappery perf: Optimize sw events perf: Use jump_labels to optimize the scheduler hooks jump_label: Add atomic_t interface jump_label: Use more consistent naming perf, hw_breakpoint: Fix crash in hw_breakpoint creation perf: Find task before event alloc perf: Fix task refcount bugs perf: Fix group moving irq_work: Add generic hardirq context callbacks perf_events: Fix transaction recovery in group_sched_in() perf_events: Fix bogus AMD64 generic TLB events perf_events: Fix bogus context time tracking tracing: Remove parent recording in latency tracer graph options tracing: Use one prologue for the preempt irqs off tracer function tracers ...
-rw-r--r--Documentation/kprobes.txt8
-rw-r--r--Makefile11
-rw-r--r--arch/Kconfig3
-rw-r--r--arch/alpha/Kconfig1
-rw-r--r--arch/alpha/include/asm/perf_event.h5
-rw-r--r--arch/alpha/kernel/perf_event.c128
-rw-r--r--arch/alpha/kernel/time.c30
-rw-r--r--arch/arm/Kconfig1
-rw-r--r--arch/arm/include/asm/perf_event.h12
-rw-r--r--arch/arm/kernel/perf_event.c212
-rw-r--r--arch/arm/oprofile/Makefile4
-rw-r--r--arch/arm/oprofile/common.c311
-rw-r--r--arch/frv/Kconfig1
-rw-r--r--arch/frv/lib/Makefile2
-rw-r--r--arch/frv/lib/perf_event.c19
-rw-r--r--arch/ia64/include/asm/hardirq.h11
-rw-r--r--arch/parisc/Kconfig1
-rw-r--r--arch/parisc/include/asm/perf_event.h3
-rw-r--r--arch/powerpc/Kconfig1
-rw-r--r--arch/powerpc/include/asm/paca.h2
-rw-r--r--arch/powerpc/kernel/perf_callchain.c86
-rw-r--r--arch/powerpc/kernel/perf_event.c166
-rw-r--r--arch/powerpc/kernel/perf_event_fsl_emb.c148
-rw-r--r--arch/powerpc/kernel/time.c42
-rw-r--r--arch/s390/Kconfig1
-rw-r--r--arch/s390/include/asm/hardirq.h4
-rw-r--r--arch/s390/include/asm/perf_event.h3
-rw-r--r--arch/sh/Kconfig14
-rw-r--r--arch/sh/include/asm/perf_event.h7
-rw-r--r--arch/sh/kernel/perf_callchain.c50
-rw-r--r--arch/sh/kernel/perf_event.c159
-rw-r--r--arch/sh/oprofile/Makefile4
-rw-r--r--arch/sh/oprofile/common.c115
-rw-r--r--arch/sh/oprofile/op_impl.h33
-rw-r--r--arch/sparc/Kconfig3
-rw-r--r--arch/sparc/include/asm/jump_label.h32
-rw-r--r--arch/sparc/include/asm/perf_event.h4
-rw-r--r--arch/sparc/kernel/Makefile2
-rw-r--r--arch/sparc/kernel/jump_label.c47
-rw-r--r--arch/sparc/kernel/module.c6
-rw-r--r--arch/sparc/kernel/pcr.c8
-rw-r--r--arch/sparc/kernel/perf_event.c240
-rw-r--r--arch/x86/Kconfig8
-rw-r--r--arch/x86/include/asm/alternative.h11
-rw-r--r--arch/x86/include/asm/entry_arch.h4
-rw-r--r--arch/x86/include/asm/hardirq.h2
-rw-r--r--arch/x86/include/asm/hw_irq.h2
-rw-r--r--arch/x86/include/asm/irq_vectors.h4
-rw-r--r--arch/x86/include/asm/jump_label.h37
-rw-r--r--arch/x86/include/asm/perf_event_p4.h52
-rw-r--r--arch/x86/kernel/Makefile3
-rw-r--r--arch/x86/kernel/alternative.c71
-rw-r--r--arch/x86/kernel/cpu/perf_event.c280
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c4
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c8
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c13
-rw-r--r--arch/x86/kernel/cpu/perf_event_p4.c292
-rw-r--r--arch/x86/kernel/entry_64.S6
-rw-r--r--arch/x86/kernel/ftrace.c63
-rw-r--r--arch/x86/kernel/irq.c8
-rw-r--r--arch/x86/kernel/irq_work.c30
-rw-r--r--arch/x86/kernel/irqinit.c6
-rw-r--r--arch/x86/kernel/jump_label.c50
-rw-r--r--arch/x86/kernel/kprobes.c14
-rw-r--r--arch/x86/kernel/module.c3
-rw-r--r--arch/x86/kernel/setup.c6
-rw-r--r--arch/x86/mm/fault.c4
-rw-r--r--arch/x86/mm/kmemcheck/kmemcheck.c2
-rw-r--r--arch/x86/oprofile/backtrace.c70
-rw-r--r--arch/x86/oprofile/nmi_int.c9
-rw-r--r--drivers/oprofile/oprof.c32
-rw-r--r--drivers/oprofile/oprof.h2
-rw-r--r--drivers/oprofile/oprofile_files.c7
-rw-r--r--drivers/oprofile/oprofile_perf.c328
-rw-r--r--drivers/oprofile/oprofilefs.c54
-rw-r--r--include/asm-generic/hardirq.h2
-rw-r--r--include/asm-generic/vmlinux.lds.h10
-rw-r--r--include/linux/dynamic_debug.h39
-rw-r--r--include/linux/ftrace_event.h8
-rw-r--r--include/linux/interrupt.h8
-rw-r--r--include/linux/irq_work.h20
-rw-r--r--include/linux/jump_label.h74
-rw-r--r--include/linux/jump_label_ref.h44
-rw-r--r--include/linux/module.h5
-rw-r--r--include/linux/oprofile.h7
-rw-r--r--include/linux/percpu.h9
-rw-r--r--include/linux/perf_event.h212
-rw-r--r--include/linux/sched.h9
-rw-r--r--include/linux/stop_machine.h10
-rw-r--r--include/linux/tracepoint.h5
-rw-r--r--include/trace/events/irq.h26
-rw-r--r--include/trace/events/napi.h25
-rw-r--r--include/trace/events/net.h82
-rw-r--r--include/trace/events/power.h90
-rw-r--r--include/trace/events/skb.h17
-rw-r--r--init/Kconfig8
-rw-r--r--kernel/Makefile4
-rw-r--r--kernel/exit.c4
-rw-r--r--kernel/hw_breakpoint.c75
-rw-r--r--kernel/irq_work.c164
-rw-r--r--kernel/jump_label.c429
-rw-r--r--kernel/kprobes.c26
-rw-r--r--kernel/module.c6
-rw-r--r--kernel/perf_event.c2592
-rw-r--r--kernel/sched.c2
-rw-r--r--kernel/test_kprobes.c12
-rw-r--r--kernel/timer.c7
-rw-r--r--kernel/trace/Kconfig5
-rw-r--r--kernel/trace/ftrace.c127
-rw-r--r--kernel/trace/ring_buffer.c21
-rw-r--r--kernel/trace/trace.c2
-rw-r--r--kernel/trace/trace.h4
-rw-r--r--kernel/trace/trace_event_perf.c28
-rw-r--r--kernel/trace/trace_events.c55
-rw-r--r--kernel/trace/trace_functions_graph.c209
-rw-r--r--kernel/trace/trace_irqsoff.c152
-rw-r--r--kernel/trace/trace_sched_wakeup.c256
-rw-r--r--kernel/trace/trace_workqueue.c10
-rw-r--r--kernel/tracepoint.c14
-rw-r--r--kernel/watchdog.c41
-rw-r--r--lib/Kconfig.debug8
-rw-r--r--lib/dynamic_debug.c42
-rw-r--r--net/core/datagram.c1
-rw-r--r--net/core/dev.c8
-rw-r--r--net/core/net-traces.c1
-rw-r--r--net/core/skbuff.c1
-rw-r--r--scripts/Makefile1
-rw-r--r--scripts/Makefile.build10
-rw-r--r--scripts/Makefile.lib11
-rw-r--r--scripts/basic/Makefile2
-rw-r--r--scripts/basic/hash.c64
-rw-r--r--scripts/gcc-goto.sh5
-rw-r--r--scripts/recordmcount.c363
-rw-r--r--scripts/recordmcount.h366
-rw-r--r--tools/perf/Documentation/perf-annotate.txt11
-rw-r--r--tools/perf/Documentation/perf-report.txt7
-rw-r--r--tools/perf/Makefile30
-rw-r--r--tools/perf/builtin-annotate.c26
-rw-r--r--tools/perf/builtin-report.c14
-rw-r--r--tools/perf/feature-tests.mak11
-rw-r--r--tools/perf/scripts/python/bin/netdev-times-record8
-rw-r--r--tools/perf/scripts/python/bin/netdev-times-report5
-rw-r--r--tools/perf/scripts/python/netdev-times.py464
-rw-r--r--tools/perf/util/cache.h2
-rw-r--r--tools/perf/util/callchain.c98
-rw-r--r--tools/perf/util/callchain.h27
-rw-r--r--tools/perf/util/hist.c4
-rw-r--r--tools/perf/util/path.c3
-rw-r--r--tools/perf/util/sort.h2
-rw-r--r--tools/perf/util/symbol.c14
-rw-r--r--tools/perf/util/symbol.h1
-rw-r--r--tools/perf/util/ui/browser.c117
-rw-r--r--tools/perf/util/ui/browser.h9
-rw-r--r--tools/perf/util/ui/browsers/annotate.c38
-rw-r--r--tools/perf/util/ui/browsers/hists.c327
-rw-r--r--tools/perf/util/ui/browsers/map.c32
-rw-r--r--tools/perf/util/ui/util.c4
-rw-r--r--tools/perf/util/util.h13
158 files changed, 7162 insertions, 3368 deletions
diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt
index 1762b81fcdf2..741fe66d6eca 100644
--- a/Documentation/kprobes.txt
+++ b/Documentation/kprobes.txt
@@ -542,9 +542,11 @@ Kprobes does not use mutexes or allocate memory except during
542registration and unregistration. 542registration and unregistration.
543 543
544Probe handlers are run with preemption disabled. Depending on the 544Probe handlers are run with preemption disabled. Depending on the
545architecture, handlers may also run with interrupts disabled. In any 545architecture and optimization state, handlers may also run with
546case, your handler should not yield the CPU (e.g., by attempting to 546interrupts disabled (e.g., kretprobe handlers and optimized kprobe
547acquire a semaphore). 547handlers run without interrupt disabled on x86/x86-64). In any case,
548your handler should not yield the CPU (e.g., by attempting to acquire
549a semaphore).
548 550
549Since a return probe is implemented by replacing the return 551Since a return probe is implemented by replacing the return
550address with the trampoline's address, stack backtraces and calls 552address with the trampoline's address, stack backtraces and calls
diff --git a/Makefile b/Makefile
index 860c26af52c3..d3c10719bbbd 100644
--- a/Makefile
+++ b/Makefile
@@ -568,6 +568,12 @@ endif
568 568
569ifdef CONFIG_FUNCTION_TRACER 569ifdef CONFIG_FUNCTION_TRACER
570KBUILD_CFLAGS += -pg 570KBUILD_CFLAGS += -pg
571ifdef CONFIG_DYNAMIC_FTRACE
572 ifdef CONFIG_HAVE_C_RECORDMCOUNT
573 BUILD_C_RECORDMCOUNT := y
574 export BUILD_C_RECORDMCOUNT
575 endif
576endif
571endif 577endif
572 578
573# We trigger additional mismatches with less inlining 579# We trigger additional mismatches with less inlining
@@ -591,6 +597,11 @@ KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow)
591# conserve stack if available 597# conserve stack if available
592KBUILD_CFLAGS += $(call cc-option,-fconserve-stack) 598KBUILD_CFLAGS += $(call cc-option,-fconserve-stack)
593 599
600# check for 'asm goto'
601ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC)), y)
602 KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO
603endif
604
594# Add user supplied CPPFLAGS, AFLAGS and CFLAGS as the last assignments 605# Add user supplied CPPFLAGS, AFLAGS and CFLAGS as the last assignments
595# But warn user when we do so 606# But warn user when we do so
596warn-assign = \ 607warn-assign = \
diff --git a/arch/Kconfig b/arch/Kconfig
index fe48fc7a3eba..53d7f619a1b9 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -158,4 +158,7 @@ config HAVE_PERF_EVENTS_NMI
158 subsystem. Also has support for calculating CPU cycle events 158 subsystem. Also has support for calculating CPU cycle events
159 to determine how many clock cycles in a given period. 159 to determine how many clock cycles in a given period.
160 160
161config HAVE_ARCH_JUMP_LABEL
162 bool
163
161source "kernel/gcov/Kconfig" 164source "kernel/gcov/Kconfig"
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index b9647bb66d13..d04ccd73af45 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -9,6 +9,7 @@ config ALPHA
9 select HAVE_IDE 9 select HAVE_IDE
10 select HAVE_OPROFILE 10 select HAVE_OPROFILE
11 select HAVE_SYSCALL_WRAPPERS 11 select HAVE_SYSCALL_WRAPPERS
12 select HAVE_IRQ_WORK
12 select HAVE_PERF_EVENTS 13 select HAVE_PERF_EVENTS
13 select HAVE_DMA_ATTRS 14 select HAVE_DMA_ATTRS
14 help 15 help
diff --git a/arch/alpha/include/asm/perf_event.h b/arch/alpha/include/asm/perf_event.h
index 4157cd3c44a9..fe792ca818f6 100644
--- a/arch/alpha/include/asm/perf_event.h
+++ b/arch/alpha/include/asm/perf_event.h
@@ -1,11 +1,6 @@
1#ifndef __ASM_ALPHA_PERF_EVENT_H 1#ifndef __ASM_ALPHA_PERF_EVENT_H
2#define __ASM_ALPHA_PERF_EVENT_H 2#define __ASM_ALPHA_PERF_EVENT_H
3 3
4/* Alpha only supports software events through this interface. */
5extern void set_perf_event_pending(void);
6
7#define PERF_EVENT_INDEX_OFFSET 0
8
9#ifdef CONFIG_PERF_EVENTS 4#ifdef CONFIG_PERF_EVENTS
10extern void init_hw_perf_events(void); 5extern void init_hw_perf_events(void);
11#else 6#else
diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c
index 85d8e4f58c83..1cc49683fb69 100644
--- a/arch/alpha/kernel/perf_event.c
+++ b/arch/alpha/kernel/perf_event.c
@@ -307,7 +307,7 @@ again:
307 new_raw_count) != prev_raw_count) 307 new_raw_count) != prev_raw_count)
308 goto again; 308 goto again;
309 309
310 delta = (new_raw_count - (prev_raw_count & alpha_pmu->pmc_count_mask[idx])) + ovf; 310 delta = (new_raw_count - (prev_raw_count & alpha_pmu->pmc_count_mask[idx])) + ovf;
311 311
312 /* It is possible on very rare occasions that the PMC has overflowed 312 /* It is possible on very rare occasions that the PMC has overflowed
313 * but the interrupt is yet to come. Detect and fix this situation. 313 * but the interrupt is yet to come. Detect and fix this situation.
@@ -402,14 +402,13 @@ static void maybe_change_configuration(struct cpu_hw_events *cpuc)
402 struct hw_perf_event *hwc = &pe->hw; 402 struct hw_perf_event *hwc = &pe->hw;
403 int idx = hwc->idx; 403 int idx = hwc->idx;
404 404
405 if (cpuc->current_idx[j] != PMC_NO_INDEX) { 405 if (cpuc->current_idx[j] == PMC_NO_INDEX) {
406 cpuc->idx_mask |= (1<<cpuc->current_idx[j]); 406 alpha_perf_event_set_period(pe, hwc, idx);
407 continue; 407 cpuc->current_idx[j] = idx;
408 } 408 }
409 409
410 alpha_perf_event_set_period(pe, hwc, idx); 410 if (!(hwc->state & PERF_HES_STOPPED))
411 cpuc->current_idx[j] = idx; 411 cpuc->idx_mask |= (1<<cpuc->current_idx[j]);
412 cpuc->idx_mask |= (1<<cpuc->current_idx[j]);
413 } 412 }
414 cpuc->config = cpuc->event[0]->hw.config_base; 413 cpuc->config = cpuc->event[0]->hw.config_base;
415} 414}
@@ -420,12 +419,13 @@ static void maybe_change_configuration(struct cpu_hw_events *cpuc)
420 * - this function is called from outside this module via the pmu struct 419 * - this function is called from outside this module via the pmu struct
421 * returned from perf event initialisation. 420 * returned from perf event initialisation.
422 */ 421 */
423static int alpha_pmu_enable(struct perf_event *event) 422static int alpha_pmu_add(struct perf_event *event, int flags)
424{ 423{
425 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 424 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
425 struct hw_perf_event *hwc = &event->hw;
426 int n0; 426 int n0;
427 int ret; 427 int ret;
428 unsigned long flags; 428 unsigned long irq_flags;
429 429
430 /* 430 /*
431 * The Sparc code has the IRQ disable first followed by the perf 431 * The Sparc code has the IRQ disable first followed by the perf
@@ -435,8 +435,8 @@ static int alpha_pmu_enable(struct perf_event *event)
435 * nevertheless we disable the PMCs first to enable a potential 435 * nevertheless we disable the PMCs first to enable a potential
436 * final PMI to occur before we disable interrupts. 436 * final PMI to occur before we disable interrupts.
437 */ 437 */
438 perf_disable(); 438 perf_pmu_disable(event->pmu);
439 local_irq_save(flags); 439 local_irq_save(irq_flags);
440 440
441 /* Default to error to be returned */ 441 /* Default to error to be returned */
442 ret = -EAGAIN; 442 ret = -EAGAIN;
@@ -455,8 +455,12 @@ static int alpha_pmu_enable(struct perf_event *event)
455 } 455 }
456 } 456 }
457 457
458 local_irq_restore(flags); 458 hwc->state = PERF_HES_UPTODATE;
459 perf_enable(); 459 if (!(flags & PERF_EF_START))
460 hwc->state |= PERF_HES_STOPPED;
461
462 local_irq_restore(irq_flags);
463 perf_pmu_enable(event->pmu);
460 464
461 return ret; 465 return ret;
462} 466}
@@ -467,15 +471,15 @@ static int alpha_pmu_enable(struct perf_event *event)
467 * - this function is called from outside this module via the pmu struct 471 * - this function is called from outside this module via the pmu struct
468 * returned from perf event initialisation. 472 * returned from perf event initialisation.
469 */ 473 */
470static void alpha_pmu_disable(struct perf_event *event) 474static void alpha_pmu_del(struct perf_event *event, int flags)
471{ 475{
472 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 476 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
473 struct hw_perf_event *hwc = &event->hw; 477 struct hw_perf_event *hwc = &event->hw;
474 unsigned long flags; 478 unsigned long irq_flags;
475 int j; 479 int j;
476 480
477 perf_disable(); 481 perf_pmu_disable(event->pmu);
478 local_irq_save(flags); 482 local_irq_save(irq_flags);
479 483
480 for (j = 0; j < cpuc->n_events; j++) { 484 for (j = 0; j < cpuc->n_events; j++) {
481 if (event == cpuc->event[j]) { 485 if (event == cpuc->event[j]) {
@@ -501,8 +505,8 @@ static void alpha_pmu_disable(struct perf_event *event)
501 } 505 }
502 } 506 }
503 507
504 local_irq_restore(flags); 508 local_irq_restore(irq_flags);
505 perf_enable(); 509 perf_pmu_enable(event->pmu);
506} 510}
507 511
508 512
@@ -514,13 +518,44 @@ static void alpha_pmu_read(struct perf_event *event)
514} 518}
515 519
516 520
517static void alpha_pmu_unthrottle(struct perf_event *event) 521static void alpha_pmu_stop(struct perf_event *event, int flags)
522{
523 struct hw_perf_event *hwc = &event->hw;
524 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
525
526 if (!(hwc->state & PERF_HES_STOPPED)) {
527 cpuc->idx_mask &= ~(1UL<<hwc->idx);
528 hwc->state |= PERF_HES_STOPPED;
529 }
530
531 if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
532 alpha_perf_event_update(event, hwc, hwc->idx, 0);
533 hwc->state |= PERF_HES_UPTODATE;
534 }
535
536 if (cpuc->enabled)
537 wrperfmon(PERFMON_CMD_DISABLE, (1UL<<hwc->idx));
538}
539
540
541static void alpha_pmu_start(struct perf_event *event, int flags)
518{ 542{
519 struct hw_perf_event *hwc = &event->hw; 543 struct hw_perf_event *hwc = &event->hw;
520 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 544 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
521 545
546 if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
547 return;
548
549 if (flags & PERF_EF_RELOAD) {
550 WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
551 alpha_perf_event_set_period(event, hwc, hwc->idx);
552 }
553
554 hwc->state = 0;
555
522 cpuc->idx_mask |= 1UL<<hwc->idx; 556 cpuc->idx_mask |= 1UL<<hwc->idx;
523 wrperfmon(PERFMON_CMD_ENABLE, (1UL<<hwc->idx)); 557 if (cpuc->enabled)
558 wrperfmon(PERFMON_CMD_ENABLE, (1UL<<hwc->idx));
524} 559}
525 560
526 561
@@ -642,39 +677,36 @@ static int __hw_perf_event_init(struct perf_event *event)
642 return 0; 677 return 0;
643} 678}
644 679
645static const struct pmu pmu = {
646 .enable = alpha_pmu_enable,
647 .disable = alpha_pmu_disable,
648 .read = alpha_pmu_read,
649 .unthrottle = alpha_pmu_unthrottle,
650};
651
652
653/* 680/*
654 * Main entry point to initialise a HW performance event. 681 * Main entry point to initialise a HW performance event.
655 */ 682 */
656const struct pmu *hw_perf_event_init(struct perf_event *event) 683static int alpha_pmu_event_init(struct perf_event *event)
657{ 684{
658 int err; 685 int err;
659 686
687 switch (event->attr.type) {
688 case PERF_TYPE_RAW:
689 case PERF_TYPE_HARDWARE:
690 case PERF_TYPE_HW_CACHE:
691 break;
692
693 default:
694 return -ENOENT;
695 }
696
660 if (!alpha_pmu) 697 if (!alpha_pmu)
661 return ERR_PTR(-ENODEV); 698 return -ENODEV;
662 699
663 /* Do the real initialisation work. */ 700 /* Do the real initialisation work. */
664 err = __hw_perf_event_init(event); 701 err = __hw_perf_event_init(event);
665 702
666 if (err) 703 return err;
667 return ERR_PTR(err);
668
669 return &pmu;
670} 704}
671 705
672
673
674/* 706/*
675 * Main entry point - enable HW performance counters. 707 * Main entry point - enable HW performance counters.
676 */ 708 */
677void hw_perf_enable(void) 709static void alpha_pmu_enable(struct pmu *pmu)
678{ 710{
679 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 711 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
680 712
@@ -700,7 +732,7 @@ void hw_perf_enable(void)
700 * Main entry point - disable HW performance counters. 732 * Main entry point - disable HW performance counters.
701 */ 733 */
702 734
703void hw_perf_disable(void) 735static void alpha_pmu_disable(struct pmu *pmu)
704{ 736{
705 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 737 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
706 738
@@ -713,6 +745,17 @@ void hw_perf_disable(void)
713 wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask); 745 wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask);
714} 746}
715 747
748static struct pmu pmu = {
749 .pmu_enable = alpha_pmu_enable,
750 .pmu_disable = alpha_pmu_disable,
751 .event_init = alpha_pmu_event_init,
752 .add = alpha_pmu_add,
753 .del = alpha_pmu_del,
754 .start = alpha_pmu_start,
755 .stop = alpha_pmu_stop,
756 .read = alpha_pmu_read,
757};
758
716 759
717/* 760/*
718 * Main entry point - don't know when this is called but it 761 * Main entry point - don't know when this is called but it
@@ -766,7 +809,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
766 wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask); 809 wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask);
767 810
768 /* la_ptr is the counter that overflowed. */ 811 /* la_ptr is the counter that overflowed. */
769 if (unlikely(la_ptr >= perf_max_events)) { 812 if (unlikely(la_ptr >= alpha_pmu->num_pmcs)) {
770 /* This should never occur! */ 813 /* This should never occur! */
771 irq_err_count++; 814 irq_err_count++;
772 pr_warning("PMI: silly index %ld\n", la_ptr); 815 pr_warning("PMI: silly index %ld\n", la_ptr);
@@ -807,7 +850,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
807 /* Interrupts coming too quickly; "throttle" the 850 /* Interrupts coming too quickly; "throttle" the
808 * counter, i.e., disable it for a little while. 851 * counter, i.e., disable it for a little while.
809 */ 852 */
810 cpuc->idx_mask &= ~(1UL<<idx); 853 alpha_pmu_stop(event, 0);
811 } 854 }
812 } 855 }
813 wrperfmon(PERFMON_CMD_ENABLE, cpuc->idx_mask); 856 wrperfmon(PERFMON_CMD_ENABLE, cpuc->idx_mask);
@@ -837,6 +880,7 @@ void __init init_hw_perf_events(void)
837 880
838 /* And set up PMU specification */ 881 /* And set up PMU specification */
839 alpha_pmu = &ev67_pmu; 882 alpha_pmu = &ev67_pmu;
840 perf_max_events = alpha_pmu->num_pmcs; 883
884 perf_pmu_register(&pmu);
841} 885}
842 886
diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c
index 396af1799ea4..0f1d8493cfca 100644
--- a/arch/alpha/kernel/time.c
+++ b/arch/alpha/kernel/time.c
@@ -41,7 +41,7 @@
41#include <linux/init.h> 41#include <linux/init.h>
42#include <linux/bcd.h> 42#include <linux/bcd.h>
43#include <linux/profile.h> 43#include <linux/profile.h>
44#include <linux/perf_event.h> 44#include <linux/irq_work.h>
45 45
46#include <asm/uaccess.h> 46#include <asm/uaccess.h>
47#include <asm/io.h> 47#include <asm/io.h>
@@ -83,25 +83,25 @@ static struct {
83 83
84unsigned long est_cycle_freq; 84unsigned long est_cycle_freq;
85 85
86#ifdef CONFIG_PERF_EVENTS 86#ifdef CONFIG_IRQ_WORK
87 87
88DEFINE_PER_CPU(u8, perf_event_pending); 88DEFINE_PER_CPU(u8, irq_work_pending);
89 89
90#define set_perf_event_pending_flag() __get_cpu_var(perf_event_pending) = 1 90#define set_irq_work_pending_flag() __get_cpu_var(irq_work_pending) = 1
91#define test_perf_event_pending() __get_cpu_var(perf_event_pending) 91#define test_irq_work_pending() __get_cpu_var(irq_work_pending)
92#define clear_perf_event_pending() __get_cpu_var(perf_event_pending) = 0 92#define clear_irq_work_pending() __get_cpu_var(irq_work_pending) = 0
93 93
94void set_perf_event_pending(void) 94void set_irq_work_pending(void)
95{ 95{
96 set_perf_event_pending_flag(); 96 set_irq_work_pending_flag();
97} 97}
98 98
99#else /* CONFIG_PERF_EVENTS */ 99#else /* CONFIG_IRQ_WORK */
100 100
101#define test_perf_event_pending() 0 101#define test_irq_work_pending() 0
102#define clear_perf_event_pending() 102#define clear_irq_work_pending()
103 103
104#endif /* CONFIG_PERF_EVENTS */ 104#endif /* CONFIG_IRQ_WORK */
105 105
106 106
107static inline __u32 rpcc(void) 107static inline __u32 rpcc(void)
@@ -191,9 +191,9 @@ irqreturn_t timer_interrupt(int irq, void *dev)
191 191
192 write_sequnlock(&xtime_lock); 192 write_sequnlock(&xtime_lock);
193 193
194 if (test_perf_event_pending()) { 194 if (test_irq_work_pending()) {
195 clear_perf_event_pending(); 195 clear_irq_work_pending();
196 perf_event_do_pending(); 196 irq_work_run();
197 } 197 }
198 198
199#ifndef CONFIG_SMP 199#ifndef CONFIG_SMP
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 9c26ba7244fb..9103904b3dab 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -23,6 +23,7 @@ config ARM
23 select HAVE_KERNEL_GZIP 23 select HAVE_KERNEL_GZIP
24 select HAVE_KERNEL_LZO 24 select HAVE_KERNEL_LZO
25 select HAVE_KERNEL_LZMA 25 select HAVE_KERNEL_LZMA
26 select HAVE_IRQ_WORK
26 select HAVE_PERF_EVENTS 27 select HAVE_PERF_EVENTS
27 select PERF_USE_VMALLOC 28 select PERF_USE_VMALLOC
28 select HAVE_REGS_AND_STACK_ACCESS_API 29 select HAVE_REGS_AND_STACK_ACCESS_API
diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h
index b5799a3b7117..c4aa4e8c6af9 100644
--- a/arch/arm/include/asm/perf_event.h
+++ b/arch/arm/include/asm/perf_event.h
@@ -12,18 +12,6 @@
12#ifndef __ARM_PERF_EVENT_H__ 12#ifndef __ARM_PERF_EVENT_H__
13#define __ARM_PERF_EVENT_H__ 13#define __ARM_PERF_EVENT_H__
14 14
15/*
16 * NOP: on *most* (read: all supported) ARM platforms, the performance
17 * counter interrupts are regular interrupts and not an NMI. This
18 * means that when we receive the interrupt we can call
19 * perf_event_do_pending() that handles all of the work with
20 * interrupts disabled.
21 */
22static inline void
23set_perf_event_pending(void)
24{
25}
26
27/* ARM performance counters start from 1 (in the cp15 accesses) so use the 15/* ARM performance counters start from 1 (in the cp15 accesses) so use the
28 * same indexes here for consistency. */ 16 * same indexes here for consistency. */
29#define PERF_EVENT_INDEX_OFFSET 1 17#define PERF_EVENT_INDEX_OFFSET 1
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index ecbb0288e5dd..49643b1467e6 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -123,6 +123,12 @@ armpmu_get_max_events(void)
123} 123}
124EXPORT_SYMBOL_GPL(armpmu_get_max_events); 124EXPORT_SYMBOL_GPL(armpmu_get_max_events);
125 125
126int perf_num_counters(void)
127{
128 return armpmu_get_max_events();
129}
130EXPORT_SYMBOL_GPL(perf_num_counters);
131
126#define HW_OP_UNSUPPORTED 0xFFFF 132#define HW_OP_UNSUPPORTED 0xFFFF
127 133
128#define C(_x) \ 134#define C(_x) \
@@ -221,46 +227,56 @@ again:
221} 227}
222 228
223static void 229static void
224armpmu_disable(struct perf_event *event) 230armpmu_read(struct perf_event *event)
225{ 231{
226 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
227 struct hw_perf_event *hwc = &event->hw; 232 struct hw_perf_event *hwc = &event->hw;
228 int idx = hwc->idx;
229
230 WARN_ON(idx < 0);
231
232 clear_bit(idx, cpuc->active_mask);
233 armpmu->disable(hwc, idx);
234
235 barrier();
236 233
237 armpmu_event_update(event, hwc, idx); 234 /* Don't read disabled counters! */
238 cpuc->events[idx] = NULL; 235 if (hwc->idx < 0)
239 clear_bit(idx, cpuc->used_mask); 236 return;
240 237
241 perf_event_update_userpage(event); 238 armpmu_event_update(event, hwc, hwc->idx);
242} 239}
243 240
244static void 241static void
245armpmu_read(struct perf_event *event) 242armpmu_stop(struct perf_event *event, int flags)
246{ 243{
247 struct hw_perf_event *hwc = &event->hw; 244 struct hw_perf_event *hwc = &event->hw;
248 245
249 /* Don't read disabled counters! */ 246 if (!armpmu)
250 if (hwc->idx < 0)
251 return; 247 return;
252 248
253 armpmu_event_update(event, hwc, hwc->idx); 249 /*
250 * ARM pmu always has to update the counter, so ignore
251 * PERF_EF_UPDATE, see comments in armpmu_start().
252 */
253 if (!(hwc->state & PERF_HES_STOPPED)) {
254 armpmu->disable(hwc, hwc->idx);
255 barrier(); /* why? */
256 armpmu_event_update(event, hwc, hwc->idx);
257 hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
258 }
254} 259}
255 260
256static void 261static void
257armpmu_unthrottle(struct perf_event *event) 262armpmu_start(struct perf_event *event, int flags)
258{ 263{
259 struct hw_perf_event *hwc = &event->hw; 264 struct hw_perf_event *hwc = &event->hw;
260 265
266 if (!armpmu)
267 return;
268
269 /*
270 * ARM pmu always has to reprogram the period, so ignore
271 * PERF_EF_RELOAD, see the comment below.
272 */
273 if (flags & PERF_EF_RELOAD)
274 WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
275
276 hwc->state = 0;
261 /* 277 /*
262 * Set the period again. Some counters can't be stopped, so when we 278 * Set the period again. Some counters can't be stopped, so when we
263 * were throttled we simply disabled the IRQ source and the counter 279 * were stopped we simply disabled the IRQ source and the counter
264 * may have been left counting. If we don't do this step then we may 280 * may have been left counting. If we don't do this step then we may
265 * get an interrupt too soon or *way* too late if the overflow has 281 * get an interrupt too soon or *way* too late if the overflow has
266 * happened since disabling. 282 * happened since disabling.
@@ -269,14 +285,33 @@ armpmu_unthrottle(struct perf_event *event)
269 armpmu->enable(hwc, hwc->idx); 285 armpmu->enable(hwc, hwc->idx);
270} 286}
271 287
288static void
289armpmu_del(struct perf_event *event, int flags)
290{
291 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
292 struct hw_perf_event *hwc = &event->hw;
293 int idx = hwc->idx;
294
295 WARN_ON(idx < 0);
296
297 clear_bit(idx, cpuc->active_mask);
298 armpmu_stop(event, PERF_EF_UPDATE);
299 cpuc->events[idx] = NULL;
300 clear_bit(idx, cpuc->used_mask);
301
302 perf_event_update_userpage(event);
303}
304
272static int 305static int
273armpmu_enable(struct perf_event *event) 306armpmu_add(struct perf_event *event, int flags)
274{ 307{
275 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 308 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
276 struct hw_perf_event *hwc = &event->hw; 309 struct hw_perf_event *hwc = &event->hw;
277 int idx; 310 int idx;
278 int err = 0; 311 int err = 0;
279 312
313 perf_pmu_disable(event->pmu);
314
280 /* If we don't have a space for the counter then finish early. */ 315 /* If we don't have a space for the counter then finish early. */
281 idx = armpmu->get_event_idx(cpuc, hwc); 316 idx = armpmu->get_event_idx(cpuc, hwc);
282 if (idx < 0) { 317 if (idx < 0) {
@@ -293,25 +328,19 @@ armpmu_enable(struct perf_event *event)
293 cpuc->events[idx] = event; 328 cpuc->events[idx] = event;
294 set_bit(idx, cpuc->active_mask); 329 set_bit(idx, cpuc->active_mask);
295 330
296 /* Set the period for the event. */ 331 hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
297 armpmu_event_set_period(event, hwc, idx); 332 if (flags & PERF_EF_START)
298 333 armpmu_start(event, PERF_EF_RELOAD);
299 /* Enable the event. */
300 armpmu->enable(hwc, idx);
301 334
302 /* Propagate our changes to the userspace mapping. */ 335 /* Propagate our changes to the userspace mapping. */
303 perf_event_update_userpage(event); 336 perf_event_update_userpage(event);
304 337
305out: 338out:
339 perf_pmu_enable(event->pmu);
306 return err; 340 return err;
307} 341}
308 342
309static struct pmu pmu = { 343static struct pmu pmu;
310 .enable = armpmu_enable,
311 .disable = armpmu_disable,
312 .unthrottle = armpmu_unthrottle,
313 .read = armpmu_read,
314};
315 344
316static int 345static int
317validate_event(struct cpu_hw_events *cpuc, 346validate_event(struct cpu_hw_events *cpuc,
@@ -491,20 +520,29 @@ __hw_perf_event_init(struct perf_event *event)
491 return err; 520 return err;
492} 521}
493 522
494const struct pmu * 523static int armpmu_event_init(struct perf_event *event)
495hw_perf_event_init(struct perf_event *event)
496{ 524{
497 int err = 0; 525 int err = 0;
498 526
527 switch (event->attr.type) {
528 case PERF_TYPE_RAW:
529 case PERF_TYPE_HARDWARE:
530 case PERF_TYPE_HW_CACHE:
531 break;
532
533 default:
534 return -ENOENT;
535 }
536
499 if (!armpmu) 537 if (!armpmu)
500 return ERR_PTR(-ENODEV); 538 return -ENODEV;
501 539
502 event->destroy = hw_perf_event_destroy; 540 event->destroy = hw_perf_event_destroy;
503 541
504 if (!atomic_inc_not_zero(&active_events)) { 542 if (!atomic_inc_not_zero(&active_events)) {
505 if (atomic_read(&active_events) > perf_max_events) { 543 if (atomic_read(&active_events) > armpmu->num_events) {
506 atomic_dec(&active_events); 544 atomic_dec(&active_events);
507 return ERR_PTR(-ENOSPC); 545 return -ENOSPC;
508 } 546 }
509 547
510 mutex_lock(&pmu_reserve_mutex); 548 mutex_lock(&pmu_reserve_mutex);
@@ -518,17 +556,16 @@ hw_perf_event_init(struct perf_event *event)
518 } 556 }
519 557
520 if (err) 558 if (err)
521 return ERR_PTR(err); 559 return err;
522 560
523 err = __hw_perf_event_init(event); 561 err = __hw_perf_event_init(event);
524 if (err) 562 if (err)
525 hw_perf_event_destroy(event); 563 hw_perf_event_destroy(event);
526 564
527 return err ? ERR_PTR(err) : &pmu; 565 return err;
528} 566}
529 567
530void 568static void armpmu_enable(struct pmu *pmu)
531hw_perf_enable(void)
532{ 569{
533 /* Enable all of the perf events on hardware. */ 570 /* Enable all of the perf events on hardware. */
534 int idx; 571 int idx;
@@ -549,13 +586,23 @@ hw_perf_enable(void)
549 armpmu->start(); 586 armpmu->start();
550} 587}
551 588
552void 589static void armpmu_disable(struct pmu *pmu)
553hw_perf_disable(void)
554{ 590{
555 if (armpmu) 591 if (armpmu)
556 armpmu->stop(); 592 armpmu->stop();
557} 593}
558 594
595static struct pmu pmu = {
596 .pmu_enable = armpmu_enable,
597 .pmu_disable = armpmu_disable,
598 .event_init = armpmu_event_init,
599 .add = armpmu_add,
600 .del = armpmu_del,
601 .start = armpmu_start,
602 .stop = armpmu_stop,
603 .read = armpmu_read,
604};
605
559/* 606/*
560 * ARMv6 Performance counter handling code. 607 * ARMv6 Performance counter handling code.
561 * 608 *
@@ -1045,7 +1092,7 @@ armv6pmu_handle_irq(int irq_num,
1045 * platforms that can have the PMU interrupts raised as an NMI, this 1092 * platforms that can have the PMU interrupts raised as an NMI, this
1046 * will not work. 1093 * will not work.
1047 */ 1094 */
1048 perf_event_do_pending(); 1095 irq_work_run();
1049 1096
1050 return IRQ_HANDLED; 1097 return IRQ_HANDLED;
1051} 1098}
@@ -2021,7 +2068,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
2021 * platforms that can have the PMU interrupts raised as an NMI, this 2068 * platforms that can have the PMU interrupts raised as an NMI, this
2022 * will not work. 2069 * will not work.
2023 */ 2070 */
2024 perf_event_do_pending(); 2071 irq_work_run();
2025 2072
2026 return IRQ_HANDLED; 2073 return IRQ_HANDLED;
2027} 2074}
@@ -2389,7 +2436,7 @@ xscale1pmu_handle_irq(int irq_num, void *dev)
2389 armpmu->disable(hwc, idx); 2436 armpmu->disable(hwc, idx);
2390 } 2437 }
2391 2438
2392 perf_event_do_pending(); 2439 irq_work_run();
2393 2440
2394 /* 2441 /*
2395 * Re-enable the PMU. 2442 * Re-enable the PMU.
@@ -2716,7 +2763,7 @@ xscale2pmu_handle_irq(int irq_num, void *dev)
2716 armpmu->disable(hwc, idx); 2763 armpmu->disable(hwc, idx);
2717 } 2764 }
2718 2765
2719 perf_event_do_pending(); 2766 irq_work_run();
2720 2767
2721 /* 2768 /*
2722 * Re-enable the PMU. 2769 * Re-enable the PMU.
@@ -2933,14 +2980,12 @@ init_hw_perf_events(void)
2933 armpmu = &armv6pmu; 2980 armpmu = &armv6pmu;
2934 memcpy(armpmu_perf_cache_map, armv6_perf_cache_map, 2981 memcpy(armpmu_perf_cache_map, armv6_perf_cache_map,
2935 sizeof(armv6_perf_cache_map)); 2982 sizeof(armv6_perf_cache_map));
2936 perf_max_events = armv6pmu.num_events;
2937 break; 2983 break;
2938 case 0xB020: /* ARM11mpcore */ 2984 case 0xB020: /* ARM11mpcore */
2939 armpmu = &armv6mpcore_pmu; 2985 armpmu = &armv6mpcore_pmu;
2940 memcpy(armpmu_perf_cache_map, 2986 memcpy(armpmu_perf_cache_map,
2941 armv6mpcore_perf_cache_map, 2987 armv6mpcore_perf_cache_map,
2942 sizeof(armv6mpcore_perf_cache_map)); 2988 sizeof(armv6mpcore_perf_cache_map));
2943 perf_max_events = armv6mpcore_pmu.num_events;
2944 break; 2989 break;
2945 case 0xC080: /* Cortex-A8 */ 2990 case 0xC080: /* Cortex-A8 */
2946 armv7pmu.id = ARM_PERF_PMU_ID_CA8; 2991 armv7pmu.id = ARM_PERF_PMU_ID_CA8;
@@ -2952,7 +2997,6 @@ init_hw_perf_events(void)
2952 /* Reset PMNC and read the nb of CNTx counters 2997 /* Reset PMNC and read the nb of CNTx counters
2953 supported */ 2998 supported */
2954 armv7pmu.num_events = armv7_reset_read_pmnc(); 2999 armv7pmu.num_events = armv7_reset_read_pmnc();
2955 perf_max_events = armv7pmu.num_events;
2956 break; 3000 break;
2957 case 0xC090: /* Cortex-A9 */ 3001 case 0xC090: /* Cortex-A9 */
2958 armv7pmu.id = ARM_PERF_PMU_ID_CA9; 3002 armv7pmu.id = ARM_PERF_PMU_ID_CA9;
@@ -2964,7 +3008,6 @@ init_hw_perf_events(void)
2964 /* Reset PMNC and read the nb of CNTx counters 3008 /* Reset PMNC and read the nb of CNTx counters
2965 supported */ 3009 supported */
2966 armv7pmu.num_events = armv7_reset_read_pmnc(); 3010 armv7pmu.num_events = armv7_reset_read_pmnc();
2967 perf_max_events = armv7pmu.num_events;
2968 break; 3011 break;
2969 } 3012 }
2970 /* Intel CPUs [xscale]. */ 3013 /* Intel CPUs [xscale]. */
@@ -2975,13 +3018,11 @@ init_hw_perf_events(void)
2975 armpmu = &xscale1pmu; 3018 armpmu = &xscale1pmu;
2976 memcpy(armpmu_perf_cache_map, xscale_perf_cache_map, 3019 memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
2977 sizeof(xscale_perf_cache_map)); 3020 sizeof(xscale_perf_cache_map));
2978 perf_max_events = xscale1pmu.num_events;
2979 break; 3021 break;
2980 case 2: 3022 case 2:
2981 armpmu = &xscale2pmu; 3023 armpmu = &xscale2pmu;
2982 memcpy(armpmu_perf_cache_map, xscale_perf_cache_map, 3024 memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
2983 sizeof(xscale_perf_cache_map)); 3025 sizeof(xscale_perf_cache_map));
2984 perf_max_events = xscale2pmu.num_events;
2985 break; 3026 break;
2986 } 3027 }
2987 } 3028 }
@@ -2991,9 +3032,10 @@ init_hw_perf_events(void)
2991 arm_pmu_names[armpmu->id], armpmu->num_events); 3032 arm_pmu_names[armpmu->id], armpmu->num_events);
2992 } else { 3033 } else {
2993 pr_info("no hardware support available\n"); 3034 pr_info("no hardware support available\n");
2994 perf_max_events = -1;
2995 } 3035 }
2996 3036
3037 perf_pmu_register(&pmu);
3038
2997 return 0; 3039 return 0;
2998} 3040}
2999arch_initcall(init_hw_perf_events); 3041arch_initcall(init_hw_perf_events);
@@ -3001,13 +3043,6 @@ arch_initcall(init_hw_perf_events);
3001/* 3043/*
3002 * Callchain handling code. 3044 * Callchain handling code.
3003 */ 3045 */
3004static inline void
3005callchain_store(struct perf_callchain_entry *entry,
3006 u64 ip)
3007{
3008 if (entry->nr < PERF_MAX_STACK_DEPTH)
3009 entry->ip[entry->nr++] = ip;
3010}
3011 3046
3012/* 3047/*
3013 * The registers we're interested in are at the end of the variable 3048 * The registers we're interested in are at the end of the variable
@@ -3039,7 +3074,7 @@ user_backtrace(struct frame_tail *tail,
3039 if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail))) 3074 if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail)))
3040 return NULL; 3075 return NULL;
3041 3076
3042 callchain_store(entry, buftail.lr); 3077 perf_callchain_store(entry, buftail.lr);
3043 3078
3044 /* 3079 /*
3045 * Frame pointers should strictly progress back up the stack 3080 * Frame pointers should strictly progress back up the stack
@@ -3051,16 +3086,11 @@ user_backtrace(struct frame_tail *tail,
3051 return buftail.fp - 1; 3086 return buftail.fp - 1;
3052} 3087}
3053 3088
3054static void 3089void
3055perf_callchain_user(struct pt_regs *regs, 3090perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
3056 struct perf_callchain_entry *entry)
3057{ 3091{
3058 struct frame_tail *tail; 3092 struct frame_tail *tail;
3059 3093
3060 callchain_store(entry, PERF_CONTEXT_USER);
3061
3062 if (!user_mode(regs))
3063 regs = task_pt_regs(current);
3064 3094
3065 tail = (struct frame_tail *)regs->ARM_fp - 1; 3095 tail = (struct frame_tail *)regs->ARM_fp - 1;
3066 3096
@@ -3078,56 +3108,18 @@ callchain_trace(struct stackframe *fr,
3078 void *data) 3108 void *data)
3079{ 3109{
3080 struct perf_callchain_entry *entry = data; 3110 struct perf_callchain_entry *entry = data;
3081 callchain_store(entry, fr->pc); 3111 perf_callchain_store(entry, fr->pc);
3082 return 0; 3112 return 0;
3083} 3113}
3084 3114
3085static void 3115void
3086perf_callchain_kernel(struct pt_regs *regs, 3116perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
3087 struct perf_callchain_entry *entry)
3088{ 3117{
3089 struct stackframe fr; 3118 struct stackframe fr;
3090 3119
3091 callchain_store(entry, PERF_CONTEXT_KERNEL);
3092 fr.fp = regs->ARM_fp; 3120 fr.fp = regs->ARM_fp;
3093 fr.sp = regs->ARM_sp; 3121 fr.sp = regs->ARM_sp;
3094 fr.lr = regs->ARM_lr; 3122 fr.lr = regs->ARM_lr;
3095 fr.pc = regs->ARM_pc; 3123 fr.pc = regs->ARM_pc;
3096 walk_stackframe(&fr, callchain_trace, entry); 3124 walk_stackframe(&fr, callchain_trace, entry);
3097} 3125}
3098
3099static void
3100perf_do_callchain(struct pt_regs *regs,
3101 struct perf_callchain_entry *entry)
3102{
3103 int is_user;
3104
3105 if (!regs)
3106 return;
3107
3108 is_user = user_mode(regs);
3109
3110 if (!current || !current->pid)
3111 return;
3112
3113 if (is_user && current->state != TASK_RUNNING)
3114 return;
3115
3116 if (!is_user)
3117 perf_callchain_kernel(regs, entry);
3118
3119 if (current->mm)
3120 perf_callchain_user(regs, entry);
3121}
3122
3123static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
3124
3125struct perf_callchain_entry *
3126perf_callchain(struct pt_regs *regs)
3127{
3128 struct perf_callchain_entry *entry = &__get_cpu_var(pmc_irq_entry);
3129
3130 entry->nr = 0;
3131 perf_do_callchain(regs, entry);
3132 return entry;
3133}
diff --git a/arch/arm/oprofile/Makefile b/arch/arm/oprofile/Makefile
index e666eafed152..b2215c61cdf0 100644
--- a/arch/arm/oprofile/Makefile
+++ b/arch/arm/oprofile/Makefile
@@ -6,4 +6,8 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
6 oprofilefs.o oprofile_stats.o \ 6 oprofilefs.o oprofile_stats.o \
7 timer_int.o ) 7 timer_int.o )
8 8
9ifeq ($(CONFIG_HW_PERF_EVENTS),y)
10DRIVER_OBJS += $(addprefix ../../../drivers/oprofile/, oprofile_perf.o)
11endif
12
9oprofile-y := $(DRIVER_OBJS) common.o 13oprofile-y := $(DRIVER_OBJS) common.o
diff --git a/arch/arm/oprofile/common.c b/arch/arm/oprofile/common.c
index 72e09eb642dd..8aa974491dfc 100644
--- a/arch/arm/oprofile/common.c
+++ b/arch/arm/oprofile/common.c
@@ -25,139 +25,10 @@
25#include <asm/ptrace.h> 25#include <asm/ptrace.h>
26 26
27#ifdef CONFIG_HW_PERF_EVENTS 27#ifdef CONFIG_HW_PERF_EVENTS
28/* 28char *op_name_from_perf_id(void)
29 * Per performance monitor configuration as set via oprofilefs.
30 */
31struct op_counter_config {
32 unsigned long count;
33 unsigned long enabled;
34 unsigned long event;
35 unsigned long unit_mask;
36 unsigned long kernel;
37 unsigned long user;
38 struct perf_event_attr attr;
39};
40
41static int op_arm_enabled;
42static DEFINE_MUTEX(op_arm_mutex);
43
44static struct op_counter_config *counter_config;
45static struct perf_event **perf_events[nr_cpumask_bits];
46static int perf_num_counters;
47
48/*
49 * Overflow callback for oprofile.
50 */
51static void op_overflow_handler(struct perf_event *event, int unused,
52 struct perf_sample_data *data, struct pt_regs *regs)
53{ 29{
54 int id; 30 enum arm_perf_pmu_ids id = armpmu_get_pmu_id();
55 u32 cpu = smp_processor_id();
56
57 for (id = 0; id < perf_num_counters; ++id)
58 if (perf_events[cpu][id] == event)
59 break;
60
61 if (id != perf_num_counters)
62 oprofile_add_sample(regs, id);
63 else
64 pr_warning("oprofile: ignoring spurious overflow "
65 "on cpu %u\n", cpu);
66}
67
68/*
69 * Called by op_arm_setup to create perf attributes to mirror the oprofile
70 * settings in counter_config. Attributes are created as `pinned' events and
71 * so are permanently scheduled on the PMU.
72 */
73static void op_perf_setup(void)
74{
75 int i;
76 u32 size = sizeof(struct perf_event_attr);
77 struct perf_event_attr *attr;
78
79 for (i = 0; i < perf_num_counters; ++i) {
80 attr = &counter_config[i].attr;
81 memset(attr, 0, size);
82 attr->type = PERF_TYPE_RAW;
83 attr->size = size;
84 attr->config = counter_config[i].event;
85 attr->sample_period = counter_config[i].count;
86 attr->pinned = 1;
87 }
88}
89
90static int op_create_counter(int cpu, int event)
91{
92 int ret = 0;
93 struct perf_event *pevent;
94
95 if (!counter_config[event].enabled || (perf_events[cpu][event] != NULL))
96 return ret;
97
98 pevent = perf_event_create_kernel_counter(&counter_config[event].attr,
99 cpu, -1,
100 op_overflow_handler);
101
102 if (IS_ERR(pevent)) {
103 ret = PTR_ERR(pevent);
104 } else if (pevent->state != PERF_EVENT_STATE_ACTIVE) {
105 perf_event_release_kernel(pevent);
106 pr_warning("oprofile: failed to enable event %d "
107 "on CPU %d\n", event, cpu);
108 ret = -EBUSY;
109 } else {
110 perf_events[cpu][event] = pevent;
111 }
112
113 return ret;
114}
115 31
116static void op_destroy_counter(int cpu, int event)
117{
118 struct perf_event *pevent = perf_events[cpu][event];
119
120 if (pevent) {
121 perf_event_release_kernel(pevent);
122 perf_events[cpu][event] = NULL;
123 }
124}
125
126/*
127 * Called by op_arm_start to create active perf events based on the
128 * perviously configured attributes.
129 */
130static int op_perf_start(void)
131{
132 int cpu, event, ret = 0;
133
134 for_each_online_cpu(cpu) {
135 for (event = 0; event < perf_num_counters; ++event) {
136 ret = op_create_counter(cpu, event);
137 if (ret)
138 goto out;
139 }
140 }
141
142out:
143 return ret;
144}
145
146/*
147 * Called by op_arm_stop at the end of a profiling run.
148 */
149static void op_perf_stop(void)
150{
151 int cpu, event;
152
153 for_each_online_cpu(cpu)
154 for (event = 0; event < perf_num_counters; ++event)
155 op_destroy_counter(cpu, event);
156}
157
158
159static char *op_name_from_perf_id(enum arm_perf_pmu_ids id)
160{
161 switch (id) { 32 switch (id) {
162 case ARM_PERF_PMU_ID_XSCALE1: 33 case ARM_PERF_PMU_ID_XSCALE1:
163 return "arm/xscale1"; 34 return "arm/xscale1";
@@ -176,116 +47,6 @@ static char *op_name_from_perf_id(enum arm_perf_pmu_ids id)
176 } 47 }
177} 48}
178 49
179static int op_arm_create_files(struct super_block *sb, struct dentry *root)
180{
181 unsigned int i;
182
183 for (i = 0; i < perf_num_counters; i++) {
184 struct dentry *dir;
185 char buf[4];
186
187 snprintf(buf, sizeof buf, "%d", i);
188 dir = oprofilefs_mkdir(sb, root, buf);
189 oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
190 oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
191 oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count);
192 oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);
193 oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);
194 oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
195 }
196
197 return 0;
198}
199
200static int op_arm_setup(void)
201{
202 spin_lock(&oprofilefs_lock);
203 op_perf_setup();
204 spin_unlock(&oprofilefs_lock);
205 return 0;
206}
207
208static int op_arm_start(void)
209{
210 int ret = -EBUSY;
211
212 mutex_lock(&op_arm_mutex);
213 if (!op_arm_enabled) {
214 ret = 0;
215 op_perf_start();
216 op_arm_enabled = 1;
217 }
218 mutex_unlock(&op_arm_mutex);
219 return ret;
220}
221
222static void op_arm_stop(void)
223{
224 mutex_lock(&op_arm_mutex);
225 if (op_arm_enabled)
226 op_perf_stop();
227 op_arm_enabled = 0;
228 mutex_unlock(&op_arm_mutex);
229}
230
231#ifdef CONFIG_PM
232static int op_arm_suspend(struct platform_device *dev, pm_message_t state)
233{
234 mutex_lock(&op_arm_mutex);
235 if (op_arm_enabled)
236 op_perf_stop();
237 mutex_unlock(&op_arm_mutex);
238 return 0;
239}
240
241static int op_arm_resume(struct platform_device *dev)
242{
243 mutex_lock(&op_arm_mutex);
244 if (op_arm_enabled && op_perf_start())
245 op_arm_enabled = 0;
246 mutex_unlock(&op_arm_mutex);
247 return 0;
248}
249
250static struct platform_driver oprofile_driver = {
251 .driver = {
252 .name = "arm-oprofile",
253 },
254 .resume = op_arm_resume,
255 .suspend = op_arm_suspend,
256};
257
258static struct platform_device *oprofile_pdev;
259
260static int __init init_driverfs(void)
261{
262 int ret;
263
264 ret = platform_driver_register(&oprofile_driver);
265 if (ret)
266 goto out;
267
268 oprofile_pdev = platform_device_register_simple(
269 oprofile_driver.driver.name, 0, NULL, 0);
270 if (IS_ERR(oprofile_pdev)) {
271 ret = PTR_ERR(oprofile_pdev);
272 platform_driver_unregister(&oprofile_driver);
273 }
274
275out:
276 return ret;
277}
278
279static void exit_driverfs(void)
280{
281 platform_device_unregister(oprofile_pdev);
282 platform_driver_unregister(&oprofile_driver);
283}
284#else
285static int __init init_driverfs(void) { return 0; }
286#define exit_driverfs() do { } while (0)
287#endif /* CONFIG_PM */
288
289static int report_trace(struct stackframe *frame, void *d) 50static int report_trace(struct stackframe *frame, void *d)
290{ 51{
291 unsigned int *depth = d; 52 unsigned int *depth = d;
@@ -350,74 +111,14 @@ static void arm_backtrace(struct pt_regs * const regs, unsigned int depth)
350 111
351int __init oprofile_arch_init(struct oprofile_operations *ops) 112int __init oprofile_arch_init(struct oprofile_operations *ops)
352{ 113{
353 int cpu, ret = 0;
354
355 perf_num_counters = armpmu_get_max_events();
356
357 counter_config = kcalloc(perf_num_counters,
358 sizeof(struct op_counter_config), GFP_KERNEL);
359
360 if (!counter_config) {
361 pr_info("oprofile: failed to allocate %d "
362 "counters\n", perf_num_counters);
363 return -ENOMEM;
364 }
365
366 ret = init_driverfs();
367 if (ret) {
368 kfree(counter_config);
369 counter_config = NULL;
370 return ret;
371 }
372
373 for_each_possible_cpu(cpu) {
374 perf_events[cpu] = kcalloc(perf_num_counters,
375 sizeof(struct perf_event *), GFP_KERNEL);
376 if (!perf_events[cpu]) {
377 pr_info("oprofile: failed to allocate %d perf events "
378 "for cpu %d\n", perf_num_counters, cpu);
379 while (--cpu >= 0)
380 kfree(perf_events[cpu]);
381 return -ENOMEM;
382 }
383 }
384
385 ops->backtrace = arm_backtrace; 114 ops->backtrace = arm_backtrace;
386 ops->create_files = op_arm_create_files;
387 ops->setup = op_arm_setup;
388 ops->start = op_arm_start;
389 ops->stop = op_arm_stop;
390 ops->shutdown = op_arm_stop;
391 ops->cpu_type = op_name_from_perf_id(armpmu_get_pmu_id());
392
393 if (!ops->cpu_type)
394 ret = -ENODEV;
395 else
396 pr_info("oprofile: using %s\n", ops->cpu_type);
397 115
398 return ret; 116 return oprofile_perf_init(ops);
399} 117}
400 118
401void oprofile_arch_exit(void) 119void __exit oprofile_arch_exit(void)
402{ 120{
403 int cpu, id; 121 oprofile_perf_exit();
404 struct perf_event *event;
405
406 if (*perf_events) {
407 for_each_possible_cpu(cpu) {
408 for (id = 0; id < perf_num_counters; ++id) {
409 event = perf_events[cpu][id];
410 if (event != NULL)
411 perf_event_release_kernel(event);
412 }
413 kfree(perf_events[cpu]);
414 }
415 }
416
417 if (counter_config) {
418 kfree(counter_config);
419 exit_driverfs();
420 }
421} 122}
422#else 123#else
423int __init oprofile_arch_init(struct oprofile_operations *ops) 124int __init oprofile_arch_init(struct oprofile_operations *ops)
@@ -425,5 +126,5 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
425 pr_info("oprofile: hardware counters not available\n"); 126 pr_info("oprofile: hardware counters not available\n");
426 return -ENODEV; 127 return -ENODEV;
427} 128}
428void oprofile_arch_exit(void) {} 129void __exit oprofile_arch_exit(void) {}
429#endif /* CONFIG_HW_PERF_EVENTS */ 130#endif /* CONFIG_HW_PERF_EVENTS */
diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig
index 16399bd24993..0f2417df6323 100644
--- a/arch/frv/Kconfig
+++ b/arch/frv/Kconfig
@@ -7,6 +7,7 @@ config FRV
7 default y 7 default y
8 select HAVE_IDE 8 select HAVE_IDE
9 select HAVE_ARCH_TRACEHOOK 9 select HAVE_ARCH_TRACEHOOK
10 select HAVE_IRQ_WORK
10 select HAVE_PERF_EVENTS 11 select HAVE_PERF_EVENTS
11 12
12config ZONE_DMA 13config ZONE_DMA
diff --git a/arch/frv/lib/Makefile b/arch/frv/lib/Makefile
index f4709756d0d9..4ff2fb1e6b16 100644
--- a/arch/frv/lib/Makefile
+++ b/arch/frv/lib/Makefile
@@ -5,4 +5,4 @@
5lib-y := \ 5lib-y := \
6 __ashldi3.o __lshrdi3.o __muldi3.o __ashrdi3.o __negdi2.o __ucmpdi2.o \ 6 __ashldi3.o __lshrdi3.o __muldi3.o __ashrdi3.o __negdi2.o __ucmpdi2.o \
7 checksum.o memcpy.o memset.o atomic-ops.o atomic64-ops.o \ 7 checksum.o memcpy.o memset.o atomic-ops.o atomic64-ops.o \
8 outsl_ns.o outsl_sw.o insl_ns.o insl_sw.o cache.o perf_event.o 8 outsl_ns.o outsl_sw.o insl_ns.o insl_sw.o cache.o
diff --git a/arch/frv/lib/perf_event.c b/arch/frv/lib/perf_event.c
deleted file mode 100644
index 9ac5acfd2e91..000000000000
--- a/arch/frv/lib/perf_event.c
+++ /dev/null
@@ -1,19 +0,0 @@
1/* Performance event handling
2 *
3 * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 */
11
12#include <linux/perf_event.h>
13
14/*
15 * mark the performance event as pending
16 */
17void set_perf_event_pending(void)
18{
19}
diff --git a/arch/ia64/include/asm/hardirq.h b/arch/ia64/include/asm/hardirq.h
index d514cd9edb49..8fb7d33a661f 100644
--- a/arch/ia64/include/asm/hardirq.h
+++ b/arch/ia64/include/asm/hardirq.h
@@ -6,12 +6,6 @@
6 * David Mosberger-Tang <davidm@hpl.hp.com> 6 * David Mosberger-Tang <davidm@hpl.hp.com>
7 */ 7 */
8 8
9
10#include <linux/threads.h>
11#include <linux/irq.h>
12
13#include <asm/processor.h>
14
15/* 9/*
16 * No irq_cpustat_t for IA-64. The data is held in the per-CPU data structure. 10 * No irq_cpustat_t for IA-64. The data is held in the per-CPU data structure.
17 */ 11 */
@@ -20,6 +14,11 @@
20 14
21#define local_softirq_pending() (local_cpu_data->softirq_pending) 15#define local_softirq_pending() (local_cpu_data->softirq_pending)
22 16
17#include <linux/threads.h>
18#include <linux/irq.h>
19
20#include <asm/processor.h>
21
23extern void __iomem *ipi_base_addr; 22extern void __iomem *ipi_base_addr;
24 23
25void ack_bad_irq(unsigned int irq); 24void ack_bad_irq(unsigned int irq);
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 907417d187e1..79a04a9394d5 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -16,6 +16,7 @@ config PARISC
16 select RTC_DRV_GENERIC 16 select RTC_DRV_GENERIC
17 select INIT_ALL_POSSIBLE 17 select INIT_ALL_POSSIBLE
18 select BUG 18 select BUG
19 select HAVE_IRQ_WORK
19 select HAVE_PERF_EVENTS 20 select HAVE_PERF_EVENTS
20 select GENERIC_ATOMIC64 if !64BIT 21 select GENERIC_ATOMIC64 if !64BIT
21 help 22 help
diff --git a/arch/parisc/include/asm/perf_event.h b/arch/parisc/include/asm/perf_event.h
index cc146427d8f9..1e0fd8ba6c03 100644
--- a/arch/parisc/include/asm/perf_event.h
+++ b/arch/parisc/include/asm/perf_event.h
@@ -1,7 +1,6 @@
1#ifndef __ASM_PARISC_PERF_EVENT_H 1#ifndef __ASM_PARISC_PERF_EVENT_H
2#define __ASM_PARISC_PERF_EVENT_H 2#define __ASM_PARISC_PERF_EVENT_H
3 3
4/* parisc only supports software events through this interface. */ 4/* Empty, just to avoid compiling error */
5static inline void set_perf_event_pending(void) { }
6 5
7#endif /* __ASM_PARISC_PERF_EVENT_H */ 6#endif /* __ASM_PARISC_PERF_EVENT_H */
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 631e5a0fb6ab..4b1e521d966f 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -138,6 +138,7 @@ config PPC
138 select HAVE_OPROFILE 138 select HAVE_OPROFILE
139 select HAVE_SYSCALL_WRAPPERS if PPC64 139 select HAVE_SYSCALL_WRAPPERS if PPC64
140 select GENERIC_ATOMIC64 if PPC32 140 select GENERIC_ATOMIC64 if PPC32
141 select HAVE_IRQ_WORK
141 select HAVE_PERF_EVENTS 142 select HAVE_PERF_EVENTS
142 select HAVE_REGS_AND_STACK_ACCESS_API 143 select HAVE_REGS_AND_STACK_ACCESS_API
143 select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S_64 144 select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S_64
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 1ff6662f7faf..9b287fdd8ea3 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -129,7 +129,7 @@ struct paca_struct {
129 u8 soft_enabled; /* irq soft-enable flag */ 129 u8 soft_enabled; /* irq soft-enable flag */
130 u8 hard_enabled; /* set if irqs are enabled in MSR */ 130 u8 hard_enabled; /* set if irqs are enabled in MSR */
131 u8 io_sync; /* writel() needs spin_unlock sync */ 131 u8 io_sync; /* writel() needs spin_unlock sync */
132 u8 perf_event_pending; /* PM interrupt while soft-disabled */ 132 u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */
133 133
134 /* Stuff for accurate time accounting */ 134 /* Stuff for accurate time accounting */
135 u64 user_time; /* accumulated usermode TB ticks */ 135 u64 user_time; /* accumulated usermode TB ticks */
diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c
index 95ad9dad298e..d05ae4204bbf 100644
--- a/arch/powerpc/kernel/perf_callchain.c
+++ b/arch/powerpc/kernel/perf_callchain.c
@@ -23,18 +23,6 @@
23#include "ppc32.h" 23#include "ppc32.h"
24#endif 24#endif
25 25
26/*
27 * Store another value in a callchain_entry.
28 */
29static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip)
30{
31 unsigned int nr = entry->nr;
32
33 if (nr < PERF_MAX_STACK_DEPTH) {
34 entry->ip[nr] = ip;
35 entry->nr = nr + 1;
36 }
37}
38 26
39/* 27/*
40 * Is sp valid as the address of the next kernel stack frame after prev_sp? 28 * Is sp valid as the address of the next kernel stack frame after prev_sp?
@@ -58,8 +46,8 @@ static int valid_next_sp(unsigned long sp, unsigned long prev_sp)
58 return 0; 46 return 0;
59} 47}
60 48
61static void perf_callchain_kernel(struct pt_regs *regs, 49void
62 struct perf_callchain_entry *entry) 50perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
63{ 51{
64 unsigned long sp, next_sp; 52 unsigned long sp, next_sp;
65 unsigned long next_ip; 53 unsigned long next_ip;
@@ -69,8 +57,7 @@ static void perf_callchain_kernel(struct pt_regs *regs,
69 57
70 lr = regs->link; 58 lr = regs->link;
71 sp = regs->gpr[1]; 59 sp = regs->gpr[1];
72 callchain_store(entry, PERF_CONTEXT_KERNEL); 60 perf_callchain_store(entry, regs->nip);
73 callchain_store(entry, regs->nip);
74 61
75 if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) 62 if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD))
76 return; 63 return;
@@ -89,7 +76,7 @@ static void perf_callchain_kernel(struct pt_regs *regs,
89 next_ip = regs->nip; 76 next_ip = regs->nip;
90 lr = regs->link; 77 lr = regs->link;
91 level = 0; 78 level = 0;
92 callchain_store(entry, PERF_CONTEXT_KERNEL); 79 perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
93 80
94 } else { 81 } else {
95 if (level == 0) 82 if (level == 0)
@@ -111,7 +98,7 @@ static void perf_callchain_kernel(struct pt_regs *regs,
111 ++level; 98 ++level;
112 } 99 }
113 100
114 callchain_store(entry, next_ip); 101 perf_callchain_store(entry, next_ip);
115 if (!valid_next_sp(next_sp, sp)) 102 if (!valid_next_sp(next_sp, sp))
116 return; 103 return;
117 sp = next_sp; 104 sp = next_sp;
@@ -233,8 +220,8 @@ static int sane_signal_64_frame(unsigned long sp)
233 puc == (unsigned long) &sf->uc; 220 puc == (unsigned long) &sf->uc;
234} 221}
235 222
236static void perf_callchain_user_64(struct pt_regs *regs, 223static void perf_callchain_user_64(struct perf_callchain_entry *entry,
237 struct perf_callchain_entry *entry) 224 struct pt_regs *regs)
238{ 225{
239 unsigned long sp, next_sp; 226 unsigned long sp, next_sp;
240 unsigned long next_ip; 227 unsigned long next_ip;
@@ -246,8 +233,7 @@ static void perf_callchain_user_64(struct pt_regs *regs,
246 next_ip = regs->nip; 233 next_ip = regs->nip;
247 lr = regs->link; 234 lr = regs->link;
248 sp = regs->gpr[1]; 235 sp = regs->gpr[1];
249 callchain_store(entry, PERF_CONTEXT_USER); 236 perf_callchain_store(entry, next_ip);
250 callchain_store(entry, next_ip);
251 237
252 for (;;) { 238 for (;;) {
253 fp = (unsigned long __user *) sp; 239 fp = (unsigned long __user *) sp;
@@ -276,14 +262,14 @@ static void perf_callchain_user_64(struct pt_regs *regs,
276 read_user_stack_64(&uregs[PT_R1], &sp)) 262 read_user_stack_64(&uregs[PT_R1], &sp))
277 return; 263 return;
278 level = 0; 264 level = 0;
279 callchain_store(entry, PERF_CONTEXT_USER); 265 perf_callchain_store(entry, PERF_CONTEXT_USER);
280 callchain_store(entry, next_ip); 266 perf_callchain_store(entry, next_ip);
281 continue; 267 continue;
282 } 268 }
283 269
284 if (level == 0) 270 if (level == 0)
285 next_ip = lr; 271 next_ip = lr;
286 callchain_store(entry, next_ip); 272 perf_callchain_store(entry, next_ip);
287 ++level; 273 ++level;
288 sp = next_sp; 274 sp = next_sp;
289 } 275 }
@@ -315,8 +301,8 @@ static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
315 return __get_user_inatomic(*ret, ptr); 301 return __get_user_inatomic(*ret, ptr);
316} 302}
317 303
318static inline void perf_callchain_user_64(struct pt_regs *regs, 304static inline void perf_callchain_user_64(struct perf_callchain_entry *entry,
319 struct perf_callchain_entry *entry) 305 struct pt_regs *regs)
320{ 306{
321} 307}
322 308
@@ -435,8 +421,8 @@ static unsigned int __user *signal_frame_32_regs(unsigned int sp,
435 return mctx->mc_gregs; 421 return mctx->mc_gregs;
436} 422}
437 423
438static void perf_callchain_user_32(struct pt_regs *regs, 424static void perf_callchain_user_32(struct perf_callchain_entry *entry,
439 struct perf_callchain_entry *entry) 425 struct pt_regs *regs)
440{ 426{
441 unsigned int sp, next_sp; 427 unsigned int sp, next_sp;
442 unsigned int next_ip; 428 unsigned int next_ip;
@@ -447,8 +433,7 @@ static void perf_callchain_user_32(struct pt_regs *regs,
447 next_ip = regs->nip; 433 next_ip = regs->nip;
448 lr = regs->link; 434 lr = regs->link;
449 sp = regs->gpr[1]; 435 sp = regs->gpr[1];
450 callchain_store(entry, PERF_CONTEXT_USER); 436 perf_callchain_store(entry, next_ip);
451 callchain_store(entry, next_ip);
452 437
453 while (entry->nr < PERF_MAX_STACK_DEPTH) { 438 while (entry->nr < PERF_MAX_STACK_DEPTH) {
454 fp = (unsigned int __user *) (unsigned long) sp; 439 fp = (unsigned int __user *) (unsigned long) sp;
@@ -470,45 +455,24 @@ static void perf_callchain_user_32(struct pt_regs *regs,
470 read_user_stack_32(&uregs[PT_R1], &sp)) 455 read_user_stack_32(&uregs[PT_R1], &sp))
471 return; 456 return;
472 level = 0; 457 level = 0;
473 callchain_store(entry, PERF_CONTEXT_USER); 458 perf_callchain_store(entry, PERF_CONTEXT_USER);
474 callchain_store(entry, next_ip); 459 perf_callchain_store(entry, next_ip);
475 continue; 460 continue;
476 } 461 }
477 462
478 if (level == 0) 463 if (level == 0)
479 next_ip = lr; 464 next_ip = lr;
480 callchain_store(entry, next_ip); 465 perf_callchain_store(entry, next_ip);
481 ++level; 466 ++level;
482 sp = next_sp; 467 sp = next_sp;
483 } 468 }
484} 469}
485 470
486/* 471void
487 * Since we can't get PMU interrupts inside a PMU interrupt handler, 472perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
488 * we don't need separate irq and nmi entries here.
489 */
490static DEFINE_PER_CPU(struct perf_callchain_entry, cpu_perf_callchain);
491
492struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
493{ 473{
494 struct perf_callchain_entry *entry = &__get_cpu_var(cpu_perf_callchain); 474 if (current_is_64bit())
495 475 perf_callchain_user_64(entry, regs);
496 entry->nr = 0; 476 else
497 477 perf_callchain_user_32(entry, regs);
498 if (!user_mode(regs)) {
499 perf_callchain_kernel(regs, entry);
500 if (current->mm)
501 regs = task_pt_regs(current);
502 else
503 regs = NULL;
504 }
505
506 if (regs) {
507 if (current_is_64bit())
508 perf_callchain_user_64(regs, entry);
509 else
510 perf_callchain_user_32(regs, entry);
511 }
512
513 return entry;
514} 478}
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index d301a30445e0..3129c855933c 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -402,6 +402,9 @@ static void power_pmu_read(struct perf_event *event)
402{ 402{
403 s64 val, delta, prev; 403 s64 val, delta, prev;
404 404
405 if (event->hw.state & PERF_HES_STOPPED)
406 return;
407
405 if (!event->hw.idx) 408 if (!event->hw.idx)
406 return; 409 return;
407 /* 410 /*
@@ -517,7 +520,7 @@ static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0)
517 * Disable all events to prevent PMU interrupts and to allow 520 * Disable all events to prevent PMU interrupts and to allow
518 * events to be added or removed. 521 * events to be added or removed.
519 */ 522 */
520void hw_perf_disable(void) 523static void power_pmu_disable(struct pmu *pmu)
521{ 524{
522 struct cpu_hw_events *cpuhw; 525 struct cpu_hw_events *cpuhw;
523 unsigned long flags; 526 unsigned long flags;
@@ -565,7 +568,7 @@ void hw_perf_disable(void)
565 * If we were previously disabled and events were added, then 568 * If we were previously disabled and events were added, then
566 * put the new config on the PMU. 569 * put the new config on the PMU.
567 */ 570 */
568void hw_perf_enable(void) 571static void power_pmu_enable(struct pmu *pmu)
569{ 572{
570 struct perf_event *event; 573 struct perf_event *event;
571 struct cpu_hw_events *cpuhw; 574 struct cpu_hw_events *cpuhw;
@@ -672,6 +675,8 @@ void hw_perf_enable(void)
672 } 675 }
673 local64_set(&event->hw.prev_count, val); 676 local64_set(&event->hw.prev_count, val);
674 event->hw.idx = idx; 677 event->hw.idx = idx;
678 if (event->hw.state & PERF_HES_STOPPED)
679 val = 0;
675 write_pmc(idx, val); 680 write_pmc(idx, val);
676 perf_event_update_userpage(event); 681 perf_event_update_userpage(event);
677 } 682 }
@@ -727,7 +732,7 @@ static int collect_events(struct perf_event *group, int max_count,
727 * re-enable the PMU in order to get hw_perf_enable to do the 732 * re-enable the PMU in order to get hw_perf_enable to do the
728 * actual work of reconfiguring the PMU. 733 * actual work of reconfiguring the PMU.
729 */ 734 */
730static int power_pmu_enable(struct perf_event *event) 735static int power_pmu_add(struct perf_event *event, int ef_flags)
731{ 736{
732 struct cpu_hw_events *cpuhw; 737 struct cpu_hw_events *cpuhw;
733 unsigned long flags; 738 unsigned long flags;
@@ -735,7 +740,7 @@ static int power_pmu_enable(struct perf_event *event)
735 int ret = -EAGAIN; 740 int ret = -EAGAIN;
736 741
737 local_irq_save(flags); 742 local_irq_save(flags);
738 perf_disable(); 743 perf_pmu_disable(event->pmu);
739 744
740 /* 745 /*
741 * Add the event to the list (if there is room) 746 * Add the event to the list (if there is room)
@@ -749,6 +754,9 @@ static int power_pmu_enable(struct perf_event *event)
749 cpuhw->events[n0] = event->hw.config; 754 cpuhw->events[n0] = event->hw.config;
750 cpuhw->flags[n0] = event->hw.event_base; 755 cpuhw->flags[n0] = event->hw.event_base;
751 756
757 if (!(ef_flags & PERF_EF_START))
758 event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
759
752 /* 760 /*
753 * If group events scheduling transaction was started, 761 * If group events scheduling transaction was started,
754 * skip the schedulability test here, it will be peformed 762 * skip the schedulability test here, it will be peformed
@@ -769,7 +777,7 @@ nocheck:
769 777
770 ret = 0; 778 ret = 0;
771 out: 779 out:
772 perf_enable(); 780 perf_pmu_enable(event->pmu);
773 local_irq_restore(flags); 781 local_irq_restore(flags);
774 return ret; 782 return ret;
775} 783}
@@ -777,14 +785,14 @@ nocheck:
777/* 785/*
778 * Remove a event from the PMU. 786 * Remove a event from the PMU.
779 */ 787 */
780static void power_pmu_disable(struct perf_event *event) 788static void power_pmu_del(struct perf_event *event, int ef_flags)
781{ 789{
782 struct cpu_hw_events *cpuhw; 790 struct cpu_hw_events *cpuhw;
783 long i; 791 long i;
784 unsigned long flags; 792 unsigned long flags;
785 793
786 local_irq_save(flags); 794 local_irq_save(flags);
787 perf_disable(); 795 perf_pmu_disable(event->pmu);
788 796
789 power_pmu_read(event); 797 power_pmu_read(event);
790 798
@@ -821,34 +829,60 @@ static void power_pmu_disable(struct perf_event *event)
821 cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE); 829 cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE);
822 } 830 }
823 831
824 perf_enable(); 832 perf_pmu_enable(event->pmu);
825 local_irq_restore(flags); 833 local_irq_restore(flags);
826} 834}
827 835
828/* 836/*
829 * Re-enable interrupts on a event after they were throttled 837 * POWER-PMU does not support disabling individual counters, hence
830 * because they were coming too fast. 838 * program their cycle counter to their max value and ignore the interrupts.
831 */ 839 */
832static void power_pmu_unthrottle(struct perf_event *event) 840
841static void power_pmu_start(struct perf_event *event, int ef_flags)
842{
843 unsigned long flags;
844 s64 left;
845
846 if (!event->hw.idx || !event->hw.sample_period)
847 return;
848
849 if (!(event->hw.state & PERF_HES_STOPPED))
850 return;
851
852 if (ef_flags & PERF_EF_RELOAD)
853 WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
854
855 local_irq_save(flags);
856 perf_pmu_disable(event->pmu);
857
858 event->hw.state = 0;
859 left = local64_read(&event->hw.period_left);
860 write_pmc(event->hw.idx, left);
861
862 perf_event_update_userpage(event);
863 perf_pmu_enable(event->pmu);
864 local_irq_restore(flags);
865}
866
867static void power_pmu_stop(struct perf_event *event, int ef_flags)
833{ 868{
834 s64 val, left;
835 unsigned long flags; 869 unsigned long flags;
836 870
837 if (!event->hw.idx || !event->hw.sample_period) 871 if (!event->hw.idx || !event->hw.sample_period)
838 return; 872 return;
873
874 if (event->hw.state & PERF_HES_STOPPED)
875 return;
876
839 local_irq_save(flags); 877 local_irq_save(flags);
840 perf_disable(); 878 perf_pmu_disable(event->pmu);
879
841 power_pmu_read(event); 880 power_pmu_read(event);
842 left = event->hw.sample_period; 881 event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
843 event->hw.last_period = left; 882 write_pmc(event->hw.idx, 0);
844 val = 0; 883
845 if (left < 0x80000000L)
846 val = 0x80000000L - left;
847 write_pmc(event->hw.idx, val);
848 local64_set(&event->hw.prev_count, val);
849 local64_set(&event->hw.period_left, left);
850 perf_event_update_userpage(event); 884 perf_event_update_userpage(event);
851 perf_enable(); 885 perf_pmu_enable(event->pmu);
852 local_irq_restore(flags); 886 local_irq_restore(flags);
853} 887}
854 888
@@ -857,10 +891,11 @@ static void power_pmu_unthrottle(struct perf_event *event)
857 * Set the flag to make pmu::enable() not perform the 891 * Set the flag to make pmu::enable() not perform the
858 * schedulability test, it will be performed at commit time 892 * schedulability test, it will be performed at commit time
859 */ 893 */
860void power_pmu_start_txn(const struct pmu *pmu) 894void power_pmu_start_txn(struct pmu *pmu)
861{ 895{
862 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 896 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
863 897
898 perf_pmu_disable(pmu);
864 cpuhw->group_flag |= PERF_EVENT_TXN; 899 cpuhw->group_flag |= PERF_EVENT_TXN;
865 cpuhw->n_txn_start = cpuhw->n_events; 900 cpuhw->n_txn_start = cpuhw->n_events;
866} 901}
@@ -870,11 +905,12 @@ void power_pmu_start_txn(const struct pmu *pmu)
870 * Clear the flag and pmu::enable() will perform the 905 * Clear the flag and pmu::enable() will perform the
871 * schedulability test. 906 * schedulability test.
872 */ 907 */
873void power_pmu_cancel_txn(const struct pmu *pmu) 908void power_pmu_cancel_txn(struct pmu *pmu)
874{ 909{
875 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 910 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
876 911
877 cpuhw->group_flag &= ~PERF_EVENT_TXN; 912 cpuhw->group_flag &= ~PERF_EVENT_TXN;
913 perf_pmu_enable(pmu);
878} 914}
879 915
880/* 916/*
@@ -882,7 +918,7 @@ void power_pmu_cancel_txn(const struct pmu *pmu)
882 * Perform the group schedulability test as a whole 918 * Perform the group schedulability test as a whole
883 * Return 0 if success 919 * Return 0 if success
884 */ 920 */
885int power_pmu_commit_txn(const struct pmu *pmu) 921int power_pmu_commit_txn(struct pmu *pmu)
886{ 922{
887 struct cpu_hw_events *cpuhw; 923 struct cpu_hw_events *cpuhw;
888 long i, n; 924 long i, n;
@@ -901,19 +937,10 @@ int power_pmu_commit_txn(const struct pmu *pmu)
901 cpuhw->event[i]->hw.config = cpuhw->events[i]; 937 cpuhw->event[i]->hw.config = cpuhw->events[i];
902 938
903 cpuhw->group_flag &= ~PERF_EVENT_TXN; 939 cpuhw->group_flag &= ~PERF_EVENT_TXN;
940 perf_pmu_enable(pmu);
904 return 0; 941 return 0;
905} 942}
906 943
907struct pmu power_pmu = {
908 .enable = power_pmu_enable,
909 .disable = power_pmu_disable,
910 .read = power_pmu_read,
911 .unthrottle = power_pmu_unthrottle,
912 .start_txn = power_pmu_start_txn,
913 .cancel_txn = power_pmu_cancel_txn,
914 .commit_txn = power_pmu_commit_txn,
915};
916
917/* 944/*
918 * Return 1 if we might be able to put event on a limited PMC, 945 * Return 1 if we might be able to put event on a limited PMC,
919 * or 0 if not. 946 * or 0 if not.
@@ -1014,7 +1041,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp)
1014 return 0; 1041 return 0;
1015} 1042}
1016 1043
1017const struct pmu *hw_perf_event_init(struct perf_event *event) 1044static int power_pmu_event_init(struct perf_event *event)
1018{ 1045{
1019 u64 ev; 1046 u64 ev;
1020 unsigned long flags; 1047 unsigned long flags;
@@ -1026,25 +1053,27 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
1026 struct cpu_hw_events *cpuhw; 1053 struct cpu_hw_events *cpuhw;
1027 1054
1028 if (!ppmu) 1055 if (!ppmu)
1029 return ERR_PTR(-ENXIO); 1056 return -ENOENT;
1057
1030 switch (event->attr.type) { 1058 switch (event->attr.type) {
1031 case PERF_TYPE_HARDWARE: 1059 case PERF_TYPE_HARDWARE:
1032 ev = event->attr.config; 1060 ev = event->attr.config;
1033 if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) 1061 if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
1034 return ERR_PTR(-EOPNOTSUPP); 1062 return -EOPNOTSUPP;
1035 ev = ppmu->generic_events[ev]; 1063 ev = ppmu->generic_events[ev];
1036 break; 1064 break;
1037 case PERF_TYPE_HW_CACHE: 1065 case PERF_TYPE_HW_CACHE:
1038 err = hw_perf_cache_event(event->attr.config, &ev); 1066 err = hw_perf_cache_event(event->attr.config, &ev);
1039 if (err) 1067 if (err)
1040 return ERR_PTR(err); 1068 return err;
1041 break; 1069 break;
1042 case PERF_TYPE_RAW: 1070 case PERF_TYPE_RAW:
1043 ev = event->attr.config; 1071 ev = event->attr.config;
1044 break; 1072 break;
1045 default: 1073 default:
1046 return ERR_PTR(-EINVAL); 1074 return -ENOENT;
1047 } 1075 }
1076
1048 event->hw.config_base = ev; 1077 event->hw.config_base = ev;
1049 event->hw.idx = 0; 1078 event->hw.idx = 0;
1050 1079
@@ -1063,7 +1092,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
1063 * XXX we should check if the task is an idle task. 1092 * XXX we should check if the task is an idle task.
1064 */ 1093 */
1065 flags = 0; 1094 flags = 0;
1066 if (event->ctx->task) 1095 if (event->attach_state & PERF_ATTACH_TASK)
1067 flags |= PPMU_ONLY_COUNT_RUN; 1096 flags |= PPMU_ONLY_COUNT_RUN;
1068 1097
1069 /* 1098 /*
@@ -1081,7 +1110,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
1081 */ 1110 */
1082 ev = normal_pmc_alternative(ev, flags); 1111 ev = normal_pmc_alternative(ev, flags);
1083 if (!ev) 1112 if (!ev)
1084 return ERR_PTR(-EINVAL); 1113 return -EINVAL;
1085 } 1114 }
1086 } 1115 }
1087 1116
@@ -1095,19 +1124,19 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
1095 n = collect_events(event->group_leader, ppmu->n_counter - 1, 1124 n = collect_events(event->group_leader, ppmu->n_counter - 1,
1096 ctrs, events, cflags); 1125 ctrs, events, cflags);
1097 if (n < 0) 1126 if (n < 0)
1098 return ERR_PTR(-EINVAL); 1127 return -EINVAL;
1099 } 1128 }
1100 events[n] = ev; 1129 events[n] = ev;
1101 ctrs[n] = event; 1130 ctrs[n] = event;
1102 cflags[n] = flags; 1131 cflags[n] = flags;
1103 if (check_excludes(ctrs, cflags, n, 1)) 1132 if (check_excludes(ctrs, cflags, n, 1))
1104 return ERR_PTR(-EINVAL); 1133 return -EINVAL;
1105 1134
1106 cpuhw = &get_cpu_var(cpu_hw_events); 1135 cpuhw = &get_cpu_var(cpu_hw_events);
1107 err = power_check_constraints(cpuhw, events, cflags, n + 1); 1136 err = power_check_constraints(cpuhw, events, cflags, n + 1);
1108 put_cpu_var(cpu_hw_events); 1137 put_cpu_var(cpu_hw_events);
1109 if (err) 1138 if (err)
1110 return ERR_PTR(-EINVAL); 1139 return -EINVAL;
1111 1140
1112 event->hw.config = events[n]; 1141 event->hw.config = events[n];
1113 event->hw.event_base = cflags[n]; 1142 event->hw.event_base = cflags[n];
@@ -1132,11 +1161,23 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
1132 } 1161 }
1133 event->destroy = hw_perf_event_destroy; 1162 event->destroy = hw_perf_event_destroy;
1134 1163
1135 if (err) 1164 return err;
1136 return ERR_PTR(err);
1137 return &power_pmu;
1138} 1165}
1139 1166
1167struct pmu power_pmu = {
1168 .pmu_enable = power_pmu_enable,
1169 .pmu_disable = power_pmu_disable,
1170 .event_init = power_pmu_event_init,
1171 .add = power_pmu_add,
1172 .del = power_pmu_del,
1173 .start = power_pmu_start,
1174 .stop = power_pmu_stop,
1175 .read = power_pmu_read,
1176 .start_txn = power_pmu_start_txn,
1177 .cancel_txn = power_pmu_cancel_txn,
1178 .commit_txn = power_pmu_commit_txn,
1179};
1180
1140/* 1181/*
1141 * A counter has overflowed; update its count and record 1182 * A counter has overflowed; update its count and record
1142 * things if requested. Note that interrupts are hard-disabled 1183 * things if requested. Note that interrupts are hard-disabled
@@ -1149,6 +1190,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
1149 s64 prev, delta, left; 1190 s64 prev, delta, left;
1150 int record = 0; 1191 int record = 0;
1151 1192
1193 if (event->hw.state & PERF_HES_STOPPED) {
1194 write_pmc(event->hw.idx, 0);
1195 return;
1196 }
1197
1152 /* we don't have to worry about interrupts here */ 1198 /* we don't have to worry about interrupts here */
1153 prev = local64_read(&event->hw.prev_count); 1199 prev = local64_read(&event->hw.prev_count);
1154 delta = (val - prev) & 0xfffffffful; 1200 delta = (val - prev) & 0xfffffffful;
@@ -1171,6 +1217,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
1171 val = 0x80000000LL - left; 1217 val = 0x80000000LL - left;
1172 } 1218 }
1173 1219
1220 write_pmc(event->hw.idx, val);
1221 local64_set(&event->hw.prev_count, val);
1222 local64_set(&event->hw.period_left, left);
1223 perf_event_update_userpage(event);
1224
1174 /* 1225 /*
1175 * Finally record data if requested. 1226 * Finally record data if requested.
1176 */ 1227 */
@@ -1183,23 +1234,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
1183 if (event->attr.sample_type & PERF_SAMPLE_ADDR) 1234 if (event->attr.sample_type & PERF_SAMPLE_ADDR)
1184 perf_get_data_addr(regs, &data.addr); 1235 perf_get_data_addr(regs, &data.addr);
1185 1236
1186 if (perf_event_overflow(event, nmi, &data, regs)) { 1237 if (perf_event_overflow(event, nmi, &data, regs))
1187 /* 1238 power_pmu_stop(event, 0);
1188 * Interrupts are coming too fast - throttle them
1189 * by setting the event to 0, so it will be
1190 * at least 2^30 cycles until the next interrupt
1191 * (assuming each event counts at most 2 counts
1192 * per cycle).
1193 */
1194 val = 0;
1195 left = ~0ULL >> 1;
1196 }
1197 } 1239 }
1198
1199 write_pmc(event->hw.idx, val);
1200 local64_set(&event->hw.prev_count, val);
1201 local64_set(&event->hw.period_left, left);
1202 perf_event_update_userpage(event);
1203} 1240}
1204 1241
1205/* 1242/*
@@ -1342,6 +1379,7 @@ int register_power_pmu(struct power_pmu *pmu)
1342 freeze_events_kernel = MMCR0_FCHV; 1379 freeze_events_kernel = MMCR0_FCHV;
1343#endif /* CONFIG_PPC64 */ 1380#endif /* CONFIG_PPC64 */
1344 1381
1382 perf_pmu_register(&power_pmu);
1345 perf_cpu_notifier(power_pmu_notifier); 1383 perf_cpu_notifier(power_pmu_notifier);
1346 1384
1347 return 0; 1385 return 0;
diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c
index 1ba45471ae43..7ecca59ddf77 100644
--- a/arch/powerpc/kernel/perf_event_fsl_emb.c
+++ b/arch/powerpc/kernel/perf_event_fsl_emb.c
@@ -156,6 +156,9 @@ static void fsl_emb_pmu_read(struct perf_event *event)
156{ 156{
157 s64 val, delta, prev; 157 s64 val, delta, prev;
158 158
159 if (event->hw.state & PERF_HES_STOPPED)
160 return;
161
159 /* 162 /*
160 * Performance monitor interrupts come even when interrupts 163 * Performance monitor interrupts come even when interrupts
161 * are soft-disabled, as long as interrupts are hard-enabled. 164 * are soft-disabled, as long as interrupts are hard-enabled.
@@ -177,7 +180,7 @@ static void fsl_emb_pmu_read(struct perf_event *event)
177 * Disable all events to prevent PMU interrupts and to allow 180 * Disable all events to prevent PMU interrupts and to allow
178 * events to be added or removed. 181 * events to be added or removed.
179 */ 182 */
180void hw_perf_disable(void) 183static void fsl_emb_pmu_disable(struct pmu *pmu)
181{ 184{
182 struct cpu_hw_events *cpuhw; 185 struct cpu_hw_events *cpuhw;
183 unsigned long flags; 186 unsigned long flags;
@@ -216,7 +219,7 @@ void hw_perf_disable(void)
216 * If we were previously disabled and events were added, then 219 * If we were previously disabled and events were added, then
217 * put the new config on the PMU. 220 * put the new config on the PMU.
218 */ 221 */
219void hw_perf_enable(void) 222static void fsl_emb_pmu_enable(struct pmu *pmu)
220{ 223{
221 struct cpu_hw_events *cpuhw; 224 struct cpu_hw_events *cpuhw;
222 unsigned long flags; 225 unsigned long flags;
@@ -262,8 +265,8 @@ static int collect_events(struct perf_event *group, int max_count,
262 return n; 265 return n;
263} 266}
264 267
265/* perf must be disabled, context locked on entry */ 268/* context locked on entry */
266static int fsl_emb_pmu_enable(struct perf_event *event) 269static int fsl_emb_pmu_add(struct perf_event *event, int flags)
267{ 270{
268 struct cpu_hw_events *cpuhw; 271 struct cpu_hw_events *cpuhw;
269 int ret = -EAGAIN; 272 int ret = -EAGAIN;
@@ -271,6 +274,7 @@ static int fsl_emb_pmu_enable(struct perf_event *event)
271 u64 val; 274 u64 val;
272 int i; 275 int i;
273 276
277 perf_pmu_disable(event->pmu);
274 cpuhw = &get_cpu_var(cpu_hw_events); 278 cpuhw = &get_cpu_var(cpu_hw_events);
275 279
276 if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) 280 if (event->hw.config & FSL_EMB_EVENT_RESTRICTED)
@@ -301,6 +305,12 @@ static int fsl_emb_pmu_enable(struct perf_event *event)
301 val = 0x80000000L - left; 305 val = 0x80000000L - left;
302 } 306 }
303 local64_set(&event->hw.prev_count, val); 307 local64_set(&event->hw.prev_count, val);
308
309 if (!(flags & PERF_EF_START)) {
310 event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
311 val = 0;
312 }
313
304 write_pmc(i, val); 314 write_pmc(i, val);
305 perf_event_update_userpage(event); 315 perf_event_update_userpage(event);
306 316
@@ -310,15 +320,17 @@ static int fsl_emb_pmu_enable(struct perf_event *event)
310 ret = 0; 320 ret = 0;
311 out: 321 out:
312 put_cpu_var(cpu_hw_events); 322 put_cpu_var(cpu_hw_events);
323 perf_pmu_enable(event->pmu);
313 return ret; 324 return ret;
314} 325}
315 326
316/* perf must be disabled, context locked on entry */ 327/* context locked on entry */
317static void fsl_emb_pmu_disable(struct perf_event *event) 328static void fsl_emb_pmu_del(struct perf_event *event, int flags)
318{ 329{
319 struct cpu_hw_events *cpuhw; 330 struct cpu_hw_events *cpuhw;
320 int i = event->hw.idx; 331 int i = event->hw.idx;
321 332
333 perf_pmu_disable(event->pmu);
322 if (i < 0) 334 if (i < 0)
323 goto out; 335 goto out;
324 336
@@ -346,44 +358,57 @@ static void fsl_emb_pmu_disable(struct perf_event *event)
346 cpuhw->n_events--; 358 cpuhw->n_events--;
347 359
348 out: 360 out:
361 perf_pmu_enable(event->pmu);
349 put_cpu_var(cpu_hw_events); 362 put_cpu_var(cpu_hw_events);
350} 363}
351 364
352/* 365static void fsl_emb_pmu_start(struct perf_event *event, int ef_flags)
353 * Re-enable interrupts on a event after they were throttled
354 * because they were coming too fast.
355 *
356 * Context is locked on entry, but perf is not disabled.
357 */
358static void fsl_emb_pmu_unthrottle(struct perf_event *event)
359{ 366{
360 s64 val, left;
361 unsigned long flags; 367 unsigned long flags;
368 s64 left;
362 369
363 if (event->hw.idx < 0 || !event->hw.sample_period) 370 if (event->hw.idx < 0 || !event->hw.sample_period)
364 return; 371 return;
372
373 if (!(event->hw.state & PERF_HES_STOPPED))
374 return;
375
376 if (ef_flags & PERF_EF_RELOAD)
377 WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
378
365 local_irq_save(flags); 379 local_irq_save(flags);
366 perf_disable(); 380 perf_pmu_disable(event->pmu);
367 fsl_emb_pmu_read(event); 381
368 left = event->hw.sample_period; 382 event->hw.state = 0;
369 event->hw.last_period = left; 383 left = local64_read(&event->hw.period_left);
370 val = 0; 384 write_pmc(event->hw.idx, left);
371 if (left < 0x80000000L) 385
372 val = 0x80000000L - left;
373 write_pmc(event->hw.idx, val);
374 local64_set(&event->hw.prev_count, val);
375 local64_set(&event->hw.period_left, left);
376 perf_event_update_userpage(event); 386 perf_event_update_userpage(event);
377 perf_enable(); 387 perf_pmu_enable(event->pmu);
378 local_irq_restore(flags); 388 local_irq_restore(flags);
379} 389}
380 390
381static struct pmu fsl_emb_pmu = { 391static void fsl_emb_pmu_stop(struct perf_event *event, int ef_flags)
382 .enable = fsl_emb_pmu_enable, 392{
383 .disable = fsl_emb_pmu_disable, 393 unsigned long flags;
384 .read = fsl_emb_pmu_read, 394
385 .unthrottle = fsl_emb_pmu_unthrottle, 395 if (event->hw.idx < 0 || !event->hw.sample_period)
386}; 396 return;
397
398 if (event->hw.state & PERF_HES_STOPPED)
399 return;
400
401 local_irq_save(flags);
402 perf_pmu_disable(event->pmu);
403
404 fsl_emb_pmu_read(event);
405 event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
406 write_pmc(event->hw.idx, 0);
407
408 perf_event_update_userpage(event);
409 perf_pmu_enable(event->pmu);
410 local_irq_restore(flags);
411}
387 412
388/* 413/*
389 * Release the PMU if this is the last perf_event. 414 * Release the PMU if this is the last perf_event.
@@ -428,7 +453,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp)
428 return 0; 453 return 0;
429} 454}
430 455
431const struct pmu *hw_perf_event_init(struct perf_event *event) 456static int fsl_emb_pmu_event_init(struct perf_event *event)
432{ 457{
433 u64 ev; 458 u64 ev;
434 struct perf_event *events[MAX_HWEVENTS]; 459 struct perf_event *events[MAX_HWEVENTS];
@@ -441,14 +466,14 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
441 case PERF_TYPE_HARDWARE: 466 case PERF_TYPE_HARDWARE:
442 ev = event->attr.config; 467 ev = event->attr.config;
443 if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) 468 if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
444 return ERR_PTR(-EOPNOTSUPP); 469 return -EOPNOTSUPP;
445 ev = ppmu->generic_events[ev]; 470 ev = ppmu->generic_events[ev];
446 break; 471 break;
447 472
448 case PERF_TYPE_HW_CACHE: 473 case PERF_TYPE_HW_CACHE:
449 err = hw_perf_cache_event(event->attr.config, &ev); 474 err = hw_perf_cache_event(event->attr.config, &ev);
450 if (err) 475 if (err)
451 return ERR_PTR(err); 476 return err;
452 break; 477 break;
453 478
454 case PERF_TYPE_RAW: 479 case PERF_TYPE_RAW:
@@ -456,12 +481,12 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
456 break; 481 break;
457 482
458 default: 483 default:
459 return ERR_PTR(-EINVAL); 484 return -ENOENT;
460 } 485 }
461 486
462 event->hw.config = ppmu->xlate_event(ev); 487 event->hw.config = ppmu->xlate_event(ev);
463 if (!(event->hw.config & FSL_EMB_EVENT_VALID)) 488 if (!(event->hw.config & FSL_EMB_EVENT_VALID))
464 return ERR_PTR(-EINVAL); 489 return -EINVAL;
465 490
466 /* 491 /*
467 * If this is in a group, check if it can go on with all the 492 * If this is in a group, check if it can go on with all the
@@ -473,7 +498,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
473 n = collect_events(event->group_leader, 498 n = collect_events(event->group_leader,
474 ppmu->n_counter - 1, events); 499 ppmu->n_counter - 1, events);
475 if (n < 0) 500 if (n < 0)
476 return ERR_PTR(-EINVAL); 501 return -EINVAL;
477 } 502 }
478 503
479 if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) { 504 if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) {
@@ -484,7 +509,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
484 } 509 }
485 510
486 if (num_restricted >= ppmu->n_restricted) 511 if (num_restricted >= ppmu->n_restricted)
487 return ERR_PTR(-EINVAL); 512 return -EINVAL;
488 } 513 }
489 514
490 event->hw.idx = -1; 515 event->hw.idx = -1;
@@ -497,7 +522,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
497 if (event->attr.exclude_kernel) 522 if (event->attr.exclude_kernel)
498 event->hw.config_base |= PMLCA_FCS; 523 event->hw.config_base |= PMLCA_FCS;
499 if (event->attr.exclude_idle) 524 if (event->attr.exclude_idle)
500 return ERR_PTR(-ENOTSUPP); 525 return -ENOTSUPP;
501 526
502 event->hw.last_period = event->hw.sample_period; 527 event->hw.last_period = event->hw.sample_period;
503 local64_set(&event->hw.period_left, event->hw.last_period); 528 local64_set(&event->hw.period_left, event->hw.last_period);
@@ -523,11 +548,20 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
523 } 548 }
524 event->destroy = hw_perf_event_destroy; 549 event->destroy = hw_perf_event_destroy;
525 550
526 if (err) 551 return err;
527 return ERR_PTR(err);
528 return &fsl_emb_pmu;
529} 552}
530 553
554static struct pmu fsl_emb_pmu = {
555 .pmu_enable = fsl_emb_pmu_enable,
556 .pmu_disable = fsl_emb_pmu_disable,
557 .event_init = fsl_emb_pmu_event_init,
558 .add = fsl_emb_pmu_add,
559 .del = fsl_emb_pmu_del,
560 .start = fsl_emb_pmu_start,
561 .stop = fsl_emb_pmu_stop,
562 .read = fsl_emb_pmu_read,
563};
564
531/* 565/*
532 * A counter has overflowed; update its count and record 566 * A counter has overflowed; update its count and record
533 * things if requested. Note that interrupts are hard-disabled 567 * things if requested. Note that interrupts are hard-disabled
@@ -540,6 +574,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
540 s64 prev, delta, left; 574 s64 prev, delta, left;
541 int record = 0; 575 int record = 0;
542 576
577 if (event->hw.state & PERF_HES_STOPPED) {
578 write_pmc(event->hw.idx, 0);
579 return;
580 }
581
543 /* we don't have to worry about interrupts here */ 582 /* we don't have to worry about interrupts here */
544 prev = local64_read(&event->hw.prev_count); 583 prev = local64_read(&event->hw.prev_count);
545 delta = (val - prev) & 0xfffffffful; 584 delta = (val - prev) & 0xfffffffful;
@@ -562,6 +601,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
562 val = 0x80000000LL - left; 601 val = 0x80000000LL - left;
563 } 602 }
564 603
604 write_pmc(event->hw.idx, val);
605 local64_set(&event->hw.prev_count, val);
606 local64_set(&event->hw.period_left, left);
607 perf_event_update_userpage(event);
608
565 /* 609 /*
566 * Finally record data if requested. 610 * Finally record data if requested.
567 */ 611 */
@@ -571,23 +615,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
571 perf_sample_data_init(&data, 0); 615 perf_sample_data_init(&data, 0);
572 data.period = event->hw.last_period; 616 data.period = event->hw.last_period;
573 617
574 if (perf_event_overflow(event, nmi, &data, regs)) { 618 if (perf_event_overflow(event, nmi, &data, regs))
575 /* 619 fsl_emb_pmu_stop(event, 0);
576 * Interrupts are coming too fast - throttle them
577 * by setting the event to 0, so it will be
578 * at least 2^30 cycles until the next interrupt
579 * (assuming each event counts at most 2 counts
580 * per cycle).
581 */
582 val = 0;
583 left = ~0ULL >> 1;
584 }
585 } 620 }
586
587 write_pmc(event->hw.idx, val);
588 local64_set(&event->hw.prev_count, val);
589 local64_set(&event->hw.period_left, left);
590 perf_event_update_userpage(event);
591} 621}
592 622
593static void perf_event_interrupt(struct pt_regs *regs) 623static void perf_event_interrupt(struct pt_regs *regs)
@@ -651,5 +681,7 @@ int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu)
651 pr_info("%s performance monitor hardware support registered\n", 681 pr_info("%s performance monitor hardware support registered\n",
652 pmu->name); 682 pmu->name);
653 683
684 perf_pmu_register(&fsl_emb_pmu);
685
654 return 0; 686 return 0;
655} 687}
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 8533b3b83f5d..54888eb10c3b 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -53,7 +53,7 @@
53#include <linux/posix-timers.h> 53#include <linux/posix-timers.h>
54#include <linux/irq.h> 54#include <linux/irq.h>
55#include <linux/delay.h> 55#include <linux/delay.h>
56#include <linux/perf_event.h> 56#include <linux/irq_work.h>
57#include <asm/trace.h> 57#include <asm/trace.h>
58 58
59#include <asm/io.h> 59#include <asm/io.h>
@@ -493,60 +493,60 @@ void __init iSeries_time_init_early(void)
493} 493}
494#endif /* CONFIG_PPC_ISERIES */ 494#endif /* CONFIG_PPC_ISERIES */
495 495
496#ifdef CONFIG_PERF_EVENTS 496#ifdef CONFIG_IRQ_WORK
497 497
498/* 498/*
499 * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable... 499 * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
500 */ 500 */
501#ifdef CONFIG_PPC64 501#ifdef CONFIG_PPC64
502static inline unsigned long test_perf_event_pending(void) 502static inline unsigned long test_irq_work_pending(void)
503{ 503{
504 unsigned long x; 504 unsigned long x;
505 505
506 asm volatile("lbz %0,%1(13)" 506 asm volatile("lbz %0,%1(13)"
507 : "=r" (x) 507 : "=r" (x)
508 : "i" (offsetof(struct paca_struct, perf_event_pending))); 508 : "i" (offsetof(struct paca_struct, irq_work_pending)));
509 return x; 509 return x;
510} 510}
511 511
512static inline void set_perf_event_pending_flag(void) 512static inline void set_irq_work_pending_flag(void)
513{ 513{
514 asm volatile("stb %0,%1(13)" : : 514 asm volatile("stb %0,%1(13)" : :
515 "r" (1), 515 "r" (1),
516 "i" (offsetof(struct paca_struct, perf_event_pending))); 516 "i" (offsetof(struct paca_struct, irq_work_pending)));
517} 517}
518 518
519static inline void clear_perf_event_pending(void) 519static inline void clear_irq_work_pending(void)
520{ 520{
521 asm volatile("stb %0,%1(13)" : : 521 asm volatile("stb %0,%1(13)" : :
522 "r" (0), 522 "r" (0),
523 "i" (offsetof(struct paca_struct, perf_event_pending))); 523 "i" (offsetof(struct paca_struct, irq_work_pending)));
524} 524}
525 525
526#else /* 32-bit */ 526#else /* 32-bit */
527 527
528DEFINE_PER_CPU(u8, perf_event_pending); 528DEFINE_PER_CPU(u8, irq_work_pending);
529 529
530#define set_perf_event_pending_flag() __get_cpu_var(perf_event_pending) = 1 530#define set_irq_work_pending_flag() __get_cpu_var(irq_work_pending) = 1
531#define test_perf_event_pending() __get_cpu_var(perf_event_pending) 531#define test_irq_work_pending() __get_cpu_var(irq_work_pending)
532#define clear_perf_event_pending() __get_cpu_var(perf_event_pending) = 0 532#define clear_irq_work_pending() __get_cpu_var(irq_work_pending) = 0
533 533
534#endif /* 32 vs 64 bit */ 534#endif /* 32 vs 64 bit */
535 535
536void set_perf_event_pending(void) 536void set_irq_work_pending(void)
537{ 537{
538 preempt_disable(); 538 preempt_disable();
539 set_perf_event_pending_flag(); 539 set_irq_work_pending_flag();
540 set_dec(1); 540 set_dec(1);
541 preempt_enable(); 541 preempt_enable();
542} 542}
543 543
544#else /* CONFIG_PERF_EVENTS */ 544#else /* CONFIG_IRQ_WORK */
545 545
546#define test_perf_event_pending() 0 546#define test_irq_work_pending() 0
547#define clear_perf_event_pending() 547#define clear_irq_work_pending()
548 548
549#endif /* CONFIG_PERF_EVENTS */ 549#endif /* CONFIG_IRQ_WORK */
550 550
551/* 551/*
552 * For iSeries shared processors, we have to let the hypervisor 552 * For iSeries shared processors, we have to let the hypervisor
@@ -587,9 +587,9 @@ void timer_interrupt(struct pt_regs * regs)
587 587
588 calculate_steal_time(); 588 calculate_steal_time();
589 589
590 if (test_perf_event_pending()) { 590 if (test_irq_work_pending()) {
591 clear_perf_event_pending(); 591 clear_irq_work_pending();
592 perf_event_do_pending(); 592 irq_work_run();
593 } 593 }
594 594
595#ifdef CONFIG_PPC_ISERIES 595#ifdef CONFIG_PPC_ISERIES
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index f0777a47e3a5..958f0dadeadf 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -95,6 +95,7 @@ config S390
95 select HAVE_KVM if 64BIT 95 select HAVE_KVM if 64BIT
96 select HAVE_ARCH_TRACEHOOK 96 select HAVE_ARCH_TRACEHOOK
97 select INIT_ALL_POSSIBLE 97 select INIT_ALL_POSSIBLE
98 select HAVE_IRQ_WORK
98 select HAVE_PERF_EVENTS 99 select HAVE_PERF_EVENTS
99 select HAVE_KERNEL_GZIP 100 select HAVE_KERNEL_GZIP
100 select HAVE_KERNEL_BZIP2 101 select HAVE_KERNEL_BZIP2
diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h
index 498bc3892385..881d94590aeb 100644
--- a/arch/s390/include/asm/hardirq.h
+++ b/arch/s390/include/asm/hardirq.h
@@ -12,10 +12,6 @@
12#ifndef __ASM_HARDIRQ_H 12#ifndef __ASM_HARDIRQ_H
13#define __ASM_HARDIRQ_H 13#define __ASM_HARDIRQ_H
14 14
15#include <linux/threads.h>
16#include <linux/sched.h>
17#include <linux/cache.h>
18#include <linux/interrupt.h>
19#include <asm/lowcore.h> 15#include <asm/lowcore.h>
20 16
21#define local_softirq_pending() (S390_lowcore.softirq_pending) 17#define local_softirq_pending() (S390_lowcore.softirq_pending)
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index 3840cbe77637..a75f168d2718 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -4,7 +4,6 @@
4 * Copyright 2009 Martin Schwidefsky, IBM Corporation. 4 * Copyright 2009 Martin Schwidefsky, IBM Corporation.
5 */ 5 */
6 6
7static inline void set_perf_event_pending(void) {} 7/* Empty, just to avoid compiling error */
8static inline void clear_perf_event_pending(void) {}
9 8
10#define PERF_EVENT_INDEX_OFFSET 0 9#define PERF_EVENT_INDEX_OFFSET 0
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 33990fa95af0..35b6879628a0 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -16,6 +16,7 @@ config SUPERH
16 select HAVE_ARCH_TRACEHOOK 16 select HAVE_ARCH_TRACEHOOK
17 select HAVE_DMA_API_DEBUG 17 select HAVE_DMA_API_DEBUG
18 select HAVE_DMA_ATTRS 18 select HAVE_DMA_ATTRS
19 select HAVE_IRQ_WORK
19 select HAVE_PERF_EVENTS 20 select HAVE_PERF_EVENTS
20 select PERF_USE_VMALLOC 21 select PERF_USE_VMALLOC
21 select HAVE_KERNEL_GZIP 22 select HAVE_KERNEL_GZIP
@@ -249,6 +250,11 @@ config ARCH_SHMOBILE
249 select PM 250 select PM
250 select PM_RUNTIME 251 select PM_RUNTIME
251 252
253config CPU_HAS_PMU
254 depends on CPU_SH4 || CPU_SH4A
255 default y
256 bool
257
252if SUPERH32 258if SUPERH32
253 259
254choice 260choice
@@ -738,6 +744,14 @@ config GUSA_RB
738 LLSC, this should be more efficient than the other alternative of 744 LLSC, this should be more efficient than the other alternative of
739 disabling interrupts around the atomic sequence. 745 disabling interrupts around the atomic sequence.
740 746
747config HW_PERF_EVENTS
748 bool "Enable hardware performance counter support for perf events"
749 depends on PERF_EVENTS && CPU_HAS_PMU
750 default y
751 help
752 Enable hardware performance counter support for perf events. If
753 disabled, perf events will use software events only.
754
741source "drivers/sh/Kconfig" 755source "drivers/sh/Kconfig"
742 756
743endmenu 757endmenu
diff --git a/arch/sh/include/asm/perf_event.h b/arch/sh/include/asm/perf_event.h
index 3d0c9f36d150..14308bed7ea5 100644
--- a/arch/sh/include/asm/perf_event.h
+++ b/arch/sh/include/asm/perf_event.h
@@ -26,11 +26,4 @@ extern int register_sh_pmu(struct sh_pmu *);
26extern int reserve_pmc_hardware(void); 26extern int reserve_pmc_hardware(void);
27extern void release_pmc_hardware(void); 27extern void release_pmc_hardware(void);
28 28
29static inline void set_perf_event_pending(void)
30{
31 /* Nothing to see here, move along. */
32}
33
34#define PERF_EVENT_INDEX_OFFSET 0
35
36#endif /* __ASM_SH_PERF_EVENT_H */ 29#endif /* __ASM_SH_PERF_EVENT_H */
diff --git a/arch/sh/kernel/perf_callchain.c b/arch/sh/kernel/perf_callchain.c
index a9dd3abde28e..d5ca1ef50fa9 100644
--- a/arch/sh/kernel/perf_callchain.c
+++ b/arch/sh/kernel/perf_callchain.c
@@ -14,11 +14,6 @@
14#include <asm/unwinder.h> 14#include <asm/unwinder.h>
15#include <asm/ptrace.h> 15#include <asm/ptrace.h>
16 16
17static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip)
18{
19 if (entry->nr < PERF_MAX_STACK_DEPTH)
20 entry->ip[entry->nr++] = ip;
21}
22 17
23static void callchain_warning(void *data, char *msg) 18static void callchain_warning(void *data, char *msg)
24{ 19{
@@ -39,7 +34,7 @@ static void callchain_address(void *data, unsigned long addr, int reliable)
39 struct perf_callchain_entry *entry = data; 34 struct perf_callchain_entry *entry = data;
40 35
41 if (reliable) 36 if (reliable)
42 callchain_store(entry, addr); 37 perf_callchain_store(entry, addr);
43} 38}
44 39
45static const struct stacktrace_ops callchain_ops = { 40static const struct stacktrace_ops callchain_ops = {
@@ -49,47 +44,10 @@ static const struct stacktrace_ops callchain_ops = {
49 .address = callchain_address, 44 .address = callchain_address,
50}; 45};
51 46
52static void 47void
53perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) 48perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
54{ 49{
55 callchain_store(entry, PERF_CONTEXT_KERNEL); 50 perf_callchain_store(entry, regs->pc);
56 callchain_store(entry, regs->pc);
57 51
58 unwind_stack(NULL, regs, NULL, &callchain_ops, entry); 52 unwind_stack(NULL, regs, NULL, &callchain_ops, entry);
59} 53}
60
61static void
62perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
63{
64 int is_user;
65
66 if (!regs)
67 return;
68
69 is_user = user_mode(regs);
70
71 if (is_user && current->state != TASK_RUNNING)
72 return;
73
74 /*
75 * Only the kernel side is implemented for now.
76 */
77 if (!is_user)
78 perf_callchain_kernel(regs, entry);
79}
80
81/*
82 * No need for separate IRQ and NMI entries.
83 */
84static DEFINE_PER_CPU(struct perf_callchain_entry, callchain);
85
86struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
87{
88 struct perf_callchain_entry *entry = &__get_cpu_var(callchain);
89
90 entry->nr = 0;
91
92 perf_do_callchain(regs, entry);
93
94 return entry;
95}
diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c
index 7a3dc3567258..5a4b33435650 100644
--- a/arch/sh/kernel/perf_event.c
+++ b/arch/sh/kernel/perf_event.c
@@ -59,6 +59,24 @@ static inline int sh_pmu_initialized(void)
59 return !!sh_pmu; 59 return !!sh_pmu;
60} 60}
61 61
62const char *perf_pmu_name(void)
63{
64 if (!sh_pmu)
65 return NULL;
66
67 return sh_pmu->name;
68}
69EXPORT_SYMBOL_GPL(perf_pmu_name);
70
71int perf_num_counters(void)
72{
73 if (!sh_pmu)
74 return 0;
75
76 return sh_pmu->num_events;
77}
78EXPORT_SYMBOL_GPL(perf_num_counters);
79
62/* 80/*
63 * Release the PMU if this is the last perf_event. 81 * Release the PMU if this is the last perf_event.
64 */ 82 */
@@ -206,50 +224,80 @@ again:
206 local64_add(delta, &event->count); 224 local64_add(delta, &event->count);
207} 225}
208 226
209static void sh_pmu_disable(struct perf_event *event) 227static void sh_pmu_stop(struct perf_event *event, int flags)
210{ 228{
211 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 229 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
212 struct hw_perf_event *hwc = &event->hw; 230 struct hw_perf_event *hwc = &event->hw;
213 int idx = hwc->idx; 231 int idx = hwc->idx;
214 232
215 clear_bit(idx, cpuc->active_mask); 233 if (!(event->hw.state & PERF_HES_STOPPED)) {
216 sh_pmu->disable(hwc, idx); 234 sh_pmu->disable(hwc, idx);
235 cpuc->events[idx] = NULL;
236 event->hw.state |= PERF_HES_STOPPED;
237 }
238
239 if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) {
240 sh_perf_event_update(event, &event->hw, idx);
241 event->hw.state |= PERF_HES_UPTODATE;
242 }
243}
244
245static void sh_pmu_start(struct perf_event *event, int flags)
246{
247 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
248 struct hw_perf_event *hwc = &event->hw;
249 int idx = hwc->idx;
250
251 if (WARN_ON_ONCE(idx == -1))
252 return;
253
254 if (flags & PERF_EF_RELOAD)
255 WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
217 256
218 barrier(); 257 cpuc->events[idx] = event;
258 event->hw.state = 0;
259 sh_pmu->enable(hwc, idx);
260}
219 261
220 sh_perf_event_update(event, &event->hw, idx); 262static void sh_pmu_del(struct perf_event *event, int flags)
263{
264 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
221 265
222 cpuc->events[idx] = NULL; 266 sh_pmu_stop(event, PERF_EF_UPDATE);
223 clear_bit(idx, cpuc->used_mask); 267 __clear_bit(event->hw.idx, cpuc->used_mask);
224 268
225 perf_event_update_userpage(event); 269 perf_event_update_userpage(event);
226} 270}
227 271
228static int sh_pmu_enable(struct perf_event *event) 272static int sh_pmu_add(struct perf_event *event, int flags)
229{ 273{
230 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 274 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
231 struct hw_perf_event *hwc = &event->hw; 275 struct hw_perf_event *hwc = &event->hw;
232 int idx = hwc->idx; 276 int idx = hwc->idx;
277 int ret = -EAGAIN;
278
279 perf_pmu_disable(event->pmu);
233 280
234 if (test_and_set_bit(idx, cpuc->used_mask)) { 281 if (__test_and_set_bit(idx, cpuc->used_mask)) {
235 idx = find_first_zero_bit(cpuc->used_mask, sh_pmu->num_events); 282 idx = find_first_zero_bit(cpuc->used_mask, sh_pmu->num_events);
236 if (idx == sh_pmu->num_events) 283 if (idx == sh_pmu->num_events)
237 return -EAGAIN; 284 goto out;
238 285
239 set_bit(idx, cpuc->used_mask); 286 __set_bit(idx, cpuc->used_mask);
240 hwc->idx = idx; 287 hwc->idx = idx;
241 } 288 }
242 289
243 sh_pmu->disable(hwc, idx); 290 sh_pmu->disable(hwc, idx);
244 291
245 cpuc->events[idx] = event; 292 event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
246 set_bit(idx, cpuc->active_mask); 293 if (flags & PERF_EF_START)
247 294 sh_pmu_start(event, PERF_EF_RELOAD);
248 sh_pmu->enable(hwc, idx);
249 295
250 perf_event_update_userpage(event); 296 perf_event_update_userpage(event);
251 297 ret = 0;
252 return 0; 298out:
299 perf_pmu_enable(event->pmu);
300 return ret;
253} 301}
254 302
255static void sh_pmu_read(struct perf_event *event) 303static void sh_pmu_read(struct perf_event *event)
@@ -257,24 +305,56 @@ static void sh_pmu_read(struct perf_event *event)
257 sh_perf_event_update(event, &event->hw, event->hw.idx); 305 sh_perf_event_update(event, &event->hw, event->hw.idx);
258} 306}
259 307
260static const struct pmu pmu = { 308static int sh_pmu_event_init(struct perf_event *event)
261 .enable = sh_pmu_enable,
262 .disable = sh_pmu_disable,
263 .read = sh_pmu_read,
264};
265
266const struct pmu *hw_perf_event_init(struct perf_event *event)
267{ 309{
268 int err = __hw_perf_event_init(event); 310 int err;
311
312 switch (event->attr.type) {
313 case PERF_TYPE_RAW:
314 case PERF_TYPE_HW_CACHE:
315 case PERF_TYPE_HARDWARE:
316 err = __hw_perf_event_init(event);
317 break;
318
319 default:
320 return -ENOENT;
321 }
322
269 if (unlikely(err)) { 323 if (unlikely(err)) {
270 if (event->destroy) 324 if (event->destroy)
271 event->destroy(event); 325 event->destroy(event);
272 return ERR_PTR(err);
273 } 326 }
274 327
275 return &pmu; 328 return err;
329}
330
331static void sh_pmu_enable(struct pmu *pmu)
332{
333 if (!sh_pmu_initialized())
334 return;
335
336 sh_pmu->enable_all();
337}
338
339static void sh_pmu_disable(struct pmu *pmu)
340{
341 if (!sh_pmu_initialized())
342 return;
343
344 sh_pmu->disable_all();
276} 345}
277 346
347static struct pmu pmu = {
348 .pmu_enable = sh_pmu_enable,
349 .pmu_disable = sh_pmu_disable,
350 .event_init = sh_pmu_event_init,
351 .add = sh_pmu_add,
352 .del = sh_pmu_del,
353 .start = sh_pmu_start,
354 .stop = sh_pmu_stop,
355 .read = sh_pmu_read,
356};
357
278static void sh_pmu_setup(int cpu) 358static void sh_pmu_setup(int cpu)
279{ 359{
280 struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); 360 struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
@@ -299,32 +379,17 @@ sh_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
299 return NOTIFY_OK; 379 return NOTIFY_OK;
300} 380}
301 381
302void hw_perf_enable(void) 382int __cpuinit register_sh_pmu(struct sh_pmu *_pmu)
303{
304 if (!sh_pmu_initialized())
305 return;
306
307 sh_pmu->enable_all();
308}
309
310void hw_perf_disable(void)
311{
312 if (!sh_pmu_initialized())
313 return;
314
315 sh_pmu->disable_all();
316}
317
318int __cpuinit register_sh_pmu(struct sh_pmu *pmu)
319{ 383{
320 if (sh_pmu) 384 if (sh_pmu)
321 return -EBUSY; 385 return -EBUSY;
322 sh_pmu = pmu; 386 sh_pmu = _pmu;
323 387
324 pr_info("Performance Events: %s support registered\n", pmu->name); 388 pr_info("Performance Events: %s support registered\n", _pmu->name);
325 389
326 WARN_ON(pmu->num_events > MAX_HWEVENTS); 390 WARN_ON(_pmu->num_events > MAX_HWEVENTS);
327 391
392 perf_pmu_register(&pmu);
328 perf_cpu_notifier(sh_pmu_notifier); 393 perf_cpu_notifier(sh_pmu_notifier);
329 return 0; 394 return 0;
330} 395}
diff --git a/arch/sh/oprofile/Makefile b/arch/sh/oprofile/Makefile
index 4886c5c1786c..e85aae73e3dc 100644
--- a/arch/sh/oprofile/Makefile
+++ b/arch/sh/oprofile/Makefile
@@ -6,4 +6,8 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
6 oprofilefs.o oprofile_stats.o \ 6 oprofilefs.o oprofile_stats.o \
7 timer_int.o ) 7 timer_int.o )
8 8
9ifeq ($(CONFIG_HW_PERF_EVENTS),y)
10DRIVER_OBJS += $(addprefix ../../../drivers/oprofile/, oprofile_perf.o)
11endif
12
9oprofile-y := $(DRIVER_OBJS) common.o backtrace.o 13oprofile-y := $(DRIVER_OBJS) common.o backtrace.o
diff --git a/arch/sh/oprofile/common.c b/arch/sh/oprofile/common.c
index ac604937f3ee..e10d89376f9b 100644
--- a/arch/sh/oprofile/common.c
+++ b/arch/sh/oprofile/common.c
@@ -17,114 +17,45 @@
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/errno.h> 18#include <linux/errno.h>
19#include <linux/smp.h> 19#include <linux/smp.h>
20#include <linux/perf_event.h>
20#include <asm/processor.h> 21#include <asm/processor.h>
21#include "op_impl.h"
22
23static struct op_sh_model *model;
24
25static struct op_counter_config ctr[20];
26 22
23#ifdef CONFIG_HW_PERF_EVENTS
27extern void sh_backtrace(struct pt_regs * const regs, unsigned int depth); 24extern void sh_backtrace(struct pt_regs * const regs, unsigned int depth);
28 25
29static int op_sh_setup(void) 26char *op_name_from_perf_id(void)
30{
31 /* Pre-compute the values to stuff in the hardware registers. */
32 model->reg_setup(ctr);
33
34 /* Configure the registers on all cpus. */
35 on_each_cpu(model->cpu_setup, NULL, 1);
36
37 return 0;
38}
39
40static int op_sh_create_files(struct super_block *sb, struct dentry *root)
41{ 27{
42 int i, ret = 0; 28 const char *pmu;
29 char buf[20];
30 int size;
43 31
44 for (i = 0; i < model->num_counters; i++) { 32 pmu = perf_pmu_name();
45 struct dentry *dir; 33 if (!pmu)
46 char buf[4]; 34 return NULL;
47 35
48 snprintf(buf, sizeof(buf), "%d", i); 36 size = snprintf(buf, sizeof(buf), "sh/%s", pmu);
49 dir = oprofilefs_mkdir(sb, root, buf); 37 if (size > -1 && size < sizeof(buf))
38 return buf;
50 39
51 ret |= oprofilefs_create_ulong(sb, dir, "enabled", &ctr[i].enabled); 40 return NULL;
52 ret |= oprofilefs_create_ulong(sb, dir, "event", &ctr[i].event);
53 ret |= oprofilefs_create_ulong(sb, dir, "kernel", &ctr[i].kernel);
54 ret |= oprofilefs_create_ulong(sb, dir, "user", &ctr[i].user);
55
56 if (model->create_files)
57 ret |= model->create_files(sb, dir);
58 else
59 ret |= oprofilefs_create_ulong(sb, dir, "count", &ctr[i].count);
60
61 /* Dummy entries */
62 ret |= oprofilefs_create_ulong(sb, dir, "unit_mask", &ctr[i].unit_mask);
63 }
64
65 return ret;
66} 41}
67 42
68static int op_sh_start(void) 43int __init oprofile_arch_init(struct oprofile_operations *ops)
69{ 44{
70 /* Enable performance monitoring for all counters. */ 45 ops->backtrace = sh_backtrace;
71 on_each_cpu(model->cpu_start, NULL, 1);
72 46
73 return 0; 47 return oprofile_perf_init(ops);
74} 48}
75 49
76static void op_sh_stop(void) 50void __exit oprofile_arch_exit(void)
77{ 51{
78 /* Disable performance monitoring for all counters. */ 52 oprofile_perf_exit();
79 on_each_cpu(model->cpu_stop, NULL, 1);
80} 53}
81 54#else
82int __init oprofile_arch_init(struct oprofile_operations *ops) 55int __init oprofile_arch_init(struct oprofile_operations *ops)
83{ 56{
84 struct op_sh_model *lmodel = NULL; 57 pr_info("oprofile: hardware counters not available\n");
85 int ret; 58 return -ENODEV;
86
87 /*
88 * Always assign the backtrace op. If the counter initialization
89 * fails, we fall back to the timer which will still make use of
90 * this.
91 */
92 ops->backtrace = sh_backtrace;
93
94 /*
95 * XXX
96 *
97 * All of the SH7750/SH-4A counters have been converted to perf,
98 * this infrastructure hook is left for other users until they've
99 * had a chance to convert over, at which point all of this
100 * will be deleted.
101 */
102
103 if (!lmodel)
104 return -ENODEV;
105 if (!(current_cpu_data.flags & CPU_HAS_PERF_COUNTER))
106 return -ENODEV;
107
108 ret = lmodel->init();
109 if (unlikely(ret != 0))
110 return ret;
111
112 model = lmodel;
113
114 ops->setup = op_sh_setup;
115 ops->create_files = op_sh_create_files;
116 ops->start = op_sh_start;
117 ops->stop = op_sh_stop;
118 ops->cpu_type = lmodel->cpu_type;
119
120 printk(KERN_INFO "oprofile: using %s performance monitoring.\n",
121 lmodel->cpu_type);
122
123 return 0;
124}
125
126void oprofile_arch_exit(void)
127{
128 if (model && model->exit)
129 model->exit();
130} 59}
60void __exit oprofile_arch_exit(void) {}
61#endif /* CONFIG_HW_PERF_EVENTS */
diff --git a/arch/sh/oprofile/op_impl.h b/arch/sh/oprofile/op_impl.h
deleted file mode 100644
index 1244479ceb29..000000000000
--- a/arch/sh/oprofile/op_impl.h
+++ /dev/null
@@ -1,33 +0,0 @@
1#ifndef __OP_IMPL_H
2#define __OP_IMPL_H
3
4/* Per-counter configuration as set via oprofilefs. */
5struct op_counter_config {
6 unsigned long enabled;
7 unsigned long event;
8
9 unsigned long count;
10
11 /* Dummy values for userspace tool compliance */
12 unsigned long kernel;
13 unsigned long user;
14 unsigned long unit_mask;
15};
16
17/* Per-architecture configury and hooks. */
18struct op_sh_model {
19 void (*reg_setup)(struct op_counter_config *);
20 int (*create_files)(struct super_block *sb, struct dentry *dir);
21 void (*cpu_setup)(void *dummy);
22 int (*init)(void);
23 void (*exit)(void);
24 void (*cpu_start)(void *args);
25 void (*cpu_stop)(void *args);
26 char *cpu_type;
27 unsigned char num_counters;
28};
29
30/* arch/sh/oprofile/common.c */
31extern void sh_backtrace(struct pt_regs * const regs, unsigned int depth);
32
33#endif /* __OP_IMPL_H */
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 491e9d6de191..3e9d31401fb2 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -26,10 +26,12 @@ config SPARC
26 select ARCH_WANT_OPTIONAL_GPIOLIB 26 select ARCH_WANT_OPTIONAL_GPIOLIB
27 select RTC_CLASS 27 select RTC_CLASS
28 select RTC_DRV_M48T59 28 select RTC_DRV_M48T59
29 select HAVE_IRQ_WORK
29 select HAVE_PERF_EVENTS 30 select HAVE_PERF_EVENTS
30 select PERF_USE_VMALLOC 31 select PERF_USE_VMALLOC
31 select HAVE_DMA_ATTRS 32 select HAVE_DMA_ATTRS
32 select HAVE_DMA_API_DEBUG 33 select HAVE_DMA_API_DEBUG
34 select HAVE_ARCH_JUMP_LABEL
33 35
34config SPARC32 36config SPARC32
35 def_bool !64BIT 37 def_bool !64BIT
@@ -53,6 +55,7 @@ config SPARC64
53 select RTC_DRV_BQ4802 55 select RTC_DRV_BQ4802
54 select RTC_DRV_SUN4V 56 select RTC_DRV_SUN4V
55 select RTC_DRV_STARFIRE 57 select RTC_DRV_STARFIRE
58 select HAVE_IRQ_WORK
56 select HAVE_PERF_EVENTS 59 select HAVE_PERF_EVENTS
57 select PERF_USE_VMALLOC 60 select PERF_USE_VMALLOC
58 61
diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h
new file mode 100644
index 000000000000..62e66d7b2fb6
--- /dev/null
+++ b/arch/sparc/include/asm/jump_label.h
@@ -0,0 +1,32 @@
1#ifndef _ASM_SPARC_JUMP_LABEL_H
2#define _ASM_SPARC_JUMP_LABEL_H
3
4#ifdef __KERNEL__
5
6#include <linux/types.h>
7#include <asm/system.h>
8
9#define JUMP_LABEL_NOP_SIZE 4
10
11#define JUMP_LABEL(key, label) \
12 do { \
13 asm goto("1:\n\t" \
14 "nop\n\t" \
15 "nop\n\t" \
16 ".pushsection __jump_table, \"a\"\n\t"\
17 ".word 1b, %l[" #label "], %c0\n\t" \
18 ".popsection \n\t" \
19 : : "i" (key) : : label);\
20 } while (0)
21
22#endif /* __KERNEL__ */
23
24typedef u32 jump_label_t;
25
26struct jump_entry {
27 jump_label_t code;
28 jump_label_t target;
29 jump_label_t key;
30};
31
32#endif
diff --git a/arch/sparc/include/asm/perf_event.h b/arch/sparc/include/asm/perf_event.h
index 727af70646cb..6e8bfa1786da 100644
--- a/arch/sparc/include/asm/perf_event.h
+++ b/arch/sparc/include/asm/perf_event.h
@@ -1,10 +1,6 @@
1#ifndef __ASM_SPARC_PERF_EVENT_H 1#ifndef __ASM_SPARC_PERF_EVENT_H
2#define __ASM_SPARC_PERF_EVENT_H 2#define __ASM_SPARC_PERF_EVENT_H
3 3
4extern void set_perf_event_pending(void);
5
6#define PERF_EVENT_INDEX_OFFSET 0
7
8#ifdef CONFIG_PERF_EVENTS 4#ifdef CONFIG_PERF_EVENTS
9#include <asm/ptrace.h> 5#include <asm/ptrace.h>
10 6
diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile
index 0c2dc1f24a9a..599398fbbc7c 100644
--- a/arch/sparc/kernel/Makefile
+++ b/arch/sparc/kernel/Makefile
@@ -119,3 +119,5 @@ obj-$(CONFIG_COMPAT) += $(audit--y)
119 119
120pc--$(CONFIG_PERF_EVENTS) := perf_event.o 120pc--$(CONFIG_PERF_EVENTS) := perf_event.o
121obj-$(CONFIG_SPARC64) += $(pc--y) 121obj-$(CONFIG_SPARC64) += $(pc--y)
122
123obj-$(CONFIG_SPARC64) += jump_label.o
diff --git a/arch/sparc/kernel/jump_label.c b/arch/sparc/kernel/jump_label.c
new file mode 100644
index 000000000000..ea2dafc93d78
--- /dev/null
+++ b/arch/sparc/kernel/jump_label.c
@@ -0,0 +1,47 @@
1#include <linux/kernel.h>
2#include <linux/types.h>
3#include <linux/mutex.h>
4#include <linux/cpu.h>
5
6#include <linux/jump_label.h>
7#include <linux/memory.h>
8
9#ifdef HAVE_JUMP_LABEL
10
11void arch_jump_label_transform(struct jump_entry *entry,
12 enum jump_label_type type)
13{
14 u32 val;
15 u32 *insn = (u32 *) (unsigned long) entry->code;
16
17 if (type == JUMP_LABEL_ENABLE) {
18 s32 off = (s32)entry->target - (s32)entry->code;
19
20#ifdef CONFIG_SPARC64
21 /* ba,pt %xcc, . + (off << 2) */
22 val = 0x10680000 | ((u32) off >> 2);
23#else
24 /* ba . + (off << 2) */
25 val = 0x10800000 | ((u32) off >> 2);
26#endif
27 } else {
28 val = 0x01000000;
29 }
30
31 get_online_cpus();
32 mutex_lock(&text_mutex);
33 *insn = val;
34 flushi(insn);
35 mutex_unlock(&text_mutex);
36 put_online_cpus();
37}
38
39void arch_jump_label_text_poke_early(jump_label_t addr)
40{
41 u32 *insn_p = (u32 *) (unsigned long) addr;
42
43 *insn_p = 0x01000000;
44 flushi(insn_p);
45}
46
47#endif
diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c
index f848aadf54dc..ee3c7dde8d9f 100644
--- a/arch/sparc/kernel/module.c
+++ b/arch/sparc/kernel/module.c
@@ -18,6 +18,9 @@
18#include <asm/spitfire.h> 18#include <asm/spitfire.h>
19 19
20#ifdef CONFIG_SPARC64 20#ifdef CONFIG_SPARC64
21
22#include <linux/jump_label.h>
23
21static void *module_map(unsigned long size) 24static void *module_map(unsigned long size)
22{ 25{
23 struct vm_struct *area; 26 struct vm_struct *area;
@@ -227,6 +230,9 @@ int module_finalize(const Elf_Ehdr *hdr,
227 const Elf_Shdr *sechdrs, 230 const Elf_Shdr *sechdrs,
228 struct module *me) 231 struct module *me)
229{ 232{
233 /* make jump label nops */
234 jump_label_apply_nops(me);
235
230 /* Cheetah's I-cache is fully coherent. */ 236 /* Cheetah's I-cache is fully coherent. */
231 if (tlb_type == spitfire) { 237 if (tlb_type == spitfire) {
232 unsigned long va; 238 unsigned long va;
diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c
index c4a6a50b4849..b87873c0e8ea 100644
--- a/arch/sparc/kernel/pcr.c
+++ b/arch/sparc/kernel/pcr.c
@@ -7,7 +7,7 @@
7#include <linux/init.h> 7#include <linux/init.h>
8#include <linux/irq.h> 8#include <linux/irq.h>
9 9
10#include <linux/perf_event.h> 10#include <linux/irq_work.h>
11#include <linux/ftrace.h> 11#include <linux/ftrace.h>
12 12
13#include <asm/pil.h> 13#include <asm/pil.h>
@@ -43,14 +43,14 @@ void __irq_entry deferred_pcr_work_irq(int irq, struct pt_regs *regs)
43 43
44 old_regs = set_irq_regs(regs); 44 old_regs = set_irq_regs(regs);
45 irq_enter(); 45 irq_enter();
46#ifdef CONFIG_PERF_EVENTS 46#ifdef CONFIG_IRQ_WORK
47 perf_event_do_pending(); 47 irq_work_run();
48#endif 48#endif
49 irq_exit(); 49 irq_exit();
50 set_irq_regs(old_regs); 50 set_irq_regs(old_regs);
51} 51}
52 52
53void set_perf_event_pending(void) 53void arch_irq_work_raise(void)
54{ 54{
55 set_softint(1 << PIL_DEFERRED_PCR_WORK); 55 set_softint(1 << PIL_DEFERRED_PCR_WORK);
56} 56}
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 6318e622cfb0..0d6deb55a2ae 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -658,13 +658,16 @@ static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr)
658 658
659 enc = perf_event_get_enc(cpuc->events[i]); 659 enc = perf_event_get_enc(cpuc->events[i]);
660 pcr &= ~mask_for_index(idx); 660 pcr &= ~mask_for_index(idx);
661 pcr |= event_encoding(enc, idx); 661 if (hwc->state & PERF_HES_STOPPED)
662 pcr |= nop_for_index(idx);
663 else
664 pcr |= event_encoding(enc, idx);
662 } 665 }
663out: 666out:
664 return pcr; 667 return pcr;
665} 668}
666 669
667void hw_perf_enable(void) 670static void sparc_pmu_enable(struct pmu *pmu)
668{ 671{
669 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 672 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
670 u64 pcr; 673 u64 pcr;
@@ -691,7 +694,7 @@ void hw_perf_enable(void)
691 pcr_ops->write(cpuc->pcr); 694 pcr_ops->write(cpuc->pcr);
692} 695}
693 696
694void hw_perf_disable(void) 697static void sparc_pmu_disable(struct pmu *pmu)
695{ 698{
696 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 699 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
697 u64 val; 700 u64 val;
@@ -710,19 +713,65 @@ void hw_perf_disable(void)
710 pcr_ops->write(cpuc->pcr); 713 pcr_ops->write(cpuc->pcr);
711} 714}
712 715
713static void sparc_pmu_disable(struct perf_event *event) 716static int active_event_index(struct cpu_hw_events *cpuc,
717 struct perf_event *event)
718{
719 int i;
720
721 for (i = 0; i < cpuc->n_events; i++) {
722 if (cpuc->event[i] == event)
723 break;
724 }
725 BUG_ON(i == cpuc->n_events);
726 return cpuc->current_idx[i];
727}
728
729static void sparc_pmu_start(struct perf_event *event, int flags)
730{
731 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
732 int idx = active_event_index(cpuc, event);
733
734 if (flags & PERF_EF_RELOAD) {
735 WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
736 sparc_perf_event_set_period(event, &event->hw, idx);
737 }
738
739 event->hw.state = 0;
740
741 sparc_pmu_enable_event(cpuc, &event->hw, idx);
742}
743
744static void sparc_pmu_stop(struct perf_event *event, int flags)
745{
746 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
747 int idx = active_event_index(cpuc, event);
748
749 if (!(event->hw.state & PERF_HES_STOPPED)) {
750 sparc_pmu_disable_event(cpuc, &event->hw, idx);
751 event->hw.state |= PERF_HES_STOPPED;
752 }
753
754 if (!(event->hw.state & PERF_HES_UPTODATE) && (flags & PERF_EF_UPDATE)) {
755 sparc_perf_event_update(event, &event->hw, idx);
756 event->hw.state |= PERF_HES_UPTODATE;
757 }
758}
759
760static void sparc_pmu_del(struct perf_event *event, int _flags)
714{ 761{
715 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 762 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
716 struct hw_perf_event *hwc = &event->hw;
717 unsigned long flags; 763 unsigned long flags;
718 int i; 764 int i;
719 765
720 local_irq_save(flags); 766 local_irq_save(flags);
721 perf_disable(); 767 perf_pmu_disable(event->pmu);
722 768
723 for (i = 0; i < cpuc->n_events; i++) { 769 for (i = 0; i < cpuc->n_events; i++) {
724 if (event == cpuc->event[i]) { 770 if (event == cpuc->event[i]) {
725 int idx = cpuc->current_idx[i]; 771 /* Absorb the final count and turn off the
772 * event.
773 */
774 sparc_pmu_stop(event, PERF_EF_UPDATE);
726 775
727 /* Shift remaining entries down into 776 /* Shift remaining entries down into
728 * the existing slot. 777 * the existing slot.
@@ -734,13 +783,6 @@ static void sparc_pmu_disable(struct perf_event *event)
734 cpuc->current_idx[i]; 783 cpuc->current_idx[i];
735 } 784 }
736 785
737 /* Absorb the final count and turn off the
738 * event.
739 */
740 sparc_pmu_disable_event(cpuc, hwc, idx);
741 barrier();
742 sparc_perf_event_update(event, hwc, idx);
743
744 perf_event_update_userpage(event); 786 perf_event_update_userpage(event);
745 787
746 cpuc->n_events--; 788 cpuc->n_events--;
@@ -748,23 +790,10 @@ static void sparc_pmu_disable(struct perf_event *event)
748 } 790 }
749 } 791 }
750 792
751 perf_enable(); 793 perf_pmu_enable(event->pmu);
752 local_irq_restore(flags); 794 local_irq_restore(flags);
753} 795}
754 796
755static int active_event_index(struct cpu_hw_events *cpuc,
756 struct perf_event *event)
757{
758 int i;
759
760 for (i = 0; i < cpuc->n_events; i++) {
761 if (cpuc->event[i] == event)
762 break;
763 }
764 BUG_ON(i == cpuc->n_events);
765 return cpuc->current_idx[i];
766}
767
768static void sparc_pmu_read(struct perf_event *event) 797static void sparc_pmu_read(struct perf_event *event)
769{ 798{
770 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 799 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -774,15 +803,6 @@ static void sparc_pmu_read(struct perf_event *event)
774 sparc_perf_event_update(event, hwc, idx); 803 sparc_perf_event_update(event, hwc, idx);
775} 804}
776 805
777static void sparc_pmu_unthrottle(struct perf_event *event)
778{
779 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
780 int idx = active_event_index(cpuc, event);
781 struct hw_perf_event *hwc = &event->hw;
782
783 sparc_pmu_enable_event(cpuc, hwc, idx);
784}
785
786static atomic_t active_events = ATOMIC_INIT(0); 806static atomic_t active_events = ATOMIC_INIT(0);
787static DEFINE_MUTEX(pmc_grab_mutex); 807static DEFINE_MUTEX(pmc_grab_mutex);
788 808
@@ -877,7 +897,7 @@ static int sparc_check_constraints(struct perf_event **evts,
877 if (!n_ev) 897 if (!n_ev)
878 return 0; 898 return 0;
879 899
880 if (n_ev > perf_max_events) 900 if (n_ev > MAX_HWEVENTS)
881 return -1; 901 return -1;
882 902
883 msk0 = perf_event_get_msk(events[0]); 903 msk0 = perf_event_get_msk(events[0]);
@@ -984,23 +1004,27 @@ static int collect_events(struct perf_event *group, int max_count,
984 return n; 1004 return n;
985} 1005}
986 1006
987static int sparc_pmu_enable(struct perf_event *event) 1007static int sparc_pmu_add(struct perf_event *event, int ef_flags)
988{ 1008{
989 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1009 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
990 int n0, ret = -EAGAIN; 1010 int n0, ret = -EAGAIN;
991 unsigned long flags; 1011 unsigned long flags;
992 1012
993 local_irq_save(flags); 1013 local_irq_save(flags);
994 perf_disable(); 1014 perf_pmu_disable(event->pmu);
995 1015
996 n0 = cpuc->n_events; 1016 n0 = cpuc->n_events;
997 if (n0 >= perf_max_events) 1017 if (n0 >= MAX_HWEVENTS)
998 goto out; 1018 goto out;
999 1019
1000 cpuc->event[n0] = event; 1020 cpuc->event[n0] = event;
1001 cpuc->events[n0] = event->hw.event_base; 1021 cpuc->events[n0] = event->hw.event_base;
1002 cpuc->current_idx[n0] = PIC_NO_INDEX; 1022 cpuc->current_idx[n0] = PIC_NO_INDEX;
1003 1023
1024 event->hw.state = PERF_HES_UPTODATE;
1025 if (!(ef_flags & PERF_EF_START))
1026 event->hw.state |= PERF_HES_STOPPED;
1027
1004 /* 1028 /*
1005 * If group events scheduling transaction was started, 1029 * If group events scheduling transaction was started,
1006 * skip the schedulability test here, it will be peformed 1030 * skip the schedulability test here, it will be peformed
@@ -1020,12 +1044,12 @@ nocheck:
1020 1044
1021 ret = 0; 1045 ret = 0;
1022out: 1046out:
1023 perf_enable(); 1047 perf_pmu_enable(event->pmu);
1024 local_irq_restore(flags); 1048 local_irq_restore(flags);
1025 return ret; 1049 return ret;
1026} 1050}
1027 1051
1028static int __hw_perf_event_init(struct perf_event *event) 1052static int sparc_pmu_event_init(struct perf_event *event)
1029{ 1053{
1030 struct perf_event_attr *attr = &event->attr; 1054 struct perf_event_attr *attr = &event->attr;
1031 struct perf_event *evts[MAX_HWEVENTS]; 1055 struct perf_event *evts[MAX_HWEVENTS];
@@ -1038,22 +1062,33 @@ static int __hw_perf_event_init(struct perf_event *event)
1038 if (atomic_read(&nmi_active) < 0) 1062 if (atomic_read(&nmi_active) < 0)
1039 return -ENODEV; 1063 return -ENODEV;
1040 1064
1041 pmap = NULL; 1065 switch (attr->type) {
1042 if (attr->type == PERF_TYPE_HARDWARE) { 1066 case PERF_TYPE_HARDWARE:
1043 if (attr->config >= sparc_pmu->max_events) 1067 if (attr->config >= sparc_pmu->max_events)
1044 return -EINVAL; 1068 return -EINVAL;
1045 pmap = sparc_pmu->event_map(attr->config); 1069 pmap = sparc_pmu->event_map(attr->config);
1046 } else if (attr->type == PERF_TYPE_HW_CACHE) { 1070 break;
1071
1072 case PERF_TYPE_HW_CACHE:
1047 pmap = sparc_map_cache_event(attr->config); 1073 pmap = sparc_map_cache_event(attr->config);
1048 if (IS_ERR(pmap)) 1074 if (IS_ERR(pmap))
1049 return PTR_ERR(pmap); 1075 return PTR_ERR(pmap);
1050 } else if (attr->type != PERF_TYPE_RAW) 1076 break;
1051 return -EOPNOTSUPP; 1077
1078 case PERF_TYPE_RAW:
1079 pmap = NULL;
1080 break;
1081
1082 default:
1083 return -ENOENT;
1084
1085 }
1052 1086
1053 if (pmap) { 1087 if (pmap) {
1054 hwc->event_base = perf_event_encode(pmap); 1088 hwc->event_base = perf_event_encode(pmap);
1055 } else { 1089 } else {
1056 /* User gives us "(encoding << 16) | pic_mask" for 1090 /*
1091 * User gives us "(encoding << 16) | pic_mask" for
1057 * PERF_TYPE_RAW events. 1092 * PERF_TYPE_RAW events.
1058 */ 1093 */
1059 hwc->event_base = attr->config; 1094 hwc->event_base = attr->config;
@@ -1071,7 +1106,7 @@ static int __hw_perf_event_init(struct perf_event *event)
1071 n = 0; 1106 n = 0;
1072 if (event->group_leader != event) { 1107 if (event->group_leader != event) {
1073 n = collect_events(event->group_leader, 1108 n = collect_events(event->group_leader,
1074 perf_max_events - 1, 1109 MAX_HWEVENTS - 1,
1075 evts, events, current_idx_dmy); 1110 evts, events, current_idx_dmy);
1076 if (n < 0) 1111 if (n < 0)
1077 return -EINVAL; 1112 return -EINVAL;
@@ -1107,10 +1142,11 @@ static int __hw_perf_event_init(struct perf_event *event)
1107 * Set the flag to make pmu::enable() not perform the 1142 * Set the flag to make pmu::enable() not perform the
1108 * schedulability test, it will be performed at commit time 1143 * schedulability test, it will be performed at commit time
1109 */ 1144 */
1110static void sparc_pmu_start_txn(const struct pmu *pmu) 1145static void sparc_pmu_start_txn(struct pmu *pmu)
1111{ 1146{
1112 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 1147 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
1113 1148
1149 perf_pmu_disable(pmu);
1114 cpuhw->group_flag |= PERF_EVENT_TXN; 1150 cpuhw->group_flag |= PERF_EVENT_TXN;
1115} 1151}
1116 1152
@@ -1119,11 +1155,12 @@ static void sparc_pmu_start_txn(const struct pmu *pmu)
1119 * Clear the flag and pmu::enable() will perform the 1155 * Clear the flag and pmu::enable() will perform the
1120 * schedulability test. 1156 * schedulability test.
1121 */ 1157 */
1122static void sparc_pmu_cancel_txn(const struct pmu *pmu) 1158static void sparc_pmu_cancel_txn(struct pmu *pmu)
1123{ 1159{
1124 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 1160 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
1125 1161
1126 cpuhw->group_flag &= ~PERF_EVENT_TXN; 1162 cpuhw->group_flag &= ~PERF_EVENT_TXN;
1163 perf_pmu_enable(pmu);
1127} 1164}
1128 1165
1129/* 1166/*
@@ -1131,7 +1168,7 @@ static void sparc_pmu_cancel_txn(const struct pmu *pmu)
1131 * Perform the group schedulability test as a whole 1168 * Perform the group schedulability test as a whole
1132 * Return 0 if success 1169 * Return 0 if success
1133 */ 1170 */
1134static int sparc_pmu_commit_txn(const struct pmu *pmu) 1171static int sparc_pmu_commit_txn(struct pmu *pmu)
1135{ 1172{
1136 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1173 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1137 int n; 1174 int n;
@@ -1147,28 +1184,24 @@ static int sparc_pmu_commit_txn(const struct pmu *pmu)
1147 return -EAGAIN; 1184 return -EAGAIN;
1148 1185
1149 cpuc->group_flag &= ~PERF_EVENT_TXN; 1186 cpuc->group_flag &= ~PERF_EVENT_TXN;
1187 perf_pmu_enable(pmu);
1150 return 0; 1188 return 0;
1151} 1189}
1152 1190
1153static const struct pmu pmu = { 1191static struct pmu pmu = {
1154 .enable = sparc_pmu_enable, 1192 .pmu_enable = sparc_pmu_enable,
1155 .disable = sparc_pmu_disable, 1193 .pmu_disable = sparc_pmu_disable,
1194 .event_init = sparc_pmu_event_init,
1195 .add = sparc_pmu_add,
1196 .del = sparc_pmu_del,
1197 .start = sparc_pmu_start,
1198 .stop = sparc_pmu_stop,
1156 .read = sparc_pmu_read, 1199 .read = sparc_pmu_read,
1157 .unthrottle = sparc_pmu_unthrottle,
1158 .start_txn = sparc_pmu_start_txn, 1200 .start_txn = sparc_pmu_start_txn,
1159 .cancel_txn = sparc_pmu_cancel_txn, 1201 .cancel_txn = sparc_pmu_cancel_txn,
1160 .commit_txn = sparc_pmu_commit_txn, 1202 .commit_txn = sparc_pmu_commit_txn,
1161}; 1203};
1162 1204
1163const struct pmu *hw_perf_event_init(struct perf_event *event)
1164{
1165 int err = __hw_perf_event_init(event);
1166
1167 if (err)
1168 return ERR_PTR(err);
1169 return &pmu;
1170}
1171
1172void perf_event_print_debug(void) 1205void perf_event_print_debug(void)
1173{ 1206{
1174 unsigned long flags; 1207 unsigned long flags;
@@ -1244,7 +1277,7 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
1244 continue; 1277 continue;
1245 1278
1246 if (perf_event_overflow(event, 1, &data, regs)) 1279 if (perf_event_overflow(event, 1, &data, regs))
1247 sparc_pmu_disable_event(cpuc, hwc, idx); 1280 sparc_pmu_stop(event, 0);
1248 } 1281 }
1249 1282
1250 return NOTIFY_STOP; 1283 return NOTIFY_STOP;
@@ -1285,28 +1318,21 @@ void __init init_hw_perf_events(void)
1285 1318
1286 pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type); 1319 pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type);
1287 1320
1288 /* All sparc64 PMUs currently have 2 events. */ 1321 perf_pmu_register(&pmu);
1289 perf_max_events = 2;
1290
1291 register_die_notifier(&perf_event_nmi_notifier); 1322 register_die_notifier(&perf_event_nmi_notifier);
1292} 1323}
1293 1324
1294static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip) 1325void perf_callchain_kernel(struct perf_callchain_entry *entry,
1295{ 1326 struct pt_regs *regs)
1296 if (entry->nr < PERF_MAX_STACK_DEPTH)
1297 entry->ip[entry->nr++] = ip;
1298}
1299
1300static void perf_callchain_kernel(struct pt_regs *regs,
1301 struct perf_callchain_entry *entry)
1302{ 1327{
1303 unsigned long ksp, fp; 1328 unsigned long ksp, fp;
1304#ifdef CONFIG_FUNCTION_GRAPH_TRACER 1329#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1305 int graph = 0; 1330 int graph = 0;
1306#endif 1331#endif
1307 1332
1308 callchain_store(entry, PERF_CONTEXT_KERNEL); 1333 stack_trace_flush();
1309 callchain_store(entry, regs->tpc); 1334
1335 perf_callchain_store(entry, regs->tpc);
1310 1336
1311 ksp = regs->u_regs[UREG_I6]; 1337 ksp = regs->u_regs[UREG_I6];
1312 fp = ksp + STACK_BIAS; 1338 fp = ksp + STACK_BIAS;
@@ -1330,13 +1356,13 @@ static void perf_callchain_kernel(struct pt_regs *regs,
1330 pc = sf->callers_pc; 1356 pc = sf->callers_pc;
1331 fp = (unsigned long)sf->fp + STACK_BIAS; 1357 fp = (unsigned long)sf->fp + STACK_BIAS;
1332 } 1358 }
1333 callchain_store(entry, pc); 1359 perf_callchain_store(entry, pc);
1334#ifdef CONFIG_FUNCTION_GRAPH_TRACER 1360#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1335 if ((pc + 8UL) == (unsigned long) &return_to_handler) { 1361 if ((pc + 8UL) == (unsigned long) &return_to_handler) {
1336 int index = current->curr_ret_stack; 1362 int index = current->curr_ret_stack;
1337 if (current->ret_stack && index >= graph) { 1363 if (current->ret_stack && index >= graph) {
1338 pc = current->ret_stack[index - graph].ret; 1364 pc = current->ret_stack[index - graph].ret;
1339 callchain_store(entry, pc); 1365 perf_callchain_store(entry, pc);
1340 graph++; 1366 graph++;
1341 } 1367 }
1342 } 1368 }
@@ -1344,13 +1370,12 @@ static void perf_callchain_kernel(struct pt_regs *regs,
1344 } while (entry->nr < PERF_MAX_STACK_DEPTH); 1370 } while (entry->nr < PERF_MAX_STACK_DEPTH);
1345} 1371}
1346 1372
1347static void perf_callchain_user_64(struct pt_regs *regs, 1373static void perf_callchain_user_64(struct perf_callchain_entry *entry,
1348 struct perf_callchain_entry *entry) 1374 struct pt_regs *regs)
1349{ 1375{
1350 unsigned long ufp; 1376 unsigned long ufp;
1351 1377
1352 callchain_store(entry, PERF_CONTEXT_USER); 1378 perf_callchain_store(entry, regs->tpc);
1353 callchain_store(entry, regs->tpc);
1354 1379
1355 ufp = regs->u_regs[UREG_I6] + STACK_BIAS; 1380 ufp = regs->u_regs[UREG_I6] + STACK_BIAS;
1356 do { 1381 do {
@@ -1363,17 +1388,16 @@ static void perf_callchain_user_64(struct pt_regs *regs,
1363 1388
1364 pc = sf.callers_pc; 1389 pc = sf.callers_pc;
1365 ufp = (unsigned long)sf.fp + STACK_BIAS; 1390 ufp = (unsigned long)sf.fp + STACK_BIAS;
1366 callchain_store(entry, pc); 1391 perf_callchain_store(entry, pc);
1367 } while (entry->nr < PERF_MAX_STACK_DEPTH); 1392 } while (entry->nr < PERF_MAX_STACK_DEPTH);
1368} 1393}
1369 1394
1370static void perf_callchain_user_32(struct pt_regs *regs, 1395static void perf_callchain_user_32(struct perf_callchain_entry *entry,
1371 struct perf_callchain_entry *entry) 1396 struct pt_regs *regs)
1372{ 1397{
1373 unsigned long ufp; 1398 unsigned long ufp;
1374 1399
1375 callchain_store(entry, PERF_CONTEXT_USER); 1400 perf_callchain_store(entry, regs->tpc);
1376 callchain_store(entry, regs->tpc);
1377 1401
1378 ufp = regs->u_regs[UREG_I6] & 0xffffffffUL; 1402 ufp = regs->u_regs[UREG_I6] & 0xffffffffUL;
1379 do { 1403 do {
@@ -1386,34 +1410,16 @@ static void perf_callchain_user_32(struct pt_regs *regs,
1386 1410
1387 pc = sf.callers_pc; 1411 pc = sf.callers_pc;
1388 ufp = (unsigned long)sf.fp; 1412 ufp = (unsigned long)sf.fp;
1389 callchain_store(entry, pc); 1413 perf_callchain_store(entry, pc);
1390 } while (entry->nr < PERF_MAX_STACK_DEPTH); 1414 } while (entry->nr < PERF_MAX_STACK_DEPTH);
1391} 1415}
1392 1416
1393/* Like powerpc we can't get PMU interrupts within the PMU handler, 1417void
1394 * so no need for separate NMI and IRQ chains as on x86. 1418perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
1395 */
1396static DEFINE_PER_CPU(struct perf_callchain_entry, callchain);
1397
1398struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
1399{ 1419{
1400 struct perf_callchain_entry *entry = &__get_cpu_var(callchain); 1420 flushw_user();
1401 1421 if (test_thread_flag(TIF_32BIT))
1402 entry->nr = 0; 1422 perf_callchain_user_32(entry, regs);
1403 if (!user_mode(regs)) { 1423 else
1404 stack_trace_flush(); 1424 perf_callchain_user_64(entry, regs);
1405 perf_callchain_kernel(regs, entry);
1406 if (current->mm)
1407 regs = task_pt_regs(current);
1408 else
1409 regs = NULL;
1410 }
1411 if (regs) {
1412 flushw_user();
1413 if (test_thread_flag(TIF_32BIT))
1414 perf_callchain_user_32(regs, entry);
1415 else
1416 perf_callchain_user_64(regs, entry);
1417 }
1418 return entry;
1419} 1425}
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cea0cd9a316f..fd227d6b8d9c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -25,6 +25,7 @@ config X86
25 select HAVE_IDE 25 select HAVE_IDE
26 select HAVE_OPROFILE 26 select HAVE_OPROFILE
27 select HAVE_PERF_EVENTS if (!M386 && !M486) 27 select HAVE_PERF_EVENTS if (!M386 && !M486)
28 select HAVE_IRQ_WORK
28 select HAVE_IOREMAP_PROT 29 select HAVE_IOREMAP_PROT
29 select HAVE_KPROBES 30 select HAVE_KPROBES
30 select ARCH_WANT_OPTIONAL_GPIOLIB 31 select ARCH_WANT_OPTIONAL_GPIOLIB
@@ -33,6 +34,7 @@ config X86
33 select HAVE_KRETPROBES 34 select HAVE_KRETPROBES
34 select HAVE_OPTPROBES 35 select HAVE_OPTPROBES
35 select HAVE_FTRACE_MCOUNT_RECORD 36 select HAVE_FTRACE_MCOUNT_RECORD
37 select HAVE_C_RECORDMCOUNT
36 select HAVE_DYNAMIC_FTRACE 38 select HAVE_DYNAMIC_FTRACE
37 select HAVE_FUNCTION_TRACER 39 select HAVE_FUNCTION_TRACER
38 select HAVE_FUNCTION_GRAPH_TRACER 40 select HAVE_FUNCTION_GRAPH_TRACER
@@ -59,6 +61,8 @@ config X86
59 select ANON_INODES 61 select ANON_INODES
60 select HAVE_ARCH_KMEMCHECK 62 select HAVE_ARCH_KMEMCHECK
61 select HAVE_USER_RETURN_NOTIFIER 63 select HAVE_USER_RETURN_NOTIFIER
64 select HAVE_ARCH_JUMP_LABEL
65 select HAVE_TEXT_POKE_SMP
62 66
63config INSTRUCTION_DECODER 67config INSTRUCTION_DECODER
64 def_bool (KPROBES || PERF_EVENTS) 68 def_bool (KPROBES || PERF_EVENTS)
@@ -2125,6 +2129,10 @@ config HAVE_ATOMIC_IOMAP
2125 def_bool y 2129 def_bool y
2126 depends on X86_32 2130 depends on X86_32
2127 2131
2132config HAVE_TEXT_POKE_SMP
2133 bool
2134 select STOP_MACHINE if SMP
2135
2128source "net/Kconfig" 2136source "net/Kconfig"
2129 2137
2130source "drivers/Kconfig" 2138source "drivers/Kconfig"
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index bc6abb7bc7ee..76561d20ea2f 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -4,6 +4,7 @@
4#include <linux/types.h> 4#include <linux/types.h>
5#include <linux/stddef.h> 5#include <linux/stddef.h>
6#include <linux/stringify.h> 6#include <linux/stringify.h>
7#include <linux/jump_label.h>
7#include <asm/asm.h> 8#include <asm/asm.h>
8 9
9/* 10/*
@@ -160,6 +161,8 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
160#define __parainstructions_end NULL 161#define __parainstructions_end NULL
161#endif 162#endif
162 163
164extern void *text_poke_early(void *addr, const void *opcode, size_t len);
165
163/* 166/*
164 * Clear and restore the kernel write-protection flag on the local CPU. 167 * Clear and restore the kernel write-protection flag on the local CPU.
165 * Allows the kernel to edit read-only pages. 168 * Allows the kernel to edit read-only pages.
@@ -180,4 +183,12 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
180extern void *text_poke(void *addr, const void *opcode, size_t len); 183extern void *text_poke(void *addr, const void *opcode, size_t len);
181extern void *text_poke_smp(void *addr, const void *opcode, size_t len); 184extern void *text_poke_smp(void *addr, const void *opcode, size_t len);
182 185
186#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
187#define IDEAL_NOP_SIZE_5 5
188extern unsigned char ideal_nop5[IDEAL_NOP_SIZE_5];
189extern void arch_init_ideal_nop5(void);
190#else
191static inline void arch_init_ideal_nop5(void) {}
192#endif
193
183#endif /* _ASM_X86_ALTERNATIVE_H */ 194#endif /* _ASM_X86_ALTERNATIVE_H */
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index 8e8ec663a98f..b8e96a18676b 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -49,8 +49,8 @@ BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR)
49BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) 49BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
50BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) 50BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
51 51
52#ifdef CONFIG_PERF_EVENTS 52#ifdef CONFIG_IRQ_WORK
53BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR) 53BUILD_INTERRUPT(irq_work_interrupt, IRQ_WORK_VECTOR)
54#endif 54#endif
55 55
56#ifdef CONFIG_X86_THERMAL_VECTOR 56#ifdef CONFIG_X86_THERMAL_VECTOR
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index aeab29aee617..55e4de613f0e 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -14,7 +14,7 @@ typedef struct {
14#endif 14#endif
15 unsigned int x86_platform_ipis; /* arch dependent */ 15 unsigned int x86_platform_ipis; /* arch dependent */
16 unsigned int apic_perf_irqs; 16 unsigned int apic_perf_irqs;
17 unsigned int apic_pending_irqs; 17 unsigned int apic_irq_work_irqs;
18#ifdef CONFIG_SMP 18#ifdef CONFIG_SMP
19 unsigned int irq_resched_count; 19 unsigned int irq_resched_count;
20 unsigned int irq_call_count; 20 unsigned int irq_call_count;
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 46c0fe05f230..3a54a1ca1a02 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -29,7 +29,7 @@
29extern void apic_timer_interrupt(void); 29extern void apic_timer_interrupt(void);
30extern void x86_platform_ipi(void); 30extern void x86_platform_ipi(void);
31extern void error_interrupt(void); 31extern void error_interrupt(void);
32extern void perf_pending_interrupt(void); 32extern void irq_work_interrupt(void);
33 33
34extern void spurious_interrupt(void); 34extern void spurious_interrupt(void);
35extern void thermal_interrupt(void); 35extern void thermal_interrupt(void);
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index e2ca30092557..6af0894dafb4 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -114,9 +114,9 @@
114#define X86_PLATFORM_IPI_VECTOR 0xed 114#define X86_PLATFORM_IPI_VECTOR 0xed
115 115
116/* 116/*
117 * Performance monitoring pending work vector: 117 * IRQ work vector:
118 */ 118 */
119#define LOCAL_PENDING_VECTOR 0xec 119#define IRQ_WORK_VECTOR 0xec
120 120
121#define UV_BAU_MESSAGE 0xea 121#define UV_BAU_MESSAGE 0xea
122 122
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
new file mode 100644
index 000000000000..f52d42e80585
--- /dev/null
+++ b/arch/x86/include/asm/jump_label.h
@@ -0,0 +1,37 @@
1#ifndef _ASM_X86_JUMP_LABEL_H
2#define _ASM_X86_JUMP_LABEL_H
3
4#ifdef __KERNEL__
5
6#include <linux/types.h>
7#include <asm/nops.h>
8
9#define JUMP_LABEL_NOP_SIZE 5
10
11# define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"
12
13# define JUMP_LABEL(key, label) \
14 do { \
15 asm goto("1:" \
16 JUMP_LABEL_INITIAL_NOP \
17 ".pushsection __jump_table, \"a\" \n\t"\
18 _ASM_PTR "1b, %l[" #label "], %c0 \n\t" \
19 ".popsection \n\t" \
20 : : "i" (key) : : label); \
21 } while (0)
22
23#endif /* __KERNEL__ */
24
25#ifdef CONFIG_X86_64
26typedef u64 jump_label_t;
27#else
28typedef u32 jump_label_t;
29#endif
30
31struct jump_entry {
32 jump_label_t code;
33 jump_label_t target;
34 jump_label_t key;
35};
36
37#endif
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index def500776b16..a70cd216be5d 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -36,19 +36,6 @@
36#define P4_ESCR_EMASK(v) ((v) << P4_ESCR_EVENTMASK_SHIFT) 36#define P4_ESCR_EMASK(v) ((v) << P4_ESCR_EVENTMASK_SHIFT)
37#define P4_ESCR_TAG(v) ((v) << P4_ESCR_TAG_SHIFT) 37#define P4_ESCR_TAG(v) ((v) << P4_ESCR_TAG_SHIFT)
38 38
39/* Non HT mask */
40#define P4_ESCR_MASK \
41 (P4_ESCR_EVENT_MASK | \
42 P4_ESCR_EVENTMASK_MASK | \
43 P4_ESCR_TAG_MASK | \
44 P4_ESCR_TAG_ENABLE | \
45 P4_ESCR_T0_OS | \
46 P4_ESCR_T0_USR)
47
48/* HT mask */
49#define P4_ESCR_MASK_HT \
50 (P4_ESCR_MASK | P4_ESCR_T1_OS | P4_ESCR_T1_USR)
51
52#define P4_CCCR_OVF 0x80000000U 39#define P4_CCCR_OVF 0x80000000U
53#define P4_CCCR_CASCADE 0x40000000U 40#define P4_CCCR_CASCADE 0x40000000U
54#define P4_CCCR_OVF_PMI_T0 0x04000000U 41#define P4_CCCR_OVF_PMI_T0 0x04000000U
@@ -70,23 +57,6 @@
70#define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT) 57#define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT)
71#define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT) 58#define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT)
72 59
73/* Non HT mask */
74#define P4_CCCR_MASK \
75 (P4_CCCR_OVF | \
76 P4_CCCR_CASCADE | \
77 P4_CCCR_OVF_PMI_T0 | \
78 P4_CCCR_FORCE_OVF | \
79 P4_CCCR_EDGE | \
80 P4_CCCR_THRESHOLD_MASK | \
81 P4_CCCR_COMPLEMENT | \
82 P4_CCCR_COMPARE | \
83 P4_CCCR_ESCR_SELECT_MASK | \
84 P4_CCCR_ENABLE)
85
86/* HT mask */
87#define P4_CCCR_MASK_HT \
88 (P4_CCCR_MASK | P4_CCCR_OVF_PMI_T1 | P4_CCCR_THREAD_ANY)
89
90#define P4_GEN_ESCR_EMASK(class, name, bit) \ 60#define P4_GEN_ESCR_EMASK(class, name, bit) \
91 class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT) 61 class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT)
92#define P4_ESCR_EMASK_BIT(class, name) class##__##name 62#define P4_ESCR_EMASK_BIT(class, name) class##__##name
@@ -127,6 +97,28 @@
127#define P4_CONFIG_HT_SHIFT 63 97#define P4_CONFIG_HT_SHIFT 63
128#define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) 98#define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT)
129 99
100/*
101 * The bits we allow to pass for RAW events
102 */
103#define P4_CONFIG_MASK_ESCR \
104 P4_ESCR_EVENT_MASK | \
105 P4_ESCR_EVENTMASK_MASK | \
106 P4_ESCR_TAG_MASK | \
107 P4_ESCR_TAG_ENABLE
108
109#define P4_CONFIG_MASK_CCCR \
110 P4_CCCR_EDGE | \
111 P4_CCCR_THRESHOLD_MASK | \
112 P4_CCCR_COMPLEMENT | \
113 P4_CCCR_COMPARE | \
114 P4_CCCR_THREAD_ANY | \
115 P4_CCCR_RESERVED
116
117/* some dangerous bits are reserved for kernel internals */
118#define P4_CONFIG_MASK \
119 (p4_config_pack_escr(P4_CONFIG_MASK_ESCR)) | \
120 (p4_config_pack_cccr(P4_CONFIG_MASK_CCCR))
121
130static inline bool p4_is_event_cascaded(u64 config) 122static inline bool p4_is_event_cascaded(u64 config)
131{ 123{
132 u32 cccr = p4_config_unpack_cccr(config); 124 u32 cccr = p4_config_unpack_cccr(config);
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index fedf32a8c3ec..7490bf8d1459 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -34,7 +34,8 @@ GCOV_PROFILE_paravirt.o := n
34obj-y := process_$(BITS).o signal.o entry_$(BITS).o 34obj-y := process_$(BITS).o signal.o entry_$(BITS).o
35obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o 35obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
36obj-y += time.o ioport.o ldt.o dumpstack.o 36obj-y += time.o ioport.o ldt.o dumpstack.o
37obj-y += setup.o x86_init.o i8259.o irqinit.o 37obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o
38obj-$(CONFIG_IRQ_WORK) += irq_work.o
38obj-$(CONFIG_X86_VISWS) += visws_quirks.o 39obj-$(CONFIG_X86_VISWS) += visws_quirks.o
39obj-$(CONFIG_X86_32) += probe_roms_32.o 40obj-$(CONFIG_X86_32) += probe_roms_32.o
40obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o 41obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index f65ab8b014c4..a36bb90aef53 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -195,7 +195,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len)
195 195
196extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; 196extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
197extern s32 __smp_locks[], __smp_locks_end[]; 197extern s32 __smp_locks[], __smp_locks_end[];
198static void *text_poke_early(void *addr, const void *opcode, size_t len); 198void *text_poke_early(void *addr, const void *opcode, size_t len);
199 199
200/* Replace instructions with better alternatives for this CPU type. 200/* Replace instructions with better alternatives for this CPU type.
201 This runs before SMP is initialized to avoid SMP problems with 201 This runs before SMP is initialized to avoid SMP problems with
@@ -522,7 +522,7 @@ void __init alternative_instructions(void)
522 * instructions. And on the local CPU you need to be protected again NMI or MCE 522 * instructions. And on the local CPU you need to be protected again NMI or MCE
523 * handlers seeing an inconsistent instruction while you patch. 523 * handlers seeing an inconsistent instruction while you patch.
524 */ 524 */
525static void *__init_or_module text_poke_early(void *addr, const void *opcode, 525void *__init_or_module text_poke_early(void *addr, const void *opcode,
526 size_t len) 526 size_t len)
527{ 527{
528 unsigned long flags; 528 unsigned long flags;
@@ -637,7 +637,72 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
637 tpp.len = len; 637 tpp.len = len;
638 atomic_set(&stop_machine_first, 1); 638 atomic_set(&stop_machine_first, 1);
639 wrote_text = 0; 639 wrote_text = 0;
640 stop_machine(stop_machine_text_poke, (void *)&tpp, NULL); 640 /* Use __stop_machine() because the caller already got online_cpus. */
641 __stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
641 return addr; 642 return addr;
642} 643}
643 644
645#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
646
647unsigned char ideal_nop5[IDEAL_NOP_SIZE_5];
648
649void __init arch_init_ideal_nop5(void)
650{
651 extern const unsigned char ftrace_test_p6nop[];
652 extern const unsigned char ftrace_test_nop5[];
653 extern const unsigned char ftrace_test_jmp[];
654 int faulted = 0;
655
656 /*
657 * There is no good nop for all x86 archs.
658 * We will default to using the P6_NOP5, but first we
659 * will test to make sure that the nop will actually
660 * work on this CPU. If it faults, we will then
661 * go to a lesser efficient 5 byte nop. If that fails
662 * we then just use a jmp as our nop. This isn't the most
663 * efficient nop, but we can not use a multi part nop
664 * since we would then risk being preempted in the middle
665 * of that nop, and if we enabled tracing then, it might
666 * cause a system crash.
667 *
668 * TODO: check the cpuid to determine the best nop.
669 */
670 asm volatile (
671 "ftrace_test_jmp:"
672 "jmp ftrace_test_p6nop\n"
673 "nop\n"
674 "nop\n"
675 "nop\n" /* 2 byte jmp + 3 bytes */
676 "ftrace_test_p6nop:"
677 P6_NOP5
678 "jmp 1f\n"
679 "ftrace_test_nop5:"
680 ".byte 0x66,0x66,0x66,0x66,0x90\n"
681 "1:"
682 ".section .fixup, \"ax\"\n"
683 "2: movl $1, %0\n"
684 " jmp ftrace_test_nop5\n"
685 "3: movl $2, %0\n"
686 " jmp 1b\n"
687 ".previous\n"
688 _ASM_EXTABLE(ftrace_test_p6nop, 2b)
689 _ASM_EXTABLE(ftrace_test_nop5, 3b)
690 : "=r"(faulted) : "0" (faulted));
691
692 switch (faulted) {
693 case 0:
694 pr_info("converting mcount calls to 0f 1f 44 00 00\n");
695 memcpy(ideal_nop5, ftrace_test_p6nop, IDEAL_NOP_SIZE_5);
696 break;
697 case 1:
698 pr_info("converting mcount calls to 66 66 66 66 90\n");
699 memcpy(ideal_nop5, ftrace_test_nop5, IDEAL_NOP_SIZE_5);
700 break;
701 case 2:
702 pr_info("converting mcount calls to jmp . + 5\n");
703 memcpy(ideal_nop5, ftrace_test_jmp, IDEAL_NOP_SIZE_5);
704 break;
705 }
706
707}
708#endif
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 03a5b0385ad6..fe73c1844a9a 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -531,7 +531,7 @@ static int x86_pmu_hw_config(struct perf_event *event)
531/* 531/*
532 * Setup the hardware configuration for a given attr_type 532 * Setup the hardware configuration for a given attr_type
533 */ 533 */
534static int __hw_perf_event_init(struct perf_event *event) 534static int __x86_pmu_event_init(struct perf_event *event)
535{ 535{
536 int err; 536 int err;
537 537
@@ -584,7 +584,7 @@ static void x86_pmu_disable_all(void)
584 } 584 }
585} 585}
586 586
587void hw_perf_disable(void) 587static void x86_pmu_disable(struct pmu *pmu)
588{ 588{
589 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 589 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
590 590
@@ -619,7 +619,7 @@ static void x86_pmu_enable_all(int added)
619 } 619 }
620} 620}
621 621
622static const struct pmu pmu; 622static struct pmu pmu;
623 623
624static inline int is_x86_event(struct perf_event *event) 624static inline int is_x86_event(struct perf_event *event)
625{ 625{
@@ -801,10 +801,10 @@ static inline int match_prev_assignment(struct hw_perf_event *hwc,
801 hwc->last_tag == cpuc->tags[i]; 801 hwc->last_tag == cpuc->tags[i];
802} 802}
803 803
804static int x86_pmu_start(struct perf_event *event); 804static void x86_pmu_start(struct perf_event *event, int flags);
805static void x86_pmu_stop(struct perf_event *event); 805static void x86_pmu_stop(struct perf_event *event, int flags);
806 806
807void hw_perf_enable(void) 807static void x86_pmu_enable(struct pmu *pmu)
808{ 808{
809 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 809 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
810 struct perf_event *event; 810 struct perf_event *event;
@@ -840,7 +840,14 @@ void hw_perf_enable(void)
840 match_prev_assignment(hwc, cpuc, i)) 840 match_prev_assignment(hwc, cpuc, i))
841 continue; 841 continue;
842 842
843 x86_pmu_stop(event); 843 /*
844 * Ensure we don't accidentally enable a stopped
845 * counter simply because we rescheduled.
846 */
847 if (hwc->state & PERF_HES_STOPPED)
848 hwc->state |= PERF_HES_ARCH;
849
850 x86_pmu_stop(event, PERF_EF_UPDATE);
844 } 851 }
845 852
846 for (i = 0; i < cpuc->n_events; i++) { 853 for (i = 0; i < cpuc->n_events; i++) {
@@ -852,7 +859,10 @@ void hw_perf_enable(void)
852 else if (i < n_running) 859 else if (i < n_running)
853 continue; 860 continue;
854 861
855 x86_pmu_start(event); 862 if (hwc->state & PERF_HES_ARCH)
863 continue;
864
865 x86_pmu_start(event, PERF_EF_RELOAD);
856 } 866 }
857 cpuc->n_added = 0; 867 cpuc->n_added = 0;
858 perf_events_lapic_init(); 868 perf_events_lapic_init();
@@ -953,15 +963,12 @@ static void x86_pmu_enable_event(struct perf_event *event)
953} 963}
954 964
955/* 965/*
956 * activate a single event 966 * Add a single event to the PMU.
957 * 967 *
958 * The event is added to the group of enabled events 968 * The event is added to the group of enabled events
959 * but only if it can be scehduled with existing events. 969 * but only if it can be scehduled with existing events.
960 *
961 * Called with PMU disabled. If successful and return value 1,
962 * then guaranteed to call perf_enable() and hw_perf_enable()
963 */ 970 */
964static int x86_pmu_enable(struct perf_event *event) 971static int x86_pmu_add(struct perf_event *event, int flags)
965{ 972{
966 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 973 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
967 struct hw_perf_event *hwc; 974 struct hw_perf_event *hwc;
@@ -970,58 +977,67 @@ static int x86_pmu_enable(struct perf_event *event)
970 977
971 hwc = &event->hw; 978 hwc = &event->hw;
972 979
980 perf_pmu_disable(event->pmu);
973 n0 = cpuc->n_events; 981 n0 = cpuc->n_events;
974 n = collect_events(cpuc, event, false); 982 ret = n = collect_events(cpuc, event, false);
975 if (n < 0) 983 if (ret < 0)
976 return n; 984 goto out;
985
986 hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
987 if (!(flags & PERF_EF_START))
988 hwc->state |= PERF_HES_ARCH;
977 989
978 /* 990 /*
979 * If group events scheduling transaction was started, 991 * If group events scheduling transaction was started,
980 * skip the schedulability test here, it will be peformed 992 * skip the schedulability test here, it will be peformed
981 * at commit time(->commit_txn) as a whole 993 * at commit time (->commit_txn) as a whole
982 */ 994 */
983 if (cpuc->group_flag & PERF_EVENT_TXN) 995 if (cpuc->group_flag & PERF_EVENT_TXN)
984 goto out; 996 goto done_collect;
985 997
986 ret = x86_pmu.schedule_events(cpuc, n, assign); 998 ret = x86_pmu.schedule_events(cpuc, n, assign);
987 if (ret) 999 if (ret)
988 return ret; 1000 goto out;
989 /* 1001 /*
990 * copy new assignment, now we know it is possible 1002 * copy new assignment, now we know it is possible
991 * will be used by hw_perf_enable() 1003 * will be used by hw_perf_enable()
992 */ 1004 */
993 memcpy(cpuc->assign, assign, n*sizeof(int)); 1005 memcpy(cpuc->assign, assign, n*sizeof(int));
994 1006
995out: 1007done_collect:
996 cpuc->n_events = n; 1008 cpuc->n_events = n;
997 cpuc->n_added += n - n0; 1009 cpuc->n_added += n - n0;
998 cpuc->n_txn += n - n0; 1010 cpuc->n_txn += n - n0;
999 1011
1000 return 0; 1012 ret = 0;
1013out:
1014 perf_pmu_enable(event->pmu);
1015 return ret;
1001} 1016}
1002 1017
1003static int x86_pmu_start(struct perf_event *event) 1018static void x86_pmu_start(struct perf_event *event, int flags)
1004{ 1019{
1005 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1020 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1006 int idx = event->hw.idx; 1021 int idx = event->hw.idx;
1007 1022
1008 if (idx == -1) 1023 if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
1009 return -EAGAIN; 1024 return;
1025
1026 if (WARN_ON_ONCE(idx == -1))
1027 return;
1028
1029 if (flags & PERF_EF_RELOAD) {
1030 WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
1031 x86_perf_event_set_period(event);
1032 }
1033
1034 event->hw.state = 0;
1010 1035
1011 x86_perf_event_set_period(event);
1012 cpuc->events[idx] = event; 1036 cpuc->events[idx] = event;
1013 __set_bit(idx, cpuc->active_mask); 1037 __set_bit(idx, cpuc->active_mask);
1014 __set_bit(idx, cpuc->running); 1038 __set_bit(idx, cpuc->running);
1015 x86_pmu.enable(event); 1039 x86_pmu.enable(event);
1016 perf_event_update_userpage(event); 1040 perf_event_update_userpage(event);
1017
1018 return 0;
1019}
1020
1021static void x86_pmu_unthrottle(struct perf_event *event)
1022{
1023 int ret = x86_pmu_start(event);
1024 WARN_ON_ONCE(ret);
1025} 1041}
1026 1042
1027void perf_event_print_debug(void) 1043void perf_event_print_debug(void)
@@ -1078,27 +1094,29 @@ void perf_event_print_debug(void)
1078 local_irq_restore(flags); 1094 local_irq_restore(flags);
1079} 1095}
1080 1096
1081static void x86_pmu_stop(struct perf_event *event) 1097static void x86_pmu_stop(struct perf_event *event, int flags)
1082{ 1098{
1083 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1099 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1084 struct hw_perf_event *hwc = &event->hw; 1100 struct hw_perf_event *hwc = &event->hw;
1085 int idx = hwc->idx;
1086 1101
1087 if (!__test_and_clear_bit(idx, cpuc->active_mask)) 1102 if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) {
1088 return; 1103 x86_pmu.disable(event);
1089 1104 cpuc->events[hwc->idx] = NULL;
1090 x86_pmu.disable(event); 1105 WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
1091 1106 hwc->state |= PERF_HES_STOPPED;
1092 /* 1107 }
1093 * Drain the remaining delta count out of a event
1094 * that we are disabling:
1095 */
1096 x86_perf_event_update(event);
1097 1108
1098 cpuc->events[idx] = NULL; 1109 if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
1110 /*
1111 * Drain the remaining delta count out of a event
1112 * that we are disabling:
1113 */
1114 x86_perf_event_update(event);
1115 hwc->state |= PERF_HES_UPTODATE;
1116 }
1099} 1117}
1100 1118
1101static void x86_pmu_disable(struct perf_event *event) 1119static void x86_pmu_del(struct perf_event *event, int flags)
1102{ 1120{
1103 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1121 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1104 int i; 1122 int i;
@@ -1111,7 +1129,7 @@ static void x86_pmu_disable(struct perf_event *event)
1111 if (cpuc->group_flag & PERF_EVENT_TXN) 1129 if (cpuc->group_flag & PERF_EVENT_TXN)
1112 return; 1130 return;
1113 1131
1114 x86_pmu_stop(event); 1132 x86_pmu_stop(event, PERF_EF_UPDATE);
1115 1133
1116 for (i = 0; i < cpuc->n_events; i++) { 1134 for (i = 0; i < cpuc->n_events; i++) {
1117 if (event == cpuc->event_list[i]) { 1135 if (event == cpuc->event_list[i]) {
@@ -1134,7 +1152,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
1134 struct perf_sample_data data; 1152 struct perf_sample_data data;
1135 struct cpu_hw_events *cpuc; 1153 struct cpu_hw_events *cpuc;
1136 struct perf_event *event; 1154 struct perf_event *event;
1137 struct hw_perf_event *hwc;
1138 int idx, handled = 0; 1155 int idx, handled = 0;
1139 u64 val; 1156 u64 val;
1140 1157
@@ -1155,7 +1172,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
1155 } 1172 }
1156 1173
1157 event = cpuc->events[idx]; 1174 event = cpuc->events[idx];
1158 hwc = &event->hw;
1159 1175
1160 val = x86_perf_event_update(event); 1176 val = x86_perf_event_update(event);
1161 if (val & (1ULL << (x86_pmu.cntval_bits - 1))) 1177 if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
@@ -1171,7 +1187,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
1171 continue; 1187 continue;
1172 1188
1173 if (perf_event_overflow(event, 1, &data, regs)) 1189 if (perf_event_overflow(event, 1, &data, regs))
1174 x86_pmu_stop(event); 1190 x86_pmu_stop(event, 0);
1175 } 1191 }
1176 1192
1177 if (handled) 1193 if (handled)
@@ -1180,25 +1196,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
1180 return handled; 1196 return handled;
1181} 1197}
1182 1198
1183void smp_perf_pending_interrupt(struct pt_regs *regs)
1184{
1185 irq_enter();
1186 ack_APIC_irq();
1187 inc_irq_stat(apic_pending_irqs);
1188 perf_event_do_pending();
1189 irq_exit();
1190}
1191
1192void set_perf_event_pending(void)
1193{
1194#ifdef CONFIG_X86_LOCAL_APIC
1195 if (!x86_pmu.apic || !x86_pmu_initialized())
1196 return;
1197
1198 apic->send_IPI_self(LOCAL_PENDING_VECTOR);
1199#endif
1200}
1201
1202void perf_events_lapic_init(void) 1199void perf_events_lapic_init(void)
1203{ 1200{
1204 if (!x86_pmu.apic || !x86_pmu_initialized()) 1201 if (!x86_pmu.apic || !x86_pmu_initialized())
@@ -1388,7 +1385,6 @@ void __init init_hw_perf_events(void)
1388 x86_pmu.num_counters = X86_PMC_MAX_GENERIC; 1385 x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
1389 } 1386 }
1390 x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; 1387 x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
1391 perf_max_events = x86_pmu.num_counters;
1392 1388
1393 if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { 1389 if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
1394 WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", 1390 WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
@@ -1424,6 +1420,7 @@ void __init init_hw_perf_events(void)
1424 pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); 1420 pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
1425 pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); 1421 pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);
1426 1422
1423 perf_pmu_register(&pmu);
1427 perf_cpu_notifier(x86_pmu_notifier); 1424 perf_cpu_notifier(x86_pmu_notifier);
1428} 1425}
1429 1426
@@ -1437,10 +1434,11 @@ static inline void x86_pmu_read(struct perf_event *event)
1437 * Set the flag to make pmu::enable() not perform the 1434 * Set the flag to make pmu::enable() not perform the
1438 * schedulability test, it will be performed at commit time 1435 * schedulability test, it will be performed at commit time
1439 */ 1436 */
1440static void x86_pmu_start_txn(const struct pmu *pmu) 1437static void x86_pmu_start_txn(struct pmu *pmu)
1441{ 1438{
1442 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1439 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1443 1440
1441 perf_pmu_disable(pmu);
1444 cpuc->group_flag |= PERF_EVENT_TXN; 1442 cpuc->group_flag |= PERF_EVENT_TXN;
1445 cpuc->n_txn = 0; 1443 cpuc->n_txn = 0;
1446} 1444}
@@ -1450,7 +1448,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu)
1450 * Clear the flag and pmu::enable() will perform the 1448 * Clear the flag and pmu::enable() will perform the
1451 * schedulability test. 1449 * schedulability test.
1452 */ 1450 */
1453static void x86_pmu_cancel_txn(const struct pmu *pmu) 1451static void x86_pmu_cancel_txn(struct pmu *pmu)
1454{ 1452{
1455 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1453 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1456 1454
@@ -1460,6 +1458,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)
1460 */ 1458 */
1461 cpuc->n_added -= cpuc->n_txn; 1459 cpuc->n_added -= cpuc->n_txn;
1462 cpuc->n_events -= cpuc->n_txn; 1460 cpuc->n_events -= cpuc->n_txn;
1461 perf_pmu_enable(pmu);
1463} 1462}
1464 1463
1465/* 1464/*
@@ -1467,7 +1466,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)
1467 * Perform the group schedulability test as a whole 1466 * Perform the group schedulability test as a whole
1468 * Return 0 if success 1467 * Return 0 if success
1469 */ 1468 */
1470static int x86_pmu_commit_txn(const struct pmu *pmu) 1469static int x86_pmu_commit_txn(struct pmu *pmu)
1471{ 1470{
1472 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1471 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1473 int assign[X86_PMC_IDX_MAX]; 1472 int assign[X86_PMC_IDX_MAX];
@@ -1489,22 +1488,10 @@ static int x86_pmu_commit_txn(const struct pmu *pmu)
1489 memcpy(cpuc->assign, assign, n*sizeof(int)); 1488 memcpy(cpuc->assign, assign, n*sizeof(int));
1490 1489
1491 cpuc->group_flag &= ~PERF_EVENT_TXN; 1490 cpuc->group_flag &= ~PERF_EVENT_TXN;
1492 1491 perf_pmu_enable(pmu);
1493 return 0; 1492 return 0;
1494} 1493}
1495 1494
1496static const struct pmu pmu = {
1497 .enable = x86_pmu_enable,
1498 .disable = x86_pmu_disable,
1499 .start = x86_pmu_start,
1500 .stop = x86_pmu_stop,
1501 .read = x86_pmu_read,
1502 .unthrottle = x86_pmu_unthrottle,
1503 .start_txn = x86_pmu_start_txn,
1504 .cancel_txn = x86_pmu_cancel_txn,
1505 .commit_txn = x86_pmu_commit_txn,
1506};
1507
1508/* 1495/*
1509 * validate that we can schedule this event 1496 * validate that we can schedule this event
1510 */ 1497 */
@@ -1579,12 +1566,22 @@ out:
1579 return ret; 1566 return ret;
1580} 1567}
1581 1568
1582const struct pmu *hw_perf_event_init(struct perf_event *event) 1569int x86_pmu_event_init(struct perf_event *event)
1583{ 1570{
1584 const struct pmu *tmp; 1571 struct pmu *tmp;
1585 int err; 1572 int err;
1586 1573
1587 err = __hw_perf_event_init(event); 1574 switch (event->attr.type) {
1575 case PERF_TYPE_RAW:
1576 case PERF_TYPE_HARDWARE:
1577 case PERF_TYPE_HW_CACHE:
1578 break;
1579
1580 default:
1581 return -ENOENT;
1582 }
1583
1584 err = __x86_pmu_event_init(event);
1588 if (!err) { 1585 if (!err) {
1589 /* 1586 /*
1590 * we temporarily connect event to its pmu 1587 * we temporarily connect event to its pmu
@@ -1604,26 +1601,31 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
1604 if (err) { 1601 if (err) {
1605 if (event->destroy) 1602 if (event->destroy)
1606 event->destroy(event); 1603 event->destroy(event);
1607 return ERR_PTR(err);
1608 } 1604 }
1609 1605
1610 return &pmu; 1606 return err;
1611} 1607}
1612 1608
1613/* 1609static struct pmu pmu = {
1614 * callchain support 1610 .pmu_enable = x86_pmu_enable,
1615 */ 1611 .pmu_disable = x86_pmu_disable,
1616 1612
1617static inline 1613 .event_init = x86_pmu_event_init,
1618void callchain_store(struct perf_callchain_entry *entry, u64 ip)
1619{
1620 if (entry->nr < PERF_MAX_STACK_DEPTH)
1621 entry->ip[entry->nr++] = ip;
1622}
1623 1614
1624static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); 1615 .add = x86_pmu_add,
1625static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); 1616 .del = x86_pmu_del,
1617 .start = x86_pmu_start,
1618 .stop = x86_pmu_stop,
1619 .read = x86_pmu_read,
1626 1620
1621 .start_txn = x86_pmu_start_txn,
1622 .cancel_txn = x86_pmu_cancel_txn,
1623 .commit_txn = x86_pmu_commit_txn,
1624};
1625
1626/*
1627 * callchain support
1628 */
1627 1629
1628static void 1630static void
1629backtrace_warning_symbol(void *data, char *msg, unsigned long symbol) 1631backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
@@ -1645,7 +1647,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
1645{ 1647{
1646 struct perf_callchain_entry *entry = data; 1648 struct perf_callchain_entry *entry = data;
1647 1649
1648 callchain_store(entry, addr); 1650 perf_callchain_store(entry, addr);
1649} 1651}
1650 1652
1651static const struct stacktrace_ops backtrace_ops = { 1653static const struct stacktrace_ops backtrace_ops = {
@@ -1656,11 +1658,15 @@ static const struct stacktrace_ops backtrace_ops = {
1656 .walk_stack = print_context_stack_bp, 1658 .walk_stack = print_context_stack_bp,
1657}; 1659};
1658 1660
1659static void 1661void
1660perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) 1662perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
1661{ 1663{
1662 callchain_store(entry, PERF_CONTEXT_KERNEL); 1664 if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1663 callchain_store(entry, regs->ip); 1665 /* TODO: We don't support guest os callchain now */
1666 return;
1667 }
1668
1669 perf_callchain_store(entry, regs->ip);
1664 1670
1665 dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); 1671 dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
1666} 1672}
@@ -1689,7 +1695,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
1689 if (fp < compat_ptr(regs->sp)) 1695 if (fp < compat_ptr(regs->sp))
1690 break; 1696 break;
1691 1697
1692 callchain_store(entry, frame.return_address); 1698 perf_callchain_store(entry, frame.return_address);
1693 fp = compat_ptr(frame.next_frame); 1699 fp = compat_ptr(frame.next_frame);
1694 } 1700 }
1695 return 1; 1701 return 1;
@@ -1702,19 +1708,20 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
1702} 1708}
1703#endif 1709#endif
1704 1710
1705static void 1711void
1706perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) 1712perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
1707{ 1713{
1708 struct stack_frame frame; 1714 struct stack_frame frame;
1709 const void __user *fp; 1715 const void __user *fp;
1710 1716
1711 if (!user_mode(regs)) 1717 if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1712 regs = task_pt_regs(current); 1718 /* TODO: We don't support guest os callchain now */
1719 return;
1720 }
1713 1721
1714 fp = (void __user *)regs->bp; 1722 fp = (void __user *)regs->bp;
1715 1723
1716 callchain_store(entry, PERF_CONTEXT_USER); 1724 perf_callchain_store(entry, regs->ip);
1717 callchain_store(entry, regs->ip);
1718 1725
1719 if (perf_callchain_user32(regs, entry)) 1726 if (perf_callchain_user32(regs, entry))
1720 return; 1727 return;
@@ -1731,52 +1738,11 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
1731 if ((unsigned long)fp < regs->sp) 1738 if ((unsigned long)fp < regs->sp)
1732 break; 1739 break;
1733 1740
1734 callchain_store(entry, frame.return_address); 1741 perf_callchain_store(entry, frame.return_address);
1735 fp = frame.next_frame; 1742 fp = frame.next_frame;
1736 } 1743 }
1737} 1744}
1738 1745
1739static void
1740perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
1741{
1742 int is_user;
1743
1744 if (!regs)
1745 return;
1746
1747 is_user = user_mode(regs);
1748
1749 if (is_user && current->state != TASK_RUNNING)
1750 return;
1751
1752 if (!is_user)
1753 perf_callchain_kernel(regs, entry);
1754
1755 if (current->mm)
1756 perf_callchain_user(regs, entry);
1757}
1758
1759struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
1760{
1761 struct perf_callchain_entry *entry;
1762
1763 if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1764 /* TODO: We don't support guest os callchain now */
1765 return NULL;
1766 }
1767
1768 if (in_nmi())
1769 entry = &__get_cpu_var(pmc_nmi_entry);
1770 else
1771 entry = &__get_cpu_var(pmc_irq_entry);
1772
1773 entry->nr = 0;
1774
1775 perf_do_callchain(regs, entry);
1776
1777 return entry;
1778}
1779
1780unsigned long perf_instruction_pointer(struct pt_regs *regs) 1746unsigned long perf_instruction_pointer(struct pt_regs *regs)
1781{ 1747{
1782 unsigned long ip; 1748 unsigned long ip;
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index c2897b7b4a3b..46d58448c3af 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -52,7 +52,7 @@ static __initconst const u64 amd_hw_cache_event_ids
52 [ C(DTLB) ] = { 52 [ C(DTLB) ] = {
53 [ C(OP_READ) ] = { 53 [ C(OP_READ) ] = {
54 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ 54 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
55 [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */ 55 [ C(RESULT_MISS) ] = 0x0746, /* L1_DTLB_AND_L2_DLTB_MISS.ALL */
56 }, 56 },
57 [ C(OP_WRITE) ] = { 57 [ C(OP_WRITE) ] = {
58 [ C(RESULT_ACCESS) ] = 0, 58 [ C(RESULT_ACCESS) ] = 0,
@@ -66,7 +66,7 @@ static __initconst const u64 amd_hw_cache_event_ids
66 [ C(ITLB) ] = { 66 [ C(ITLB) ] = {
67 [ C(OP_READ) ] = { 67 [ C(OP_READ) ] = {
68 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes */ 68 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes */
69 [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */ 69 [ C(RESULT_MISS) ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */
70 }, 70 },
71 [ C(OP_WRITE) ] = { 71 [ C(OP_WRITE) ] = {
72 [ C(RESULT_ACCESS) ] = -1, 72 [ C(RESULT_ACCESS) ] = -1,
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index ee05c90012d2..c8f5c088cad1 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -713,18 +713,18 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
713 struct cpu_hw_events *cpuc; 713 struct cpu_hw_events *cpuc;
714 int bit, loops; 714 int bit, loops;
715 u64 status; 715 u64 status;
716 int handled = 0; 716 int handled;
717 717
718 perf_sample_data_init(&data, 0); 718 perf_sample_data_init(&data, 0);
719 719
720 cpuc = &__get_cpu_var(cpu_hw_events); 720 cpuc = &__get_cpu_var(cpu_hw_events);
721 721
722 intel_pmu_disable_all(); 722 intel_pmu_disable_all();
723 intel_pmu_drain_bts_buffer(); 723 handled = intel_pmu_drain_bts_buffer();
724 status = intel_pmu_get_status(); 724 status = intel_pmu_get_status();
725 if (!status) { 725 if (!status) {
726 intel_pmu_enable_all(0); 726 intel_pmu_enable_all(0);
727 return 0; 727 return handled;
728 } 728 }
729 729
730 loops = 0; 730 loops = 0;
@@ -763,7 +763,7 @@ again:
763 data.period = event->hw.last_period; 763 data.period = event->hw.last_period;
764 764
765 if (perf_event_overflow(event, 1, &data, regs)) 765 if (perf_event_overflow(event, 1, &data, regs))
766 x86_pmu_stop(event); 766 x86_pmu_stop(event, 0);
767 } 767 }
768 768
769 /* 769 /*
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 18018d1311cd..4977f9c400e5 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -214,7 +214,7 @@ static void intel_pmu_disable_bts(void)
214 update_debugctlmsr(debugctlmsr); 214 update_debugctlmsr(debugctlmsr);
215} 215}
216 216
217static void intel_pmu_drain_bts_buffer(void) 217static int intel_pmu_drain_bts_buffer(void)
218{ 218{
219 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 219 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
220 struct debug_store *ds = cpuc->ds; 220 struct debug_store *ds = cpuc->ds;
@@ -231,16 +231,16 @@ static void intel_pmu_drain_bts_buffer(void)
231 struct pt_regs regs; 231 struct pt_regs regs;
232 232
233 if (!event) 233 if (!event)
234 return; 234 return 0;
235 235
236 if (!ds) 236 if (!ds)
237 return; 237 return 0;
238 238
239 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; 239 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
240 top = (struct bts_record *)(unsigned long)ds->bts_index; 240 top = (struct bts_record *)(unsigned long)ds->bts_index;
241 241
242 if (top <= at) 242 if (top <= at)
243 return; 243 return 0;
244 244
245 ds->bts_index = ds->bts_buffer_base; 245 ds->bts_index = ds->bts_buffer_base;
246 246
@@ -256,7 +256,7 @@ static void intel_pmu_drain_bts_buffer(void)
256 perf_prepare_sample(&header, &data, event, &regs); 256 perf_prepare_sample(&header, &data, event, &regs);
257 257
258 if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1)) 258 if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1))
259 return; 259 return 1;
260 260
261 for (; at < top; at++) { 261 for (; at < top; at++) {
262 data.ip = at->from; 262 data.ip = at->from;
@@ -270,6 +270,7 @@ static void intel_pmu_drain_bts_buffer(void)
270 /* There's new data available. */ 270 /* There's new data available. */
271 event->hw.interrupts++; 271 event->hw.interrupts++;
272 event->pending_kill = POLL_IN; 272 event->pending_kill = POLL_IN;
273 return 1;
273} 274}
274 275
275/* 276/*
@@ -491,7 +492,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
491 regs.flags &= ~PERF_EFLAGS_EXACT; 492 regs.flags &= ~PERF_EFLAGS_EXACT;
492 493
493 if (perf_event_overflow(event, 1, &data, &regs)) 494 if (perf_event_overflow(event, 1, &data, &regs))
494 x86_pmu_stop(event); 495 x86_pmu_stop(event, 0);
495} 496}
496 497
497static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) 498static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index 249015173992..81400b93e694 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -18,6 +18,8 @@
18struct p4_event_bind { 18struct p4_event_bind {
19 unsigned int opcode; /* Event code and ESCR selector */ 19 unsigned int opcode; /* Event code and ESCR selector */
20 unsigned int escr_msr[2]; /* ESCR MSR for this event */ 20 unsigned int escr_msr[2]; /* ESCR MSR for this event */
21 unsigned int escr_emask; /* valid ESCR EventMask bits */
22 unsigned int shared; /* event is shared across threads */
21 char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on abscence */ 23 char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on abscence */
22}; 24};
23 25
@@ -66,231 +68,435 @@ static struct p4_event_bind p4_event_bind_map[] = {
66 [P4_EVENT_TC_DELIVER_MODE] = { 68 [P4_EVENT_TC_DELIVER_MODE] = {
67 .opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE), 69 .opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE),
68 .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, 70 .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
71 .escr_emask =
72 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD) |
73 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DB) |
74 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DI) |
75 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BD) |
76 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BB) |
77 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BI) |
78 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, ID),
79 .shared = 1,
69 .cntr = { {4, 5, -1}, {6, 7, -1} }, 80 .cntr = { {4, 5, -1}, {6, 7, -1} },
70 }, 81 },
71 [P4_EVENT_BPU_FETCH_REQUEST] = { 82 [P4_EVENT_BPU_FETCH_REQUEST] = {
72 .opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST), 83 .opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST),
73 .escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 }, 84 .escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 },
85 .escr_emask =
86 P4_ESCR_EMASK_BIT(P4_EVENT_BPU_FETCH_REQUEST, TCMISS),
74 .cntr = { {0, -1, -1}, {2, -1, -1} }, 87 .cntr = { {0, -1, -1}, {2, -1, -1} },
75 }, 88 },
76 [P4_EVENT_ITLB_REFERENCE] = { 89 [P4_EVENT_ITLB_REFERENCE] = {
77 .opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE), 90 .opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE),
78 .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 }, 91 .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 },
92 .escr_emask =
93 P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT) |
94 P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, MISS) |
95 P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT_UK),
79 .cntr = { {0, -1, -1}, {2, -1, -1} }, 96 .cntr = { {0, -1, -1}, {2, -1, -1} },
80 }, 97 },
81 [P4_EVENT_MEMORY_CANCEL] = { 98 [P4_EVENT_MEMORY_CANCEL] = {
82 .opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL), 99 .opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL),
83 .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, 100 .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
101 .escr_emask =
102 P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, ST_RB_FULL) |
103 P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, 64K_CONF),
84 .cntr = { {8, 9, -1}, {10, 11, -1} }, 104 .cntr = { {8, 9, -1}, {10, 11, -1} },
85 }, 105 },
86 [P4_EVENT_MEMORY_COMPLETE] = { 106 [P4_EVENT_MEMORY_COMPLETE] = {
87 .opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE), 107 .opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE),
88 .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, 108 .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 },
109 .escr_emask =
110 P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, LSC) |
111 P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, SSC),
89 .cntr = { {8, 9, -1}, {10, 11, -1} }, 112 .cntr = { {8, 9, -1}, {10, 11, -1} },
90 }, 113 },
91 [P4_EVENT_LOAD_PORT_REPLAY] = { 114 [P4_EVENT_LOAD_PORT_REPLAY] = {
92 .opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY), 115 .opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY),
93 .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 }, 116 .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 },
117 .escr_emask =
118 P4_ESCR_EMASK_BIT(P4_EVENT_LOAD_PORT_REPLAY, SPLIT_LD),
94 .cntr = { {8, 9, -1}, {10, 11, -1} }, 119 .cntr = { {8, 9, -1}, {10, 11, -1} },
95 }, 120 },
96 [P4_EVENT_STORE_PORT_REPLAY] = { 121 [P4_EVENT_STORE_PORT_REPLAY] = {
97 .opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY), 122 .opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY),
98 .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, 123 .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 },
124 .escr_emask =
125 P4_ESCR_EMASK_BIT(P4_EVENT_STORE_PORT_REPLAY, SPLIT_ST),
99 .cntr = { {8, 9, -1}, {10, 11, -1} }, 126 .cntr = { {8, 9, -1}, {10, 11, -1} },
100 }, 127 },
101 [P4_EVENT_MOB_LOAD_REPLAY] = { 128 [P4_EVENT_MOB_LOAD_REPLAY] = {
102 .opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY), 129 .opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY),
103 .escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 }, 130 .escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 },
131 .escr_emask =
132 P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STA) |
133 P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STD) |
134 P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, PARTIAL_DATA) |
135 P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, UNALGN_ADDR),
104 .cntr = { {0, -1, -1}, {2, -1, -1} }, 136 .cntr = { {0, -1, -1}, {2, -1, -1} },
105 }, 137 },
106 [P4_EVENT_PAGE_WALK_TYPE] = { 138 [P4_EVENT_PAGE_WALK_TYPE] = {
107 .opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE), 139 .opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE),
108 .escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 }, 140 .escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 },
141 .escr_emask =
142 P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, DTMISS) |
143 P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, ITMISS),
144 .shared = 1,
109 .cntr = { {0, -1, -1}, {2, -1, -1} }, 145 .cntr = { {0, -1, -1}, {2, -1, -1} },
110 }, 146 },
111 [P4_EVENT_BSQ_CACHE_REFERENCE] = { 147 [P4_EVENT_BSQ_CACHE_REFERENCE] = {
112 .opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE), 148 .opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE),
113 .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, 149 .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
150 .escr_emask =
151 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) |
152 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) |
153 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) |
154 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) |
155 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) |
156 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM) |
157 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) |
158 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) |
159 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS),
114 .cntr = { {0, -1, -1}, {2, -1, -1} }, 160 .cntr = { {0, -1, -1}, {2, -1, -1} },
115 }, 161 },
116 [P4_EVENT_IOQ_ALLOCATION] = { 162 [P4_EVENT_IOQ_ALLOCATION] = {
117 .opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION), 163 .opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION),
118 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 164 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
165 .escr_emask =
166 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, DEFAULT) |
167 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_READ) |
168 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_WRITE) |
169 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_UC) |
170 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WC) |
171 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WT) |
172 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WP) |
173 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WB) |
174 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OWN) |
175 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OTHER) |
176 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, PREFETCH),
119 .cntr = { {0, -1, -1}, {2, -1, -1} }, 177 .cntr = { {0, -1, -1}, {2, -1, -1} },
120 }, 178 },
121 [P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */ 179 [P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */
122 .opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES), 180 .opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES),
123 .escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 }, 181 .escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 },
182 .escr_emask =
183 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, DEFAULT) |
184 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_READ) |
185 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_WRITE) |
186 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_UC) |
187 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WC) |
188 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WT) |
189 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WP) |
190 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WB) |
191 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OWN) |
192 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OTHER) |
193 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, PREFETCH),
124 .cntr = { {2, -1, -1}, {3, -1, -1} }, 194 .cntr = { {2, -1, -1}, {3, -1, -1} },
125 }, 195 },
126 [P4_EVENT_FSB_DATA_ACTIVITY] = { 196 [P4_EVENT_FSB_DATA_ACTIVITY] = {
127 .opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY), 197 .opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY),
128 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 198 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
199 .escr_emask =
200 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) |
201 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN) |
202 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OTHER) |
203 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_DRV) |
204 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OWN) |
205 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OTHER),
206 .shared = 1,
129 .cntr = { {0, -1, -1}, {2, -1, -1} }, 207 .cntr = { {0, -1, -1}, {2, -1, -1} },
130 }, 208 },
131 [P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */ 209 [P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */
132 .opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION), 210 .opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION),
133 .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 }, 211 .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 },
212 .escr_emask =
213 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE0) |
214 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE1) |
215 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN0) |
216 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN1) |
217 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_IO_TYPE) |
218 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LOCK_TYPE) |
219 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_CACHE_TYPE) |
220 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_SPLIT_TYPE) |
221 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_DEM_TYPE) |
222 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_ORD_TYPE) |
223 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE0) |
224 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE1) |
225 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE2),
134 .cntr = { {0, -1, -1}, {1, -1, -1} }, 226 .cntr = { {0, -1, -1}, {1, -1, -1} },
135 }, 227 },
136 [P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */ 228 [P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */
137 .opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES), 229 .opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES),
138 .escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 }, 230 .escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 },
231 .escr_emask =
232 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE0) |
233 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE1) |
234 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN0) |
235 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN1) |
236 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE) |
237 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE) |
238 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE) |
239 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE) |
240 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE) |
241 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE) |
242 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE0) |
243 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE1) |
244 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE2),
139 .cntr = { {2, -1, -1}, {3, -1, -1} }, 245 .cntr = { {2, -1, -1}, {3, -1, -1} },
140 }, 246 },
141 [P4_EVENT_SSE_INPUT_ASSIST] = { 247 [P4_EVENT_SSE_INPUT_ASSIST] = {
142 .opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST), 248 .opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST),
143 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 249 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
250 .escr_emask =
251 P4_ESCR_EMASK_BIT(P4_EVENT_SSE_INPUT_ASSIST, ALL),
252 .shared = 1,
144 .cntr = { {8, 9, -1}, {10, 11, -1} }, 253 .cntr = { {8, 9, -1}, {10, 11, -1} },
145 }, 254 },
146 [P4_EVENT_PACKED_SP_UOP] = { 255 [P4_EVENT_PACKED_SP_UOP] = {
147 .opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP), 256 .opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP),
148 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 257 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
258 .escr_emask =
259 P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_SP_UOP, ALL),
260 .shared = 1,
149 .cntr = { {8, 9, -1}, {10, 11, -1} }, 261 .cntr = { {8, 9, -1}, {10, 11, -1} },
150 }, 262 },
151 [P4_EVENT_PACKED_DP_UOP] = { 263 [P4_EVENT_PACKED_DP_UOP] = {
152 .opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP), 264 .opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP),
153 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 265 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
266 .escr_emask =
267 P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_DP_UOP, ALL),
268 .shared = 1,
154 .cntr = { {8, 9, -1}, {10, 11, -1} }, 269 .cntr = { {8, 9, -1}, {10, 11, -1} },
155 }, 270 },
156 [P4_EVENT_SCALAR_SP_UOP] = { 271 [P4_EVENT_SCALAR_SP_UOP] = {
157 .opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP), 272 .opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP),
158 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 273 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
274 .escr_emask =
275 P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_SP_UOP, ALL),
276 .shared = 1,
159 .cntr = { {8, 9, -1}, {10, 11, -1} }, 277 .cntr = { {8, 9, -1}, {10, 11, -1} },
160 }, 278 },
161 [P4_EVENT_SCALAR_DP_UOP] = { 279 [P4_EVENT_SCALAR_DP_UOP] = {
162 .opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP), 280 .opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP),
163 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 281 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
282 .escr_emask =
283 P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_DP_UOP, ALL),
284 .shared = 1,
164 .cntr = { {8, 9, -1}, {10, 11, -1} }, 285 .cntr = { {8, 9, -1}, {10, 11, -1} },
165 }, 286 },
166 [P4_EVENT_64BIT_MMX_UOP] = { 287 [P4_EVENT_64BIT_MMX_UOP] = {
167 .opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP), 288 .opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP),
168 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 289 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
290 .escr_emask =
291 P4_ESCR_EMASK_BIT(P4_EVENT_64BIT_MMX_UOP, ALL),
292 .shared = 1,
169 .cntr = { {8, 9, -1}, {10, 11, -1} }, 293 .cntr = { {8, 9, -1}, {10, 11, -1} },
170 }, 294 },
171 [P4_EVENT_128BIT_MMX_UOP] = { 295 [P4_EVENT_128BIT_MMX_UOP] = {
172 .opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP), 296 .opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP),
173 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 297 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
298 .escr_emask =
299 P4_ESCR_EMASK_BIT(P4_EVENT_128BIT_MMX_UOP, ALL),
300 .shared = 1,
174 .cntr = { {8, 9, -1}, {10, 11, -1} }, 301 .cntr = { {8, 9, -1}, {10, 11, -1} },
175 }, 302 },
176 [P4_EVENT_X87_FP_UOP] = { 303 [P4_EVENT_X87_FP_UOP] = {
177 .opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP), 304 .opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP),
178 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 305 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
306 .escr_emask =
307 P4_ESCR_EMASK_BIT(P4_EVENT_X87_FP_UOP, ALL),
308 .shared = 1,
179 .cntr = { {8, 9, -1}, {10, 11, -1} }, 309 .cntr = { {8, 9, -1}, {10, 11, -1} },
180 }, 310 },
181 [P4_EVENT_TC_MISC] = { 311 [P4_EVENT_TC_MISC] = {
182 .opcode = P4_OPCODE(P4_EVENT_TC_MISC), 312 .opcode = P4_OPCODE(P4_EVENT_TC_MISC),
183 .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, 313 .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
314 .escr_emask =
315 P4_ESCR_EMASK_BIT(P4_EVENT_TC_MISC, FLUSH),
184 .cntr = { {4, 5, -1}, {6, 7, -1} }, 316 .cntr = { {4, 5, -1}, {6, 7, -1} },
185 }, 317 },
186 [P4_EVENT_GLOBAL_POWER_EVENTS] = { 318 [P4_EVENT_GLOBAL_POWER_EVENTS] = {
187 .opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS), 319 .opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS),
188 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 320 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
321 .escr_emask =
322 P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING),
189 .cntr = { {0, -1, -1}, {2, -1, -1} }, 323 .cntr = { {0, -1, -1}, {2, -1, -1} },
190 }, 324 },
191 [P4_EVENT_TC_MS_XFER] = { 325 [P4_EVENT_TC_MS_XFER] = {
192 .opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER), 326 .opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER),
193 .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, 327 .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
328 .escr_emask =
329 P4_ESCR_EMASK_BIT(P4_EVENT_TC_MS_XFER, CISC),
194 .cntr = { {4, 5, -1}, {6, 7, -1} }, 330 .cntr = { {4, 5, -1}, {6, 7, -1} },
195 }, 331 },
196 [P4_EVENT_UOP_QUEUE_WRITES] = { 332 [P4_EVENT_UOP_QUEUE_WRITES] = {
197 .opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES), 333 .opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES),
198 .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, 334 .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
335 .escr_emask =
336 P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_BUILD) |
337 P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_DELIVER) |
338 P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_ROM),
199 .cntr = { {4, 5, -1}, {6, 7, -1} }, 339 .cntr = { {4, 5, -1}, {6, 7, -1} },
200 }, 340 },
201 [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = { 341 [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = {
202 .opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE), 342 .opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE),
203 .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 }, 343 .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 },
344 .escr_emask =
345 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL) |
346 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CALL) |
347 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, RETURN) |
348 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT),
204 .cntr = { {4, 5, -1}, {6, 7, -1} }, 349 .cntr = { {4, 5, -1}, {6, 7, -1} },
205 }, 350 },
206 [P4_EVENT_RETIRED_BRANCH_TYPE] = { 351 [P4_EVENT_RETIRED_BRANCH_TYPE] = {
207 .opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE), 352 .opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE),
208 .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 }, 353 .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 },
354 .escr_emask =
355 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) |
356 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) |
357 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) |
358 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT),
209 .cntr = { {4, 5, -1}, {6, 7, -1} }, 359 .cntr = { {4, 5, -1}, {6, 7, -1} },
210 }, 360 },
211 [P4_EVENT_RESOURCE_STALL] = { 361 [P4_EVENT_RESOURCE_STALL] = {
212 .opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL), 362 .opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL),
213 .escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 }, 363 .escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 },
364 .escr_emask =
365 P4_ESCR_EMASK_BIT(P4_EVENT_RESOURCE_STALL, SBFULL),
214 .cntr = { {12, 13, 16}, {14, 15, 17} }, 366 .cntr = { {12, 13, 16}, {14, 15, 17} },
215 }, 367 },
216 [P4_EVENT_WC_BUFFER] = { 368 [P4_EVENT_WC_BUFFER] = {
217 .opcode = P4_OPCODE(P4_EVENT_WC_BUFFER), 369 .opcode = P4_OPCODE(P4_EVENT_WC_BUFFER),
218 .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, 370 .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
371 .escr_emask =
372 P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_EVICTS) |
373 P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_FULL_EVICTS),
374 .shared = 1,
219 .cntr = { {8, 9, -1}, {10, 11, -1} }, 375 .cntr = { {8, 9, -1}, {10, 11, -1} },
220 }, 376 },
221 [P4_EVENT_B2B_CYCLES] = { 377 [P4_EVENT_B2B_CYCLES] = {
222 .opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES), 378 .opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES),
223 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 379 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
380 .escr_emask = 0,
224 .cntr = { {0, -1, -1}, {2, -1, -1} }, 381 .cntr = { {0, -1, -1}, {2, -1, -1} },
225 }, 382 },
226 [P4_EVENT_BNR] = { 383 [P4_EVENT_BNR] = {
227 .opcode = P4_OPCODE(P4_EVENT_BNR), 384 .opcode = P4_OPCODE(P4_EVENT_BNR),
228 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 385 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
386 .escr_emask = 0,
229 .cntr = { {0, -1, -1}, {2, -1, -1} }, 387 .cntr = { {0, -1, -1}, {2, -1, -1} },
230 }, 388 },
231 [P4_EVENT_SNOOP] = { 389 [P4_EVENT_SNOOP] = {
232 .opcode = P4_OPCODE(P4_EVENT_SNOOP), 390 .opcode = P4_OPCODE(P4_EVENT_SNOOP),
233 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 391 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
392 .escr_emask = 0,
234 .cntr = { {0, -1, -1}, {2, -1, -1} }, 393 .cntr = { {0, -1, -1}, {2, -1, -1} },
235 }, 394 },
236 [P4_EVENT_RESPONSE] = { 395 [P4_EVENT_RESPONSE] = {
237 .opcode = P4_OPCODE(P4_EVENT_RESPONSE), 396 .opcode = P4_OPCODE(P4_EVENT_RESPONSE),
238 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 397 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
398 .escr_emask = 0,
239 .cntr = { {0, -1, -1}, {2, -1, -1} }, 399 .cntr = { {0, -1, -1}, {2, -1, -1} },
240 }, 400 },
241 [P4_EVENT_FRONT_END_EVENT] = { 401 [P4_EVENT_FRONT_END_EVENT] = {
242 .opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT), 402 .opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT),
243 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, 403 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
404 .escr_emask =
405 P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, NBOGUS) |
406 P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, BOGUS),
244 .cntr = { {12, 13, 16}, {14, 15, 17} }, 407 .cntr = { {12, 13, 16}, {14, 15, 17} },
245 }, 408 },
246 [P4_EVENT_EXECUTION_EVENT] = { 409 [P4_EVENT_EXECUTION_EVENT] = {
247 .opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT), 410 .opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT),
248 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, 411 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
412 .escr_emask =
413 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0) |
414 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1) |
415 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2) |
416 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3) |
417 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) |
418 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) |
419 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) |
420 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3),
249 .cntr = { {12, 13, 16}, {14, 15, 17} }, 421 .cntr = { {12, 13, 16}, {14, 15, 17} },
250 }, 422 },
251 [P4_EVENT_REPLAY_EVENT] = { 423 [P4_EVENT_REPLAY_EVENT] = {
252 .opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT), 424 .opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT),
253 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, 425 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
426 .escr_emask =
427 P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, NBOGUS) |
428 P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, BOGUS),
254 .cntr = { {12, 13, 16}, {14, 15, 17} }, 429 .cntr = { {12, 13, 16}, {14, 15, 17} },
255 }, 430 },
256 [P4_EVENT_INSTR_RETIRED] = { 431 [P4_EVENT_INSTR_RETIRED] = {
257 .opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED), 432 .opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED),
258 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, 433 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
434 .escr_emask =
435 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) |
436 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSTAG) |
437 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG) |
438 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSTAG),
259 .cntr = { {12, 13, 16}, {14, 15, 17} }, 439 .cntr = { {12, 13, 16}, {14, 15, 17} },
260 }, 440 },
261 [P4_EVENT_UOPS_RETIRED] = { 441 [P4_EVENT_UOPS_RETIRED] = {
262 .opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED), 442 .opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED),
263 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, 443 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
444 .escr_emask =
445 P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, NBOGUS) |
446 P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, BOGUS),
264 .cntr = { {12, 13, 16}, {14, 15, 17} }, 447 .cntr = { {12, 13, 16}, {14, 15, 17} },
265 }, 448 },
266 [P4_EVENT_UOP_TYPE] = { 449 [P4_EVENT_UOP_TYPE] = {
267 .opcode = P4_OPCODE(P4_EVENT_UOP_TYPE), 450 .opcode = P4_OPCODE(P4_EVENT_UOP_TYPE),
268 .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 }, 451 .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 },
452 .escr_emask =
453 P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGLOADS) |
454 P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGSTORES),
269 .cntr = { {12, 13, 16}, {14, 15, 17} }, 455 .cntr = { {12, 13, 16}, {14, 15, 17} },
270 }, 456 },
271 [P4_EVENT_BRANCH_RETIRED] = { 457 [P4_EVENT_BRANCH_RETIRED] = {
272 .opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED), 458 .opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED),
273 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, 459 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
460 .escr_emask =
461 P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNP) |
462 P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNM) |
463 P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTP) |
464 P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTM),
274 .cntr = { {12, 13, 16}, {14, 15, 17} }, 465 .cntr = { {12, 13, 16}, {14, 15, 17} },
275 }, 466 },
276 [P4_EVENT_MISPRED_BRANCH_RETIRED] = { 467 [P4_EVENT_MISPRED_BRANCH_RETIRED] = {
277 .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED), 468 .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED),
278 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, 469 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
470 .escr_emask =
471 P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS),
279 .cntr = { {12, 13, 16}, {14, 15, 17} }, 472 .cntr = { {12, 13, 16}, {14, 15, 17} },
280 }, 473 },
281 [P4_EVENT_X87_ASSIST] = { 474 [P4_EVENT_X87_ASSIST] = {
282 .opcode = P4_OPCODE(P4_EVENT_X87_ASSIST), 475 .opcode = P4_OPCODE(P4_EVENT_X87_ASSIST),
283 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, 476 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
477 .escr_emask =
478 P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSU) |
479 P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSO) |
480 P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAO) |
481 P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAU) |
482 P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, PREA),
284 .cntr = { {12, 13, 16}, {14, 15, 17} }, 483 .cntr = { {12, 13, 16}, {14, 15, 17} },
285 }, 484 },
286 [P4_EVENT_MACHINE_CLEAR] = { 485 [P4_EVENT_MACHINE_CLEAR] = {
287 .opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR), 486 .opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR),
288 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, 487 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
488 .escr_emask =
489 P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, CLEAR) |
490 P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, MOCLEAR) |
491 P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, SMCLEAR),
289 .cntr = { {12, 13, 16}, {14, 15, 17} }, 492 .cntr = { {12, 13, 16}, {14, 15, 17} },
290 }, 493 },
291 [P4_EVENT_INSTR_COMPLETED] = { 494 [P4_EVENT_INSTR_COMPLETED] = {
292 .opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED), 495 .opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED),
293 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, 496 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
497 .escr_emask =
498 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, NBOGUS) |
499 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, BOGUS),
294 .cntr = { {12, 13, 16}, {14, 15, 17} }, 500 .cntr = { {12, 13, 16}, {14, 15, 17} },
295 }, 501 },
296}; 502};
@@ -428,29 +634,73 @@ static u64 p4_pmu_event_map(int hw_event)
428 return config; 634 return config;
429} 635}
430 636
637/* check cpu model specifics */
638static bool p4_event_match_cpu_model(unsigned int event_idx)
639{
640 /* INSTR_COMPLETED event only exist for model 3, 4, 6 (Prescott) */
641 if (event_idx == P4_EVENT_INSTR_COMPLETED) {
642 if (boot_cpu_data.x86_model != 3 &&
643 boot_cpu_data.x86_model != 4 &&
644 boot_cpu_data.x86_model != 6)
645 return false;
646 }
647
648 /*
649 * For info
650 * - IQ_ESCR0, IQ_ESCR1 only for models 1 and 2
651 */
652
653 return true;
654}
655
431static int p4_validate_raw_event(struct perf_event *event) 656static int p4_validate_raw_event(struct perf_event *event)
432{ 657{
433 unsigned int v; 658 unsigned int v, emask;
434 659
435 /* user data may have out-of-bound event index */ 660 /* User data may have out-of-bound event index */
436 v = p4_config_unpack_event(event->attr.config); 661 v = p4_config_unpack_event(event->attr.config);
437 if (v >= ARRAY_SIZE(p4_event_bind_map)) { 662 if (v >= ARRAY_SIZE(p4_event_bind_map))
438 pr_warning("P4 PMU: Unknown event code: %d\n", v); 663 return -EINVAL;
664
665 /* It may be unsupported: */
666 if (!p4_event_match_cpu_model(v))
439 return -EINVAL; 667 return -EINVAL;
668
669 /*
670 * NOTE: P4_CCCR_THREAD_ANY has not the same meaning as
671 * in Architectural Performance Monitoring, it means not
672 * on _which_ logical cpu to count but rather _when_, ie it
673 * depends on logical cpu state -- count event if one cpu active,
674 * none, both or any, so we just allow user to pass any value
675 * desired.
676 *
677 * In turn we always set Tx_OS/Tx_USR bits bound to logical
678 * cpu without their propagation to another cpu
679 */
680
681 /*
682 * if an event is shared accross the logical threads
683 * the user needs special permissions to be able to use it
684 */
685 if (p4_event_bind_map[v].shared) {
686 if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
687 return -EACCES;
440 } 688 }
441 689
690 /* ESCR EventMask bits may be invalid */
691 emask = p4_config_unpack_escr(event->attr.config) & P4_ESCR_EVENTMASK_MASK;
692 if (emask & ~p4_event_bind_map[v].escr_emask)
693 return -EINVAL;
694
442 /* 695 /*
443 * it may have some screwed PEBS bits 696 * it may have some invalid PEBS bits
444 */ 697 */
445 if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) { 698 if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE))
446 pr_warning("P4 PMU: PEBS are not supported yet\n");
447 return -EINVAL; 699 return -EINVAL;
448 } 700
449 v = p4_config_unpack_metric(event->attr.config); 701 v = p4_config_unpack_metric(event->attr.config);
450 if (v >= ARRAY_SIZE(p4_pebs_bind_map)) { 702 if (v >= ARRAY_SIZE(p4_pebs_bind_map))
451 pr_warning("P4 PMU: Unknown metric code: %d\n", v);
452 return -EINVAL; 703 return -EINVAL;
453 }
454 704
455 return 0; 705 return 0;
456} 706}
@@ -478,27 +728,21 @@ static int p4_hw_config(struct perf_event *event)
478 728
479 if (event->attr.type == PERF_TYPE_RAW) { 729 if (event->attr.type == PERF_TYPE_RAW) {
480 730
731 /*
732 * Clear bits we reserve to be managed by kernel itself
733 * and never allowed from a user space
734 */
735 event->attr.config &= P4_CONFIG_MASK;
736
481 rc = p4_validate_raw_event(event); 737 rc = p4_validate_raw_event(event);
482 if (rc) 738 if (rc)
483 goto out; 739 goto out;
484 740
485 /* 741 /*
486 * We don't control raw events so it's up to the caller
487 * to pass sane values (and we don't count the thread number
488 * on HT machine but allow HT-compatible specifics to be
489 * passed on)
490 *
491 * Note that for RAW events we allow user to use P4_CCCR_RESERVED 742 * Note that for RAW events we allow user to use P4_CCCR_RESERVED
492 * bits since we keep additional info here (for cache events and etc) 743 * bits since we keep additional info here (for cache events and etc)
493 *
494 * XXX: HT wide things should check perf_paranoid_cpu() &&
495 * CAP_SYS_ADMIN
496 */ 744 */
497 event->hw.config |= event->attr.config & 745 event->hw.config |= event->attr.config;
498 (p4_config_pack_escr(P4_ESCR_MASK_HT) |
499 p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED));
500
501 event->hw.config &= ~P4_CCCR_FORCE_OVF;
502 } 746 }
503 747
504 rc = x86_setup_perfctr(event); 748 rc = x86_setup_perfctr(event);
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 17be5ec7cbba..c375c79065f8 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1023,9 +1023,9 @@ apicinterrupt ERROR_APIC_VECTOR \
1023apicinterrupt SPURIOUS_APIC_VECTOR \ 1023apicinterrupt SPURIOUS_APIC_VECTOR \
1024 spurious_interrupt smp_spurious_interrupt 1024 spurious_interrupt smp_spurious_interrupt
1025 1025
1026#ifdef CONFIG_PERF_EVENTS 1026#ifdef CONFIG_IRQ_WORK
1027apicinterrupt LOCAL_PENDING_VECTOR \ 1027apicinterrupt IRQ_WORK_VECTOR \
1028 perf_pending_interrupt smp_perf_pending_interrupt 1028 irq_work_interrupt smp_irq_work_interrupt
1029#endif 1029#endif
1030 1030
1031/* 1031/*
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index cd37469b54ee..3afb33f14d2d 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -257,14 +257,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
257 return mod_code_status; 257 return mod_code_status;
258} 258}
259 259
260
261
262
263static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];
264
265static unsigned char *ftrace_nop_replace(void) 260static unsigned char *ftrace_nop_replace(void)
266{ 261{
267 return ftrace_nop; 262 return ideal_nop5;
268} 263}
269 264
270static int 265static int
@@ -338,62 +333,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
338 333
339int __init ftrace_dyn_arch_init(void *data) 334int __init ftrace_dyn_arch_init(void *data)
340{ 335{
341 extern const unsigned char ftrace_test_p6nop[];
342 extern const unsigned char ftrace_test_nop5[];
343 extern const unsigned char ftrace_test_jmp[];
344 int faulted = 0;
345
346 /*
347 * There is no good nop for all x86 archs.
348 * We will default to using the P6_NOP5, but first we
349 * will test to make sure that the nop will actually
350 * work on this CPU. If it faults, we will then
351 * go to a lesser efficient 5 byte nop. If that fails
352 * we then just use a jmp as our nop. This isn't the most
353 * efficient nop, but we can not use a multi part nop
354 * since we would then risk being preempted in the middle
355 * of that nop, and if we enabled tracing then, it might
356 * cause a system crash.
357 *
358 * TODO: check the cpuid to determine the best nop.
359 */
360 asm volatile (
361 "ftrace_test_jmp:"
362 "jmp ftrace_test_p6nop\n"
363 "nop\n"
364 "nop\n"
365 "nop\n" /* 2 byte jmp + 3 bytes */
366 "ftrace_test_p6nop:"
367 P6_NOP5
368 "jmp 1f\n"
369 "ftrace_test_nop5:"
370 ".byte 0x66,0x66,0x66,0x66,0x90\n"
371 "1:"
372 ".section .fixup, \"ax\"\n"
373 "2: movl $1, %0\n"
374 " jmp ftrace_test_nop5\n"
375 "3: movl $2, %0\n"
376 " jmp 1b\n"
377 ".previous\n"
378 _ASM_EXTABLE(ftrace_test_p6nop, 2b)
379 _ASM_EXTABLE(ftrace_test_nop5, 3b)
380 : "=r"(faulted) : "0" (faulted));
381
382 switch (faulted) {
383 case 0:
384 pr_info("converting mcount calls to 0f 1f 44 00 00\n");
385 memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE);
386 break;
387 case 1:
388 pr_info("converting mcount calls to 66 66 66 66 90\n");
389 memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE);
390 break;
391 case 2:
392 pr_info("converting mcount calls to jmp . + 5\n");
393 memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE);
394 break;
395 }
396
397 /* The return code is retured via data */ 336 /* The return code is retured via data */
398 *(unsigned long *)data = 0; 337 *(unsigned long *)data = 0;
399 338
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 91fd0c70a18a..44edb03fc9ec 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -67,10 +67,10 @@ static int show_other_interrupts(struct seq_file *p, int prec)
67 for_each_online_cpu(j) 67 for_each_online_cpu(j)
68 seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs); 68 seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
69 seq_printf(p, " Performance monitoring interrupts\n"); 69 seq_printf(p, " Performance monitoring interrupts\n");
70 seq_printf(p, "%*s: ", prec, "PND"); 70 seq_printf(p, "%*s: ", prec, "IWI");
71 for_each_online_cpu(j) 71 for_each_online_cpu(j)
72 seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs); 72 seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs);
73 seq_printf(p, " Performance pending work\n"); 73 seq_printf(p, " IRQ work interrupts\n");
74#endif 74#endif
75 if (x86_platform_ipi_callback) { 75 if (x86_platform_ipi_callback) {
76 seq_printf(p, "%*s: ", prec, "PLT"); 76 seq_printf(p, "%*s: ", prec, "PLT");
@@ -185,7 +185,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
185 sum += irq_stats(cpu)->apic_timer_irqs; 185 sum += irq_stats(cpu)->apic_timer_irqs;
186 sum += irq_stats(cpu)->irq_spurious_count; 186 sum += irq_stats(cpu)->irq_spurious_count;
187 sum += irq_stats(cpu)->apic_perf_irqs; 187 sum += irq_stats(cpu)->apic_perf_irqs;
188 sum += irq_stats(cpu)->apic_pending_irqs; 188 sum += irq_stats(cpu)->apic_irq_work_irqs;
189#endif 189#endif
190 if (x86_platform_ipi_callback) 190 if (x86_platform_ipi_callback)
191 sum += irq_stats(cpu)->x86_platform_ipis; 191 sum += irq_stats(cpu)->x86_platform_ipis;
diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c
new file mode 100644
index 000000000000..ca8f703a1e70
--- /dev/null
+++ b/arch/x86/kernel/irq_work.c
@@ -0,0 +1,30 @@
1/*
2 * x86 specific code for irq_work
3 *
4 * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
5 */
6
7#include <linux/kernel.h>
8#include <linux/irq_work.h>
9#include <linux/hardirq.h>
10#include <asm/apic.h>
11
12void smp_irq_work_interrupt(struct pt_regs *regs)
13{
14 irq_enter();
15 ack_APIC_irq();
16 inc_irq_stat(apic_irq_work_irqs);
17 irq_work_run();
18 irq_exit();
19}
20
21void arch_irq_work_raise(void)
22{
23#ifdef CONFIG_X86_LOCAL_APIC
24 if (!cpu_has_apic)
25 return;
26
27 apic->send_IPI_self(IRQ_WORK_VECTOR);
28 apic_wait_icr_idle();
29#endif
30}
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 990ae7cfc578..713969b9266b 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -224,9 +224,9 @@ static void __init apic_intr_init(void)
224 alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); 224 alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
225 alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); 225 alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
226 226
227 /* Performance monitoring interrupts: */ 227 /* IRQ work interrupts: */
228# ifdef CONFIG_PERF_EVENTS 228# ifdef CONFIG_IRQ_WORK
229 alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt); 229 alloc_intr_gate(IRQ_WORK_VECTOR, irq_work_interrupt);
230# endif 230# endif
231 231
232#endif 232#endif
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
new file mode 100644
index 000000000000..961b6b30ba90
--- /dev/null
+++ b/arch/x86/kernel/jump_label.c
@@ -0,0 +1,50 @@
1/*
2 * jump label x86 support
3 *
4 * Copyright (C) 2009 Jason Baron <jbaron@redhat.com>
5 *
6 */
7#include <linux/jump_label.h>
8#include <linux/memory.h>
9#include <linux/uaccess.h>
10#include <linux/module.h>
11#include <linux/list.h>
12#include <linux/jhash.h>
13#include <linux/cpu.h>
14#include <asm/kprobes.h>
15#include <asm/alternative.h>
16
17#ifdef HAVE_JUMP_LABEL
18
19union jump_code_union {
20 char code[JUMP_LABEL_NOP_SIZE];
21 struct {
22 char jump;
23 int offset;
24 } __attribute__((packed));
25};
26
27void arch_jump_label_transform(struct jump_entry *entry,
28 enum jump_label_type type)
29{
30 union jump_code_union code;
31
32 if (type == JUMP_LABEL_ENABLE) {
33 code.jump = 0xe9;
34 code.offset = entry->target -
35 (entry->code + JUMP_LABEL_NOP_SIZE);
36 } else
37 memcpy(&code, ideal_nop5, JUMP_LABEL_NOP_SIZE);
38 get_online_cpus();
39 mutex_lock(&text_mutex);
40 text_poke_smp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE);
41 mutex_unlock(&text_mutex);
42 put_online_cpus();
43}
44
45void arch_jump_label_text_poke_early(jump_label_t addr)
46{
47 text_poke_early((void *)addr, ideal_nop5, JUMP_LABEL_NOP_SIZE);
48}
49
50#endif
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 770ebfb349e9..1cbd54c0df99 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -230,9 +230,6 @@ static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
230 return 0; 230 return 0;
231} 231}
232 232
233/* Dummy buffers for kallsyms_lookup */
234static char __dummy_buf[KSYM_NAME_LEN];
235
236/* Check if paddr is at an instruction boundary */ 233/* Check if paddr is at an instruction boundary */
237static int __kprobes can_probe(unsigned long paddr) 234static int __kprobes can_probe(unsigned long paddr)
238{ 235{
@@ -241,7 +238,7 @@ static int __kprobes can_probe(unsigned long paddr)
241 struct insn insn; 238 struct insn insn;
242 kprobe_opcode_t buf[MAX_INSN_SIZE]; 239 kprobe_opcode_t buf[MAX_INSN_SIZE];
243 240
244 if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf)) 241 if (!kallsyms_lookup_size_offset(paddr, NULL, &offset))
245 return 0; 242 return 0;
246 243
247 /* Decode instructions */ 244 /* Decode instructions */
@@ -1129,7 +1126,7 @@ static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr,
1129 *(unsigned long *)addr = val; 1126 *(unsigned long *)addr = val;
1130} 1127}
1131 1128
1132void __kprobes kprobes_optinsn_template_holder(void) 1129static void __used __kprobes kprobes_optinsn_template_holder(void)
1133{ 1130{
1134 asm volatile ( 1131 asm volatile (
1135 ".global optprobe_template_entry\n" 1132 ".global optprobe_template_entry\n"
@@ -1221,7 +1218,8 @@ static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
1221 } 1218 }
1222 /* Check whether the address range is reserved */ 1219 /* Check whether the address range is reserved */
1223 if (ftrace_text_reserved(src, src + len - 1) || 1220 if (ftrace_text_reserved(src, src + len - 1) ||
1224 alternatives_text_reserved(src, src + len - 1)) 1221 alternatives_text_reserved(src, src + len - 1) ||
1222 jump_label_text_reserved(src, src + len - 1))
1225 return -EBUSY; 1223 return -EBUSY;
1226 1224
1227 return len; 1225 return len;
@@ -1269,11 +1267,9 @@ static int __kprobes can_optimize(unsigned long paddr)
1269 unsigned long addr, size = 0, offset = 0; 1267 unsigned long addr, size = 0, offset = 0;
1270 struct insn insn; 1268 struct insn insn;
1271 kprobe_opcode_t buf[MAX_INSN_SIZE]; 1269 kprobe_opcode_t buf[MAX_INSN_SIZE];
1272 /* Dummy buffers for lookup_symbol_attrs */
1273 static char __dummy_buf[KSYM_NAME_LEN];
1274 1270
1275 /* Lookup symbol including addr */ 1271 /* Lookup symbol including addr */
1276 if (!kallsyms_lookup(paddr, &size, &offset, NULL, __dummy_buf)) 1272 if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
1277 return 0; 1273 return 0;
1278 1274
1279 /* Check there is enough space for a relative jump. */ 1275 /* Check there is enough space for a relative jump. */
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 1c355c550960..8f2956091735 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -239,6 +239,9 @@ int module_finalize(const Elf_Ehdr *hdr,
239 apply_paravirt(pseg, pseg + para->sh_size); 239 apply_paravirt(pseg, pseg + para->sh_size);
240 } 240 }
241 241
242 /* make jump label nops */
243 jump_label_apply_nops(me);
244
242 return 0; 245 return 0;
243} 246}
244 247
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index c3a4fbb2b996..00e167870f71 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -112,6 +112,7 @@
112#include <asm/numa_64.h> 112#include <asm/numa_64.h>
113#endif 113#endif
114#include <asm/mce.h> 114#include <asm/mce.h>
115#include <asm/alternative.h>
115 116
116/* 117/*
117 * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. 118 * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
@@ -726,6 +727,7 @@ void __init setup_arch(char **cmdline_p)
726{ 727{
727 int acpi = 0; 728 int acpi = 0;
728 int k8 = 0; 729 int k8 = 0;
730 unsigned long flags;
729 731
730#ifdef CONFIG_X86_32 732#ifdef CONFIG_X86_32
731 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); 733 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
@@ -1071,6 +1073,10 @@ void __init setup_arch(char **cmdline_p)
1071 x86_init.oem.banner(); 1073 x86_init.oem.banner();
1072 1074
1073 mcheck_init(); 1075 mcheck_init();
1076
1077 local_irq_save(flags);
1078 arch_init_ideal_nop5();
1079 local_irq_restore(flags);
1074} 1080}
1075 1081
1076#ifdef CONFIG_X86_32 1082#ifdef CONFIG_X86_32
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 4c4508e8a204..a24c6cfdccc4 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -251,6 +251,8 @@ static noinline __kprobes int vmalloc_fault(unsigned long address)
251 if (!(address >= VMALLOC_START && address < VMALLOC_END)) 251 if (!(address >= VMALLOC_START && address < VMALLOC_END))
252 return -1; 252 return -1;
253 253
254 WARN_ON_ONCE(in_nmi());
255
254 /* 256 /*
255 * Synchronize this task's top level page-table 257 * Synchronize this task's top level page-table
256 * with the 'reference' page table. 258 * with the 'reference' page table.
@@ -369,6 +371,8 @@ static noinline __kprobes int vmalloc_fault(unsigned long address)
369 if (!(address >= VMALLOC_START && address < VMALLOC_END)) 371 if (!(address >= VMALLOC_START && address < VMALLOC_END))
370 return -1; 372 return -1;
371 373
374 WARN_ON_ONCE(in_nmi());
375
372 /* 376 /*
373 * Copy kernel mappings over when needed. This can also 377 * Copy kernel mappings over when needed. This can also
374 * happen within a race in page table update. In the later 378 * happen within a race in page table update. In the later
diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c
index b3b531a4f8e5..d87dd6d042d6 100644
--- a/arch/x86/mm/kmemcheck/kmemcheck.c
+++ b/arch/x86/mm/kmemcheck/kmemcheck.c
@@ -631,6 +631,8 @@ bool kmemcheck_fault(struct pt_regs *regs, unsigned long address,
631 if (!pte) 631 if (!pte)
632 return false; 632 return false;
633 633
634 WARN_ON_ONCE(in_nmi());
635
634 if (error_code & 2) 636 if (error_code & 2)
635 kmemcheck_access(regs, address, KMEMCHECK_WRITE); 637 kmemcheck_access(regs, address, KMEMCHECK_WRITE);
636 else 638 else
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index 3855096c59b8..2d49d4e19a36 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -14,6 +14,7 @@
14#include <asm/ptrace.h> 14#include <asm/ptrace.h>
15#include <asm/uaccess.h> 15#include <asm/uaccess.h>
16#include <asm/stacktrace.h> 16#include <asm/stacktrace.h>
17#include <linux/compat.h>
17 18
18static void backtrace_warning_symbol(void *data, char *msg, 19static void backtrace_warning_symbol(void *data, char *msg,
19 unsigned long symbol) 20 unsigned long symbol)
@@ -48,14 +49,12 @@ static struct stacktrace_ops backtrace_ops = {
48 .walk_stack = print_context_stack, 49 .walk_stack = print_context_stack,
49}; 50};
50 51
51struct frame_head { 52#ifdef CONFIG_COMPAT
52 struct frame_head *bp; 53static struct stack_frame_ia32 *
53 unsigned long ret; 54dump_user_backtrace_32(struct stack_frame_ia32 *head)
54} __attribute__((packed));
55
56static struct frame_head *dump_user_backtrace(struct frame_head *head)
57{ 55{
58 struct frame_head bufhead[2]; 56 struct stack_frame_ia32 bufhead[2];
57 struct stack_frame_ia32 *fp;
59 58
60 /* Also check accessibility of one struct frame_head beyond */ 59 /* Also check accessibility of one struct frame_head beyond */
61 if (!access_ok(VERIFY_READ, head, sizeof(bufhead))) 60 if (!access_ok(VERIFY_READ, head, sizeof(bufhead)))
@@ -63,20 +62,66 @@ static struct frame_head *dump_user_backtrace(struct frame_head *head)
63 if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead))) 62 if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead)))
64 return NULL; 63 return NULL;
65 64
66 oprofile_add_trace(bufhead[0].ret); 65 fp = (struct stack_frame_ia32 *) compat_ptr(bufhead[0].next_frame);
66
67 oprofile_add_trace(bufhead[0].return_address);
68
69 /* frame pointers should strictly progress back up the stack
70 * (towards higher addresses) */
71 if (head >= fp)
72 return NULL;
73
74 return fp;
75}
76
77static inline int
78x86_backtrace_32(struct pt_regs * const regs, unsigned int depth)
79{
80 struct stack_frame_ia32 *head;
81
82 /* User process is 32-bit */
83 if (!current || !test_thread_flag(TIF_IA32))
84 return 0;
85
86 head = (struct stack_frame_ia32 *) regs->bp;
87 while (depth-- && head)
88 head = dump_user_backtrace_32(head);
89
90 return 1;
91}
92
93#else
94static inline int
95x86_backtrace_32(struct pt_regs * const regs, unsigned int depth)
96{
97 return 0;
98}
99#endif /* CONFIG_COMPAT */
100
101static struct stack_frame *dump_user_backtrace(struct stack_frame *head)
102{
103 struct stack_frame bufhead[2];
104
105 /* Also check accessibility of one struct stack_frame beyond */
106 if (!access_ok(VERIFY_READ, head, sizeof(bufhead)))
107 return NULL;
108 if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead)))
109 return NULL;
110
111 oprofile_add_trace(bufhead[0].return_address);
67 112
68 /* frame pointers should strictly progress back up the stack 113 /* frame pointers should strictly progress back up the stack
69 * (towards higher addresses) */ 114 * (towards higher addresses) */
70 if (head >= bufhead[0].bp) 115 if (head >= bufhead[0].next_frame)
71 return NULL; 116 return NULL;
72 117
73 return bufhead[0].bp; 118 return bufhead[0].next_frame;
74} 119}
75 120
76void 121void
77x86_backtrace(struct pt_regs * const regs, unsigned int depth) 122x86_backtrace(struct pt_regs * const regs, unsigned int depth)
78{ 123{
79 struct frame_head *head = (struct frame_head *)frame_pointer(regs); 124 struct stack_frame *head = (struct stack_frame *)frame_pointer(regs);
80 125
81 if (!user_mode_vm(regs)) { 126 if (!user_mode_vm(regs)) {
82 unsigned long stack = kernel_stack_pointer(regs); 127 unsigned long stack = kernel_stack_pointer(regs);
@@ -86,6 +131,9 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth)
86 return; 131 return;
87 } 132 }
88 133
134 if (x86_backtrace_32(regs, depth))
135 return;
136
89 while (depth-- && head) 137 while (depth-- && head)
90 head = dump_user_backtrace(head); 138 head = dump_user_backtrace(head);
91} 139}
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index f1575c9a2572..bd1489c3ce09 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -695,9 +695,6 @@ static int __init ppro_init(char **cpu_type)
695 return 1; 695 return 1;
696} 696}
697 697
698/* in order to get sysfs right */
699static int using_nmi;
700
701int __init op_nmi_init(struct oprofile_operations *ops) 698int __init op_nmi_init(struct oprofile_operations *ops)
702{ 699{
703 __u8 vendor = boot_cpu_data.x86_vendor; 700 __u8 vendor = boot_cpu_data.x86_vendor;
@@ -705,8 +702,6 @@ int __init op_nmi_init(struct oprofile_operations *ops)
705 char *cpu_type = NULL; 702 char *cpu_type = NULL;
706 int ret = 0; 703 int ret = 0;
707 704
708 using_nmi = 0;
709
710 if (!cpu_has_apic) 705 if (!cpu_has_apic)
711 return -ENODEV; 706 return -ENODEV;
712 707
@@ -790,13 +785,11 @@ int __init op_nmi_init(struct oprofile_operations *ops)
790 if (ret) 785 if (ret)
791 return ret; 786 return ret;
792 787
793 using_nmi = 1;
794 printk(KERN_INFO "oprofile: using NMI interrupt.\n"); 788 printk(KERN_INFO "oprofile: using NMI interrupt.\n");
795 return 0; 789 return 0;
796} 790}
797 791
798void op_nmi_exit(void) 792void op_nmi_exit(void)
799{ 793{
800 if (using_nmi) 794 exit_sysfs();
801 exit_sysfs();
802} 795}
diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c
index b336cd9ee7a1..f9bda64fcd1b 100644
--- a/drivers/oprofile/oprof.c
+++ b/drivers/oprofile/oprof.c
@@ -225,26 +225,17 @@ post_sync:
225 mutex_unlock(&start_mutex); 225 mutex_unlock(&start_mutex);
226} 226}
227 227
228int oprofile_set_backtrace(unsigned long val) 228int oprofile_set_ulong(unsigned long *addr, unsigned long val)
229{ 229{
230 int err = 0; 230 int err = -EBUSY;
231 231
232 mutex_lock(&start_mutex); 232 mutex_lock(&start_mutex);
233 233 if (!oprofile_started) {
234 if (oprofile_started) { 234 *addr = val;
235 err = -EBUSY; 235 err = 0;
236 goto out;
237 }
238
239 if (!oprofile_ops.backtrace) {
240 err = -EINVAL;
241 goto out;
242 } 236 }
243
244 oprofile_backtrace_depth = val;
245
246out:
247 mutex_unlock(&start_mutex); 237 mutex_unlock(&start_mutex);
238
248 return err; 239 return err;
249} 240}
250 241
@@ -257,16 +248,9 @@ static int __init oprofile_init(void)
257 printk(KERN_INFO "oprofile: using timer interrupt.\n"); 248 printk(KERN_INFO "oprofile: using timer interrupt.\n");
258 err = oprofile_timer_init(&oprofile_ops); 249 err = oprofile_timer_init(&oprofile_ops);
259 if (err) 250 if (err)
260 goto out_arch; 251 return err;
261 } 252 }
262 err = oprofilefs_register(); 253 return oprofilefs_register();
263 if (err)
264 goto out_arch;
265 return 0;
266
267out_arch:
268 oprofile_arch_exit();
269 return err;
270} 254}
271 255
272 256
diff --git a/drivers/oprofile/oprof.h b/drivers/oprofile/oprof.h
index 47e12cb4ee8b..177b73de5e5f 100644
--- a/drivers/oprofile/oprof.h
+++ b/drivers/oprofile/oprof.h
@@ -37,7 +37,7 @@ void oprofile_create_files(struct super_block *sb, struct dentry *root);
37int oprofile_timer_init(struct oprofile_operations *ops); 37int oprofile_timer_init(struct oprofile_operations *ops);
38void oprofile_timer_exit(void); 38void oprofile_timer_exit(void);
39 39
40int oprofile_set_backtrace(unsigned long depth); 40int oprofile_set_ulong(unsigned long *addr, unsigned long val);
41int oprofile_set_timeout(unsigned long time); 41int oprofile_set_timeout(unsigned long time);
42 42
43#endif /* OPROF_H */ 43#endif /* OPROF_H */
diff --git a/drivers/oprofile/oprofile_files.c b/drivers/oprofile/oprofile_files.c
index bbd7516e0869..ccf099e684a4 100644
--- a/drivers/oprofile/oprofile_files.c
+++ b/drivers/oprofile/oprofile_files.c
@@ -79,14 +79,17 @@ static ssize_t depth_write(struct file *file, char const __user *buf, size_t cou
79 if (*offset) 79 if (*offset)
80 return -EINVAL; 80 return -EINVAL;
81 81
82 if (!oprofile_ops.backtrace)
83 return -EINVAL;
84
82 retval = oprofilefs_ulong_from_user(&val, buf, count); 85 retval = oprofilefs_ulong_from_user(&val, buf, count);
83 if (retval) 86 if (retval)
84 return retval; 87 return retval;
85 88
86 retval = oprofile_set_backtrace(val); 89 retval = oprofile_set_ulong(&oprofile_backtrace_depth, val);
87
88 if (retval) 90 if (retval)
89 return retval; 91 return retval;
92
90 return count; 93 return count;
91} 94}
92 95
diff --git a/drivers/oprofile/oprofile_perf.c b/drivers/oprofile/oprofile_perf.c
new file mode 100644
index 000000000000..9046f7b2ed79
--- /dev/null
+++ b/drivers/oprofile/oprofile_perf.c
@@ -0,0 +1,328 @@
1/*
2 * Copyright 2010 ARM Ltd.
3 *
4 * Perf-events backend for OProfile.
5 */
6#include <linux/perf_event.h>
7#include <linux/platform_device.h>
8#include <linux/oprofile.h>
9#include <linux/slab.h>
10
11/*
12 * Per performance monitor configuration as set via oprofilefs.
13 */
14struct op_counter_config {
15 unsigned long count;
16 unsigned long enabled;
17 unsigned long event;
18 unsigned long unit_mask;
19 unsigned long kernel;
20 unsigned long user;
21 struct perf_event_attr attr;
22};
23
24static int oprofile_perf_enabled;
25static DEFINE_MUTEX(oprofile_perf_mutex);
26
27static struct op_counter_config *counter_config;
28static struct perf_event **perf_events[nr_cpumask_bits];
29static int num_counters;
30
31/*
32 * Overflow callback for oprofile.
33 */
34static void op_overflow_handler(struct perf_event *event, int unused,
35 struct perf_sample_data *data, struct pt_regs *regs)
36{
37 int id;
38 u32 cpu = smp_processor_id();
39
40 for (id = 0; id < num_counters; ++id)
41 if (perf_events[cpu][id] == event)
42 break;
43
44 if (id != num_counters)
45 oprofile_add_sample(regs, id);
46 else
47 pr_warning("oprofile: ignoring spurious overflow "
48 "on cpu %u\n", cpu);
49}
50
51/*
52 * Called by oprofile_perf_setup to create perf attributes to mirror the oprofile
53 * settings in counter_config. Attributes are created as `pinned' events and
54 * so are permanently scheduled on the PMU.
55 */
56static void op_perf_setup(void)
57{
58 int i;
59 u32 size = sizeof(struct perf_event_attr);
60 struct perf_event_attr *attr;
61
62 for (i = 0; i < num_counters; ++i) {
63 attr = &counter_config[i].attr;
64 memset(attr, 0, size);
65 attr->type = PERF_TYPE_RAW;
66 attr->size = size;
67 attr->config = counter_config[i].event;
68 attr->sample_period = counter_config[i].count;
69 attr->pinned = 1;
70 }
71}
72
73static int op_create_counter(int cpu, int event)
74{
75 struct perf_event *pevent;
76
77 if (!counter_config[event].enabled || perf_events[cpu][event])
78 return 0;
79
80 pevent = perf_event_create_kernel_counter(&counter_config[event].attr,
81 cpu, NULL,
82 op_overflow_handler);
83
84 if (IS_ERR(pevent))
85 return PTR_ERR(pevent);
86
87 if (pevent->state != PERF_EVENT_STATE_ACTIVE) {
88 perf_event_release_kernel(pevent);
89 pr_warning("oprofile: failed to enable event %d "
90 "on CPU %d\n", event, cpu);
91 return -EBUSY;
92 }
93
94 perf_events[cpu][event] = pevent;
95
96 return 0;
97}
98
99static void op_destroy_counter(int cpu, int event)
100{
101 struct perf_event *pevent = perf_events[cpu][event];
102
103 if (pevent) {
104 perf_event_release_kernel(pevent);
105 perf_events[cpu][event] = NULL;
106 }
107}
108
109/*
110 * Called by oprofile_perf_start to create active perf events based on the
111 * perviously configured attributes.
112 */
113static int op_perf_start(void)
114{
115 int cpu, event, ret = 0;
116
117 for_each_online_cpu(cpu) {
118 for (event = 0; event < num_counters; ++event) {
119 ret = op_create_counter(cpu, event);
120 if (ret)
121 return ret;
122 }
123 }
124
125 return ret;
126}
127
128/*
129 * Called by oprofile_perf_stop at the end of a profiling run.
130 */
131static void op_perf_stop(void)
132{
133 int cpu, event;
134
135 for_each_online_cpu(cpu)
136 for (event = 0; event < num_counters; ++event)
137 op_destroy_counter(cpu, event);
138}
139
140static int oprofile_perf_create_files(struct super_block *sb, struct dentry *root)
141{
142 unsigned int i;
143
144 for (i = 0; i < num_counters; i++) {
145 struct dentry *dir;
146 char buf[4];
147
148 snprintf(buf, sizeof buf, "%d", i);
149 dir = oprofilefs_mkdir(sb, root, buf);
150 oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
151 oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
152 oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count);
153 oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);
154 oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);
155 oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
156 }
157
158 return 0;
159}
160
161static int oprofile_perf_setup(void)
162{
163 spin_lock(&oprofilefs_lock);
164 op_perf_setup();
165 spin_unlock(&oprofilefs_lock);
166 return 0;
167}
168
169static int oprofile_perf_start(void)
170{
171 int ret = -EBUSY;
172
173 mutex_lock(&oprofile_perf_mutex);
174 if (!oprofile_perf_enabled) {
175 ret = 0;
176 op_perf_start();
177 oprofile_perf_enabled = 1;
178 }
179 mutex_unlock(&oprofile_perf_mutex);
180 return ret;
181}
182
183static void oprofile_perf_stop(void)
184{
185 mutex_lock(&oprofile_perf_mutex);
186 if (oprofile_perf_enabled)
187 op_perf_stop();
188 oprofile_perf_enabled = 0;
189 mutex_unlock(&oprofile_perf_mutex);
190}
191
192#ifdef CONFIG_PM
193
194static int oprofile_perf_suspend(struct platform_device *dev, pm_message_t state)
195{
196 mutex_lock(&oprofile_perf_mutex);
197 if (oprofile_perf_enabled)
198 op_perf_stop();
199 mutex_unlock(&oprofile_perf_mutex);
200 return 0;
201}
202
203static int oprofile_perf_resume(struct platform_device *dev)
204{
205 mutex_lock(&oprofile_perf_mutex);
206 if (oprofile_perf_enabled && op_perf_start())
207 oprofile_perf_enabled = 0;
208 mutex_unlock(&oprofile_perf_mutex);
209 return 0;
210}
211
212static struct platform_driver oprofile_driver = {
213 .driver = {
214 .name = "oprofile-perf",
215 },
216 .resume = oprofile_perf_resume,
217 .suspend = oprofile_perf_suspend,
218};
219
220static struct platform_device *oprofile_pdev;
221
222static int __init init_driverfs(void)
223{
224 int ret;
225
226 ret = platform_driver_register(&oprofile_driver);
227 if (ret)
228 return ret;
229
230 oprofile_pdev = platform_device_register_simple(
231 oprofile_driver.driver.name, 0, NULL, 0);
232 if (IS_ERR(oprofile_pdev)) {
233 ret = PTR_ERR(oprofile_pdev);
234 platform_driver_unregister(&oprofile_driver);
235 }
236
237 return ret;
238}
239
240static void exit_driverfs(void)
241{
242 platform_device_unregister(oprofile_pdev);
243 platform_driver_unregister(&oprofile_driver);
244}
245
246#else
247
248static inline int init_driverfs(void) { return 0; }
249static inline void exit_driverfs(void) { }
250
251#endif /* CONFIG_PM */
252
253void oprofile_perf_exit(void)
254{
255 int cpu, id;
256 struct perf_event *event;
257
258 for_each_possible_cpu(cpu) {
259 for (id = 0; id < num_counters; ++id) {
260 event = perf_events[cpu][id];
261 if (event)
262 perf_event_release_kernel(event);
263 }
264
265 kfree(perf_events[cpu]);
266 }
267
268 kfree(counter_config);
269 exit_driverfs();
270}
271
272int __init oprofile_perf_init(struct oprofile_operations *ops)
273{
274 int cpu, ret = 0;
275
276 ret = init_driverfs();
277 if (ret)
278 return ret;
279
280 memset(&perf_events, 0, sizeof(perf_events));
281
282 num_counters = perf_num_counters();
283 if (num_counters <= 0) {
284 pr_info("oprofile: no performance counters\n");
285 ret = -ENODEV;
286 goto out;
287 }
288
289 counter_config = kcalloc(num_counters,
290 sizeof(struct op_counter_config), GFP_KERNEL);
291
292 if (!counter_config) {
293 pr_info("oprofile: failed to allocate %d "
294 "counters\n", num_counters);
295 ret = -ENOMEM;
296 num_counters = 0;
297 goto out;
298 }
299
300 for_each_possible_cpu(cpu) {
301 perf_events[cpu] = kcalloc(num_counters,
302 sizeof(struct perf_event *), GFP_KERNEL);
303 if (!perf_events[cpu]) {
304 pr_info("oprofile: failed to allocate %d perf events "
305 "for cpu %d\n", num_counters, cpu);
306 ret = -ENOMEM;
307 goto out;
308 }
309 }
310
311 ops->create_files = oprofile_perf_create_files;
312 ops->setup = oprofile_perf_setup;
313 ops->start = oprofile_perf_start;
314 ops->stop = oprofile_perf_stop;
315 ops->shutdown = oprofile_perf_stop;
316 ops->cpu_type = op_name_from_perf_id();
317
318 if (!ops->cpu_type)
319 ret = -ENODEV;
320 else
321 pr_info("oprofile: using %s\n", ops->cpu_type);
322
323out:
324 if (ret)
325 oprofile_perf_exit();
326
327 return ret;
328}
diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c
index 2766a6d3c2e9..1944621930d9 100644
--- a/drivers/oprofile/oprofilefs.c
+++ b/drivers/oprofile/oprofilefs.c
@@ -91,16 +91,20 @@ static ssize_t ulong_read_file(struct file *file, char __user *buf, size_t count
91 91
92static ssize_t ulong_write_file(struct file *file, char const __user *buf, size_t count, loff_t *offset) 92static ssize_t ulong_write_file(struct file *file, char const __user *buf, size_t count, loff_t *offset)
93{ 93{
94 unsigned long *value = file->private_data; 94 unsigned long value;
95 int retval; 95 int retval;
96 96
97 if (*offset) 97 if (*offset)
98 return -EINVAL; 98 return -EINVAL;
99 99
100 retval = oprofilefs_ulong_from_user(value, buf, count); 100 retval = oprofilefs_ulong_from_user(&value, buf, count);
101 if (retval)
102 return retval;
101 103
104 retval = oprofile_set_ulong(file->private_data, value);
102 if (retval) 105 if (retval)
103 return retval; 106 return retval;
107
104 return count; 108 return count;
105} 109}
106 110
@@ -126,50 +130,41 @@ static const struct file_operations ulong_ro_fops = {
126}; 130};
127 131
128 132
129static struct dentry *__oprofilefs_create_file(struct super_block *sb, 133static int __oprofilefs_create_file(struct super_block *sb,
130 struct dentry *root, char const *name, const struct file_operations *fops, 134 struct dentry *root, char const *name, const struct file_operations *fops,
131 int perm) 135 int perm, void *priv)
132{ 136{
133 struct dentry *dentry; 137 struct dentry *dentry;
134 struct inode *inode; 138 struct inode *inode;
135 139
136 dentry = d_alloc_name(root, name); 140 dentry = d_alloc_name(root, name);
137 if (!dentry) 141 if (!dentry)
138 return NULL; 142 return -ENOMEM;
139 inode = oprofilefs_get_inode(sb, S_IFREG | perm); 143 inode = oprofilefs_get_inode(sb, S_IFREG | perm);
140 if (!inode) { 144 if (!inode) {
141 dput(dentry); 145 dput(dentry);
142 return NULL; 146 return -ENOMEM;
143 } 147 }
144 inode->i_fop = fops; 148 inode->i_fop = fops;
145 d_add(dentry, inode); 149 d_add(dentry, inode);
146 return dentry; 150 dentry->d_inode->i_private = priv;
151 return 0;
147} 152}
148 153
149 154
150int oprofilefs_create_ulong(struct super_block *sb, struct dentry *root, 155int oprofilefs_create_ulong(struct super_block *sb, struct dentry *root,
151 char const *name, unsigned long *val) 156 char const *name, unsigned long *val)
152{ 157{
153 struct dentry *d = __oprofilefs_create_file(sb, root, name, 158 return __oprofilefs_create_file(sb, root, name,
154 &ulong_fops, 0644); 159 &ulong_fops, 0644, val);
155 if (!d)
156 return -EFAULT;
157
158 d->d_inode->i_private = val;
159 return 0;
160} 160}
161 161
162 162
163int oprofilefs_create_ro_ulong(struct super_block *sb, struct dentry *root, 163int oprofilefs_create_ro_ulong(struct super_block *sb, struct dentry *root,
164 char const *name, unsigned long *val) 164 char const *name, unsigned long *val)
165{ 165{
166 struct dentry *d = __oprofilefs_create_file(sb, root, name, 166 return __oprofilefs_create_file(sb, root, name,
167 &ulong_ro_fops, 0444); 167 &ulong_ro_fops, 0444, val);
168 if (!d)
169 return -EFAULT;
170
171 d->d_inode->i_private = val;
172 return 0;
173} 168}
174 169
175 170
@@ -189,31 +184,22 @@ static const struct file_operations atomic_ro_fops = {
189int oprofilefs_create_ro_atomic(struct super_block *sb, struct dentry *root, 184int oprofilefs_create_ro_atomic(struct super_block *sb, struct dentry *root,
190 char const *name, atomic_t *val) 185 char const *name, atomic_t *val)
191{ 186{
192 struct dentry *d = __oprofilefs_create_file(sb, root, name, 187 return __oprofilefs_create_file(sb, root, name,
193 &atomic_ro_fops, 0444); 188 &atomic_ro_fops, 0444, val);
194 if (!d)
195 return -EFAULT;
196
197 d->d_inode->i_private = val;
198 return 0;
199} 189}
200 190
201 191
202int oprofilefs_create_file(struct super_block *sb, struct dentry *root, 192int oprofilefs_create_file(struct super_block *sb, struct dentry *root,
203 char const *name, const struct file_operations *fops) 193 char const *name, const struct file_operations *fops)
204{ 194{
205 if (!__oprofilefs_create_file(sb, root, name, fops, 0644)) 195 return __oprofilefs_create_file(sb, root, name, fops, 0644, NULL);
206 return -EFAULT;
207 return 0;
208} 196}
209 197
210 198
211int oprofilefs_create_file_perm(struct super_block *sb, struct dentry *root, 199int oprofilefs_create_file_perm(struct super_block *sb, struct dentry *root,
212 char const *name, const struct file_operations *fops, int perm) 200 char const *name, const struct file_operations *fops, int perm)
213{ 201{
214 if (!__oprofilefs_create_file(sb, root, name, fops, perm)) 202 return __oprofilefs_create_file(sb, root, name, fops, perm, NULL);
215 return -EFAULT;
216 return 0;
217} 203}
218 204
219 205
diff --git a/include/asm-generic/hardirq.h b/include/asm-generic/hardirq.h
index 62f59080e5cc..04d0a977cd43 100644
--- a/include/asm-generic/hardirq.h
+++ b/include/asm-generic/hardirq.h
@@ -3,13 +3,13 @@
3 3
4#include <linux/cache.h> 4#include <linux/cache.h>
5#include <linux/threads.h> 5#include <linux/threads.h>
6#include <linux/irq.h>
7 6
8typedef struct { 7typedef struct {
9 unsigned int __softirq_pending; 8 unsigned int __softirq_pending;
10} ____cacheline_aligned irq_cpustat_t; 9} ____cacheline_aligned irq_cpustat_t;
11 10
12#include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */ 11#include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */
12#include <linux/irq.h>
13 13
14#ifndef ack_bad_irq 14#ifndef ack_bad_irq
15static inline void ack_bad_irq(unsigned int irq) 15static inline void ack_bad_irq(unsigned int irq)
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 8a92a170fb7d..ef2af9948eac 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -220,6 +220,8 @@
220 \ 220 \
221 BUG_TABLE \ 221 BUG_TABLE \
222 \ 222 \
223 JUMP_TABLE \
224 \
223 /* PCI quirks */ \ 225 /* PCI quirks */ \
224 .pci_fixup : AT(ADDR(.pci_fixup) - LOAD_OFFSET) { \ 226 .pci_fixup : AT(ADDR(.pci_fixup) - LOAD_OFFSET) { \
225 VMLINUX_SYMBOL(__start_pci_fixups_early) = .; \ 227 VMLINUX_SYMBOL(__start_pci_fixups_early) = .; \
@@ -563,6 +565,14 @@
563#define BUG_TABLE 565#define BUG_TABLE
564#endif 566#endif
565 567
568#define JUMP_TABLE \
569 . = ALIGN(8); \
570 __jump_table : AT(ADDR(__jump_table) - LOAD_OFFSET) { \
571 VMLINUX_SYMBOL(__start___jump_table) = .; \
572 *(__jump_table) \
573 VMLINUX_SYMBOL(__stop___jump_table) = .; \
574 }
575
566#ifdef CONFIG_PM_TRACE 576#ifdef CONFIG_PM_TRACE
567#define TRACEDATA \ 577#define TRACEDATA \
568 . = ALIGN(4); \ 578 . = ALIGN(4); \
diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index 52c0da4bdd18..bef3cda44c4c 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -1,6 +1,8 @@
1#ifndef _DYNAMIC_DEBUG_H 1#ifndef _DYNAMIC_DEBUG_H
2#define _DYNAMIC_DEBUG_H 2#define _DYNAMIC_DEBUG_H
3 3
4#include <linux/jump_label.h>
5
4/* dynamic_printk_enabled, and dynamic_printk_enabled2 are bitmasks in which 6/* dynamic_printk_enabled, and dynamic_printk_enabled2 are bitmasks in which
5 * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They 7 * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They
6 * use independent hash functions, to reduce the chance of false positives. 8 * use independent hash functions, to reduce the chance of false positives.
@@ -22,8 +24,6 @@ struct _ddebug {
22 const char *function; 24 const char *function;
23 const char *filename; 25 const char *filename;
24 const char *format; 26 const char *format;
25 char primary_hash;
26 char secondary_hash;
27 unsigned int lineno:24; 27 unsigned int lineno:24;
28 /* 28 /*
29 * The flags field controls the behaviour at the callsite. 29 * The flags field controls the behaviour at the callsite.
@@ -33,6 +33,7 @@ struct _ddebug {
33#define _DPRINTK_FLAGS_PRINT (1<<0) /* printk() a message using the format */ 33#define _DPRINTK_FLAGS_PRINT (1<<0) /* printk() a message using the format */
34#define _DPRINTK_FLAGS_DEFAULT 0 34#define _DPRINTK_FLAGS_DEFAULT 0
35 unsigned int flags:8; 35 unsigned int flags:8;
36 char enabled;
36} __attribute__((aligned(8))); 37} __attribute__((aligned(8)));
37 38
38 39
@@ -42,33 +43,35 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int n,
42#if defined(CONFIG_DYNAMIC_DEBUG) 43#if defined(CONFIG_DYNAMIC_DEBUG)
43extern int ddebug_remove_module(const char *mod_name); 44extern int ddebug_remove_module(const char *mod_name);
44 45
45#define __dynamic_dbg_enabled(dd) ({ \
46 int __ret = 0; \
47 if (unlikely((dynamic_debug_enabled & (1LL << DEBUG_HASH)) && \
48 (dynamic_debug_enabled2 & (1LL << DEBUG_HASH2)))) \
49 if (unlikely(dd.flags)) \
50 __ret = 1; \
51 __ret; })
52
53#define dynamic_pr_debug(fmt, ...) do { \ 46#define dynamic_pr_debug(fmt, ...) do { \
47 __label__ do_printk; \
48 __label__ out; \
54 static struct _ddebug descriptor \ 49 static struct _ddebug descriptor \
55 __used \ 50 __used \
56 __attribute__((section("__verbose"), aligned(8))) = \ 51 __attribute__((section("__verbose"), aligned(8))) = \
57 { KBUILD_MODNAME, __func__, __FILE__, fmt, DEBUG_HASH, \ 52 { KBUILD_MODNAME, __func__, __FILE__, fmt, __LINE__, \
58 DEBUG_HASH2, __LINE__, _DPRINTK_FLAGS_DEFAULT }; \ 53 _DPRINTK_FLAGS_DEFAULT }; \
59 if (__dynamic_dbg_enabled(descriptor)) \ 54 JUMP_LABEL(&descriptor.enabled, do_printk); \
60 printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \ 55 goto out; \
56do_printk: \
57 printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \
58out: ; \
61 } while (0) 59 } while (0)
62 60
63 61
64#define dynamic_dev_dbg(dev, fmt, ...) do { \ 62#define dynamic_dev_dbg(dev, fmt, ...) do { \
63 __label__ do_printk; \
64 __label__ out; \
65 static struct _ddebug descriptor \ 65 static struct _ddebug descriptor \
66 __used \ 66 __used \
67 __attribute__((section("__verbose"), aligned(8))) = \ 67 __attribute__((section("__verbose"), aligned(8))) = \
68 { KBUILD_MODNAME, __func__, __FILE__, fmt, DEBUG_HASH, \ 68 { KBUILD_MODNAME, __func__, __FILE__, fmt, __LINE__, \
69 DEBUG_HASH2, __LINE__, _DPRINTK_FLAGS_DEFAULT }; \ 69 _DPRINTK_FLAGS_DEFAULT }; \
70 if (__dynamic_dbg_enabled(descriptor)) \ 70 JUMP_LABEL(&descriptor.enabled, do_printk); \
71 dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__); \ 71 goto out; \
72do_printk: \
73 dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__); \
74out: ; \
72 } while (0) 75 } while (0)
73 76
74#else 77#else
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 02b8b24f8f51..8beabb958f61 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -191,8 +191,8 @@ struct ftrace_event_call {
191 unsigned int flags; 191 unsigned int flags;
192 192
193#ifdef CONFIG_PERF_EVENTS 193#ifdef CONFIG_PERF_EVENTS
194 int perf_refcount; 194 int perf_refcount;
195 struct hlist_head *perf_events; 195 struct hlist_head __percpu *perf_events;
196#endif 196#endif
197}; 197};
198 198
@@ -252,8 +252,8 @@ DECLARE_PER_CPU(struct pt_regs, perf_trace_regs);
252 252
253extern int perf_trace_init(struct perf_event *event); 253extern int perf_trace_init(struct perf_event *event);
254extern void perf_trace_destroy(struct perf_event *event); 254extern void perf_trace_destroy(struct perf_event *event);
255extern int perf_trace_enable(struct perf_event *event); 255extern int perf_trace_add(struct perf_event *event, int flags);
256extern void perf_trace_disable(struct perf_event *event); 256extern void perf_trace_del(struct perf_event *event, int flags);
257extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, 257extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
258 char *filter_str); 258 char *filter_str);
259extern void ftrace_profile_free_filter(struct perf_event *event); 259extern void ftrace_profile_free_filter(struct perf_event *event);
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index a0384a4d1e6f..531495db1708 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -18,6 +18,7 @@
18#include <asm/atomic.h> 18#include <asm/atomic.h>
19#include <asm/ptrace.h> 19#include <asm/ptrace.h>
20#include <asm/system.h> 20#include <asm/system.h>
21#include <trace/events/irq.h>
21 22
22/* 23/*
23 * These correspond to the IORESOURCE_IRQ_* defines in 24 * These correspond to the IORESOURCE_IRQ_* defines in
@@ -407,7 +408,12 @@ asmlinkage void do_softirq(void);
407asmlinkage void __do_softirq(void); 408asmlinkage void __do_softirq(void);
408extern void open_softirq(int nr, void (*action)(struct softirq_action *)); 409extern void open_softirq(int nr, void (*action)(struct softirq_action *));
409extern void softirq_init(void); 410extern void softirq_init(void);
410#define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0) 411static inline void __raise_softirq_irqoff(unsigned int nr)
412{
413 trace_softirq_raise((struct softirq_action *)(unsigned long)nr, NULL);
414 or_softirq_pending(1UL << nr);
415}
416
411extern void raise_softirq_irqoff(unsigned int nr); 417extern void raise_softirq_irqoff(unsigned int nr);
412extern void raise_softirq(unsigned int nr); 418extern void raise_softirq(unsigned int nr);
413extern void wakeup_softirqd(void); 419extern void wakeup_softirqd(void);
diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
new file mode 100644
index 000000000000..4fa09d4d0b71
--- /dev/null
+++ b/include/linux/irq_work.h
@@ -0,0 +1,20 @@
1#ifndef _LINUX_IRQ_WORK_H
2#define _LINUX_IRQ_WORK_H
3
4struct irq_work {
5 struct irq_work *next;
6 void (*func)(struct irq_work *);
7};
8
9static inline
10void init_irq_work(struct irq_work *entry, void (*func)(struct irq_work *))
11{
12 entry->next = NULL;
13 entry->func = func;
14}
15
16bool irq_work_queue(struct irq_work *entry);
17void irq_work_run(void);
18void irq_work_sync(struct irq_work *entry);
19
20#endif /* _LINUX_IRQ_WORK_H */
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
new file mode 100644
index 000000000000..b67cb180e6e9
--- /dev/null
+++ b/include/linux/jump_label.h
@@ -0,0 +1,74 @@
1#ifndef _LINUX_JUMP_LABEL_H
2#define _LINUX_JUMP_LABEL_H
3
4#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_HAVE_ARCH_JUMP_LABEL)
5# include <asm/jump_label.h>
6# define HAVE_JUMP_LABEL
7#endif
8
9enum jump_label_type {
10 JUMP_LABEL_ENABLE,
11 JUMP_LABEL_DISABLE
12};
13
14struct module;
15
16#ifdef HAVE_JUMP_LABEL
17
18extern struct jump_entry __start___jump_table[];
19extern struct jump_entry __stop___jump_table[];
20
21extern void arch_jump_label_transform(struct jump_entry *entry,
22 enum jump_label_type type);
23extern void arch_jump_label_text_poke_early(jump_label_t addr);
24extern void jump_label_update(unsigned long key, enum jump_label_type type);
25extern void jump_label_apply_nops(struct module *mod);
26extern int jump_label_text_reserved(void *start, void *end);
27
28#define jump_label_enable(key) \
29 jump_label_update((unsigned long)key, JUMP_LABEL_ENABLE);
30
31#define jump_label_disable(key) \
32 jump_label_update((unsigned long)key, JUMP_LABEL_DISABLE);
33
34#else
35
36#define JUMP_LABEL(key, label) \
37do { \
38 if (unlikely(*key)) \
39 goto label; \
40} while (0)
41
42#define jump_label_enable(cond_var) \
43do { \
44 *(cond_var) = 1; \
45} while (0)
46
47#define jump_label_disable(cond_var) \
48do { \
49 *(cond_var) = 0; \
50} while (0)
51
52static inline int jump_label_apply_nops(struct module *mod)
53{
54 return 0;
55}
56
57static inline int jump_label_text_reserved(void *start, void *end)
58{
59 return 0;
60}
61
62#endif
63
64#define COND_STMT(key, stmt) \
65do { \
66 __label__ jl_enabled; \
67 JUMP_LABEL(key, jl_enabled); \
68 if (0) { \
69jl_enabled: \
70 stmt; \
71 } \
72} while (0)
73
74#endif
diff --git a/include/linux/jump_label_ref.h b/include/linux/jump_label_ref.h
new file mode 100644
index 000000000000..e5d012ad92c6
--- /dev/null
+++ b/include/linux/jump_label_ref.h
@@ -0,0 +1,44 @@
1#ifndef _LINUX_JUMP_LABEL_REF_H
2#define _LINUX_JUMP_LABEL_REF_H
3
4#include <linux/jump_label.h>
5#include <asm/atomic.h>
6
7#ifdef HAVE_JUMP_LABEL
8
9static inline void jump_label_inc(atomic_t *key)
10{
11 if (atomic_add_return(1, key) == 1)
12 jump_label_enable(key);
13}
14
15static inline void jump_label_dec(atomic_t *key)
16{
17 if (atomic_dec_and_test(key))
18 jump_label_disable(key);
19}
20
21#else /* !HAVE_JUMP_LABEL */
22
23static inline void jump_label_inc(atomic_t *key)
24{
25 atomic_inc(key);
26}
27
28static inline void jump_label_dec(atomic_t *key)
29{
30 atomic_dec(key);
31}
32
33#undef JUMP_LABEL
34#define JUMP_LABEL(key, label) \
35do { \
36 if (unlikely(__builtin_choose_expr( \
37 __builtin_types_compatible_p(typeof(key), atomic_t *), \
38 atomic_read((atomic_t *)(key)), *(key)))) \
39 goto label; \
40} while (0)
41
42#endif /* HAVE_JUMP_LABEL */
43
44#endif /* _LINUX_JUMP_LABEL_REF_H */
diff --git a/include/linux/module.h b/include/linux/module.h
index aace066bad8f..b29e7458b966 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -350,7 +350,10 @@ struct module
350 struct tracepoint *tracepoints; 350 struct tracepoint *tracepoints;
351 unsigned int num_tracepoints; 351 unsigned int num_tracepoints;
352#endif 352#endif
353 353#ifdef HAVE_JUMP_LABEL
354 struct jump_entry *jump_entries;
355 unsigned int num_jump_entries;
356#endif
354#ifdef CONFIG_TRACING 357#ifdef CONFIG_TRACING
355 const char **trace_bprintk_fmt_start; 358 const char **trace_bprintk_fmt_start;
356 unsigned int num_trace_bprintk_fmt; 359 unsigned int num_trace_bprintk_fmt;
diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h
index 5171639ecf0f..32fb81212fd1 100644
--- a/include/linux/oprofile.h
+++ b/include/linux/oprofile.h
@@ -15,6 +15,7 @@
15 15
16#include <linux/types.h> 16#include <linux/types.h>
17#include <linux/spinlock.h> 17#include <linux/spinlock.h>
18#include <linux/init.h>
18#include <asm/atomic.h> 19#include <asm/atomic.h>
19 20
20/* Each escaped entry is prefixed by ESCAPE_CODE 21/* Each escaped entry is prefixed by ESCAPE_CODE
@@ -185,4 +186,10 @@ int oprofile_add_data(struct op_entry *entry, unsigned long val);
185int oprofile_add_data64(struct op_entry *entry, u64 val); 186int oprofile_add_data64(struct op_entry *entry, u64 val);
186int oprofile_write_commit(struct op_entry *entry); 187int oprofile_write_commit(struct op_entry *entry);
187 188
189#ifdef CONFIG_PERF_EVENTS
190int __init oprofile_perf_init(struct oprofile_operations *ops);
191void oprofile_perf_exit(void);
192char *op_name_from_perf_id(void);
193#endif /* CONFIG_PERF_EVENTS */
194
188#endif /* OPROFILE_H */ 195#endif /* OPROFILE_H */
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 49466b13c5c6..0eb50832aa00 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -39,6 +39,15 @@
39 preempt_enable(); \ 39 preempt_enable(); \
40} while (0) 40} while (0)
41 41
42#define get_cpu_ptr(var) ({ \
43 preempt_disable(); \
44 this_cpu_ptr(var); })
45
46#define put_cpu_ptr(var) do { \
47 (void)(var); \
48 preempt_enable(); \
49} while (0)
50
42#ifdef CONFIG_SMP 51#ifdef CONFIG_SMP
43 52
44/* minimum unit size, also is the maximum supported allocation size */ 53/* minimum unit size, also is the maximum supported allocation size */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 716f99b682c1..057bf22a8323 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -486,6 +486,8 @@ struct perf_guest_info_callbacks {
486#include <linux/workqueue.h> 486#include <linux/workqueue.h>
487#include <linux/ftrace.h> 487#include <linux/ftrace.h>
488#include <linux/cpu.h> 488#include <linux/cpu.h>
489#include <linux/irq_work.h>
490#include <linux/jump_label_ref.h>
489#include <asm/atomic.h> 491#include <asm/atomic.h>
490#include <asm/local.h> 492#include <asm/local.h>
491 493
@@ -529,16 +531,22 @@ struct hw_perf_event {
529 int last_cpu; 531 int last_cpu;
530 }; 532 };
531 struct { /* software */ 533 struct { /* software */
532 s64 remaining;
533 struct hrtimer hrtimer; 534 struct hrtimer hrtimer;
534 }; 535 };
535#ifdef CONFIG_HAVE_HW_BREAKPOINT 536#ifdef CONFIG_HAVE_HW_BREAKPOINT
536 struct { /* breakpoint */ 537 struct { /* breakpoint */
537 struct arch_hw_breakpoint info; 538 struct arch_hw_breakpoint info;
538 struct list_head bp_list; 539 struct list_head bp_list;
540 /*
541 * Crufty hack to avoid the chicken and egg
542 * problem hw_breakpoint has with context
543 * creation and event initalization.
544 */
545 struct task_struct *bp_target;
539 }; 546 };
540#endif 547#endif
541 }; 548 };
549 int state;
542 local64_t prev_count; 550 local64_t prev_count;
543 u64 sample_period; 551 u64 sample_period;
544 u64 last_period; 552 u64 last_period;
@@ -550,6 +558,13 @@ struct hw_perf_event {
550#endif 558#endif
551}; 559};
552 560
561/*
562 * hw_perf_event::state flags
563 */
564#define PERF_HES_STOPPED 0x01 /* the counter is stopped */
565#define PERF_HES_UPTODATE 0x02 /* event->count up-to-date */
566#define PERF_HES_ARCH 0x04
567
553struct perf_event; 568struct perf_event;
554 569
555/* 570/*
@@ -561,36 +576,70 @@ struct perf_event;
561 * struct pmu - generic performance monitoring unit 576 * struct pmu - generic performance monitoring unit
562 */ 577 */
563struct pmu { 578struct pmu {
564 int (*enable) (struct perf_event *event); 579 struct list_head entry;
565 void (*disable) (struct perf_event *event); 580
566 int (*start) (struct perf_event *event); 581 int * __percpu pmu_disable_count;
567 void (*stop) (struct perf_event *event); 582 struct perf_cpu_context * __percpu pmu_cpu_context;
568 void (*read) (struct perf_event *event); 583 int task_ctx_nr;
569 void (*unthrottle) (struct perf_event *event); 584
585 /*
586 * Fully disable/enable this PMU, can be used to protect from the PMI
587 * as well as for lazy/batch writing of the MSRs.
588 */
589 void (*pmu_enable) (struct pmu *pmu); /* optional */
590 void (*pmu_disable) (struct pmu *pmu); /* optional */
570 591
571 /* 592 /*
572 * Group events scheduling is treated as a transaction, add group 593 * Try and initialize the event for this PMU.
573 * events as a whole and perform one schedulability test. If the test 594 * Should return -ENOENT when the @event doesn't match this PMU.
574 * fails, roll back the whole group
575 */ 595 */
596 int (*event_init) (struct perf_event *event);
597
598#define PERF_EF_START 0x01 /* start the counter when adding */
599#define PERF_EF_RELOAD 0x02 /* reload the counter when starting */
600#define PERF_EF_UPDATE 0x04 /* update the counter when stopping */
576 601
577 /* 602 /*
578 * Start the transaction, after this ->enable() doesn't need 603 * Adds/Removes a counter to/from the PMU, can be done inside
579 * to do schedulability tests. 604 * a transaction, see the ->*_txn() methods.
580 */ 605 */
581 void (*start_txn) (const struct pmu *pmu); 606 int (*add) (struct perf_event *event, int flags);
607 void (*del) (struct perf_event *event, int flags);
608
582 /* 609 /*
583 * If ->start_txn() disabled the ->enable() schedulability test 610 * Starts/Stops a counter present on the PMU. The PMI handler
611 * should stop the counter when perf_event_overflow() returns
612 * !0. ->start() will be used to continue.
613 */
614 void (*start) (struct perf_event *event, int flags);
615 void (*stop) (struct perf_event *event, int flags);
616
617 /*
618 * Updates the counter value of the event.
619 */
620 void (*read) (struct perf_event *event);
621
622 /*
623 * Group events scheduling is treated as a transaction, add
624 * group events as a whole and perform one schedulability test.
625 * If the test fails, roll back the whole group
626 *
627 * Start the transaction, after this ->add() doesn't need to
628 * do schedulability tests.
629 */
630 void (*start_txn) (struct pmu *pmu); /* optional */
631 /*
632 * If ->start_txn() disabled the ->add() schedulability test
584 * then ->commit_txn() is required to perform one. On success 633 * then ->commit_txn() is required to perform one. On success
585 * the transaction is closed. On error the transaction is kept 634 * the transaction is closed. On error the transaction is kept
586 * open until ->cancel_txn() is called. 635 * open until ->cancel_txn() is called.
587 */ 636 */
588 int (*commit_txn) (const struct pmu *pmu); 637 int (*commit_txn) (struct pmu *pmu); /* optional */
589 /* 638 /*
590 * Will cancel the transaction, assumes ->disable() is called for 639 * Will cancel the transaction, assumes ->del() is called
591 * each successfull ->enable() during the transaction. 640 * for each successfull ->add() during the transaction.
592 */ 641 */
593 void (*cancel_txn) (const struct pmu *pmu); 642 void (*cancel_txn) (struct pmu *pmu); /* optional */
594}; 643};
595 644
596/** 645/**
@@ -631,11 +680,6 @@ struct perf_buffer {
631 void *data_pages[0]; 680 void *data_pages[0];
632}; 681};
633 682
634struct perf_pending_entry {
635 struct perf_pending_entry *next;
636 void (*func)(struct perf_pending_entry *);
637};
638
639struct perf_sample_data; 683struct perf_sample_data;
640 684
641typedef void (*perf_overflow_handler_t)(struct perf_event *, int, 685typedef void (*perf_overflow_handler_t)(struct perf_event *, int,
@@ -656,6 +700,7 @@ struct swevent_hlist {
656 700
657#define PERF_ATTACH_CONTEXT 0x01 701#define PERF_ATTACH_CONTEXT 0x01
658#define PERF_ATTACH_GROUP 0x02 702#define PERF_ATTACH_GROUP 0x02
703#define PERF_ATTACH_TASK 0x04
659 704
660/** 705/**
661 * struct perf_event - performance event kernel representation: 706 * struct perf_event - performance event kernel representation:
@@ -669,7 +714,7 @@ struct perf_event {
669 int nr_siblings; 714 int nr_siblings;
670 int group_flags; 715 int group_flags;
671 struct perf_event *group_leader; 716 struct perf_event *group_leader;
672 const struct pmu *pmu; 717 struct pmu *pmu;
673 718
674 enum perf_event_active_state state; 719 enum perf_event_active_state state;
675 unsigned int attach_state; 720 unsigned int attach_state;
@@ -743,7 +788,7 @@ struct perf_event {
743 int pending_wakeup; 788 int pending_wakeup;
744 int pending_kill; 789 int pending_kill;
745 int pending_disable; 790 int pending_disable;
746 struct perf_pending_entry pending; 791 struct irq_work pending;
747 792
748 atomic_t event_limit; 793 atomic_t event_limit;
749 794
@@ -763,12 +808,19 @@ struct perf_event {
763#endif /* CONFIG_PERF_EVENTS */ 808#endif /* CONFIG_PERF_EVENTS */
764}; 809};
765 810
811enum perf_event_context_type {
812 task_context,
813 cpu_context,
814};
815
766/** 816/**
767 * struct perf_event_context - event context structure 817 * struct perf_event_context - event context structure
768 * 818 *
769 * Used as a container for task events and CPU events as well: 819 * Used as a container for task events and CPU events as well:
770 */ 820 */
771struct perf_event_context { 821struct perf_event_context {
822 enum perf_event_context_type type;
823 struct pmu *pmu;
772 /* 824 /*
773 * Protect the states of the events in the list, 825 * Protect the states of the events in the list,
774 * nr_active, and the list: 826 * nr_active, and the list:
@@ -808,6 +860,12 @@ struct perf_event_context {
808 struct rcu_head rcu_head; 860 struct rcu_head rcu_head;
809}; 861};
810 862
863/*
864 * Number of contexts where an event can trigger:
865 * task, softirq, hardirq, nmi.
866 */
867#define PERF_NR_CONTEXTS 4
868
811/** 869/**
812 * struct perf_event_cpu_context - per cpu event context structure 870 * struct perf_event_cpu_context - per cpu event context structure
813 */ 871 */
@@ -815,18 +873,9 @@ struct perf_cpu_context {
815 struct perf_event_context ctx; 873 struct perf_event_context ctx;
816 struct perf_event_context *task_ctx; 874 struct perf_event_context *task_ctx;
817 int active_oncpu; 875 int active_oncpu;
818 int max_pertask;
819 int exclusive; 876 int exclusive;
820 struct swevent_hlist *swevent_hlist; 877 struct list_head rotation_list;
821 struct mutex hlist_mutex; 878 int jiffies_interval;
822 int hlist_refcount;
823
824 /*
825 * Recursion avoidance:
826 *
827 * task, softirq, irq, nmi context
828 */
829 int recursion[4];
830}; 879};
831 880
832struct perf_output_handle { 881struct perf_output_handle {
@@ -842,26 +891,34 @@ struct perf_output_handle {
842 891
843#ifdef CONFIG_PERF_EVENTS 892#ifdef CONFIG_PERF_EVENTS
844 893
845/* 894extern int perf_pmu_register(struct pmu *pmu);
846 * Set by architecture code: 895extern void perf_pmu_unregister(struct pmu *pmu);
847 */ 896
848extern int perf_max_events; 897extern int perf_num_counters(void);
898extern const char *perf_pmu_name(void);
899extern void __perf_event_task_sched_in(struct task_struct *task);
900extern void __perf_event_task_sched_out(struct task_struct *task, struct task_struct *next);
849 901
850extern const struct pmu *hw_perf_event_init(struct perf_event *event); 902extern atomic_t perf_task_events;
903
904static inline void perf_event_task_sched_in(struct task_struct *task)
905{
906 COND_STMT(&perf_task_events, __perf_event_task_sched_in(task));
907}
908
909static inline
910void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next)
911{
912 COND_STMT(&perf_task_events, __perf_event_task_sched_out(task, next));
913}
851 914
852extern void perf_event_task_sched_in(struct task_struct *task);
853extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next);
854extern void perf_event_task_tick(struct task_struct *task);
855extern int perf_event_init_task(struct task_struct *child); 915extern int perf_event_init_task(struct task_struct *child);
856extern void perf_event_exit_task(struct task_struct *child); 916extern void perf_event_exit_task(struct task_struct *child);
857extern void perf_event_free_task(struct task_struct *task); 917extern void perf_event_free_task(struct task_struct *task);
858extern void set_perf_event_pending(void); 918extern void perf_event_delayed_put(struct task_struct *task);
859extern void perf_event_do_pending(void);
860extern void perf_event_print_debug(void); 919extern void perf_event_print_debug(void);
861extern void __perf_disable(void); 920extern void perf_pmu_disable(struct pmu *pmu);
862extern bool __perf_enable(void); 921extern void perf_pmu_enable(struct pmu *pmu);
863extern void perf_disable(void);
864extern void perf_enable(void);
865extern int perf_event_task_disable(void); 922extern int perf_event_task_disable(void);
866extern int perf_event_task_enable(void); 923extern int perf_event_task_enable(void);
867extern void perf_event_update_userpage(struct perf_event *event); 924extern void perf_event_update_userpage(struct perf_event *event);
@@ -869,7 +926,7 @@ extern int perf_event_release_kernel(struct perf_event *event);
869extern struct perf_event * 926extern struct perf_event *
870perf_event_create_kernel_counter(struct perf_event_attr *attr, 927perf_event_create_kernel_counter(struct perf_event_attr *attr,
871 int cpu, 928 int cpu,
872 pid_t pid, 929 struct task_struct *task,
873 perf_overflow_handler_t callback); 930 perf_overflow_handler_t callback);
874extern u64 perf_event_read_value(struct perf_event *event, 931extern u64 perf_event_read_value(struct perf_event *event,
875 u64 *enabled, u64 *running); 932 u64 *enabled, u64 *running);
@@ -920,14 +977,7 @@ extern int perf_event_overflow(struct perf_event *event, int nmi,
920 */ 977 */
921static inline int is_software_event(struct perf_event *event) 978static inline int is_software_event(struct perf_event *event)
922{ 979{
923 switch (event->attr.type) { 980 return event->pmu->task_ctx_nr == perf_sw_context;
924 case PERF_TYPE_SOFTWARE:
925 case PERF_TYPE_TRACEPOINT:
926 /* for now the breakpoint stuff also works as software event */
927 case PERF_TYPE_BREAKPOINT:
928 return 1;
929 }
930 return 0;
931} 981}
932 982
933extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; 983extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
@@ -954,18 +1004,20 @@ static inline void perf_fetch_caller_regs(struct pt_regs *regs)
954 perf_arch_fetch_caller_regs(regs, CALLER_ADDR0); 1004 perf_arch_fetch_caller_regs(regs, CALLER_ADDR0);
955} 1005}
956 1006
957static inline void 1007static __always_inline void
958perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) 1008perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
959{ 1009{
960 if (atomic_read(&perf_swevent_enabled[event_id])) { 1010 struct pt_regs hot_regs;
961 struct pt_regs hot_regs; 1011
962 1012 JUMP_LABEL(&perf_swevent_enabled[event_id], have_event);
963 if (!regs) { 1013 return;
964 perf_fetch_caller_regs(&hot_regs); 1014
965 regs = &hot_regs; 1015have_event:
966 } 1016 if (!regs) {
967 __perf_sw_event(event_id, nr, nmi, regs, addr); 1017 perf_fetch_caller_regs(&hot_regs);
1018 regs = &hot_regs;
968 } 1019 }
1020 __perf_sw_event(event_id, nr, nmi, regs, addr);
969} 1021}
970 1022
971extern void perf_event_mmap(struct vm_area_struct *vma); 1023extern void perf_event_mmap(struct vm_area_struct *vma);
@@ -976,7 +1028,21 @@ extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks
976extern void perf_event_comm(struct task_struct *tsk); 1028extern void perf_event_comm(struct task_struct *tsk);
977extern void perf_event_fork(struct task_struct *tsk); 1029extern void perf_event_fork(struct task_struct *tsk);
978 1030
979extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); 1031/* Callchains */
1032DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry);
1033
1034extern void perf_callchain_user(struct perf_callchain_entry *entry,
1035 struct pt_regs *regs);
1036extern void perf_callchain_kernel(struct perf_callchain_entry *entry,
1037 struct pt_regs *regs);
1038
1039
1040static inline void
1041perf_callchain_store(struct perf_callchain_entry *entry, u64 ip)
1042{
1043 if (entry->nr < PERF_MAX_STACK_DEPTH)
1044 entry->ip[entry->nr++] = ip;
1045}
980 1046
981extern int sysctl_perf_event_paranoid; 1047extern int sysctl_perf_event_paranoid;
982extern int sysctl_perf_event_mlock; 1048extern int sysctl_perf_event_mlock;
@@ -1019,21 +1085,18 @@ extern int perf_swevent_get_recursion_context(void);
1019extern void perf_swevent_put_recursion_context(int rctx); 1085extern void perf_swevent_put_recursion_context(int rctx);
1020extern void perf_event_enable(struct perf_event *event); 1086extern void perf_event_enable(struct perf_event *event);
1021extern void perf_event_disable(struct perf_event *event); 1087extern void perf_event_disable(struct perf_event *event);
1088extern void perf_event_task_tick(void);
1022#else 1089#else
1023static inline void 1090static inline void
1024perf_event_task_sched_in(struct task_struct *task) { } 1091perf_event_task_sched_in(struct task_struct *task) { }
1025static inline void 1092static inline void
1026perf_event_task_sched_out(struct task_struct *task, 1093perf_event_task_sched_out(struct task_struct *task,
1027 struct task_struct *next) { } 1094 struct task_struct *next) { }
1028static inline void
1029perf_event_task_tick(struct task_struct *task) { }
1030static inline int perf_event_init_task(struct task_struct *child) { return 0; } 1095static inline int perf_event_init_task(struct task_struct *child) { return 0; }
1031static inline void perf_event_exit_task(struct task_struct *child) { } 1096static inline void perf_event_exit_task(struct task_struct *child) { }
1032static inline void perf_event_free_task(struct task_struct *task) { } 1097static inline void perf_event_free_task(struct task_struct *task) { }
1033static inline void perf_event_do_pending(void) { } 1098static inline void perf_event_delayed_put(struct task_struct *task) { }
1034static inline void perf_event_print_debug(void) { } 1099static inline void perf_event_print_debug(void) { }
1035static inline void perf_disable(void) { }
1036static inline void perf_enable(void) { }
1037static inline int perf_event_task_disable(void) { return -EINVAL; } 1100static inline int perf_event_task_disable(void) { return -EINVAL; }
1038static inline int perf_event_task_enable(void) { return -EINVAL; } 1101static inline int perf_event_task_enable(void) { return -EINVAL; }
1039 1102
@@ -1056,6 +1119,7 @@ static inline int perf_swevent_get_recursion_context(void) { return -1; }
1056static inline void perf_swevent_put_recursion_context(int rctx) { } 1119static inline void perf_swevent_put_recursion_context(int rctx) { }
1057static inline void perf_event_enable(struct perf_event *event) { } 1120static inline void perf_event_enable(struct perf_event *event) { }
1058static inline void perf_event_disable(struct perf_event *event) { } 1121static inline void perf_event_disable(struct perf_event *event) { }
1122static inline void perf_event_task_tick(void) { }
1059#endif 1123#endif
1060 1124
1061#define perf_output_put(handle, x) \ 1125#define perf_output_put(handle, x) \
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e18473f0eb78..61b4ecf1da50 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1160,6 +1160,13 @@ struct sched_rt_entity {
1160 1160
1161struct rcu_node; 1161struct rcu_node;
1162 1162
1163enum perf_event_task_context {
1164 perf_invalid_context = -1,
1165 perf_hw_context = 0,
1166 perf_sw_context,
1167 perf_nr_task_contexts,
1168};
1169
1163struct task_struct { 1170struct task_struct {
1164 volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ 1171 volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
1165 void *stack; 1172 void *stack;
@@ -1433,7 +1440,7 @@ struct task_struct {
1433 struct futex_pi_state *pi_state_cache; 1440 struct futex_pi_state *pi_state_cache;
1434#endif 1441#endif
1435#ifdef CONFIG_PERF_EVENTS 1442#ifdef CONFIG_PERF_EVENTS
1436 struct perf_event_context *perf_event_ctxp; 1443 struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
1437 struct mutex perf_event_mutex; 1444 struct mutex perf_event_mutex;
1438 struct list_head perf_event_list; 1445 struct list_head perf_event_list;
1439#endif 1446#endif
diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index 6b524a0d02e4..1808960c5059 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -126,8 +126,8 @@ int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus);
126 126
127#else /* CONFIG_STOP_MACHINE && CONFIG_SMP */ 127#else /* CONFIG_STOP_MACHINE && CONFIG_SMP */
128 128
129static inline int stop_machine(int (*fn)(void *), void *data, 129static inline int __stop_machine(int (*fn)(void *), void *data,
130 const struct cpumask *cpus) 130 const struct cpumask *cpus)
131{ 131{
132 int ret; 132 int ret;
133 local_irq_disable(); 133 local_irq_disable();
@@ -136,5 +136,11 @@ static inline int stop_machine(int (*fn)(void *), void *data,
136 return ret; 136 return ret;
137} 137}
138 138
139static inline int stop_machine(int (*fn)(void *), void *data,
140 const struct cpumask *cpus)
141{
142 return __stop_machine(fn, data, cpus);
143}
144
139#endif /* CONFIG_STOP_MACHINE && CONFIG_SMP */ 145#endif /* CONFIG_STOP_MACHINE && CONFIG_SMP */
140#endif /* _LINUX_STOP_MACHINE */ 146#endif /* _LINUX_STOP_MACHINE */
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 103d1b61aacb..a4a90b6726ce 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -17,6 +17,7 @@
17#include <linux/errno.h> 17#include <linux/errno.h>
18#include <linux/types.h> 18#include <linux/types.h>
19#include <linux/rcupdate.h> 19#include <linux/rcupdate.h>
20#include <linux/jump_label.h>
20 21
21struct module; 22struct module;
22struct tracepoint; 23struct tracepoint;
@@ -145,7 +146,9 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin,
145 extern struct tracepoint __tracepoint_##name; \ 146 extern struct tracepoint __tracepoint_##name; \
146 static inline void trace_##name(proto) \ 147 static inline void trace_##name(proto) \
147 { \ 148 { \
148 if (unlikely(__tracepoint_##name.state)) \ 149 JUMP_LABEL(&__tracepoint_##name.state, do_trace); \
150 return; \
151do_trace: \
149 __DO_TRACE(&__tracepoint_##name, \ 152 __DO_TRACE(&__tracepoint_##name, \
150 TP_PROTO(data_proto), \ 153 TP_PROTO(data_proto), \
151 TP_ARGS(data_args)); \ 154 TP_ARGS(data_args)); \
diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h
index 0e4cfb694fe7..6fa7cbab7d93 100644
--- a/include/trace/events/irq.h
+++ b/include/trace/events/irq.h
@@ -5,7 +5,9 @@
5#define _TRACE_IRQ_H 5#define _TRACE_IRQ_H
6 6
7#include <linux/tracepoint.h> 7#include <linux/tracepoint.h>
8#include <linux/interrupt.h> 8
9struct irqaction;
10struct softirq_action;
9 11
10#define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq } 12#define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq }
11#define show_softirq_name(val) \ 13#define show_softirq_name(val) \
@@ -93,7 +95,10 @@ DECLARE_EVENT_CLASS(softirq,
93 ), 95 ),
94 96
95 TP_fast_assign( 97 TP_fast_assign(
96 __entry->vec = (int)(h - vec); 98 if (vec)
99 __entry->vec = (int)(h - vec);
100 else
101 __entry->vec = (int)(long)h;
97 ), 102 ),
98 103
99 TP_printk("vec=%d [action=%s]", __entry->vec, 104 TP_printk("vec=%d [action=%s]", __entry->vec,
@@ -136,6 +141,23 @@ DEFINE_EVENT(softirq, softirq_exit,
136 TP_ARGS(h, vec) 141 TP_ARGS(h, vec)
137); 142);
138 143
144/**
145 * softirq_raise - called immediately when a softirq is raised
146 * @h: pointer to struct softirq_action
147 * @vec: pointer to first struct softirq_action in softirq_vec array
148 *
149 * The @h parameter contains a pointer to the softirq vector number which is
150 * raised. @vec is NULL and it means @h includes vector number not
151 * softirq_action. When used in combination with the softirq_entry tracepoint
152 * we can determine the softirq raise latency.
153 */
154DEFINE_EVENT(softirq, softirq_raise,
155
156 TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
157
158 TP_ARGS(h, vec)
159);
160
139#endif /* _TRACE_IRQ_H */ 161#endif /* _TRACE_IRQ_H */
140 162
141/* This part must be outside protection */ 163/* This part must be outside protection */
diff --git a/include/trace/events/napi.h b/include/trace/events/napi.h
index 188deca2f3c7..8fe1e93f531d 100644
--- a/include/trace/events/napi.h
+++ b/include/trace/events/napi.h
@@ -6,10 +6,31 @@
6 6
7#include <linux/netdevice.h> 7#include <linux/netdevice.h>
8#include <linux/tracepoint.h> 8#include <linux/tracepoint.h>
9#include <linux/ftrace.h>
10
11#define NO_DEV "(no_device)"
12
13TRACE_EVENT(napi_poll,
9 14
10DECLARE_TRACE(napi_poll,
11 TP_PROTO(struct napi_struct *napi), 15 TP_PROTO(struct napi_struct *napi),
12 TP_ARGS(napi)); 16
17 TP_ARGS(napi),
18
19 TP_STRUCT__entry(
20 __field( struct napi_struct *, napi)
21 __string( dev_name, napi->dev ? napi->dev->name : NO_DEV)
22 ),
23
24 TP_fast_assign(
25 __entry->napi = napi;
26 __assign_str(dev_name, napi->dev ? napi->dev->name : NO_DEV);
27 ),
28
29 TP_printk("napi poll on napi struct %p for device %s",
30 __entry->napi, __get_str(dev_name))
31);
32
33#undef NO_DEV
13 34
14#endif /* _TRACE_NAPI_H_ */ 35#endif /* _TRACE_NAPI_H_ */
15 36
diff --git a/include/trace/events/net.h b/include/trace/events/net.h
new file mode 100644
index 000000000000..5f247f5ffc56
--- /dev/null
+++ b/include/trace/events/net.h
@@ -0,0 +1,82 @@
1#undef TRACE_SYSTEM
2#define TRACE_SYSTEM net
3
4#if !defined(_TRACE_NET_H) || defined(TRACE_HEADER_MULTI_READ)
5#define _TRACE_NET_H
6
7#include <linux/skbuff.h>
8#include <linux/netdevice.h>
9#include <linux/ip.h>
10#include <linux/tracepoint.h>
11
12TRACE_EVENT(net_dev_xmit,
13
14 TP_PROTO(struct sk_buff *skb,
15 int rc),
16
17 TP_ARGS(skb, rc),
18
19 TP_STRUCT__entry(
20 __field( void *, skbaddr )
21 __field( unsigned int, len )
22 __field( int, rc )
23 __string( name, skb->dev->name )
24 ),
25
26 TP_fast_assign(
27 __entry->skbaddr = skb;
28 __entry->len = skb->len;
29 __entry->rc = rc;
30 __assign_str(name, skb->dev->name);
31 ),
32
33 TP_printk("dev=%s skbaddr=%p len=%u rc=%d",
34 __get_str(name), __entry->skbaddr, __entry->len, __entry->rc)
35);
36
37DECLARE_EVENT_CLASS(net_dev_template,
38
39 TP_PROTO(struct sk_buff *skb),
40
41 TP_ARGS(skb),
42
43 TP_STRUCT__entry(
44 __field( void *, skbaddr )
45 __field( unsigned int, len )
46 __string( name, skb->dev->name )
47 ),
48
49 TP_fast_assign(
50 __entry->skbaddr = skb;
51 __entry->len = skb->len;
52 __assign_str(name, skb->dev->name);
53 ),
54
55 TP_printk("dev=%s skbaddr=%p len=%u",
56 __get_str(name), __entry->skbaddr, __entry->len)
57)
58
59DEFINE_EVENT(net_dev_template, net_dev_queue,
60
61 TP_PROTO(struct sk_buff *skb),
62
63 TP_ARGS(skb)
64);
65
66DEFINE_EVENT(net_dev_template, netif_receive_skb,
67
68 TP_PROTO(struct sk_buff *skb),
69
70 TP_ARGS(skb)
71);
72
73DEFINE_EVENT(net_dev_template, netif_rx,
74
75 TP_PROTO(struct sk_buff *skb),
76
77 TP_ARGS(skb)
78);
79#endif /* _TRACE_NET_H */
80
81/* This part must be outside protection */
82#include <trace/define_trace.h>
diff --git a/include/trace/events/power.h b/include/trace/events/power.h
index 35a2a6e7bf1e..286784d69b8f 100644
--- a/include/trace/events/power.h
+++ b/include/trace/events/power.h
@@ -10,12 +10,17 @@
10#ifndef _TRACE_POWER_ENUM_ 10#ifndef _TRACE_POWER_ENUM_
11#define _TRACE_POWER_ENUM_ 11#define _TRACE_POWER_ENUM_
12enum { 12enum {
13 POWER_NONE = 0, 13 POWER_NONE = 0,
14 POWER_CSTATE = 1, 14 POWER_CSTATE = 1, /* C-State */
15 POWER_PSTATE = 2, 15 POWER_PSTATE = 2, /* Fequency change or DVFS */
16 POWER_SSTATE = 3, /* Suspend */
16}; 17};
17#endif 18#endif
18 19
20/*
21 * The power events are used for cpuidle & suspend (power_start, power_end)
22 * and for cpufreq (power_frequency)
23 */
19DECLARE_EVENT_CLASS(power, 24DECLARE_EVENT_CLASS(power,
20 25
21 TP_PROTO(unsigned int type, unsigned int state, unsigned int cpu_id), 26 TP_PROTO(unsigned int type, unsigned int state, unsigned int cpu_id),
@@ -70,6 +75,85 @@ TRACE_EVENT(power_end,
70 75
71); 76);
72 77
78/*
79 * The clock events are used for clock enable/disable and for
80 * clock rate change
81 */
82DECLARE_EVENT_CLASS(clock,
83
84 TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id),
85
86 TP_ARGS(name, state, cpu_id),
87
88 TP_STRUCT__entry(
89 __string( name, name )
90 __field( u64, state )
91 __field( u64, cpu_id )
92 ),
93
94 TP_fast_assign(
95 __assign_str(name, name);
96 __entry->state = state;
97 __entry->cpu_id = cpu_id;
98 ),
99
100 TP_printk("%s state=%lu cpu_id=%lu", __get_str(name),
101 (unsigned long)__entry->state, (unsigned long)__entry->cpu_id)
102);
103
104DEFINE_EVENT(clock, clock_enable,
105
106 TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id),
107
108 TP_ARGS(name, state, cpu_id)
109);
110
111DEFINE_EVENT(clock, clock_disable,
112
113 TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id),
114
115 TP_ARGS(name, state, cpu_id)
116);
117
118DEFINE_EVENT(clock, clock_set_rate,
119
120 TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id),
121
122 TP_ARGS(name, state, cpu_id)
123);
124
125/*
126 * The power domain events are used for power domains transitions
127 */
128DECLARE_EVENT_CLASS(power_domain,
129
130 TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id),
131
132 TP_ARGS(name, state, cpu_id),
133
134 TP_STRUCT__entry(
135 __string( name, name )
136 __field( u64, state )
137 __field( u64, cpu_id )
138 ),
139
140 TP_fast_assign(
141 __assign_str(name, name);
142 __entry->state = state;
143 __entry->cpu_id = cpu_id;
144),
145
146 TP_printk("%s state=%lu cpu_id=%lu", __get_str(name),
147 (unsigned long)__entry->state, (unsigned long)__entry->cpu_id)
148);
149
150DEFINE_EVENT(power_domain, power_domain_target,
151
152 TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id),
153
154 TP_ARGS(name, state, cpu_id)
155);
156
73#endif /* _TRACE_POWER_H */ 157#endif /* _TRACE_POWER_H */
74 158
75/* This part must be outside protection */ 159/* This part must be outside protection */
diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
index 4b2be6dc76f0..75ce9d500d8e 100644
--- a/include/trace/events/skb.h
+++ b/include/trace/events/skb.h
@@ -35,6 +35,23 @@ TRACE_EVENT(kfree_skb,
35 __entry->skbaddr, __entry->protocol, __entry->location) 35 __entry->skbaddr, __entry->protocol, __entry->location)
36); 36);
37 37
38TRACE_EVENT(consume_skb,
39
40 TP_PROTO(struct sk_buff *skb),
41
42 TP_ARGS(skb),
43
44 TP_STRUCT__entry(
45 __field( void *, skbaddr )
46 ),
47
48 TP_fast_assign(
49 __entry->skbaddr = skb;
50 ),
51
52 TP_printk("skbaddr=%p", __entry->skbaddr)
53);
54
38TRACE_EVENT(skb_copy_datagram_iovec, 55TRACE_EVENT(skb_copy_datagram_iovec,
39 56
40 TP_PROTO(const struct sk_buff *skb, int len), 57 TP_PROTO(const struct sk_buff *skb, int len),
diff --git a/init/Kconfig b/init/Kconfig
index a619a1ac7f4c..7b920aafa98a 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -21,6 +21,13 @@ config CONSTRUCTORS
21 depends on !UML 21 depends on !UML
22 default y 22 default y
23 23
24config HAVE_IRQ_WORK
25 bool
26
27config IRQ_WORK
28 bool
29 depends on HAVE_IRQ_WORK
30
24menu "General setup" 31menu "General setup"
25 32
26config EXPERIMENTAL 33config EXPERIMENTAL
@@ -1005,6 +1012,7 @@ config PERF_EVENTS
1005 default y if (PROFILING || PERF_COUNTERS) 1012 default y if (PROFILING || PERF_COUNTERS)
1006 depends on HAVE_PERF_EVENTS 1013 depends on HAVE_PERF_EVENTS
1007 select ANON_INODES 1014 select ANON_INODES
1015 select IRQ_WORK
1008 help 1016 help
1009 Enable kernel support for various performance events provided 1017 Enable kernel support for various performance events provided
1010 by software and hardware. 1018 by software and hardware.
diff --git a/kernel/Makefile b/kernel/Makefile
index 17046b6e7c90..e2c9d52cfe9e 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -10,7 +10,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \
10 kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ 10 kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
11 hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ 11 hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
12 notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \ 12 notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \
13 async.o range.o 13 async.o range.o jump_label.o
14obj-$(CONFIG_HAVE_EARLY_RES) += early_res.o 14obj-$(CONFIG_HAVE_EARLY_RES) += early_res.o
15obj-y += groups.o 15obj-y += groups.o
16 16
@@ -23,6 +23,7 @@ CFLAGS_REMOVE_rtmutex-debug.o = -pg
23CFLAGS_REMOVE_cgroup-debug.o = -pg 23CFLAGS_REMOVE_cgroup-debug.o = -pg
24CFLAGS_REMOVE_sched_clock.o = -pg 24CFLAGS_REMOVE_sched_clock.o = -pg
25CFLAGS_REMOVE_perf_event.o = -pg 25CFLAGS_REMOVE_perf_event.o = -pg
26CFLAGS_REMOVE_irq_work.o = -pg
26endif 27endif
27 28
28obj-$(CONFIG_FREEZER) += freezer.o 29obj-$(CONFIG_FREEZER) += freezer.o
@@ -101,6 +102,7 @@ obj-$(CONFIG_TRACING) += trace/
101obj-$(CONFIG_X86_DS) += trace/ 102obj-$(CONFIG_X86_DS) += trace/
102obj-$(CONFIG_RING_BUFFER) += trace/ 103obj-$(CONFIG_RING_BUFFER) += trace/
103obj-$(CONFIG_SMP) += sched_cpupri.o 104obj-$(CONFIG_SMP) += sched_cpupri.o
105obj-$(CONFIG_IRQ_WORK) += irq_work.o
104obj-$(CONFIG_PERF_EVENTS) += perf_event.o 106obj-$(CONFIG_PERF_EVENTS) += perf_event.o
105obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o 107obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
106obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o 108obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o
diff --git a/kernel/exit.c b/kernel/exit.c
index 03120229db28..e2bdf37f9fde 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -149,9 +149,7 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
149{ 149{
150 struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); 150 struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
151 151
152#ifdef CONFIG_PERF_EVENTS 152 perf_event_delayed_put(tsk);
153 WARN_ON_ONCE(tsk->perf_event_ctxp);
154#endif
155 trace_sched_process_free(tsk); 153 trace_sched_process_free(tsk);
156 put_task_struct(tsk); 154 put_task_struct(tsk);
157} 155}
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index c7c2aed9e2dc..2c9120f0afca 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -113,12 +113,12 @@ static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
113 */ 113 */
114static int task_bp_pinned(struct perf_event *bp, enum bp_type_idx type) 114static int task_bp_pinned(struct perf_event *bp, enum bp_type_idx type)
115{ 115{
116 struct perf_event_context *ctx = bp->ctx; 116 struct task_struct *tsk = bp->hw.bp_target;
117 struct perf_event *iter; 117 struct perf_event *iter;
118 int count = 0; 118 int count = 0;
119 119
120 list_for_each_entry(iter, &bp_task_head, hw.bp_list) { 120 list_for_each_entry(iter, &bp_task_head, hw.bp_list) {
121 if (iter->ctx == ctx && find_slot_idx(iter) == type) 121 if (iter->hw.bp_target == tsk && find_slot_idx(iter) == type)
122 count += hw_breakpoint_weight(iter); 122 count += hw_breakpoint_weight(iter);
123 } 123 }
124 124
@@ -134,7 +134,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
134 enum bp_type_idx type) 134 enum bp_type_idx type)
135{ 135{
136 int cpu = bp->cpu; 136 int cpu = bp->cpu;
137 struct task_struct *tsk = bp->ctx->task; 137 struct task_struct *tsk = bp->hw.bp_target;
138 138
139 if (cpu >= 0) { 139 if (cpu >= 0) {
140 slots->pinned = per_cpu(nr_cpu_bp_pinned[type], cpu); 140 slots->pinned = per_cpu(nr_cpu_bp_pinned[type], cpu);
@@ -213,7 +213,7 @@ toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
213 int weight) 213 int weight)
214{ 214{
215 int cpu = bp->cpu; 215 int cpu = bp->cpu;
216 struct task_struct *tsk = bp->ctx->task; 216 struct task_struct *tsk = bp->hw.bp_target;
217 217
218 /* Pinned counter cpu profiling */ 218 /* Pinned counter cpu profiling */
219 if (!tsk) { 219 if (!tsk) {
@@ -433,8 +433,7 @@ register_user_hw_breakpoint(struct perf_event_attr *attr,
433 perf_overflow_handler_t triggered, 433 perf_overflow_handler_t triggered,
434 struct task_struct *tsk) 434 struct task_struct *tsk)
435{ 435{
436 return perf_event_create_kernel_counter(attr, -1, task_pid_vnr(tsk), 436 return perf_event_create_kernel_counter(attr, -1, tsk, triggered);
437 triggered);
438} 437}
439EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); 438EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
440 439
@@ -516,7 +515,7 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr,
516 get_online_cpus(); 515 get_online_cpus();
517 for_each_online_cpu(cpu) { 516 for_each_online_cpu(cpu) {
518 pevent = per_cpu_ptr(cpu_events, cpu); 517 pevent = per_cpu_ptr(cpu_events, cpu);
519 bp = perf_event_create_kernel_counter(attr, cpu, -1, triggered); 518 bp = perf_event_create_kernel_counter(attr, cpu, NULL, triggered);
520 519
521 *pevent = bp; 520 *pevent = bp;
522 521
@@ -566,6 +565,61 @@ static struct notifier_block hw_breakpoint_exceptions_nb = {
566 .priority = 0x7fffffff 565 .priority = 0x7fffffff
567}; 566};
568 567
568static void bp_perf_event_destroy(struct perf_event *event)
569{
570 release_bp_slot(event);
571}
572
573static int hw_breakpoint_event_init(struct perf_event *bp)
574{
575 int err;
576
577 if (bp->attr.type != PERF_TYPE_BREAKPOINT)
578 return -ENOENT;
579
580 err = register_perf_hw_breakpoint(bp);
581 if (err)
582 return err;
583
584 bp->destroy = bp_perf_event_destroy;
585
586 return 0;
587}
588
589static int hw_breakpoint_add(struct perf_event *bp, int flags)
590{
591 if (!(flags & PERF_EF_START))
592 bp->hw.state = PERF_HES_STOPPED;
593
594 return arch_install_hw_breakpoint(bp);
595}
596
597static void hw_breakpoint_del(struct perf_event *bp, int flags)
598{
599 arch_uninstall_hw_breakpoint(bp);
600}
601
602static void hw_breakpoint_start(struct perf_event *bp, int flags)
603{
604 bp->hw.state = 0;
605}
606
607static void hw_breakpoint_stop(struct perf_event *bp, int flags)
608{
609 bp->hw.state = PERF_HES_STOPPED;
610}
611
612static struct pmu perf_breakpoint = {
613 .task_ctx_nr = perf_sw_context, /* could eventually get its own */
614
615 .event_init = hw_breakpoint_event_init,
616 .add = hw_breakpoint_add,
617 .del = hw_breakpoint_del,
618 .start = hw_breakpoint_start,
619 .stop = hw_breakpoint_stop,
620 .read = hw_breakpoint_pmu_read,
621};
622
569static int __init init_hw_breakpoint(void) 623static int __init init_hw_breakpoint(void)
570{ 624{
571 unsigned int **task_bp_pinned; 625 unsigned int **task_bp_pinned;
@@ -587,6 +641,8 @@ static int __init init_hw_breakpoint(void)
587 641
588 constraints_initialized = 1; 642 constraints_initialized = 1;
589 643
644 perf_pmu_register(&perf_breakpoint);
645
590 return register_die_notifier(&hw_breakpoint_exceptions_nb); 646 return register_die_notifier(&hw_breakpoint_exceptions_nb);
591 647
592 err_alloc: 648 err_alloc:
@@ -602,8 +658,3 @@ static int __init init_hw_breakpoint(void)
602core_initcall(init_hw_breakpoint); 658core_initcall(init_hw_breakpoint);
603 659
604 660
605struct pmu perf_ops_bp = {
606 .enable = arch_install_hw_breakpoint,
607 .disable = arch_uninstall_hw_breakpoint,
608 .read = hw_breakpoint_pmu_read,
609};
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
new file mode 100644
index 000000000000..f16763ff8481
--- /dev/null
+++ b/kernel/irq_work.c
@@ -0,0 +1,164 @@
1/*
2 * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
3 *
4 * Provides a framework for enqueueing and running callbacks from hardirq
5 * context. The enqueueing is NMI-safe.
6 */
7
8#include <linux/kernel.h>
9#include <linux/module.h>
10#include <linux/irq_work.h>
11#include <linux/hardirq.h>
12
13/*
14 * An entry can be in one of four states:
15 *
16 * free NULL, 0 -> {claimed} : free to be used
17 * claimed NULL, 3 -> {pending} : claimed to be enqueued
18 * pending next, 3 -> {busy} : queued, pending callback
19 * busy NULL, 2 -> {free, claimed} : callback in progress, can be claimed
20 *
21 * We use the lower two bits of the next pointer to keep PENDING and BUSY
22 * flags.
23 */
24
25#define IRQ_WORK_PENDING 1UL
26#define IRQ_WORK_BUSY 2UL
27#define IRQ_WORK_FLAGS 3UL
28
29static inline bool irq_work_is_set(struct irq_work *entry, int flags)
30{
31 return (unsigned long)entry->next & flags;
32}
33
34static inline struct irq_work *irq_work_next(struct irq_work *entry)
35{
36 unsigned long next = (unsigned long)entry->next;
37 next &= ~IRQ_WORK_FLAGS;
38 return (struct irq_work *)next;
39}
40
41static inline struct irq_work *next_flags(struct irq_work *entry, int flags)
42{
43 unsigned long next = (unsigned long)entry;
44 next |= flags;
45 return (struct irq_work *)next;
46}
47
48static DEFINE_PER_CPU(struct irq_work *, irq_work_list);
49
50/*
51 * Claim the entry so that no one else will poke at it.
52 */
53static bool irq_work_claim(struct irq_work *entry)
54{
55 struct irq_work *next, *nflags;
56
57 do {
58 next = entry->next;
59 if ((unsigned long)next & IRQ_WORK_PENDING)
60 return false;
61 nflags = next_flags(next, IRQ_WORK_FLAGS);
62 } while (cmpxchg(&entry->next, next, nflags) != next);
63
64 return true;
65}
66
67
68void __weak arch_irq_work_raise(void)
69{
70 /*
71 * Lame architectures will get the timer tick callback
72 */
73}
74
75/*
76 * Queue the entry and raise the IPI if needed.
77 */
78static void __irq_work_queue(struct irq_work *entry)
79{
80 struct irq_work **head, *next;
81
82 head = &get_cpu_var(irq_work_list);
83
84 do {
85 next = *head;
86 /* Can assign non-atomic because we keep the flags set. */
87 entry->next = next_flags(next, IRQ_WORK_FLAGS);
88 } while (cmpxchg(head, next, entry) != next);
89
90 /* The list was empty, raise self-interrupt to start processing. */
91 if (!irq_work_next(entry))
92 arch_irq_work_raise();
93
94 put_cpu_var(irq_work_list);
95}
96
97/*
98 * Enqueue the irq_work @entry, returns true on success, failure when the
99 * @entry was already enqueued by someone else.
100 *
101 * Can be re-enqueued while the callback is still in progress.
102 */
103bool irq_work_queue(struct irq_work *entry)
104{
105 if (!irq_work_claim(entry)) {
106 /*
107 * Already enqueued, can't do!
108 */
109 return false;
110 }
111
112 __irq_work_queue(entry);
113 return true;
114}
115EXPORT_SYMBOL_GPL(irq_work_queue);
116
117/*
118 * Run the irq_work entries on this cpu. Requires to be ran from hardirq
119 * context with local IRQs disabled.
120 */
121void irq_work_run(void)
122{
123 struct irq_work *list, **head;
124
125 head = &__get_cpu_var(irq_work_list);
126 if (*head == NULL)
127 return;
128
129 BUG_ON(!in_irq());
130 BUG_ON(!irqs_disabled());
131
132 list = xchg(head, NULL);
133 while (list != NULL) {
134 struct irq_work *entry = list;
135
136 list = irq_work_next(list);
137
138 /*
139 * Clear the PENDING bit, after this point the @entry
140 * can be re-used.
141 */
142 entry->next = next_flags(NULL, IRQ_WORK_BUSY);
143 entry->func(entry);
144 /*
145 * Clear the BUSY bit and return to the free state if
146 * no-one else claimed it meanwhile.
147 */
148 cmpxchg(&entry->next, next_flags(NULL, IRQ_WORK_BUSY), NULL);
149 }
150}
151EXPORT_SYMBOL_GPL(irq_work_run);
152
153/*
154 * Synchronize against the irq_work @entry, ensures the entry is not
155 * currently in use.
156 */
157void irq_work_sync(struct irq_work *entry)
158{
159 WARN_ON_ONCE(irqs_disabled());
160
161 while (irq_work_is_set(entry, IRQ_WORK_BUSY))
162 cpu_relax();
163}
164EXPORT_SYMBOL_GPL(irq_work_sync);
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
new file mode 100644
index 000000000000..7be868bf25c6
--- /dev/null
+++ b/kernel/jump_label.c
@@ -0,0 +1,429 @@
1/*
2 * jump label support
3 *
4 * Copyright (C) 2009 Jason Baron <jbaron@redhat.com>
5 *
6 */
7#include <linux/jump_label.h>
8#include <linux/memory.h>
9#include <linux/uaccess.h>
10#include <linux/module.h>
11#include <linux/list.h>
12#include <linux/jhash.h>
13#include <linux/slab.h>
14#include <linux/sort.h>
15#include <linux/err.h>
16
17#ifdef HAVE_JUMP_LABEL
18
19#define JUMP_LABEL_HASH_BITS 6
20#define JUMP_LABEL_TABLE_SIZE (1 << JUMP_LABEL_HASH_BITS)
21static struct hlist_head jump_label_table[JUMP_LABEL_TABLE_SIZE];
22
23/* mutex to protect coming/going of the the jump_label table */
24static DEFINE_MUTEX(jump_label_mutex);
25
26struct jump_label_entry {
27 struct hlist_node hlist;
28 struct jump_entry *table;
29 int nr_entries;
30 /* hang modules off here */
31 struct hlist_head modules;
32 unsigned long key;
33};
34
35struct jump_label_module_entry {
36 struct hlist_node hlist;
37 struct jump_entry *table;
38 int nr_entries;
39 struct module *mod;
40};
41
42static int jump_label_cmp(const void *a, const void *b)
43{
44 const struct jump_entry *jea = a;
45 const struct jump_entry *jeb = b;
46
47 if (jea->key < jeb->key)
48 return -1;
49
50 if (jea->key > jeb->key)
51 return 1;
52
53 return 0;
54}
55
56static void
57sort_jump_label_entries(struct jump_entry *start, struct jump_entry *stop)
58{
59 unsigned long size;
60
61 size = (((unsigned long)stop - (unsigned long)start)
62 / sizeof(struct jump_entry));
63 sort(start, size, sizeof(struct jump_entry), jump_label_cmp, NULL);
64}
65
66static struct jump_label_entry *get_jump_label_entry(jump_label_t key)
67{
68 struct hlist_head *head;
69 struct hlist_node *node;
70 struct jump_label_entry *e;
71 u32 hash = jhash((void *)&key, sizeof(jump_label_t), 0);
72
73 head = &jump_label_table[hash & (JUMP_LABEL_TABLE_SIZE - 1)];
74 hlist_for_each_entry(e, node, head, hlist) {
75 if (key == e->key)
76 return e;
77 }
78 return NULL;
79}
80
81static struct jump_label_entry *
82add_jump_label_entry(jump_label_t key, int nr_entries, struct jump_entry *table)
83{
84 struct hlist_head *head;
85 struct jump_label_entry *e;
86 u32 hash;
87
88 e = get_jump_label_entry(key);
89 if (e)
90 return ERR_PTR(-EEXIST);
91
92 e = kmalloc(sizeof(struct jump_label_entry), GFP_KERNEL);
93 if (!e)
94 return ERR_PTR(-ENOMEM);
95
96 hash = jhash((void *)&key, sizeof(jump_label_t), 0);
97 head = &jump_label_table[hash & (JUMP_LABEL_TABLE_SIZE - 1)];
98 e->key = key;
99 e->table = table;
100 e->nr_entries = nr_entries;
101 INIT_HLIST_HEAD(&(e->modules));
102 hlist_add_head(&e->hlist, head);
103 return e;
104}
105
106static int
107build_jump_label_hashtable(struct jump_entry *start, struct jump_entry *stop)
108{
109 struct jump_entry *iter, *iter_begin;
110 struct jump_label_entry *entry;
111 int count;
112
113 sort_jump_label_entries(start, stop);
114 iter = start;
115 while (iter < stop) {
116 entry = get_jump_label_entry(iter->key);
117 if (!entry) {
118 iter_begin = iter;
119 count = 0;
120 while ((iter < stop) &&
121 (iter->key == iter_begin->key)) {
122 iter++;
123 count++;
124 }
125 entry = add_jump_label_entry(iter_begin->key,
126 count, iter_begin);
127 if (IS_ERR(entry))
128 return PTR_ERR(entry);
129 } else {
130 WARN_ONCE(1, KERN_ERR "build_jump_hashtable: unexpected entry!\n");
131 return -1;
132 }
133 }
134 return 0;
135}
136
137/***
138 * jump_label_update - update jump label text
139 * @key - key value associated with a a jump label
140 * @type - enum set to JUMP_LABEL_ENABLE or JUMP_LABEL_DISABLE
141 *
142 * Will enable/disable the jump for jump label @key, depending on the
143 * value of @type.
144 *
145 */
146
147void jump_label_update(unsigned long key, enum jump_label_type type)
148{
149 struct jump_entry *iter;
150 struct jump_label_entry *entry;
151 struct hlist_node *module_node;
152 struct jump_label_module_entry *e_module;
153 int count;
154
155 mutex_lock(&jump_label_mutex);
156 entry = get_jump_label_entry((jump_label_t)key);
157 if (entry) {
158 count = entry->nr_entries;
159 iter = entry->table;
160 while (count--) {
161 if (kernel_text_address(iter->code))
162 arch_jump_label_transform(iter, type);
163 iter++;
164 }
165 /* eanble/disable jump labels in modules */
166 hlist_for_each_entry(e_module, module_node, &(entry->modules),
167 hlist) {
168 count = e_module->nr_entries;
169 iter = e_module->table;
170 while (count--) {
171 if (kernel_text_address(iter->code))
172 arch_jump_label_transform(iter, type);
173 iter++;
174 }
175 }
176 }
177 mutex_unlock(&jump_label_mutex);
178}
179
180static int addr_conflict(struct jump_entry *entry, void *start, void *end)
181{
182 if (entry->code <= (unsigned long)end &&
183 entry->code + JUMP_LABEL_NOP_SIZE > (unsigned long)start)
184 return 1;
185
186 return 0;
187}
188
189#ifdef CONFIG_MODULES
190
191static int module_conflict(void *start, void *end)
192{
193 struct hlist_head *head;
194 struct hlist_node *node, *node_next, *module_node, *module_node_next;
195 struct jump_label_entry *e;
196 struct jump_label_module_entry *e_module;
197 struct jump_entry *iter;
198 int i, count;
199 int conflict = 0;
200
201 for (i = 0; i < JUMP_LABEL_TABLE_SIZE; i++) {
202 head = &jump_label_table[i];
203 hlist_for_each_entry_safe(e, node, node_next, head, hlist) {
204 hlist_for_each_entry_safe(e_module, module_node,
205 module_node_next,
206 &(e->modules), hlist) {
207 count = e_module->nr_entries;
208 iter = e_module->table;
209 while (count--) {
210 if (addr_conflict(iter, start, end)) {
211 conflict = 1;
212 goto out;
213 }
214 iter++;
215 }
216 }
217 }
218 }
219out:
220 return conflict;
221}
222
223#endif
224
225/***
226 * jump_label_text_reserved - check if addr range is reserved
227 * @start: start text addr
228 * @end: end text addr
229 *
230 * checks if the text addr located between @start and @end
231 * overlaps with any of the jump label patch addresses. Code
232 * that wants to modify kernel text should first verify that
233 * it does not overlap with any of the jump label addresses.
234 *
235 * returns 1 if there is an overlap, 0 otherwise
236 */
237int jump_label_text_reserved(void *start, void *end)
238{
239 struct jump_entry *iter;
240 struct jump_entry *iter_start = __start___jump_table;
241 struct jump_entry *iter_stop = __start___jump_table;
242 int conflict = 0;
243
244 mutex_lock(&jump_label_mutex);
245 iter = iter_start;
246 while (iter < iter_stop) {
247 if (addr_conflict(iter, start, end)) {
248 conflict = 1;
249 goto out;
250 }
251 iter++;
252 }
253
254 /* now check modules */
255#ifdef CONFIG_MODULES
256 conflict = module_conflict(start, end);
257#endif
258out:
259 mutex_unlock(&jump_label_mutex);
260 return conflict;
261}
262
263static __init int init_jump_label(void)
264{
265 int ret;
266 struct jump_entry *iter_start = __start___jump_table;
267 struct jump_entry *iter_stop = __stop___jump_table;
268 struct jump_entry *iter;
269
270 mutex_lock(&jump_label_mutex);
271 ret = build_jump_label_hashtable(__start___jump_table,
272 __stop___jump_table);
273 iter = iter_start;
274 while (iter < iter_stop) {
275 arch_jump_label_text_poke_early(iter->code);
276 iter++;
277 }
278 mutex_unlock(&jump_label_mutex);
279 return ret;
280}
281early_initcall(init_jump_label);
282
283#ifdef CONFIG_MODULES
284
285static struct jump_label_module_entry *
286add_jump_label_module_entry(struct jump_label_entry *entry,
287 struct jump_entry *iter_begin,
288 int count, struct module *mod)
289{
290 struct jump_label_module_entry *e;
291
292 e = kmalloc(sizeof(struct jump_label_module_entry), GFP_KERNEL);
293 if (!e)
294 return ERR_PTR(-ENOMEM);
295 e->mod = mod;
296 e->nr_entries = count;
297 e->table = iter_begin;
298 hlist_add_head(&e->hlist, &entry->modules);
299 return e;
300}
301
302static int add_jump_label_module(struct module *mod)
303{
304 struct jump_entry *iter, *iter_begin;
305 struct jump_label_entry *entry;
306 struct jump_label_module_entry *module_entry;
307 int count;
308
309 /* if the module doesn't have jump label entries, just return */
310 if (!mod->num_jump_entries)
311 return 0;
312
313 sort_jump_label_entries(mod->jump_entries,
314 mod->jump_entries + mod->num_jump_entries);
315 iter = mod->jump_entries;
316 while (iter < mod->jump_entries + mod->num_jump_entries) {
317 entry = get_jump_label_entry(iter->key);
318 iter_begin = iter;
319 count = 0;
320 while ((iter < mod->jump_entries + mod->num_jump_entries) &&
321 (iter->key == iter_begin->key)) {
322 iter++;
323 count++;
324 }
325 if (!entry) {
326 entry = add_jump_label_entry(iter_begin->key, 0, NULL);
327 if (IS_ERR(entry))
328 return PTR_ERR(entry);
329 }
330 module_entry = add_jump_label_module_entry(entry, iter_begin,
331 count, mod);
332 if (IS_ERR(module_entry))
333 return PTR_ERR(module_entry);
334 }
335 return 0;
336}
337
338static void remove_jump_label_module(struct module *mod)
339{
340 struct hlist_head *head;
341 struct hlist_node *node, *node_next, *module_node, *module_node_next;
342 struct jump_label_entry *e;
343 struct jump_label_module_entry *e_module;
344 int i;
345
346 /* if the module doesn't have jump label entries, just return */
347 if (!mod->num_jump_entries)
348 return;
349
350 for (i = 0; i < JUMP_LABEL_TABLE_SIZE; i++) {
351 head = &jump_label_table[i];
352 hlist_for_each_entry_safe(e, node, node_next, head, hlist) {
353 hlist_for_each_entry_safe(e_module, module_node,
354 module_node_next,
355 &(e->modules), hlist) {
356 if (e_module->mod == mod) {
357 hlist_del(&e_module->hlist);
358 kfree(e_module);
359 }
360 }
361 if (hlist_empty(&e->modules) && (e->nr_entries == 0)) {
362 hlist_del(&e->hlist);
363 kfree(e);
364 }
365 }
366 }
367}
368
369static int
370jump_label_module_notify(struct notifier_block *self, unsigned long val,
371 void *data)
372{
373 struct module *mod = data;
374 int ret = 0;
375
376 switch (val) {
377 case MODULE_STATE_COMING:
378 mutex_lock(&jump_label_mutex);
379 ret = add_jump_label_module(mod);
380 if (ret)
381 remove_jump_label_module(mod);
382 mutex_unlock(&jump_label_mutex);
383 break;
384 case MODULE_STATE_GOING:
385 mutex_lock(&jump_label_mutex);
386 remove_jump_label_module(mod);
387 mutex_unlock(&jump_label_mutex);
388 break;
389 }
390 return ret;
391}
392
393/***
394 * apply_jump_label_nops - patch module jump labels with arch_get_jump_label_nop()
395 * @mod: module to patch
396 *
397 * Allow for run-time selection of the optimal nops. Before the module
398 * loads patch these with arch_get_jump_label_nop(), which is specified by
399 * the arch specific jump label code.
400 */
401void jump_label_apply_nops(struct module *mod)
402{
403 struct jump_entry *iter;
404
405 /* if the module doesn't have jump label entries, just return */
406 if (!mod->num_jump_entries)
407 return;
408
409 iter = mod->jump_entries;
410 while (iter < mod->jump_entries + mod->num_jump_entries) {
411 arch_jump_label_text_poke_early(iter->code);
412 iter++;
413 }
414}
415
416struct notifier_block jump_label_module_nb = {
417 .notifier_call = jump_label_module_notify,
418 .priority = 0,
419};
420
421static __init int init_jump_label_module(void)
422{
423 return register_module_notifier(&jump_label_module_nb);
424}
425early_initcall(init_jump_label_module);
426
427#endif /* CONFIG_MODULES */
428
429#endif
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 282035f3ae96..ec4210c6501e 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -47,6 +47,7 @@
47#include <linux/memory.h> 47#include <linux/memory.h>
48#include <linux/ftrace.h> 48#include <linux/ftrace.h>
49#include <linux/cpu.h> 49#include <linux/cpu.h>
50#include <linux/jump_label.h>
50 51
51#include <asm-generic/sections.h> 52#include <asm-generic/sections.h>
52#include <asm/cacheflush.h> 53#include <asm/cacheflush.h>
@@ -399,7 +400,7 @@ static inline int kprobe_optready(struct kprobe *p)
399 * Return an optimized kprobe whose optimizing code replaces 400 * Return an optimized kprobe whose optimizing code replaces
400 * instructions including addr (exclude breakpoint). 401 * instructions including addr (exclude breakpoint).
401 */ 402 */
402struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr) 403static struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr)
403{ 404{
404 int i; 405 int i;
405 struct kprobe *p = NULL; 406 struct kprobe *p = NULL;
@@ -831,6 +832,7 @@ void __kprobes recycle_rp_inst(struct kretprobe_instance *ri,
831 832
832void __kprobes kretprobe_hash_lock(struct task_struct *tsk, 833void __kprobes kretprobe_hash_lock(struct task_struct *tsk,
833 struct hlist_head **head, unsigned long *flags) 834 struct hlist_head **head, unsigned long *flags)
835__acquires(hlist_lock)
834{ 836{
835 unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); 837 unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
836 spinlock_t *hlist_lock; 838 spinlock_t *hlist_lock;
@@ -842,6 +844,7 @@ void __kprobes kretprobe_hash_lock(struct task_struct *tsk,
842 844
843static void __kprobes kretprobe_table_lock(unsigned long hash, 845static void __kprobes kretprobe_table_lock(unsigned long hash,
844 unsigned long *flags) 846 unsigned long *flags)
847__acquires(hlist_lock)
845{ 848{
846 spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); 849 spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
847 spin_lock_irqsave(hlist_lock, *flags); 850 spin_lock_irqsave(hlist_lock, *flags);
@@ -849,6 +852,7 @@ static void __kprobes kretprobe_table_lock(unsigned long hash,
849 852
850void __kprobes kretprobe_hash_unlock(struct task_struct *tsk, 853void __kprobes kretprobe_hash_unlock(struct task_struct *tsk,
851 unsigned long *flags) 854 unsigned long *flags)
855__releases(hlist_lock)
852{ 856{
853 unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); 857 unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
854 spinlock_t *hlist_lock; 858 spinlock_t *hlist_lock;
@@ -857,7 +861,9 @@ void __kprobes kretprobe_hash_unlock(struct task_struct *tsk,
857 spin_unlock_irqrestore(hlist_lock, *flags); 861 spin_unlock_irqrestore(hlist_lock, *flags);
858} 862}
859 863
860void __kprobes kretprobe_table_unlock(unsigned long hash, unsigned long *flags) 864static void __kprobes kretprobe_table_unlock(unsigned long hash,
865 unsigned long *flags)
866__releases(hlist_lock)
861{ 867{
862 spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); 868 spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
863 spin_unlock_irqrestore(hlist_lock, *flags); 869 spin_unlock_irqrestore(hlist_lock, *flags);
@@ -1141,7 +1147,8 @@ int __kprobes register_kprobe(struct kprobe *p)
1141 preempt_disable(); 1147 preempt_disable();
1142 if (!kernel_text_address((unsigned long) p->addr) || 1148 if (!kernel_text_address((unsigned long) p->addr) ||
1143 in_kprobes_functions((unsigned long) p->addr) || 1149 in_kprobes_functions((unsigned long) p->addr) ||
1144 ftrace_text_reserved(p->addr, p->addr)) { 1150 ftrace_text_reserved(p->addr, p->addr) ||
1151 jump_label_text_reserved(p->addr, p->addr)) {
1145 preempt_enable(); 1152 preempt_enable();
1146 return -EINVAL; 1153 return -EINVAL;
1147 } 1154 }
@@ -1339,18 +1346,19 @@ int __kprobes register_jprobes(struct jprobe **jps, int num)
1339 if (num <= 0) 1346 if (num <= 0)
1340 return -EINVAL; 1347 return -EINVAL;
1341 for (i = 0; i < num; i++) { 1348 for (i = 0; i < num; i++) {
1342 unsigned long addr; 1349 unsigned long addr, offset;
1343 jp = jps[i]; 1350 jp = jps[i];
1344 addr = arch_deref_entry_point(jp->entry); 1351 addr = arch_deref_entry_point(jp->entry);
1345 1352
1346 if (!kernel_text_address(addr)) 1353 /* Verify probepoint is a function entry point */
1347 ret = -EINVAL; 1354 if (kallsyms_lookup_size_offset(addr, NULL, &offset) &&
1348 else { 1355 offset == 0) {
1349 /* Todo: Verify probepoint is a function entry point */
1350 jp->kp.pre_handler = setjmp_pre_handler; 1356 jp->kp.pre_handler = setjmp_pre_handler;
1351 jp->kp.break_handler = longjmp_break_handler; 1357 jp->kp.break_handler = longjmp_break_handler;
1352 ret = register_kprobe(&jp->kp); 1358 ret = register_kprobe(&jp->kp);
1353 } 1359 } else
1360 ret = -EINVAL;
1361
1354 if (ret < 0) { 1362 if (ret < 0) {
1355 if (i > 0) 1363 if (i > 0)
1356 unregister_jprobes(jps, i); 1364 unregister_jprobes(jps, i);
diff --git a/kernel/module.c b/kernel/module.c
index ccd641991842..2df46301a7a4 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -55,6 +55,7 @@
55#include <linux/async.h> 55#include <linux/async.h>
56#include <linux/percpu.h> 56#include <linux/percpu.h>
57#include <linux/kmemleak.h> 57#include <linux/kmemleak.h>
58#include <linux/jump_label.h>
58 59
59#define CREATE_TRACE_POINTS 60#define CREATE_TRACE_POINTS
60#include <trace/events/module.h> 61#include <trace/events/module.h>
@@ -2309,6 +2310,11 @@ static void find_module_sections(struct module *mod, struct load_info *info)
2309 sizeof(*mod->tracepoints), 2310 sizeof(*mod->tracepoints),
2310 &mod->num_tracepoints); 2311 &mod->num_tracepoints);
2311#endif 2312#endif
2313#ifdef HAVE_JUMP_LABEL
2314 mod->jump_entries = section_objs(info, "__jump_table",
2315 sizeof(*mod->jump_entries),
2316 &mod->num_jump_entries);
2317#endif
2312#ifdef CONFIG_EVENT_TRACING 2318#ifdef CONFIG_EVENT_TRACING
2313 mod->trace_events = section_objs(info, "_ftrace_events", 2319 mod->trace_events = section_objs(info, "_ftrace_events",
2314 sizeof(*mod->trace_events), 2320 sizeof(*mod->trace_events),
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index b98bed3d8182..f309e8014c78 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -31,24 +31,18 @@
31#include <linux/kernel_stat.h> 31#include <linux/kernel_stat.h>
32#include <linux/perf_event.h> 32#include <linux/perf_event.h>
33#include <linux/ftrace_event.h> 33#include <linux/ftrace_event.h>
34#include <linux/hw_breakpoint.h>
35 34
36#include <asm/irq_regs.h> 35#include <asm/irq_regs.h>
37 36
38/* 37atomic_t perf_task_events __read_mostly;
39 * Each CPU has a list of per CPU events:
40 */
41static DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context);
42
43int perf_max_events __read_mostly = 1;
44static int perf_reserved_percpu __read_mostly;
45static int perf_overcommit __read_mostly = 1;
46
47static atomic_t nr_events __read_mostly;
48static atomic_t nr_mmap_events __read_mostly; 38static atomic_t nr_mmap_events __read_mostly;
49static atomic_t nr_comm_events __read_mostly; 39static atomic_t nr_comm_events __read_mostly;
50static atomic_t nr_task_events __read_mostly; 40static atomic_t nr_task_events __read_mostly;
51 41
42static LIST_HEAD(pmus);
43static DEFINE_MUTEX(pmus_lock);
44static struct srcu_struct pmus_srcu;
45
52/* 46/*
53 * perf event paranoia level: 47 * perf event paranoia level:
54 * -1 - not paranoid at all 48 * -1 - not paranoid at all
@@ -67,36 +61,43 @@ int sysctl_perf_event_sample_rate __read_mostly = 100000;
67 61
68static atomic64_t perf_event_id; 62static atomic64_t perf_event_id;
69 63
70/* 64void __weak perf_event_print_debug(void) { }
71 * Lock for (sysadmin-configurable) event reservations:
72 */
73static DEFINE_SPINLOCK(perf_resource_lock);
74 65
75/* 66extern __weak const char *perf_pmu_name(void)
76 * Architecture provided APIs - weak aliases:
77 */
78extern __weak const struct pmu *hw_perf_event_init(struct perf_event *event)
79{ 67{
80 return NULL; 68 return "pmu";
81} 69}
82 70
83void __weak hw_perf_disable(void) { barrier(); } 71void perf_pmu_disable(struct pmu *pmu)
84void __weak hw_perf_enable(void) { barrier(); } 72{
85 73 int *count = this_cpu_ptr(pmu->pmu_disable_count);
86void __weak perf_event_print_debug(void) { } 74 if (!(*count)++)
87 75 pmu->pmu_disable(pmu);
88static DEFINE_PER_CPU(int, perf_disable_count); 76}
89 77
90void perf_disable(void) 78void perf_pmu_enable(struct pmu *pmu)
91{ 79{
92 if (!__get_cpu_var(perf_disable_count)++) 80 int *count = this_cpu_ptr(pmu->pmu_disable_count);
93 hw_perf_disable(); 81 if (!--(*count))
82 pmu->pmu_enable(pmu);
94} 83}
95 84
96void perf_enable(void) 85static DEFINE_PER_CPU(struct list_head, rotation_list);
86
87/*
88 * perf_pmu_rotate_start() and perf_rotate_context() are fully serialized
89 * because they're strictly cpu affine and rotate_start is called with IRQs
90 * disabled, while rotate_context is called from IRQ context.
91 */
92static void perf_pmu_rotate_start(struct pmu *pmu)
97{ 93{
98 if (!--__get_cpu_var(perf_disable_count)) 94 struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
99 hw_perf_enable(); 95 struct list_head *head = &__get_cpu_var(rotation_list);
96
97 WARN_ON(!irqs_disabled());
98
99 if (list_empty(&cpuctx->rotation_list))
100 list_add(&cpuctx->rotation_list, head);
100} 101}
101 102
102static void get_ctx(struct perf_event_context *ctx) 103static void get_ctx(struct perf_event_context *ctx)
@@ -151,13 +152,13 @@ static u64 primary_event_id(struct perf_event *event)
151 * the context could get moved to another task. 152 * the context could get moved to another task.
152 */ 153 */
153static struct perf_event_context * 154static struct perf_event_context *
154perf_lock_task_context(struct task_struct *task, unsigned long *flags) 155perf_lock_task_context(struct task_struct *task, int ctxn, unsigned long *flags)
155{ 156{
156 struct perf_event_context *ctx; 157 struct perf_event_context *ctx;
157 158
158 rcu_read_lock(); 159 rcu_read_lock();
159 retry: 160retry:
160 ctx = rcu_dereference(task->perf_event_ctxp); 161 ctx = rcu_dereference(task->perf_event_ctxp[ctxn]);
161 if (ctx) { 162 if (ctx) {
162 /* 163 /*
163 * If this context is a clone of another, it might 164 * If this context is a clone of another, it might
@@ -170,7 +171,7 @@ perf_lock_task_context(struct task_struct *task, unsigned long *flags)
170 * can't get swapped on us any more. 171 * can't get swapped on us any more.
171 */ 172 */
172 raw_spin_lock_irqsave(&ctx->lock, *flags); 173 raw_spin_lock_irqsave(&ctx->lock, *flags);
173 if (ctx != rcu_dereference(task->perf_event_ctxp)) { 174 if (ctx != rcu_dereference(task->perf_event_ctxp[ctxn])) {
174 raw_spin_unlock_irqrestore(&ctx->lock, *flags); 175 raw_spin_unlock_irqrestore(&ctx->lock, *flags);
175 goto retry; 176 goto retry;
176 } 177 }
@@ -189,12 +190,13 @@ perf_lock_task_context(struct task_struct *task, unsigned long *flags)
189 * can't get swapped to another task. This also increments its 190 * can't get swapped to another task. This also increments its
190 * reference count so that the context can't get freed. 191 * reference count so that the context can't get freed.
191 */ 192 */
192static struct perf_event_context *perf_pin_task_context(struct task_struct *task) 193static struct perf_event_context *
194perf_pin_task_context(struct task_struct *task, int ctxn)
193{ 195{
194 struct perf_event_context *ctx; 196 struct perf_event_context *ctx;
195 unsigned long flags; 197 unsigned long flags;
196 198
197 ctx = perf_lock_task_context(task, &flags); 199 ctx = perf_lock_task_context(task, ctxn, &flags);
198 if (ctx) { 200 if (ctx) {
199 ++ctx->pin_count; 201 ++ctx->pin_count;
200 raw_spin_unlock_irqrestore(&ctx->lock, flags); 202 raw_spin_unlock_irqrestore(&ctx->lock, flags);
@@ -302,6 +304,8 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
302 } 304 }
303 305
304 list_add_rcu(&event->event_entry, &ctx->event_list); 306 list_add_rcu(&event->event_entry, &ctx->event_list);
307 if (!ctx->nr_events)
308 perf_pmu_rotate_start(ctx->pmu);
305 ctx->nr_events++; 309 ctx->nr_events++;
306 if (event->attr.inherit_stat) 310 if (event->attr.inherit_stat)
307 ctx->nr_stat++; 311 ctx->nr_stat++;
@@ -311,7 +315,12 @@ static void perf_group_attach(struct perf_event *event)
311{ 315{
312 struct perf_event *group_leader = event->group_leader; 316 struct perf_event *group_leader = event->group_leader;
313 317
314 WARN_ON_ONCE(event->attach_state & PERF_ATTACH_GROUP); 318 /*
319 * We can have double attach due to group movement in perf_event_open.
320 */
321 if (event->attach_state & PERF_ATTACH_GROUP)
322 return;
323
315 event->attach_state |= PERF_ATTACH_GROUP; 324 event->attach_state |= PERF_ATTACH_GROUP;
316 325
317 if (group_leader == event) 326 if (group_leader == event)
@@ -408,8 +417,8 @@ event_filter_match(struct perf_event *event)
408 return event->cpu == -1 || event->cpu == smp_processor_id(); 417 return event->cpu == -1 || event->cpu == smp_processor_id();
409} 418}
410 419
411static void 420static int
412event_sched_out(struct perf_event *event, 421__event_sched_out(struct perf_event *event,
413 struct perf_cpu_context *cpuctx, 422 struct perf_cpu_context *cpuctx,
414 struct perf_event_context *ctx) 423 struct perf_event_context *ctx)
415{ 424{
@@ -428,15 +437,14 @@ event_sched_out(struct perf_event *event,
428 } 437 }
429 438
430 if (event->state != PERF_EVENT_STATE_ACTIVE) 439 if (event->state != PERF_EVENT_STATE_ACTIVE)
431 return; 440 return 0;
432 441
433 event->state = PERF_EVENT_STATE_INACTIVE; 442 event->state = PERF_EVENT_STATE_INACTIVE;
434 if (event->pending_disable) { 443 if (event->pending_disable) {
435 event->pending_disable = 0; 444 event->pending_disable = 0;
436 event->state = PERF_EVENT_STATE_OFF; 445 event->state = PERF_EVENT_STATE_OFF;
437 } 446 }
438 event->tstamp_stopped = ctx->time; 447 event->pmu->del(event, 0);
439 event->pmu->disable(event);
440 event->oncpu = -1; 448 event->oncpu = -1;
441 449
442 if (!is_software_event(event)) 450 if (!is_software_event(event))
@@ -444,6 +452,19 @@ event_sched_out(struct perf_event *event,
444 ctx->nr_active--; 452 ctx->nr_active--;
445 if (event->attr.exclusive || !cpuctx->active_oncpu) 453 if (event->attr.exclusive || !cpuctx->active_oncpu)
446 cpuctx->exclusive = 0; 454 cpuctx->exclusive = 0;
455 return 1;
456}
457
458static void
459event_sched_out(struct perf_event *event,
460 struct perf_cpu_context *cpuctx,
461 struct perf_event_context *ctx)
462{
463 int ret;
464
465 ret = __event_sched_out(event, cpuctx, ctx);
466 if (ret)
467 event->tstamp_stopped = ctx->time;
447} 468}
448 469
449static void 470static void
@@ -466,6 +487,12 @@ group_sched_out(struct perf_event *group_event,
466 cpuctx->exclusive = 0; 487 cpuctx->exclusive = 0;
467} 488}
468 489
490static inline struct perf_cpu_context *
491__get_cpu_context(struct perf_event_context *ctx)
492{
493 return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
494}
495
469/* 496/*
470 * Cross CPU call to remove a performance event 497 * Cross CPU call to remove a performance event
471 * 498 *
@@ -474,9 +501,9 @@ group_sched_out(struct perf_event *group_event,
474 */ 501 */
475static void __perf_event_remove_from_context(void *info) 502static void __perf_event_remove_from_context(void *info)
476{ 503{
477 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
478 struct perf_event *event = info; 504 struct perf_event *event = info;
479 struct perf_event_context *ctx = event->ctx; 505 struct perf_event_context *ctx = event->ctx;
506 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
480 507
481 /* 508 /*
482 * If this is a task context, we need to check whether it is 509 * If this is a task context, we need to check whether it is
@@ -487,27 +514,11 @@ static void __perf_event_remove_from_context(void *info)
487 return; 514 return;
488 515
489 raw_spin_lock(&ctx->lock); 516 raw_spin_lock(&ctx->lock);
490 /*
491 * Protect the list operation against NMI by disabling the
492 * events on a global level.
493 */
494 perf_disable();
495 517
496 event_sched_out(event, cpuctx, ctx); 518 event_sched_out(event, cpuctx, ctx);
497 519
498 list_del_event(event, ctx); 520 list_del_event(event, ctx);
499 521
500 if (!ctx->task) {
501 /*
502 * Allow more per task events with respect to the
503 * reservation:
504 */
505 cpuctx->max_pertask =
506 min(perf_max_events - ctx->nr_events,
507 perf_max_events - perf_reserved_percpu);
508 }
509
510 perf_enable();
511 raw_spin_unlock(&ctx->lock); 522 raw_spin_unlock(&ctx->lock);
512} 523}
513 524
@@ -572,8 +583,8 @@ retry:
572static void __perf_event_disable(void *info) 583static void __perf_event_disable(void *info)
573{ 584{
574 struct perf_event *event = info; 585 struct perf_event *event = info;
575 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
576 struct perf_event_context *ctx = event->ctx; 586 struct perf_event_context *ctx = event->ctx;
587 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
577 588
578 /* 589 /*
579 * If this is a per-task event, need to check whether this 590 * If this is a per-task event, need to check whether this
@@ -628,7 +639,7 @@ void perf_event_disable(struct perf_event *event)
628 return; 639 return;
629 } 640 }
630 641
631 retry: 642retry:
632 task_oncpu_function_call(task, __perf_event_disable, event); 643 task_oncpu_function_call(task, __perf_event_disable, event);
633 644
634 raw_spin_lock_irq(&ctx->lock); 645 raw_spin_lock_irq(&ctx->lock);
@@ -653,7 +664,7 @@ void perf_event_disable(struct perf_event *event)
653} 664}
654 665
655static int 666static int
656event_sched_in(struct perf_event *event, 667__event_sched_in(struct perf_event *event,
657 struct perf_cpu_context *cpuctx, 668 struct perf_cpu_context *cpuctx,
658 struct perf_event_context *ctx) 669 struct perf_event_context *ctx)
659{ 670{
@@ -667,14 +678,12 @@ event_sched_in(struct perf_event *event,
667 */ 678 */
668 smp_wmb(); 679 smp_wmb();
669 680
670 if (event->pmu->enable(event)) { 681 if (event->pmu->add(event, PERF_EF_START)) {
671 event->state = PERF_EVENT_STATE_INACTIVE; 682 event->state = PERF_EVENT_STATE_INACTIVE;
672 event->oncpu = -1; 683 event->oncpu = -1;
673 return -EAGAIN; 684 return -EAGAIN;
674 } 685 }
675 686
676 event->tstamp_running += ctx->time - event->tstamp_stopped;
677
678 if (!is_software_event(event)) 687 if (!is_software_event(event))
679 cpuctx->active_oncpu++; 688 cpuctx->active_oncpu++;
680 ctx->nr_active++; 689 ctx->nr_active++;
@@ -685,28 +694,56 @@ event_sched_in(struct perf_event *event,
685 return 0; 694 return 0;
686} 695}
687 696
697static inline int
698event_sched_in(struct perf_event *event,
699 struct perf_cpu_context *cpuctx,
700 struct perf_event_context *ctx)
701{
702 int ret = __event_sched_in(event, cpuctx, ctx);
703 if (ret)
704 return ret;
705 event->tstamp_running += ctx->time - event->tstamp_stopped;
706 return 0;
707}
708
709static void
710group_commit_event_sched_in(struct perf_event *group_event,
711 struct perf_cpu_context *cpuctx,
712 struct perf_event_context *ctx)
713{
714 struct perf_event *event;
715 u64 now = ctx->time;
716
717 group_event->tstamp_running += now - group_event->tstamp_stopped;
718 /*
719 * Schedule in siblings as one group (if any):
720 */
721 list_for_each_entry(event, &group_event->sibling_list, group_entry) {
722 event->tstamp_running += now - event->tstamp_stopped;
723 }
724}
725
688static int 726static int
689group_sched_in(struct perf_event *group_event, 727group_sched_in(struct perf_event *group_event,
690 struct perf_cpu_context *cpuctx, 728 struct perf_cpu_context *cpuctx,
691 struct perf_event_context *ctx) 729 struct perf_event_context *ctx)
692{ 730{
693 struct perf_event *event, *partial_group = NULL; 731 struct perf_event *event, *partial_group = NULL;
694 const struct pmu *pmu = group_event->pmu; 732 struct pmu *pmu = group_event->pmu;
695 bool txn = false;
696 733
697 if (group_event->state == PERF_EVENT_STATE_OFF) 734 if (group_event->state == PERF_EVENT_STATE_OFF)
698 return 0; 735 return 0;
699 736
700 /* Check if group transaction availabe */ 737 pmu->start_txn(pmu);
701 if (pmu->start_txn)
702 txn = true;
703 738
704 if (txn) 739 /*
705 pmu->start_txn(pmu); 740 * use __event_sched_in() to delay updating tstamp_running
706 741 * until the transaction is committed. In case of failure
707 if (event_sched_in(group_event, cpuctx, ctx)) { 742 * we will keep an unmodified tstamp_running which is a
708 if (txn) 743 * requirement to get correct timing information
709 pmu->cancel_txn(pmu); 744 */
745 if (__event_sched_in(group_event, cpuctx, ctx)) {
746 pmu->cancel_txn(pmu);
710 return -EAGAIN; 747 return -EAGAIN;
711 } 748 }
712 749
@@ -714,29 +751,33 @@ group_sched_in(struct perf_event *group_event,
714 * Schedule in siblings as one group (if any): 751 * Schedule in siblings as one group (if any):
715 */ 752 */
716 list_for_each_entry(event, &group_event->sibling_list, group_entry) { 753 list_for_each_entry(event, &group_event->sibling_list, group_entry) {
717 if (event_sched_in(event, cpuctx, ctx)) { 754 if (__event_sched_in(event, cpuctx, ctx)) {
718 partial_group = event; 755 partial_group = event;
719 goto group_error; 756 goto group_error;
720 } 757 }
721 } 758 }
722 759
723 if (!txn || !pmu->commit_txn(pmu)) 760 if (!pmu->commit_txn(pmu)) {
761 /* commit tstamp_running */
762 group_commit_event_sched_in(group_event, cpuctx, ctx);
724 return 0; 763 return 0;
725 764 }
726group_error: 765group_error:
727 /* 766 /*
728 * Groups can be scheduled in as one unit only, so undo any 767 * Groups can be scheduled in as one unit only, so undo any
729 * partial group before returning: 768 * partial group before returning:
769 *
770 * use __event_sched_out() to avoid updating tstamp_stopped
771 * because the event never actually ran
730 */ 772 */
731 list_for_each_entry(event, &group_event->sibling_list, group_entry) { 773 list_for_each_entry(event, &group_event->sibling_list, group_entry) {
732 if (event == partial_group) 774 if (event == partial_group)
733 break; 775 break;
734 event_sched_out(event, cpuctx, ctx); 776 __event_sched_out(event, cpuctx, ctx);
735 } 777 }
736 event_sched_out(group_event, cpuctx, ctx); 778 __event_sched_out(group_event, cpuctx, ctx);
737 779
738 if (txn) 780 pmu->cancel_txn(pmu);
739 pmu->cancel_txn(pmu);
740 781
741 return -EAGAIN; 782 return -EAGAIN;
742} 783}
@@ -789,10 +830,10 @@ static void add_event_to_ctx(struct perf_event *event,
789 */ 830 */
790static void __perf_install_in_context(void *info) 831static void __perf_install_in_context(void *info)
791{ 832{
792 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
793 struct perf_event *event = info; 833 struct perf_event *event = info;
794 struct perf_event_context *ctx = event->ctx; 834 struct perf_event_context *ctx = event->ctx;
795 struct perf_event *leader = event->group_leader; 835 struct perf_event *leader = event->group_leader;
836 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
796 int err; 837 int err;
797 838
798 /* 839 /*
@@ -812,12 +853,6 @@ static void __perf_install_in_context(void *info)
812 ctx->is_active = 1; 853 ctx->is_active = 1;
813 update_context_time(ctx); 854 update_context_time(ctx);
814 855
815 /*
816 * Protect the list operation against NMI by disabling the
817 * events on a global level. NOP for non NMI based events.
818 */
819 perf_disable();
820
821 add_event_to_ctx(event, ctx); 856 add_event_to_ctx(event, ctx);
822 857
823 if (event->cpu != -1 && event->cpu != smp_processor_id()) 858 if (event->cpu != -1 && event->cpu != smp_processor_id())
@@ -855,12 +890,7 @@ static void __perf_install_in_context(void *info)
855 } 890 }
856 } 891 }
857 892
858 if (!err && !ctx->task && cpuctx->max_pertask) 893unlock:
859 cpuctx->max_pertask--;
860
861 unlock:
862 perf_enable();
863
864 raw_spin_unlock(&ctx->lock); 894 raw_spin_unlock(&ctx->lock);
865} 895}
866 896
@@ -883,6 +913,8 @@ perf_install_in_context(struct perf_event_context *ctx,
883{ 913{
884 struct task_struct *task = ctx->task; 914 struct task_struct *task = ctx->task;
885 915
916 event->ctx = ctx;
917
886 if (!task) { 918 if (!task) {
887 /* 919 /*
888 * Per cpu events are installed via an smp call and 920 * Per cpu events are installed via an smp call and
@@ -931,10 +963,12 @@ static void __perf_event_mark_enabled(struct perf_event *event,
931 963
932 event->state = PERF_EVENT_STATE_INACTIVE; 964 event->state = PERF_EVENT_STATE_INACTIVE;
933 event->tstamp_enabled = ctx->time - event->total_time_enabled; 965 event->tstamp_enabled = ctx->time - event->total_time_enabled;
934 list_for_each_entry(sub, &event->sibling_list, group_entry) 966 list_for_each_entry(sub, &event->sibling_list, group_entry) {
935 if (sub->state >= PERF_EVENT_STATE_INACTIVE) 967 if (sub->state >= PERF_EVENT_STATE_INACTIVE) {
936 sub->tstamp_enabled = 968 sub->tstamp_enabled =
937 ctx->time - sub->total_time_enabled; 969 ctx->time - sub->total_time_enabled;
970 }
971 }
938} 972}
939 973
940/* 974/*
@@ -943,9 +977,9 @@ static void __perf_event_mark_enabled(struct perf_event *event,
943static void __perf_event_enable(void *info) 977static void __perf_event_enable(void *info)
944{ 978{
945 struct perf_event *event = info; 979 struct perf_event *event = info;
946 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
947 struct perf_event_context *ctx = event->ctx; 980 struct perf_event_context *ctx = event->ctx;
948 struct perf_event *leader = event->group_leader; 981 struct perf_event *leader = event->group_leader;
982 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
949 int err; 983 int err;
950 984
951 /* 985 /*
@@ -979,12 +1013,10 @@ static void __perf_event_enable(void *info)
979 if (!group_can_go_on(event, cpuctx, 1)) { 1013 if (!group_can_go_on(event, cpuctx, 1)) {
980 err = -EEXIST; 1014 err = -EEXIST;
981 } else { 1015 } else {
982 perf_disable();
983 if (event == leader) 1016 if (event == leader)
984 err = group_sched_in(event, cpuctx, ctx); 1017 err = group_sched_in(event, cpuctx, ctx);
985 else 1018 else
986 err = event_sched_in(event, cpuctx, ctx); 1019 err = event_sched_in(event, cpuctx, ctx);
987 perf_enable();
988 } 1020 }
989 1021
990 if (err) { 1022 if (err) {
@@ -1000,7 +1032,7 @@ static void __perf_event_enable(void *info)
1000 } 1032 }
1001 } 1033 }
1002 1034
1003 unlock: 1035unlock:
1004 raw_spin_unlock(&ctx->lock); 1036 raw_spin_unlock(&ctx->lock);
1005} 1037}
1006 1038
@@ -1041,7 +1073,7 @@ void perf_event_enable(struct perf_event *event)
1041 if (event->state == PERF_EVENT_STATE_ERROR) 1073 if (event->state == PERF_EVENT_STATE_ERROR)
1042 event->state = PERF_EVENT_STATE_OFF; 1074 event->state = PERF_EVENT_STATE_OFF;
1043 1075
1044 retry: 1076retry:
1045 raw_spin_unlock_irq(&ctx->lock); 1077 raw_spin_unlock_irq(&ctx->lock);
1046 task_oncpu_function_call(task, __perf_event_enable, event); 1078 task_oncpu_function_call(task, __perf_event_enable, event);
1047 1079
@@ -1061,7 +1093,7 @@ void perf_event_enable(struct perf_event *event)
1061 if (event->state == PERF_EVENT_STATE_OFF) 1093 if (event->state == PERF_EVENT_STATE_OFF)
1062 __perf_event_mark_enabled(event, ctx); 1094 __perf_event_mark_enabled(event, ctx);
1063 1095
1064 out: 1096out:
1065 raw_spin_unlock_irq(&ctx->lock); 1097 raw_spin_unlock_irq(&ctx->lock);
1066} 1098}
1067 1099
@@ -1092,26 +1124,26 @@ static void ctx_sched_out(struct perf_event_context *ctx,
1092 struct perf_event *event; 1124 struct perf_event *event;
1093 1125
1094 raw_spin_lock(&ctx->lock); 1126 raw_spin_lock(&ctx->lock);
1127 perf_pmu_disable(ctx->pmu);
1095 ctx->is_active = 0; 1128 ctx->is_active = 0;
1096 if (likely(!ctx->nr_events)) 1129 if (likely(!ctx->nr_events))
1097 goto out; 1130 goto out;
1098 update_context_time(ctx); 1131 update_context_time(ctx);
1099 1132
1100 perf_disable();
1101 if (!ctx->nr_active) 1133 if (!ctx->nr_active)
1102 goto out_enable; 1134 goto out;
1103 1135
1104 if (event_type & EVENT_PINNED) 1136 if (event_type & EVENT_PINNED) {
1105 list_for_each_entry(event, &ctx->pinned_groups, group_entry) 1137 list_for_each_entry(event, &ctx->pinned_groups, group_entry)
1106 group_sched_out(event, cpuctx, ctx); 1138 group_sched_out(event, cpuctx, ctx);
1139 }
1107 1140
1108 if (event_type & EVENT_FLEXIBLE) 1141 if (event_type & EVENT_FLEXIBLE) {
1109 list_for_each_entry(event, &ctx->flexible_groups, group_entry) 1142 list_for_each_entry(event, &ctx->flexible_groups, group_entry)
1110 group_sched_out(event, cpuctx, ctx); 1143 group_sched_out(event, cpuctx, ctx);
1111 1144 }
1112 out_enable: 1145out:
1113 perf_enable(); 1146 perf_pmu_enable(ctx->pmu);
1114 out:
1115 raw_spin_unlock(&ctx->lock); 1147 raw_spin_unlock(&ctx->lock);
1116} 1148}
1117 1149
@@ -1209,34 +1241,25 @@ static void perf_event_sync_stat(struct perf_event_context *ctx,
1209 } 1241 }
1210} 1242}
1211 1243
1212/* 1244void perf_event_context_sched_out(struct task_struct *task, int ctxn,
1213 * Called from scheduler to remove the events of the current task, 1245 struct task_struct *next)
1214 * with interrupts disabled.
1215 *
1216 * We stop each event and update the event value in event->count.
1217 *
1218 * This does not protect us against NMI, but disable()
1219 * sets the disabled bit in the control field of event _before_
1220 * accessing the event control register. If a NMI hits, then it will
1221 * not restart the event.
1222 */
1223void perf_event_task_sched_out(struct task_struct *task,
1224 struct task_struct *next)
1225{ 1246{
1226 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); 1247 struct perf_event_context *ctx = task->perf_event_ctxp[ctxn];
1227 struct perf_event_context *ctx = task->perf_event_ctxp;
1228 struct perf_event_context *next_ctx; 1248 struct perf_event_context *next_ctx;
1229 struct perf_event_context *parent; 1249 struct perf_event_context *parent;
1250 struct perf_cpu_context *cpuctx;
1230 int do_switch = 1; 1251 int do_switch = 1;
1231 1252
1232 perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); 1253 if (likely(!ctx))
1254 return;
1233 1255
1234 if (likely(!ctx || !cpuctx->task_ctx)) 1256 cpuctx = __get_cpu_context(ctx);
1257 if (!cpuctx->task_ctx)
1235 return; 1258 return;
1236 1259
1237 rcu_read_lock(); 1260 rcu_read_lock();
1238 parent = rcu_dereference(ctx->parent_ctx); 1261 parent = rcu_dereference(ctx->parent_ctx);
1239 next_ctx = next->perf_event_ctxp; 1262 next_ctx = next->perf_event_ctxp[ctxn];
1240 if (parent && next_ctx && 1263 if (parent && next_ctx &&
1241 rcu_dereference(next_ctx->parent_ctx) == parent) { 1264 rcu_dereference(next_ctx->parent_ctx) == parent) {
1242 /* 1265 /*
@@ -1255,8 +1278,8 @@ void perf_event_task_sched_out(struct task_struct *task,
1255 * XXX do we need a memory barrier of sorts 1278 * XXX do we need a memory barrier of sorts
1256 * wrt to rcu_dereference() of perf_event_ctxp 1279 * wrt to rcu_dereference() of perf_event_ctxp
1257 */ 1280 */
1258 task->perf_event_ctxp = next_ctx; 1281 task->perf_event_ctxp[ctxn] = next_ctx;
1259 next->perf_event_ctxp = ctx; 1282 next->perf_event_ctxp[ctxn] = ctx;
1260 ctx->task = next; 1283 ctx->task = next;
1261 next_ctx->task = task; 1284 next_ctx->task = task;
1262 do_switch = 0; 1285 do_switch = 0;
@@ -1274,10 +1297,35 @@ void perf_event_task_sched_out(struct task_struct *task,
1274 } 1297 }
1275} 1298}
1276 1299
1300#define for_each_task_context_nr(ctxn) \
1301 for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++)
1302
1303/*
1304 * Called from scheduler to remove the events of the current task,
1305 * with interrupts disabled.
1306 *
1307 * We stop each event and update the event value in event->count.
1308 *
1309 * This does not protect us against NMI, but disable()
1310 * sets the disabled bit in the control field of event _before_
1311 * accessing the event control register. If a NMI hits, then it will
1312 * not restart the event.
1313 */
1314void __perf_event_task_sched_out(struct task_struct *task,
1315 struct task_struct *next)
1316{
1317 int ctxn;
1318
1319 perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0);
1320
1321 for_each_task_context_nr(ctxn)
1322 perf_event_context_sched_out(task, ctxn, next);
1323}
1324
1277static void task_ctx_sched_out(struct perf_event_context *ctx, 1325static void task_ctx_sched_out(struct perf_event_context *ctx,
1278 enum event_type_t event_type) 1326 enum event_type_t event_type)
1279{ 1327{
1280 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); 1328 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
1281 1329
1282 if (!cpuctx->task_ctx) 1330 if (!cpuctx->task_ctx)
1283 return; 1331 return;
@@ -1292,14 +1340,6 @@ static void task_ctx_sched_out(struct perf_event_context *ctx,
1292/* 1340/*
1293 * Called with IRQs disabled 1341 * Called with IRQs disabled
1294 */ 1342 */
1295static void __perf_event_task_sched_out(struct perf_event_context *ctx)
1296{
1297 task_ctx_sched_out(ctx, EVENT_ALL);
1298}
1299
1300/*
1301 * Called with IRQs disabled
1302 */
1303static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx, 1343static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
1304 enum event_type_t event_type) 1344 enum event_type_t event_type)
1305{ 1345{
@@ -1350,9 +1390,10 @@ ctx_flexible_sched_in(struct perf_event_context *ctx,
1350 if (event->cpu != -1 && event->cpu != smp_processor_id()) 1390 if (event->cpu != -1 && event->cpu != smp_processor_id())
1351 continue; 1391 continue;
1352 1392
1353 if (group_can_go_on(event, cpuctx, can_add_hw)) 1393 if (group_can_go_on(event, cpuctx, can_add_hw)) {
1354 if (group_sched_in(event, cpuctx, ctx)) 1394 if (group_sched_in(event, cpuctx, ctx))
1355 can_add_hw = 0; 1395 can_add_hw = 0;
1396 }
1356 } 1397 }
1357} 1398}
1358 1399
@@ -1368,8 +1409,6 @@ ctx_sched_in(struct perf_event_context *ctx,
1368 1409
1369 ctx->timestamp = perf_clock(); 1410 ctx->timestamp = perf_clock();
1370 1411
1371 perf_disable();
1372
1373 /* 1412 /*
1374 * First go through the list and put on any pinned groups 1413 * First go through the list and put on any pinned groups
1375 * in order to give them the best chance of going on. 1414 * in order to give them the best chance of going on.
@@ -1381,8 +1420,7 @@ ctx_sched_in(struct perf_event_context *ctx,
1381 if (event_type & EVENT_FLEXIBLE) 1420 if (event_type & EVENT_FLEXIBLE)
1382 ctx_flexible_sched_in(ctx, cpuctx); 1421 ctx_flexible_sched_in(ctx, cpuctx);
1383 1422
1384 perf_enable(); 1423out:
1385 out:
1386 raw_spin_unlock(&ctx->lock); 1424 raw_spin_unlock(&ctx->lock);
1387} 1425}
1388 1426
@@ -1394,43 +1432,28 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
1394 ctx_sched_in(ctx, cpuctx, event_type); 1432 ctx_sched_in(ctx, cpuctx, event_type);
1395} 1433}
1396 1434
1397static void task_ctx_sched_in(struct task_struct *task, 1435static void task_ctx_sched_in(struct perf_event_context *ctx,
1398 enum event_type_t event_type) 1436 enum event_type_t event_type)
1399{ 1437{
1400 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); 1438 struct perf_cpu_context *cpuctx;
1401 struct perf_event_context *ctx = task->perf_event_ctxp;
1402 1439
1403 if (likely(!ctx)) 1440 cpuctx = __get_cpu_context(ctx);
1404 return;
1405 if (cpuctx->task_ctx == ctx) 1441 if (cpuctx->task_ctx == ctx)
1406 return; 1442 return;
1443
1407 ctx_sched_in(ctx, cpuctx, event_type); 1444 ctx_sched_in(ctx, cpuctx, event_type);
1408 cpuctx->task_ctx = ctx; 1445 cpuctx->task_ctx = ctx;
1409} 1446}
1410/*
1411 * Called from scheduler to add the events of the current task
1412 * with interrupts disabled.
1413 *
1414 * We restore the event value and then enable it.
1415 *
1416 * This does not protect us against NMI, but enable()
1417 * sets the enabled bit in the control field of event _before_
1418 * accessing the event control register. If a NMI hits, then it will
1419 * keep the event running.
1420 */
1421void perf_event_task_sched_in(struct task_struct *task)
1422{
1423 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
1424 struct perf_event_context *ctx = task->perf_event_ctxp;
1425 1447
1426 if (likely(!ctx)) 1448void perf_event_context_sched_in(struct perf_event_context *ctx)
1427 return; 1449{
1450 struct perf_cpu_context *cpuctx;
1428 1451
1452 cpuctx = __get_cpu_context(ctx);
1429 if (cpuctx->task_ctx == ctx) 1453 if (cpuctx->task_ctx == ctx)
1430 return; 1454 return;
1431 1455
1432 perf_disable(); 1456 perf_pmu_disable(ctx->pmu);
1433
1434 /* 1457 /*
1435 * We want to keep the following priority order: 1458 * We want to keep the following priority order:
1436 * cpu pinned (that don't need to move), task pinned, 1459 * cpu pinned (that don't need to move), task pinned,
@@ -1444,7 +1467,37 @@ void perf_event_task_sched_in(struct task_struct *task)
1444 1467
1445 cpuctx->task_ctx = ctx; 1468 cpuctx->task_ctx = ctx;
1446 1469
1447 perf_enable(); 1470 /*
1471 * Since these rotations are per-cpu, we need to ensure the
1472 * cpu-context we got scheduled on is actually rotating.
1473 */
1474 perf_pmu_rotate_start(ctx->pmu);
1475 perf_pmu_enable(ctx->pmu);
1476}
1477
1478/*
1479 * Called from scheduler to add the events of the current task
1480 * with interrupts disabled.
1481 *
1482 * We restore the event value and then enable it.
1483 *
1484 * This does not protect us against NMI, but enable()
1485 * sets the enabled bit in the control field of event _before_
1486 * accessing the event control register. If a NMI hits, then it will
1487 * keep the event running.
1488 */
1489void __perf_event_task_sched_in(struct task_struct *task)
1490{
1491 struct perf_event_context *ctx;
1492 int ctxn;
1493
1494 for_each_task_context_nr(ctxn) {
1495 ctx = task->perf_event_ctxp[ctxn];
1496 if (likely(!ctx))
1497 continue;
1498
1499 perf_event_context_sched_in(ctx);
1500 }
1448} 1501}
1449 1502
1450#define MAX_INTERRUPTS (~0ULL) 1503#define MAX_INTERRUPTS (~0ULL)
@@ -1524,22 +1577,6 @@ do { \
1524 return div64_u64(dividend, divisor); 1577 return div64_u64(dividend, divisor);
1525} 1578}
1526 1579
1527static void perf_event_stop(struct perf_event *event)
1528{
1529 if (!event->pmu->stop)
1530 return event->pmu->disable(event);
1531
1532 return event->pmu->stop(event);
1533}
1534
1535static int perf_event_start(struct perf_event *event)
1536{
1537 if (!event->pmu->start)
1538 return event->pmu->enable(event);
1539
1540 return event->pmu->start(event);
1541}
1542
1543static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) 1580static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
1544{ 1581{
1545 struct hw_perf_event *hwc = &event->hw; 1582 struct hw_perf_event *hwc = &event->hw;
@@ -1559,15 +1596,13 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
1559 hwc->sample_period = sample_period; 1596 hwc->sample_period = sample_period;
1560 1597
1561 if (local64_read(&hwc->period_left) > 8*sample_period) { 1598 if (local64_read(&hwc->period_left) > 8*sample_period) {
1562 perf_disable(); 1599 event->pmu->stop(event, PERF_EF_UPDATE);
1563 perf_event_stop(event);
1564 local64_set(&hwc->period_left, 0); 1600 local64_set(&hwc->period_left, 0);
1565 perf_event_start(event); 1601 event->pmu->start(event, PERF_EF_RELOAD);
1566 perf_enable();
1567 } 1602 }
1568} 1603}
1569 1604
1570static void perf_ctx_adjust_freq(struct perf_event_context *ctx) 1605static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
1571{ 1606{
1572 struct perf_event *event; 1607 struct perf_event *event;
1573 struct hw_perf_event *hwc; 1608 struct hw_perf_event *hwc;
@@ -1592,23 +1627,19 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
1592 */ 1627 */
1593 if (interrupts == MAX_INTERRUPTS) { 1628 if (interrupts == MAX_INTERRUPTS) {
1594 perf_log_throttle(event, 1); 1629 perf_log_throttle(event, 1);
1595 perf_disable(); 1630 event->pmu->start(event, 0);
1596 event->pmu->unthrottle(event);
1597 perf_enable();
1598 } 1631 }
1599 1632
1600 if (!event->attr.freq || !event->attr.sample_freq) 1633 if (!event->attr.freq || !event->attr.sample_freq)
1601 continue; 1634 continue;
1602 1635
1603 perf_disable();
1604 event->pmu->read(event); 1636 event->pmu->read(event);
1605 now = local64_read(&event->count); 1637 now = local64_read(&event->count);
1606 delta = now - hwc->freq_count_stamp; 1638 delta = now - hwc->freq_count_stamp;
1607 hwc->freq_count_stamp = now; 1639 hwc->freq_count_stamp = now;
1608 1640
1609 if (delta > 0) 1641 if (delta > 0)
1610 perf_adjust_period(event, TICK_NSEC, delta); 1642 perf_adjust_period(event, period, delta);
1611 perf_enable();
1612 } 1643 }
1613 raw_spin_unlock(&ctx->lock); 1644 raw_spin_unlock(&ctx->lock);
1614} 1645}
@@ -1626,32 +1657,38 @@ static void rotate_ctx(struct perf_event_context *ctx)
1626 raw_spin_unlock(&ctx->lock); 1657 raw_spin_unlock(&ctx->lock);
1627} 1658}
1628 1659
1629void perf_event_task_tick(struct task_struct *curr) 1660/*
1661 * perf_pmu_rotate_start() and perf_rotate_context() are fully serialized
1662 * because they're strictly cpu affine and rotate_start is called with IRQs
1663 * disabled, while rotate_context is called from IRQ context.
1664 */
1665static void perf_rotate_context(struct perf_cpu_context *cpuctx)
1630{ 1666{
1631 struct perf_cpu_context *cpuctx; 1667 u64 interval = (u64)cpuctx->jiffies_interval * TICK_NSEC;
1632 struct perf_event_context *ctx; 1668 struct perf_event_context *ctx = NULL;
1633 int rotate = 0; 1669 int rotate = 0, remove = 1;
1634
1635 if (!atomic_read(&nr_events))
1636 return;
1637 1670
1638 cpuctx = &__get_cpu_var(perf_cpu_context); 1671 if (cpuctx->ctx.nr_events) {
1639 if (cpuctx->ctx.nr_events && 1672 remove = 0;
1640 cpuctx->ctx.nr_events != cpuctx->ctx.nr_active) 1673 if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
1641 rotate = 1; 1674 rotate = 1;
1675 }
1642 1676
1643 ctx = curr->perf_event_ctxp; 1677 ctx = cpuctx->task_ctx;
1644 if (ctx && ctx->nr_events && ctx->nr_events != ctx->nr_active) 1678 if (ctx && ctx->nr_events) {
1645 rotate = 1; 1679 remove = 0;
1680 if (ctx->nr_events != ctx->nr_active)
1681 rotate = 1;
1682 }
1646 1683
1647 perf_ctx_adjust_freq(&cpuctx->ctx); 1684 perf_pmu_disable(cpuctx->ctx.pmu);
1685 perf_ctx_adjust_freq(&cpuctx->ctx, interval);
1648 if (ctx) 1686 if (ctx)
1649 perf_ctx_adjust_freq(ctx); 1687 perf_ctx_adjust_freq(ctx, interval);
1650 1688
1651 if (!rotate) 1689 if (!rotate)
1652 return; 1690 goto done;
1653 1691
1654 perf_disable();
1655 cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); 1692 cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
1656 if (ctx) 1693 if (ctx)
1657 task_ctx_sched_out(ctx, EVENT_FLEXIBLE); 1694 task_ctx_sched_out(ctx, EVENT_FLEXIBLE);
@@ -1662,8 +1699,27 @@ void perf_event_task_tick(struct task_struct *curr)
1662 1699
1663 cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); 1700 cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE);
1664 if (ctx) 1701 if (ctx)
1665 task_ctx_sched_in(curr, EVENT_FLEXIBLE); 1702 task_ctx_sched_in(ctx, EVENT_FLEXIBLE);
1666 perf_enable(); 1703
1704done:
1705 if (remove)
1706 list_del_init(&cpuctx->rotation_list);
1707
1708 perf_pmu_enable(cpuctx->ctx.pmu);
1709}
1710
1711void perf_event_task_tick(void)
1712{
1713 struct list_head *head = &__get_cpu_var(rotation_list);
1714 struct perf_cpu_context *cpuctx, *tmp;
1715
1716 WARN_ON(!irqs_disabled());
1717
1718 list_for_each_entry_safe(cpuctx, tmp, head, rotation_list) {
1719 if (cpuctx->jiffies_interval == 1 ||
1720 !(jiffies % cpuctx->jiffies_interval))
1721 perf_rotate_context(cpuctx);
1722 }
1667} 1723}
1668 1724
1669static int event_enable_on_exec(struct perf_event *event, 1725static int event_enable_on_exec(struct perf_event *event,
@@ -1685,20 +1741,18 @@ static int event_enable_on_exec(struct perf_event *event,
1685 * Enable all of a task's events that have been marked enable-on-exec. 1741 * Enable all of a task's events that have been marked enable-on-exec.
1686 * This expects task == current. 1742 * This expects task == current.
1687 */ 1743 */
1688static void perf_event_enable_on_exec(struct task_struct *task) 1744static void perf_event_enable_on_exec(struct perf_event_context *ctx)
1689{ 1745{
1690 struct perf_event_context *ctx;
1691 struct perf_event *event; 1746 struct perf_event *event;
1692 unsigned long flags; 1747 unsigned long flags;
1693 int enabled = 0; 1748 int enabled = 0;
1694 int ret; 1749 int ret;
1695 1750
1696 local_irq_save(flags); 1751 local_irq_save(flags);
1697 ctx = task->perf_event_ctxp;
1698 if (!ctx || !ctx->nr_events) 1752 if (!ctx || !ctx->nr_events)
1699 goto out; 1753 goto out;
1700 1754
1701 __perf_event_task_sched_out(ctx); 1755 task_ctx_sched_out(ctx, EVENT_ALL);
1702 1756
1703 raw_spin_lock(&ctx->lock); 1757 raw_spin_lock(&ctx->lock);
1704 1758
@@ -1722,8 +1776,8 @@ static void perf_event_enable_on_exec(struct task_struct *task)
1722 1776
1723 raw_spin_unlock(&ctx->lock); 1777 raw_spin_unlock(&ctx->lock);
1724 1778
1725 perf_event_task_sched_in(task); 1779 perf_event_context_sched_in(ctx);
1726 out: 1780out:
1727 local_irq_restore(flags); 1781 local_irq_restore(flags);
1728} 1782}
1729 1783
@@ -1732,9 +1786,9 @@ static void perf_event_enable_on_exec(struct task_struct *task)
1732 */ 1786 */
1733static void __perf_event_read(void *info) 1787static void __perf_event_read(void *info)
1734{ 1788{
1735 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
1736 struct perf_event *event = info; 1789 struct perf_event *event = info;
1737 struct perf_event_context *ctx = event->ctx; 1790 struct perf_event_context *ctx = event->ctx;
1791 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
1738 1792
1739 /* 1793 /*
1740 * If this is a task context, we need to check whether it is 1794 * If this is a task context, we need to check whether it is
@@ -1773,7 +1827,13 @@ static u64 perf_event_read(struct perf_event *event)
1773 unsigned long flags; 1827 unsigned long flags;
1774 1828
1775 raw_spin_lock_irqsave(&ctx->lock, flags); 1829 raw_spin_lock_irqsave(&ctx->lock, flags);
1776 update_context_time(ctx); 1830 /*
1831 * may read while context is not active
1832 * (e.g., thread is blocked), in that case
1833 * we cannot update context time
1834 */
1835 if (ctx->is_active)
1836 update_context_time(ctx);
1777 update_event_times(event); 1837 update_event_times(event);
1778 raw_spin_unlock_irqrestore(&ctx->lock, flags); 1838 raw_spin_unlock_irqrestore(&ctx->lock, flags);
1779 } 1839 }
@@ -1782,11 +1842,219 @@ static u64 perf_event_read(struct perf_event *event)
1782} 1842}
1783 1843
1784/* 1844/*
1785 * Initialize the perf_event context in a task_struct: 1845 * Callchain support
1786 */ 1846 */
1847
1848struct callchain_cpus_entries {
1849 struct rcu_head rcu_head;
1850 struct perf_callchain_entry *cpu_entries[0];
1851};
1852
1853static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]);
1854static atomic_t nr_callchain_events;
1855static DEFINE_MUTEX(callchain_mutex);
1856struct callchain_cpus_entries *callchain_cpus_entries;
1857
1858
1859__weak void perf_callchain_kernel(struct perf_callchain_entry *entry,
1860 struct pt_regs *regs)
1861{
1862}
1863
1864__weak void perf_callchain_user(struct perf_callchain_entry *entry,
1865 struct pt_regs *regs)
1866{
1867}
1868
1869static void release_callchain_buffers_rcu(struct rcu_head *head)
1870{
1871 struct callchain_cpus_entries *entries;
1872 int cpu;
1873
1874 entries = container_of(head, struct callchain_cpus_entries, rcu_head);
1875
1876 for_each_possible_cpu(cpu)
1877 kfree(entries->cpu_entries[cpu]);
1878
1879 kfree(entries);
1880}
1881
1882static void release_callchain_buffers(void)
1883{
1884 struct callchain_cpus_entries *entries;
1885
1886 entries = callchain_cpus_entries;
1887 rcu_assign_pointer(callchain_cpus_entries, NULL);
1888 call_rcu(&entries->rcu_head, release_callchain_buffers_rcu);
1889}
1890
1891static int alloc_callchain_buffers(void)
1892{
1893 int cpu;
1894 int size;
1895 struct callchain_cpus_entries *entries;
1896
1897 /*
1898 * We can't use the percpu allocation API for data that can be
1899 * accessed from NMI. Use a temporary manual per cpu allocation
1900 * until that gets sorted out.
1901 */
1902 size = sizeof(*entries) + sizeof(struct perf_callchain_entry *) *
1903 num_possible_cpus();
1904
1905 entries = kzalloc(size, GFP_KERNEL);
1906 if (!entries)
1907 return -ENOMEM;
1908
1909 size = sizeof(struct perf_callchain_entry) * PERF_NR_CONTEXTS;
1910
1911 for_each_possible_cpu(cpu) {
1912 entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL,
1913 cpu_to_node(cpu));
1914 if (!entries->cpu_entries[cpu])
1915 goto fail;
1916 }
1917
1918 rcu_assign_pointer(callchain_cpus_entries, entries);
1919
1920 return 0;
1921
1922fail:
1923 for_each_possible_cpu(cpu)
1924 kfree(entries->cpu_entries[cpu]);
1925 kfree(entries);
1926
1927 return -ENOMEM;
1928}
1929
1930static int get_callchain_buffers(void)
1931{
1932 int err = 0;
1933 int count;
1934
1935 mutex_lock(&callchain_mutex);
1936
1937 count = atomic_inc_return(&nr_callchain_events);
1938 if (WARN_ON_ONCE(count < 1)) {
1939 err = -EINVAL;
1940 goto exit;
1941 }
1942
1943 if (count > 1) {
1944 /* If the allocation failed, give up */
1945 if (!callchain_cpus_entries)
1946 err = -ENOMEM;
1947 goto exit;
1948 }
1949
1950 err = alloc_callchain_buffers();
1951 if (err)
1952 release_callchain_buffers();
1953exit:
1954 mutex_unlock(&callchain_mutex);
1955
1956 return err;
1957}
1958
1959static void put_callchain_buffers(void)
1960{
1961 if (atomic_dec_and_mutex_lock(&nr_callchain_events, &callchain_mutex)) {
1962 release_callchain_buffers();
1963 mutex_unlock(&callchain_mutex);
1964 }
1965}
1966
1967static int get_recursion_context(int *recursion)
1968{
1969 int rctx;
1970
1971 if (in_nmi())
1972 rctx = 3;
1973 else if (in_irq())
1974 rctx = 2;
1975 else if (in_softirq())
1976 rctx = 1;
1977 else
1978 rctx = 0;
1979
1980 if (recursion[rctx])
1981 return -1;
1982
1983 recursion[rctx]++;
1984 barrier();
1985
1986 return rctx;
1987}
1988
1989static inline void put_recursion_context(int *recursion, int rctx)
1990{
1991 barrier();
1992 recursion[rctx]--;
1993}
1994
1995static struct perf_callchain_entry *get_callchain_entry(int *rctx)
1996{
1997 int cpu;
1998 struct callchain_cpus_entries *entries;
1999
2000 *rctx = get_recursion_context(__get_cpu_var(callchain_recursion));
2001 if (*rctx == -1)
2002 return NULL;
2003
2004 entries = rcu_dereference(callchain_cpus_entries);
2005 if (!entries)
2006 return NULL;
2007
2008 cpu = smp_processor_id();
2009
2010 return &entries->cpu_entries[cpu][*rctx];
2011}
2012
1787static void 2013static void
1788__perf_event_init_context(struct perf_event_context *ctx, 2014put_callchain_entry(int rctx)
1789 struct task_struct *task) 2015{
2016 put_recursion_context(__get_cpu_var(callchain_recursion), rctx);
2017}
2018
2019static struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
2020{
2021 int rctx;
2022 struct perf_callchain_entry *entry;
2023
2024
2025 entry = get_callchain_entry(&rctx);
2026 if (rctx == -1)
2027 return NULL;
2028
2029 if (!entry)
2030 goto exit_put;
2031
2032 entry->nr = 0;
2033
2034 if (!user_mode(regs)) {
2035 perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
2036 perf_callchain_kernel(entry, regs);
2037 if (current->mm)
2038 regs = task_pt_regs(current);
2039 else
2040 regs = NULL;
2041 }
2042
2043 if (regs) {
2044 perf_callchain_store(entry, PERF_CONTEXT_USER);
2045 perf_callchain_user(entry, regs);
2046 }
2047
2048exit_put:
2049 put_callchain_entry(rctx);
2050
2051 return entry;
2052}
2053
2054/*
2055 * Initialize the perf_event context in a task_struct:
2056 */
2057static void __perf_event_init_context(struct perf_event_context *ctx)
1790{ 2058{
1791 raw_spin_lock_init(&ctx->lock); 2059 raw_spin_lock_init(&ctx->lock);
1792 mutex_init(&ctx->mutex); 2060 mutex_init(&ctx->mutex);
@@ -1794,45 +2062,38 @@ __perf_event_init_context(struct perf_event_context *ctx,
1794 INIT_LIST_HEAD(&ctx->flexible_groups); 2062 INIT_LIST_HEAD(&ctx->flexible_groups);
1795 INIT_LIST_HEAD(&ctx->event_list); 2063 INIT_LIST_HEAD(&ctx->event_list);
1796 atomic_set(&ctx->refcount, 1); 2064 atomic_set(&ctx->refcount, 1);
1797 ctx->task = task;
1798} 2065}
1799 2066
1800static struct perf_event_context *find_get_context(pid_t pid, int cpu) 2067static struct perf_event_context *
2068alloc_perf_context(struct pmu *pmu, struct task_struct *task)
1801{ 2069{
1802 struct perf_event_context *ctx; 2070 struct perf_event_context *ctx;
1803 struct perf_cpu_context *cpuctx;
1804 struct task_struct *task;
1805 unsigned long flags;
1806 int err;
1807
1808 if (pid == -1 && cpu != -1) {
1809 /* Must be root to operate on a CPU event: */
1810 if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
1811 return ERR_PTR(-EACCES);
1812 2071
1813 if (cpu < 0 || cpu >= nr_cpumask_bits) 2072 ctx = kzalloc(sizeof(struct perf_event_context), GFP_KERNEL);
1814 return ERR_PTR(-EINVAL); 2073 if (!ctx)
2074 return NULL;
1815 2075
1816 /* 2076 __perf_event_init_context(ctx);
1817 * We could be clever and allow to attach a event to an 2077 if (task) {
1818 * offline CPU and activate it when the CPU comes up, but 2078 ctx->task = task;
1819 * that's for later. 2079 get_task_struct(task);
1820 */ 2080 }
1821 if (!cpu_online(cpu)) 2081 ctx->pmu = pmu;
1822 return ERR_PTR(-ENODEV);
1823 2082
1824 cpuctx = &per_cpu(perf_cpu_context, cpu); 2083 return ctx;
1825 ctx = &cpuctx->ctx; 2084}
1826 get_ctx(ctx);
1827 2085
1828 return ctx; 2086static struct task_struct *
1829 } 2087find_lively_task_by_vpid(pid_t vpid)
2088{
2089 struct task_struct *task;
2090 int err;
1830 2091
1831 rcu_read_lock(); 2092 rcu_read_lock();
1832 if (!pid) 2093 if (!vpid)
1833 task = current; 2094 task = current;
1834 else 2095 else
1835 task = find_task_by_vpid(pid); 2096 task = find_task_by_vpid(vpid);
1836 if (task) 2097 if (task)
1837 get_task_struct(task); 2098 get_task_struct(task);
1838 rcu_read_unlock(); 2099 rcu_read_unlock();
@@ -1852,36 +2113,78 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu)
1852 if (!ptrace_may_access(task, PTRACE_MODE_READ)) 2113 if (!ptrace_may_access(task, PTRACE_MODE_READ))
1853 goto errout; 2114 goto errout;
1854 2115
1855 retry: 2116 return task;
1856 ctx = perf_lock_task_context(task, &flags); 2117errout:
2118 put_task_struct(task);
2119 return ERR_PTR(err);
2120
2121}
2122
2123static struct perf_event_context *
2124find_get_context(struct pmu *pmu, struct task_struct *task, int cpu)
2125{
2126 struct perf_event_context *ctx;
2127 struct perf_cpu_context *cpuctx;
2128 unsigned long flags;
2129 int ctxn, err;
2130
2131 if (!task && cpu != -1) {
2132 /* Must be root to operate on a CPU event: */
2133 if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
2134 return ERR_PTR(-EACCES);
2135
2136 if (cpu < 0 || cpu >= nr_cpumask_bits)
2137 return ERR_PTR(-EINVAL);
2138
2139 /*
2140 * We could be clever and allow to attach a event to an
2141 * offline CPU and activate it when the CPU comes up, but
2142 * that's for later.
2143 */
2144 if (!cpu_online(cpu))
2145 return ERR_PTR(-ENODEV);
2146
2147 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
2148 ctx = &cpuctx->ctx;
2149 get_ctx(ctx);
2150
2151 return ctx;
2152 }
2153
2154 err = -EINVAL;
2155 ctxn = pmu->task_ctx_nr;
2156 if (ctxn < 0)
2157 goto errout;
2158
2159retry:
2160 ctx = perf_lock_task_context(task, ctxn, &flags);
1857 if (ctx) { 2161 if (ctx) {
1858 unclone_ctx(ctx); 2162 unclone_ctx(ctx);
1859 raw_spin_unlock_irqrestore(&ctx->lock, flags); 2163 raw_spin_unlock_irqrestore(&ctx->lock, flags);
1860 } 2164 }
1861 2165
1862 if (!ctx) { 2166 if (!ctx) {
1863 ctx = kzalloc(sizeof(struct perf_event_context), GFP_KERNEL); 2167 ctx = alloc_perf_context(pmu, task);
1864 err = -ENOMEM; 2168 err = -ENOMEM;
1865 if (!ctx) 2169 if (!ctx)
1866 goto errout; 2170 goto errout;
1867 __perf_event_init_context(ctx, task); 2171
1868 get_ctx(ctx); 2172 get_ctx(ctx);
1869 if (cmpxchg(&task->perf_event_ctxp, NULL, ctx)) { 2173
2174 if (cmpxchg(&task->perf_event_ctxp[ctxn], NULL, ctx)) {
1870 /* 2175 /*
1871 * We raced with some other task; use 2176 * We raced with some other task; use
1872 * the context they set. 2177 * the context they set.
1873 */ 2178 */
2179 put_task_struct(task);
1874 kfree(ctx); 2180 kfree(ctx);
1875 goto retry; 2181 goto retry;
1876 } 2182 }
1877 get_task_struct(task);
1878 } 2183 }
1879 2184
1880 put_task_struct(task);
1881 return ctx; 2185 return ctx;
1882 2186
1883 errout: 2187errout:
1884 put_task_struct(task);
1885 return ERR_PTR(err); 2188 return ERR_PTR(err);
1886} 2189}
1887 2190
@@ -1898,21 +2201,23 @@ static void free_event_rcu(struct rcu_head *head)
1898 kfree(event); 2201 kfree(event);
1899} 2202}
1900 2203
1901static void perf_pending_sync(struct perf_event *event);
1902static void perf_buffer_put(struct perf_buffer *buffer); 2204static void perf_buffer_put(struct perf_buffer *buffer);
1903 2205
1904static void free_event(struct perf_event *event) 2206static void free_event(struct perf_event *event)
1905{ 2207{
1906 perf_pending_sync(event); 2208 irq_work_sync(&event->pending);
1907 2209
1908 if (!event->parent) { 2210 if (!event->parent) {
1909 atomic_dec(&nr_events); 2211 if (event->attach_state & PERF_ATTACH_TASK)
2212 jump_label_dec(&perf_task_events);
1910 if (event->attr.mmap || event->attr.mmap_data) 2213 if (event->attr.mmap || event->attr.mmap_data)
1911 atomic_dec(&nr_mmap_events); 2214 atomic_dec(&nr_mmap_events);
1912 if (event->attr.comm) 2215 if (event->attr.comm)
1913 atomic_dec(&nr_comm_events); 2216 atomic_dec(&nr_comm_events);
1914 if (event->attr.task) 2217 if (event->attr.task)
1915 atomic_dec(&nr_task_events); 2218 atomic_dec(&nr_task_events);
2219 if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
2220 put_callchain_buffers();
1916 } 2221 }
1917 2222
1918 if (event->buffer) { 2223 if (event->buffer) {
@@ -1923,7 +2228,9 @@ static void free_event(struct perf_event *event)
1923 if (event->destroy) 2228 if (event->destroy)
1924 event->destroy(event); 2229 event->destroy(event);
1925 2230
1926 put_ctx(event->ctx); 2231 if (event->ctx)
2232 put_ctx(event->ctx);
2233
1927 call_rcu(&event->rcu_head, free_event_rcu); 2234 call_rcu(&event->rcu_head, free_event_rcu);
1928} 2235}
1929 2236
@@ -2342,6 +2649,9 @@ int perf_event_task_disable(void)
2342 2649
2343static int perf_event_index(struct perf_event *event) 2650static int perf_event_index(struct perf_event *event)
2344{ 2651{
2652 if (event->hw.state & PERF_HES_STOPPED)
2653 return 0;
2654
2345 if (event->state != PERF_EVENT_STATE_ACTIVE) 2655 if (event->state != PERF_EVENT_STATE_ACTIVE)
2346 return 0; 2656 return 0;
2347 2657
@@ -2845,16 +3155,7 @@ void perf_event_wakeup(struct perf_event *event)
2845 } 3155 }
2846} 3156}
2847 3157
2848/* 3158static void perf_pending_event(struct irq_work *entry)
2849 * Pending wakeups
2850 *
2851 * Handle the case where we need to wakeup up from NMI (or rq->lock) context.
2852 *
2853 * The NMI bit means we cannot possibly take locks. Therefore, maintain a
2854 * single linked list and use cmpxchg() to add entries lockless.
2855 */
2856
2857static void perf_pending_event(struct perf_pending_entry *entry)
2858{ 3159{
2859 struct perf_event *event = container_of(entry, 3160 struct perf_event *event = container_of(entry,
2860 struct perf_event, pending); 3161 struct perf_event, pending);
@@ -2870,99 +3171,6 @@ static void perf_pending_event(struct perf_pending_entry *entry)
2870 } 3171 }
2871} 3172}
2872 3173
2873#define PENDING_TAIL ((struct perf_pending_entry *)-1UL)
2874
2875static DEFINE_PER_CPU(struct perf_pending_entry *, perf_pending_head) = {
2876 PENDING_TAIL,
2877};
2878
2879static void perf_pending_queue(struct perf_pending_entry *entry,
2880 void (*func)(struct perf_pending_entry *))
2881{
2882 struct perf_pending_entry **head;
2883
2884 if (cmpxchg(&entry->next, NULL, PENDING_TAIL) != NULL)
2885 return;
2886
2887 entry->func = func;
2888
2889 head = &get_cpu_var(perf_pending_head);
2890
2891 do {
2892 entry->next = *head;
2893 } while (cmpxchg(head, entry->next, entry) != entry->next);
2894
2895 set_perf_event_pending();
2896
2897 put_cpu_var(perf_pending_head);
2898}
2899
2900static int __perf_pending_run(void)
2901{
2902 struct perf_pending_entry *list;
2903 int nr = 0;
2904
2905 list = xchg(&__get_cpu_var(perf_pending_head), PENDING_TAIL);
2906 while (list != PENDING_TAIL) {
2907 void (*func)(struct perf_pending_entry *);
2908 struct perf_pending_entry *entry = list;
2909
2910 list = list->next;
2911
2912 func = entry->func;
2913 entry->next = NULL;
2914 /*
2915 * Ensure we observe the unqueue before we issue the wakeup,
2916 * so that we won't be waiting forever.
2917 * -- see perf_not_pending().
2918 */
2919 smp_wmb();
2920
2921 func(entry);
2922 nr++;
2923 }
2924
2925 return nr;
2926}
2927
2928static inline int perf_not_pending(struct perf_event *event)
2929{
2930 /*
2931 * If we flush on whatever cpu we run, there is a chance we don't
2932 * need to wait.
2933 */
2934 get_cpu();
2935 __perf_pending_run();
2936 put_cpu();
2937
2938 /*
2939 * Ensure we see the proper queue state before going to sleep
2940 * so that we do not miss the wakeup. -- see perf_pending_handle()
2941 */
2942 smp_rmb();
2943 return event->pending.next == NULL;
2944}
2945
2946static void perf_pending_sync(struct perf_event *event)
2947{
2948 wait_event(event->waitq, perf_not_pending(event));
2949}
2950
2951void perf_event_do_pending(void)
2952{
2953 __perf_pending_run();
2954}
2955
2956/*
2957 * Callchain support -- arch specific
2958 */
2959
2960__weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
2961{
2962 return NULL;
2963}
2964
2965
2966/* 3174/*
2967 * We assume there is only KVM supporting the callbacks. 3175 * We assume there is only KVM supporting the callbacks.
2968 * Later on, we might change it to a list if there is 3176 * Later on, we might change it to a list if there is
@@ -3012,8 +3220,7 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
3012 3220
3013 if (handle->nmi) { 3221 if (handle->nmi) {
3014 handle->event->pending_wakeup = 1; 3222 handle->event->pending_wakeup = 1;
3015 perf_pending_queue(&handle->event->pending, 3223 irq_work_queue(&handle->event->pending);
3016 perf_pending_event);
3017 } else 3224 } else
3018 perf_event_wakeup(handle->event); 3225 perf_event_wakeup(handle->event);
3019} 3226}
@@ -3069,7 +3276,7 @@ again:
3069 if (handle->wakeup != local_read(&buffer->wakeup)) 3276 if (handle->wakeup != local_read(&buffer->wakeup))
3070 perf_output_wakeup(handle); 3277 perf_output_wakeup(handle);
3071 3278
3072 out: 3279out:
3073 preempt_enable(); 3280 preempt_enable();
3074} 3281}
3075 3282
@@ -3457,14 +3664,20 @@ static void perf_event_output(struct perf_event *event, int nmi,
3457 struct perf_output_handle handle; 3664 struct perf_output_handle handle;
3458 struct perf_event_header header; 3665 struct perf_event_header header;
3459 3666
3667 /* protect the callchain buffers */
3668 rcu_read_lock();
3669
3460 perf_prepare_sample(&header, data, event, regs); 3670 perf_prepare_sample(&header, data, event, regs);
3461 3671
3462 if (perf_output_begin(&handle, event, header.size, nmi, 1)) 3672 if (perf_output_begin(&handle, event, header.size, nmi, 1))
3463 return; 3673 goto exit;
3464 3674
3465 perf_output_sample(&handle, &header, data, event); 3675 perf_output_sample(&handle, &header, data, event);
3466 3676
3467 perf_output_end(&handle); 3677 perf_output_end(&handle);
3678
3679exit:
3680 rcu_read_unlock();
3468} 3681}
3469 3682
3470/* 3683/*
@@ -3578,16 +3791,27 @@ static void perf_event_task_ctx(struct perf_event_context *ctx,
3578static void perf_event_task_event(struct perf_task_event *task_event) 3791static void perf_event_task_event(struct perf_task_event *task_event)
3579{ 3792{
3580 struct perf_cpu_context *cpuctx; 3793 struct perf_cpu_context *cpuctx;
3581 struct perf_event_context *ctx = task_event->task_ctx; 3794 struct perf_event_context *ctx;
3795 struct pmu *pmu;
3796 int ctxn;
3582 3797
3583 rcu_read_lock(); 3798 rcu_read_lock();
3584 cpuctx = &get_cpu_var(perf_cpu_context); 3799 list_for_each_entry_rcu(pmu, &pmus, entry) {
3585 perf_event_task_ctx(&cpuctx->ctx, task_event); 3800 cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
3586 if (!ctx) 3801 perf_event_task_ctx(&cpuctx->ctx, task_event);
3587 ctx = rcu_dereference(current->perf_event_ctxp); 3802
3588 if (ctx) 3803 ctx = task_event->task_ctx;
3589 perf_event_task_ctx(ctx, task_event); 3804 if (!ctx) {
3590 put_cpu_var(perf_cpu_context); 3805 ctxn = pmu->task_ctx_nr;
3806 if (ctxn < 0)
3807 goto next;
3808 ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
3809 }
3810 if (ctx)
3811 perf_event_task_ctx(ctx, task_event);
3812next:
3813 put_cpu_ptr(pmu->pmu_cpu_context);
3814 }
3591 rcu_read_unlock(); 3815 rcu_read_unlock();
3592} 3816}
3593 3817
@@ -3692,8 +3916,10 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
3692{ 3916{
3693 struct perf_cpu_context *cpuctx; 3917 struct perf_cpu_context *cpuctx;
3694 struct perf_event_context *ctx; 3918 struct perf_event_context *ctx;
3695 unsigned int size;
3696 char comm[TASK_COMM_LEN]; 3919 char comm[TASK_COMM_LEN];
3920 unsigned int size;
3921 struct pmu *pmu;
3922 int ctxn;
3697 3923
3698 memset(comm, 0, sizeof(comm)); 3924 memset(comm, 0, sizeof(comm));
3699 strlcpy(comm, comm_event->task->comm, sizeof(comm)); 3925 strlcpy(comm, comm_event->task->comm, sizeof(comm));
@@ -3705,21 +3931,36 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
3705 comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; 3931 comm_event->event_id.header.size = sizeof(comm_event->event_id) + size;
3706 3932
3707 rcu_read_lock(); 3933 rcu_read_lock();
3708 cpuctx = &get_cpu_var(perf_cpu_context); 3934 list_for_each_entry_rcu(pmu, &pmus, entry) {
3709 perf_event_comm_ctx(&cpuctx->ctx, comm_event); 3935 cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
3710 ctx = rcu_dereference(current->perf_event_ctxp); 3936 perf_event_comm_ctx(&cpuctx->ctx, comm_event);
3711 if (ctx) 3937
3712 perf_event_comm_ctx(ctx, comm_event); 3938 ctxn = pmu->task_ctx_nr;
3713 put_cpu_var(perf_cpu_context); 3939 if (ctxn < 0)
3940 goto next;
3941
3942 ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
3943 if (ctx)
3944 perf_event_comm_ctx(ctx, comm_event);
3945next:
3946 put_cpu_ptr(pmu->pmu_cpu_context);
3947 }
3714 rcu_read_unlock(); 3948 rcu_read_unlock();
3715} 3949}
3716 3950
3717void perf_event_comm(struct task_struct *task) 3951void perf_event_comm(struct task_struct *task)
3718{ 3952{
3719 struct perf_comm_event comm_event; 3953 struct perf_comm_event comm_event;
3954 struct perf_event_context *ctx;
3955 int ctxn;
3956
3957 for_each_task_context_nr(ctxn) {
3958 ctx = task->perf_event_ctxp[ctxn];
3959 if (!ctx)
3960 continue;
3720 3961
3721 if (task->perf_event_ctxp) 3962 perf_event_enable_on_exec(ctx);
3722 perf_event_enable_on_exec(task); 3963 }
3723 3964
3724 if (!atomic_read(&nr_comm_events)) 3965 if (!atomic_read(&nr_comm_events))
3725 return; 3966 return;
@@ -3821,6 +4062,8 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
3821 char tmp[16]; 4062 char tmp[16];
3822 char *buf = NULL; 4063 char *buf = NULL;
3823 const char *name; 4064 const char *name;
4065 struct pmu *pmu;
4066 int ctxn;
3824 4067
3825 memset(tmp, 0, sizeof(tmp)); 4068 memset(tmp, 0, sizeof(tmp));
3826 4069
@@ -3873,12 +4116,23 @@ got_name:
3873 mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; 4116 mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size;
3874 4117
3875 rcu_read_lock(); 4118 rcu_read_lock();
3876 cpuctx = &get_cpu_var(perf_cpu_context); 4119 list_for_each_entry_rcu(pmu, &pmus, entry) {
3877 perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, vma->vm_flags & VM_EXEC); 4120 cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
3878 ctx = rcu_dereference(current->perf_event_ctxp); 4121 perf_event_mmap_ctx(&cpuctx->ctx, mmap_event,
3879 if (ctx) 4122 vma->vm_flags & VM_EXEC);
3880 perf_event_mmap_ctx(ctx, mmap_event, vma->vm_flags & VM_EXEC); 4123
3881 put_cpu_var(perf_cpu_context); 4124 ctxn = pmu->task_ctx_nr;
4125 if (ctxn < 0)
4126 goto next;
4127
4128 ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
4129 if (ctx) {
4130 perf_event_mmap_ctx(ctx, mmap_event,
4131 vma->vm_flags & VM_EXEC);
4132 }
4133next:
4134 put_cpu_ptr(pmu->pmu_cpu_context);
4135 }
3882 rcu_read_unlock(); 4136 rcu_read_unlock();
3883 4137
3884 kfree(buf); 4138 kfree(buf);
@@ -3960,8 +4214,6 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
3960 struct hw_perf_event *hwc = &event->hw; 4214 struct hw_perf_event *hwc = &event->hw;
3961 int ret = 0; 4215 int ret = 0;
3962 4216
3963 throttle = (throttle && event->pmu->unthrottle != NULL);
3964
3965 if (!throttle) { 4217 if (!throttle) {
3966 hwc->interrupts++; 4218 hwc->interrupts++;
3967 } else { 4219 } else {
@@ -4004,8 +4256,7 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
4004 event->pending_kill = POLL_HUP; 4256 event->pending_kill = POLL_HUP;
4005 if (nmi) { 4257 if (nmi) {
4006 event->pending_disable = 1; 4258 event->pending_disable = 1;
4007 perf_pending_queue(&event->pending, 4259 irq_work_queue(&event->pending);
4008 perf_pending_event);
4009 } else 4260 } else
4010 perf_event_disable(event); 4261 perf_event_disable(event);
4011 } 4262 }
@@ -4029,6 +4280,17 @@ int perf_event_overflow(struct perf_event *event, int nmi,
4029 * Generic software event infrastructure 4280 * Generic software event infrastructure
4030 */ 4281 */
4031 4282
4283struct swevent_htable {
4284 struct swevent_hlist *swevent_hlist;
4285 struct mutex hlist_mutex;
4286 int hlist_refcount;
4287
4288 /* Recursion avoidance in each contexts */
4289 int recursion[PERF_NR_CONTEXTS];
4290};
4291
4292static DEFINE_PER_CPU(struct swevent_htable, swevent_htable);
4293
4032/* 4294/*
4033 * We directly increment event->count and keep a second value in 4295 * We directly increment event->count and keep a second value in
4034 * event->hw.period_left to count intervals. This period event 4296 * event->hw.period_left to count intervals. This period event
@@ -4086,7 +4348,7 @@ static void perf_swevent_overflow(struct perf_event *event, u64 overflow,
4086 } 4348 }
4087} 4349}
4088 4350
4089static void perf_swevent_add(struct perf_event *event, u64 nr, 4351static void perf_swevent_event(struct perf_event *event, u64 nr,
4090 int nmi, struct perf_sample_data *data, 4352 int nmi, struct perf_sample_data *data,
4091 struct pt_regs *regs) 4353 struct pt_regs *regs)
4092{ 4354{
@@ -4112,6 +4374,9 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,
4112static int perf_exclude_event(struct perf_event *event, 4374static int perf_exclude_event(struct perf_event *event,
4113 struct pt_regs *regs) 4375 struct pt_regs *regs)
4114{ 4376{
4377 if (event->hw.state & PERF_HES_STOPPED)
4378 return 0;
4379
4115 if (regs) { 4380 if (regs) {
4116 if (event->attr.exclude_user && user_mode(regs)) 4381 if (event->attr.exclude_user && user_mode(regs))
4117 return 1; 4382 return 1;
@@ -4158,11 +4423,11 @@ __find_swevent_head(struct swevent_hlist *hlist, u64 type, u32 event_id)
4158 4423
4159/* For the read side: events when they trigger */ 4424/* For the read side: events when they trigger */
4160static inline struct hlist_head * 4425static inline struct hlist_head *
4161find_swevent_head_rcu(struct perf_cpu_context *ctx, u64 type, u32 event_id) 4426find_swevent_head_rcu(struct swevent_htable *swhash, u64 type, u32 event_id)
4162{ 4427{
4163 struct swevent_hlist *hlist; 4428 struct swevent_hlist *hlist;
4164 4429
4165 hlist = rcu_dereference(ctx->swevent_hlist); 4430 hlist = rcu_dereference(swhash->swevent_hlist);
4166 if (!hlist) 4431 if (!hlist)
4167 return NULL; 4432 return NULL;
4168 4433
@@ -4171,7 +4436,7 @@ find_swevent_head_rcu(struct perf_cpu_context *ctx, u64 type, u32 event_id)
4171 4436
4172/* For the event head insertion and removal in the hlist */ 4437/* For the event head insertion and removal in the hlist */
4173static inline struct hlist_head * 4438static inline struct hlist_head *
4174find_swevent_head(struct perf_cpu_context *ctx, struct perf_event *event) 4439find_swevent_head(struct swevent_htable *swhash, struct perf_event *event)
4175{ 4440{
4176 struct swevent_hlist *hlist; 4441 struct swevent_hlist *hlist;
4177 u32 event_id = event->attr.config; 4442 u32 event_id = event->attr.config;
@@ -4182,7 +4447,7 @@ find_swevent_head(struct perf_cpu_context *ctx, struct perf_event *event)
4182 * and release. Which makes the protected version suitable here. 4447 * and release. Which makes the protected version suitable here.
4183 * The context lock guarantees that. 4448 * The context lock guarantees that.
4184 */ 4449 */
4185 hlist = rcu_dereference_protected(ctx->swevent_hlist, 4450 hlist = rcu_dereference_protected(swhash->swevent_hlist,
4186 lockdep_is_held(&event->ctx->lock)); 4451 lockdep_is_held(&event->ctx->lock));
4187 if (!hlist) 4452 if (!hlist)
4188 return NULL; 4453 return NULL;
@@ -4195,23 +4460,19 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
4195 struct perf_sample_data *data, 4460 struct perf_sample_data *data,
4196 struct pt_regs *regs) 4461 struct pt_regs *regs)
4197{ 4462{
4198 struct perf_cpu_context *cpuctx; 4463 struct swevent_htable *swhash = &__get_cpu_var(swevent_htable);
4199 struct perf_event *event; 4464 struct perf_event *event;
4200 struct hlist_node *node; 4465 struct hlist_node *node;
4201 struct hlist_head *head; 4466 struct hlist_head *head;
4202 4467
4203 cpuctx = &__get_cpu_var(perf_cpu_context);
4204
4205 rcu_read_lock(); 4468 rcu_read_lock();
4206 4469 head = find_swevent_head_rcu(swhash, type, event_id);
4207 head = find_swevent_head_rcu(cpuctx, type, event_id);
4208
4209 if (!head) 4470 if (!head)
4210 goto end; 4471 goto end;
4211 4472
4212 hlist_for_each_entry_rcu(event, node, head, hlist_entry) { 4473 hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
4213 if (perf_swevent_match(event, type, event_id, data, regs)) 4474 if (perf_swevent_match(event, type, event_id, data, regs))
4214 perf_swevent_add(event, nr, nmi, data, regs); 4475 perf_swevent_event(event, nr, nmi, data, regs);
4215 } 4476 }
4216end: 4477end:
4217 rcu_read_unlock(); 4478 rcu_read_unlock();
@@ -4219,33 +4480,17 @@ end:
4219 4480
4220int perf_swevent_get_recursion_context(void) 4481int perf_swevent_get_recursion_context(void)
4221{ 4482{
4222 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); 4483 struct swevent_htable *swhash = &__get_cpu_var(swevent_htable);
4223 int rctx;
4224
4225 if (in_nmi())
4226 rctx = 3;
4227 else if (in_irq())
4228 rctx = 2;
4229 else if (in_softirq())
4230 rctx = 1;
4231 else
4232 rctx = 0;
4233
4234 if (cpuctx->recursion[rctx])
4235 return -1;
4236 4484
4237 cpuctx->recursion[rctx]++; 4485 return get_recursion_context(swhash->recursion);
4238 barrier();
4239
4240 return rctx;
4241} 4486}
4242EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); 4487EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);
4243 4488
4244void inline perf_swevent_put_recursion_context(int rctx) 4489void inline perf_swevent_put_recursion_context(int rctx)
4245{ 4490{
4246 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); 4491 struct swevent_htable *swhash = &__get_cpu_var(swevent_htable);
4247 barrier(); 4492
4248 cpuctx->recursion[rctx]--; 4493 put_recursion_context(swhash->recursion, rctx);
4249} 4494}
4250 4495
4251void __perf_sw_event(u32 event_id, u64 nr, int nmi, 4496void __perf_sw_event(u32 event_id, u64 nr, int nmi,
@@ -4271,20 +4516,20 @@ static void perf_swevent_read(struct perf_event *event)
4271{ 4516{
4272} 4517}
4273 4518
4274static int perf_swevent_enable(struct perf_event *event) 4519static int perf_swevent_add(struct perf_event *event, int flags)
4275{ 4520{
4521 struct swevent_htable *swhash = &__get_cpu_var(swevent_htable);
4276 struct hw_perf_event *hwc = &event->hw; 4522 struct hw_perf_event *hwc = &event->hw;
4277 struct perf_cpu_context *cpuctx;
4278 struct hlist_head *head; 4523 struct hlist_head *head;
4279 4524
4280 cpuctx = &__get_cpu_var(perf_cpu_context);
4281
4282 if (hwc->sample_period) { 4525 if (hwc->sample_period) {
4283 hwc->last_period = hwc->sample_period; 4526 hwc->last_period = hwc->sample_period;
4284 perf_swevent_set_period(event); 4527 perf_swevent_set_period(event);
4285 } 4528 }
4286 4529
4287 head = find_swevent_head(cpuctx, event); 4530 hwc->state = !(flags & PERF_EF_START);
4531
4532 head = find_swevent_head(swhash, event);
4288 if (WARN_ON_ONCE(!head)) 4533 if (WARN_ON_ONCE(!head))
4289 return -EINVAL; 4534 return -EINVAL;
4290 4535
@@ -4293,202 +4538,27 @@ static int perf_swevent_enable(struct perf_event *event)
4293 return 0; 4538 return 0;
4294} 4539}
4295 4540
4296static void perf_swevent_disable(struct perf_event *event) 4541static void perf_swevent_del(struct perf_event *event, int flags)
4297{ 4542{
4298 hlist_del_rcu(&event->hlist_entry); 4543 hlist_del_rcu(&event->hlist_entry);
4299} 4544}
4300 4545
4301static void perf_swevent_void(struct perf_event *event) 4546static void perf_swevent_start(struct perf_event *event, int flags)
4302{
4303}
4304
4305static int perf_swevent_int(struct perf_event *event)
4306{
4307 return 0;
4308}
4309
4310static const struct pmu perf_ops_generic = {
4311 .enable = perf_swevent_enable,
4312 .disable = perf_swevent_disable,
4313 .start = perf_swevent_int,
4314 .stop = perf_swevent_void,
4315 .read = perf_swevent_read,
4316 .unthrottle = perf_swevent_void, /* hwc->interrupts already reset */
4317};
4318
4319/*
4320 * hrtimer based swevent callback
4321 */
4322
4323static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
4324{ 4547{
4325 enum hrtimer_restart ret = HRTIMER_RESTART; 4548 event->hw.state = 0;
4326 struct perf_sample_data data;
4327 struct pt_regs *regs;
4328 struct perf_event *event;
4329 u64 period;
4330
4331 event = container_of(hrtimer, struct perf_event, hw.hrtimer);
4332 event->pmu->read(event);
4333
4334 perf_sample_data_init(&data, 0);
4335 data.period = event->hw.last_period;
4336 regs = get_irq_regs();
4337
4338 if (regs && !perf_exclude_event(event, regs)) {
4339 if (!(event->attr.exclude_idle && current->pid == 0))
4340 if (perf_event_overflow(event, 0, &data, regs))
4341 ret = HRTIMER_NORESTART;
4342 }
4343
4344 period = max_t(u64, 10000, event->hw.sample_period);
4345 hrtimer_forward_now(hrtimer, ns_to_ktime(period));
4346
4347 return ret;
4348} 4549}
4349 4550
4350static void perf_swevent_start_hrtimer(struct perf_event *event) 4551static void perf_swevent_stop(struct perf_event *event, int flags)
4351{ 4552{
4352 struct hw_perf_event *hwc = &event->hw; 4553 event->hw.state = PERF_HES_STOPPED;
4353
4354 hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
4355 hwc->hrtimer.function = perf_swevent_hrtimer;
4356 if (hwc->sample_period) {
4357 u64 period;
4358
4359 if (hwc->remaining) {
4360 if (hwc->remaining < 0)
4361 period = 10000;
4362 else
4363 period = hwc->remaining;
4364 hwc->remaining = 0;
4365 } else {
4366 period = max_t(u64, 10000, hwc->sample_period);
4367 }
4368 __hrtimer_start_range_ns(&hwc->hrtimer,
4369 ns_to_ktime(period), 0,
4370 HRTIMER_MODE_REL, 0);
4371 }
4372}
4373
4374static void perf_swevent_cancel_hrtimer(struct perf_event *event)
4375{
4376 struct hw_perf_event *hwc = &event->hw;
4377
4378 if (hwc->sample_period) {
4379 ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer);
4380 hwc->remaining = ktime_to_ns(remaining);
4381
4382 hrtimer_cancel(&hwc->hrtimer);
4383 }
4384}
4385
4386/*
4387 * Software event: cpu wall time clock
4388 */
4389
4390static void cpu_clock_perf_event_update(struct perf_event *event)
4391{
4392 int cpu = raw_smp_processor_id();
4393 s64 prev;
4394 u64 now;
4395
4396 now = cpu_clock(cpu);
4397 prev = local64_xchg(&event->hw.prev_count, now);
4398 local64_add(now - prev, &event->count);
4399}
4400
4401static int cpu_clock_perf_event_enable(struct perf_event *event)
4402{
4403 struct hw_perf_event *hwc = &event->hw;
4404 int cpu = raw_smp_processor_id();
4405
4406 local64_set(&hwc->prev_count, cpu_clock(cpu));
4407 perf_swevent_start_hrtimer(event);
4408
4409 return 0;
4410}
4411
4412static void cpu_clock_perf_event_disable(struct perf_event *event)
4413{
4414 perf_swevent_cancel_hrtimer(event);
4415 cpu_clock_perf_event_update(event);
4416}
4417
4418static void cpu_clock_perf_event_read(struct perf_event *event)
4419{
4420 cpu_clock_perf_event_update(event);
4421}
4422
4423static const struct pmu perf_ops_cpu_clock = {
4424 .enable = cpu_clock_perf_event_enable,
4425 .disable = cpu_clock_perf_event_disable,
4426 .read = cpu_clock_perf_event_read,
4427};
4428
4429/*
4430 * Software event: task time clock
4431 */
4432
4433static void task_clock_perf_event_update(struct perf_event *event, u64 now)
4434{
4435 u64 prev;
4436 s64 delta;
4437
4438 prev = local64_xchg(&event->hw.prev_count, now);
4439 delta = now - prev;
4440 local64_add(delta, &event->count);
4441}
4442
4443static int task_clock_perf_event_enable(struct perf_event *event)
4444{
4445 struct hw_perf_event *hwc = &event->hw;
4446 u64 now;
4447
4448 now = event->ctx->time;
4449
4450 local64_set(&hwc->prev_count, now);
4451
4452 perf_swevent_start_hrtimer(event);
4453
4454 return 0;
4455}
4456
4457static void task_clock_perf_event_disable(struct perf_event *event)
4458{
4459 perf_swevent_cancel_hrtimer(event);
4460 task_clock_perf_event_update(event, event->ctx->time);
4461
4462}
4463
4464static void task_clock_perf_event_read(struct perf_event *event)
4465{
4466 u64 time;
4467
4468 if (!in_nmi()) {
4469 update_context_time(event->ctx);
4470 time = event->ctx->time;
4471 } else {
4472 u64 now = perf_clock();
4473 u64 delta = now - event->ctx->timestamp;
4474 time = event->ctx->time + delta;
4475 }
4476
4477 task_clock_perf_event_update(event, time);
4478} 4554}
4479 4555
4480static const struct pmu perf_ops_task_clock = {
4481 .enable = task_clock_perf_event_enable,
4482 .disable = task_clock_perf_event_disable,
4483 .read = task_clock_perf_event_read,
4484};
4485
4486/* Deref the hlist from the update side */ 4556/* Deref the hlist from the update side */
4487static inline struct swevent_hlist * 4557static inline struct swevent_hlist *
4488swevent_hlist_deref(struct perf_cpu_context *cpuctx) 4558swevent_hlist_deref(struct swevent_htable *swhash)
4489{ 4559{
4490 return rcu_dereference_protected(cpuctx->swevent_hlist, 4560 return rcu_dereference_protected(swhash->swevent_hlist,
4491 lockdep_is_held(&cpuctx->hlist_mutex)); 4561 lockdep_is_held(&swhash->hlist_mutex));
4492} 4562}
4493 4563
4494static void swevent_hlist_release_rcu(struct rcu_head *rcu_head) 4564static void swevent_hlist_release_rcu(struct rcu_head *rcu_head)
@@ -4499,27 +4569,27 @@ static void swevent_hlist_release_rcu(struct rcu_head *rcu_head)
4499 kfree(hlist); 4569 kfree(hlist);
4500} 4570}
4501 4571
4502static void swevent_hlist_release(struct perf_cpu_context *cpuctx) 4572static void swevent_hlist_release(struct swevent_htable *swhash)
4503{ 4573{
4504 struct swevent_hlist *hlist = swevent_hlist_deref(cpuctx); 4574 struct swevent_hlist *hlist = swevent_hlist_deref(swhash);
4505 4575
4506 if (!hlist) 4576 if (!hlist)
4507 return; 4577 return;
4508 4578
4509 rcu_assign_pointer(cpuctx->swevent_hlist, NULL); 4579 rcu_assign_pointer(swhash->swevent_hlist, NULL);
4510 call_rcu(&hlist->rcu_head, swevent_hlist_release_rcu); 4580 call_rcu(&hlist->rcu_head, swevent_hlist_release_rcu);
4511} 4581}
4512 4582
4513static void swevent_hlist_put_cpu(struct perf_event *event, int cpu) 4583static void swevent_hlist_put_cpu(struct perf_event *event, int cpu)
4514{ 4584{
4515 struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); 4585 struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
4516 4586
4517 mutex_lock(&cpuctx->hlist_mutex); 4587 mutex_lock(&swhash->hlist_mutex);
4518 4588
4519 if (!--cpuctx->hlist_refcount) 4589 if (!--swhash->hlist_refcount)
4520 swevent_hlist_release(cpuctx); 4590 swevent_hlist_release(swhash);
4521 4591
4522 mutex_unlock(&cpuctx->hlist_mutex); 4592 mutex_unlock(&swhash->hlist_mutex);
4523} 4593}
4524 4594
4525static void swevent_hlist_put(struct perf_event *event) 4595static void swevent_hlist_put(struct perf_event *event)
@@ -4537,12 +4607,12 @@ static void swevent_hlist_put(struct perf_event *event)
4537 4607
4538static int swevent_hlist_get_cpu(struct perf_event *event, int cpu) 4608static int swevent_hlist_get_cpu(struct perf_event *event, int cpu)
4539{ 4609{
4540 struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); 4610 struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
4541 int err = 0; 4611 int err = 0;
4542 4612
4543 mutex_lock(&cpuctx->hlist_mutex); 4613 mutex_lock(&swhash->hlist_mutex);
4544 4614
4545 if (!swevent_hlist_deref(cpuctx) && cpu_online(cpu)) { 4615 if (!swevent_hlist_deref(swhash) && cpu_online(cpu)) {
4546 struct swevent_hlist *hlist; 4616 struct swevent_hlist *hlist;
4547 4617
4548 hlist = kzalloc(sizeof(*hlist), GFP_KERNEL); 4618 hlist = kzalloc(sizeof(*hlist), GFP_KERNEL);
@@ -4550,11 +4620,11 @@ static int swevent_hlist_get_cpu(struct perf_event *event, int cpu)
4550 err = -ENOMEM; 4620 err = -ENOMEM;
4551 goto exit; 4621 goto exit;
4552 } 4622 }
4553 rcu_assign_pointer(cpuctx->swevent_hlist, hlist); 4623 rcu_assign_pointer(swhash->swevent_hlist, hlist);
4554 } 4624 }
4555 cpuctx->hlist_refcount++; 4625 swhash->hlist_refcount++;
4556 exit: 4626exit:
4557 mutex_unlock(&cpuctx->hlist_mutex); 4627 mutex_unlock(&swhash->hlist_mutex);
4558 4628
4559 return err; 4629 return err;
4560} 4630}
@@ -4578,7 +4648,7 @@ static int swevent_hlist_get(struct perf_event *event)
4578 put_online_cpus(); 4648 put_online_cpus();
4579 4649
4580 return 0; 4650 return 0;
4581 fail: 4651fail:
4582 for_each_possible_cpu(cpu) { 4652 for_each_possible_cpu(cpu) {
4583 if (cpu == failed_cpu) 4653 if (cpu == failed_cpu)
4584 break; 4654 break;
@@ -4589,17 +4659,64 @@ static int swevent_hlist_get(struct perf_event *event)
4589 return err; 4659 return err;
4590} 4660}
4591 4661
4592#ifdef CONFIG_EVENT_TRACING 4662atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
4663
4664static void sw_perf_event_destroy(struct perf_event *event)
4665{
4666 u64 event_id = event->attr.config;
4667
4668 WARN_ON(event->parent);
4669
4670 jump_label_dec(&perf_swevent_enabled[event_id]);
4671 swevent_hlist_put(event);
4672}
4673
4674static int perf_swevent_init(struct perf_event *event)
4675{
4676 int event_id = event->attr.config;
4677
4678 if (event->attr.type != PERF_TYPE_SOFTWARE)
4679 return -ENOENT;
4680
4681 switch (event_id) {
4682 case PERF_COUNT_SW_CPU_CLOCK:
4683 case PERF_COUNT_SW_TASK_CLOCK:
4684 return -ENOENT;
4593 4685
4594static const struct pmu perf_ops_tracepoint = { 4686 default:
4595 .enable = perf_trace_enable, 4687 break;
4596 .disable = perf_trace_disable, 4688 }
4597 .start = perf_swevent_int, 4689
4598 .stop = perf_swevent_void, 4690 if (event_id > PERF_COUNT_SW_MAX)
4691 return -ENOENT;
4692
4693 if (!event->parent) {
4694 int err;
4695
4696 err = swevent_hlist_get(event);
4697 if (err)
4698 return err;
4699
4700 jump_label_inc(&perf_swevent_enabled[event_id]);
4701 event->destroy = sw_perf_event_destroy;
4702 }
4703
4704 return 0;
4705}
4706
4707static struct pmu perf_swevent = {
4708 .task_ctx_nr = perf_sw_context,
4709
4710 .event_init = perf_swevent_init,
4711 .add = perf_swevent_add,
4712 .del = perf_swevent_del,
4713 .start = perf_swevent_start,
4714 .stop = perf_swevent_stop,
4599 .read = perf_swevent_read, 4715 .read = perf_swevent_read,
4600 .unthrottle = perf_swevent_void,
4601}; 4716};
4602 4717
4718#ifdef CONFIG_EVENT_TRACING
4719
4603static int perf_tp_filter_match(struct perf_event *event, 4720static int perf_tp_filter_match(struct perf_event *event,
4604 struct perf_sample_data *data) 4721 struct perf_sample_data *data)
4605{ 4722{
@@ -4643,7 +4760,7 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
4643 4760
4644 hlist_for_each_entry_rcu(event, node, head, hlist_entry) { 4761 hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
4645 if (perf_tp_event_match(event, &data, regs)) 4762 if (perf_tp_event_match(event, &data, regs))
4646 perf_swevent_add(event, count, 1, &data, regs); 4763 perf_swevent_event(event, count, 1, &data, regs);
4647 } 4764 }
4648 4765
4649 perf_swevent_put_recursion_context(rctx); 4766 perf_swevent_put_recursion_context(rctx);
@@ -4655,10 +4772,13 @@ static void tp_perf_event_destroy(struct perf_event *event)
4655 perf_trace_destroy(event); 4772 perf_trace_destroy(event);
4656} 4773}
4657 4774
4658static const struct pmu *tp_perf_event_init(struct perf_event *event) 4775static int perf_tp_event_init(struct perf_event *event)
4659{ 4776{
4660 int err; 4777 int err;
4661 4778
4779 if (event->attr.type != PERF_TYPE_TRACEPOINT)
4780 return -ENOENT;
4781
4662 /* 4782 /*
4663 * Raw tracepoint data is a severe data leak, only allow root to 4783 * Raw tracepoint data is a severe data leak, only allow root to
4664 * have these. 4784 * have these.
@@ -4666,15 +4786,31 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event)
4666 if ((event->attr.sample_type & PERF_SAMPLE_RAW) && 4786 if ((event->attr.sample_type & PERF_SAMPLE_RAW) &&
4667 perf_paranoid_tracepoint_raw() && 4787 perf_paranoid_tracepoint_raw() &&
4668 !capable(CAP_SYS_ADMIN)) 4788 !capable(CAP_SYS_ADMIN))
4669 return ERR_PTR(-EPERM); 4789 return -EPERM;
4670 4790
4671 err = perf_trace_init(event); 4791 err = perf_trace_init(event);
4672 if (err) 4792 if (err)
4673 return NULL; 4793 return err;
4674 4794
4675 event->destroy = tp_perf_event_destroy; 4795 event->destroy = tp_perf_event_destroy;
4676 4796
4677 return &perf_ops_tracepoint; 4797 return 0;
4798}
4799
4800static struct pmu perf_tracepoint = {
4801 .task_ctx_nr = perf_sw_context,
4802
4803 .event_init = perf_tp_event_init,
4804 .add = perf_trace_add,
4805 .del = perf_trace_del,
4806 .start = perf_swevent_start,
4807 .stop = perf_swevent_stop,
4808 .read = perf_swevent_read,
4809};
4810
4811static inline void perf_tp_register(void)
4812{
4813 perf_pmu_register(&perf_tracepoint);
4678} 4814}
4679 4815
4680static int perf_event_set_filter(struct perf_event *event, void __user *arg) 4816static int perf_event_set_filter(struct perf_event *event, void __user *arg)
@@ -4702,9 +4838,8 @@ static void perf_event_free_filter(struct perf_event *event)
4702 4838
4703#else 4839#else
4704 4840
4705static const struct pmu *tp_perf_event_init(struct perf_event *event) 4841static inline void perf_tp_register(void)
4706{ 4842{
4707 return NULL;
4708} 4843}
4709 4844
4710static int perf_event_set_filter(struct perf_event *event, void __user *arg) 4845static int perf_event_set_filter(struct perf_event *event, void __user *arg)
@@ -4719,105 +4854,389 @@ static void perf_event_free_filter(struct perf_event *event)
4719#endif /* CONFIG_EVENT_TRACING */ 4854#endif /* CONFIG_EVENT_TRACING */
4720 4855
4721#ifdef CONFIG_HAVE_HW_BREAKPOINT 4856#ifdef CONFIG_HAVE_HW_BREAKPOINT
4722static void bp_perf_event_destroy(struct perf_event *event) 4857void perf_bp_event(struct perf_event *bp, void *data)
4723{ 4858{
4724 release_bp_slot(event); 4859 struct perf_sample_data sample;
4860 struct pt_regs *regs = data;
4861
4862 perf_sample_data_init(&sample, bp->attr.bp_addr);
4863
4864 if (!bp->hw.state && !perf_exclude_event(bp, regs))
4865 perf_swevent_event(bp, 1, 1, &sample, regs);
4725} 4866}
4867#endif
4726 4868
4727static const struct pmu *bp_perf_event_init(struct perf_event *bp) 4869/*
4870 * hrtimer based swevent callback
4871 */
4872
4873static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
4728{ 4874{
4729 int err; 4875 enum hrtimer_restart ret = HRTIMER_RESTART;
4876 struct perf_sample_data data;
4877 struct pt_regs *regs;
4878 struct perf_event *event;
4879 u64 period;
4730 4880
4731 err = register_perf_hw_breakpoint(bp); 4881 event = container_of(hrtimer, struct perf_event, hw.hrtimer);
4732 if (err) 4882 event->pmu->read(event);
4733 return ERR_PTR(err); 4883
4884 perf_sample_data_init(&data, 0);
4885 data.period = event->hw.last_period;
4886 regs = get_irq_regs();
4887
4888 if (regs && !perf_exclude_event(event, regs)) {
4889 if (!(event->attr.exclude_idle && current->pid == 0))
4890 if (perf_event_overflow(event, 0, &data, regs))
4891 ret = HRTIMER_NORESTART;
4892 }
4893
4894 period = max_t(u64, 10000, event->hw.sample_period);
4895 hrtimer_forward_now(hrtimer, ns_to_ktime(period));
4734 4896
4735 bp->destroy = bp_perf_event_destroy; 4897 return ret;
4898}
4736 4899
4737 return &perf_ops_bp; 4900static void perf_swevent_start_hrtimer(struct perf_event *event)
4901{
4902 struct hw_perf_event *hwc = &event->hw;
4903
4904 hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
4905 hwc->hrtimer.function = perf_swevent_hrtimer;
4906 if (hwc->sample_period) {
4907 s64 period = local64_read(&hwc->period_left);
4908
4909 if (period) {
4910 if (period < 0)
4911 period = 10000;
4912
4913 local64_set(&hwc->period_left, 0);
4914 } else {
4915 period = max_t(u64, 10000, hwc->sample_period);
4916 }
4917 __hrtimer_start_range_ns(&hwc->hrtimer,
4918 ns_to_ktime(period), 0,
4919 HRTIMER_MODE_REL_PINNED, 0);
4920 }
4738} 4921}
4739 4922
4740void perf_bp_event(struct perf_event *bp, void *data) 4923static void perf_swevent_cancel_hrtimer(struct perf_event *event)
4741{ 4924{
4742 struct perf_sample_data sample; 4925 struct hw_perf_event *hwc = &event->hw;
4743 struct pt_regs *regs = data;
4744 4926
4745 perf_sample_data_init(&sample, bp->attr.bp_addr); 4927 if (hwc->sample_period) {
4928 ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer);
4929 local64_set(&hwc->period_left, ktime_to_ns(remaining));
4746 4930
4747 if (!perf_exclude_event(bp, regs)) 4931 hrtimer_cancel(&hwc->hrtimer);
4748 perf_swevent_add(bp, 1, 1, &sample, regs); 4932 }
4749} 4933}
4750#else 4934
4751static const struct pmu *bp_perf_event_init(struct perf_event *bp) 4935/*
4936 * Software event: cpu wall time clock
4937 */
4938
4939static void cpu_clock_event_update(struct perf_event *event)
4752{ 4940{
4753 return NULL; 4941 s64 prev;
4942 u64 now;
4943
4944 now = local_clock();
4945 prev = local64_xchg(&event->hw.prev_count, now);
4946 local64_add(now - prev, &event->count);
4754} 4947}
4755 4948
4756void perf_bp_event(struct perf_event *bp, void *regs) 4949static void cpu_clock_event_start(struct perf_event *event, int flags)
4757{ 4950{
4951 local64_set(&event->hw.prev_count, local_clock());
4952 perf_swevent_start_hrtimer(event);
4758} 4953}
4759#endif
4760 4954
4761atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; 4955static void cpu_clock_event_stop(struct perf_event *event, int flags)
4956{
4957 perf_swevent_cancel_hrtimer(event);
4958 cpu_clock_event_update(event);
4959}
4762 4960
4763static void sw_perf_event_destroy(struct perf_event *event) 4961static int cpu_clock_event_add(struct perf_event *event, int flags)
4764{ 4962{
4765 u64 event_id = event->attr.config; 4963 if (flags & PERF_EF_START)
4964 cpu_clock_event_start(event, flags);
4766 4965
4767 WARN_ON(event->parent); 4966 return 0;
4967}
4768 4968
4769 atomic_dec(&perf_swevent_enabled[event_id]); 4969static void cpu_clock_event_del(struct perf_event *event, int flags)
4770 swevent_hlist_put(event); 4970{
4971 cpu_clock_event_stop(event, flags);
4771} 4972}
4772 4973
4773static const struct pmu *sw_perf_event_init(struct perf_event *event) 4974static void cpu_clock_event_read(struct perf_event *event)
4774{ 4975{
4775 const struct pmu *pmu = NULL; 4976 cpu_clock_event_update(event);
4776 u64 event_id = event->attr.config; 4977}
4978
4979static int cpu_clock_event_init(struct perf_event *event)
4980{
4981 if (event->attr.type != PERF_TYPE_SOFTWARE)
4982 return -ENOENT;
4983
4984 if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK)
4985 return -ENOENT;
4986
4987 return 0;
4988}
4777 4989
4990static struct pmu perf_cpu_clock = {
4991 .task_ctx_nr = perf_sw_context,
4992
4993 .event_init = cpu_clock_event_init,
4994 .add = cpu_clock_event_add,
4995 .del = cpu_clock_event_del,
4996 .start = cpu_clock_event_start,
4997 .stop = cpu_clock_event_stop,
4998 .read = cpu_clock_event_read,
4999};
5000
5001/*
5002 * Software event: task time clock
5003 */
5004
5005static void task_clock_event_update(struct perf_event *event, u64 now)
5006{
5007 u64 prev;
5008 s64 delta;
5009
5010 prev = local64_xchg(&event->hw.prev_count, now);
5011 delta = now - prev;
5012 local64_add(delta, &event->count);
5013}
5014
5015static void task_clock_event_start(struct perf_event *event, int flags)
5016{
5017 local64_set(&event->hw.prev_count, event->ctx->time);
5018 perf_swevent_start_hrtimer(event);
5019}
5020
5021static void task_clock_event_stop(struct perf_event *event, int flags)
5022{
5023 perf_swevent_cancel_hrtimer(event);
5024 task_clock_event_update(event, event->ctx->time);
5025}
5026
5027static int task_clock_event_add(struct perf_event *event, int flags)
5028{
5029 if (flags & PERF_EF_START)
5030 task_clock_event_start(event, flags);
5031
5032 return 0;
5033}
5034
5035static void task_clock_event_del(struct perf_event *event, int flags)
5036{
5037 task_clock_event_stop(event, PERF_EF_UPDATE);
5038}
5039
5040static void task_clock_event_read(struct perf_event *event)
5041{
5042 u64 time;
5043
5044 if (!in_nmi()) {
5045 update_context_time(event->ctx);
5046 time = event->ctx->time;
5047 } else {
5048 u64 now = perf_clock();
5049 u64 delta = now - event->ctx->timestamp;
5050 time = event->ctx->time + delta;
5051 }
5052
5053 task_clock_event_update(event, time);
5054}
5055
5056static int task_clock_event_init(struct perf_event *event)
5057{
5058 if (event->attr.type != PERF_TYPE_SOFTWARE)
5059 return -ENOENT;
5060
5061 if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK)
5062 return -ENOENT;
5063
5064 return 0;
5065}
5066
5067static struct pmu perf_task_clock = {
5068 .task_ctx_nr = perf_sw_context,
5069
5070 .event_init = task_clock_event_init,
5071 .add = task_clock_event_add,
5072 .del = task_clock_event_del,
5073 .start = task_clock_event_start,
5074 .stop = task_clock_event_stop,
5075 .read = task_clock_event_read,
5076};
5077
5078static void perf_pmu_nop_void(struct pmu *pmu)
5079{
5080}
5081
5082static int perf_pmu_nop_int(struct pmu *pmu)
5083{
5084 return 0;
5085}
5086
5087static void perf_pmu_start_txn(struct pmu *pmu)
5088{
5089 perf_pmu_disable(pmu);
5090}
5091
5092static int perf_pmu_commit_txn(struct pmu *pmu)
5093{
5094 perf_pmu_enable(pmu);
5095 return 0;
5096}
5097
5098static void perf_pmu_cancel_txn(struct pmu *pmu)
5099{
5100 perf_pmu_enable(pmu);
5101}
5102
5103/*
5104 * Ensures all contexts with the same task_ctx_nr have the same
5105 * pmu_cpu_context too.
5106 */
5107static void *find_pmu_context(int ctxn)
5108{
5109 struct pmu *pmu;
5110
5111 if (ctxn < 0)
5112 return NULL;
5113
5114 list_for_each_entry(pmu, &pmus, entry) {
5115 if (pmu->task_ctx_nr == ctxn)
5116 return pmu->pmu_cpu_context;
5117 }
5118
5119 return NULL;
5120}
5121
5122static void free_pmu_context(void * __percpu cpu_context)
5123{
5124 struct pmu *pmu;
5125
5126 mutex_lock(&pmus_lock);
4778 /* 5127 /*
4779 * Software events (currently) can't in general distinguish 5128 * Like a real lame refcount.
4780 * between user, kernel and hypervisor events.
4781 * However, context switches and cpu migrations are considered
4782 * to be kernel events, and page faults are never hypervisor
4783 * events.
4784 */ 5129 */
4785 switch (event_id) { 5130 list_for_each_entry(pmu, &pmus, entry) {
4786 case PERF_COUNT_SW_CPU_CLOCK: 5131 if (pmu->pmu_cpu_context == cpu_context)
4787 pmu = &perf_ops_cpu_clock; 5132 goto out;
5133 }
4788 5134
4789 break; 5135 free_percpu(cpu_context);
4790 case PERF_COUNT_SW_TASK_CLOCK: 5136out:
4791 /* 5137 mutex_unlock(&pmus_lock);
4792 * If the user instantiates this as a per-cpu event, 5138}
4793 * use the cpu_clock event instead.
4794 */
4795 if (event->ctx->task)
4796 pmu = &perf_ops_task_clock;
4797 else
4798 pmu = &perf_ops_cpu_clock;
4799 5139
4800 break; 5140int perf_pmu_register(struct pmu *pmu)
4801 case PERF_COUNT_SW_PAGE_FAULTS: 5141{
4802 case PERF_COUNT_SW_PAGE_FAULTS_MIN: 5142 int cpu, ret;
4803 case PERF_COUNT_SW_PAGE_FAULTS_MAJ: 5143
4804 case PERF_COUNT_SW_CONTEXT_SWITCHES: 5144 mutex_lock(&pmus_lock);
4805 case PERF_COUNT_SW_CPU_MIGRATIONS: 5145 ret = -ENOMEM;
4806 case PERF_COUNT_SW_ALIGNMENT_FAULTS: 5146 pmu->pmu_disable_count = alloc_percpu(int);
4807 case PERF_COUNT_SW_EMULATION_FAULTS: 5147 if (!pmu->pmu_disable_count)
4808 if (!event->parent) { 5148 goto unlock;
4809 int err;
4810
4811 err = swevent_hlist_get(event);
4812 if (err)
4813 return ERR_PTR(err);
4814 5149
4815 atomic_inc(&perf_swevent_enabled[event_id]); 5150 pmu->pmu_cpu_context = find_pmu_context(pmu->task_ctx_nr);
4816 event->destroy = sw_perf_event_destroy; 5151 if (pmu->pmu_cpu_context)
5152 goto got_cpu_context;
5153
5154 pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context);
5155 if (!pmu->pmu_cpu_context)
5156 goto free_pdc;
5157
5158 for_each_possible_cpu(cpu) {
5159 struct perf_cpu_context *cpuctx;
5160
5161 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
5162 __perf_event_init_context(&cpuctx->ctx);
5163 cpuctx->ctx.type = cpu_context;
5164 cpuctx->ctx.pmu = pmu;
5165 cpuctx->jiffies_interval = 1;
5166 INIT_LIST_HEAD(&cpuctx->rotation_list);
5167 }
5168
5169got_cpu_context:
5170 if (!pmu->start_txn) {
5171 if (pmu->pmu_enable) {
5172 /*
5173 * If we have pmu_enable/pmu_disable calls, install
5174 * transaction stubs that use that to try and batch
5175 * hardware accesses.
5176 */
5177 pmu->start_txn = perf_pmu_start_txn;
5178 pmu->commit_txn = perf_pmu_commit_txn;
5179 pmu->cancel_txn = perf_pmu_cancel_txn;
5180 } else {
5181 pmu->start_txn = perf_pmu_nop_void;
5182 pmu->commit_txn = perf_pmu_nop_int;
5183 pmu->cancel_txn = perf_pmu_nop_void;
5184 }
5185 }
5186
5187 if (!pmu->pmu_enable) {
5188 pmu->pmu_enable = perf_pmu_nop_void;
5189 pmu->pmu_disable = perf_pmu_nop_void;
5190 }
5191
5192 list_add_rcu(&pmu->entry, &pmus);
5193 ret = 0;
5194unlock:
5195 mutex_unlock(&pmus_lock);
5196
5197 return ret;
5198
5199free_pdc:
5200 free_percpu(pmu->pmu_disable_count);
5201 goto unlock;
5202}
5203
5204void perf_pmu_unregister(struct pmu *pmu)
5205{
5206 mutex_lock(&pmus_lock);
5207 list_del_rcu(&pmu->entry);
5208 mutex_unlock(&pmus_lock);
5209
5210 /*
5211 * We dereference the pmu list under both SRCU and regular RCU, so
5212 * synchronize against both of those.
5213 */
5214 synchronize_srcu(&pmus_srcu);
5215 synchronize_rcu();
5216
5217 free_percpu(pmu->pmu_disable_count);
5218 free_pmu_context(pmu->pmu_cpu_context);
5219}
5220
5221struct pmu *perf_init_event(struct perf_event *event)
5222{
5223 struct pmu *pmu = NULL;
5224 int idx;
5225
5226 idx = srcu_read_lock(&pmus_srcu);
5227 list_for_each_entry_rcu(pmu, &pmus, entry) {
5228 int ret = pmu->event_init(event);
5229 if (!ret)
5230 goto unlock;
5231
5232 if (ret != -ENOENT) {
5233 pmu = ERR_PTR(ret);
5234 goto unlock;
4817 } 5235 }
4818 pmu = &perf_ops_generic;
4819 break;
4820 } 5236 }
5237 pmu = ERR_PTR(-ENOENT);
5238unlock:
5239 srcu_read_unlock(&pmus_srcu, idx);
4821 5240
4822 return pmu; 5241 return pmu;
4823} 5242}
@@ -4826,20 +5245,18 @@ static const struct pmu *sw_perf_event_init(struct perf_event *event)
4826 * Allocate and initialize a event structure 5245 * Allocate and initialize a event structure
4827 */ 5246 */
4828static struct perf_event * 5247static struct perf_event *
4829perf_event_alloc(struct perf_event_attr *attr, 5248perf_event_alloc(struct perf_event_attr *attr, int cpu,
4830 int cpu, 5249 struct task_struct *task,
4831 struct perf_event_context *ctx, 5250 struct perf_event *group_leader,
4832 struct perf_event *group_leader, 5251 struct perf_event *parent_event,
4833 struct perf_event *parent_event, 5252 perf_overflow_handler_t overflow_handler)
4834 perf_overflow_handler_t overflow_handler, 5253{
4835 gfp_t gfpflags) 5254 struct pmu *pmu;
4836{
4837 const struct pmu *pmu;
4838 struct perf_event *event; 5255 struct perf_event *event;
4839 struct hw_perf_event *hwc; 5256 struct hw_perf_event *hwc;
4840 long err; 5257 long err;
4841 5258
4842 event = kzalloc(sizeof(*event), gfpflags); 5259 event = kzalloc(sizeof(*event), GFP_KERNEL);
4843 if (!event) 5260 if (!event)
4844 return ERR_PTR(-ENOMEM); 5261 return ERR_PTR(-ENOMEM);
4845 5262
@@ -4857,6 +5274,7 @@ perf_event_alloc(struct perf_event_attr *attr,
4857 INIT_LIST_HEAD(&event->event_entry); 5274 INIT_LIST_HEAD(&event->event_entry);
4858 INIT_LIST_HEAD(&event->sibling_list); 5275 INIT_LIST_HEAD(&event->sibling_list);
4859 init_waitqueue_head(&event->waitq); 5276 init_waitqueue_head(&event->waitq);
5277 init_irq_work(&event->pending, perf_pending_event);
4860 5278
4861 mutex_init(&event->mmap_mutex); 5279 mutex_init(&event->mmap_mutex);
4862 5280
@@ -4864,7 +5282,6 @@ perf_event_alloc(struct perf_event_attr *attr,
4864 event->attr = *attr; 5282 event->attr = *attr;
4865 event->group_leader = group_leader; 5283 event->group_leader = group_leader;
4866 event->pmu = NULL; 5284 event->pmu = NULL;
4867 event->ctx = ctx;
4868 event->oncpu = -1; 5285 event->oncpu = -1;
4869 5286
4870 event->parent = parent_event; 5287 event->parent = parent_event;
@@ -4874,6 +5291,17 @@ perf_event_alloc(struct perf_event_attr *attr,
4874 5291
4875 event->state = PERF_EVENT_STATE_INACTIVE; 5292 event->state = PERF_EVENT_STATE_INACTIVE;
4876 5293
5294 if (task) {
5295 event->attach_state = PERF_ATTACH_TASK;
5296#ifdef CONFIG_HAVE_HW_BREAKPOINT
5297 /*
5298 * hw_breakpoint is a bit difficult here..
5299 */
5300 if (attr->type == PERF_TYPE_BREAKPOINT)
5301 event->hw.bp_target = task;
5302#endif
5303 }
5304
4877 if (!overflow_handler && parent_event) 5305 if (!overflow_handler && parent_event)
4878 overflow_handler = parent_event->overflow_handler; 5306 overflow_handler = parent_event->overflow_handler;
4879 5307
@@ -4898,29 +5326,8 @@ perf_event_alloc(struct perf_event_attr *attr,
4898 if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP)) 5326 if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP))
4899 goto done; 5327 goto done;
4900 5328
4901 switch (attr->type) { 5329 pmu = perf_init_event(event);
4902 case PERF_TYPE_RAW:
4903 case PERF_TYPE_HARDWARE:
4904 case PERF_TYPE_HW_CACHE:
4905 pmu = hw_perf_event_init(event);
4906 break;
4907
4908 case PERF_TYPE_SOFTWARE:
4909 pmu = sw_perf_event_init(event);
4910 break;
4911
4912 case PERF_TYPE_TRACEPOINT:
4913 pmu = tp_perf_event_init(event);
4914 break;
4915 5330
4916 case PERF_TYPE_BREAKPOINT:
4917 pmu = bp_perf_event_init(event);
4918 break;
4919
4920
4921 default:
4922 break;
4923 }
4924done: 5331done:
4925 err = 0; 5332 err = 0;
4926 if (!pmu) 5333 if (!pmu)
@@ -4938,13 +5345,21 @@ done:
4938 event->pmu = pmu; 5345 event->pmu = pmu;
4939 5346
4940 if (!event->parent) { 5347 if (!event->parent) {
4941 atomic_inc(&nr_events); 5348 if (event->attach_state & PERF_ATTACH_TASK)
5349 jump_label_inc(&perf_task_events);
4942 if (event->attr.mmap || event->attr.mmap_data) 5350 if (event->attr.mmap || event->attr.mmap_data)
4943 atomic_inc(&nr_mmap_events); 5351 atomic_inc(&nr_mmap_events);
4944 if (event->attr.comm) 5352 if (event->attr.comm)
4945 atomic_inc(&nr_comm_events); 5353 atomic_inc(&nr_comm_events);
4946 if (event->attr.task) 5354 if (event->attr.task)
4947 atomic_inc(&nr_task_events); 5355 atomic_inc(&nr_task_events);
5356 if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
5357 err = get_callchain_buffers();
5358 if (err) {
5359 free_event(event);
5360 return ERR_PTR(err);
5361 }
5362 }
4948 } 5363 }
4949 5364
4950 return event; 5365 return event;
@@ -5092,12 +5507,16 @@ SYSCALL_DEFINE5(perf_event_open,
5092 struct perf_event_attr __user *, attr_uptr, 5507 struct perf_event_attr __user *, attr_uptr,
5093 pid_t, pid, int, cpu, int, group_fd, unsigned long, flags) 5508 pid_t, pid, int, cpu, int, group_fd, unsigned long, flags)
5094{ 5509{
5095 struct perf_event *event, *group_leader = NULL, *output_event = NULL; 5510 struct perf_event *group_leader = NULL, *output_event = NULL;
5511 struct perf_event *event, *sibling;
5096 struct perf_event_attr attr; 5512 struct perf_event_attr attr;
5097 struct perf_event_context *ctx; 5513 struct perf_event_context *ctx;
5098 struct file *event_file = NULL; 5514 struct file *event_file = NULL;
5099 struct file *group_file = NULL; 5515 struct file *group_file = NULL;
5516 struct task_struct *task = NULL;
5517 struct pmu *pmu;
5100 int event_fd; 5518 int event_fd;
5519 int move_group = 0;
5101 int fput_needed = 0; 5520 int fput_needed = 0;
5102 int err; 5521 int err;
5103 5522
@@ -5123,20 +5542,11 @@ SYSCALL_DEFINE5(perf_event_open,
5123 if (event_fd < 0) 5542 if (event_fd < 0)
5124 return event_fd; 5543 return event_fd;
5125 5544
5126 /*
5127 * Get the target context (task or percpu):
5128 */
5129 ctx = find_get_context(pid, cpu);
5130 if (IS_ERR(ctx)) {
5131 err = PTR_ERR(ctx);
5132 goto err_fd;
5133 }
5134
5135 if (group_fd != -1) { 5545 if (group_fd != -1) {
5136 group_leader = perf_fget_light(group_fd, &fput_needed); 5546 group_leader = perf_fget_light(group_fd, &fput_needed);
5137 if (IS_ERR(group_leader)) { 5547 if (IS_ERR(group_leader)) {
5138 err = PTR_ERR(group_leader); 5548 err = PTR_ERR(group_leader);
5139 goto err_put_context; 5549 goto err_fd;
5140 } 5550 }
5141 group_file = group_leader->filp; 5551 group_file = group_leader->filp;
5142 if (flags & PERF_FLAG_FD_OUTPUT) 5552 if (flags & PERF_FLAG_FD_OUTPUT)
@@ -5145,6 +5555,58 @@ SYSCALL_DEFINE5(perf_event_open,
5145 group_leader = NULL; 5555 group_leader = NULL;
5146 } 5556 }
5147 5557
5558 if (pid != -1) {
5559 task = find_lively_task_by_vpid(pid);
5560 if (IS_ERR(task)) {
5561 err = PTR_ERR(task);
5562 goto err_group_fd;
5563 }
5564 }
5565
5566 event = perf_event_alloc(&attr, cpu, task, group_leader, NULL, NULL);
5567 if (IS_ERR(event)) {
5568 err = PTR_ERR(event);
5569 goto err_task;
5570 }
5571
5572 /*
5573 * Special case software events and allow them to be part of
5574 * any hardware group.
5575 */
5576 pmu = event->pmu;
5577
5578 if (group_leader &&
5579 (is_software_event(event) != is_software_event(group_leader))) {
5580 if (is_software_event(event)) {
5581 /*
5582 * If event and group_leader are not both a software
5583 * event, and event is, then group leader is not.
5584 *
5585 * Allow the addition of software events to !software
5586 * groups, this is safe because software events never
5587 * fail to schedule.
5588 */
5589 pmu = group_leader->pmu;
5590 } else if (is_software_event(group_leader) &&
5591 (group_leader->group_flags & PERF_GROUP_SOFTWARE)) {
5592 /*
5593 * In case the group is a pure software group, and we
5594 * try to add a hardware event, move the whole group to
5595 * the hardware context.
5596 */
5597 move_group = 1;
5598 }
5599 }
5600
5601 /*
5602 * Get the target context (task or percpu):
5603 */
5604 ctx = find_get_context(pmu, task, cpu);
5605 if (IS_ERR(ctx)) {
5606 err = PTR_ERR(ctx);
5607 goto err_alloc;
5608 }
5609
5148 /* 5610 /*
5149 * Look up the group leader (we will attach this event to it): 5611 * Look up the group leader (we will attach this event to it):
5150 */ 5612 */
@@ -5156,42 +5618,66 @@ SYSCALL_DEFINE5(perf_event_open,
5156 * becoming part of another group-sibling): 5618 * becoming part of another group-sibling):
5157 */ 5619 */
5158 if (group_leader->group_leader != group_leader) 5620 if (group_leader->group_leader != group_leader)
5159 goto err_put_context; 5621 goto err_context;
5160 /* 5622 /*
5161 * Do not allow to attach to a group in a different 5623 * Do not allow to attach to a group in a different
5162 * task or CPU context: 5624 * task or CPU context:
5163 */ 5625 */
5164 if (group_leader->ctx != ctx) 5626 if (move_group) {
5165 goto err_put_context; 5627 if (group_leader->ctx->type != ctx->type)
5628 goto err_context;
5629 } else {
5630 if (group_leader->ctx != ctx)
5631 goto err_context;
5632 }
5633
5166 /* 5634 /*
5167 * Only a group leader can be exclusive or pinned 5635 * Only a group leader can be exclusive or pinned
5168 */ 5636 */
5169 if (attr.exclusive || attr.pinned) 5637 if (attr.exclusive || attr.pinned)
5170 goto err_put_context; 5638 goto err_context;
5171 }
5172
5173 event = perf_event_alloc(&attr, cpu, ctx, group_leader,
5174 NULL, NULL, GFP_KERNEL);
5175 if (IS_ERR(event)) {
5176 err = PTR_ERR(event);
5177 goto err_put_context;
5178 } 5639 }
5179 5640
5180 if (output_event) { 5641 if (output_event) {
5181 err = perf_event_set_output(event, output_event); 5642 err = perf_event_set_output(event, output_event);
5182 if (err) 5643 if (err)
5183 goto err_free_put_context; 5644 goto err_context;
5184 } 5645 }
5185 5646
5186 event_file = anon_inode_getfile("[perf_event]", &perf_fops, event, O_RDWR); 5647 event_file = anon_inode_getfile("[perf_event]", &perf_fops, event, O_RDWR);
5187 if (IS_ERR(event_file)) { 5648 if (IS_ERR(event_file)) {
5188 err = PTR_ERR(event_file); 5649 err = PTR_ERR(event_file);
5189 goto err_free_put_context; 5650 goto err_context;
5651 }
5652
5653 if (move_group) {
5654 struct perf_event_context *gctx = group_leader->ctx;
5655
5656 mutex_lock(&gctx->mutex);
5657 perf_event_remove_from_context(group_leader);
5658 list_for_each_entry(sibling, &group_leader->sibling_list,
5659 group_entry) {
5660 perf_event_remove_from_context(sibling);
5661 put_ctx(gctx);
5662 }
5663 mutex_unlock(&gctx->mutex);
5664 put_ctx(gctx);
5190 } 5665 }
5191 5666
5192 event->filp = event_file; 5667 event->filp = event_file;
5193 WARN_ON_ONCE(ctx->parent_ctx); 5668 WARN_ON_ONCE(ctx->parent_ctx);
5194 mutex_lock(&ctx->mutex); 5669 mutex_lock(&ctx->mutex);
5670
5671 if (move_group) {
5672 perf_install_in_context(ctx, group_leader, cpu);
5673 get_ctx(ctx);
5674 list_for_each_entry(sibling, &group_leader->sibling_list,
5675 group_entry) {
5676 perf_install_in_context(ctx, sibling, cpu);
5677 get_ctx(ctx);
5678 }
5679 }
5680
5195 perf_install_in_context(ctx, event, cpu); 5681 perf_install_in_context(ctx, event, cpu);
5196 ++ctx->generation; 5682 ++ctx->generation;
5197 mutex_unlock(&ctx->mutex); 5683 mutex_unlock(&ctx->mutex);
@@ -5212,11 +5698,15 @@ SYSCALL_DEFINE5(perf_event_open,
5212 fd_install(event_fd, event_file); 5698 fd_install(event_fd, event_file);
5213 return event_fd; 5699 return event_fd;
5214 5700
5215err_free_put_context: 5701err_context:
5702 put_ctx(ctx);
5703err_alloc:
5216 free_event(event); 5704 free_event(event);
5217err_put_context: 5705err_task:
5706 if (task)
5707 put_task_struct(task);
5708err_group_fd:
5218 fput_light(group_file, fput_needed); 5709 fput_light(group_file, fput_needed);
5219 put_ctx(ctx);
5220err_fd: 5710err_fd:
5221 put_unused_fd(event_fd); 5711 put_unused_fd(event_fd);
5222 return err; 5712 return err;
@@ -5227,32 +5717,31 @@ err_fd:
5227 * 5717 *
5228 * @attr: attributes of the counter to create 5718 * @attr: attributes of the counter to create
5229 * @cpu: cpu in which the counter is bound 5719 * @cpu: cpu in which the counter is bound
5230 * @pid: task to profile 5720 * @task: task to profile (NULL for percpu)
5231 */ 5721 */
5232struct perf_event * 5722struct perf_event *
5233perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, 5723perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
5234 pid_t pid, 5724 struct task_struct *task,
5235 perf_overflow_handler_t overflow_handler) 5725 perf_overflow_handler_t overflow_handler)
5236{ 5726{
5237 struct perf_event *event;
5238 struct perf_event_context *ctx; 5727 struct perf_event_context *ctx;
5728 struct perf_event *event;
5239 int err; 5729 int err;
5240 5730
5241 /* 5731 /*
5242 * Get the target context (task or percpu): 5732 * Get the target context (task or percpu):
5243 */ 5733 */
5244 5734
5245 ctx = find_get_context(pid, cpu); 5735 event = perf_event_alloc(attr, cpu, task, NULL, NULL, overflow_handler);
5246 if (IS_ERR(ctx)) {
5247 err = PTR_ERR(ctx);
5248 goto err_exit;
5249 }
5250
5251 event = perf_event_alloc(attr, cpu, ctx, NULL,
5252 NULL, overflow_handler, GFP_KERNEL);
5253 if (IS_ERR(event)) { 5736 if (IS_ERR(event)) {
5254 err = PTR_ERR(event); 5737 err = PTR_ERR(event);
5255 goto err_put_context; 5738 goto err;
5739 }
5740
5741 ctx = find_get_context(event->pmu, task, cpu);
5742 if (IS_ERR(ctx)) {
5743 err = PTR_ERR(ctx);
5744 goto err_free;
5256 } 5745 }
5257 5746
5258 event->filp = NULL; 5747 event->filp = NULL;
@@ -5270,112 +5759,13 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
5270 5759
5271 return event; 5760 return event;
5272 5761
5273 err_put_context: 5762err_free:
5274 put_ctx(ctx); 5763 free_event(event);
5275 err_exit: 5764err:
5276 return ERR_PTR(err); 5765 return ERR_PTR(err);
5277} 5766}
5278EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter); 5767EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);
5279 5768
5280/*
5281 * inherit a event from parent task to child task:
5282 */
5283static struct perf_event *
5284inherit_event(struct perf_event *parent_event,
5285 struct task_struct *parent,
5286 struct perf_event_context *parent_ctx,
5287 struct task_struct *child,
5288 struct perf_event *group_leader,
5289 struct perf_event_context *child_ctx)
5290{
5291 struct perf_event *child_event;
5292
5293 /*
5294 * Instead of creating recursive hierarchies of events,
5295 * we link inherited events back to the original parent,
5296 * which has a filp for sure, which we use as the reference
5297 * count:
5298 */
5299 if (parent_event->parent)
5300 parent_event = parent_event->parent;
5301
5302 child_event = perf_event_alloc(&parent_event->attr,
5303 parent_event->cpu, child_ctx,
5304 group_leader, parent_event,
5305 NULL, GFP_KERNEL);
5306 if (IS_ERR(child_event))
5307 return child_event;
5308 get_ctx(child_ctx);
5309
5310 /*
5311 * Make the child state follow the state of the parent event,
5312 * not its attr.disabled bit. We hold the parent's mutex,
5313 * so we won't race with perf_event_{en, dis}able_family.
5314 */
5315 if (parent_event->state >= PERF_EVENT_STATE_INACTIVE)
5316 child_event->state = PERF_EVENT_STATE_INACTIVE;
5317 else
5318 child_event->state = PERF_EVENT_STATE_OFF;
5319
5320 if (parent_event->attr.freq) {
5321 u64 sample_period = parent_event->hw.sample_period;
5322 struct hw_perf_event *hwc = &child_event->hw;
5323
5324 hwc->sample_period = sample_period;
5325 hwc->last_period = sample_period;
5326
5327 local64_set(&hwc->period_left, sample_period);
5328 }
5329
5330 child_event->overflow_handler = parent_event->overflow_handler;
5331
5332 /*
5333 * Link it up in the child's context:
5334 */
5335 add_event_to_ctx(child_event, child_ctx);
5336
5337 /*
5338 * Get a reference to the parent filp - we will fput it
5339 * when the child event exits. This is safe to do because
5340 * we are in the parent and we know that the filp still
5341 * exists and has a nonzero count:
5342 */
5343 atomic_long_inc(&parent_event->filp->f_count);
5344
5345 /*
5346 * Link this into the parent event's child list
5347 */
5348 WARN_ON_ONCE(parent_event->ctx->parent_ctx);
5349 mutex_lock(&parent_event->child_mutex);
5350 list_add_tail(&child_event->child_list, &parent_event->child_list);
5351 mutex_unlock(&parent_event->child_mutex);
5352
5353 return child_event;
5354}
5355
5356static int inherit_group(struct perf_event *parent_event,
5357 struct task_struct *parent,
5358 struct perf_event_context *parent_ctx,
5359 struct task_struct *child,
5360 struct perf_event_context *child_ctx)
5361{
5362 struct perf_event *leader;
5363 struct perf_event *sub;
5364 struct perf_event *child_ctr;
5365
5366 leader = inherit_event(parent_event, parent, parent_ctx,
5367 child, NULL, child_ctx);
5368 if (IS_ERR(leader))
5369 return PTR_ERR(leader);
5370 list_for_each_entry(sub, &parent_event->sibling_list, group_entry) {
5371 child_ctr = inherit_event(sub, parent, parent_ctx,
5372 child, leader, child_ctx);
5373 if (IS_ERR(child_ctr))
5374 return PTR_ERR(child_ctr);
5375 }
5376 return 0;
5377}
5378
5379static void sync_child_event(struct perf_event *child_event, 5769static void sync_child_event(struct perf_event *child_event,
5380 struct task_struct *child) 5770 struct task_struct *child)
5381{ 5771{
@@ -5432,16 +5822,13 @@ __perf_event_exit_task(struct perf_event *child_event,
5432 } 5822 }
5433} 5823}
5434 5824
5435/* 5825static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
5436 * When a child task exits, feed back event values to parent events.
5437 */
5438void perf_event_exit_task(struct task_struct *child)
5439{ 5826{
5440 struct perf_event *child_event, *tmp; 5827 struct perf_event *child_event, *tmp;
5441 struct perf_event_context *child_ctx; 5828 struct perf_event_context *child_ctx;
5442 unsigned long flags; 5829 unsigned long flags;
5443 5830
5444 if (likely(!child->perf_event_ctxp)) { 5831 if (likely(!child->perf_event_ctxp[ctxn])) {
5445 perf_event_task(child, NULL, 0); 5832 perf_event_task(child, NULL, 0);
5446 return; 5833 return;
5447 } 5834 }
@@ -5453,8 +5840,8 @@ void perf_event_exit_task(struct task_struct *child)
5453 * scheduled, so we are now safe from rescheduling changing 5840 * scheduled, so we are now safe from rescheduling changing
5454 * our context. 5841 * our context.
5455 */ 5842 */
5456 child_ctx = child->perf_event_ctxp; 5843 child_ctx = child->perf_event_ctxp[ctxn];
5457 __perf_event_task_sched_out(child_ctx); 5844 task_ctx_sched_out(child_ctx, EVENT_ALL);
5458 5845
5459 /* 5846 /*
5460 * Take the context lock here so that if find_get_context is 5847 * Take the context lock here so that if find_get_context is
@@ -5462,7 +5849,7 @@ void perf_event_exit_task(struct task_struct *child)
5462 * incremented the context's refcount before we do put_ctx below. 5849 * incremented the context's refcount before we do put_ctx below.
5463 */ 5850 */
5464 raw_spin_lock(&child_ctx->lock); 5851 raw_spin_lock(&child_ctx->lock);
5465 child->perf_event_ctxp = NULL; 5852 child->perf_event_ctxp[ctxn] = NULL;
5466 /* 5853 /*
5467 * If this context is a clone; unclone it so it can't get 5854 * If this context is a clone; unclone it so it can't get
5468 * swapped to another process while we're removing all 5855 * swapped to another process while we're removing all
@@ -5515,6 +5902,17 @@ again:
5515 put_ctx(child_ctx); 5902 put_ctx(child_ctx);
5516} 5903}
5517 5904
5905/*
5906 * When a child task exits, feed back event values to parent events.
5907 */
5908void perf_event_exit_task(struct task_struct *child)
5909{
5910 int ctxn;
5911
5912 for_each_task_context_nr(ctxn)
5913 perf_event_exit_task_context(child, ctxn);
5914}
5915
5518static void perf_free_event(struct perf_event *event, 5916static void perf_free_event(struct perf_event *event,
5519 struct perf_event_context *ctx) 5917 struct perf_event_context *ctx)
5520{ 5918{
@@ -5536,48 +5934,166 @@ static void perf_free_event(struct perf_event *event,
5536 5934
5537/* 5935/*
5538 * free an unexposed, unused context as created by inheritance by 5936 * free an unexposed, unused context as created by inheritance by
5539 * init_task below, used by fork() in case of fail. 5937 * perf_event_init_task below, used by fork() in case of fail.
5540 */ 5938 */
5541void perf_event_free_task(struct task_struct *task) 5939void perf_event_free_task(struct task_struct *task)
5542{ 5940{
5543 struct perf_event_context *ctx = task->perf_event_ctxp; 5941 struct perf_event_context *ctx;
5544 struct perf_event *event, *tmp; 5942 struct perf_event *event, *tmp;
5943 int ctxn;
5545 5944
5546 if (!ctx) 5945 for_each_task_context_nr(ctxn) {
5547 return; 5946 ctx = task->perf_event_ctxp[ctxn];
5947 if (!ctx)
5948 continue;
5548 5949
5549 mutex_lock(&ctx->mutex); 5950 mutex_lock(&ctx->mutex);
5550again: 5951again:
5551 list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) 5952 list_for_each_entry_safe(event, tmp, &ctx->pinned_groups,
5552 perf_free_event(event, ctx); 5953 group_entry)
5954 perf_free_event(event, ctx);
5553 5955
5554 list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, 5956 list_for_each_entry_safe(event, tmp, &ctx->flexible_groups,
5555 group_entry) 5957 group_entry)
5556 perf_free_event(event, ctx); 5958 perf_free_event(event, ctx);
5557 5959
5558 if (!list_empty(&ctx->pinned_groups) || 5960 if (!list_empty(&ctx->pinned_groups) ||
5559 !list_empty(&ctx->flexible_groups)) 5961 !list_empty(&ctx->flexible_groups))
5560 goto again; 5962 goto again;
5561 5963
5562 mutex_unlock(&ctx->mutex); 5964 mutex_unlock(&ctx->mutex);
5563 5965
5564 put_ctx(ctx); 5966 put_ctx(ctx);
5967 }
5968}
5969
5970void perf_event_delayed_put(struct task_struct *task)
5971{
5972 int ctxn;
5973
5974 for_each_task_context_nr(ctxn)
5975 WARN_ON_ONCE(task->perf_event_ctxp[ctxn]);
5976}
5977
5978/*
5979 * inherit a event from parent task to child task:
5980 */
5981static struct perf_event *
5982inherit_event(struct perf_event *parent_event,
5983 struct task_struct *parent,
5984 struct perf_event_context *parent_ctx,
5985 struct task_struct *child,
5986 struct perf_event *group_leader,
5987 struct perf_event_context *child_ctx)
5988{
5989 struct perf_event *child_event;
5990 unsigned long flags;
5991
5992 /*
5993 * Instead of creating recursive hierarchies of events,
5994 * we link inherited events back to the original parent,
5995 * which has a filp for sure, which we use as the reference
5996 * count:
5997 */
5998 if (parent_event->parent)
5999 parent_event = parent_event->parent;
6000
6001 child_event = perf_event_alloc(&parent_event->attr,
6002 parent_event->cpu,
6003 child,
6004 group_leader, parent_event,
6005 NULL);
6006 if (IS_ERR(child_event))
6007 return child_event;
6008 get_ctx(child_ctx);
6009
6010 /*
6011 * Make the child state follow the state of the parent event,
6012 * not its attr.disabled bit. We hold the parent's mutex,
6013 * so we won't race with perf_event_{en, dis}able_family.
6014 */
6015 if (parent_event->state >= PERF_EVENT_STATE_INACTIVE)
6016 child_event->state = PERF_EVENT_STATE_INACTIVE;
6017 else
6018 child_event->state = PERF_EVENT_STATE_OFF;
6019
6020 if (parent_event->attr.freq) {
6021 u64 sample_period = parent_event->hw.sample_period;
6022 struct hw_perf_event *hwc = &child_event->hw;
6023
6024 hwc->sample_period = sample_period;
6025 hwc->last_period = sample_period;
6026
6027 local64_set(&hwc->period_left, sample_period);
6028 }
6029
6030 child_event->ctx = child_ctx;
6031 child_event->overflow_handler = parent_event->overflow_handler;
6032
6033 /*
6034 * Link it up in the child's context:
6035 */
6036 raw_spin_lock_irqsave(&child_ctx->lock, flags);
6037 add_event_to_ctx(child_event, child_ctx);
6038 raw_spin_unlock_irqrestore(&child_ctx->lock, flags);
6039
6040 /*
6041 * Get a reference to the parent filp - we will fput it
6042 * when the child event exits. This is safe to do because
6043 * we are in the parent and we know that the filp still
6044 * exists and has a nonzero count:
6045 */
6046 atomic_long_inc(&parent_event->filp->f_count);
6047
6048 /*
6049 * Link this into the parent event's child list
6050 */
6051 WARN_ON_ONCE(parent_event->ctx->parent_ctx);
6052 mutex_lock(&parent_event->child_mutex);
6053 list_add_tail(&child_event->child_list, &parent_event->child_list);
6054 mutex_unlock(&parent_event->child_mutex);
6055
6056 return child_event;
6057}
6058
6059static int inherit_group(struct perf_event *parent_event,
6060 struct task_struct *parent,
6061 struct perf_event_context *parent_ctx,
6062 struct task_struct *child,
6063 struct perf_event_context *child_ctx)
6064{
6065 struct perf_event *leader;
6066 struct perf_event *sub;
6067 struct perf_event *child_ctr;
6068
6069 leader = inherit_event(parent_event, parent, parent_ctx,
6070 child, NULL, child_ctx);
6071 if (IS_ERR(leader))
6072 return PTR_ERR(leader);
6073 list_for_each_entry(sub, &parent_event->sibling_list, group_entry) {
6074 child_ctr = inherit_event(sub, parent, parent_ctx,
6075 child, leader, child_ctx);
6076 if (IS_ERR(child_ctr))
6077 return PTR_ERR(child_ctr);
6078 }
6079 return 0;
5565} 6080}
5566 6081
5567static int 6082static int
5568inherit_task_group(struct perf_event *event, struct task_struct *parent, 6083inherit_task_group(struct perf_event *event, struct task_struct *parent,
5569 struct perf_event_context *parent_ctx, 6084 struct perf_event_context *parent_ctx,
5570 struct task_struct *child, 6085 struct task_struct *child, int ctxn,
5571 int *inherited_all) 6086 int *inherited_all)
5572{ 6087{
5573 int ret; 6088 int ret;
5574 struct perf_event_context *child_ctx = child->perf_event_ctxp; 6089 struct perf_event_context *child_ctx;
5575 6090
5576 if (!event->attr.inherit) { 6091 if (!event->attr.inherit) {
5577 *inherited_all = 0; 6092 *inherited_all = 0;
5578 return 0; 6093 return 0;
5579 } 6094 }
5580 6095
6096 child_ctx = child->perf_event_ctxp[ctxn];
5581 if (!child_ctx) { 6097 if (!child_ctx) {
5582 /* 6098 /*
5583 * This is executed from the parent task context, so 6099 * This is executed from the parent task context, so
@@ -5586,14 +6102,11 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent,
5586 * child. 6102 * child.
5587 */ 6103 */
5588 6104
5589 child_ctx = kzalloc(sizeof(struct perf_event_context), 6105 child_ctx = alloc_perf_context(event->pmu, child);
5590 GFP_KERNEL);
5591 if (!child_ctx) 6106 if (!child_ctx)
5592 return -ENOMEM; 6107 return -ENOMEM;
5593 6108
5594 __perf_event_init_context(child_ctx, child); 6109 child->perf_event_ctxp[ctxn] = child_ctx;
5595 child->perf_event_ctxp = child_ctx;
5596 get_task_struct(child);
5597 } 6110 }
5598 6111
5599 ret = inherit_group(event, parent, parent_ctx, 6112 ret = inherit_group(event, parent, parent_ctx,
@@ -5605,11 +6118,10 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent,
5605 return ret; 6118 return ret;
5606} 6119}
5607 6120
5608
5609/* 6121/*
5610 * Initialize the perf_event context in task_struct 6122 * Initialize the perf_event context in task_struct
5611 */ 6123 */
5612int perf_event_init_task(struct task_struct *child) 6124int perf_event_init_context(struct task_struct *child, int ctxn)
5613{ 6125{
5614 struct perf_event_context *child_ctx, *parent_ctx; 6126 struct perf_event_context *child_ctx, *parent_ctx;
5615 struct perf_event_context *cloned_ctx; 6127 struct perf_event_context *cloned_ctx;
@@ -5618,19 +6130,19 @@ int perf_event_init_task(struct task_struct *child)
5618 int inherited_all = 1; 6130 int inherited_all = 1;
5619 int ret = 0; 6131 int ret = 0;
5620 6132
5621 child->perf_event_ctxp = NULL; 6133 child->perf_event_ctxp[ctxn] = NULL;
5622 6134
5623 mutex_init(&child->perf_event_mutex); 6135 mutex_init(&child->perf_event_mutex);
5624 INIT_LIST_HEAD(&child->perf_event_list); 6136 INIT_LIST_HEAD(&child->perf_event_list);
5625 6137
5626 if (likely(!parent->perf_event_ctxp)) 6138 if (likely(!parent->perf_event_ctxp[ctxn]))
5627 return 0; 6139 return 0;
5628 6140
5629 /* 6141 /*
5630 * If the parent's context is a clone, pin it so it won't get 6142 * If the parent's context is a clone, pin it so it won't get
5631 * swapped under us. 6143 * swapped under us.
5632 */ 6144 */
5633 parent_ctx = perf_pin_task_context(parent); 6145 parent_ctx = perf_pin_task_context(parent, ctxn);
5634 6146
5635 /* 6147 /*
5636 * No need to check if parent_ctx != NULL here; since we saw 6148 * No need to check if parent_ctx != NULL here; since we saw
@@ -5650,20 +6162,20 @@ int perf_event_init_task(struct task_struct *child)
5650 * the list, not manipulating it: 6162 * the list, not manipulating it:
5651 */ 6163 */
5652 list_for_each_entry(event, &parent_ctx->pinned_groups, group_entry) { 6164 list_for_each_entry(event, &parent_ctx->pinned_groups, group_entry) {
5653 ret = inherit_task_group(event, parent, parent_ctx, child, 6165 ret = inherit_task_group(event, parent, parent_ctx,
5654 &inherited_all); 6166 child, ctxn, &inherited_all);
5655 if (ret) 6167 if (ret)
5656 break; 6168 break;
5657 } 6169 }
5658 6170
5659 list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) { 6171 list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) {
5660 ret = inherit_task_group(event, parent, parent_ctx, child, 6172 ret = inherit_task_group(event, parent, parent_ctx,
5661 &inherited_all); 6173 child, ctxn, &inherited_all);
5662 if (ret) 6174 if (ret)
5663 break; 6175 break;
5664 } 6176 }
5665 6177
5666 child_ctx = child->perf_event_ctxp; 6178 child_ctx = child->perf_event_ctxp[ctxn];
5667 6179
5668 if (child_ctx && inherited_all) { 6180 if (child_ctx && inherited_all) {
5669 /* 6181 /*
@@ -5692,63 +6204,98 @@ int perf_event_init_task(struct task_struct *child)
5692 return ret; 6204 return ret;
5693} 6205}
5694 6206
6207/*
6208 * Initialize the perf_event context in task_struct
6209 */
6210int perf_event_init_task(struct task_struct *child)
6211{
6212 int ctxn, ret;
6213
6214 for_each_task_context_nr(ctxn) {
6215 ret = perf_event_init_context(child, ctxn);
6216 if (ret)
6217 return ret;
6218 }
6219
6220 return 0;
6221}
6222
5695static void __init perf_event_init_all_cpus(void) 6223static void __init perf_event_init_all_cpus(void)
5696{ 6224{
6225 struct swevent_htable *swhash;
5697 int cpu; 6226 int cpu;
5698 struct perf_cpu_context *cpuctx;
5699 6227
5700 for_each_possible_cpu(cpu) { 6228 for_each_possible_cpu(cpu) {
5701 cpuctx = &per_cpu(perf_cpu_context, cpu); 6229 swhash = &per_cpu(swevent_htable, cpu);
5702 mutex_init(&cpuctx->hlist_mutex); 6230 mutex_init(&swhash->hlist_mutex);
5703 __perf_event_init_context(&cpuctx->ctx, NULL); 6231 INIT_LIST_HEAD(&per_cpu(rotation_list, cpu));
5704 } 6232 }
5705} 6233}
5706 6234
5707static void __cpuinit perf_event_init_cpu(int cpu) 6235static void __cpuinit perf_event_init_cpu(int cpu)
5708{ 6236{
5709 struct perf_cpu_context *cpuctx; 6237 struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
5710
5711 cpuctx = &per_cpu(perf_cpu_context, cpu);
5712 6238
5713 spin_lock(&perf_resource_lock); 6239 mutex_lock(&swhash->hlist_mutex);
5714 cpuctx->max_pertask = perf_max_events - perf_reserved_percpu; 6240 if (swhash->hlist_refcount > 0) {
5715 spin_unlock(&perf_resource_lock);
5716
5717 mutex_lock(&cpuctx->hlist_mutex);
5718 if (cpuctx->hlist_refcount > 0) {
5719 struct swevent_hlist *hlist; 6241 struct swevent_hlist *hlist;
5720 6242
5721 hlist = kzalloc(sizeof(*hlist), GFP_KERNEL); 6243 hlist = kzalloc_node(sizeof(*hlist), GFP_KERNEL, cpu_to_node(cpu));
5722 WARN_ON_ONCE(!hlist); 6244 WARN_ON(!hlist);
5723 rcu_assign_pointer(cpuctx->swevent_hlist, hlist); 6245 rcu_assign_pointer(swhash->swevent_hlist, hlist);
5724 } 6246 }
5725 mutex_unlock(&cpuctx->hlist_mutex); 6247 mutex_unlock(&swhash->hlist_mutex);
5726} 6248}
5727 6249
5728#ifdef CONFIG_HOTPLUG_CPU 6250#ifdef CONFIG_HOTPLUG_CPU
5729static void __perf_event_exit_cpu(void *info) 6251static void perf_pmu_rotate_stop(struct pmu *pmu)
5730{ 6252{
5731 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); 6253 struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
5732 struct perf_event_context *ctx = &cpuctx->ctx; 6254
6255 WARN_ON(!irqs_disabled());
6256
6257 list_del_init(&cpuctx->rotation_list);
6258}
6259
6260static void __perf_event_exit_context(void *__info)
6261{
6262 struct perf_event_context *ctx = __info;
5733 struct perf_event *event, *tmp; 6263 struct perf_event *event, *tmp;
5734 6264
6265 perf_pmu_rotate_stop(ctx->pmu);
6266
5735 list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) 6267 list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry)
5736 __perf_event_remove_from_context(event); 6268 __perf_event_remove_from_context(event);
5737 list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry) 6269 list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry)
5738 __perf_event_remove_from_context(event); 6270 __perf_event_remove_from_context(event);
5739} 6271}
6272
6273static void perf_event_exit_cpu_context(int cpu)
6274{
6275 struct perf_event_context *ctx;
6276 struct pmu *pmu;
6277 int idx;
6278
6279 idx = srcu_read_lock(&pmus_srcu);
6280 list_for_each_entry_rcu(pmu, &pmus, entry) {
6281 ctx = &per_cpu_ptr(pmu->pmu_cpu_context, cpu)->ctx;
6282
6283 mutex_lock(&ctx->mutex);
6284 smp_call_function_single(cpu, __perf_event_exit_context, ctx, 1);
6285 mutex_unlock(&ctx->mutex);
6286 }
6287 srcu_read_unlock(&pmus_srcu, idx);
6288}
6289
5740static void perf_event_exit_cpu(int cpu) 6290static void perf_event_exit_cpu(int cpu)
5741{ 6291{
5742 struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); 6292 struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
5743 struct perf_event_context *ctx = &cpuctx->ctx;
5744 6293
5745 mutex_lock(&cpuctx->hlist_mutex); 6294 mutex_lock(&swhash->hlist_mutex);
5746 swevent_hlist_release(cpuctx); 6295 swevent_hlist_release(swhash);
5747 mutex_unlock(&cpuctx->hlist_mutex); 6296 mutex_unlock(&swhash->hlist_mutex);
5748 6297
5749 mutex_lock(&ctx->mutex); 6298 perf_event_exit_cpu_context(cpu);
5750 smp_call_function_single(cpu, __perf_event_exit_cpu, NULL, 1);
5751 mutex_unlock(&ctx->mutex);
5752} 6299}
5753#else 6300#else
5754static inline void perf_event_exit_cpu(int cpu) { } 6301static inline void perf_event_exit_cpu(int cpu) { }
@@ -5778,118 +6325,13 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
5778 return NOTIFY_OK; 6325 return NOTIFY_OK;
5779} 6326}
5780 6327
5781/*
5782 * This has to have a higher priority than migration_notifier in sched.c.
5783 */
5784static struct notifier_block __cpuinitdata perf_cpu_nb = {
5785 .notifier_call = perf_cpu_notify,
5786 .priority = 20,
5787};
5788
5789void __init perf_event_init(void) 6328void __init perf_event_init(void)
5790{ 6329{
5791 perf_event_init_all_cpus(); 6330 perf_event_init_all_cpus();
5792 perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE, 6331 init_srcu_struct(&pmus_srcu);
5793 (void *)(long)smp_processor_id()); 6332 perf_pmu_register(&perf_swevent);
5794 perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE, 6333 perf_pmu_register(&perf_cpu_clock);
5795 (void *)(long)smp_processor_id()); 6334 perf_pmu_register(&perf_task_clock);
5796 register_cpu_notifier(&perf_cpu_nb); 6335 perf_tp_register();
5797} 6336 perf_cpu_notifier(perf_cpu_notify);
5798
5799static ssize_t perf_show_reserve_percpu(struct sysdev_class *class,
5800 struct sysdev_class_attribute *attr,
5801 char *buf)
5802{
5803 return sprintf(buf, "%d\n", perf_reserved_percpu);
5804}
5805
5806static ssize_t
5807perf_set_reserve_percpu(struct sysdev_class *class,
5808 struct sysdev_class_attribute *attr,
5809 const char *buf,
5810 size_t count)
5811{
5812 struct perf_cpu_context *cpuctx;
5813 unsigned long val;
5814 int err, cpu, mpt;
5815
5816 err = strict_strtoul(buf, 10, &val);
5817 if (err)
5818 return err;
5819 if (val > perf_max_events)
5820 return -EINVAL;
5821
5822 spin_lock(&perf_resource_lock);
5823 perf_reserved_percpu = val;
5824 for_each_online_cpu(cpu) {
5825 cpuctx = &per_cpu(perf_cpu_context, cpu);
5826 raw_spin_lock_irq(&cpuctx->ctx.lock);
5827 mpt = min(perf_max_events - cpuctx->ctx.nr_events,
5828 perf_max_events - perf_reserved_percpu);
5829 cpuctx->max_pertask = mpt;
5830 raw_spin_unlock_irq(&cpuctx->ctx.lock);
5831 }
5832 spin_unlock(&perf_resource_lock);
5833
5834 return count;
5835}
5836
5837static ssize_t perf_show_overcommit(struct sysdev_class *class,
5838 struct sysdev_class_attribute *attr,
5839 char *buf)
5840{
5841 return sprintf(buf, "%d\n", perf_overcommit);
5842}
5843
5844static ssize_t
5845perf_set_overcommit(struct sysdev_class *class,
5846 struct sysdev_class_attribute *attr,
5847 const char *buf, size_t count)
5848{
5849 unsigned long val;
5850 int err;
5851
5852 err = strict_strtoul(buf, 10, &val);
5853 if (err)
5854 return err;
5855 if (val > 1)
5856 return -EINVAL;
5857
5858 spin_lock(&perf_resource_lock);
5859 perf_overcommit = val;
5860 spin_unlock(&perf_resource_lock);
5861
5862 return count;
5863}
5864
5865static SYSDEV_CLASS_ATTR(
5866 reserve_percpu,
5867 0644,
5868 perf_show_reserve_percpu,
5869 perf_set_reserve_percpu
5870 );
5871
5872static SYSDEV_CLASS_ATTR(
5873 overcommit,
5874 0644,
5875 perf_show_overcommit,
5876 perf_set_overcommit
5877 );
5878
5879static struct attribute *perfclass_attrs[] = {
5880 &attr_reserve_percpu.attr,
5881 &attr_overcommit.attr,
5882 NULL
5883};
5884
5885static struct attribute_group perfclass_attr_group = {
5886 .attrs = perfclass_attrs,
5887 .name = "perf_events",
5888};
5889
5890static int __init perf_event_sysfs_init(void)
5891{
5892 return sysfs_create_group(&cpu_sysdev_class.kset.kobj,
5893 &perfclass_attr_group);
5894} 6337}
5895device_initcall(perf_event_sysfs_init);
diff --git a/kernel/sched.c b/kernel/sched.c
index 0bba34a48d10..5a5cc33e4999 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3584,7 +3584,7 @@ void scheduler_tick(void)
3584 curr->sched_class->task_tick(rq, curr, 0); 3584 curr->sched_class->task_tick(rq, curr, 0);
3585 raw_spin_unlock(&rq->lock); 3585 raw_spin_unlock(&rq->lock);
3586 3586
3587 perf_event_task_tick(curr); 3587 perf_event_task_tick();
3588 3588
3589#ifdef CONFIG_SMP 3589#ifdef CONFIG_SMP
3590 rq->idle_at_tick = idle_cpu(cpu); 3590 rq->idle_at_tick = idle_cpu(cpu);
diff --git a/kernel/test_kprobes.c b/kernel/test_kprobes.c
index 4f104515a19b..f8b11a283171 100644
--- a/kernel/test_kprobes.c
+++ b/kernel/test_kprobes.c
@@ -115,7 +115,9 @@ static int test_kprobes(void)
115 int ret; 115 int ret;
116 struct kprobe *kps[2] = {&kp, &kp2}; 116 struct kprobe *kps[2] = {&kp, &kp2};
117 117
118 kp.addr = 0; /* addr should be cleard for reusing kprobe. */ 118 /* addr and flags should be cleard for reusing kprobe. */
119 kp.addr = NULL;
120 kp.flags = 0;
119 ret = register_kprobes(kps, 2); 121 ret = register_kprobes(kps, 2);
120 if (ret < 0) { 122 if (ret < 0) {
121 printk(KERN_ERR "Kprobe smoke test failed: " 123 printk(KERN_ERR "Kprobe smoke test failed: "
@@ -210,7 +212,9 @@ static int test_jprobes(void)
210 int ret; 212 int ret;
211 struct jprobe *jps[2] = {&jp, &jp2}; 213 struct jprobe *jps[2] = {&jp, &jp2};
212 214
213 jp.kp.addr = 0; /* addr should be cleard for reusing kprobe. */ 215 /* addr and flags should be cleard for reusing kprobe. */
216 jp.kp.addr = NULL;
217 jp.kp.flags = 0;
214 ret = register_jprobes(jps, 2); 218 ret = register_jprobes(jps, 2);
215 if (ret < 0) { 219 if (ret < 0) {
216 printk(KERN_ERR "Kprobe smoke test failed: " 220 printk(KERN_ERR "Kprobe smoke test failed: "
@@ -323,7 +327,9 @@ static int test_kretprobes(void)
323 int ret; 327 int ret;
324 struct kretprobe *rps[2] = {&rp, &rp2}; 328 struct kretprobe *rps[2] = {&rp, &rp2};
325 329
326 rp.kp.addr = 0; /* addr should be cleard for reusing kprobe. */ 330 /* addr and flags should be cleard for reusing kprobe. */
331 rp.kp.addr = NULL;
332 rp.kp.flags = 0;
327 ret = register_kretprobes(rps, 2); 333 ret = register_kretprobes(rps, 2);
328 if (ret < 0) { 334 if (ret < 0) {
329 printk(KERN_ERR "Kprobe smoke test failed: " 335 printk(KERN_ERR "Kprobe smoke test failed: "
diff --git a/kernel/timer.c b/kernel/timer.c
index 97bf05baade7..68a9ae7679b7 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -37,7 +37,7 @@
37#include <linux/delay.h> 37#include <linux/delay.h>
38#include <linux/tick.h> 38#include <linux/tick.h>
39#include <linux/kallsyms.h> 39#include <linux/kallsyms.h>
40#include <linux/perf_event.h> 40#include <linux/irq_work.h>
41#include <linux/sched.h> 41#include <linux/sched.h>
42#include <linux/slab.h> 42#include <linux/slab.h>
43 43
@@ -1279,7 +1279,10 @@ void update_process_times(int user_tick)
1279 run_local_timers(); 1279 run_local_timers();
1280 rcu_check_callbacks(cpu, user_tick); 1280 rcu_check_callbacks(cpu, user_tick);
1281 printk_tick(); 1281 printk_tick();
1282 perf_event_do_pending(); 1282#ifdef CONFIG_IRQ_WORK
1283 if (in_irq())
1284 irq_work_run();
1285#endif
1283 scheduler_tick(); 1286 scheduler_tick();
1284 run_posix_cpu_timers(p); 1287 run_posix_cpu_timers(p);
1285} 1288}
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 538501c6ea50..e550d2eda1df 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -49,6 +49,11 @@ config HAVE_SYSCALL_TRACEPOINTS
49 help 49 help
50 See Documentation/trace/ftrace-design.txt 50 See Documentation/trace/ftrace-design.txt
51 51
52config HAVE_C_RECORDMCOUNT
53 bool
54 help
55 C version of recordmcount available?
56
52config TRACER_MAX_TRACE 57config TRACER_MAX_TRACE
53 bool 58 bool
54 59
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index fa7ece649fe1..ebd80d50c474 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -884,10 +884,8 @@ enum {
884 FTRACE_ENABLE_CALLS = (1 << 0), 884 FTRACE_ENABLE_CALLS = (1 << 0),
885 FTRACE_DISABLE_CALLS = (1 << 1), 885 FTRACE_DISABLE_CALLS = (1 << 1),
886 FTRACE_UPDATE_TRACE_FUNC = (1 << 2), 886 FTRACE_UPDATE_TRACE_FUNC = (1 << 2),
887 FTRACE_ENABLE_MCOUNT = (1 << 3), 887 FTRACE_START_FUNC_RET = (1 << 3),
888 FTRACE_DISABLE_MCOUNT = (1 << 4), 888 FTRACE_STOP_FUNC_RET = (1 << 4),
889 FTRACE_START_FUNC_RET = (1 << 5),
890 FTRACE_STOP_FUNC_RET = (1 << 6),
891}; 889};
892 890
893static int ftrace_filtered; 891static int ftrace_filtered;
@@ -1226,8 +1224,6 @@ static void ftrace_shutdown(int command)
1226 1224
1227static void ftrace_startup_sysctl(void) 1225static void ftrace_startup_sysctl(void)
1228{ 1226{
1229 int command = FTRACE_ENABLE_MCOUNT;
1230
1231 if (unlikely(ftrace_disabled)) 1227 if (unlikely(ftrace_disabled))
1232 return; 1228 return;
1233 1229
@@ -1235,23 +1231,17 @@ static void ftrace_startup_sysctl(void)
1235 saved_ftrace_func = NULL; 1231 saved_ftrace_func = NULL;
1236 /* ftrace_start_up is true if we want ftrace running */ 1232 /* ftrace_start_up is true if we want ftrace running */
1237 if (ftrace_start_up) 1233 if (ftrace_start_up)
1238 command |= FTRACE_ENABLE_CALLS; 1234 ftrace_run_update_code(FTRACE_ENABLE_CALLS);
1239
1240 ftrace_run_update_code(command);
1241} 1235}
1242 1236
1243static void ftrace_shutdown_sysctl(void) 1237static void ftrace_shutdown_sysctl(void)
1244{ 1238{
1245 int command = FTRACE_DISABLE_MCOUNT;
1246
1247 if (unlikely(ftrace_disabled)) 1239 if (unlikely(ftrace_disabled))
1248 return; 1240 return;
1249 1241
1250 /* ftrace_start_up is true if ftrace is running */ 1242 /* ftrace_start_up is true if ftrace is running */
1251 if (ftrace_start_up) 1243 if (ftrace_start_up)
1252 command |= FTRACE_DISABLE_CALLS; 1244 ftrace_run_update_code(FTRACE_DISABLE_CALLS);
1253
1254 ftrace_run_update_code(command);
1255} 1245}
1256 1246
1257static cycle_t ftrace_update_time; 1247static cycle_t ftrace_update_time;
@@ -1368,24 +1358,29 @@ enum {
1368#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ 1358#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
1369 1359
1370struct ftrace_iterator { 1360struct ftrace_iterator {
1371 struct ftrace_page *pg; 1361 loff_t pos;
1372 int hidx; 1362 loff_t func_pos;
1373 int idx; 1363 struct ftrace_page *pg;
1374 unsigned flags; 1364 struct dyn_ftrace *func;
1375 struct trace_parser parser; 1365 struct ftrace_func_probe *probe;
1366 struct trace_parser parser;
1367 int hidx;
1368 int idx;
1369 unsigned flags;
1376}; 1370};
1377 1371
1378static void * 1372static void *
1379t_hash_next(struct seq_file *m, void *v, loff_t *pos) 1373t_hash_next(struct seq_file *m, loff_t *pos)
1380{ 1374{
1381 struct ftrace_iterator *iter = m->private; 1375 struct ftrace_iterator *iter = m->private;
1382 struct hlist_node *hnd = v; 1376 struct hlist_node *hnd = NULL;
1383 struct hlist_head *hhd; 1377 struct hlist_head *hhd;
1384 1378
1385 WARN_ON(!(iter->flags & FTRACE_ITER_HASH));
1386
1387 (*pos)++; 1379 (*pos)++;
1380 iter->pos = *pos;
1388 1381
1382 if (iter->probe)
1383 hnd = &iter->probe->node;
1389 retry: 1384 retry:
1390 if (iter->hidx >= FTRACE_FUNC_HASHSIZE) 1385 if (iter->hidx >= FTRACE_FUNC_HASHSIZE)
1391 return NULL; 1386 return NULL;
@@ -1408,7 +1403,12 @@ t_hash_next(struct seq_file *m, void *v, loff_t *pos)
1408 } 1403 }
1409 } 1404 }
1410 1405
1411 return hnd; 1406 if (WARN_ON_ONCE(!hnd))
1407 return NULL;
1408
1409 iter->probe = hlist_entry(hnd, struct ftrace_func_probe, node);
1410
1411 return iter;
1412} 1412}
1413 1413
1414static void *t_hash_start(struct seq_file *m, loff_t *pos) 1414static void *t_hash_start(struct seq_file *m, loff_t *pos)
@@ -1417,26 +1417,32 @@ static void *t_hash_start(struct seq_file *m, loff_t *pos)
1417 void *p = NULL; 1417 void *p = NULL;
1418 loff_t l; 1418 loff_t l;
1419 1419
1420 if (!(iter->flags & FTRACE_ITER_HASH)) 1420 if (iter->func_pos > *pos)
1421 *pos = 0; 1421 return NULL;
1422
1423 iter->flags |= FTRACE_ITER_HASH;
1424 1422
1425 iter->hidx = 0; 1423 iter->hidx = 0;
1426 for (l = 0; l <= *pos; ) { 1424 for (l = 0; l <= (*pos - iter->func_pos); ) {
1427 p = t_hash_next(m, p, &l); 1425 p = t_hash_next(m, &l);
1428 if (!p) 1426 if (!p)
1429 break; 1427 break;
1430 } 1428 }
1431 return p; 1429 if (!p)
1430 return NULL;
1431
1432 /* Only set this if we have an item */
1433 iter->flags |= FTRACE_ITER_HASH;
1434
1435 return iter;
1432} 1436}
1433 1437
1434static int t_hash_show(struct seq_file *m, void *v) 1438static int
1439t_hash_show(struct seq_file *m, struct ftrace_iterator *iter)
1435{ 1440{
1436 struct ftrace_func_probe *rec; 1441 struct ftrace_func_probe *rec;
1437 struct hlist_node *hnd = v;
1438 1442
1439 rec = hlist_entry(hnd, struct ftrace_func_probe, node); 1443 rec = iter->probe;
1444 if (WARN_ON_ONCE(!rec))
1445 return -EIO;
1440 1446
1441 if (rec->ops->print) 1447 if (rec->ops->print)
1442 return rec->ops->print(m, rec->ip, rec->ops, rec->data); 1448 return rec->ops->print(m, rec->ip, rec->ops, rec->data);
@@ -1457,12 +1463,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
1457 struct dyn_ftrace *rec = NULL; 1463 struct dyn_ftrace *rec = NULL;
1458 1464
1459 if (iter->flags & FTRACE_ITER_HASH) 1465 if (iter->flags & FTRACE_ITER_HASH)
1460 return t_hash_next(m, v, pos); 1466 return t_hash_next(m, pos);
1461 1467
1462 (*pos)++; 1468 (*pos)++;
1469 iter->pos = *pos;
1463 1470
1464 if (iter->flags & FTRACE_ITER_PRINTALL) 1471 if (iter->flags & FTRACE_ITER_PRINTALL)
1465 return NULL; 1472 return t_hash_start(m, pos);
1466 1473
1467 retry: 1474 retry:
1468 if (iter->idx >= iter->pg->index) { 1475 if (iter->idx >= iter->pg->index) {
@@ -1491,7 +1498,20 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
1491 } 1498 }
1492 } 1499 }
1493 1500
1494 return rec; 1501 if (!rec)
1502 return t_hash_start(m, pos);
1503
1504 iter->func_pos = *pos;
1505 iter->func = rec;
1506
1507 return iter;
1508}
1509
1510static void reset_iter_read(struct ftrace_iterator *iter)
1511{
1512 iter->pos = 0;
1513 iter->func_pos = 0;
1514 iter->flags &= ~(FTRACE_ITER_PRINTALL & FTRACE_ITER_HASH);
1495} 1515}
1496 1516
1497static void *t_start(struct seq_file *m, loff_t *pos) 1517static void *t_start(struct seq_file *m, loff_t *pos)
@@ -1502,6 +1522,12 @@ static void *t_start(struct seq_file *m, loff_t *pos)
1502 1522
1503 mutex_lock(&ftrace_lock); 1523 mutex_lock(&ftrace_lock);
1504 /* 1524 /*
1525 * If an lseek was done, then reset and start from beginning.
1526 */
1527 if (*pos < iter->pos)
1528 reset_iter_read(iter);
1529
1530 /*
1505 * For set_ftrace_filter reading, if we have the filter 1531 * For set_ftrace_filter reading, if we have the filter
1506 * off, we can short cut and just print out that all 1532 * off, we can short cut and just print out that all
1507 * functions are enabled. 1533 * functions are enabled.
@@ -1518,6 +1544,11 @@ static void *t_start(struct seq_file *m, loff_t *pos)
1518 if (iter->flags & FTRACE_ITER_HASH) 1544 if (iter->flags & FTRACE_ITER_HASH)
1519 return t_hash_start(m, pos); 1545 return t_hash_start(m, pos);
1520 1546
1547 /*
1548 * Unfortunately, we need to restart at ftrace_pages_start
1549 * every time we let go of the ftrace_mutex. This is because
1550 * those pointers can change without the lock.
1551 */
1521 iter->pg = ftrace_pages_start; 1552 iter->pg = ftrace_pages_start;
1522 iter->idx = 0; 1553 iter->idx = 0;
1523 for (l = 0; l <= *pos; ) { 1554 for (l = 0; l <= *pos; ) {
@@ -1526,10 +1557,14 @@ static void *t_start(struct seq_file *m, loff_t *pos)
1526 break; 1557 break;
1527 } 1558 }
1528 1559
1529 if (!p && iter->flags & FTRACE_ITER_FILTER) 1560 if (!p) {
1530 return t_hash_start(m, pos); 1561 if (iter->flags & FTRACE_ITER_FILTER)
1562 return t_hash_start(m, pos);
1531 1563
1532 return p; 1564 return NULL;
1565 }
1566
1567 return iter;
1533} 1568}
1534 1569
1535static void t_stop(struct seq_file *m, void *p) 1570static void t_stop(struct seq_file *m, void *p)
@@ -1540,16 +1575,18 @@ static void t_stop(struct seq_file *m, void *p)
1540static int t_show(struct seq_file *m, void *v) 1575static int t_show(struct seq_file *m, void *v)
1541{ 1576{
1542 struct ftrace_iterator *iter = m->private; 1577 struct ftrace_iterator *iter = m->private;
1543 struct dyn_ftrace *rec = v; 1578 struct dyn_ftrace *rec;
1544 1579
1545 if (iter->flags & FTRACE_ITER_HASH) 1580 if (iter->flags & FTRACE_ITER_HASH)
1546 return t_hash_show(m, v); 1581 return t_hash_show(m, iter);
1547 1582
1548 if (iter->flags & FTRACE_ITER_PRINTALL) { 1583 if (iter->flags & FTRACE_ITER_PRINTALL) {
1549 seq_printf(m, "#### all functions enabled ####\n"); 1584 seq_printf(m, "#### all functions enabled ####\n");
1550 return 0; 1585 return 0;
1551 } 1586 }
1552 1587
1588 rec = iter->func;
1589
1553 if (!rec) 1590 if (!rec)
1554 return 0; 1591 return 0;
1555 1592
@@ -1601,8 +1638,8 @@ ftrace_failures_open(struct inode *inode, struct file *file)
1601 1638
1602 ret = ftrace_avail_open(inode, file); 1639 ret = ftrace_avail_open(inode, file);
1603 if (!ret) { 1640 if (!ret) {
1604 m = (struct seq_file *)file->private_data; 1641 m = file->private_data;
1605 iter = (struct ftrace_iterator *)m->private; 1642 iter = m->private;
1606 iter->flags = FTRACE_ITER_FAILURES; 1643 iter->flags = FTRACE_ITER_FAILURES;
1607 } 1644 }
1608 1645
@@ -2418,7 +2455,7 @@ static const struct file_operations ftrace_filter_fops = {
2418 .open = ftrace_filter_open, 2455 .open = ftrace_filter_open,
2419 .read = seq_read, 2456 .read = seq_read,
2420 .write = ftrace_filter_write, 2457 .write = ftrace_filter_write,
2421 .llseek = no_llseek, 2458 .llseek = ftrace_regex_lseek,
2422 .release = ftrace_filter_release, 2459 .release = ftrace_filter_release,
2423}; 2460};
2424 2461
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index bca96377fd4e..c5a632a669e1 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2606,6 +2606,19 @@ void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
2606} 2606}
2607EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu); 2607EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
2608 2608
2609/*
2610 * The total entries in the ring buffer is the running counter
2611 * of entries entered into the ring buffer, minus the sum of
2612 * the entries read from the ring buffer and the number of
2613 * entries that were overwritten.
2614 */
2615static inline unsigned long
2616rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer)
2617{
2618 return local_read(&cpu_buffer->entries) -
2619 (local_read(&cpu_buffer->overrun) + cpu_buffer->read);
2620}
2621
2609/** 2622/**
2610 * ring_buffer_entries_cpu - get the number of entries in a cpu buffer 2623 * ring_buffer_entries_cpu - get the number of entries in a cpu buffer
2611 * @buffer: The ring buffer 2624 * @buffer: The ring buffer
@@ -2614,16 +2627,13 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
2614unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) 2627unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
2615{ 2628{
2616 struct ring_buffer_per_cpu *cpu_buffer; 2629 struct ring_buffer_per_cpu *cpu_buffer;
2617 unsigned long ret;
2618 2630
2619 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 2631 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2620 return 0; 2632 return 0;
2621 2633
2622 cpu_buffer = buffer->buffers[cpu]; 2634 cpu_buffer = buffer->buffers[cpu];
2623 ret = (local_read(&cpu_buffer->entries) - local_read(&cpu_buffer->overrun))
2624 - cpu_buffer->read;
2625 2635
2626 return ret; 2636 return rb_num_of_entries(cpu_buffer);
2627} 2637}
2628EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu); 2638EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu);
2629 2639
@@ -2684,8 +2694,7 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer)
2684 /* if you care about this being correct, lock the buffer */ 2694 /* if you care about this being correct, lock the buffer */
2685 for_each_buffer_cpu(buffer, cpu) { 2695 for_each_buffer_cpu(buffer, cpu) {
2686 cpu_buffer = buffer->buffers[cpu]; 2696 cpu_buffer = buffer->buffers[cpu];
2687 entries += (local_read(&cpu_buffer->entries) - 2697 entries += rb_num_of_entries(cpu_buffer);
2688 local_read(&cpu_buffer->overrun)) - cpu_buffer->read;
2689 } 2698 }
2690 2699
2691 return entries; 2700 return entries;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 9ec59f541156..001bcd2ccf4a 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2196,7 +2196,7 @@ int tracing_open_generic(struct inode *inode, struct file *filp)
2196 2196
2197static int tracing_release(struct inode *inode, struct file *file) 2197static int tracing_release(struct inode *inode, struct file *file)
2198{ 2198{
2199 struct seq_file *m = (struct seq_file *)file->private_data; 2199 struct seq_file *m = file->private_data;
2200 struct trace_iterator *iter; 2200 struct trace_iterator *iter;
2201 int cpu; 2201 int cpu;
2202 2202
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index d39b3c5454a5..9021f8c0c0c3 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -343,6 +343,10 @@ void trace_function(struct trace_array *tr,
343 unsigned long ip, 343 unsigned long ip,
344 unsigned long parent_ip, 344 unsigned long parent_ip,
345 unsigned long flags, int pc); 345 unsigned long flags, int pc);
346void trace_graph_function(struct trace_array *tr,
347 unsigned long ip,
348 unsigned long parent_ip,
349 unsigned long flags, int pc);
346void trace_default_header(struct seq_file *m); 350void trace_default_header(struct seq_file *m);
347void print_trace_header(struct seq_file *m, struct trace_iterator *iter); 351void print_trace_header(struct seq_file *m, struct trace_iterator *iter);
348int trace_empty(struct trace_iterator *iter); 352int trace_empty(struct trace_iterator *iter);
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 31cc4cb0dbf2..39c059ca670e 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -9,7 +9,7 @@
9#include <linux/kprobes.h> 9#include <linux/kprobes.h>
10#include "trace.h" 10#include "trace.h"
11 11
12static char *perf_trace_buf[4]; 12static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS];
13 13
14/* 14/*
15 * Force it to be aligned to unsigned long to avoid misaligned accesses 15 * Force it to be aligned to unsigned long to avoid misaligned accesses
@@ -24,7 +24,7 @@ static int total_ref_count;
24static int perf_trace_event_init(struct ftrace_event_call *tp_event, 24static int perf_trace_event_init(struct ftrace_event_call *tp_event,
25 struct perf_event *p_event) 25 struct perf_event *p_event)
26{ 26{
27 struct hlist_head *list; 27 struct hlist_head __percpu *list;
28 int ret = -ENOMEM; 28 int ret = -ENOMEM;
29 int cpu; 29 int cpu;
30 30
@@ -42,11 +42,11 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event,
42 tp_event->perf_events = list; 42 tp_event->perf_events = list;
43 43
44 if (!total_ref_count) { 44 if (!total_ref_count) {
45 char *buf; 45 char __percpu *buf;
46 int i; 46 int i;
47 47
48 for (i = 0; i < 4; i++) { 48 for (i = 0; i < PERF_NR_CONTEXTS; i++) {
49 buf = (char *)alloc_percpu(perf_trace_t); 49 buf = (char __percpu *)alloc_percpu(perf_trace_t);
50 if (!buf) 50 if (!buf)
51 goto fail; 51 goto fail;
52 52
@@ -65,7 +65,7 @@ fail:
65 if (!total_ref_count) { 65 if (!total_ref_count) {
66 int i; 66 int i;
67 67
68 for (i = 0; i < 4; i++) { 68 for (i = 0; i < PERF_NR_CONTEXTS; i++) {
69 free_percpu(perf_trace_buf[i]); 69 free_percpu(perf_trace_buf[i]);
70 perf_trace_buf[i] = NULL; 70 perf_trace_buf[i] = NULL;
71 } 71 }
@@ -101,22 +101,26 @@ int perf_trace_init(struct perf_event *p_event)
101 return ret; 101 return ret;
102} 102}
103 103
104int perf_trace_enable(struct perf_event *p_event) 104int perf_trace_add(struct perf_event *p_event, int flags)
105{ 105{
106 struct ftrace_event_call *tp_event = p_event->tp_event; 106 struct ftrace_event_call *tp_event = p_event->tp_event;
107 struct hlist_head __percpu *pcpu_list;
107 struct hlist_head *list; 108 struct hlist_head *list;
108 109
109 list = tp_event->perf_events; 110 pcpu_list = tp_event->perf_events;
110 if (WARN_ON_ONCE(!list)) 111 if (WARN_ON_ONCE(!pcpu_list))
111 return -EINVAL; 112 return -EINVAL;
112 113
113 list = this_cpu_ptr(list); 114 if (!(flags & PERF_EF_START))
115 p_event->hw.state = PERF_HES_STOPPED;
116
117 list = this_cpu_ptr(pcpu_list);
114 hlist_add_head_rcu(&p_event->hlist_entry, list); 118 hlist_add_head_rcu(&p_event->hlist_entry, list);
115 119
116 return 0; 120 return 0;
117} 121}
118 122
119void perf_trace_disable(struct perf_event *p_event) 123void perf_trace_del(struct perf_event *p_event, int flags)
120{ 124{
121 hlist_del_rcu(&p_event->hlist_entry); 125 hlist_del_rcu(&p_event->hlist_entry);
122} 126}
@@ -142,7 +146,7 @@ void perf_trace_destroy(struct perf_event *p_event)
142 tp_event->perf_events = NULL; 146 tp_event->perf_events = NULL;
143 147
144 if (!--total_ref_count) { 148 if (!--total_ref_count) {
145 for (i = 0; i < 4; i++) { 149 for (i = 0; i < PERF_NR_CONTEXTS; i++) {
146 free_percpu(perf_trace_buf[i]); 150 free_percpu(perf_trace_buf[i]);
147 perf_trace_buf[i] = NULL; 151 perf_trace_buf[i] = NULL;
148 } 152 }
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 4c758f146328..398c0e8b332c 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -600,21 +600,29 @@ out:
600 600
601enum { 601enum {
602 FORMAT_HEADER = 1, 602 FORMAT_HEADER = 1,
603 FORMAT_PRINTFMT = 2, 603 FORMAT_FIELD_SEPERATOR = 2,
604 FORMAT_PRINTFMT = 3,
604}; 605};
605 606
606static void *f_next(struct seq_file *m, void *v, loff_t *pos) 607static void *f_next(struct seq_file *m, void *v, loff_t *pos)
607{ 608{
608 struct ftrace_event_call *call = m->private; 609 struct ftrace_event_call *call = m->private;
609 struct ftrace_event_field *field; 610 struct ftrace_event_field *field;
610 struct list_head *head; 611 struct list_head *common_head = &ftrace_common_fields;
612 struct list_head *head = trace_get_fields(call);
611 613
612 (*pos)++; 614 (*pos)++;
613 615
614 switch ((unsigned long)v) { 616 switch ((unsigned long)v) {
615 case FORMAT_HEADER: 617 case FORMAT_HEADER:
616 head = &ftrace_common_fields; 618 if (unlikely(list_empty(common_head)))
619 return NULL;
620
621 field = list_entry(common_head->prev,
622 struct ftrace_event_field, link);
623 return field;
617 624
625 case FORMAT_FIELD_SEPERATOR:
618 if (unlikely(list_empty(head))) 626 if (unlikely(list_empty(head)))
619 return NULL; 627 return NULL;
620 628
@@ -626,31 +634,10 @@ static void *f_next(struct seq_file *m, void *v, loff_t *pos)
626 return NULL; 634 return NULL;
627 } 635 }
628 636
629 head = trace_get_fields(call);
630
631 /*
632 * To separate common fields from event fields, the
633 * LSB is set on the first event field. Clear it in case.
634 */
635 v = (void *)((unsigned long)v & ~1L);
636
637 field = v; 637 field = v;
638 /* 638 if (field->link.prev == common_head)
639 * If this is a common field, and at the end of the list, then 639 return (void *)FORMAT_FIELD_SEPERATOR;
640 * continue with main list. 640 else if (field->link.prev == head)
641 */
642 if (field->link.prev == &ftrace_common_fields) {
643 if (unlikely(list_empty(head)))
644 return NULL;
645 field = list_entry(head->prev, struct ftrace_event_field, link);
646 /* Set the LSB to notify f_show to print an extra newline */
647 field = (struct ftrace_event_field *)
648 ((unsigned long)field | 1);
649 return field;
650 }
651
652 /* If we are done tell f_show to print the format */
653 if (field->link.prev == head)
654 return (void *)FORMAT_PRINTFMT; 641 return (void *)FORMAT_PRINTFMT;
655 642
656 field = list_entry(field->link.prev, struct ftrace_event_field, link); 643 field = list_entry(field->link.prev, struct ftrace_event_field, link);
@@ -688,22 +675,16 @@ static int f_show(struct seq_file *m, void *v)
688 seq_printf(m, "format:\n"); 675 seq_printf(m, "format:\n");
689 return 0; 676 return 0;
690 677
678 case FORMAT_FIELD_SEPERATOR:
679 seq_putc(m, '\n');
680 return 0;
681
691 case FORMAT_PRINTFMT: 682 case FORMAT_PRINTFMT:
692 seq_printf(m, "\nprint fmt: %s\n", 683 seq_printf(m, "\nprint fmt: %s\n",
693 call->print_fmt); 684 call->print_fmt);
694 return 0; 685 return 0;
695 } 686 }
696 687
697 /*
698 * To separate common fields from event fields, the
699 * LSB is set on the first event field. Clear it and
700 * print a newline if it is set.
701 */
702 if ((unsigned long)v & 1) {
703 seq_putc(m, '\n');
704 v = (void *)((unsigned long)v & ~1L);
705 }
706
707 field = v; 688 field = v;
708 689
709 /* 690 /*
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 6f233698518e..76b05980225c 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -15,15 +15,19 @@
15#include "trace.h" 15#include "trace.h"
16#include "trace_output.h" 16#include "trace_output.h"
17 17
18/* When set, irq functions will be ignored */
19static int ftrace_graph_skip_irqs;
20
18struct fgraph_cpu_data { 21struct fgraph_cpu_data {
19 pid_t last_pid; 22 pid_t last_pid;
20 int depth; 23 int depth;
24 int depth_irq;
21 int ignore; 25 int ignore;
22 unsigned long enter_funcs[FTRACE_RETFUNC_DEPTH]; 26 unsigned long enter_funcs[FTRACE_RETFUNC_DEPTH];
23}; 27};
24 28
25struct fgraph_data { 29struct fgraph_data {
26 struct fgraph_cpu_data *cpu_data; 30 struct fgraph_cpu_data __percpu *cpu_data;
27 31
28 /* Place to preserve last processed entry. */ 32 /* Place to preserve last processed entry. */
29 struct ftrace_graph_ent_entry ent; 33 struct ftrace_graph_ent_entry ent;
@@ -41,6 +45,7 @@ struct fgraph_data {
41#define TRACE_GRAPH_PRINT_PROC 0x8 45#define TRACE_GRAPH_PRINT_PROC 0x8
42#define TRACE_GRAPH_PRINT_DURATION 0x10 46#define TRACE_GRAPH_PRINT_DURATION 0x10
43#define TRACE_GRAPH_PRINT_ABS_TIME 0x20 47#define TRACE_GRAPH_PRINT_ABS_TIME 0x20
48#define TRACE_GRAPH_PRINT_IRQS 0x40
44 49
45static struct tracer_opt trace_opts[] = { 50static struct tracer_opt trace_opts[] = {
46 /* Display overruns? (for self-debug purpose) */ 51 /* Display overruns? (for self-debug purpose) */
@@ -55,13 +60,15 @@ static struct tracer_opt trace_opts[] = {
55 { TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) }, 60 { TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) },
56 /* Display absolute time of an entry */ 61 /* Display absolute time of an entry */
57 { TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) }, 62 { TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) },
63 /* Display interrupts */
64 { TRACER_OPT(funcgraph-irqs, TRACE_GRAPH_PRINT_IRQS) },
58 { } /* Empty entry */ 65 { } /* Empty entry */
59}; 66};
60 67
61static struct tracer_flags tracer_flags = { 68static struct tracer_flags tracer_flags = {
62 /* Don't display overruns and proc by default */ 69 /* Don't display overruns and proc by default */
63 .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD | 70 .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD |
64 TRACE_GRAPH_PRINT_DURATION, 71 TRACE_GRAPH_PRINT_DURATION | TRACE_GRAPH_PRINT_IRQS,
65 .opts = trace_opts 72 .opts = trace_opts
66}; 73};
67 74
@@ -204,6 +211,14 @@ int __trace_graph_entry(struct trace_array *tr,
204 return 1; 211 return 1;
205} 212}
206 213
214static inline int ftrace_graph_ignore_irqs(void)
215{
216 if (!ftrace_graph_skip_irqs)
217 return 0;
218
219 return in_irq();
220}
221
207int trace_graph_entry(struct ftrace_graph_ent *trace) 222int trace_graph_entry(struct ftrace_graph_ent *trace)
208{ 223{
209 struct trace_array *tr = graph_array; 224 struct trace_array *tr = graph_array;
@@ -218,7 +233,8 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
218 return 0; 233 return 0;
219 234
220 /* trace it when it is-nested-in or is a function enabled. */ 235 /* trace it when it is-nested-in or is a function enabled. */
221 if (!(trace->depth || ftrace_graph_addr(trace->func))) 236 if (!(trace->depth || ftrace_graph_addr(trace->func)) ||
237 ftrace_graph_ignore_irqs())
222 return 0; 238 return 0;
223 239
224 local_irq_save(flags); 240 local_irq_save(flags);
@@ -246,6 +262,34 @@ int trace_graph_thresh_entry(struct ftrace_graph_ent *trace)
246 return trace_graph_entry(trace); 262 return trace_graph_entry(trace);
247} 263}
248 264
265static void
266__trace_graph_function(struct trace_array *tr,
267 unsigned long ip, unsigned long flags, int pc)
268{
269 u64 time = trace_clock_local();
270 struct ftrace_graph_ent ent = {
271 .func = ip,
272 .depth = 0,
273 };
274 struct ftrace_graph_ret ret = {
275 .func = ip,
276 .depth = 0,
277 .calltime = time,
278 .rettime = time,
279 };
280
281 __trace_graph_entry(tr, &ent, flags, pc);
282 __trace_graph_return(tr, &ret, flags, pc);
283}
284
285void
286trace_graph_function(struct trace_array *tr,
287 unsigned long ip, unsigned long parent_ip,
288 unsigned long flags, int pc)
289{
290 __trace_graph_function(tr, ip, flags, pc);
291}
292
249void __trace_graph_return(struct trace_array *tr, 293void __trace_graph_return(struct trace_array *tr,
250 struct ftrace_graph_ret *trace, 294 struct ftrace_graph_ret *trace,
251 unsigned long flags, 295 unsigned long flags,
@@ -649,8 +693,9 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s)
649 693
650 /* Print nsecs (we don't want to exceed 7 numbers) */ 694 /* Print nsecs (we don't want to exceed 7 numbers) */
651 if (len < 7) { 695 if (len < 7) {
652 snprintf(nsecs_str, min(sizeof(nsecs_str), 8UL - len), "%03lu", 696 size_t slen = min_t(size_t, sizeof(nsecs_str), 8UL - len);
653 nsecs_rem); 697
698 snprintf(nsecs_str, slen, "%03lu", nsecs_rem);
654 ret = trace_seq_printf(s, ".%s", nsecs_str); 699 ret = trace_seq_printf(s, ".%s", nsecs_str);
655 if (!ret) 700 if (!ret)
656 return TRACE_TYPE_PARTIAL_LINE; 701 return TRACE_TYPE_PARTIAL_LINE;
@@ -855,6 +900,108 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s,
855 return 0; 900 return 0;
856} 901}
857 902
903/*
904 * Entry check for irq code
905 *
906 * returns 1 if
907 * - we are inside irq code
908 * - we just extered irq code
909 *
910 * retunns 0 if
911 * - funcgraph-interrupts option is set
912 * - we are not inside irq code
913 */
914static int
915check_irq_entry(struct trace_iterator *iter, u32 flags,
916 unsigned long addr, int depth)
917{
918 int cpu = iter->cpu;
919 int *depth_irq;
920 struct fgraph_data *data = iter->private;
921
922 /*
923 * If we are either displaying irqs, or we got called as
924 * a graph event and private data does not exist,
925 * then we bypass the irq check.
926 */
927 if ((flags & TRACE_GRAPH_PRINT_IRQS) ||
928 (!data))
929 return 0;
930
931 depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq);
932
933 /*
934 * We are inside the irq code
935 */
936 if (*depth_irq >= 0)
937 return 1;
938
939 if ((addr < (unsigned long)__irqentry_text_start) ||
940 (addr >= (unsigned long)__irqentry_text_end))
941 return 0;
942
943 /*
944 * We are entering irq code.
945 */
946 *depth_irq = depth;
947 return 1;
948}
949
950/*
951 * Return check for irq code
952 *
953 * returns 1 if
954 * - we are inside irq code
955 * - we just left irq code
956 *
957 * returns 0 if
958 * - funcgraph-interrupts option is set
959 * - we are not inside irq code
960 */
961static int
962check_irq_return(struct trace_iterator *iter, u32 flags, int depth)
963{
964 int cpu = iter->cpu;
965 int *depth_irq;
966 struct fgraph_data *data = iter->private;
967
968 /*
969 * If we are either displaying irqs, or we got called as
970 * a graph event and private data does not exist,
971 * then we bypass the irq check.
972 */
973 if ((flags & TRACE_GRAPH_PRINT_IRQS) ||
974 (!data))
975 return 0;
976
977 depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq);
978
979 /*
980 * We are not inside the irq code.
981 */
982 if (*depth_irq == -1)
983 return 0;
984
985 /*
986 * We are inside the irq code, and this is returning entry.
987 * Let's not trace it and clear the entry depth, since
988 * we are out of irq code.
989 *
990 * This condition ensures that we 'leave the irq code' once
991 * we are out of the entry depth. Thus protecting us from
992 * the RETURN entry loss.
993 */
994 if (*depth_irq >= depth) {
995 *depth_irq = -1;
996 return 1;
997 }
998
999 /*
1000 * We are inside the irq code, and this is not the entry.
1001 */
1002 return 1;
1003}
1004
858static enum print_line_t 1005static enum print_line_t
859print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, 1006print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
860 struct trace_iterator *iter, u32 flags) 1007 struct trace_iterator *iter, u32 flags)
@@ -865,6 +1012,9 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
865 static enum print_line_t ret; 1012 static enum print_line_t ret;
866 int cpu = iter->cpu; 1013 int cpu = iter->cpu;
867 1014
1015 if (check_irq_entry(iter, flags, call->func, call->depth))
1016 return TRACE_TYPE_HANDLED;
1017
868 if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func, flags)) 1018 if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func, flags))
869 return TRACE_TYPE_PARTIAL_LINE; 1019 return TRACE_TYPE_PARTIAL_LINE;
870 1020
@@ -902,6 +1052,9 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
902 int ret; 1052 int ret;
903 int i; 1053 int i;
904 1054
1055 if (check_irq_return(iter, flags, trace->depth))
1056 return TRACE_TYPE_HANDLED;
1057
905 if (data) { 1058 if (data) {
906 struct fgraph_cpu_data *cpu_data; 1059 struct fgraph_cpu_data *cpu_data;
907 int cpu = iter->cpu; 1060 int cpu = iter->cpu;
@@ -1054,7 +1207,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
1054 1207
1055 1208
1056enum print_line_t 1209enum print_line_t
1057print_graph_function_flags(struct trace_iterator *iter, u32 flags) 1210__print_graph_function_flags(struct trace_iterator *iter, u32 flags)
1058{ 1211{
1059 struct ftrace_graph_ent_entry *field; 1212 struct ftrace_graph_ent_entry *field;
1060 struct fgraph_data *data = iter->private; 1213 struct fgraph_data *data = iter->private;
@@ -1117,7 +1270,18 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags)
1117static enum print_line_t 1270static enum print_line_t
1118print_graph_function(struct trace_iterator *iter) 1271print_graph_function(struct trace_iterator *iter)
1119{ 1272{
1120 return print_graph_function_flags(iter, tracer_flags.val); 1273 return __print_graph_function_flags(iter, tracer_flags.val);
1274}
1275
1276enum print_line_t print_graph_function_flags(struct trace_iterator *iter,
1277 u32 flags)
1278{
1279 if (trace_flags & TRACE_ITER_LATENCY_FMT)
1280 flags |= TRACE_GRAPH_PRINT_DURATION;
1281 else
1282 flags |= TRACE_GRAPH_PRINT_ABS_TIME;
1283
1284 return __print_graph_function_flags(iter, flags);
1121} 1285}
1122 1286
1123static enum print_line_t 1287static enum print_line_t
@@ -1149,7 +1313,7 @@ static void print_lat_header(struct seq_file *s, u32 flags)
1149 seq_printf(s, "#%.*s|||| / \n", size, spaces); 1313 seq_printf(s, "#%.*s|||| / \n", size, spaces);
1150} 1314}
1151 1315
1152void print_graph_headers_flags(struct seq_file *s, u32 flags) 1316static void __print_graph_headers_flags(struct seq_file *s, u32 flags)
1153{ 1317{
1154 int lat = trace_flags & TRACE_ITER_LATENCY_FMT; 1318 int lat = trace_flags & TRACE_ITER_LATENCY_FMT;
1155 1319
@@ -1190,6 +1354,23 @@ void print_graph_headers(struct seq_file *s)
1190 print_graph_headers_flags(s, tracer_flags.val); 1354 print_graph_headers_flags(s, tracer_flags.val);
1191} 1355}
1192 1356
1357void print_graph_headers_flags(struct seq_file *s, u32 flags)
1358{
1359 struct trace_iterator *iter = s->private;
1360
1361 if (trace_flags & TRACE_ITER_LATENCY_FMT) {
1362 /* print nothing if the buffers are empty */
1363 if (trace_empty(iter))
1364 return;
1365
1366 print_trace_header(s, iter);
1367 flags |= TRACE_GRAPH_PRINT_DURATION;
1368 } else
1369 flags |= TRACE_GRAPH_PRINT_ABS_TIME;
1370
1371 __print_graph_headers_flags(s, flags);
1372}
1373
1193void graph_trace_open(struct trace_iterator *iter) 1374void graph_trace_open(struct trace_iterator *iter)
1194{ 1375{
1195 /* pid and depth on the last trace processed */ 1376 /* pid and depth on the last trace processed */
@@ -1210,9 +1391,12 @@ void graph_trace_open(struct trace_iterator *iter)
1210 pid_t *pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid); 1391 pid_t *pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
1211 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth); 1392 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
1212 int *ignore = &(per_cpu_ptr(data->cpu_data, cpu)->ignore); 1393 int *ignore = &(per_cpu_ptr(data->cpu_data, cpu)->ignore);
1394 int *depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq);
1395
1213 *pid = -1; 1396 *pid = -1;
1214 *depth = 0; 1397 *depth = 0;
1215 *ignore = 0; 1398 *ignore = 0;
1399 *depth_irq = -1;
1216 } 1400 }
1217 1401
1218 iter->private = data; 1402 iter->private = data;
@@ -1235,6 +1419,14 @@ void graph_trace_close(struct trace_iterator *iter)
1235 } 1419 }
1236} 1420}
1237 1421
1422static int func_graph_set_flag(u32 old_flags, u32 bit, int set)
1423{
1424 if (bit == TRACE_GRAPH_PRINT_IRQS)
1425 ftrace_graph_skip_irqs = !set;
1426
1427 return 0;
1428}
1429
1238static struct trace_event_functions graph_functions = { 1430static struct trace_event_functions graph_functions = {
1239 .trace = print_graph_function_event, 1431 .trace = print_graph_function_event,
1240}; 1432};
@@ -1261,6 +1453,7 @@ static struct tracer graph_trace __read_mostly = {
1261 .print_line = print_graph_function, 1453 .print_line = print_graph_function,
1262 .print_header = print_graph_headers, 1454 .print_header = print_graph_headers,
1263 .flags = &tracer_flags, 1455 .flags = &tracer_flags,
1456 .set_flag = func_graph_set_flag,
1264#ifdef CONFIG_FTRACE_SELFTEST 1457#ifdef CONFIG_FTRACE_SELFTEST
1265 .selftest = trace_selftest_startup_function_graph, 1458 .selftest = trace_selftest_startup_function_graph,
1266#endif 1459#endif
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 73a6b0601f2e..5cf8c602b880 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -87,14 +87,22 @@ static __cacheline_aligned_in_smp unsigned long max_sequence;
87 87
88#ifdef CONFIG_FUNCTION_TRACER 88#ifdef CONFIG_FUNCTION_TRACER
89/* 89/*
90 * irqsoff uses its own tracer function to keep the overhead down: 90 * Prologue for the preempt and irqs off function tracers.
91 *
92 * Returns 1 if it is OK to continue, and data->disabled is
93 * incremented.
94 * 0 if the trace is to be ignored, and data->disabled
95 * is kept the same.
96 *
97 * Note, this function is also used outside this ifdef but
98 * inside the #ifdef of the function graph tracer below.
99 * This is OK, since the function graph tracer is
100 * dependent on the function tracer.
91 */ 101 */
92static void 102static int func_prolog_dec(struct trace_array *tr,
93irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip) 103 struct trace_array_cpu **data,
104 unsigned long *flags)
94{ 105{
95 struct trace_array *tr = irqsoff_trace;
96 struct trace_array_cpu *data;
97 unsigned long flags;
98 long disabled; 106 long disabled;
99 int cpu; 107 int cpu;
100 108
@@ -106,18 +114,38 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
106 */ 114 */
107 cpu = raw_smp_processor_id(); 115 cpu = raw_smp_processor_id();
108 if (likely(!per_cpu(tracing_cpu, cpu))) 116 if (likely(!per_cpu(tracing_cpu, cpu)))
109 return; 117 return 0;
110 118
111 local_save_flags(flags); 119 local_save_flags(*flags);
112 /* slight chance to get a false positive on tracing_cpu */ 120 /* slight chance to get a false positive on tracing_cpu */
113 if (!irqs_disabled_flags(flags)) 121 if (!irqs_disabled_flags(*flags))
114 return; 122 return 0;
115 123
116 data = tr->data[cpu]; 124 *data = tr->data[cpu];
117 disabled = atomic_inc_return(&data->disabled); 125 disabled = atomic_inc_return(&(*data)->disabled);
118 126
119 if (likely(disabled == 1)) 127 if (likely(disabled == 1))
120 trace_function(tr, ip, parent_ip, flags, preempt_count()); 128 return 1;
129
130 atomic_dec(&(*data)->disabled);
131
132 return 0;
133}
134
135/*
136 * irqsoff uses its own tracer function to keep the overhead down:
137 */
138static void
139irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
140{
141 struct trace_array *tr = irqsoff_trace;
142 struct trace_array_cpu *data;
143 unsigned long flags;
144
145 if (!func_prolog_dec(tr, &data, &flags))
146 return;
147
148 trace_function(tr, ip, parent_ip, flags, preempt_count());
121 149
122 atomic_dec(&data->disabled); 150 atomic_dec(&data->disabled);
123} 151}
@@ -155,30 +183,16 @@ static int irqsoff_graph_entry(struct ftrace_graph_ent *trace)
155 struct trace_array *tr = irqsoff_trace; 183 struct trace_array *tr = irqsoff_trace;
156 struct trace_array_cpu *data; 184 struct trace_array_cpu *data;
157 unsigned long flags; 185 unsigned long flags;
158 long disabled;
159 int ret; 186 int ret;
160 int cpu;
161 int pc; 187 int pc;
162 188
163 cpu = raw_smp_processor_id(); 189 if (!func_prolog_dec(tr, &data, &flags))
164 if (likely(!per_cpu(tracing_cpu, cpu)))
165 return 0; 190 return 0;
166 191
167 local_save_flags(flags); 192 pc = preempt_count();
168 /* slight chance to get a false positive on tracing_cpu */ 193 ret = __trace_graph_entry(tr, trace, flags, pc);
169 if (!irqs_disabled_flags(flags))
170 return 0;
171
172 data = tr->data[cpu];
173 disabled = atomic_inc_return(&data->disabled);
174
175 if (likely(disabled == 1)) {
176 pc = preempt_count();
177 ret = __trace_graph_entry(tr, trace, flags, pc);
178 } else
179 ret = 0;
180
181 atomic_dec(&data->disabled); 194 atomic_dec(&data->disabled);
195
182 return ret; 196 return ret;
183} 197}
184 198
@@ -187,27 +201,13 @@ static void irqsoff_graph_return(struct ftrace_graph_ret *trace)
187 struct trace_array *tr = irqsoff_trace; 201 struct trace_array *tr = irqsoff_trace;
188 struct trace_array_cpu *data; 202 struct trace_array_cpu *data;
189 unsigned long flags; 203 unsigned long flags;
190 long disabled;
191 int cpu;
192 int pc; 204 int pc;
193 205
194 cpu = raw_smp_processor_id(); 206 if (!func_prolog_dec(tr, &data, &flags))
195 if (likely(!per_cpu(tracing_cpu, cpu)))
196 return; 207 return;
197 208
198 local_save_flags(flags); 209 pc = preempt_count();
199 /* slight chance to get a false positive on tracing_cpu */ 210 __trace_graph_return(tr, trace, flags, pc);
200 if (!irqs_disabled_flags(flags))
201 return;
202
203 data = tr->data[cpu];
204 disabled = atomic_inc_return(&data->disabled);
205
206 if (likely(disabled == 1)) {
207 pc = preempt_count();
208 __trace_graph_return(tr, trace, flags, pc);
209 }
210
211 atomic_dec(&data->disabled); 211 atomic_dec(&data->disabled);
212} 212}
213 213
@@ -229,75 +229,33 @@ static void irqsoff_trace_close(struct trace_iterator *iter)
229 229
230static enum print_line_t irqsoff_print_line(struct trace_iterator *iter) 230static enum print_line_t irqsoff_print_line(struct trace_iterator *iter)
231{ 231{
232 u32 flags = GRAPH_TRACER_FLAGS;
233
234 if (trace_flags & TRACE_ITER_LATENCY_FMT)
235 flags |= TRACE_GRAPH_PRINT_DURATION;
236 else
237 flags |= TRACE_GRAPH_PRINT_ABS_TIME;
238
239 /* 232 /*
240 * In graph mode call the graph tracer output function, 233 * In graph mode call the graph tracer output function,
241 * otherwise go with the TRACE_FN event handler 234 * otherwise go with the TRACE_FN event handler
242 */ 235 */
243 if (is_graph()) 236 if (is_graph())
244 return print_graph_function_flags(iter, flags); 237 return print_graph_function_flags(iter, GRAPH_TRACER_FLAGS);
245 238
246 return TRACE_TYPE_UNHANDLED; 239 return TRACE_TYPE_UNHANDLED;
247} 240}
248 241
249static void irqsoff_print_header(struct seq_file *s) 242static void irqsoff_print_header(struct seq_file *s)
250{ 243{
251 if (is_graph()) { 244 if (is_graph())
252 struct trace_iterator *iter = s->private; 245 print_graph_headers_flags(s, GRAPH_TRACER_FLAGS);
253 u32 flags = GRAPH_TRACER_FLAGS; 246 else
254
255 if (trace_flags & TRACE_ITER_LATENCY_FMT) {
256 /* print nothing if the buffers are empty */
257 if (trace_empty(iter))
258 return;
259
260 print_trace_header(s, iter);
261 flags |= TRACE_GRAPH_PRINT_DURATION;
262 } else
263 flags |= TRACE_GRAPH_PRINT_ABS_TIME;
264
265 print_graph_headers_flags(s, flags);
266 } else
267 trace_default_header(s); 247 trace_default_header(s);
268} 248}
269 249
270static void 250static void
271trace_graph_function(struct trace_array *tr,
272 unsigned long ip, unsigned long flags, int pc)
273{
274 u64 time = trace_clock_local();
275 struct ftrace_graph_ent ent = {
276 .func = ip,
277 .depth = 0,
278 };
279 struct ftrace_graph_ret ret = {
280 .func = ip,
281 .depth = 0,
282 .calltime = time,
283 .rettime = time,
284 };
285
286 __trace_graph_entry(tr, &ent, flags, pc);
287 __trace_graph_return(tr, &ret, flags, pc);
288}
289
290static void
291__trace_function(struct trace_array *tr, 251__trace_function(struct trace_array *tr,
292 unsigned long ip, unsigned long parent_ip, 252 unsigned long ip, unsigned long parent_ip,
293 unsigned long flags, int pc) 253 unsigned long flags, int pc)
294{ 254{
295 if (!is_graph()) 255 if (is_graph())
256 trace_graph_function(tr, ip, parent_ip, flags, pc);
257 else
296 trace_function(tr, ip, parent_ip, flags, pc); 258 trace_function(tr, ip, parent_ip, flags, pc);
297 else {
298 trace_graph_function(tr, parent_ip, flags, pc);
299 trace_graph_function(tr, ip, flags, pc);
300 }
301} 259}
302 260
303#else 261#else
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 4086eae6e81b..7319559ed59f 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -31,48 +31,98 @@ static int wakeup_rt;
31static arch_spinlock_t wakeup_lock = 31static arch_spinlock_t wakeup_lock =
32 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; 32 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
33 33
34static void wakeup_reset(struct trace_array *tr);
34static void __wakeup_reset(struct trace_array *tr); 35static void __wakeup_reset(struct trace_array *tr);
36static int wakeup_graph_entry(struct ftrace_graph_ent *trace);
37static void wakeup_graph_return(struct ftrace_graph_ret *trace);
35 38
36static int save_lat_flag; 39static int save_lat_flag;
37 40
41#define TRACE_DISPLAY_GRAPH 1
42
43static struct tracer_opt trace_opts[] = {
44#ifdef CONFIG_FUNCTION_GRAPH_TRACER
45 /* display latency trace as call graph */
46 { TRACER_OPT(display-graph, TRACE_DISPLAY_GRAPH) },
47#endif
48 { } /* Empty entry */
49};
50
51static struct tracer_flags tracer_flags = {
52 .val = 0,
53 .opts = trace_opts,
54};
55
56#define is_graph() (tracer_flags.val & TRACE_DISPLAY_GRAPH)
57
38#ifdef CONFIG_FUNCTION_TRACER 58#ifdef CONFIG_FUNCTION_TRACER
59
39/* 60/*
40 * irqsoff uses its own tracer function to keep the overhead down: 61 * Prologue for the wakeup function tracers.
62 *
63 * Returns 1 if it is OK to continue, and preemption
64 * is disabled and data->disabled is incremented.
65 * 0 if the trace is to be ignored, and preemption
66 * is not disabled and data->disabled is
67 * kept the same.
68 *
69 * Note, this function is also used outside this ifdef but
70 * inside the #ifdef of the function graph tracer below.
71 * This is OK, since the function graph tracer is
72 * dependent on the function tracer.
41 */ 73 */
42static void 74static int
43wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) 75func_prolog_preempt_disable(struct trace_array *tr,
76 struct trace_array_cpu **data,
77 int *pc)
44{ 78{
45 struct trace_array *tr = wakeup_trace;
46 struct trace_array_cpu *data;
47 unsigned long flags;
48 long disabled; 79 long disabled;
49 int cpu; 80 int cpu;
50 int pc;
51 81
52 if (likely(!wakeup_task)) 82 if (likely(!wakeup_task))
53 return; 83 return 0;
54 84
55 pc = preempt_count(); 85 *pc = preempt_count();
56 preempt_disable_notrace(); 86 preempt_disable_notrace();
57 87
58 cpu = raw_smp_processor_id(); 88 cpu = raw_smp_processor_id();
59 if (cpu != wakeup_current_cpu) 89 if (cpu != wakeup_current_cpu)
60 goto out_enable; 90 goto out_enable;
61 91
62 data = tr->data[cpu]; 92 *data = tr->data[cpu];
63 disabled = atomic_inc_return(&data->disabled); 93 disabled = atomic_inc_return(&(*data)->disabled);
64 if (unlikely(disabled != 1)) 94 if (unlikely(disabled != 1))
65 goto out; 95 goto out;
66 96
67 local_irq_save(flags); 97 return 1;
68 98
69 trace_function(tr, ip, parent_ip, flags, pc); 99out:
100 atomic_dec(&(*data)->disabled);
101
102out_enable:
103 preempt_enable_notrace();
104 return 0;
105}
70 106
107/*
108 * wakeup uses its own tracer function to keep the overhead down:
109 */
110static void
111wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
112{
113 struct trace_array *tr = wakeup_trace;
114 struct trace_array_cpu *data;
115 unsigned long flags;
116 int pc;
117
118 if (!func_prolog_preempt_disable(tr, &data, &pc))
119 return;
120
121 local_irq_save(flags);
122 trace_function(tr, ip, parent_ip, flags, pc);
71 local_irq_restore(flags); 123 local_irq_restore(flags);
72 124
73 out:
74 atomic_dec(&data->disabled); 125 atomic_dec(&data->disabled);
75 out_enable:
76 preempt_enable_notrace(); 126 preempt_enable_notrace();
77} 127}
78 128
@@ -82,6 +132,156 @@ static struct ftrace_ops trace_ops __read_mostly =
82}; 132};
83#endif /* CONFIG_FUNCTION_TRACER */ 133#endif /* CONFIG_FUNCTION_TRACER */
84 134
135static int start_func_tracer(int graph)
136{
137 int ret;
138
139 if (!graph)
140 ret = register_ftrace_function(&trace_ops);
141 else
142 ret = register_ftrace_graph(&wakeup_graph_return,
143 &wakeup_graph_entry);
144
145 if (!ret && tracing_is_enabled())
146 tracer_enabled = 1;
147 else
148 tracer_enabled = 0;
149
150 return ret;
151}
152
153static void stop_func_tracer(int graph)
154{
155 tracer_enabled = 0;
156
157 if (!graph)
158 unregister_ftrace_function(&trace_ops);
159 else
160 unregister_ftrace_graph();
161}
162
163#ifdef CONFIG_FUNCTION_GRAPH_TRACER
164static int wakeup_set_flag(u32 old_flags, u32 bit, int set)
165{
166
167 if (!(bit & TRACE_DISPLAY_GRAPH))
168 return -EINVAL;
169
170 if (!(is_graph() ^ set))
171 return 0;
172
173 stop_func_tracer(!set);
174
175 wakeup_reset(wakeup_trace);
176 tracing_max_latency = 0;
177
178 return start_func_tracer(set);
179}
180
181static int wakeup_graph_entry(struct ftrace_graph_ent *trace)
182{
183 struct trace_array *tr = wakeup_trace;
184 struct trace_array_cpu *data;
185 unsigned long flags;
186 int pc, ret = 0;
187
188 if (!func_prolog_preempt_disable(tr, &data, &pc))
189 return 0;
190
191 local_save_flags(flags);
192 ret = __trace_graph_entry(tr, trace, flags, pc);
193 atomic_dec(&data->disabled);
194 preempt_enable_notrace();
195
196 return ret;
197}
198
199static void wakeup_graph_return(struct ftrace_graph_ret *trace)
200{
201 struct trace_array *tr = wakeup_trace;
202 struct trace_array_cpu *data;
203 unsigned long flags;
204 int pc;
205
206 if (!func_prolog_preempt_disable(tr, &data, &pc))
207 return;
208
209 local_save_flags(flags);
210 __trace_graph_return(tr, trace, flags, pc);
211 atomic_dec(&data->disabled);
212
213 preempt_enable_notrace();
214 return;
215}
216
217static void wakeup_trace_open(struct trace_iterator *iter)
218{
219 if (is_graph())
220 graph_trace_open(iter);
221}
222
223static void wakeup_trace_close(struct trace_iterator *iter)
224{
225 if (iter->private)
226 graph_trace_close(iter);
227}
228
229#define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_PROC)
230
231static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
232{
233 /*
234 * In graph mode call the graph tracer output function,
235 * otherwise go with the TRACE_FN event handler
236 */
237 if (is_graph())
238 return print_graph_function_flags(iter, GRAPH_TRACER_FLAGS);
239
240 return TRACE_TYPE_UNHANDLED;
241}
242
243static void wakeup_print_header(struct seq_file *s)
244{
245 if (is_graph())
246 print_graph_headers_flags(s, GRAPH_TRACER_FLAGS);
247 else
248 trace_default_header(s);
249}
250
251static void
252__trace_function(struct trace_array *tr,
253 unsigned long ip, unsigned long parent_ip,
254 unsigned long flags, int pc)
255{
256 if (is_graph())
257 trace_graph_function(tr, ip, parent_ip, flags, pc);
258 else
259 trace_function(tr, ip, parent_ip, flags, pc);
260}
261#else
262#define __trace_function trace_function
263
264static int wakeup_set_flag(u32 old_flags, u32 bit, int set)
265{
266 return -EINVAL;
267}
268
269static int wakeup_graph_entry(struct ftrace_graph_ent *trace)
270{
271 return -1;
272}
273
274static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
275{
276 return TRACE_TYPE_UNHANDLED;
277}
278
279static void wakeup_graph_return(struct ftrace_graph_ret *trace) { }
280static void wakeup_print_header(struct seq_file *s) { }
281static void wakeup_trace_open(struct trace_iterator *iter) { }
282static void wakeup_trace_close(struct trace_iterator *iter) { }
283#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
284
85/* 285/*
86 * Should this new latency be reported/recorded? 286 * Should this new latency be reported/recorded?
87 */ 287 */
@@ -152,7 +352,7 @@ probe_wakeup_sched_switch(void *ignore,
152 /* The task we are waiting for is waking up */ 352 /* The task we are waiting for is waking up */
153 data = wakeup_trace->data[wakeup_cpu]; 353 data = wakeup_trace->data[wakeup_cpu];
154 354
155 trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc); 355 __trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc);
156 tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); 356 tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc);
157 357
158 T0 = data->preempt_timestamp; 358 T0 = data->preempt_timestamp;
@@ -252,7 +452,7 @@ probe_wakeup(void *ignore, struct task_struct *p, int success)
252 * is not called by an assembly function (where as schedule is) 452 * is not called by an assembly function (where as schedule is)
253 * it should be safe to use it here. 453 * it should be safe to use it here.
254 */ 454 */
255 trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); 455 __trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
256 456
257out_locked: 457out_locked:
258 arch_spin_unlock(&wakeup_lock); 458 arch_spin_unlock(&wakeup_lock);
@@ -303,12 +503,8 @@ static void start_wakeup_tracer(struct trace_array *tr)
303 */ 503 */
304 smp_wmb(); 504 smp_wmb();
305 505
306 register_ftrace_function(&trace_ops); 506 if (start_func_tracer(is_graph()))
307 507 printk(KERN_ERR "failed to start wakeup tracer\n");
308 if (tracing_is_enabled())
309 tracer_enabled = 1;
310 else
311 tracer_enabled = 0;
312 508
313 return; 509 return;
314fail_deprobe_wake_new: 510fail_deprobe_wake_new:
@@ -320,7 +516,7 @@ fail_deprobe:
320static void stop_wakeup_tracer(struct trace_array *tr) 516static void stop_wakeup_tracer(struct trace_array *tr)
321{ 517{
322 tracer_enabled = 0; 518 tracer_enabled = 0;
323 unregister_ftrace_function(&trace_ops); 519 stop_func_tracer(is_graph());
324 unregister_trace_sched_switch(probe_wakeup_sched_switch, NULL); 520 unregister_trace_sched_switch(probe_wakeup_sched_switch, NULL);
325 unregister_trace_sched_wakeup_new(probe_wakeup, NULL); 521 unregister_trace_sched_wakeup_new(probe_wakeup, NULL);
326 unregister_trace_sched_wakeup(probe_wakeup, NULL); 522 unregister_trace_sched_wakeup(probe_wakeup, NULL);
@@ -379,9 +575,15 @@ static struct tracer wakeup_tracer __read_mostly =
379 .start = wakeup_tracer_start, 575 .start = wakeup_tracer_start,
380 .stop = wakeup_tracer_stop, 576 .stop = wakeup_tracer_stop,
381 .print_max = 1, 577 .print_max = 1,
578 .print_header = wakeup_print_header,
579 .print_line = wakeup_print_line,
580 .flags = &tracer_flags,
581 .set_flag = wakeup_set_flag,
382#ifdef CONFIG_FTRACE_SELFTEST 582#ifdef CONFIG_FTRACE_SELFTEST
383 .selftest = trace_selftest_startup_wakeup, 583 .selftest = trace_selftest_startup_wakeup,
384#endif 584#endif
585 .open = wakeup_trace_open,
586 .close = wakeup_trace_close,
385 .use_max_tr = 1, 587 .use_max_tr = 1,
386}; 588};
387 589
@@ -394,9 +596,15 @@ static struct tracer wakeup_rt_tracer __read_mostly =
394 .stop = wakeup_tracer_stop, 596 .stop = wakeup_tracer_stop,
395 .wait_pipe = poll_wait_pipe, 597 .wait_pipe = poll_wait_pipe,
396 .print_max = 1, 598 .print_max = 1,
599 .print_header = wakeup_print_header,
600 .print_line = wakeup_print_line,
601 .flags = &tracer_flags,
602 .set_flag = wakeup_set_flag,
397#ifdef CONFIG_FTRACE_SELFTEST 603#ifdef CONFIG_FTRACE_SELFTEST
398 .selftest = trace_selftest_startup_wakeup, 604 .selftest = trace_selftest_startup_wakeup,
399#endif 605#endif
606 .open = wakeup_trace_open,
607 .close = wakeup_trace_close,
400 .use_max_tr = 1, 608 .use_max_tr = 1,
401}; 609};
402 610
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index a7cc3793baf6..209b379a4721 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -263,6 +263,11 @@ int __init trace_workqueue_early_init(void)
263{ 263{
264 int ret, cpu; 264 int ret, cpu;
265 265
266 for_each_possible_cpu(cpu) {
267 spin_lock_init(&workqueue_cpu_stat(cpu)->lock);
268 INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list);
269 }
270
266 ret = register_trace_workqueue_insertion(probe_workqueue_insertion, NULL); 271 ret = register_trace_workqueue_insertion(probe_workqueue_insertion, NULL);
267 if (ret) 272 if (ret)
268 goto out; 273 goto out;
@@ -279,11 +284,6 @@ int __init trace_workqueue_early_init(void)
279 if (ret) 284 if (ret)
280 goto no_creation; 285 goto no_creation;
281 286
282 for_each_possible_cpu(cpu) {
283 spin_lock_init(&workqueue_cpu_stat(cpu)->lock);
284 INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list);
285 }
286
287 return 0; 287 return 0;
288 288
289no_creation: 289no_creation:
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index c77f3eceea25..e95ee7f31d43 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -25,6 +25,7 @@
25#include <linux/err.h> 25#include <linux/err.h>
26#include <linux/slab.h> 26#include <linux/slab.h>
27#include <linux/sched.h> 27#include <linux/sched.h>
28#include <linux/jump_label.h>
28 29
29extern struct tracepoint __start___tracepoints[]; 30extern struct tracepoint __start___tracepoints[];
30extern struct tracepoint __stop___tracepoints[]; 31extern struct tracepoint __stop___tracepoints[];
@@ -263,7 +264,13 @@ static void set_tracepoint(struct tracepoint_entry **entry,
263 * is used. 264 * is used.
264 */ 265 */
265 rcu_assign_pointer(elem->funcs, (*entry)->funcs); 266 rcu_assign_pointer(elem->funcs, (*entry)->funcs);
266 elem->state = active; 267 if (!elem->state && active) {
268 jump_label_enable(&elem->state);
269 elem->state = active;
270 } else if (elem->state && !active) {
271 jump_label_disable(&elem->state);
272 elem->state = active;
273 }
267} 274}
268 275
269/* 276/*
@@ -277,7 +284,10 @@ static void disable_tracepoint(struct tracepoint *elem)
277 if (elem->unregfunc && elem->state) 284 if (elem->unregfunc && elem->state)
278 elem->unregfunc(); 285 elem->unregfunc();
279 286
280 elem->state = 0; 287 if (elem->state) {
288 jump_label_disable(&elem->state);
289 elem->state = 0;
290 }
281 rcu_assign_pointer(elem->funcs, NULL); 291 rcu_assign_pointer(elem->funcs, NULL);
282} 292}
283 293
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 2feb2870d3a1..bafba687a6d8 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -43,7 +43,6 @@ static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
43static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); 43static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
44#endif 44#endif
45 45
46static int __read_mostly did_panic;
47static int __initdata no_watchdog; 46static int __initdata no_watchdog;
48 47
49 48
@@ -187,18 +186,6 @@ static int is_softlockup(unsigned long touch_ts)
187 return 0; 186 return 0;
188} 187}
189 188
190static int
191watchdog_panic(struct notifier_block *this, unsigned long event, void *ptr)
192{
193 did_panic = 1;
194
195 return NOTIFY_DONE;
196}
197
198static struct notifier_block panic_block = {
199 .notifier_call = watchdog_panic,
200};
201
202#ifdef CONFIG_HARDLOCKUP_DETECTOR 189#ifdef CONFIG_HARDLOCKUP_DETECTOR
203static struct perf_event_attr wd_hw_attr = { 190static struct perf_event_attr wd_hw_attr = {
204 .type = PERF_TYPE_HARDWARE, 191 .type = PERF_TYPE_HARDWARE,
@@ -371,14 +358,14 @@ static int watchdog_nmi_enable(int cpu)
371 /* Try to register using hardware perf events */ 358 /* Try to register using hardware perf events */
372 wd_attr = &wd_hw_attr; 359 wd_attr = &wd_hw_attr;
373 wd_attr->sample_period = hw_nmi_get_sample_period(); 360 wd_attr->sample_period = hw_nmi_get_sample_period();
374 event = perf_event_create_kernel_counter(wd_attr, cpu, -1, watchdog_overflow_callback); 361 event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback);
375 if (!IS_ERR(event)) { 362 if (!IS_ERR(event)) {
376 printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n"); 363 printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n");
377 goto out_save; 364 goto out_save;
378 } 365 }
379 366
380 printk(KERN_ERR "NMI watchdog failed to create perf event on cpu%i: %p\n", cpu, event); 367 printk(KERN_ERR "NMI watchdog failed to create perf event on cpu%i: %p\n", cpu, event);
381 return -1; 368 return PTR_ERR(event);
382 369
383 /* success path */ 370 /* success path */
384out_save: 371out_save:
@@ -422,17 +409,19 @@ static int watchdog_prepare_cpu(int cpu)
422static int watchdog_enable(int cpu) 409static int watchdog_enable(int cpu)
423{ 410{
424 struct task_struct *p = per_cpu(softlockup_watchdog, cpu); 411 struct task_struct *p = per_cpu(softlockup_watchdog, cpu);
412 int err;
425 413
426 /* enable the perf event */ 414 /* enable the perf event */
427 if (watchdog_nmi_enable(cpu) != 0) 415 err = watchdog_nmi_enable(cpu);
428 return -1; 416 if (err)
417 return err;
429 418
430 /* create the watchdog thread */ 419 /* create the watchdog thread */
431 if (!p) { 420 if (!p) {
432 p = kthread_create(watchdog, (void *)(unsigned long)cpu, "watchdog/%d", cpu); 421 p = kthread_create(watchdog, (void *)(unsigned long)cpu, "watchdog/%d", cpu);
433 if (IS_ERR(p)) { 422 if (IS_ERR(p)) {
434 printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu); 423 printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu);
435 return -1; 424 return PTR_ERR(p);
436 } 425 }
437 kthread_bind(p, cpu); 426 kthread_bind(p, cpu);
438 per_cpu(watchdog_touch_ts, cpu) = 0; 427 per_cpu(watchdog_touch_ts, cpu) = 0;
@@ -484,6 +473,9 @@ static void watchdog_disable_all_cpus(void)
484{ 473{
485 int cpu; 474 int cpu;
486 475
476 if (no_watchdog)
477 return;
478
487 for_each_online_cpu(cpu) 479 for_each_online_cpu(cpu)
488 watchdog_disable(cpu); 480 watchdog_disable(cpu);
489 481
@@ -526,17 +518,16 @@ static int __cpuinit
526cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) 518cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
527{ 519{
528 int hotcpu = (unsigned long)hcpu; 520 int hotcpu = (unsigned long)hcpu;
521 int err = 0;
529 522
530 switch (action) { 523 switch (action) {
531 case CPU_UP_PREPARE: 524 case CPU_UP_PREPARE:
532 case CPU_UP_PREPARE_FROZEN: 525 case CPU_UP_PREPARE_FROZEN:
533 if (watchdog_prepare_cpu(hotcpu)) 526 err = watchdog_prepare_cpu(hotcpu);
534 return NOTIFY_BAD;
535 break; 527 break;
536 case CPU_ONLINE: 528 case CPU_ONLINE:
537 case CPU_ONLINE_FROZEN: 529 case CPU_ONLINE_FROZEN:
538 if (watchdog_enable(hotcpu)) 530 err = watchdog_enable(hotcpu);
539 return NOTIFY_BAD;
540 break; 531 break;
541#ifdef CONFIG_HOTPLUG_CPU 532#ifdef CONFIG_HOTPLUG_CPU
542 case CPU_UP_CANCELED: 533 case CPU_UP_CANCELED:
@@ -549,7 +540,7 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
549 break; 540 break;
550#endif /* CONFIG_HOTPLUG_CPU */ 541#endif /* CONFIG_HOTPLUG_CPU */
551 } 542 }
552 return NOTIFY_OK; 543 return notifier_from_errno(err);
553} 544}
554 545
555static struct notifier_block __cpuinitdata cpu_nfb = { 546static struct notifier_block __cpuinitdata cpu_nfb = {
@@ -565,13 +556,11 @@ static int __init spawn_watchdog_task(void)
565 return 0; 556 return 0;
566 557
567 err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); 558 err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
568 WARN_ON(err == NOTIFY_BAD); 559 WARN_ON(notifier_to_errno(err));
569 560
570 cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); 561 cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
571 register_cpu_notifier(&cpu_nfb); 562 register_cpu_notifier(&cpu_nfb);
572 563
573 atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
574
575 return 0; 564 return 0;
576} 565}
577early_initcall(spawn_watchdog_task); 566early_initcall(spawn_watchdog_task);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 9886cf5365ba..21ac83070a80 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -482,6 +482,7 @@ config PROVE_LOCKING
482 select DEBUG_SPINLOCK 482 select DEBUG_SPINLOCK
483 select DEBUG_MUTEXES 483 select DEBUG_MUTEXES
484 select DEBUG_LOCK_ALLOC 484 select DEBUG_LOCK_ALLOC
485 select TRACE_IRQFLAGS
485 default n 486 default n
486 help 487 help
487 This feature enables the kernel to prove that all locking 488 This feature enables the kernel to prove that all locking
@@ -596,11 +597,10 @@ config DEBUG_LOCKDEP
596 of more runtime overhead. 597 of more runtime overhead.
597 598
598config TRACE_IRQFLAGS 599config TRACE_IRQFLAGS
599 depends on DEBUG_KERNEL
600 bool 600 bool
601 default y 601 help
602 depends on TRACE_IRQFLAGS_SUPPORT 602 Enables hooks to interrupt enabling and disabling for
603 depends on PROVE_LOCKING 603 either tracing or lock debugging.
604 604
605config DEBUG_SPINLOCK_SLEEP 605config DEBUG_SPINLOCK_SLEEP
606 bool "Spinlock debugging: sleep-inside-spinlock checking" 606 bool "Spinlock debugging: sleep-inside-spinlock checking"
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index 02afc2533728..7bd6df781ce5 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -26,19 +26,11 @@
26#include <linux/dynamic_debug.h> 26#include <linux/dynamic_debug.h>
27#include <linux/debugfs.h> 27#include <linux/debugfs.h>
28#include <linux/slab.h> 28#include <linux/slab.h>
29#include <linux/jump_label.h>
29 30
30extern struct _ddebug __start___verbose[]; 31extern struct _ddebug __start___verbose[];
31extern struct _ddebug __stop___verbose[]; 32extern struct _ddebug __stop___verbose[];
32 33
33/* dynamic_debug_enabled, and dynamic_debug_enabled2 are bitmasks in which
34 * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They
35 * use independent hash functions, to reduce the chance of false positives.
36 */
37long long dynamic_debug_enabled;
38EXPORT_SYMBOL_GPL(dynamic_debug_enabled);
39long long dynamic_debug_enabled2;
40EXPORT_SYMBOL_GPL(dynamic_debug_enabled2);
41
42struct ddebug_table { 34struct ddebug_table {
43 struct list_head link; 35 struct list_head link;
44 char *mod_name; 36 char *mod_name;
@@ -88,26 +80,6 @@ static char *ddebug_describe_flags(struct _ddebug *dp, char *buf,
88} 80}
89 81
90/* 82/*
91 * must be called with ddebug_lock held
92 */
93
94static int disabled_hash(char hash, bool first_table)
95{
96 struct ddebug_table *dt;
97 char table_hash_value;
98
99 list_for_each_entry(dt, &ddebug_tables, link) {
100 if (first_table)
101 table_hash_value = dt->ddebugs->primary_hash;
102 else
103 table_hash_value = dt->ddebugs->secondary_hash;
104 if (dt->num_enabled && (hash == table_hash_value))
105 return 0;
106 }
107 return 1;
108}
109
110/*
111 * Search the tables for _ddebug's which match the given 83 * Search the tables for _ddebug's which match the given
112 * `query' and apply the `flags' and `mask' to them. Tells 84 * `query' and apply the `flags' and `mask' to them. Tells
113 * the user which ddebug's were changed, or whether none 85 * the user which ddebug's were changed, or whether none
@@ -170,17 +142,9 @@ static void ddebug_change(const struct ddebug_query *query,
170 dt->num_enabled++; 142 dt->num_enabled++;
171 dp->flags = newflags; 143 dp->flags = newflags;
172 if (newflags) { 144 if (newflags) {
173 dynamic_debug_enabled |= 145 jump_label_enable(&dp->enabled);
174 (1LL << dp->primary_hash);
175 dynamic_debug_enabled2 |=
176 (1LL << dp->secondary_hash);
177 } else { 146 } else {
178 if (disabled_hash(dp->primary_hash, true)) 147 jump_label_disable(&dp->enabled);
179 dynamic_debug_enabled &=
180 ~(1LL << dp->primary_hash);
181 if (disabled_hash(dp->secondary_hash, false))
182 dynamic_debug_enabled2 &=
183 ~(1LL << dp->secondary_hash);
184 } 148 }
185 if (verbose) 149 if (verbose)
186 printk(KERN_INFO 150 printk(KERN_INFO
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 251997a95483..282806ba7a57 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -243,6 +243,7 @@ void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
243 unlock_sock_fast(sk, slow); 243 unlock_sock_fast(sk, slow);
244 244
245 /* skb is now orphaned, can be freed outside of locked section */ 245 /* skb is now orphaned, can be freed outside of locked section */
246 trace_kfree_skb(skb, skb_free_datagram_locked);
246 __kfree_skb(skb); 247 __kfree_skb(skb);
247} 248}
248EXPORT_SYMBOL(skb_free_datagram_locked); 249EXPORT_SYMBOL(skb_free_datagram_locked);
diff --git a/net/core/dev.c b/net/core/dev.c
index 660dd41aaaa6..7ec85e27beed 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -128,6 +128,8 @@
128#include <linux/jhash.h> 128#include <linux/jhash.h>
129#include <linux/random.h> 129#include <linux/random.h>
130#include <trace/events/napi.h> 130#include <trace/events/napi.h>
131#include <trace/events/net.h>
132#include <trace/events/skb.h>
131#include <linux/pci.h> 133#include <linux/pci.h>
132 134
133#include "net-sysfs.h" 135#include "net-sysfs.h"
@@ -1978,6 +1980,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1978 } 1980 }
1979 1981
1980 rc = ops->ndo_start_xmit(skb, dev); 1982 rc = ops->ndo_start_xmit(skb, dev);
1983 trace_net_dev_xmit(skb, rc);
1981 if (rc == NETDEV_TX_OK) 1984 if (rc == NETDEV_TX_OK)
1982 txq_trans_update(txq); 1985 txq_trans_update(txq);
1983 return rc; 1986 return rc;
@@ -1998,6 +2001,7 @@ gso:
1998 skb_dst_drop(nskb); 2001 skb_dst_drop(nskb);
1999 2002
2000 rc = ops->ndo_start_xmit(nskb, dev); 2003 rc = ops->ndo_start_xmit(nskb, dev);
2004 trace_net_dev_xmit(nskb, rc);
2001 if (unlikely(rc != NETDEV_TX_OK)) { 2005 if (unlikely(rc != NETDEV_TX_OK)) {
2002 if (rc & ~NETDEV_TX_MASK) 2006 if (rc & ~NETDEV_TX_MASK)
2003 goto out_kfree_gso_skb; 2007 goto out_kfree_gso_skb;
@@ -2186,6 +2190,7 @@ int dev_queue_xmit(struct sk_buff *skb)
2186#ifdef CONFIG_NET_CLS_ACT 2190#ifdef CONFIG_NET_CLS_ACT
2187 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS); 2191 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
2188#endif 2192#endif
2193 trace_net_dev_queue(skb);
2189 if (q->enqueue) { 2194 if (q->enqueue) {
2190 rc = __dev_xmit_skb(skb, q, dev, txq); 2195 rc = __dev_xmit_skb(skb, q, dev, txq);
2191 goto out; 2196 goto out;
@@ -2512,6 +2517,7 @@ int netif_rx(struct sk_buff *skb)
2512 if (netdev_tstamp_prequeue) 2517 if (netdev_tstamp_prequeue)
2513 net_timestamp_check(skb); 2518 net_timestamp_check(skb);
2514 2519
2520 trace_netif_rx(skb);
2515#ifdef CONFIG_RPS 2521#ifdef CONFIG_RPS
2516 { 2522 {
2517 struct rps_dev_flow voidflow, *rflow = &voidflow; 2523 struct rps_dev_flow voidflow, *rflow = &voidflow;
@@ -2571,6 +2577,7 @@ static void net_tx_action(struct softirq_action *h)
2571 clist = clist->next; 2577 clist = clist->next;
2572 2578
2573 WARN_ON(atomic_read(&skb->users)); 2579 WARN_ON(atomic_read(&skb->users));
2580 trace_kfree_skb(skb, net_tx_action);
2574 __kfree_skb(skb); 2581 __kfree_skb(skb);
2575 } 2582 }
2576 } 2583 }
@@ -2828,6 +2835,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
2828 if (!netdev_tstamp_prequeue) 2835 if (!netdev_tstamp_prequeue)
2829 net_timestamp_check(skb); 2836 net_timestamp_check(skb);
2830 2837
2838 trace_netif_receive_skb(skb);
2831 if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb)) 2839 if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb))
2832 return NET_RX_SUCCESS; 2840 return NET_RX_SUCCESS;
2833 2841
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index afa6380ed88a..7f1bb2aba03b 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -26,6 +26,7 @@
26 26
27#define CREATE_TRACE_POINTS 27#define CREATE_TRACE_POINTS
28#include <trace/events/skb.h> 28#include <trace/events/skb.h>
29#include <trace/events/net.h>
29#include <trace/events/napi.h> 30#include <trace/events/napi.h>
30 31
31EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); 32EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c83b421341c0..56ba3c4e4761 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -466,6 +466,7 @@ void consume_skb(struct sk_buff *skb)
466 smp_rmb(); 466 smp_rmb();
467 else if (likely(!atomic_dec_and_test(&skb->users))) 467 else if (likely(!atomic_dec_and_test(&skb->users)))
468 return; 468 return;
469 trace_consume_skb(skb);
469 __kfree_skb(skb); 470 __kfree_skb(skb);
470} 471}
471EXPORT_SYMBOL(consume_skb); 472EXPORT_SYMBOL(consume_skb);
diff --git a/scripts/Makefile b/scripts/Makefile
index 842dbc2d5aed..2e088109fbd5 100644
--- a/scripts/Makefile
+++ b/scripts/Makefile
@@ -11,6 +11,7 @@ hostprogs-$(CONFIG_KALLSYMS) += kallsyms
11hostprogs-$(CONFIG_LOGO) += pnmtologo 11hostprogs-$(CONFIG_LOGO) += pnmtologo
12hostprogs-$(CONFIG_VT) += conmakehash 12hostprogs-$(CONFIG_VT) += conmakehash
13hostprogs-$(CONFIG_IKCONFIG) += bin2c 13hostprogs-$(CONFIG_IKCONFIG) += bin2c
14hostprogs-$(BUILD_C_RECORDMCOUNT) += recordmcount
14 15
15always := $(hostprogs-y) $(hostprogs-m) 16always := $(hostprogs-y) $(hostprogs-m)
16 17
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index a1a5cf95a68d..843bd4f4ffc9 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -209,12 +209,22 @@ cmd_modversions = \
209endif 209endif
210 210
211ifdef CONFIG_FTRACE_MCOUNT_RECORD 211ifdef CONFIG_FTRACE_MCOUNT_RECORD
212ifdef BUILD_C_RECORDMCOUNT
213# Due to recursion, we must skip empty.o.
214# The empty.o file is created in the make process in order to determine
215# the target endianness and word size. It is made before all other C
216# files, including recordmcount.
217cmd_record_mcount = if [ $(@) != "scripts/mod/empty.o" ]; then \
218 $(objtree)/scripts/recordmcount "$(@)"; \
219 fi;
220else
212cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \ 221cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \
213 "$(if $(CONFIG_CPU_BIG_ENDIAN),big,little)" \ 222 "$(if $(CONFIG_CPU_BIG_ENDIAN),big,little)" \
214 "$(if $(CONFIG_64BIT),64,32)" \ 223 "$(if $(CONFIG_64BIT),64,32)" \
215 "$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" "$(MV)" \ 224 "$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" "$(MV)" \
216 "$(if $(part-of-module),1,0)" "$(@)"; 225 "$(if $(part-of-module),1,0)" "$(@)";
217endif 226endif
227endif
218 228
219define rule_cc_o_c 229define rule_cc_o_c
220 $(call echo-cmd,checksrc) $(cmd_checksrc) \ 230 $(call echo-cmd,checksrc) $(cmd_checksrc) \
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 54fd1b700131..7bfcf1a09ac5 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -101,14 +101,6 @@ basename_flags = -D"KBUILD_BASENAME=KBUILD_STR($(call name-fix,$(basetarget)))"
101modname_flags = $(if $(filter 1,$(words $(modname))),\ 101modname_flags = $(if $(filter 1,$(words $(modname))),\
102 -D"KBUILD_MODNAME=KBUILD_STR($(call name-fix,$(modname)))") 102 -D"KBUILD_MODNAME=KBUILD_STR($(call name-fix,$(modname)))")
103 103
104#hash values
105ifdef CONFIG_DYNAMIC_DEBUG
106debug_flags = -D"DEBUG_HASH=$(shell ./scripts/basic/hash djb2 $(@D)$(modname))"\
107 -D"DEBUG_HASH2=$(shell ./scripts/basic/hash r5 $(@D)$(modname))"
108else
109debug_flags =
110endif
111
112orig_c_flags = $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(KBUILD_SUBDIR_CCFLAGS) \ 104orig_c_flags = $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(KBUILD_SUBDIR_CCFLAGS) \
113 $(ccflags-y) $(CFLAGS_$(basetarget).o) 105 $(ccflags-y) $(CFLAGS_$(basetarget).o)
114_c_flags = $(filter-out $(CFLAGS_REMOVE_$(basetarget).o), $(orig_c_flags)) 106_c_flags = $(filter-out $(CFLAGS_REMOVE_$(basetarget).o), $(orig_c_flags))
@@ -152,8 +144,7 @@ endif
152 144
153c_flags = -Wp,-MD,$(depfile) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) \ 145c_flags = -Wp,-MD,$(depfile) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) \
154 $(__c_flags) $(modkern_cflags) \ 146 $(__c_flags) $(modkern_cflags) \
155 -D"KBUILD_STR(s)=\#s" $(basename_flags) $(modname_flags) \ 147 -D"KBUILD_STR(s)=\#s" $(basename_flags) $(modname_flags)
156 $(debug_flags)
157 148
158a_flags = -Wp,-MD,$(depfile) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) \ 149a_flags = -Wp,-MD,$(depfile) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) \
159 $(__a_flags) $(modkern_aflags) 150 $(__a_flags) $(modkern_aflags)
diff --git a/scripts/basic/Makefile b/scripts/basic/Makefile
index 09559951df12..4c324a1f1e0e 100644
--- a/scripts/basic/Makefile
+++ b/scripts/basic/Makefile
@@ -9,7 +9,7 @@
9# fixdep: Used to generate dependency information during build process 9# fixdep: Used to generate dependency information during build process
10# docproc: Used in Documentation/DocBook 10# docproc: Used in Documentation/DocBook
11 11
12hostprogs-y := fixdep docproc hash 12hostprogs-y := fixdep docproc
13always := $(hostprogs-y) 13always := $(hostprogs-y)
14 14
15# fixdep is needed to compile other host programs 15# fixdep is needed to compile other host programs
diff --git a/scripts/basic/hash.c b/scripts/basic/hash.c
deleted file mode 100644
index 2ef5d3f666b8..000000000000
--- a/scripts/basic/hash.c
+++ /dev/null
@@ -1,64 +0,0 @@
1/*
2 * Copyright (C) 2008 Red Hat, Inc., Jason Baron <jbaron@redhat.com>
3 *
4 */
5
6#include <stdio.h>
7#include <stdlib.h>
8#include <string.h>
9
10#define DYNAMIC_DEBUG_HASH_BITS 6
11
12static const char *program;
13
14static void usage(void)
15{
16 printf("Usage: %s <djb2|r5> <modname>\n", program);
17 exit(1);
18}
19
20/* djb2 hashing algorithm by Dan Bernstein. From:
21 * http://www.cse.yorku.ca/~oz/hash.html
22 */
23
24static unsigned int djb2_hash(char *str)
25{
26 unsigned long hash = 5381;
27 int c;
28
29 c = *str;
30 while (c) {
31 hash = ((hash << 5) + hash) + c;
32 c = *++str;
33 }
34 return (unsigned int)(hash & ((1 << DYNAMIC_DEBUG_HASH_BITS) - 1));
35}
36
37static unsigned int r5_hash(char *str)
38{
39 unsigned long hash = 0;
40 int c;
41
42 c = *str;
43 while (c) {
44 hash = (hash + (c << 4) + (c >> 4)) * 11;
45 c = *++str;
46 }
47 return (unsigned int)(hash & ((1 << DYNAMIC_DEBUG_HASH_BITS) - 1));
48}
49
50int main(int argc, char *argv[])
51{
52 program = argv[0];
53
54 if (argc != 3)
55 usage();
56 if (!strcmp(argv[1], "djb2"))
57 printf("%d\n", djb2_hash(argv[2]));
58 else if (!strcmp(argv[1], "r5"))
59 printf("%d\n", r5_hash(argv[2]));
60 else
61 usage();
62 exit(0);
63}
64
diff --git a/scripts/gcc-goto.sh b/scripts/gcc-goto.sh
new file mode 100644
index 000000000000..520d16b1ffaf
--- /dev/null
+++ b/scripts/gcc-goto.sh
@@ -0,0 +1,5 @@
1#!/bin/sh
2# Test for gcc 'asm goto' suport
3# Copyright (C) 2010, Jason Baron <jbaron@redhat.com>
4
5echo "int main(void) { entry: asm goto (\"\"::::entry); return 0; }" | $@ -x c - -c -o /dev/null >/dev/null 2>&1 && echo "y"
diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c
new file mode 100644
index 000000000000..26e1271259ba
--- /dev/null
+++ b/scripts/recordmcount.c
@@ -0,0 +1,363 @@
1/*
2 * recordmcount.c: construct a table of the locations of calls to 'mcount'
3 * so that ftrace can find them quickly.
4 * Copyright 2009 John F. Reiser <jreiser@BitWagon.com>. All rights reserved.
5 * Licensed under the GNU General Public License, version 2 (GPLv2).
6 *
7 * Restructured to fit Linux format, as well as other updates:
8 * Copyright 2010 Steven Rostedt <srostedt@redhat.com>, Red Hat Inc.
9 */
10
11/*
12 * Strategy: alter the .o file in-place.
13 *
14 * Append a new STRTAB that has the new section names, followed by a new array
15 * ElfXX_Shdr[] that has the new section headers, followed by the section
16 * contents for __mcount_loc and its relocations. The old shstrtab strings,
17 * and the old ElfXX_Shdr[] array, remain as "garbage" (commonly, a couple
18 * kilobytes.) Subsequent processing by /bin/ld (or the kernel module loader)
19 * will ignore the garbage regions, because they are not designated by the
20 * new .e_shoff nor the new ElfXX_Shdr[]. [In order to remove the garbage,
21 * then use "ld -r" to create a new file that omits the garbage.]
22 */
23
24#include <sys/types.h>
25#include <sys/mman.h>
26#include <sys/stat.h>
27#include <elf.h>
28#include <fcntl.h>
29#include <setjmp.h>
30#include <stdio.h>
31#include <stdlib.h>
32#include <string.h>
33#include <unistd.h>
34
35static int fd_map; /* File descriptor for file being modified. */
36static int mmap_failed; /* Boolean flag. */
37static void *ehdr_curr; /* current ElfXX_Ehdr * for resource cleanup */
38static char gpfx; /* prefix for global symbol name (sometimes '_') */
39static struct stat sb; /* Remember .st_size, etc. */
40static jmp_buf jmpenv; /* setjmp/longjmp per-file error escape */
41
42/* setjmp() return values */
43enum {
44 SJ_SETJMP = 0, /* hardwired first return */
45 SJ_FAIL,
46 SJ_SUCCEED
47};
48
49/* Per-file resource cleanup when multiple files. */
50static void
51cleanup(void)
52{
53 if (!mmap_failed)
54 munmap(ehdr_curr, sb.st_size);
55 else
56 free(ehdr_curr);
57 close(fd_map);
58}
59
60static void __attribute__((noreturn))
61fail_file(void)
62{
63 cleanup();
64 longjmp(jmpenv, SJ_FAIL);
65}
66
67static void __attribute__((noreturn))
68succeed_file(void)
69{
70 cleanup();
71 longjmp(jmpenv, SJ_SUCCEED);
72}
73
74/* ulseek, uread, ...: Check return value for errors. */
75
76static off_t
77ulseek(int const fd, off_t const offset, int const whence)
78{
79 off_t const w = lseek(fd, offset, whence);
80 if ((off_t)-1 == w) {
81 perror("lseek");
82 fail_file();
83 }
84 return w;
85}
86
87static size_t
88uread(int const fd, void *const buf, size_t const count)
89{
90 size_t const n = read(fd, buf, count);
91 if (n != count) {
92 perror("read");
93 fail_file();
94 }
95 return n;
96}
97
98static size_t
99uwrite(int const fd, void const *const buf, size_t const count)
100{
101 size_t const n = write(fd, buf, count);
102 if (n != count) {
103 perror("write");
104 fail_file();
105 }
106 return n;
107}
108
109static void *
110umalloc(size_t size)
111{
112 void *const addr = malloc(size);
113 if (0 == addr) {
114 fprintf(stderr, "malloc failed: %zu bytes\n", size);
115 fail_file();
116 }
117 return addr;
118}
119
120/*
121 * Get the whole file as a programming convenience in order to avoid
122 * malloc+lseek+read+free of many pieces. If successful, then mmap
123 * avoids copying unused pieces; else just read the whole file.
124 * Open for both read and write; new info will be appended to the file.
125 * Use MAP_PRIVATE so that a few changes to the in-memory ElfXX_Ehdr
126 * do not propagate to the file until an explicit overwrite at the last.
127 * This preserves most aspects of consistency (all except .st_size)
128 * for simultaneous readers of the file while we are appending to it.
129 * However, multiple writers still are bad. We choose not to use
130 * locking because it is expensive and the use case of kernel build
131 * makes multiple writers unlikely.
132 */
133static void *mmap_file(char const *fname)
134{
135 void *addr;
136
137 fd_map = open(fname, O_RDWR);
138 if (0 > fd_map || 0 > fstat(fd_map, &sb)) {
139 perror(fname);
140 fail_file();
141 }
142 if (!S_ISREG(sb.st_mode)) {
143 fprintf(stderr, "not a regular file: %s\n", fname);
144 fail_file();
145 }
146 addr = mmap(0, sb.st_size, PROT_READ|PROT_WRITE, MAP_PRIVATE,
147 fd_map, 0);
148 mmap_failed = 0;
149 if (MAP_FAILED == addr) {
150 mmap_failed = 1;
151 addr = umalloc(sb.st_size);
152 uread(fd_map, addr, sb.st_size);
153 }
154 return addr;
155}
156
157/* w8rev, w8nat, ...: Handle endianness. */
158
159static uint64_t w8rev(uint64_t const x)
160{
161 return ((0xff & (x >> (0 * 8))) << (7 * 8))
162 | ((0xff & (x >> (1 * 8))) << (6 * 8))
163 | ((0xff & (x >> (2 * 8))) << (5 * 8))
164 | ((0xff & (x >> (3 * 8))) << (4 * 8))
165 | ((0xff & (x >> (4 * 8))) << (3 * 8))
166 | ((0xff & (x >> (5 * 8))) << (2 * 8))
167 | ((0xff & (x >> (6 * 8))) << (1 * 8))
168 | ((0xff & (x >> (7 * 8))) << (0 * 8));
169}
170
171static uint32_t w4rev(uint32_t const x)
172{
173 return ((0xff & (x >> (0 * 8))) << (3 * 8))
174 | ((0xff & (x >> (1 * 8))) << (2 * 8))
175 | ((0xff & (x >> (2 * 8))) << (1 * 8))
176 | ((0xff & (x >> (3 * 8))) << (0 * 8));
177}
178
179static uint32_t w2rev(uint16_t const x)
180{
181 return ((0xff & (x >> (0 * 8))) << (1 * 8))
182 | ((0xff & (x >> (1 * 8))) << (0 * 8));
183}
184
185static uint64_t w8nat(uint64_t const x)
186{
187 return x;
188}
189
190static uint32_t w4nat(uint32_t const x)
191{
192 return x;
193}
194
195static uint32_t w2nat(uint16_t const x)
196{
197 return x;
198}
199
200static uint64_t (*w8)(uint64_t);
201static uint32_t (*w)(uint32_t);
202static uint32_t (*w2)(uint16_t);
203
204/* Names of the sections that could contain calls to mcount. */
205static int
206is_mcounted_section_name(char const *const txtname)
207{
208 return 0 == strcmp(".text", txtname) ||
209 0 == strcmp(".sched.text", txtname) ||
210 0 == strcmp(".spinlock.text", txtname) ||
211 0 == strcmp(".irqentry.text", txtname) ||
212 0 == strcmp(".text.unlikely", txtname);
213}
214
215/* 32 bit and 64 bit are very similar */
216#include "recordmcount.h"
217#define RECORD_MCOUNT_64
218#include "recordmcount.h"
219
220static void
221do_file(char const *const fname)
222{
223 Elf32_Ehdr *const ehdr = mmap_file(fname);
224 unsigned int reltype = 0;
225
226 ehdr_curr = ehdr;
227 w = w4nat;
228 w2 = w2nat;
229 w8 = w8nat;
230 switch (ehdr->e_ident[EI_DATA]) {
231 static unsigned int const endian = 1;
232 default: {
233 fprintf(stderr, "unrecognized ELF data encoding %d: %s\n",
234 ehdr->e_ident[EI_DATA], fname);
235 fail_file();
236 } break;
237 case ELFDATA2LSB: {
238 if (1 != *(unsigned char const *)&endian) {
239 /* main() is big endian, file.o is little endian. */
240 w = w4rev;
241 w2 = w2rev;
242 w8 = w8rev;
243 }
244 } break;
245 case ELFDATA2MSB: {
246 if (0 != *(unsigned char const *)&endian) {
247 /* main() is little endian, file.o is big endian. */
248 w = w4rev;
249 w2 = w2rev;
250 w8 = w8rev;
251 }
252 } break;
253 } /* end switch */
254 if (0 != memcmp(ELFMAG, ehdr->e_ident, SELFMAG)
255 || ET_REL != w2(ehdr->e_type)
256 || EV_CURRENT != ehdr->e_ident[EI_VERSION]) {
257 fprintf(stderr, "unrecognized ET_REL file %s\n", fname);
258 fail_file();
259 }
260
261 gpfx = 0;
262 switch (w2(ehdr->e_machine)) {
263 default: {
264 fprintf(stderr, "unrecognized e_machine %d %s\n",
265 w2(ehdr->e_machine), fname);
266 fail_file();
267 } break;
268 case EM_386: reltype = R_386_32; break;
269 case EM_ARM: reltype = R_ARM_ABS32; break;
270 case EM_IA_64: reltype = R_IA64_IMM64; gpfx = '_'; break;
271 case EM_PPC: reltype = R_PPC_ADDR32; gpfx = '_'; break;
272 case EM_PPC64: reltype = R_PPC64_ADDR64; gpfx = '_'; break;
273 case EM_S390: /* reltype: e_class */ gpfx = '_'; break;
274 case EM_SH: reltype = R_SH_DIR32; break;
275 case EM_SPARCV9: reltype = R_SPARC_64; gpfx = '_'; break;
276 case EM_X86_64: reltype = R_X86_64_64; break;
277 } /* end switch */
278
279 switch (ehdr->e_ident[EI_CLASS]) {
280 default: {
281 fprintf(stderr, "unrecognized ELF class %d %s\n",
282 ehdr->e_ident[EI_CLASS], fname);
283 fail_file();
284 } break;
285 case ELFCLASS32: {
286 if (sizeof(Elf32_Ehdr) != w2(ehdr->e_ehsize)
287 || sizeof(Elf32_Shdr) != w2(ehdr->e_shentsize)) {
288 fprintf(stderr,
289 "unrecognized ET_REL file: %s\n", fname);
290 fail_file();
291 }
292 if (EM_S390 == w2(ehdr->e_machine))
293 reltype = R_390_32;
294 do32(ehdr, fname, reltype);
295 } break;
296 case ELFCLASS64: {
297 Elf64_Ehdr *const ghdr = (Elf64_Ehdr *)ehdr;
298 if (sizeof(Elf64_Ehdr) != w2(ghdr->e_ehsize)
299 || sizeof(Elf64_Shdr) != w2(ghdr->e_shentsize)) {
300 fprintf(stderr,
301 "unrecognized ET_REL file: %s\n", fname);
302 fail_file();
303 }
304 if (EM_S390 == w2(ghdr->e_machine))
305 reltype = R_390_64;
306 do64(ghdr, fname, reltype);
307 } break;
308 } /* end switch */
309
310 cleanup();
311}
312
313int
314main(int argc, char const *argv[])
315{
316 const char ftrace[] = "kernel/trace/ftrace.o";
317 int ftrace_size = sizeof(ftrace) - 1;
318 int n_error = 0; /* gcc-4.3.0 false positive complaint */
319
320 if (argc <= 1) {
321 fprintf(stderr, "usage: recordmcount file.o...\n");
322 return 0;
323 }
324
325 /* Process each file in turn, allowing deep failure. */
326 for (--argc, ++argv; 0 < argc; --argc, ++argv) {
327 int const sjval = setjmp(jmpenv);
328 int len;
329
330 /*
331 * The file kernel/trace/ftrace.o references the mcount
332 * function but does not call it. Since ftrace.o should
333 * not be traced anyway, we just skip it.
334 */
335 len = strlen(argv[0]);
336 if (len >= ftrace_size &&
337 strcmp(argv[0] + (len - ftrace_size), ftrace) == 0)
338 continue;
339
340 switch (sjval) {
341 default: {
342 fprintf(stderr, "internal error: %s\n", argv[0]);
343 exit(1);
344 } break;
345 case SJ_SETJMP: { /* normal sequence */
346 /* Avoid problems if early cleanup() */
347 fd_map = -1;
348 ehdr_curr = NULL;
349 mmap_failed = 1;
350 do_file(argv[0]);
351 } break;
352 case SJ_FAIL: { /* error in do_file or below */
353 ++n_error;
354 } break;
355 case SJ_SUCCEED: { /* premature success */
356 /* do nothing */
357 } break;
358 } /* end switch */
359 }
360 return !!n_error;
361}
362
363
diff --git a/scripts/recordmcount.h b/scripts/recordmcount.h
new file mode 100644
index 000000000000..7f39d0943d2d
--- /dev/null
+++ b/scripts/recordmcount.h
@@ -0,0 +1,366 @@
1/*
2 * recordmcount.h
3 *
4 * This code was taken out of recordmcount.c written by
5 * Copyright 2009 John F. Reiser <jreiser@BitWagon.com>. All rights reserved.
6 *
7 * The original code had the same algorithms for both 32bit
8 * and 64bit ELF files, but the code was duplicated to support
9 * the difference in structures that were used. This
10 * file creates a macro of everything that is different between
11 * the 64 and 32 bit code, such that by including this header
12 * twice we can create both sets of functions by including this
13 * header once with RECORD_MCOUNT_64 undefined, and again with
14 * it defined.
15 *
16 * This conversion to macros was done by:
17 * Copyright 2010 Steven Rostedt <srostedt@redhat.com>, Red Hat Inc.
18 *
19 * Licensed under the GNU General Public License, version 2 (GPLv2).
20 */
21#undef append_func
22#undef sift_rel_mcount
23#undef find_secsym_ndx
24#undef __has_rel_mcount
25#undef has_rel_mcount
26#undef tot_relsize
27#undef do_func
28#undef Elf_Ehdr
29#undef Elf_Shdr
30#undef Elf_Rel
31#undef Elf_Rela
32#undef Elf_Sym
33#undef ELF_R_SYM
34#undef ELF_R_INFO
35#undef ELF_ST_BIND
36#undef uint_t
37#undef _w
38#undef _align
39#undef _size
40
41#ifdef RECORD_MCOUNT_64
42# define append_func append64
43# define sift_rel_mcount sift64_rel_mcount
44# define find_secsym_ndx find64_secsym_ndx
45# define __has_rel_mcount __has64_rel_mcount
46# define has_rel_mcount has64_rel_mcount
47# define tot_relsize tot64_relsize
48# define do_func do64
49# define Elf_Ehdr Elf64_Ehdr
50# define Elf_Shdr Elf64_Shdr
51# define Elf_Rel Elf64_Rel
52# define Elf_Rela Elf64_Rela
53# define Elf_Sym Elf64_Sym
54# define ELF_R_SYM ELF64_R_SYM
55# define ELF_R_INFO ELF64_R_INFO
56# define ELF_ST_BIND ELF64_ST_BIND
57# define uint_t uint64_t
58# define _w w8
59# define _align 7u
60# define _size 8
61#else
62# define append_func append32
63# define sift_rel_mcount sift32_rel_mcount
64# define find_secsym_ndx find32_secsym_ndx
65# define __has_rel_mcount __has32_rel_mcount
66# define has_rel_mcount has32_rel_mcount
67# define tot_relsize tot32_relsize
68# define do_func do32
69# define Elf_Ehdr Elf32_Ehdr
70# define Elf_Shdr Elf32_Shdr
71# define Elf_Rel Elf32_Rel
72# define Elf_Rela Elf32_Rela
73# define Elf_Sym Elf32_Sym
74# define ELF_R_SYM ELF32_R_SYM
75# define ELF_R_INFO ELF32_R_INFO
76# define ELF_ST_BIND ELF32_ST_BIND
77# define uint_t uint32_t
78# define _w w
79# define _align 3u
80# define _size 4
81#endif
82
83/* Append the new shstrtab, Elf_Shdr[], __mcount_loc and its relocations. */
84static void append_func(Elf_Ehdr *const ehdr,
85 Elf_Shdr *const shstr,
86 uint_t const *const mloc0,
87 uint_t const *const mlocp,
88 Elf_Rel const *const mrel0,
89 Elf_Rel const *const mrelp,
90 unsigned int const rel_entsize,
91 unsigned int const symsec_sh_link)
92{
93 /* Begin constructing output file */
94 Elf_Shdr mcsec;
95 char const *mc_name = (sizeof(Elf_Rela) == rel_entsize)
96 ? ".rela__mcount_loc"
97 : ".rel__mcount_loc";
98 unsigned const old_shnum = w2(ehdr->e_shnum);
99 uint_t const old_shoff = _w(ehdr->e_shoff);
100 uint_t const old_shstr_sh_size = _w(shstr->sh_size);
101 uint_t const old_shstr_sh_offset = _w(shstr->sh_offset);
102 uint_t t = 1 + strlen(mc_name) + _w(shstr->sh_size);
103 uint_t new_e_shoff;
104
105 shstr->sh_size = _w(t);
106 shstr->sh_offset = _w(sb.st_size);
107 t += sb.st_size;
108 t += (_align & -t); /* word-byte align */
109 new_e_shoff = t;
110
111 /* body for new shstrtab */
112 ulseek(fd_map, sb.st_size, SEEK_SET);
113 uwrite(fd_map, old_shstr_sh_offset + (void *)ehdr, old_shstr_sh_size);
114 uwrite(fd_map, mc_name, 1 + strlen(mc_name));
115
116 /* old(modified) Elf_Shdr table, word-byte aligned */
117 ulseek(fd_map, t, SEEK_SET);
118 t += sizeof(Elf_Shdr) * old_shnum;
119 uwrite(fd_map, old_shoff + (void *)ehdr,
120 sizeof(Elf_Shdr) * old_shnum);
121
122 /* new sections __mcount_loc and .rel__mcount_loc */
123 t += 2*sizeof(mcsec);
124 mcsec.sh_name = w((sizeof(Elf_Rela) == rel_entsize) + strlen(".rel")
125 + old_shstr_sh_size);
126 mcsec.sh_type = w(SHT_PROGBITS);
127 mcsec.sh_flags = _w(SHF_ALLOC);
128 mcsec.sh_addr = 0;
129 mcsec.sh_offset = _w(t);
130 mcsec.sh_size = _w((void *)mlocp - (void *)mloc0);
131 mcsec.sh_link = 0;
132 mcsec.sh_info = 0;
133 mcsec.sh_addralign = _w(_size);
134 mcsec.sh_entsize = _w(_size);
135 uwrite(fd_map, &mcsec, sizeof(mcsec));
136
137 mcsec.sh_name = w(old_shstr_sh_size);
138 mcsec.sh_type = (sizeof(Elf_Rela) == rel_entsize)
139 ? w(SHT_RELA)
140 : w(SHT_REL);
141 mcsec.sh_flags = 0;
142 mcsec.sh_addr = 0;
143 mcsec.sh_offset = _w((void *)mlocp - (void *)mloc0 + t);
144 mcsec.sh_size = _w((void *)mrelp - (void *)mrel0);
145 mcsec.sh_link = w(symsec_sh_link);
146 mcsec.sh_info = w(old_shnum);
147 mcsec.sh_addralign = _w(_size);
148 mcsec.sh_entsize = _w(rel_entsize);
149 uwrite(fd_map, &mcsec, sizeof(mcsec));
150
151 uwrite(fd_map, mloc0, (void *)mlocp - (void *)mloc0);
152 uwrite(fd_map, mrel0, (void *)mrelp - (void *)mrel0);
153
154 ehdr->e_shoff = _w(new_e_shoff);
155 ehdr->e_shnum = w2(2 + w2(ehdr->e_shnum)); /* {.rel,}__mcount_loc */
156 ulseek(fd_map, 0, SEEK_SET);
157 uwrite(fd_map, ehdr, sizeof(*ehdr));
158}
159
160
161/*
162 * Look at the relocations in order to find the calls to mcount.
163 * Accumulate the section offsets that are found, and their relocation info,
164 * onto the end of the existing arrays.
165 */
166static uint_t *sift_rel_mcount(uint_t *mlocp,
167 unsigned const offbase,
168 Elf_Rel **const mrelpp,
169 Elf_Shdr const *const relhdr,
170 Elf_Ehdr const *const ehdr,
171 unsigned const recsym,
172 uint_t const recval,
173 unsigned const reltype)
174{
175 uint_t *const mloc0 = mlocp;
176 Elf_Rel *mrelp = *mrelpp;
177 Elf_Shdr *const shdr0 = (Elf_Shdr *)(_w(ehdr->e_shoff)
178 + (void *)ehdr);
179 unsigned const symsec_sh_link = w(relhdr->sh_link);
180 Elf_Shdr const *const symsec = &shdr0[symsec_sh_link];
181 Elf_Sym const *const sym0 = (Elf_Sym const *)(_w(symsec->sh_offset)
182 + (void *)ehdr);
183
184 Elf_Shdr const *const strsec = &shdr0[w(symsec->sh_link)];
185 char const *const str0 = (char const *)(_w(strsec->sh_offset)
186 + (void *)ehdr);
187
188 Elf_Rel const *const rel0 = (Elf_Rel const *)(_w(relhdr->sh_offset)
189 + (void *)ehdr);
190 unsigned rel_entsize = _w(relhdr->sh_entsize);
191 unsigned const nrel = _w(relhdr->sh_size) / rel_entsize;
192 Elf_Rel const *relp = rel0;
193
194 unsigned mcountsym = 0;
195 unsigned t;
196
197 for (t = nrel; t; --t) {
198 if (!mcountsym) {
199 Elf_Sym const *const symp =
200 &sym0[ELF_R_SYM(_w(relp->r_info))];
201 char const *symname = &str0[w(symp->st_name)];
202
203 if ('.' == symname[0])
204 ++symname; /* ppc64 hack */
205 if (0 == strcmp((('_' == gpfx) ? "_mcount" : "mcount"),
206 symname))
207 mcountsym = ELF_R_SYM(_w(relp->r_info));
208 }
209
210 if (mcountsym == ELF_R_SYM(_w(relp->r_info))) {
211 uint_t const addend = _w(_w(relp->r_offset) - recval);
212
213 mrelp->r_offset = _w(offbase
214 + ((void *)mlocp - (void *)mloc0));
215 mrelp->r_info = _w(ELF_R_INFO(recsym, reltype));
216 if (sizeof(Elf_Rela) == rel_entsize) {
217 ((Elf_Rela *)mrelp)->r_addend = addend;
218 *mlocp++ = 0;
219 } else
220 *mlocp++ = addend;
221
222 mrelp = (Elf_Rel *)(rel_entsize + (void *)mrelp);
223 }
224 relp = (Elf_Rel const *)(rel_entsize + (void *)relp);
225 }
226 *mrelpp = mrelp;
227 return mlocp;
228}
229
230
231/*
232 * Find a symbol in the given section, to be used as the base for relocating
233 * the table of offsets of calls to mcount. A local or global symbol suffices,
234 * but avoid a Weak symbol because it may be overridden; the change in value
235 * would invalidate the relocations of the offsets of the calls to mcount.
236 * Often the found symbol will be the unnamed local symbol generated by
237 * GNU 'as' for the start of each section. For example:
238 * Num: Value Size Type Bind Vis Ndx Name
239 * 2: 00000000 0 SECTION LOCAL DEFAULT 1
240 */
241static unsigned find_secsym_ndx(unsigned const txtndx,
242 char const *const txtname,
243 uint_t *const recvalp,
244 Elf_Shdr const *const symhdr,
245 Elf_Ehdr const *const ehdr)
246{
247 Elf_Sym const *const sym0 = (Elf_Sym const *)(_w(symhdr->sh_offset)
248 + (void *)ehdr);
249 unsigned const nsym = _w(symhdr->sh_size) / _w(symhdr->sh_entsize);
250 Elf_Sym const *symp;
251 unsigned t;
252
253 for (symp = sym0, t = nsym; t; --t, ++symp) {
254 unsigned int const st_bind = ELF_ST_BIND(symp->st_info);
255
256 if (txtndx == w2(symp->st_shndx)
257 /* avoid STB_WEAK */
258 && (STB_LOCAL == st_bind || STB_GLOBAL == st_bind)) {
259 *recvalp = _w(symp->st_value);
260 return symp - sym0;
261 }
262 }
263 fprintf(stderr, "Cannot find symbol for section %d: %s.\n",
264 txtndx, txtname);
265 fail_file();
266}
267
268
269/* Evade ISO C restriction: no declaration after statement in has_rel_mcount. */
270static char const *
271__has_rel_mcount(Elf_Shdr const *const relhdr, /* is SHT_REL or SHT_RELA */
272 Elf_Shdr const *const shdr0,
273 char const *const shstrtab,
274 char const *const fname)
275{
276 /* .sh_info depends on .sh_type == SHT_REL[,A] */
277 Elf_Shdr const *const txthdr = &shdr0[w(relhdr->sh_info)];
278 char const *const txtname = &shstrtab[w(txthdr->sh_name)];
279
280 if (0 == strcmp("__mcount_loc", txtname)) {
281 fprintf(stderr, "warning: __mcount_loc already exists: %s\n",
282 fname);
283 succeed_file();
284 }
285 if (SHT_PROGBITS != w(txthdr->sh_type) ||
286 !is_mcounted_section_name(txtname))
287 return NULL;
288 return txtname;
289}
290
291static char const *has_rel_mcount(Elf_Shdr const *const relhdr,
292 Elf_Shdr const *const shdr0,
293 char const *const shstrtab,
294 char const *const fname)
295{
296 if (SHT_REL != w(relhdr->sh_type) && SHT_RELA != w(relhdr->sh_type))
297 return NULL;
298 return __has_rel_mcount(relhdr, shdr0, shstrtab, fname);
299}
300
301
302static unsigned tot_relsize(Elf_Shdr const *const shdr0,
303 unsigned nhdr,
304 const char *const shstrtab,
305 const char *const fname)
306{
307 unsigned totrelsz = 0;
308 Elf_Shdr const *shdrp = shdr0;
309
310 for (; nhdr; --nhdr, ++shdrp) {
311 if (has_rel_mcount(shdrp, shdr0, shstrtab, fname))
312 totrelsz += _w(shdrp->sh_size);
313 }
314 return totrelsz;
315}
316
317
318/* Overall supervision for Elf32 ET_REL file. */
319static void
320do_func(Elf_Ehdr *const ehdr, char const *const fname, unsigned const reltype)
321{
322 Elf_Shdr *const shdr0 = (Elf_Shdr *)(_w(ehdr->e_shoff)
323 + (void *)ehdr);
324 unsigned const nhdr = w2(ehdr->e_shnum);
325 Elf_Shdr *const shstr = &shdr0[w2(ehdr->e_shstrndx)];
326 char const *const shstrtab = (char const *)(_w(shstr->sh_offset)
327 + (void *)ehdr);
328
329 Elf_Shdr const *relhdr;
330 unsigned k;
331
332 /* Upper bound on space: assume all relevant relocs are for mcount. */
333 unsigned const totrelsz = tot_relsize(shdr0, nhdr, shstrtab, fname);
334 Elf_Rel *const mrel0 = umalloc(totrelsz);
335 Elf_Rel * mrelp = mrel0;
336
337 /* 2*sizeof(address) <= sizeof(Elf_Rel) */
338 uint_t *const mloc0 = umalloc(totrelsz>>1);
339 uint_t * mlocp = mloc0;
340
341 unsigned rel_entsize = 0;
342 unsigned symsec_sh_link = 0;
343
344 for (relhdr = shdr0, k = nhdr; k; --k, ++relhdr) {
345 char const *const txtname = has_rel_mcount(relhdr, shdr0,
346 shstrtab, fname);
347 if (txtname) {
348 uint_t recval = 0;
349 unsigned const recsym = find_secsym_ndx(
350 w(relhdr->sh_info), txtname, &recval,
351 &shdr0[symsec_sh_link = w(relhdr->sh_link)],
352 ehdr);
353
354 rel_entsize = _w(relhdr->sh_entsize);
355 mlocp = sift_rel_mcount(mlocp,
356 (void *)mlocp - (void *)mloc0, &mrelp,
357 relhdr, ehdr, recsym, recval, reltype);
358 }
359 }
360 if (mloc0 != mlocp) {
361 append_func(ehdr, shstr, mloc0, mlocp, mrel0, mrelp,
362 rel_entsize, symsec_sh_link);
363 }
364 free(mrel0);
365 free(mloc0);
366}
diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt
index 5164a655c39f..b2c63309a651 100644
--- a/tools/perf/Documentation/perf-annotate.txt
+++ b/tools/perf/Documentation/perf-annotate.txt
@@ -8,7 +8,7 @@ perf-annotate - Read perf.data (created by perf record) and display annotated co
8SYNOPSIS 8SYNOPSIS
9-------- 9--------
10[verse] 10[verse]
11'perf annotate' [-i <file> | --input=file] symbol_name 11'perf annotate' [-i <file> | --input=file] [symbol_name]
12 12
13DESCRIPTION 13DESCRIPTION
14----------- 14-----------
@@ -24,6 +24,13 @@ OPTIONS
24--input=:: 24--input=::
25 Input file name. (default: perf.data) 25 Input file name. (default: perf.data)
26 26
27--stdio:: Use the stdio interface.
28
29--tui:: Use the TUI interface Use of --tui requires a tty, if one is not
30 present, as when piping to other commands, the stdio interface is
31 used. This interfaces starts by centering on the line with more
32 samples, TAB/UNTAB cycles thru the lines with more samples.
33
27SEE ALSO 34SEE ALSO
28-------- 35--------
29linkperf:perf-record[1] 36linkperf:perf-record[1], linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index abfabe9147a4..12052c9ed0ba 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -65,6 +65,13 @@ OPTIONS
65 the tree is considered as a new profiled object. + 65 the tree is considered as a new profiled object. +
66 Default: fractal,0.5. 66 Default: fractal,0.5.
67 67
68--stdio:: Use the stdio interface.
69
70--tui:: Use the TUI interface, that is integrated with annotate and allows
71 zooming into DSOs or threads, among other features. Use of --tui
72 requires a tty, if one is not present, as when piping to other
73 commands, the stdio interface is used.
74
68SEE ALSO 75SEE ALSO
69-------- 76--------
70linkperf:perf-stat[1] 77linkperf:perf-stat[1]
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 1950e19af1cf..d1db0f676a4b 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -313,6 +313,9 @@ TEST_PROGRAMS =
313 313
314SCRIPT_SH += perf-archive.sh 314SCRIPT_SH += perf-archive.sh
315 315
316grep-libs = $(filter -l%,$(1))
317strip-libs = $(filter-out -l%,$(1))
318
316# 319#
317# No Perl scripts right now: 320# No Perl scripts right now:
318# 321#
@@ -588,14 +591,17 @@ endif
588ifdef NO_LIBPERL 591ifdef NO_LIBPERL
589 BASIC_CFLAGS += -DNO_LIBPERL 592 BASIC_CFLAGS += -DNO_LIBPERL
590else 593else
591 PERL_EMBED_LDOPTS = `perl -MExtUtils::Embed -e ldopts 2>/dev/null` 594 PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null)
595 PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS))
596 PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS))
592 PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null` 597 PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null`
593 FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS) 598 FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS)
594 599
595 ifneq ($(call try-cc,$(SOURCE_PERL_EMBED),$(FLAGS_PERL_EMBED)),y) 600 ifneq ($(call try-cc,$(SOURCE_PERL_EMBED),$(FLAGS_PERL_EMBED)),y)
596 BASIC_CFLAGS += -DNO_LIBPERL 601 BASIC_CFLAGS += -DNO_LIBPERL
597 else 602 else
598 ALL_LDFLAGS += $(PERL_EMBED_LDOPTS) 603 ALL_LDFLAGS += $(PERL_EMBED_LDFLAGS)
604 EXTLIBS += $(PERL_EMBED_LIBADD)
599 LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-perl.o 605 LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-perl.o
600 LIB_OBJS += $(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o 606 LIB_OBJS += $(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o
601 endif 607 endif
@@ -604,13 +610,16 @@ endif
604ifdef NO_LIBPYTHON 610ifdef NO_LIBPYTHON
605 BASIC_CFLAGS += -DNO_LIBPYTHON 611 BASIC_CFLAGS += -DNO_LIBPYTHON
606else 612else
607 PYTHON_EMBED_LDOPTS = `python-config --ldflags 2>/dev/null` 613 PYTHON_EMBED_LDOPTS = $(shell python-config --ldflags 2>/dev/null)
614 PYTHON_EMBED_LDFLAGS = $(call strip-libs,$(PYTHON_EMBED_LDOPTS))
615 PYTHON_EMBED_LIBADD = $(call grep-libs,$(PYTHON_EMBED_LDOPTS))
608 PYTHON_EMBED_CCOPTS = `python-config --cflags 2>/dev/null` 616 PYTHON_EMBED_CCOPTS = `python-config --cflags 2>/dev/null`
609 FLAGS_PYTHON_EMBED=$(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS) 617 FLAGS_PYTHON_EMBED=$(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
610 ifneq ($(call try-cc,$(SOURCE_PYTHON_EMBED),$(FLAGS_PYTHON_EMBED)),y) 618 ifneq ($(call try-cc,$(SOURCE_PYTHON_EMBED),$(FLAGS_PYTHON_EMBED)),y)
611 BASIC_CFLAGS += -DNO_LIBPYTHON 619 BASIC_CFLAGS += -DNO_LIBPYTHON
612 else 620 else
613 ALL_LDFLAGS += $(PYTHON_EMBED_LDOPTS) 621 ALL_LDFLAGS += $(PYTHON_EMBED_LDFLAGS)
622 EXTLIBS += $(PYTHON_EMBED_LIBADD)
614 LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o 623 LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o
615 LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o 624 LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o
616 endif 625 endif
@@ -653,6 +662,15 @@ else
653 endif 662 endif
654endif 663endif
655 664
665
666ifdef NO_STRLCPY
667 BASIC_CFLAGS += -DNO_STRLCPY
668else
669 ifneq ($(call try-cc,$(SOURCE_STRLCPY),),y)
670 BASIC_CFLAGS += -DNO_STRLCPY
671 endif
672endif
673
656ifndef CC_LD_DYNPATH 674ifndef CC_LD_DYNPATH
657 ifdef NO_R_TO_GCC_LINKER 675 ifdef NO_R_TO_GCC_LINKER
658 # Some gcc does not accept and pass -R to the linker to specify 676 # Some gcc does not accept and pass -R to the linker to specify
@@ -910,8 +928,8 @@ $(OUTPUT)perf.o: perf.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
910 $(ALL_CFLAGS) -c $(filter %.c,$^) -o $@ 928 $(ALL_CFLAGS) -c $(filter %.c,$^) -o $@
911 929
912$(OUTPUT)perf$X: $(OUTPUT)perf.o $(BUILTIN_OBJS) $(PERFLIBS) 930$(OUTPUT)perf$X: $(OUTPUT)perf.o $(BUILTIN_OBJS) $(PERFLIBS)
913 $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(OUTPUT)perf.o \ 931 $(QUIET_LINK)$(CC) $(ALL_CFLAGS) $(ALL_LDFLAGS) $(OUTPUT)perf.o \
914 $(BUILTIN_OBJS) $(ALL_LDFLAGS) $(LIBS) 932 $(BUILTIN_OBJS) $(LIBS) -o $@
915 933
916$(OUTPUT)builtin-help.o: builtin-help.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS 934$(OUTPUT)builtin-help.o: builtin-help.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
917 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) \ 935 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) \
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 1478dc64bf15..6d5604d8df95 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -28,7 +28,7 @@
28 28
29static char const *input_name = "perf.data"; 29static char const *input_name = "perf.data";
30 30
31static bool force; 31static bool force, use_tui, use_stdio;
32 32
33static bool full_paths; 33static bool full_paths;
34 34
@@ -321,7 +321,7 @@ static int hist_entry__tty_annotate(struct hist_entry *he)
321 321
322static void hists__find_annotations(struct hists *self) 322static void hists__find_annotations(struct hists *self)
323{ 323{
324 struct rb_node *first = rb_first(&self->entries), *nd = first; 324 struct rb_node *nd = rb_first(&self->entries), *next;
325 int key = KEY_RIGHT; 325 int key = KEY_RIGHT;
326 326
327 while (nd) { 327 while (nd) {
@@ -343,20 +343,19 @@ find_next:
343 343
344 if (use_browser > 0) { 344 if (use_browser > 0) {
345 key = hist_entry__tui_annotate(he); 345 key = hist_entry__tui_annotate(he);
346 if (is_exit_key(key))
347 break;
348 switch (key) { 346 switch (key) {
349 case KEY_RIGHT: 347 case KEY_RIGHT:
350 case '\t': 348 next = rb_next(nd);
351 nd = rb_next(nd);
352 break; 349 break;
353 case KEY_LEFT: 350 case KEY_LEFT:
354 if (nd == first) 351 next = rb_prev(nd);
355 continue;
356 nd = rb_prev(nd);
357 default:
358 break; 352 break;
353 default:
354 return;
359 } 355 }
356
357 if (next != NULL)
358 nd = next;
360 } else { 359 } else {
361 hist_entry__tty_annotate(he); 360 hist_entry__tty_annotate(he);
362 nd = rb_next(nd); 361 nd = rb_next(nd);
@@ -428,6 +427,8 @@ static const struct option options[] = {
428 "be more verbose (show symbol address, etc)"), 427 "be more verbose (show symbol address, etc)"),
429 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 428 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
430 "dump raw trace in ASCII"), 429 "dump raw trace in ASCII"),
430 OPT_BOOLEAN(0, "tui", &use_tui, "Use the TUI interface"),
431 OPT_BOOLEAN(0, "stdio", &use_stdio, "Use the stdio interface"),
431 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, 432 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
432 "file", "vmlinux pathname"), 433 "file", "vmlinux pathname"),
433 OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, 434 OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
@@ -443,6 +444,11 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __used)
443{ 444{
444 argc = parse_options(argc, argv, options, annotate_usage, 0); 445 argc = parse_options(argc, argv, options, annotate_usage, 0);
445 446
447 if (use_stdio)
448 use_browser = 0;
449 else if (use_tui)
450 use_browser = 1;
451
446 setup_browser(); 452 setup_browser();
447 453
448 symbol_conf.priv_size = sizeof(struct sym_priv); 454 symbol_conf.priv_size = sizeof(struct sym_priv);
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 55fc1f46892a..5de405d45230 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -32,7 +32,7 @@
32 32
33static char const *input_name = "perf.data"; 33static char const *input_name = "perf.data";
34 34
35static bool force; 35static bool force, use_tui, use_stdio;
36static bool hide_unresolved; 36static bool hide_unresolved;
37static bool dont_use_callchains; 37static bool dont_use_callchains;
38 38
@@ -107,7 +107,8 @@ static int perf_session__add_hist_entry(struct perf_session *self,
107 goto out_free_syms; 107 goto out_free_syms;
108 err = 0; 108 err = 0;
109 if (symbol_conf.use_callchain) { 109 if (symbol_conf.use_callchain) {
110 err = append_chain(he->callchain, data->callchain, syms, data->period); 110 err = callchain_append(he->callchain, data->callchain, syms,
111 data->period);
111 if (err) 112 if (err)
112 goto out_free_syms; 113 goto out_free_syms;
113 } 114 }
@@ -450,6 +451,8 @@ static const struct option options[] = {
450 "Show per-thread event counters"), 451 "Show per-thread event counters"),
451 OPT_STRING(0, "pretty", &pretty_printing_style, "key", 452 OPT_STRING(0, "pretty", &pretty_printing_style, "key",
452 "pretty printing style key: normal raw"), 453 "pretty printing style key: normal raw"),
454 OPT_BOOLEAN(0, "tui", &use_tui, "Use the TUI interface"),
455 OPT_BOOLEAN(0, "stdio", &use_stdio, "Use the stdio interface"),
453 OPT_STRING('s', "sort", &sort_order, "key[,key2...]", 456 OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
454 "sort by key(s): pid, comm, dso, symbol, parent"), 457 "sort by key(s): pid, comm, dso, symbol, parent"),
455 OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, 458 OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization,
@@ -482,8 +485,15 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
482{ 485{
483 argc = parse_options(argc, argv, options, report_usage, 0); 486 argc = parse_options(argc, argv, options, report_usage, 0);
484 487
488 if (use_stdio)
489 use_browser = 0;
490 else if (use_tui)
491 use_browser = 1;
492
485 if (strcmp(input_name, "-") != 0) 493 if (strcmp(input_name, "-") != 0)
486 setup_browser(); 494 setup_browser();
495 else
496 use_browser = 0;
487 /* 497 /*
488 * Only in the newt browser we are doing integrated annotation, 498 * Only in the newt browser we are doing integrated annotation,
489 * so don't allocate extra space that won't be used in the stdio 499 * so don't allocate extra space that won't be used in the stdio
diff --git a/tools/perf/feature-tests.mak b/tools/perf/feature-tests.mak
index 7a7b60859053..b253db634f04 100644
--- a/tools/perf/feature-tests.mak
+++ b/tools/perf/feature-tests.mak
@@ -110,6 +110,17 @@ int main(void)
110} 110}
111endef 111endef
112 112
113define SOURCE_STRLCPY
114#include <stdlib.h>
115extern size_t strlcpy(char *dest, const char *src, size_t size);
116
117int main(void)
118{
119 strlcpy(NULL, NULL, 0);
120 return 0;
121}
122endef
123
113# try-cc 124# try-cc
114# Usage: option = $(call try-cc, source-to-build, cc-options) 125# Usage: option = $(call try-cc, source-to-build, cc-options)
115try-cc = $(shell sh -c \ 126try-cc = $(shell sh -c \
diff --git a/tools/perf/scripts/python/bin/netdev-times-record b/tools/perf/scripts/python/bin/netdev-times-record
new file mode 100644
index 000000000000..d931a828126b
--- /dev/null
+++ b/tools/perf/scripts/python/bin/netdev-times-record
@@ -0,0 +1,8 @@
1#!/bin/bash
2perf record -a -e net:net_dev_xmit -e net:net_dev_queue \
3 -e net:netif_receive_skb -e net:netif_rx \
4 -e skb:consume_skb -e skb:kfree_skb \
5 -e skb:skb_copy_datagram_iovec -e napi:napi_poll \
6 -e irq:irq_handler_entry -e irq:irq_handler_exit \
7 -e irq:softirq_entry -e irq:softirq_exit \
8 -e irq:softirq_raise $@
diff --git a/tools/perf/scripts/python/bin/netdev-times-report b/tools/perf/scripts/python/bin/netdev-times-report
new file mode 100644
index 000000000000..c3d0a638123d
--- /dev/null
+++ b/tools/perf/scripts/python/bin/netdev-times-report
@@ -0,0 +1,5 @@
1#!/bin/bash
2# description: display a process of packet and processing time
3# args: [tx] [rx] [dev=] [debug]
4
5perf trace -s ~/libexec/perf-core/scripts/python/netdev-times.py $@
diff --git a/tools/perf/scripts/python/netdev-times.py b/tools/perf/scripts/python/netdev-times.py
new file mode 100644
index 000000000000..9aa0a32972e8
--- /dev/null
+++ b/tools/perf/scripts/python/netdev-times.py
@@ -0,0 +1,464 @@
1# Display a process of packets and processed time.
2# It helps us to investigate networking or network device.
3#
4# options
5# tx: show only tx chart
6# rx: show only rx chart
7# dev=: show only thing related to specified device
8# debug: work with debug mode. It shows buffer status.
9
10import os
11import sys
12
13sys.path.append(os.environ['PERF_EXEC_PATH'] + \
14 '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
15
16from perf_trace_context import *
17from Core import *
18from Util import *
19
20all_event_list = []; # insert all tracepoint event related with this script
21irq_dic = {}; # key is cpu and value is a list which stacks irqs
22 # which raise NET_RX softirq
23net_rx_dic = {}; # key is cpu and value include time of NET_RX softirq-entry
24 # and a list which stacks receive
25receive_hunk_list = []; # a list which include a sequence of receive events
26rx_skb_list = []; # received packet list for matching
27 # skb_copy_datagram_iovec
28
29buffer_budget = 65536; # the budget of rx_skb_list, tx_queue_list and
30 # tx_xmit_list
31of_count_rx_skb_list = 0; # overflow count
32
33tx_queue_list = []; # list of packets which pass through dev_queue_xmit
34of_count_tx_queue_list = 0; # overflow count
35
36tx_xmit_list = []; # list of packets which pass through dev_hard_start_xmit
37of_count_tx_xmit_list = 0; # overflow count
38
39tx_free_list = []; # list of packets which is freed
40
41# options
42show_tx = 0;
43show_rx = 0;
44dev = 0; # store a name of device specified by option "dev="
45debug = 0;
46
47# indices of event_info tuple
48EINFO_IDX_NAME= 0
49EINFO_IDX_CONTEXT=1
50EINFO_IDX_CPU= 2
51EINFO_IDX_TIME= 3
52EINFO_IDX_PID= 4
53EINFO_IDX_COMM= 5
54
55# Calculate a time interval(msec) from src(nsec) to dst(nsec)
56def diff_msec(src, dst):
57 return (dst - src) / 1000000.0
58
59# Display a process of transmitting a packet
60def print_transmit(hunk):
61 if dev != 0 and hunk['dev'].find(dev) < 0:
62 return
63 print "%7s %5d %6d.%06dsec %12.3fmsec %12.3fmsec" % \
64 (hunk['dev'], hunk['len'],
65 nsecs_secs(hunk['queue_t']),
66 nsecs_nsecs(hunk['queue_t'])/1000,
67 diff_msec(hunk['queue_t'], hunk['xmit_t']),
68 diff_msec(hunk['xmit_t'], hunk['free_t']))
69
70# Format for displaying rx packet processing
71PF_IRQ_ENTRY= " irq_entry(+%.3fmsec irq=%d:%s)"
72PF_SOFT_ENTRY=" softirq_entry(+%.3fmsec)"
73PF_NAPI_POLL= " napi_poll_exit(+%.3fmsec %s)"
74PF_JOINT= " |"
75PF_WJOINT= " | |"
76PF_NET_RECV= " |---netif_receive_skb(+%.3fmsec skb=%x len=%d)"
77PF_NET_RX= " |---netif_rx(+%.3fmsec skb=%x)"
78PF_CPY_DGRAM= " | skb_copy_datagram_iovec(+%.3fmsec %d:%s)"
79PF_KFREE_SKB= " | kfree_skb(+%.3fmsec location=%x)"
80PF_CONS_SKB= " | consume_skb(+%.3fmsec)"
81
82# Display a process of received packets and interrputs associated with
83# a NET_RX softirq
84def print_receive(hunk):
85 show_hunk = 0
86 irq_list = hunk['irq_list']
87 cpu = irq_list[0]['cpu']
88 base_t = irq_list[0]['irq_ent_t']
89 # check if this hunk should be showed
90 if dev != 0:
91 for i in range(len(irq_list)):
92 if irq_list[i]['name'].find(dev) >= 0:
93 show_hunk = 1
94 break
95 else:
96 show_hunk = 1
97 if show_hunk == 0:
98 return
99
100 print "%d.%06dsec cpu=%d" % \
101 (nsecs_secs(base_t), nsecs_nsecs(base_t)/1000, cpu)
102 for i in range(len(irq_list)):
103 print PF_IRQ_ENTRY % \
104 (diff_msec(base_t, irq_list[i]['irq_ent_t']),
105 irq_list[i]['irq'], irq_list[i]['name'])
106 print PF_JOINT
107 irq_event_list = irq_list[i]['event_list']
108 for j in range(len(irq_event_list)):
109 irq_event = irq_event_list[j]
110 if irq_event['event'] == 'netif_rx':
111 print PF_NET_RX % \
112 (diff_msec(base_t, irq_event['time']),
113 irq_event['skbaddr'])
114 print PF_JOINT
115 print PF_SOFT_ENTRY % \
116 diff_msec(base_t, hunk['sirq_ent_t'])
117 print PF_JOINT
118 event_list = hunk['event_list']
119 for i in range(len(event_list)):
120 event = event_list[i]
121 if event['event_name'] == 'napi_poll':
122 print PF_NAPI_POLL % \
123 (diff_msec(base_t, event['event_t']), event['dev'])
124 if i == len(event_list) - 1:
125 print ""
126 else:
127 print PF_JOINT
128 else:
129 print PF_NET_RECV % \
130 (diff_msec(base_t, event['event_t']), event['skbaddr'],
131 event['len'])
132 if 'comm' in event.keys():
133 print PF_WJOINT
134 print PF_CPY_DGRAM % \
135 (diff_msec(base_t, event['comm_t']),
136 event['pid'], event['comm'])
137 elif 'handle' in event.keys():
138 print PF_WJOINT
139 if event['handle'] == "kfree_skb":
140 print PF_KFREE_SKB % \
141 (diff_msec(base_t,
142 event['comm_t']),
143 event['location'])
144 elif event['handle'] == "consume_skb":
145 print PF_CONS_SKB % \
146 diff_msec(base_t,
147 event['comm_t'])
148 print PF_JOINT
149
150def trace_begin():
151 global show_tx
152 global show_rx
153 global dev
154 global debug
155
156 for i in range(len(sys.argv)):
157 if i == 0:
158 continue
159 arg = sys.argv[i]
160 if arg == 'tx':
161 show_tx = 1
162 elif arg =='rx':
163 show_rx = 1
164 elif arg.find('dev=',0, 4) >= 0:
165 dev = arg[4:]
166 elif arg == 'debug':
167 debug = 1
168 if show_tx == 0 and show_rx == 0:
169 show_tx = 1
170 show_rx = 1
171
172def trace_end():
173 # order all events in time
174 all_event_list.sort(lambda a,b :cmp(a[EINFO_IDX_TIME],
175 b[EINFO_IDX_TIME]))
176 # process all events
177 for i in range(len(all_event_list)):
178 event_info = all_event_list[i]
179 name = event_info[EINFO_IDX_NAME]
180 if name == 'irq__softirq_exit':
181 handle_irq_softirq_exit(event_info)
182 elif name == 'irq__softirq_entry':
183 handle_irq_softirq_entry(event_info)
184 elif name == 'irq__softirq_raise':
185 handle_irq_softirq_raise(event_info)
186 elif name == 'irq__irq_handler_entry':
187 handle_irq_handler_entry(event_info)
188 elif name == 'irq__irq_handler_exit':
189 handle_irq_handler_exit(event_info)
190 elif name == 'napi__napi_poll':
191 handle_napi_poll(event_info)
192 elif name == 'net__netif_receive_skb':
193 handle_netif_receive_skb(event_info)
194 elif name == 'net__netif_rx':
195 handle_netif_rx(event_info)
196 elif name == 'skb__skb_copy_datagram_iovec':
197 handle_skb_copy_datagram_iovec(event_info)
198 elif name == 'net__net_dev_queue':
199 handle_net_dev_queue(event_info)
200 elif name == 'net__net_dev_xmit':
201 handle_net_dev_xmit(event_info)
202 elif name == 'skb__kfree_skb':
203 handle_kfree_skb(event_info)
204 elif name == 'skb__consume_skb':
205 handle_consume_skb(event_info)
206 # display receive hunks
207 if show_rx:
208 for i in range(len(receive_hunk_list)):
209 print_receive(receive_hunk_list[i])
210 # display transmit hunks
211 if show_tx:
212 print " dev len Qdisc " \
213 " netdevice free"
214 for i in range(len(tx_free_list)):
215 print_transmit(tx_free_list[i])
216 if debug:
217 print "debug buffer status"
218 print "----------------------------"
219 print "xmit Qdisc:remain:%d overflow:%d" % \
220 (len(tx_queue_list), of_count_tx_queue_list)
221 print "xmit netdevice:remain:%d overflow:%d" % \
222 (len(tx_xmit_list), of_count_tx_xmit_list)
223 print "receive:remain:%d overflow:%d" % \
224 (len(rx_skb_list), of_count_rx_skb_list)
225
226# called from perf, when it finds a correspoinding event
227def irq__softirq_entry(name, context, cpu, sec, nsec, pid, comm, vec):
228 if symbol_str("irq__softirq_entry", "vec", vec) != "NET_RX":
229 return
230 event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, vec)
231 all_event_list.append(event_info)
232
233def irq__softirq_exit(name, context, cpu, sec, nsec, pid, comm, vec):
234 if symbol_str("irq__softirq_entry", "vec", vec) != "NET_RX":
235 return
236 event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, vec)
237 all_event_list.append(event_info)
238
239def irq__softirq_raise(name, context, cpu, sec, nsec, pid, comm, vec):
240 if symbol_str("irq__softirq_entry", "vec", vec) != "NET_RX":
241 return
242 event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, vec)
243 all_event_list.append(event_info)
244
245def irq__irq_handler_entry(name, context, cpu, sec, nsec, pid, comm,
246 irq, irq_name):
247 event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
248 irq, irq_name)
249 all_event_list.append(event_info)
250
251def irq__irq_handler_exit(name, context, cpu, sec, nsec, pid, comm, irq, ret):
252 event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, irq, ret)
253 all_event_list.append(event_info)
254
255def napi__napi_poll(name, context, cpu, sec, nsec, pid, comm, napi, dev_name):
256 event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
257 napi, dev_name)
258 all_event_list.append(event_info)
259
260def net__netif_receive_skb(name, context, cpu, sec, nsec, pid, comm, skbaddr,
261 skblen, dev_name):
262 event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
263 skbaddr, skblen, dev_name)
264 all_event_list.append(event_info)
265
266def net__netif_rx(name, context, cpu, sec, nsec, pid, comm, skbaddr,
267 skblen, dev_name):
268 event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
269 skbaddr, skblen, dev_name)
270 all_event_list.append(event_info)
271
272def net__net_dev_queue(name, context, cpu, sec, nsec, pid, comm,
273 skbaddr, skblen, dev_name):
274 event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
275 skbaddr, skblen, dev_name)
276 all_event_list.append(event_info)
277
278def net__net_dev_xmit(name, context, cpu, sec, nsec, pid, comm,
279 skbaddr, skblen, rc, dev_name):
280 event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
281 skbaddr, skblen, rc ,dev_name)
282 all_event_list.append(event_info)
283
284def skb__kfree_skb(name, context, cpu, sec, nsec, pid, comm,
285 skbaddr, protocol, location):
286 event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
287 skbaddr, protocol, location)
288 all_event_list.append(event_info)
289
290def skb__consume_skb(name, context, cpu, sec, nsec, pid, comm, skbaddr):
291 event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
292 skbaddr)
293 all_event_list.append(event_info)
294
295def skb__skb_copy_datagram_iovec(name, context, cpu, sec, nsec, pid, comm,
296 skbaddr, skblen):
297 event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
298 skbaddr, skblen)
299 all_event_list.append(event_info)
300
301def handle_irq_handler_entry(event_info):
302 (name, context, cpu, time, pid, comm, irq, irq_name) = event_info
303 if cpu not in irq_dic.keys():
304 irq_dic[cpu] = []
305 irq_record = {'irq':irq, 'name':irq_name, 'cpu':cpu, 'irq_ent_t':time}
306 irq_dic[cpu].append(irq_record)
307
308def handle_irq_handler_exit(event_info):
309 (name, context, cpu, time, pid, comm, irq, ret) = event_info
310 if cpu not in irq_dic.keys():
311 return
312 irq_record = irq_dic[cpu].pop()
313 if irq != irq_record['irq']:
314 return
315 irq_record.update({'irq_ext_t':time})
316 # if an irq doesn't include NET_RX softirq, drop.
317 if 'event_list' in irq_record.keys():
318 irq_dic[cpu].append(irq_record)
319
320def handle_irq_softirq_raise(event_info):
321 (name, context, cpu, time, pid, comm, vec) = event_info
322 if cpu not in irq_dic.keys() \
323 or len(irq_dic[cpu]) == 0:
324 return
325 irq_record = irq_dic[cpu].pop()
326 if 'event_list' in irq_record.keys():
327 irq_event_list = irq_record['event_list']
328 else:
329 irq_event_list = []
330 irq_event_list.append({'time':time, 'event':'sirq_raise'})
331 irq_record.update({'event_list':irq_event_list})
332 irq_dic[cpu].append(irq_record)
333
334def handle_irq_softirq_entry(event_info):
335 (name, context, cpu, time, pid, comm, vec) = event_info
336 net_rx_dic[cpu] = {'sirq_ent_t':time, 'event_list':[]}
337
338def handle_irq_softirq_exit(event_info):
339 (name, context, cpu, time, pid, comm, vec) = event_info
340 irq_list = []
341 event_list = 0
342 if cpu in irq_dic.keys():
343 irq_list = irq_dic[cpu]
344 del irq_dic[cpu]
345 if cpu in net_rx_dic.keys():
346 sirq_ent_t = net_rx_dic[cpu]['sirq_ent_t']
347 event_list = net_rx_dic[cpu]['event_list']
348 del net_rx_dic[cpu]
349 if irq_list == [] or event_list == 0:
350 return
351 rec_data = {'sirq_ent_t':sirq_ent_t, 'sirq_ext_t':time,
352 'irq_list':irq_list, 'event_list':event_list}
353 # merge information realted to a NET_RX softirq
354 receive_hunk_list.append(rec_data)
355
356def handle_napi_poll(event_info):
357 (name, context, cpu, time, pid, comm, napi, dev_name) = event_info
358 if cpu in net_rx_dic.keys():
359 event_list = net_rx_dic[cpu]['event_list']
360 rec_data = {'event_name':'napi_poll',
361 'dev':dev_name, 'event_t':time}
362 event_list.append(rec_data)
363
364def handle_netif_rx(event_info):
365 (name, context, cpu, time, pid, comm,
366 skbaddr, skblen, dev_name) = event_info
367 if cpu not in irq_dic.keys() \
368 or len(irq_dic[cpu]) == 0:
369 return
370 irq_record = irq_dic[cpu].pop()
371 if 'event_list' in irq_record.keys():
372 irq_event_list = irq_record['event_list']
373 else:
374 irq_event_list = []
375 irq_event_list.append({'time':time, 'event':'netif_rx',
376 'skbaddr':skbaddr, 'skblen':skblen, 'dev_name':dev_name})
377 irq_record.update({'event_list':irq_event_list})
378 irq_dic[cpu].append(irq_record)
379
380def handle_netif_receive_skb(event_info):
381 global of_count_rx_skb_list
382
383 (name, context, cpu, time, pid, comm,
384 skbaddr, skblen, dev_name) = event_info
385 if cpu in net_rx_dic.keys():
386 rec_data = {'event_name':'netif_receive_skb',
387 'event_t':time, 'skbaddr':skbaddr, 'len':skblen}
388 event_list = net_rx_dic[cpu]['event_list']
389 event_list.append(rec_data)
390 rx_skb_list.insert(0, rec_data)
391 if len(rx_skb_list) > buffer_budget:
392 rx_skb_list.pop()
393 of_count_rx_skb_list += 1
394
395def handle_net_dev_queue(event_info):
396 global of_count_tx_queue_list
397
398 (name, context, cpu, time, pid, comm,
399 skbaddr, skblen, dev_name) = event_info
400 skb = {'dev':dev_name, 'skbaddr':skbaddr, 'len':skblen, 'queue_t':time}
401 tx_queue_list.insert(0, skb)
402 if len(tx_queue_list) > buffer_budget:
403 tx_queue_list.pop()
404 of_count_tx_queue_list += 1
405
406def handle_net_dev_xmit(event_info):
407 global of_count_tx_xmit_list
408
409 (name, context, cpu, time, pid, comm,
410 skbaddr, skblen, rc, dev_name) = event_info
411 if rc == 0: # NETDEV_TX_OK
412 for i in range(len(tx_queue_list)):
413 skb = tx_queue_list[i]
414 if skb['skbaddr'] == skbaddr:
415 skb['xmit_t'] = time
416 tx_xmit_list.insert(0, skb)
417 del tx_queue_list[i]
418 if len(tx_xmit_list) > buffer_budget:
419 tx_xmit_list.pop()
420 of_count_tx_xmit_list += 1
421 return
422
423def handle_kfree_skb(event_info):
424 (name, context, cpu, time, pid, comm,
425 skbaddr, protocol, location) = event_info
426 for i in range(len(tx_queue_list)):
427 skb = tx_queue_list[i]
428 if skb['skbaddr'] == skbaddr:
429 del tx_queue_list[i]
430 return
431 for i in range(len(tx_xmit_list)):
432 skb = tx_xmit_list[i]
433 if skb['skbaddr'] == skbaddr:
434 skb['free_t'] = time
435 tx_free_list.append(skb)
436 del tx_xmit_list[i]
437 return
438 for i in range(len(rx_skb_list)):
439 rec_data = rx_skb_list[i]
440 if rec_data['skbaddr'] == skbaddr:
441 rec_data.update({'handle':"kfree_skb",
442 'comm':comm, 'pid':pid, 'comm_t':time})
443 del rx_skb_list[i]
444 return
445
446def handle_consume_skb(event_info):
447 (name, context, cpu, time, pid, comm, skbaddr) = event_info
448 for i in range(len(tx_xmit_list)):
449 skb = tx_xmit_list[i]
450 if skb['skbaddr'] == skbaddr:
451 skb['free_t'] = time
452 tx_free_list.append(skb)
453 del tx_xmit_list[i]
454 return
455
456def handle_skb_copy_datagram_iovec(event_info):
457 (name, context, cpu, time, pid, comm, skbaddr, skblen) = event_info
458 for i in range(len(rx_skb_list)):
459 rec_data = rx_skb_list[i]
460 if skbaddr == rec_data['skbaddr']:
461 rec_data.update({'handle':"skb_copy_datagram_iovec",
462 'comm':comm, 'pid':pid, 'comm_t':time})
463 del rx_skb_list[i]
464 return
diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h
index 27e9ebe4076e..a7729797fd96 100644
--- a/tools/perf/util/cache.h
+++ b/tools/perf/util/cache.h
@@ -82,6 +82,8 @@ extern char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2
82extern char *perf_pathdup(const char *fmt, ...) 82extern char *perf_pathdup(const char *fmt, ...)
83 __attribute__((format (printf, 1, 2))); 83 __attribute__((format (printf, 1, 2)));
84 84
85#ifdef NO_STRLCPY
85extern size_t strlcpy(char *dest, const char *src, size_t size); 86extern size_t strlcpy(char *dest, const char *src, size_t size);
87#endif
86 88
87#endif /* __PERF_CACHE_H */ 89#endif /* __PERF_CACHE_H */
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index f231f43424d2..e12d539417b2 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -28,6 +28,9 @@ bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event)
28#define chain_for_each_child(child, parent) \ 28#define chain_for_each_child(child, parent) \
29 list_for_each_entry(child, &parent->children, brothers) 29 list_for_each_entry(child, &parent->children, brothers)
30 30
31#define chain_for_each_child_safe(child, next, parent) \
32 list_for_each_entry_safe(child, next, &parent->children, brothers)
33
31static void 34static void
32rb_insert_callchain(struct rb_root *root, struct callchain_node *chain, 35rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
33 enum chain_mode mode) 36 enum chain_mode mode)
@@ -86,10 +89,10 @@ __sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node,
86 * sort them by hit 89 * sort them by hit
87 */ 90 */
88static void 91static void
89sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node, 92sort_chain_flat(struct rb_root *rb_root, struct callchain_root *root,
90 u64 min_hit, struct callchain_param *param __used) 93 u64 min_hit, struct callchain_param *param __used)
91{ 94{
92 __sort_chain_flat(rb_root, node, min_hit); 95 __sort_chain_flat(rb_root, &root->node, min_hit);
93} 96}
94 97
95static void __sort_chain_graph_abs(struct callchain_node *node, 98static void __sort_chain_graph_abs(struct callchain_node *node,
@@ -108,11 +111,11 @@ static void __sort_chain_graph_abs(struct callchain_node *node,
108} 111}
109 112
110static void 113static void
111sort_chain_graph_abs(struct rb_root *rb_root, struct callchain_node *chain_root, 114sort_chain_graph_abs(struct rb_root *rb_root, struct callchain_root *chain_root,
112 u64 min_hit, struct callchain_param *param __used) 115 u64 min_hit, struct callchain_param *param __used)
113{ 116{
114 __sort_chain_graph_abs(chain_root, min_hit); 117 __sort_chain_graph_abs(&chain_root->node, min_hit);
115 rb_root->rb_node = chain_root->rb_root.rb_node; 118 rb_root->rb_node = chain_root->node.rb_root.rb_node;
116} 119}
117 120
118static void __sort_chain_graph_rel(struct callchain_node *node, 121static void __sort_chain_graph_rel(struct callchain_node *node,
@@ -133,11 +136,11 @@ static void __sort_chain_graph_rel(struct callchain_node *node,
133} 136}
134 137
135static void 138static void
136sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_node *chain_root, 139sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_root *chain_root,
137 u64 min_hit __used, struct callchain_param *param) 140 u64 min_hit __used, struct callchain_param *param)
138{ 141{
139 __sort_chain_graph_rel(chain_root, param->min_percent / 100.0); 142 __sort_chain_graph_rel(&chain_root->node, param->min_percent / 100.0);
140 rb_root->rb_node = chain_root->rb_root.rb_node; 143 rb_root->rb_node = chain_root->node.rb_root.rb_node;
141} 144}
142 145
143int register_callchain_param(struct callchain_param *param) 146int register_callchain_param(struct callchain_param *param)
@@ -284,19 +287,18 @@ split_add_child(struct callchain_node *parent, struct resolved_chain *chain,
284} 287}
285 288
286static int 289static int
287__append_chain(struct callchain_node *root, struct resolved_chain *chain, 290append_chain(struct callchain_node *root, struct resolved_chain *chain,
288 unsigned int start, u64 period); 291 unsigned int start, u64 period);
289 292
290static void 293static void
291__append_chain_children(struct callchain_node *root, 294append_chain_children(struct callchain_node *root, struct resolved_chain *chain,
292 struct resolved_chain *chain, 295 unsigned int start, u64 period)
293 unsigned int start, u64 period)
294{ 296{
295 struct callchain_node *rnode; 297 struct callchain_node *rnode;
296 298
297 /* lookup in childrens */ 299 /* lookup in childrens */
298 chain_for_each_child(rnode, root) { 300 chain_for_each_child(rnode, root) {
299 unsigned int ret = __append_chain(rnode, chain, start, period); 301 unsigned int ret = append_chain(rnode, chain, start, period);
300 302
301 if (!ret) 303 if (!ret)
302 goto inc_children_hit; 304 goto inc_children_hit;
@@ -309,8 +311,8 @@ inc_children_hit:
309} 311}
310 312
311static int 313static int
312__append_chain(struct callchain_node *root, struct resolved_chain *chain, 314append_chain(struct callchain_node *root, struct resolved_chain *chain,
313 unsigned int start, u64 period) 315 unsigned int start, u64 period)
314{ 316{
315 struct callchain_list *cnode; 317 struct callchain_list *cnode;
316 unsigned int i = start; 318 unsigned int i = start;
@@ -357,7 +359,7 @@ __append_chain(struct callchain_node *root, struct resolved_chain *chain,
357 } 359 }
358 360
359 /* We match the node and still have a part remaining */ 361 /* We match the node and still have a part remaining */
360 __append_chain_children(root, chain, i, period); 362 append_chain_children(root, chain, i, period);
361 363
362 return 0; 364 return 0;
363} 365}
@@ -380,8 +382,8 @@ static void filter_context(struct ip_callchain *old, struct resolved_chain *new,
380} 382}
381 383
382 384
383int append_chain(struct callchain_node *root, struct ip_callchain *chain, 385int callchain_append(struct callchain_root *root, struct ip_callchain *chain,
384 struct map_symbol *syms, u64 period) 386 struct map_symbol *syms, u64 period)
385{ 387{
386 struct resolved_chain *filtered; 388 struct resolved_chain *filtered;
387 389
@@ -398,9 +400,65 @@ int append_chain(struct callchain_node *root, struct ip_callchain *chain,
398 if (!filtered->nr) 400 if (!filtered->nr)
399 goto end; 401 goto end;
400 402
401 __append_chain_children(root, filtered, 0, period); 403 append_chain_children(&root->node, filtered, 0, period);
404
405 if (filtered->nr > root->max_depth)
406 root->max_depth = filtered->nr;
402end: 407end:
403 free(filtered); 408 free(filtered);
404 409
405 return 0; 410 return 0;
406} 411}
412
413static int
414merge_chain_branch(struct callchain_node *dst, struct callchain_node *src,
415 struct resolved_chain *chain)
416{
417 struct callchain_node *child, *next_child;
418 struct callchain_list *list, *next_list;
419 int old_pos = chain->nr;
420 int err = 0;
421
422 list_for_each_entry_safe(list, next_list, &src->val, list) {
423 chain->ips[chain->nr].ip = list->ip;
424 chain->ips[chain->nr].ms = list->ms;
425 chain->nr++;
426 list_del(&list->list);
427 free(list);
428 }
429
430 if (src->hit)
431 append_chain_children(dst, chain, 0, src->hit);
432
433 chain_for_each_child_safe(child, next_child, src) {
434 err = merge_chain_branch(dst, child, chain);
435 if (err)
436 break;
437
438 list_del(&child->brothers);
439 free(child);
440 }
441
442 chain->nr = old_pos;
443
444 return err;
445}
446
447int callchain_merge(struct callchain_root *dst, struct callchain_root *src)
448{
449 struct resolved_chain *chain;
450 int err;
451
452 chain = malloc(sizeof(*chain) +
453 src->max_depth * sizeof(struct resolved_ip));
454 if (!chain)
455 return -ENOMEM;
456
457 chain->nr = 0;
458
459 err = merge_chain_branch(&dst->node, &src->node, chain);
460
461 free(chain);
462
463 return err;
464}
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 6de4313924fb..c15fb8c24ad2 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -26,9 +26,14 @@ struct callchain_node {
26 u64 children_hit; 26 u64 children_hit;
27}; 27};
28 28
29struct callchain_root {
30 u64 max_depth;
31 struct callchain_node node;
32};
33
29struct callchain_param; 34struct callchain_param;
30 35
31typedef void (*sort_chain_func_t)(struct rb_root *, struct callchain_node *, 36typedef void (*sort_chain_func_t)(struct rb_root *, struct callchain_root *,
32 u64, struct callchain_param *); 37 u64, struct callchain_param *);
33 38
34struct callchain_param { 39struct callchain_param {
@@ -44,15 +49,16 @@ struct callchain_list {
44 struct list_head list; 49 struct list_head list;
45}; 50};
46 51
47static inline void callchain_init(struct callchain_node *node) 52static inline void callchain_init(struct callchain_root *root)
48{ 53{
49 INIT_LIST_HEAD(&node->brothers); 54 INIT_LIST_HEAD(&root->node.brothers);
50 INIT_LIST_HEAD(&node->children); 55 INIT_LIST_HEAD(&root->node.children);
51 INIT_LIST_HEAD(&node->val); 56 INIT_LIST_HEAD(&root->node.val);
52 57
53 node->children_hit = 0; 58 root->node.parent = NULL;
54 node->parent = NULL; 59 root->node.hit = 0;
55 node->hit = 0; 60 root->node.children_hit = 0;
61 root->max_depth = 0;
56} 62}
57 63
58static inline u64 cumul_hits(struct callchain_node *node) 64static inline u64 cumul_hits(struct callchain_node *node)
@@ -61,8 +67,9 @@ static inline u64 cumul_hits(struct callchain_node *node)
61} 67}
62 68
63int register_callchain_param(struct callchain_param *param); 69int register_callchain_param(struct callchain_param *param);
64int append_chain(struct callchain_node *root, struct ip_callchain *chain, 70int callchain_append(struct callchain_root *root, struct ip_callchain *chain,
65 struct map_symbol *syms, u64 period); 71 struct map_symbol *syms, u64 period);
72int callchain_merge(struct callchain_root *dst, struct callchain_root *src);
66 73
67bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event); 74bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event);
68#endif /* __PERF_CALLCHAIN_H */ 75#endif /* __PERF_CALLCHAIN_H */
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index be22ae6ef055..2022e8740994 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -87,7 +87,7 @@ static void hist_entry__add_cpumode_period(struct hist_entry *self,
87 87
88static struct hist_entry *hist_entry__new(struct hist_entry *template) 88static struct hist_entry *hist_entry__new(struct hist_entry *template)
89{ 89{
90 size_t callchain_size = symbol_conf.use_callchain ? sizeof(struct callchain_node) : 0; 90 size_t callchain_size = symbol_conf.use_callchain ? sizeof(struct callchain_root) : 0;
91 struct hist_entry *self = malloc(sizeof(*self) + callchain_size); 91 struct hist_entry *self = malloc(sizeof(*self) + callchain_size);
92 92
93 if (self != NULL) { 93 if (self != NULL) {
@@ -226,6 +226,8 @@ static bool collapse__insert_entry(struct rb_root *root, struct hist_entry *he)
226 226
227 if (!cmp) { 227 if (!cmp) {
228 iter->period += he->period; 228 iter->period += he->period;
229 if (symbol_conf.use_callchain)
230 callchain_merge(iter->callchain, he->callchain);
229 hist_entry__free(he); 231 hist_entry__free(he);
230 return false; 232 return false;
231 } 233 }
diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c
index 58a470d036dd..bd7497711424 100644
--- a/tools/perf/util/path.c
+++ b/tools/perf/util/path.c
@@ -22,6 +22,7 @@ static const char *get_perf_dir(void)
22 return "."; 22 return ".";
23} 23}
24 24
25#ifdef NO_STRLCPY
25size_t strlcpy(char *dest, const char *src, size_t size) 26size_t strlcpy(char *dest, const char *src, size_t size)
26{ 27{
27 size_t ret = strlen(src); 28 size_t ret = strlen(src);
@@ -33,7 +34,7 @@ size_t strlcpy(char *dest, const char *src, size_t size)
33 } 34 }
34 return ret; 35 return ret;
35} 36}
36 37#endif
37 38
38static char *get_pathname(void) 39static char *get_pathname(void)
39{ 40{
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 46e531d09e8b..0b91053a7d11 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -70,7 +70,7 @@ struct hist_entry {
70 struct hist_entry *pair; 70 struct hist_entry *pair;
71 struct rb_root sorted_chain; 71 struct rb_root sorted_chain;
72 }; 72 };
73 struct callchain_node callchain[0]; 73 struct callchain_root callchain[0];
74}; 74};
75 75
76enum sort_type { 76enum sort_type {
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index b2f5ae97f33d..b39f499e575a 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -388,6 +388,20 @@ size_t dso__fprintf_buildid(struct dso *self, FILE *fp)
388 return fprintf(fp, "%s", sbuild_id); 388 return fprintf(fp, "%s", sbuild_id);
389} 389}
390 390
391size_t dso__fprintf_symbols_by_name(struct dso *self, enum map_type type, FILE *fp)
392{
393 size_t ret = 0;
394 struct rb_node *nd;
395 struct symbol_name_rb_node *pos;
396
397 for (nd = rb_first(&self->symbol_names[type]); nd; nd = rb_next(nd)) {
398 pos = rb_entry(nd, struct symbol_name_rb_node, rb_node);
399 fprintf(fp, "%s\n", pos->sym.name);
400 }
401
402 return ret;
403}
404
391size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp) 405size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp)
392{ 406{
393 struct rb_node *nd; 407 struct rb_node *nd;
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index ea95c2756f05..038f2201ee09 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -182,6 +182,7 @@ size_t machines__fprintf_dsos(struct rb_root *self, FILE *fp);
182size_t machines__fprintf_dsos_buildid(struct rb_root *self, FILE *fp, bool with_hits); 182size_t machines__fprintf_dsos_buildid(struct rb_root *self, FILE *fp, bool with_hits);
183 183
184size_t dso__fprintf_buildid(struct dso *self, FILE *fp); 184size_t dso__fprintf_buildid(struct dso *self, FILE *fp);
185size_t dso__fprintf_symbols_by_name(struct dso *self, enum map_type type, FILE *fp);
185size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp); 186size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp);
186 187
187enum dso_origin { 188enum dso_origin {
diff --git a/tools/perf/util/ui/browser.c b/tools/perf/util/ui/browser.c
index 66f2d583d8c4..6d0df809a2ed 100644
--- a/tools/perf/util/ui/browser.c
+++ b/tools/perf/util/ui/browser.c
@@ -1,16 +1,6 @@
1#define _GNU_SOURCE
2#include <stdio.h>
3#undef _GNU_SOURCE
4/*
5 * slang versions <= 2.0.6 have a "#if HAVE_LONG_LONG" that breaks
6 * the build if it isn't defined. Use the equivalent one that glibc
7 * has on features.h.
8 */
9#include <features.h>
10#ifndef HAVE_LONG_LONG
11#define HAVE_LONG_LONG __GLIBC_HAVE_LONG_LONG
12#endif
13#include <slang.h> 1#include <slang.h>
2#include "libslang.h"
3#include <linux/compiler.h>
14#include <linux/list.h> 4#include <linux/list.h>
15#include <linux/rbtree.h> 5#include <linux/rbtree.h>
16#include <stdlib.h> 6#include <stdlib.h>
@@ -19,17 +9,9 @@
19#include "helpline.h" 9#include "helpline.h"
20#include "../color.h" 10#include "../color.h"
21#include "../util.h" 11#include "../util.h"
12#include <stdio.h>
22 13
23#if SLANG_VERSION < 20104 14static int ui_browser__percent_color(double percent, bool current)
24#define sltt_set_color(obj, name, fg, bg) \
25 SLtt_set_color(obj,(char *)name, (char *)fg, (char *)bg)
26#else
27#define sltt_set_color SLtt_set_color
28#endif
29
30newtComponent newt_form__new(void);
31
32int ui_browser__percent_color(double percent, bool current)
33{ 15{
34 if (current) 16 if (current)
35 return HE_COLORSET_SELECTED; 17 return HE_COLORSET_SELECTED;
@@ -40,6 +22,23 @@ int ui_browser__percent_color(double percent, bool current)
40 return HE_COLORSET_NORMAL; 22 return HE_COLORSET_NORMAL;
41} 23}
42 24
25void ui_browser__set_color(struct ui_browser *self __used, int color)
26{
27 SLsmg_set_color(color);
28}
29
30void ui_browser__set_percent_color(struct ui_browser *self,
31 double percent, bool current)
32{
33 int color = ui_browser__percent_color(percent, current);
34 ui_browser__set_color(self, color);
35}
36
37void ui_browser__gotorc(struct ui_browser *self, int y, int x)
38{
39 SLsmg_gotorc(self->y + y, self->x + x);
40}
41
43void ui_browser__list_head_seek(struct ui_browser *self, off_t offset, int whence) 42void ui_browser__list_head_seek(struct ui_browser *self, off_t offset, int whence)
44{ 43{
45 struct list_head *head = self->entries; 44 struct list_head *head = self->entries;
@@ -111,7 +110,7 @@ unsigned int ui_browser__rb_tree_refresh(struct ui_browser *self)
111 nd = self->top; 110 nd = self->top;
112 111
113 while (nd != NULL) { 112 while (nd != NULL) {
114 SLsmg_gotorc(self->y + row, self->x); 113 ui_browser__gotorc(self, row, 0);
115 self->write(self, nd, row); 114 self->write(self, nd, row);
116 if (++row == self->height) 115 if (++row == self->height)
117 break; 116 break;
@@ -131,13 +130,10 @@ void ui_browser__refresh_dimensions(struct ui_browser *self)
131 int cols, rows; 130 int cols, rows;
132 newtGetScreenSize(&cols, &rows); 131 newtGetScreenSize(&cols, &rows);
133 132
134 if (self->width > cols - 4) 133 self->width = cols - 1;
135 self->width = cols - 4; 134 self->height = rows - 2;
136 self->height = rows - 5; 135 self->y = 1;
137 if (self->height > self->nr_entries) 136 self->x = 0;
138 self->height = self->nr_entries;
139 self->y = (rows - self->height) / 2;
140 self->x = (cols - self->width) / 2;
141} 137}
142 138
143void ui_browser__reset_index(struct ui_browser *self) 139void ui_browser__reset_index(struct ui_browser *self)
@@ -146,34 +142,48 @@ void ui_browser__reset_index(struct ui_browser *self)
146 self->seek(self, 0, SEEK_SET); 142 self->seek(self, 0, SEEK_SET);
147} 143}
148 144
145void ui_browser__add_exit_key(struct ui_browser *self, int key)
146{
147 newtFormAddHotKey(self->form, key);
148}
149
150void ui_browser__add_exit_keys(struct ui_browser *self, int keys[])
151{
152 int i = 0;
153
154 while (keys[i] && i < 64) {
155 ui_browser__add_exit_key(self, keys[i]);
156 ++i;
157 }
158}
159
149int ui_browser__show(struct ui_browser *self, const char *title, 160int ui_browser__show(struct ui_browser *self, const char *title,
150 const char *helpline, ...) 161 const char *helpline, ...)
151{ 162{
152 va_list ap; 163 va_list ap;
164 int keys[] = { NEWT_KEY_UP, NEWT_KEY_DOWN, NEWT_KEY_PGUP,
165 NEWT_KEY_PGDN, NEWT_KEY_HOME, NEWT_KEY_END, ' ',
166 NEWT_KEY_LEFT, NEWT_KEY_ESCAPE, 'q', CTRL('c'), 0 };
153 167
154 if (self->form != NULL) { 168 if (self->form != NULL)
155 newtFormDestroy(self->form); 169 newtFormDestroy(self->form);
156 newtPopWindow(); 170
157 }
158 ui_browser__refresh_dimensions(self); 171 ui_browser__refresh_dimensions(self);
159 newtCenteredWindow(self->width, self->height, title); 172 self->form = newtForm(NULL, NULL, 0);
160 self->form = newt_form__new();
161 if (self->form == NULL) 173 if (self->form == NULL)
162 return -1; 174 return -1;
163 175
164 self->sb = newtVerticalScrollbar(self->width, 0, self->height, 176 self->sb = newtVerticalScrollbar(self->width, 1, self->height,
165 HE_COLORSET_NORMAL, 177 HE_COLORSET_NORMAL,
166 HE_COLORSET_SELECTED); 178 HE_COLORSET_SELECTED);
167 if (self->sb == NULL) 179 if (self->sb == NULL)
168 return -1; 180 return -1;
169 181
170 newtFormAddHotKey(self->form, NEWT_KEY_UP); 182 SLsmg_gotorc(0, 0);
171 newtFormAddHotKey(self->form, NEWT_KEY_DOWN); 183 ui_browser__set_color(self, NEWT_COLORSET_ROOT);
172 newtFormAddHotKey(self->form, NEWT_KEY_PGUP); 184 slsmg_write_nstring(title, self->width);
173 newtFormAddHotKey(self->form, NEWT_KEY_PGDN); 185
174 newtFormAddHotKey(self->form, NEWT_KEY_HOME); 186 ui_browser__add_exit_keys(self, keys);
175 newtFormAddHotKey(self->form, NEWT_KEY_END);
176 newtFormAddHotKey(self->form, ' ');
177 newtFormAddComponent(self->form, self->sb); 187 newtFormAddComponent(self->form, self->sb);
178 188
179 va_start(ap, helpline); 189 va_start(ap, helpline);
@@ -185,7 +195,6 @@ int ui_browser__show(struct ui_browser *self, const char *title,
185void ui_browser__hide(struct ui_browser *self) 195void ui_browser__hide(struct ui_browser *self)
186{ 196{
187 newtFormDestroy(self->form); 197 newtFormDestroy(self->form);
188 newtPopWindow();
189 self->form = NULL; 198 self->form = NULL;
190 ui_helpline__pop(); 199 ui_helpline__pop();
191} 200}
@@ -196,28 +205,28 @@ int ui_browser__refresh(struct ui_browser *self)
196 205
197 newtScrollbarSet(self->sb, self->index, self->nr_entries - 1); 206 newtScrollbarSet(self->sb, self->index, self->nr_entries - 1);
198 row = self->refresh(self); 207 row = self->refresh(self);
199 SLsmg_set_color(HE_COLORSET_NORMAL); 208 ui_browser__set_color(self, HE_COLORSET_NORMAL);
200 SLsmg_fill_region(self->y + row, self->x, 209 SLsmg_fill_region(self->y + row, self->x,
201 self->height - row, self->width, ' '); 210 self->height - row, self->width, ' ');
202 211
203 return 0; 212 return 0;
204} 213}
205 214
206int ui_browser__run(struct ui_browser *self, struct newtExitStruct *es) 215int ui_browser__run(struct ui_browser *self)
207{ 216{
217 struct newtExitStruct es;
218
208 if (ui_browser__refresh(self) < 0) 219 if (ui_browser__refresh(self) < 0)
209 return -1; 220 return -1;
210 221
211 while (1) { 222 while (1) {
212 off_t offset; 223 off_t offset;
213 224
214 newtFormRun(self->form, es); 225 newtFormRun(self->form, &es);
215 226
216 if (es->reason != NEWT_EXIT_HOTKEY) 227 if (es.reason != NEWT_EXIT_HOTKEY)
217 break; 228 break;
218 if (is_exit_key(es->u.key)) 229 switch (es.u.key) {
219 return es->u.key;
220 switch (es->u.key) {
221 case NEWT_KEY_DOWN: 230 case NEWT_KEY_DOWN:
222 if (self->index == self->nr_entries - 1) 231 if (self->index == self->nr_entries - 1)
223 break; 232 break;
@@ -274,12 +283,12 @@ int ui_browser__run(struct ui_browser *self, struct newtExitStruct *es)
274 self->seek(self, -offset, SEEK_END); 283 self->seek(self, -offset, SEEK_END);
275 break; 284 break;
276 default: 285 default:
277 return es->u.key; 286 return es.u.key;
278 } 287 }
279 if (ui_browser__refresh(self) < 0) 288 if (ui_browser__refresh(self) < 0)
280 return -1; 289 return -1;
281 } 290 }
282 return 0; 291 return -1;
283} 292}
284 293
285unsigned int ui_browser__list_head_refresh(struct ui_browser *self) 294unsigned int ui_browser__list_head_refresh(struct ui_browser *self)
@@ -294,7 +303,7 @@ unsigned int ui_browser__list_head_refresh(struct ui_browser *self)
294 pos = self->top; 303 pos = self->top;
295 304
296 list_for_each_from(pos, head) { 305 list_for_each_from(pos, head) {
297 SLsmg_gotorc(self->y + row, self->x); 306 ui_browser__gotorc(self, row, 0);
298 self->write(self, pos, row); 307 self->write(self, pos, row);
299 if (++row == self->height) 308 if (++row == self->height)
300 break; 309 break;
diff --git a/tools/perf/util/ui/browser.h b/tools/perf/util/ui/browser.h
index 0b9f829214f7..0dc7e4da36f5 100644
--- a/tools/perf/util/ui/browser.h
+++ b/tools/perf/util/ui/browser.h
@@ -25,16 +25,21 @@ struct ui_browser {
25}; 25};
26 26
27 27
28int ui_browser__percent_color(double percent, bool current); 28void ui_browser__set_color(struct ui_browser *self, int color);
29void ui_browser__set_percent_color(struct ui_browser *self,
30 double percent, bool current);
29bool ui_browser__is_current_entry(struct ui_browser *self, unsigned row); 31bool ui_browser__is_current_entry(struct ui_browser *self, unsigned row);
30void ui_browser__refresh_dimensions(struct ui_browser *self); 32void ui_browser__refresh_dimensions(struct ui_browser *self);
31void ui_browser__reset_index(struct ui_browser *self); 33void ui_browser__reset_index(struct ui_browser *self);
32 34
35void ui_browser__gotorc(struct ui_browser *self, int y, int x);
36void ui_browser__add_exit_key(struct ui_browser *self, int key);
37void ui_browser__add_exit_keys(struct ui_browser *self, int keys[]);
33int ui_browser__show(struct ui_browser *self, const char *title, 38int ui_browser__show(struct ui_browser *self, const char *title,
34 const char *helpline, ...); 39 const char *helpline, ...);
35void ui_browser__hide(struct ui_browser *self); 40void ui_browser__hide(struct ui_browser *self);
36int ui_browser__refresh(struct ui_browser *self); 41int ui_browser__refresh(struct ui_browser *self);
37int ui_browser__run(struct ui_browser *self, struct newtExitStruct *es); 42int ui_browser__run(struct ui_browser *self);
38 43
39void ui_browser__rb_tree_seek(struct ui_browser *self, off_t offset, int whence); 44void ui_browser__rb_tree_seek(struct ui_browser *self, off_t offset, int whence);
40unsigned int ui_browser__rb_tree_refresh(struct ui_browser *self); 45unsigned int ui_browser__rb_tree_refresh(struct ui_browser *self);
diff --git a/tools/perf/util/ui/browsers/annotate.c b/tools/perf/util/ui/browsers/annotate.c
index a90273e63f4f..82b78f99251b 100644
--- a/tools/perf/util/ui/browsers/annotate.c
+++ b/tools/perf/util/ui/browsers/annotate.c
@@ -40,14 +40,12 @@ static void annotate_browser__write(struct ui_browser *self, void *entry, int ro
40 40
41 if (ol->offset != -1) { 41 if (ol->offset != -1) {
42 struct objdump_line_rb_node *olrb = objdump_line__rb(ol); 42 struct objdump_line_rb_node *olrb = objdump_line__rb(ol);
43 int color = ui_browser__percent_color(olrb->percent, current_entry); 43 ui_browser__set_percent_color(self, olrb->percent, current_entry);
44 SLsmg_set_color(color);
45 slsmg_printf(" %7.2f ", olrb->percent); 44 slsmg_printf(" %7.2f ", olrb->percent);
46 if (!current_entry) 45 if (!current_entry)
47 SLsmg_set_color(HE_COLORSET_CODE); 46 ui_browser__set_color(self, HE_COLORSET_CODE);
48 } else { 47 } else {
49 int color = ui_browser__percent_color(0, current_entry); 48 ui_browser__set_percent_color(self, 0, current_entry);
50 SLsmg_set_color(color);
51 slsmg_write_nstring(" ", 9); 49 slsmg_write_nstring(" ", 9);
52 } 50 }
53 51
@@ -135,32 +133,31 @@ static void annotate_browser__set_top(struct annotate_browser *self,
135 self->curr_hot = nd; 133 self->curr_hot = nd;
136} 134}
137 135
138static int annotate_browser__run(struct annotate_browser *self, 136static int annotate_browser__run(struct annotate_browser *self)
139 struct newtExitStruct *es)
140{ 137{
141 struct rb_node *nd; 138 struct rb_node *nd;
142 struct hist_entry *he = self->b.priv; 139 struct hist_entry *he = self->b.priv;
140 int key;
143 141
144 if (ui_browser__show(&self->b, he->ms.sym->name, 142 if (ui_browser__show(&self->b, he->ms.sym->name,
145 "<- or ESC: exit, TAB/shift+TAB: cycle thru samples") < 0) 143 "<-, -> or ESC: exit, TAB/shift+TAB: cycle thru samples") < 0)
146 return -1; 144 return -1;
147 145 /*
148 newtFormAddHotKey(self->b.form, NEWT_KEY_LEFT); 146 * To allow builtin-annotate to cycle thru multiple symbols by
149 newtFormAddHotKey(self->b.form, NEWT_KEY_RIGHT); 147 * examining the exit key for this function.
148 */
149 ui_browser__add_exit_key(&self->b, NEWT_KEY_RIGHT);
150 150
151 nd = self->curr_hot; 151 nd = self->curr_hot;
152 if (nd) { 152 if (nd) {
153 newtFormAddHotKey(self->b.form, NEWT_KEY_TAB); 153 int tabs[] = { NEWT_KEY_TAB, NEWT_KEY_UNTAB, 0 };
154 newtFormAddHotKey(self->b.form, NEWT_KEY_UNTAB); 154 ui_browser__add_exit_keys(&self->b, tabs);
155 } 155 }
156 156
157 while (1) { 157 while (1) {
158 ui_browser__run(&self->b, es); 158 key = ui_browser__run(&self->b);
159
160 if (es->reason != NEWT_EXIT_HOTKEY)
161 break;
162 159
163 switch (es->u.key) { 160 switch (key) {
164 case NEWT_KEY_TAB: 161 case NEWT_KEY_TAB:
165 nd = rb_prev(nd); 162 nd = rb_prev(nd);
166 if (nd == NULL) 163 if (nd == NULL)
@@ -179,12 +176,11 @@ static int annotate_browser__run(struct annotate_browser *self,
179 } 176 }
180out: 177out:
181 ui_browser__hide(&self->b); 178 ui_browser__hide(&self->b);
182 return es->u.key; 179 return key;
183} 180}
184 181
185int hist_entry__tui_annotate(struct hist_entry *self) 182int hist_entry__tui_annotate(struct hist_entry *self)
186{ 183{
187 struct newtExitStruct es;
188 struct objdump_line *pos, *n; 184 struct objdump_line *pos, *n;
189 struct objdump_line_rb_node *rbpos; 185 struct objdump_line_rb_node *rbpos;
190 LIST_HEAD(head); 186 LIST_HEAD(head);
@@ -232,7 +228,7 @@ int hist_entry__tui_annotate(struct hist_entry *self)
232 annotate_browser__set_top(&browser, browser.curr_hot); 228 annotate_browser__set_top(&browser, browser.curr_hot);
233 229
234 browser.b.width += 18; /* Percentage */ 230 browser.b.width += 18; /* Percentage */
235 ret = annotate_browser__run(&browser, &es); 231 ret = annotate_browser__run(&browser);
236 list_for_each_entry_safe(pos, n, &head, node) { 232 list_for_each_entry_safe(pos, n, &head, node) {
237 list_del(&pos->node); 233 list_del(&pos->node);
238 objdump_line__free(pos); 234 objdump_line__free(pos);
diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c
index 6866aa4c41e0..ebda8c3fde9e 100644
--- a/tools/perf/util/ui/browsers/hists.c
+++ b/tools/perf/util/ui/browsers/hists.c
@@ -58,6 +58,11 @@ static char callchain_list__folded(const struct callchain_list *self)
58 return map_symbol__folded(&self->ms); 58 return map_symbol__folded(&self->ms);
59} 59}
60 60
61static void map_symbol__set_folding(struct map_symbol *self, bool unfold)
62{
63 self->unfolded = unfold ? self->has_children : false;
64}
65
61static int callchain_node__count_rows_rb_tree(struct callchain_node *self) 66static int callchain_node__count_rows_rb_tree(struct callchain_node *self)
62{ 67{
63 int n = 0; 68 int n = 0;
@@ -129,16 +134,16 @@ static void callchain_node__init_have_children_rb_tree(struct callchain_node *se
129 for (nd = rb_first(&self->rb_root); nd; nd = rb_next(nd)) { 134 for (nd = rb_first(&self->rb_root); nd; nd = rb_next(nd)) {
130 struct callchain_node *child = rb_entry(nd, struct callchain_node, rb_node); 135 struct callchain_node *child = rb_entry(nd, struct callchain_node, rb_node);
131 struct callchain_list *chain; 136 struct callchain_list *chain;
132 int first = true; 137 bool first = true;
133 138
134 list_for_each_entry(chain, &child->val, list) { 139 list_for_each_entry(chain, &child->val, list) {
135 if (first) { 140 if (first) {
136 first = false; 141 first = false;
137 chain->ms.has_children = chain->list.next != &child->val || 142 chain->ms.has_children = chain->list.next != &child->val ||
138 rb_first(&child->rb_root) != NULL; 143 !RB_EMPTY_ROOT(&child->rb_root);
139 } else 144 } else
140 chain->ms.has_children = chain->list.next == &child->val && 145 chain->ms.has_children = chain->list.next == &child->val &&
141 rb_first(&child->rb_root) != NULL; 146 !RB_EMPTY_ROOT(&child->rb_root);
142 } 147 }
143 148
144 callchain_node__init_have_children_rb_tree(child); 149 callchain_node__init_have_children_rb_tree(child);
@@ -150,7 +155,7 @@ static void callchain_node__init_have_children(struct callchain_node *self)
150 struct callchain_list *chain; 155 struct callchain_list *chain;
151 156
152 list_for_each_entry(chain, &self->val, list) 157 list_for_each_entry(chain, &self->val, list)
153 chain->ms.has_children = rb_first(&self->rb_root) != NULL; 158 chain->ms.has_children = !RB_EMPTY_ROOT(&self->rb_root);
154 159
155 callchain_node__init_have_children_rb_tree(self); 160 callchain_node__init_have_children_rb_tree(self);
156} 161}
@@ -168,6 +173,7 @@ static void callchain__init_have_children(struct rb_root *self)
168static void hist_entry__init_have_children(struct hist_entry *self) 173static void hist_entry__init_have_children(struct hist_entry *self)
169{ 174{
170 if (!self->init_have_children) { 175 if (!self->init_have_children) {
176 self->ms.has_children = !RB_EMPTY_ROOT(&self->sorted_chain);
171 callchain__init_have_children(&self->sorted_chain); 177 callchain__init_have_children(&self->sorted_chain);
172 self->init_have_children = true; 178 self->init_have_children = true;
173 } 179 }
@@ -195,43 +201,114 @@ static bool hist_browser__toggle_fold(struct hist_browser *self)
195 return false; 201 return false;
196} 202}
197 203
198static int hist_browser__run(struct hist_browser *self, const char *title, 204static int callchain_node__set_folding_rb_tree(struct callchain_node *self, bool unfold)
199 struct newtExitStruct *es) 205{
206 int n = 0;
207 struct rb_node *nd;
208
209 for (nd = rb_first(&self->rb_root); nd; nd = rb_next(nd)) {
210 struct callchain_node *child = rb_entry(nd, struct callchain_node, rb_node);
211 struct callchain_list *chain;
212 bool has_children = false;
213
214 list_for_each_entry(chain, &child->val, list) {
215 ++n;
216 map_symbol__set_folding(&chain->ms, unfold);
217 has_children = chain->ms.has_children;
218 }
219
220 if (has_children)
221 n += callchain_node__set_folding_rb_tree(child, unfold);
222 }
223
224 return n;
225}
226
227static int callchain_node__set_folding(struct callchain_node *node, bool unfold)
228{
229 struct callchain_list *chain;
230 bool has_children = false;
231 int n = 0;
232
233 list_for_each_entry(chain, &node->val, list) {
234 ++n;
235 map_symbol__set_folding(&chain->ms, unfold);
236 has_children = chain->ms.has_children;
237 }
238
239 if (has_children)
240 n += callchain_node__set_folding_rb_tree(node, unfold);
241
242 return n;
243}
244
245static int callchain__set_folding(struct rb_root *chain, bool unfold)
246{
247 struct rb_node *nd;
248 int n = 0;
249
250 for (nd = rb_first(chain); nd; nd = rb_next(nd)) {
251 struct callchain_node *node = rb_entry(nd, struct callchain_node, rb_node);
252 n += callchain_node__set_folding(node, unfold);
253 }
254
255 return n;
256}
257
258static void hist_entry__set_folding(struct hist_entry *self, bool unfold)
259{
260 hist_entry__init_have_children(self);
261 map_symbol__set_folding(&self->ms, unfold);
262
263 if (self->ms.has_children) {
264 int n = callchain__set_folding(&self->sorted_chain, unfold);
265 self->nr_rows = unfold ? n : 0;
266 } else
267 self->nr_rows = 0;
268}
269
270static void hists__set_folding(struct hists *self, bool unfold)
271{
272 struct rb_node *nd;
273
274 self->nr_entries = 0;
275
276 for (nd = rb_first(&self->entries); nd; nd = rb_next(nd)) {
277 struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
278 hist_entry__set_folding(he, unfold);
279 self->nr_entries += 1 + he->nr_rows;
280 }
281}
282
283static void hist_browser__set_folding(struct hist_browser *self, bool unfold)
284{
285 hists__set_folding(self->hists, unfold);
286 self->b.nr_entries = self->hists->nr_entries;
287 /* Go to the start, we may be way after valid entries after a collapse */
288 ui_browser__reset_index(&self->b);
289}
290
291static int hist_browser__run(struct hist_browser *self, const char *title)
200{ 292{
201 char str[256], unit; 293 int key;
202 unsigned long nr_events = self->hists->stats.nr_events[PERF_RECORD_SAMPLE]; 294 int exit_keys[] = { 'a', '?', 'h', 'C', 'd', 'D', 'E', 't',
295 NEWT_KEY_ENTER, NEWT_KEY_RIGHT, NEWT_KEY_LEFT, 0, };
203 296
204 self->b.entries = &self->hists->entries; 297 self->b.entries = &self->hists->entries;
205 self->b.nr_entries = self->hists->nr_entries; 298 self->b.nr_entries = self->hists->nr_entries;
206 299
207 hist_browser__refresh_dimensions(self); 300 hist_browser__refresh_dimensions(self);
208 301
209 nr_events = convert_unit(nr_events, &unit);
210 snprintf(str, sizeof(str), "Events: %lu%c ",
211 nr_events, unit);
212 newtDrawRootText(0, 0, str);
213
214 if (ui_browser__show(&self->b, title, 302 if (ui_browser__show(&self->b, title,
215 "Press '?' for help on key bindings") < 0) 303 "Press '?' for help on key bindings") < 0)
216 return -1; 304 return -1;
217 305
218 newtFormAddHotKey(self->b.form, 'a'); 306 ui_browser__add_exit_keys(&self->b, exit_keys);
219 newtFormAddHotKey(self->b.form, '?');
220 newtFormAddHotKey(self->b.form, 'h');
221 newtFormAddHotKey(self->b.form, 'd');
222 newtFormAddHotKey(self->b.form, 'D');
223 newtFormAddHotKey(self->b.form, 't');
224
225 newtFormAddHotKey(self->b.form, NEWT_KEY_LEFT);
226 newtFormAddHotKey(self->b.form, NEWT_KEY_RIGHT);
227 newtFormAddHotKey(self->b.form, NEWT_KEY_ENTER);
228 307
229 while (1) { 308 while (1) {
230 ui_browser__run(&self->b, es); 309 key = ui_browser__run(&self->b);
231 310
232 if (es->reason != NEWT_EXIT_HOTKEY) 311 switch (key) {
233 break;
234 switch (es->u.key) {
235 case 'D': { /* Debug */ 312 case 'D': { /* Debug */
236 static int seq; 313 static int seq;
237 struct hist_entry *h = rb_entry(self->b.top, 314 struct hist_entry *h = rb_entry(self->b.top,
@@ -245,18 +322,26 @@ static int hist_browser__run(struct hist_browser *self, const char *title,
245 self->b.top_idx, 322 self->b.top_idx,
246 h->row_offset, h->nr_rows); 323 h->row_offset, h->nr_rows);
247 } 324 }
248 continue; 325 break;
326 case 'C':
327 /* Collapse the whole world. */
328 hist_browser__set_folding(self, false);
329 break;
330 case 'E':
331 /* Expand the whole world. */
332 hist_browser__set_folding(self, true);
333 break;
249 case NEWT_KEY_ENTER: 334 case NEWT_KEY_ENTER:
250 if (hist_browser__toggle_fold(self)) 335 if (hist_browser__toggle_fold(self))
251 break; 336 break;
252 /* fall thru */ 337 /* fall thru */
253 default: 338 default:
254 return 0; 339 goto out;
255 } 340 }
256 } 341 }
257 342out:
258 ui_browser__hide(&self->b); 343 ui_browser__hide(&self->b);
259 return 0; 344 return key;
260} 345}
261 346
262static char *callchain_list__sym_name(struct callchain_list *self, 347static char *callchain_list__sym_name(struct callchain_list *self,
@@ -306,15 +391,10 @@ static int hist_browser__show_callchain_node_rb_tree(struct hist_browser *self,
306 int color; 391 int color;
307 bool was_first = first; 392 bool was_first = first;
308 393
309 if (first) { 394 if (first)
310 first = false; 395 first = false;
311 chain->ms.has_children = chain->list.next != &child->val || 396 else
312 rb_first(&child->rb_root) != NULL;
313 } else {
314 extra_offset = LEVEL_OFFSET_STEP; 397 extra_offset = LEVEL_OFFSET_STEP;
315 chain->ms.has_children = chain->list.next == &child->val &&
316 rb_first(&child->rb_root) != NULL;
317 }
318 398
319 folded_sign = callchain_list__folded(chain); 399 folded_sign = callchain_list__folded(chain);
320 if (*row_offset != 0) { 400 if (*row_offset != 0) {
@@ -341,8 +421,8 @@ static int hist_browser__show_callchain_node_rb_tree(struct hist_browser *self,
341 *is_current_entry = true; 421 *is_current_entry = true;
342 } 422 }
343 423
344 SLsmg_set_color(color); 424 ui_browser__set_color(&self->b, color);
345 SLsmg_gotorc(self->b.y + row, self->b.x); 425 ui_browser__gotorc(&self->b, row, 0);
346 slsmg_write_nstring(" ", offset + extra_offset); 426 slsmg_write_nstring(" ", offset + extra_offset);
347 slsmg_printf("%c ", folded_sign); 427 slsmg_printf("%c ", folded_sign);
348 slsmg_write_nstring(str, width); 428 slsmg_write_nstring(str, width);
@@ -384,12 +464,7 @@ static int hist_browser__show_callchain_node(struct hist_browser *self,
384 list_for_each_entry(chain, &node->val, list) { 464 list_for_each_entry(chain, &node->val, list) {
385 char ipstr[BITS_PER_LONG / 4 + 1], *s; 465 char ipstr[BITS_PER_LONG / 4 + 1], *s;
386 int color; 466 int color;
387 /* 467
388 * FIXME: This should be moved to somewhere else,
389 * probably when the callchain is created, so as not to
390 * traverse it all over again
391 */
392 chain->ms.has_children = rb_first(&node->rb_root) != NULL;
393 folded_sign = callchain_list__folded(chain); 468 folded_sign = callchain_list__folded(chain);
394 469
395 if (*row_offset != 0) { 470 if (*row_offset != 0) {
@@ -405,8 +480,8 @@ static int hist_browser__show_callchain_node(struct hist_browser *self,
405 } 480 }
406 481
407 s = callchain_list__sym_name(chain, ipstr, sizeof(ipstr)); 482 s = callchain_list__sym_name(chain, ipstr, sizeof(ipstr));
408 SLsmg_gotorc(self->b.y + row, self->b.x); 483 ui_browser__gotorc(&self->b, row, 0);
409 SLsmg_set_color(color); 484 ui_browser__set_color(&self->b, color);
410 slsmg_write_nstring(" ", offset); 485 slsmg_write_nstring(" ", offset);
411 slsmg_printf("%c ", folded_sign); 486 slsmg_printf("%c ", folded_sign);
412 slsmg_write_nstring(s, width - 2); 487 slsmg_write_nstring(s, width - 2);
@@ -465,7 +540,7 @@ static int hist_browser__show_entry(struct hist_browser *self,
465 } 540 }
466 541
467 if (symbol_conf.use_callchain) { 542 if (symbol_conf.use_callchain) {
468 entry->ms.has_children = !RB_EMPTY_ROOT(&entry->sorted_chain); 543 hist_entry__init_have_children(entry);
469 folded_sign = hist_entry__folded(entry); 544 folded_sign = hist_entry__folded(entry);
470 } 545 }
471 546
@@ -484,8 +559,8 @@ static int hist_browser__show_entry(struct hist_browser *self,
484 color = HE_COLORSET_NORMAL; 559 color = HE_COLORSET_NORMAL;
485 } 560 }
486 561
487 SLsmg_set_color(color); 562 ui_browser__set_color(&self->b, color);
488 SLsmg_gotorc(self->b.y + row, self->b.x); 563 ui_browser__gotorc(&self->b, row, 0);
489 if (symbol_conf.use_callchain) { 564 if (symbol_conf.use_callchain) {
490 slsmg_printf("%c ", folded_sign); 565 slsmg_printf("%c ", folded_sign);
491 width -= 2; 566 width -= 2;
@@ -687,8 +762,6 @@ static struct hist_browser *hist_browser__new(struct hists *hists)
687 762
688static void hist_browser__delete(struct hist_browser *self) 763static void hist_browser__delete(struct hist_browser *self)
689{ 764{
690 newtFormDestroy(self->b.form);
691 newtPopWindow();
692 free(self); 765 free(self);
693} 766}
694 767
@@ -702,21 +775,26 @@ static struct thread *hist_browser__selected_thread(struct hist_browser *self)
702 return self->he_selection->thread; 775 return self->he_selection->thread;
703} 776}
704 777
705static int hist_browser__title(char *bf, size_t size, const char *ev_name, 778static int hists__browser_title(struct hists *self, char *bf, size_t size,
706 const struct dso *dso, const struct thread *thread) 779 const char *ev_name, const struct dso *dso,
780 const struct thread *thread)
707{ 781{
708 int printed = 0; 782 char unit;
783 int printed;
784 unsigned long nr_events = self->stats.nr_events[PERF_RECORD_SAMPLE];
785
786 nr_events = convert_unit(nr_events, &unit);
787 printed = snprintf(bf, size, "Events: %lu%c %s", nr_events, unit, ev_name);
709 788
710 if (thread) 789 if (thread)
711 printed += snprintf(bf + printed, size - printed, 790 printed += snprintf(bf + printed, size - printed,
712 "Thread: %s(%d)", 791 ", Thread: %s(%d)",
713 (thread->comm_set ? thread->comm : ""), 792 (thread->comm_set ? thread->comm : ""),
714 thread->pid); 793 thread->pid);
715 if (dso) 794 if (dso)
716 printed += snprintf(bf + printed, size - printed, 795 printed += snprintf(bf + printed, size - printed,
717 "%sDSO: %s", thread ? " " : "", 796 ", DSO: %s", dso->short_name);
718 dso->short_name); 797 return printed;
719 return printed ?: snprintf(bf, size, "Event: %s", ev_name);
720} 798}
721 799
722int hists__browse(struct hists *self, const char *helpline, const char *ev_name) 800int hists__browse(struct hists *self, const char *helpline, const char *ev_name)
@@ -725,7 +803,6 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name)
725 struct pstack *fstack; 803 struct pstack *fstack;
726 const struct thread *thread_filter = NULL; 804 const struct thread *thread_filter = NULL;
727 const struct dso *dso_filter = NULL; 805 const struct dso *dso_filter = NULL;
728 struct newtExitStruct es;
729 char msg[160]; 806 char msg[160];
730 int key = -1; 807 int key = -1;
731 808
@@ -738,9 +815,8 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name)
738 815
739 ui_helpline__push(helpline); 816 ui_helpline__push(helpline);
740 817
741 hist_browser__title(msg, sizeof(msg), ev_name, 818 hists__browser_title(self, msg, sizeof(msg), ev_name,
742 dso_filter, thread_filter); 819 dso_filter, thread_filter);
743
744 while (1) { 820 while (1) {
745 const struct thread *thread; 821 const struct thread *thread;
746 const struct dso *dso; 822 const struct dso *dso;
@@ -749,70 +825,63 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name)
749 annotate = -2, zoom_dso = -2, zoom_thread = -2, 825 annotate = -2, zoom_dso = -2, zoom_thread = -2,
750 browse_map = -2; 826 browse_map = -2;
751 827
752 if (hist_browser__run(browser, msg, &es)) 828 key = hist_browser__run(browser, msg);
753 break;
754 829
755 thread = hist_browser__selected_thread(browser); 830 thread = hist_browser__selected_thread(browser);
756 dso = browser->selection->map ? browser->selection->map->dso : NULL; 831 dso = browser->selection->map ? browser->selection->map->dso : NULL;
757 832
758 if (es.reason == NEWT_EXIT_HOTKEY) { 833 switch (key) {
759 key = es.u.key; 834 case NEWT_KEY_TAB:
760 835 case NEWT_KEY_UNTAB:
761 switch (key) { 836 /*
762 case NEWT_KEY_F1: 837 * Exit the browser, let hists__browser_tree
763 goto do_help; 838 * go to the next or previous
764 case NEWT_KEY_TAB: 839 */
765 case NEWT_KEY_UNTAB: 840 goto out_free_stack;
766 /* 841 case 'a':
767 * Exit the browser, let hists__browser_tree 842 if (browser->selection->map == NULL &&
768 * go to the next or previous 843 browser->selection->map->dso->annotate_warned)
769 */
770 goto out_free_stack;
771 default:;
772 }
773
774 switch (key) {
775 case 'a':
776 if (browser->selection->map == NULL ||
777 browser->selection->map->dso->annotate_warned)
778 continue;
779 goto do_annotate;
780 case 'd':
781 goto zoom_dso;
782 case 't':
783 goto zoom_thread;
784 case 'h':
785 case '?':
786do_help:
787 ui__help_window("-> Zoom into DSO/Threads & Annotate current symbol\n"
788 "<- Zoom out\n"
789 "a Annotate current symbol\n"
790 "h/?/F1 Show this window\n"
791 "d Zoom into current DSO\n"
792 "t Zoom into current Thread\n"
793 "q/CTRL+C Exit browser");
794 continue; 844 continue;
795 default:; 845 goto do_annotate;
796 } 846 case 'd':
797 if (is_exit_key(key)) { 847 goto zoom_dso;
798 if (key == NEWT_KEY_ESCAPE && 848 case 't':
799 !ui__dialog_yesno("Do you really want to exit?")) 849 goto zoom_thread;
800 continue; 850 case NEWT_KEY_F1:
801 break; 851 case 'h':
802 } 852 case '?':
803 853 ui__help_window("-> Zoom into DSO/Threads & Annotate current symbol\n"
804 if (es.u.key == NEWT_KEY_LEFT) { 854 "<- Zoom out\n"
805 const void *top; 855 "a Annotate current symbol\n"
856 "h/?/F1 Show this window\n"
857 "C Collapse all callchains\n"
858 "E Expand all callchains\n"
859 "d Zoom into current DSO\n"
860 "t Zoom into current Thread\n"
861 "q/CTRL+C Exit browser");
862 continue;
863 case NEWT_KEY_ENTER:
864 case NEWT_KEY_RIGHT:
865 /* menu */
866 break;
867 case NEWT_KEY_LEFT: {
868 const void *top;
806 869
807 if (pstack__empty(fstack)) 870 if (pstack__empty(fstack))
808 continue;
809 top = pstack__pop(fstack);
810 if (top == &dso_filter)
811 goto zoom_out_dso;
812 if (top == &thread_filter)
813 goto zoom_out_thread;
814 continue; 871 continue;
815 } 872 top = pstack__pop(fstack);
873 if (top == &dso_filter)
874 goto zoom_out_dso;
875 if (top == &thread_filter)
876 goto zoom_out_thread;
877 continue;
878 }
879 case NEWT_KEY_ESCAPE:
880 if (!ui__dialog_yesno("Do you really want to exit?"))
881 continue;
882 /* Fall thru */
883 default:
884 goto out_free_stack;
816 } 885 }
817 886
818 if (browser->selection->sym != NULL && 887 if (browser->selection->sym != NULL &&
@@ -885,8 +954,8 @@ zoom_out_dso:
885 pstack__push(fstack, &dso_filter); 954 pstack__push(fstack, &dso_filter);
886 } 955 }
887 hists__filter_by_dso(self, dso_filter); 956 hists__filter_by_dso(self, dso_filter);
888 hist_browser__title(msg, sizeof(msg), ev_name, 957 hists__browser_title(self, msg, sizeof(msg), ev_name,
889 dso_filter, thread_filter); 958 dso_filter, thread_filter);
890 hist_browser__reset(browser); 959 hist_browser__reset(browser);
891 } else if (choice == zoom_thread) { 960 } else if (choice == zoom_thread) {
892zoom_thread: 961zoom_thread:
@@ -903,8 +972,8 @@ zoom_out_thread:
903 pstack__push(fstack, &thread_filter); 972 pstack__push(fstack, &thread_filter);
904 } 973 }
905 hists__filter_by_thread(self, thread_filter); 974 hists__filter_by_thread(self, thread_filter);
906 hist_browser__title(msg, sizeof(msg), ev_name, 975 hists__browser_title(self, msg, sizeof(msg), ev_name,
907 dso_filter, thread_filter); 976 dso_filter, thread_filter);
908 hist_browser__reset(browser); 977 hist_browser__reset(browser);
909 } 978 }
910 } 979 }
@@ -925,10 +994,6 @@ int hists__tui_browse_tree(struct rb_root *self, const char *help)
925 const char *ev_name = __event_name(hists->type, hists->config); 994 const char *ev_name = __event_name(hists->type, hists->config);
926 995
927 key = hists__browse(hists, help, ev_name); 996 key = hists__browse(hists, help, ev_name);
928
929 if (is_exit_key(key))
930 break;
931
932 switch (key) { 997 switch (key) {
933 case NEWT_KEY_TAB: 998 case NEWT_KEY_TAB:
934 next = rb_next(nd); 999 next = rb_next(nd);
@@ -940,7 +1005,7 @@ int hists__tui_browse_tree(struct rb_root *self, const char *help)
940 continue; 1005 continue;
941 nd = rb_prev(nd); 1006 nd = rb_prev(nd);
942 default: 1007 default:
943 break; 1008 return key;
944 } 1009 }
945 } 1010 }
946 1011
diff --git a/tools/perf/util/ui/browsers/map.c b/tools/perf/util/ui/browsers/map.c
index 142b825b42bf..e35437dfa5b4 100644
--- a/tools/perf/util/ui/browsers/map.c
+++ b/tools/perf/util/ui/browsers/map.c
@@ -1,6 +1,5 @@
1#include "../libslang.h" 1#include "../libslang.h"
2#include <elf.h> 2#include <elf.h>
3#include <newt.h>
4#include <sys/ttydefaults.h> 3#include <sys/ttydefaults.h>
5#include <ctype.h> 4#include <ctype.h>
6#include <string.h> 5#include <string.h>
@@ -47,7 +46,6 @@ out_free_form:
47struct map_browser { 46struct map_browser {
48 struct ui_browser b; 47 struct ui_browser b;
49 struct map *map; 48 struct map *map;
50 u16 namelen;
51 u8 addrlen; 49 u8 addrlen;
52}; 50};
53 51
@@ -56,14 +54,16 @@ static void map_browser__write(struct ui_browser *self, void *nd, int row)
56 struct symbol *sym = rb_entry(nd, struct symbol, rb_node); 54 struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
57 struct map_browser *mb = container_of(self, struct map_browser, b); 55 struct map_browser *mb = container_of(self, struct map_browser, b);
58 bool current_entry = ui_browser__is_current_entry(self, row); 56 bool current_entry = ui_browser__is_current_entry(self, row);
59 int color = ui_browser__percent_color(0, current_entry); 57 int width;
60 58
61 SLsmg_set_color(color); 59 ui_browser__set_percent_color(self, 0, current_entry);
62 slsmg_printf("%*llx %*llx %c ", 60 slsmg_printf("%*llx %*llx %c ",
63 mb->addrlen, sym->start, mb->addrlen, sym->end, 61 mb->addrlen, sym->start, mb->addrlen, sym->end,
64 sym->binding == STB_GLOBAL ? 'g' : 62 sym->binding == STB_GLOBAL ? 'g' :
65 sym->binding == STB_LOCAL ? 'l' : 'w'); 63 sym->binding == STB_LOCAL ? 'l' : 'w');
66 slsmg_write_nstring(sym->name, mb->namelen); 64 width = self->width - ((mb->addrlen * 2) + 4);
65 if (width > 0)
66 slsmg_write_nstring(sym->name, width);
67} 67}
68 68
69/* FIXME uber-kludgy, see comment on cmd_report... */ 69/* FIXME uber-kludgy, see comment on cmd_report... */
@@ -98,31 +98,29 @@ static int map_browser__search(struct map_browser *self)
98 return 0; 98 return 0;
99} 99}
100 100
101static int map_browser__run(struct map_browser *self, struct newtExitStruct *es) 101static int map_browser__run(struct map_browser *self)
102{ 102{
103 int key;
104
103 if (ui_browser__show(&self->b, self->map->dso->long_name, 105 if (ui_browser__show(&self->b, self->map->dso->long_name,
104 "Press <- or ESC to exit, %s / to search", 106 "Press <- or ESC to exit, %s / to search",
105 verbose ? "" : "restart with -v to use") < 0) 107 verbose ? "" : "restart with -v to use") < 0)
106 return -1; 108 return -1;
107 109
108 newtFormAddHotKey(self->b.form, NEWT_KEY_LEFT);
109 newtFormAddHotKey(self->b.form, NEWT_KEY_ENTER);
110 if (verbose) 110 if (verbose)
111 newtFormAddHotKey(self->b.form, '/'); 111 ui_browser__add_exit_key(&self->b, '/');
112 112
113 while (1) { 113 while (1) {
114 ui_browser__run(&self->b, es); 114 key = ui_browser__run(&self->b);
115 115
116 if (es->reason != NEWT_EXIT_HOTKEY) 116 if (verbose && key == '/')
117 break;
118 if (verbose && es->u.key == '/')
119 map_browser__search(self); 117 map_browser__search(self);
120 else 118 else
121 break; 119 break;
122 } 120 }
123 121
124 ui_browser__hide(&self->b); 122 ui_browser__hide(&self->b);
125 return 0; 123 return key;
126} 124}
127 125
128int map__browse(struct map *self) 126int map__browse(struct map *self)
@@ -136,7 +134,6 @@ int map__browse(struct map *self)
136 }, 134 },
137 .map = self, 135 .map = self,
138 }; 136 };
139 struct newtExitStruct es;
140 struct rb_node *nd; 137 struct rb_node *nd;
141 char tmp[BITS_PER_LONG / 4]; 138 char tmp[BITS_PER_LONG / 4];
142 u64 maxaddr = 0; 139 u64 maxaddr = 0;
@@ -144,8 +141,6 @@ int map__browse(struct map *self)
144 for (nd = rb_first(mb.b.entries); nd; nd = rb_next(nd)) { 141 for (nd = rb_first(mb.b.entries); nd; nd = rb_next(nd)) {
145 struct symbol *pos = rb_entry(nd, struct symbol, rb_node); 142 struct symbol *pos = rb_entry(nd, struct symbol, rb_node);
146 143
147 if (mb.namelen < pos->namelen)
148 mb.namelen = pos->namelen;
149 if (maxaddr < pos->end) 144 if (maxaddr < pos->end)
150 maxaddr = pos->end; 145 maxaddr = pos->end;
151 if (verbose) { 146 if (verbose) {
@@ -156,6 +151,5 @@ int map__browse(struct map *self)
156 } 151 }
157 152
158 mb.addrlen = snprintf(tmp, sizeof(tmp), "%llx", maxaddr); 153 mb.addrlen = snprintf(tmp, sizeof(tmp), "%llx", maxaddr);
159 mb.b.width += mb.addrlen * 2 + 4 + mb.namelen; 154 return map_browser__run(&mb);
160 return map_browser__run(&mb, &es);
161} 155}
diff --git a/tools/perf/util/ui/util.c b/tools/perf/util/ui/util.c
index 04600e26ceea..9706d9d40279 100644
--- a/tools/perf/util/ui/util.c
+++ b/tools/perf/util/ui/util.c
@@ -11,8 +11,6 @@
11#include "helpline.h" 11#include "helpline.h"
12#include "util.h" 12#include "util.h"
13 13
14newtComponent newt_form__new(void);
15
16static void newt_form__set_exit_keys(newtComponent self) 14static void newt_form__set_exit_keys(newtComponent self)
17{ 15{
18 newtFormAddHotKey(self, NEWT_KEY_LEFT); 16 newtFormAddHotKey(self, NEWT_KEY_LEFT);
@@ -22,7 +20,7 @@ static void newt_form__set_exit_keys(newtComponent self)
22 newtFormAddHotKey(self, CTRL('c')); 20 newtFormAddHotKey(self, CTRL('c'));
23} 21}
24 22
25newtComponent newt_form__new(void) 23static newtComponent newt_form__new(void)
26{ 24{
27 newtComponent self = newtForm(NULL, NULL, 0); 25 newtComponent self = newtForm(NULL, NULL, 0);
28 if (self) 26 if (self)
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index f380fed74359..7562707ddd1c 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -266,19 +266,6 @@ bool strglobmatch(const char *str, const char *pat);
266bool strlazymatch(const char *str, const char *pat); 266bool strlazymatch(const char *str, const char *pat);
267unsigned long convert_unit(unsigned long value, char *unit); 267unsigned long convert_unit(unsigned long value, char *unit);
268 268
269#ifndef ESC
270#define ESC 27
271#endif
272
273static inline bool is_exit_key(int key)
274{
275 char up;
276 if (key == CTRL('c') || key == ESC)
277 return true;
278 up = toupper(key);
279 return up == 'Q';
280}
281
282#define _STR(x) #x 269#define _STR(x) #x
283#define STR(x) _STR(x) 270#define STR(x) _STR(x)
284 271