 Makefile | 5
 arch/Kconfig | 3
 arch/alpha/kernel/perf_event.c | 128
 arch/arm/kernel/perf_event.c | 198
 arch/arm/oprofile/common.c | 2
 arch/powerpc/kernel/perf_callchain.c | 86
 arch/powerpc/kernel/perf_event.c | 164
 arch/powerpc/kernel/perf_event_fsl_emb.c | 148
 arch/sh/kernel/perf_callchain.c | 50
 arch/sh/kernel/perf_event.c | 141
 arch/sparc/Kconfig | 1
 arch/sparc/include/asm/jump_label.h | 32
 arch/sparc/kernel/Makefile | 2
 arch/sparc/kernel/jump_label.c | 47
 arch/sparc/kernel/module.c | 6
 arch/sparc/kernel/perf_event.c | 240
 arch/x86/Kconfig | 1
 arch/x86/include/asm/alternative.h | 11
 arch/x86/include/asm/jump_label.h | 37
 arch/x86/include/asm/perf_event_p4.h | 52
 arch/x86/kernel/Makefile | 2
 arch/x86/kernel/alternative.c | 68
 arch/x86/kernel/cpu/perf_event.c | 261
 arch/x86/kernel/cpu/perf_event_intel.c | 8
 arch/x86/kernel/cpu/perf_event_intel_ds.c | 13
 arch/x86/kernel/cpu/perf_event_p4.c | 292
 arch/x86/kernel/ftrace.c | 63
 arch/x86/kernel/jump_label.c | 50
 arch/x86/kernel/kprobes.c | 14
 arch/x86/kernel/module.c | 3
 arch/x86/kernel/setup.c | 6
 include/asm-generic/hardirq.h | 2
 include/asm-generic/vmlinux.lds.h | 10
 include/linux/dynamic_debug.h | 39
 include/linux/ftrace_event.h | 8
 include/linux/interrupt.h | 8
 include/linux/jump_label.h | 64
 include/linux/module.h | 5
 include/linux/percpu.h | 9
 include/linux/perf_event.h | 155
 include/linux/sched.h | 9
 include/linux/tracepoint.h | 5
 include/trace/events/irq.h | 26
 include/trace/events/napi.h | 25
 include/trace/events/net.h | 82
 include/trace/events/power.h | 90
 include/trace/events/skb.h | 17
 kernel/Makefile | 2
 kernel/exit.c | 4
 kernel/hw_breakpoint.c | 67
 kernel/jump_label.c | 429
 kernel/kprobes.c | 26
 kernel/module.c | 6
 kernel/perf_event.c | 2355
 kernel/sched.c | 2
 kernel/trace/ftrace.c | 123
 kernel/trace/ring_buffer.c | 21
 kernel/trace/trace_event_perf.c | 28
 kernel/trace/trace_events.c | 55
 kernel/trace/trace_functions_graph.c | 126
 kernel/trace/trace_workqueue.c | 10
 kernel/tracepoint.c | 14
 kernel/watchdog.c | 41
 lib/Kconfig.debug | 8
 lib/dynamic_debug.c | 42
 net/core/datagram.c | 1
 net/core/dev.c | 8
 net/core/net-traces.c | 1
 net/core/skbuff.c | 1
 scripts/Makefile.lib | 11
 scripts/basic/Makefile | 2
 scripts/basic/hash.c | 64
 scripts/gcc-goto.sh | 5
 tools/perf/Documentation/perf-annotate.txt | 11
 tools/perf/Documentation/perf-report.txt | 7
 tools/perf/Makefile | 30
 tools/perf/builtin-annotate.c | 26
 tools/perf/builtin-report.c | 14
 tools/perf/feature-tests.mak | 11
 tools/perf/scripts/python/bin/netdev-times-record | 8
 tools/perf/scripts/python/bin/netdev-times-report | 5
 tools/perf/scripts/python/netdev-times.py | 464
 tools/perf/util/cache.h | 2
 tools/perf/util/callchain.c | 98
 tools/perf/util/callchain.h | 27
 tools/perf/util/hist.c | 4
 tools/perf/util/path.c | 3
 tools/perf/util/sort.h | 2
 tools/perf/util/symbol.c | 14
 tools/perf/util/symbol.h | 1
 tools/perf/util/ui/browser.c | 117
 tools/perf/util/ui/browser.h | 9
 tools/perf/util/ui/browsers/annotate.c | 38
 tools/perf/util/ui/browsers/hists.c | 327
 tools/perf/util/ui/browsers/map.c | 32
 tools/perf/util/ui/util.c | 4
 tools/perf/util/util.h | 13
 97 files changed, 4984 insertions(+), 2423 deletions(-)
diff --git a/Makefile b/Makefile
index 77b5c6ed0ce5..534c09c255de 100644
--- a/Makefile
+++ b/Makefile
@@ -591,6 +591,11 @@ KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow)
 # conserve stack if available
 KBUILD_CFLAGS += $(call cc-option,-fconserve-stack)
 
+# check for 'asm goto'
+ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC)), y)
+	KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO
+endif
+
 # Add user supplied CPPFLAGS, AFLAGS and CFLAGS as the last assignments
 # But warn user when we do so
 warn-assign = \
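
The gcc-goto.sh probe added above decides whether the compiler understands the 'asm goto' extension that the jump-label patches later in this series rely on. A minimal sketch of the kind of construct being tested for (the exact program inside scripts/gcc-goto.sh may differ):

	/* Compiles only when 'asm goto' is supported: the empty asm
	 * template never jumps at runtime, but it lets the compiler
	 * see 'supported' as a possible branch target. */
	static int have_asm_goto(void)
	{
		asm goto("" : : : : supported);
		return 0;
	supported:
		return 1;
	}
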
diff --git a/arch/Kconfig b/arch/Kconfig
index fe48fc7a3eba..53d7f619a1b9 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -158,4 +158,7 @@ config HAVE_PERF_EVENTS_NMI
 	  subsystem.  Also has support for calculating CPU cycle events
 	  to determine how many clock cycles in a given period.
 
+config HAVE_ARCH_JUMP_LABEL
+	bool
+
 source "kernel/gcov/Kconfig"
diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c
index 85d8e4f58c83..1cc49683fb69 100644
--- a/arch/alpha/kernel/perf_event.c
+++ b/arch/alpha/kernel/perf_event.c
@@ -307,7 +307,7 @@ again:
 	     new_raw_count) != prev_raw_count)
 		goto again;
 
-	delta = (new_raw_count  - (prev_raw_count & alpha_pmu->pmc_count_mask[idx])) + ovf;
+	delta = (new_raw_count - (prev_raw_count & alpha_pmu->pmc_count_mask[idx])) + ovf;
 
 	/* It is possible on very rare occasions that the PMC has overflowed
 	 * but the interrupt is yet to come.  Detect and fix this situation.
@@ -402,14 +402,13 @@ static void maybe_change_configuration(struct cpu_hw_events *cpuc)
 		struct hw_perf_event *hwc = &pe->hw;
 		int idx = hwc->idx;
 
-		if (cpuc->current_idx[j] != PMC_NO_INDEX) {
-			cpuc->idx_mask |= (1<<cpuc->current_idx[j]);
-			continue;
+		if (cpuc->current_idx[j] == PMC_NO_INDEX) {
+			alpha_perf_event_set_period(pe, hwc, idx);
+			cpuc->current_idx[j] = idx;
 		}
 
-		alpha_perf_event_set_period(pe, hwc, idx);
-		cpuc->current_idx[j] = idx;
-		cpuc->idx_mask |= (1<<cpuc->current_idx[j]);
+		if (!(hwc->state & PERF_HES_STOPPED))
+			cpuc->idx_mask |= (1<<cpuc->current_idx[j]);
 	}
 	cpuc->config = cpuc->event[0]->hw.config_base;
 }
@@ -420,12 +419,13 @@ static void maybe_change_configuration(struct cpu_hw_events *cpuc)
  * - this function is called from outside this module via the pmu struct
  *   returned from perf event initialisation.
  */
-static int alpha_pmu_enable(struct perf_event *event)
+static int alpha_pmu_add(struct perf_event *event, int flags)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
 	int n0;
 	int ret;
-	unsigned long flags;
+	unsigned long irq_flags;
 
 	/*
 	 * The Sparc code has the IRQ disable first followed by the perf
@@ -435,8 +435,8 @@ static int alpha_pmu_enable(struct perf_event *event)
 	 * nevertheless we disable the PMCs first to enable a potential
 	 * final PMI to occur before we disable interrupts.
 	 */
-	perf_disable();
-	local_irq_save(flags);
+	perf_pmu_disable(event->pmu);
+	local_irq_save(irq_flags);
 
 	/* Default to error to be returned */
 	ret = -EAGAIN;
@@ -455,8 +455,12 @@ static int alpha_pmu_enable(struct perf_event *event)
 		}
 	}
 
-	local_irq_restore(flags);
-	perf_enable();
+	hwc->state = PERF_HES_UPTODATE;
+	if (!(flags & PERF_EF_START))
+		hwc->state |= PERF_HES_STOPPED;
+
+	local_irq_restore(irq_flags);
+	perf_pmu_enable(event->pmu);
 
 	return ret;
 }
@@ -467,15 +471,15 @@ static int alpha_pmu_enable(struct perf_event *event)
  * - this function is called from outside this module via the pmu struct
  *   returned from perf event initialisation.
  */
-static void alpha_pmu_disable(struct perf_event *event)
+static void alpha_pmu_del(struct perf_event *event, int flags)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
-	unsigned long flags;
+	unsigned long irq_flags;
 	int j;
 
-	perf_disable();
-	local_irq_save(flags);
+	perf_pmu_disable(event->pmu);
+	local_irq_save(irq_flags);
 
 	for (j = 0; j < cpuc->n_events; j++) {
 		if (event == cpuc->event[j]) {
@@ -501,8 +505,8 @@ static void alpha_pmu_disable(struct perf_event *event)
 		}
 	}
 
-	local_irq_restore(flags);
-	perf_enable();
+	local_irq_restore(irq_flags);
+	perf_pmu_enable(event->pmu);
 }
 
 
@@ -514,13 +518,44 @@ static void alpha_pmu_read(struct perf_event *event)
 }
 
 
-static void alpha_pmu_unthrottle(struct perf_event *event)
+static void alpha_pmu_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+	if (!(hwc->state & PERF_HES_STOPPED)) {
+		cpuc->idx_mask &= ~(1UL<<hwc->idx);
+		hwc->state |= PERF_HES_STOPPED;
+	}
+
+	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+		alpha_perf_event_update(event, hwc, hwc->idx, 0);
+		hwc->state |= PERF_HES_UPTODATE;
+	}
+
+	if (cpuc->enabled)
+		wrperfmon(PERFMON_CMD_DISABLE, (1UL<<hwc->idx));
+}
+
+
+static void alpha_pmu_start(struct perf_event *event, int flags)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
+	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+		return;
+
+	if (flags & PERF_EF_RELOAD) {
+		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+		alpha_perf_event_set_period(event, hwc, hwc->idx);
+	}
+
+	hwc->state = 0;
+
 	cpuc->idx_mask |= 1UL<<hwc->idx;
-	wrperfmon(PERFMON_CMD_ENABLE, (1UL<<hwc->idx));
+	if (cpuc->enabled)
+		wrperfmon(PERFMON_CMD_ENABLE, (1UL<<hwc->idx));
 }
 
 
@@ -642,39 +677,36 @@ static int __hw_perf_event_init(struct perf_event *event)
 	return 0;
 }
 
-static const struct pmu pmu = {
-	.enable		= alpha_pmu_enable,
-	.disable	= alpha_pmu_disable,
-	.read		= alpha_pmu_read,
-	.unthrottle	= alpha_pmu_unthrottle,
-};
-
-
 /*
  * Main entry point to initialise a HW performance event.
  */
-const struct pmu *hw_perf_event_init(struct perf_event *event)
+static int alpha_pmu_event_init(struct perf_event *event)
 {
 	int err;
 
+	switch (event->attr.type) {
+	case PERF_TYPE_RAW:
+	case PERF_TYPE_HARDWARE:
+	case PERF_TYPE_HW_CACHE:
+		break;
+
+	default:
+		return -ENOENT;
+	}
+
 	if (!alpha_pmu)
-		return ERR_PTR(-ENODEV);
+		return -ENODEV;
 
 	/* Do the real initialisation work. */
 	err = __hw_perf_event_init(event);
 
-	if (err)
-		return ERR_PTR(err);
-
-	return &pmu;
+	return err;
 }
 
-
-
 /*
  * Main entry point - enable HW performance counters.
  */
-void hw_perf_enable(void)
+static void alpha_pmu_enable(struct pmu *pmu)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
@@ -700,7 +732,7 @@ void hw_perf_enable(void)
  * Main entry point - disable HW performance counters.
 */

-void hw_perf_disable(void)
+static void alpha_pmu_disable(struct pmu *pmu)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
@@ -713,6 +745,17 @@ void hw_perf_disable(void)
 	wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask);
 }
 
+static struct pmu pmu = {
+	.pmu_enable	= alpha_pmu_enable,
+	.pmu_disable	= alpha_pmu_disable,
+	.event_init	= alpha_pmu_event_init,
+	.add		= alpha_pmu_add,
+	.del		= alpha_pmu_del,
+	.start		= alpha_pmu_start,
+	.stop		= alpha_pmu_stop,
+	.read		= alpha_pmu_read,
+};
+
 
 /*
  * Main entry point - don't know when this is called but it
@@ -766,7 +809,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
 	wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask);
 
 	/* la_ptr is the counter that overflowed. */
-	if (unlikely(la_ptr >= perf_max_events)) {
+	if (unlikely(la_ptr >= alpha_pmu->num_pmcs)) {
 		/* This should never occur! */
 		irq_err_count++;
 		pr_warning("PMI: silly index %ld\n", la_ptr);
@@ -807,7 +850,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
 			/* Interrupts coming too quickly; "throttle" the
 			 * counter, i.e., disable it for a little while.
 			 */
-			cpuc->idx_mask &= ~(1UL<<idx);
+			alpha_pmu_stop(event, 0);
 		}
 	}
 	wrperfmon(PERFMON_CMD_ENABLE, cpuc->idx_mask);
@@ -837,6 +880,7 @@ void __init init_hw_perf_events(void)
 
 	/* And set up PMU specification */
 	alpha_pmu = &ev67_pmu;
-	perf_max_events = alpha_pmu->num_pmcs;
+
+	perf_pmu_register(&pmu);
 }
 
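
The alpha conversion above illustrates the new pmu model introduced by this series: pmu_enable()/pmu_disable() gate the whole PMU, add()/del() attach and detach an event, start()/stop() flip an individual counter, and the PERF_HES_* bits in hw.state track whether a counter is scheduled and whether event->count is current. A hedged sketch of the protocol, with hypothetical foo_* hardware helpers standing in for the arch-specific parts:

	static void foo_pmu_start(struct perf_event *event, int flags)
	{
		if (flags & PERF_EF_RELOAD)	/* reprogram the sample period */
			foo_set_period(event);
		event->hw.state = 0;		/* counting; count no longer up to date */
		foo_unmask_counter(event->hw.idx);
	}

	static int foo_pmu_add(struct perf_event *event, int flags)
	{
		/* a newly added event starts stopped, with an up-to-date count */
		event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
		if (flags & PERF_EF_START)
			foo_pmu_start(event, PERF_EF_RELOAD);
		return 0;
	}
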
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index ecbb0288e5dd..ad19c276b10f 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -221,46 +221,56 @@ again:
 }
 
 static void
-armpmu_disable(struct perf_event *event)
+armpmu_read(struct perf_event *event)
 {
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
-	int idx = hwc->idx;
-
-	WARN_ON(idx < 0);
-
-	clear_bit(idx, cpuc->active_mask);
-	armpmu->disable(hwc, idx);
-
-	barrier();
 
-	armpmu_event_update(event, hwc, idx);
-	cpuc->events[idx] = NULL;
-	clear_bit(idx, cpuc->used_mask);
+	/* Don't read disabled counters! */
+	if (hwc->idx < 0)
+		return;
 
-	perf_event_update_userpage(event);
+	armpmu_event_update(event, hwc, hwc->idx);
 }
 
 static void
-armpmu_read(struct perf_event *event)
+armpmu_stop(struct perf_event *event, int flags)
 {
 	struct hw_perf_event *hwc = &event->hw;
 
-	/* Don't read disabled counters! */
-	if (hwc->idx < 0)
+	if (!armpmu)
 		return;
 
-	armpmu_event_update(event, hwc, hwc->idx);
+	/*
+	 * ARM pmu always has to update the counter, so ignore
+	 * PERF_EF_UPDATE, see comments in armpmu_start().
+	 */
+	if (!(hwc->state & PERF_HES_STOPPED)) {
+		armpmu->disable(hwc, hwc->idx);
+		barrier(); /* why? */
+		armpmu_event_update(event, hwc, hwc->idx);
+		hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	}
 }
 
 static void
-armpmu_unthrottle(struct perf_event *event)
+armpmu_start(struct perf_event *event, int flags)
 {
 	struct hw_perf_event *hwc = &event->hw;
 
+	if (!armpmu)
+		return;
+
+	/*
+	 * ARM pmu always has to reprogram the period, so ignore
+	 * PERF_EF_RELOAD, see the comment below.
+	 */
+	if (flags & PERF_EF_RELOAD)
+		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+
+	hwc->state = 0;
 	/*
 	 * Set the period again. Some counters can't be stopped, so when we
-	 * were throttled we simply disabled the IRQ source and the counter
+	 * were stopped we simply disabled the IRQ source and the counter
 	 * may have been left counting. If we don't do this step then we may
 	 * get an interrupt too soon or *way* too late if the overflow has
 	 * happened since disabling.
@@ -269,14 +279,33 @@ armpmu_unthrottle(struct perf_event *event)
 	armpmu->enable(hwc, hwc->idx);
 }
 
+static void
+armpmu_del(struct perf_event *event, int flags)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	WARN_ON(idx < 0);
+
+	clear_bit(idx, cpuc->active_mask);
+	armpmu_stop(event, PERF_EF_UPDATE);
+	cpuc->events[idx] = NULL;
+	clear_bit(idx, cpuc->used_mask);
+
+	perf_event_update_userpage(event);
+}
+
 static int
-armpmu_enable(struct perf_event *event)
+armpmu_add(struct perf_event *event, int flags)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
 	int idx;
 	int err = 0;
 
+	perf_pmu_disable(event->pmu);
+
 	/* If we don't have a space for the counter then finish early. */
 	idx = armpmu->get_event_idx(cpuc, hwc);
 	if (idx < 0) {
@@ -293,25 +322,19 @@ armpmu_enable(struct perf_event *event)
 	cpuc->events[idx] = event;
 	set_bit(idx, cpuc->active_mask);
 
-	/* Set the period for the event. */
-	armpmu_event_set_period(event, hwc, idx);
-
-	/* Enable the event. */
-	armpmu->enable(hwc, idx);
+	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	if (flags & PERF_EF_START)
+		armpmu_start(event, PERF_EF_RELOAD);
 
 	/* Propagate our changes to the userspace mapping. */
 	perf_event_update_userpage(event);
 
 out:
+	perf_pmu_enable(event->pmu);
 	return err;
 }
 
-static struct pmu pmu = {
-	.enable	    = armpmu_enable,
-	.disable    = armpmu_disable,
-	.unthrottle = armpmu_unthrottle,
-	.read	    = armpmu_read,
-};
+static struct pmu pmu;
 
 static int
 validate_event(struct cpu_hw_events *cpuc,
@@ -491,20 +514,29 @@ __hw_perf_event_init(struct perf_event *event)
 	return err;
 }
 
-const struct pmu *
-hw_perf_event_init(struct perf_event *event)
+static int armpmu_event_init(struct perf_event *event)
 {
 	int err = 0;
 
+	switch (event->attr.type) {
+	case PERF_TYPE_RAW:
+	case PERF_TYPE_HARDWARE:
+	case PERF_TYPE_HW_CACHE:
+		break;
+
+	default:
+		return -ENOENT;
+	}
+
 	if (!armpmu)
-		return ERR_PTR(-ENODEV);
+		return -ENODEV;
 
 	event->destroy = hw_perf_event_destroy;
 
 	if (!atomic_inc_not_zero(&active_events)) {
-		if (atomic_read(&active_events) > perf_max_events) {
+		if (atomic_read(&active_events) > armpmu->num_events) {
 			atomic_dec(&active_events);
-			return ERR_PTR(-ENOSPC);
+			return -ENOSPC;
 		}
 
 		mutex_lock(&pmu_reserve_mutex);
@@ -518,17 +550,16 @@ hw_perf_event_init(struct perf_event *event)
 	}
 
 	if (err)
-		return ERR_PTR(err);
+		return err;
 
 	err = __hw_perf_event_init(event);
 	if (err)
 		hw_perf_event_destroy(event);
 
-	return err ? ERR_PTR(err) : &pmu;
+	return err;
 }
 
-void
-hw_perf_enable(void)
+static void armpmu_enable(struct pmu *pmu)
 {
 	/* Enable all of the perf events on hardware. */
 	int idx;
@@ -549,13 +580,23 @@ hw_perf_enable(void)
 	armpmu->start();
 }
 
-void
-hw_perf_disable(void)
+static void armpmu_disable(struct pmu *pmu)
 {
 	if (armpmu)
 		armpmu->stop();
 }
 
+static struct pmu pmu = {
+	.pmu_enable	= armpmu_enable,
+	.pmu_disable	= armpmu_disable,
+	.event_init	= armpmu_event_init,
+	.add		= armpmu_add,
+	.del		= armpmu_del,
+	.start		= armpmu_start,
+	.stop		= armpmu_stop,
+	.read		= armpmu_read,
+};
+
 /*
  * ARMv6 Performance counter handling code.
 *
@@ -2933,14 +2974,12 @@ init_hw_perf_events(void)
 			armpmu = &armv6pmu;
 			memcpy(armpmu_perf_cache_map, armv6_perf_cache_map,
 			       sizeof(armv6_perf_cache_map));
-			perf_max_events	= armv6pmu.num_events;
 			break;
 		case 0xB020:	/* ARM11mpcore */
 			armpmu = &armv6mpcore_pmu;
 			memcpy(armpmu_perf_cache_map,
 			       armv6mpcore_perf_cache_map,
 			       sizeof(armv6mpcore_perf_cache_map));
-			perf_max_events = armv6mpcore_pmu.num_events;
 			break;
 		case 0xC080:	/* Cortex-A8 */
 			armv7pmu.id = ARM_PERF_PMU_ID_CA8;
@@ -2952,7 +2991,6 @@ init_hw_perf_events(void)
 			/* Reset PMNC and read the nb of CNTx counters
 			    supported */
 			armv7pmu.num_events = armv7_reset_read_pmnc();
-			perf_max_events = armv7pmu.num_events;
 			break;
 		case 0xC090:	/* Cortex-A9 */
 			armv7pmu.id = ARM_PERF_PMU_ID_CA9;
@@ -2964,7 +3002,6 @@ init_hw_perf_events(void)
 			/* Reset PMNC and read the nb of CNTx counters
 			    supported */
 			armv7pmu.num_events = armv7_reset_read_pmnc();
-			perf_max_events = armv7pmu.num_events;
 			break;
 		}
 	/* Intel CPUs [xscale]. */
@@ -2975,13 +3012,11 @@ init_hw_perf_events(void)
 			armpmu = &xscale1pmu;
 			memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
 			       sizeof(xscale_perf_cache_map));
-			perf_max_events	= xscale1pmu.num_events;
 			break;
 		case 2:
 			armpmu = &xscale2pmu;
 			memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
 			       sizeof(xscale_perf_cache_map));
-			perf_max_events	= xscale2pmu.num_events;
 			break;
 		}
 	}
@@ -2991,9 +3026,10 @@ init_hw_perf_events(void)
 			arm_pmu_names[armpmu->id], armpmu->num_events);
 	} else {
 		pr_info("no hardware support available\n");
-		perf_max_events = -1;
 	}
 
+	perf_pmu_register(&pmu);
+
 	return 0;
 }
 arch_initcall(init_hw_perf_events);
@@ -3001,13 +3037,6 @@ arch_initcall(init_hw_perf_events);
 /*
  * Callchain handling code.
 */
-static inline void
-callchain_store(struct perf_callchain_entry *entry,
-		u64 ip)
-{
-	if (entry->nr < PERF_MAX_STACK_DEPTH)
-		entry->ip[entry->nr++] = ip;
-}
 
 /*
  * The registers we're interested in are at the end of the variable
@@ -3039,7 +3068,7 @@ user_backtrace(struct frame_tail *tail,
 	if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail)))
 		return NULL;
 
-	callchain_store(entry, buftail.lr);
+	perf_callchain_store(entry, buftail.lr);
 
 	/*
 	 * Frame pointers should strictly progress back up the stack
@@ -3051,16 +3080,11 @@ user_backtrace(struct frame_tail *tail,
 	return buftail.fp - 1;
 }
 
-static void
-perf_callchain_user(struct pt_regs *regs,
-		    struct perf_callchain_entry *entry)
+void
+perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
 	struct frame_tail *tail;
 
-	callchain_store(entry, PERF_CONTEXT_USER);
-
-	if (!user_mode(regs))
-		regs = task_pt_regs(current);
 
 	tail = (struct frame_tail *)regs->ARM_fp - 1;
 
@@ -3078,56 +3102,18 @@ callchain_trace(struct stackframe *fr,
 		void *data)
 {
 	struct perf_callchain_entry *entry = data;
-	callchain_store(entry, fr->pc);
+	perf_callchain_store(entry, fr->pc);
 	return 0;
 }
 
-static void
-perf_callchain_kernel(struct pt_regs *regs,
-		      struct perf_callchain_entry *entry)
+void
+perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
 	struct stackframe fr;
 
-	callchain_store(entry, PERF_CONTEXT_KERNEL);
 	fr.fp = regs->ARM_fp;
 	fr.sp = regs->ARM_sp;
 	fr.lr = regs->ARM_lr;
 	fr.pc = regs->ARM_pc;
 	walk_stackframe(&fr, callchain_trace, entry);
 }
-
-static void
-perf_do_callchain(struct pt_regs *regs,
-		  struct perf_callchain_entry *entry)
-{
-	int is_user;
-
-	if (!regs)
-		return;
-
-	is_user = user_mode(regs);
-
-	if (!current || !current->pid)
-		return;
-
-	if (is_user && current->state != TASK_RUNNING)
-		return;
-
-	if (!is_user)
-		perf_callchain_kernel(regs, entry);
-
-	if (current->mm)
-		perf_callchain_user(regs, entry);
-}
-
-static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
-
-struct perf_callchain_entry *
-perf_callchain(struct pt_regs *regs)
-{
-	struct perf_callchain_entry *entry = &__get_cpu_var(pmc_irq_entry);
-
-	entry->nr = 0;
-	perf_do_callchain(regs, entry);
-	return entry;
-}
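
Every architecture here loses its private callchain_store() in favour of a shared perf_callchain_store(). Judging from the helpers being deleted, the common replacement is equivalent to the following sketch (the real definition is added to the generic perf code elsewhere in this series):

	static inline void
	perf_callchain_store(struct perf_callchain_entry *entry, u64 ip)
	{
		if (entry->nr < PERF_MAX_STACK_DEPTH)
			entry->ip[entry->nr++] = ip;
	}
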
diff --git a/arch/arm/oprofile/common.c b/arch/arm/oprofile/common.c
index 72e09eb642dd..d1fb5b2f5218 100644
--- a/arch/arm/oprofile/common.c
+++ b/arch/arm/oprofile/common.c
@@ -96,7 +96,7 @@ static int op_create_counter(int cpu, int event)
 		return ret;
 
 	pevent = perf_event_create_kernel_counter(&counter_config[event].attr,
-						  cpu, -1,
+						  cpu, NULL,
 						  op_overflow_handler);
 
 	if (IS_ERR(pevent)) {
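
The `cpu, -1` to `cpu, NULL` change at this call site reflects a signature change elsewhere in the series: perf_event_create_kernel_counter() now takes a task pointer rather than a pid, with NULL meaning a counter bound to a CPU. The implied new prototype, as a sketch:

	struct perf_event *
	perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
					 struct task_struct *task, /* NULL: per-CPU counter */
					 perf_overflow_handler_t overflow_handler);
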
diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c
index 95ad9dad298e..d05ae4204bbf 100644
--- a/arch/powerpc/kernel/perf_callchain.c
+++ b/arch/powerpc/kernel/perf_callchain.c
@@ -23,18 +23,6 @@
 #include "ppc32.h"
 #endif
 
-/*
- * Store another value in a callchain_entry.
- */
-static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip)
-{
-	unsigned int nr = entry->nr;
-
-	if (nr < PERF_MAX_STACK_DEPTH) {
-		entry->ip[nr] = ip;
-		entry->nr = nr + 1;
-	}
-}
 
 /*
  * Is sp valid as the address of the next kernel stack frame after prev_sp?
@@ -58,8 +46,8 @@ static int valid_next_sp(unsigned long sp, unsigned long prev_sp)
 	return 0;
 }
 
-static void perf_callchain_kernel(struct pt_regs *regs,
-				  struct perf_callchain_entry *entry)
+void
+perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
 	unsigned long sp, next_sp;
 	unsigned long next_ip;
@@ -69,8 +57,7 @@ static void perf_callchain_kernel(struct pt_regs *regs,
 
 	lr = regs->link;
 	sp = regs->gpr[1];
-	callchain_store(entry, PERF_CONTEXT_KERNEL);
-	callchain_store(entry, regs->nip);
+	perf_callchain_store(entry, regs->nip);
 
 	if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD))
 		return;
@@ -89,7 +76,7 @@ static void perf_callchain_kernel(struct pt_regs *regs,
 			next_ip = regs->nip;
 			lr = regs->link;
 			level = 0;
-			callchain_store(entry, PERF_CONTEXT_KERNEL);
+			perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
 
 		} else {
 			if (level == 0)
@@ -111,7 +98,7 @@ static void perf_callchain_kernel(struct pt_regs *regs,
 			++level;
 		}
 
-		callchain_store(entry, next_ip);
+		perf_callchain_store(entry, next_ip);
 		if (!valid_next_sp(next_sp, sp))
 			return;
 		sp = next_sp;
@@ -233,8 +220,8 @@ static int sane_signal_64_frame(unsigned long sp)
 		puc == (unsigned long) &sf->uc;
 }
 
-static void perf_callchain_user_64(struct pt_regs *regs,
-				   struct perf_callchain_entry *entry)
+static void perf_callchain_user_64(struct perf_callchain_entry *entry,
+				   struct pt_regs *regs)
 {
 	unsigned long sp, next_sp;
 	unsigned long next_ip;
@@ -246,8 +233,7 @@ static void perf_callchain_user_64(struct pt_regs *regs,
 	next_ip = regs->nip;
 	lr = regs->link;
 	sp = regs->gpr[1];
-	callchain_store(entry, PERF_CONTEXT_USER);
-	callchain_store(entry, next_ip);
+	perf_callchain_store(entry, next_ip);
 
 	for (;;) {
 		fp = (unsigned long __user *) sp;
@@ -276,14 +262,14 @@ static void perf_callchain_user_64(struct pt_regs *regs,
 		    read_user_stack_64(&uregs[PT_R1], &sp))
 				return;
 			level = 0;
-			callchain_store(entry, PERF_CONTEXT_USER);
-			callchain_store(entry, next_ip);
+			perf_callchain_store(entry, PERF_CONTEXT_USER);
+			perf_callchain_store(entry, next_ip);
 			continue;
 		}
 
 		if (level == 0)
 			next_ip = lr;
-		callchain_store(entry, next_ip);
+		perf_callchain_store(entry, next_ip);
 		++level;
 		sp = next_sp;
 	}
@@ -315,8 +301,8 @@ static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
 	return __get_user_inatomic(*ret, ptr);
 }
 
-static inline void perf_callchain_user_64(struct pt_regs *regs,
-					  struct perf_callchain_entry *entry)
+static inline void perf_callchain_user_64(struct perf_callchain_entry *entry,
+					  struct pt_regs *regs)
 {
 }
 
@@ -435,8 +421,8 @@ static unsigned int __user *signal_frame_32_regs(unsigned int sp,
 	return mctx->mc_gregs;
 }
 
-static void perf_callchain_user_32(struct pt_regs *regs,
-				   struct perf_callchain_entry *entry)
+static void perf_callchain_user_32(struct perf_callchain_entry *entry,
+				   struct pt_regs *regs)
 {
 	unsigned int sp, next_sp;
 	unsigned int next_ip;
@@ -447,8 +433,7 @@ static void perf_callchain_user_32(struct pt_regs *regs,
 	next_ip = regs->nip;
 	lr = regs->link;
 	sp = regs->gpr[1];
-	callchain_store(entry, PERF_CONTEXT_USER);
-	callchain_store(entry, next_ip);
+	perf_callchain_store(entry, next_ip);
 
 	while (entry->nr < PERF_MAX_STACK_DEPTH) {
 		fp = (unsigned int __user *) (unsigned long) sp;
@@ -470,45 +455,24 @@ static void perf_callchain_user_32(struct pt_regs *regs,
 		    read_user_stack_32(&uregs[PT_R1], &sp))
 				return;
 			level = 0;
-			callchain_store(entry, PERF_CONTEXT_USER);
-			callchain_store(entry, next_ip);
+			perf_callchain_store(entry, PERF_CONTEXT_USER);
+			perf_callchain_store(entry, next_ip);
 			continue;
 		}
 
 		if (level == 0)
 			next_ip = lr;
-		callchain_store(entry, next_ip);
+		perf_callchain_store(entry, next_ip);
 		++level;
 		sp = next_sp;
 	}
 }
 
-/*
- * Since we can't get PMU interrupts inside a PMU interrupt handler,
- * we don't need separate irq and nmi entries here.
- */
-static DEFINE_PER_CPU(struct perf_callchain_entry, cpu_perf_callchain);
-
-struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
+void
+perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
-	struct perf_callchain_entry *entry = &__get_cpu_var(cpu_perf_callchain);
-
-	entry->nr = 0;
-
-	if (!user_mode(regs)) {
-		perf_callchain_kernel(regs, entry);
-		if (current->mm)
-			regs = task_pt_regs(current);
-		else
-			regs = NULL;
-	}
-
-	if (regs) {
-		if (current_is_64bit())
-			perf_callchain_user_64(regs, entry);
-		else
-			perf_callchain_user_32(regs, entry);
-	}
-
-	return entry;
+	if (current_is_64bit())
+		perf_callchain_user_64(entry, regs);
+	else
+		perf_callchain_user_32(entry, regs);
 }
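
With the per-arch perf_callchain() gone, the user/kernel dispatch and the PERF_CONTEXT_* markers move into common code, and the architectures only walk the frames. A sketch of what the generic caller now does, reconstructed from the logic deleted above (entry allocation shown as a hypothetical helper; the real version lives in kernel/perf_event.c):

	struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
	{
		struct perf_callchain_entry *entry = get_callchain_entry(); /* hypothetical */

		entry->nr = 0;
		if (!user_mode(regs)) {
			perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
			perf_callchain_kernel(entry, regs);
			/* fall through to the user stack, if there is one */
			regs = current->mm ? task_pt_regs(current) : NULL;
		}
		if (regs) {
			perf_callchain_store(entry, PERF_CONTEXT_USER);
			perf_callchain_user(entry, regs);
		}
		return entry;
	}
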
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index d301a30445e0..9cb4924b6c07 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -402,6 +402,9 @@ static void power_pmu_read(struct perf_event *event)
 {
 	s64 val, delta, prev;
 
+	if (event->hw.state & PERF_HES_STOPPED)
+		return;
+
 	if (!event->hw.idx)
 		return;
 	/*
@@ -517,7 +520,7 @@ static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0)
 * Disable all events to prevent PMU interrupts and to allow
 * events to be added or removed.
 */
-void hw_perf_disable(void)
+static void power_pmu_disable(struct pmu *pmu)
 {
 	struct cpu_hw_events *cpuhw;
 	unsigned long flags;
@@ -565,7 +568,7 @@ void hw_perf_disable(void)
 * If we were previously disabled and events were added, then
 * put the new config on the PMU.
 */
-void hw_perf_enable(void)
+static void power_pmu_enable(struct pmu *pmu)
 {
 	struct perf_event *event;
 	struct cpu_hw_events *cpuhw;
@@ -672,6 +675,8 @@ void hw_perf_enable(void)
 		}
 		local64_set(&event->hw.prev_count, val);
 		event->hw.idx = idx;
+		if (event->hw.state & PERF_HES_STOPPED)
+			val = 0;
 		write_pmc(idx, val);
 		perf_event_update_userpage(event);
 	}
@@ -727,7 +732,7 @@ static int collect_events(struct perf_event *group, int max_count,
 * re-enable the PMU in order to get hw_perf_enable to do the
 * actual work of reconfiguring the PMU.
 */
-static int power_pmu_enable(struct perf_event *event)
+static int power_pmu_add(struct perf_event *event, int ef_flags)
 {
 	struct cpu_hw_events *cpuhw;
 	unsigned long flags;
@@ -735,7 +740,7 @@ static int power_pmu_enable(struct perf_event *event)
 	int ret = -EAGAIN;
 
 	local_irq_save(flags);
-	perf_disable();
+	perf_pmu_disable(event->pmu);
 
 	/*
 	 * Add the event to the list (if there is room)
@@ -749,6 +754,9 @@ static int power_pmu_enable(struct perf_event *event)
 	cpuhw->events[n0] = event->hw.config;
 	cpuhw->flags[n0] = event->hw.event_base;
 
+	if (!(ef_flags & PERF_EF_START))
+		event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+
 	/*
 	 * If group events scheduling transaction was started,
 	 * skip the schedulability test here, it will be peformed
@@ -769,7 +777,7 @@ nocheck:
 
 	ret = 0;
 out:
-	perf_enable();
+	perf_pmu_enable(event->pmu);
 	local_irq_restore(flags);
 	return ret;
 }
@@ -777,14 +785,14 @@ nocheck:
 /*
 * Remove a event from the PMU.
 */
-static void power_pmu_disable(struct perf_event *event)
+static void power_pmu_del(struct perf_event *event, int ef_flags)
 {
 	struct cpu_hw_events *cpuhw;
 	long i;
 	unsigned long flags;
 
 	local_irq_save(flags);
-	perf_disable();
+	perf_pmu_disable(event->pmu);
 
 	power_pmu_read(event);
 
@@ -821,34 +829,60 @@ static void power_pmu_disable(struct perf_event *event)
 		cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE);
 	}
 
-	perf_enable();
+	perf_pmu_enable(event->pmu);
 	local_irq_restore(flags);
 }
 
 /*
- * Re-enable interrupts on a event after they were throttled
- * because they were coming too fast.
+ * POWER-PMU does not support disabling individual counters, hence
+ * program their cycle counter to their max value and ignore the interrupts.
 */
-static void power_pmu_unthrottle(struct perf_event *event)
+
+static void power_pmu_start(struct perf_event *event, int ef_flags)
+{
+	unsigned long flags;
+	s64 left;
+
+	if (!event->hw.idx || !event->hw.sample_period)
+		return;
+
+	if (!(event->hw.state & PERF_HES_STOPPED))
+		return;
+
+	if (ef_flags & PERF_EF_RELOAD)
+		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+
+	local_irq_save(flags);
+	perf_pmu_disable(event->pmu);
+
+	event->hw.state = 0;
+	left = local64_read(&event->hw.period_left);
+	write_pmc(event->hw.idx, left);
+
+	perf_event_update_userpage(event);
+	perf_pmu_enable(event->pmu);
+	local_irq_restore(flags);
+}
+
+static void power_pmu_stop(struct perf_event *event, int ef_flags)
 {
-	s64 val, left;
 	unsigned long flags;
 
 	if (!event->hw.idx || !event->hw.sample_period)
 		return;
+
+	if (event->hw.state & PERF_HES_STOPPED)
+		return;
+
 	local_irq_save(flags);
-	perf_disable();
+	perf_pmu_disable(event->pmu);
+
 	power_pmu_read(event);
-	left = event->hw.sample_period;
-	event->hw.last_period = left;
-	val = 0;
-	if (left < 0x80000000L)
-		val = 0x80000000L - left;
-	write_pmc(event->hw.idx, val);
-	local64_set(&event->hw.prev_count, val);
-	local64_set(&event->hw.period_left, left);
+	event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	write_pmc(event->hw.idx, 0);
+
 	perf_event_update_userpage(event);
-	perf_enable();
+	perf_pmu_enable(event->pmu);
 	local_irq_restore(flags);
 }
 
@@ -857,10 +891,11 @@ static void power_pmu_unthrottle(struct perf_event *event)
 * Set the flag to make pmu::enable() not perform the
 * schedulability test, it will be performed at commit time
 */
-void power_pmu_start_txn(const struct pmu *pmu)
+void power_pmu_start_txn(struct pmu *pmu)
 {
 	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
 
+	perf_pmu_disable(pmu);
 	cpuhw->group_flag |= PERF_EVENT_TXN;
 	cpuhw->n_txn_start = cpuhw->n_events;
 }
@@ -870,11 +905,12 @@ void power_pmu_start_txn(const struct pmu *pmu)
 * Clear the flag and pmu::enable() will perform the
 * schedulability test.
 */
-void power_pmu_cancel_txn(const struct pmu *pmu)
+void power_pmu_cancel_txn(struct pmu *pmu)
 {
 	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
 
 	cpuhw->group_flag &= ~PERF_EVENT_TXN;
+	perf_pmu_enable(pmu);
 }
 
 /*
@@ -882,7 +918,7 @@ void power_pmu_cancel_txn(const struct pmu *pmu)
 * Perform the group schedulability test as a whole
 * Return 0 if success
 */
-int power_pmu_commit_txn(const struct pmu *pmu)
+int power_pmu_commit_txn(struct pmu *pmu)
 {
 	struct cpu_hw_events *cpuhw;
 	long i, n;
@@ -901,19 +937,10 @@ int power_pmu_commit_txn(const struct pmu *pmu)
 		cpuhw->event[i]->hw.config = cpuhw->events[i];
 
 	cpuhw->group_flag &= ~PERF_EVENT_TXN;
+	perf_pmu_enable(pmu);
 	return 0;
 }
 
-struct pmu power_pmu = {
-	.enable		= power_pmu_enable,
-	.disable	= power_pmu_disable,
-	.read		= power_pmu_read,
-	.unthrottle	= power_pmu_unthrottle,
-	.start_txn	= power_pmu_start_txn,
-	.cancel_txn	= power_pmu_cancel_txn,
-	.commit_txn	= power_pmu_commit_txn,
-};
-
 /*
 * Return 1 if we might be able to put event on a limited PMC,
 * or 0 if not.
@@ -1014,7 +1041,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp)
 	return 0;
 }
 
-const struct pmu *hw_perf_event_init(struct perf_event *event)
+static int power_pmu_event_init(struct perf_event *event)
 {
 	u64 ev;
 	unsigned long flags;
@@ -1026,25 +1053,27 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
 	struct cpu_hw_events *cpuhw;
 
 	if (!ppmu)
-		return ERR_PTR(-ENXIO);
+		return -ENOENT;
+
 	switch (event->attr.type) {
 	case PERF_TYPE_HARDWARE:
 		ev = event->attr.config;
 		if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
-			return ERR_PTR(-EOPNOTSUPP);
+			return -EOPNOTSUPP;
 		ev = ppmu->generic_events[ev];
 		break;
 	case PERF_TYPE_HW_CACHE:
 		err = hw_perf_cache_event(event->attr.config, &ev);
 		if (err)
-			return ERR_PTR(err);
+			return err;
 		break;
 	case PERF_TYPE_RAW:
 		ev = event->attr.config;
 		break;
 	default:
-		return ERR_PTR(-EINVAL);
+		return -ENOENT;
 	}
+
 	event->hw.config_base = ev;
 	event->hw.idx = 0;
 
@@ -1081,7 +1110,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
 		 */
 		ev = normal_pmc_alternative(ev, flags);
 		if (!ev)
-			return ERR_PTR(-EINVAL);
+			return -EINVAL;
 	}
 }
 
@@ -1095,19 +1124,19 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
 		n = collect_events(event->group_leader, ppmu->n_counter - 1,
 				   ctrs, events, cflags);
 		if (n < 0)
-			return ERR_PTR(-EINVAL);
+			return -EINVAL;
 	}
 	events[n] = ev;
 	ctrs[n] = event;
 	cflags[n] = flags;
 	if (check_excludes(ctrs, cflags, n, 1))
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 
 	cpuhw = &get_cpu_var(cpu_hw_events);
 	err = power_check_constraints(cpuhw, events, cflags, n + 1);
 	put_cpu_var(cpu_hw_events);
 	if (err)
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 
 	event->hw.config = events[n];
 	event->hw.event_base = cflags[n];
@@ -1132,11 +1161,23 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
 	}
 	event->destroy = hw_perf_event_destroy;
 
-	if (err)
-		return ERR_PTR(err);
-	return &power_pmu;
+	return err;
 }
 
+struct pmu power_pmu = {
+	.pmu_enable	= power_pmu_enable,
+	.pmu_disable	= power_pmu_disable,
+	.event_init	= power_pmu_event_init,
+	.add		= power_pmu_add,
+	.del		= power_pmu_del,
+	.start		= power_pmu_start,
+	.stop		= power_pmu_stop,
+	.read		= power_pmu_read,
+	.start_txn	= power_pmu_start_txn,
+	.cancel_txn	= power_pmu_cancel_txn,
+	.commit_txn	= power_pmu_commit_txn,
+};
+
 /*
 * A counter has overflowed; update its count and record
 * things if requested.  Note that interrupts are hard-disabled
@@ -1149,6 +1190,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 	s64 prev, delta, left;
 	int record = 0;
 
+	if (event->hw.state & PERF_HES_STOPPED) {
+		write_pmc(event->hw.idx, 0);
+		return;
+	}
+
 	/* we don't have to worry about interrupts here */
 	prev = local64_read(&event->hw.prev_count);
 	delta = (val - prev) & 0xfffffffful;
@@ -1171,6 +1217,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 			val = 0x80000000LL - left;
 	}
 
+	write_pmc(event->hw.idx, val);
+	local64_set(&event->hw.prev_count, val);
+	local64_set(&event->hw.period_left, left);
+	perf_event_update_userpage(event);
+
 	/*
 	 * Finally record data if requested.
 	 */
@@ -1183,23 +1234,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 		if (event->attr.sample_type & PERF_SAMPLE_ADDR)
 			perf_get_data_addr(regs, &data.addr);
 
-		if (perf_event_overflow(event, nmi, &data, regs)) {
-			/*
-			 * Interrupts are coming too fast - throttle them
-			 * by setting the event to 0, so it will be
-			 * at least 2^30 cycles until the next interrupt
-			 * (assuming each event counts at most 2 counts
-			 * per cycle).
-			 */
-			val = 0;
-			left = ~0ULL >> 1;
-		}
+		if (perf_event_overflow(event, nmi, &data, regs))
+			power_pmu_stop(event, 0);
 	}
-
-	write_pmc(event->hw.idx, val);
-	local64_set(&event->hw.prev_count, val);
-	local64_set(&event->hw.period_left, left);
-	perf_event_update_userpage(event);
 }
 
 /*
@@ -1342,6 +1379,7 @@ int register_power_pmu(struct power_pmu *pmu)
 		freeze_events_kernel = MMCR0_FCHV;
 #endif /* CONFIG_PPC64 */
 
+	perf_pmu_register(&power_pmu);
 	perf_cpu_notifier(power_pmu_notifier);
 
 	return 0;
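
For reference while reading the remaining conversions, this is the flag protocol used throughout, with semantics inferred from the code above (the actual definitions live in include/linux/perf_event.h; the values below are assumptions):

	/* flags for pmu::{add,start,stop}: */
	#define PERF_EF_START	0x01	/* add: start the event as well */
	#define PERF_EF_RELOAD	0x02	/* start: reprogram the period first */
	#define PERF_EF_UPDATE	0x04	/* stop: fold the hw count into event->count */

	/* hw_perf_event::state bits: */
	#define PERF_HES_STOPPED	0x01	/* counter is not scheduled on hardware */
	#define PERF_HES_UPTODATE	0x02	/* event->count is current; no read needed */
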
diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c
index 1ba45471ae43..7ecca59ddf77 100644
--- a/arch/powerpc/kernel/perf_event_fsl_emb.c
+++ b/arch/powerpc/kernel/perf_event_fsl_emb.c
@@ -156,6 +156,9 @@ static void fsl_emb_pmu_read(struct perf_event *event)
 {
 	s64 val, delta, prev;
 
+	if (event->hw.state & PERF_HES_STOPPED)
+		return;
+
 	/*
 	 * Performance monitor interrupts come even when interrupts
 	 * are soft-disabled, as long as interrupts are hard-enabled.
@@ -177,7 +180,7 @@ static void fsl_emb_pmu_read(struct perf_event *event)
 * Disable all events to prevent PMU interrupts and to allow
 * events to be added or removed.
 */
-void hw_perf_disable(void)
+static void fsl_emb_pmu_disable(struct pmu *pmu)
 {
 	struct cpu_hw_events *cpuhw;
 	unsigned long flags;
@@ -216,7 +219,7 @@ void hw_perf_disable(void)
 * If we were previously disabled and events were added, then
 * put the new config on the PMU.
 */
-void hw_perf_enable(void)
+static void fsl_emb_pmu_enable(struct pmu *pmu)
 {
 	struct cpu_hw_events *cpuhw;
 	unsigned long flags;
@@ -262,8 +265,8 @@ static int collect_events(struct perf_event *group, int max_count,
 	return n;
 }
 
-/* perf must be disabled, context locked on entry */
-static int fsl_emb_pmu_enable(struct perf_event *event)
+/* context locked on entry */
+static int fsl_emb_pmu_add(struct perf_event *event, int flags)
 {
 	struct cpu_hw_events *cpuhw;
 	int ret = -EAGAIN;
@@ -271,6 +274,7 @@ static int fsl_emb_pmu_enable(struct perf_event *event)
 	u64 val;
 	int i;
 
+	perf_pmu_disable(event->pmu);
 	cpuhw = &get_cpu_var(cpu_hw_events);
 
 	if (event->hw.config & FSL_EMB_EVENT_RESTRICTED)
@@ -301,6 +305,12 @@ static int fsl_emb_pmu_enable(struct perf_event *event)
 			val = 0x80000000L - left;
 	}
 	local64_set(&event->hw.prev_count, val);
+
+	if (!(flags & PERF_EF_START)) {
+		event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+		val = 0;
+	}
+
 	write_pmc(i, val);
 	perf_event_update_userpage(event);
 
@@ -310,15 +320,17 @@ static int fsl_emb_pmu_enable(struct perf_event *event)
 	ret = 0;
 out:
 	put_cpu_var(cpu_hw_events);
+	perf_pmu_enable(event->pmu);
 	return ret;
 }
 
-/* perf must be disabled, context locked on entry */
-static void fsl_emb_pmu_disable(struct perf_event *event)
+/* context locked on entry */
+static void fsl_emb_pmu_del(struct perf_event *event, int flags)
 {
 	struct cpu_hw_events *cpuhw;
 	int i = event->hw.idx;
 
+	perf_pmu_disable(event->pmu);
 	if (i < 0)
 		goto out;
 
@@ -346,44 +358,57 @@ static void fsl_emb_pmu_disable(struct perf_event *event)
 	cpuhw->n_events--;
 
 out:
+	perf_pmu_enable(event->pmu);
 	put_cpu_var(cpu_hw_events);
 }
 
-/*
- * Re-enable interrupts on a event after they were throttled
- * because they were coming too fast.
- *
- * Context is locked on entry, but perf is not disabled.
- */
-static void fsl_emb_pmu_unthrottle(struct perf_event *event)
+static void fsl_emb_pmu_start(struct perf_event *event, int ef_flags)
 {
-	s64 val, left;
 	unsigned long flags;
+	s64 left;
 
 	if (event->hw.idx < 0 || !event->hw.sample_period)
 		return;
+
+	if (!(event->hw.state & PERF_HES_STOPPED))
+		return;
+
+	if (ef_flags & PERF_EF_RELOAD)
+		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+
 	local_irq_save(flags);
-	perf_disable();
-	fsl_emb_pmu_read(event);
-	left = event->hw.sample_period;
-	event->hw.last_period = left;
-	val = 0;
-	if (left < 0x80000000L)
-		val = 0x80000000L - left;
-	write_pmc(event->hw.idx, val);
-	local64_set(&event->hw.prev_count, val);
-	local64_set(&event->hw.period_left, left);
-	perf_event_update_userpage(event);
-	perf_enable();
+	perf_pmu_disable(event->pmu);
+
+	event->hw.state = 0;
+	left = local64_read(&event->hw.period_left);
+	write_pmc(event->hw.idx, left);
+
+	perf_event_update_userpage(event);
+	perf_pmu_enable(event->pmu);
 	local_irq_restore(flags);
 }
 
-static struct pmu fsl_emb_pmu = {
-	.enable		= fsl_emb_pmu_enable,
-	.disable	= fsl_emb_pmu_disable,
-	.read		= fsl_emb_pmu_read,
-	.unthrottle	= fsl_emb_pmu_unthrottle,
-};
+static void fsl_emb_pmu_stop(struct perf_event *event, int ef_flags)
+{
+	unsigned long flags;
+
+	if (event->hw.idx < 0 || !event->hw.sample_period)
+		return;
+
+	if (event->hw.state & PERF_HES_STOPPED)
+		return;
+
+	local_irq_save(flags);
+	perf_pmu_disable(event->pmu);
+
+	fsl_emb_pmu_read(event);
+	event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	write_pmc(event->hw.idx, 0);
+
+	perf_event_update_userpage(event);
+	perf_pmu_enable(event->pmu);
+	local_irq_restore(flags);
+}
 
 /*
 * Release the PMU if this is the last perf_event.
@@ -428,7 +453,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp)
 	return 0;
 }
 
-const struct pmu *hw_perf_event_init(struct perf_event *event)
+static int fsl_emb_pmu_event_init(struct perf_event *event)
 {
 	u64 ev;
 	struct perf_event *events[MAX_HWEVENTS];
@@ -441,14 +466,14 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
 	case PERF_TYPE_HARDWARE:
 		ev = event->attr.config;
 		if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
-			return ERR_PTR(-EOPNOTSUPP);
+			return -EOPNOTSUPP;
 		ev = ppmu->generic_events[ev];
 		break;
 
 	case PERF_TYPE_HW_CACHE:
 		err = hw_perf_cache_event(event->attr.config, &ev);
 		if (err)
-			return ERR_PTR(err);
+			return err;
452 break; 477 break;
453 478
454 case PERF_TYPE_RAW: 479 case PERF_TYPE_RAW:
@@ -456,12 +481,12 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
456 break; 481 break;
457 482
458 default: 483 default:
459 return ERR_PTR(-EINVAL); 484 return -ENOENT;
460 } 485 }
461 486
462 event->hw.config = ppmu->xlate_event(ev); 487 event->hw.config = ppmu->xlate_event(ev);
463 if (!(event->hw.config & FSL_EMB_EVENT_VALID)) 488 if (!(event->hw.config & FSL_EMB_EVENT_VALID))
464 return ERR_PTR(-EINVAL); 489 return -EINVAL;
465 490
466 /* 491 /*
467 * If this is in a group, check if it can go on with all the 492 * If this is in a group, check if it can go on with all the
@@ -473,7 +498,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
473 n = collect_events(event->group_leader, 498 n = collect_events(event->group_leader,
474 ppmu->n_counter - 1, events); 499 ppmu->n_counter - 1, events);
475 if (n < 0) 500 if (n < 0)
476 return ERR_PTR(-EINVAL); 501 return -EINVAL;
477 } 502 }
478 503
479 if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) { 504 if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) {
@@ -484,7 +509,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
484 } 509 }
485 510
486 if (num_restricted >= ppmu->n_restricted) 511 if (num_restricted >= ppmu->n_restricted)
487 return ERR_PTR(-EINVAL); 512 return -EINVAL;
488 } 513 }
489 514
490 event->hw.idx = -1; 515 event->hw.idx = -1;
@@ -497,7 +522,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
497 if (event->attr.exclude_kernel) 522 if (event->attr.exclude_kernel)
498 event->hw.config_base |= PMLCA_FCS; 523 event->hw.config_base |= PMLCA_FCS;
499 if (event->attr.exclude_idle) 524 if (event->attr.exclude_idle)
500 return ERR_PTR(-ENOTSUPP); 525 return -ENOTSUPP;
501 526
502 event->hw.last_period = event->hw.sample_period; 527 event->hw.last_period = event->hw.sample_period;
503 local64_set(&event->hw.period_left, event->hw.last_period); 528 local64_set(&event->hw.period_left, event->hw.last_period);
@@ -523,11 +548,20 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
523 } 548 }
524 event->destroy = hw_perf_event_destroy; 549 event->destroy = hw_perf_event_destroy;
525 550
526 if (err) 551 return err;
527 return ERR_PTR(err);
528 return &fsl_emb_pmu;
529} 552}
530 553
554static struct pmu fsl_emb_pmu = {
555 .pmu_enable = fsl_emb_pmu_enable,
556 .pmu_disable = fsl_emb_pmu_disable,
557 .event_init = fsl_emb_pmu_event_init,
558 .add = fsl_emb_pmu_add,
559 .del = fsl_emb_pmu_del,
560 .start = fsl_emb_pmu_start,
561 .stop = fsl_emb_pmu_stop,
562 .read = fsl_emb_pmu_read,
563};
564
531/* 565/*
532 * A counter has overflowed; update its count and record 566 * A counter has overflowed; update its count and record
533 * things if requested. Note that interrupts are hard-disabled 567 * things if requested. Note that interrupts are hard-disabled
@@ -540,6 +574,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
540 s64 prev, delta, left; 574 s64 prev, delta, left;
541 int record = 0; 575 int record = 0;
542 576
577 if (event->hw.state & PERF_HES_STOPPED) {
578 write_pmc(event->hw.idx, 0);
579 return;
580 }
581
543 /* we don't have to worry about interrupts here */ 582 /* we don't have to worry about interrupts here */
544 prev = local64_read(&event->hw.prev_count); 583 prev = local64_read(&event->hw.prev_count);
545 delta = (val - prev) & 0xfffffffful; 584 delta = (val - prev) & 0xfffffffful;
@@ -562,6 +601,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
562 val = 0x80000000LL - left; 601 val = 0x80000000LL - left;
563 } 602 }
564 603
604 write_pmc(event->hw.idx, val);
605 local64_set(&event->hw.prev_count, val);
606 local64_set(&event->hw.period_left, left);
607 perf_event_update_userpage(event);
608
565 /* 609 /*
566 * Finally record data if requested. 610 * Finally record data if requested.
567 */ 611 */
@@ -571,23 +615,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
571 perf_sample_data_init(&data, 0); 615 perf_sample_data_init(&data, 0);
572 data.period = event->hw.last_period; 616 data.period = event->hw.last_period;
573 617
574 if (perf_event_overflow(event, nmi, &data, regs)) { 618 if (perf_event_overflow(event, nmi, &data, regs))
575 /* 619 fsl_emb_pmu_stop(event, 0);
576 * Interrupts are coming too fast - throttle them
577 * by setting the event to 0, so it will be
578 * at least 2^30 cycles until the next interrupt
579 * (assuming each event counts at most 2 counts
580 * per cycle).
581 */
582 val = 0;
583 left = ~0ULL >> 1;
584 }
585 } 620 }
586
587 write_pmc(event->hw.idx, val);
588 local64_set(&event->hw.prev_count, val);
589 local64_set(&event->hw.period_left, left);
590 perf_event_update_userpage(event);
591} 621}
592 622
593static void perf_event_interrupt(struct pt_regs *regs) 623static void perf_event_interrupt(struct pt_regs *regs)
@@ -651,5 +681,7 @@ int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu)
651 pr_info("%s performance monitor hardware support registered\n", 681 pr_info("%s performance monitor hardware support registered\n",
652 pmu->name); 682 pmu->name);
653 683
684 perf_pmu_register(&fsl_emb_pmu);
685
654 return 0; 686 return 0;
655} 687}
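
The fsl_emb conversion above is the template the rest of the series follows: the old ->enable/->disable/->unthrottle trio becomes ->add/->del/->start/->stop, with event->hw.state recording whether the counter is halted (PERF_HES_STOPPED) and whether event->count already reflects the hardware (PERF_HES_UPTODATE). A minimal sketch of that callback contract, with hypothetical mypmu_* names standing in for any driver:

#include <linux/perf_event.h>

/* Sketch only; mypmu_* and the hardware hooks are placeholders. */
static void mypmu_start(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_RELOAD)	/* caller wants period_left reprogrammed */
		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
	event->hw.state = 0;		/* counting again */
	/* ...program and unhalt the hardware counter here... */
}

static void mypmu_stop(struct perf_event *event, int flags)
{
	if (!(event->hw.state & PERF_HES_STOPPED)) {
		/* ...halt the hardware counter here... */
		event->hw.state |= PERF_HES_STOPPED;
	}
	if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) {
		/* ...fold the remaining hardware delta into event->count... */
		event->hw.state |= PERF_HES_UPTODATE;
	}
}

static int mypmu_add(struct perf_event *event, int flags)
{
	/* ...claim a counter slot, returning -EAGAIN if none is free... */
	event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
	if (flags & PERF_EF_START)
		mypmu_start(event, PERF_EF_RELOAD);
	return 0;
}

Unthrottling needs no dedicated hook any more: the core can simply call ->stop() and later ->start() on the throttled event, which is exactly what record_and_restart() now does via fsl_emb_pmu_stop().
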
diff --git a/arch/sh/kernel/perf_callchain.c b/arch/sh/kernel/perf_callchain.c
index a9dd3abde28e..d5ca1ef50fa9 100644
--- a/arch/sh/kernel/perf_callchain.c
+++ b/arch/sh/kernel/perf_callchain.c
@@ -14,11 +14,6 @@
14#include <asm/unwinder.h> 14#include <asm/unwinder.h>
15#include <asm/ptrace.h> 15#include <asm/ptrace.h>
16 16
17static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip)
18{
19 if (entry->nr < PERF_MAX_STACK_DEPTH)
20 entry->ip[entry->nr++] = ip;
21}
22 17
23static void callchain_warning(void *data, char *msg) 18static void callchain_warning(void *data, char *msg)
24{ 19{
@@ -39,7 +34,7 @@ static void callchain_address(void *data, unsigned long addr, int reliable)
39 struct perf_callchain_entry *entry = data; 34 struct perf_callchain_entry *entry = data;
40 35
41 if (reliable) 36 if (reliable)
42 callchain_store(entry, addr); 37 perf_callchain_store(entry, addr);
43} 38}
44 39
45static const struct stacktrace_ops callchain_ops = { 40static const struct stacktrace_ops callchain_ops = {
@@ -49,47 +44,10 @@ static const struct stacktrace_ops callchain_ops = {
49 .address = callchain_address, 44 .address = callchain_address,
50}; 45};
51 46
52static void 47void
53perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) 48perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
54{ 49{
55 callchain_store(entry, PERF_CONTEXT_KERNEL); 50 perf_callchain_store(entry, regs->pc);
56 callchain_store(entry, regs->pc);
57 51
58 unwind_stack(NULL, regs, NULL, &callchain_ops, entry); 52 unwind_stack(NULL, regs, NULL, &callchain_ops, entry);
59} 53}
60
61static void
62perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
63{
64 int is_user;
65
66 if (!regs)
67 return;
68
69 is_user = user_mode(regs);
70
71 if (is_user && current->state != TASK_RUNNING)
72 return;
73
74 /*
75 * Only the kernel side is implemented for now.
76 */
77 if (!is_user)
78 perf_callchain_kernel(regs, entry);
79}
80
81/*
82 * No need for separate IRQ and NMI entries.
83 */
84static DEFINE_PER_CPU(struct perf_callchain_entry, callchain);
85
86struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
87{
88 struct perf_callchain_entry *entry = &__get_cpu_var(callchain);
89
90 entry->nr = 0;
91
92 perf_do_callchain(regs, entry);
93
94 return entry;
95}
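
Both hunks in this file lean on the new generic helper perf_callchain_store(), which behaves like the per-arch copy deleted above, and on the core now storing the PERF_CONTEXT_KERNEL/PERF_CONTEXT_USER markers and owning the per-cpu entry buffers, which is why perf_do_callchain() and the per-cpu callchain variable could go. The helper is essentially (sketch of the generic definition):

static inline void
perf_callchain_store(struct perf_callchain_entry *entry, u64 ip)
{
	if (entry->nr < PERF_MAX_STACK_DEPTH)
		entry->ip[entry->nr++] = ip;
}
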
diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c
index 7a3dc3567258..036f7a9296fa 100644
--- a/arch/sh/kernel/perf_event.c
+++ b/arch/sh/kernel/perf_event.c
@@ -206,50 +206,80 @@ again:
206 local64_add(delta, &event->count); 206 local64_add(delta, &event->count);
207} 207}
208 208
209static void sh_pmu_disable(struct perf_event *event) 209static void sh_pmu_stop(struct perf_event *event, int flags)
210{ 210{
211 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 211 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
212 struct hw_perf_event *hwc = &event->hw; 212 struct hw_perf_event *hwc = &event->hw;
213 int idx = hwc->idx; 213 int idx = hwc->idx;
214 214
215 clear_bit(idx, cpuc->active_mask); 215 if (!(event->hw.state & PERF_HES_STOPPED)) {
216 sh_pmu->disable(hwc, idx); 216 sh_pmu->disable(hwc, idx);
217 cpuc->events[idx] = NULL;
218 event->hw.state |= PERF_HES_STOPPED;
219 }
220
221 if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) {
222 sh_perf_event_update(event, &event->hw, idx);
223 event->hw.state |= PERF_HES_UPTODATE;
224 }
225}
217 226
218 barrier(); 227static void sh_pmu_start(struct perf_event *event, int flags)
228{
229 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
230 struct hw_perf_event *hwc = &event->hw;
231 int idx = hwc->idx;
219 232
220 sh_perf_event_update(event, &event->hw, idx); 233 if (WARN_ON_ONCE(idx == -1))
234 return;
235
236 if (flags & PERF_EF_RELOAD)
237 WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
238
239 cpuc->events[idx] = event;
240 event->hw.state = 0;
241 sh_pmu->enable(hwc, idx);
242}
243
244static void sh_pmu_del(struct perf_event *event, int flags)
245{
246 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
221 247
222 cpuc->events[idx] = NULL; 248 sh_pmu_stop(event, PERF_EF_UPDATE);
223 clear_bit(idx, cpuc->used_mask); 249 __clear_bit(event->hw.idx, cpuc->used_mask);
224 250
225 perf_event_update_userpage(event); 251 perf_event_update_userpage(event);
226} 252}
227 253
228static int sh_pmu_enable(struct perf_event *event) 254static int sh_pmu_add(struct perf_event *event, int flags)
229{ 255{
230 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 256 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
231 struct hw_perf_event *hwc = &event->hw; 257 struct hw_perf_event *hwc = &event->hw;
232 int idx = hwc->idx; 258 int idx = hwc->idx;
259 int ret = -EAGAIN;
260
261 perf_pmu_disable(event->pmu);
233 262
234 if (test_and_set_bit(idx, cpuc->used_mask)) { 263 if (__test_and_set_bit(idx, cpuc->used_mask)) {
235 idx = find_first_zero_bit(cpuc->used_mask, sh_pmu->num_events); 264 idx = find_first_zero_bit(cpuc->used_mask, sh_pmu->num_events);
236 if (idx == sh_pmu->num_events) 265 if (idx == sh_pmu->num_events)
237 return -EAGAIN; 266 goto out;
238 267
239 set_bit(idx, cpuc->used_mask); 268 __set_bit(idx, cpuc->used_mask);
240 hwc->idx = idx; 269 hwc->idx = idx;
241 } 270 }
242 271
243 sh_pmu->disable(hwc, idx); 272 sh_pmu->disable(hwc, idx);
244 273
245 cpuc->events[idx] = event; 274 event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
246 set_bit(idx, cpuc->active_mask); 275 if (flags & PERF_EF_START)
247 276 sh_pmu_start(event, PERF_EF_RELOAD);
248 sh_pmu->enable(hwc, idx);
249 277
250 perf_event_update_userpage(event); 278 perf_event_update_userpage(event);
251 279 ret = 0;
252 return 0; 280out:
281 perf_pmu_enable(event->pmu);
282 return ret;
253} 283}
254 284
255static void sh_pmu_read(struct perf_event *event) 285static void sh_pmu_read(struct perf_event *event)
@@ -257,24 +287,56 @@ static void sh_pmu_read(struct perf_event *event)
257 sh_perf_event_update(event, &event->hw, event->hw.idx); 287 sh_perf_event_update(event, &event->hw, event->hw.idx);
258} 288}
259 289
260static const struct pmu pmu = { 290static int sh_pmu_event_init(struct perf_event *event)
261 .enable = sh_pmu_enable,
262 .disable = sh_pmu_disable,
263 .read = sh_pmu_read,
264};
265
266const struct pmu *hw_perf_event_init(struct perf_event *event)
267{ 291{
268 int err = __hw_perf_event_init(event); 292 int err;
293
294 switch (event->attr.type) {
295 case PERF_TYPE_RAW:
296 case PERF_TYPE_HW_CACHE:
297 case PERF_TYPE_HARDWARE:
298 err = __hw_perf_event_init(event);
299 break;
300
301 default:
302 return -ENOENT;
303 }
304
269 if (unlikely(err)) { 305 if (unlikely(err)) {
270 if (event->destroy) 306 if (event->destroy)
271 event->destroy(event); 307 event->destroy(event);
272 return ERR_PTR(err);
273 } 308 }
274 309
275 return &pmu; 310 return err;
311}
312
313static void sh_pmu_enable(struct pmu *pmu)
314{
315 if (!sh_pmu_initialized())
316 return;
317
318 sh_pmu->enable_all();
319}
320
321static void sh_pmu_disable(struct pmu *pmu)
322{
323 if (!sh_pmu_initialized())
324 return;
325
326 sh_pmu->disable_all();
276} 327}
277 328
329static struct pmu pmu = {
330 .pmu_enable = sh_pmu_enable,
331 .pmu_disable = sh_pmu_disable,
332 .event_init = sh_pmu_event_init,
333 .add = sh_pmu_add,
334 .del = sh_pmu_del,
335 .start = sh_pmu_start,
336 .stop = sh_pmu_stop,
337 .read = sh_pmu_read,
338};
339
278static void sh_pmu_setup(int cpu) 340static void sh_pmu_setup(int cpu)
279{ 341{
280 struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); 342 struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
@@ -299,32 +361,17 @@ sh_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
299 return NOTIFY_OK; 361 return NOTIFY_OK;
300} 362}
301 363
302void hw_perf_enable(void) 364int __cpuinit register_sh_pmu(struct sh_pmu *_pmu)
303{
304 if (!sh_pmu_initialized())
305 return;
306
307 sh_pmu->enable_all();
308}
309
310void hw_perf_disable(void)
311{
312 if (!sh_pmu_initialized())
313 return;
314
315 sh_pmu->disable_all();
316}
317
318int __cpuinit register_sh_pmu(struct sh_pmu *pmu)
319{ 365{
320 if (sh_pmu) 366 if (sh_pmu)
321 return -EBUSY; 367 return -EBUSY;
322 sh_pmu = pmu; 368 sh_pmu = _pmu;
323 369
324 pr_info("Performance Events: %s support registered\n", pmu->name); 370 pr_info("Performance Events: %s support registered\n", _pmu->name);
325 371
326 WARN_ON(pmu->num_events > MAX_HWEVENTS); 372 WARN_ON(_pmu->num_events > MAX_HWEVENTS);
327 373
374 perf_pmu_register(&pmu);
328 perf_cpu_notifier(sh_pmu_notifier); 375 perf_cpu_notifier(sh_pmu_notifier);
329 return 0; 376 return 0;
330} 377}
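
Returning -ENOENT from ->event_init() for unknown attr.type values is what lets several struct pmu instances coexist after perf_pmu_register(): the core treats -ENOENT as "not my event" and tries the next PMU, while any other error aborts event creation. A sketch of that dispatch with approximate names (the real walk lives in the core perf code):

/* pmus: assumed core-private list that perf_pmu_register() appends to,
 * via a struct pmu::entry list node added by this series. */
static LIST_HEAD(pmus);

static struct pmu *sketch_perf_init_event(struct perf_event *event)
{
	struct pmu *pmu;
	int ret;

	list_for_each_entry(pmu, &pmus, entry) {
		ret = pmu->event_init(event);
		if (!ret)
			return pmu;		/* claimed */
		if (ret != -ENOENT)
			return ERR_PTR(ret);	/* hard error, stop probing */
	}
	return ERR_PTR(-ENOENT);
}
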
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 491e9d6de191..9212cd42a832 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -30,6 +30,7 @@ config SPARC
30 select PERF_USE_VMALLOC 30 select PERF_USE_VMALLOC
31 select HAVE_DMA_ATTRS 31 select HAVE_DMA_ATTRS
32 select HAVE_DMA_API_DEBUG 32 select HAVE_DMA_API_DEBUG
33 select HAVE_ARCH_JUMP_LABEL
33 34
34config SPARC32 35config SPARC32
35 def_bool !64BIT 36 def_bool !64BIT
diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h
new file mode 100644
index 000000000000..62e66d7b2fb6
--- /dev/null
+++ b/arch/sparc/include/asm/jump_label.h
@@ -0,0 +1,32 @@
1#ifndef _ASM_SPARC_JUMP_LABEL_H
2#define _ASM_SPARC_JUMP_LABEL_H
3
4#ifdef __KERNEL__
5
6#include <linux/types.h>
7#include <asm/system.h>
8
9#define JUMP_LABEL_NOP_SIZE 4
10
11#define JUMP_LABEL(key, label) \
12 do { \
13 asm goto("1:\n\t" \
14 "nop\n\t" \
15 "nop\n\t" \
16 ".pushsection __jump_table, \"a\"\n\t"\
17 ".word 1b, %l[" #label "], %c0\n\t" \
18 ".popsection \n\t" \
19 : : "i" (key) : : label);\
20 } while (0)
21
22#endif /* __KERNEL__ */
23
24typedef u32 jump_label_t;
25
26struct jump_entry {
27 jump_label_t code;
28 jump_label_t target;
29 jump_label_t key;
30};
31
32#endif
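
The two nops matter on sparc: when a site is enabled, the first word is rewritten to a ba (see jump_label.c below) and the second nop then serves as that branch's delay slot. A hypothetical call site, using the generic API this series adds; with the key off, the nops fall straight through:

static int my_trace_key;	/* illustrative key */

static void maybe_trace(void)
{
	JUMP_LABEL(&my_trace_key, do_trace);
	return;
do_trace:
	pr_debug("trace point hit\n");
}

/* elsewhere, presumably: jump_label_enable(&my_trace_key); */
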
diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile
index 0c2dc1f24a9a..599398fbbc7c 100644
--- a/arch/sparc/kernel/Makefile
+++ b/arch/sparc/kernel/Makefile
@@ -119,3 +119,5 @@ obj-$(CONFIG_COMPAT) += $(audit--y)
119 119
120pc--$(CONFIG_PERF_EVENTS) := perf_event.o 120pc--$(CONFIG_PERF_EVENTS) := perf_event.o
121obj-$(CONFIG_SPARC64) += $(pc--y) 121obj-$(CONFIG_SPARC64) += $(pc--y)
122
123obj-$(CONFIG_SPARC64) += jump_label.o
diff --git a/arch/sparc/kernel/jump_label.c b/arch/sparc/kernel/jump_label.c
new file mode 100644
index 000000000000..ea2dafc93d78
--- /dev/null
+++ b/arch/sparc/kernel/jump_label.c
@@ -0,0 +1,47 @@
1#include <linux/kernel.h>
2#include <linux/types.h>
3#include <linux/mutex.h>
4#include <linux/cpu.h>
5
6#include <linux/jump_label.h>
7#include <linux/memory.h>
8
9#ifdef HAVE_JUMP_LABEL
10
11void arch_jump_label_transform(struct jump_entry *entry,
12 enum jump_label_type type)
13{
14 u32 val;
15 u32 *insn = (u32 *) (unsigned long) entry->code;
16
17 if (type == JUMP_LABEL_ENABLE) {
18 s32 off = (s32)entry->target - (s32)entry->code;
19
20#ifdef CONFIG_SPARC64
21 /* ba,pt %xcc, . + (off << 2) */
22 val = 0x10680000 | ((u32) off >> 2);
23#else
24 /* ba . + (off << 2) */
25 val = 0x10800000 | ((u32) off >> 2);
26#endif
27 } else {
28 val = 0x01000000;
29 }
30
31 get_online_cpus();
32 mutex_lock(&text_mutex);
33 *insn = val;
34 flushi(insn);
35 mutex_unlock(&text_mutex);
36 put_online_cpus();
37}
38
39void arch_jump_label_text_poke_early(jump_label_t addr)
40{
41 u32 *insn_p = (u32 *) (unsigned long) addr;
42
43 *insn_p = 0x01000000;
44 flushi(insn_p);
45}
46
47#endif
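
The 0x10680000 opcode above is "ba,pt %xcc" with a signed 19-bit word displacement, so an enabled jump label can only reach targets within roughly +/- 1 MiB of the patched site; the code relies on that implicitly. A sketch of the encoding with the range checks spelled out:

static u32 sketch_ba_pt_xcc(s32 byte_off)
{
	BUG_ON(byte_off & 3);		/* sparc instructions are word aligned */
	BUG_ON(byte_off < -(1 << 20) || byte_off >= (1 << 20));
	return 0x10680000u | (((u32)byte_off >> 2) & 0x7ffff);
}
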
diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c
index f848aadf54dc..ee3c7dde8d9f 100644
--- a/arch/sparc/kernel/module.c
+++ b/arch/sparc/kernel/module.c
@@ -18,6 +18,9 @@
18#include <asm/spitfire.h> 18#include <asm/spitfire.h>
19 19
20#ifdef CONFIG_SPARC64 20#ifdef CONFIG_SPARC64
21
22#include <linux/jump_label.h>
23
21static void *module_map(unsigned long size) 24static void *module_map(unsigned long size)
22{ 25{
23 struct vm_struct *area; 26 struct vm_struct *area;
@@ -227,6 +230,9 @@ int module_finalize(const Elf_Ehdr *hdr,
227 const Elf_Shdr *sechdrs, 230 const Elf_Shdr *sechdrs,
228 struct module *me) 231 struct module *me)
229{ 232{
233 /* make jump label nops */
234 jump_label_apply_nops(me);
235
230 /* Cheetah's I-cache is fully coherent. */ 236 /* Cheetah's I-cache is fully coherent. */
231 if (tlb_type == spitfire) { 237 if (tlb_type == spitfire) {
232 unsigned long va; 238 unsigned long va;
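
jump_label_apply_nops() runs before the module executes, so every JUMP_LABEL site starts out as a safe nop rather than whatever bytes the compiler emitted. A sketch of what it is expected to do, with the module fields assumed from the matching include/linux/module.h change:

void sketch_jump_label_apply_nops(struct module *mod)
{
	struct jump_entry *iter;

	if (!mod->num_jump_entries)
		return;
	for (iter = mod->jump_entries;
	     iter < mod->jump_entries + mod->num_jump_entries; iter++)
		arch_jump_label_text_poke_early(iter->code);
}
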
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 6318e622cfb0..0d6deb55a2ae 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -658,13 +658,16 @@ static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr)
658 658
659 enc = perf_event_get_enc(cpuc->events[i]); 659 enc = perf_event_get_enc(cpuc->events[i]);
660 pcr &= ~mask_for_index(idx); 660 pcr &= ~mask_for_index(idx);
661 pcr |= event_encoding(enc, idx); 661 if (hwc->state & PERF_HES_STOPPED)
662 pcr |= nop_for_index(idx);
663 else
664 pcr |= event_encoding(enc, idx);
662 } 665 }
663out: 666out:
664 return pcr; 667 return pcr;
665} 668}
666 669
667void hw_perf_enable(void) 670static void sparc_pmu_enable(struct pmu *pmu)
668{ 671{
669 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 672 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
670 u64 pcr; 673 u64 pcr;
@@ -691,7 +694,7 @@ void hw_perf_enable(void)
691 pcr_ops->write(cpuc->pcr); 694 pcr_ops->write(cpuc->pcr);
692} 695}
693 696
694void hw_perf_disable(void) 697static void sparc_pmu_disable(struct pmu *pmu)
695{ 698{
696 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 699 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
697 u64 val; 700 u64 val;
@@ -710,19 +713,65 @@ void hw_perf_disable(void)
710 pcr_ops->write(cpuc->pcr); 713 pcr_ops->write(cpuc->pcr);
711} 714}
712 715
713static void sparc_pmu_disable(struct perf_event *event) 716static int active_event_index(struct cpu_hw_events *cpuc,
717 struct perf_event *event)
718{
719 int i;
720
721 for (i = 0; i < cpuc->n_events; i++) {
722 if (cpuc->event[i] == event)
723 break;
724 }
725 BUG_ON(i == cpuc->n_events);
726 return cpuc->current_idx[i];
727}
728
729static void sparc_pmu_start(struct perf_event *event, int flags)
730{
731 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
732 int idx = active_event_index(cpuc, event);
733
734 if (flags & PERF_EF_RELOAD) {
735 WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
736 sparc_perf_event_set_period(event, &event->hw, idx);
737 }
738
739 event->hw.state = 0;
740
741 sparc_pmu_enable_event(cpuc, &event->hw, idx);
742}
743
744static void sparc_pmu_stop(struct perf_event *event, int flags)
745{
746 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
747 int idx = active_event_index(cpuc, event);
748
749 if (!(event->hw.state & PERF_HES_STOPPED)) {
750 sparc_pmu_disable_event(cpuc, &event->hw, idx);
751 event->hw.state |= PERF_HES_STOPPED;
752 }
753
754 if (!(event->hw.state & PERF_HES_UPTODATE) && (flags & PERF_EF_UPDATE)) {
755 sparc_perf_event_update(event, &event->hw, idx);
756 event->hw.state |= PERF_HES_UPTODATE;
757 }
758}
759
760static void sparc_pmu_del(struct perf_event *event, int _flags)
714{ 761{
715 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 762 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
716 struct hw_perf_event *hwc = &event->hw;
717 unsigned long flags; 763 unsigned long flags;
718 int i; 764 int i;
719 765
720 local_irq_save(flags); 766 local_irq_save(flags);
721 perf_disable(); 767 perf_pmu_disable(event->pmu);
722 768
723 for (i = 0; i < cpuc->n_events; i++) { 769 for (i = 0; i < cpuc->n_events; i++) {
724 if (event == cpuc->event[i]) { 770 if (event == cpuc->event[i]) {
725 int idx = cpuc->current_idx[i]; 771 /* Absorb the final count and turn off the
772 * event.
773 */
774 sparc_pmu_stop(event, PERF_EF_UPDATE);
726 775
727 /* Shift remaining entries down into 776 /* Shift remaining entries down into
728 * the existing slot. 777 * the existing slot.
@@ -734,13 +783,6 @@ static void sparc_pmu_disable(struct perf_event *event)
734 cpuc->current_idx[i]; 783 cpuc->current_idx[i];
735 } 784 }
736 785
737 /* Absorb the final count and turn off the
738 * event.
739 */
740 sparc_pmu_disable_event(cpuc, hwc, idx);
741 barrier();
742 sparc_perf_event_update(event, hwc, idx);
743
744 perf_event_update_userpage(event); 786 perf_event_update_userpage(event);
745 787
746 cpuc->n_events--; 788 cpuc->n_events--;
@@ -748,23 +790,10 @@ static void sparc_pmu_disable(struct perf_event *event)
748 } 790 }
749 } 791 }
750 792
751 perf_enable(); 793 perf_pmu_enable(event->pmu);
752 local_irq_restore(flags); 794 local_irq_restore(flags);
753} 795}
754 796
755static int active_event_index(struct cpu_hw_events *cpuc,
756 struct perf_event *event)
757{
758 int i;
759
760 for (i = 0; i < cpuc->n_events; i++) {
761 if (cpuc->event[i] == event)
762 break;
763 }
764 BUG_ON(i == cpuc->n_events);
765 return cpuc->current_idx[i];
766}
767
768static void sparc_pmu_read(struct perf_event *event) 797static void sparc_pmu_read(struct perf_event *event)
769{ 798{
770 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 799 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -774,15 +803,6 @@ static void sparc_pmu_read(struct perf_event *event)
774 sparc_perf_event_update(event, hwc, idx); 803 sparc_perf_event_update(event, hwc, idx);
775} 804}
776 805
777static void sparc_pmu_unthrottle(struct perf_event *event)
778{
779 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
780 int idx = active_event_index(cpuc, event);
781 struct hw_perf_event *hwc = &event->hw;
782
783 sparc_pmu_enable_event(cpuc, hwc, idx);
784}
785
786static atomic_t active_events = ATOMIC_INIT(0); 806static atomic_t active_events = ATOMIC_INIT(0);
787static DEFINE_MUTEX(pmc_grab_mutex); 807static DEFINE_MUTEX(pmc_grab_mutex);
788 808
@@ -877,7 +897,7 @@ static int sparc_check_constraints(struct perf_event **evts,
877 if (!n_ev) 897 if (!n_ev)
878 return 0; 898 return 0;
879 899
880 if (n_ev > perf_max_events) 900 if (n_ev > MAX_HWEVENTS)
881 return -1; 901 return -1;
882 902
883 msk0 = perf_event_get_msk(events[0]); 903 msk0 = perf_event_get_msk(events[0]);
@@ -984,23 +1004,27 @@ static int collect_events(struct perf_event *group, int max_count,
984 return n; 1004 return n;
985} 1005}
986 1006
987static int sparc_pmu_enable(struct perf_event *event) 1007static int sparc_pmu_add(struct perf_event *event, int ef_flags)
988{ 1008{
989 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1009 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
990 int n0, ret = -EAGAIN; 1010 int n0, ret = -EAGAIN;
991 unsigned long flags; 1011 unsigned long flags;
992 1012
993 local_irq_save(flags); 1013 local_irq_save(flags);
994 perf_disable(); 1014 perf_pmu_disable(event->pmu);
995 1015
996 n0 = cpuc->n_events; 1016 n0 = cpuc->n_events;
997 if (n0 >= perf_max_events) 1017 if (n0 >= MAX_HWEVENTS)
998 goto out; 1018 goto out;
999 1019
1000 cpuc->event[n0] = event; 1020 cpuc->event[n0] = event;
1001 cpuc->events[n0] = event->hw.event_base; 1021 cpuc->events[n0] = event->hw.event_base;
1002 cpuc->current_idx[n0] = PIC_NO_INDEX; 1022 cpuc->current_idx[n0] = PIC_NO_INDEX;
1003 1023
1024 event->hw.state = PERF_HES_UPTODATE;
1025 if (!(ef_flags & PERF_EF_START))
1026 event->hw.state |= PERF_HES_STOPPED;
1027
1004 /* 1028 /*
1005 * If group events scheduling transaction was started, 1029 * If group events scheduling transaction was started,
1006 * skip the schedulability test here, it will be performed 1030
@@ -1020,12 +1044,12 @@ nocheck:
1020 1044
1021 ret = 0; 1045 ret = 0;
1022out: 1046out:
1023 perf_enable(); 1047 perf_pmu_enable(event->pmu);
1024 local_irq_restore(flags); 1048 local_irq_restore(flags);
1025 return ret; 1049 return ret;
1026} 1050}
1027 1051
1028static int __hw_perf_event_init(struct perf_event *event) 1052static int sparc_pmu_event_init(struct perf_event *event)
1029{ 1053{
1030 struct perf_event_attr *attr = &event->attr; 1054 struct perf_event_attr *attr = &event->attr;
1031 struct perf_event *evts[MAX_HWEVENTS]; 1055 struct perf_event *evts[MAX_HWEVENTS];
@@ -1038,22 +1062,33 @@ static int __hw_perf_event_init(struct perf_event *event)
1038 if (atomic_read(&nmi_active) < 0) 1062 if (atomic_read(&nmi_active) < 0)
1039 return -ENODEV; 1063 return -ENODEV;
1040 1064
1041 pmap = NULL; 1065 switch (attr->type) {
1042 if (attr->type == PERF_TYPE_HARDWARE) { 1066 case PERF_TYPE_HARDWARE:
1043 if (attr->config >= sparc_pmu->max_events) 1067 if (attr->config >= sparc_pmu->max_events)
1044 return -EINVAL; 1068 return -EINVAL;
1045 pmap = sparc_pmu->event_map(attr->config); 1069 pmap = sparc_pmu->event_map(attr->config);
1046 } else if (attr->type == PERF_TYPE_HW_CACHE) { 1070 break;
1071
1072 case PERF_TYPE_HW_CACHE:
1047 pmap = sparc_map_cache_event(attr->config); 1073 pmap = sparc_map_cache_event(attr->config);
1048 if (IS_ERR(pmap)) 1074 if (IS_ERR(pmap))
1049 return PTR_ERR(pmap); 1075 return PTR_ERR(pmap);
1050 } else if (attr->type != PERF_TYPE_RAW) 1076 break;
1051 return -EOPNOTSUPP; 1077
1078 case PERF_TYPE_RAW:
1079 pmap = NULL;
1080 break;
1081
1082 default:
1083 return -ENOENT;
1084
1085 }
1052 1086
1053 if (pmap) { 1087 if (pmap) {
1054 hwc->event_base = perf_event_encode(pmap); 1088 hwc->event_base = perf_event_encode(pmap);
1055 } else { 1089 } else {
1056 /* User gives us "(encoding << 16) | pic_mask" for 1090 /*
1091 * User gives us "(encoding << 16) | pic_mask" for
1057 * PERF_TYPE_RAW events. 1092 * PERF_TYPE_RAW events.
1058 */ 1093 */
1059 hwc->event_base = attr->config; 1094 hwc->event_base = attr->config;
@@ -1071,7 +1106,7 @@ static int __hw_perf_event_init(struct perf_event *event)
1071 n = 0; 1106 n = 0;
1072 if (event->group_leader != event) { 1107 if (event->group_leader != event) {
1073 n = collect_events(event->group_leader, 1108 n = collect_events(event->group_leader,
1074 perf_max_events - 1, 1109 MAX_HWEVENTS - 1,
1075 evts, events, current_idx_dmy); 1110 evts, events, current_idx_dmy);
1076 if (n < 0) 1111 if (n < 0)
1077 return -EINVAL; 1112 return -EINVAL;
@@ -1107,10 +1142,11 @@ static int __hw_perf_event_init(struct perf_event *event)
1107 * Set the flag to make pmu::enable() not perform the 1142 * Set the flag to make pmu::enable() not perform the
1108 * schedulability test, it will be performed at commit time 1143 * schedulability test, it will be performed at commit time
1109 */ 1144 */
1110static void sparc_pmu_start_txn(const struct pmu *pmu) 1145static void sparc_pmu_start_txn(struct pmu *pmu)
1111{ 1146{
1112 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 1147 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
1113 1148
1149 perf_pmu_disable(pmu);
1114 cpuhw->group_flag |= PERF_EVENT_TXN; 1150 cpuhw->group_flag |= PERF_EVENT_TXN;
1115} 1151}
1116 1152
@@ -1119,11 +1155,12 @@ static void sparc_pmu_start_txn(const struct pmu *pmu)
1119 * Clear the flag and pmu::enable() will perform the 1155 * Clear the flag and pmu::enable() will perform the
1120 * schedulability test. 1156 * schedulability test.
1121 */ 1157 */
1122static void sparc_pmu_cancel_txn(const struct pmu *pmu) 1158static void sparc_pmu_cancel_txn(struct pmu *pmu)
1123{ 1159{
1124 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 1160 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
1125 1161
1126 cpuhw->group_flag &= ~PERF_EVENT_TXN; 1162 cpuhw->group_flag &= ~PERF_EVENT_TXN;
1163 perf_pmu_enable(pmu);
1127} 1164}
1128 1165
1129/* 1166/*
@@ -1131,7 +1168,7 @@ static void sparc_pmu_cancel_txn(const struct pmu *pmu)
1131 * Perform the group schedulability test as a whole 1168 * Perform the group schedulability test as a whole
1132 * Return 0 if success 1169 * Return 0 if success
1133 */ 1170 */
1134static int sparc_pmu_commit_txn(const struct pmu *pmu) 1171static int sparc_pmu_commit_txn(struct pmu *pmu)
1135{ 1172{
1136 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1173 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1137 int n; 1174 int n;
@@ -1147,28 +1184,24 @@ static int sparc_pmu_commit_txn(const struct pmu *pmu)
1147 return -EAGAIN; 1184 return -EAGAIN;
1148 1185
1149 cpuc->group_flag &= ~PERF_EVENT_TXN; 1186 cpuc->group_flag &= ~PERF_EVENT_TXN;
1187 perf_pmu_enable(pmu);
1150 return 0; 1188 return 0;
1151} 1189}
1152 1190
1153static const struct pmu pmu = { 1191static struct pmu pmu = {
1154 .enable = sparc_pmu_enable, 1192 .pmu_enable = sparc_pmu_enable,
1155 .disable = sparc_pmu_disable, 1193 .pmu_disable = sparc_pmu_disable,
1194 .event_init = sparc_pmu_event_init,
1195 .add = sparc_pmu_add,
1196 .del = sparc_pmu_del,
1197 .start = sparc_pmu_start,
1198 .stop = sparc_pmu_stop,
1156 .read = sparc_pmu_read, 1199 .read = sparc_pmu_read,
1157 .unthrottle = sparc_pmu_unthrottle,
1158 .start_txn = sparc_pmu_start_txn, 1200 .start_txn = sparc_pmu_start_txn,
1159 .cancel_txn = sparc_pmu_cancel_txn, 1201 .cancel_txn = sparc_pmu_cancel_txn,
1160 .commit_txn = sparc_pmu_commit_txn, 1202 .commit_txn = sparc_pmu_commit_txn,
1161}; 1203};
1162 1204
1163const struct pmu *hw_perf_event_init(struct perf_event *event)
1164{
1165 int err = __hw_perf_event_init(event);
1166
1167 if (err)
1168 return ERR_PTR(err);
1169 return &pmu;
1170}
1171
1172void perf_event_print_debug(void) 1205void perf_event_print_debug(void)
1173{ 1206{
1174 unsigned long flags; 1207 unsigned long flags;
@@ -1244,7 +1277,7 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
1244 continue; 1277 continue;
1245 1278
1246 if (perf_event_overflow(event, 1, &data, regs)) 1279 if (perf_event_overflow(event, 1, &data, regs))
1247 sparc_pmu_disable_event(cpuc, hwc, idx); 1280 sparc_pmu_stop(event, 0);
1248 } 1281 }
1249 1282
1250 return NOTIFY_STOP; 1283 return NOTIFY_STOP;
@@ -1285,28 +1318,21 @@ void __init init_hw_perf_events(void)
1285 1318
1286 pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type); 1319 pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type);
1287 1320
1288 /* All sparc64 PMUs currently have 2 events. */ 1321 perf_pmu_register(&pmu);
1289 perf_max_events = 2;
1290
1291 register_die_notifier(&perf_event_nmi_notifier); 1322 register_die_notifier(&perf_event_nmi_notifier);
1292} 1323}
1293 1324
1294static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip) 1325void perf_callchain_kernel(struct perf_callchain_entry *entry,
1295{ 1326 struct pt_regs *regs)
1296 if (entry->nr < PERF_MAX_STACK_DEPTH)
1297 entry->ip[entry->nr++] = ip;
1298}
1299
1300static void perf_callchain_kernel(struct pt_regs *regs,
1301 struct perf_callchain_entry *entry)
1302{ 1327{
1303 unsigned long ksp, fp; 1328 unsigned long ksp, fp;
1304#ifdef CONFIG_FUNCTION_GRAPH_TRACER 1329#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1305 int graph = 0; 1330 int graph = 0;
1306#endif 1331#endif
1307 1332
1308 callchain_store(entry, PERF_CONTEXT_KERNEL); 1333 stack_trace_flush();
1309 callchain_store(entry, regs->tpc); 1334
1335 perf_callchain_store(entry, regs->tpc);
1310 1336
1311 ksp = regs->u_regs[UREG_I6]; 1337 ksp = regs->u_regs[UREG_I6];
1312 fp = ksp + STACK_BIAS; 1338 fp = ksp + STACK_BIAS;
@@ -1330,13 +1356,13 @@ static void perf_callchain_kernel(struct pt_regs *regs,
1330 pc = sf->callers_pc; 1356 pc = sf->callers_pc;
1331 fp = (unsigned long)sf->fp + STACK_BIAS; 1357 fp = (unsigned long)sf->fp + STACK_BIAS;
1332 } 1358 }
1333 callchain_store(entry, pc); 1359 perf_callchain_store(entry, pc);
1334#ifdef CONFIG_FUNCTION_GRAPH_TRACER 1360#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1335 if ((pc + 8UL) == (unsigned long) &return_to_handler) { 1361 if ((pc + 8UL) == (unsigned long) &return_to_handler) {
1336 int index = current->curr_ret_stack; 1362 int index = current->curr_ret_stack;
1337 if (current->ret_stack && index >= graph) { 1363 if (current->ret_stack && index >= graph) {
1338 pc = current->ret_stack[index - graph].ret; 1364 pc = current->ret_stack[index - graph].ret;
1339 callchain_store(entry, pc); 1365 perf_callchain_store(entry, pc);
1340 graph++; 1366 graph++;
1341 } 1367 }
1342 } 1368 }
@@ -1344,13 +1370,12 @@ static void perf_callchain_kernel(struct pt_regs *regs,
1344 } while (entry->nr < PERF_MAX_STACK_DEPTH); 1370 } while (entry->nr < PERF_MAX_STACK_DEPTH);
1345} 1371}
1346 1372
1347static void perf_callchain_user_64(struct pt_regs *regs, 1373static void perf_callchain_user_64(struct perf_callchain_entry *entry,
1348 struct perf_callchain_entry *entry) 1374 struct pt_regs *regs)
1349{ 1375{
1350 unsigned long ufp; 1376 unsigned long ufp;
1351 1377
1352 callchain_store(entry, PERF_CONTEXT_USER); 1378 perf_callchain_store(entry, regs->tpc);
1353 callchain_store(entry, regs->tpc);
1354 1379
1355 ufp = regs->u_regs[UREG_I6] + STACK_BIAS; 1380 ufp = regs->u_regs[UREG_I6] + STACK_BIAS;
1356 do { 1381 do {
@@ -1363,17 +1388,16 @@ static void perf_callchain_user_64(struct pt_regs *regs,
1363 1388
1364 pc = sf.callers_pc; 1389 pc = sf.callers_pc;
1365 ufp = (unsigned long)sf.fp + STACK_BIAS; 1390 ufp = (unsigned long)sf.fp + STACK_BIAS;
1366 callchain_store(entry, pc); 1391 perf_callchain_store(entry, pc);
1367 } while (entry->nr < PERF_MAX_STACK_DEPTH); 1392 } while (entry->nr < PERF_MAX_STACK_DEPTH);
1368} 1393}
1369 1394
1370static void perf_callchain_user_32(struct pt_regs *regs, 1395static void perf_callchain_user_32(struct perf_callchain_entry *entry,
1371 struct perf_callchain_entry *entry) 1396 struct pt_regs *regs)
1372{ 1397{
1373 unsigned long ufp; 1398 unsigned long ufp;
1374 1399
1375 callchain_store(entry, PERF_CONTEXT_USER); 1400 perf_callchain_store(entry, regs->tpc);
1376 callchain_store(entry, regs->tpc);
1377 1401
1378 ufp = regs->u_regs[UREG_I6] & 0xffffffffUL; 1402 ufp = regs->u_regs[UREG_I6] & 0xffffffffUL;
1379 do { 1403 do {
@@ -1386,34 +1410,16 @@ static void perf_callchain_user_32(struct pt_regs *regs,
1386 1410
1387 pc = sf.callers_pc; 1411 pc = sf.callers_pc;
1388 ufp = (unsigned long)sf.fp; 1412 ufp = (unsigned long)sf.fp;
1389 callchain_store(entry, pc); 1413 perf_callchain_store(entry, pc);
1390 } while (entry->nr < PERF_MAX_STACK_DEPTH); 1414 } while (entry->nr < PERF_MAX_STACK_DEPTH);
1391} 1415}
1392 1416
1393/* Like powerpc we can't get PMU interrupts within the PMU handler, 1417void
1394 * so no need for separate NMI and IRQ chains as on x86. 1418perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
1395 */
1396static DEFINE_PER_CPU(struct perf_callchain_entry, callchain);
1397
1398struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
1399{ 1419{
1400 struct perf_callchain_entry *entry = &__get_cpu_var(callchain); 1420 flushw_user();
1401 1421 if (test_thread_flag(TIF_32BIT))
1402 entry->nr = 0; 1422 perf_callchain_user_32(entry, regs);
1403 if (!user_mode(regs)) { 1423 else
1404 stack_trace_flush(); 1424 perf_callchain_user_64(entry, regs);
1405 perf_callchain_kernel(regs, entry);
1406 if (current->mm)
1407 regs = task_pt_regs(current);
1408 else
1409 regs = NULL;
1410 }
1411 if (regs) {
1412 flushw_user();
1413 if (test_thread_flag(TIF_32BIT))
1414 perf_callchain_user_32(regs, entry);
1415 else
1416 perf_callchain_user_64(regs, entry);
1417 }
1418 return entry;
1419} 1425}
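
Note how the txn hooks above now pair with perf_pmu_disable()/perf_pmu_enable() instead of relying on the core to have disabled the PMU first. A simplified sketch of how the core is expected to schedule a group through them (error unwinding of already-added siblings elided):

static int sketch_group_sched_in(struct pmu *pmu, struct perf_event *leader)
{
	struct perf_event *sub;

	pmu->start_txn(pmu);			/* disables the PMU as a side effect */
	if (pmu->add(leader, PERF_EF_START))
		goto fail;
	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
		if (pmu->add(sub, PERF_EF_START))
			goto fail;
	}
	if (!pmu->commit_txn(pmu))		/* schedulability test; re-enables */
		return 0;
fail:
	pmu->cancel_txn(pmu);			/* also re-enables the PMU */
	return -EAGAIN;
}
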
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cea0cd9a316f..b431a0824a93 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -59,6 +59,7 @@ config X86
59 select ANON_INODES 59 select ANON_INODES
60 select HAVE_ARCH_KMEMCHECK 60 select HAVE_ARCH_KMEMCHECK
61 select HAVE_USER_RETURN_NOTIFIER 61 select HAVE_USER_RETURN_NOTIFIER
62 select HAVE_ARCH_JUMP_LABEL
62 63
63config INSTRUCTION_DECODER 64config INSTRUCTION_DECODER
64 def_bool (KPROBES || PERF_EVENTS) 65 def_bool (KPROBES || PERF_EVENTS)
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index bc6abb7bc7ee..76561d20ea2f 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -4,6 +4,7 @@
4#include <linux/types.h> 4#include <linux/types.h>
5#include <linux/stddef.h> 5#include <linux/stddef.h>
6#include <linux/stringify.h> 6#include <linux/stringify.h>
7#include <linux/jump_label.h>
7#include <asm/asm.h> 8#include <asm/asm.h>
8 9
9/* 10/*
@@ -160,6 +161,8 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
160#define __parainstructions_end NULL 161#define __parainstructions_end NULL
161#endif 162#endif
162 163
164extern void *text_poke_early(void *addr, const void *opcode, size_t len);
165
163/* 166/*
164 * Clear and restore the kernel write-protection flag on the local CPU. 167 * Clear and restore the kernel write-protection flag on the local CPU.
165 * Allows the kernel to edit read-only pages. 168 * Allows the kernel to edit read-only pages.
@@ -180,4 +183,12 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
180extern void *text_poke(void *addr, const void *opcode, size_t len); 183extern void *text_poke(void *addr, const void *opcode, size_t len);
181extern void *text_poke_smp(void *addr, const void *opcode, size_t len); 184extern void *text_poke_smp(void *addr, const void *opcode, size_t len);
182 185
186#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
187#define IDEAL_NOP_SIZE_5 5
188extern unsigned char ideal_nop5[IDEAL_NOP_SIZE_5];
189extern void arch_init_ideal_nop5(void);
190#else
191static inline void arch_init_ideal_nop5(void) {}
192#endif
193
183#endif /* _ASM_X86_ALTERNATIVE_H */ 194#endif /* _ASM_X86_ALTERNATIVE_H */
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
new file mode 100644
index 000000000000..f52d42e80585
--- /dev/null
+++ b/arch/x86/include/asm/jump_label.h
@@ -0,0 +1,37 @@
1#ifndef _ASM_X86_JUMP_LABEL_H
2#define _ASM_X86_JUMP_LABEL_H
3
4#ifdef __KERNEL__
5
6#include <linux/types.h>
7#include <asm/nops.h>
8
9#define JUMP_LABEL_NOP_SIZE 5
10
11# define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"
12
13# define JUMP_LABEL(key, label) \
14 do { \
15 asm goto("1:" \
16 JUMP_LABEL_INITIAL_NOP \
17 ".pushsection __jump_table, \"a\" \n\t"\
18 _ASM_PTR "1b, %l[" #label "], %c0 \n\t" \
19 ".popsection \n\t" \
20 : : "i" (key) : : label); \
21 } while (0)
22
23#endif /* __KERNEL__ */
24
25#ifdef CONFIG_X86_64
26typedef u64 jump_label_t;
27#else
28typedef u32 jump_label_t;
29#endif
30
31struct jump_entry {
32 jump_label_t code;
33 jump_label_t target;
34 jump_label_t key;
35};
36
37#endif
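
JUMP_LABEL_INITIAL_NOP is ".byte 0xe9; .long 0", i.e. "jmp .+5": a five-byte instruction with no effect that later boot code can overwrite with ideal_nop5. Enabling a site then means writing the same 0xe9 opcode with a real displacement. A sketch of that patching arithmetic (cf. the jump_label.o added to the x86 Makefile below):

union jump_code_union {
	char code[JUMP_LABEL_NOP_SIZE];
	struct {
		char jump;	/* 0xe9: jmp rel32 */
		int offset;	/* target - (site + 5) */
	} __attribute__((packed));
};

static void sketch_make_branch(const struct jump_entry *entry, void *buf)
{
	union jump_code_union code;

	code.jump = 0xe9;
	code.offset = entry->target - (entry->code + JUMP_LABEL_NOP_SIZE);
	memcpy(buf, &code, JUMP_LABEL_NOP_SIZE);
}
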
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index def500776b16..a70cd216be5d 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -36,19 +36,6 @@
36#define P4_ESCR_EMASK(v) ((v) << P4_ESCR_EVENTMASK_SHIFT) 36#define P4_ESCR_EMASK(v) ((v) << P4_ESCR_EVENTMASK_SHIFT)
37#define P4_ESCR_TAG(v) ((v) << P4_ESCR_TAG_SHIFT) 37#define P4_ESCR_TAG(v) ((v) << P4_ESCR_TAG_SHIFT)
38 38
39/* Non HT mask */
40#define P4_ESCR_MASK \
41 (P4_ESCR_EVENT_MASK | \
42 P4_ESCR_EVENTMASK_MASK | \
43 P4_ESCR_TAG_MASK | \
44 P4_ESCR_TAG_ENABLE | \
45 P4_ESCR_T0_OS | \
46 P4_ESCR_T0_USR)
47
48/* HT mask */
49#define P4_ESCR_MASK_HT \
50 (P4_ESCR_MASK | P4_ESCR_T1_OS | P4_ESCR_T1_USR)
51
52#define P4_CCCR_OVF 0x80000000U 39#define P4_CCCR_OVF 0x80000000U
53#define P4_CCCR_CASCADE 0x40000000U 40#define P4_CCCR_CASCADE 0x40000000U
54#define P4_CCCR_OVF_PMI_T0 0x04000000U 41#define P4_CCCR_OVF_PMI_T0 0x04000000U
@@ -70,23 +57,6 @@
70#define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT) 57#define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT)
71#define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT) 58#define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT)
72 59
73/* Non HT mask */
74#define P4_CCCR_MASK \
75 (P4_CCCR_OVF | \
76 P4_CCCR_CASCADE | \
77 P4_CCCR_OVF_PMI_T0 | \
78 P4_CCCR_FORCE_OVF | \
79 P4_CCCR_EDGE | \
80 P4_CCCR_THRESHOLD_MASK | \
81 P4_CCCR_COMPLEMENT | \
82 P4_CCCR_COMPARE | \
83 P4_CCCR_ESCR_SELECT_MASK | \
84 P4_CCCR_ENABLE)
85
86/* HT mask */
87#define P4_CCCR_MASK_HT \
88 (P4_CCCR_MASK | P4_CCCR_OVF_PMI_T1 | P4_CCCR_THREAD_ANY)
89
90#define P4_GEN_ESCR_EMASK(class, name, bit) \ 60#define P4_GEN_ESCR_EMASK(class, name, bit) \
91 class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT) 61 class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT)
92#define P4_ESCR_EMASK_BIT(class, name) class##__##name 62#define P4_ESCR_EMASK_BIT(class, name) class##__##name
@@ -127,6 +97,28 @@
127#define P4_CONFIG_HT_SHIFT 63 97#define P4_CONFIG_HT_SHIFT 63
128#define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) 98#define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT)
129 99
100/*
101 * The bits we allow to pass for RAW events
102 */
103#define P4_CONFIG_MASK_ESCR \
104 P4_ESCR_EVENT_MASK | \
105 P4_ESCR_EVENTMASK_MASK | \
106 P4_ESCR_TAG_MASK | \
107 P4_ESCR_TAG_ENABLE
108
109#define P4_CONFIG_MASK_CCCR \
110 P4_CCCR_EDGE | \
111 P4_CCCR_THRESHOLD_MASK | \
112 P4_CCCR_COMPLEMENT | \
113 P4_CCCR_COMPARE | \
114 P4_CCCR_THREAD_ANY | \
115 P4_CCCR_RESERVED
116
117/* some dangerous bits are reserved for kernel internals */
118#define P4_CONFIG_MASK \
119 (p4_config_pack_escr(P4_CONFIG_MASK_ESCR)) | \
120 (p4_config_pack_cccr(P4_CONFIG_MASK_CCCR))
121
130static inline bool p4_is_event_cascaded(u64 config) 122static inline bool p4_is_event_cascaded(u64 config)
131{ 123{
132 u32 cccr = p4_config_unpack_cccr(config); 124 u32 cccr = p4_config_unpack_cccr(config);
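
With these masks, a PERF_TYPE_RAW config from userspace can be reduced to its permitted bits in one step. One caveat for a sketch like the following: P4_CONFIG_MASK expands without outer parentheses, so bind it with &= (or parenthesize it) rather than embedding it in a larger expression, where the stray | would change the result:

static u64 sketch_p4_sanitize_raw(u64 config)
{
	config &= P4_CONFIG_MASK;	/* drop reserved/kernel-internal bits */
	return config;
}
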
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index fedf32a8c3ec..9d3f485e5dd0 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -34,7 +34,7 @@ GCOV_PROFILE_paravirt.o := n
34obj-y := process_$(BITS).o signal.o entry_$(BITS).o 34obj-y := process_$(BITS).o signal.o entry_$(BITS).o
35obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o 35obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
36obj-y += time.o ioport.o ldt.o dumpstack.o 36obj-y += time.o ioport.o ldt.o dumpstack.o
37obj-y += setup.o x86_init.o i8259.o irqinit.o 37obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o
38obj-$(CONFIG_X86_VISWS) += visws_quirks.o 38obj-$(CONFIG_X86_VISWS) += visws_quirks.o
39obj-$(CONFIG_X86_32) += probe_roms_32.o 39obj-$(CONFIG_X86_32) += probe_roms_32.o
40obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o 40obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index f65ab8b014c4..cb0e6d385f6d 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -195,7 +195,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len)
195 195
196extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; 196extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
197extern s32 __smp_locks[], __smp_locks_end[]; 197extern s32 __smp_locks[], __smp_locks_end[];
198static void *text_poke_early(void *addr, const void *opcode, size_t len); 198void *text_poke_early(void *addr, const void *opcode, size_t len);
199 199
200/* Replace instructions with better alternatives for this CPU type. 200/* Replace instructions with better alternatives for this CPU type.
201 This runs before SMP is initialized to avoid SMP problems with 201 This runs before SMP is initialized to avoid SMP problems with
@@ -522,7 +522,7 @@ void __init alternative_instructions(void)
522 * instructions. And on the local CPU you need to be protected against NMI or MCE 522
523 * handlers seeing an inconsistent instruction while you patch. 523 * handlers seeing an inconsistent instruction while you patch.
524 */ 524 */
525static void *__init_or_module text_poke_early(void *addr, const void *opcode, 525void *__init_or_module text_poke_early(void *addr, const void *opcode,
526 size_t len) 526 size_t len)
527{ 527{
528 unsigned long flags; 528 unsigned long flags;
@@ -641,3 +641,67 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
641 return addr; 641 return addr;
642} 642}
643 643
644#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
645
646unsigned char ideal_nop5[IDEAL_NOP_SIZE_5];
647
648void __init arch_init_ideal_nop5(void)
649{
650 extern const unsigned char ftrace_test_p6nop[];
651 extern const unsigned char ftrace_test_nop5[];
652 extern const unsigned char ftrace_test_jmp[];
653 int faulted = 0;
654
655 /*
656 * There is no good nop for all x86 archs.
657 * We will default to using the P6_NOP5, but first we
658 * will test to make sure that the nop will actually
659 * work on this CPU. If it faults, we will then
660 * fall back to a less efficient 5-byte nop. If that fails
661 * we just use a jmp as our nop. This isn't the most
662 * efficient nop, but we cannot use a multi-part nop,
663 * since we would then risk being preempted in the middle
664 * of that nop; if tracing were enabled at that point, it
665 * might cause a system crash.
666 *
667 * TODO: check the cpuid to determine the best nop.
668 */
669 asm volatile (
670 "ftrace_test_jmp:"
671 "jmp ftrace_test_p6nop\n"
672 "nop\n"
673 "nop\n"
674 "nop\n" /* 2 byte jmp + 3 bytes */
675 "ftrace_test_p6nop:"
676 P6_NOP5
677 "jmp 1f\n"
678 "ftrace_test_nop5:"
679 ".byte 0x66,0x66,0x66,0x66,0x90\n"
680 "1:"
681 ".section .fixup, \"ax\"\n"
682 "2: movl $1, %0\n"
683 " jmp ftrace_test_nop5\n"
684 "3: movl $2, %0\n"
685 " jmp 1b\n"
686 ".previous\n"
687 _ASM_EXTABLE(ftrace_test_p6nop, 2b)
688 _ASM_EXTABLE(ftrace_test_nop5, 3b)
689 : "=r"(faulted) : "0" (faulted));
690
691 switch (faulted) {
692 case 0:
693 pr_info("converting mcount calls to 0f 1f 44 00 00\n");
694 memcpy(ideal_nop5, ftrace_test_p6nop, IDEAL_NOP_SIZE_5);
695 break;
696 case 1:
697 pr_info("converting mcount calls to 66 66 66 66 90\n");
698 memcpy(ideal_nop5, ftrace_test_nop5, IDEAL_NOP_SIZE_5);
699 break;
700 case 2:
701 pr_info("converting mcount calls to jmp . + 5\n");
702 memcpy(ideal_nop5, ftrace_test_jmp, IDEAL_NOP_SIZE_5);
703 break;
704 }
705
706}
707#endif
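
Once arch_init_ideal_nop5() has picked a nop, converting a five-byte call or jmp site is a single early poke through the text_poke_early() now exported from this file (the site pointer below is hypothetical):

static void __init sketch_nop_out(void *site)
{
	text_poke_early(site, ideal_nop5, IDEAL_NOP_SIZE_5);
}
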
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 03a5b0385ad6..e2513f26ba8b 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -531,7 +531,7 @@ static int x86_pmu_hw_config(struct perf_event *event)
531/* 531/*
532 * Setup the hardware configuration for a given attr_type 532 * Setup the hardware configuration for a given attr_type
533 */ 533 */
534static int __hw_perf_event_init(struct perf_event *event) 534static int __x86_pmu_event_init(struct perf_event *event)
535{ 535{
536 int err; 536 int err;
537 537
@@ -584,7 +584,7 @@ static void x86_pmu_disable_all(void)
584 } 584 }
585} 585}
586 586
587void hw_perf_disable(void) 587static void x86_pmu_disable(struct pmu *pmu)
588{ 588{
589 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 589 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
590 590
@@ -619,7 +619,7 @@ static void x86_pmu_enable_all(int added)
619 } 619 }
620} 620}
621 621
622static const struct pmu pmu; 622static struct pmu pmu;
623 623
624static inline int is_x86_event(struct perf_event *event) 624static inline int is_x86_event(struct perf_event *event)
625{ 625{
@@ -801,10 +801,10 @@ static inline int match_prev_assignment(struct hw_perf_event *hwc,
801 hwc->last_tag == cpuc->tags[i]; 801 hwc->last_tag == cpuc->tags[i];
802} 802}
803 803
804static int x86_pmu_start(struct perf_event *event); 804static void x86_pmu_start(struct perf_event *event, int flags);
805static void x86_pmu_stop(struct perf_event *event); 805static void x86_pmu_stop(struct perf_event *event, int flags);
806 806
807void hw_perf_enable(void) 807static void x86_pmu_enable(struct pmu *pmu)
808{ 808{
809 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 809 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
810 struct perf_event *event; 810 struct perf_event *event;
@@ -840,7 +840,14 @@ void hw_perf_enable(void)
840 match_prev_assignment(hwc, cpuc, i)) 840 match_prev_assignment(hwc, cpuc, i))
841 continue; 841 continue;
842 842
843 x86_pmu_stop(event); 843 /*
844 * Ensure we don't accidentally enable a stopped
845 * counter simply because we rescheduled.
846 */
847 if (hwc->state & PERF_HES_STOPPED)
848 hwc->state |= PERF_HES_ARCH;
849
850 x86_pmu_stop(event, PERF_EF_UPDATE);
844 } 851 }
845 852
846 for (i = 0; i < cpuc->n_events; i++) { 853 for (i = 0; i < cpuc->n_events; i++) {
@@ -852,7 +859,10 @@ void hw_perf_enable(void)
852 else if (i < n_running) 859 else if (i < n_running)
853 continue; 860 continue;
854 861
855 x86_pmu_start(event); 862 if (hwc->state & PERF_HES_ARCH)
863 continue;
864
865 x86_pmu_start(event, PERF_EF_RELOAD);
856 } 866 }
857 cpuc->n_added = 0; 867 cpuc->n_added = 0;
858 perf_events_lapic_init(); 868 perf_events_lapic_init();
@@ -953,15 +963,12 @@ static void x86_pmu_enable_event(struct perf_event *event)
953} 963}
954 964
955/* 965/*
956 * activate a single event 966 * Add a single event to the PMU.
957 * 967 *
958 * The event is added to the group of enabled events 968 * The event is added to the group of enabled events
959 * but only if it can be scheduled with existing events. 969 * but only if it can be scheduled with existing events.
960 *
961 * Called with PMU disabled. If successful and return value 1,
962 * then guaranteed to call perf_enable() and hw_perf_enable()
963 */ 970 */
964static int x86_pmu_enable(struct perf_event *event) 971static int x86_pmu_add(struct perf_event *event, int flags)
965{ 972{
966 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 973 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
967 struct hw_perf_event *hwc; 974 struct hw_perf_event *hwc;
@@ -970,58 +977,67 @@ static int x86_pmu_enable(struct perf_event *event)
970 977
971 hwc = &event->hw; 978 hwc = &event->hw;
972 979
980 perf_pmu_disable(event->pmu);
973 n0 = cpuc->n_events; 981 n0 = cpuc->n_events;
974 n = collect_events(cpuc, event, false); 982 ret = n = collect_events(cpuc, event, false);
975 if (n < 0) 983 if (ret < 0)
976 return n; 984 goto out;
985
986 hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
987 if (!(flags & PERF_EF_START))
988 hwc->state |= PERF_HES_ARCH;
977 989
978 /* 990 /*
979 * If group events scheduling transaction was started, 991 * If group events scheduling transaction was started,
980 * skip the schedulability test here, it will be performed 992 * skip the schedulability test here, it will be performed
981 * at commit time(->commit_txn) as a whole 993 * at commit time (->commit_txn) as a whole
982 */ 994 */
983 if (cpuc->group_flag & PERF_EVENT_TXN) 995 if (cpuc->group_flag & PERF_EVENT_TXN)
984 goto out; 996 goto done_collect;
985 997
986 ret = x86_pmu.schedule_events(cpuc, n, assign); 998 ret = x86_pmu.schedule_events(cpuc, n, assign);
987 if (ret) 999 if (ret)
988 return ret; 1000 goto out;
989 /* 1001 /*
990 * copy new assignment, now we know it is possible 1002 * copy new assignment, now we know it is possible
991 * will be used by hw_perf_enable() 1003 * will be used by hw_perf_enable()
992 */ 1004 */
993 memcpy(cpuc->assign, assign, n*sizeof(int)); 1005 memcpy(cpuc->assign, assign, n*sizeof(int));
994 1006
995out: 1007done_collect:
996 cpuc->n_events = n; 1008 cpuc->n_events = n;
997 cpuc->n_added += n - n0; 1009 cpuc->n_added += n - n0;
998 cpuc->n_txn += n - n0; 1010 cpuc->n_txn += n - n0;
999 1011
1000 return 0; 1012 ret = 0;
1013out:
1014 perf_pmu_enable(event->pmu);
1015 return ret;
1001} 1016}
1002 1017
1003static int x86_pmu_start(struct perf_event *event) 1018static void x86_pmu_start(struct perf_event *event, int flags)
1004{ 1019{
1005 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1020 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1006 int idx = event->hw.idx; 1021 int idx = event->hw.idx;
1007 1022
1008 if (idx == -1) 1023 if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
1009 return -EAGAIN; 1024 return;
1025
1026 if (WARN_ON_ONCE(idx == -1))
1027 return;
1028
1029 if (flags & PERF_EF_RELOAD) {
1030 WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
1031 x86_perf_event_set_period(event);
1032 }
1033
1034 event->hw.state = 0;
1010 1035
1011 x86_perf_event_set_period(event);
1012 cpuc->events[idx] = event; 1036 cpuc->events[idx] = event;
1013 __set_bit(idx, cpuc->active_mask); 1037 __set_bit(idx, cpuc->active_mask);
1014 __set_bit(idx, cpuc->running); 1038 __set_bit(idx, cpuc->running);
1015 x86_pmu.enable(event); 1039 x86_pmu.enable(event);
1016 perf_event_update_userpage(event); 1040 perf_event_update_userpage(event);
1017
1018 return 0;
1019}
1020
1021static void x86_pmu_unthrottle(struct perf_event *event)
1022{
1023 int ret = x86_pmu_start(event);
1024 WARN_ON_ONCE(ret);
1025} 1041}
1026 1042
1027void perf_event_print_debug(void) 1043void perf_event_print_debug(void)
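x86_pmu_unthrottle() disappears because restarting an event no longer needs a dedicated hook: with the add/start split, a caller adds the event (optionally deferring the start) and later restarts it through ->start(). A hedged sketch of such a caller, not the core scheduler itself:

/* Illustrative caller of the split API; sched_in_event() is hypothetical. */
static int sched_in_event(struct pmu *pmu, struct perf_event *event)
{
        int err;

        /* Collect and schedule the event, but defer counting: */
        err = pmu->add(event, 0);
        if (err)
                return err;

        /* Later (e.g. on restart), reprogram the period and go: */
        pmu->start(event, PERF_EF_RELOAD);
        return 0;
}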
@@ -1078,27 +1094,29 @@ void perf_event_print_debug(void)
1078 local_irq_restore(flags); 1094 local_irq_restore(flags);
1079} 1095}
1080 1096
1081static void x86_pmu_stop(struct perf_event *event) 1097static void x86_pmu_stop(struct perf_event *event, int flags)
1082{ 1098{
1083 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1099 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1084 struct hw_perf_event *hwc = &event->hw; 1100 struct hw_perf_event *hwc = &event->hw;
1085 int idx = hwc->idx;
1086 1101
1087 if (!__test_and_clear_bit(idx, cpuc->active_mask)) 1102 if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) {
1088 return; 1103 x86_pmu.disable(event);
1089 1104 cpuc->events[hwc->idx] = NULL;
1090 x86_pmu.disable(event); 1105 WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
1091 1106 hwc->state |= PERF_HES_STOPPED;
1092 /* 1107 }
 1093 * Drain the remaining delta count out of an event
1094 * that we are disabling:
1095 */
1096 x86_perf_event_update(event);
1097 1108
1098 cpuc->events[idx] = NULL; 1109 if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
1110 /*
 1111 * Drain the remaining delta count out of an event
1112 * that we are disabling:
1113 */
1114 x86_perf_event_update(event);
1115 hwc->state |= PERF_HES_UPTODATE;
1116 }
1099} 1117}
1100 1118
1101static void x86_pmu_disable(struct perf_event *event) 1119static void x86_pmu_del(struct perf_event *event, int flags)
1102{ 1120{
1103 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1121 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1104 int i; 1122 int i;
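As rewritten, x86_pmu_stop() separates "stop counting" from "drain the count": the active_mask bit guards the hardware disable and PERF_HES_UPTODATE guards the one final x86_perf_event_update(), so repeated stops are safe. A sketch of the call patterns this enables (hypothetical callers):

/* Both sequences are now safe; the second stop only performs the drain. */
static void stop_then_remove(struct perf_event *event)
{
        x86_pmu_stop(event, 0);                 /* hw off, count left stale */
        x86_pmu_stop(event, PERF_EF_UPDATE);    /* disable is a no-op, one drain */
}

static void remove_directly(struct perf_event *event)
{
        x86_pmu_stop(event, PERF_EF_UPDATE);    /* hw off and drain in one go */
}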
@@ -1111,7 +1129,7 @@ static void x86_pmu_disable(struct perf_event *event)
1111 if (cpuc->group_flag & PERF_EVENT_TXN) 1129 if (cpuc->group_flag & PERF_EVENT_TXN)
1112 return; 1130 return;
1113 1131
1114 x86_pmu_stop(event); 1132 x86_pmu_stop(event, PERF_EF_UPDATE);
1115 1133
1116 for (i = 0; i < cpuc->n_events; i++) { 1134 for (i = 0; i < cpuc->n_events; i++) {
1117 if (event == cpuc->event_list[i]) { 1135 if (event == cpuc->event_list[i]) {
@@ -1134,7 +1152,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
1134 struct perf_sample_data data; 1152 struct perf_sample_data data;
1135 struct cpu_hw_events *cpuc; 1153 struct cpu_hw_events *cpuc;
1136 struct perf_event *event; 1154 struct perf_event *event;
1137 struct hw_perf_event *hwc;
1138 int idx, handled = 0; 1155 int idx, handled = 0;
1139 u64 val; 1156 u64 val;
1140 1157
@@ -1155,7 +1172,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
1155 } 1172 }
1156 1173
1157 event = cpuc->events[idx]; 1174 event = cpuc->events[idx];
1158 hwc = &event->hw;
1159 1175
1160 val = x86_perf_event_update(event); 1176 val = x86_perf_event_update(event);
1161 if (val & (1ULL << (x86_pmu.cntval_bits - 1))) 1177 if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
@@ -1171,7 +1187,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
1171 continue; 1187 continue;
1172 1188
1173 if (perf_event_overflow(event, 1, &data, regs)) 1189 if (perf_event_overflow(event, 1, &data, regs))
1174 x86_pmu_stop(event); 1190 x86_pmu_stop(event, 0);
1175 } 1191 }
1176 1192
1177 if (handled) 1193 if (handled)
@@ -1388,7 +1404,6 @@ void __init init_hw_perf_events(void)
1388 x86_pmu.num_counters = X86_PMC_MAX_GENERIC; 1404 x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
1389 } 1405 }
1390 x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; 1406 x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
1391 perf_max_events = x86_pmu.num_counters;
1392 1407
1393 if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { 1408 if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
1394 WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", 1409 WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
@@ -1424,6 +1439,7 @@ void __init init_hw_perf_events(void)
1424 pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); 1439 pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
1425 pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); 1440 pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);
1426 1441
1442 perf_pmu_register(&pmu);
1427 perf_cpu_notifier(x86_pmu_notifier); 1443 perf_cpu_notifier(x86_pmu_notifier);
1428} 1444}
1429 1445
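With perf_max_events gone and perf_pmu_register() called at init, a PMU is now an explicitly registered object rather than the target of a global hook. A minimal registration sketch; every name except perf_pmu_register() and the struct pmu fields is invented:

/* Hypothetical minimal PMU under the new model. */
static struct pmu my_pmu = {
        .event_init     = my_event_init,  /* return -ENOENT for foreign types */
        .add            = my_add,
        .del            = my_del,
        .start          = my_start,
        .stop           = my_stop,
        .read           = my_read,
};

static int __init my_pmu_setup(void)
{
        perf_pmu_register(&my_pmu);     /* same call init_hw_perf_events() makes */
        return 0;
}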
@@ -1437,10 +1453,11 @@ static inline void x86_pmu_read(struct perf_event *event)
1437 * Set the flag to make pmu::enable() not perform the 1453 * Set the flag to make pmu::enable() not perform the
1438 * schedulability test, it will be performed at commit time 1454 * schedulability test, it will be performed at commit time
1439 */ 1455 */
1440static void x86_pmu_start_txn(const struct pmu *pmu) 1456static void x86_pmu_start_txn(struct pmu *pmu)
1441{ 1457{
1442 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1458 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1443 1459
1460 perf_pmu_disable(pmu);
1444 cpuc->group_flag |= PERF_EVENT_TXN; 1461 cpuc->group_flag |= PERF_EVENT_TXN;
1445 cpuc->n_txn = 0; 1462 cpuc->n_txn = 0;
1446} 1463}
@@ -1450,7 +1467,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu)
1450 * Clear the flag and pmu::enable() will perform the 1467 * Clear the flag and pmu::enable() will perform the
1451 * schedulability test. 1468 * schedulability test.
1452 */ 1469 */
1453static void x86_pmu_cancel_txn(const struct pmu *pmu) 1470static void x86_pmu_cancel_txn(struct pmu *pmu)
1454{ 1471{
1455 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1472 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1456 1473
@@ -1460,6 +1477,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)
1460 */ 1477 */
1461 cpuc->n_added -= cpuc->n_txn; 1478 cpuc->n_added -= cpuc->n_txn;
1462 cpuc->n_events -= cpuc->n_txn; 1479 cpuc->n_events -= cpuc->n_txn;
1480 perf_pmu_enable(pmu);
1463} 1481}
1464 1482
1465/* 1483/*
@@ -1467,7 +1485,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)
1467 * Perform the group schedulability test as a whole 1485 * Perform the group schedulability test as a whole
1468 * Return 0 if success 1486 * Return 0 if success
1469 */ 1487 */
1470static int x86_pmu_commit_txn(const struct pmu *pmu) 1488static int x86_pmu_commit_txn(struct pmu *pmu)
1471{ 1489{
1472 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1490 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1473 int assign[X86_PMC_IDX_MAX]; 1491 int assign[X86_PMC_IDX_MAX];
@@ -1489,22 +1507,10 @@ static int x86_pmu_commit_txn(const struct pmu *pmu)
1489 memcpy(cpuc->assign, assign, n*sizeof(int)); 1507 memcpy(cpuc->assign, assign, n*sizeof(int));
1490 1508
1491 cpuc->group_flag &= ~PERF_EVENT_TXN; 1509 cpuc->group_flag &= ~PERF_EVENT_TXN;
1492 1510 perf_pmu_enable(pmu);
1493 return 0; 1511 return 0;
1494} 1512}
1495 1513
1496static const struct pmu pmu = {
1497 .enable = x86_pmu_enable,
1498 .disable = x86_pmu_disable,
1499 .start = x86_pmu_start,
1500 .stop = x86_pmu_stop,
1501 .read = x86_pmu_read,
1502 .unthrottle = x86_pmu_unthrottle,
1503 .start_txn = x86_pmu_start_txn,
1504 .cancel_txn = x86_pmu_cancel_txn,
1505 .commit_txn = x86_pmu_commit_txn,
1506};
1507
1508/* 1514/*
1509 * validate that we can schedule this event 1515 * validate that we can schedule this event
1510 */ 1516 */
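The three txn hooks now bracket the PMU disable themselves: start_txn() disables the PMU and sets PERF_EVENT_TXN, commit_txn() runs the whole-group schedulability test and re-enables on success, and cancel_txn() rolls the collected events back and re-enables. An illustrative group-scheduling caller (not the core's actual group_sched_in()):

/* Illustrative use of the transaction protocol. */
static int sched_in_group(struct pmu *pmu, struct perf_event **ev, int n)
{
        int i;

        pmu->start_txn(pmu);            /* PMU disabled, TXN flag set */

        for (i = 0; i < n; i++) {
                if (pmu->add(ev[i], PERF_EF_START))
                        goto rollback;  /* schedulability test deferred */
        }
        if (!pmu->commit_txn(pmu))      /* test the group as a whole */
                return 0;

rollback:
        while (--i >= 0)
                pmu->del(ev[i], 0);
        pmu->cancel_txn(pmu);           /* undo n_txn, PMU re-enabled */
        return -EAGAIN;
}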
@@ -1579,12 +1585,22 @@ out:
1579 return ret; 1585 return ret;
1580} 1586}
1581 1587
1582const struct pmu *hw_perf_event_init(struct perf_event *event) 1588int x86_pmu_event_init(struct perf_event *event)
1583{ 1589{
1584 const struct pmu *tmp; 1590 struct pmu *tmp;
1585 int err; 1591 int err;
1586 1592
1587 err = __hw_perf_event_init(event); 1593 switch (event->attr.type) {
1594 case PERF_TYPE_RAW:
1595 case PERF_TYPE_HARDWARE:
1596 case PERF_TYPE_HW_CACHE:
1597 break;
1598
1599 default:
1600 return -ENOENT;
1601 }
1602
1603 err = __x86_pmu_event_init(event);
1588 if (!err) { 1604 if (!err) {
1589 /* 1605 /*
1590 * we temporarily connect event to its pmu 1606 * we temporarily connect event to its pmu
@@ -1604,26 +1620,31 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
1604 if (err) { 1620 if (err) {
1605 if (event->destroy) 1621 if (event->destroy)
1606 event->destroy(event); 1622 event->destroy(event);
1607 return ERR_PTR(err);
1608 } 1623 }
1609 1624
1610 return &pmu; 1625 return err;
1611} 1626}
1612 1627
1613/* 1628static struct pmu pmu = {
1614 * callchain support 1629 .pmu_enable = x86_pmu_enable,
1615 */ 1630 .pmu_disable = x86_pmu_disable,
1616 1631
1617static inline 1632 .event_init = x86_pmu_event_init,
1618void callchain_store(struct perf_callchain_entry *entry, u64 ip)
1619{
1620 if (entry->nr < PERF_MAX_STACK_DEPTH)
1621 entry->ip[entry->nr++] = ip;
1622}
1623 1633
1624static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); 1634 .add = x86_pmu_add,
1625static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); 1635 .del = x86_pmu_del,
1636 .start = x86_pmu_start,
1637 .stop = x86_pmu_stop,
1638 .read = x86_pmu_read,
1626 1639
1640 .start_txn = x86_pmu_start_txn,
1641 .cancel_txn = x86_pmu_cancel_txn,
1642 .commit_txn = x86_pmu_commit_txn,
1643};
1644
1645/*
1646 * callchain support
1647 */
1627 1648
1628static void 1649static void
1629backtrace_warning_symbol(void *data, char *msg, unsigned long symbol) 1650backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
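The -ENOENT convention in x86_pmu_event_init() is what lets the core keep several registered PMUs and probe them in turn until one claims the event. Roughly (helper and list names assumed, not taken from this patch):

/* Hypothetical core-side probe loop; 'pmus' list and 'entry' member assumed. */
static struct pmu *probe_pmus(struct perf_event *event)
{
        struct pmu *pmu;
        int err;

        list_for_each_entry(pmu, &pmus, entry) {
                err = pmu->event_init(event);
                if (!err)
                        return pmu;             /* this PMU claimed the event */
                if (err != -ENOENT)
                        return ERR_PTR(err);    /* real failure: stop probing */
        }
        return ERR_PTR(-ENOENT);
}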
@@ -1645,7 +1666,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
1645{ 1666{
1646 struct perf_callchain_entry *entry = data; 1667 struct perf_callchain_entry *entry = data;
1647 1668
1648 callchain_store(entry, addr); 1669 perf_callchain_store(entry, addr);
1649} 1670}
1650 1671
1651static const struct stacktrace_ops backtrace_ops = { 1672static const struct stacktrace_ops backtrace_ops = {
@@ -1656,11 +1677,15 @@ static const struct stacktrace_ops backtrace_ops = {
1656 .walk_stack = print_context_stack_bp, 1677 .walk_stack = print_context_stack_bp,
1657}; 1678};
1658 1679
1659static void 1680void
1660perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) 1681perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
1661{ 1682{
1662 callchain_store(entry, PERF_CONTEXT_KERNEL); 1683 if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1663 callchain_store(entry, regs->ip); 1684 /* TODO: We don't support guest os callchain now */
1685 return;
1686 }
1687
1688 perf_callchain_store(entry, regs->ip);
1664 1689
1665 dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); 1690 dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
1666} 1691}
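perf_callchain_store() replaces each arch's private callchain_store(); judging by the helper removed above, it is the same bounds-checked append, now provided generically:

/* Minimal model of perf_callchain_store(), matching the removed helper. */
static inline void perf_callchain_store(struct perf_callchain_entry *entry, u64 ip)
{
        if (entry->nr < PERF_MAX_STACK_DEPTH)
                entry->ip[entry->nr++] = ip;
}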
@@ -1689,7 +1714,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
1689 if (fp < compat_ptr(regs->sp)) 1714 if (fp < compat_ptr(regs->sp))
1690 break; 1715 break;
1691 1716
1692 callchain_store(entry, frame.return_address); 1717 perf_callchain_store(entry, frame.return_address);
1693 fp = compat_ptr(frame.next_frame); 1718 fp = compat_ptr(frame.next_frame);
1694 } 1719 }
1695 return 1; 1720 return 1;
@@ -1702,19 +1727,20 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
1702} 1727}
1703#endif 1728#endif
1704 1729
1705static void 1730void
1706perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) 1731perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
1707{ 1732{
1708 struct stack_frame frame; 1733 struct stack_frame frame;
1709 const void __user *fp; 1734 const void __user *fp;
1710 1735
1711 if (!user_mode(regs)) 1736 if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1712 regs = task_pt_regs(current); 1737 /* TODO: We don't support guest os callchain now */
1738 return;
1739 }
1713 1740
1714 fp = (void __user *)regs->bp; 1741 fp = (void __user *)regs->bp;
1715 1742
1716 callchain_store(entry, PERF_CONTEXT_USER); 1743 perf_callchain_store(entry, regs->ip);
1717 callchain_store(entry, regs->ip);
1718 1744
1719 if (perf_callchain_user32(regs, entry)) 1745 if (perf_callchain_user32(regs, entry))
1720 return; 1746 return;
@@ -1731,52 +1757,11 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
1731 if ((unsigned long)fp < regs->sp) 1757 if ((unsigned long)fp < regs->sp)
1732 break; 1758 break;
1733 1759
1734 callchain_store(entry, frame.return_address); 1760 perf_callchain_store(entry, frame.return_address);
1735 fp = frame.next_frame; 1761 fp = frame.next_frame;
1736 } 1762 }
1737} 1763}
1738 1764
1739static void
1740perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
1741{
1742 int is_user;
1743
1744 if (!regs)
1745 return;
1746
1747 is_user = user_mode(regs);
1748
1749 if (is_user && current->state != TASK_RUNNING)
1750 return;
1751
1752 if (!is_user)
1753 perf_callchain_kernel(regs, entry);
1754
1755 if (current->mm)
1756 perf_callchain_user(regs, entry);
1757}
1758
1759struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
1760{
1761 struct perf_callchain_entry *entry;
1762
1763 if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1764 /* TODO: We don't support guest os callchain now */
1765 return NULL;
1766 }
1767
1768 if (in_nmi())
1769 entry = &__get_cpu_var(pmc_nmi_entry);
1770 else
1771 entry = &__get_cpu_var(pmc_irq_entry);
1772
1773 entry->nr = 0;
1774
1775 perf_do_callchain(regs, entry);
1776
1777 return entry;
1778}
1779
1780unsigned long perf_instruction_pointer(struct pt_regs *regs) 1765unsigned long perf_instruction_pointer(struct pt_regs *regs)
1781{ 1766{
1782 unsigned long ip; 1767 unsigned long ip;
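perf_do_callchain(), the per-context entry buffers and the PERF_CONTEXT_* stores all leave the arch file; the dispatch now lives in generic code. In rough outline (a sketch, not the generic implementation verbatim):

/* Sketch of the generic-side flow this file no longer provides. */
static void do_callchain(struct perf_callchain_entry *entry, struct pt_regs *regs)
{
        entry->nr = 0;

        perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
        perf_callchain_kernel(entry, regs);

        if (current->mm) {
                perf_callchain_store(entry, PERF_CONTEXT_USER);
                perf_callchain_user(entry, regs);
        }
}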
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index ee05c90012d2..c8f5c088cad1 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -713,18 +713,18 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
713 struct cpu_hw_events *cpuc; 713 struct cpu_hw_events *cpuc;
714 int bit, loops; 714 int bit, loops;
715 u64 status; 715 u64 status;
716 int handled = 0; 716 int handled;
717 717
718 perf_sample_data_init(&data, 0); 718 perf_sample_data_init(&data, 0);
719 719
720 cpuc = &__get_cpu_var(cpu_hw_events); 720 cpuc = &__get_cpu_var(cpu_hw_events);
721 721
722 intel_pmu_disable_all(); 722 intel_pmu_disable_all();
723 intel_pmu_drain_bts_buffer(); 723 handled = intel_pmu_drain_bts_buffer();
724 status = intel_pmu_get_status(); 724 status = intel_pmu_get_status();
725 if (!status) { 725 if (!status) {
726 intel_pmu_enable_all(0); 726 intel_pmu_enable_all(0);
727 return 0; 727 return handled;
728 } 728 }
729 729
730 loops = 0; 730 loops = 0;
@@ -763,7 +763,7 @@ again:
763 data.period = event->hw.last_period; 763 data.period = event->hw.last_period;
764 764
765 if (perf_event_overflow(event, 1, &data, regs)) 765 if (perf_event_overflow(event, 1, &data, regs))
766 x86_pmu_stop(event); 766 x86_pmu_stop(event, 0);
767 } 767 }
768 768
769 /* 769 /*
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 18018d1311cd..4977f9c400e5 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -214,7 +214,7 @@ static void intel_pmu_disable_bts(void)
214 update_debugctlmsr(debugctlmsr); 214 update_debugctlmsr(debugctlmsr);
215} 215}
216 216
217static void intel_pmu_drain_bts_buffer(void) 217static int intel_pmu_drain_bts_buffer(void)
218{ 218{
219 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 219 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
220 struct debug_store *ds = cpuc->ds; 220 struct debug_store *ds = cpuc->ds;
@@ -231,16 +231,16 @@ static void intel_pmu_drain_bts_buffer(void)
231 struct pt_regs regs; 231 struct pt_regs regs;
232 232
233 if (!event) 233 if (!event)
234 return; 234 return 0;
235 235
236 if (!ds) 236 if (!ds)
237 return; 237 return 0;
238 238
239 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; 239 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
240 top = (struct bts_record *)(unsigned long)ds->bts_index; 240 top = (struct bts_record *)(unsigned long)ds->bts_index;
241 241
242 if (top <= at) 242 if (top <= at)
243 return; 243 return 0;
244 244
245 ds->bts_index = ds->bts_buffer_base; 245 ds->bts_index = ds->bts_buffer_base;
246 246
@@ -256,7 +256,7 @@ static void intel_pmu_drain_bts_buffer(void)
256 perf_prepare_sample(&header, &data, event, &regs); 256 perf_prepare_sample(&header, &data, event, &regs);
257 257
258 if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1)) 258 if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1))
259 return; 259 return 1;
260 260
261 for (; at < top; at++) { 261 for (; at < top; at++) {
262 data.ip = at->from; 262 data.ip = at->from;
@@ -270,6 +270,7 @@ static void intel_pmu_drain_bts_buffer(void)
270 /* There's new data available. */ 270 /* There's new data available. */
271 event->hw.interrupts++; 271 event->hw.interrupts++;
272 event->pending_kill = POLL_IN; 272 event->pending_kill = POLL_IN;
273 return 1;
273} 274}
274 275
275/* 276/*
@@ -491,7 +492,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
491 regs.flags &= ~PERF_EFLAGS_EXACT; 492 regs.flags &= ~PERF_EFLAGS_EXACT;
492 493
493 if (perf_event_overflow(event, 1, &data, &regs)) 494 if (perf_event_overflow(event, 1, &data, &regs))
494 x86_pmu_stop(event); 495 x86_pmu_stop(event, 0);
495} 496}
496 497
497static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) 498static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index 249015173992..81400b93e694 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -18,6 +18,8 @@
18struct p4_event_bind { 18struct p4_event_bind {
19 unsigned int opcode; /* Event code and ESCR selector */ 19 unsigned int opcode; /* Event code and ESCR selector */
20 unsigned int escr_msr[2]; /* ESCR MSR for this event */ 20 unsigned int escr_msr[2]; /* ESCR MSR for this event */
21 unsigned int escr_emask; /* valid ESCR EventMask bits */
22 unsigned int shared; /* event is shared across threads */
 21 char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on absence */ 23 char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on absence */
22}; 24};
23 25
@@ -66,231 +68,435 @@ static struct p4_event_bind p4_event_bind_map[] = {
66 [P4_EVENT_TC_DELIVER_MODE] = { 68 [P4_EVENT_TC_DELIVER_MODE] = {
67 .opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE), 69 .opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE),
68 .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, 70 .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
71 .escr_emask =
72 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD) |
73 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DB) |
74 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DI) |
75 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BD) |
76 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BB) |
77 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BI) |
78 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, ID),
79 .shared = 1,
69 .cntr = { {4, 5, -1}, {6, 7, -1} }, 80 .cntr = { {4, 5, -1}, {6, 7, -1} },
70 }, 81 },
71 [P4_EVENT_BPU_FETCH_REQUEST] = { 82 [P4_EVENT_BPU_FETCH_REQUEST] = {
72 .opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST), 83 .opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST),
73 .escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 }, 84 .escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 },
85 .escr_emask =
86 P4_ESCR_EMASK_BIT(P4_EVENT_BPU_FETCH_REQUEST, TCMISS),
74 .cntr = { {0, -1, -1}, {2, -1, -1} }, 87 .cntr = { {0, -1, -1}, {2, -1, -1} },
75 }, 88 },
76 [P4_EVENT_ITLB_REFERENCE] = { 89 [P4_EVENT_ITLB_REFERENCE] = {
77 .opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE), 90 .opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE),
78 .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 }, 91 .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 },
92 .escr_emask =
93 P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT) |
94 P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, MISS) |
95 P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT_UK),
79 .cntr = { {0, -1, -1}, {2, -1, -1} }, 96 .cntr = { {0, -1, -1}, {2, -1, -1} },
80 }, 97 },
81 [P4_EVENT_MEMORY_CANCEL] = { 98 [P4_EVENT_MEMORY_CANCEL] = {
82 .opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL), 99 .opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL),
83 .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, 100 .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
101 .escr_emask =
102 P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, ST_RB_FULL) |
103 P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, 64K_CONF),
84 .cntr = { {8, 9, -1}, {10, 11, -1} }, 104 .cntr = { {8, 9, -1}, {10, 11, -1} },
85 }, 105 },
86 [P4_EVENT_MEMORY_COMPLETE] = { 106 [P4_EVENT_MEMORY_COMPLETE] = {
87 .opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE), 107 .opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE),
88 .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, 108 .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 },
109 .escr_emask =
110 P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, LSC) |
111 P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, SSC),
89 .cntr = { {8, 9, -1}, {10, 11, -1} }, 112 .cntr = { {8, 9, -1}, {10, 11, -1} },
90 }, 113 },
91 [P4_EVENT_LOAD_PORT_REPLAY] = { 114 [P4_EVENT_LOAD_PORT_REPLAY] = {
92 .opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY), 115 .opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY),
93 .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 }, 116 .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 },
117 .escr_emask =
118 P4_ESCR_EMASK_BIT(P4_EVENT_LOAD_PORT_REPLAY, SPLIT_LD),
94 .cntr = { {8, 9, -1}, {10, 11, -1} }, 119 .cntr = { {8, 9, -1}, {10, 11, -1} },
95 }, 120 },
96 [P4_EVENT_STORE_PORT_REPLAY] = { 121 [P4_EVENT_STORE_PORT_REPLAY] = {
97 .opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY), 122 .opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY),
98 .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, 123 .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 },
124 .escr_emask =
125 P4_ESCR_EMASK_BIT(P4_EVENT_STORE_PORT_REPLAY, SPLIT_ST),
99 .cntr = { {8, 9, -1}, {10, 11, -1} }, 126 .cntr = { {8, 9, -1}, {10, 11, -1} },
100 }, 127 },
101 [P4_EVENT_MOB_LOAD_REPLAY] = { 128 [P4_EVENT_MOB_LOAD_REPLAY] = {
102 .opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY), 129 .opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY),
103 .escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 }, 130 .escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 },
131 .escr_emask =
132 P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STA) |
133 P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STD) |
134 P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, PARTIAL_DATA) |
135 P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, UNALGN_ADDR),
104 .cntr = { {0, -1, -1}, {2, -1, -1} }, 136 .cntr = { {0, -1, -1}, {2, -1, -1} },
105 }, 137 },
106 [P4_EVENT_PAGE_WALK_TYPE] = { 138 [P4_EVENT_PAGE_WALK_TYPE] = {
107 .opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE), 139 .opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE),
108 .escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 }, 140 .escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 },
141 .escr_emask =
142 P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, DTMISS) |
143 P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, ITMISS),
144 .shared = 1,
109 .cntr = { {0, -1, -1}, {2, -1, -1} }, 145 .cntr = { {0, -1, -1}, {2, -1, -1} },
110 }, 146 },
111 [P4_EVENT_BSQ_CACHE_REFERENCE] = { 147 [P4_EVENT_BSQ_CACHE_REFERENCE] = {
112 .opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE), 148 .opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE),
113 .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, 149 .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
150 .escr_emask =
151 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) |
152 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) |
153 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) |
154 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) |
155 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) |
156 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM) |
157 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) |
158 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) |
159 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS),
114 .cntr = { {0, -1, -1}, {2, -1, -1} }, 160 .cntr = { {0, -1, -1}, {2, -1, -1} },
115 }, 161 },
116 [P4_EVENT_IOQ_ALLOCATION] = { 162 [P4_EVENT_IOQ_ALLOCATION] = {
117 .opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION), 163 .opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION),
118 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 164 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
165 .escr_emask =
166 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, DEFAULT) |
167 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_READ) |
168 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_WRITE) |
169 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_UC) |
170 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WC) |
171 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WT) |
172 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WP) |
173 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WB) |
174 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OWN) |
175 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OTHER) |
176 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, PREFETCH),
119 .cntr = { {0, -1, -1}, {2, -1, -1} }, 177 .cntr = { {0, -1, -1}, {2, -1, -1} },
120 }, 178 },
121 [P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */ 179 [P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */
122 .opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES), 180 .opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES),
123 .escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 }, 181 .escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 },
182 .escr_emask =
183 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, DEFAULT) |
184 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_READ) |
185 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_WRITE) |
186 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_UC) |
187 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WC) |
188 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WT) |
189 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WP) |
190 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WB) |
191 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OWN) |
192 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OTHER) |
193 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, PREFETCH),
124 .cntr = { {2, -1, -1}, {3, -1, -1} }, 194 .cntr = { {2, -1, -1}, {3, -1, -1} },
125 }, 195 },
126 [P4_EVENT_FSB_DATA_ACTIVITY] = { 196 [P4_EVENT_FSB_DATA_ACTIVITY] = {
127 .opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY), 197 .opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY),
128 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 198 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
199 .escr_emask =
200 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) |
201 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN) |
202 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OTHER) |
203 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_DRV) |
204 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OWN) |
205 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OTHER),
206 .shared = 1,
129 .cntr = { {0, -1, -1}, {2, -1, -1} }, 207 .cntr = { {0, -1, -1}, {2, -1, -1} },
130 }, 208 },
131 [P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */ 209 [P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */
132 .opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION), 210 .opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION),
133 .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 }, 211 .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 },
212 .escr_emask =
213 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE0) |
214 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE1) |
215 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN0) |
216 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN1) |
217 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_IO_TYPE) |
218 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LOCK_TYPE) |
219 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_CACHE_TYPE) |
220 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_SPLIT_TYPE) |
221 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_DEM_TYPE) |
222 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_ORD_TYPE) |
223 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE0) |
224 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE1) |
225 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE2),
134 .cntr = { {0, -1, -1}, {1, -1, -1} }, 226 .cntr = { {0, -1, -1}, {1, -1, -1} },
135 }, 227 },
136 [P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */ 228 [P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */
137 .opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES), 229 .opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES),
138 .escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 }, 230 .escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 },
231 .escr_emask =
232 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE0) |
233 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE1) |
234 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN0) |
235 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN1) |
236 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE) |
237 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE) |
238 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE) |
239 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE) |
240 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE) |
241 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE) |
242 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE0) |
243 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE1) |
244 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE2),
139 .cntr = { {2, -1, -1}, {3, -1, -1} }, 245 .cntr = { {2, -1, -1}, {3, -1, -1} },
140 }, 246 },
141 [P4_EVENT_SSE_INPUT_ASSIST] = { 247 [P4_EVENT_SSE_INPUT_ASSIST] = {
142 .opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST), 248 .opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST),
143 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 249 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
250 .escr_emask =
251 P4_ESCR_EMASK_BIT(P4_EVENT_SSE_INPUT_ASSIST, ALL),
252 .shared = 1,
144 .cntr = { {8, 9, -1}, {10, 11, -1} }, 253 .cntr = { {8, 9, -1}, {10, 11, -1} },
145 }, 254 },
146 [P4_EVENT_PACKED_SP_UOP] = { 255 [P4_EVENT_PACKED_SP_UOP] = {
147 .opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP), 256 .opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP),
148 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 257 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
258 .escr_emask =
259 P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_SP_UOP, ALL),
260 .shared = 1,
149 .cntr = { {8, 9, -1}, {10, 11, -1} }, 261 .cntr = { {8, 9, -1}, {10, 11, -1} },
150 }, 262 },
151 [P4_EVENT_PACKED_DP_UOP] = { 263 [P4_EVENT_PACKED_DP_UOP] = {
152 .opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP), 264 .opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP),
153 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 265 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
266 .escr_emask =
267 P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_DP_UOP, ALL),
268 .shared = 1,
154 .cntr = { {8, 9, -1}, {10, 11, -1} }, 269 .cntr = { {8, 9, -1}, {10, 11, -1} },
155 }, 270 },
156 [P4_EVENT_SCALAR_SP_UOP] = { 271 [P4_EVENT_SCALAR_SP_UOP] = {
157 .opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP), 272 .opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP),
158 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 273 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
274 .escr_emask =
275 P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_SP_UOP, ALL),
276 .shared = 1,
159 .cntr = { {8, 9, -1}, {10, 11, -1} }, 277 .cntr = { {8, 9, -1}, {10, 11, -1} },
160 }, 278 },
161 [P4_EVENT_SCALAR_DP_UOP] = { 279 [P4_EVENT_SCALAR_DP_UOP] = {
162 .opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP), 280 .opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP),
163 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 281 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
282 .escr_emask =
283 P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_DP_UOP, ALL),
284 .shared = 1,
164 .cntr = { {8, 9, -1}, {10, 11, -1} }, 285 .cntr = { {8, 9, -1}, {10, 11, -1} },
165 }, 286 },
166 [P4_EVENT_64BIT_MMX_UOP] = { 287 [P4_EVENT_64BIT_MMX_UOP] = {
167 .opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP), 288 .opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP),
168 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 289 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
290 .escr_emask =
291 P4_ESCR_EMASK_BIT(P4_EVENT_64BIT_MMX_UOP, ALL),
292 .shared = 1,
169 .cntr = { {8, 9, -1}, {10, 11, -1} }, 293 .cntr = { {8, 9, -1}, {10, 11, -1} },
170 }, 294 },
171 [P4_EVENT_128BIT_MMX_UOP] = { 295 [P4_EVENT_128BIT_MMX_UOP] = {
172 .opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP), 296 .opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP),
173 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 297 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
298 .escr_emask =
299 P4_ESCR_EMASK_BIT(P4_EVENT_128BIT_MMX_UOP, ALL),
300 .shared = 1,
174 .cntr = { {8, 9, -1}, {10, 11, -1} }, 301 .cntr = { {8, 9, -1}, {10, 11, -1} },
175 }, 302 },
176 [P4_EVENT_X87_FP_UOP] = { 303 [P4_EVENT_X87_FP_UOP] = {
177 .opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP), 304 .opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP),
178 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 305 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
306 .escr_emask =
307 P4_ESCR_EMASK_BIT(P4_EVENT_X87_FP_UOP, ALL),
308 .shared = 1,
179 .cntr = { {8, 9, -1}, {10, 11, -1} }, 309 .cntr = { {8, 9, -1}, {10, 11, -1} },
180 }, 310 },
181 [P4_EVENT_TC_MISC] = { 311 [P4_EVENT_TC_MISC] = {
182 .opcode = P4_OPCODE(P4_EVENT_TC_MISC), 312 .opcode = P4_OPCODE(P4_EVENT_TC_MISC),
183 .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, 313 .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
314 .escr_emask =
315 P4_ESCR_EMASK_BIT(P4_EVENT_TC_MISC, FLUSH),
184 .cntr = { {4, 5, -1}, {6, 7, -1} }, 316 .cntr = { {4, 5, -1}, {6, 7, -1} },
185 }, 317 },
186 [P4_EVENT_GLOBAL_POWER_EVENTS] = { 318 [P4_EVENT_GLOBAL_POWER_EVENTS] = {
187 .opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS), 319 .opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS),
188 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 320 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
321 .escr_emask =
322 P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING),
189 .cntr = { {0, -1, -1}, {2, -1, -1} }, 323 .cntr = { {0, -1, -1}, {2, -1, -1} },
190 }, 324 },
191 [P4_EVENT_TC_MS_XFER] = { 325 [P4_EVENT_TC_MS_XFER] = {
192 .opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER), 326 .opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER),
193 .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, 327 .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
328 .escr_emask =
329 P4_ESCR_EMASK_BIT(P4_EVENT_TC_MS_XFER, CISC),
194 .cntr = { {4, 5, -1}, {6, 7, -1} }, 330 .cntr = { {4, 5, -1}, {6, 7, -1} },
195 }, 331 },
196 [P4_EVENT_UOP_QUEUE_WRITES] = { 332 [P4_EVENT_UOP_QUEUE_WRITES] = {
197 .opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES), 333 .opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES),
198 .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, 334 .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
335 .escr_emask =
336 P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_BUILD) |
337 P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_DELIVER) |
338 P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_ROM),
199 .cntr = { {4, 5, -1}, {6, 7, -1} }, 339 .cntr = { {4, 5, -1}, {6, 7, -1} },
200 }, 340 },
201 [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = { 341 [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = {
202 .opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE), 342 .opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE),
203 .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 }, 343 .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 },
344 .escr_emask =
345 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL) |
346 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CALL) |
347 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, RETURN) |
348 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT),
204 .cntr = { {4, 5, -1}, {6, 7, -1} }, 349 .cntr = { {4, 5, -1}, {6, 7, -1} },
205 }, 350 },
206 [P4_EVENT_RETIRED_BRANCH_TYPE] = { 351 [P4_EVENT_RETIRED_BRANCH_TYPE] = {
207 .opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE), 352 .opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE),
208 .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 }, 353 .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 },
354 .escr_emask =
355 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) |
356 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) |
357 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) |
358 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT),
209 .cntr = { {4, 5, -1}, {6, 7, -1} }, 359 .cntr = { {4, 5, -1}, {6, 7, -1} },
210 }, 360 },
211 [P4_EVENT_RESOURCE_STALL] = { 361 [P4_EVENT_RESOURCE_STALL] = {
212 .opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL), 362 .opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL),
213 .escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 }, 363 .escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 },
364 .escr_emask =
365 P4_ESCR_EMASK_BIT(P4_EVENT_RESOURCE_STALL, SBFULL),
214 .cntr = { {12, 13, 16}, {14, 15, 17} }, 366 .cntr = { {12, 13, 16}, {14, 15, 17} },
215 }, 367 },
216 [P4_EVENT_WC_BUFFER] = { 368 [P4_EVENT_WC_BUFFER] = {
217 .opcode = P4_OPCODE(P4_EVENT_WC_BUFFER), 369 .opcode = P4_OPCODE(P4_EVENT_WC_BUFFER),
218 .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, 370 .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
371 .escr_emask =
372 P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_EVICTS) |
373 P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_FULL_EVICTS),
374 .shared = 1,
219 .cntr = { {8, 9, -1}, {10, 11, -1} }, 375 .cntr = { {8, 9, -1}, {10, 11, -1} },
220 }, 376 },
221 [P4_EVENT_B2B_CYCLES] = { 377 [P4_EVENT_B2B_CYCLES] = {
222 .opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES), 378 .opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES),
223 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 379 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
380 .escr_emask = 0,
224 .cntr = { {0, -1, -1}, {2, -1, -1} }, 381 .cntr = { {0, -1, -1}, {2, -1, -1} },
225 }, 382 },
226 [P4_EVENT_BNR] = { 383 [P4_EVENT_BNR] = {
227 .opcode = P4_OPCODE(P4_EVENT_BNR), 384 .opcode = P4_OPCODE(P4_EVENT_BNR),
228 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 385 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
386 .escr_emask = 0,
229 .cntr = { {0, -1, -1}, {2, -1, -1} }, 387 .cntr = { {0, -1, -1}, {2, -1, -1} },
230 }, 388 },
231 [P4_EVENT_SNOOP] = { 389 [P4_EVENT_SNOOP] = {
232 .opcode = P4_OPCODE(P4_EVENT_SNOOP), 390 .opcode = P4_OPCODE(P4_EVENT_SNOOP),
233 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 391 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
392 .escr_emask = 0,
234 .cntr = { {0, -1, -1}, {2, -1, -1} }, 393 .cntr = { {0, -1, -1}, {2, -1, -1} },
235 }, 394 },
236 [P4_EVENT_RESPONSE] = { 395 [P4_EVENT_RESPONSE] = {
237 .opcode = P4_OPCODE(P4_EVENT_RESPONSE), 396 .opcode = P4_OPCODE(P4_EVENT_RESPONSE),
238 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 397 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
398 .escr_emask = 0,
239 .cntr = { {0, -1, -1}, {2, -1, -1} }, 399 .cntr = { {0, -1, -1}, {2, -1, -1} },
240 }, 400 },
241 [P4_EVENT_FRONT_END_EVENT] = { 401 [P4_EVENT_FRONT_END_EVENT] = {
242 .opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT), 402 .opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT),
243 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, 403 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
404 .escr_emask =
405 P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, NBOGUS) |
406 P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, BOGUS),
244 .cntr = { {12, 13, 16}, {14, 15, 17} }, 407 .cntr = { {12, 13, 16}, {14, 15, 17} },
245 }, 408 },
246 [P4_EVENT_EXECUTION_EVENT] = { 409 [P4_EVENT_EXECUTION_EVENT] = {
247 .opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT), 410 .opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT),
248 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, 411 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
412 .escr_emask =
413 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0) |
414 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1) |
415 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2) |
416 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3) |
417 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) |
418 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) |
419 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) |
420 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3),
249 .cntr = { {12, 13, 16}, {14, 15, 17} }, 421 .cntr = { {12, 13, 16}, {14, 15, 17} },
250 }, 422 },
251 [P4_EVENT_REPLAY_EVENT] = { 423 [P4_EVENT_REPLAY_EVENT] = {
252 .opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT), 424 .opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT),
253 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, 425 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
426 .escr_emask =
427 P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, NBOGUS) |
428 P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, BOGUS),
254 .cntr = { {12, 13, 16}, {14, 15, 17} }, 429 .cntr = { {12, 13, 16}, {14, 15, 17} },
255 }, 430 },
256 [P4_EVENT_INSTR_RETIRED] = { 431 [P4_EVENT_INSTR_RETIRED] = {
257 .opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED), 432 .opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED),
258 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, 433 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
434 .escr_emask =
435 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) |
436 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSTAG) |
437 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG) |
438 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSTAG),
259 .cntr = { {12, 13, 16}, {14, 15, 17} }, 439 .cntr = { {12, 13, 16}, {14, 15, 17} },
260 }, 440 },
261 [P4_EVENT_UOPS_RETIRED] = { 441 [P4_EVENT_UOPS_RETIRED] = {
262 .opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED), 442 .opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED),
263 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, 443 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
444 .escr_emask =
445 P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, NBOGUS) |
446 P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, BOGUS),
264 .cntr = { {12, 13, 16}, {14, 15, 17} }, 447 .cntr = { {12, 13, 16}, {14, 15, 17} },
265 }, 448 },
266 [P4_EVENT_UOP_TYPE] = { 449 [P4_EVENT_UOP_TYPE] = {
267 .opcode = P4_OPCODE(P4_EVENT_UOP_TYPE), 450 .opcode = P4_OPCODE(P4_EVENT_UOP_TYPE),
268 .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 }, 451 .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 },
452 .escr_emask =
453 P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGLOADS) |
454 P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGSTORES),
269 .cntr = { {12, 13, 16}, {14, 15, 17} }, 455 .cntr = { {12, 13, 16}, {14, 15, 17} },
270 }, 456 },
271 [P4_EVENT_BRANCH_RETIRED] = { 457 [P4_EVENT_BRANCH_RETIRED] = {
272 .opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED), 458 .opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED),
273 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, 459 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
460 .escr_emask =
461 P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNP) |
462 P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNM) |
463 P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTP) |
464 P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTM),
274 .cntr = { {12, 13, 16}, {14, 15, 17} }, 465 .cntr = { {12, 13, 16}, {14, 15, 17} },
275 }, 466 },
276 [P4_EVENT_MISPRED_BRANCH_RETIRED] = { 467 [P4_EVENT_MISPRED_BRANCH_RETIRED] = {
277 .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED), 468 .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED),
278 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, 469 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
470 .escr_emask =
471 P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS),
279 .cntr = { {12, 13, 16}, {14, 15, 17} }, 472 .cntr = { {12, 13, 16}, {14, 15, 17} },
280 }, 473 },
281 [P4_EVENT_X87_ASSIST] = { 474 [P4_EVENT_X87_ASSIST] = {
282 .opcode = P4_OPCODE(P4_EVENT_X87_ASSIST), 475 .opcode = P4_OPCODE(P4_EVENT_X87_ASSIST),
283 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, 476 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
477 .escr_emask =
478 P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSU) |
479 P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSO) |
480 P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAO) |
481 P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAU) |
482 P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, PREA),
284 .cntr = { {12, 13, 16}, {14, 15, 17} }, 483 .cntr = { {12, 13, 16}, {14, 15, 17} },
285 }, 484 },
286 [P4_EVENT_MACHINE_CLEAR] = { 485 [P4_EVENT_MACHINE_CLEAR] = {
287 .opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR), 486 .opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR),
288 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, 487 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
488 .escr_emask =
489 P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, CLEAR) |
490 P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, MOCLEAR) |
491 P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, SMCLEAR),
289 .cntr = { {12, 13, 16}, {14, 15, 17} }, 492 .cntr = { {12, 13, 16}, {14, 15, 17} },
290 }, 493 },
291 [P4_EVENT_INSTR_COMPLETED] = { 494 [P4_EVENT_INSTR_COMPLETED] = {
292 .opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED), 495 .opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED),
293 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, 496 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
497 .escr_emask =
498 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, NBOGUS) |
499 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, BOGUS),
294 .cntr = { {12, 13, 16}, {14, 15, 17} }, 500 .cntr = { {12, 13, 16}, {14, 15, 17} },
295 }, 501 },
296}; 502};
@@ -428,29 +634,73 @@ static u64 p4_pmu_event_map(int hw_event)
428 return config; 634 return config;
429} 635}
430 636
637/* check cpu model specifics */
638static bool p4_event_match_cpu_model(unsigned int event_idx)
639{
 640 /* The INSTR_COMPLETED event exists only on models 3, 4 and 6 (Prescott) */
641 if (event_idx == P4_EVENT_INSTR_COMPLETED) {
642 if (boot_cpu_data.x86_model != 3 &&
643 boot_cpu_data.x86_model != 4 &&
644 boot_cpu_data.x86_model != 6)
645 return false;
646 }
647
648 /*
 649 * For information:
 650 * - IQ_ESCR0 and IQ_ESCR1 are available only on models 1 and 2
651 */
652
653 return true;
654}
655
431static int p4_validate_raw_event(struct perf_event *event) 656static int p4_validate_raw_event(struct perf_event *event)
432{ 657{
433 unsigned int v; 658 unsigned int v, emask;
434 659
 435 /* user data may have out-of-bound event index */ 660 /* User data may have an out-of-bounds event index */
436 v = p4_config_unpack_event(event->attr.config); 661 v = p4_config_unpack_event(event->attr.config);
437 if (v >= ARRAY_SIZE(p4_event_bind_map)) { 662 if (v >= ARRAY_SIZE(p4_event_bind_map))
438 pr_warning("P4 PMU: Unknown event code: %d\n", v); 663 return -EINVAL;
664
665 /* It may be unsupported: */
666 if (!p4_event_match_cpu_model(v))
439 return -EINVAL; 667 return -EINVAL;
668
669 /*
 670 * NOTE: P4_CCCR_THREAD_ANY does not have the same meaning as
 671 * in Architectural Performance Monitoring: it selects not
 672 * _which_ logical cpu to count on but rather _when_, i.e. it
 673 * depends on logical cpu state -- count the event if one cpu is
 674 * active, neither, both or any, so we just allow the user to
 675 * pass any value desired.
 676 *
 677 * In turn we always set the Tx_OS/Tx_USR bits bound to the
 678 * logical cpu, without propagating them to the other cpu
679 */
680
681 /*
 682 * if an event is shared across the logical threads
683 * the user needs special permissions to be able to use it
684 */
685 if (p4_event_bind_map[v].shared) {
686 if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
687 return -EACCES;
440 } 688 }
441 689
690 /* ESCR EventMask bits may be invalid */
691 emask = p4_config_unpack_escr(event->attr.config) & P4_ESCR_EVENTMASK_MASK;
692 if (emask & ~p4_event_bind_map[v].escr_emask)
693 return -EINVAL;
694
442 /* 695 /*
443 * it may have some screwed PEBS bits 696 * it may have some invalid PEBS bits
444 */ 697 */
445 if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) { 698 if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE))
446 pr_warning("P4 PMU: PEBS are not supported yet\n");
447 return -EINVAL; 699 return -EINVAL;
448 } 700
449 v = p4_config_unpack_metric(event->attr.config); 701 v = p4_config_unpack_metric(event->attr.config);
450 if (v >= ARRAY_SIZE(p4_pebs_bind_map)) { 702 if (v >= ARRAY_SIZE(p4_pebs_bind_map))
451 pr_warning("P4 PMU: Unknown metric code: %d\n", v);
452 return -EINVAL; 703 return -EINVAL;
453 }
454 704
455 return 0; 705 return 0;
456} 706}
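From user space the tightened checks surface only as perf_event_open() errors: an undefined ESCR EventMask bit now yields EINVAL, and events marked .shared may require CAP_SYS_ADMIN (EACCES) under perf_paranoid_cpu(). A small sketch using the real syscall, with an illustrative config value:

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <string.h>
#include <unistd.h>

/* Open a raw P4 event; 'packed_config' is a packed ESCR/CCCR value. */
static int open_raw_p4(unsigned long long packed_config)
{
        struct perf_event_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.type   = PERF_TYPE_RAW;
        attr.size   = sizeof(attr);
        attr.config = packed_config;

        return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}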
@@ -478,27 +728,21 @@ static int p4_hw_config(struct perf_event *event)
478 728
479 if (event->attr.type == PERF_TYPE_RAW) { 729 if (event->attr.type == PERF_TYPE_RAW) {
480 730
731 /*
 732 * Clear bits we reserve to be managed by the kernel itself
 733 * and never allow to be set from user space
734 */
735 event->attr.config &= P4_CONFIG_MASK;
736
481 rc = p4_validate_raw_event(event); 737 rc = p4_validate_raw_event(event);
482 if (rc) 738 if (rc)
483 goto out; 739 goto out;
484 740
485 /* 741 /*
486 * We don't control raw events so it's up to the caller
487 * to pass sane values (and we don't count the thread number
488 * on HT machine but allow HT-compatible specifics to be
489 * passed on)
490 *
491 * Note that for RAW events we allow user to use P4_CCCR_RESERVED 742 * Note that for RAW events we allow user to use P4_CCCR_RESERVED
 492 * bits since we keep additional info here (for cache events etc.) 743 * bits since we keep additional info here (for cache events etc.)
493 *
494 * XXX: HT wide things should check perf_paranoid_cpu() &&
495 * CAP_SYS_ADMIN
496 */ 744 */
497 event->hw.config |= event->attr.config & 745 event->hw.config |= event->attr.config;
498 (p4_config_pack_escr(P4_ESCR_MASK_HT) |
499 p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED));
500
501 event->hw.config &= ~P4_CCCR_FORCE_OVF;
502 } 746 }
503 747
504 rc = x86_setup_perfctr(event); 748 rc = x86_setup_perfctr(event);
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index cd37469b54ee..3afb33f14d2d 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -257,14 +257,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
257 return mod_code_status; 257 return mod_code_status;
258} 258}
259 259
260
261
262
263static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];
264
265static unsigned char *ftrace_nop_replace(void) 260static unsigned char *ftrace_nop_replace(void)
266{ 261{
267 return ftrace_nop; 262 return ideal_nop5;
268} 263}
269 264
270static int 265static int
@@ -338,62 +333,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
338 333
339int __init ftrace_dyn_arch_init(void *data) 334int __init ftrace_dyn_arch_init(void *data)
340{ 335{
341 extern const unsigned char ftrace_test_p6nop[];
342 extern const unsigned char ftrace_test_nop5[];
343 extern const unsigned char ftrace_test_jmp[];
344 int faulted = 0;
345
346 /*
347 * There is no good nop for all x86 archs.
348 * We will default to using the P6_NOP5, but first we
349 * will test to make sure that the nop will actually
350 * work on this CPU. If it faults, we will then
351 * go to a lesser efficient 5 byte nop. If that fails
352 * we then just use a jmp as our nop. This isn't the most
353 * efficient nop, but we can not use a multi part nop
354 * since we would then risk being preempted in the middle
355 * of that nop, and if we enabled tracing then, it might
356 * cause a system crash.
357 *
358 * TODO: check the cpuid to determine the best nop.
359 */
360 asm volatile (
361 "ftrace_test_jmp:"
362 "jmp ftrace_test_p6nop\n"
363 "nop\n"
364 "nop\n"
365 "nop\n" /* 2 byte jmp + 3 bytes */
366 "ftrace_test_p6nop:"
367 P6_NOP5
368 "jmp 1f\n"
369 "ftrace_test_nop5:"
370 ".byte 0x66,0x66,0x66,0x66,0x90\n"
371 "1:"
372 ".section .fixup, \"ax\"\n"
373 "2: movl $1, %0\n"
374 " jmp ftrace_test_nop5\n"
375 "3: movl $2, %0\n"
376 " jmp 1b\n"
377 ".previous\n"
378 _ASM_EXTABLE(ftrace_test_p6nop, 2b)
379 _ASM_EXTABLE(ftrace_test_nop5, 3b)
380 : "=r"(faulted) : "0" (faulted));
381
382 switch (faulted) {
383 case 0:
384 pr_info("converting mcount calls to 0f 1f 44 00 00\n");
385 memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE);
386 break;
387 case 1:
388 pr_info("converting mcount calls to 66 66 66 66 90\n");
389 memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE);
390 break;
391 case 2:
392 pr_info("converting mcount calls to jmp . + 5\n");
393 memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE);
394 break;
395 }
396
 397 /* The return code is returned via data */ 336 /* The return code is returned via data */
398 *(unsigned long *)data = 0; 337 *(unsigned long *)data = 0;
399 338
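The deleted boot-time probe chose among three 5-byte nop encodings; that choice now happens once in arch_init_ideal_nop5() and is shared through ideal_nop5. The candidate byte patterns, taken from the removed code, for reference:

/* The three candidates the removed probe tested, byte for byte. */
static const unsigned char p6_nop5[5]  = { 0x0f, 0x1f, 0x44, 0x00, 0x00 };
static const unsigned char alt_nop5[5] = { 0x66, 0x66, 0x66, 0x66, 0x90 }; /* 66-prefixed nop */
static const unsigned char jmp_nop5[5] = { 0xe9, 0x00, 0x00, 0x00, 0x00 }; /* jmp .+5 */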
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
new file mode 100644
index 000000000000..961b6b30ba90
--- /dev/null
+++ b/arch/x86/kernel/jump_label.c
@@ -0,0 +1,50 @@
1/*
2 * jump label x86 support
3 *
4 * Copyright (C) 2009 Jason Baron <jbaron@redhat.com>
5 *
6 */
7#include <linux/jump_label.h>
8#include <linux/memory.h>
9#include <linux/uaccess.h>
10#include <linux/module.h>
11#include <linux/list.h>
12#include <linux/jhash.h>
13#include <linux/cpu.h>
14#include <asm/kprobes.h>
15#include <asm/alternative.h>
16
17#ifdef HAVE_JUMP_LABEL
18
19union jump_code_union {
20 char code[JUMP_LABEL_NOP_SIZE];
21 struct {
22 char jump;
23 int offset;
24 } __attribute__((packed));
25};
26
27void arch_jump_label_transform(struct jump_entry *entry,
28 enum jump_label_type type)
29{
30 union jump_code_union code;
31
32 if (type == JUMP_LABEL_ENABLE) {
33 code.jump = 0xe9;
34 code.offset = entry->target -
35 (entry->code + JUMP_LABEL_NOP_SIZE);
36 } else
37 memcpy(&code, ideal_nop5, JUMP_LABEL_NOP_SIZE);
38 get_online_cpus();
39 mutex_lock(&text_mutex);
40 text_poke_smp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE);
41 mutex_unlock(&text_mutex);
42 put_online_cpus();
43}
44
45void arch_jump_label_text_poke_early(jump_label_t addr)
46{
47 text_poke_early((void *)addr, ideal_nop5, JUMP_LABEL_NOP_SIZE);
48}
49
50#endif
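The enable path emits a plain "e9 rel32" near jump, with rel32 computed from the end of the 5-byte patch site. A self-contained user-space check of that arithmetic (addresses are made up; little-endian host assumed):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
        uint8_t code[5];
        uintptr_t site = 0x1000, target = 0x1234;       /* illustrative */
        int32_t rel = (int32_t)(target - (site + sizeof(code)));

        code[0] = 0xe9;                         /* jmp rel32 */
        memcpy(&code[1], &rel, sizeof(rel));    /* little-endian immediate */

        printf("%02x %02x %02x %02x %02x\n",
               code[0], code[1], code[2], code[3], code[4]);
        return 0;       /* prints: e9 2f 02 00 00 */
}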
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 770ebfb349e9..1cbd54c0df99 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -230,9 +230,6 @@ static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
230 return 0; 230 return 0;
231} 231}
232 232
233/* Dummy buffers for kallsyms_lookup */
234static char __dummy_buf[KSYM_NAME_LEN];
235
236/* Check if paddr is at an instruction boundary */ 233/* Check if paddr is at an instruction boundary */
237static int __kprobes can_probe(unsigned long paddr) 234static int __kprobes can_probe(unsigned long paddr)
238{ 235{
@@ -241,7 +238,7 @@ static int __kprobes can_probe(unsigned long paddr)
241 struct insn insn; 238 struct insn insn;
242 kprobe_opcode_t buf[MAX_INSN_SIZE]; 239 kprobe_opcode_t buf[MAX_INSN_SIZE];
243 240
244 if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf)) 241 if (!kallsyms_lookup_size_offset(paddr, NULL, &offset))
245 return 0; 242 return 0;
246 243
247 /* Decode instructions */ 244 /* Decode instructions */
@@ -1129,7 +1126,7 @@ static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr,
1129 *(unsigned long *)addr = val; 1126 *(unsigned long *)addr = val;
1130} 1127}
1131 1128
1132void __kprobes kprobes_optinsn_template_holder(void) 1129static void __used __kprobes kprobes_optinsn_template_holder(void)
1133{ 1130{
1134 asm volatile ( 1131 asm volatile (
1135 ".global optprobe_template_entry\n" 1132 ".global optprobe_template_entry\n"
@@ -1221,7 +1218,8 @@ static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
1221 } 1218 }
1222 /* Check whether the address range is reserved */ 1219 /* Check whether the address range is reserved */
1223 if (ftrace_text_reserved(src, src + len - 1) || 1220 if (ftrace_text_reserved(src, src + len - 1) ||
1224 alternatives_text_reserved(src, src + len - 1)) 1221 alternatives_text_reserved(src, src + len - 1) ||
1222 jump_label_text_reserved(src, src + len - 1))
1225 return -EBUSY; 1223 return -EBUSY;
1226 1224
1227 return len; 1225 return len;
@@ -1269,11 +1267,9 @@ static int __kprobes can_optimize(unsigned long paddr)
1269 unsigned long addr, size = 0, offset = 0; 1267 unsigned long addr, size = 0, offset = 0;
1270 struct insn insn; 1268 struct insn insn;
1271 kprobe_opcode_t buf[MAX_INSN_SIZE]; 1269 kprobe_opcode_t buf[MAX_INSN_SIZE];
1272 /* Dummy buffers for lookup_symbol_attrs */
1273 static char __dummy_buf[KSYM_NAME_LEN];
1274 1270
1275 /* Lookup symbol including addr */ 1271 /* Lookup symbol including addr */
1276 if (!kallsyms_lookup(paddr, &size, &offset, NULL, __dummy_buf)) 1272 if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
1277 return 0; 1273 return 0;
1278 1274
1279 /* Check there is enough space for a relative jump. */ 1275 /* Check there is enough space for a relative jump. */
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 1c355c550960..8f2956091735 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -239,6 +239,9 @@ int module_finalize(const Elf_Ehdr *hdr,
239 apply_paravirt(pseg, pseg + para->sh_size); 239 apply_paravirt(pseg, pseg + para->sh_size);
240 } 240 }
241 241
242 /* make jump label nops */
243 jump_label_apply_nops(me);
244
242 return 0; 245 return 0;
243} 246}
244 247
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index c3a4fbb2b996..00e167870f71 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -112,6 +112,7 @@
112#include <asm/numa_64.h> 112#include <asm/numa_64.h>
113#endif 113#endif
114#include <asm/mce.h> 114#include <asm/mce.h>
115#include <asm/alternative.h>
115 116
116/* 117/*
117 * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. 118 * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
@@ -726,6 +727,7 @@ void __init setup_arch(char **cmdline_p)
726{ 727{
727 int acpi = 0; 728 int acpi = 0;
728 int k8 = 0; 729 int k8 = 0;
730 unsigned long flags;
729 731
730#ifdef CONFIG_X86_32 732#ifdef CONFIG_X86_32
731 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); 733 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
@@ -1071,6 +1073,10 @@ void __init setup_arch(char **cmdline_p)
1071 x86_init.oem.banner(); 1073 x86_init.oem.banner();
1072 1074
1073 mcheck_init(); 1075 mcheck_init();
1076
1077 local_irq_save(flags);
1078 arch_init_ideal_nop5();
1079 local_irq_restore(flags);
1074} 1080}
1075 1081
1076#ifdef CONFIG_X86_32 1082#ifdef CONFIG_X86_32
diff --git a/include/asm-generic/hardirq.h b/include/asm-generic/hardirq.h
index 62f59080e5cc..04d0a977cd43 100644
--- a/include/asm-generic/hardirq.h
+++ b/include/asm-generic/hardirq.h
@@ -3,13 +3,13 @@
3 3
4#include <linux/cache.h> 4#include <linux/cache.h>
5#include <linux/threads.h> 5#include <linux/threads.h>
6#include <linux/irq.h>
7 6
8typedef struct { 7typedef struct {
9 unsigned int __softirq_pending; 8 unsigned int __softirq_pending;
10} ____cacheline_aligned irq_cpustat_t; 9} ____cacheline_aligned irq_cpustat_t;
11 10
12#include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */ 11#include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */
12#include <linux/irq.h>
13 13
14#ifndef ack_bad_irq 14#ifndef ack_bad_irq
15static inline void ack_bad_irq(unsigned int irq) 15static inline void ack_bad_irq(unsigned int irq)
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 8a92a170fb7d..ef2af9948eac 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -220,6 +220,8 @@
220 \ 220 \
221 BUG_TABLE \ 221 BUG_TABLE \
222 \ 222 \
223 JUMP_TABLE \
224 \
223 /* PCI quirks */ \ 225 /* PCI quirks */ \
224 .pci_fixup : AT(ADDR(.pci_fixup) - LOAD_OFFSET) { \ 226 .pci_fixup : AT(ADDR(.pci_fixup) - LOAD_OFFSET) { \
225 VMLINUX_SYMBOL(__start_pci_fixups_early) = .; \ 227 VMLINUX_SYMBOL(__start_pci_fixups_early) = .; \
@@ -563,6 +565,14 @@
563#define BUG_TABLE 565#define BUG_TABLE
564#endif 566#endif
565 567
568#define JUMP_TABLE \
569 . = ALIGN(8); \
570 __jump_table : AT(ADDR(__jump_table) - LOAD_OFFSET) { \
571 VMLINUX_SYMBOL(__start___jump_table) = .; \
572 *(__jump_table) \
573 VMLINUX_SYMBOL(__stop___jump_table) = .; \
574 }
575
566#ifdef CONFIG_PM_TRACE 576#ifdef CONFIG_PM_TRACE
567#define TRACEDATA \ 577#define TRACEDATA \
568 . = ALIGN(4); \ 578 . = ALIGN(4); \
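The JUMP_TABLE macro hands the core code two linker-provided symbols that bracket a contiguous array of jump_entry records. A short sketch of how such a section is consumed follows; the three-word layout shown matches the x86 jump_entry added in this series and is assumed here only for illustration.

```c
/* Sketch: walking a linker-delimited table, as init_jump_label() does
 * later in this patch. Assumed x86 layout: */
struct jump_entry {
	unsigned long code;	/* address of the 5-byte patch site */
	unsigned long target;	/* destination when the label is enabled */
	unsigned long key;	/* groups all sites testing the same key */
};

extern struct jump_entry __start___jump_table[];
extern struct jump_entry __stop___jump_table[];

static void for_each_jump_entry(void (*fn)(struct jump_entry *))
{
	struct jump_entry *iter;

	for (iter = __start___jump_table; iter < __stop___jump_table; iter++)
		fn(iter);
}
```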
diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index 52c0da4bdd18..bef3cda44c4c 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -1,6 +1,8 @@
1#ifndef _DYNAMIC_DEBUG_H 1#ifndef _DYNAMIC_DEBUG_H
2#define _DYNAMIC_DEBUG_H 2#define _DYNAMIC_DEBUG_H
3 3
4#include <linux/jump_label.h>
5
4/* dynamic_printk_enabled, and dynamic_printk_enabled2 are bitmasks in which 6/* dynamic_printk_enabled, and dynamic_printk_enabled2 are bitmasks in which
5 * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They 7 * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They
6 * use independent hash functions, to reduce the chance of false positives. 8 * use independent hash functions, to reduce the chance of false positives.
@@ -22,8 +24,6 @@ struct _ddebug {
22 const char *function; 24 const char *function;
23 const char *filename; 25 const char *filename;
24 const char *format; 26 const char *format;
25 char primary_hash;
26 char secondary_hash;
27 unsigned int lineno:24; 27 unsigned int lineno:24;
28 /* 28 /*
29 * The flags field controls the behaviour at the callsite. 29 * The flags field controls the behaviour at the callsite.
@@ -33,6 +33,7 @@ struct _ddebug {
33#define _DPRINTK_FLAGS_PRINT (1<<0) /* printk() a message using the format */ 33#define _DPRINTK_FLAGS_PRINT (1<<0) /* printk() a message using the format */
34#define _DPRINTK_FLAGS_DEFAULT 0 34#define _DPRINTK_FLAGS_DEFAULT 0
35 unsigned int flags:8; 35 unsigned int flags:8;
36 char enabled;
36} __attribute__((aligned(8))); 37} __attribute__((aligned(8)));
37 38
38 39
@@ -42,33 +43,35 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int n,
42#if defined(CONFIG_DYNAMIC_DEBUG) 43#if defined(CONFIG_DYNAMIC_DEBUG)
43extern int ddebug_remove_module(const char *mod_name); 44extern int ddebug_remove_module(const char *mod_name);
44 45
45#define __dynamic_dbg_enabled(dd) ({ \
46 int __ret = 0; \
47 if (unlikely((dynamic_debug_enabled & (1LL << DEBUG_HASH)) && \
48 (dynamic_debug_enabled2 & (1LL << DEBUG_HASH2)))) \
49 if (unlikely(dd.flags)) \
50 __ret = 1; \
51 __ret; })
52
53#define dynamic_pr_debug(fmt, ...) do { \ 46#define dynamic_pr_debug(fmt, ...) do { \
47 __label__ do_printk; \
48 __label__ out; \
54 static struct _ddebug descriptor \ 49 static struct _ddebug descriptor \
55 __used \ 50 __used \
56 __attribute__((section("__verbose"), aligned(8))) = \ 51 __attribute__((section("__verbose"), aligned(8))) = \
57 { KBUILD_MODNAME, __func__, __FILE__, fmt, DEBUG_HASH, \ 52 { KBUILD_MODNAME, __func__, __FILE__, fmt, __LINE__, \
58 DEBUG_HASH2, __LINE__, _DPRINTK_FLAGS_DEFAULT }; \ 53 _DPRINTK_FLAGS_DEFAULT }; \
59 if (__dynamic_dbg_enabled(descriptor)) \ 54 JUMP_LABEL(&descriptor.enabled, do_printk); \
60 printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \ 55 goto out; \
56do_printk: \
57 printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \
58out: ; \
61 } while (0) 59 } while (0)
62 60
63 61
64#define dynamic_dev_dbg(dev, fmt, ...) do { \ 62#define dynamic_dev_dbg(dev, fmt, ...) do { \
63 __label__ do_printk; \
64 __label__ out; \
65 static struct _ddebug descriptor \ 65 static struct _ddebug descriptor \
66 __used \ 66 __used \
67 __attribute__((section("__verbose"), aligned(8))) = \ 67 __attribute__((section("__verbose"), aligned(8))) = \
68 { KBUILD_MODNAME, __func__, __FILE__, fmt, DEBUG_HASH, \ 68 { KBUILD_MODNAME, __func__, __FILE__, fmt, __LINE__, \
69 DEBUG_HASH2, __LINE__, _DPRINTK_FLAGS_DEFAULT }; \ 69 _DPRINTK_FLAGS_DEFAULT }; \
70 if (__dynamic_dbg_enabled(descriptor)) \ 70 JUMP_LABEL(&descriptor.enabled, do_printk); \
71 dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__); \ 71 goto out; \
72do_printk: \
73 dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__); \
74out: ; \
72 } while (0) 75 } while (0)
73 76
74#else 77#else
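The payoff of the rewritten macros is that the enabled test vanishes from the hot path when asm goto is available; without it, JUMP_LABEL() degrades to a plain load-and-branch on descriptor.enabled. A standalone userspace model of that fallback control flow, with hypothetical names, is sketched below.

```c
#include <stdio.h>

/* Userspace model of the new dynamic_pr_debug() flow: a static
 * per-callsite descriptor whose 'enabled' byte acts as the key. */
struct ddebug_model { char enabled; };

static void dyn_dbg(struct ddebug_model *d, const char *msg)
{
	if (!d->enabled)	/* stands in for JUMP_LABEL(&d->enabled, ...) */
		goto out;
	printf("debug: %s\n", msg);	/* the do_printk: label's body */
out:	;
}

int main(void)
{
	static struct ddebug_model descriptor;	/* zeroed, i.e. disabled */

	dyn_dbg(&descriptor, "suppressed");	/* prints nothing */
	descriptor.enabled = 1;	/* what writing the ddebug control file does */
	dyn_dbg(&descriptor, "visible");	/* prints */
	return 0;
}
```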
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 02b8b24f8f51..8beabb958f61 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -191,8 +191,8 @@ struct ftrace_event_call {
191 unsigned int flags; 191 unsigned int flags;
192 192
193#ifdef CONFIG_PERF_EVENTS 193#ifdef CONFIG_PERF_EVENTS
194 int perf_refcount; 194 int perf_refcount;
195 struct hlist_head *perf_events; 195 struct hlist_head __percpu *perf_events;
196#endif 196#endif
197}; 197};
198 198
@@ -252,8 +252,8 @@ DECLARE_PER_CPU(struct pt_regs, perf_trace_regs);
252 252
253extern int perf_trace_init(struct perf_event *event); 253extern int perf_trace_init(struct perf_event *event);
254extern void perf_trace_destroy(struct perf_event *event); 254extern void perf_trace_destroy(struct perf_event *event);
255extern int perf_trace_enable(struct perf_event *event); 255extern int perf_trace_add(struct perf_event *event, int flags);
256extern void perf_trace_disable(struct perf_event *event); 256extern void perf_trace_del(struct perf_event *event, int flags);
257extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, 257extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
258 char *filter_str); 258 char *filter_str);
259extern void ftrace_profile_free_filter(struct perf_event *event); 259extern void ftrace_profile_free_filter(struct perf_event *event);
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index a0384a4d1e6f..531495db1708 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -18,6 +18,7 @@
18#include <asm/atomic.h> 18#include <asm/atomic.h>
19#include <asm/ptrace.h> 19#include <asm/ptrace.h>
20#include <asm/system.h> 20#include <asm/system.h>
21#include <trace/events/irq.h>
21 22
22/* 23/*
23 * These correspond to the IORESOURCE_IRQ_* defines in 24 * These correspond to the IORESOURCE_IRQ_* defines in
@@ -407,7 +408,12 @@ asmlinkage void do_softirq(void);
407asmlinkage void __do_softirq(void); 408asmlinkage void __do_softirq(void);
408extern void open_softirq(int nr, void (*action)(struct softirq_action *)); 409extern void open_softirq(int nr, void (*action)(struct softirq_action *));
409extern void softirq_init(void); 410extern void softirq_init(void);
410#define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0) 411static inline void __raise_softirq_irqoff(unsigned int nr)
412{
413 trace_softirq_raise((struct softirq_action *)(unsigned long)nr, NULL);
414 or_softirq_pending(1UL << nr);
415}
416
411extern void raise_softirq_irqoff(unsigned int nr); 417extern void raise_softirq_irqoff(unsigned int nr);
412extern void raise_softirq(unsigned int nr); 418extern void raise_softirq(unsigned int nr);
413extern void wakeup_softirqd(void); 419extern void wakeup_softirqd(void);
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
new file mode 100644
index 000000000000..b72cd9f92c2e
--- /dev/null
+++ b/include/linux/jump_label.h
@@ -0,0 +1,64 @@
1#ifndef _LINUX_JUMP_LABEL_H
2#define _LINUX_JUMP_LABEL_H
3
4#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_HAVE_ARCH_JUMP_LABEL)
5# include <asm/jump_label.h>
6# define HAVE_JUMP_LABEL
7#endif
8
9enum jump_label_type {
10 JUMP_LABEL_ENABLE,
11 JUMP_LABEL_DISABLE
12};
13
14struct module;
15
16#ifdef HAVE_JUMP_LABEL
17
18extern struct jump_entry __start___jump_table[];
19extern struct jump_entry __stop___jump_table[];
20
21extern void arch_jump_label_transform(struct jump_entry *entry,
22 enum jump_label_type type);
23extern void arch_jump_label_text_poke_early(jump_label_t addr);
24extern void jump_label_update(unsigned long key, enum jump_label_type type);
25extern void jump_label_apply_nops(struct module *mod);
26extern int jump_label_text_reserved(void *start, void *end);
27
28#define enable_jump_label(key) \
29 jump_label_update((unsigned long)key, JUMP_LABEL_ENABLE);
30
31#define disable_jump_label(key) \
32 jump_label_update((unsigned long)key, JUMP_LABEL_DISABLE);
33
34#else
35
36#define JUMP_LABEL(key, label) \
37do { \
38 if (unlikely(*key)) \
39 goto label; \
40} while (0)
41
42#define enable_jump_label(cond_var) \
43do { \
44 *(cond_var) = 1; \
45} while (0)
46
47#define disable_jump_label(cond_var) \
48do { \
49 *(cond_var) = 0; \
50} while (0)
51
52static inline int jump_label_apply_nops(struct module *mod)
53{
54 return 0;
55}
56
57static inline int jump_label_text_reserved(void *start, void *end)
58{
59 return 0;
60}
61
62#endif
63
64#endif
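Taken together, the API is: declare a key byte, guard the fast path with JUMP_LABEL(), and flip the key from slow-path code with enable_jump_label()/disable_jump_label(). A sketch under the fallback definitions above (with asm goto the arch header supplies an equivalent JUMP_LABEL()); my_feature_key and both functions are hypothetical, not part of the patch.

```c
static char my_feature_key;	/* the key: one byte suffices */

static void my_hot_path(void)
{
	JUMP_LABEL(&my_feature_key, do_slow);
	return;			/* common case: branch patched out */
do_slow:
	/* rarely-enabled instrumentation would run here */
	return;
}

/* slow path, e.g. a sysfs or debugfs handler */
static void my_feature_set(int on)
{
	if (on) {
		enable_jump_label(&my_feature_key);
	} else {
		disable_jump_label(&my_feature_key);
	}
}
```

Note the braces around the macro calls: the enable/disable macros expand with a trailing semicolon, so a brace-less if/else would not parse.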
diff --git a/include/linux/module.h b/include/linux/module.h
index aace066bad8f..b29e7458b966 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -350,7 +350,10 @@ struct module
350 struct tracepoint *tracepoints; 350 struct tracepoint *tracepoints;
351 unsigned int num_tracepoints; 351 unsigned int num_tracepoints;
352#endif 352#endif
353 353#ifdef HAVE_JUMP_LABEL
354 struct jump_entry *jump_entries;
355 unsigned int num_jump_entries;
356#endif
354#ifdef CONFIG_TRACING 357#ifdef CONFIG_TRACING
355 const char **trace_bprintk_fmt_start; 358 const char **trace_bprintk_fmt_start;
356 unsigned int num_trace_bprintk_fmt; 359 unsigned int num_trace_bprintk_fmt;
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 49466b13c5c6..0eb50832aa00 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -39,6 +39,15 @@
39 preempt_enable(); \ 39 preempt_enable(); \
40} while (0) 40} while (0)
41 41
42#define get_cpu_ptr(var) ({ \
43 preempt_disable(); \
44 this_cpu_ptr(var); })
45
46#define put_cpu_ptr(var) do { \
47 (void)(var); \
48 preempt_enable(); \
49} while (0)
50
42#ifdef CONFIG_SMP 51#ifdef CONFIG_SMP
43 52
44/* minimum unit size, also is the maximum supported allocation size */ 53/* minimum unit size, also is the maximum supported allocation size */
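get_cpu_ptr()/put_cpu_ptr() are the per-cpu-pointer analogues of get_cpu_var()/put_cpu_var(): they pin the task to the current CPU for the duration of the access. A short usage sketch; the counter structure and function are hypothetical, only the two macros come from the patch.

```c
struct my_stats {
	unsigned long events;
};

static struct my_stats __percpu *my_stats;

static void my_count_event(void)
{
	struct my_stats *s = get_cpu_ptr(my_stats);	/* disables preemption */

	s->events++;			/* safe: we cannot migrate here */
	put_cpu_ptr(my_stats);		/* re-enables preemption */
}
```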
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 716f99b682c1..61b1e2d760fd 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -529,7 +529,6 @@ struct hw_perf_event {
529 int last_cpu; 529 int last_cpu;
530 }; 530 };
531 struct { /* software */ 531 struct { /* software */
532 s64 remaining;
533 struct hrtimer hrtimer; 532 struct hrtimer hrtimer;
534 }; 533 };
535#ifdef CONFIG_HAVE_HW_BREAKPOINT 534#ifdef CONFIG_HAVE_HW_BREAKPOINT
@@ -539,6 +538,7 @@ struct hw_perf_event {
539 }; 538 };
540#endif 539#endif
541 }; 540 };
541 int state;
542 local64_t prev_count; 542 local64_t prev_count;
543 u64 sample_period; 543 u64 sample_period;
544 u64 last_period; 544 u64 last_period;
@@ -550,6 +550,13 @@ struct hw_perf_event {
550#endif 550#endif
551}; 551};
552 552
553/*
554 * hw_perf_event::state flags
555 */
556#define PERF_HES_STOPPED 0x01 /* the counter is stopped */
557#define PERF_HES_UPTODATE 0x02 /* event->count up-to-date */
558#define PERF_HES_ARCH 0x04
559
553struct perf_event; 560struct perf_event;
554 561
555/* 562/*
@@ -561,36 +568,70 @@ struct perf_event;
561 * struct pmu - generic performance monitoring unit 568 * struct pmu - generic performance monitoring unit
562 */ 569 */
563struct pmu { 570struct pmu {
564 int (*enable) (struct perf_event *event); 571 struct list_head entry;
565 void (*disable) (struct perf_event *event); 572
566 int (*start) (struct perf_event *event); 573 int * __percpu pmu_disable_count;
567 void (*stop) (struct perf_event *event); 574 struct perf_cpu_context * __percpu pmu_cpu_context;
568 void (*read) (struct perf_event *event); 575 int task_ctx_nr;
569 void (*unthrottle) (struct perf_event *event);
570 576
571 /* 577 /*
572 * Group events scheduling is treated as a transaction, add group 578 * Fully disable/enable this PMU, can be used to protect from the PMI
573 * events as a whole and perform one schedulability test. If the test 579 * as well as for lazy/batch writing of the MSRs.
574 * fails, roll back the whole group
575 */ 580 */
581 void (*pmu_enable) (struct pmu *pmu); /* optional */
582 void (*pmu_disable) (struct pmu *pmu); /* optional */
576 583
577 /* 584 /*
578 * Start the transaction, after this ->enable() doesn't need 585 * Try and initialize the event for this PMU.
579 * to do schedulability tests. 586 * Should return -ENOENT when the @event doesn't match this PMU.
580 */ 587 */
581 void (*start_txn) (const struct pmu *pmu); 588 int (*event_init) (struct perf_event *event);
589
590#define PERF_EF_START 0x01 /* start the counter when adding */
591#define PERF_EF_RELOAD 0x02 /* reload the counter when starting */
592#define PERF_EF_UPDATE 0x04 /* update the counter when stopping */
593
582 /* 594 /*
583 * If ->start_txn() disabled the ->enable() schedulability test 595 * Adds/Removes a counter to/from the PMU, can be done inside
596 * a transaction, see the ->*_txn() methods.
597 */
598 int (*add) (struct perf_event *event, int flags);
599 void (*del) (struct perf_event *event, int flags);
600
601 /*
602 * Starts/Stops a counter present on the PMU. The PMI handler
603 * should stop the counter when perf_event_overflow() returns
604 * !0. ->start() will be used to continue.
605 */
606 void (*start) (struct perf_event *event, int flags);
607 void (*stop) (struct perf_event *event, int flags);
608
609 /*
610 * Updates the counter value of the event.
611 */
612 void (*read) (struct perf_event *event);
613
614 /*
615 * Group events scheduling is treated as a transaction, add
616 * group events as a whole and perform one schedulability test.
617 * If the test fails, roll back the whole group
618 *
619 * Start the transaction, after this ->add() doesn't need to
620 * do schedulability tests.
621 */
622 void (*start_txn) (struct pmu *pmu); /* optional */
623 /*
624 * If ->start_txn() disabled the ->add() schedulability test
584 * then ->commit_txn() is required to perform one. On success 625 * then ->commit_txn() is required to perform one. On success
585 * the transaction is closed. On error the transaction is kept 626 * the transaction is closed. On error the transaction is kept
586 * open until ->cancel_txn() is called. 627 * open until ->cancel_txn() is called.
587 */ 628 */
588 int (*commit_txn) (const struct pmu *pmu); 629 int (*commit_txn) (struct pmu *pmu); /* optional */
589 /* 630 /*
590 * Will cancel the transaction, assumes ->disable() is called for 631 * Will cancel the transaction, assumes ->del() is called
591 * each successful ->enable() during the transaction. 632 * for each successful ->add() during the transaction.
592 */ 633 */
593 void (*cancel_txn) (const struct pmu *pmu); 634 void (*cancel_txn) (struct pmu *pmu); /* optional */
594}; 635};
595 636
596/** 637/**
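Before the remaining hunks, it may help to see the whole new contract in one place. The sketch below is a do-nothing software PMU against the reworked interface, for illustration only; it assumes the perf_pmu_register() call introduced later in this series and mirrors the hw_breakpoint conversion further down.

```c
static int nop_event_init(struct perf_event *event)
{
	if (event->attr.type != PERF_TYPE_SOFTWARE)
		return -ENOENT;		/* not ours; the core tries other PMUs */
	return 0;
}

static int nop_add(struct perf_event *event, int flags)
{
	if (!(flags & PERF_EF_START))
		event->hw.state = PERF_HES_STOPPED;	/* added, not counting */
	return 0;
}

static void nop_del(struct perf_event *event, int flags)	{ }
static void nop_start(struct perf_event *event, int flags)	{ event->hw.state = 0; }
static void nop_stop(struct perf_event *event, int flags)	{ event->hw.state = PERF_HES_STOPPED; }
static void nop_read(struct perf_event *event)			{ }

static struct pmu nop_pmu = {
	.task_ctx_nr	= perf_sw_context,
	.event_init	= nop_event_init,
	.add		= nop_add,
	.del		= nop_del,
	.start		= nop_start,
	.stop		= nop_stop,
	.read		= nop_read,
};
/* registered once at init time: perf_pmu_register(&nop_pmu); */
```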
@@ -669,7 +710,7 @@ struct perf_event {
669 int nr_siblings; 710 int nr_siblings;
670 int group_flags; 711 int group_flags;
671 struct perf_event *group_leader; 712 struct perf_event *group_leader;
672 const struct pmu *pmu; 713 struct pmu *pmu;
673 714
674 enum perf_event_active_state state; 715 enum perf_event_active_state state;
675 unsigned int attach_state; 716 unsigned int attach_state;
@@ -763,12 +804,19 @@ struct perf_event {
763#endif /* CONFIG_PERF_EVENTS */ 804#endif /* CONFIG_PERF_EVENTS */
764}; 805};
765 806
807enum perf_event_context_type {
808 task_context,
809 cpu_context,
810};
811
766/** 812/**
767 * struct perf_event_context - event context structure 813 * struct perf_event_context - event context structure
768 * 814 *
769 * Used as a container for task events and CPU events as well: 815 * Used as a container for task events and CPU events as well:
770 */ 816 */
771struct perf_event_context { 817struct perf_event_context {
818 enum perf_event_context_type type;
819 struct pmu *pmu;
772 /* 820 /*
773 * Protect the states of the events in the list, 821 * Protect the states of the events in the list,
774 * nr_active, and the list: 822 * nr_active, and the list:
@@ -808,6 +856,12 @@ struct perf_event_context {
808 struct rcu_head rcu_head; 856 struct rcu_head rcu_head;
809}; 857};
810 858
859/*
860 * Number of contexts where an event can trigger:
861 * task, softirq, hardirq, nmi.
862 */
863#define PERF_NR_CONTEXTS 4
864
811/** 865/**
812 * struct perf_event_cpu_context - per cpu event context structure 866 * struct perf_event_cpu_context - per cpu event context structure
813 */ 867 */
@@ -815,18 +869,9 @@ struct perf_cpu_context {
815 struct perf_event_context ctx; 869 struct perf_event_context ctx;
816 struct perf_event_context *task_ctx; 870 struct perf_event_context *task_ctx;
817 int active_oncpu; 871 int active_oncpu;
818 int max_pertask;
819 int exclusive; 872 int exclusive;
820 struct swevent_hlist *swevent_hlist; 873 struct list_head rotation_list;
821 struct mutex hlist_mutex; 874 int jiffies_interval;
822 int hlist_refcount;
823
824 /*
825 * Recursion avoidance:
826 *
827 * task, softirq, irq, nmi context
828 */
829 int recursion[4];
830}; 875};
831 876
832struct perf_output_handle { 877struct perf_output_handle {
@@ -842,26 +887,20 @@ struct perf_output_handle {
842 887
843#ifdef CONFIG_PERF_EVENTS 888#ifdef CONFIG_PERF_EVENTS
844 889
845/* 890extern int perf_pmu_register(struct pmu *pmu);
846 * Set by architecture code: 891extern void perf_pmu_unregister(struct pmu *pmu);
847 */
848extern int perf_max_events;
849
850extern const struct pmu *hw_perf_event_init(struct perf_event *event);
851 892
852extern void perf_event_task_sched_in(struct task_struct *task); 893extern void perf_event_task_sched_in(struct task_struct *task);
853extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next); 894extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next);
854extern void perf_event_task_tick(struct task_struct *task);
855extern int perf_event_init_task(struct task_struct *child); 895extern int perf_event_init_task(struct task_struct *child);
856extern void perf_event_exit_task(struct task_struct *child); 896extern void perf_event_exit_task(struct task_struct *child);
857extern void perf_event_free_task(struct task_struct *task); 897extern void perf_event_free_task(struct task_struct *task);
898extern void perf_event_delayed_put(struct task_struct *task);
858extern void set_perf_event_pending(void); 899extern void set_perf_event_pending(void);
859extern void perf_event_do_pending(void); 900extern void perf_event_do_pending(void);
860extern void perf_event_print_debug(void); 901extern void perf_event_print_debug(void);
861extern void __perf_disable(void); 902extern void perf_pmu_disable(struct pmu *pmu);
862extern bool __perf_enable(void); 903extern void perf_pmu_enable(struct pmu *pmu);
863extern void perf_disable(void);
864extern void perf_enable(void);
865extern int perf_event_task_disable(void); 904extern int perf_event_task_disable(void);
866extern int perf_event_task_enable(void); 905extern int perf_event_task_enable(void);
867extern void perf_event_update_userpage(struct perf_event *event); 906extern void perf_event_update_userpage(struct perf_event *event);
@@ -869,7 +908,7 @@ extern int perf_event_release_kernel(struct perf_event *event);
869extern struct perf_event * 908extern struct perf_event *
870perf_event_create_kernel_counter(struct perf_event_attr *attr, 909perf_event_create_kernel_counter(struct perf_event_attr *attr,
871 int cpu, 910 int cpu,
872 pid_t pid, 911 struct task_struct *task,
873 perf_overflow_handler_t callback); 912 perf_overflow_handler_t callback);
874extern u64 perf_event_read_value(struct perf_event *event, 913extern u64 perf_event_read_value(struct perf_event *event,
875 u64 *enabled, u64 *running); 914 u64 *enabled, u64 *running);
@@ -920,14 +959,7 @@ extern int perf_event_overflow(struct perf_event *event, int nmi,
920 */ 959 */
921static inline int is_software_event(struct perf_event *event) 960static inline int is_software_event(struct perf_event *event)
922{ 961{
923 switch (event->attr.type) { 962 return event->pmu->task_ctx_nr == perf_sw_context;
924 case PERF_TYPE_SOFTWARE:
925 case PERF_TYPE_TRACEPOINT:
926 /* for now the breakpoint stuff also works as software event */
927 case PERF_TYPE_BREAKPOINT:
928 return 1;
929 }
930 return 0;
931} 963}
932 964
933extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; 965extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
@@ -976,7 +1008,21 @@ extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks
976extern void perf_event_comm(struct task_struct *tsk); 1008extern void perf_event_comm(struct task_struct *tsk);
977extern void perf_event_fork(struct task_struct *tsk); 1009extern void perf_event_fork(struct task_struct *tsk);
978 1010
979extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); 1011/* Callchains */
1012DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry);
1013
1014extern void perf_callchain_user(struct perf_callchain_entry *entry,
1015 struct pt_regs *regs);
1016extern void perf_callchain_kernel(struct perf_callchain_entry *entry,
1017 struct pt_regs *regs);
1018
1019
1020static inline void
1021perf_callchain_store(struct perf_callchain_entry *entry, u64 ip)
1022{
1023 if (entry->nr < PERF_MAX_STACK_DEPTH)
1024 entry->ip[entry->nr++] = ip;
1025}
980 1026
981extern int sysctl_perf_event_paranoid; 1027extern int sysctl_perf_event_paranoid;
982extern int sysctl_perf_event_mlock; 1028extern int sysctl_perf_event_mlock;
@@ -1019,21 +1065,19 @@ extern int perf_swevent_get_recursion_context(void);
1019extern void perf_swevent_put_recursion_context(int rctx); 1065extern void perf_swevent_put_recursion_context(int rctx);
1020extern void perf_event_enable(struct perf_event *event); 1066extern void perf_event_enable(struct perf_event *event);
1021extern void perf_event_disable(struct perf_event *event); 1067extern void perf_event_disable(struct perf_event *event);
1068extern void perf_event_task_tick(void);
1022#else 1069#else
1023static inline void 1070static inline void
1024perf_event_task_sched_in(struct task_struct *task) { } 1071perf_event_task_sched_in(struct task_struct *task) { }
1025static inline void 1072static inline void
1026perf_event_task_sched_out(struct task_struct *task, 1073perf_event_task_sched_out(struct task_struct *task,
1027 struct task_struct *next) { } 1074 struct task_struct *next) { }
1028static inline void
1029perf_event_task_tick(struct task_struct *task) { }
1030static inline int perf_event_init_task(struct task_struct *child) { return 0; } 1075static inline int perf_event_init_task(struct task_struct *child) { return 0; }
1031static inline void perf_event_exit_task(struct task_struct *child) { } 1076static inline void perf_event_exit_task(struct task_struct *child) { }
1032static inline void perf_event_free_task(struct task_struct *task) { } 1077static inline void perf_event_free_task(struct task_struct *task) { }
1078static inline void perf_event_delayed_put(struct task_struct *task) { }
1033static inline void perf_event_do_pending(void) { } 1079static inline void perf_event_do_pending(void) { }
1034static inline void perf_event_print_debug(void) { } 1080static inline void perf_event_print_debug(void) { }
1035static inline void perf_disable(void) { }
1036static inline void perf_enable(void) { }
1037static inline int perf_event_task_disable(void) { return -EINVAL; } 1081static inline int perf_event_task_disable(void) { return -EINVAL; }
1038static inline int perf_event_task_enable(void) { return -EINVAL; } 1082static inline int perf_event_task_enable(void) { return -EINVAL; }
1039 1083
@@ -1056,6 +1100,7 @@ static inline int perf_swevent_get_recursion_context(void) { return -1; }
1056static inline void perf_swevent_put_recursion_context(int rctx) { } 1100static inline void perf_swevent_put_recursion_context(int rctx) { }
1057static inline void perf_event_enable(struct perf_event *event) { } 1101static inline void perf_event_enable(struct perf_event *event) { }
1058static inline void perf_event_disable(struct perf_event *event) { } 1102static inline void perf_event_disable(struct perf_event *event) { }
1103static inline void perf_event_task_tick(void) { }
1059#endif 1104#endif
1060 1105
1061#define perf_output_put(handle, x) \ 1106#define perf_output_put(handle, x) \
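One practical detail of the new callchain interface: perf_callchain_store() itself enforces the depth bound, so arch unwinders can call it once per frame without checking. A runnable userspace rendition of that contract; the depth constant is the kernel's value at the time and is only assumed here.

```c
#include <stdio.h>

typedef unsigned long long u64;

#define PERF_MAX_STACK_DEPTH 255	/* assumed; see the kernel header */

struct perf_callchain_entry {
	u64 nr;
	u64 ip[PERF_MAX_STACK_DEPTH];
};

/* Same logic as the new inline helper in perf_event.h. */
static inline void
perf_callchain_store(struct perf_callchain_entry *entry, u64 ip)
{
	if (entry->nr < PERF_MAX_STACK_DEPTH)
		entry->ip[entry->nr++] = ip;
}

int main(void)
{
	struct perf_callchain_entry e = { .nr = 0 };

	/* try to store more frames than fit; the helper silently clamps */
	for (int i = 0; i < 300; i++)
		perf_callchain_store(&e, 0xffffffff81000000ull + i);

	printf("stored %llu of 300 frames\n", e.nr);	/* 255 */
	return 0;
}
```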
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1e2a6db2d7dd..eb3c1ceec06e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1160,6 +1160,13 @@ struct sched_rt_entity {
1160 1160
1161struct rcu_node; 1161struct rcu_node;
1162 1162
1163enum perf_event_task_context {
1164 perf_invalid_context = -1,
1165 perf_hw_context = 0,
1166 perf_sw_context,
1167 perf_nr_task_contexts,
1168};
1169
1163struct task_struct { 1170struct task_struct {
1164 volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ 1171 volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
1165 void *stack; 1172 void *stack;
@@ -1431,7 +1438,7 @@ struct task_struct {
1431 struct futex_pi_state *pi_state_cache; 1438 struct futex_pi_state *pi_state_cache;
1432#endif 1439#endif
1433#ifdef CONFIG_PERF_EVENTS 1440#ifdef CONFIG_PERF_EVENTS
1434 struct perf_event_context *perf_event_ctxp; 1441 struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
1435 struct mutex perf_event_mutex; 1442 struct mutex perf_event_mutex;
1436 struct list_head perf_event_list; 1443 struct list_head perf_event_list;
1437#endif 1444#endif
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 103d1b61aacb..a4a90b6726ce 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -17,6 +17,7 @@
17#include <linux/errno.h> 17#include <linux/errno.h>
18#include <linux/types.h> 18#include <linux/types.h>
19#include <linux/rcupdate.h> 19#include <linux/rcupdate.h>
20#include <linux/jump_label.h>
20 21
21struct module; 22struct module;
22struct tracepoint; 23struct tracepoint;
@@ -145,7 +146,9 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin,
145 extern struct tracepoint __tracepoint_##name; \ 146 extern struct tracepoint __tracepoint_##name; \
146 static inline void trace_##name(proto) \ 147 static inline void trace_##name(proto) \
147 { \ 148 { \
148 if (unlikely(__tracepoint_##name.state)) \ 149 JUMP_LABEL(&__tracepoint_##name.state, do_trace); \
150 return; \
151do_trace: \
149 __DO_TRACE(&__tracepoint_##name, \ 152 __DO_TRACE(&__tracepoint_##name, \
150 TP_PROTO(data_proto), \ 153 TP_PROTO(data_proto), \
151 TP_ARGS(data_args)); \ 154 TP_ARGS(data_args)); \
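For a concrete event, the rewritten macro expands roughly as sketched below (a hypothetical tracepoint named foo; __DO_TRACE is the existing dispatch macro). The point is that the disabled case becomes a straight-line return, with the probe call moved behind a label the patched jump can target.

```c
/* Approximate expansion of DECLARE_TRACE for a hypothetical event: */
static inline void trace_foo(int arg)
{
	JUMP_LABEL(&__tracepoint_foo.state, do_trace);
	return;				/* disabled: fall straight through */
do_trace:
	__DO_TRACE(&__tracepoint_foo,	/* probe iteration, as before */
		   TP_PROTO(int arg),
		   TP_ARGS(arg));
}
```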
diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h
index 0e4cfb694fe7..6fa7cbab7d93 100644
--- a/include/trace/events/irq.h
+++ b/include/trace/events/irq.h
@@ -5,7 +5,9 @@
5#define _TRACE_IRQ_H 5#define _TRACE_IRQ_H
6 6
7#include <linux/tracepoint.h> 7#include <linux/tracepoint.h>
8#include <linux/interrupt.h> 8
9struct irqaction;
10struct softirq_action;
9 11
10#define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq } 12#define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq }
11#define show_softirq_name(val) \ 13#define show_softirq_name(val) \
@@ -93,7 +95,10 @@ DECLARE_EVENT_CLASS(softirq,
93 ), 95 ),
94 96
95 TP_fast_assign( 97 TP_fast_assign(
96 __entry->vec = (int)(h - vec); 98 if (vec)
99 __entry->vec = (int)(h - vec);
100 else
101 __entry->vec = (int)(long)h;
97 ), 102 ),
98 103
99 TP_printk("vec=%d [action=%s]", __entry->vec, 104 TP_printk("vec=%d [action=%s]", __entry->vec,
@@ -136,6 +141,23 @@ DEFINE_EVENT(softirq, softirq_exit,
136 TP_ARGS(h, vec) 141 TP_ARGS(h, vec)
137); 142);
138 143
144/**
145 * softirq_raise - called immediately when a softirq is raised
146 * @h: pointer to struct softirq_action
147 * @vec: pointer to first struct softirq_action in softirq_vec array
148 *
149 * When called from the softirq_raise tracepoint, @h encodes the raised
150 * softirq vector number directly and @vec is NULL, flagging that @h is
151 * not a real softirq_action pointer. When used in combination with the
152 * softirq_entry tracepoint we can determine the softirq raise latency.
153 */
154DEFINE_EVENT(softirq, softirq_raise,
155
156 TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
157
158 TP_ARGS(h, vec)
159);
160
139#endif /* _TRACE_IRQ_H */ 161#endif /* _TRACE_IRQ_H */
140 162
141/* This part must be outside protection */ 163/* This part must be outside protection */
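The vec-is-NULL convention deserves a worked example: softirq_entry/exit pass a pointer into the softirq_vec array (so h - vec is the index), while softirq_raise casts the raw vector number into the pointer argument. A standalone demonstration of both decodings:

```c
#include <stdio.h>

struct softirq_action {
	void (*action)(struct softirq_action *);
};

/* Same decoding as the TP_fast_assign() above. */
static int decode_vec(struct softirq_action *h, struct softirq_action *vec)
{
	if (vec)
		return (int)(h - vec);	/* entry/exit: index into the array */
	return (int)(long)h;		/* raise: h carries the number itself */
}

int main(void)
{
	struct softirq_action softirq_vec[10];

	/* softirq_entry style: h points at element 4 of the vector */
	printf("entry: vec=%d\n", decode_vec(&softirq_vec[4], softirq_vec));
	/* softirq_raise style: vector 3 smuggled through the pointer */
	printf("raise: vec=%d\n", decode_vec((struct softirq_action *)3L, NULL));
	return 0;
}
```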
diff --git a/include/trace/events/napi.h b/include/trace/events/napi.h
index 188deca2f3c7..8fe1e93f531d 100644
--- a/include/trace/events/napi.h
+++ b/include/trace/events/napi.h
@@ -6,10 +6,31 @@
6 6
7#include <linux/netdevice.h> 7#include <linux/netdevice.h>
8#include <linux/tracepoint.h> 8#include <linux/tracepoint.h>
9#include <linux/ftrace.h>
10
11#define NO_DEV "(no_device)"
12
13TRACE_EVENT(napi_poll,
9 14
10DECLARE_TRACE(napi_poll,
11 TP_PROTO(struct napi_struct *napi), 15 TP_PROTO(struct napi_struct *napi),
12 TP_ARGS(napi)); 16
17 TP_ARGS(napi),
18
19 TP_STRUCT__entry(
20 __field( struct napi_struct *, napi)
21 __string( dev_name, napi->dev ? napi->dev->name : NO_DEV)
22 ),
23
24 TP_fast_assign(
25 __entry->napi = napi;
26 __assign_str(dev_name, napi->dev ? napi->dev->name : NO_DEV);
27 ),
28
29 TP_printk("napi poll on napi struct %p for device %s",
30 __entry->napi, __get_str(dev_name))
31);
32
33#undef NO_DEV
13 34
14#endif /* _TRACE_NAPI_H_ */ 35#endif /* _TRACE_NAPI_H_ */
15 36
diff --git a/include/trace/events/net.h b/include/trace/events/net.h
new file mode 100644
index 000000000000..5f247f5ffc56
--- /dev/null
+++ b/include/trace/events/net.h
@@ -0,0 +1,82 @@
1#undef TRACE_SYSTEM
2#define TRACE_SYSTEM net
3
4#if !defined(_TRACE_NET_H) || defined(TRACE_HEADER_MULTI_READ)
5#define _TRACE_NET_H
6
7#include <linux/skbuff.h>
8#include <linux/netdevice.h>
9#include <linux/ip.h>
10#include <linux/tracepoint.h>
11
12TRACE_EVENT(net_dev_xmit,
13
14 TP_PROTO(struct sk_buff *skb,
15 int rc),
16
17 TP_ARGS(skb, rc),
18
19 TP_STRUCT__entry(
20 __field( void *, skbaddr )
21 __field( unsigned int, len )
22 __field( int, rc )
23 __string( name, skb->dev->name )
24 ),
25
26 TP_fast_assign(
27 __entry->skbaddr = skb;
28 __entry->len = skb->len;
29 __entry->rc = rc;
30 __assign_str(name, skb->dev->name);
31 ),
32
33 TP_printk("dev=%s skbaddr=%p len=%u rc=%d",
34 __get_str(name), __entry->skbaddr, __entry->len, __entry->rc)
35);
36
37DECLARE_EVENT_CLASS(net_dev_template,
38
39 TP_PROTO(struct sk_buff *skb),
40
41 TP_ARGS(skb),
42
43 TP_STRUCT__entry(
44 __field( void *, skbaddr )
45 __field( unsigned int, len )
46 __string( name, skb->dev->name )
47 ),
48
49 TP_fast_assign(
50 __entry->skbaddr = skb;
51 __entry->len = skb->len;
52 __assign_str(name, skb->dev->name);
53 ),
54
55 TP_printk("dev=%s skbaddr=%p len=%u",
56 __get_str(name), __entry->skbaddr, __entry->len)
57)
58
59DEFINE_EVENT(net_dev_template, net_dev_queue,
60
61 TP_PROTO(struct sk_buff *skb),
62
63 TP_ARGS(skb)
64);
65
66DEFINE_EVENT(net_dev_template, netif_receive_skb,
67
68 TP_PROTO(struct sk_buff *skb),
69
70 TP_ARGS(skb)
71);
72
73DEFINE_EVENT(net_dev_template, netif_rx,
74
75 TP_PROTO(struct sk_buff *skb),
76
77 TP_ARGS(skb)
78);
79#endif /* _TRACE_NET_H */
80
81/* This part must be outside protection */
82#include <trace/define_trace.h>
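These events only fire once callers are added (the net/core changes later in this series). A hypothetical call site, just to show the argument contract; my_xmit_one is invented, only trace_net_dev_xmit() comes from the patch.

```c
/* Hypothetical transmit wrapper around the driver's xmit hook. */
static int my_xmit_one(struct sk_buff *skb, struct net_device *dev)
{
	int rc;

	skb->dev = dev;			/* TP_fast_assign reads skb->dev->name */
	rc = dev->netdev_ops->ndo_start_xmit(skb, dev);
	trace_net_dev_xmit(skb, rc);	/* records name, skbaddr, len, rc */
	return rc;
}
```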
diff --git a/include/trace/events/power.h b/include/trace/events/power.h
index 35a2a6e7bf1e..286784d69b8f 100644
--- a/include/trace/events/power.h
+++ b/include/trace/events/power.h
@@ -10,12 +10,17 @@
10#ifndef _TRACE_POWER_ENUM_ 10#ifndef _TRACE_POWER_ENUM_
11#define _TRACE_POWER_ENUM_ 11#define _TRACE_POWER_ENUM_
12enum { 12enum {
13 POWER_NONE = 0, 13 POWER_NONE = 0,
14 POWER_CSTATE = 1, 14 POWER_CSTATE = 1, /* C-State */
15 POWER_PSTATE = 2, 15 POWER_PSTATE = 2, /* Frequency change or DVFS */
16 POWER_SSTATE = 3, /* Suspend */
16}; 17};
17#endif 18#endif
18 19
20/*
21 * The power events are used for cpuidle & suspend (power_start, power_end)
22 * and for cpufreq (power_frequency)
23 */
19DECLARE_EVENT_CLASS(power, 24DECLARE_EVENT_CLASS(power,
20 25
21 TP_PROTO(unsigned int type, unsigned int state, unsigned int cpu_id), 26 TP_PROTO(unsigned int type, unsigned int state, unsigned int cpu_id),
@@ -70,6 +75,85 @@ TRACE_EVENT(power_end,
70 75
71); 76);
72 77
78/*
79 * The clock events are used for clock enable/disable and for
80 * clock rate change
81 */
82DECLARE_EVENT_CLASS(clock,
83
84 TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id),
85
86 TP_ARGS(name, state, cpu_id),
87
88 TP_STRUCT__entry(
89 __string( name, name )
90 __field( u64, state )
91 __field( u64, cpu_id )
92 ),
93
94 TP_fast_assign(
95 __assign_str(name, name);
96 __entry->state = state;
97 __entry->cpu_id = cpu_id;
98 ),
99
100 TP_printk("%s state=%lu cpu_id=%lu", __get_str(name),
101 (unsigned long)__entry->state, (unsigned long)__entry->cpu_id)
102);
103
104DEFINE_EVENT(clock, clock_enable,
105
106 TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id),
107
108 TP_ARGS(name, state, cpu_id)
109);
110
111DEFINE_EVENT(clock, clock_disable,
112
113 TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id),
114
115 TP_ARGS(name, state, cpu_id)
116);
117
118DEFINE_EVENT(clock, clock_set_rate,
119
120 TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id),
121
122 TP_ARGS(name, state, cpu_id)
123);
124
125/*
126 * The power domain events are used for power domains transitions
127 */
128DECLARE_EVENT_CLASS(power_domain,
129
130 TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id),
131
132 TP_ARGS(name, state, cpu_id),
133
134 TP_STRUCT__entry(
135 __string( name, name )
136 __field( u64, state )
137 __field( u64, cpu_id )
138 ),
139
140 TP_fast_assign(
141 __assign_str(name, name);
142 __entry->state = state;
143 __entry->cpu_id = cpu_id;
144),
145
146 TP_printk("%s state=%lu cpu_id=%lu", __get_str(name),
147 (unsigned long)__entry->state, (unsigned long)__entry->cpu_id)
148);
149
150DEFINE_EVENT(power_domain, power_domain_target,
151
152 TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id),
153
154 TP_ARGS(name, state, cpu_id)
155);
156
73#endif /* _TRACE_POWER_H */ 157#endif /* _TRACE_POWER_H */
74 158
75/* This part must be outside protection */ 159/* This part must be outside protection */
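The clock and power_domain classes are pure instrumentation points; this series defines them without adding callers. A hypothetical clock-framework caller might look like the sketch below (my_clk and both functions are illustrative, not from the patch):

```c
struct my_clk {
	const char *name;
};

static void my_clk_enable(struct my_clk *clk)
{
	trace_clock_enable(clk->name, 1, smp_processor_id());
	/* ... write the hardware enable bit ... */
}

static void my_clk_set_rate(struct my_clk *clk, unsigned long rate)
{
	/* the 'state' argument doubles as the new rate for clock_set_rate */
	trace_clock_set_rate(clk->name, rate, smp_processor_id());
	/* ... reprogram the divider ... */
}
```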
diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
index 4b2be6dc76f0..75ce9d500d8e 100644
--- a/include/trace/events/skb.h
+++ b/include/trace/events/skb.h
@@ -35,6 +35,23 @@ TRACE_EVENT(kfree_skb,
35 __entry->skbaddr, __entry->protocol, __entry->location) 35 __entry->skbaddr, __entry->protocol, __entry->location)
36); 36);
37 37
38TRACE_EVENT(consume_skb,
39
40 TP_PROTO(struct sk_buff *skb),
41
42 TP_ARGS(skb),
43
44 TP_STRUCT__entry(
45 __field( void *, skbaddr )
46 ),
47
48 TP_fast_assign(
49 __entry->skbaddr = skb;
50 ),
51
52 TP_printk("skbaddr=%p", __entry->skbaddr)
53);
54
38TRACE_EVENT(skb_copy_datagram_iovec, 55TRACE_EVENT(skb_copy_datagram_iovec,
39 56
40 TP_PROTO(const struct sk_buff *skb, int len), 57 TP_PROTO(const struct sk_buff *skb, int len),
diff --git a/kernel/Makefile b/kernel/Makefile
index 0b72d1a74be0..d52b473c99a1 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -10,7 +10,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \
10 kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ 10 kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
11 hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ 11 hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
12 notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \ 12 notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \
13 async.o range.o 13 async.o range.o jump_label.o
14obj-$(CONFIG_HAVE_EARLY_RES) += early_res.o 14obj-$(CONFIG_HAVE_EARLY_RES) += early_res.o
15obj-y += groups.o 15obj-y += groups.o
16 16
diff --git a/kernel/exit.c b/kernel/exit.c
index 03120229db28..e2bdf37f9fde 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -149,9 +149,7 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
149{ 149{
150 struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); 150 struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
151 151
152#ifdef CONFIG_PERF_EVENTS 152 perf_event_delayed_put(tsk);
153 WARN_ON_ONCE(tsk->perf_event_ctxp);
154#endif
155 trace_sched_process_free(tsk); 153 trace_sched_process_free(tsk);
156 put_task_struct(tsk); 154 put_task_struct(tsk);
157} 155}
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index c7c2aed9e2dc..3b714e839c10 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -433,8 +433,7 @@ register_user_hw_breakpoint(struct perf_event_attr *attr,
433 perf_overflow_handler_t triggered, 433 perf_overflow_handler_t triggered,
434 struct task_struct *tsk) 434 struct task_struct *tsk)
435{ 435{
436 return perf_event_create_kernel_counter(attr, -1, task_pid_vnr(tsk), 436 return perf_event_create_kernel_counter(attr, -1, tsk, triggered);
437 triggered);
438} 437}
439EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); 438EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
440 439
@@ -516,7 +515,7 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr,
516 get_online_cpus(); 515 get_online_cpus();
517 for_each_online_cpu(cpu) { 516 for_each_online_cpu(cpu) {
518 pevent = per_cpu_ptr(cpu_events, cpu); 517 pevent = per_cpu_ptr(cpu_events, cpu);
519 bp = perf_event_create_kernel_counter(attr, cpu, -1, triggered); 518 bp = perf_event_create_kernel_counter(attr, cpu, NULL, triggered);
520 519
521 *pevent = bp; 520 *pevent = bp;
522 521
@@ -566,6 +565,61 @@ static struct notifier_block hw_breakpoint_exceptions_nb = {
566 .priority = 0x7fffffff 565 .priority = 0x7fffffff
567}; 566};
568 567
568static void bp_perf_event_destroy(struct perf_event *event)
569{
570 release_bp_slot(event);
571}
572
573static int hw_breakpoint_event_init(struct perf_event *bp)
574{
575 int err;
576
577 if (bp->attr.type != PERF_TYPE_BREAKPOINT)
578 return -ENOENT;
579
580 err = register_perf_hw_breakpoint(bp);
581 if (err)
582 return err;
583
584 bp->destroy = bp_perf_event_destroy;
585
586 return 0;
587}
588
589static int hw_breakpoint_add(struct perf_event *bp, int flags)
590{
591 if (!(flags & PERF_EF_START))
592 bp->hw.state = PERF_HES_STOPPED;
593
594 return arch_install_hw_breakpoint(bp);
595}
596
597static void hw_breakpoint_del(struct perf_event *bp, int flags)
598{
599 arch_uninstall_hw_breakpoint(bp);
600}
601
602static void hw_breakpoint_start(struct perf_event *bp, int flags)
603{
604 bp->hw.state = 0;
605}
606
607static void hw_breakpoint_stop(struct perf_event *bp, int flags)
608{
609 bp->hw.state = PERF_HES_STOPPED;
610}
611
612static struct pmu perf_breakpoint = {
613 .task_ctx_nr = perf_sw_context, /* could eventually get its own */
614
615 .event_init = hw_breakpoint_event_init,
616 .add = hw_breakpoint_add,
617 .del = hw_breakpoint_del,
618 .start = hw_breakpoint_start,
619 .stop = hw_breakpoint_stop,
620 .read = hw_breakpoint_pmu_read,
621};
622
569static int __init init_hw_breakpoint(void) 623static int __init init_hw_breakpoint(void)
570{ 624{
571 unsigned int **task_bp_pinned; 625 unsigned int **task_bp_pinned;
@@ -587,6 +641,8 @@ static int __init init_hw_breakpoint(void)
587 641
588 constraints_initialized = 1; 642 constraints_initialized = 1;
589 643
644 perf_pmu_register(&perf_breakpoint);
645
590 return register_die_notifier(&hw_breakpoint_exceptions_nb); 646 return register_die_notifier(&hw_breakpoint_exceptions_nb);
591 647
592 err_alloc: 648 err_alloc:
@@ -602,8 +658,3 @@ static int __init init_hw_breakpoint(void)
602core_initcall(init_hw_breakpoint); 658core_initcall(init_hw_breakpoint);
603 659
604 660
605struct pmu perf_ops_bp = {
606 .enable = arch_install_hw_breakpoint,
607 .disable = arch_uninstall_hw_breakpoint,
608 .read = hw_breakpoint_pmu_read,
609};
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
new file mode 100644
index 000000000000..7be868bf25c6
--- /dev/null
+++ b/kernel/jump_label.c
@@ -0,0 +1,429 @@
1/*
2 * jump label support
3 *
4 * Copyright (C) 2009 Jason Baron <jbaron@redhat.com>
5 *
6 */
7#include <linux/jump_label.h>
8#include <linux/memory.h>
9#include <linux/uaccess.h>
10#include <linux/module.h>
11#include <linux/list.h>
12#include <linux/jhash.h>
13#include <linux/slab.h>
14#include <linux/sort.h>
15#include <linux/err.h>
16
17#ifdef HAVE_JUMP_LABEL
18
19#define JUMP_LABEL_HASH_BITS 6
20#define JUMP_LABEL_TABLE_SIZE (1 << JUMP_LABEL_HASH_BITS)
21static struct hlist_head jump_label_table[JUMP_LABEL_TABLE_SIZE];
22
23/* mutex to protect coming/going of the jump_label table */
24static DEFINE_MUTEX(jump_label_mutex);
25
26struct jump_label_entry {
27 struct hlist_node hlist;
28 struct jump_entry *table;
29 int nr_entries;
30 /* hang modules off here */
31 struct hlist_head modules;
32 unsigned long key;
33};
34
35struct jump_label_module_entry {
36 struct hlist_node hlist;
37 struct jump_entry *table;
38 int nr_entries;
39 struct module *mod;
40};
41
42static int jump_label_cmp(const void *a, const void *b)
43{
44 const struct jump_entry *jea = a;
45 const struct jump_entry *jeb = b;
46
47 if (jea->key < jeb->key)
48 return -1;
49
50 if (jea->key > jeb->key)
51 return 1;
52
53 return 0;
54}
55
56static void
57sort_jump_label_entries(struct jump_entry *start, struct jump_entry *stop)
58{
59 unsigned long size;
60
61 size = (((unsigned long)stop - (unsigned long)start)
62 / sizeof(struct jump_entry));
63 sort(start, size, sizeof(struct jump_entry), jump_label_cmp, NULL);
64}
65
66static struct jump_label_entry *get_jump_label_entry(jump_label_t key)
67{
68 struct hlist_head *head;
69 struct hlist_node *node;
70 struct jump_label_entry *e;
71 u32 hash = jhash((void *)&key, sizeof(jump_label_t), 0);
72
73 head = &jump_label_table[hash & (JUMP_LABEL_TABLE_SIZE - 1)];
74 hlist_for_each_entry(e, node, head, hlist) {
75 if (key == e->key)
76 return e;
77 }
78 return NULL;
79}
80
81static struct jump_label_entry *
82add_jump_label_entry(jump_label_t key, int nr_entries, struct jump_entry *table)
83{
84 struct hlist_head *head;
85 struct jump_label_entry *e;
86 u32 hash;
87
88 e = get_jump_label_entry(key);
89 if (e)
90 return ERR_PTR(-EEXIST);
91
92 e = kmalloc(sizeof(struct jump_label_entry), GFP_KERNEL);
93 if (!e)
94 return ERR_PTR(-ENOMEM);
95
96 hash = jhash((void *)&key, sizeof(jump_label_t), 0);
97 head = &jump_label_table[hash & (JUMP_LABEL_TABLE_SIZE - 1)];
98 e->key = key;
99 e->table = table;
100 e->nr_entries = nr_entries;
101 INIT_HLIST_HEAD(&(e->modules));
102 hlist_add_head(&e->hlist, head);
103 return e;
104}
105
106static int
107build_jump_label_hashtable(struct jump_entry *start, struct jump_entry *stop)
108{
109 struct jump_entry *iter, *iter_begin;
110 struct jump_label_entry *entry;
111 int count;
112
113 sort_jump_label_entries(start, stop);
114 iter = start;
115 while (iter < stop) {
116 entry = get_jump_label_entry(iter->key);
117 if (!entry) {
118 iter_begin = iter;
119 count = 0;
120 while ((iter < stop) &&
121 (iter->key == iter_begin->key)) {
122 iter++;
123 count++;
124 }
125 entry = add_jump_label_entry(iter_begin->key,
126 count, iter_begin);
127 if (IS_ERR(entry))
128 return PTR_ERR(entry);
129 } else {
130 WARN_ONCE(1, KERN_ERR "build_jump_label_hashtable: unexpected entry!\n");
131 return -1;
132 }
133 }
134 return 0;
135}
136
137/***
138 * jump_label_update - update jump label text
139 * @key - key value associated with a jump label
140 * @type - enum set to JUMP_LABEL_ENABLE or JUMP_LABEL_DISABLE
141 *
142 * Will enable/disable the jump for jump label @key, depending on the
143 * value of @type.
144 *
145 */
146
147void jump_label_update(unsigned long key, enum jump_label_type type)
148{
149 struct jump_entry *iter;
150 struct jump_label_entry *entry;
151 struct hlist_node *module_node;
152 struct jump_label_module_entry *e_module;
153 int count;
154
155 mutex_lock(&jump_label_mutex);
156 entry = get_jump_label_entry((jump_label_t)key);
157 if (entry) {
158 count = entry->nr_entries;
159 iter = entry->table;
160 while (count--) {
161 if (kernel_text_address(iter->code))
162 arch_jump_label_transform(iter, type);
163 iter++;
164 }
165 /* enable/disable jump labels in modules */
166 hlist_for_each_entry(e_module, module_node, &(entry->modules),
167 hlist) {
168 count = e_module->nr_entries;
169 iter = e_module->table;
170 while (count--) {
171 if (kernel_text_address(iter->code))
172 arch_jump_label_transform(iter, type);
173 iter++;
174 }
175 }
176 }
177 mutex_unlock(&jump_label_mutex);
178}
179
180static int addr_conflict(struct jump_entry *entry, void *start, void *end)
181{
182 if (entry->code <= (unsigned long)end &&
183 entry->code + JUMP_LABEL_NOP_SIZE > (unsigned long)start)
184 return 1;
185
186 return 0;
187}
188
189#ifdef CONFIG_MODULES
190
191static int module_conflict(void *start, void *end)
192{
193 struct hlist_head *head;
194 struct hlist_node *node, *node_next, *module_node, *module_node_next;
195 struct jump_label_entry *e;
196 struct jump_label_module_entry *e_module;
197 struct jump_entry *iter;
198 int i, count;
199 int conflict = 0;
200
201 for (i = 0; i < JUMP_LABEL_TABLE_SIZE; i++) {
202 head = &jump_label_table[i];
203 hlist_for_each_entry_safe(e, node, node_next, head, hlist) {
204 hlist_for_each_entry_safe(e_module, module_node,
205 module_node_next,
206 &(e->modules), hlist) {
207 count = e_module->nr_entries;
208 iter = e_module->table;
209 while (count--) {
210 if (addr_conflict(iter, start, end)) {
211 conflict = 1;
212 goto out;
213 }
214 iter++;
215 }
216 }
217 }
218 }
219out:
220 return conflict;
221}
222
223#endif
224
225/***
226 * jump_label_text_reserved - check if addr range is reserved
227 * @start: start text addr
228 * @end: end text addr
229 *
230 * checks if the text addr located between @start and @end
231 * overlaps with any of the jump label patch addresses. Code
232 * that wants to modify kernel text should first verify that
233 * it does not overlap with any of the jump label addresses.
234 *
235 * returns 1 if there is an overlap, 0 otherwise
236 */
237int jump_label_text_reserved(void *start, void *end)
238{
239 struct jump_entry *iter;
240 struct jump_entry *iter_start = __start___jump_table;
241 struct jump_entry *iter_stop = __stop___jump_table;
242 int conflict = 0;
243
244 mutex_lock(&jump_label_mutex);
245 iter = iter_start;
246 while (iter < iter_stop) {
247 if (addr_conflict(iter, start, end)) {
248 conflict = 1;
249 goto out;
250 }
251 iter++;
252 }
253
254 /* now check modules */
255#ifdef CONFIG_MODULES
256 conflict = module_conflict(start, end);
257#endif
258out:
259 mutex_unlock(&jump_label_mutex);
260 return conflict;
261}
262
263static __init int init_jump_label(void)
264{
265 int ret;
266 struct jump_entry *iter_start = __start___jump_table;
267 struct jump_entry *iter_stop = __stop___jump_table;
268 struct jump_entry *iter;
269
270 mutex_lock(&jump_label_mutex);
271 ret = build_jump_label_hashtable(__start___jump_table,
272 __stop___jump_table);
273 iter = iter_start;
274 while (iter < iter_stop) {
275 arch_jump_label_text_poke_early(iter->code);
276 iter++;
277 }
278 mutex_unlock(&jump_label_mutex);
279 return ret;
280}
281early_initcall(init_jump_label);
282
283#ifdef CONFIG_MODULES
284
285static struct jump_label_module_entry *
286add_jump_label_module_entry(struct jump_label_entry *entry,
287 struct jump_entry *iter_begin,
288 int count, struct module *mod)
289{
290 struct jump_label_module_entry *e;
291
292 e = kmalloc(sizeof(struct jump_label_module_entry), GFP_KERNEL);
293 if (!e)
294 return ERR_PTR(-ENOMEM);
295 e->mod = mod;
296 e->nr_entries = count;
297 e->table = iter_begin;
298 hlist_add_head(&e->hlist, &entry->modules);
299 return e;
300}
301
302static int add_jump_label_module(struct module *mod)
303{
304 struct jump_entry *iter, *iter_begin;
305 struct jump_label_entry *entry;
306 struct jump_label_module_entry *module_entry;
307 int count;
308
309 /* if the module doesn't have jump label entries, just return */
310 if (!mod->num_jump_entries)
311 return 0;
312
313 sort_jump_label_entries(mod->jump_entries,
314 mod->jump_entries + mod->num_jump_entries);
315 iter = mod->jump_entries;
316 while (iter < mod->jump_entries + mod->num_jump_entries) {
317 entry = get_jump_label_entry(iter->key);
318 iter_begin = iter;
319 count = 0;
320 while ((iter < mod->jump_entries + mod->num_jump_entries) &&
321 (iter->key == iter_begin->key)) {
322 iter++;
323 count++;
324 }
325 if (!entry) {
326 entry = add_jump_label_entry(iter_begin->key, 0, NULL);
327 if (IS_ERR(entry))
328 return PTR_ERR(entry);
329 }
330 module_entry = add_jump_label_module_entry(entry, iter_begin,
331 count, mod);
332 if (IS_ERR(module_entry))
333 return PTR_ERR(module_entry);
334 }
335 return 0;
336}
337
338static void remove_jump_label_module(struct module *mod)
339{
340 struct hlist_head *head;
341 struct hlist_node *node, *node_next, *module_node, *module_node_next;
342 struct jump_label_entry *e;
343 struct jump_label_module_entry *e_module;
344 int i;
345
346 /* if the module doesn't have jump label entries, just return */
347 if (!mod->num_jump_entries)
348 return;
349
350 for (i = 0; i < JUMP_LABEL_TABLE_SIZE; i++) {
351 head = &jump_label_table[i];
352 hlist_for_each_entry_safe(e, node, node_next, head, hlist) {
353 hlist_for_each_entry_safe(e_module, module_node,
354 module_node_next,
355 &(e->modules), hlist) {
356 if (e_module->mod == mod) {
357 hlist_del(&e_module->hlist);
358 kfree(e_module);
359 }
360 }
361 if (hlist_empty(&e->modules) && (e->nr_entries == 0)) {
362 hlist_del(&e->hlist);
363 kfree(e);
364 }
365 }
366 }
367}
368
369static int
370jump_label_module_notify(struct notifier_block *self, unsigned long val,
371 void *data)
372{
373 struct module *mod = data;
374 int ret = 0;
375
376 switch (val) {
377 case MODULE_STATE_COMING:
378 mutex_lock(&jump_label_mutex);
379 ret = add_jump_label_module(mod);
380 if (ret)
381 remove_jump_label_module(mod);
382 mutex_unlock(&jump_label_mutex);
383 break;
384 case MODULE_STATE_GOING:
385 mutex_lock(&jump_label_mutex);
386 remove_jump_label_module(mod);
387 mutex_unlock(&jump_label_mutex);
388 break;
389 }
390 return ret;
391}
392
393/***
394 * jump_label_apply_nops - patch module jump label sites with the ideal nop
395 * @mod: module to patch
396 *
397 * Allow for run-time selection of the optimal nops. Before the module
398 * loads, patch its jump label sites via arch_jump_label_text_poke_early(),
399 * which is provided by the arch-specific jump label code.
400 */
401void jump_label_apply_nops(struct module *mod)
402{
403 struct jump_entry *iter;
404
405 /* if the module doesn't have jump label entries, just return */
406 if (!mod->num_jump_entries)
407 return;
408
409 iter = mod->jump_entries;
410 while (iter < mod->jump_entries + mod->num_jump_entries) {
411 arch_jump_label_text_poke_early(iter->code);
412 iter++;
413 }
414}
415
416struct notifier_block jump_label_module_nb = {
417 .notifier_call = jump_label_module_notify,
418 .priority = 0,
419};
420
421static __init int init_jump_label_module(void)
422{
423 return register_module_notifier(&jump_label_module_nb);
424}
425early_initcall(init_jump_label_module);
426
427#endif /* CONFIG_MODULES */
428
429#endif
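The core trick in build_jump_label_hashtable() is that sorting by key makes every key's sites contiguous, so one forward sweep can carve the array into per-key tables. A runnable userspace model of that pass; the entry values are invented.

```c
#include <stdio.h>
#include <stdlib.h>

struct jump_entry {
	unsigned long code, target, key;
};

/* same ordering as jump_label_cmp() */
static int cmp_key(const void *a, const void *b)
{
	const struct jump_entry *ja = a, *jb = b;

	if (ja->key < jb->key)
		return -1;
	if (ja->key > jb->key)
		return 1;
	return 0;
}

int main(void)
{
	struct jump_entry tab[] = {
		{ 0x10, 0x40, 7 }, { 0x90, 0xc0, 3 }, { 0x50, 0x80, 7 },
	};
	int n = sizeof(tab) / sizeof(tab[0]), i = 0;

	qsort(tab, n, sizeof(tab[0]), cmp_key);
	while (i < n) {
		int begin = i, count = 0;

		/* one run per key, exactly as the kernel loop counts */
		while (i < n && tab[i].key == tab[begin].key) {
			i++;
			count++;
		}
		printf("key %lu: %d site(s)\n", tab[begin].key, count);
	}
	return 0;
}
```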
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 282035f3ae96..ec4210c6501e 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -47,6 +47,7 @@
47#include <linux/memory.h> 47#include <linux/memory.h>
48#include <linux/ftrace.h> 48#include <linux/ftrace.h>
49#include <linux/cpu.h> 49#include <linux/cpu.h>
50#include <linux/jump_label.h>
50 51
51#include <asm-generic/sections.h> 52#include <asm-generic/sections.h>
52#include <asm/cacheflush.h> 53#include <asm/cacheflush.h>
@@ -399,7 +400,7 @@ static inline int kprobe_optready(struct kprobe *p)
399 * Return an optimized kprobe whose optimizing code replaces 400 * Return an optimized kprobe whose optimizing code replaces
400 * instructions including addr (exclude breakpoint). 401 * instructions including addr (exclude breakpoint).
401 */ 402 */
402struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr) 403static struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr)
403{ 404{
404 int i; 405 int i;
405 struct kprobe *p = NULL; 406 struct kprobe *p = NULL;
@@ -831,6 +832,7 @@ void __kprobes recycle_rp_inst(struct kretprobe_instance *ri,
831 832
832void __kprobes kretprobe_hash_lock(struct task_struct *tsk, 833void __kprobes kretprobe_hash_lock(struct task_struct *tsk,
833 struct hlist_head **head, unsigned long *flags) 834 struct hlist_head **head, unsigned long *flags)
835__acquires(hlist_lock)
834{ 836{
835 unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); 837 unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
836 spinlock_t *hlist_lock; 838 spinlock_t *hlist_lock;
@@ -842,6 +844,7 @@ void __kprobes kretprobe_hash_lock(struct task_struct *tsk,
842 844
843static void __kprobes kretprobe_table_lock(unsigned long hash, 845static void __kprobes kretprobe_table_lock(unsigned long hash,
844 unsigned long *flags) 846 unsigned long *flags)
847__acquires(hlist_lock)
845{ 848{
846 spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); 849 spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
847 spin_lock_irqsave(hlist_lock, *flags); 850 spin_lock_irqsave(hlist_lock, *flags);
@@ -849,6 +852,7 @@ static void __kprobes kretprobe_table_lock(unsigned long hash,
849 852
850void __kprobes kretprobe_hash_unlock(struct task_struct *tsk, 853void __kprobes kretprobe_hash_unlock(struct task_struct *tsk,
851 unsigned long *flags) 854 unsigned long *flags)
855__releases(hlist_lock)
852{ 856{
853 unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); 857 unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
854 spinlock_t *hlist_lock; 858 spinlock_t *hlist_lock;
@@ -857,7 +861,9 @@ void __kprobes kretprobe_hash_unlock(struct task_struct *tsk,
857 spin_unlock_irqrestore(hlist_lock, *flags); 861 spin_unlock_irqrestore(hlist_lock, *flags);
858} 862}
859 863
860void __kprobes kretprobe_table_unlock(unsigned long hash, unsigned long *flags) 864static void __kprobes kretprobe_table_unlock(unsigned long hash,
865 unsigned long *flags)
866__releases(hlist_lock)
861{ 867{
862 spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); 868 spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
863 spin_unlock_irqrestore(hlist_lock, *flags); 869 spin_unlock_irqrestore(hlist_lock, *flags);
@@ -1141,7 +1147,8 @@ int __kprobes register_kprobe(struct kprobe *p)
1141 preempt_disable(); 1147 preempt_disable();
1142 if (!kernel_text_address((unsigned long) p->addr) || 1148 if (!kernel_text_address((unsigned long) p->addr) ||
1143 in_kprobes_functions((unsigned long) p->addr) || 1149 in_kprobes_functions((unsigned long) p->addr) ||
1144 ftrace_text_reserved(p->addr, p->addr)) { 1150 ftrace_text_reserved(p->addr, p->addr) ||
1151 jump_label_text_reserved(p->addr, p->addr)) {
1145 preempt_enable(); 1152 preempt_enable();
1146 return -EINVAL; 1153 return -EINVAL;
1147 } 1154 }
@@ -1339,18 +1346,19 @@ int __kprobes register_jprobes(struct jprobe **jps, int num)
1339 if (num <= 0) 1346 if (num <= 0)
1340 return -EINVAL; 1347 return -EINVAL;
1341 for (i = 0; i < num; i++) { 1348 for (i = 0; i < num; i++) {
1342 unsigned long addr; 1349 unsigned long addr, offset;
1343 jp = jps[i]; 1350 jp = jps[i];
1344 addr = arch_deref_entry_point(jp->entry); 1351 addr = arch_deref_entry_point(jp->entry);
1345 1352
1346 if (!kernel_text_address(addr)) 1353 /* Verify probepoint is a function entry point */
1347 ret = -EINVAL; 1354 if (kallsyms_lookup_size_offset(addr, NULL, &offset) &&
1348 else { 1355 offset == 0) {
1349 /* Todo: Verify probepoint is a function entry point */
1350 jp->kp.pre_handler = setjmp_pre_handler; 1356 jp->kp.pre_handler = setjmp_pre_handler;
1351 jp->kp.break_handler = longjmp_break_handler; 1357 jp->kp.break_handler = longjmp_break_handler;
1352 ret = register_kprobe(&jp->kp); 1358 ret = register_kprobe(&jp->kp);
1353 } 1359 } else
1360 ret = -EINVAL;
1361
1354 if (ret < 0) { 1362 if (ret < 0) {
1355 if (i > 0) 1363 if (i > 0)
1356 unregister_jprobes(jps, i); 1364 unregister_jprobes(jps, i);
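[Annotation] The old "Todo" in register_jprobes() is resolved by the kallsyms lookup above: a jprobe is accepted only when its address sits at offset 0 inside a known symbol, i.e. at a function entry. The same test as a standalone sketch (the helper name is ours, not the patch's):

static bool addr_is_function_entry(unsigned long addr)
{
	unsigned long offset;

	/* resolve addr to its containing symbol and the offset into it */
	if (!kallsyms_lookup_size_offset(addr, NULL, &offset))
		return false;		/* not inside any known symbol */

	return offset == 0;		/* only the entry point qualifies */
}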
diff --git a/kernel/module.c b/kernel/module.c
index ccd641991842..2df46301a7a4 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -55,6 +55,7 @@
55#include <linux/async.h> 55#include <linux/async.h>
56#include <linux/percpu.h> 56#include <linux/percpu.h>
57#include <linux/kmemleak.h> 57#include <linux/kmemleak.h>
58#include <linux/jump_label.h>
58 59
59#define CREATE_TRACE_POINTS 60#define CREATE_TRACE_POINTS
60#include <trace/events/module.h> 61#include <trace/events/module.h>
@@ -2309,6 +2310,11 @@ static void find_module_sections(struct module *mod, struct load_info *info)
2309 sizeof(*mod->tracepoints), 2310 sizeof(*mod->tracepoints),
2310 &mod->num_tracepoints); 2311 &mod->num_tracepoints);
2311#endif 2312#endif
2313#ifdef HAVE_JUMP_LABEL
2314 mod->jump_entries = section_objs(info, "__jump_table",
2315 sizeof(*mod->jump_entries),
2316 &mod->num_jump_entries);
2317#endif
2312#ifdef CONFIG_EVENT_TRACING 2318#ifdef CONFIG_EVENT_TRACING
2313 mod->trace_events = section_objs(info, "_ftrace_events", 2319 mod->trace_events = section_objs(info, "_ftrace_events",
2314 sizeof(*mod->trace_events), 2320 sizeof(*mod->trace_events),
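[Annotation] The new section_objs() call above needs the record size of the module's "__jump_table" entries. The layout this series uses looks roughly like the sketch below; jump_label_t is an arch-sized integer (u32 on 32-bit architectures), and the exact names should be treated as assumptions:

typedef u64 jump_label_t;	/* u32 on 32-bit architectures */

struct jump_entry {
	jump_label_t code;	/* address of the patched nop/jmp site */
	jump_label_t target;	/* branch destination when enabled */
	jump_label_t key;	/* address of the controlling key */
};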
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index db5b56064687..64507eaa2d9e 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -31,24 +31,18 @@
31#include <linux/kernel_stat.h> 31#include <linux/kernel_stat.h>
32#include <linux/perf_event.h> 32#include <linux/perf_event.h>
33#include <linux/ftrace_event.h> 33#include <linux/ftrace_event.h>
34#include <linux/hw_breakpoint.h>
35 34
36#include <asm/irq_regs.h> 35#include <asm/irq_regs.h>
37 36
38/*
39 * Each CPU has a list of per CPU events:
40 */
41static DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context);
42
43int perf_max_events __read_mostly = 1;
44static int perf_reserved_percpu __read_mostly;
45static int perf_overcommit __read_mostly = 1;
46
47static atomic_t nr_events __read_mostly; 37static atomic_t nr_events __read_mostly;
48static atomic_t nr_mmap_events __read_mostly; 38static atomic_t nr_mmap_events __read_mostly;
49static atomic_t nr_comm_events __read_mostly; 39static atomic_t nr_comm_events __read_mostly;
50static atomic_t nr_task_events __read_mostly; 40static atomic_t nr_task_events __read_mostly;
51 41
42static LIST_HEAD(pmus);
43static DEFINE_MUTEX(pmus_lock);
44static struct srcu_struct pmus_srcu;
45
52/* 46/*
53 * perf event paranoia level: 47 * perf event paranoia level:
54 * -1 - not paranoid at all 48 * -1 - not paranoid at all
@@ -67,36 +61,38 @@ int sysctl_perf_event_sample_rate __read_mostly = 100000;
67 61
68static atomic64_t perf_event_id; 62static atomic64_t perf_event_id;
69 63
70/* 64void __weak perf_event_print_debug(void) { }
71 * Lock for (sysadmin-configurable) event reservations:
72 */
73static DEFINE_SPINLOCK(perf_resource_lock);
74 65
75/* 66void perf_pmu_disable(struct pmu *pmu)
76 * Architecture provided APIs - weak aliases:
77 */
78extern __weak const struct pmu *hw_perf_event_init(struct perf_event *event)
79{ 67{
80 return NULL; 68 int *count = this_cpu_ptr(pmu->pmu_disable_count);
69 if (!(*count)++)
70 pmu->pmu_disable(pmu);
81} 71}
82 72
83void __weak hw_perf_disable(void) { barrier(); } 73void perf_pmu_enable(struct pmu *pmu)
84void __weak hw_perf_enable(void) { barrier(); }
85
86void __weak perf_event_print_debug(void) { }
87
88static DEFINE_PER_CPU(int, perf_disable_count);
89
90void perf_disable(void)
91{ 74{
92 if (!__get_cpu_var(perf_disable_count)++) 75 int *count = this_cpu_ptr(pmu->pmu_disable_count);
93 hw_perf_disable(); 76 if (!--(*count))
77 pmu->pmu_enable(pmu);
94} 78}
95 79
96void perf_enable(void) 80static DEFINE_PER_CPU(struct list_head, rotation_list);
81
82/*
83 * perf_pmu_rotate_start() and perf_rotate_context() are fully serialized
84 * because they're strictly cpu affine and rotate_start is called with IRQs
85 * disabled, while rotate_context is called from IRQ context.
86 */
87static void perf_pmu_rotate_start(struct pmu *pmu)
97{ 88{
98 if (!--__get_cpu_var(perf_disable_count)) 89 struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
99 hw_perf_enable(); 90 struct list_head *head = &__get_cpu_var(rotation_list);
91
92 WARN_ON(!irqs_disabled());
93
94 if (list_empty(&cpuctx->rotation_list))
95 list_add(&cpuctx->rotation_list, head);
100} 96}
101 97
102static void get_ctx(struct perf_event_context *ctx) 98static void get_ctx(struct perf_event_context *ctx)
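[Annotation] The new perf_pmu_disable()/perf_pmu_enable() pair replaces the global hw_perf_disable()/hw_perf_enable() with a per-PMU, per-CPU reference count, so callers can nest freely and the hardware callbacks fire only on the 0<->1 edges. Usage sketch:

static void example_reprogram(struct pmu *pmu)
{
	perf_pmu_disable(pmu);	/* count 0 -> 1: pmu->pmu_disable() runs */
	perf_pmu_disable(pmu);	/* count 1 -> 2: no hardware access */

	/* ... safely rewrite counter configuration here ... */

	perf_pmu_enable(pmu);	/* count 2 -> 1: still disabled */
	perf_pmu_enable(pmu);	/* count 1 -> 0: pmu->pmu_enable() runs */
}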
@@ -151,13 +147,13 @@ static u64 primary_event_id(struct perf_event *event)
151 * the context could get moved to another task. 147 * the context could get moved to another task.
152 */ 148 */
153static struct perf_event_context * 149static struct perf_event_context *
154perf_lock_task_context(struct task_struct *task, unsigned long *flags) 150perf_lock_task_context(struct task_struct *task, int ctxn, unsigned long *flags)
155{ 151{
156 struct perf_event_context *ctx; 152 struct perf_event_context *ctx;
157 153
158 rcu_read_lock(); 154 rcu_read_lock();
159 retry: 155retry:
160 ctx = rcu_dereference(task->perf_event_ctxp); 156 ctx = rcu_dereference(task->perf_event_ctxp[ctxn]);
161 if (ctx) { 157 if (ctx) {
162 /* 158 /*
163 * If this context is a clone of another, it might 159 * If this context is a clone of another, it might
@@ -170,7 +166,7 @@ perf_lock_task_context(struct task_struct *task, unsigned long *flags)
170 * can't get swapped on us any more. 166 * can't get swapped on us any more.
171 */ 167 */
172 raw_spin_lock_irqsave(&ctx->lock, *flags); 168 raw_spin_lock_irqsave(&ctx->lock, *flags);
173 if (ctx != rcu_dereference(task->perf_event_ctxp)) { 169 if (ctx != rcu_dereference(task->perf_event_ctxp[ctxn])) {
174 raw_spin_unlock_irqrestore(&ctx->lock, *flags); 170 raw_spin_unlock_irqrestore(&ctx->lock, *flags);
175 goto retry; 171 goto retry;
176 } 172 }
@@ -189,12 +185,13 @@ perf_lock_task_context(struct task_struct *task, unsigned long *flags)
189 * can't get swapped to another task. This also increments its 185 * can't get swapped to another task. This also increments its
190 * reference count so that the context can't get freed. 186 * reference count so that the context can't get freed.
191 */ 187 */
192static struct perf_event_context *perf_pin_task_context(struct task_struct *task) 188static struct perf_event_context *
189perf_pin_task_context(struct task_struct *task, int ctxn)
193{ 190{
194 struct perf_event_context *ctx; 191 struct perf_event_context *ctx;
195 unsigned long flags; 192 unsigned long flags;
196 193
197 ctx = perf_lock_task_context(task, &flags); 194 ctx = perf_lock_task_context(task, ctxn, &flags);
198 if (ctx) { 195 if (ctx) {
199 ++ctx->pin_count; 196 ++ctx->pin_count;
200 raw_spin_unlock_irqrestore(&ctx->lock, flags); 197 raw_spin_unlock_irqrestore(&ctx->lock, flags);
@@ -302,6 +299,8 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
302 } 299 }
303 300
304 list_add_rcu(&event->event_entry, &ctx->event_list); 301 list_add_rcu(&event->event_entry, &ctx->event_list);
302 if (!ctx->nr_events)
303 perf_pmu_rotate_start(ctx->pmu);
305 ctx->nr_events++; 304 ctx->nr_events++;
306 if (event->attr.inherit_stat) 305 if (event->attr.inherit_stat)
307 ctx->nr_stat++; 306 ctx->nr_stat++;
@@ -436,7 +435,7 @@ event_sched_out(struct perf_event *event,
436 event->state = PERF_EVENT_STATE_OFF; 435 event->state = PERF_EVENT_STATE_OFF;
437 } 436 }
438 event->tstamp_stopped = ctx->time; 437 event->tstamp_stopped = ctx->time;
439 event->pmu->disable(event); 438 event->pmu->del(event, 0);
440 event->oncpu = -1; 439 event->oncpu = -1;
441 440
442 if (!is_software_event(event)) 441 if (!is_software_event(event))
@@ -466,6 +465,12 @@ group_sched_out(struct perf_event *group_event,
466 cpuctx->exclusive = 0; 465 cpuctx->exclusive = 0;
467} 466}
468 467
468static inline struct perf_cpu_context *
469__get_cpu_context(struct perf_event_context *ctx)
470{
471 return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
472}
473
469/* 474/*
470 * Cross CPU call to remove a performance event 475 * Cross CPU call to remove a performance event
471 * 476 *
@@ -474,9 +479,9 @@ group_sched_out(struct perf_event *group_event,
474 */ 479 */
475static void __perf_event_remove_from_context(void *info) 480static void __perf_event_remove_from_context(void *info)
476{ 481{
477 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
478 struct perf_event *event = info; 482 struct perf_event *event = info;
479 struct perf_event_context *ctx = event->ctx; 483 struct perf_event_context *ctx = event->ctx;
484 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
480 485
481 /* 486 /*
482 * If this is a task context, we need to check whether it is 487 * If this is a task context, we need to check whether it is
@@ -487,27 +492,11 @@ static void __perf_event_remove_from_context(void *info)
487 return; 492 return;
488 493
489 raw_spin_lock(&ctx->lock); 494 raw_spin_lock(&ctx->lock);
490 /*
491 * Protect the list operation against NMI by disabling the
492 * events on a global level.
493 */
494 perf_disable();
495 495
496 event_sched_out(event, cpuctx, ctx); 496 event_sched_out(event, cpuctx, ctx);
497 497
498 list_del_event(event, ctx); 498 list_del_event(event, ctx);
499 499
500 if (!ctx->task) {
501 /*
502 * Allow more per task events with respect to the
503 * reservation:
504 */
505 cpuctx->max_pertask =
506 min(perf_max_events - ctx->nr_events,
507 perf_max_events - perf_reserved_percpu);
508 }
509
510 perf_enable();
511 raw_spin_unlock(&ctx->lock); 500 raw_spin_unlock(&ctx->lock);
512} 501}
513 502
@@ -572,8 +561,8 @@ retry:
572static void __perf_event_disable(void *info) 561static void __perf_event_disable(void *info)
573{ 562{
574 struct perf_event *event = info; 563 struct perf_event *event = info;
575 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
576 struct perf_event_context *ctx = event->ctx; 564 struct perf_event_context *ctx = event->ctx;
565 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
577 566
578 /* 567 /*
579 * If this is a per-task event, need to check whether this 568 * If this is a per-task event, need to check whether this
@@ -628,7 +617,7 @@ void perf_event_disable(struct perf_event *event)
628 return; 617 return;
629 } 618 }
630 619
631 retry: 620retry:
632 task_oncpu_function_call(task, __perf_event_disable, event); 621 task_oncpu_function_call(task, __perf_event_disable, event);
633 622
634 raw_spin_lock_irq(&ctx->lock); 623 raw_spin_lock_irq(&ctx->lock);
@@ -667,7 +656,7 @@ event_sched_in(struct perf_event *event,
667 */ 656 */
668 smp_wmb(); 657 smp_wmb();
669 658
670 if (event->pmu->enable(event)) { 659 if (event->pmu->add(event, PERF_EF_START)) {
671 event->state = PERF_EVENT_STATE_INACTIVE; 660 event->state = PERF_EVENT_STATE_INACTIVE;
672 event->oncpu = -1; 661 event->oncpu = -1;
673 return -EAGAIN; 662 return -EAGAIN;
@@ -691,22 +680,15 @@ group_sched_in(struct perf_event *group_event,
691 struct perf_event_context *ctx) 680 struct perf_event_context *ctx)
692{ 681{
693 struct perf_event *event, *partial_group = NULL; 682 struct perf_event *event, *partial_group = NULL;
694 const struct pmu *pmu = group_event->pmu; 683 struct pmu *pmu = group_event->pmu;
695 bool txn = false;
696 684
697 if (group_event->state == PERF_EVENT_STATE_OFF) 685 if (group_event->state == PERF_EVENT_STATE_OFF)
698 return 0; 686 return 0;
699 687
 700 /* Check if group transaction available */ 688 pmu->start_txn(pmu);
701 if (pmu->start_txn)
702 txn = true;
703
704 if (txn)
705 pmu->start_txn(pmu);
706 689
707 if (event_sched_in(group_event, cpuctx, ctx)) { 690 if (event_sched_in(group_event, cpuctx, ctx)) {
708 if (txn) 691 pmu->cancel_txn(pmu);
709 pmu->cancel_txn(pmu);
710 return -EAGAIN; 692 return -EAGAIN;
711 } 693 }
712 694
@@ -720,7 +702,7 @@ group_sched_in(struct perf_event *group_event,
720 } 702 }
721 } 703 }
722 704
723 if (!txn || !pmu->commit_txn(pmu)) 705 if (!pmu->commit_txn(pmu))
724 return 0; 706 return 0;
725 707
726group_error: 708group_error:
@@ -735,8 +717,7 @@ group_error:
735 } 717 }
736 event_sched_out(group_event, cpuctx, ctx); 718 event_sched_out(group_event, cpuctx, ctx);
737 719
738 if (txn) 720 pmu->cancel_txn(pmu);
739 pmu->cancel_txn(pmu);
740 721
741 return -EAGAIN; 722 return -EAGAIN;
742} 723}
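[Annotation] group_sched_in() can now call the transaction hooks unconditionally because the core is expected to install no-op defaults for PMUs that don't batch their scheduling. A sketch of what such fallbacks look like (the function names and registration site are assumptions based on this patch's structure):

static void perf_pmu_nop_void(struct pmu *pmu)
{
	/* nothing to batch: every ->add() takes effect immediately */
}

static int perf_pmu_nop_int(struct pmu *pmu)
{
	return 0;	/* a "commit" with nothing batched always succeeds */
}

/* roughly, at pmu registration time: */
if (!pmu->start_txn) {
	pmu->start_txn	= perf_pmu_nop_void;
	pmu->commit_txn	= perf_pmu_nop_int;
	pmu->cancel_txn	= perf_pmu_nop_void;
}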
@@ -789,10 +770,10 @@ static void add_event_to_ctx(struct perf_event *event,
789 */ 770 */
790static void __perf_install_in_context(void *info) 771static void __perf_install_in_context(void *info)
791{ 772{
792 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
793 struct perf_event *event = info; 773 struct perf_event *event = info;
794 struct perf_event_context *ctx = event->ctx; 774 struct perf_event_context *ctx = event->ctx;
795 struct perf_event *leader = event->group_leader; 775 struct perf_event *leader = event->group_leader;
776 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
796 int err; 777 int err;
797 778
798 /* 779 /*
@@ -812,12 +793,6 @@ static void __perf_install_in_context(void *info)
812 ctx->is_active = 1; 793 ctx->is_active = 1;
813 update_context_time(ctx); 794 update_context_time(ctx);
814 795
815 /*
816 * Protect the list operation against NMI by disabling the
817 * events on a global level. NOP for non NMI based events.
818 */
819 perf_disable();
820
821 add_event_to_ctx(event, ctx); 796 add_event_to_ctx(event, ctx);
822 797
823 if (event->cpu != -1 && event->cpu != smp_processor_id()) 798 if (event->cpu != -1 && event->cpu != smp_processor_id())
@@ -855,12 +830,7 @@ static void __perf_install_in_context(void *info)
855 } 830 }
856 } 831 }
857 832
858 if (!err && !ctx->task && cpuctx->max_pertask) 833unlock:
859 cpuctx->max_pertask--;
860
861 unlock:
862 perf_enable();
863
864 raw_spin_unlock(&ctx->lock); 834 raw_spin_unlock(&ctx->lock);
865} 835}
866 836
@@ -883,6 +853,8 @@ perf_install_in_context(struct perf_event_context *ctx,
883{ 853{
884 struct task_struct *task = ctx->task; 854 struct task_struct *task = ctx->task;
885 855
856 event->ctx = ctx;
857
886 if (!task) { 858 if (!task) {
887 /* 859 /*
888 * Per cpu events are installed via an smp call and 860 * Per cpu events are installed via an smp call and
@@ -931,10 +903,12 @@ static void __perf_event_mark_enabled(struct perf_event *event,
931 903
932 event->state = PERF_EVENT_STATE_INACTIVE; 904 event->state = PERF_EVENT_STATE_INACTIVE;
933 event->tstamp_enabled = ctx->time - event->total_time_enabled; 905 event->tstamp_enabled = ctx->time - event->total_time_enabled;
934 list_for_each_entry(sub, &event->sibling_list, group_entry) 906 list_for_each_entry(sub, &event->sibling_list, group_entry) {
935 if (sub->state >= PERF_EVENT_STATE_INACTIVE) 907 if (sub->state >= PERF_EVENT_STATE_INACTIVE) {
936 sub->tstamp_enabled = 908 sub->tstamp_enabled =
937 ctx->time - sub->total_time_enabled; 909 ctx->time - sub->total_time_enabled;
910 }
911 }
938} 912}
939 913
940/* 914/*
@@ -943,9 +917,9 @@ static void __perf_event_mark_enabled(struct perf_event *event,
943static void __perf_event_enable(void *info) 917static void __perf_event_enable(void *info)
944{ 918{
945 struct perf_event *event = info; 919 struct perf_event *event = info;
946 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
947 struct perf_event_context *ctx = event->ctx; 920 struct perf_event_context *ctx = event->ctx;
948 struct perf_event *leader = event->group_leader; 921 struct perf_event *leader = event->group_leader;
922 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
949 int err; 923 int err;
950 924
951 /* 925 /*
@@ -979,12 +953,10 @@ static void __perf_event_enable(void *info)
979 if (!group_can_go_on(event, cpuctx, 1)) { 953 if (!group_can_go_on(event, cpuctx, 1)) {
980 err = -EEXIST; 954 err = -EEXIST;
981 } else { 955 } else {
982 perf_disable();
983 if (event == leader) 956 if (event == leader)
984 err = group_sched_in(event, cpuctx, ctx); 957 err = group_sched_in(event, cpuctx, ctx);
985 else 958 else
986 err = event_sched_in(event, cpuctx, ctx); 959 err = event_sched_in(event, cpuctx, ctx);
987 perf_enable();
988 } 960 }
989 961
990 if (err) { 962 if (err) {
@@ -1000,7 +972,7 @@ static void __perf_event_enable(void *info)
1000 } 972 }
1001 } 973 }
1002 974
1003 unlock: 975unlock:
1004 raw_spin_unlock(&ctx->lock); 976 raw_spin_unlock(&ctx->lock);
1005} 977}
1006 978
@@ -1041,7 +1013,7 @@ void perf_event_enable(struct perf_event *event)
1041 if (event->state == PERF_EVENT_STATE_ERROR) 1013 if (event->state == PERF_EVENT_STATE_ERROR)
1042 event->state = PERF_EVENT_STATE_OFF; 1014 event->state = PERF_EVENT_STATE_OFF;
1043 1015
1044 retry: 1016retry:
1045 raw_spin_unlock_irq(&ctx->lock); 1017 raw_spin_unlock_irq(&ctx->lock);
1046 task_oncpu_function_call(task, __perf_event_enable, event); 1018 task_oncpu_function_call(task, __perf_event_enable, event);
1047 1019
@@ -1061,7 +1033,7 @@ void perf_event_enable(struct perf_event *event)
1061 if (event->state == PERF_EVENT_STATE_OFF) 1033 if (event->state == PERF_EVENT_STATE_OFF)
1062 __perf_event_mark_enabled(event, ctx); 1034 __perf_event_mark_enabled(event, ctx);
1063 1035
1064 out: 1036out:
1065 raw_spin_unlock_irq(&ctx->lock); 1037 raw_spin_unlock_irq(&ctx->lock);
1066} 1038}
1067 1039
@@ -1092,26 +1064,26 @@ static void ctx_sched_out(struct perf_event_context *ctx,
1092 struct perf_event *event; 1064 struct perf_event *event;
1093 1065
1094 raw_spin_lock(&ctx->lock); 1066 raw_spin_lock(&ctx->lock);
1067 perf_pmu_disable(ctx->pmu);
1095 ctx->is_active = 0; 1068 ctx->is_active = 0;
1096 if (likely(!ctx->nr_events)) 1069 if (likely(!ctx->nr_events))
1097 goto out; 1070 goto out;
1098 update_context_time(ctx); 1071 update_context_time(ctx);
1099 1072
1100 perf_disable();
1101 if (!ctx->nr_active) 1073 if (!ctx->nr_active)
1102 goto out_enable; 1074 goto out;
1103 1075
1104 if (event_type & EVENT_PINNED) 1076 if (event_type & EVENT_PINNED) {
1105 list_for_each_entry(event, &ctx->pinned_groups, group_entry) 1077 list_for_each_entry(event, &ctx->pinned_groups, group_entry)
1106 group_sched_out(event, cpuctx, ctx); 1078 group_sched_out(event, cpuctx, ctx);
1079 }
1107 1080
1108 if (event_type & EVENT_FLEXIBLE) 1081 if (event_type & EVENT_FLEXIBLE) {
1109 list_for_each_entry(event, &ctx->flexible_groups, group_entry) 1082 list_for_each_entry(event, &ctx->flexible_groups, group_entry)
1110 group_sched_out(event, cpuctx, ctx); 1083 group_sched_out(event, cpuctx, ctx);
1111 1084 }
1112 out_enable: 1085out:
1113 perf_enable(); 1086 perf_pmu_enable(ctx->pmu);
1114 out:
1115 raw_spin_unlock(&ctx->lock); 1087 raw_spin_unlock(&ctx->lock);
1116} 1088}
1117 1089
@@ -1209,34 +1181,25 @@ static void perf_event_sync_stat(struct perf_event_context *ctx,
1209 } 1181 }
1210} 1182}
1211 1183
1212/* 1184void perf_event_context_sched_out(struct task_struct *task, int ctxn,
1213 * Called from scheduler to remove the events of the current task, 1185 struct task_struct *next)
1214 * with interrupts disabled.
1215 *
1216 * We stop each event and update the event value in event->count.
1217 *
1218 * This does not protect us against NMI, but disable()
1219 * sets the disabled bit in the control field of event _before_
1220 * accessing the event control register. If a NMI hits, then it will
1221 * not restart the event.
1222 */
1223void perf_event_task_sched_out(struct task_struct *task,
1224 struct task_struct *next)
1225{ 1186{
1226 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); 1187 struct perf_event_context *ctx = task->perf_event_ctxp[ctxn];
1227 struct perf_event_context *ctx = task->perf_event_ctxp;
1228 struct perf_event_context *next_ctx; 1188 struct perf_event_context *next_ctx;
1229 struct perf_event_context *parent; 1189 struct perf_event_context *parent;
1190 struct perf_cpu_context *cpuctx;
1230 int do_switch = 1; 1191 int do_switch = 1;
1231 1192
1232 perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); 1193 if (likely(!ctx))
1194 return;
1233 1195
1234 if (likely(!ctx || !cpuctx->task_ctx)) 1196 cpuctx = __get_cpu_context(ctx);
1197 if (!cpuctx->task_ctx)
1235 return; 1198 return;
1236 1199
1237 rcu_read_lock(); 1200 rcu_read_lock();
1238 parent = rcu_dereference(ctx->parent_ctx); 1201 parent = rcu_dereference(ctx->parent_ctx);
1239 next_ctx = next->perf_event_ctxp; 1202 next_ctx = next->perf_event_ctxp[ctxn];
1240 if (parent && next_ctx && 1203 if (parent && next_ctx &&
1241 rcu_dereference(next_ctx->parent_ctx) == parent) { 1204 rcu_dereference(next_ctx->parent_ctx) == parent) {
1242 /* 1205 /*
@@ -1255,8 +1218,8 @@ void perf_event_task_sched_out(struct task_struct *task,
1255 * XXX do we need a memory barrier of sorts 1218 * XXX do we need a memory barrier of sorts
1256 * wrt to rcu_dereference() of perf_event_ctxp 1219 * wrt to rcu_dereference() of perf_event_ctxp
1257 */ 1220 */
1258 task->perf_event_ctxp = next_ctx; 1221 task->perf_event_ctxp[ctxn] = next_ctx;
1259 next->perf_event_ctxp = ctx; 1222 next->perf_event_ctxp[ctxn] = ctx;
1260 ctx->task = next; 1223 ctx->task = next;
1261 next_ctx->task = task; 1224 next_ctx->task = task;
1262 do_switch = 0; 1225 do_switch = 0;
@@ -1274,10 +1237,35 @@ void perf_event_task_sched_out(struct task_struct *task,
1274 } 1237 }
1275} 1238}
1276 1239
1240#define for_each_task_context_nr(ctxn) \
1241 for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++)
1242
1243/*
1244 * Called from scheduler to remove the events of the current task,
1245 * with interrupts disabled.
1246 *
1247 * We stop each event and update the event value in event->count.
1248 *
1249 * This does not protect us against NMI, but disable()
1250 * sets the disabled bit in the control field of event _before_
 1251 * accessing the event control register. If an NMI hits, then it will
1252 * not restart the event.
1253 */
1254void perf_event_task_sched_out(struct task_struct *task,
1255 struct task_struct *next)
1256{
1257 int ctxn;
1258
1259 perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0);
1260
1261 for_each_task_context_nr(ctxn)
1262 perf_event_context_sched_out(task, ctxn, next);
1263}
1264
1277static void task_ctx_sched_out(struct perf_event_context *ctx, 1265static void task_ctx_sched_out(struct perf_event_context *ctx,
1278 enum event_type_t event_type) 1266 enum event_type_t event_type)
1279{ 1267{
1280 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); 1268 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
1281 1269
1282 if (!cpuctx->task_ctx) 1270 if (!cpuctx->task_ctx)
1283 return; 1271 return;
@@ -1350,9 +1338,10 @@ ctx_flexible_sched_in(struct perf_event_context *ctx,
1350 if (event->cpu != -1 && event->cpu != smp_processor_id()) 1338 if (event->cpu != -1 && event->cpu != smp_processor_id())
1351 continue; 1339 continue;
1352 1340
1353 if (group_can_go_on(event, cpuctx, can_add_hw)) 1341 if (group_can_go_on(event, cpuctx, can_add_hw)) {
1354 if (group_sched_in(event, cpuctx, ctx)) 1342 if (group_sched_in(event, cpuctx, ctx))
1355 can_add_hw = 0; 1343 can_add_hw = 0;
1344 }
1356 } 1345 }
1357} 1346}
1358 1347
@@ -1368,8 +1357,6 @@ ctx_sched_in(struct perf_event_context *ctx,
1368 1357
1369 ctx->timestamp = perf_clock(); 1358 ctx->timestamp = perf_clock();
1370 1359
1371 perf_disable();
1372
1373 /* 1360 /*
1374 * First go through the list and put on any pinned groups 1361 * First go through the list and put on any pinned groups
1375 * in order to give them the best chance of going on. 1362 * in order to give them the best chance of going on.
@@ -1381,8 +1368,7 @@ ctx_sched_in(struct perf_event_context *ctx,
1381 if (event_type & EVENT_FLEXIBLE) 1368 if (event_type & EVENT_FLEXIBLE)
1382 ctx_flexible_sched_in(ctx, cpuctx); 1369 ctx_flexible_sched_in(ctx, cpuctx);
1383 1370
1384 perf_enable(); 1371out:
1385 out:
1386 raw_spin_unlock(&ctx->lock); 1372 raw_spin_unlock(&ctx->lock);
1387} 1373}
1388 1374
@@ -1394,43 +1380,28 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
1394 ctx_sched_in(ctx, cpuctx, event_type); 1380 ctx_sched_in(ctx, cpuctx, event_type);
1395} 1381}
1396 1382
1397static void task_ctx_sched_in(struct task_struct *task, 1383static void task_ctx_sched_in(struct perf_event_context *ctx,
1398 enum event_type_t event_type) 1384 enum event_type_t event_type)
1399{ 1385{
1400 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); 1386 struct perf_cpu_context *cpuctx;
1401 struct perf_event_context *ctx = task->perf_event_ctxp;
1402 1387
1403 if (likely(!ctx)) 1388 cpuctx = __get_cpu_context(ctx);
1404 return;
1405 if (cpuctx->task_ctx == ctx) 1389 if (cpuctx->task_ctx == ctx)
1406 return; 1390 return;
1391
1407 ctx_sched_in(ctx, cpuctx, event_type); 1392 ctx_sched_in(ctx, cpuctx, event_type);
1408 cpuctx->task_ctx = ctx; 1393 cpuctx->task_ctx = ctx;
1409} 1394}
1410/*
1411 * Called from scheduler to add the events of the current task
1412 * with interrupts disabled.
1413 *
1414 * We restore the event value and then enable it.
1415 *
1416 * This does not protect us against NMI, but enable()
1417 * sets the enabled bit in the control field of event _before_
1418 * accessing the event control register. If a NMI hits, then it will
1419 * keep the event running.
1420 */
1421void perf_event_task_sched_in(struct task_struct *task)
1422{
1423 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
1424 struct perf_event_context *ctx = task->perf_event_ctxp;
1425 1395
1426 if (likely(!ctx)) 1396void perf_event_context_sched_in(struct perf_event_context *ctx)
1427 return; 1397{
1398 struct perf_cpu_context *cpuctx;
1428 1399
1400 cpuctx = __get_cpu_context(ctx);
1429 if (cpuctx->task_ctx == ctx) 1401 if (cpuctx->task_ctx == ctx)
1430 return; 1402 return;
1431 1403
1432 perf_disable(); 1404 perf_pmu_disable(ctx->pmu);
1433
1434 /* 1405 /*
1435 * We want to keep the following priority order: 1406 * We want to keep the following priority order:
1436 * cpu pinned (that don't need to move), task pinned, 1407 * cpu pinned (that don't need to move), task pinned,
@@ -1444,7 +1415,37 @@ void perf_event_task_sched_in(struct task_struct *task)
1444 1415
1445 cpuctx->task_ctx = ctx; 1416 cpuctx->task_ctx = ctx;
1446 1417
1447 perf_enable(); 1418 /*
1419 * Since these rotations are per-cpu, we need to ensure the
1420 * cpu-context we got scheduled on is actually rotating.
1421 */
1422 perf_pmu_rotate_start(ctx->pmu);
1423 perf_pmu_enable(ctx->pmu);
1424}
1425
1426/*
1427 * Called from scheduler to add the events of the current task
1428 * with interrupts disabled.
1429 *
1430 * We restore the event value and then enable it.
1431 *
1432 * This does not protect us against NMI, but enable()
1433 * sets the enabled bit in the control field of event _before_
 1434 * accessing the event control register. If an NMI hits, then it will
1435 * keep the event running.
1436 */
1437void perf_event_task_sched_in(struct task_struct *task)
1438{
1439 struct perf_event_context *ctx;
1440 int ctxn;
1441
1442 for_each_task_context_nr(ctxn) {
1443 ctx = task->perf_event_ctxp[ctxn];
1444 if (likely(!ctx))
1445 continue;
1446
1447 perf_event_context_sched_in(ctx);
1448 }
1448} 1449}
1449 1450
1450#define MAX_INTERRUPTS (~0ULL) 1451#define MAX_INTERRUPTS (~0ULL)
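[Annotation] for_each_task_context_nr() above walks the new per-class context slots. The task_struct side of that change looks roughly like this sketch; the enum contents are an assumption, but the point is one slot per task-context class (e.g. software vs. hardware PMUs):

enum perf_event_task_context {
	perf_invalid_context = -1,
	perf_hw_context = 0,
	perf_sw_context,
	perf_nr_task_contexts,
};

struct task_struct {
	/* ... sketch of the relevant fragment only ... */
	struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
	/* ... */
};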
@@ -1524,22 +1525,6 @@ do { \
1524 return div64_u64(dividend, divisor); 1525 return div64_u64(dividend, divisor);
1525} 1526}
1526 1527
1527static void perf_event_stop(struct perf_event *event)
1528{
1529 if (!event->pmu->stop)
1530 return event->pmu->disable(event);
1531
1532 return event->pmu->stop(event);
1533}
1534
1535static int perf_event_start(struct perf_event *event)
1536{
1537 if (!event->pmu->start)
1538 return event->pmu->enable(event);
1539
1540 return event->pmu->start(event);
1541}
1542
1543static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) 1528static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
1544{ 1529{
1545 struct hw_perf_event *hwc = &event->hw; 1530 struct hw_perf_event *hwc = &event->hw;
@@ -1559,15 +1544,13 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
1559 hwc->sample_period = sample_period; 1544 hwc->sample_period = sample_period;
1560 1545
1561 if (local64_read(&hwc->period_left) > 8*sample_period) { 1546 if (local64_read(&hwc->period_left) > 8*sample_period) {
1562 perf_disable(); 1547 event->pmu->stop(event, PERF_EF_UPDATE);
1563 perf_event_stop(event);
1564 local64_set(&hwc->period_left, 0); 1548 local64_set(&hwc->period_left, 0);
1565 perf_event_start(event); 1549 event->pmu->start(event, PERF_EF_RELOAD);
1566 perf_enable();
1567 } 1550 }
1568} 1551}
1569 1552
1570static void perf_ctx_adjust_freq(struct perf_event_context *ctx) 1553static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
1571{ 1554{
1572 struct perf_event *event; 1555 struct perf_event *event;
1573 struct hw_perf_event *hwc; 1556 struct hw_perf_event *hwc;
@@ -1592,23 +1575,19 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
1592 */ 1575 */
1593 if (interrupts == MAX_INTERRUPTS) { 1576 if (interrupts == MAX_INTERRUPTS) {
1594 perf_log_throttle(event, 1); 1577 perf_log_throttle(event, 1);
1595 perf_disable(); 1578 event->pmu->start(event, 0);
1596 event->pmu->unthrottle(event);
1597 perf_enable();
1598 } 1579 }
1599 1580
1600 if (!event->attr.freq || !event->attr.sample_freq) 1581 if (!event->attr.freq || !event->attr.sample_freq)
1601 continue; 1582 continue;
1602 1583
1603 perf_disable();
1604 event->pmu->read(event); 1584 event->pmu->read(event);
1605 now = local64_read(&event->count); 1585 now = local64_read(&event->count);
1606 delta = now - hwc->freq_count_stamp; 1586 delta = now - hwc->freq_count_stamp;
1607 hwc->freq_count_stamp = now; 1587 hwc->freq_count_stamp = now;
1608 1588
1609 if (delta > 0) 1589 if (delta > 0)
1610 perf_adjust_period(event, TICK_NSEC, delta); 1590 perf_adjust_period(event, period, delta);
1611 perf_enable();
1612 } 1591 }
1613 raw_spin_unlock(&ctx->lock); 1592 raw_spin_unlock(&ctx->lock);
1614} 1593}
@@ -1626,32 +1605,38 @@ static void rotate_ctx(struct perf_event_context *ctx)
1626 raw_spin_unlock(&ctx->lock); 1605 raw_spin_unlock(&ctx->lock);
1627} 1606}
1628 1607
1629void perf_event_task_tick(struct task_struct *curr) 1608/*
1609 * perf_pmu_rotate_start() and perf_rotate_context() are fully serialized
1610 * because they're strictly cpu affine and rotate_start is called with IRQs
1611 * disabled, while rotate_context is called from IRQ context.
1612 */
1613static void perf_rotate_context(struct perf_cpu_context *cpuctx)
1630{ 1614{
1631 struct perf_cpu_context *cpuctx; 1615 u64 interval = (u64)cpuctx->jiffies_interval * TICK_NSEC;
1632 struct perf_event_context *ctx; 1616 struct perf_event_context *ctx = NULL;
1633 int rotate = 0; 1617 int rotate = 0, remove = 1;
1634 1618
1635 if (!atomic_read(&nr_events)) 1619 if (cpuctx->ctx.nr_events) {
1636 return; 1620 remove = 0;
1637 1621 if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
1638 cpuctx = &__get_cpu_var(perf_cpu_context); 1622 rotate = 1;
1639 if (cpuctx->ctx.nr_events && 1623 }
1640 cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
1641 rotate = 1;
1642 1624
1643 ctx = curr->perf_event_ctxp; 1625 ctx = cpuctx->task_ctx;
1644 if (ctx && ctx->nr_events && ctx->nr_events != ctx->nr_active) 1626 if (ctx && ctx->nr_events) {
1645 rotate = 1; 1627 remove = 0;
1628 if (ctx->nr_events != ctx->nr_active)
1629 rotate = 1;
1630 }
1646 1631
1647 perf_ctx_adjust_freq(&cpuctx->ctx); 1632 perf_pmu_disable(cpuctx->ctx.pmu);
1633 perf_ctx_adjust_freq(&cpuctx->ctx, interval);
1648 if (ctx) 1634 if (ctx)
1649 perf_ctx_adjust_freq(ctx); 1635 perf_ctx_adjust_freq(ctx, interval);
1650 1636
1651 if (!rotate) 1637 if (!rotate)
1652 return; 1638 goto done;
1653 1639
1654 perf_disable();
1655 cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); 1640 cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
1656 if (ctx) 1641 if (ctx)
1657 task_ctx_sched_out(ctx, EVENT_FLEXIBLE); 1642 task_ctx_sched_out(ctx, EVENT_FLEXIBLE);
@@ -1662,8 +1647,27 @@ void perf_event_task_tick(struct task_struct *curr)
1662 1647
1663 cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); 1648 cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE);
1664 if (ctx) 1649 if (ctx)
1665 task_ctx_sched_in(curr, EVENT_FLEXIBLE); 1650 task_ctx_sched_in(ctx, EVENT_FLEXIBLE);
1666 perf_enable(); 1651
1652done:
1653 if (remove)
1654 list_del_init(&cpuctx->rotation_list);
1655
1656 perf_pmu_enable(cpuctx->ctx.pmu);
1657}
1658
1659void perf_event_task_tick(void)
1660{
1661 struct list_head *head = &__get_cpu_var(rotation_list);
1662 struct perf_cpu_context *cpuctx, *tmp;
1663
1664 WARN_ON(!irqs_disabled());
1665
1666 list_for_each_entry_safe(cpuctx, tmp, head, rotation_list) {
1667 if (cpuctx->jiffies_interval == 1 ||
1668 !(jiffies % cpuctx->jiffies_interval))
1669 perf_rotate_context(cpuctx);
1670 }
1667} 1671}
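[Annotation] perf_event_task_tick() now throttles rotation per cpu-context via jiffies_interval: an interval of 1 rotates on every tick, an interval of N only when jiffies is a multiple of N. A sketch of where that interval could be seeded when a PMU's per-cpu contexts are set up (the initialisation site is an assumption):

static void init_rotation_interval(struct pmu *pmu)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		struct perf_cpu_context *cpuctx;

		cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
		cpuctx->jiffies_interval = 1;	/* rotate on every tick */
	}
}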
1668 1672
1669static int event_enable_on_exec(struct perf_event *event, 1673static int event_enable_on_exec(struct perf_event *event,
@@ -1685,20 +1689,18 @@ static int event_enable_on_exec(struct perf_event *event,
1685 * Enable all of a task's events that have been marked enable-on-exec. 1689 * Enable all of a task's events that have been marked enable-on-exec.
1686 * This expects task == current. 1690 * This expects task == current.
1687 */ 1691 */
1688static void perf_event_enable_on_exec(struct task_struct *task) 1692static void perf_event_enable_on_exec(struct perf_event_context *ctx)
1689{ 1693{
1690 struct perf_event_context *ctx;
1691 struct perf_event *event; 1694 struct perf_event *event;
1692 unsigned long flags; 1695 unsigned long flags;
1693 int enabled = 0; 1696 int enabled = 0;
1694 int ret; 1697 int ret;
1695 1698
1696 local_irq_save(flags); 1699 local_irq_save(flags);
1697 ctx = task->perf_event_ctxp;
1698 if (!ctx || !ctx->nr_events) 1700 if (!ctx || !ctx->nr_events)
1699 goto out; 1701 goto out;
1700 1702
1701 __perf_event_task_sched_out(ctx); 1703 task_ctx_sched_out(ctx, EVENT_ALL);
1702 1704
1703 raw_spin_lock(&ctx->lock); 1705 raw_spin_lock(&ctx->lock);
1704 1706
@@ -1722,8 +1724,8 @@ static void perf_event_enable_on_exec(struct task_struct *task)
1722 1724
1723 raw_spin_unlock(&ctx->lock); 1725 raw_spin_unlock(&ctx->lock);
1724 1726
1725 perf_event_task_sched_in(task); 1727 perf_event_context_sched_in(ctx);
1726 out: 1728out:
1727 local_irq_restore(flags); 1729 local_irq_restore(flags);
1728} 1730}
1729 1731
@@ -1732,9 +1734,9 @@ static void perf_event_enable_on_exec(struct task_struct *task)
1732 */ 1734 */
1733static void __perf_event_read(void *info) 1735static void __perf_event_read(void *info)
1734{ 1736{
1735 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
1736 struct perf_event *event = info; 1737 struct perf_event *event = info;
1737 struct perf_event_context *ctx = event->ctx; 1738 struct perf_event_context *ctx = event->ctx;
1739 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
1738 1740
1739 /* 1741 /*
1740 * If this is a task context, we need to check whether it is 1742 * If this is a task context, we need to check whether it is
@@ -1782,11 +1784,219 @@ static u64 perf_event_read(struct perf_event *event)
1782} 1784}
1783 1785
1784/* 1786/*
1785 * Initialize the perf_event context in a task_struct: 1787 * Callchain support
1786 */ 1788 */
1789
1790struct callchain_cpus_entries {
1791 struct rcu_head rcu_head;
1792 struct perf_callchain_entry *cpu_entries[0];
1793};
1794
1795static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]);
1796static atomic_t nr_callchain_events;
1797static DEFINE_MUTEX(callchain_mutex);
1798struct callchain_cpus_entries *callchain_cpus_entries;
1799
1800
1801__weak void perf_callchain_kernel(struct perf_callchain_entry *entry,
1802 struct pt_regs *regs)
1803{
1804}
1805
1806__weak void perf_callchain_user(struct perf_callchain_entry *entry,
1807 struct pt_regs *regs)
1808{
1809}
1810
1811static void release_callchain_buffers_rcu(struct rcu_head *head)
1812{
1813 struct callchain_cpus_entries *entries;
1814 int cpu;
1815
1816 entries = container_of(head, struct callchain_cpus_entries, rcu_head);
1817
1818 for_each_possible_cpu(cpu)
1819 kfree(entries->cpu_entries[cpu]);
1820
1821 kfree(entries);
1822}
1823
1824static void release_callchain_buffers(void)
1825{
1826 struct callchain_cpus_entries *entries;
1827
1828 entries = callchain_cpus_entries;
1829 rcu_assign_pointer(callchain_cpus_entries, NULL);
1830 call_rcu(&entries->rcu_head, release_callchain_buffers_rcu);
1831}
1832
1833static int alloc_callchain_buffers(void)
1834{
1835 int cpu;
1836 int size;
1837 struct callchain_cpus_entries *entries;
1838
1839 /*
1840 * We can't use the percpu allocation API for data that can be
1841 * accessed from NMI. Use a temporary manual per cpu allocation
1842 * until that gets sorted out.
1843 */
1844 size = sizeof(*entries) + sizeof(struct perf_callchain_entry *) *
1845 num_possible_cpus();
1846
1847 entries = kzalloc(size, GFP_KERNEL);
1848 if (!entries)
1849 return -ENOMEM;
1850
1851 size = sizeof(struct perf_callchain_entry) * PERF_NR_CONTEXTS;
1852
1853 for_each_possible_cpu(cpu) {
1854 entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL,
1855 cpu_to_node(cpu));
1856 if (!entries->cpu_entries[cpu])
1857 goto fail;
1858 }
1859
1860 rcu_assign_pointer(callchain_cpus_entries, entries);
1861
1862 return 0;
1863
1864fail:
1865 for_each_possible_cpu(cpu)
1866 kfree(entries->cpu_entries[cpu]);
1867 kfree(entries);
1868
1869 return -ENOMEM;
1870}
1871
1872static int get_callchain_buffers(void)
1873{
1874 int err = 0;
1875 int count;
1876
1877 mutex_lock(&callchain_mutex);
1878
1879 count = atomic_inc_return(&nr_callchain_events);
1880 if (WARN_ON_ONCE(count < 1)) {
1881 err = -EINVAL;
1882 goto exit;
1883 }
1884
1885 if (count > 1) {
1886 /* If the allocation failed, give up */
1887 if (!callchain_cpus_entries)
1888 err = -ENOMEM;
1889 goto exit;
1890 }
1891
1892 err = alloc_callchain_buffers();
1893 if (err)
1894 release_callchain_buffers();
1895exit:
1896 mutex_unlock(&callchain_mutex);
1897
1898 return err;
1899}
1900
1901static void put_callchain_buffers(void)
1902{
1903 if (atomic_dec_and_mutex_lock(&nr_callchain_events, &callchain_mutex)) {
1904 release_callchain_buffers();
1905 mutex_unlock(&callchain_mutex);
1906 }
1907}
1908
1909static int get_recursion_context(int *recursion)
1910{
1911 int rctx;
1912
1913 if (in_nmi())
1914 rctx = 3;
1915 else if (in_irq())
1916 rctx = 2;
1917 else if (in_softirq())
1918 rctx = 1;
1919 else
1920 rctx = 0;
1921
1922 if (recursion[rctx])
1923 return -1;
1924
1925 recursion[rctx]++;
1926 barrier();
1927
1928 return rctx;
1929}
1930
1931static inline void put_recursion_context(int *recursion, int rctx)
1932{
1933 barrier();
1934 recursion[rctx]--;
1935}
1936
1937static struct perf_callchain_entry *get_callchain_entry(int *rctx)
1938{
1939 int cpu;
1940 struct callchain_cpus_entries *entries;
1941
1942 *rctx = get_recursion_context(__get_cpu_var(callchain_recursion));
1943 if (*rctx == -1)
1944 return NULL;
1945
1946 entries = rcu_dereference(callchain_cpus_entries);
1947 if (!entries)
1948 return NULL;
1949
1950 cpu = smp_processor_id();
1951
1952 return &entries->cpu_entries[cpu][*rctx];
1953}
1954
1787static void 1955static void
1788__perf_event_init_context(struct perf_event_context *ctx, 1956put_callchain_entry(int rctx)
1789 struct task_struct *task) 1957{
1958 put_recursion_context(__get_cpu_var(callchain_recursion), rctx);
1959}
1960
1961static struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
1962{
1963 int rctx;
1964 struct perf_callchain_entry *entry;
1965
1966
1967 entry = get_callchain_entry(&rctx);
1968 if (rctx == -1)
1969 return NULL;
1970
1971 if (!entry)
1972 goto exit_put;
1973
1974 entry->nr = 0;
1975
1976 if (!user_mode(regs)) {
1977 perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
1978 perf_callchain_kernel(entry, regs);
1979 if (current->mm)
1980 regs = task_pt_regs(current);
1981 else
1982 regs = NULL;
1983 }
1984
1985 if (regs) {
1986 perf_callchain_store(entry, PERF_CONTEXT_USER);
1987 perf_callchain_user(entry, regs);
1988 }
1989
1990exit_put:
1991 put_callchain_entry(rctx);
1992
1993 return entry;
1994}
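[Annotation] The recursion guard above gives each execution context (task, softirq, hardirq, NMI) its own slot, so a callchain capture that interrupts one in a *different* context still proceeds, while genuine recursion within the same context bails out. A usage sketch of the required pairing:

static void example_capture(struct pt_regs *regs)
{
	int rctx;

	rctx = get_recursion_context(__get_cpu_var(callchain_recursion));
	if (rctx < 0)
		return;		/* already capturing in this context */

	/* ... fill the per-cpu callchain entry for slot rctx ... */

	put_recursion_context(__get_cpu_var(callchain_recursion), rctx);
}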
1995
1996/*
1997 * Initialize the perf_event context in a task_struct:
1998 */
1999static void __perf_event_init_context(struct perf_event_context *ctx)
1790{ 2000{
1791 raw_spin_lock_init(&ctx->lock); 2001 raw_spin_lock_init(&ctx->lock);
1792 mutex_init(&ctx->mutex); 2002 mutex_init(&ctx->mutex);
@@ -1794,45 +2004,38 @@ __perf_event_init_context(struct perf_event_context *ctx,
1794 INIT_LIST_HEAD(&ctx->flexible_groups); 2004 INIT_LIST_HEAD(&ctx->flexible_groups);
1795 INIT_LIST_HEAD(&ctx->event_list); 2005 INIT_LIST_HEAD(&ctx->event_list);
1796 atomic_set(&ctx->refcount, 1); 2006 atomic_set(&ctx->refcount, 1);
1797 ctx->task = task;
1798} 2007}
1799 2008
1800static struct perf_event_context *find_get_context(pid_t pid, int cpu) 2009static struct perf_event_context *
2010alloc_perf_context(struct pmu *pmu, struct task_struct *task)
1801{ 2011{
1802 struct perf_event_context *ctx; 2012 struct perf_event_context *ctx;
1803 struct perf_cpu_context *cpuctx;
1804 struct task_struct *task;
1805 unsigned long flags;
1806 int err;
1807
1808 if (pid == -1 && cpu != -1) {
1809 /* Must be root to operate on a CPU event: */
1810 if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
1811 return ERR_PTR(-EACCES);
1812 2013
1813 if (cpu < 0 || cpu >= nr_cpumask_bits) 2014 ctx = kzalloc(sizeof(struct perf_event_context), GFP_KERNEL);
1814 return ERR_PTR(-EINVAL); 2015 if (!ctx)
2016 return NULL;
1815 2017
1816 /* 2018 __perf_event_init_context(ctx);
 1817 * We could be clever and allow attaching an event to an 2019 if (task) {
1818 * offline CPU and activate it when the CPU comes up, but 2020 ctx->task = task;
1819 * that's for later. 2021 get_task_struct(task);
1820 */ 2022 }
1821 if (!cpu_online(cpu)) 2023 ctx->pmu = pmu;
1822 return ERR_PTR(-ENODEV);
1823 2024
1824 cpuctx = &per_cpu(perf_cpu_context, cpu); 2025 return ctx;
1825 ctx = &cpuctx->ctx; 2026}
1826 get_ctx(ctx);
1827 2027
1828 return ctx; 2028static struct task_struct *
1829 } 2029find_lively_task_by_vpid(pid_t vpid)
2030{
2031 struct task_struct *task;
2032 int err;
1830 2033
1831 rcu_read_lock(); 2034 rcu_read_lock();
1832 if (!pid) 2035 if (!vpid)
1833 task = current; 2036 task = current;
1834 else 2037 else
1835 task = find_task_by_vpid(pid); 2038 task = find_task_by_vpid(vpid);
1836 if (task) 2039 if (task)
1837 get_task_struct(task); 2040 get_task_struct(task);
1838 rcu_read_unlock(); 2041 rcu_read_unlock();
@@ -1852,35 +2055,79 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu)
1852 if (!ptrace_may_access(task, PTRACE_MODE_READ)) 2055 if (!ptrace_may_access(task, PTRACE_MODE_READ))
1853 goto errout; 2056 goto errout;
1854 2057
1855 retry: 2058 return task;
1856 ctx = perf_lock_task_context(task, &flags); 2059errout:
2060 put_task_struct(task);
2061 return ERR_PTR(err);
2062
2063}
2064
2065static struct perf_event_context *
2066find_get_context(struct pmu *pmu, struct task_struct *task, int cpu)
2067{
2068 struct perf_event_context *ctx;
2069 struct perf_cpu_context *cpuctx;
2070 unsigned long flags;
2071 int ctxn, err;
2072
2073 if (!task && cpu != -1) {
2074 /* Must be root to operate on a CPU event: */
2075 if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
2076 return ERR_PTR(-EACCES);
2077
2078 if (cpu < 0 || cpu >= nr_cpumask_bits)
2079 return ERR_PTR(-EINVAL);
2080
2081 /*
 2082 * We could be clever and allow attaching an event to an
2083 * offline CPU and activate it when the CPU comes up, but
2084 * that's for later.
2085 */
2086 if (!cpu_online(cpu))
2087 return ERR_PTR(-ENODEV);
2088
2089 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
2090 ctx = &cpuctx->ctx;
2091 get_ctx(ctx);
2092
2093 return ctx;
2094 }
2095
2096 err = -EINVAL;
2097 ctxn = pmu->task_ctx_nr;
2098 if (ctxn < 0)
2099 goto errout;
2100
2101retry:
2102 ctx = perf_lock_task_context(task, ctxn, &flags);
1857 if (ctx) { 2103 if (ctx) {
1858 unclone_ctx(ctx); 2104 unclone_ctx(ctx);
1859 raw_spin_unlock_irqrestore(&ctx->lock, flags); 2105 raw_spin_unlock_irqrestore(&ctx->lock, flags);
1860 } 2106 }
1861 2107
1862 if (!ctx) { 2108 if (!ctx) {
1863 ctx = kzalloc(sizeof(struct perf_event_context), GFP_KERNEL); 2109 ctx = alloc_perf_context(pmu, task);
1864 err = -ENOMEM; 2110 err = -ENOMEM;
1865 if (!ctx) 2111 if (!ctx)
1866 goto errout; 2112 goto errout;
1867 __perf_event_init_context(ctx, task); 2113
1868 get_ctx(ctx); 2114 get_ctx(ctx);
1869 if (cmpxchg(&task->perf_event_ctxp, NULL, ctx)) { 2115
2116 if (cmpxchg(&task->perf_event_ctxp[ctxn], NULL, ctx)) {
1870 /* 2117 /*
1871 * We raced with some other task; use 2118 * We raced with some other task; use
1872 * the context they set. 2119 * the context they set.
1873 */ 2120 */
2121 put_task_struct(task);
1874 kfree(ctx); 2122 kfree(ctx);
1875 goto retry; 2123 goto retry;
1876 } 2124 }
1877 get_task_struct(task);
1878 } 2125 }
1879 2126
1880 put_task_struct(task); 2127 put_task_struct(task);
1881 return ctx; 2128 return ctx;
1882 2129
1883 errout: 2130errout:
1884 put_task_struct(task); 2131 put_task_struct(task);
1885 return ERR_PTR(err); 2132 return ERR_PTR(err);
1886} 2133}
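[Annotation] The cmpxchg() in find_get_context() publishes a freshly allocated context only if the task's slot is still NULL; a loser frees its copy and retries through perf_lock_task_context(). The generic form of that publish-or-retry idiom, as a sketch (the real code retries so the winner's context is taken with its lock held rather than used directly):

static struct perf_event_context *
publish_ctx(struct perf_event_context **slot, struct perf_event_context *new)
{
	struct perf_event_context *old = cmpxchg(slot, NULL, new);

	if (old) {		/* somebody beat us to it */
		kfree(new);
		return old;
	}
	return new;		/* we published it */
}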
@@ -1913,6 +2160,8 @@ static void free_event(struct perf_event *event)
1913 atomic_dec(&nr_comm_events); 2160 atomic_dec(&nr_comm_events);
1914 if (event->attr.task) 2161 if (event->attr.task)
1915 atomic_dec(&nr_task_events); 2162 atomic_dec(&nr_task_events);
2163 if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
2164 put_callchain_buffers();
1916 } 2165 }
1917 2166
1918 if (event->buffer) { 2167 if (event->buffer) {
@@ -1923,7 +2172,9 @@ static void free_event(struct perf_event *event)
1923 if (event->destroy) 2172 if (event->destroy)
1924 event->destroy(event); 2173 event->destroy(event);
1925 2174
1926 put_ctx(event->ctx); 2175 if (event->ctx)
2176 put_ctx(event->ctx);
2177
1927 call_rcu(&event->rcu_head, free_event_rcu); 2178 call_rcu(&event->rcu_head, free_event_rcu);
1928} 2179}
1929 2180
@@ -2344,6 +2595,9 @@ int perf_event_task_disable(void)
2344 2595
2345static int perf_event_index(struct perf_event *event) 2596static int perf_event_index(struct perf_event *event)
2346{ 2597{
2598 if (event->hw.state & PERF_HES_STOPPED)
2599 return 0;
2600
2347 if (event->state != PERF_EVENT_STATE_ACTIVE) 2601 if (event->state != PERF_EVENT_STATE_ACTIVE)
2348 return 0; 2602 return 0;
2349 2603
@@ -2956,16 +3210,6 @@ void perf_event_do_pending(void)
2956} 3210}
2957 3211
2958/* 3212/*
2959 * Callchain support -- arch specific
2960 */
2961
2962__weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
2963{
2964 return NULL;
2965}
2966
2967
2968/*
2969 * We assume there is only KVM supporting the callbacks. 3213 * We assume there is only KVM supporting the callbacks.
2970 * Later on, we might change it to a list if there is 3214 * Later on, we might change it to a list if there is
2971 * another virtualization implementation supporting the callbacks. 3215 * another virtualization implementation supporting the callbacks.
@@ -3071,7 +3315,7 @@ again:
3071 if (handle->wakeup != local_read(&buffer->wakeup)) 3315 if (handle->wakeup != local_read(&buffer->wakeup))
3072 perf_output_wakeup(handle); 3316 perf_output_wakeup(handle);
3073 3317
3074 out: 3318out:
3075 preempt_enable(); 3319 preempt_enable();
3076} 3320}
3077 3321
@@ -3459,14 +3703,20 @@ static void perf_event_output(struct perf_event *event, int nmi,
3459 struct perf_output_handle handle; 3703 struct perf_output_handle handle;
3460 struct perf_event_header header; 3704 struct perf_event_header header;
3461 3705
3706 /* protect the callchain buffers */
3707 rcu_read_lock();
3708
3462 perf_prepare_sample(&header, data, event, regs); 3709 perf_prepare_sample(&header, data, event, regs);
3463 3710
3464 if (perf_output_begin(&handle, event, header.size, nmi, 1)) 3711 if (perf_output_begin(&handle, event, header.size, nmi, 1))
3465 return; 3712 goto exit;
3466 3713
3467 perf_output_sample(&handle, &header, data, event); 3714 perf_output_sample(&handle, &header, data, event);
3468 3715
3469 perf_output_end(&handle); 3716 perf_output_end(&handle);
3717
3718exit:
3719 rcu_read_unlock();
3470} 3720}
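[Annotation] The rcu_read_lock() added around sample output works because the callchain buffers are only ever freed through call_rcu() (see release_callchain_buffers() earlier in this diff), so any dereference of callchain_cpus_entries inside a read-side section stays valid. The reader-side pattern, as a sketch:

static void reader_side_sketch(int cpu, int rctx)
{
	struct callchain_cpus_entries *entries;
	struct perf_callchain_entry *entry;

	rcu_read_lock();
	entries = rcu_dereference(callchain_cpus_entries);
	if (entries) {
		entry = &entries->cpu_entries[cpu][rctx];
		/* entry stays valid here: the call_rcu() callback that
		 * frees the buffers cannot run inside this section */
	}
	rcu_read_unlock();
}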
3471 3721
3472/* 3722/*
@@ -3580,16 +3830,27 @@ static void perf_event_task_ctx(struct perf_event_context *ctx,
3580static void perf_event_task_event(struct perf_task_event *task_event) 3830static void perf_event_task_event(struct perf_task_event *task_event)
3581{ 3831{
3582 struct perf_cpu_context *cpuctx; 3832 struct perf_cpu_context *cpuctx;
3583 struct perf_event_context *ctx = task_event->task_ctx; 3833 struct perf_event_context *ctx;
3834 struct pmu *pmu;
3835 int ctxn;
3584 3836
3585 rcu_read_lock(); 3837 rcu_read_lock();
3586 cpuctx = &get_cpu_var(perf_cpu_context); 3838 list_for_each_entry_rcu(pmu, &pmus, entry) {
3587 perf_event_task_ctx(&cpuctx->ctx, task_event); 3839 cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
3588 if (!ctx) 3840 perf_event_task_ctx(&cpuctx->ctx, task_event);
3589 ctx = rcu_dereference(current->perf_event_ctxp); 3841
3590 if (ctx) 3842 ctx = task_event->task_ctx;
3591 perf_event_task_ctx(ctx, task_event); 3843 if (!ctx) {
3592 put_cpu_var(perf_cpu_context); 3844 ctxn = pmu->task_ctx_nr;
3845 if (ctxn < 0)
3846 goto next;
3847 ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
3848 }
3849 if (ctx)
3850 perf_event_task_ctx(ctx, task_event);
3851next:
3852 put_cpu_ptr(pmu->pmu_cpu_context);
3853 }
3593 rcu_read_unlock(); 3854 rcu_read_unlock();
3594} 3855}
3595 3856
@@ -3694,8 +3955,10 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
3694{ 3955{
3695 struct perf_cpu_context *cpuctx; 3956 struct perf_cpu_context *cpuctx;
3696 struct perf_event_context *ctx; 3957 struct perf_event_context *ctx;
3697 unsigned int size;
3698 char comm[TASK_COMM_LEN]; 3958 char comm[TASK_COMM_LEN];
3959 unsigned int size;
3960 struct pmu *pmu;
3961 int ctxn;
3699 3962
3700 memset(comm, 0, sizeof(comm)); 3963 memset(comm, 0, sizeof(comm));
3701 strlcpy(comm, comm_event->task->comm, sizeof(comm)); 3964 strlcpy(comm, comm_event->task->comm, sizeof(comm));
@@ -3707,21 +3970,36 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
3707 comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; 3970 comm_event->event_id.header.size = sizeof(comm_event->event_id) + size;
3708 3971
3709 rcu_read_lock(); 3972 rcu_read_lock();
3710 cpuctx = &get_cpu_var(perf_cpu_context); 3973 list_for_each_entry_rcu(pmu, &pmus, entry) {
3711 perf_event_comm_ctx(&cpuctx->ctx, comm_event); 3974 cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
3712 ctx = rcu_dereference(current->perf_event_ctxp); 3975 perf_event_comm_ctx(&cpuctx->ctx, comm_event);
3713 if (ctx) 3976
3714 perf_event_comm_ctx(ctx, comm_event); 3977 ctxn = pmu->task_ctx_nr;
3715 put_cpu_var(perf_cpu_context); 3978 if (ctxn < 0)
3979 goto next;
3980
3981 ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
3982 if (ctx)
3983 perf_event_comm_ctx(ctx, comm_event);
3984next:
3985 put_cpu_ptr(pmu->pmu_cpu_context);
3986 }
3716 rcu_read_unlock(); 3987 rcu_read_unlock();
3717} 3988}
3718 3989
3719void perf_event_comm(struct task_struct *task) 3990void perf_event_comm(struct task_struct *task)
3720{ 3991{
3721 struct perf_comm_event comm_event; 3992 struct perf_comm_event comm_event;
3993 struct perf_event_context *ctx;
3994 int ctxn;
3722 3995
3723 if (task->perf_event_ctxp) 3996 for_each_task_context_nr(ctxn) {
3724 perf_event_enable_on_exec(task); 3997 ctx = task->perf_event_ctxp[ctxn];
3998 if (!ctx)
3999 continue;
4000
4001 perf_event_enable_on_exec(ctx);
4002 }
3725 4003
3726 if (!atomic_read(&nr_comm_events)) 4004 if (!atomic_read(&nr_comm_events))
3727 return; 4005 return;
@@ -3823,6 +4101,8 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
3823 char tmp[16]; 4101 char tmp[16];
3824 char *buf = NULL; 4102 char *buf = NULL;
3825 const char *name; 4103 const char *name;
4104 struct pmu *pmu;
4105 int ctxn;
3826 4106
3827 memset(tmp, 0, sizeof(tmp)); 4107 memset(tmp, 0, sizeof(tmp));
3828 4108
@@ -3875,12 +4155,23 @@ got_name:
3875 mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; 4155 mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size;
3876 4156
3877 rcu_read_lock(); 4157 rcu_read_lock();
3878 cpuctx = &get_cpu_var(perf_cpu_context); 4158 list_for_each_entry_rcu(pmu, &pmus, entry) {
3879 perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, vma->vm_flags & VM_EXEC); 4159 cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
3880 ctx = rcu_dereference(current->perf_event_ctxp); 4160 perf_event_mmap_ctx(&cpuctx->ctx, mmap_event,
3881 if (ctx) 4161 vma->vm_flags & VM_EXEC);
3882 perf_event_mmap_ctx(ctx, mmap_event, vma->vm_flags & VM_EXEC); 4162
3883 put_cpu_var(perf_cpu_context); 4163 ctxn = pmu->task_ctx_nr;
4164 if (ctxn < 0)
4165 goto next;
4166
4167 ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
4168 if (ctx) {
4169 perf_event_mmap_ctx(ctx, mmap_event,
4170 vma->vm_flags & VM_EXEC);
4171 }
4172next:
4173 put_cpu_ptr(pmu->pmu_cpu_context);
4174 }
3884 rcu_read_unlock(); 4175 rcu_read_unlock();
3885 4176
3886 kfree(buf); 4177 kfree(buf);
@@ -3962,8 +4253,6 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
3962 struct hw_perf_event *hwc = &event->hw; 4253 struct hw_perf_event *hwc = &event->hw;
3963 int ret = 0; 4254 int ret = 0;
3964 4255
3965 throttle = (throttle && event->pmu->unthrottle != NULL);
3966
3967 if (!throttle) { 4256 if (!throttle) {
3968 hwc->interrupts++; 4257 hwc->interrupts++;
3969 } else { 4258 } else {
@@ -4031,6 +4320,17 @@ int perf_event_overflow(struct perf_event *event, int nmi,
4031 * Generic software event infrastructure 4320 * Generic software event infrastructure
4032 */ 4321 */
4033 4322
4323struct swevent_htable {
4324 struct swevent_hlist *swevent_hlist;
4325 struct mutex hlist_mutex;
4326 int hlist_refcount;
4327
 4328 /* Recursion avoidance in each context */
4329 int recursion[PERF_NR_CONTEXTS];
4330};
4331
4332static DEFINE_PER_CPU(struct swevent_htable, swevent_htable);
4333
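[Editor's note] swevent_htable takes over the fields that previously lived in perf_cpu_context: software events now hash (type, event_id) into a small per-cpu table independent of any pmu context. A sketch of the bucket selection, assuming an illustrative hash constant and table size rather than the kernel's hash_64() parameters:

#include <stdint.h>
#include <stdio.h>

#define SWEVENT_HLIST_BITS 8    /* size assumed for illustration */

static unsigned int swevent_bucket(uint64_t type, uint32_t event_id)
{
    uint64_t key = (type << 32) | event_id;
    key *= 0x9e3779b97f4a7c15ULL;            /* Fibonacci-style mix */
    return (unsigned int)(key >> (64 - SWEVENT_HLIST_BITS));
}

int main(void)
{
    /* 1/2 stand in for (PERF_TYPE_SOFTWARE, PAGE_FAULTS) */
    printf("bucket: %u\n", swevent_bucket(1, 2));
    return 0;
}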
4034/* 4334/*
4035 * We directly increment event->count and keep a second value in 4335 * We directly increment event->count and keep a second value in
4036 * event->hw.period_left to count intervals. This period event 4336 * event->hw.period_left to count intervals. This period event
@@ -4088,7 +4388,7 @@ static void perf_swevent_overflow(struct perf_event *event, u64 overflow,
4088 } 4388 }
4089} 4389}
4090 4390
4091static void perf_swevent_add(struct perf_event *event, u64 nr, 4391static void perf_swevent_event(struct perf_event *event, u64 nr,
4092 int nmi, struct perf_sample_data *data, 4392 int nmi, struct perf_sample_data *data,
4093 struct pt_regs *regs) 4393 struct pt_regs *regs)
4094{ 4394{
@@ -4114,6 +4414,9 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,
4114static int perf_exclude_event(struct perf_event *event, 4414static int perf_exclude_event(struct perf_event *event,
4115 struct pt_regs *regs) 4415 struct pt_regs *regs)
4116{ 4416{
4417 if (event->hw.state & PERF_HES_STOPPED)
4418 return 0;
4419
4117 if (regs) { 4420 if (regs) {
4118 if (event->attr.exclude_user && user_mode(regs)) 4421 if (event->attr.exclude_user && user_mode(regs))
4119 return 1; 4422 return 1;
@@ -4160,11 +4463,11 @@ __find_swevent_head(struct swevent_hlist *hlist, u64 type, u32 event_id)
4160 4463
4161/* For the read side: events when they trigger */ 4464/* For the read side: events when they trigger */
4162static inline struct hlist_head * 4465static inline struct hlist_head *
4163find_swevent_head_rcu(struct perf_cpu_context *ctx, u64 type, u32 event_id) 4466find_swevent_head_rcu(struct swevent_htable *swhash, u64 type, u32 event_id)
4164{ 4467{
4165 struct swevent_hlist *hlist; 4468 struct swevent_hlist *hlist;
4166 4469
4167 hlist = rcu_dereference(ctx->swevent_hlist); 4470 hlist = rcu_dereference(swhash->swevent_hlist);
4168 if (!hlist) 4471 if (!hlist)
4169 return NULL; 4472 return NULL;
4170 4473
@@ -4173,7 +4476,7 @@ find_swevent_head_rcu(struct perf_cpu_context *ctx, u64 type, u32 event_id)
4173 4476
4174/* For the event head insertion and removal in the hlist */ 4477/* For the event head insertion and removal in the hlist */
4175static inline struct hlist_head * 4478static inline struct hlist_head *
4176find_swevent_head(struct perf_cpu_context *ctx, struct perf_event *event) 4479find_swevent_head(struct swevent_htable *swhash, struct perf_event *event)
4177{ 4480{
4178 struct swevent_hlist *hlist; 4481 struct swevent_hlist *hlist;
4179 u32 event_id = event->attr.config; 4482 u32 event_id = event->attr.config;
@@ -4184,7 +4487,7 @@ find_swevent_head(struct perf_cpu_context *ctx, struct perf_event *event)
4184 * and release. Which makes the protected version suitable here. 4487 * and release. Which makes the protected version suitable here.
4185 * The context lock guarantees that. 4488 * The context lock guarantees that.
4186 */ 4489 */
4187 hlist = rcu_dereference_protected(ctx->swevent_hlist, 4490 hlist = rcu_dereference_protected(swhash->swevent_hlist,
4188 lockdep_is_held(&event->ctx->lock)); 4491 lockdep_is_held(&event->ctx->lock));
4189 if (!hlist) 4492 if (!hlist)
4190 return NULL; 4493 return NULL;
@@ -4197,23 +4500,19 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
4197 struct perf_sample_data *data, 4500 struct perf_sample_data *data,
4198 struct pt_regs *regs) 4501 struct pt_regs *regs)
4199{ 4502{
4200 struct perf_cpu_context *cpuctx; 4503 struct swevent_htable *swhash = &__get_cpu_var(swevent_htable);
4201 struct perf_event *event; 4504 struct perf_event *event;
4202 struct hlist_node *node; 4505 struct hlist_node *node;
4203 struct hlist_head *head; 4506 struct hlist_head *head;
4204 4507
4205 cpuctx = &__get_cpu_var(perf_cpu_context);
4206
4207 rcu_read_lock(); 4508 rcu_read_lock();
4208 4509 head = find_swevent_head_rcu(swhash, type, event_id);
4209 head = find_swevent_head_rcu(cpuctx, type, event_id);
4210
4211 if (!head) 4510 if (!head)
4212 goto end; 4511 goto end;
4213 4512
4214 hlist_for_each_entry_rcu(event, node, head, hlist_entry) { 4513 hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
4215 if (perf_swevent_match(event, type, event_id, data, regs)) 4514 if (perf_swevent_match(event, type, event_id, data, regs))
4216 perf_swevent_add(event, nr, nmi, data, regs); 4515 perf_swevent_event(event, nr, nmi, data, regs);
4217 } 4516 }
4218end: 4517end:
4219 rcu_read_unlock(); 4518 rcu_read_unlock();
@@ -4221,33 +4520,17 @@ end:
4221 4520
4222int perf_swevent_get_recursion_context(void) 4521int perf_swevent_get_recursion_context(void)
4223{ 4522{
4224 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); 4523 struct swevent_htable *swhash = &__get_cpu_var(swevent_htable);
4225 int rctx;
4226
4227 if (in_nmi())
4228 rctx = 3;
4229 else if (in_irq())
4230 rctx = 2;
4231 else if (in_softirq())
4232 rctx = 1;
4233 else
4234 rctx = 0;
4235 4524
4236 if (cpuctx->recursion[rctx]) 4525 return get_recursion_context(swhash->recursion);
4237 return -1;
4238
4239 cpuctx->recursion[rctx]++;
4240 barrier();
4241
4242 return rctx;
4243} 4526}
4244EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); 4527EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);
4245 4528
4246void inline perf_swevent_put_recursion_context(int rctx) 4529void inline perf_swevent_put_recursion_context(int rctx)
4247{ 4530{
4248 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); 4531 struct swevent_htable *swhash = &__get_cpu_var(swevent_htable);
4249 barrier(); 4532
4250 cpuctx->recursion[rctx]--; 4533 put_recursion_context(swhash->recursion, rctx);
4251} 4534}
4252 4535
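[Editor's note] The open-coded in_nmi()/in_irq()/in_softirq() ladder moves into get_recursion_context()/put_recursion_context() helpers, but the invariant is unchanged: one guard counter per execution context, and a nested software event in the same context is dropped instead of recursed into. A userspace model of that invariant, with the context level passed in explicitly since userspace has no preempt_count:

#include <stdio.h>

enum { CTX_TASK, CTX_SOFTIRQ, CTX_HARDIRQ, CTX_NMI, NR_CONTEXTS };

static int recursion[NR_CONTEXTS];

static int get_recursion_context(int level)
{
    if (recursion[level])
        return -1;              /* already inside a swevent here: bail */
    recursion[level]++;
    /* a compiler barrier sits here in the kernel version */
    return level;
}

static void put_recursion_context(int rctx)
{
    recursion[rctx]--;
}

int main(void)
{
    int a = get_recursion_context(CTX_TASK);   /* 0: acquired */
    int b = get_recursion_context(CTX_TASK);   /* -1: nested, rejected */
    printf("first=%d nested=%d\n", a, b);
    if (a >= 0)
        put_recursion_context(a);
    return 0;
}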
4253void __perf_sw_event(u32 event_id, u64 nr, int nmi, 4536void __perf_sw_event(u32 event_id, u64 nr, int nmi,
@@ -4273,20 +4556,20 @@ static void perf_swevent_read(struct perf_event *event)
4273{ 4556{
4274} 4557}
4275 4558
4276static int perf_swevent_enable(struct perf_event *event) 4559static int perf_swevent_add(struct perf_event *event, int flags)
4277{ 4560{
4561 struct swevent_htable *swhash = &__get_cpu_var(swevent_htable);
4278 struct hw_perf_event *hwc = &event->hw; 4562 struct hw_perf_event *hwc = &event->hw;
4279 struct perf_cpu_context *cpuctx;
4280 struct hlist_head *head; 4563 struct hlist_head *head;
4281 4564
4282 cpuctx = &__get_cpu_var(perf_cpu_context);
4283
4284 if (hwc->sample_period) { 4565 if (hwc->sample_period) {
4285 hwc->last_period = hwc->sample_period; 4566 hwc->last_period = hwc->sample_period;
4286 perf_swevent_set_period(event); 4567 perf_swevent_set_period(event);
4287 } 4568 }
4288 4569
4289 head = find_swevent_head(cpuctx, event); 4570 hwc->state = !(flags & PERF_EF_START);
4571
4572 head = find_swevent_head(swhash, event);
4290 if (WARN_ON_ONCE(!head)) 4573 if (WARN_ON_ONCE(!head))
4291 return -EINVAL; 4574 return -EINVAL;
4292 4575
@@ -4295,202 +4578,27 @@ static int perf_swevent_enable(struct perf_event *event)
4295 return 0; 4578 return 0;
4296} 4579}
4297 4580
4298static void perf_swevent_disable(struct perf_event *event) 4581static void perf_swevent_del(struct perf_event *event, int flags)
4299{ 4582{
4300 hlist_del_rcu(&event->hlist_entry); 4583 hlist_del_rcu(&event->hlist_entry);
4301} 4584}
4302 4585
4303static void perf_swevent_void(struct perf_event *event) 4586static void perf_swevent_start(struct perf_event *event, int flags)
4304{
4305}
4306
4307static int perf_swevent_int(struct perf_event *event)
4308{
4309 return 0;
4310}
4311
4312static const struct pmu perf_ops_generic = {
4313 .enable = perf_swevent_enable,
4314 .disable = perf_swevent_disable,
4315 .start = perf_swevent_int,
4316 .stop = perf_swevent_void,
4317 .read = perf_swevent_read,
4318 .unthrottle = perf_swevent_void, /* hwc->interrupts already reset */
4319};
4320
4321/*
4322 * hrtimer based swevent callback
4323 */
4324
4325static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
4326{
4327 enum hrtimer_restart ret = HRTIMER_RESTART;
4328 struct perf_sample_data data;
4329 struct pt_regs *regs;
4330 struct perf_event *event;
4331 u64 period;
4332
4333 event = container_of(hrtimer, struct perf_event, hw.hrtimer);
4334 event->pmu->read(event);
4335
4336 perf_sample_data_init(&data, 0);
4337 data.period = event->hw.last_period;
4338 regs = get_irq_regs();
4339
4340 if (regs && !perf_exclude_event(event, regs)) {
4341 if (!(event->attr.exclude_idle && current->pid == 0))
4342 if (perf_event_overflow(event, 0, &data, regs))
4343 ret = HRTIMER_NORESTART;
4344 }
4345
4346 period = max_t(u64, 10000, event->hw.sample_period);
4347 hrtimer_forward_now(hrtimer, ns_to_ktime(period));
4348
4349 return ret;
4350}
4351
4352static void perf_swevent_start_hrtimer(struct perf_event *event)
4353{ 4587{
4354 struct hw_perf_event *hwc = &event->hw; 4588 event->hw.state = 0;
4355
4356 hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
4357 hwc->hrtimer.function = perf_swevent_hrtimer;
4358 if (hwc->sample_period) {
4359 u64 period;
4360
4361 if (hwc->remaining) {
4362 if (hwc->remaining < 0)
4363 period = 10000;
4364 else
4365 period = hwc->remaining;
4366 hwc->remaining = 0;
4367 } else {
4368 period = max_t(u64, 10000, hwc->sample_period);
4369 }
4370 __hrtimer_start_range_ns(&hwc->hrtimer,
4371 ns_to_ktime(period), 0,
4372 HRTIMER_MODE_REL, 0);
4373 }
4374} 4589}
4375 4590
4376static void perf_swevent_cancel_hrtimer(struct perf_event *event) 4591static void perf_swevent_stop(struct perf_event *event, int flags)
4377{ 4592{
4378 struct hw_perf_event *hwc = &event->hw; 4593 event->hw.state = PERF_HES_STOPPED;
4379
4380 if (hwc->sample_period) {
4381 ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer);
4382 hwc->remaining = ktime_to_ns(remaining);
4383
4384 hrtimer_cancel(&hwc->hrtimer);
4385 }
4386} 4594}
4387 4595
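[Editor's note] enable/disable become add/del plus start/stop: ->add() attaches the event but only starts counting when PERF_EF_START is set, and hw.state carries PERF_HES_STOPPED so a stopped-but-attached event can be skipped at delivery time. A compilable sketch of just that state machine, with the hlist insertion and event filtering elided:

#include <stdio.h>

#define PERF_EF_START    0x01
#define PERF_HES_STOPPED 0x01

struct hw_event { int state; };

static int swevent_add(struct hw_event *hwc, int flags)
{
    /* attached; counts only if the caller asked for an immediate start */
    hwc->state = (flags & PERF_EF_START) ? 0 : PERF_HES_STOPPED;
    return 0;
}

static void swevent_start(struct hw_event *hwc) { hwc->state = 0; }
static void swevent_stop(struct hw_event *hwc)  { hwc->state = PERF_HES_STOPPED; }

int main(void)
{
    struct hw_event hwc;
    swevent_add(&hwc, 0);
    printf("stopped after bare add: %d\n", !!(hwc.state & PERF_HES_STOPPED));
    swevent_start(&hwc);
    printf("stopped after start:    %d\n", !!(hwc.state & PERF_HES_STOPPED));
    swevent_stop(&hwc);
    printf("stopped after stop:     %d\n", !!(hwc.state & PERF_HES_STOPPED));
    return 0;
}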
4388/*
4389 * Software event: cpu wall time clock
4390 */
4391
4392static void cpu_clock_perf_event_update(struct perf_event *event)
4393{
4394 int cpu = raw_smp_processor_id();
4395 s64 prev;
4396 u64 now;
4397
4398 now = cpu_clock(cpu);
4399 prev = local64_xchg(&event->hw.prev_count, now);
4400 local64_add(now - prev, &event->count);
4401}
4402
4403static int cpu_clock_perf_event_enable(struct perf_event *event)
4404{
4405 struct hw_perf_event *hwc = &event->hw;
4406 int cpu = raw_smp_processor_id();
4407
4408 local64_set(&hwc->prev_count, cpu_clock(cpu));
4409 perf_swevent_start_hrtimer(event);
4410
4411 return 0;
4412}
4413
4414static void cpu_clock_perf_event_disable(struct perf_event *event)
4415{
4416 perf_swevent_cancel_hrtimer(event);
4417 cpu_clock_perf_event_update(event);
4418}
4419
4420static void cpu_clock_perf_event_read(struct perf_event *event)
4421{
4422 cpu_clock_perf_event_update(event);
4423}
4424
4425static const struct pmu perf_ops_cpu_clock = {
4426 .enable = cpu_clock_perf_event_enable,
4427 .disable = cpu_clock_perf_event_disable,
4428 .read = cpu_clock_perf_event_read,
4429};
4430
4431/*
4432 * Software event: task time clock
4433 */
4434
4435static void task_clock_perf_event_update(struct perf_event *event, u64 now)
4436{
4437 u64 prev;
4438 s64 delta;
4439
4440 prev = local64_xchg(&event->hw.prev_count, now);
4441 delta = now - prev;
4442 local64_add(delta, &event->count);
4443}
4444
4445static int task_clock_perf_event_enable(struct perf_event *event)
4446{
4447 struct hw_perf_event *hwc = &event->hw;
4448 u64 now;
4449
4450 now = event->ctx->time;
4451
4452 local64_set(&hwc->prev_count, now);
4453
4454 perf_swevent_start_hrtimer(event);
4455
4456 return 0;
4457}
4458
4459static void task_clock_perf_event_disable(struct perf_event *event)
4460{
4461 perf_swevent_cancel_hrtimer(event);
4462 task_clock_perf_event_update(event, event->ctx->time);
4463
4464}
4465
4466static void task_clock_perf_event_read(struct perf_event *event)
4467{
4468 u64 time;
4469
4470 if (!in_nmi()) {
4471 update_context_time(event->ctx);
4472 time = event->ctx->time;
4473 } else {
4474 u64 now = perf_clock();
4475 u64 delta = now - event->ctx->timestamp;
4476 time = event->ctx->time + delta;
4477 }
4478
4479 task_clock_perf_event_update(event, time);
4480}
4481
4482static const struct pmu perf_ops_task_clock = {
4483 .enable = task_clock_perf_event_enable,
4484 .disable = task_clock_perf_event_disable,
4485 .read = task_clock_perf_event_read,
4486};
4487
4488/* Deref the hlist from the update side */ 4596/* Deref the hlist from the update side */
4489static inline struct swevent_hlist * 4597static inline struct swevent_hlist *
4490swevent_hlist_deref(struct perf_cpu_context *cpuctx) 4598swevent_hlist_deref(struct swevent_htable *swhash)
4491{ 4599{
4492 return rcu_dereference_protected(cpuctx->swevent_hlist, 4600 return rcu_dereference_protected(swhash->swevent_hlist,
4493 lockdep_is_held(&cpuctx->hlist_mutex)); 4601 lockdep_is_held(&swhash->hlist_mutex));
4494} 4602}
4495 4603
4496static void swevent_hlist_release_rcu(struct rcu_head *rcu_head) 4604static void swevent_hlist_release_rcu(struct rcu_head *rcu_head)
@@ -4501,27 +4609,27 @@ static void swevent_hlist_release_rcu(struct rcu_head *rcu_head)
4501 kfree(hlist); 4609 kfree(hlist);
4502} 4610}
4503 4611
4504static void swevent_hlist_release(struct perf_cpu_context *cpuctx) 4612static void swevent_hlist_release(struct swevent_htable *swhash)
4505{ 4613{
4506 struct swevent_hlist *hlist = swevent_hlist_deref(cpuctx); 4614 struct swevent_hlist *hlist = swevent_hlist_deref(swhash);
4507 4615
4508 if (!hlist) 4616 if (!hlist)
4509 return; 4617 return;
4510 4618
4511 rcu_assign_pointer(cpuctx->swevent_hlist, NULL); 4619 rcu_assign_pointer(swhash->swevent_hlist, NULL);
4512 call_rcu(&hlist->rcu_head, swevent_hlist_release_rcu); 4620 call_rcu(&hlist->rcu_head, swevent_hlist_release_rcu);
4513} 4621}
4514 4622
4515static void swevent_hlist_put_cpu(struct perf_event *event, int cpu) 4623static void swevent_hlist_put_cpu(struct perf_event *event, int cpu)
4516{ 4624{
4517 struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); 4625 struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
4518 4626
4519 mutex_lock(&cpuctx->hlist_mutex); 4627 mutex_lock(&swhash->hlist_mutex);
4520 4628
4521 if (!--cpuctx->hlist_refcount) 4629 if (!--swhash->hlist_refcount)
4522 swevent_hlist_release(cpuctx); 4630 swevent_hlist_release(swhash);
4523 4631
4524 mutex_unlock(&cpuctx->hlist_mutex); 4632 mutex_unlock(&swhash->hlist_mutex);
4525} 4633}
4526 4634
4527static void swevent_hlist_put(struct perf_event *event) 4635static void swevent_hlist_put(struct perf_event *event)
@@ -4539,12 +4647,12 @@ static void swevent_hlist_put(struct perf_event *event)
4539 4647
4540static int swevent_hlist_get_cpu(struct perf_event *event, int cpu) 4648static int swevent_hlist_get_cpu(struct perf_event *event, int cpu)
4541{ 4649{
4542 struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); 4650 struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
4543 int err = 0; 4651 int err = 0;
4544 4652
4545 mutex_lock(&cpuctx->hlist_mutex); 4653 mutex_lock(&swhash->hlist_mutex);
4546 4654
4547 if (!swevent_hlist_deref(cpuctx) && cpu_online(cpu)) { 4655 if (!swevent_hlist_deref(swhash) && cpu_online(cpu)) {
4548 struct swevent_hlist *hlist; 4656 struct swevent_hlist *hlist;
4549 4657
4550 hlist = kzalloc(sizeof(*hlist), GFP_KERNEL); 4658 hlist = kzalloc(sizeof(*hlist), GFP_KERNEL);
@@ -4552,11 +4660,11 @@ static int swevent_hlist_get_cpu(struct perf_event *event, int cpu)
4552 err = -ENOMEM; 4660 err = -ENOMEM;
4553 goto exit; 4661 goto exit;
4554 } 4662 }
4555 rcu_assign_pointer(cpuctx->swevent_hlist, hlist); 4663 rcu_assign_pointer(swhash->swevent_hlist, hlist);
4556 } 4664 }
4557 cpuctx->hlist_refcount++; 4665 swhash->hlist_refcount++;
4558 exit: 4666exit:
4559 mutex_unlock(&cpuctx->hlist_mutex); 4667 mutex_unlock(&swhash->hlist_mutex);
4560 4668
4561 return err; 4669 return err;
4562} 4670}
@@ -4580,7 +4688,7 @@ static int swevent_hlist_get(struct perf_event *event)
4580 put_online_cpus(); 4688 put_online_cpus();
4581 4689
4582 return 0; 4690 return 0;
4583 fail: 4691fail:
4584 for_each_possible_cpu(cpu) { 4692 for_each_possible_cpu(cpu) {
4585 if (cpu == failed_cpu) 4693 if (cpu == failed_cpu)
4586 break; 4694 break;
@@ -4591,17 +4699,64 @@ static int swevent_hlist_get(struct perf_event *event)
4591 return err; 4699 return err;
4592} 4700}
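[Editor's note] The hlist get/put pair is lazy per-cpu allocation behind a refcount: the first user on a cpu allocates and publishes the table, the last one unpublishes it and frees through RCU. The shape of that lifecycle, with the RCU grace period collapsed into an immediate free so the example actually runs:

#include <stdio.h>
#include <stdlib.h>

struct swhash { void *hlist; int refcount; };

static int hlist_get(struct swhash *sw)
{
    if (!sw->hlist) {
        sw->hlist = calloc(1, 256);   /* first user allocates */
        if (!sw->hlist)
            return -1;                /* -ENOMEM in the kernel */
    }
    sw->refcount++;
    return 0;
}

static void hlist_put(struct swhash *sw)
{
    if (!--sw->refcount) {
        free(sw->hlist);              /* kernel defers this via call_rcu */
        sw->hlist = NULL;
    }
}

int main(void)
{
    struct swhash sw = { 0 };
    hlist_get(&sw);
    hlist_get(&sw);
    hlist_put(&sw);
    printf("after one put: refcount=%d hlist=%p\n", sw.refcount, sw.hlist);
    hlist_put(&sw);                   /* last put releases the table */
    return 0;
}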
4593 4701
4594#ifdef CONFIG_EVENT_TRACING 4702atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
4703
4704static void sw_perf_event_destroy(struct perf_event *event)
4705{
4706 u64 event_id = event->attr.config;
4707
4708 WARN_ON(event->parent);
4709
4710 atomic_dec(&perf_swevent_enabled[event_id]);
4711 swevent_hlist_put(event);
4712}
4713
4714static int perf_swevent_init(struct perf_event *event)
4715{
4716 int event_id = event->attr.config;
4595 4717
4596static const struct pmu perf_ops_tracepoint = { 4718 if (event->attr.type != PERF_TYPE_SOFTWARE)
4597 .enable = perf_trace_enable, 4719 return -ENOENT;
4598 .disable = perf_trace_disable, 4720
4599 .start = perf_swevent_int, 4721 switch (event_id) {
4600 .stop = perf_swevent_void, 4722 case PERF_COUNT_SW_CPU_CLOCK:
4723 case PERF_COUNT_SW_TASK_CLOCK:
4724 return -ENOENT;
4725
4726 default:
4727 break;
4728 }
4729
4730 if (event_id > PERF_COUNT_SW_MAX)
4731 return -ENOENT;
4732
4733 if (!event->parent) {
4734 int err;
4735
4736 err = swevent_hlist_get(event);
4737 if (err)
4738 return err;
4739
4740 atomic_inc(&perf_swevent_enabled[event_id]);
4741 event->destroy = sw_perf_event_destroy;
4742 }
4743
4744 return 0;
4745}
4746
4747static struct pmu perf_swevent = {
4748 .task_ctx_nr = perf_sw_context,
4749
4750 .event_init = perf_swevent_init,
4751 .add = perf_swevent_add,
4752 .del = perf_swevent_del,
4753 .start = perf_swevent_start,
4754 .stop = perf_swevent_stop,
4601 .read = perf_swevent_read, 4755 .read = perf_swevent_read,
4602 .unthrottle = perf_swevent_void,
4603}; 4756};
4604 4757
4758#ifdef CONFIG_EVENT_TRACING
4759
4605static int perf_tp_filter_match(struct perf_event *event, 4760static int perf_tp_filter_match(struct perf_event *event,
4606 struct perf_sample_data *data) 4761 struct perf_sample_data *data)
4607{ 4762{
@@ -4645,7 +4800,7 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
4645 4800
4646 hlist_for_each_entry_rcu(event, node, head, hlist_entry) { 4801 hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
4647 if (perf_tp_event_match(event, &data, regs)) 4802 if (perf_tp_event_match(event, &data, regs))
4648 perf_swevent_add(event, count, 1, &data, regs); 4803 perf_swevent_event(event, count, 1, &data, regs);
4649 } 4804 }
4650 4805
4651 perf_swevent_put_recursion_context(rctx); 4806 perf_swevent_put_recursion_context(rctx);
@@ -4657,10 +4812,13 @@ static void tp_perf_event_destroy(struct perf_event *event)
4657 perf_trace_destroy(event); 4812 perf_trace_destroy(event);
4658} 4813}
4659 4814
4660static const struct pmu *tp_perf_event_init(struct perf_event *event) 4815static int perf_tp_event_init(struct perf_event *event)
4661{ 4816{
4662 int err; 4817 int err;
4663 4818
4819 if (event->attr.type != PERF_TYPE_TRACEPOINT)
4820 return -ENOENT;
4821
4664 /* 4822 /*
4665 * Raw tracepoint data is a severe data leak, only allow root to 4823 * Raw tracepoint data is a severe data leak, only allow root to
4666 * have these. 4824 * have these.
@@ -4668,15 +4826,31 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event)
4668 if ((event->attr.sample_type & PERF_SAMPLE_RAW) && 4826 if ((event->attr.sample_type & PERF_SAMPLE_RAW) &&
4669 perf_paranoid_tracepoint_raw() && 4827 perf_paranoid_tracepoint_raw() &&
4670 !capable(CAP_SYS_ADMIN)) 4828 !capable(CAP_SYS_ADMIN))
4671 return ERR_PTR(-EPERM); 4829 return -EPERM;
4672 4830
4673 err = perf_trace_init(event); 4831 err = perf_trace_init(event);
4674 if (err) 4832 if (err)
4675 return NULL; 4833 return err;
4676 4834
4677 event->destroy = tp_perf_event_destroy; 4835 event->destroy = tp_perf_event_destroy;
4678 4836
4679 return &perf_ops_tracepoint; 4837 return 0;
4838}
4839
4840static struct pmu perf_tracepoint = {
4841 .task_ctx_nr = perf_sw_context,
4842
4843 .event_init = perf_tp_event_init,
4844 .add = perf_trace_add,
4845 .del = perf_trace_del,
4846 .start = perf_swevent_start,
4847 .stop = perf_swevent_stop,
4848 .read = perf_swevent_read,
4849};
4850
4851static inline void perf_tp_register(void)
4852{
4853 perf_pmu_register(&perf_tracepoint);
4680} 4854}
4681 4855
4682static int perf_event_set_filter(struct perf_event *event, void __user *arg) 4856static int perf_event_set_filter(struct perf_event *event, void __user *arg)
@@ -4704,9 +4878,8 @@ static void perf_event_free_filter(struct perf_event *event)
4704 4878
4705#else 4879#else
4706 4880
4707static const struct pmu *tp_perf_event_init(struct perf_event *event) 4881static inline void perf_tp_register(void)
4708{ 4882{
4709 return NULL;
4710} 4883}
4711 4884
4712static int perf_event_set_filter(struct perf_event *event, void __user *arg) 4885static int perf_event_set_filter(struct perf_event *event, void __user *arg)
@@ -4721,105 +4894,389 @@ static void perf_event_free_filter(struct perf_event *event)
4721#endif /* CONFIG_EVENT_TRACING */ 4894#endif /* CONFIG_EVENT_TRACING */
4722 4895
4723#ifdef CONFIG_HAVE_HW_BREAKPOINT 4896#ifdef CONFIG_HAVE_HW_BREAKPOINT
4724static void bp_perf_event_destroy(struct perf_event *event) 4897void perf_bp_event(struct perf_event *bp, void *data)
4725{ 4898{
4726 release_bp_slot(event); 4899 struct perf_sample_data sample;
4900 struct pt_regs *regs = data;
4901
4902 perf_sample_data_init(&sample, bp->attr.bp_addr);
4903
4904 if (!bp->hw.state && !perf_exclude_event(bp, regs))
4905 perf_swevent_event(bp, 1, 1, &sample, regs);
4727} 4906}
4907#endif
4908
4909/*
4910 * hrtimer based swevent callback
4911 */
4728 4912
4729static const struct pmu *bp_perf_event_init(struct perf_event *bp) 4913static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
4730{ 4914{
4731 int err; 4915 enum hrtimer_restart ret = HRTIMER_RESTART;
4916 struct perf_sample_data data;
4917 struct pt_regs *regs;
4918 struct perf_event *event;
4919 u64 period;
4732 4920
4733 err = register_perf_hw_breakpoint(bp); 4921 event = container_of(hrtimer, struct perf_event, hw.hrtimer);
4734 if (err) 4922 event->pmu->read(event);
4735 return ERR_PTR(err);
4736 4923
4737 bp->destroy = bp_perf_event_destroy; 4924 perf_sample_data_init(&data, 0);
4925 data.period = event->hw.last_period;
4926 regs = get_irq_regs();
4927
4928 if (regs && !perf_exclude_event(event, regs)) {
4929 if (!(event->attr.exclude_idle && current->pid == 0))
4930 if (perf_event_overflow(event, 0, &data, regs))
4931 ret = HRTIMER_NORESTART;
4932 }
4738 4933
4739 return &perf_ops_bp; 4934 period = max_t(u64, 10000, event->hw.sample_period);
4935 hrtimer_forward_now(hrtimer, ns_to_ktime(period));
4936
4937 return ret;
4740} 4938}
4741 4939
4742void perf_bp_event(struct perf_event *bp, void *data) 4940static void perf_swevent_start_hrtimer(struct perf_event *event)
4743{ 4941{
4744 struct perf_sample_data sample; 4942 struct hw_perf_event *hwc = &event->hw;
4745 struct pt_regs *regs = data;
4746 4943
4747 perf_sample_data_init(&sample, bp->attr.bp_addr); 4944 hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
4945 hwc->hrtimer.function = perf_swevent_hrtimer;
4946 if (hwc->sample_period) {
4947 s64 period = local64_read(&hwc->period_left);
4748 4948
4749 if (!perf_exclude_event(bp, regs)) 4949 if (period) {
4750 perf_swevent_add(bp, 1, 1, &sample, regs); 4950 if (period < 0)
4951 period = 10000;
4952
4953 local64_set(&hwc->period_left, 0);
4954 } else {
4955 period = max_t(u64, 10000, hwc->sample_period);
4956 }
4957 __hrtimer_start_range_ns(&hwc->hrtimer,
4958 ns_to_ktime(period), 0,
4959 HRTIMER_MODE_REL_PINNED, 0);
4960 }
4751} 4961}
4752#else 4962
4753static const struct pmu *bp_perf_event_init(struct perf_event *bp) 4963static void perf_swevent_cancel_hrtimer(struct perf_event *event)
4754{ 4964{
4755 return NULL; 4965 struct hw_perf_event *hwc = &event->hw;
4966
4967 if (hwc->sample_period) {
4968 ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer);
4969 local64_set(&hwc->period_left, ktime_to_ns(remaining));
4970
4971 hrtimer_cancel(&hwc->hrtimer);
4972 }
4756} 4973}
4757 4974
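[Editor's note] The hrtimer swevent code moves below the pmu structs and swaps hwc->remaining for local64 accesses on hwc->period_left, plus HRTIMER_MODE_REL_PINNED on start. The arming policy itself is unchanged: never program less than 10us, and carry the unexpired remainder across a stop/start cycle. A pure-function rendition of that policy — the 10000ns floor is taken from the diff:

#include <stdint.h>
#include <stdio.h>

#define MIN_PERIOD_NS 10000LL

static int64_t next_period(int64_t period_left, uint64_t sample_period)
{
    if (period_left) {
        if (period_left < 0)           /* overdue: fire soon */
            return MIN_PERIOD_NS;
        return period_left;            /* resume the saved remainder */
    }
    return sample_period > (uint64_t)MIN_PERIOD_NS ? (int64_t)sample_period
                                                   : MIN_PERIOD_NS;
}

int main(void)
{
    printf("fresh 4us period arms at %lld ns\n",
           (long long)next_period(0, 4000));
    printf("restart with 2500 ns left arms at %lld ns\n",
           (long long)next_period(2500, 4000));
    return 0;
}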
4758void perf_bp_event(struct perf_event *bp, void *regs) 4975/*
4976 * Software event: cpu wall time clock
4977 */
4978
4979static void cpu_clock_event_update(struct perf_event *event)
4759{ 4980{
4981 s64 prev;
4982 u64 now;
4983
4984 now = local_clock();
4985 prev = local64_xchg(&event->hw.prev_count, now);
4986 local64_add(now - prev, &event->count);
4760} 4987}
4761#endif
4762 4988
4763atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; 4989static void cpu_clock_event_start(struct perf_event *event, int flags)
4990{
4991 local64_set(&event->hw.prev_count, local_clock());
4992 perf_swevent_start_hrtimer(event);
4993}
4764 4994
4765static void sw_perf_event_destroy(struct perf_event *event) 4995static void cpu_clock_event_stop(struct perf_event *event, int flags)
4766{ 4996{
4767 u64 event_id = event->attr.config; 4997 perf_swevent_cancel_hrtimer(event);
4998 cpu_clock_event_update(event);
4999}
4768 5000
4769 WARN_ON(event->parent); 5001static int cpu_clock_event_add(struct perf_event *event, int flags)
5002{
5003 if (flags & PERF_EF_START)
5004 cpu_clock_event_start(event, flags);
4770 5005
4771 atomic_dec(&perf_swevent_enabled[event_id]); 5006 return 0;
4772 swevent_hlist_put(event);
4773} 5007}
4774 5008
4775static const struct pmu *sw_perf_event_init(struct perf_event *event) 5009static void cpu_clock_event_del(struct perf_event *event, int flags)
4776{ 5010{
4777 const struct pmu *pmu = NULL; 5011 cpu_clock_event_stop(event, flags);
4778 u64 event_id = event->attr.config; 5012}
5013
5014static void cpu_clock_event_read(struct perf_event *event)
5015{
5016 cpu_clock_event_update(event);
5017}
5018
5019static int cpu_clock_event_init(struct perf_event *event)
5020{
5021 if (event->attr.type != PERF_TYPE_SOFTWARE)
5022 return -ENOENT;
5023
5024 if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK)
5025 return -ENOENT;
5026
5027 return 0;
5028}
5029
5030static struct pmu perf_cpu_clock = {
5031 .task_ctx_nr = perf_sw_context,
5032
5033 .event_init = cpu_clock_event_init,
5034 .add = cpu_clock_event_add,
5035 .del = cpu_clock_event_del,
5036 .start = cpu_clock_event_start,
5037 .stop = cpu_clock_event_stop,
5038 .read = cpu_clock_event_read,
5039};
5040
5041/*
5042 * Software event: task time clock
5043 */
5044
5045static void task_clock_event_update(struct perf_event *event, u64 now)
5046{
5047 u64 prev;
5048 s64 delta;
5049
5050 prev = local64_xchg(&event->hw.prev_count, now);
5051 delta = now - prev;
5052 local64_add(delta, &event->count);
5053}
5054
5055static void task_clock_event_start(struct perf_event *event, int flags)
5056{
5057 local64_set(&event->hw.prev_count, event->ctx->time);
5058 perf_swevent_start_hrtimer(event);
5059}
5060
5061static void task_clock_event_stop(struct perf_event *event, int flags)
5062{
5063 perf_swevent_cancel_hrtimer(event);
5064 task_clock_event_update(event, event->ctx->time);
5065}
5066
5067static int task_clock_event_add(struct perf_event *event, int flags)
5068{
5069 if (flags & PERF_EF_START)
5070 task_clock_event_start(event, flags);
5071
5072 return 0;
5073}
5074
5075static void task_clock_event_del(struct perf_event *event, int flags)
5076{
5077 task_clock_event_stop(event, PERF_EF_UPDATE);
5078}
5079
5080static void task_clock_event_read(struct perf_event *event)
5081{
5082 u64 time;
5083
5084 if (!in_nmi()) {
5085 update_context_time(event->ctx);
5086 time = event->ctx->time;
5087 } else {
5088 u64 now = perf_clock();
5089 u64 delta = now - event->ctx->timestamp;
5090 time = event->ctx->time + delta;
5091 }
5092
5093 task_clock_event_update(event, time);
5094}
5095
5096static int task_clock_event_init(struct perf_event *event)
5097{
5098 if (event->attr.type != PERF_TYPE_SOFTWARE)
5099 return -ENOENT;
5100
5101 if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK)
5102 return -ENOENT;
5103
5104 return 0;
5105}
5106
5107static struct pmu perf_task_clock = {
5108 .task_ctx_nr = perf_sw_context,
5109
5110 .event_init = task_clock_event_init,
5111 .add = task_clock_event_add,
5112 .del = task_clock_event_del,
5113 .start = task_clock_event_start,
5114 .stop = task_clock_event_stop,
5115 .read = task_clock_event_read,
5116};
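
[Editor's note] Both clock pmus count the same way: exchange prev_count with the current time and accumulate the delta, which stays correct even when a read races an update. A userspace rendition with C11 atomics standing in for local64_t; the timestamps are made up:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct clock_event {
    _Atomic int64_t prev_count;
    _Atomic int64_t count;
};

static void clock_event_update(struct clock_event *e, int64_t now)
{
    int64_t prev = atomic_exchange(&e->prev_count, now);
    atomic_fetch_add(&e->count, now - prev);
}

int main(void)
{
    struct clock_event e = { 1000, 0 };  /* started at t = 1000 ns */
    clock_event_update(&e, 1600);        /* 600 ns elapsed */
    clock_event_update(&e, 2000);        /* 400 ns more */
    printf("count = %lld\n", (long long)atomic_load(&e.count));
    return 0;
}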
5117
5118static void perf_pmu_nop_void(struct pmu *pmu)
5119{
5120}
5121
5122static int perf_pmu_nop_int(struct pmu *pmu)
5123{
5124 return 0;
5125}
5126
5127static void perf_pmu_start_txn(struct pmu *pmu)
5128{
5129 perf_pmu_disable(pmu);
5130}
5131
5132static int perf_pmu_commit_txn(struct pmu *pmu)
5133{
5134 perf_pmu_enable(pmu);
5135 return 0;
5136}
4779 5137
5138static void perf_pmu_cancel_txn(struct pmu *pmu)
5139{
5140 perf_pmu_enable(pmu);
5141}
5142
5143/*
5144 * Ensures all contexts with the same task_ctx_nr have the same
5145 * pmu_cpu_context too.
5146 */
5147static void *find_pmu_context(int ctxn)
5148{
5149 struct pmu *pmu;
5150
5151 if (ctxn < 0)
5152 return NULL;
5153
5154 list_for_each_entry(pmu, &pmus, entry) {
5155 if (pmu->task_ctx_nr == ctxn)
5156 return pmu->pmu_cpu_context;
5157 }
5158
5159 return NULL;
5160}
5161
5162static void free_pmu_context(void * __percpu cpu_context)
5163{
5164 struct pmu *pmu;
5165
5166 mutex_lock(&pmus_lock);
4780 /* 5167 /*
4781 * Software events (currently) can't in general distinguish 5168 * Like a real lame refcount.
4782 * between user, kernel and hypervisor events.
4783 * However, context switches and cpu migrations are considered
4784 * to be kernel events, and page faults are never hypervisor
4785 * events.
4786 */ 5169 */
4787 switch (event_id) { 5170 list_for_each_entry(pmu, &pmus, entry) {
4788 case PERF_COUNT_SW_CPU_CLOCK: 5171 if (pmu->pmu_cpu_context == cpu_context)
4789 pmu = &perf_ops_cpu_clock; 5172 goto out;
5173 }
4790 5174
4791 break; 5175 free_percpu(cpu_context);
4792 case PERF_COUNT_SW_TASK_CLOCK: 5176out:
4793 /* 5177 mutex_unlock(&pmus_lock);
4794 * If the user instantiates this as a per-cpu event, 5178}
4795 * use the cpu_clock event instead.
4796 */
4797 if (event->ctx->task)
4798 pmu = &perf_ops_task_clock;
4799 else
4800 pmu = &perf_ops_cpu_clock;
4801 5179
4802 break; 5180int perf_pmu_register(struct pmu *pmu)
4803 case PERF_COUNT_SW_PAGE_FAULTS: 5181{
4804 case PERF_COUNT_SW_PAGE_FAULTS_MIN: 5182 int cpu, ret;
4805 case PERF_COUNT_SW_PAGE_FAULTS_MAJ:
4806 case PERF_COUNT_SW_CONTEXT_SWITCHES:
4807 case PERF_COUNT_SW_CPU_MIGRATIONS:
4808 case PERF_COUNT_SW_ALIGNMENT_FAULTS:
4809 case PERF_COUNT_SW_EMULATION_FAULTS:
4810 if (!event->parent) {
4811 int err;
4812
4813 err = swevent_hlist_get(event);
4814 if (err)
4815 return ERR_PTR(err);
4816 5183
4817 atomic_inc(&perf_swevent_enabled[event_id]); 5184 mutex_lock(&pmus_lock);
4818 event->destroy = sw_perf_event_destroy; 5185 ret = -ENOMEM;
5186 pmu->pmu_disable_count = alloc_percpu(int);
5187 if (!pmu->pmu_disable_count)
5188 goto unlock;
5189
5190 pmu->pmu_cpu_context = find_pmu_context(pmu->task_ctx_nr);
5191 if (pmu->pmu_cpu_context)
5192 goto got_cpu_context;
5193
5194 pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context);
5195 if (!pmu->pmu_cpu_context)
5196 goto free_pdc;
5197
5198 for_each_possible_cpu(cpu) {
5199 struct perf_cpu_context *cpuctx;
5200
5201 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
5202 __perf_event_init_context(&cpuctx->ctx);
5203 cpuctx->ctx.type = cpu_context;
5204 cpuctx->ctx.pmu = pmu;
5205 cpuctx->jiffies_interval = 1;
5206 INIT_LIST_HEAD(&cpuctx->rotation_list);
5207 }
5208
5209got_cpu_context:
5210 if (!pmu->start_txn) {
5211 if (pmu->pmu_enable) {
5212 /*
5213 * If we have pmu_enable/pmu_disable calls, install
5214 * transaction stubs that use that to try and batch
5215 * hardware accesses.
5216 */
5217 pmu->start_txn = perf_pmu_start_txn;
5218 pmu->commit_txn = perf_pmu_commit_txn;
5219 pmu->cancel_txn = perf_pmu_cancel_txn;
5220 } else {
5221 pmu->start_txn = perf_pmu_nop_void;
5222 pmu->commit_txn = perf_pmu_nop_int;
5223 pmu->cancel_txn = perf_pmu_nop_void;
5224 }
5225 }
5226
5227 if (!pmu->pmu_enable) {
5228 pmu->pmu_enable = perf_pmu_nop_void;
5229 pmu->pmu_disable = perf_pmu_nop_void;
5230 }
5231
5232 list_add_rcu(&pmu->entry, &pmus);
5233 ret = 0;
5234unlock:
5235 mutex_unlock(&pmus_lock);
5236
5237 return ret;
5238
5239free_pdc:
5240 free_percpu(pmu->pmu_disable_count);
5241 goto unlock;
5242}
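[Editor's note] perf_pmu_register fills in whatever the pmu left NULL: pmus sharing a task_ctx_nr share one per-cpu context through find_pmu_context(), and pmus without their own transaction methods get stubs that batch hardware access behind a pmu_disable()/pmu_enable() pair, or pure no-ops if even those are absent. A self-contained model of those two defaults, with a plain array in place of the RCU list and a caller-supplied buffer in place of alloc_percpu():

#include <stdio.h>

struct pmu {
    const char *name;
    int task_ctx_nr;
    void *cpu_context;
    void (*pmu_enable)(struct pmu *);
    void (*pmu_disable)(struct pmu *);
    void (*start_txn)(struct pmu *);
    int  (*commit_txn)(struct pmu *);
    void (*cancel_txn)(struct pmu *);
};

static void nop_void(struct pmu *p)   { (void)p; }
static int  nop_int(struct pmu *p)    { (void)p; return 0; }
static void txn_start(struct pmu *p)  { p->pmu_disable(p); }
static int  txn_commit(struct pmu *p) { p->pmu_enable(p); return 0; }
static void txn_cancel(struct pmu *p) { p->pmu_enable(p); }

static struct pmu *pmus[8];
static int nr_pmus;

static void *find_pmu_context(int ctxn)
{
    if (ctxn < 0)
        return NULL;
    for (int i = 0; i < nr_pmus; i++)
        if (pmus[i]->task_ctx_nr == ctxn)
            return pmus[i]->cpu_context;   /* share the existing one */
    return NULL;
}

static void pmu_register(struct pmu *pmu, void *fresh_context)
{
    pmu->cpu_context = find_pmu_context(pmu->task_ctx_nr);
    if (!pmu->cpu_context)
        pmu->cpu_context = fresh_context;
    if (!pmu->start_txn) {                 /* install default transactions */
        int batched = pmu->pmu_enable != NULL;
        pmu->start_txn  = batched ? txn_start  : nop_void;
        pmu->commit_txn = batched ? txn_commit : nop_int;
        pmu->cancel_txn = batched ? txn_cancel : nop_void;
    }
    pmus[nr_pmus++] = pmu;
}

int main(void)
{
    static char ctx_a, ctx_b;
    struct pmu sw1 = { "software",   1, 0, 0, 0, 0, 0, 0 };
    struct pmu sw2 = { "tracepoint", 1, 0, 0, 0, 0, 0, 0 };
    pmu_register(&sw1, &ctx_a);
    pmu_register(&sw2, &ctx_b);            /* reuses sw1's context */
    printf("shared context: %d\n", sw1.cpu_context == sw2.cpu_context);
    return 0;
}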
5243
5244void perf_pmu_unregister(struct pmu *pmu)
5245{
5246 mutex_lock(&pmus_lock);
5247 list_del_rcu(&pmu->entry);
5248 mutex_unlock(&pmus_lock);
5249
5250 /*
5251 * We dereference the pmu list under both SRCU and regular RCU, so
5252 * synchronize against both of those.
5253 */
5254 synchronize_srcu(&pmus_srcu);
5255 synchronize_rcu();
5256
5257 free_percpu(pmu->pmu_disable_count);
5258 free_pmu_context(pmu->pmu_cpu_context);
5259}
5260
5261struct pmu *perf_init_event(struct perf_event *event)
5262{
5263 struct pmu *pmu = NULL;
5264 int idx;
5265
5266 idx = srcu_read_lock(&pmus_srcu);
5267 list_for_each_entry_rcu(pmu, &pmus, entry) {
5268 int ret = pmu->event_init(event);
5269 if (!ret)
5270 goto unlock;
5271
5272 if (ret != -ENOENT) {
5273 pmu = ERR_PTR(ret);
5274 goto unlock;
4819 } 5275 }
4820 pmu = &perf_ops_generic;
4821 break;
4822 } 5276 }
5277 pmu = ERR_PTR(-ENOENT);
5278unlock:
5279 srcu_read_unlock(&pmus_srcu, idx);
4823 5280
4824 return pmu; 5281 return pmu;
4825} 5282}
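[Editor's note] perf_init_event replaces the old attr->type switch with probing: each pmu's event_init() either claims the event, declines it with -ENOENT so the walk continues, or fails it with any other error, which ends the walk immediately (as -EPERM does for unprivileged raw tracepoints above). A userspace model of that convention, with made-up pmus:

#include <errno.h>
#include <stdio.h>

#define TYPE_SOFTWARE   1
#define TYPE_TRACEPOINT 2

struct pmu { const char *name; int (*event_init)(int type); };

static int sw_init(int type) { return type == TYPE_SOFTWARE ? 0 : -ENOENT; }
static int tp_init(int type)
{
    /* models the unprivileged-tracepoint rejection: a hard error */
    return type == TYPE_TRACEPOINT ? -EPERM : -ENOENT;
}

static struct pmu *init_event(struct pmu *pmus, int n, int type, int *err)
{
    for (int i = 0; i < n; i++) {
        int ret = pmus[i].event_init(type);
        if (!ret)
            return &pmus[i];           /* claimed */
        if (ret != -ENOENT) {          /* real failure: stop the walk */
            *err = ret;
            return NULL;
        }
    }
    *err = -ENOENT;                    /* nobody wanted it */
    return NULL;
}

int main(void)
{
    struct pmu pmus[] = { { "tracepoint", tp_init }, { "software", sw_init } };
    int err = 0;
    struct pmu *p = init_event(pmus, 2, TYPE_SOFTWARE, &err);
    printf("software event -> %s\n", p ? p->name : "error");
    init_event(pmus, 2, TYPE_TRACEPOINT, &err);
    printf("unprivileged tracepoint -> err %d\n", err);
    return 0;
}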
@@ -4828,20 +5285,17 @@ static const struct pmu *sw_perf_event_init(struct perf_event *event)
4828 * Allocate and initialize a event structure 5285 * Allocate and initialize a event structure
4829 */ 5286 */
4830static struct perf_event * 5287static struct perf_event *
4831perf_event_alloc(struct perf_event_attr *attr, 5288perf_event_alloc(struct perf_event_attr *attr, int cpu,
4832 int cpu,
4833 struct perf_event_context *ctx,
4834 struct perf_event *group_leader, 5289 struct perf_event *group_leader,
4835 struct perf_event *parent_event, 5290 struct perf_event *parent_event,
4836 perf_overflow_handler_t overflow_handler, 5291 perf_overflow_handler_t overflow_handler)
4837 gfp_t gfpflags)
4838{ 5292{
4839 const struct pmu *pmu; 5293 struct pmu *pmu;
4840 struct perf_event *event; 5294 struct perf_event *event;
4841 struct hw_perf_event *hwc; 5295 struct hw_perf_event *hwc;
4842 long err; 5296 long err;
4843 5297
4844 event = kzalloc(sizeof(*event), gfpflags); 5298 event = kzalloc(sizeof(*event), GFP_KERNEL);
4845 if (!event) 5299 if (!event)
4846 return ERR_PTR(-ENOMEM); 5300 return ERR_PTR(-ENOMEM);
4847 5301
@@ -4866,7 +5320,6 @@ perf_event_alloc(struct perf_event_attr *attr,
4866 event->attr = *attr; 5320 event->attr = *attr;
4867 event->group_leader = group_leader; 5321 event->group_leader = group_leader;
4868 event->pmu = NULL; 5322 event->pmu = NULL;
4869 event->ctx = ctx;
4870 event->oncpu = -1; 5323 event->oncpu = -1;
4871 5324
4872 event->parent = parent_event; 5325 event->parent = parent_event;
@@ -4900,29 +5353,8 @@ perf_event_alloc(struct perf_event_attr *attr,
4900 if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP)) 5353 if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP))
4901 goto done; 5354 goto done;
4902 5355
4903 switch (attr->type) { 5356 pmu = perf_init_event(event);
4904 case PERF_TYPE_RAW:
4905 case PERF_TYPE_HARDWARE:
4906 case PERF_TYPE_HW_CACHE:
4907 pmu = hw_perf_event_init(event);
4908 break;
4909 5357
4910 case PERF_TYPE_SOFTWARE:
4911 pmu = sw_perf_event_init(event);
4912 break;
4913
4914 case PERF_TYPE_TRACEPOINT:
4915 pmu = tp_perf_event_init(event);
4916 break;
4917
4918 case PERF_TYPE_BREAKPOINT:
4919 pmu = bp_perf_event_init(event);
4920 break;
4921
4922
4923 default:
4924 break;
4925 }
4926done: 5358done:
4927 err = 0; 5359 err = 0;
4928 if (!pmu) 5360 if (!pmu)
@@ -4947,6 +5379,13 @@ done:
4947 atomic_inc(&nr_comm_events); 5379 atomic_inc(&nr_comm_events);
4948 if (event->attr.task) 5380 if (event->attr.task)
4949 atomic_inc(&nr_task_events); 5381 atomic_inc(&nr_task_events);
5382 if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
5383 err = get_callchain_buffers();
5384 if (err) {
5385 free_event(event);
5386 return ERR_PTR(err);
5387 }
5388 }
4950 } 5389 }
4951 5390
4952 return event; 5391 return event;
@@ -5094,12 +5533,16 @@ SYSCALL_DEFINE5(perf_event_open,
5094 struct perf_event_attr __user *, attr_uptr, 5533 struct perf_event_attr __user *, attr_uptr,
5095 pid_t, pid, int, cpu, int, group_fd, unsigned long, flags) 5534 pid_t, pid, int, cpu, int, group_fd, unsigned long, flags)
5096{ 5535{
5097 struct perf_event *event, *group_leader = NULL, *output_event = NULL; 5536 struct perf_event *group_leader = NULL, *output_event = NULL;
5537 struct perf_event *event, *sibling;
5098 struct perf_event_attr attr; 5538 struct perf_event_attr attr;
5099 struct perf_event_context *ctx; 5539 struct perf_event_context *ctx;
5100 struct file *event_file = NULL; 5540 struct file *event_file = NULL;
5101 struct file *group_file = NULL; 5541 struct file *group_file = NULL;
5542 struct task_struct *task = NULL;
5543 struct pmu *pmu;
5102 int event_fd; 5544 int event_fd;
5545 int move_group = 0;
5103 int fput_needed = 0; 5546 int fput_needed = 0;
5104 int err; 5547 int err;
5105 5548
@@ -5125,20 +5568,11 @@ SYSCALL_DEFINE5(perf_event_open,
5125 if (event_fd < 0) 5568 if (event_fd < 0)
5126 return event_fd; 5569 return event_fd;
5127 5570
5128 /*
5129 * Get the target context (task or percpu):
5130 */
5131 ctx = find_get_context(pid, cpu);
5132 if (IS_ERR(ctx)) {
5133 err = PTR_ERR(ctx);
5134 goto err_fd;
5135 }
5136
5137 if (group_fd != -1) { 5571 if (group_fd != -1) {
5138 group_leader = perf_fget_light(group_fd, &fput_needed); 5572 group_leader = perf_fget_light(group_fd, &fput_needed);
5139 if (IS_ERR(group_leader)) { 5573 if (IS_ERR(group_leader)) {
5140 err = PTR_ERR(group_leader); 5574 err = PTR_ERR(group_leader);
5141 goto err_put_context; 5575 goto err_fd;
5142 } 5576 }
5143 group_file = group_leader->filp; 5577 group_file = group_leader->filp;
5144 if (flags & PERF_FLAG_FD_OUTPUT) 5578 if (flags & PERF_FLAG_FD_OUTPUT)
@@ -5147,6 +5581,58 @@ SYSCALL_DEFINE5(perf_event_open,
5147 group_leader = NULL; 5581 group_leader = NULL;
5148 } 5582 }
5149 5583
5584 event = perf_event_alloc(&attr, cpu, group_leader, NULL, NULL);
5585 if (IS_ERR(event)) {
5586 err = PTR_ERR(event);
5587 goto err_fd;
5588 }
5589
5590 /*
5591 * Special case software events and allow them to be part of
5592 * any hardware group.
5593 */
5594 pmu = event->pmu;
5595
5596 if (group_leader &&
5597 (is_software_event(event) != is_software_event(group_leader))) {
5598 if (is_software_event(event)) {
5599 /*
5600 * The event is a software event while its
5601 * group_leader is not, so the leader is hardware.
5602 *
5603 * Allow the addition of software events to !software
5604 * groups, this is safe because software events never
5605 * fail to schedule.
5606 */
5607 pmu = group_leader->pmu;
5608 } else if (is_software_event(group_leader) &&
5609 (group_leader->group_flags & PERF_GROUP_SOFTWARE)) {
5610 /*
5611 * In case the group is a pure software group, and we
5612 * try to add a hardware event, move the whole group to
5613 * the hardware context.
5614 */
5615 move_group = 1;
5616 }
5617 }
5618
5619 if (pid != -1) {
5620 task = find_lively_task_by_vpid(pid);
5621 if (IS_ERR(task)) {
5622 err = PTR_ERR(task);
5623 goto err_group_fd;
5624 }
5625 }
5626
5627 /*
5628 * Get the target context (task or percpu):
5629 */
5630 ctx = find_get_context(pmu, task, cpu);
5631 if (IS_ERR(ctx)) {
5632 err = PTR_ERR(ctx);
5633 goto err_group_fd;
5634 }
5635
5150 /* 5636 /*
5151 * Look up the group leader (we will attach this event to it): 5637 * Look up the group leader (we will attach this event to it):
5152 */ 5638 */
@@ -5158,42 +5644,66 @@ SYSCALL_DEFINE5(perf_event_open,
5158 * becoming part of another group-sibling): 5644 * becoming part of another group-sibling):
5159 */ 5645 */
5160 if (group_leader->group_leader != group_leader) 5646 if (group_leader->group_leader != group_leader)
5161 goto err_put_context; 5647 goto err_context;
5162 /* 5648 /*
5163 * Do not allow attaching to a group in a different 5649 * Do not allow attaching to a group in a different
5164 * task or CPU context: 5650 * task or CPU context:
5165 */ 5651 */
5166 if (group_leader->ctx != ctx) 5652 if (move_group) {
5167 goto err_put_context; 5653 if (group_leader->ctx->type != ctx->type)
5654 goto err_context;
5655 } else {
5656 if (group_leader->ctx != ctx)
5657 goto err_context;
5658 }
5659
5168 /* 5660 /*
5169 * Only a group leader can be exclusive or pinned 5661 * Only a group leader can be exclusive or pinned
5170 */ 5662 */
5171 if (attr.exclusive || attr.pinned) 5663 if (attr.exclusive || attr.pinned)
5172 goto err_put_context; 5664 goto err_context;
5173 }
5174
5175 event = perf_event_alloc(&attr, cpu, ctx, group_leader,
5176 NULL, NULL, GFP_KERNEL);
5177 if (IS_ERR(event)) {
5178 err = PTR_ERR(event);
5179 goto err_put_context;
5180 } 5665 }
5181 5666
5182 if (output_event) { 5667 if (output_event) {
5183 err = perf_event_set_output(event, output_event); 5668 err = perf_event_set_output(event, output_event);
5184 if (err) 5669 if (err)
5185 goto err_free_put_context; 5670 goto err_context;
5186 } 5671 }
5187 5672
5188 event_file = anon_inode_getfile("[perf_event]", &perf_fops, event, O_RDWR); 5673 event_file = anon_inode_getfile("[perf_event]", &perf_fops, event, O_RDWR);
5189 if (IS_ERR(event_file)) { 5674 if (IS_ERR(event_file)) {
5190 err = PTR_ERR(event_file); 5675 err = PTR_ERR(event_file);
5191 goto err_free_put_context; 5676 goto err_context;
5677 }
5678
5679 if (move_group) {
5680 struct perf_event_context *gctx = group_leader->ctx;
5681
5682 mutex_lock(&gctx->mutex);
5683 perf_event_remove_from_context(group_leader);
5684 list_for_each_entry(sibling, &group_leader->sibling_list,
5685 group_entry) {
5686 perf_event_remove_from_context(sibling);
5687 put_ctx(gctx);
5688 }
5689 mutex_unlock(&gctx->mutex);
5690 put_ctx(gctx);
5192 } 5691 }
5193 5692
5194 event->filp = event_file; 5693 event->filp = event_file;
5195 WARN_ON_ONCE(ctx->parent_ctx); 5694 WARN_ON_ONCE(ctx->parent_ctx);
5196 mutex_lock(&ctx->mutex); 5695 mutex_lock(&ctx->mutex);
5696
5697 if (move_group) {
5698 perf_install_in_context(ctx, group_leader, cpu);
5699 get_ctx(ctx);
5700 list_for_each_entry(sibling, &group_leader->sibling_list,
5701 group_entry) {
5702 perf_install_in_context(ctx, sibling, cpu);
5703 get_ctx(ctx);
5704 }
5705 }
5706
5197 perf_install_in_context(ctx, event, cpu); 5707 perf_install_in_context(ctx, event, cpu);
5198 ++ctx->generation; 5708 ++ctx->generation;
5199 mutex_unlock(&ctx->mutex); 5709 mutex_unlock(&ctx->mutex);
@@ -5214,11 +5724,11 @@ SYSCALL_DEFINE5(perf_event_open,
5214 fd_install(event_fd, event_file); 5724 fd_install(event_fd, event_file);
5215 return event_fd; 5725 return event_fd;
5216 5726
5217err_free_put_context: 5727err_context:
5218 free_event(event);
5219err_put_context:
5220 fput_light(group_file, fput_needed);
5221 put_ctx(ctx); 5728 put_ctx(ctx);
5729err_group_fd:
5730 fput_light(group_file, fput_needed);
5731 free_event(event);
5222err_fd: 5732err_fd:
5223 put_unused_fd(event_fd); 5733 put_unused_fd(event_fd);
5224 return err; 5734 return err;
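[Editor's note] The move_group dance above reduces to one placement rule worth stating on its own: software events may join any hardware group because they never fail to schedule, while adding a hardware event to a pure-software group (PERF_GROUP_SOFTWARE) migrates the whole group into the hardware context. A distilled, runnable version of just the decision — the struct fields are illustrative stand-ins for the real flags:

#include <stdio.h>

enum pmu_kind { HW, SW };

struct event { enum pmu_kind kind; int group_all_software; };

static const char *place(struct event *ev, struct event *leader, int *move_group)
{
    *move_group = 0;
    if (!leader || ev->kind == leader->kind)
        return "leader's context";
    if (ev->kind == SW)
        return "hardware group, software event rides along";
    if (leader->kind == SW && leader->group_all_software) {
        *move_group = 1;        /* drag the software group over */
        return "hardware context, group moved";
    }
    return "rejected";
}

int main(void)
{
    struct event cycles = { HW, 0 }, faults = { SW, 1 };
    int move;
    printf("sw into hw group: %s\n", place(&faults, &cycles, &move));
    printf("hw into sw group: %s (move=%d)\n",
           place(&cycles, &faults, &move), move);
    return 0;
}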
@@ -5229,32 +5739,31 @@ err_fd:
5229 * 5739 *
5230 * @attr: attributes of the counter to create 5740 * @attr: attributes of the counter to create
5231 * @cpu: cpu in which the counter is bound 5741 * @cpu: cpu in which the counter is bound
5232 * @pid: task to profile 5742 * @task: task to profile (NULL for percpu)
5233 */ 5743 */
5234struct perf_event * 5744struct perf_event *
5235perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, 5745perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
5236 pid_t pid, 5746 struct task_struct *task,
5237 perf_overflow_handler_t overflow_handler) 5747 perf_overflow_handler_t overflow_handler)
5238{ 5748{
5239 struct perf_event *event;
5240 struct perf_event_context *ctx; 5749 struct perf_event_context *ctx;
5750 struct perf_event *event;
5241 int err; 5751 int err;
5242 5752
5243 /* 5753 /*
5244 * Get the target context (task or percpu): 5754 * Get the target context (task or percpu):
5245 */ 5755 */
5246 5756
5247 ctx = find_get_context(pid, cpu); 5757 event = perf_event_alloc(attr, cpu, NULL, NULL, overflow_handler);
5248 if (IS_ERR(ctx)) {
5249 err = PTR_ERR(ctx);
5250 goto err_exit;
5251 }
5252
5253 event = perf_event_alloc(attr, cpu, ctx, NULL,
5254 NULL, overflow_handler, GFP_KERNEL);
5255 if (IS_ERR(event)) { 5758 if (IS_ERR(event)) {
5256 err = PTR_ERR(event); 5759 err = PTR_ERR(event);
5257 goto err_put_context; 5760 goto err;
5761 }
5762
5763 ctx = find_get_context(event->pmu, task, cpu);
5764 if (IS_ERR(ctx)) {
5765 err = PTR_ERR(ctx);
5766 goto err_free;
5258 } 5767 }
5259 5768
5260 event->filp = NULL; 5769 event->filp = NULL;
@@ -5272,112 +5781,13 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
5272 5781
5273 return event; 5782 return event;
5274 5783
5275 err_put_context: 5784err_free:
5276 put_ctx(ctx); 5785 free_event(event);
5277 err_exit: 5786err:
5278 return ERR_PTR(err); 5787 return ERR_PTR(err);
5279} 5788}
5280EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter); 5789EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);
5281 5790
5282/*
5283 * inherit a event from parent task to child task:
5284 */
5285static struct perf_event *
5286inherit_event(struct perf_event *parent_event,
5287 struct task_struct *parent,
5288 struct perf_event_context *parent_ctx,
5289 struct task_struct *child,
5290 struct perf_event *group_leader,
5291 struct perf_event_context *child_ctx)
5292{
5293 struct perf_event *child_event;
5294
5295 /*
5296 * Instead of creating recursive hierarchies of events,
5297 * we link inherited events back to the original parent,
5298 * which has a filp for sure, which we use as the reference
5299 * count:
5300 */
5301 if (parent_event->parent)
5302 parent_event = parent_event->parent;
5303
5304 child_event = perf_event_alloc(&parent_event->attr,
5305 parent_event->cpu, child_ctx,
5306 group_leader, parent_event,
5307 NULL, GFP_KERNEL);
5308 if (IS_ERR(child_event))
5309 return child_event;
5310 get_ctx(child_ctx);
5311
5312 /*
5313 * Make the child state follow the state of the parent event,
5314 * not its attr.disabled bit. We hold the parent's mutex,
5315 * so we won't race with perf_event_{en, dis}able_family.
5316 */
5317 if (parent_event->state >= PERF_EVENT_STATE_INACTIVE)
5318 child_event->state = PERF_EVENT_STATE_INACTIVE;
5319 else
5320 child_event->state = PERF_EVENT_STATE_OFF;
5321
5322 if (parent_event->attr.freq) {
5323 u64 sample_period = parent_event->hw.sample_period;
5324 struct hw_perf_event *hwc = &child_event->hw;
5325
5326 hwc->sample_period = sample_period;
5327 hwc->last_period = sample_period;
5328
5329 local64_set(&hwc->period_left, sample_period);
5330 }
5331
5332 child_event->overflow_handler = parent_event->overflow_handler;
5333
5334 /*
5335 * Link it up in the child's context:
5336 */
5337 add_event_to_ctx(child_event, child_ctx);
5338
5339 /*
5340 * Get a reference to the parent filp - we will fput it
5341 * when the child event exits. This is safe to do because
5342 * we are in the parent and we know that the filp still
5343 * exists and has a nonzero count:
5344 */
5345 atomic_long_inc(&parent_event->filp->f_count);
5346
5347 /*
5348 * Link this into the parent event's child list
5349 */
5350 WARN_ON_ONCE(parent_event->ctx->parent_ctx);
5351 mutex_lock(&parent_event->child_mutex);
5352 list_add_tail(&child_event->child_list, &parent_event->child_list);
5353 mutex_unlock(&parent_event->child_mutex);
5354
5355 return child_event;
5356}
5357
5358static int inherit_group(struct perf_event *parent_event,
5359 struct task_struct *parent,
5360 struct perf_event_context *parent_ctx,
5361 struct task_struct *child,
5362 struct perf_event_context *child_ctx)
5363{
5364 struct perf_event *leader;
5365 struct perf_event *sub;
5366 struct perf_event *child_ctr;
5367
5368 leader = inherit_event(parent_event, parent, parent_ctx,
5369 child, NULL, child_ctx);
5370 if (IS_ERR(leader))
5371 return PTR_ERR(leader);
5372 list_for_each_entry(sub, &parent_event->sibling_list, group_entry) {
5373 child_ctr = inherit_event(sub, parent, parent_ctx,
5374 child, leader, child_ctx);
5375 if (IS_ERR(child_ctr))
5376 return PTR_ERR(child_ctr);
5377 }
5378 return 0;
5379}
5380
5381static void sync_child_event(struct perf_event *child_event, 5791static void sync_child_event(struct perf_event *child_event,
5382 struct task_struct *child) 5792 struct task_struct *child)
5383{ 5793{
@@ -5434,16 +5844,13 @@ __perf_event_exit_task(struct perf_event *child_event,
5434 } 5844 }
5435} 5845}
5436 5846
5437/* 5847static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
5438 * When a child task exits, feed back event values to parent events.
5439 */
5440void perf_event_exit_task(struct task_struct *child)
5441{ 5848{
5442 struct perf_event *child_event, *tmp; 5849 struct perf_event *child_event, *tmp;
5443 struct perf_event_context *child_ctx; 5850 struct perf_event_context *child_ctx;
5444 unsigned long flags; 5851 unsigned long flags;
5445 5852
5446 if (likely(!child->perf_event_ctxp)) { 5853 if (likely(!child->perf_event_ctxp[ctxn])) {
5447 perf_event_task(child, NULL, 0); 5854 perf_event_task(child, NULL, 0);
5448 return; 5855 return;
5449 } 5856 }
@@ -5455,7 +5862,7 @@ void perf_event_exit_task(struct task_struct *child)
5455 * scheduled, so we are now safe from rescheduling changing 5862 * scheduled, so we are now safe from rescheduling changing
5456 * our context. 5863 * our context.
5457 */ 5864 */
5458 child_ctx = child->perf_event_ctxp; 5865 child_ctx = child->perf_event_ctxp[ctxn];
5459 __perf_event_task_sched_out(child_ctx); 5866 __perf_event_task_sched_out(child_ctx);
5460 5867
5461 /* 5868 /*
@@ -5464,7 +5871,7 @@ void perf_event_exit_task(struct task_struct *child)
5464 * incremented the context's refcount before we do put_ctx below. 5871 * incremented the context's refcount before we do put_ctx below.
5465 */ 5872 */
5466 raw_spin_lock(&child_ctx->lock); 5873 raw_spin_lock(&child_ctx->lock);
5467 child->perf_event_ctxp = NULL; 5874 child->perf_event_ctxp[ctxn] = NULL;
5468 /* 5875 /*
5469 * If this context is a clone; unclone it so it can't get 5876 * If this context is a clone; unclone it so it can't get
5470 * swapped to another process while we're removing all 5877 * swapped to another process while we're removing all
@@ -5517,6 +5924,17 @@ again:
5517 put_ctx(child_ctx); 5924 put_ctx(child_ctx);
5518} 5925}
5519 5926
5927/*
5928 * When a child task exits, feed back event values to parent events.
5929 */
5930void perf_event_exit_task(struct task_struct *child)
5931{
5932 int ctxn;
5933
5934 for_each_task_context_nr(ctxn)
5935 perf_event_exit_task_context(child, ctxn);
5936}
5937
5520static void perf_free_event(struct perf_event *event, 5938static void perf_free_event(struct perf_event *event,
5521 struct perf_event_context *ctx) 5939 struct perf_event_context *ctx)
5522{ 5940{
@@ -5538,48 +5956,165 @@ static void perf_free_event(struct perf_event *event,
5538 5956
5539/* 5957/*
5540 * free an unexposed, unused context as created by inheritance by 5958 * free an unexposed, unused context as created by inheritance by
5541 * init_task below, used by fork() in case of fail. 5959 * perf_event_init_task below, used by fork() in case of failure.
5542 */ 5960 */
5543void perf_event_free_task(struct task_struct *task) 5961void perf_event_free_task(struct task_struct *task)
5544{ 5962{
5545 struct perf_event_context *ctx = task->perf_event_ctxp; 5963 struct perf_event_context *ctx;
5546 struct perf_event *event, *tmp; 5964 struct perf_event *event, *tmp;
5965 int ctxn;
5547 5966
5548 if (!ctx) 5967 for_each_task_context_nr(ctxn) {
5549 return; 5968 ctx = task->perf_event_ctxp[ctxn];
5969 if (!ctx)
5970 continue;
5550 5971
5551 mutex_lock(&ctx->mutex); 5972 mutex_lock(&ctx->mutex);
5552again: 5973again:
5553 list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) 5974 list_for_each_entry_safe(event, tmp, &ctx->pinned_groups,
5554 perf_free_event(event, ctx); 5975 group_entry)
5976 perf_free_event(event, ctx);
5555 5977
5556 list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, 5978 list_for_each_entry_safe(event, tmp, &ctx->flexible_groups,
5557 group_entry) 5979 group_entry)
5558 perf_free_event(event, ctx); 5980 perf_free_event(event, ctx);
5559 5981
5560 if (!list_empty(&ctx->pinned_groups) || 5982 if (!list_empty(&ctx->pinned_groups) ||
5561 !list_empty(&ctx->flexible_groups)) 5983 !list_empty(&ctx->flexible_groups))
5562 goto again; 5984 goto again;
5563 5985
5564 mutex_unlock(&ctx->mutex); 5986 mutex_unlock(&ctx->mutex);
5565 5987
5566 put_ctx(ctx); 5988 put_ctx(ctx);
5989 }
5990}
5991
5992void perf_event_delayed_put(struct task_struct *task)
5993{
5994 int ctxn;
5995
5996 for_each_task_context_nr(ctxn)
5997 WARN_ON_ONCE(task->perf_event_ctxp[ctxn]);
5998}
5999
6000/*
6001 * inherit an event from parent task to child task:
6002 */
6003static struct perf_event *
6004inherit_event(struct perf_event *parent_event,
6005 struct task_struct *parent,
6006 struct perf_event_context *parent_ctx,
6007 struct task_struct *child,
6008 struct perf_event *group_leader,
6009 struct perf_event_context *child_ctx)
6010{
6011 struct perf_event *child_event;
6012 unsigned long flags;
6013
6014 /*
6015 * Instead of creating recursive hierarchies of events,
6016 * we link inherited events back to the original parent,
6017 * which has a filp for sure, which we use as the reference
6018 * count:
6019 */
6020 if (parent_event->parent)
6021 parent_event = parent_event->parent;
6022
6023 child_event = perf_event_alloc(&parent_event->attr,
6024 parent_event->cpu,
6025 group_leader, parent_event,
6026 NULL);
6027 if (IS_ERR(child_event))
6028 return child_event;
6029 get_ctx(child_ctx);
6030
6031 /*
6032 * Make the child state follow the state of the parent event,
6033 * not its attr.disabled bit. We hold the parent's mutex,
6034 * so we won't race with perf_event_{en, dis}able_family.
6035 */
6036 if (parent_event->state >= PERF_EVENT_STATE_INACTIVE)
6037 child_event->state = PERF_EVENT_STATE_INACTIVE;
6038 else
6039 child_event->state = PERF_EVENT_STATE_OFF;
6040
6041 if (parent_event->attr.freq) {
6042 u64 sample_period = parent_event->hw.sample_period;
6043 struct hw_perf_event *hwc = &child_event->hw;
6044
6045 hwc->sample_period = sample_period;
6046 hwc->last_period = sample_period;
6047
6048 local64_set(&hwc->period_left, sample_period);
6049 }
6050
6051 child_event->ctx = child_ctx;
6052 child_event->overflow_handler = parent_event->overflow_handler;
6053
6054 /*
6055 * Link it up in the child's context:
6056 */
6057 raw_spin_lock_irqsave(&child_ctx->lock, flags);
6058 add_event_to_ctx(child_event, child_ctx);
6059 raw_spin_unlock_irqrestore(&child_ctx->lock, flags);
6060
6061 /*
6062 * Get a reference to the parent filp - we will fput it
6063 * when the child event exits. This is safe to do because
6064 * we are in the parent and we know that the filp still
6065 * exists and has a nonzero count:
6066 */
6067 atomic_long_inc(&parent_event->filp->f_count);
6068
6069 /*
6070 * Link this into the parent event's child list
6071 */
6072 WARN_ON_ONCE(parent_event->ctx->parent_ctx);
6073 mutex_lock(&parent_event->child_mutex);
6074 list_add_tail(&child_event->child_list, &parent_event->child_list);
6075 mutex_unlock(&parent_event->child_mutex);
6076
6077 return child_event;
6078}
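[Editor's note] Two properties of inherit_event() are easy to miss in the move: the child follows the parent's current enabled/disabled state rather than its attr.disabled bit, and a freq-driven event seeds the child's period from the parent's current sample_period so the child does not re-learn the rate from scratch. Both in miniature, with made-up values:

#include <stdint.h>
#include <stdio.h>

enum state { OFF, INACTIVE };

struct event {
    enum state state;
    int freq;
    uint64_t sample_period, last_period;
    int64_t period_left;
};

static void inherit(struct event *child, const struct event *parent)
{
    /* follow the parent's live state, not attr.disabled */
    child->state = parent->state >= INACTIVE ? INACTIVE : OFF;
    if (parent->freq) {
        child->sample_period = parent->sample_period;
        child->last_period   = parent->sample_period;
        child->period_left   = (int64_t)parent->sample_period;
    }
}

int main(void)
{
    struct event parent = { INACTIVE, 1, 40000, 0, 0 };
    struct event child  = { 0 };
    inherit(&child, &parent);
    printf("child state=%d period=%llu\n", (int)child.state,
           (unsigned long long)child.sample_period);
    return 0;
}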
6079
6080static int inherit_group(struct perf_event *parent_event,
6081 struct task_struct *parent,
6082 struct perf_event_context *parent_ctx,
6083 struct task_struct *child,
6084 struct perf_event_context *child_ctx)
6085{
6086 struct perf_event *leader;
6087 struct perf_event *sub;
6088 struct perf_event *child_ctr;
6089
6090 leader = inherit_event(parent_event, parent, parent_ctx,
6091 child, NULL, child_ctx);
6092 if (IS_ERR(leader))
6093 return PTR_ERR(leader);
6094 list_for_each_entry(sub, &parent_event->sibling_list, group_entry) {
6095 child_ctr = inherit_event(sub, parent, parent_ctx,
6096 child, leader, child_ctx);
6097 if (IS_ERR(child_ctr))
6098 return PTR_ERR(child_ctr);
6099 }
6100 return 0;
5567} 6101}
5568 6102
5569static int 6103static int
5570inherit_task_group(struct perf_event *event, struct task_struct *parent, 6104inherit_task_group(struct perf_event *event, struct task_struct *parent,
5571 struct perf_event_context *parent_ctx, 6105 struct perf_event_context *parent_ctx,
5572 struct task_struct *child, 6106 struct task_struct *child, int ctxn,
5573 int *inherited_all) 6107 int *inherited_all)
5574{ 6108{
5575 int ret; 6109 int ret;
5576 struct perf_event_context *child_ctx = child->perf_event_ctxp; 6110 struct perf_event_context *child_ctx;
5577 6111
5578 if (!event->attr.inherit) { 6112 if (!event->attr.inherit) {
5579 *inherited_all = 0; 6113 *inherited_all = 0;
5580 return 0; 6114 return 0;
5581 } 6115 }
5582 6116
6117 child_ctx = child->perf_event_ctxp[ctxn];
5583 if (!child_ctx) { 6118 if (!child_ctx) {
5584 /* 6119 /*
5585 * This is executed from the parent task context, so 6120 * This is executed from the parent task context, so
@@ -5588,14 +6123,11 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent,
5588 * child. 6123 * child.
5589 */ 6124 */
5590 6125
5591 child_ctx = kzalloc(sizeof(struct perf_event_context), 6126 child_ctx = alloc_perf_context(event->pmu, child);
5592 GFP_KERNEL);
5593 if (!child_ctx) 6127 if (!child_ctx)
5594 return -ENOMEM; 6128 return -ENOMEM;
5595 6129
5596 __perf_event_init_context(child_ctx, child); 6130 child->perf_event_ctxp[ctxn] = child_ctx;
5597 child->perf_event_ctxp = child_ctx;
5598 get_task_struct(child);
5599 } 6131 }
5600 6132
5601 ret = inherit_group(event, parent, parent_ctx, 6133 ret = inherit_group(event, parent, parent_ctx,
@@ -5607,11 +6139,10 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent,
5607 return ret; 6139 return ret;
5608} 6140}
5609 6141
5610
5611/* 6142/*
5612 * Initialize the perf_event context in task_struct 6143 * Initialize the perf_event context in task_struct
5613 */ 6144 */
5614int perf_event_init_task(struct task_struct *child) 6145int perf_event_init_context(struct task_struct *child, int ctxn)
5615{ 6146{
5616 struct perf_event_context *child_ctx, *parent_ctx; 6147 struct perf_event_context *child_ctx, *parent_ctx;
5617 struct perf_event_context *cloned_ctx; 6148 struct perf_event_context *cloned_ctx;
@@ -5620,19 +6151,19 @@ int perf_event_init_task(struct task_struct *child)
5620 int inherited_all = 1; 6151 int inherited_all = 1;
5621 int ret = 0; 6152 int ret = 0;
5622 6153
5623 child->perf_event_ctxp = NULL; 6154 child->perf_event_ctxp[ctxn] = NULL;
5624 6155
5625 mutex_init(&child->perf_event_mutex); 6156 mutex_init(&child->perf_event_mutex);
5626 INIT_LIST_HEAD(&child->perf_event_list); 6157 INIT_LIST_HEAD(&child->perf_event_list);
5627 6158
5628 if (likely(!parent->perf_event_ctxp)) 6159 if (likely(!parent->perf_event_ctxp[ctxn]))
5629 return 0; 6160 return 0;
5630 6161
5631 /* 6162 /*
5632 * If the parent's context is a clone, pin it so it won't get 6163 * If the parent's context is a clone, pin it so it won't get
5633 * swapped under us. 6164 * swapped under us.
5634 */ 6165 */
5635 parent_ctx = perf_pin_task_context(parent); 6166 parent_ctx = perf_pin_task_context(parent, ctxn);
5636 6167
5637 /* 6168 /*
5638 * No need to check if parent_ctx != NULL here; since we saw 6169 * No need to check if parent_ctx != NULL here; since we saw
@@ -5652,20 +6183,20 @@ int perf_event_init_task(struct task_struct *child)
5652 * the list, not manipulating it: 6183 * the list, not manipulating it:
5653 */ 6184 */
5654 list_for_each_entry(event, &parent_ctx->pinned_groups, group_entry) { 6185 list_for_each_entry(event, &parent_ctx->pinned_groups, group_entry) {
5655 ret = inherit_task_group(event, parent, parent_ctx, child, 6186 ret = inherit_task_group(event, parent, parent_ctx,
5656 &inherited_all); 6187 child, ctxn, &inherited_all);
5657 if (ret) 6188 if (ret)
5658 break; 6189 break;
5659 } 6190 }
5660 6191
5661 list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) { 6192 list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) {
5662 ret = inherit_task_group(event, parent, parent_ctx, child, 6193 ret = inherit_task_group(event, parent, parent_ctx,
5663 &inherited_all); 6194 child, ctxn, &inherited_all);
5664 if (ret) 6195 if (ret)
5665 break; 6196 break;
5666 } 6197 }
5667 6198
5668 child_ctx = child->perf_event_ctxp; 6199 child_ctx = child->perf_event_ctxp[ctxn];
5669 6200
5670 if (child_ctx && inherited_all) { 6201 if (child_ctx && inherited_all) {
5671 /* 6202 /*
@@ -5694,63 +6225,98 @@ int perf_event_init_task(struct task_struct *child)
5694 return ret; 6225 return ret;
5695} 6226}
5696 6227
6228/*
6229 * Initialize the perf_event context in task_struct
6230 */
6231int perf_event_init_task(struct task_struct *child)
6232{
6233 int ctxn, ret;
6234
6235 for_each_task_context_nr(ctxn) {
6236 ret = perf_event_init_context(child, ctxn);
6237 if (ret)
6238 return ret;
6239 }
6240
6241 return 0;
6242}
6243
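perf_event_init_task() is now just a loop over the per-task context indices that bails out on the first failure. A compact sketch of that loop shape, assuming two context slots (NR_CONTEXTS and init_context() are invented for illustration):

    #include <stdio.h>

    #define NR_CONTEXTS 2   /* assumption: e.g. one hardware, one software */

    static int init_context(int ctxn)
    {
            (void)ctxn;
            return 0;                       /* pretend each init succeeds */
    }

    int main(void)
    {
            int ctxn, ret;

            for (ctxn = 0; ctxn < NR_CONTEXTS; ctxn++) {
                    ret = init_context(ctxn);
                    if (ret)
                            return 1;       /* first failure aborts, as above */
            }
            printf("all %d contexts initialized\n", NR_CONTEXTS);
            return 0;
    }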
5697static void __init perf_event_init_all_cpus(void) 6244static void __init perf_event_init_all_cpus(void)
5698{ 6245{
6246 struct swevent_htable *swhash;
5699 int cpu; 6247 int cpu;
5700 struct perf_cpu_context *cpuctx;
5701 6248
5702 for_each_possible_cpu(cpu) { 6249 for_each_possible_cpu(cpu) {
5703 cpuctx = &per_cpu(perf_cpu_context, cpu); 6250 swhash = &per_cpu(swevent_htable, cpu);
5704 mutex_init(&cpuctx->hlist_mutex); 6251 mutex_init(&swhash->hlist_mutex);
5705 __perf_event_init_context(&cpuctx->ctx, NULL); 6252 INIT_LIST_HEAD(&per_cpu(rotation_list, cpu));
5706 } 6253 }
5707} 6254}
5708 6255
5709static void __cpuinit perf_event_init_cpu(int cpu) 6256static void __cpuinit perf_event_init_cpu(int cpu)
5710{ 6257{
5711 struct perf_cpu_context *cpuctx; 6258 struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
5712
5713 cpuctx = &per_cpu(perf_cpu_context, cpu);
5714
5715 spin_lock(&perf_resource_lock);
5716 cpuctx->max_pertask = perf_max_events - perf_reserved_percpu;
5717 spin_unlock(&perf_resource_lock);
5718 6259
5719 mutex_lock(&cpuctx->hlist_mutex); 6260 mutex_lock(&swhash->hlist_mutex);
5720 if (cpuctx->hlist_refcount > 0) { 6261 if (swhash->hlist_refcount > 0) {
5721 struct swevent_hlist *hlist; 6262 struct swevent_hlist *hlist;
5722 6263
5723 hlist = kzalloc(sizeof(*hlist), GFP_KERNEL); 6264 hlist = kzalloc_node(sizeof(*hlist), GFP_KERNEL, cpu_to_node(cpu));
5724 WARN_ON_ONCE(!hlist); 6265 WARN_ON(!hlist);
5725 rcu_assign_pointer(cpuctx->swevent_hlist, hlist); 6266 rcu_assign_pointer(swhash->swevent_hlist, hlist);
5726 } 6267 }
5727 mutex_unlock(&cpuctx->hlist_mutex); 6268 mutex_unlock(&swhash->hlist_mutex);
5728} 6269}
5729 6270
5730#ifdef CONFIG_HOTPLUG_CPU 6271#ifdef CONFIG_HOTPLUG_CPU
5731static void __perf_event_exit_cpu(void *info) 6272static void perf_pmu_rotate_stop(struct pmu *pmu)
5732{ 6273{
5733 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); 6274 struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
5734 struct perf_event_context *ctx = &cpuctx->ctx; 6275
6276 WARN_ON(!irqs_disabled());
6277
6278 list_del_init(&cpuctx->rotation_list);
6279}
6280
6281static void __perf_event_exit_context(void *__info)
6282{
6283 struct perf_event_context *ctx = __info;
5735 struct perf_event *event, *tmp; 6284 struct perf_event *event, *tmp;
5736 6285
6286 perf_pmu_rotate_stop(ctx->pmu);
6287
5737 list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) 6288 list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry)
5738 __perf_event_remove_from_context(event); 6289 __perf_event_remove_from_context(event);
5739 list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry) 6290 list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry)
5740 __perf_event_remove_from_context(event); 6291 __perf_event_remove_from_context(event);
5741} 6292}
6293
6294static void perf_event_exit_cpu_context(int cpu)
6295{
6296 struct perf_event_context *ctx;
6297 struct pmu *pmu;
6298 int idx;
6299
6300 idx = srcu_read_lock(&pmus_srcu);
6301 list_for_each_entry_rcu(pmu, &pmus, entry) {
6302 ctx = &per_cpu_ptr(pmu->pmu_cpu_context, cpu)->ctx;
6303
6304 mutex_lock(&ctx->mutex);
6305 smp_call_function_single(cpu, __perf_event_exit_context, ctx, 1);
6306 mutex_unlock(&ctx->mutex);
6307 }
6308 srcu_read_unlock(&pmus_srcu, idx);
6309}
6310
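The hotplug teardown walks every registered pmu under the SRCU read lock, then serializes on each context's mutex before poking the dying CPU. A rough userspace analogue of that locking pattern, using a pthreads read-write lock in place of SRCU (the list layout and names are invented):

    #include <pthread.h>
    #include <stdio.h>

    struct pmu_ctx {
            const char *name;
            pthread_mutex_t mutex;
            struct pmu_ctx *next;
    };

    static pthread_rwlock_t pmus_lock = PTHREAD_RWLOCK_INITIALIZER;
    static struct pmu_ctx ctx_b = { "task-clock", PTHREAD_MUTEX_INITIALIZER, NULL };
    static struct pmu_ctx ctx_a = { "cpu-clock", PTHREAD_MUTEX_INITIALIZER, &ctx_b };
    static struct pmu_ctx *pmus = &ctx_a;

    static void exit_context(struct pmu_ctx *ctx)
    {
            printf("tearing down %s context\n", ctx->name);
    }

    int main(void)
    {
            struct pmu_ctx *ctx;

            pthread_rwlock_rdlock(&pmus_lock);      /* srcu_read_lock() stand-in */
            for (ctx = pmus; ctx; ctx = ctx->next) {
                    pthread_mutex_lock(&ctx->mutex);
                    exit_context(ctx);              /* the cross-CPU call goes here */
                    pthread_mutex_unlock(&ctx->mutex);
            }
            pthread_rwlock_unlock(&pmus_lock);
            return 0;
    }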
5742static void perf_event_exit_cpu(int cpu) 6311static void perf_event_exit_cpu(int cpu)
5743{ 6312{
5744 struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); 6313 struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
5745 struct perf_event_context *ctx = &cpuctx->ctx;
5746 6314
5747 mutex_lock(&cpuctx->hlist_mutex); 6315 mutex_lock(&swhash->hlist_mutex);
5748 swevent_hlist_release(cpuctx); 6316 swevent_hlist_release(swhash);
5749 mutex_unlock(&cpuctx->hlist_mutex); 6317 mutex_unlock(&swhash->hlist_mutex);
5750 6318
5751 mutex_lock(&ctx->mutex); 6319 perf_event_exit_cpu_context(cpu);
5752 smp_call_function_single(cpu, __perf_event_exit_cpu, NULL, 1);
5753 mutex_unlock(&ctx->mutex);
5754} 6320}
5755#else 6321#else
5756static inline void perf_event_exit_cpu(int cpu) { } 6322static inline void perf_event_exit_cpu(int cpu) { }
@@ -5780,118 +6346,13 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
5780 return NOTIFY_OK; 6346 return NOTIFY_OK;
5781} 6347}
5782 6348
5783/*
5784 * This has to have a higher priority than migration_notifier in sched.c.
5785 */
5786static struct notifier_block __cpuinitdata perf_cpu_nb = {
5787 .notifier_call = perf_cpu_notify,
5788 .priority = 20,
5789};
5790
5791void __init perf_event_init(void) 6349void __init perf_event_init(void)
5792{ 6350{
5793 perf_event_init_all_cpus(); 6351 perf_event_init_all_cpus();
5794 perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE, 6352 init_srcu_struct(&pmus_srcu);
5795 (void *)(long)smp_processor_id()); 6353 perf_pmu_register(&perf_swevent);
5796 perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE, 6354 perf_pmu_register(&perf_cpu_clock);
5797 (void *)(long)smp_processor_id()); 6355 perf_pmu_register(&perf_task_clock);
5798 register_cpu_notifier(&perf_cpu_nb); 6356 perf_tp_register();
5799} 6357 perf_cpu_notifier(perf_cpu_notify);
5800
5801static ssize_t perf_show_reserve_percpu(struct sysdev_class *class,
5802 struct sysdev_class_attribute *attr,
5803 char *buf)
5804{
5805 return sprintf(buf, "%d\n", perf_reserved_percpu);
5806}
5807
5808static ssize_t
5809perf_set_reserve_percpu(struct sysdev_class *class,
5810 struct sysdev_class_attribute *attr,
5811 const char *buf,
5812 size_t count)
5813{
5814 struct perf_cpu_context *cpuctx;
5815 unsigned long val;
5816 int err, cpu, mpt;
5817
5818 err = strict_strtoul(buf, 10, &val);
5819 if (err)
5820 return err;
5821 if (val > perf_max_events)
5822 return -EINVAL;
5823
5824 spin_lock(&perf_resource_lock);
5825 perf_reserved_percpu = val;
5826 for_each_online_cpu(cpu) {
5827 cpuctx = &per_cpu(perf_cpu_context, cpu);
5828 raw_spin_lock_irq(&cpuctx->ctx.lock);
5829 mpt = min(perf_max_events - cpuctx->ctx.nr_events,
5830 perf_max_events - perf_reserved_percpu);
5831 cpuctx->max_pertask = mpt;
5832 raw_spin_unlock_irq(&cpuctx->ctx.lock);
5833 }
5834 spin_unlock(&perf_resource_lock);
5835
5836 return count;
5837}
5838
5839static ssize_t perf_show_overcommit(struct sysdev_class *class,
5840 struct sysdev_class_attribute *attr,
5841 char *buf)
5842{
5843 return sprintf(buf, "%d\n", perf_overcommit);
5844}
5845
5846static ssize_t
5847perf_set_overcommit(struct sysdev_class *class,
5848 struct sysdev_class_attribute *attr,
5849 const char *buf, size_t count)
5850{
5851 unsigned long val;
5852 int err;
5853
5854 err = strict_strtoul(buf, 10, &val);
5855 if (err)
5856 return err;
5857 if (val > 1)
5858 return -EINVAL;
5859
5860 spin_lock(&perf_resource_lock);
5861 perf_overcommit = val;
5862 spin_unlock(&perf_resource_lock);
5863
5864 return count;
5865}
5866
5867static SYSDEV_CLASS_ATTR(
5868 reserve_percpu,
5869 0644,
5870 perf_show_reserve_percpu,
5871 perf_set_reserve_percpu
5872 );
5873
5874static SYSDEV_CLASS_ATTR(
5875 overcommit,
5876 0644,
5877 perf_show_overcommit,
5878 perf_set_overcommit
5879 );
5880
5881static struct attribute *perfclass_attrs[] = {
5882 &attr_reserve_percpu.attr,
5883 &attr_overcommit.attr,
5884 NULL
5885};
5886
5887static struct attribute_group perfclass_attr_group = {
5888 .attrs = perfclass_attrs,
5889 .name = "perf_events",
5890};
5891
5892static int __init perf_event_sysfs_init(void)
5893{
5894 return sysfs_create_group(&cpu_sysdev_class.kset.kobj,
5895 &perfclass_attr_group);
5896} 6358}
5897device_initcall(perf_event_sysfs_init);
diff --git a/kernel/sched.c b/kernel/sched.c
index dc85ceb90832..c0d2067f3e0d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3584,7 +3584,7 @@ void scheduler_tick(void)
3584 curr->sched_class->task_tick(rq, curr, 0); 3584 curr->sched_class->task_tick(rq, curr, 0);
3585 raw_spin_unlock(&rq->lock); 3585 raw_spin_unlock(&rq->lock);
3586 3586
3587 perf_event_task_tick(curr); 3587 perf_event_task_tick();
3588 3588
3589#ifdef CONFIG_SMP 3589#ifdef CONFIG_SMP
3590 rq->idle_at_tick = idle_cpu(cpu); 3590 rq->idle_at_tick = idle_cpu(cpu);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index fa7ece649fe1..65fb077ea79c 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -884,10 +884,8 @@ enum {
884 FTRACE_ENABLE_CALLS = (1 << 0), 884 FTRACE_ENABLE_CALLS = (1 << 0),
885 FTRACE_DISABLE_CALLS = (1 << 1), 885 FTRACE_DISABLE_CALLS = (1 << 1),
886 FTRACE_UPDATE_TRACE_FUNC = (1 << 2), 886 FTRACE_UPDATE_TRACE_FUNC = (1 << 2),
887 FTRACE_ENABLE_MCOUNT = (1 << 3), 887 FTRACE_START_FUNC_RET = (1 << 3),
888 FTRACE_DISABLE_MCOUNT = (1 << 4), 888 FTRACE_STOP_FUNC_RET = (1 << 4),
889 FTRACE_START_FUNC_RET = (1 << 5),
890 FTRACE_STOP_FUNC_RET = (1 << 6),
891}; 889};
892 890
893static int ftrace_filtered; 891static int ftrace_filtered;
@@ -1226,8 +1224,6 @@ static void ftrace_shutdown(int command)
1226 1224
1227static void ftrace_startup_sysctl(void) 1225static void ftrace_startup_sysctl(void)
1228{ 1226{
1229 int command = FTRACE_ENABLE_MCOUNT;
1230
1231 if (unlikely(ftrace_disabled)) 1227 if (unlikely(ftrace_disabled))
1232 return; 1228 return;
1233 1229
@@ -1235,23 +1231,17 @@ static void ftrace_startup_sysctl(void)
1235 saved_ftrace_func = NULL; 1231 saved_ftrace_func = NULL;
1236 /* ftrace_start_up is true if we want ftrace running */ 1232 /* ftrace_start_up is true if we want ftrace running */
1237 if (ftrace_start_up) 1233 if (ftrace_start_up)
1238 command |= FTRACE_ENABLE_CALLS; 1234 ftrace_run_update_code(FTRACE_ENABLE_CALLS);
1239
1240 ftrace_run_update_code(command);
1241} 1235}
1242 1236
1243static void ftrace_shutdown_sysctl(void) 1237static void ftrace_shutdown_sysctl(void)
1244{ 1238{
1245 int command = FTRACE_DISABLE_MCOUNT;
1246
1247 if (unlikely(ftrace_disabled)) 1239 if (unlikely(ftrace_disabled))
1248 return; 1240 return;
1249 1241
1250 /* ftrace_start_up is true if ftrace is running */ 1242 /* ftrace_start_up is true if ftrace is running */
1251 if (ftrace_start_up) 1243 if (ftrace_start_up)
1252 command |= FTRACE_DISABLE_CALLS; 1244 ftrace_run_update_code(FTRACE_DISABLE_CALLS);
1253
1254 ftrace_run_update_code(command);
1255} 1245}
1256 1246
1257static cycle_t ftrace_update_time; 1247static cycle_t ftrace_update_time;
@@ -1368,24 +1358,29 @@ enum {
1368#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ 1358#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
1369 1359
1370struct ftrace_iterator { 1360struct ftrace_iterator {
1371 struct ftrace_page *pg; 1361 loff_t pos;
1372 int hidx; 1362 loff_t func_pos;
1373 int idx; 1363 struct ftrace_page *pg;
1374 unsigned flags; 1364 struct dyn_ftrace *func;
1375 struct trace_parser parser; 1365 struct ftrace_func_probe *probe;
1366 struct trace_parser parser;
1367 int hidx;
1368 int idx;
1369 unsigned flags;
1376}; 1370};
1377 1371
1378static void * 1372static void *
1379t_hash_next(struct seq_file *m, void *v, loff_t *pos) 1373t_hash_next(struct seq_file *m, loff_t *pos)
1380{ 1374{
1381 struct ftrace_iterator *iter = m->private; 1375 struct ftrace_iterator *iter = m->private;
1382 struct hlist_node *hnd = v; 1376 struct hlist_node *hnd = NULL;
1383 struct hlist_head *hhd; 1377 struct hlist_head *hhd;
1384 1378
1385 WARN_ON(!(iter->flags & FTRACE_ITER_HASH));
1386
1387 (*pos)++; 1379 (*pos)++;
1380 iter->pos = *pos;
1388 1381
1382 if (iter->probe)
1383 hnd = &iter->probe->node;
1389 retry: 1384 retry:
1390 if (iter->hidx >= FTRACE_FUNC_HASHSIZE) 1385 if (iter->hidx >= FTRACE_FUNC_HASHSIZE)
1391 return NULL; 1386 return NULL;
@@ -1408,7 +1403,12 @@ t_hash_next(struct seq_file *m, void *v, loff_t *pos)
1408 } 1403 }
1409 } 1404 }
1410 1405
1411 return hnd; 1406 if (WARN_ON_ONCE(!hnd))
1407 return NULL;
1408
1409 iter->probe = hlist_entry(hnd, struct ftrace_func_probe, node);
1410
1411 return iter;
1412} 1412}
1413 1413
1414static void *t_hash_start(struct seq_file *m, loff_t *pos) 1414static void *t_hash_start(struct seq_file *m, loff_t *pos)
@@ -1417,26 +1417,32 @@ static void *t_hash_start(struct seq_file *m, loff_t *pos)
1417 void *p = NULL; 1417 void *p = NULL;
1418 loff_t l; 1418 loff_t l;
1419 1419
1420 if (!(iter->flags & FTRACE_ITER_HASH)) 1420 if (iter->func_pos > *pos)
1421 *pos = 0; 1421 return NULL;
1422
1423 iter->flags |= FTRACE_ITER_HASH;
1424 1422
1425 iter->hidx = 0; 1423 iter->hidx = 0;
1426 for (l = 0; l <= *pos; ) { 1424 for (l = 0; l <= (*pos - iter->func_pos); ) {
1427 p = t_hash_next(m, p, &l); 1425 p = t_hash_next(m, &l);
1428 if (!p) 1426 if (!p)
1429 break; 1427 break;
1430 } 1428 }
1431 return p; 1429 if (!p)
1430 return NULL;
1431
1432 /* Only set this if we have an item */
1433 iter->flags |= FTRACE_ITER_HASH;
1434
1435 return iter;
1432} 1436}
1433 1437
1434static int t_hash_show(struct seq_file *m, void *v) 1438static int
1439t_hash_show(struct seq_file *m, struct ftrace_iterator *iter)
1435{ 1440{
1436 struct ftrace_func_probe *rec; 1441 struct ftrace_func_probe *rec;
1437 struct hlist_node *hnd = v;
1438 1442
1439 rec = hlist_entry(hnd, struct ftrace_func_probe, node); 1443 rec = iter->probe;
1444 if (WARN_ON_ONCE(!rec))
1445 return -EIO;
1440 1446
1441 if (rec->ops->print) 1447 if (rec->ops->print)
1442 return rec->ops->print(m, rec->ip, rec->ops, rec->data); 1448 return rec->ops->print(m, rec->ip, rec->ops, rec->data);
@@ -1457,12 +1463,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
1457 struct dyn_ftrace *rec = NULL; 1463 struct dyn_ftrace *rec = NULL;
1458 1464
1459 if (iter->flags & FTRACE_ITER_HASH) 1465 if (iter->flags & FTRACE_ITER_HASH)
1460 return t_hash_next(m, v, pos); 1466 return t_hash_next(m, pos);
1461 1467
1462 (*pos)++; 1468 (*pos)++;
1469 iter->pos = *pos;
1463 1470
1464 if (iter->flags & FTRACE_ITER_PRINTALL) 1471 if (iter->flags & FTRACE_ITER_PRINTALL)
1465 return NULL; 1472 return t_hash_start(m, pos);
1466 1473
1467 retry: 1474 retry:
1468 if (iter->idx >= iter->pg->index) { 1475 if (iter->idx >= iter->pg->index) {
@@ -1491,7 +1498,20 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
1491 } 1498 }
1492 } 1499 }
1493 1500
1494 return rec; 1501 if (!rec)
1502 return t_hash_start(m, pos);
1503
1504 iter->func_pos = *pos;
1505 iter->func = rec;
1506
1507 return iter;
1508}
1509
1510static void reset_iter_read(struct ftrace_iterator *iter)
1511{
1512 iter->pos = 0;
1513 iter->func_pos = 0;
1514 iter->flags &= ~(FTRACE_ITER_PRINTALL & FTRACE_ITER_HASH);
1495} 1515}
1496 1516
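reset_iter_read() together with the "*pos < iter->pos" check in t_start() is what makes lseek on set_ftrace_filter safe again: a backwards seek rewinds the iterator and rescans from the beginning. A small sketch of that rewind-and-rescan logic, with a plain array standing in for the ftrace pages:

    #include <stdio.h>

    struct iter { long pos; int idx; };

    static void reset_iter(struct iter *it) { it->pos = 0; it->idx = 0; }

    static int start(struct iter *it, long pos, const int *data, int len)
    {
            if (pos < it->pos)              /* an lseek went backwards */
                    reset_iter(it);
            while (it->pos < pos && it->idx < len) {
                    it->pos++;
                    it->idx++;
            }
            return it->idx < len ? data[it->idx] : -1;
    }

    int main(void)
    {
            const int data[] = { 10, 20, 30, 40 };
            struct iter it = { 0, 0 };

            printf("%d\n", start(&it, 2, data, 4)); /* 30 */
            printf("%d\n", start(&it, 0, data, 4)); /* rewound: 10 */
            return 0;
    }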
1497static void *t_start(struct seq_file *m, loff_t *pos) 1517static void *t_start(struct seq_file *m, loff_t *pos)
@@ -1502,6 +1522,12 @@ static void *t_start(struct seq_file *m, loff_t *pos)
1502 1522
1503 mutex_lock(&ftrace_lock); 1523 mutex_lock(&ftrace_lock);
1504 /* 1524 /*
1525 * If an lseek was done, then reset and start from beginning.
1526 */
1527 if (*pos < iter->pos)
1528 reset_iter_read(iter);
1529
1530 /*
1505 * For set_ftrace_filter reading, if we have the filter 1531 * For set_ftrace_filter reading, if we have the filter
1506 * off, we can short cut and just print out that all 1532 * off, we can short cut and just print out that all
1507 * functions are enabled. 1533 * functions are enabled.
@@ -1518,6 +1544,11 @@ static void *t_start(struct seq_file *m, loff_t *pos)
1518 if (iter->flags & FTRACE_ITER_HASH) 1544 if (iter->flags & FTRACE_ITER_HASH)
1519 return t_hash_start(m, pos); 1545 return t_hash_start(m, pos);
1520 1546
1547 /*
1548 * Unfortunately, we need to restart at ftrace_pages_start
1549 * every time we let go of the ftrace_mutex. This is because
1550 * those pointers can change without the lock.
1551 */
1521 iter->pg = ftrace_pages_start; 1552 iter->pg = ftrace_pages_start;
1522 iter->idx = 0; 1553 iter->idx = 0;
1523 for (l = 0; l <= *pos; ) { 1554 for (l = 0; l <= *pos; ) {
@@ -1526,10 +1557,14 @@ static void *t_start(struct seq_file *m, loff_t *pos)
1526 break; 1557 break;
1527 } 1558 }
1528 1559
1529 if (!p && iter->flags & FTRACE_ITER_FILTER) 1560 if (!p) {
1530 return t_hash_start(m, pos); 1561 if (iter->flags & FTRACE_ITER_FILTER)
1562 return t_hash_start(m, pos);
1531 1563
1532 return p; 1564 return NULL;
1565 }
1566
1567 return iter;
1533} 1568}
1534 1569
1535static void t_stop(struct seq_file *m, void *p) 1570static void t_stop(struct seq_file *m, void *p)
@@ -1540,16 +1575,18 @@ static void t_stop(struct seq_file *m, void *p)
1540static int t_show(struct seq_file *m, void *v) 1575static int t_show(struct seq_file *m, void *v)
1541{ 1576{
1542 struct ftrace_iterator *iter = m->private; 1577 struct ftrace_iterator *iter = m->private;
1543 struct dyn_ftrace *rec = v; 1578 struct dyn_ftrace *rec;
1544 1579
1545 if (iter->flags & FTRACE_ITER_HASH) 1580 if (iter->flags & FTRACE_ITER_HASH)
1546 return t_hash_show(m, v); 1581 return t_hash_show(m, iter);
1547 1582
1548 if (iter->flags & FTRACE_ITER_PRINTALL) { 1583 if (iter->flags & FTRACE_ITER_PRINTALL) {
1549 seq_printf(m, "#### all functions enabled ####\n"); 1584 seq_printf(m, "#### all functions enabled ####\n");
1550 return 0; 1585 return 0;
1551 } 1586 }
1552 1587
1588 rec = iter->func;
1589
1553 if (!rec) 1590 if (!rec)
1554 return 0; 1591 return 0;
1555 1592
@@ -2418,7 +2455,7 @@ static const struct file_operations ftrace_filter_fops = {
2418 .open = ftrace_filter_open, 2455 .open = ftrace_filter_open,
2419 .read = seq_read, 2456 .read = seq_read,
2420 .write = ftrace_filter_write, 2457 .write = ftrace_filter_write,
2421 .llseek = no_llseek, 2458 .llseek = ftrace_regex_lseek,
2422 .release = ftrace_filter_release, 2459 .release = ftrace_filter_release,
2423}; 2460};
2424 2461
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 492197e2f86c..4e2f03410377 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2606,6 +2606,19 @@ void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
2606} 2606}
2607EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu); 2607EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
2608 2608
2609/*
2610 * The total entries in the ring buffer is the running counter
2611 * of entries entered into the ring buffer, minus the sum of
2612 * the entries read from the ring buffer and the number of
2613 * entries that were overwritten.
2614 */
2615static inline unsigned long
2616rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer)
2617{
2618 return local_read(&cpu_buffer->entries) -
2619 (local_read(&cpu_buffer->overrun) + cpu_buffer->read);
2620}
2621
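The new rb_num_of_entries() helper centralizes the accounting the two call sites below used to open-code: live entries are everything ever written, minus what was overwritten and what has already been read. The same arithmetic as a standalone check:

    #include <assert.h>

    struct cpu_buffer { unsigned long entries, overrun, read; };

    static unsigned long num_entries(const struct cpu_buffer *b)
    {
            return b->entries - (b->overrun + b->read);
    }

    int main(void)
    {
            struct cpu_buffer b = { .entries = 100, .overrun = 10, .read = 30 };

            assert(num_entries(&b) == 60); /* 100 written, 10 lost, 30 consumed */
            return 0;
    }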
2609/** 2622/**
2610 * ring_buffer_entries_cpu - get the number of entries in a cpu buffer 2623 * ring_buffer_entries_cpu - get the number of entries in a cpu buffer
2611 * @buffer: The ring buffer 2624 * @buffer: The ring buffer
@@ -2614,16 +2627,13 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
2614unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) 2627unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
2615{ 2628{
2616 struct ring_buffer_per_cpu *cpu_buffer; 2629 struct ring_buffer_per_cpu *cpu_buffer;
2617 unsigned long ret;
2618 2630
2619 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 2631 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2620 return 0; 2632 return 0;
2621 2633
2622 cpu_buffer = buffer->buffers[cpu]; 2634 cpu_buffer = buffer->buffers[cpu];
2623 ret = (local_read(&cpu_buffer->entries) - local_read(&cpu_buffer->overrun))
2624 - cpu_buffer->read;
2625 2635
2626 return ret; 2636 return rb_num_of_entries(cpu_buffer);
2627} 2637}
2628EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu); 2638EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu);
2629 2639
@@ -2684,8 +2694,7 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer)
2684 /* if you care about this being correct, lock the buffer */ 2694 /* if you care about this being correct, lock the buffer */
2685 for_each_buffer_cpu(buffer, cpu) { 2695 for_each_buffer_cpu(buffer, cpu) {
2686 cpu_buffer = buffer->buffers[cpu]; 2696 cpu_buffer = buffer->buffers[cpu];
2687 entries += (local_read(&cpu_buffer->entries) - 2697 entries += rb_num_of_entries(cpu_buffer);
2688 local_read(&cpu_buffer->overrun)) - cpu_buffer->read;
2689 } 2698 }
2690 2699
2691 return entries; 2700 return entries;
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 31cc4cb0dbf2..39c059ca670e 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -9,7 +9,7 @@
9#include <linux/kprobes.h> 9#include <linux/kprobes.h>
10#include "trace.h" 10#include "trace.h"
11 11
12static char *perf_trace_buf[4]; 12static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS];
13 13
14/* 14/*
15 * Force it to be aligned to unsigned long to avoid misaligned accesses 15 * Force it to be aligned to unsigned long to avoid misaligned accesses
@@ -24,7 +24,7 @@ static int total_ref_count;
24static int perf_trace_event_init(struct ftrace_event_call *tp_event, 24static int perf_trace_event_init(struct ftrace_event_call *tp_event,
25 struct perf_event *p_event) 25 struct perf_event *p_event)
26{ 26{
27 struct hlist_head *list; 27 struct hlist_head __percpu *list;
28 int ret = -ENOMEM; 28 int ret = -ENOMEM;
29 int cpu; 29 int cpu;
30 30
@@ -42,11 +42,11 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event,
42 tp_event->perf_events = list; 42 tp_event->perf_events = list;
43 43
44 if (!total_ref_count) { 44 if (!total_ref_count) {
45 char *buf; 45 char __percpu *buf;
46 int i; 46 int i;
47 47
48 for (i = 0; i < 4; i++) { 48 for (i = 0; i < PERF_NR_CONTEXTS; i++) {
49 buf = (char *)alloc_percpu(perf_trace_t); 49 buf = (char __percpu *)alloc_percpu(perf_trace_t);
50 if (!buf) 50 if (!buf)
51 goto fail; 51 goto fail;
52 52
@@ -65,7 +65,7 @@ fail:
65 if (!total_ref_count) { 65 if (!total_ref_count) {
66 int i; 66 int i;
67 67
68 for (i = 0; i < 4; i++) { 68 for (i = 0; i < PERF_NR_CONTEXTS; i++) {
69 free_percpu(perf_trace_buf[i]); 69 free_percpu(perf_trace_buf[i]);
70 perf_trace_buf[i] = NULL; 70 perf_trace_buf[i] = NULL;
71 } 71 }
@@ -101,22 +101,26 @@ int perf_trace_init(struct perf_event *p_event)
101 return ret; 101 return ret;
102} 102}
103 103
104int perf_trace_enable(struct perf_event *p_event) 104int perf_trace_add(struct perf_event *p_event, int flags)
105{ 105{
106 struct ftrace_event_call *tp_event = p_event->tp_event; 106 struct ftrace_event_call *tp_event = p_event->tp_event;
107 struct hlist_head __percpu *pcpu_list;
107 struct hlist_head *list; 108 struct hlist_head *list;
108 109
109 list = tp_event->perf_events; 110 pcpu_list = tp_event->perf_events;
110 if (WARN_ON_ONCE(!list)) 111 if (WARN_ON_ONCE(!pcpu_list))
111 return -EINVAL; 112 return -EINVAL;
112 113
113 list = this_cpu_ptr(list); 114 if (!(flags & PERF_EF_START))
115 p_event->hw.state = PERF_HES_STOPPED;
116
117 list = this_cpu_ptr(pcpu_list);
114 hlist_add_head_rcu(&p_event->hlist_entry, list); 118 hlist_add_head_rcu(&p_event->hlist_entry, list);
115 119
116 return 0; 120 return 0;
117} 121}
118 122
119void perf_trace_disable(struct perf_event *p_event) 123void perf_trace_del(struct perf_event *p_event, int flags)
120{ 124{
121 hlist_del_rcu(&p_event->hlist_entry); 125 hlist_del_rcu(&p_event->hlist_entry);
122} 126}
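perf_trace_add() now honours the add/start split of the new pmu API: without PERF_EF_START the event is linked into this CPU's hlist in a stopped state and only starts counting later. A toy version of that state handling (EF_START and HES_STOPPED are renamed stand-ins for the perf constants):

    #include <assert.h>

    #define EF_START    0x01            /* cf. PERF_EF_START */
    #define HES_STOPPED 0x01            /* cf. PERF_HES_STOPPED */

    struct event { int state; };

    static void event_add(struct event *e, int flags)
    {
            e->state = (flags & EF_START) ? 0 : HES_STOPPED;
            /* ...then link e into this CPU's hlist... */
    }

    int main(void)
    {
            struct event e;

            event_add(&e, 0);
            assert(e.state == HES_STOPPED); /* linked but not counting yet */
            event_add(&e, EF_START);
            assert(e.state == 0);           /* linked and started */
            return 0;
    }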
@@ -142,7 +146,7 @@ void perf_trace_destroy(struct perf_event *p_event)
142 tp_event->perf_events = NULL; 146 tp_event->perf_events = NULL;
143 147
144 if (!--total_ref_count) { 148 if (!--total_ref_count) {
145 for (i = 0; i < 4; i++) { 149 for (i = 0; i < PERF_NR_CONTEXTS; i++) {
146 free_percpu(perf_trace_buf[i]); 150 free_percpu(perf_trace_buf[i]);
147 perf_trace_buf[i] = NULL; 151 perf_trace_buf[i] = NULL;
148 } 152 }
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 4c758f146328..398c0e8b332c 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -600,21 +600,29 @@ out:
600 600
601enum { 601enum {
602 FORMAT_HEADER = 1, 602 FORMAT_HEADER = 1,
603 FORMAT_PRINTFMT = 2, 603 FORMAT_FIELD_SEPERATOR = 2,
604 FORMAT_PRINTFMT = 3,
604}; 605};
605 606
606static void *f_next(struct seq_file *m, void *v, loff_t *pos) 607static void *f_next(struct seq_file *m, void *v, loff_t *pos)
607{ 608{
608 struct ftrace_event_call *call = m->private; 609 struct ftrace_event_call *call = m->private;
609 struct ftrace_event_field *field; 610 struct ftrace_event_field *field;
610 struct list_head *head; 611 struct list_head *common_head = &ftrace_common_fields;
612 struct list_head *head = trace_get_fields(call);
611 613
612 (*pos)++; 614 (*pos)++;
613 615
614 switch ((unsigned long)v) { 616 switch ((unsigned long)v) {
615 case FORMAT_HEADER: 617 case FORMAT_HEADER:
616 head = &ftrace_common_fields; 618 if (unlikely(list_empty(common_head)))
619 return NULL;
620
621 field = list_entry(common_head->prev,
622 struct ftrace_event_field, link);
623 return field;
617 624
625 case FORMAT_FIELD_SEPERATOR:
618 if (unlikely(list_empty(head))) 626 if (unlikely(list_empty(head)))
619 return NULL; 627 return NULL;
620 628
@@ -626,31 +634,10 @@ static void *f_next(struct seq_file *m, void *v, loff_t *pos)
626 return NULL; 634 return NULL;
627 } 635 }
628 636
629 head = trace_get_fields(call);
630
631 /*
632 * To separate common fields from event fields, the
633 * LSB is set on the first event field. Clear it in case.
634 */
635 v = (void *)((unsigned long)v & ~1L);
636
637 field = v; 637 field = v;
638 /* 638 if (field->link.prev == common_head)
639 * If this is a common field, and at the end of the list, then 639 return (void *)FORMAT_FIELD_SEPERATOR;
640 * continue with main list. 640 else if (field->link.prev == head)
641 */
642 if (field->link.prev == &ftrace_common_fields) {
643 if (unlikely(list_empty(head)))
644 return NULL;
645 field = list_entry(head->prev, struct ftrace_event_field, link);
646 /* Set the LSB to notify f_show to print an extra newline */
647 field = (struct ftrace_event_field *)
648 ((unsigned long)field | 1);
649 return field;
650 }
651
652 /* If we are done tell f_show to print the format */
653 if (field->link.prev == head)
654 return (void *)FORMAT_PRINTFMT; 641 return (void *)FORMAT_PRINTFMT;
655 642
656 field = list_entry(field->link.prev, struct ftrace_event_field, link); 643 field = list_entry(field->link.prev, struct ftrace_event_field, link);
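The rewrite replaces the LSB-tagging trick with an explicit FORMAT_FIELD_SEPERATOR token between the common-field and event-field lists. A sketch of the same iterator written as a plain state machine (the field names and the print fmt string are made up):

    #include <stdio.h>

    static const char *common[] = { "common_type", "common_pid", NULL };
    static const char *fields[] = { "irq", "ret", NULL };

    enum { COMMON, EVENT, PRINTFMT, DONE };
    static int state = COMMON;
    static int idx;

    static const char *next_line(void)
    {
            switch (state) {
            case COMMON:
                    if (common[idx])
                            return common[idx++];
                    state = EVENT;
                    idx = 0;
                    return "";              /* the FORMAT_FIELD_SEPERATOR line */
            case EVENT:
                    if (fields[idx])
                            return fields[idx++];
                    state = PRINTFMT;
                    return "print fmt: \"irq=%d ret=%d\"";
            default:
                    state = DONE;
                    return NULL;
            }
    }

    int main(void)
    {
            const char *s;

            while ((s = next_line()))
                    puts(s);
            return 0;
    }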
@@ -688,22 +675,16 @@ static int f_show(struct seq_file *m, void *v)
688 seq_printf(m, "format:\n"); 675 seq_printf(m, "format:\n");
689 return 0; 676 return 0;
690 677
678 case FORMAT_FIELD_SEPERATOR:
679 seq_putc(m, '\n');
680 return 0;
681
691 case FORMAT_PRINTFMT: 682 case FORMAT_PRINTFMT:
692 seq_printf(m, "\nprint fmt: %s\n", 683 seq_printf(m, "\nprint fmt: %s\n",
693 call->print_fmt); 684 call->print_fmt);
694 return 0; 685 return 0;
695 } 686 }
696 687
697 /*
698 * To separate common fields from event fields, the
699 * LSB is set on the first event field. Clear it and
700 * print a newline if it is set.
701 */
702 if ((unsigned long)v & 1) {
703 seq_putc(m, '\n');
704 v = (void *)((unsigned long)v & ~1L);
705 }
706
707 field = v; 688 field = v;
708 689
709 /* 690 /*
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 6f233698518e..02c708ae0d42 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -15,15 +15,19 @@
15#include "trace.h" 15#include "trace.h"
16#include "trace_output.h" 16#include "trace_output.h"
17 17
18/* When set, irq functions will be ignored */
19static int ftrace_graph_skip_irqs;
20
18struct fgraph_cpu_data { 21struct fgraph_cpu_data {
19 pid_t last_pid; 22 pid_t last_pid;
20 int depth; 23 int depth;
24 int depth_irq;
21 int ignore; 25 int ignore;
22 unsigned long enter_funcs[FTRACE_RETFUNC_DEPTH]; 26 unsigned long enter_funcs[FTRACE_RETFUNC_DEPTH];
23}; 27};
24 28
25struct fgraph_data { 29struct fgraph_data {
26 struct fgraph_cpu_data *cpu_data; 30 struct fgraph_cpu_data __percpu *cpu_data;
27 31
28 /* Place to preserve last processed entry. */ 32 /* Place to preserve last processed entry. */
29 struct ftrace_graph_ent_entry ent; 33 struct ftrace_graph_ent_entry ent;
@@ -41,6 +45,7 @@ struct fgraph_data {
41#define TRACE_GRAPH_PRINT_PROC 0x8 45#define TRACE_GRAPH_PRINT_PROC 0x8
42#define TRACE_GRAPH_PRINT_DURATION 0x10 46#define TRACE_GRAPH_PRINT_DURATION 0x10
43#define TRACE_GRAPH_PRINT_ABS_TIME 0x20 47#define TRACE_GRAPH_PRINT_ABS_TIME 0x20
48#define TRACE_GRAPH_PRINT_IRQS 0x40
44 49
45static struct tracer_opt trace_opts[] = { 50static struct tracer_opt trace_opts[] = {
46 /* Display overruns? (for self-debug purpose) */ 51 /* Display overruns? (for self-debug purpose) */
@@ -55,13 +60,15 @@ static struct tracer_opt trace_opts[] = {
55 { TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) }, 60 { TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) },
56 /* Display absolute time of an entry */ 61 /* Display absolute time of an entry */
57 { TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) }, 62 { TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) },
63 /* Display interrupts */
64 { TRACER_OPT(funcgraph-irqs, TRACE_GRAPH_PRINT_IRQS) },
58 { } /* Empty entry */ 65 { } /* Empty entry */
59}; 66};
60 67
61static struct tracer_flags tracer_flags = { 68static struct tracer_flags tracer_flags = {
62 /* Don't display overruns and proc by default */ 69 /* Don't display overruns and proc by default */
63 .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD | 70 .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD |
64 TRACE_GRAPH_PRINT_DURATION, 71 TRACE_GRAPH_PRINT_DURATION | TRACE_GRAPH_PRINT_IRQS,
65 .opts = trace_opts 72 .opts = trace_opts
66}; 73};
67 74
@@ -204,6 +211,14 @@ int __trace_graph_entry(struct trace_array *tr,
204 return 1; 211 return 1;
205} 212}
206 213
214static inline int ftrace_graph_ignore_irqs(void)
215{
216 if (!ftrace_graph_skip_irqs)
217 return 0;
218
219 return in_irq();
220}
221
207int trace_graph_entry(struct ftrace_graph_ent *trace) 222int trace_graph_entry(struct ftrace_graph_ent *trace)
208{ 223{
209 struct trace_array *tr = graph_array; 224 struct trace_array *tr = graph_array;
@@ -218,7 +233,8 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
218 return 0; 233 return 0;
219 234
220 /* trace it when it is-nested-in or is a function enabled. */ 235 /* trace it when it is-nested-in or is a function enabled. */
221 if (!(trace->depth || ftrace_graph_addr(trace->func))) 236 if (!(trace->depth || ftrace_graph_addr(trace->func)) ||
237 ftrace_graph_ignore_irqs())
222 return 0; 238 return 0;
223 239
224 local_irq_save(flags); 240 local_irq_save(flags);
@@ -855,6 +871,92 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s,
855 return 0; 871 return 0;
856} 872}
857 873
874/*
875 * Entry check for irq code
876 *
877 * returns 1 if
878 * - we are inside irq code
 879 * - we just entered irq code
880 *
 881 * returns 0 if
882 * - funcgraph-interrupts option is set
883 * - we are not inside irq code
884 */
885static int
886check_irq_entry(struct trace_iterator *iter, u32 flags,
887 unsigned long addr, int depth)
888{
889 int cpu = iter->cpu;
890 struct fgraph_data *data = iter->private;
891 int *depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq);
892
893 if (flags & TRACE_GRAPH_PRINT_IRQS)
894 return 0;
895
896 /*
897 * We are inside the irq code
898 */
899 if (*depth_irq >= 0)
900 return 1;
901
902 if ((addr < (unsigned long)__irqentry_text_start) ||
903 (addr >= (unsigned long)__irqentry_text_end))
904 return 0;
905
906 /*
907 * We are entering irq code.
908 */
909 *depth_irq = depth;
910 return 1;
911}
912
913/*
914 * Return check for irq code
915 *
916 * returns 1 if
917 * - we are inside irq code
918 * - we just left irq code
919 *
920 * returns 0 if
921 * - funcgraph-interrupts option is set
922 * - we are not inside irq code
923 */
924static int
925check_irq_return(struct trace_iterator *iter, u32 flags, int depth)
926{
927 int cpu = iter->cpu;
928 struct fgraph_data *data = iter->private;
929 int *depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq);
930
931 if (flags & TRACE_GRAPH_PRINT_IRQS)
932 return 0;
933
934 /*
935 * We are not inside the irq code.
936 */
937 if (*depth_irq == -1)
938 return 0;
939
940 /*
941 * We are inside the irq code, and this is returning entry.
942 * Let's not trace it and clear the entry depth, since
943 * we are out of irq code.
944 *
945 * This condition ensures that we 'leave the irq code' once
946 * we are out of the entry depth. Thus protecting us from
947 * the RETURN entry loss.
948 */
949 if (*depth_irq >= depth) {
950 *depth_irq = -1;
951 return 1;
952 }
953
954 /*
955 * We are inside the irq code, and this is not the entry.
956 */
957 return 1;
958}
959
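The pair of checks above implements the funcgraph-irqs filtering by recording the depth at which irq code was entered and suppressing output until the matching return unwinds past it. The core of that logic, lifted into a standalone sketch (in_irq_text stands in for the __irqentry_text range test):

    #include <stdio.h>

    static int depth_irq = -1;      /* -1: not inside irq code */

    static int skip_entry(int in_irq_text, int depth)
    {
            if (depth_irq >= 0)
                    return 1;       /* already inside irq code */
            if (!in_irq_text)
                    return 0;
            depth_irq = depth;      /* entering irq code */
            return 1;
    }

    static int skip_return(int depth)
    {
            if (depth_irq == -1)
                    return 0;       /* not inside irq code */
            if (depth_irq >= depth)
                    depth_irq = -1; /* unwound past the entry point */
            return 1;
    }

    int main(void)
    {
            printf("%d\n", skip_entry(0, 0));       /* 0: normal function */
            printf("%d\n", skip_entry(1, 1));       /* 1: irq entry at depth 1 */
            printf("%d\n", skip_entry(0, 2));       /* 1: nested inside irq */
            printf("%d\n", skip_return(2));         /* 1: still inside */
            printf("%d\n", skip_return(1));         /* 1: leaving irq code */
            printf("%d\n", skip_return(0));         /* 0: back to normal */
            return 0;
    }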
858static enum print_line_t 960static enum print_line_t
859print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, 961print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
860 struct trace_iterator *iter, u32 flags) 962 struct trace_iterator *iter, u32 flags)
@@ -865,6 +967,9 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
865 static enum print_line_t ret; 967 static enum print_line_t ret;
866 int cpu = iter->cpu; 968 int cpu = iter->cpu;
867 969
970 if (check_irq_entry(iter, flags, call->func, call->depth))
971 return TRACE_TYPE_HANDLED;
972
868 if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func, flags)) 973 if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func, flags))
869 return TRACE_TYPE_PARTIAL_LINE; 974 return TRACE_TYPE_PARTIAL_LINE;
870 975
@@ -902,6 +1007,9 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
902 int ret; 1007 int ret;
903 int i; 1008 int i;
904 1009
1010 if (check_irq_return(iter, flags, trace->depth))
1011 return TRACE_TYPE_HANDLED;
1012
905 if (data) { 1013 if (data) {
906 struct fgraph_cpu_data *cpu_data; 1014 struct fgraph_cpu_data *cpu_data;
907 int cpu = iter->cpu; 1015 int cpu = iter->cpu;
@@ -1210,9 +1318,12 @@ void graph_trace_open(struct trace_iterator *iter)
1210 pid_t *pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid); 1318 pid_t *pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
1211 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth); 1319 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
1212 int *ignore = &(per_cpu_ptr(data->cpu_data, cpu)->ignore); 1320 int *ignore = &(per_cpu_ptr(data->cpu_data, cpu)->ignore);
1321 int *depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq);
1322
1213 *pid = -1; 1323 *pid = -1;
1214 *depth = 0; 1324 *depth = 0;
1215 *ignore = 0; 1325 *ignore = 0;
1326 *depth_irq = -1;
1216 } 1327 }
1217 1328
1218 iter->private = data; 1329 iter->private = data;
@@ -1235,6 +1346,14 @@ void graph_trace_close(struct trace_iterator *iter)
1235 } 1346 }
1236} 1347}
1237 1348
1349static int func_graph_set_flag(u32 old_flags, u32 bit, int set)
1350{
1351 if (bit == TRACE_GRAPH_PRINT_IRQS)
1352 ftrace_graph_skip_irqs = !set;
1353
1354 return 0;
1355}
1356
1238static struct trace_event_functions graph_functions = { 1357static struct trace_event_functions graph_functions = {
1239 .trace = print_graph_function_event, 1358 .trace = print_graph_function_event,
1240}; 1359};
@@ -1261,6 +1380,7 @@ static struct tracer graph_trace __read_mostly = {
1261 .print_line = print_graph_function, 1380 .print_line = print_graph_function,
1262 .print_header = print_graph_headers, 1381 .print_header = print_graph_headers,
1263 .flags = &tracer_flags, 1382 .flags = &tracer_flags,
1383 .set_flag = func_graph_set_flag,
1264#ifdef CONFIG_FTRACE_SELFTEST 1384#ifdef CONFIG_FTRACE_SELFTEST
1265 .selftest = trace_selftest_startup_function_graph, 1385 .selftest = trace_selftest_startup_function_graph,
1266#endif 1386#endif
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index a7cc3793baf6..209b379a4721 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -263,6 +263,11 @@ int __init trace_workqueue_early_init(void)
263{ 263{
264 int ret, cpu; 264 int ret, cpu;
265 265
266 for_each_possible_cpu(cpu) {
267 spin_lock_init(&workqueue_cpu_stat(cpu)->lock);
268 INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list);
269 }
270
266 ret = register_trace_workqueue_insertion(probe_workqueue_insertion, NULL); 271 ret = register_trace_workqueue_insertion(probe_workqueue_insertion, NULL);
267 if (ret) 272 if (ret)
268 goto out; 273 goto out;
@@ -279,11 +284,6 @@ int __init trace_workqueue_early_init(void)
279 if (ret) 284 if (ret)
280 goto no_creation; 285 goto no_creation;
281 286
282 for_each_possible_cpu(cpu) {
283 spin_lock_init(&workqueue_cpu_stat(cpu)->lock);
284 INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list);
285 }
286
287 return 0; 287 return 0;
288 288
289no_creation: 289no_creation:
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index c77f3eceea25..d6073a50a6ca 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -25,6 +25,7 @@
25#include <linux/err.h> 25#include <linux/err.h>
26#include <linux/slab.h> 26#include <linux/slab.h>
27#include <linux/sched.h> 27#include <linux/sched.h>
28#include <linux/jump_label.h>
28 29
29extern struct tracepoint __start___tracepoints[]; 30extern struct tracepoint __start___tracepoints[];
30extern struct tracepoint __stop___tracepoints[]; 31extern struct tracepoint __stop___tracepoints[];
@@ -263,7 +264,13 @@ static void set_tracepoint(struct tracepoint_entry **entry,
263 * is used. 264 * is used.
264 */ 265 */
265 rcu_assign_pointer(elem->funcs, (*entry)->funcs); 266 rcu_assign_pointer(elem->funcs, (*entry)->funcs);
266 elem->state = active; 267 if (!elem->state && active) {
268 enable_jump_label(&elem->state);
269 elem->state = active;
270 } else if (elem->state && !active) {
271 disable_jump_label(&elem->state);
272 elem->state = active;
273 }
267} 274}
268 275
269/* 276/*
@@ -277,7 +284,10 @@ static void disable_tracepoint(struct tracepoint *elem)
277 if (elem->unregfunc && elem->state) 284 if (elem->unregfunc && elem->state)
278 elem->unregfunc(); 285 elem->unregfunc();
279 286
280 elem->state = 0; 287 if (elem->state) {
288 disable_jump_label(&elem->state);
289 elem->state = 0;
290 }
281 rcu_assign_pointer(elem->funcs, NULL); 291 rcu_assign_pointer(elem->funcs, NULL);
282} 292}
283 293
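set_tracepoint() and disable_tracepoint() now flip the jump label only on a real state edge, so repeated activations never re-arm it. A minimal model of that edge-triggered toggle (the _stub functions are local placeholders for the real jump-label calls):

    #include <assert.h>

    static int label_enabled;

    static void enable_jump_label_stub(int *state)  { (void)state; label_enabled = 1; }
    static void disable_jump_label_stub(int *state) { (void)state; label_enabled = 0; }

    static void set_state(int *state, int active)
    {
            if (!*state && active)
                    enable_jump_label_stub(state);
            else if (*state && !active)
                    disable_jump_label_stub(state);
            *state = active;
    }

    int main(void)
    {
            int state = 0;

            set_state(&state, 1);
            assert(label_enabled == 1);
            set_state(&state, 1);           /* no edge: label untouched */
            set_state(&state, 0);
            assert(label_enabled == 0);
            return 0;
    }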
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 7f9c3c52ecc1..dc8e16824b51 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -43,7 +43,6 @@ static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
43static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); 43static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
44#endif 44#endif
45 45
46static int __read_mostly did_panic;
47static int __initdata no_watchdog; 46static int __initdata no_watchdog;
48 47
49 48
@@ -187,18 +186,6 @@ static int is_softlockup(unsigned long touch_ts)
187 return 0; 186 return 0;
188} 187}
189 188
190static int
191watchdog_panic(struct notifier_block *this, unsigned long event, void *ptr)
192{
193 did_panic = 1;
194
195 return NOTIFY_DONE;
196}
197
198static struct notifier_block panic_block = {
199 .notifier_call = watchdog_panic,
200};
201
202#ifdef CONFIG_HARDLOCKUP_DETECTOR 189#ifdef CONFIG_HARDLOCKUP_DETECTOR
203static struct perf_event_attr wd_hw_attr = { 190static struct perf_event_attr wd_hw_attr = {
204 .type = PERF_TYPE_HARDWARE, 191 .type = PERF_TYPE_HARDWARE,
@@ -371,14 +358,14 @@ static int watchdog_nmi_enable(int cpu)
371 /* Try to register using hardware perf events */ 358 /* Try to register using hardware perf events */
372 wd_attr = &wd_hw_attr; 359 wd_attr = &wd_hw_attr;
373 wd_attr->sample_period = hw_nmi_get_sample_period(); 360 wd_attr->sample_period = hw_nmi_get_sample_period();
374 event = perf_event_create_kernel_counter(wd_attr, cpu, -1, watchdog_overflow_callback); 361 event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback);
375 if (!IS_ERR(event)) { 362 if (!IS_ERR(event)) {
376 printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n"); 363 printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n");
377 goto out_save; 364 goto out_save;
378 } 365 }
379 366
380 printk(KERN_ERR "NMI watchdog failed to create perf event on cpu%i: %p\n", cpu, event); 367 printk(KERN_ERR "NMI watchdog failed to create perf event on cpu%i: %p\n", cpu, event);
381 return -1; 368 return PTR_ERR(event);
382 369
383 /* success path */ 370 /* success path */
384out_save: 371out_save:
@@ -422,17 +409,19 @@ static int watchdog_prepare_cpu(int cpu)
422static int watchdog_enable(int cpu) 409static int watchdog_enable(int cpu)
423{ 410{
424 struct task_struct *p = per_cpu(softlockup_watchdog, cpu); 411 struct task_struct *p = per_cpu(softlockup_watchdog, cpu);
412 int err;
425 413
426 /* enable the perf event */ 414 /* enable the perf event */
427 if (watchdog_nmi_enable(cpu) != 0) 415 err = watchdog_nmi_enable(cpu);
428 return -1; 416 if (err)
417 return err;
429 418
430 /* create the watchdog thread */ 419 /* create the watchdog thread */
431 if (!p) { 420 if (!p) {
432 p = kthread_create(watchdog, (void *)(unsigned long)cpu, "watchdog/%d", cpu); 421 p = kthread_create(watchdog, (void *)(unsigned long)cpu, "watchdog/%d", cpu);
433 if (IS_ERR(p)) { 422 if (IS_ERR(p)) {
434 printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu); 423 printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu);
435 return -1; 424 return PTR_ERR(p);
436 } 425 }
437 kthread_bind(p, cpu); 426 kthread_bind(p, cpu);
438 per_cpu(watchdog_touch_ts, cpu) = 0; 427 per_cpu(watchdog_touch_ts, cpu) = 0;
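The watchdog paths switch from a flat -1 to forwarding the real errno via PTR_ERR(). For reference, the ERR_PTR convention encodes small negative errnos in the top page of the address space; a self-contained sketch of the idea (err_ptr, is_err and ptr_err mirror the kernel helpers but are defined locally here):

    #include <errno.h>
    #include <stdio.h>

    #define MAX_ERRNO 4095

    static void *err_ptr(long err)  { return (void *)err; }
    static int is_err(const void *p)
    {
            return (unsigned long)p >= (unsigned long)-MAX_ERRNO;
    }
    static long ptr_err(const void *p) { return (long)p; }

    static void *create_thing(int fail)
    {
            static int thing;

            return fail ? err_ptr(-ENOMEM) : &thing;
    }

    int main(void)
    {
            void *p = create_thing(1);

            if (is_err(p)) {
                    printf("create failed: %ld\n", ptr_err(p)); /* -12 */
                    return 1;
            }
            return 0;
    }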
@@ -484,6 +473,9 @@ static void watchdog_disable_all_cpus(void)
484{ 473{
485 int cpu; 474 int cpu;
486 475
476 if (no_watchdog)
477 return;
478
487 for_each_online_cpu(cpu) 479 for_each_online_cpu(cpu)
488 watchdog_disable(cpu); 480 watchdog_disable(cpu);
489 481
@@ -526,17 +518,16 @@ static int __cpuinit
526cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) 518cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
527{ 519{
528 int hotcpu = (unsigned long)hcpu; 520 int hotcpu = (unsigned long)hcpu;
521 int err = 0;
529 522
530 switch (action) { 523 switch (action) {
531 case CPU_UP_PREPARE: 524 case CPU_UP_PREPARE:
532 case CPU_UP_PREPARE_FROZEN: 525 case CPU_UP_PREPARE_FROZEN:
533 if (watchdog_prepare_cpu(hotcpu)) 526 err = watchdog_prepare_cpu(hotcpu);
534 return NOTIFY_BAD;
535 break; 527 break;
536 case CPU_ONLINE: 528 case CPU_ONLINE:
537 case CPU_ONLINE_FROZEN: 529 case CPU_ONLINE_FROZEN:
538 if (watchdog_enable(hotcpu)) 530 err = watchdog_enable(hotcpu);
539 return NOTIFY_BAD;
540 break; 531 break;
541#ifdef CONFIG_HOTPLUG_CPU 532#ifdef CONFIG_HOTPLUG_CPU
542 case CPU_UP_CANCELED: 533 case CPU_UP_CANCELED:
@@ -549,7 +540,7 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
549 break; 540 break;
550#endif /* CONFIG_HOTPLUG_CPU */ 541#endif /* CONFIG_HOTPLUG_CPU */
551 } 542 }
552 return NOTIFY_OK; 543 return notifier_from_errno(err);
553} 544}
554 545
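cpu_callback() now returns notifier_from_errno(err), so the hotplug core sees the exact errno instead of a bare NOTIFY_BAD. A sketch of the errno round trip; the kernel's actual bit layout differs in detail, this only shows the idea (the _stub helpers are local inventions):

    #include <assert.h>
    #include <errno.h>

    #define NOTIFY_OK        0x0001
    #define NOTIFY_STOP_MASK 0x8000

    static int notifier_from_errno_stub(int err)
    {
            return err ? (int)(NOTIFY_STOP_MASK | -err) : NOTIFY_OK;
    }

    static int notifier_to_errno_stub(int ret)
    {
            return (ret & NOTIFY_STOP_MASK) ? -(ret & ~NOTIFY_STOP_MASK) : 0;
    }

    int main(void)
    {
            assert(notifier_to_errno_stub(notifier_from_errno_stub(-ENOMEM)) == -ENOMEM);
            assert(notifier_to_errno_stub(notifier_from_errno_stub(0)) == 0);
            return 0;
    }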
555static struct notifier_block __cpuinitdata cpu_nfb = { 546static struct notifier_block __cpuinitdata cpu_nfb = {
@@ -565,13 +556,11 @@ static int __init spawn_watchdog_task(void)
565 return 0; 556 return 0;
566 557
567 err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); 558 err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
568 WARN_ON(err == NOTIFY_BAD); 559 WARN_ON(notifier_to_errno(err));
569 560
570 cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); 561 cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
571 register_cpu_notifier(&cpu_nfb); 562 register_cpu_notifier(&cpu_nfb);
572 563
573 atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
574
575 return 0; 564 return 0;
576} 565}
577early_initcall(spawn_watchdog_task); 566early_initcall(spawn_watchdog_task);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 1b4afd2e6ca0..e85d549b6eac 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -482,6 +482,7 @@ config PROVE_LOCKING
482 select DEBUG_SPINLOCK 482 select DEBUG_SPINLOCK
483 select DEBUG_MUTEXES 483 select DEBUG_MUTEXES
484 select DEBUG_LOCK_ALLOC 484 select DEBUG_LOCK_ALLOC
485 select TRACE_IRQFLAGS
485 default n 486 default n
486 help 487 help
487 This feature enables the kernel to prove that all locking 488 This feature enables the kernel to prove that all locking
@@ -579,11 +580,10 @@ config DEBUG_LOCKDEP
579 of more runtime overhead. 580 of more runtime overhead.
580 581
581config TRACE_IRQFLAGS 582config TRACE_IRQFLAGS
582 depends on DEBUG_KERNEL
583 bool 583 bool
584 default y 584 help
585 depends on TRACE_IRQFLAGS_SUPPORT 585 Enables hooks to interrupt enabling and disabling for
586 depends on PROVE_LOCKING 586 either tracing or lock debugging.
587 587
588config DEBUG_SPINLOCK_SLEEP 588config DEBUG_SPINLOCK_SLEEP
589 bool "Spinlock debugging: sleep-inside-spinlock checking" 589 bool "Spinlock debugging: sleep-inside-spinlock checking"
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index 02afc2533728..e925c7b960f1 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -26,19 +26,11 @@
26#include <linux/dynamic_debug.h> 26#include <linux/dynamic_debug.h>
27#include <linux/debugfs.h> 27#include <linux/debugfs.h>
28#include <linux/slab.h> 28#include <linux/slab.h>
29#include <linux/jump_label.h>
29 30
30extern struct _ddebug __start___verbose[]; 31extern struct _ddebug __start___verbose[];
31extern struct _ddebug __stop___verbose[]; 32extern struct _ddebug __stop___verbose[];
32 33
33/* dynamic_debug_enabled, and dynamic_debug_enabled2 are bitmasks in which
34 * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They
35 * use independent hash functions, to reduce the chance of false positives.
36 */
37long long dynamic_debug_enabled;
38EXPORT_SYMBOL_GPL(dynamic_debug_enabled);
39long long dynamic_debug_enabled2;
40EXPORT_SYMBOL_GPL(dynamic_debug_enabled2);
41
42struct ddebug_table { 34struct ddebug_table {
43 struct list_head link; 35 struct list_head link;
44 char *mod_name; 36 char *mod_name;
@@ -88,26 +80,6 @@ static char *ddebug_describe_flags(struct _ddebug *dp, char *buf,
88} 80}
89 81
90/* 82/*
91 * must be called with ddebug_lock held
92 */
93
94static int disabled_hash(char hash, bool first_table)
95{
96 struct ddebug_table *dt;
97 char table_hash_value;
98
99 list_for_each_entry(dt, &ddebug_tables, link) {
100 if (first_table)
101 table_hash_value = dt->ddebugs->primary_hash;
102 else
103 table_hash_value = dt->ddebugs->secondary_hash;
104 if (dt->num_enabled && (hash == table_hash_value))
105 return 0;
106 }
107 return 1;
108}
109
110/*
111 * Search the tables for _ddebug's which match the given 83 * Search the tables for _ddebug's which match the given
112 * `query' and apply the `flags' and `mask' to them. Tells 84 * `query' and apply the `flags' and `mask' to them. Tells
113 * the user which ddebug's were changed, or whether none 85 * the user which ddebug's were changed, or whether none
@@ -170,17 +142,9 @@ static void ddebug_change(const struct ddebug_query *query,
170 dt->num_enabled++; 142 dt->num_enabled++;
171 dp->flags = newflags; 143 dp->flags = newflags;
172 if (newflags) { 144 if (newflags) {
173 dynamic_debug_enabled |= 145 enable_jump_label(&dp->enabled);
174 (1LL << dp->primary_hash);
175 dynamic_debug_enabled2 |=
176 (1LL << dp->secondary_hash);
177 } else { 146 } else {
178 if (disabled_hash(dp->primary_hash, true)) 147 disable_jump_label(&dp->enabled);
179 dynamic_debug_enabled &=
180 ~(1LL << dp->primary_hash);
181 if (disabled_hash(dp->secondary_hash, false))
182 dynamic_debug_enabled2 &=
183 ~(1LL << dp->secondary_hash);
184 } 148 }
185 if (verbose) 149 if (verbose)
186 printk(KERN_INFO 150 printk(KERN_INFO
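The dynamic_debug conversion drops the two global hash bitmasks, where distinct modules could alias into the same bucket, in favour of a per-callsite jump label that is exact. A toy comparison of the two schemes (bucket() is an invented stand-in hash, not the removed djb2/r5 pair):

    #include <assert.h>
    #include <stdint.h>

    static uint64_t enabled_hash_mask;              /* old scheme: 64 buckets */

    static unsigned int bucket(const char *s)       /* toy 6-bit hash */
    {
            unsigned int h = 5381;

            while (*s)
                    h = h * 33 + (unsigned char)*s++;
            return h & 63;
    }

    int main(void)
    {
            int site_a_enabled = 0, site_b_enabled = 0; /* new scheme: per site */

            /* Old scheme: enabling one module lights its hash bucket; any
             * other module hashing to the same bucket looked enabled too. */
            enabled_hash_mask |= UINT64_C(1) << bucket("moduleA");

            /* New scheme: each callsite carries its own key, no aliasing. */
            site_a_enabled = 1;
            assert(site_a_enabled && !site_b_enabled);
            return 0;
    }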
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 251997a95483..282806ba7a57 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -243,6 +243,7 @@ void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
243 unlock_sock_fast(sk, slow); 243 unlock_sock_fast(sk, slow);
244 244
245 /* skb is now orphaned, can be freed outside of locked section */ 245 /* skb is now orphaned, can be freed outside of locked section */
246 trace_kfree_skb(skb, skb_free_datagram_locked);
246 __kfree_skb(skb); 247 __kfree_skb(skb);
247} 248}
248EXPORT_SYMBOL(skb_free_datagram_locked); 249EXPORT_SYMBOL(skb_free_datagram_locked);
diff --git a/net/core/dev.c b/net/core/dev.c
index 660dd41aaaa6..7ec85e27beed 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -128,6 +128,8 @@
128#include <linux/jhash.h> 128#include <linux/jhash.h>
129#include <linux/random.h> 129#include <linux/random.h>
130#include <trace/events/napi.h> 130#include <trace/events/napi.h>
131#include <trace/events/net.h>
132#include <trace/events/skb.h>
131#include <linux/pci.h> 133#include <linux/pci.h>
132 134
133#include "net-sysfs.h" 135#include "net-sysfs.h"
@@ -1978,6 +1980,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1978 } 1980 }
1979 1981
1980 rc = ops->ndo_start_xmit(skb, dev); 1982 rc = ops->ndo_start_xmit(skb, dev);
1983 trace_net_dev_xmit(skb, rc);
1981 if (rc == NETDEV_TX_OK) 1984 if (rc == NETDEV_TX_OK)
1982 txq_trans_update(txq); 1985 txq_trans_update(txq);
1983 return rc; 1986 return rc;
@@ -1998,6 +2001,7 @@ gso:
1998 skb_dst_drop(nskb); 2001 skb_dst_drop(nskb);
1999 2002
2000 rc = ops->ndo_start_xmit(nskb, dev); 2003 rc = ops->ndo_start_xmit(nskb, dev);
2004 trace_net_dev_xmit(nskb, rc);
2001 if (unlikely(rc != NETDEV_TX_OK)) { 2005 if (unlikely(rc != NETDEV_TX_OK)) {
2002 if (rc & ~NETDEV_TX_MASK) 2006 if (rc & ~NETDEV_TX_MASK)
2003 goto out_kfree_gso_skb; 2007 goto out_kfree_gso_skb;
@@ -2186,6 +2190,7 @@ int dev_queue_xmit(struct sk_buff *skb)
2186#ifdef CONFIG_NET_CLS_ACT 2190#ifdef CONFIG_NET_CLS_ACT
2187 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS); 2191 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
2188#endif 2192#endif
2193 trace_net_dev_queue(skb);
2189 if (q->enqueue) { 2194 if (q->enqueue) {
2190 rc = __dev_xmit_skb(skb, q, dev, txq); 2195 rc = __dev_xmit_skb(skb, q, dev, txq);
2191 goto out; 2196 goto out;
@@ -2512,6 +2517,7 @@ int netif_rx(struct sk_buff *skb)
2512 if (netdev_tstamp_prequeue) 2517 if (netdev_tstamp_prequeue)
2513 net_timestamp_check(skb); 2518 net_timestamp_check(skb);
2514 2519
2520 trace_netif_rx(skb);
2515#ifdef CONFIG_RPS 2521#ifdef CONFIG_RPS
2516 { 2522 {
2517 struct rps_dev_flow voidflow, *rflow = &voidflow; 2523 struct rps_dev_flow voidflow, *rflow = &voidflow;
@@ -2571,6 +2577,7 @@ static void net_tx_action(struct softirq_action *h)
2571 clist = clist->next; 2577 clist = clist->next;
2572 2578
2573 WARN_ON(atomic_read(&skb->users)); 2579 WARN_ON(atomic_read(&skb->users));
2580 trace_kfree_skb(skb, net_tx_action);
2574 __kfree_skb(skb); 2581 __kfree_skb(skb);
2575 } 2582 }
2576 } 2583 }
@@ -2828,6 +2835,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
2828 if (!netdev_tstamp_prequeue) 2835 if (!netdev_tstamp_prequeue)
2829 net_timestamp_check(skb); 2836 net_timestamp_check(skb);
2830 2837
2838 trace_netif_receive_skb(skb);
2831 if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb)) 2839 if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb))
2832 return NET_RX_SUCCESS; 2840 return NET_RX_SUCCESS;
2833 2841
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index afa6380ed88a..7f1bb2aba03b 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -26,6 +26,7 @@
26 26
27#define CREATE_TRACE_POINTS 27#define CREATE_TRACE_POINTS
28#include <trace/events/skb.h> 28#include <trace/events/skb.h>
29#include <trace/events/net.h>
29#include <trace/events/napi.h> 30#include <trace/events/napi.h>
30 31
31EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); 32EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c83b421341c0..56ba3c4e4761 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -466,6 +466,7 @@ void consume_skb(struct sk_buff *skb)
466 smp_rmb(); 466 smp_rmb();
467 else if (likely(!atomic_dec_and_test(&skb->users))) 467 else if (likely(!atomic_dec_and_test(&skb->users)))
468 return; 468 return;
469 trace_consume_skb(skb);
469 __kfree_skb(skb); 470 __kfree_skb(skb);
470} 471}
471EXPORT_SYMBOL(consume_skb); 472EXPORT_SYMBOL(consume_skb);
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 54fd1b700131..7bfcf1a09ac5 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -101,14 +101,6 @@ basename_flags = -D"KBUILD_BASENAME=KBUILD_STR($(call name-fix,$(basetarget)))"
101modname_flags = $(if $(filter 1,$(words $(modname))),\ 101modname_flags = $(if $(filter 1,$(words $(modname))),\
102 -D"KBUILD_MODNAME=KBUILD_STR($(call name-fix,$(modname)))") 102 -D"KBUILD_MODNAME=KBUILD_STR($(call name-fix,$(modname)))")
103 103
104#hash values
105ifdef CONFIG_DYNAMIC_DEBUG
106debug_flags = -D"DEBUG_HASH=$(shell ./scripts/basic/hash djb2 $(@D)$(modname))"\
107 -D"DEBUG_HASH2=$(shell ./scripts/basic/hash r5 $(@D)$(modname))"
108else
109debug_flags =
110endif
111
112orig_c_flags = $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(KBUILD_SUBDIR_CCFLAGS) \ 104orig_c_flags = $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(KBUILD_SUBDIR_CCFLAGS) \
113 $(ccflags-y) $(CFLAGS_$(basetarget).o) 105 $(ccflags-y) $(CFLAGS_$(basetarget).o)
114_c_flags = $(filter-out $(CFLAGS_REMOVE_$(basetarget).o), $(orig_c_flags)) 106_c_flags = $(filter-out $(CFLAGS_REMOVE_$(basetarget).o), $(orig_c_flags))
@@ -152,8 +144,7 @@ endif
152 144
153c_flags = -Wp,-MD,$(depfile) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) \ 145c_flags = -Wp,-MD,$(depfile) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) \
154 $(__c_flags) $(modkern_cflags) \ 146 $(__c_flags) $(modkern_cflags) \
155 -D"KBUILD_STR(s)=\#s" $(basename_flags) $(modname_flags) \ 147 -D"KBUILD_STR(s)=\#s" $(basename_flags) $(modname_flags)
156 $(debug_flags)
157 148
158a_flags = -Wp,-MD,$(depfile) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) \ 149a_flags = -Wp,-MD,$(depfile) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) \
159 $(__a_flags) $(modkern_aflags) 150 $(__a_flags) $(modkern_aflags)
diff --git a/scripts/basic/Makefile b/scripts/basic/Makefile
index 09559951df12..4c324a1f1e0e 100644
--- a/scripts/basic/Makefile
+++ b/scripts/basic/Makefile
@@ -9,7 +9,7 @@
9# fixdep: Used to generate dependency information during build process 9# fixdep: Used to generate dependency information during build process
10# docproc: Used in Documentation/DocBook 10# docproc: Used in Documentation/DocBook
11 11
12hostprogs-y := fixdep docproc hash 12hostprogs-y := fixdep docproc
13always := $(hostprogs-y) 13always := $(hostprogs-y)
14 14
15# fixdep is needed to compile other host programs 15# fixdep is needed to compile other host programs
diff --git a/scripts/basic/hash.c b/scripts/basic/hash.c
deleted file mode 100644
index 2ef5d3f666b8..000000000000
--- a/scripts/basic/hash.c
+++ /dev/null
@@ -1,64 +0,0 @@
1/*
2 * Copyright (C) 2008 Red Hat, Inc., Jason Baron <jbaron@redhat.com>
3 *
4 */
5
6#include <stdio.h>
7#include <stdlib.h>
8#include <string.h>
9
10#define DYNAMIC_DEBUG_HASH_BITS 6
11
12static const char *program;
13
14static void usage(void)
15{
16 printf("Usage: %s <djb2|r5> <modname>\n", program);
17 exit(1);
18}
19
20/* djb2 hashing algorithm by Dan Bernstein. From:
21 * http://www.cse.yorku.ca/~oz/hash.html
22 */
23
24static unsigned int djb2_hash(char *str)
25{
26 unsigned long hash = 5381;
27 int c;
28
29 c = *str;
30 while (c) {
31 hash = ((hash << 5) + hash) + c;
32 c = *++str;
33 }
34 return (unsigned int)(hash & ((1 << DYNAMIC_DEBUG_HASH_BITS) - 1));
35}
36
37static unsigned int r5_hash(char *str)
38{
39 unsigned long hash = 0;
40 int c;
41
42 c = *str;
43 while (c) {
44 hash = (hash + (c << 4) + (c >> 4)) * 11;
45 c = *++str;
46 }
47 return (unsigned int)(hash & ((1 << DYNAMIC_DEBUG_HASH_BITS) - 1));
48}
49
50int main(int argc, char *argv[])
51{
52 program = argv[0];
53
54 if (argc != 3)
55 usage();
56 if (!strcmp(argv[1], "djb2"))
57 printf("%d\n", djb2_hash(argv[2]));
58 else if (!strcmp(argv[1], "r5"))
59 printf("%d\n", r5_hash(argv[2]));
60 else
61 usage();
62 exit(0);
63}
64
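The helper deleted above computed two classic string hashes over the module name, masked down to DYNAMIC_DEBUG_HASH_BITS. For reference, a minimal Python sketch of the same djb2 and r5 functions (illustrative only, not part of the patch):

    DYNAMIC_DEBUG_HASH_BITS = 6
    MASK = (1 << DYNAMIC_DEBUG_HASH_BITS) - 1

    def djb2_hash(s):
        # Dan Bernstein's djb2: hash = hash * 33 + c, seeded with 5381
        h = 5381
        for c in s.encode():
            h = (h << 5) + h + c
        return h & MASK

    def r5_hash(s):
        # r5: hash = (hash + (c << 4) + (c >> 4)) * 11
        h = 0
        for c in s.encode():
            h = (h + (c << 4) + (c >> 4)) * 11
        return h & MASK

Only the low DYNAMIC_DEBUG_HASH_BITS bits are kept, so Python's unbounded integers produce the same result as the C unsigned arithmetic.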
diff --git a/scripts/gcc-goto.sh b/scripts/gcc-goto.sh
new file mode 100644
index 000000000000..520d16b1ffaf
--- /dev/null
+++ b/scripts/gcc-goto.sh
@@ -0,0 +1,5 @@
1#!/bin/sh
2# Test for gcc 'asm goto' support
3# Copyright (C) 2010, Jason Baron <jbaron@redhat.com>
4
5echo "int main(void) { entry: asm goto (\"\"::::entry); return 0; }" | $@ -x c - -c -o /dev/null >/dev/null 2>&1 && echo "y"
diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt
index 5164a655c39f..b2c63309a651 100644
--- a/tools/perf/Documentation/perf-annotate.txt
+++ b/tools/perf/Documentation/perf-annotate.txt
@@ -8,7 +8,7 @@ perf-annotate - Read perf.data (created by perf record) and display annotated co
8SYNOPSIS 8SYNOPSIS
9-------- 9--------
10[verse] 10[verse]
11'perf annotate' [-i <file> | --input=file] symbol_name 11'perf annotate' [-i <file> | --input=file] [symbol_name]
12 12
13DESCRIPTION 13DESCRIPTION
14----------- 14-----------
@@ -24,6 +24,13 @@ OPTIONS
24--input=:: 24--input=::
25 Input file name. (default: perf.data) 25 Input file name. (default: perf.data)
26 26
27--stdio:: Use the stdio interface.
28
29--tui:: Use the TUI interface. Use of --tui requires a tty; if one is not
30	present, as when piping to other commands, the stdio interface is
31	used. This interface starts by centering on the line with the most
32	samples; TAB/UNTAB cycles through the lines with the most samples.
33
27SEE ALSO 34SEE ALSO
28-------- 35--------
29linkperf:perf-record[1] 36linkperf:perf-record[1], linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index abfabe9147a4..12052c9ed0ba 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -65,6 +65,13 @@ OPTIONS
65 the tree is considered as a new profiled object. + 65 the tree is considered as a new profiled object. +
66 Default: fractal,0.5. 66 Default: fractal,0.5.
67 67
68--stdio:: Use the stdio interface.
69
70--tui:: Use the TUI interface, which is integrated with annotate and allows
71	zooming into DSOs or threads, among other features. Use of --tui
72	requires a tty; if one is not present, as when piping to other
73	commands, the stdio interface is used.
74
68SEE ALSO 75SEE ALSO
69-------- 76--------
70linkperf:perf-stat[1] 77linkperf:perf-stat[1]
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 1950e19af1cf..d1db0f676a4b 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -313,6 +313,9 @@ TEST_PROGRAMS =
313 313
314SCRIPT_SH += perf-archive.sh 314SCRIPT_SH += perf-archive.sh
315 315
316grep-libs = $(filter -l%,$(1))
317strip-libs = $(filter-out -l%,$(1))
318
316# 319#
317# No Perl scripts right now: 320# No Perl scripts right now:
318# 321#
@@ -588,14 +591,17 @@ endif
588ifdef NO_LIBPERL 591ifdef NO_LIBPERL
589 BASIC_CFLAGS += -DNO_LIBPERL 592 BASIC_CFLAGS += -DNO_LIBPERL
590else 593else
591 PERL_EMBED_LDOPTS = `perl -MExtUtils::Embed -e ldopts 2>/dev/null` 594 PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null)
595 PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS))
596 PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS))
592 PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null` 597 PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null`
593 FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS) 598 FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS)
594 599
595 ifneq ($(call try-cc,$(SOURCE_PERL_EMBED),$(FLAGS_PERL_EMBED)),y) 600 ifneq ($(call try-cc,$(SOURCE_PERL_EMBED),$(FLAGS_PERL_EMBED)),y)
596 BASIC_CFLAGS += -DNO_LIBPERL 601 BASIC_CFLAGS += -DNO_LIBPERL
597 else 602 else
598 ALL_LDFLAGS += $(PERL_EMBED_LDOPTS) 603 ALL_LDFLAGS += $(PERL_EMBED_LDFLAGS)
604 EXTLIBS += $(PERL_EMBED_LIBADD)
599 LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-perl.o 605 LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-perl.o
600 LIB_OBJS += $(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o 606 LIB_OBJS += $(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o
601 endif 607 endif
@@ -604,13 +610,16 @@ endif
604ifdef NO_LIBPYTHON 610ifdef NO_LIBPYTHON
605 BASIC_CFLAGS += -DNO_LIBPYTHON 611 BASIC_CFLAGS += -DNO_LIBPYTHON
606else 612else
607 PYTHON_EMBED_LDOPTS = `python-config --ldflags 2>/dev/null` 613 PYTHON_EMBED_LDOPTS = $(shell python-config --ldflags 2>/dev/null)
614 PYTHON_EMBED_LDFLAGS = $(call strip-libs,$(PYTHON_EMBED_LDOPTS))
615 PYTHON_EMBED_LIBADD = $(call grep-libs,$(PYTHON_EMBED_LDOPTS))
608 PYTHON_EMBED_CCOPTS = `python-config --cflags 2>/dev/null` 616 PYTHON_EMBED_CCOPTS = `python-config --cflags 2>/dev/null`
609 FLAGS_PYTHON_EMBED=$(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS) 617 FLAGS_PYTHON_EMBED=$(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
610 ifneq ($(call try-cc,$(SOURCE_PYTHON_EMBED),$(FLAGS_PYTHON_EMBED)),y) 618 ifneq ($(call try-cc,$(SOURCE_PYTHON_EMBED),$(FLAGS_PYTHON_EMBED)),y)
611 BASIC_CFLAGS += -DNO_LIBPYTHON 619 BASIC_CFLAGS += -DNO_LIBPYTHON
612 else 620 else
613 ALL_LDFLAGS += $(PYTHON_EMBED_LDOPTS) 621 ALL_LDFLAGS += $(PYTHON_EMBED_LDFLAGS)
622 EXTLIBS += $(PYTHON_EMBED_LIBADD)
614 LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o 623 LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o
615 LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o 624 LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o
616 endif 625 endif
@@ -653,6 +662,15 @@ else
653 endif 662 endif
654endif 663endif
655 664
665
666ifdef NO_STRLCPY
667 BASIC_CFLAGS += -DNO_STRLCPY
668else
669 ifneq ($(call try-cc,$(SOURCE_STRLCPY),),y)
670 BASIC_CFLAGS += -DNO_STRLCPY
671 endif
672endif
673
656ifndef CC_LD_DYNPATH 674ifndef CC_LD_DYNPATH
657 ifdef NO_R_TO_GCC_LINKER 675 ifdef NO_R_TO_GCC_LINKER
658 # Some gcc does not accept and pass -R to the linker to specify 676 # Some gcc does not accept and pass -R to the linker to specify
@@ -910,8 +928,8 @@ $(OUTPUT)perf.o: perf.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
910 $(ALL_CFLAGS) -c $(filter %.c,$^) -o $@ 928 $(ALL_CFLAGS) -c $(filter %.c,$^) -o $@
911 929
912$(OUTPUT)perf$X: $(OUTPUT)perf.o $(BUILTIN_OBJS) $(PERFLIBS) 930$(OUTPUT)perf$X: $(OUTPUT)perf.o $(BUILTIN_OBJS) $(PERFLIBS)
913 $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(OUTPUT)perf.o \ 931 $(QUIET_LINK)$(CC) $(ALL_CFLAGS) $(ALL_LDFLAGS) $(OUTPUT)perf.o \
914 $(BUILTIN_OBJS) $(ALL_LDFLAGS) $(LIBS) 932 $(BUILTIN_OBJS) $(LIBS) -o $@
915 933
916$(OUTPUT)builtin-help.o: builtin-help.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS 934$(OUTPUT)builtin-help.o: builtin-help.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
917 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) \ 935 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) \
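The grep-libs/strip-libs helpers introduced above partition the linker options reported by 'perl -MExtUtils::Embed -e ldopts' and 'python-config --ldflags': the -l entries go into EXTLIBS so they land after the object files on the link line, and everything else stays in ALL_LDFLAGS. A quick Python sketch of the same partition (the sample flags are made up):

    ldopts = "-Wl,-E -L/usr/lib/perl -lperl -lpthread -lm".split()
    libadd  = [o for o in ldopts if o.startswith("-l")]      # grep-libs
    ldflags = [o for o in ldopts if not o.startswith("-l")]  # strip-libs
    # Matches the reordered link rule above:
    #   $(CC) $(ALL_CFLAGS) $(ALL_LDFLAGS) perf.o $(BUILTIN_OBJS) $(LIBS) -o perf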
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 1478dc64bf15..6d5604d8df95 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -28,7 +28,7 @@
28 28
29static char const *input_name = "perf.data"; 29static char const *input_name = "perf.data";
30 30
31static bool force; 31static bool force, use_tui, use_stdio;
32 32
33static bool full_paths; 33static bool full_paths;
34 34
@@ -321,7 +321,7 @@ static int hist_entry__tty_annotate(struct hist_entry *he)
321 321
322static void hists__find_annotations(struct hists *self) 322static void hists__find_annotations(struct hists *self)
323{ 323{
324 struct rb_node *first = rb_first(&self->entries), *nd = first; 324 struct rb_node *nd = rb_first(&self->entries), *next;
325 int key = KEY_RIGHT; 325 int key = KEY_RIGHT;
326 326
327 while (nd) { 327 while (nd) {
@@ -343,20 +343,19 @@ find_next:
343 343
344 if (use_browser > 0) { 344 if (use_browser > 0) {
345 key = hist_entry__tui_annotate(he); 345 key = hist_entry__tui_annotate(he);
346 if (is_exit_key(key))
347 break;
348 switch (key) { 346 switch (key) {
349 case KEY_RIGHT: 347 case KEY_RIGHT:
350 case '\t': 348 next = rb_next(nd);
351 nd = rb_next(nd);
352 break; 349 break;
353 case KEY_LEFT: 350 case KEY_LEFT:
354 if (nd == first) 351 next = rb_prev(nd);
355 continue;
356 nd = rb_prev(nd);
357 default:
358 break; 352 break;
353 default:
354 return;
359 } 355 }
356
357 if (next != NULL)
358 nd = next;
360 } else { 359 } else {
361 hist_entry__tty_annotate(he); 360 hist_entry__tty_annotate(he);
362 nd = rb_next(nd); 361 nd = rb_next(nd);
@@ -428,6 +427,8 @@ static const struct option options[] = {
428 "be more verbose (show symbol address, etc)"), 427 "be more verbose (show symbol address, etc)"),
429 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 428 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
430 "dump raw trace in ASCII"), 429 "dump raw trace in ASCII"),
430 OPT_BOOLEAN(0, "tui", &use_tui, "Use the TUI interface"),
431 OPT_BOOLEAN(0, "stdio", &use_stdio, "Use the stdio interface"),
431 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, 432 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
432 "file", "vmlinux pathname"), 433 "file", "vmlinux pathname"),
433 OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, 434 OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
@@ -443,6 +444,11 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __used)
443{ 444{
444 argc = parse_options(argc, argv, options, annotate_usage, 0); 445 argc = parse_options(argc, argv, options, annotate_usage, 0);
445 446
447 if (use_stdio)
448 use_browser = 0;
449 else if (use_tui)
450 use_browser = 1;
451
446 setup_browser(); 452 setup_browser();
447 453
448 symbol_conf.priv_size = sizeof(struct sym_priv); 454 symbol_conf.priv_size = sizeof(struct sym_priv);
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 55fc1f46892a..5de405d45230 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -32,7 +32,7 @@
32 32
33static char const *input_name = "perf.data"; 33static char const *input_name = "perf.data";
34 34
35static bool force; 35static bool force, use_tui, use_stdio;
36static bool hide_unresolved; 36static bool hide_unresolved;
37static bool dont_use_callchains; 37static bool dont_use_callchains;
38 38
@@ -107,7 +107,8 @@ static int perf_session__add_hist_entry(struct perf_session *self,
107 goto out_free_syms; 107 goto out_free_syms;
108 err = 0; 108 err = 0;
109 if (symbol_conf.use_callchain) { 109 if (symbol_conf.use_callchain) {
110 err = append_chain(he->callchain, data->callchain, syms, data->period); 110 err = callchain_append(he->callchain, data->callchain, syms,
111 data->period);
111 if (err) 112 if (err)
112 goto out_free_syms; 113 goto out_free_syms;
113 } 114 }
@@ -450,6 +451,8 @@ static const struct option options[] = {
450 "Show per-thread event counters"), 451 "Show per-thread event counters"),
451 OPT_STRING(0, "pretty", &pretty_printing_style, "key", 452 OPT_STRING(0, "pretty", &pretty_printing_style, "key",
452 "pretty printing style key: normal raw"), 453 "pretty printing style key: normal raw"),
454 OPT_BOOLEAN(0, "tui", &use_tui, "Use the TUI interface"),
455 OPT_BOOLEAN(0, "stdio", &use_stdio, "Use the stdio interface"),
453 OPT_STRING('s', "sort", &sort_order, "key[,key2...]", 456 OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
454 "sort by key(s): pid, comm, dso, symbol, parent"), 457 "sort by key(s): pid, comm, dso, symbol, parent"),
455 OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, 458 OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization,
@@ -482,8 +485,15 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
482{ 485{
483 argc = parse_options(argc, argv, options, report_usage, 0); 486 argc = parse_options(argc, argv, options, report_usage, 0);
484 487
488 if (use_stdio)
489 use_browser = 0;
490 else if (use_tui)
491 use_browser = 1;
492
485 if (strcmp(input_name, "-") != 0) 493 if (strcmp(input_name, "-") != 0)
486 setup_browser(); 494 setup_browser();
495 else
496 use_browser = 0;
487 /* 497 /*
488 * Only in the newt browser we are doing integrated annotation, 498 * Only in the newt browser we are doing integrated annotation,
489 * so don't allocate extra space that won't be used in the stdio 499 * so don't allocate extra space that won't be used in the stdio
diff --git a/tools/perf/feature-tests.mak b/tools/perf/feature-tests.mak
index 7a7b60859053..b253db634f04 100644
--- a/tools/perf/feature-tests.mak
+++ b/tools/perf/feature-tests.mak
@@ -110,6 +110,17 @@ int main(void)
110} 110}
111endef 111endef
112 112
113define SOURCE_STRLCPY
114#include <stdlib.h>
115extern size_t strlcpy(char *dest, const char *src, size_t size);
116
117int main(void)
118{
119 strlcpy(NULL, NULL, 0);
120 return 0;
121}
122endef
123
113# try-cc 124# try-cc
114# Usage: option = $(call try-cc, source-to-build, cc-options) 125# Usage: option = $(call try-cc, source-to-build, cc-options)
115try-cc = $(shell sh -c \ 126try-cc = $(shell sh -c \
diff --git a/tools/perf/scripts/python/bin/netdev-times-record b/tools/perf/scripts/python/bin/netdev-times-record
new file mode 100644
index 000000000000..d931a828126b
--- /dev/null
+++ b/tools/perf/scripts/python/bin/netdev-times-record
@@ -0,0 +1,8 @@
1#!/bin/bash
2perf record -a -e net:net_dev_xmit -e net:net_dev_queue \
3 -e net:netif_receive_skb -e net:netif_rx \
4 -e skb:consume_skb -e skb:kfree_skb \
5 -e skb:skb_copy_datagram_iovec -e napi:napi_poll \
6 -e irq:irq_handler_entry -e irq:irq_handler_exit \
7 -e irq:softirq_entry -e irq:softirq_exit \
8 -e irq:softirq_raise $@
diff --git a/tools/perf/scripts/python/bin/netdev-times-report b/tools/perf/scripts/python/bin/netdev-times-report
new file mode 100644
index 000000000000..c3d0a638123d
--- /dev/null
+++ b/tools/perf/scripts/python/bin/netdev-times-report
@@ -0,0 +1,5 @@
1#!/bin/bash
2# description: display the processing of packets and the time spent at each stage
3# args: [tx] [rx] [dev=] [debug]
4
5perf trace -s ~/libexec/perf-core/scripts/python/netdev-times.py $@
diff --git a/tools/perf/scripts/python/netdev-times.py b/tools/perf/scripts/python/netdev-times.py
new file mode 100644
index 000000000000..9aa0a32972e8
--- /dev/null
+++ b/tools/perf/scripts/python/netdev-times.py
@@ -0,0 +1,464 @@
1# Display the processing of packets and the time spent at each stage.
2# It helps us investigate the network stack and network devices.
3#
4# options
5# tx: show only tx chart
6# rx: show only rx chart
7# dev=: show only things related to the specified device
8# debug: run in debug mode; also show buffer status.
9
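# A hypothetical session using the wrapper scripts added above (the
# interface name is an assumption; netdev-times-report runs this file
# via 'perf trace -s'):
#   $ netdev-times-record
#   $ netdev-times-report rx dev=eth0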
10import os
11import sys
12
13sys.path.append(os.environ['PERF_EXEC_PATH'] + \
14 '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
15
16from perf_trace_context import *
17from Core import *
18from Util import *
19
20all_event_list = []; # all tracepoint events related to this script
21irq_dic = {}; # key is cpu; value is a list which stacks irqs
22 # that raise the NET_RX softirq
23net_rx_dic = {}; # key is cpu; value includes the entry time of the NET_RX
24 # softirq and a list which stacks receive events
25receive_hunk_list = []; # a list holding sequences of receive events
26rx_skb_list = []; # received packet list, for matching
27 # skb_copy_datagram_iovec
28
29buffer_budget = 65536; # the budget of rx_skb_list, tx_queue_list and
30 # tx_xmit_list
31of_count_rx_skb_list = 0; # overflow count
32
33tx_queue_list = []; # list of packets which pass through dev_queue_xmit
34of_count_tx_queue_list = 0; # overflow count
35
36tx_xmit_list = []; # list of packets which pass through dev_hard_start_xmit
37of_count_tx_xmit_list = 0; # overflow count
38
39tx_free_list = []; # list of packets which are freed
40
41# options
42show_tx = 0;
43show_rx = 0;
44dev = 0; # stores the name of the device given by the "dev=" option
45debug = 0;
46
47# indices of event_info tuple
48EINFO_IDX_NAME= 0
49EINFO_IDX_CONTEXT=1
50EINFO_IDX_CPU= 2
51EINFO_IDX_TIME= 3
52EINFO_IDX_PID= 4
53EINFO_IDX_COMM= 5
54
55# Calculate a time interval (msec) from src (nsec) to dst (nsec)
56def diff_msec(src, dst):
57 return (dst - src) / 1000000.0
58
59# Display a process of transmitting a packet
60def print_transmit(hunk):
61 if dev != 0 and hunk['dev'].find(dev) < 0:
62 return
63 print "%7s %5d %6d.%06dsec %12.3fmsec %12.3fmsec" % \
64 (hunk['dev'], hunk['len'],
65 nsecs_secs(hunk['queue_t']),
66 nsecs_nsecs(hunk['queue_t'])/1000,
67 diff_msec(hunk['queue_t'], hunk['xmit_t']),
68 diff_msec(hunk['xmit_t'], hunk['free_t']))
69
70# Format for displaying rx packet processing
71PF_IRQ_ENTRY= " irq_entry(+%.3fmsec irq=%d:%s)"
72PF_SOFT_ENTRY=" softirq_entry(+%.3fmsec)"
73PF_NAPI_POLL= " napi_poll_exit(+%.3fmsec %s)"
74PF_JOINT= " |"
75PF_WJOINT= " | |"
76PF_NET_RECV= " |---netif_receive_skb(+%.3fmsec skb=%x len=%d)"
77PF_NET_RX= " |---netif_rx(+%.3fmsec skb=%x)"
78PF_CPY_DGRAM= " | skb_copy_datagram_iovec(+%.3fmsec %d:%s)"
79PF_KFREE_SKB= " | kfree_skb(+%.3fmsec location=%x)"
80PF_CONS_SKB= " | consume_skb(+%.3fmsec)"
81
82# Display the processing of received packets and the interrupts
83# associated with a NET_RX softirq
84def print_receive(hunk):
85 show_hunk = 0
86 irq_list = hunk['irq_list']
87 cpu = irq_list[0]['cpu']
88 base_t = irq_list[0]['irq_ent_t']
89	# check if this hunk should be shown
90 if dev != 0:
91 for i in range(len(irq_list)):
92 if irq_list[i]['name'].find(dev) >= 0:
93 show_hunk = 1
94 break
95 else:
96 show_hunk = 1
97 if show_hunk == 0:
98 return
99
100 print "%d.%06dsec cpu=%d" % \
101 (nsecs_secs(base_t), nsecs_nsecs(base_t)/1000, cpu)
102 for i in range(len(irq_list)):
103 print PF_IRQ_ENTRY % \
104 (diff_msec(base_t, irq_list[i]['irq_ent_t']),
105 irq_list[i]['irq'], irq_list[i]['name'])
106 print PF_JOINT
107 irq_event_list = irq_list[i]['event_list']
108 for j in range(len(irq_event_list)):
109 irq_event = irq_event_list[j]
110 if irq_event['event'] == 'netif_rx':
111 print PF_NET_RX % \
112 (diff_msec(base_t, irq_event['time']),
113 irq_event['skbaddr'])
114 print PF_JOINT
115 print PF_SOFT_ENTRY % \
116 diff_msec(base_t, hunk['sirq_ent_t'])
117 print PF_JOINT
118 event_list = hunk['event_list']
119 for i in range(len(event_list)):
120 event = event_list[i]
121 if event['event_name'] == 'napi_poll':
122 print PF_NAPI_POLL % \
123 (diff_msec(base_t, event['event_t']), event['dev'])
124 if i == len(event_list) - 1:
125 print ""
126 else:
127 print PF_JOINT
128 else:
129 print PF_NET_RECV % \
130 (diff_msec(base_t, event['event_t']), event['skbaddr'],
131 event['len'])
132 if 'comm' in event.keys():
133 print PF_WJOINT
134 print PF_CPY_DGRAM % \
135 (diff_msec(base_t, event['comm_t']),
136 event['pid'], event['comm'])
137 elif 'handle' in event.keys():
138 print PF_WJOINT
139 if event['handle'] == "kfree_skb":
140 print PF_KFREE_SKB % \
141 (diff_msec(base_t,
142 event['comm_t']),
143 event['location'])
144 elif event['handle'] == "consume_skb":
145 print PF_CONS_SKB % \
146 diff_msec(base_t,
147 event['comm_t'])
148 print PF_JOINT
149
150def trace_begin():
151 global show_tx
152 global show_rx
153 global dev
154 global debug
155
156 for i in range(len(sys.argv)):
157 if i == 0:
158 continue
159 arg = sys.argv[i]
160 if arg == 'tx':
161 show_tx = 1
162 elif arg =='rx':
163 show_rx = 1
164 elif arg.find('dev=',0, 4) >= 0:
165 dev = arg[4:]
166 elif arg == 'debug':
167 debug = 1
168 if show_tx == 0 and show_rx == 0:
169 show_tx = 1
170 show_rx = 1
171
172def trace_end():
173 # order all events in time
174 all_event_list.sort(lambda a,b :cmp(a[EINFO_IDX_TIME],
175 b[EINFO_IDX_TIME]))
176 # process all events
177 for i in range(len(all_event_list)):
178 event_info = all_event_list[i]
179 name = event_info[EINFO_IDX_NAME]
180 if name == 'irq__softirq_exit':
181 handle_irq_softirq_exit(event_info)
182 elif name == 'irq__softirq_entry':
183 handle_irq_softirq_entry(event_info)
184 elif name == 'irq__softirq_raise':
185 handle_irq_softirq_raise(event_info)
186 elif name == 'irq__irq_handler_entry':
187 handle_irq_handler_entry(event_info)
188 elif name == 'irq__irq_handler_exit':
189 handle_irq_handler_exit(event_info)
190 elif name == 'napi__napi_poll':
191 handle_napi_poll(event_info)
192 elif name == 'net__netif_receive_skb':
193 handle_netif_receive_skb(event_info)
194 elif name == 'net__netif_rx':
195 handle_netif_rx(event_info)
196 elif name == 'skb__skb_copy_datagram_iovec':
197 handle_skb_copy_datagram_iovec(event_info)
198 elif name == 'net__net_dev_queue':
199 handle_net_dev_queue(event_info)
200 elif name == 'net__net_dev_xmit':
201 handle_net_dev_xmit(event_info)
202 elif name == 'skb__kfree_skb':
203 handle_kfree_skb(event_info)
204 elif name == 'skb__consume_skb':
205 handle_consume_skb(event_info)
206 # display receive hunks
207 if show_rx:
208 for i in range(len(receive_hunk_list)):
209 print_receive(receive_hunk_list[i])
210 # display transmit hunks
211 if show_tx:
212 print " dev len Qdisc " \
213 " netdevice free"
214 for i in range(len(tx_free_list)):
215 print_transmit(tx_free_list[i])
216 if debug:
217 print "debug buffer status"
218 print "----------------------------"
219 print "xmit Qdisc:remain:%d overflow:%d" % \
220 (len(tx_queue_list), of_count_tx_queue_list)
221 print "xmit netdevice:remain:%d overflow:%d" % \
222 (len(tx_xmit_list), of_count_tx_xmit_list)
223 print "receive:remain:%d overflow:%d" % \
224 (len(rx_skb_list), of_count_rx_skb_list)
225
226# called from perf when it finds a corresponding event
227def irq__softirq_entry(name, context, cpu, sec, nsec, pid, comm, vec):
228 if symbol_str("irq__softirq_entry", "vec", vec) != "NET_RX":
229 return
230 event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, vec)
231 all_event_list.append(event_info)
232
233def irq__softirq_exit(name, context, cpu, sec, nsec, pid, comm, vec):
234 if symbol_str("irq__softirq_entry", "vec", vec) != "NET_RX":
235 return
236 event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, vec)
237 all_event_list.append(event_info)
238
239def irq__softirq_raise(name, context, cpu, sec, nsec, pid, comm, vec):
240 if symbol_str("irq__softirq_entry", "vec", vec) != "NET_RX":
241 return
242 event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, vec)
243 all_event_list.append(event_info)
244
245def irq__irq_handler_entry(name, context, cpu, sec, nsec, pid, comm,
246 irq, irq_name):
247 event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
248 irq, irq_name)
249 all_event_list.append(event_info)
250
251def irq__irq_handler_exit(name, context, cpu, sec, nsec, pid, comm, irq, ret):
252 event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, irq, ret)
253 all_event_list.append(event_info)
254
255def napi__napi_poll(name, context, cpu, sec, nsec, pid, comm, napi, dev_name):
256 event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
257 napi, dev_name)
258 all_event_list.append(event_info)
259
260def net__netif_receive_skb(name, context, cpu, sec, nsec, pid, comm, skbaddr,
261 skblen, dev_name):
262 event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
263 skbaddr, skblen, dev_name)
264 all_event_list.append(event_info)
265
266def net__netif_rx(name, context, cpu, sec, nsec, pid, comm, skbaddr,
267 skblen, dev_name):
268 event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
269 skbaddr, skblen, dev_name)
270 all_event_list.append(event_info)
271
272def net__net_dev_queue(name, context, cpu, sec, nsec, pid, comm,
273 skbaddr, skblen, dev_name):
274 event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
275 skbaddr, skblen, dev_name)
276 all_event_list.append(event_info)
277
278def net__net_dev_xmit(name, context, cpu, sec, nsec, pid, comm,
279 skbaddr, skblen, rc, dev_name):
280 event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
281 skbaddr, skblen, rc ,dev_name)
282 all_event_list.append(event_info)
283
284def skb__kfree_skb(name, context, cpu, sec, nsec, pid, comm,
285 skbaddr, protocol, location):
286 event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
287 skbaddr, protocol, location)
288 all_event_list.append(event_info)
289
290def skb__consume_skb(name, context, cpu, sec, nsec, pid, comm, skbaddr):
291 event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
292 skbaddr)
293 all_event_list.append(event_info)
294
295def skb__skb_copy_datagram_iovec(name, context, cpu, sec, nsec, pid, comm,
296 skbaddr, skblen):
297 event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
298 skbaddr, skblen)
299 all_event_list.append(event_info)
300
301def handle_irq_handler_entry(event_info):
302 (name, context, cpu, time, pid, comm, irq, irq_name) = event_info
303 if cpu not in irq_dic.keys():
304 irq_dic[cpu] = []
305 irq_record = {'irq':irq, 'name':irq_name, 'cpu':cpu, 'irq_ent_t':time}
306 irq_dic[cpu].append(irq_record)
307
308def handle_irq_handler_exit(event_info):
309 (name, context, cpu, time, pid, comm, irq, ret) = event_info
310 if cpu not in irq_dic.keys():
311 return
312 irq_record = irq_dic[cpu].pop()
313 if irq != irq_record['irq']:
314 return
315 irq_record.update({'irq_ext_t':time})
316	# if an irq doesn't include a NET_RX softirq, drop it.
317 if 'event_list' in irq_record.keys():
318 irq_dic[cpu].append(irq_record)
319
320def handle_irq_softirq_raise(event_info):
321 (name, context, cpu, time, pid, comm, vec) = event_info
322 if cpu not in irq_dic.keys() \
323 or len(irq_dic[cpu]) == 0:
324 return
325 irq_record = irq_dic[cpu].pop()
326 if 'event_list' in irq_record.keys():
327 irq_event_list = irq_record['event_list']
328 else:
329 irq_event_list = []
330 irq_event_list.append({'time':time, 'event':'sirq_raise'})
331 irq_record.update({'event_list':irq_event_list})
332 irq_dic[cpu].append(irq_record)
333
334def handle_irq_softirq_entry(event_info):
335 (name, context, cpu, time, pid, comm, vec) = event_info
336 net_rx_dic[cpu] = {'sirq_ent_t':time, 'event_list':[]}
337
338def handle_irq_softirq_exit(event_info):
339 (name, context, cpu, time, pid, comm, vec) = event_info
340 irq_list = []
341 event_list = 0
342 if cpu in irq_dic.keys():
343 irq_list = irq_dic[cpu]
344 del irq_dic[cpu]
345 if cpu in net_rx_dic.keys():
346 sirq_ent_t = net_rx_dic[cpu]['sirq_ent_t']
347 event_list = net_rx_dic[cpu]['event_list']
348 del net_rx_dic[cpu]
349 if irq_list == [] or event_list == 0:
350 return
351 rec_data = {'sirq_ent_t':sirq_ent_t, 'sirq_ext_t':time,
352 'irq_list':irq_list, 'event_list':event_list}
353	# merge information related to a NET_RX softirq
354 receive_hunk_list.append(rec_data)
355
356def handle_napi_poll(event_info):
357 (name, context, cpu, time, pid, comm, napi, dev_name) = event_info
358 if cpu in net_rx_dic.keys():
359 event_list = net_rx_dic[cpu]['event_list']
360 rec_data = {'event_name':'napi_poll',
361 'dev':dev_name, 'event_t':time}
362 event_list.append(rec_data)
363
364def handle_netif_rx(event_info):
365 (name, context, cpu, time, pid, comm,
366 skbaddr, skblen, dev_name) = event_info
367 if cpu not in irq_dic.keys() \
368 or len(irq_dic[cpu]) == 0:
369 return
370 irq_record = irq_dic[cpu].pop()
371 if 'event_list' in irq_record.keys():
372 irq_event_list = irq_record['event_list']
373 else:
374 irq_event_list = []
375 irq_event_list.append({'time':time, 'event':'netif_rx',
376 'skbaddr':skbaddr, 'skblen':skblen, 'dev_name':dev_name})
377 irq_record.update({'event_list':irq_event_list})
378 irq_dic[cpu].append(irq_record)
379
380def handle_netif_receive_skb(event_info):
381 global of_count_rx_skb_list
382
383 (name, context, cpu, time, pid, comm,
384 skbaddr, skblen, dev_name) = event_info
385 if cpu in net_rx_dic.keys():
386 rec_data = {'event_name':'netif_receive_skb',
387 'event_t':time, 'skbaddr':skbaddr, 'len':skblen}
388 event_list = net_rx_dic[cpu]['event_list']
389 event_list.append(rec_data)
390 rx_skb_list.insert(0, rec_data)
391 if len(rx_skb_list) > buffer_budget:
392 rx_skb_list.pop()
393 of_count_rx_skb_list += 1
394
395def handle_net_dev_queue(event_info):
396 global of_count_tx_queue_list
397
398 (name, context, cpu, time, pid, comm,
399 skbaddr, skblen, dev_name) = event_info
400 skb = {'dev':dev_name, 'skbaddr':skbaddr, 'len':skblen, 'queue_t':time}
401 tx_queue_list.insert(0, skb)
402 if len(tx_queue_list) > buffer_budget:
403 tx_queue_list.pop()
404 of_count_tx_queue_list += 1
405
406def handle_net_dev_xmit(event_info):
407 global of_count_tx_xmit_list
408
409 (name, context, cpu, time, pid, comm,
410 skbaddr, skblen, rc, dev_name) = event_info
411 if rc == 0: # NETDEV_TX_OK
412 for i in range(len(tx_queue_list)):
413 skb = tx_queue_list[i]
414 if skb['skbaddr'] == skbaddr:
415 skb['xmit_t'] = time
416 tx_xmit_list.insert(0, skb)
417 del tx_queue_list[i]
418 if len(tx_xmit_list) > buffer_budget:
419 tx_xmit_list.pop()
420 of_count_tx_xmit_list += 1
421 return
422
423def handle_kfree_skb(event_info):
424 (name, context, cpu, time, pid, comm,
425 skbaddr, protocol, location) = event_info
426 for i in range(len(tx_queue_list)):
427 skb = tx_queue_list[i]
428 if skb['skbaddr'] == skbaddr:
429 del tx_queue_list[i]
430 return
431 for i in range(len(tx_xmit_list)):
432 skb = tx_xmit_list[i]
433 if skb['skbaddr'] == skbaddr:
434 skb['free_t'] = time
435 tx_free_list.append(skb)
436 del tx_xmit_list[i]
437 return
438 for i in range(len(rx_skb_list)):
439 rec_data = rx_skb_list[i]
440 if rec_data['skbaddr'] == skbaddr:
441 rec_data.update({'handle':"kfree_skb",
442 'comm':comm, 'pid':pid, 'comm_t':time})
443 del rx_skb_list[i]
444 return
445
446def handle_consume_skb(event_info):
447 (name, context, cpu, time, pid, comm, skbaddr) = event_info
448 for i in range(len(tx_xmit_list)):
449 skb = tx_xmit_list[i]
450 if skb['skbaddr'] == skbaddr:
451 skb['free_t'] = time
452 tx_free_list.append(skb)
453 del tx_xmit_list[i]
454 return
455
456def handle_skb_copy_datagram_iovec(event_info):
457 (name, context, cpu, time, pid, comm, skbaddr, skblen) = event_info
458 for i in range(len(rx_skb_list)):
459 rec_data = rx_skb_list[i]
460 if skbaddr == rec_data['skbaddr']:
461 rec_data.update({'handle':"skb_copy_datagram_iovec",
462 'comm':comm, 'pid':pid, 'comm_t':time})
463 del rx_skb_list[i]
464 return
diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h
index 27e9ebe4076e..a7729797fd96 100644
--- a/tools/perf/util/cache.h
+++ b/tools/perf/util/cache.h
@@ -82,6 +82,8 @@ extern char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2
82extern char *perf_pathdup(const char *fmt, ...) 82extern char *perf_pathdup(const char *fmt, ...)
83 __attribute__((format (printf, 1, 2))); 83 __attribute__((format (printf, 1, 2)));
84 84
85#ifdef NO_STRLCPY
85extern size_t strlcpy(char *dest, const char *src, size_t size); 86extern size_t strlcpy(char *dest, const char *src, size_t size);
87#endif
86 88
87#endif /* __PERF_CACHE_H */ 89#endif /* __PERF_CACHE_H */
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index f231f43424d2..e12d539417b2 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -28,6 +28,9 @@ bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event)
28#define chain_for_each_child(child, parent) \ 28#define chain_for_each_child(child, parent) \
29 list_for_each_entry(child, &parent->children, brothers) 29 list_for_each_entry(child, &parent->children, brothers)
30 30
31#define chain_for_each_child_safe(child, next, parent) \
32 list_for_each_entry_safe(child, next, &parent->children, brothers)
33
31static void 34static void
32rb_insert_callchain(struct rb_root *root, struct callchain_node *chain, 35rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
33 enum chain_mode mode) 36 enum chain_mode mode)
@@ -86,10 +89,10 @@ __sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node,
86 * sort them by hit 89 * sort them by hit
87 */ 90 */
88static void 91static void
89sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node, 92sort_chain_flat(struct rb_root *rb_root, struct callchain_root *root,
90 u64 min_hit, struct callchain_param *param __used) 93 u64 min_hit, struct callchain_param *param __used)
91{ 94{
92 __sort_chain_flat(rb_root, node, min_hit); 95 __sort_chain_flat(rb_root, &root->node, min_hit);
93} 96}
94 97
95static void __sort_chain_graph_abs(struct callchain_node *node, 98static void __sort_chain_graph_abs(struct callchain_node *node,
@@ -108,11 +111,11 @@ static void __sort_chain_graph_abs(struct callchain_node *node,
108} 111}
109 112
110static void 113static void
111sort_chain_graph_abs(struct rb_root *rb_root, struct callchain_node *chain_root, 114sort_chain_graph_abs(struct rb_root *rb_root, struct callchain_root *chain_root,
112 u64 min_hit, struct callchain_param *param __used) 115 u64 min_hit, struct callchain_param *param __used)
113{ 116{
114 __sort_chain_graph_abs(chain_root, min_hit); 117 __sort_chain_graph_abs(&chain_root->node, min_hit);
115 rb_root->rb_node = chain_root->rb_root.rb_node; 118 rb_root->rb_node = chain_root->node.rb_root.rb_node;
116} 119}
117 120
118static void __sort_chain_graph_rel(struct callchain_node *node, 121static void __sort_chain_graph_rel(struct callchain_node *node,
@@ -133,11 +136,11 @@ static void __sort_chain_graph_rel(struct callchain_node *node,
133} 136}
134 137
135static void 138static void
136sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_node *chain_root, 139sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_root *chain_root,
137 u64 min_hit __used, struct callchain_param *param) 140 u64 min_hit __used, struct callchain_param *param)
138{ 141{
139 __sort_chain_graph_rel(chain_root, param->min_percent / 100.0); 142 __sort_chain_graph_rel(&chain_root->node, param->min_percent / 100.0);
140 rb_root->rb_node = chain_root->rb_root.rb_node; 143 rb_root->rb_node = chain_root->node.rb_root.rb_node;
141} 144}
142 145
143int register_callchain_param(struct callchain_param *param) 146int register_callchain_param(struct callchain_param *param)
@@ -284,19 +287,18 @@ split_add_child(struct callchain_node *parent, struct resolved_chain *chain,
284} 287}
285 288
286static int 289static int
287__append_chain(struct callchain_node *root, struct resolved_chain *chain, 290append_chain(struct callchain_node *root, struct resolved_chain *chain,
288 unsigned int start, u64 period); 291 unsigned int start, u64 period);
289 292
290static void 293static void
291__append_chain_children(struct callchain_node *root, 294append_chain_children(struct callchain_node *root, struct resolved_chain *chain,
292 struct resolved_chain *chain, 295 unsigned int start, u64 period)
293 unsigned int start, u64 period)
294{ 296{
295 struct callchain_node *rnode; 297 struct callchain_node *rnode;
296 298
297	/* lookup in children */ 299	/* lookup in children */
298 chain_for_each_child(rnode, root) { 300 chain_for_each_child(rnode, root) {
299 unsigned int ret = __append_chain(rnode, chain, start, period); 301 unsigned int ret = append_chain(rnode, chain, start, period);
300 302
301 if (!ret) 303 if (!ret)
302 goto inc_children_hit; 304 goto inc_children_hit;
@@ -309,8 +311,8 @@ inc_children_hit:
309} 311}
310 312
311static int 313static int
312__append_chain(struct callchain_node *root, struct resolved_chain *chain, 314append_chain(struct callchain_node *root, struct resolved_chain *chain,
313 unsigned int start, u64 period) 315 unsigned int start, u64 period)
314{ 316{
315 struct callchain_list *cnode; 317 struct callchain_list *cnode;
316 unsigned int i = start; 318 unsigned int i = start;
@@ -357,7 +359,7 @@ __append_chain(struct callchain_node *root, struct resolved_chain *chain,
357 } 359 }
358 360
359 /* We match the node and still have a part remaining */ 361 /* We match the node and still have a part remaining */
360 __append_chain_children(root, chain, i, period); 362 append_chain_children(root, chain, i, period);
361 363
362 return 0; 364 return 0;
363} 365}
@@ -380,8 +382,8 @@ static void filter_context(struct ip_callchain *old, struct resolved_chain *new,
380} 382}
381 383
382 384
383int append_chain(struct callchain_node *root, struct ip_callchain *chain, 385int callchain_append(struct callchain_root *root, struct ip_callchain *chain,
384 struct map_symbol *syms, u64 period) 386 struct map_symbol *syms, u64 period)
385{ 387{
386 struct resolved_chain *filtered; 388 struct resolved_chain *filtered;
387 389
@@ -398,9 +400,65 @@ int append_chain(struct callchain_node *root, struct ip_callchain *chain,
398 if (!filtered->nr) 400 if (!filtered->nr)
399 goto end; 401 goto end;
400 402
401 __append_chain_children(root, filtered, 0, period); 403 append_chain_children(&root->node, filtered, 0, period);
404
405 if (filtered->nr > root->max_depth)
406 root->max_depth = filtered->nr;
402end: 407end:
403 free(filtered); 408 free(filtered);
404 409
405 return 0; 410 return 0;
406} 411}
412
413static int
414merge_chain_branch(struct callchain_node *dst, struct callchain_node *src,
415 struct resolved_chain *chain)
416{
417 struct callchain_node *child, *next_child;
418 struct callchain_list *list, *next_list;
419 int old_pos = chain->nr;
420 int err = 0;
421
422 list_for_each_entry_safe(list, next_list, &src->val, list) {
423 chain->ips[chain->nr].ip = list->ip;
424 chain->ips[chain->nr].ms = list->ms;
425 chain->nr++;
426 list_del(&list->list);
427 free(list);
428 }
429
430 if (src->hit)
431 append_chain_children(dst, chain, 0, src->hit);
432
433 chain_for_each_child_safe(child, next_child, src) {
434 err = merge_chain_branch(dst, child, chain);
435 if (err)
436 break;
437
438 list_del(&child->brothers);
439 free(child);
440 }
441
442 chain->nr = old_pos;
443
444 return err;
445}
446
447int callchain_merge(struct callchain_root *dst, struct callchain_root *src)
448{
449 struct resolved_chain *chain;
450 int err;
451
452 chain = malloc(sizeof(*chain) +
453 src->max_depth * sizeof(struct resolved_ip));
454 if (!chain)
455 return -ENOMEM;
456
457 chain->nr = 0;
458
459 err = merge_chain_branch(&dst->node, &src->node, chain);
460
461 free(chain);
462
463 return err;
464}
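The new callchain_merge() walks the source tree depth-first: each node's entries are pushed onto a scratch chain, the accumulated path is replayed into the destination whenever the node has direct hits, and the scratch position is restored before moving on. A compact Python sketch of that traversal (assuming an append_chain_children(dst, chain, hits) helper like the C one; the C code also frees the source nodes as it goes, which the sketch omits):

    def merge_chain_branch(dst, src, chain):
        old_pos = len(chain)
        chain.extend(src.val)          # pull this node's entries onto the path
        if src.hit:
            append_chain_children(dst, chain, src.hit)  # replay full path into dst
        for child in src.children:
            merge_chain_branch(dst, child, chain)
        del chain[old_pos:]            # restore the scratch path for siblings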
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 6de4313924fb..c15fb8c24ad2 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -26,9 +26,14 @@ struct callchain_node {
26 u64 children_hit; 26 u64 children_hit;
27}; 27};
28 28
29struct callchain_root {
30 u64 max_depth;
31 struct callchain_node node;
32};
33
29struct callchain_param; 34struct callchain_param;
30 35
31typedef void (*sort_chain_func_t)(struct rb_root *, struct callchain_node *, 36typedef void (*sort_chain_func_t)(struct rb_root *, struct callchain_root *,
32 u64, struct callchain_param *); 37 u64, struct callchain_param *);
33 38
34struct callchain_param { 39struct callchain_param {
@@ -44,15 +49,16 @@ struct callchain_list {
44 struct list_head list; 49 struct list_head list;
45}; 50};
46 51
47static inline void callchain_init(struct callchain_node *node) 52static inline void callchain_init(struct callchain_root *root)
48{ 53{
49 INIT_LIST_HEAD(&node->brothers); 54 INIT_LIST_HEAD(&root->node.brothers);
50 INIT_LIST_HEAD(&node->children); 55 INIT_LIST_HEAD(&root->node.children);
51 INIT_LIST_HEAD(&node->val); 56 INIT_LIST_HEAD(&root->node.val);
52 57
53 node->children_hit = 0; 58 root->node.parent = NULL;
54 node->parent = NULL; 59 root->node.hit = 0;
55 node->hit = 0; 60 root->node.children_hit = 0;
61 root->max_depth = 0;
56} 62}
57 63
58static inline u64 cumul_hits(struct callchain_node *node) 64static inline u64 cumul_hits(struct callchain_node *node)
@@ -61,8 +67,9 @@ static inline u64 cumul_hits(struct callchain_node *node)
61} 67}
62 68
63int register_callchain_param(struct callchain_param *param); 69int register_callchain_param(struct callchain_param *param);
64int append_chain(struct callchain_node *root, struct ip_callchain *chain, 70int callchain_append(struct callchain_root *root, struct ip_callchain *chain,
65 struct map_symbol *syms, u64 period); 71 struct map_symbol *syms, u64 period);
72int callchain_merge(struct callchain_root *dst, struct callchain_root *src);
66 73
67bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event); 74bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event);
68#endif /* __PERF_CALLCHAIN_H */ 75#endif /* __PERF_CALLCHAIN_H */
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index be22ae6ef055..2022e8740994 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -87,7 +87,7 @@ static void hist_entry__add_cpumode_period(struct hist_entry *self,
87 87
88static struct hist_entry *hist_entry__new(struct hist_entry *template) 88static struct hist_entry *hist_entry__new(struct hist_entry *template)
89{ 89{
90 size_t callchain_size = symbol_conf.use_callchain ? sizeof(struct callchain_node) : 0; 90 size_t callchain_size = symbol_conf.use_callchain ? sizeof(struct callchain_root) : 0;
91 struct hist_entry *self = malloc(sizeof(*self) + callchain_size); 91 struct hist_entry *self = malloc(sizeof(*self) + callchain_size);
92 92
93 if (self != NULL) { 93 if (self != NULL) {
@@ -226,6 +226,8 @@ static bool collapse__insert_entry(struct rb_root *root, struct hist_entry *he)
226 226
227 if (!cmp) { 227 if (!cmp) {
228 iter->period += he->period; 228 iter->period += he->period;
229 if (symbol_conf.use_callchain)
230 callchain_merge(iter->callchain, he->callchain);
229 hist_entry__free(he); 231 hist_entry__free(he);
230 return false; 232 return false;
231 } 233 }
diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c
index 58a470d036dd..bd7497711424 100644
--- a/tools/perf/util/path.c
+++ b/tools/perf/util/path.c
@@ -22,6 +22,7 @@ static const char *get_perf_dir(void)
22 return "."; 22 return ".";
23} 23}
24 24
25#ifdef NO_STRLCPY
25size_t strlcpy(char *dest, const char *src, size_t size) 26size_t strlcpy(char *dest, const char *src, size_t size)
26{ 27{
27 size_t ret = strlen(src); 28 size_t ret = strlen(src);
@@ -33,7 +34,7 @@ size_t strlcpy(char *dest, const char *src, size_t size)
33 } 34 }
34 return ret; 35 return ret;
35} 36}
36 37#endif
37 38
38static char *get_pathname(void) 39static char *get_pathname(void)
39{ 40{
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 46e531d09e8b..0b91053a7d11 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -70,7 +70,7 @@ struct hist_entry {
70 struct hist_entry *pair; 70 struct hist_entry *pair;
71 struct rb_root sorted_chain; 71 struct rb_root sorted_chain;
72 }; 72 };
73 struct callchain_node callchain[0]; 73 struct callchain_root callchain[0];
74}; 74};
75 75
76enum sort_type { 76enum sort_type {
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index b2f5ae97f33d..b39f499e575a 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -388,6 +388,20 @@ size_t dso__fprintf_buildid(struct dso *self, FILE *fp)
388 return fprintf(fp, "%s", sbuild_id); 388 return fprintf(fp, "%s", sbuild_id);
389} 389}
390 390
391size_t dso__fprintf_symbols_by_name(struct dso *self, enum map_type type, FILE *fp)
392{
393 size_t ret = 0;
394 struct rb_node *nd;
395 struct symbol_name_rb_node *pos;
396
397 for (nd = rb_first(&self->symbol_names[type]); nd; nd = rb_next(nd)) {
398 pos = rb_entry(nd, struct symbol_name_rb_node, rb_node);
399	ret += fprintf(fp, "%s\n", pos->sym.name);
400 }
401
402 return ret;
403}
404
391size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp) 405size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp)
392{ 406{
393 struct rb_node *nd; 407 struct rb_node *nd;
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index ea95c2756f05..038f2201ee09 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -182,6 +182,7 @@ size_t machines__fprintf_dsos(struct rb_root *self, FILE *fp);
182size_t machines__fprintf_dsos_buildid(struct rb_root *self, FILE *fp, bool with_hits); 182size_t machines__fprintf_dsos_buildid(struct rb_root *self, FILE *fp, bool with_hits);
183 183
184size_t dso__fprintf_buildid(struct dso *self, FILE *fp); 184size_t dso__fprintf_buildid(struct dso *self, FILE *fp);
185size_t dso__fprintf_symbols_by_name(struct dso *self, enum map_type type, FILE *fp);
185size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp); 186size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp);
186 187
187enum dso_origin { 188enum dso_origin {
diff --git a/tools/perf/util/ui/browser.c b/tools/perf/util/ui/browser.c
index 66f2d583d8c4..6d0df809a2ed 100644
--- a/tools/perf/util/ui/browser.c
+++ b/tools/perf/util/ui/browser.c
@@ -1,16 +1,6 @@
1#define _GNU_SOURCE
2#include <stdio.h>
3#undef _GNU_SOURCE
4/*
5 * slang versions <= 2.0.6 have a "#if HAVE_LONG_LONG" that breaks
6 * the build if it isn't defined. Use the equivalent one that glibc
7 * has on features.h.
8 */
9#include <features.h>
10#ifndef HAVE_LONG_LONG
11#define HAVE_LONG_LONG __GLIBC_HAVE_LONG_LONG
12#endif
13#include <slang.h> 1#include <slang.h>
2#include "libslang.h"
3#include <linux/compiler.h>
14#include <linux/list.h> 4#include <linux/list.h>
15#include <linux/rbtree.h> 5#include <linux/rbtree.h>
16#include <stdlib.h> 6#include <stdlib.h>
@@ -19,17 +9,9 @@
19#include "helpline.h" 9#include "helpline.h"
20#include "../color.h" 10#include "../color.h"
21#include "../util.h" 11#include "../util.h"
12#include <stdio.h>
22 13
23#if SLANG_VERSION < 20104 14static int ui_browser__percent_color(double percent, bool current)
24#define sltt_set_color(obj, name, fg, bg) \
25 SLtt_set_color(obj,(char *)name, (char *)fg, (char *)bg)
26#else
27#define sltt_set_color SLtt_set_color
28#endif
29
30newtComponent newt_form__new(void);
31
32int ui_browser__percent_color(double percent, bool current)
33{ 15{
34 if (current) 16 if (current)
35 return HE_COLORSET_SELECTED; 17 return HE_COLORSET_SELECTED;
@@ -40,6 +22,23 @@ int ui_browser__percent_color(double percent, bool current)
40 return HE_COLORSET_NORMAL; 22 return HE_COLORSET_NORMAL;
41} 23}
42 24
25void ui_browser__set_color(struct ui_browser *self __used, int color)
26{
27 SLsmg_set_color(color);
28}
29
30void ui_browser__set_percent_color(struct ui_browser *self,
31 double percent, bool current)
32{
33 int color = ui_browser__percent_color(percent, current);
34 ui_browser__set_color(self, color);
35}
36
37void ui_browser__gotorc(struct ui_browser *self, int y, int x)
38{
39 SLsmg_gotorc(self->y + y, self->x + x);
40}
41
43void ui_browser__list_head_seek(struct ui_browser *self, off_t offset, int whence) 42void ui_browser__list_head_seek(struct ui_browser *self, off_t offset, int whence)
44{ 43{
45 struct list_head *head = self->entries; 44 struct list_head *head = self->entries;
@@ -111,7 +110,7 @@ unsigned int ui_browser__rb_tree_refresh(struct ui_browser *self)
111 nd = self->top; 110 nd = self->top;
112 111
113 while (nd != NULL) { 112 while (nd != NULL) {
114 SLsmg_gotorc(self->y + row, self->x); 113 ui_browser__gotorc(self, row, 0);
115 self->write(self, nd, row); 114 self->write(self, nd, row);
116 if (++row == self->height) 115 if (++row == self->height)
117 break; 116 break;
@@ -131,13 +130,10 @@ void ui_browser__refresh_dimensions(struct ui_browser *self)
131 int cols, rows; 130 int cols, rows;
132 newtGetScreenSize(&cols, &rows); 131 newtGetScreenSize(&cols, &rows);
133 132
134 if (self->width > cols - 4) 133 self->width = cols - 1;
135 self->width = cols - 4; 134 self->height = rows - 2;
136 self->height = rows - 5; 135 self->y = 1;
137 if (self->height > self->nr_entries) 136 self->x = 0;
138 self->height = self->nr_entries;
139 self->y = (rows - self->height) / 2;
140 self->x = (cols - self->width) / 2;
141} 137}
142 138
143void ui_browser__reset_index(struct ui_browser *self) 139void ui_browser__reset_index(struct ui_browser *self)
@@ -146,34 +142,48 @@ void ui_browser__reset_index(struct ui_browser *self)
146 self->seek(self, 0, SEEK_SET); 142 self->seek(self, 0, SEEK_SET);
147} 143}
148 144
145void ui_browser__add_exit_key(struct ui_browser *self, int key)
146{
147 newtFormAddHotKey(self->form, key);
148}
149
150void ui_browser__add_exit_keys(struct ui_browser *self, int keys[])
151{
152 int i = 0;
153
154 while (keys[i] && i < 64) {
155 ui_browser__add_exit_key(self, keys[i]);
156 ++i;
157 }
158}
159
149int ui_browser__show(struct ui_browser *self, const char *title, 160int ui_browser__show(struct ui_browser *self, const char *title,
150 const char *helpline, ...) 161 const char *helpline, ...)
151{ 162{
152 va_list ap; 163 va_list ap;
164 int keys[] = { NEWT_KEY_UP, NEWT_KEY_DOWN, NEWT_KEY_PGUP,
165 NEWT_KEY_PGDN, NEWT_KEY_HOME, NEWT_KEY_END, ' ',
166 NEWT_KEY_LEFT, NEWT_KEY_ESCAPE, 'q', CTRL('c'), 0 };
153 167
154 if (self->form != NULL) { 168 if (self->form != NULL)
155 newtFormDestroy(self->form); 169 newtFormDestroy(self->form);
156 newtPopWindow(); 170
157 }
158 ui_browser__refresh_dimensions(self); 171 ui_browser__refresh_dimensions(self);
159 newtCenteredWindow(self->width, self->height, title); 172 self->form = newtForm(NULL, NULL, 0);
160 self->form = newt_form__new();
161 if (self->form == NULL) 173 if (self->form == NULL)
162 return -1; 174 return -1;
163 175
164 self->sb = newtVerticalScrollbar(self->width, 0, self->height, 176 self->sb = newtVerticalScrollbar(self->width, 1, self->height,
165 HE_COLORSET_NORMAL, 177 HE_COLORSET_NORMAL,
166 HE_COLORSET_SELECTED); 178 HE_COLORSET_SELECTED);
167 if (self->sb == NULL) 179 if (self->sb == NULL)
168 return -1; 180 return -1;
169 181
170 newtFormAddHotKey(self->form, NEWT_KEY_UP); 182 SLsmg_gotorc(0, 0);
171 newtFormAddHotKey(self->form, NEWT_KEY_DOWN); 183 ui_browser__set_color(self, NEWT_COLORSET_ROOT);
172 newtFormAddHotKey(self->form, NEWT_KEY_PGUP); 184 slsmg_write_nstring(title, self->width);
173 newtFormAddHotKey(self->form, NEWT_KEY_PGDN); 185
174 newtFormAddHotKey(self->form, NEWT_KEY_HOME); 186 ui_browser__add_exit_keys(self, keys);
175 newtFormAddHotKey(self->form, NEWT_KEY_END);
176 newtFormAddHotKey(self->form, ' ');
177 newtFormAddComponent(self->form, self->sb); 187 newtFormAddComponent(self->form, self->sb);
178 188
179 va_start(ap, helpline); 189 va_start(ap, helpline);
@@ -185,7 +195,6 @@ int ui_browser__show(struct ui_browser *self, const char *title,
185void ui_browser__hide(struct ui_browser *self) 195void ui_browser__hide(struct ui_browser *self)
186{ 196{
187 newtFormDestroy(self->form); 197 newtFormDestroy(self->form);
188 newtPopWindow();
189 self->form = NULL; 198 self->form = NULL;
190 ui_helpline__pop(); 199 ui_helpline__pop();
191} 200}
@@ -196,28 +205,28 @@ int ui_browser__refresh(struct ui_browser *self)
196 205
197 newtScrollbarSet(self->sb, self->index, self->nr_entries - 1); 206 newtScrollbarSet(self->sb, self->index, self->nr_entries - 1);
198 row = self->refresh(self); 207 row = self->refresh(self);
199 SLsmg_set_color(HE_COLORSET_NORMAL); 208 ui_browser__set_color(self, HE_COLORSET_NORMAL);
200 SLsmg_fill_region(self->y + row, self->x, 209 SLsmg_fill_region(self->y + row, self->x,
201 self->height - row, self->width, ' '); 210 self->height - row, self->width, ' ');
202 211
203 return 0; 212 return 0;
204} 213}
205 214
206int ui_browser__run(struct ui_browser *self, struct newtExitStruct *es) 215int ui_browser__run(struct ui_browser *self)
207{ 216{
217 struct newtExitStruct es;
218
208 if (ui_browser__refresh(self) < 0) 219 if (ui_browser__refresh(self) < 0)
209 return -1; 220 return -1;
210 221
211 while (1) { 222 while (1) {
212 off_t offset; 223 off_t offset;
213 224
214 newtFormRun(self->form, es); 225 newtFormRun(self->form, &es);
215 226
216 if (es->reason != NEWT_EXIT_HOTKEY) 227 if (es.reason != NEWT_EXIT_HOTKEY)
217 break; 228 break;
218 if (is_exit_key(es->u.key)) 229 switch (es.u.key) {
219 return es->u.key;
220 switch (es->u.key) {
221 case NEWT_KEY_DOWN: 230 case NEWT_KEY_DOWN:
222 if (self->index == self->nr_entries - 1) 231 if (self->index == self->nr_entries - 1)
223 break; 232 break;
@@ -274,12 +283,12 @@ int ui_browser__run(struct ui_browser *self, struct newtExitStruct *es)
274 self->seek(self, -offset, SEEK_END); 283 self->seek(self, -offset, SEEK_END);
275 break; 284 break;
276 default: 285 default:
277 return es->u.key; 286 return es.u.key;
278 } 287 }
279 if (ui_browser__refresh(self) < 0) 288 if (ui_browser__refresh(self) < 0)
280 return -1; 289 return -1;
281 } 290 }
282 return 0; 291 return -1;
283} 292}
284 293
285unsigned int ui_browser__list_head_refresh(struct ui_browser *self) 294unsigned int ui_browser__list_head_refresh(struct ui_browser *self)
@@ -294,7 +303,7 @@ unsigned int ui_browser__list_head_refresh(struct ui_browser *self)
294 pos = self->top; 303 pos = self->top;
295 304
296 list_for_each_from(pos, head) { 305 list_for_each_from(pos, head) {
297 SLsmg_gotorc(self->y + row, self->x); 306 ui_browser__gotorc(self, row, 0);
298 self->write(self, pos, row); 307 self->write(self, pos, row);
299 if (++row == self->height) 308 if (++row == self->height)
300 break; 309 break;
diff --git a/tools/perf/util/ui/browser.h b/tools/perf/util/ui/browser.h
index 0b9f829214f7..0dc7e4da36f5 100644
--- a/tools/perf/util/ui/browser.h
+++ b/tools/perf/util/ui/browser.h
@@ -25,16 +25,21 @@ struct ui_browser {
 };
 
 
-int ui_browser__percent_color(double percent, bool current);
+void ui_browser__set_color(struct ui_browser *self, int color);
+void ui_browser__set_percent_color(struct ui_browser *self,
+				   double percent, bool current);
 bool ui_browser__is_current_entry(struct ui_browser *self, unsigned row);
 void ui_browser__refresh_dimensions(struct ui_browser *self);
 void ui_browser__reset_index(struct ui_browser *self);
 
+void ui_browser__gotorc(struct ui_browser *self, int y, int x);
+void ui_browser__add_exit_key(struct ui_browser *self, int key);
+void ui_browser__add_exit_keys(struct ui_browser *self, int keys[]);
 int ui_browser__show(struct ui_browser *self, const char *title,
 		     const char *helpline, ...);
 void ui_browser__hide(struct ui_browser *self);
 int ui_browser__refresh(struct ui_browser *self);
-int ui_browser__run(struct ui_browser *self, struct newtExitStruct *es);
+int ui_browser__run(struct ui_browser *self);
 
 void ui_browser__rb_tree_seek(struct ui_browser *self, off_t offset, int whence);
 unsigned int ui_browser__rb_tree_refresh(struct ui_browser *self);
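
The header does not spell out the calling convention for ui_browser__add_exit_keys(); it is inferred here from the converted callers in this patch (annotate.c's tabs[] and hists.c's exit_keys[] both end in 0). A hedged sketch:

	/* Sketch; the 0-terminator convention is an inference from the
	 * callers below, not documented in browser.h itself. */
	static void example__setup_keys(struct ui_browser *b)
	{
		int exit_keys[] = { 'a', '?', NEWT_KEY_TAB, NEWT_KEY_UNTAB, 0 };

		ui_browser__add_exit_keys(b, exit_keys);	/* batch, 0-terminated */
		ui_browser__add_exit_key(b, '/');		/* one key at a time */
	}
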
diff --git a/tools/perf/util/ui/browsers/annotate.c b/tools/perf/util/ui/browsers/annotate.c
index a90273e63f4f..82b78f99251b 100644
--- a/tools/perf/util/ui/browsers/annotate.c
+++ b/tools/perf/util/ui/browsers/annotate.c
@@ -40,14 +40,12 @@ static void annotate_browser__write(struct ui_browser *self, void *entry, int ro
 
 	if (ol->offset != -1) {
 		struct objdump_line_rb_node *olrb = objdump_line__rb(ol);
-		int color = ui_browser__percent_color(olrb->percent, current_entry);
-		SLsmg_set_color(color);
+		ui_browser__set_percent_color(self, olrb->percent, current_entry);
 		slsmg_printf(" %7.2f ", olrb->percent);
 		if (!current_entry)
-			SLsmg_set_color(HE_COLORSET_CODE);
+			ui_browser__set_color(self, HE_COLORSET_CODE);
 	} else {
-		int color = ui_browser__percent_color(0, current_entry);
-		SLsmg_set_color(color);
+		ui_browser__set_percent_color(self, 0, current_entry);
 		slsmg_write_nstring(" ", 9);
 	}
 
@@ -135,32 +133,31 @@ static void annotate_browser__set_top(struct annotate_browser *self,
 	self->curr_hot = nd;
 }
 
-static int annotate_browser__run(struct annotate_browser *self,
-				 struct newtExitStruct *es)
+static int annotate_browser__run(struct annotate_browser *self)
 {
 	struct rb_node *nd;
 	struct hist_entry *he = self->b.priv;
+	int key;
 
 	if (ui_browser__show(&self->b, he->ms.sym->name,
-			     "<- or ESC: exit, TAB/shift+TAB: cycle thru samples") < 0)
+			     "<-, -> or ESC: exit, TAB/shift+TAB: cycle thru samples") < 0)
 		return -1;
-
-	newtFormAddHotKey(self->b.form, NEWT_KEY_LEFT);
-	newtFormAddHotKey(self->b.form, NEWT_KEY_RIGHT);
+	/*
+	 * To allow builtin-annotate to cycle thru multiple symbols by
+	 * examining the exit key for this function.
+	 */
+	ui_browser__add_exit_key(&self->b, NEWT_KEY_RIGHT);
 
 	nd = self->curr_hot;
 	if (nd) {
-		newtFormAddHotKey(self->b.form, NEWT_KEY_TAB);
-		newtFormAddHotKey(self->b.form, NEWT_KEY_UNTAB);
+		int tabs[] = { NEWT_KEY_TAB, NEWT_KEY_UNTAB, 0 };
+		ui_browser__add_exit_keys(&self->b, tabs);
 	}
 
 	while (1) {
-		ui_browser__run(&self->b, es);
-
-		if (es->reason != NEWT_EXIT_HOTKEY)
-			break;
+		key = ui_browser__run(&self->b);
 
-		switch (es->u.key) {
+		switch (key) {
 		case NEWT_KEY_TAB:
 			nd = rb_prev(nd);
 			if (nd == NULL)
@@ -179,12 +176,11 @@ static int annotate_browser__run(struct annotate_browser *self,
 	}
 out:
 	ui_browser__hide(&self->b);
-	return es->u.key;
+	return key;
 }
 
 int hist_entry__tui_annotate(struct hist_entry *self)
 {
-	struct newtExitStruct es;
 	struct objdump_line *pos, *n;
 	struct objdump_line_rb_node *rbpos;
 	LIST_HEAD(head);
@@ -232,7 +228,7 @@ int hist_entry__tui_annotate(struct hist_entry *self)
 	annotate_browser__set_top(&browser, browser.curr_hot);
 
 	browser.b.width += 18; /* Percentage */
-	ret = annotate_browser__run(&browser, &es);
+	ret = annotate_browser__run(&browser);
 	list_for_each_entry_safe(pos, n, &head, node) {
 		list_del(&pos->node);
 		objdump_line__free(pos);
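
Per the comment added above, annotate_browser__run() now leaks its exit key so that a caller can cycle through symbols. A hypothetical outer loop built on hist_entry__tui_annotate()'s return value; annotate_all(), entries[] and nr_entries are illustrative, not from the patch:

	/* Sketch of the cycling pattern the exit keys enable. */
	static void annotate_all(struct hist_entry *entries[], int nr_entries)
	{
		int i = 0;

		while (i >= 0 && i < nr_entries) {
			int key = hist_entry__tui_annotate(entries[i]);

			switch (key) {
			case NEWT_KEY_TAB:		/* previous symbol */
				--i;
				break;
			case NEWT_KEY_UNTAB:		/* next symbol */
			case NEWT_KEY_RIGHT:
				++i;
				break;
			default:			/* any other key: stop */
				return;
			}
		}
	}
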
diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c
index 6866aa4c41e0..ebda8c3fde9e 100644
--- a/tools/perf/util/ui/browsers/hists.c
+++ b/tools/perf/util/ui/browsers/hists.c
@@ -58,6 +58,11 @@ static char callchain_list__folded(const struct callchain_list *self)
 	return map_symbol__folded(&self->ms);
 }
 
+static void map_symbol__set_folding(struct map_symbol *self, bool unfold)
+{
+	self->unfolded = unfold ? self->has_children : false;
+}
+
 static int callchain_node__count_rows_rb_tree(struct callchain_node *self)
 {
 	int n = 0;
@@ -129,16 +134,16 @@ static void callchain_node__init_have_children_rb_tree(struct callchain_node *se
 	for (nd = rb_first(&self->rb_root); nd; nd = rb_next(nd)) {
 		struct callchain_node *child = rb_entry(nd, struct callchain_node, rb_node);
 		struct callchain_list *chain;
-		int first = true;
+		bool first = true;
 
 		list_for_each_entry(chain, &child->val, list) {
 			if (first) {
 				first = false;
 				chain->ms.has_children = chain->list.next != &child->val ||
-							 rb_first(&child->rb_root) != NULL;
+							 !RB_EMPTY_ROOT(&child->rb_root);
 			} else
 				chain->ms.has_children = chain->list.next == &child->val &&
-							 rb_first(&child->rb_root) != NULL;
+							 !RB_EMPTY_ROOT(&child->rb_root);
 		}
 
 		callchain_node__init_have_children_rb_tree(child);
@@ -150,7 +155,7 @@ static void callchain_node__init_have_children(struct callchain_node *self)
 	struct callchain_list *chain;
 
 	list_for_each_entry(chain, &self->val, list)
-		chain->ms.has_children = rb_first(&self->rb_root) != NULL;
+		chain->ms.has_children = !RB_EMPTY_ROOT(&self->rb_root);
 
 	callchain_node__init_have_children_rb_tree(self);
 }
@@ -168,6 +173,7 @@ static void callchain__init_have_children(struct rb_root *self)
 static void hist_entry__init_have_children(struct hist_entry *self)
 {
 	if (!self->init_have_children) {
+		self->ms.has_children = !RB_EMPTY_ROOT(&self->sorted_chain);
 		callchain__init_have_children(&self->sorted_chain);
 		self->init_have_children = true;
 	}
@@ -195,43 +201,114 @@ static bool hist_browser__toggle_fold(struct hist_browser *self)
 	return false;
 }
 
-static int hist_browser__run(struct hist_browser *self, const char *title,
-			     struct newtExitStruct *es)
+static int callchain_node__set_folding_rb_tree(struct callchain_node *self, bool unfold)
+{
+	int n = 0;
+	struct rb_node *nd;
+
+	for (nd = rb_first(&self->rb_root); nd; nd = rb_next(nd)) {
+		struct callchain_node *child = rb_entry(nd, struct callchain_node, rb_node);
+		struct callchain_list *chain;
+		bool has_children = false;
+
+		list_for_each_entry(chain, &child->val, list) {
+			++n;
+			map_symbol__set_folding(&chain->ms, unfold);
+			has_children = chain->ms.has_children;
+		}
+
+		if (has_children)
+			n += callchain_node__set_folding_rb_tree(child, unfold);
+	}
+
+	return n;
+}
+
+static int callchain_node__set_folding(struct callchain_node *node, bool unfold)
+{
+	struct callchain_list *chain;
+	bool has_children = false;
+	int n = 0;
+
+	list_for_each_entry(chain, &node->val, list) {
+		++n;
+		map_symbol__set_folding(&chain->ms, unfold);
+		has_children = chain->ms.has_children;
+	}
+
+	if (has_children)
+		n += callchain_node__set_folding_rb_tree(node, unfold);
+
+	return n;
+}
+
+static int callchain__set_folding(struct rb_root *chain, bool unfold)
+{
+	struct rb_node *nd;
+	int n = 0;
+
+	for (nd = rb_first(chain); nd; nd = rb_next(nd)) {
+		struct callchain_node *node = rb_entry(nd, struct callchain_node, rb_node);
+		n += callchain_node__set_folding(node, unfold);
+	}
+
+	return n;
+}
+
+static void hist_entry__set_folding(struct hist_entry *self, bool unfold)
+{
+	hist_entry__init_have_children(self);
+	map_symbol__set_folding(&self->ms, unfold);
+
+	if (self->ms.has_children) {
+		int n = callchain__set_folding(&self->sorted_chain, unfold);
+		self->nr_rows = unfold ? n : 0;
+	} else
+		self->nr_rows = 0;
+}
+
+static void hists__set_folding(struct hists *self, bool unfold)
+{
+	struct rb_node *nd;
+
+	self->nr_entries = 0;
+
+	for (nd = rb_first(&self->entries); nd; nd = rb_next(nd)) {
+		struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
+		hist_entry__set_folding(he, unfold);
+		self->nr_entries += 1 + he->nr_rows;
+	}
+}
+
+static void hist_browser__set_folding(struct hist_browser *self, bool unfold)
+{
+	hists__set_folding(self->hists, unfold);
+	self->b.nr_entries = self->hists->nr_entries;
+	/* Go to the start, we may be way after valid entries after a collapse */
+	ui_browser__reset_index(&self->b);
+}
+
+static int hist_browser__run(struct hist_browser *self, const char *title)
 {
-	char str[256], unit;
-	unsigned long nr_events = self->hists->stats.nr_events[PERF_RECORD_SAMPLE];
+	int key;
+	int exit_keys[] = { 'a', '?', 'h', 'C', 'd', 'D', 'E', 't',
+			    NEWT_KEY_ENTER, NEWT_KEY_RIGHT, NEWT_KEY_LEFT, 0, };
 
 	self->b.entries = &self->hists->entries;
 	self->b.nr_entries = self->hists->nr_entries;
 
 	hist_browser__refresh_dimensions(self);
 
-	nr_events = convert_unit(nr_events, &unit);
-	snprintf(str, sizeof(str), "Events: %lu%c ",
-		 nr_events, unit);
-	newtDrawRootText(0, 0, str);
-
 	if (ui_browser__show(&self->b, title,
 			     "Press '?' for help on key bindings") < 0)
 		return -1;
 
-	newtFormAddHotKey(self->b.form, 'a');
-	newtFormAddHotKey(self->b.form, '?');
-	newtFormAddHotKey(self->b.form, 'h');
-	newtFormAddHotKey(self->b.form, 'd');
-	newtFormAddHotKey(self->b.form, 'D');
-	newtFormAddHotKey(self->b.form, 't');
-
-	newtFormAddHotKey(self->b.form, NEWT_KEY_LEFT);
-	newtFormAddHotKey(self->b.form, NEWT_KEY_RIGHT);
-	newtFormAddHotKey(self->b.form, NEWT_KEY_ENTER);
+	ui_browser__add_exit_keys(&self->b, exit_keys);
 
 	while (1) {
-		ui_browser__run(&self->b, es);
+		key = ui_browser__run(&self->b);
 
-		if (es->reason != NEWT_EXIT_HOTKEY)
-			break;
-		switch (es->u.key) {
+		switch (key) {
 		case 'D': { /* Debug */
 			static int seq;
 			struct hist_entry *h = rb_entry(self->b.top,
@@ -245,18 +322,26 @@ static int hist_browser__run(struct hist_browser *self, const char *title,
 					   self->b.top_idx,
 					   h->row_offset, h->nr_rows);
 		}
-			continue;
+			break;
+		case 'C':
+			/* Collapse the whole world. */
+			hist_browser__set_folding(self, false);
+			break;
+		case 'E':
+			/* Expand the whole world. */
+			hist_browser__set_folding(self, true);
+			break;
 		case NEWT_KEY_ENTER:
 			if (hist_browser__toggle_fold(self))
 				break;
 			/* fall thru */
 		default:
-			return 0;
+			goto out;
 		}
 	}
-
+out:
 	ui_browser__hide(&self->b);
-	return 0;
+	return key;
 }
 
 static char *callchain_list__sym_name(struct callchain_list *self,
@@ -306,15 +391,10 @@ static int hist_browser__show_callchain_node_rb_tree(struct hist_browser *self,
 			int color;
 			bool was_first = first;
 
-			if (first) {
+			if (first)
 				first = false;
-				chain->ms.has_children = chain->list.next != &child->val ||
-							 rb_first(&child->rb_root) != NULL;
-			} else {
+			else
 				extra_offset = LEVEL_OFFSET_STEP;
-				chain->ms.has_children = chain->list.next == &child->val &&
-							 rb_first(&child->rb_root) != NULL;
-			}
 
 			folded_sign = callchain_list__folded(chain);
 			if (*row_offset != 0) {
@@ -341,8 +421,8 @@
 				*is_current_entry = true;
 			}
 
-			SLsmg_set_color(color);
-			SLsmg_gotorc(self->b.y + row, self->b.x);
+			ui_browser__set_color(&self->b, color);
+			ui_browser__gotorc(&self->b, row, 0);
 			slsmg_write_nstring(" ", offset + extra_offset);
 			slsmg_printf("%c ", folded_sign);
 			slsmg_write_nstring(str, width);
@@ -384,12 +464,7 @@ static int hist_browser__show_callchain_node(struct hist_browser *self,
 	list_for_each_entry(chain, &node->val, list) {
 		char ipstr[BITS_PER_LONG / 4 + 1], *s;
 		int color;
-		/*
-		 * FIXME: This should be moved to somewhere else,
-		 * probably when the callchain is created, so as not to
-		 * traverse it all over again
-		 */
-		chain->ms.has_children = rb_first(&node->rb_root) != NULL;
+
 		folded_sign = callchain_list__folded(chain);
 
 		if (*row_offset != 0) {
@@ -405,8 +480,8 @@
 		}
 
 		s = callchain_list__sym_name(chain, ipstr, sizeof(ipstr));
-		SLsmg_gotorc(self->b.y + row, self->b.x);
-		SLsmg_set_color(color);
+		ui_browser__gotorc(&self->b, row, 0);
+		ui_browser__set_color(&self->b, color);
 		slsmg_write_nstring(" ", offset);
 		slsmg_printf("%c ", folded_sign);
 		slsmg_write_nstring(s, width - 2);
@@ -465,7 +540,7 @@ static int hist_browser__show_entry(struct hist_browser *self,
 	}
 
 	if (symbol_conf.use_callchain) {
-		entry->ms.has_children = !RB_EMPTY_ROOT(&entry->sorted_chain);
+		hist_entry__init_have_children(entry);
 		folded_sign = hist_entry__folded(entry);
 	}
 
@@ -484,8 +559,8 @@
 		color = HE_COLORSET_NORMAL;
 	}
 
-	SLsmg_set_color(color);
-	SLsmg_gotorc(self->b.y + row, self->b.x);
+	ui_browser__set_color(&self->b, color);
+	ui_browser__gotorc(&self->b, row, 0);
 	if (symbol_conf.use_callchain) {
 		slsmg_printf("%c ", folded_sign);
 		width -= 2;
@@ -687,8 +762,6 @@ static struct hist_browser *hist_browser__new(struct hists *hists)
 
 static void hist_browser__delete(struct hist_browser *self)
 {
-	newtFormDestroy(self->b.form);
-	newtPopWindow();
 	free(self);
 }
 
@@ -702,21 +775,26 @@ static struct thread *hist_browser__selected_thread(struct hist_browser *self)
 	return self->he_selection->thread;
 }
 
-static int hist_browser__title(char *bf, size_t size, const char *ev_name,
-			       const struct dso *dso, const struct thread *thread)
+static int hists__browser_title(struct hists *self, char *bf, size_t size,
+				const char *ev_name, const struct dso *dso,
+				const struct thread *thread)
 {
-	int printed = 0;
+	char unit;
+	int printed;
+	unsigned long nr_events = self->stats.nr_events[PERF_RECORD_SAMPLE];
+
+	nr_events = convert_unit(nr_events, &unit);
+	printed = snprintf(bf, size, "Events: %lu%c %s", nr_events, unit, ev_name);
 
 	if (thread)
 		printed += snprintf(bf + printed, size - printed,
-				    "Thread: %s(%d)",
+				    ", Thread: %s(%d)",
 				    (thread->comm_set ? thread->comm : ""),
 				    thread->pid);
 	if (dso)
 		printed += snprintf(bf + printed, size - printed,
-				    "%sDSO: %s", thread ? " " : "",
-				    dso->short_name);
-	return printed ?: snprintf(bf, size, "Event: %s", ev_name);
+				    ", DSO: %s", dso->short_name);
+	return printed;
 }
 
 int hists__browse(struct hists *self, const char *helpline, const char *ev_name)
@@ -725,7 +803,6 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name)
 	struct pstack *fstack;
 	const struct thread *thread_filter = NULL;
 	const struct dso *dso_filter = NULL;
-	struct newtExitStruct es;
 	char msg[160];
 	int key = -1;
 
@@ -738,9 +815,8 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name)
 
 	ui_helpline__push(helpline);
 
-	hist_browser__title(msg, sizeof(msg), ev_name,
-			    dso_filter, thread_filter);
-
+	hists__browser_title(self, msg, sizeof(msg), ev_name,
+			     dso_filter, thread_filter);
 	while (1) {
 		const struct thread *thread;
 		const struct dso *dso;
@@ -749,70 +825,63 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name)
 		    annotate = -2, zoom_dso = -2, zoom_thread = -2,
 		    browse_map = -2;
 
-		if (hist_browser__run(browser, msg, &es))
-			break;
+		key = hist_browser__run(browser, msg);
 
 		thread = hist_browser__selected_thread(browser);
 		dso = browser->selection->map ? browser->selection->map->dso : NULL;
 
-		if (es.reason == NEWT_EXIT_HOTKEY) {
-			key = es.u.key;
-
-			switch (key) {
-			case NEWT_KEY_F1:
-				goto do_help;
-			case NEWT_KEY_TAB:
-			case NEWT_KEY_UNTAB:
-				/*
-				 * Exit the browser, let hists__browser_tree
-				 * go to the next or previous
-				 */
-				goto out_free_stack;
-			default:;
-			}
-
-			switch (key) {
-			case 'a':
-				if (browser->selection->map == NULL ||
-				    browser->selection->map->dso->annotate_warned)
-					continue;
-				goto do_annotate;
-			case 'd':
-				goto zoom_dso;
-			case 't':
-				goto zoom_thread;
-			case 'h':
-			case '?':
-do_help:
-				ui__help_window("-> Zoom into DSO/Threads & Annotate current symbol\n"
-						"<- Zoom out\n"
-						"a Annotate current symbol\n"
-						"h/?/F1 Show this window\n"
-						"d Zoom into current DSO\n"
-						"t Zoom into current Thread\n"
-						"q/CTRL+C Exit browser");
-				continue;
-			default:;
-			}
-			if (is_exit_key(key)) {
-				if (key == NEWT_KEY_ESCAPE &&
-				    !ui__dialog_yesno("Do you really want to exit?"))
-					continue;
-				break;
-			}
-
-			if (es.u.key == NEWT_KEY_LEFT) {
-				const void *top;
-
-				if (pstack__empty(fstack))
-					continue;
-				top = pstack__pop(fstack);
-				if (top == &dso_filter)
-					goto zoom_out_dso;
-				if (top == &thread_filter)
-					goto zoom_out_thread;
-				continue;
-			}
+		switch (key) {
+		case NEWT_KEY_TAB:
+		case NEWT_KEY_UNTAB:
+			/*
+			 * Exit the browser, let hists__browser_tree
+			 * go to the next or previous
+			 */
+			goto out_free_stack;
+		case 'a':
+			if (browser->selection->map == NULL &&
+			    browser->selection->map->dso->annotate_warned)
+				continue;
+			goto do_annotate;
+		case 'd':
+			goto zoom_dso;
+		case 't':
+			goto zoom_thread;
+		case NEWT_KEY_F1:
+		case 'h':
+		case '?':
+			ui__help_window("-> Zoom into DSO/Threads & Annotate current symbol\n"
+					"<- Zoom out\n"
+					"a Annotate current symbol\n"
+					"h/?/F1 Show this window\n"
+					"C Collapse all callchains\n"
+					"E Expand all callchains\n"
+					"d Zoom into current DSO\n"
+					"t Zoom into current Thread\n"
+					"q/CTRL+C Exit browser");
+			continue;
+		case NEWT_KEY_ENTER:
+		case NEWT_KEY_RIGHT:
+			/* menu */
+			break;
+		case NEWT_KEY_LEFT: {
+			const void *top;
+
+			if (pstack__empty(fstack))
+				continue;
+			top = pstack__pop(fstack);
+			if (top == &dso_filter)
+				goto zoom_out_dso;
+			if (top == &thread_filter)
+				goto zoom_out_thread;
+			continue;
+		}
+		case NEWT_KEY_ESCAPE:
+			if (!ui__dialog_yesno("Do you really want to exit?"))
+				continue;
+			/* Fall thru */
+		default:
+			goto out_free_stack;
 		}
 
 		if (browser->selection->sym != NULL &&
@@ -885,8 +954,8 @@ zoom_out_dso:
 			pstack__push(fstack, &dso_filter);
 		}
 		hists__filter_by_dso(self, dso_filter);
-		hist_browser__title(msg, sizeof(msg), ev_name,
-				    dso_filter, thread_filter);
+		hists__browser_title(self, msg, sizeof(msg), ev_name,
+				     dso_filter, thread_filter);
 		hist_browser__reset(browser);
 	} else if (choice == zoom_thread) {
zoom_thread:
@@ -903,8 +972,8 @@ zoom_out_thread:
 			pstack__push(fstack, &thread_filter);
 		}
 		hists__filter_by_thread(self, thread_filter);
-		hist_browser__title(msg, sizeof(msg), ev_name,
-				    dso_filter, thread_filter);
+		hists__browser_title(self, msg, sizeof(msg), ev_name,
+				     dso_filter, thread_filter);
 		hist_browser__reset(browser);
 	}
 }
@@ -925,10 +994,6 @@ int hists__tui_browse_tree(struct rb_root *self, const char *help)
 		const char *ev_name = __event_name(hists->type, hists->config);
 
 		key = hists__browse(hists, help, ev_name);
-
-		if (is_exit_key(key))
-			break;
-
 		switch (key) {
 		case NEWT_KEY_TAB:
 			next = rb_next(nd);
@@ -940,7 +1005,7 @@ int hists__tui_browse_tree(struct rb_root *self, const char *help)
 			continue;
 			nd = rb_prev(nd);
 		default:
-			break;
+			return key;
 		}
 	}
 
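
For the new 'C'/'E' handlers above, the invariant to keep in mind is the row accounting: each hist_entry contributes one row for itself plus nr_rows visible callchain rows. A sketch restating that arithmetic; hists__visible_rows() is a hypothetical helper mirroring hists__set_folding(), not part of the patch:

	/* Sketch: an entry whose expanded callchain shows three rows
	 * contributes 1 + 3 = 4 rows to the browser's nr_entries. */
	static unsigned int hists__visible_rows(struct hists *hists)
	{
		unsigned int rows = 0;
		struct rb_node *nd;

		for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
			struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);

			/* the entry itself, plus its unfolded callchain */
			rows += 1 + he->nr_rows;
		}

		return rows;
	}
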
diff --git a/tools/perf/util/ui/browsers/map.c b/tools/perf/util/ui/browsers/map.c
index 142b825b42bf..e35437dfa5b4 100644
--- a/tools/perf/util/ui/browsers/map.c
+++ b/tools/perf/util/ui/browsers/map.c
@@ -1,6 +1,5 @@
 #include "../libslang.h"
 #include <elf.h>
-#include <newt.h>
 #include <sys/ttydefaults.h>
 #include <ctype.h>
 #include <string.h>
@@ -47,7 +46,6 @@ out_free_form:
 struct map_browser {
 	struct ui_browser b;
 	struct map *map;
-	u16 namelen;
 	u8 addrlen;
 };
 
@@ -56,14 +54,16 @@ static void map_browser__write(struct ui_browser *self, void *nd, int row)
 	struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
 	struct map_browser *mb = container_of(self, struct map_browser, b);
 	bool current_entry = ui_browser__is_current_entry(self, row);
-	int color = ui_browser__percent_color(0, current_entry);
+	int width;
 
-	SLsmg_set_color(color);
+	ui_browser__set_percent_color(self, 0, current_entry);
 	slsmg_printf("%*llx %*llx %c ",
 		     mb->addrlen, sym->start, mb->addrlen, sym->end,
 		     sym->binding == STB_GLOBAL ? 'g' :
 		     sym->binding == STB_LOCAL ? 'l' : 'w');
-	slsmg_write_nstring(sym->name, mb->namelen);
+	width = self->width - ((mb->addrlen * 2) + 4);
+	if (width > 0)
+		slsmg_write_nstring(sym->name, width);
 }
 
 /* FIXME uber-kludgy, see comment on cmd_report... */
@@ -98,31 +98,29 @@ static int map_browser__search(struct map_browser *self)
 	return 0;
 }
 
-static int map_browser__run(struct map_browser *self, struct newtExitStruct *es)
+static int map_browser__run(struct map_browser *self)
 {
+	int key;
+
 	if (ui_browser__show(&self->b, self->map->dso->long_name,
 			     "Press <- or ESC to exit, %s / to search",
 			     verbose ? "" : "restart with -v to use") < 0)
 		return -1;
 
-	newtFormAddHotKey(self->b.form, NEWT_KEY_LEFT);
-	newtFormAddHotKey(self->b.form, NEWT_KEY_ENTER);
 	if (verbose)
-		newtFormAddHotKey(self->b.form, '/');
+		ui_browser__add_exit_key(&self->b, '/');
 
 	while (1) {
-		ui_browser__run(&self->b, es);
+		key = ui_browser__run(&self->b);
 
-		if (es->reason != NEWT_EXIT_HOTKEY)
-			break;
-		if (verbose && es->u.key == '/')
+		if (verbose && key == '/')
 			map_browser__search(self);
 		else
 			break;
 	}
 
 	ui_browser__hide(&self->b);
-	return 0;
+	return key;
 }
 
@@ -136,7 +134,6 @@ int map__browse(struct map *self)
 		},
 		.map = self,
 	};
-	struct newtExitStruct es;
 	struct rb_node *nd;
 	char tmp[BITS_PER_LONG / 4];
 	u64 maxaddr = 0;
@@ -144,8 +141,6 @@ int map__browse(struct map *self)
 	for (nd = rb_first(mb.b.entries); nd; nd = rb_next(nd)) {
 		struct symbol *pos = rb_entry(nd, struct symbol, rb_node);
 
-		if (mb.namelen < pos->namelen)
-			mb.namelen = pos->namelen;
 		if (maxaddr < pos->end)
 			maxaddr = pos->end;
 		if (verbose) {
@@ -156,6 +151,5 @@ int map__browse(struct map *self)
 	}
 
 	mb.addrlen = snprintf(tmp, sizeof(tmp), "%llx", maxaddr);
-	mb.b.width += mb.addrlen * 2 + 4 + mb.namelen;
-	return map_browser__run(&mb, &es);
+	return map_browser__run(&mb);
 }
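
The (mb->addrlen * 2) + 4 in map_browser__write() accounts for the fixed columns printed before the symbol name. Spelled out, assuming the "%*llx %*llx %c " format above; this fragment restates the hunk with the arithmetic made explicit:

	/* Two addrlen-wide hex addresses, two separating spaces, the
	 * binding char and a trailing space:
	 *     addrlen + 1 + addrlen + 1 + 1 + 1 == (addrlen * 2) + 4
	 * whatever remains of the browser width goes to the name. */
	int width = self->width - ((mb->addrlen * 2) + 4);

	if (width > 0)
		slsmg_write_nstring(sym->name, width);
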
diff --git a/tools/perf/util/ui/util.c b/tools/perf/util/ui/util.c
index 04600e26ceea..9706d9d40279 100644
--- a/tools/perf/util/ui/util.c
+++ b/tools/perf/util/ui/util.c
@@ -11,8 +11,6 @@
 #include "helpline.h"
 #include "util.h"
 
-newtComponent newt_form__new(void);
-
 static void newt_form__set_exit_keys(newtComponent self)
 {
 	newtFormAddHotKey(self, NEWT_KEY_LEFT);
@@ -22,7 +20,7 @@ static void newt_form__set_exit_keys(newtComponent self)
 	newtFormAddHotKey(self, CTRL('c'));
 }
 
-newtComponent newt_form__new(void)
+static newtComponent newt_form__new(void)
 {
 	newtComponent self = newtForm(NULL, NULL, 0);
 	if (self)
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index f380fed74359..7562707ddd1c 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -266,19 +266,6 @@ bool strglobmatch(const char *str, const char *pat);
 bool strlazymatch(const char *str, const char *pat);
 unsigned long convert_unit(unsigned long value, char *unit);
 
-#ifndef ESC
-#define ESC 27
-#endif
-
-static inline bool is_exit_key(int key)
-{
-	char up;
-	if (key == CTRL('c') || key == ESC)
-		return true;
-	up = toupper(key);
-	return up == 'Q';
-}
-
 #define _STR(x) #x
 #define STR(x) _STR(x)
 
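
With is_exit_key() removed, the quit keys are registered once as form hot keys (see newt_form__set_exit_keys() in util.c above) and simply propagate out of the browsers. A hedged sketch of a caller in that style; example__browse_one_event() is hypothetical, but the key handling matches hists__tui_browse_tree() above:

	/* Sketch: no per-caller is_exit_key() test anymore. */
	static int example__browse_one_event(struct hists *hists, const char *help,
					     const char *ev_name)
	{
		int key = hists__browse(hists, help, ev_name);

		/* TAB/UNTAB mean "switch event"; everything else,
		 * including q, ESC and CTRL+C, just bubbles up. */
		if (key == NEWT_KEY_TAB || key == NEWT_KEY_UNTAB)
			return 0;

		return key;
	}
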