-rw-r--r--  arch/alpha/kernel/perf_event.c | 128
-rw-r--r--  arch/arm/kernel/perf_event.c | 198
-rw-r--r--  arch/arm/oprofile/common.c | 2
-rw-r--r--  arch/powerpc/kernel/perf_callchain.c | 86
-rw-r--r--  arch/powerpc/kernel/perf_event.c | 164
-rw-r--r--  arch/powerpc/kernel/perf_event_fsl_emb.c | 148
-rw-r--r--  arch/sh/kernel/perf_callchain.c | 50
-rw-r--r--  arch/sh/kernel/perf_event.c | 141
-rw-r--r--  arch/sparc/kernel/perf_event.c | 233
-rw-r--r--  arch/x86/include/asm/perf_event_p4.h | 52
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c | 259
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c | 8
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c | 13
-rw-r--r--  arch/x86/kernel/cpu/perf_event_p4.c | 292
-rw-r--r--  arch/x86/kernel/kprobes.c | 11
-rw-r--r--  include/asm-generic/hardirq.h | 2
-rw-r--r--  include/linux/ftrace_event.h | 8
-rw-r--r--  include/linux/interrupt.h | 8
-rw-r--r--  include/linux/perf_event.h | 155
-rw-r--r--  include/linux/sched.h | 9
-rw-r--r--  include/trace/events/irq.h | 26
-rw-r--r--  include/trace/events/napi.h | 25
-rw-r--r--  include/trace/events/net.h | 82
-rw-r--r--  include/trace/events/power.h | 90
-rw-r--r--  include/trace/events/skb.h | 17
-rw-r--r--  kernel/exit.c | 4
-rw-r--r--  kernel/hw_breakpoint.c | 66
-rw-r--r--  kernel/kprobes.c | 22
-rw-r--r--  kernel/perf_event.c | 2344
-rw-r--r--  kernel/sched.c | 2
-rw-r--r--  kernel/trace/ftrace.c | 123
-rw-r--r--  kernel/trace/ring_buffer.c | 21
-rw-r--r--  kernel/trace/trace_event_perf.c | 28
-rw-r--r--  kernel/trace/trace_events.c | 55
-rw-r--r--  kernel/trace/trace_functions_graph.c | 126
-rw-r--r--  kernel/watchdog.c | 41
-rw-r--r--  lib/Kconfig.debug | 8
-rw-r--r--  net/core/datagram.c | 1
-rw-r--r--  net/core/dev.c | 8
-rw-r--r--  net/core/net-traces.c | 1
-rw-r--r--  net/core/skbuff.c | 1
-rw-r--r--  tools/perf/Documentation/perf-annotate.txt | 11
-rw-r--r--  tools/perf/Documentation/perf-report.txt | 7
-rw-r--r--  tools/perf/Makefile | 30
-rw-r--r--  tools/perf/builtin-annotate.c | 26
-rw-r--r--  tools/perf/builtin-report.c | 14
-rw-r--r--  tools/perf/feature-tests.mak | 11
-rw-r--r--  tools/perf/scripts/python/bin/netdev-times-record | 8
-rw-r--r--  tools/perf/scripts/python/bin/netdev-times-report | 5
-rw-r--r--  tools/perf/scripts/python/netdev-times.py | 464
-rw-r--r--  tools/perf/util/cache.h | 2
-rw-r--r--  tools/perf/util/callchain.c | 98
-rw-r--r--  tools/perf/util/callchain.h | 27
-rw-r--r--  tools/perf/util/hist.c | 4
-rw-r--r--  tools/perf/util/path.c | 3
-rw-r--r--  tools/perf/util/sort.h | 2
-rw-r--r--  tools/perf/util/symbol.c | 14
-rw-r--r--  tools/perf/util/symbol.h | 1
-rw-r--r--  tools/perf/util/ui/browser.c | 117
-rw-r--r--  tools/perf/util/ui/browser.h | 9
-rw-r--r--  tools/perf/util/ui/browsers/annotate.c | 38
-rw-r--r--  tools/perf/util/ui/browsers/hists.c | 327
-rw-r--r--  tools/perf/util/ui/browsers/map.c | 32
-rw-r--r--  tools/perf/util/ui/util.c | 4
-rw-r--r--  tools/perf/util/util.h | 13
65 files changed, 4117 insertions, 2208 deletions
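
Every architecture touched below is converted to the same reworked pmu interface: the old per-event enable/disable/unthrottle callbacks give way to event_init/add/del/start/stop/read plus whole-PMU pmu_enable/pmu_disable, and each driver registers its struct pmu with perf_pmu_register(). The sketch below is a minimal callback table in that style, using only names that appear in this diff; the dummy_* functions and their placeholder bodies are illustrative, not code from any of the drivers that follow.

#include <linux/kernel.h>
#include <linux/perf_event.h>

static void dummy_pmu_enable(struct pmu *pmu)  { /* re-arm the whole PMU */ }
static void dummy_pmu_disable(struct pmu *pmu) { /* quiesce the whole PMU */ }

static int dummy_event_init(struct perf_event *event)
{
	/* event types this PMU does not handle must return -ENOENT */
	if (event->attr.type != PERF_TYPE_RAW)
		return -ENOENT;
	return 0;
}

static void dummy_start(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_RELOAD)
		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
	event->hw.state = 0;
	/* program the period and unmask the counter here */
}

static void dummy_stop(struct perf_event *event, int flags)
{
	if (!(event->hw.state & PERF_HES_STOPPED)) {
		/* mask the counter here */
		event->hw.state |= PERF_HES_STOPPED;
	}
	if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) {
		/* fold the hardware count into event->count here */
		event->hw.state |= PERF_HES_UPTODATE;
	}
}

static int dummy_add(struct perf_event *event, int flags)
{
	/* claim a counter; on success the event starts out stopped */
	event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
	if (flags & PERF_EF_START)
		dummy_start(event, PERF_EF_RELOAD);
	return 0;
}

static void dummy_del(struct perf_event *event, int flags)
{
	dummy_stop(event, PERF_EF_UPDATE);
	/* release the counter here */
}

static void dummy_read(struct perf_event *event) { /* update event->count */ }

static struct pmu dummy_pmu = {
	.pmu_enable	= dummy_pmu_enable,
	.pmu_disable	= dummy_pmu_disable,
	.event_init	= dummy_event_init,
	.add		= dummy_add,
	.del		= dummy_del,
	.start		= dummy_start,
	.stop		= dummy_stop,
	.read		= dummy_read,
};

Registration happens once at init time, exactly as the arch files below do it: perf_pmu_register(&dummy_pmu);
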
diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c
index 85d8e4f58c83..1cc49683fb69 100644
--- a/arch/alpha/kernel/perf_event.c
+++ b/arch/alpha/kernel/perf_event.c
@@ -307,7 +307,7 @@ again:
307 new_raw_count) != prev_raw_count) 307 new_raw_count) != prev_raw_count)
308 goto again; 308 goto again;
309 309
310 delta = (new_raw_count - (prev_raw_count & alpha_pmu->pmc_count_mask[idx])) + ovf; 310 delta = (new_raw_count - (prev_raw_count & alpha_pmu->pmc_count_mask[idx])) + ovf;
311 311
312 /* It is possible on very rare occasions that the PMC has overflowed 312 /* It is possible on very rare occasions that the PMC has overflowed
313 * but the interrupt is yet to come. Detect and fix this situation. 313 * but the interrupt is yet to come. Detect and fix this situation.
@@ -402,14 +402,13 @@ static void maybe_change_configuration(struct cpu_hw_events *cpuc)
402 struct hw_perf_event *hwc = &pe->hw; 402 struct hw_perf_event *hwc = &pe->hw;
403 int idx = hwc->idx; 403 int idx = hwc->idx;
404 404
405 if (cpuc->current_idx[j] != PMC_NO_INDEX) { 405 if (cpuc->current_idx[j] == PMC_NO_INDEX) {
406 cpuc->idx_mask |= (1<<cpuc->current_idx[j]); 406 alpha_perf_event_set_period(pe, hwc, idx);
407 continue; 407 cpuc->current_idx[j] = idx;
408 } 408 }
409 409
410 alpha_perf_event_set_period(pe, hwc, idx); 410 if (!(hwc->state & PERF_HES_STOPPED))
411 cpuc->current_idx[j] = idx; 411 cpuc->idx_mask |= (1<<cpuc->current_idx[j]);
412 cpuc->idx_mask |= (1<<cpuc->current_idx[j]);
413 } 412 }
414 cpuc->config = cpuc->event[0]->hw.config_base; 413 cpuc->config = cpuc->event[0]->hw.config_base;
415} 414}
@@ -420,12 +419,13 @@ static void maybe_change_configuration(struct cpu_hw_events *cpuc)
420 * - this function is called from outside this module via the pmu struct 419 * - this function is called from outside this module via the pmu struct
421 * returned from perf event initialisation. 420 * returned from perf event initialisation.
422 */ 421 */
423static int alpha_pmu_enable(struct perf_event *event) 422static int alpha_pmu_add(struct perf_event *event, int flags)
424{ 423{
425 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 424 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
425 struct hw_perf_event *hwc = &event->hw;
426 int n0; 426 int n0;
427 int ret; 427 int ret;
428 unsigned long flags; 428 unsigned long irq_flags;
429 429
430 /* 430 /*
431 * The Sparc code has the IRQ disable first followed by the perf 431 * The Sparc code has the IRQ disable first followed by the perf
@@ -435,8 +435,8 @@ static int alpha_pmu_enable(struct perf_event *event)
435 * nevertheless we disable the PMCs first to enable a potential 435 * nevertheless we disable the PMCs first to enable a potential
436 * final PMI to occur before we disable interrupts. 436 * final PMI to occur before we disable interrupts.
437 */ 437 */
438 perf_disable(); 438 perf_pmu_disable(event->pmu);
439 local_irq_save(flags); 439 local_irq_save(irq_flags);
440 440
441 /* Default to error to be returned */ 441 /* Default to error to be returned */
442 ret = -EAGAIN; 442 ret = -EAGAIN;
@@ -455,8 +455,12 @@ static int alpha_pmu_enable(struct perf_event *event)
455 } 455 }
456 } 456 }
457 457
458 local_irq_restore(flags); 458 hwc->state = PERF_HES_UPTODATE;
459 perf_enable(); 459 if (!(flags & PERF_EF_START))
460 hwc->state |= PERF_HES_STOPPED;
461
462 local_irq_restore(irq_flags);
463 perf_pmu_enable(event->pmu);
460 464
461 return ret; 465 return ret;
462} 466}
@@ -467,15 +471,15 @@ static int alpha_pmu_enable(struct perf_event *event)
467 * - this function is called from outside this module via the pmu struct 471 * - this function is called from outside this module via the pmu struct
468 * returned from perf event initialisation. 472 * returned from perf event initialisation.
469 */ 473 */
470static void alpha_pmu_disable(struct perf_event *event) 474static void alpha_pmu_del(struct perf_event *event, int flags)
471{ 475{
472 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 476 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
473 struct hw_perf_event *hwc = &event->hw; 477 struct hw_perf_event *hwc = &event->hw;
474 unsigned long flags; 478 unsigned long irq_flags;
475 int j; 479 int j;
476 480
477 perf_disable(); 481 perf_pmu_disable(event->pmu);
478 local_irq_save(flags); 482 local_irq_save(irq_flags);
479 483
480 for (j = 0; j < cpuc->n_events; j++) { 484 for (j = 0; j < cpuc->n_events; j++) {
481 if (event == cpuc->event[j]) { 485 if (event == cpuc->event[j]) {
@@ -501,8 +505,8 @@ static void alpha_pmu_disable(struct perf_event *event)
501 } 505 }
502 } 506 }
503 507
504 local_irq_restore(flags); 508 local_irq_restore(irq_flags);
505 perf_enable(); 509 perf_pmu_enable(event->pmu);
506} 510}
507 511
508 512
@@ -514,13 +518,44 @@ static void alpha_pmu_read(struct perf_event *event)
514} 518}
515 519
516 520
517static void alpha_pmu_unthrottle(struct perf_event *event) 521static void alpha_pmu_stop(struct perf_event *event, int flags)
522{
523 struct hw_perf_event *hwc = &event->hw;
524 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
525
526 if (!(hwc->state & PERF_HES_STOPPED)) {
527 cpuc->idx_mask &= ~(1UL<<hwc->idx);
528 hwc->state |= PERF_HES_STOPPED;
529 }
530
531 if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
532 alpha_perf_event_update(event, hwc, hwc->idx, 0);
533 hwc->state |= PERF_HES_UPTODATE;
534 }
535
536 if (cpuc->enabled)
537 wrperfmon(PERFMON_CMD_DISABLE, (1UL<<hwc->idx));
538}
539
540
541static void alpha_pmu_start(struct perf_event *event, int flags)
518{ 542{
519 struct hw_perf_event *hwc = &event->hw; 543 struct hw_perf_event *hwc = &event->hw;
520 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 544 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
521 545
546 if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
547 return;
548
549 if (flags & PERF_EF_RELOAD) {
550 WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
551 alpha_perf_event_set_period(event, hwc, hwc->idx);
552 }
553
554 hwc->state = 0;
555
522 cpuc->idx_mask |= 1UL<<hwc->idx; 556 cpuc->idx_mask |= 1UL<<hwc->idx;
523 wrperfmon(PERFMON_CMD_ENABLE, (1UL<<hwc->idx)); 557 if (cpuc->enabled)
558 wrperfmon(PERFMON_CMD_ENABLE, (1UL<<hwc->idx));
524} 559}
525 560
526 561
@@ -642,39 +677,36 @@ static int __hw_perf_event_init(struct perf_event *event)
642 return 0; 677 return 0;
643} 678}
644 679
645static const struct pmu pmu = {
646 .enable = alpha_pmu_enable,
647 .disable = alpha_pmu_disable,
648 .read = alpha_pmu_read,
649 .unthrottle = alpha_pmu_unthrottle,
650};
651
652
653/* 680/*
654 * Main entry point to initialise a HW performance event. 681 * Main entry point to initialise a HW performance event.
655 */ 682 */
656const struct pmu *hw_perf_event_init(struct perf_event *event) 683static int alpha_pmu_event_init(struct perf_event *event)
657{ 684{
658 int err; 685 int err;
659 686
687 switch (event->attr.type) {
688 case PERF_TYPE_RAW:
689 case PERF_TYPE_HARDWARE:
690 case PERF_TYPE_HW_CACHE:
691 break;
692
693 default:
694 return -ENOENT;
695 }
696
660 if (!alpha_pmu) 697 if (!alpha_pmu)
661 return ERR_PTR(-ENODEV); 698 return -ENODEV;
662 699
663 /* Do the real initialisation work. */ 700 /* Do the real initialisation work. */
664 err = __hw_perf_event_init(event); 701 err = __hw_perf_event_init(event);
665 702
666 if (err) 703 return err;
667 return ERR_PTR(err);
668
669 return &pmu;
670} 704}
671 705
672
673
674/* 706/*
675 * Main entry point - enable HW performance counters. 707 * Main entry point - enable HW performance counters.
676 */ 708 */
677void hw_perf_enable(void) 709static void alpha_pmu_enable(struct pmu *pmu)
678{ 710{
679 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 711 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
680 712
@@ -700,7 +732,7 @@ void hw_perf_enable(void)
700 * Main entry point - disable HW performance counters. 732 * Main entry point - disable HW performance counters.
701 */ 733 */
702 734
703void hw_perf_disable(void) 735static void alpha_pmu_disable(struct pmu *pmu)
704{ 736{
705 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 737 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
706 738
@@ -713,6 +745,17 @@ void hw_perf_disable(void)
713 wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask); 745 wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask);
714} 746}
715 747
748static struct pmu pmu = {
749 .pmu_enable = alpha_pmu_enable,
750 .pmu_disable = alpha_pmu_disable,
751 .event_init = alpha_pmu_event_init,
752 .add = alpha_pmu_add,
753 .del = alpha_pmu_del,
754 .start = alpha_pmu_start,
755 .stop = alpha_pmu_stop,
756 .read = alpha_pmu_read,
757};
758
716 759
717/* 760/*
718 * Main entry point - don't know when this is called but it 761 * Main entry point - don't know when this is called but it
@@ -766,7 +809,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
766 wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask); 809 wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask);
767 810
768 /* la_ptr is the counter that overflowed. */ 811 /* la_ptr is the counter that overflowed. */
769 if (unlikely(la_ptr >= perf_max_events)) { 812 if (unlikely(la_ptr >= alpha_pmu->num_pmcs)) {
770 /* This should never occur! */ 813 /* This should never occur! */
771 irq_err_count++; 814 irq_err_count++;
772 pr_warning("PMI: silly index %ld\n", la_ptr); 815 pr_warning("PMI: silly index %ld\n", la_ptr);
@@ -807,7 +850,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
807 /* Interrupts coming too quickly; "throttle" the 850 /* Interrupts coming too quickly; "throttle" the
808 * counter, i.e., disable it for a little while. 851 * counter, i.e., disable it for a little while.
809 */ 852 */
810 cpuc->idx_mask &= ~(1UL<<idx); 853 alpha_pmu_stop(event, 0);
811 } 854 }
812 } 855 }
813 wrperfmon(PERFMON_CMD_ENABLE, cpuc->idx_mask); 856 wrperfmon(PERFMON_CMD_ENABLE, cpuc->idx_mask);
@@ -837,6 +880,7 @@ void __init init_hw_perf_events(void)
837 880
838 /* And set up PMU specification */ 881 /* And set up PMU specification */
839 alpha_pmu = &ev67_pmu; 882 alpha_pmu = &ev67_pmu;
840 perf_max_events = alpha_pmu->num_pmcs; 883
884 perf_pmu_register(&pmu);
841} 885}
842 886
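
One detail of the alpha conversion above recurs in the other drivers in this series: throttling is no longer done by clearing enable masks by hand in the interrupt handler; the overflow path simply calls the driver's own ->stop() with no flags, and the core restarts the event through ->start() when it unthrottles. A hedged overflow-handler fragment in that style follows; it assumes the includes and the dummy_stop() callback from the sketch before the first file, and it elides the period reprogramming and sample-data setup.

/* A non-zero return from perf_event_overflow() means the event is firing
 * too fast; the driver throttles it simply by stopping it. */
static void sketch_handle_overflow(struct perf_event *event, int nmi,
				   struct perf_sample_data *data,
				   struct pt_regs *regs)
{
	if (perf_event_overflow(event, nmi, data, regs))
		dummy_stop(event, 0);
}
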
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index ecbb0288e5dd..ad19c276b10f 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -221,46 +221,56 @@ again:
221} 221}
222 222
223static void 223static void
224armpmu_disable(struct perf_event *event) 224armpmu_read(struct perf_event *event)
225{ 225{
226 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
227 struct hw_perf_event *hwc = &event->hw; 226 struct hw_perf_event *hwc = &event->hw;
228 int idx = hwc->idx;
229
230 WARN_ON(idx < 0);
231
232 clear_bit(idx, cpuc->active_mask);
233 armpmu->disable(hwc, idx);
234
235 barrier();
236 227
237 armpmu_event_update(event, hwc, idx); 228 /* Don't read disabled counters! */
238 cpuc->events[idx] = NULL; 229 if (hwc->idx < 0)
239 clear_bit(idx, cpuc->used_mask); 230 return;
240 231
241 perf_event_update_userpage(event); 232 armpmu_event_update(event, hwc, hwc->idx);
242} 233}
243 234
244static void 235static void
245armpmu_read(struct perf_event *event) 236armpmu_stop(struct perf_event *event, int flags)
246{ 237{
247 struct hw_perf_event *hwc = &event->hw; 238 struct hw_perf_event *hwc = &event->hw;
248 239
249 /* Don't read disabled counters! */ 240 if (!armpmu)
250 if (hwc->idx < 0)
251 return; 241 return;
252 242
253 armpmu_event_update(event, hwc, hwc->idx); 243 /*
244 * ARM pmu always has to update the counter, so ignore
245 * PERF_EF_UPDATE, see comments in armpmu_start().
246 */
247 if (!(hwc->state & PERF_HES_STOPPED)) {
248 armpmu->disable(hwc, hwc->idx);
249 barrier(); /* why? */
250 armpmu_event_update(event, hwc, hwc->idx);
251 hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
252 }
254} 253}
255 254
256static void 255static void
257armpmu_unthrottle(struct perf_event *event) 256armpmu_start(struct perf_event *event, int flags)
258{ 257{
259 struct hw_perf_event *hwc = &event->hw; 258 struct hw_perf_event *hwc = &event->hw;
260 259
260 if (!armpmu)
261 return;
262
263 /*
264 * ARM pmu always has to reprogram the period, so ignore
265 * PERF_EF_RELOAD, see the comment below.
266 */
267 if (flags & PERF_EF_RELOAD)
268 WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
269
270 hwc->state = 0;
261 /* 271 /*
262 * Set the period again. Some counters can't be stopped, so when we 272 * Set the period again. Some counters can't be stopped, so when we
263 * were throttled we simply disabled the IRQ source and the counter 273 * were stopped we simply disabled the IRQ source and the counter
264 * may have been left counting. If we don't do this step then we may 274 * may have been left counting. If we don't do this step then we may
265 * get an interrupt too soon or *way* too late if the overflow has 275 * get an interrupt too soon or *way* too late if the overflow has
266 * happened since disabling. 276 * happened since disabling.
@@ -269,14 +279,33 @@ armpmu_unthrottle(struct perf_event *event)
269 armpmu->enable(hwc, hwc->idx); 279 armpmu->enable(hwc, hwc->idx);
270} 280}
271 281
282static void
283armpmu_del(struct perf_event *event, int flags)
284{
285 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
286 struct hw_perf_event *hwc = &event->hw;
287 int idx = hwc->idx;
288
289 WARN_ON(idx < 0);
290
291 clear_bit(idx, cpuc->active_mask);
292 armpmu_stop(event, PERF_EF_UPDATE);
293 cpuc->events[idx] = NULL;
294 clear_bit(idx, cpuc->used_mask);
295
296 perf_event_update_userpage(event);
297}
298
272static int 299static int
273armpmu_enable(struct perf_event *event) 300armpmu_add(struct perf_event *event, int flags)
274{ 301{
275 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 302 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
276 struct hw_perf_event *hwc = &event->hw; 303 struct hw_perf_event *hwc = &event->hw;
277 int idx; 304 int idx;
278 int err = 0; 305 int err = 0;
279 306
307 perf_pmu_disable(event->pmu);
308
280 /* If we don't have a space for the counter then finish early. */ 309 /* If we don't have a space for the counter then finish early. */
281 idx = armpmu->get_event_idx(cpuc, hwc); 310 idx = armpmu->get_event_idx(cpuc, hwc);
282 if (idx < 0) { 311 if (idx < 0) {
@@ -293,25 +322,19 @@ armpmu_enable(struct perf_event *event)
293 cpuc->events[idx] = event; 322 cpuc->events[idx] = event;
294 set_bit(idx, cpuc->active_mask); 323 set_bit(idx, cpuc->active_mask);
295 324
296 /* Set the period for the event. */ 325 hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
297 armpmu_event_set_period(event, hwc, idx); 326 if (flags & PERF_EF_START)
298 327 armpmu_start(event, PERF_EF_RELOAD);
299 /* Enable the event. */
300 armpmu->enable(hwc, idx);
301 328
302 /* Propagate our changes to the userspace mapping. */ 329 /* Propagate our changes to the userspace mapping. */
303 perf_event_update_userpage(event); 330 perf_event_update_userpage(event);
304 331
305out: 332out:
333 perf_pmu_enable(event->pmu);
306 return err; 334 return err;
307} 335}
308 336
309static struct pmu pmu = { 337static struct pmu pmu;
310 .enable = armpmu_enable,
311 .disable = armpmu_disable,
312 .unthrottle = armpmu_unthrottle,
313 .read = armpmu_read,
314};
315 338
316static int 339static int
317validate_event(struct cpu_hw_events *cpuc, 340validate_event(struct cpu_hw_events *cpuc,
@@ -491,20 +514,29 @@ __hw_perf_event_init(struct perf_event *event)
491 return err; 514 return err;
492} 515}
493 516
494const struct pmu * 517static int armpmu_event_init(struct perf_event *event)
495hw_perf_event_init(struct perf_event *event)
496{ 518{
497 int err = 0; 519 int err = 0;
498 520
521 switch (event->attr.type) {
522 case PERF_TYPE_RAW:
523 case PERF_TYPE_HARDWARE:
524 case PERF_TYPE_HW_CACHE:
525 break;
526
527 default:
528 return -ENOENT;
529 }
530
499 if (!armpmu) 531 if (!armpmu)
500 return ERR_PTR(-ENODEV); 532 return -ENODEV;
501 533
502 event->destroy = hw_perf_event_destroy; 534 event->destroy = hw_perf_event_destroy;
503 535
504 if (!atomic_inc_not_zero(&active_events)) { 536 if (!atomic_inc_not_zero(&active_events)) {
505 if (atomic_read(&active_events) > perf_max_events) { 537 if (atomic_read(&active_events) > armpmu.num_events) {
506 atomic_dec(&active_events); 538 atomic_dec(&active_events);
507 return ERR_PTR(-ENOSPC); 539 return -ENOSPC;
508 } 540 }
509 541
510 mutex_lock(&pmu_reserve_mutex); 542 mutex_lock(&pmu_reserve_mutex);
@@ -518,17 +550,16 @@ hw_perf_event_init(struct perf_event *event)
518 } 550 }
519 551
520 if (err) 552 if (err)
521 return ERR_PTR(err); 553 return err;
522 554
523 err = __hw_perf_event_init(event); 555 err = __hw_perf_event_init(event);
524 if (err) 556 if (err)
525 hw_perf_event_destroy(event); 557 hw_perf_event_destroy(event);
526 558
527 return err ? ERR_PTR(err) : &pmu; 559 return err;
528} 560}
529 561
530void 562static void armpmu_enable(struct pmu *pmu)
531hw_perf_enable(void)
532{ 563{
533 /* Enable all of the perf events on hardware. */ 564 /* Enable all of the perf events on hardware. */
534 int idx; 565 int idx;
@@ -549,13 +580,23 @@ hw_perf_enable(void)
549 armpmu->start(); 580 armpmu->start();
550} 581}
551 582
552void 583static void armpmu_disable(struct pmu *pmu)
553hw_perf_disable(void)
554{ 584{
555 if (armpmu) 585 if (armpmu)
556 armpmu->stop(); 586 armpmu->stop();
557} 587}
558 588
589static struct pmu pmu = {
590 .pmu_enable = armpmu_enable,
591 .pmu_disable = armpmu_disable,
592 .event_init = armpmu_event_init,
593 .add = armpmu_add,
594 .del = armpmu_del,
595 .start = armpmu_start,
596 .stop = armpmu_stop,
597 .read = armpmu_read,
598};
599
559/* 600/*
560 * ARMv6 Performance counter handling code. 601 * ARMv6 Performance counter handling code.
561 * 602 *
@@ -2933,14 +2974,12 @@ init_hw_perf_events(void)
2933 armpmu = &armv6pmu; 2974 armpmu = &armv6pmu;
2934 memcpy(armpmu_perf_cache_map, armv6_perf_cache_map, 2975 memcpy(armpmu_perf_cache_map, armv6_perf_cache_map,
2935 sizeof(armv6_perf_cache_map)); 2976 sizeof(armv6_perf_cache_map));
2936 perf_max_events = armv6pmu.num_events;
2937 break; 2977 break;
2938 case 0xB020: /* ARM11mpcore */ 2978 case 0xB020: /* ARM11mpcore */
2939 armpmu = &armv6mpcore_pmu; 2979 armpmu = &armv6mpcore_pmu;
2940 memcpy(armpmu_perf_cache_map, 2980 memcpy(armpmu_perf_cache_map,
2941 armv6mpcore_perf_cache_map, 2981 armv6mpcore_perf_cache_map,
2942 sizeof(armv6mpcore_perf_cache_map)); 2982 sizeof(armv6mpcore_perf_cache_map));
2943 perf_max_events = armv6mpcore_pmu.num_events;
2944 break; 2983 break;
2945 case 0xC080: /* Cortex-A8 */ 2984 case 0xC080: /* Cortex-A8 */
2946 armv7pmu.id = ARM_PERF_PMU_ID_CA8; 2985 armv7pmu.id = ARM_PERF_PMU_ID_CA8;
@@ -2952,7 +2991,6 @@ init_hw_perf_events(void)
2952 /* Reset PMNC and read the nb of CNTx counters 2991 /* Reset PMNC and read the nb of CNTx counters
2953 supported */ 2992 supported */
2954 armv7pmu.num_events = armv7_reset_read_pmnc(); 2993 armv7pmu.num_events = armv7_reset_read_pmnc();
2955 perf_max_events = armv7pmu.num_events;
2956 break; 2994 break;
2957 case 0xC090: /* Cortex-A9 */ 2995 case 0xC090: /* Cortex-A9 */
2958 armv7pmu.id = ARM_PERF_PMU_ID_CA9; 2996 armv7pmu.id = ARM_PERF_PMU_ID_CA9;
@@ -2964,7 +3002,6 @@ init_hw_perf_events(void)
2964 /* Reset PMNC and read the nb of CNTx counters 3002 /* Reset PMNC and read the nb of CNTx counters
2965 supported */ 3003 supported */
2966 armv7pmu.num_events = armv7_reset_read_pmnc(); 3004 armv7pmu.num_events = armv7_reset_read_pmnc();
2967 perf_max_events = armv7pmu.num_events;
2968 break; 3005 break;
2969 } 3006 }
2970 /* Intel CPUs [xscale]. */ 3007 /* Intel CPUs [xscale]. */
@@ -2975,13 +3012,11 @@ init_hw_perf_events(void)
2975 armpmu = &xscale1pmu; 3012 armpmu = &xscale1pmu;
2976 memcpy(armpmu_perf_cache_map, xscale_perf_cache_map, 3013 memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
2977 sizeof(xscale_perf_cache_map)); 3014 sizeof(xscale_perf_cache_map));
2978 perf_max_events = xscale1pmu.num_events;
2979 break; 3015 break;
2980 case 2: 3016 case 2:
2981 armpmu = &xscale2pmu; 3017 armpmu = &xscale2pmu;
2982 memcpy(armpmu_perf_cache_map, xscale_perf_cache_map, 3018 memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
2983 sizeof(xscale_perf_cache_map)); 3019 sizeof(xscale_perf_cache_map));
2984 perf_max_events = xscale2pmu.num_events;
2985 break; 3020 break;
2986 } 3021 }
2987 } 3022 }
@@ -2991,9 +3026,10 @@ init_hw_perf_events(void)
2991 arm_pmu_names[armpmu->id], armpmu->num_events); 3026 arm_pmu_names[armpmu->id], armpmu->num_events);
2992 } else { 3027 } else {
2993 pr_info("no hardware support available\n"); 3028 pr_info("no hardware support available\n");
2994 perf_max_events = -1;
2995 } 3029 }
2996 3030
3031 perf_pmu_register(&pmu);
3032
2997 return 0; 3033 return 0;
2998} 3034}
2999arch_initcall(init_hw_perf_events); 3035arch_initcall(init_hw_perf_events);
@@ -3001,13 +3037,6 @@ arch_initcall(init_hw_perf_events);
3001/* 3037/*
3002 * Callchain handling code. 3038 * Callchain handling code.
3003 */ 3039 */
3004static inline void
3005callchain_store(struct perf_callchain_entry *entry,
3006 u64 ip)
3007{
3008 if (entry->nr < PERF_MAX_STACK_DEPTH)
3009 entry->ip[entry->nr++] = ip;
3010}
3011 3040
3012/* 3041/*
3013 * The registers we're interested in are at the end of the variable 3042 * The registers we're interested in are at the end of the variable
@@ -3039,7 +3068,7 @@ user_backtrace(struct frame_tail *tail,
3039 if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail))) 3068 if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail)))
3040 return NULL; 3069 return NULL;
3041 3070
3042 callchain_store(entry, buftail.lr); 3071 perf_callchain_store(entry, buftail.lr);
3043 3072
3044 /* 3073 /*
3045 * Frame pointers should strictly progress back up the stack 3074 * Frame pointers should strictly progress back up the stack
@@ -3051,16 +3080,11 @@ user_backtrace(struct frame_tail *tail,
3051 return buftail.fp - 1; 3080 return buftail.fp - 1;
3052} 3081}
3053 3082
3054static void 3083void
3055perf_callchain_user(struct pt_regs *regs, 3084perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
3056 struct perf_callchain_entry *entry)
3057{ 3085{
3058 struct frame_tail *tail; 3086 struct frame_tail *tail;
3059 3087
3060 callchain_store(entry, PERF_CONTEXT_USER);
3061
3062 if (!user_mode(regs))
3063 regs = task_pt_regs(current);
3064 3088
3065 tail = (struct frame_tail *)regs->ARM_fp - 1; 3089 tail = (struct frame_tail *)regs->ARM_fp - 1;
3066 3090
@@ -3078,56 +3102,18 @@ callchain_trace(struct stackframe *fr,
3078 void *data) 3102 void *data)
3079{ 3103{
3080 struct perf_callchain_entry *entry = data; 3104 struct perf_callchain_entry *entry = data;
3081 callchain_store(entry, fr->pc); 3105 perf_callchain_store(entry, fr->pc);
3082 return 0; 3106 return 0;
3083} 3107}
3084 3108
3085static void 3109void
3086perf_callchain_kernel(struct pt_regs *regs, 3110perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
3087 struct perf_callchain_entry *entry)
3088{ 3111{
3089 struct stackframe fr; 3112 struct stackframe fr;
3090 3113
3091 callchain_store(entry, PERF_CONTEXT_KERNEL);
3092 fr.fp = regs->ARM_fp; 3114 fr.fp = regs->ARM_fp;
3093 fr.sp = regs->ARM_sp; 3115 fr.sp = regs->ARM_sp;
3094 fr.lr = regs->ARM_lr; 3116 fr.lr = regs->ARM_lr;
3095 fr.pc = regs->ARM_pc; 3117 fr.pc = regs->ARM_pc;
3096 walk_stackframe(&fr, callchain_trace, entry); 3118 walk_stackframe(&fr, callchain_trace, entry);
3097} 3119}
3098
3099static void
3100perf_do_callchain(struct pt_regs *regs,
3101 struct perf_callchain_entry *entry)
3102{
3103 int is_user;
3104
3105 if (!regs)
3106 return;
3107
3108 is_user = user_mode(regs);
3109
3110 if (!current || !current->pid)
3111 return;
3112
3113 if (is_user && current->state != TASK_RUNNING)
3114 return;
3115
3116 if (!is_user)
3117 perf_callchain_kernel(regs, entry);
3118
3119 if (current->mm)
3120 perf_callchain_user(regs, entry);
3121}
3122
3123static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
3124
3125struct perf_callchain_entry *
3126perf_callchain(struct pt_regs *regs)
3127{
3128 struct perf_callchain_entry *entry = &__get_cpu_var(pmc_irq_entry);
3129
3130 entry->nr = 0;
3131 perf_do_callchain(regs, entry);
3132 return entry;
3133}
diff --git a/arch/arm/oprofile/common.c b/arch/arm/oprofile/common.c
index 0691176899ff..aad63e611b36 100644
--- a/arch/arm/oprofile/common.c
+++ b/arch/arm/oprofile/common.c
@@ -96,7 +96,7 @@ static int op_create_counter(int cpu, int event)
96 return ret; 96 return ret;
97 97
98 pevent = perf_event_create_kernel_counter(&counter_config[event].attr, 98 pevent = perf_event_create_kernel_counter(&counter_config[event].attr,
99 cpu, -1, 99 cpu, NULL,
100 op_overflow_handler); 100 op_overflow_handler);
101 101
102 if (IS_ERR(pevent)) { 102 if (IS_ERR(pevent)) {
diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c
index 95ad9dad298e..d05ae4204bbf 100644
--- a/arch/powerpc/kernel/perf_callchain.c
+++ b/arch/powerpc/kernel/perf_callchain.c
@@ -23,18 +23,6 @@
23#include "ppc32.h" 23#include "ppc32.h"
24#endif 24#endif
25 25
26/*
27 * Store another value in a callchain_entry.
28 */
29static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip)
30{
31 unsigned int nr = entry->nr;
32
33 if (nr < PERF_MAX_STACK_DEPTH) {
34 entry->ip[nr] = ip;
35 entry->nr = nr + 1;
36 }
37}
38 26
39/* 27/*
40 * Is sp valid as the address of the next kernel stack frame after prev_sp? 28 * Is sp valid as the address of the next kernel stack frame after prev_sp?
@@ -58,8 +46,8 @@ static int valid_next_sp(unsigned long sp, unsigned long prev_sp)
58 return 0; 46 return 0;
59} 47}
60 48
61static void perf_callchain_kernel(struct pt_regs *regs, 49void
62 struct perf_callchain_entry *entry) 50perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
63{ 51{
64 unsigned long sp, next_sp; 52 unsigned long sp, next_sp;
65 unsigned long next_ip; 53 unsigned long next_ip;
@@ -69,8 +57,7 @@ static void perf_callchain_kernel(struct pt_regs *regs,
69 57
70 lr = regs->link; 58 lr = regs->link;
71 sp = regs->gpr[1]; 59 sp = regs->gpr[1];
72 callchain_store(entry, PERF_CONTEXT_KERNEL); 60 perf_callchain_store(entry, regs->nip);
73 callchain_store(entry, regs->nip);
74 61
75 if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) 62 if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD))
76 return; 63 return;
@@ -89,7 +76,7 @@ static void perf_callchain_kernel(struct pt_regs *regs,
89 next_ip = regs->nip; 76 next_ip = regs->nip;
90 lr = regs->link; 77 lr = regs->link;
91 level = 0; 78 level = 0;
92 callchain_store(entry, PERF_CONTEXT_KERNEL); 79 perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
93 80
94 } else { 81 } else {
95 if (level == 0) 82 if (level == 0)
@@ -111,7 +98,7 @@ static void perf_callchain_kernel(struct pt_regs *regs,
111 ++level; 98 ++level;
112 } 99 }
113 100
114 callchain_store(entry, next_ip); 101 perf_callchain_store(entry, next_ip);
115 if (!valid_next_sp(next_sp, sp)) 102 if (!valid_next_sp(next_sp, sp))
116 return; 103 return;
117 sp = next_sp; 104 sp = next_sp;
@@ -233,8 +220,8 @@ static int sane_signal_64_frame(unsigned long sp)
233 puc == (unsigned long) &sf->uc; 220 puc == (unsigned long) &sf->uc;
234} 221}
235 222
236static void perf_callchain_user_64(struct pt_regs *regs, 223static void perf_callchain_user_64(struct perf_callchain_entry *entry,
237 struct perf_callchain_entry *entry) 224 struct pt_regs *regs)
238{ 225{
239 unsigned long sp, next_sp; 226 unsigned long sp, next_sp;
240 unsigned long next_ip; 227 unsigned long next_ip;
@@ -246,8 +233,7 @@ static void perf_callchain_user_64(struct pt_regs *regs,
246 next_ip = regs->nip; 233 next_ip = regs->nip;
247 lr = regs->link; 234 lr = regs->link;
248 sp = regs->gpr[1]; 235 sp = regs->gpr[1];
249 callchain_store(entry, PERF_CONTEXT_USER); 236 perf_callchain_store(entry, next_ip);
250 callchain_store(entry, next_ip);
251 237
252 for (;;) { 238 for (;;) {
253 fp = (unsigned long __user *) sp; 239 fp = (unsigned long __user *) sp;
@@ -276,14 +262,14 @@ static void perf_callchain_user_64(struct pt_regs *regs,
276 read_user_stack_64(&uregs[PT_R1], &sp)) 262 read_user_stack_64(&uregs[PT_R1], &sp))
277 return; 263 return;
278 level = 0; 264 level = 0;
279 callchain_store(entry, PERF_CONTEXT_USER); 265 perf_callchain_store(entry, PERF_CONTEXT_USER);
280 callchain_store(entry, next_ip); 266 perf_callchain_store(entry, next_ip);
281 continue; 267 continue;
282 } 268 }
283 269
284 if (level == 0) 270 if (level == 0)
285 next_ip = lr; 271 next_ip = lr;
286 callchain_store(entry, next_ip); 272 perf_callchain_store(entry, next_ip);
287 ++level; 273 ++level;
288 sp = next_sp; 274 sp = next_sp;
289 } 275 }
@@ -315,8 +301,8 @@ static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
315 return __get_user_inatomic(*ret, ptr); 301 return __get_user_inatomic(*ret, ptr);
316} 302}
317 303
318static inline void perf_callchain_user_64(struct pt_regs *regs, 304static inline void perf_callchain_user_64(struct perf_callchain_entry *entry,
319 struct perf_callchain_entry *entry) 305 struct pt_regs *regs)
320{ 306{
321} 307}
322 308
@@ -435,8 +421,8 @@ static unsigned int __user *signal_frame_32_regs(unsigned int sp,
435 return mctx->mc_gregs; 421 return mctx->mc_gregs;
436} 422}
437 423
438static void perf_callchain_user_32(struct pt_regs *regs, 424static void perf_callchain_user_32(struct perf_callchain_entry *entry,
439 struct perf_callchain_entry *entry) 425 struct pt_regs *regs)
440{ 426{
441 unsigned int sp, next_sp; 427 unsigned int sp, next_sp;
442 unsigned int next_ip; 428 unsigned int next_ip;
@@ -447,8 +433,7 @@ static void perf_callchain_user_32(struct pt_regs *regs,
447 next_ip = regs->nip; 433 next_ip = regs->nip;
448 lr = regs->link; 434 lr = regs->link;
449 sp = regs->gpr[1]; 435 sp = regs->gpr[1];
450 callchain_store(entry, PERF_CONTEXT_USER); 436 perf_callchain_store(entry, next_ip);
451 callchain_store(entry, next_ip);
452 437
453 while (entry->nr < PERF_MAX_STACK_DEPTH) { 438 while (entry->nr < PERF_MAX_STACK_DEPTH) {
454 fp = (unsigned int __user *) (unsigned long) sp; 439 fp = (unsigned int __user *) (unsigned long) sp;
@@ -470,45 +455,24 @@ static void perf_callchain_user_32(struct pt_regs *regs,
470 read_user_stack_32(&uregs[PT_R1], &sp)) 455 read_user_stack_32(&uregs[PT_R1], &sp))
471 return; 456 return;
472 level = 0; 457 level = 0;
473 callchain_store(entry, PERF_CONTEXT_USER); 458 perf_callchain_store(entry, PERF_CONTEXT_USER);
474 callchain_store(entry, next_ip); 459 perf_callchain_store(entry, next_ip);
475 continue; 460 continue;
476 } 461 }
477 462
478 if (level == 0) 463 if (level == 0)
479 next_ip = lr; 464 next_ip = lr;
480 callchain_store(entry, next_ip); 465 perf_callchain_store(entry, next_ip);
481 ++level; 466 ++level;
482 sp = next_sp; 467 sp = next_sp;
483 } 468 }
484} 469}
485 470
486/* 471void
487 * Since we can't get PMU interrupts inside a PMU interrupt handler, 472perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
488 * we don't need separate irq and nmi entries here.
489 */
490static DEFINE_PER_CPU(struct perf_callchain_entry, cpu_perf_callchain);
491
492struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
493{ 473{
494 struct perf_callchain_entry *entry = &__get_cpu_var(cpu_perf_callchain); 474 if (current_is_64bit())
495 475 perf_callchain_user_64(entry, regs);
496 entry->nr = 0; 476 else
497 477 perf_callchain_user_32(entry, regs);
498 if (!user_mode(regs)) {
499 perf_callchain_kernel(regs, entry);
500 if (current->mm)
501 regs = task_pt_regs(current);
502 else
503 regs = NULL;
504 }
505
506 if (regs) {
507 if (current_is_64bit())
508 perf_callchain_user_64(regs, entry);
509 else
510 perf_callchain_user_32(regs, entry);
511 }
512
513 return entry;
514} 478}
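
The callchain rework visible in the ARM and powerpc files above follows one pattern: the per-cpu perf_callchain_entry buffer, the PERF_CONTEXT_KERNEL/PERF_CONTEXT_USER markers and the user-versus-kernel decision move into the core, so an architecture now only supplies two hooks and records frames with perf_callchain_store(). Below is a minimal sketch of those hooks; the hook names and perf_callchain_store() come from this diff, while the frame walking is left as placeholder comments.

#include <linux/perf_event.h>
#include <asm/ptrace.h>

void perf_callchain_kernel(struct perf_callchain_entry *entry,
			   struct pt_regs *regs)
{
	perf_callchain_store(entry, instruction_pointer(regs));
	/* walk kernel frames, calling perf_callchain_store() for each
	 * return address until entry->nr reaches PERF_MAX_STACK_DEPTH */
}

void perf_callchain_user(struct perf_callchain_entry *entry,
			 struct pt_regs *regs)
{
	perf_callchain_store(entry, instruction_pointer(regs));
	/* walk user frame pointers with __copy_from_user_inatomic() */
}
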
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index d301a30445e0..9cb4924b6c07 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -402,6 +402,9 @@ static void power_pmu_read(struct perf_event *event)
402{ 402{
403 s64 val, delta, prev; 403 s64 val, delta, prev;
404 404
405 if (event->hw.state & PERF_HES_STOPPED)
406 return;
407
405 if (!event->hw.idx) 408 if (!event->hw.idx)
406 return; 409 return;
407 /* 410 /*
@@ -517,7 +520,7 @@ static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0)
517 * Disable all events to prevent PMU interrupts and to allow 520 * Disable all events to prevent PMU interrupts and to allow
518 * events to be added or removed. 521 * events to be added or removed.
519 */ 522 */
520void hw_perf_disable(void) 523static void power_pmu_disable(struct pmu *pmu)
521{ 524{
522 struct cpu_hw_events *cpuhw; 525 struct cpu_hw_events *cpuhw;
523 unsigned long flags; 526 unsigned long flags;
@@ -565,7 +568,7 @@ void hw_perf_disable(void)
565 * If we were previously disabled and events were added, then 568 * If we were previously disabled and events were added, then
566 * put the new config on the PMU. 569 * put the new config on the PMU.
567 */ 570 */
568void hw_perf_enable(void) 571static void power_pmu_enable(struct pmu *pmu)
569{ 572{
570 struct perf_event *event; 573 struct perf_event *event;
571 struct cpu_hw_events *cpuhw; 574 struct cpu_hw_events *cpuhw;
@@ -672,6 +675,8 @@ void hw_perf_enable(void)
672 } 675 }
673 local64_set(&event->hw.prev_count, val); 676 local64_set(&event->hw.prev_count, val);
674 event->hw.idx = idx; 677 event->hw.idx = idx;
678 if (event->hw.state & PERF_HES_STOPPED)
679 val = 0;
675 write_pmc(idx, val); 680 write_pmc(idx, val);
676 perf_event_update_userpage(event); 681 perf_event_update_userpage(event);
677 } 682 }
@@ -727,7 +732,7 @@ static int collect_events(struct perf_event *group, int max_count,
727 * re-enable the PMU in order to get hw_perf_enable to do the 732 * re-enable the PMU in order to get hw_perf_enable to do the
728 * actual work of reconfiguring the PMU. 733 * actual work of reconfiguring the PMU.
729 */ 734 */
730static int power_pmu_enable(struct perf_event *event) 735static int power_pmu_add(struct perf_event *event, int ef_flags)
731{ 736{
732 struct cpu_hw_events *cpuhw; 737 struct cpu_hw_events *cpuhw;
733 unsigned long flags; 738 unsigned long flags;
@@ -735,7 +740,7 @@ static int power_pmu_enable(struct perf_event *event)
735 int ret = -EAGAIN; 740 int ret = -EAGAIN;
736 741
737 local_irq_save(flags); 742 local_irq_save(flags);
738 perf_disable(); 743 perf_pmu_disable(event->pmu);
739 744
740 /* 745 /*
741 * Add the event to the list (if there is room) 746 * Add the event to the list (if there is room)
@@ -749,6 +754,9 @@ static int power_pmu_enable(struct perf_event *event)
749 cpuhw->events[n0] = event->hw.config; 754 cpuhw->events[n0] = event->hw.config;
750 cpuhw->flags[n0] = event->hw.event_base; 755 cpuhw->flags[n0] = event->hw.event_base;
751 756
757 if (!(ef_flags & PERF_EF_START))
758 event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
759
752 /* 760 /*
753 * If group events scheduling transaction was started, 761 * If group events scheduling transaction was started,
754 * skip the schedulability test here, it will be peformed 762 * skip the schedulability test here, it will be peformed
@@ -769,7 +777,7 @@ nocheck:
769 777
770 ret = 0; 778 ret = 0;
771 out: 779 out:
772 perf_enable(); 780 perf_pmu_enable(event->pmu);
773 local_irq_restore(flags); 781 local_irq_restore(flags);
774 return ret; 782 return ret;
775} 783}
@@ -777,14 +785,14 @@ nocheck:
777/* 785/*
778 * Remove a event from the PMU. 786 * Remove a event from the PMU.
779 */ 787 */
780static void power_pmu_disable(struct perf_event *event) 788static void power_pmu_del(struct perf_event *event, int ef_flags)
781{ 789{
782 struct cpu_hw_events *cpuhw; 790 struct cpu_hw_events *cpuhw;
783 long i; 791 long i;
784 unsigned long flags; 792 unsigned long flags;
785 793
786 local_irq_save(flags); 794 local_irq_save(flags);
787 perf_disable(); 795 perf_pmu_disable(event->pmu);
788 796
789 power_pmu_read(event); 797 power_pmu_read(event);
790 798
@@ -821,34 +829,60 @@ static void power_pmu_disable(struct perf_event *event)
821 cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE); 829 cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE);
822 } 830 }
823 831
824 perf_enable(); 832 perf_pmu_enable(event->pmu);
825 local_irq_restore(flags); 833 local_irq_restore(flags);
826} 834}
827 835
828/* 836/*
829 * Re-enable interrupts on a event after they were throttled 837 * POWER-PMU does not support disabling individual counters, hence
830 * because they were coming too fast. 838 * program their cycle counter to their max value and ignore the interrupts.
831 */ 839 */
832static void power_pmu_unthrottle(struct perf_event *event) 840
841static void power_pmu_start(struct perf_event *event, int ef_flags)
842{
843 unsigned long flags;
844 s64 left;
845
846 if (!event->hw.idx || !event->hw.sample_period)
847 return;
848
849 if (!(event->hw.state & PERF_HES_STOPPED))
850 return;
851
852 if (ef_flags & PERF_EF_RELOAD)
853 WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
854
855 local_irq_save(flags);
856 perf_pmu_disable(event->pmu);
857
858 event->hw.state = 0;
859 left = local64_read(&event->hw.period_left);
860 write_pmc(event->hw.idx, left);
861
862 perf_event_update_userpage(event);
863 perf_pmu_enable(event->pmu);
864 local_irq_restore(flags);
865}
866
867static void power_pmu_stop(struct perf_event *event, int ef_flags)
833{ 868{
834 s64 val, left;
835 unsigned long flags; 869 unsigned long flags;
836 870
837 if (!event->hw.idx || !event->hw.sample_period) 871 if (!event->hw.idx || !event->hw.sample_period)
838 return; 872 return;
873
874 if (event->hw.state & PERF_HES_STOPPED)
875 return;
876
839 local_irq_save(flags); 877 local_irq_save(flags);
840 perf_disable(); 878 perf_pmu_disable(event->pmu);
879
841 power_pmu_read(event); 880 power_pmu_read(event);
842 left = event->hw.sample_period; 881 event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
843 event->hw.last_period = left; 882 write_pmc(event->hw.idx, 0);
844 val = 0; 883
845 if (left < 0x80000000L)
846 val = 0x80000000L - left;
847 write_pmc(event->hw.idx, val);
848 local64_set(&event->hw.prev_count, val);
849 local64_set(&event->hw.period_left, left);
850 perf_event_update_userpage(event); 884 perf_event_update_userpage(event);
851 perf_enable(); 885 perf_pmu_enable(event->pmu);
852 local_irq_restore(flags); 886 local_irq_restore(flags);
853} 887}
854 888
@@ -857,10 +891,11 @@ static void power_pmu_unthrottle(struct perf_event *event)
857 * Set the flag to make pmu::enable() not perform the 891 * Set the flag to make pmu::enable() not perform the
858 * schedulability test, it will be performed at commit time 892 * schedulability test, it will be performed at commit time
859 */ 893 */
860void power_pmu_start_txn(const struct pmu *pmu) 894void power_pmu_start_txn(struct pmu *pmu)
861{ 895{
862 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 896 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
863 897
898 perf_pmu_disable(pmu);
864 cpuhw->group_flag |= PERF_EVENT_TXN; 899 cpuhw->group_flag |= PERF_EVENT_TXN;
865 cpuhw->n_txn_start = cpuhw->n_events; 900 cpuhw->n_txn_start = cpuhw->n_events;
866} 901}
@@ -870,11 +905,12 @@ void power_pmu_start_txn(const struct pmu *pmu)
870 * Clear the flag and pmu::enable() will perform the 905 * Clear the flag and pmu::enable() will perform the
871 * schedulability test. 906 * schedulability test.
872 */ 907 */
873void power_pmu_cancel_txn(const struct pmu *pmu) 908void power_pmu_cancel_txn(struct pmu *pmu)
874{ 909{
875 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 910 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
876 911
877 cpuhw->group_flag &= ~PERF_EVENT_TXN; 912 cpuhw->group_flag &= ~PERF_EVENT_TXN;
913 perf_pmu_enable(pmu);
878} 914}
879 915
880/* 916/*
@@ -882,7 +918,7 @@ void power_pmu_cancel_txn(const struct pmu *pmu)
882 * Perform the group schedulability test as a whole 918 * Perform the group schedulability test as a whole
883 * Return 0 if success 919 * Return 0 if success
884 */ 920 */
885int power_pmu_commit_txn(const struct pmu *pmu) 921int power_pmu_commit_txn(struct pmu *pmu)
886{ 922{
887 struct cpu_hw_events *cpuhw; 923 struct cpu_hw_events *cpuhw;
888 long i, n; 924 long i, n;
@@ -901,19 +937,10 @@ int power_pmu_commit_txn(const struct pmu *pmu)
901 cpuhw->event[i]->hw.config = cpuhw->events[i]; 937 cpuhw->event[i]->hw.config = cpuhw->events[i];
902 938
903 cpuhw->group_flag &= ~PERF_EVENT_TXN; 939 cpuhw->group_flag &= ~PERF_EVENT_TXN;
940 perf_pmu_enable(pmu);
904 return 0; 941 return 0;
905} 942}
906 943
907struct pmu power_pmu = {
908 .enable = power_pmu_enable,
909 .disable = power_pmu_disable,
910 .read = power_pmu_read,
911 .unthrottle = power_pmu_unthrottle,
912 .start_txn = power_pmu_start_txn,
913 .cancel_txn = power_pmu_cancel_txn,
914 .commit_txn = power_pmu_commit_txn,
915};
916
917/* 944/*
918 * Return 1 if we might be able to put event on a limited PMC, 945 * Return 1 if we might be able to put event on a limited PMC,
919 * or 0 if not. 946 * or 0 if not.
@@ -1014,7 +1041,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp)
1014 return 0; 1041 return 0;
1015} 1042}
1016 1043
1017const struct pmu *hw_perf_event_init(struct perf_event *event) 1044static int power_pmu_event_init(struct perf_event *event)
1018{ 1045{
1019 u64 ev; 1046 u64 ev;
1020 unsigned long flags; 1047 unsigned long flags;
@@ -1026,25 +1053,27 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
1026 struct cpu_hw_events *cpuhw; 1053 struct cpu_hw_events *cpuhw;
1027 1054
1028 if (!ppmu) 1055 if (!ppmu)
1029 return ERR_PTR(-ENXIO); 1056 return -ENOENT;
1057
1030 switch (event->attr.type) { 1058 switch (event->attr.type) {
1031 case PERF_TYPE_HARDWARE: 1059 case PERF_TYPE_HARDWARE:
1032 ev = event->attr.config; 1060 ev = event->attr.config;
1033 if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) 1061 if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
1034 return ERR_PTR(-EOPNOTSUPP); 1062 return -EOPNOTSUPP;
1035 ev = ppmu->generic_events[ev]; 1063 ev = ppmu->generic_events[ev];
1036 break; 1064 break;
1037 case PERF_TYPE_HW_CACHE: 1065 case PERF_TYPE_HW_CACHE:
1038 err = hw_perf_cache_event(event->attr.config, &ev); 1066 err = hw_perf_cache_event(event->attr.config, &ev);
1039 if (err) 1067 if (err)
1040 return ERR_PTR(err); 1068 return err;
1041 break; 1069 break;
1042 case PERF_TYPE_RAW: 1070 case PERF_TYPE_RAW:
1043 ev = event->attr.config; 1071 ev = event->attr.config;
1044 break; 1072 break;
1045 default: 1073 default:
1046 return ERR_PTR(-EINVAL); 1074 return -ENOENT;
1047 } 1075 }
1076
1048 event->hw.config_base = ev; 1077 event->hw.config_base = ev;
1049 event->hw.idx = 0; 1078 event->hw.idx = 0;
1050 1079
@@ -1081,7 +1110,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
1081 */ 1110 */
1082 ev = normal_pmc_alternative(ev, flags); 1111 ev = normal_pmc_alternative(ev, flags);
1083 if (!ev) 1112 if (!ev)
1084 return ERR_PTR(-EINVAL); 1113 return -EINVAL;
1085 } 1114 }
1086 } 1115 }
1087 1116
@@ -1095,19 +1124,19 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
1095 n = collect_events(event->group_leader, ppmu->n_counter - 1, 1124 n = collect_events(event->group_leader, ppmu->n_counter - 1,
1096 ctrs, events, cflags); 1125 ctrs, events, cflags);
1097 if (n < 0) 1126 if (n < 0)
1098 return ERR_PTR(-EINVAL); 1127 return -EINVAL;
1099 } 1128 }
1100 events[n] = ev; 1129 events[n] = ev;
1101 ctrs[n] = event; 1130 ctrs[n] = event;
1102 cflags[n] = flags; 1131 cflags[n] = flags;
1103 if (check_excludes(ctrs, cflags, n, 1)) 1132 if (check_excludes(ctrs, cflags, n, 1))
1104 return ERR_PTR(-EINVAL); 1133 return -EINVAL;
1105 1134
1106 cpuhw = &get_cpu_var(cpu_hw_events); 1135 cpuhw = &get_cpu_var(cpu_hw_events);
1107 err = power_check_constraints(cpuhw, events, cflags, n + 1); 1136 err = power_check_constraints(cpuhw, events, cflags, n + 1);
1108 put_cpu_var(cpu_hw_events); 1137 put_cpu_var(cpu_hw_events);
1109 if (err) 1138 if (err)
1110 return ERR_PTR(-EINVAL); 1139 return -EINVAL;
1111 1140
1112 event->hw.config = events[n]; 1141 event->hw.config = events[n];
1113 event->hw.event_base = cflags[n]; 1142 event->hw.event_base = cflags[n];
@@ -1132,11 +1161,23 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
1132 } 1161 }
1133 event->destroy = hw_perf_event_destroy; 1162 event->destroy = hw_perf_event_destroy;
1134 1163
1135 if (err) 1164 return err;
1136 return ERR_PTR(err);
1137 return &power_pmu;
1138} 1165}
1139 1166
1167struct pmu power_pmu = {
1168 .pmu_enable = power_pmu_enable,
1169 .pmu_disable = power_pmu_disable,
1170 .event_init = power_pmu_event_init,
1171 .add = power_pmu_add,
1172 .del = power_pmu_del,
1173 .start = power_pmu_start,
1174 .stop = power_pmu_stop,
1175 .read = power_pmu_read,
1176 .start_txn = power_pmu_start_txn,
1177 .cancel_txn = power_pmu_cancel_txn,
1178 .commit_txn = power_pmu_commit_txn,
1179};
1180
1140/* 1181/*
1141 * A counter has overflowed; update its count and record 1182 * A counter has overflowed; update its count and record
1142 * things if requested. Note that interrupts are hard-disabled 1183 * things if requested. Note that interrupts are hard-disabled
@@ -1149,6 +1190,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
1149 s64 prev, delta, left; 1190 s64 prev, delta, left;
1150 int record = 0; 1191 int record = 0;
1151 1192
1193 if (event->hw.state & PERF_HES_STOPPED) {
1194 write_pmc(event->hw.idx, 0);
1195 return;
1196 }
1197
1152 /* we don't have to worry about interrupts here */ 1198 /* we don't have to worry about interrupts here */
1153 prev = local64_read(&event->hw.prev_count); 1199 prev = local64_read(&event->hw.prev_count);
1154 delta = (val - prev) & 0xfffffffful; 1200 delta = (val - prev) & 0xfffffffful;
@@ -1171,6 +1217,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
1171 val = 0x80000000LL - left; 1217 val = 0x80000000LL - left;
1172 } 1218 }
1173 1219
1220 write_pmc(event->hw.idx, val);
1221 local64_set(&event->hw.prev_count, val);
1222 local64_set(&event->hw.period_left, left);
1223 perf_event_update_userpage(event);
1224
1174 /* 1225 /*
1175 * Finally record data if requested. 1226 * Finally record data if requested.
1176 */ 1227 */
@@ -1183,23 +1234,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
1183 if (event->attr.sample_type & PERF_SAMPLE_ADDR) 1234 if (event->attr.sample_type & PERF_SAMPLE_ADDR)
1184 perf_get_data_addr(regs, &data.addr); 1235 perf_get_data_addr(regs, &data.addr);
1185 1236
1186 if (perf_event_overflow(event, nmi, &data, regs)) { 1237 if (perf_event_overflow(event, nmi, &data, regs))
1187 /* 1238 power_pmu_stop(event, 0);
1188 * Interrupts are coming too fast - throttle them
1189 * by setting the event to 0, so it will be
1190 * at least 2^30 cycles until the next interrupt
1191 * (assuming each event counts at most 2 counts
1192 * per cycle).
1193 */
1194 val = 0;
1195 left = ~0ULL >> 1;
1196 }
1197 } 1239 }
1198
1199 write_pmc(event->hw.idx, val);
1200 local64_set(&event->hw.prev_count, val);
1201 local64_set(&event->hw.period_left, left);
1202 perf_event_update_userpage(event);
1203} 1240}
1204 1241
1205/* 1242/*
@@ -1342,6 +1379,7 @@ int register_power_pmu(struct power_pmu *pmu)
1342 freeze_events_kernel = MMCR0_FCHV; 1379 freeze_events_kernel = MMCR0_FCHV;
1343#endif /* CONFIG_PPC64 */ 1380#endif /* CONFIG_PPC64 */
1344 1381
1382 perf_pmu_register(&power_pmu);
1345 perf_cpu_notifier(power_pmu_notifier); 1383 perf_cpu_notifier(power_pmu_notifier);
1346 1384
1347 return 0; 1385 return 0;
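
The transaction callbacks kept by power_pmu above now take a plain struct pmu pointer and do their own perf_pmu_disable()/perf_pmu_enable() bracketing: start_txn() disables the PMU, and commit_txn() or cancel_txn() re-enables it. The caller-side contract looks roughly like the sketch below; this is a simplified illustration of how the core drives the callbacks when scheduling an event group, not the real group_sched_in().

static int sketch_schedule_group(struct pmu *pmu, struct perf_event *leader)
{
	int err;

	pmu->start_txn(pmu);			/* PMU is disabled from here */

	err = pmu->add(leader, PERF_EF_START);
	/* ...each sibling event is pmu->add()ed the same way... */

	if (!err && !pmu->commit_txn(pmu))	/* 0 means the group fits */
		return 0;			/* PMU re-enabled by commit */

	pmu->cancel_txn(pmu);			/* PMU re-enabled by cancel */
	return err ? err : -EAGAIN;
}
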
diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c
index 1ba45471ae43..7ecca59ddf77 100644
--- a/arch/powerpc/kernel/perf_event_fsl_emb.c
+++ b/arch/powerpc/kernel/perf_event_fsl_emb.c
@@ -156,6 +156,9 @@ static void fsl_emb_pmu_read(struct perf_event *event)
156{ 156{
157 s64 val, delta, prev; 157 s64 val, delta, prev;
158 158
159 if (event->hw.state & PERF_HES_STOPPED)
160 return;
161
159 /* 162 /*
160 * Performance monitor interrupts come even when interrupts 163 * Performance monitor interrupts come even when interrupts
161 * are soft-disabled, as long as interrupts are hard-enabled. 164 * are soft-disabled, as long as interrupts are hard-enabled.
@@ -177,7 +180,7 @@ static void fsl_emb_pmu_read(struct perf_event *event)
177 * Disable all events to prevent PMU interrupts and to allow 180 * Disable all events to prevent PMU interrupts and to allow
178 * events to be added or removed. 181 * events to be added or removed.
179 */ 182 */
180void hw_perf_disable(void) 183static void fsl_emb_pmu_disable(struct pmu *pmu)
181{ 184{
182 struct cpu_hw_events *cpuhw; 185 struct cpu_hw_events *cpuhw;
183 unsigned long flags; 186 unsigned long flags;
@@ -216,7 +219,7 @@ void hw_perf_disable(void)
216 * If we were previously disabled and events were added, then 219 * If we were previously disabled and events were added, then
217 * put the new config on the PMU. 220 * put the new config on the PMU.
218 */ 221 */
219void hw_perf_enable(void) 222static void fsl_emb_pmu_enable(struct pmu *pmu)
220{ 223{
221 struct cpu_hw_events *cpuhw; 224 struct cpu_hw_events *cpuhw;
222 unsigned long flags; 225 unsigned long flags;
@@ -262,8 +265,8 @@ static int collect_events(struct perf_event *group, int max_count,
262 return n; 265 return n;
263} 266}
264 267
265/* perf must be disabled, context locked on entry */ 268/* context locked on entry */
266static int fsl_emb_pmu_enable(struct perf_event *event) 269static int fsl_emb_pmu_add(struct perf_event *event, int flags)
267{ 270{
268 struct cpu_hw_events *cpuhw; 271 struct cpu_hw_events *cpuhw;
269 int ret = -EAGAIN; 272 int ret = -EAGAIN;
@@ -271,6 +274,7 @@ static int fsl_emb_pmu_enable(struct perf_event *event)
271 u64 val; 274 u64 val;
272 int i; 275 int i;
273 276
277 perf_pmu_disable(event->pmu);
274 cpuhw = &get_cpu_var(cpu_hw_events); 278 cpuhw = &get_cpu_var(cpu_hw_events);
275 279
276 if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) 280 if (event->hw.config & FSL_EMB_EVENT_RESTRICTED)
@@ -301,6 +305,12 @@ static int fsl_emb_pmu_enable(struct perf_event *event)
301 val = 0x80000000L - left; 305 val = 0x80000000L - left;
302 } 306 }
303 local64_set(&event->hw.prev_count, val); 307 local64_set(&event->hw.prev_count, val);
308
309 if (!(flags & PERF_EF_START)) {
310 event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
311 val = 0;
312 }
313
304 write_pmc(i, val); 314 write_pmc(i, val);
305 perf_event_update_userpage(event); 315 perf_event_update_userpage(event);
306 316
@@ -310,15 +320,17 @@ static int fsl_emb_pmu_enable(struct perf_event *event)
310 ret = 0; 320 ret = 0;
311 out: 321 out:
312 put_cpu_var(cpu_hw_events); 322 put_cpu_var(cpu_hw_events);
323 perf_pmu_enable(event->pmu);
313 return ret; 324 return ret;
314} 325}
315 326
316/* perf must be disabled, context locked on entry */ 327/* context locked on entry */
317static void fsl_emb_pmu_disable(struct perf_event *event) 328static void fsl_emb_pmu_del(struct perf_event *event, int flags)
318{ 329{
319 struct cpu_hw_events *cpuhw; 330 struct cpu_hw_events *cpuhw;
320 int i = event->hw.idx; 331 int i = event->hw.idx;
321 332
333 perf_pmu_disable(event->pmu);
322 if (i < 0) 334 if (i < 0)
323 goto out; 335 goto out;
324 336
@@ -346,44 +358,57 @@ static void fsl_emb_pmu_disable(struct perf_event *event)
346 cpuhw->n_events--; 358 cpuhw->n_events--;
347 359
348 out: 360 out:
361 perf_pmu_enable(event->pmu);
349 put_cpu_var(cpu_hw_events); 362 put_cpu_var(cpu_hw_events);
350} 363}
351 364
352/* 365static void fsl_emb_pmu_start(struct perf_event *event, int ef_flags)
353 * Re-enable interrupts on a event after they were throttled
354 * because they were coming too fast.
355 *
356 * Context is locked on entry, but perf is not disabled.
357 */
358static void fsl_emb_pmu_unthrottle(struct perf_event *event)
359{ 366{
360 s64 val, left;
361 unsigned long flags; 367 unsigned long flags;
368 s64 left;
362 369
363 if (event->hw.idx < 0 || !event->hw.sample_period) 370 if (event->hw.idx < 0 || !event->hw.sample_period)
364 return; 371 return;
372
373 if (!(event->hw.state & PERF_HES_STOPPED))
374 return;
375
376 if (ef_flags & PERF_EF_RELOAD)
377 WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
378
365 local_irq_save(flags); 379 local_irq_save(flags);
366 perf_disable(); 380 perf_pmu_disable(event->pmu);
367 fsl_emb_pmu_read(event); 381
368 left = event->hw.sample_period; 382 event->hw.state = 0;
369 event->hw.last_period = left; 383 left = local64_read(&event->hw.period_left);
370 val = 0; 384 write_pmc(event->hw.idx, left);
371 if (left < 0x80000000L) 385
372 val = 0x80000000L - left;
373 write_pmc(event->hw.idx, val);
374 local64_set(&event->hw.prev_count, val);
375 local64_set(&event->hw.period_left, left);
376 perf_event_update_userpage(event); 386 perf_event_update_userpage(event);
377 perf_enable(); 387 perf_pmu_enable(event->pmu);
378 local_irq_restore(flags); 388 local_irq_restore(flags);
379} 389}
380 390
381static struct pmu fsl_emb_pmu = { 391static void fsl_emb_pmu_stop(struct perf_event *event, int ef_flags)
382 .enable = fsl_emb_pmu_enable, 392{
383 .disable = fsl_emb_pmu_disable, 393 unsigned long flags;
384 .read = fsl_emb_pmu_read, 394
385 .unthrottle = fsl_emb_pmu_unthrottle, 395 if (event->hw.idx < 0 || !event->hw.sample_period)
386}; 396 return;
397
398 if (event->hw.state & PERF_HES_STOPPED)
399 return;
400
401 local_irq_save(flags);
402 perf_pmu_disable(event->pmu);
403
404 fsl_emb_pmu_read(event);
405 event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
406 write_pmc(event->hw.idx, 0);
407
408 perf_event_update_userpage(event);
409 perf_pmu_enable(event->pmu);
410 local_irq_restore(flags);
411}
387 412
388/* 413/*
389 * Release the PMU if this is the last perf_event. 414 * Release the PMU if this is the last perf_event.
@@ -428,7 +453,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp)
428 return 0; 453 return 0;
429} 454}
430 455
431const struct pmu *hw_perf_event_init(struct perf_event *event) 456static int fsl_emb_pmu_event_init(struct perf_event *event)
432{ 457{
433 u64 ev; 458 u64 ev;
434 struct perf_event *events[MAX_HWEVENTS]; 459 struct perf_event *events[MAX_HWEVENTS];
@@ -441,14 +466,14 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
441 case PERF_TYPE_HARDWARE: 466 case PERF_TYPE_HARDWARE:
442 ev = event->attr.config; 467 ev = event->attr.config;
443 if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) 468 if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
444 return ERR_PTR(-EOPNOTSUPP); 469 return -EOPNOTSUPP;
445 ev = ppmu->generic_events[ev]; 470 ev = ppmu->generic_events[ev];
446 break; 471 break;
447 472
448 case PERF_TYPE_HW_CACHE: 473 case PERF_TYPE_HW_CACHE:
449 err = hw_perf_cache_event(event->attr.config, &ev); 474 err = hw_perf_cache_event(event->attr.config, &ev);
450 if (err) 475 if (err)
451 return ERR_PTR(err); 476 return err;
452 break; 477 break;
453 478
454 case PERF_TYPE_RAW: 479 case PERF_TYPE_RAW:
@@ -456,12 +481,12 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
456 break; 481 break;
457 482
458 default: 483 default:
459 return ERR_PTR(-EINVAL); 484 return -ENOENT;
460 } 485 }
461 486
462 event->hw.config = ppmu->xlate_event(ev); 487 event->hw.config = ppmu->xlate_event(ev);
463 if (!(event->hw.config & FSL_EMB_EVENT_VALID)) 488 if (!(event->hw.config & FSL_EMB_EVENT_VALID))
464 return ERR_PTR(-EINVAL); 489 return -EINVAL;
465 490
466 /* 491 /*
467 * If this is in a group, check if it can go on with all the 492 * If this is in a group, check if it can go on with all the
@@ -473,7 +498,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
473 n = collect_events(event->group_leader, 498 n = collect_events(event->group_leader,
474 ppmu->n_counter - 1, events); 499 ppmu->n_counter - 1, events);
475 if (n < 0) 500 if (n < 0)
476 return ERR_PTR(-EINVAL); 501 return -EINVAL;
477 } 502 }
478 503
479 if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) { 504 if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) {
@@ -484,7 +509,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
484 } 509 }
485 510
486 if (num_restricted >= ppmu->n_restricted) 511 if (num_restricted >= ppmu->n_restricted)
487 return ERR_PTR(-EINVAL); 512 return -EINVAL;
488 } 513 }
489 514
490 event->hw.idx = -1; 515 event->hw.idx = -1;
@@ -497,7 +522,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
497 if (event->attr.exclude_kernel) 522 if (event->attr.exclude_kernel)
498 event->hw.config_base |= PMLCA_FCS; 523 event->hw.config_base |= PMLCA_FCS;
499 if (event->attr.exclude_idle) 524 if (event->attr.exclude_idle)
500 return ERR_PTR(-ENOTSUPP); 525 return -ENOTSUPP;
501 526
502 event->hw.last_period = event->hw.sample_period; 527 event->hw.last_period = event->hw.sample_period;
503 local64_set(&event->hw.period_left, event->hw.last_period); 528 local64_set(&event->hw.period_left, event->hw.last_period);
@@ -523,11 +548,20 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
523 } 548 }
524 event->destroy = hw_perf_event_destroy; 549 event->destroy = hw_perf_event_destroy;
525 550
526 if (err) 551 return err;
527 return ERR_PTR(err);
528 return &fsl_emb_pmu;
529} 552}
530 553
554static struct pmu fsl_emb_pmu = {
555 .pmu_enable = fsl_emb_pmu_enable,
556 .pmu_disable = fsl_emb_pmu_disable,
557 .event_init = fsl_emb_pmu_event_init,
558 .add = fsl_emb_pmu_add,
559 .del = fsl_emb_pmu_del,
560 .start = fsl_emb_pmu_start,
561 .stop = fsl_emb_pmu_stop,
562 .read = fsl_emb_pmu_read,
563};
564
531/* 565/*
532 * A counter has overflowed; update its count and record 566 * A counter has overflowed; update its count and record
533 * things if requested. Note that interrupts are hard-disabled 567 * things if requested. Note that interrupts are hard-disabled
@@ -540,6 +574,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
540 s64 prev, delta, left; 574 s64 prev, delta, left;
541 int record = 0; 575 int record = 0;
542 576
577 if (event->hw.state & PERF_HES_STOPPED) {
578 write_pmc(event->hw.idx, 0);
579 return;
580 }
581
543 /* we don't have to worry about interrupts here */ 582 /* we don't have to worry about interrupts here */
544 prev = local64_read(&event->hw.prev_count); 583 prev = local64_read(&event->hw.prev_count);
545 delta = (val - prev) & 0xfffffffful; 584 delta = (val - prev) & 0xfffffffful;
@@ -562,6 +601,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
562 val = 0x80000000LL - left; 601 val = 0x80000000LL - left;
563 } 602 }
564 603
604 write_pmc(event->hw.idx, val);
605 local64_set(&event->hw.prev_count, val);
606 local64_set(&event->hw.period_left, left);
607 perf_event_update_userpage(event);
608
565 /* 609 /*
566 * Finally record data if requested. 610 * Finally record data if requested.
567 */ 611 */
@@ -571,23 +615,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
571 perf_sample_data_init(&data, 0); 615 perf_sample_data_init(&data, 0);
572 data.period = event->hw.last_period; 616 data.period = event->hw.last_period;
573 617
574 if (perf_event_overflow(event, nmi, &data, regs)) { 618 if (perf_event_overflow(event, nmi, &data, regs))
575 /* 619 fsl_emb_pmu_stop(event, 0);
576 * Interrupts are coming too fast - throttle them
577 * by setting the event to 0, so it will be
578 * at least 2^30 cycles until the next interrupt
579 * (assuming each event counts at most 2 counts
580 * per cycle).
581 */
582 val = 0;
583 left = ~0ULL >> 1;
584 }
585 } 620 }
586
587 write_pmc(event->hw.idx, val);
588 local64_set(&event->hw.prev_count, val);
589 local64_set(&event->hw.period_left, left);
590 perf_event_update_userpage(event);
591} 621}
592 622
593static void perf_event_interrupt(struct pt_regs *regs) 623static void perf_event_interrupt(struct pt_regs *regs)
@@ -651,5 +681,7 @@ int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu)
651 pr_info("%s performance monitor hardware support registered\n", 681 pr_info("%s performance monitor hardware support registered\n",
652 pmu->name); 682 pmu->name);
653 683
684 perf_pmu_register(&fsl_emb_pmu);
685
654 return 0; 686 return 0;
655} 687}
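The fsl_emb conversion above is the template the rest of this series follows: the old enable/disable/unthrottle callbacks become add/del/start/stop, each event carries PERF_HES_STOPPED / PERF_HES_UPTODATE state, and the blanket perf_disable()/perf_enable() pairs become per-PMU perf_pmu_disable(event->pmu) / perf_pmu_enable(event->pmu). A minimal user-space sketch of that state machine follows; the flag names mirror the ones in the diff, but the numeric values and the counter handling are mocked for illustration, so this is not the kernel implementation.

/* Sketch of the add/del/start/stop state machine this series introduces.
 * Names mirror the kernel's (PERF_HES_*, PERF_EF_*); values and hardware
 * access are mocked for the example. */
#include <assert.h>
#include <stdio.h>

#define PERF_HES_STOPPED   0x01  /* counter is not on the hardware      */
#define PERF_HES_UPTODATE  0x02  /* event->count reflects the hardware  */

#define PERF_EF_START      0x01  /* add() should also start the event   */
#define PERF_EF_RELOAD     0x02  /* start() should reprogram the period */
#define PERF_EF_UPDATE     0x04  /* stop() should fold in the final count */

struct mock_event {
	int state;
	long long count;
	long long hw;      /* stands in for the hardware counter */
};

static void mock_start(struct mock_event *e, int flags)
{
	if (flags & PERF_EF_RELOAD)
		assert(e->state & PERF_HES_UPTODATE);
	e->state = 0;                   /* counting again */
}

static void mock_stop(struct mock_event *e, int flags)
{
	e->state |= PERF_HES_STOPPED;
	if ((flags & PERF_EF_UPDATE) && !(e->state & PERF_HES_UPTODATE)) {
		e->count += e->hw;      /* drain the remaining delta */
		e->hw = 0;
		e->state |= PERF_HES_UPTODATE;
	}
}

static int mock_add(struct mock_event *e, int flags)
{
	e->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
	if (flags & PERF_EF_START)
		mock_start(e, PERF_EF_RELOAD);
	return 0;
}

int main(void)
{
	struct mock_event e = { 0, 0, 0 };

	mock_add(&e, PERF_EF_START);
	e.hw = 42;                      /* pretend the counter ticked */
	mock_stop(&e, PERF_EF_UPDATE);  /* what del() does first */
	printf("count=%lld state=%#x\n", e.count, e.state);
	return 0;
}

The key invariant is the one the WARN_ON_ONCE() checks in the diff enforce: start(PERF_EF_RELOAD) is only meaningful when the event is both stopped and up to date.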
diff --git a/arch/sh/kernel/perf_callchain.c b/arch/sh/kernel/perf_callchain.c
index a9dd3abde28e..d5ca1ef50fa9 100644
--- a/arch/sh/kernel/perf_callchain.c
+++ b/arch/sh/kernel/perf_callchain.c
@@ -14,11 +14,6 @@
14#include <asm/unwinder.h> 14#include <asm/unwinder.h>
15#include <asm/ptrace.h> 15#include <asm/ptrace.h>
16 16
17static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip)
18{
19 if (entry->nr < PERF_MAX_STACK_DEPTH)
20 entry->ip[entry->nr++] = ip;
21}
22 17
23static void callchain_warning(void *data, char *msg) 18static void callchain_warning(void *data, char *msg)
24{ 19{
@@ -39,7 +34,7 @@ static void callchain_address(void *data, unsigned long addr, int reliable)
39 struct perf_callchain_entry *entry = data; 34 struct perf_callchain_entry *entry = data;
40 35
41 if (reliable) 36 if (reliable)
42 callchain_store(entry, addr); 37 perf_callchain_store(entry, addr);
43} 38}
44 39
45static const struct stacktrace_ops callchain_ops = { 40static const struct stacktrace_ops callchain_ops = {
@@ -49,47 +44,10 @@ static const struct stacktrace_ops callchain_ops = {
49 .address = callchain_address, 44 .address = callchain_address,
50}; 45};
51 46
52static void 47void
53perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) 48perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
54{ 49{
55 callchain_store(entry, PERF_CONTEXT_KERNEL); 50 perf_callchain_store(entry, regs->pc);
56 callchain_store(entry, regs->pc);
57 51
58 unwind_stack(NULL, regs, NULL, &callchain_ops, entry); 52 unwind_stack(NULL, regs, NULL, &callchain_ops, entry);
59} 53}
60
61static void
62perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
63{
64 int is_user;
65
66 if (!regs)
67 return;
68
69 is_user = user_mode(regs);
70
71 if (is_user && current->state != TASK_RUNNING)
72 return;
73
74 /*
75 * Only the kernel side is implemented for now.
76 */
77 if (!is_user)
78 perf_callchain_kernel(regs, entry);
79}
80
81/*
82 * No need for separate IRQ and NMI entries.
83 */
84static DEFINE_PER_CPU(struct perf_callchain_entry, callchain);
85
86struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
87{
88 struct perf_callchain_entry *entry = &__get_cpu_var(callchain);
89
90 entry->nr = 0;
91
92 perf_do_callchain(regs, entry);
93
94 return entry;
95}
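The sh callchain change above removes the per-architecture callchain_store() helper and the arch-local perf_callchain()/perf_do_callchain() plumbing; the architecture now only fills a core-supplied entry through perf_callchain_store(), and the removal of the explicit PERF_CONTEXT_KERNEL store suggests the generic code inserts the context markers itself. Below is a small stand-alone sketch of the bounds-checked append the removed helper performed (and that the generic perf_callchain_store() is expected to perform); PERF_MAX_STACK_DEPTH is given a placeholder value here.

/* Bounds-checked append into a callchain entry, modelled on the helper
 * the diff deletes.  Standalone illustration, not kernel code. */
#include <stdio.h>

#define PERF_MAX_STACK_DEPTH 127   /* placeholder depth for the sketch */

struct callchain_entry {
	unsigned int nr;
	unsigned long long ip[PERF_MAX_STACK_DEPTH];
};

static void callchain_store(struct callchain_entry *entry, unsigned long long ip)
{
	if (entry->nr < PERF_MAX_STACK_DEPTH)
		entry->ip[entry->nr++] = ip;
}

int main(void)
{
	struct callchain_entry entry = { .nr = 0 };

	callchain_store(&entry, 0xc0000000ULL);  /* e.g. regs->pc */
	callchain_store(&entry, 0xc0000100ULL);  /* a caller      */
	printf("depth=%u top=%#llx\n", entry.nr, entry.ip[0]);
	return 0;
}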
diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c
index 7a3dc3567258..036f7a9296fa 100644
--- a/arch/sh/kernel/perf_event.c
+++ b/arch/sh/kernel/perf_event.c
@@ -206,50 +206,80 @@ again:
206 local64_add(delta, &event->count); 206 local64_add(delta, &event->count);
207} 207}
208 208
209static void sh_pmu_disable(struct perf_event *event) 209static void sh_pmu_stop(struct perf_event *event, int flags)
210{ 210{
211 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 211 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
212 struct hw_perf_event *hwc = &event->hw; 212 struct hw_perf_event *hwc = &event->hw;
213 int idx = hwc->idx; 213 int idx = hwc->idx;
214 214
215 clear_bit(idx, cpuc->active_mask); 215 if (!(event->hw.state & PERF_HES_STOPPED)) {
216 sh_pmu->disable(hwc, idx); 216 sh_pmu->disable(hwc, idx);
217 cpuc->events[idx] = NULL;
218 event->hw.state |= PERF_HES_STOPPED;
219 }
220
221 if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) {
222 sh_perf_event_update(event, &event->hw, idx);
223 event->hw.state |= PERF_HES_UPTODATE;
224 }
225}
217 226
218 barrier(); 227static void sh_pmu_start(struct perf_event *event, int flags)
228{
229 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
230 struct hw_perf_event *hwc = &event->hw;
231 int idx = hwc->idx;
219 232
220 sh_perf_event_update(event, &event->hw, idx); 233 if (WARN_ON_ONCE(idx == -1))
234 return;
235
236 if (flags & PERF_EF_RELOAD)
237 WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
238
239 cpuc->events[idx] = event;
240 event->hw.state = 0;
241 sh_pmu->enable(hwc, idx);
242}
243
244static void sh_pmu_del(struct perf_event *event, int flags)
245{
246 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
221 247
222 cpuc->events[idx] = NULL; 248 sh_pmu_stop(event, PERF_EF_UPDATE);
223 clear_bit(idx, cpuc->used_mask); 249 __clear_bit(event->hw.idx, cpuc->used_mask);
224 250
225 perf_event_update_userpage(event); 251 perf_event_update_userpage(event);
226} 252}
227 253
228static int sh_pmu_enable(struct perf_event *event) 254static int sh_pmu_add(struct perf_event *event, int flags)
229{ 255{
230 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 256 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
231 struct hw_perf_event *hwc = &event->hw; 257 struct hw_perf_event *hwc = &event->hw;
232 int idx = hwc->idx; 258 int idx = hwc->idx;
259 int ret = -EAGAIN;
260
261 perf_pmu_disable(event->pmu);
233 262
234 if (test_and_set_bit(idx, cpuc->used_mask)) { 263 if (__test_and_set_bit(idx, cpuc->used_mask)) {
235 idx = find_first_zero_bit(cpuc->used_mask, sh_pmu->num_events); 264 idx = find_first_zero_bit(cpuc->used_mask, sh_pmu->num_events);
236 if (idx == sh_pmu->num_events) 265 if (idx == sh_pmu->num_events)
237 return -EAGAIN; 266 goto out;
238 267
239 set_bit(idx, cpuc->used_mask); 268 __set_bit(idx, cpuc->used_mask);
240 hwc->idx = idx; 269 hwc->idx = idx;
241 } 270 }
242 271
243 sh_pmu->disable(hwc, idx); 272 sh_pmu->disable(hwc, idx);
244 273
245 cpuc->events[idx] = event; 274 event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
246 set_bit(idx, cpuc->active_mask); 275 if (flags & PERF_EF_START)
247 276 sh_pmu_start(event, PERF_EF_RELOAD);
248 sh_pmu->enable(hwc, idx);
249 277
250 perf_event_update_userpage(event); 278 perf_event_update_userpage(event);
251 279 ret = 0;
252 return 0; 280out:
281 perf_pmu_enable(event->pmu);
282 return ret;
253} 283}
254 284
255static void sh_pmu_read(struct perf_event *event) 285static void sh_pmu_read(struct perf_event *event)
@@ -257,24 +287,56 @@ static void sh_pmu_read(struct perf_event *event)
257 sh_perf_event_update(event, &event->hw, event->hw.idx); 287 sh_perf_event_update(event, &event->hw, event->hw.idx);
258} 288}
259 289
260static const struct pmu pmu = { 290static int sh_pmu_event_init(struct perf_event *event)
261 .enable = sh_pmu_enable,
262 .disable = sh_pmu_disable,
263 .read = sh_pmu_read,
264};
265
266const struct pmu *hw_perf_event_init(struct perf_event *event)
267{ 291{
268 int err = __hw_perf_event_init(event); 292 int err;
293
294 switch (event->attr.type) {
295 case PERF_TYPE_RAW:
296 case PERF_TYPE_HW_CACHE:
297 case PERF_TYPE_HARDWARE:
298 err = __hw_perf_event_init(event);
299 break;
300
301 default:
302 return -ENOENT;
303 }
304
269 if (unlikely(err)) { 305 if (unlikely(err)) {
270 if (event->destroy) 306 if (event->destroy)
271 event->destroy(event); 307 event->destroy(event);
272 return ERR_PTR(err);
273 } 308 }
274 309
275 return &pmu; 310 return err;
311}
312
313static void sh_pmu_enable(struct pmu *pmu)
314{
315 if (!sh_pmu_initialized())
316 return;
317
318 sh_pmu->enable_all();
319}
320
321static void sh_pmu_disable(struct pmu *pmu)
322{
323 if (!sh_pmu_initialized())
324 return;
325
326 sh_pmu->disable_all();
276} 327}
277 328
329static struct pmu pmu = {
330 .pmu_enable = sh_pmu_enable,
331 .pmu_disable = sh_pmu_disable,
332 .event_init = sh_pmu_event_init,
333 .add = sh_pmu_add,
334 .del = sh_pmu_del,
335 .start = sh_pmu_start,
336 .stop = sh_pmu_stop,
337 .read = sh_pmu_read,
338};
339
278static void sh_pmu_setup(int cpu) 340static void sh_pmu_setup(int cpu)
279{ 341{
280 struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); 342 struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
@@ -299,32 +361,17 @@ sh_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
299 return NOTIFY_OK; 361 return NOTIFY_OK;
300} 362}
301 363
302void hw_perf_enable(void) 364int __cpuinit register_sh_pmu(struct sh_pmu *_pmu)
303{
304 if (!sh_pmu_initialized())
305 return;
306
307 sh_pmu->enable_all();
308}
309
310void hw_perf_disable(void)
311{
312 if (!sh_pmu_initialized())
313 return;
314
315 sh_pmu->disable_all();
316}
317
318int __cpuinit register_sh_pmu(struct sh_pmu *pmu)
319{ 365{
320 if (sh_pmu) 366 if (sh_pmu)
321 return -EBUSY; 367 return -EBUSY;
322 sh_pmu = pmu; 368 sh_pmu = _pmu;
323 369
324 pr_info("Performance Events: %s support registered\n", pmu->name); 370 pr_info("Performance Events: %s support registered\n", _pmu->name);
325 371
326 WARN_ON(pmu->num_events > MAX_HWEVENTS); 372 WARN_ON(_pmu->num_events > MAX_HWEVENTS);
327 373
374 perf_pmu_register(&pmu);
328 perf_cpu_notifier(sh_pmu_notifier); 375 perf_cpu_notifier(sh_pmu_notifier);
329 return 0; 376 return 0;
330} 377}
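The sh perf_event.c conversion also shows the second half of the new model: hw_perf_enable()/hw_perf_disable() stop being global hooks and become pmu_enable/pmu_disable members of struct pmu, hw_perf_event_init() becomes an event_init callback that returns -ENOENT for event types it does not own, and the whole structure is handed to perf_pmu_register(). The sketch below illustrates that "callbacks live in a registered struct" shape with a deliberately simplified stand-in structure; it is not the kernel's struct pmu layout.

/* Simplified "register a struct of callbacks" sketch.  The layout and
 * the one-slot registry are inventions for the example. */
#include <stdio.h>

struct mock_pmu {
	const char *name;
	void (*pmu_enable)(struct mock_pmu *pmu);
	void (*pmu_disable)(struct mock_pmu *pmu);
	int  (*event_init)(struct mock_pmu *pmu, int type);
};

static void demo_enable(struct mock_pmu *pmu)  { printf("%s: enable all\n", pmu->name); }
static void demo_disable(struct mock_pmu *pmu) { printf("%s: disable all\n", pmu->name); }

/* event_init replaces hw_perf_event_init(): a "not mine" return for
 * unknown types lets the core try other PMUs. */
static int demo_event_init(struct mock_pmu *pmu, int type)
{
	(void)pmu;
	return (type == 0 /* "hardware" */) ? 0 : -1;
}

static struct mock_pmu *registered;

static void mock_pmu_register(struct mock_pmu *pmu)
{
	registered = pmu;   /* the core keeps a list; one slot suffices here */
}

int main(void)
{
	static struct mock_pmu pmu = {
		.name        = "demo",
		.pmu_enable  = demo_enable,
		.pmu_disable = demo_disable,
		.event_init  = demo_event_init,
	};

	mock_pmu_register(&pmu);
	registered->pmu_disable(registered);
	printf("init: %d\n", registered->event_init(registered, 0));
	registered->pmu_enable(registered);
	return 0;
}

The -ENOENT-style "not mine" return that this diff introduces in the default: cases is what makes dynamic registration workable: the core can offer an event to each registered PMU in turn.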
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 357ced3c33ff..f9a706759364 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -658,13 +658,16 @@ static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr)
658 658
659 enc = perf_event_get_enc(cpuc->events[i]); 659 enc = perf_event_get_enc(cpuc->events[i]);
660 pcr &= ~mask_for_index(idx); 660 pcr &= ~mask_for_index(idx);
661 pcr |= event_encoding(enc, idx); 661 if (hwc->state & PERF_HES_STOPPED)
662 pcr |= nop_for_index(idx);
663 else
664 pcr |= event_encoding(enc, idx);
662 } 665 }
663out: 666out:
664 return pcr; 667 return pcr;
665} 668}
666 669
667void hw_perf_enable(void) 670static void sparc_pmu_enable(struct pmu *pmu)
668{ 671{
669 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 672 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
670 u64 pcr; 673 u64 pcr;
@@ -691,7 +694,7 @@ void hw_perf_enable(void)
691 pcr_ops->write(cpuc->pcr); 694 pcr_ops->write(cpuc->pcr);
692} 695}
693 696
694void hw_perf_disable(void) 697static void sparc_pmu_disable(struct pmu *pmu)
695{ 698{
696 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 699 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
697 u64 val; 700 u64 val;
@@ -710,19 +713,65 @@ void hw_perf_disable(void)
710 pcr_ops->write(cpuc->pcr); 713 pcr_ops->write(cpuc->pcr);
711} 714}
712 715
713static void sparc_pmu_disable(struct perf_event *event) 716static int active_event_index(struct cpu_hw_events *cpuc,
717 struct perf_event *event)
718{
719 int i;
720
721 for (i = 0; i < cpuc->n_events; i++) {
722 if (cpuc->event[i] == event)
723 break;
724 }
725 BUG_ON(i == cpuc->n_events);
726 return cpuc->current_idx[i];
727}
728
729static void sparc_pmu_start(struct perf_event *event, int flags)
730{
731 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
732 int idx = active_event_index(cpuc, event);
733
734 if (flags & PERF_EF_RELOAD) {
735 WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
736 sparc_perf_event_set_period(event, &event->hw, idx);
737 }
738
739 event->hw.state = 0;
740
741 sparc_pmu_enable_event(cpuc, &event->hw, idx);
742}
743
744static void sparc_pmu_stop(struct perf_event *event, int flags)
745{
746 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
747 int idx = active_event_index(cpuc, event);
748
749 if (!(event->hw.state & PERF_HES_STOPPED)) {
750 sparc_pmu_disable_event(cpuc, &event->hw, idx);
751 event->hw.state |= PERF_HES_STOPPED;
752 }
753
754 if (!(event->hw.state & PERF_HES_UPTODATE) && (flags & PERF_EF_UPDATE)) {
755 sparc_perf_event_update(event, &event->hw, idx);
756 event->hw.state |= PERF_HES_UPTODATE;
757 }
758}
759
760static void sparc_pmu_del(struct perf_event *event, int _flags)
714{ 761{
715 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 762 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
716 struct hw_perf_event *hwc = &event->hw;
717 unsigned long flags; 763 unsigned long flags;
718 int i; 764 int i;
719 765
720 local_irq_save(flags); 766 local_irq_save(flags);
721 perf_disable(); 767 perf_pmu_disable(event->pmu);
722 768
723 for (i = 0; i < cpuc->n_events; i++) { 769 for (i = 0; i < cpuc->n_events; i++) {
724 if (event == cpuc->event[i]) { 770 if (event == cpuc->event[i]) {
725 int idx = cpuc->current_idx[i]; 771 /* Absorb the final count and turn off the
772 * event.
773 */
774 sparc_pmu_stop(event, PERF_EF_UPDATE);
726 775
727 /* Shift remaining entries down into 776 /* Shift remaining entries down into
728 * the existing slot. 777 * the existing slot.
@@ -734,13 +783,6 @@ static void sparc_pmu_disable(struct perf_event *event)
734 cpuc->current_idx[i]; 783 cpuc->current_idx[i];
735 } 784 }
736 785
737 /* Absorb the final count and turn off the
738 * event.
739 */
740 sparc_pmu_disable_event(cpuc, hwc, idx);
741 barrier();
742 sparc_perf_event_update(event, hwc, idx);
743
744 perf_event_update_userpage(event); 786 perf_event_update_userpage(event);
745 787
746 cpuc->n_events--; 788 cpuc->n_events--;
@@ -748,23 +790,10 @@ static void sparc_pmu_disable(struct perf_event *event)
748 } 790 }
749 } 791 }
750 792
751 perf_enable(); 793 perf_pmu_enable(event->pmu);
752 local_irq_restore(flags); 794 local_irq_restore(flags);
753} 795}
754 796
755static int active_event_index(struct cpu_hw_events *cpuc,
756 struct perf_event *event)
757{
758 int i;
759
760 for (i = 0; i < cpuc->n_events; i++) {
761 if (cpuc->event[i] == event)
762 break;
763 }
764 BUG_ON(i == cpuc->n_events);
765 return cpuc->current_idx[i];
766}
767
768static void sparc_pmu_read(struct perf_event *event) 797static void sparc_pmu_read(struct perf_event *event)
769{ 798{
770 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 799 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -774,15 +803,6 @@ static void sparc_pmu_read(struct perf_event *event)
774 sparc_perf_event_update(event, hwc, idx); 803 sparc_perf_event_update(event, hwc, idx);
775} 804}
776 805
777static void sparc_pmu_unthrottle(struct perf_event *event)
778{
779 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
780 int idx = active_event_index(cpuc, event);
781 struct hw_perf_event *hwc = &event->hw;
782
783 sparc_pmu_enable_event(cpuc, hwc, idx);
784}
785
786static atomic_t active_events = ATOMIC_INIT(0); 806static atomic_t active_events = ATOMIC_INIT(0);
787static DEFINE_MUTEX(pmc_grab_mutex); 807static DEFINE_MUTEX(pmc_grab_mutex);
788 808
@@ -877,7 +897,7 @@ static int sparc_check_constraints(struct perf_event **evts,
877 if (!n_ev) 897 if (!n_ev)
878 return 0; 898 return 0;
879 899
880 if (n_ev > perf_max_events) 900 if (n_ev > MAX_HWEVENTS)
881 return -1; 901 return -1;
882 902
883 msk0 = perf_event_get_msk(events[0]); 903 msk0 = perf_event_get_msk(events[0]);
@@ -984,23 +1004,27 @@ static int collect_events(struct perf_event *group, int max_count,
984 return n; 1004 return n;
985} 1005}
986 1006
987static int sparc_pmu_enable(struct perf_event *event) 1007static int sparc_pmu_add(struct perf_event *event, int ef_flags)
988{ 1008{
989 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1009 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
990 int n0, ret = -EAGAIN; 1010 int n0, ret = -EAGAIN;
991 unsigned long flags; 1011 unsigned long flags;
992 1012
993 local_irq_save(flags); 1013 local_irq_save(flags);
994 perf_disable(); 1014 perf_pmu_disable(event->pmu);
995 1015
996 n0 = cpuc->n_events; 1016 n0 = cpuc->n_events;
997 if (n0 >= perf_max_events) 1017 if (n0 >= MAX_HWEVENTS)
998 goto out; 1018 goto out;
999 1019
1000 cpuc->event[n0] = event; 1020 cpuc->event[n0] = event;
1001 cpuc->events[n0] = event->hw.event_base; 1021 cpuc->events[n0] = event->hw.event_base;
1002 cpuc->current_idx[n0] = PIC_NO_INDEX; 1022 cpuc->current_idx[n0] = PIC_NO_INDEX;
1003 1023
1024 event->hw.state = PERF_HES_UPTODATE;
1025 if (!(ef_flags & PERF_EF_START))
1026 event->hw.state |= PERF_HES_STOPPED;
1027
1004 /* 1028 /*
1005 * If group events scheduling transaction was started, 1029 * If group events scheduling transaction was started,
1006 * skip the schedulability test here, it will be peformed 1030 * skip the schedulability test here, it will be peformed
@@ -1020,12 +1044,12 @@ nocheck:
1020 1044
1021 ret = 0; 1045 ret = 0;
1022out: 1046out:
1023 perf_enable(); 1047 perf_pmu_enable(event->pmu);
1024 local_irq_restore(flags); 1048 local_irq_restore(flags);
1025 return ret; 1049 return ret;
1026} 1050}
1027 1051
1028static int __hw_perf_event_init(struct perf_event *event) 1052static int sparc_pmu_event_init(struct perf_event *event)
1029{ 1053{
1030 struct perf_event_attr *attr = &event->attr; 1054 struct perf_event_attr *attr = &event->attr;
1031 struct perf_event *evts[MAX_HWEVENTS]; 1055 struct perf_event *evts[MAX_HWEVENTS];
@@ -1038,17 +1062,27 @@ static int __hw_perf_event_init(struct perf_event *event)
1038 if (atomic_read(&nmi_active) < 0) 1062 if (atomic_read(&nmi_active) < 0)
1039 return -ENODEV; 1063 return -ENODEV;
1040 1064
1041 if (attr->type == PERF_TYPE_HARDWARE) { 1065 switch (attr->type) {
1066 case PERF_TYPE_HARDWARE:
1042 if (attr->config >= sparc_pmu->max_events) 1067 if (attr->config >= sparc_pmu->max_events)
1043 return -EINVAL; 1068 return -EINVAL;
1044 pmap = sparc_pmu->event_map(attr->config); 1069 pmap = sparc_pmu->event_map(attr->config);
1045 } else if (attr->type == PERF_TYPE_HW_CACHE) { 1070 break;
1071
1072 case PERF_TYPE_HW_CACHE:
1046 pmap = sparc_map_cache_event(attr->config); 1073 pmap = sparc_map_cache_event(attr->config);
1047 if (IS_ERR(pmap)) 1074 if (IS_ERR(pmap))
1048 return PTR_ERR(pmap); 1075 return PTR_ERR(pmap);
1049 } else 1076 break;
1077
1078 case PERF_TYPE_RAW:
1050 return -EOPNOTSUPP; 1079 return -EOPNOTSUPP;
1051 1080
1081 default:
1082 return -ENOENT;
1083
1084 }
1085
1052 /* We save the enable bits in the config_base. */ 1086 /* We save the enable bits in the config_base. */
1053 hwc->config_base = sparc_pmu->irq_bit; 1087 hwc->config_base = sparc_pmu->irq_bit;
1054 if (!attr->exclude_user) 1088 if (!attr->exclude_user)
@@ -1063,7 +1097,7 @@ static int __hw_perf_event_init(struct perf_event *event)
1063 n = 0; 1097 n = 0;
1064 if (event->group_leader != event) { 1098 if (event->group_leader != event) {
1065 n = collect_events(event->group_leader, 1099 n = collect_events(event->group_leader,
1066 perf_max_events - 1, 1100 MAX_HWEVENTS - 1,
1067 evts, events, current_idx_dmy); 1101 evts, events, current_idx_dmy);
1068 if (n < 0) 1102 if (n < 0)
1069 return -EINVAL; 1103 return -EINVAL;
@@ -1099,10 +1133,11 @@ static int __hw_perf_event_init(struct perf_event *event)
1099 * Set the flag to make pmu::enable() not perform the 1133 * Set the flag to make pmu::enable() not perform the
1100 * schedulability test, it will be performed at commit time 1134 * schedulability test, it will be performed at commit time
1101 */ 1135 */
1102static void sparc_pmu_start_txn(const struct pmu *pmu) 1136static void sparc_pmu_start_txn(struct pmu *pmu)
1103{ 1137{
1104 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 1138 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
1105 1139
1140 perf_pmu_disable(pmu);
1106 cpuhw->group_flag |= PERF_EVENT_TXN; 1141 cpuhw->group_flag |= PERF_EVENT_TXN;
1107} 1142}
1108 1143
@@ -1111,11 +1146,12 @@ static void sparc_pmu_start_txn(const struct pmu *pmu)
1111 * Clear the flag and pmu::enable() will perform the 1146 * Clear the flag and pmu::enable() will perform the
1112 * schedulability test. 1147 * schedulability test.
1113 */ 1148 */
1114static void sparc_pmu_cancel_txn(const struct pmu *pmu) 1149static void sparc_pmu_cancel_txn(struct pmu *pmu)
1115{ 1150{
1116 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 1151 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
1117 1152
1118 cpuhw->group_flag &= ~PERF_EVENT_TXN; 1153 cpuhw->group_flag &= ~PERF_EVENT_TXN;
1154 perf_pmu_enable(pmu);
1119} 1155}
1120 1156
1121/* 1157/*
@@ -1123,7 +1159,7 @@ static void sparc_pmu_cancel_txn(const struct pmu *pmu)
1123 * Perform the group schedulability test as a whole 1159 * Perform the group schedulability test as a whole
1124 * Return 0 if success 1160 * Return 0 if success
1125 */ 1161 */
1126static int sparc_pmu_commit_txn(const struct pmu *pmu) 1162static int sparc_pmu_commit_txn(struct pmu *pmu)
1127{ 1163{
1128 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1164 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1129 int n; 1165 int n;
@@ -1139,28 +1175,24 @@ static int sparc_pmu_commit_txn(const struct pmu *pmu)
1139 return -EAGAIN; 1175 return -EAGAIN;
1140 1176
1141 cpuc->group_flag &= ~PERF_EVENT_TXN; 1177 cpuc->group_flag &= ~PERF_EVENT_TXN;
1178 perf_pmu_enable(pmu);
1142 return 0; 1179 return 0;
1143} 1180}
1144 1181
1145static const struct pmu pmu = { 1182static struct pmu pmu = {
1146 .enable = sparc_pmu_enable, 1183 .pmu_enable = sparc_pmu_enable,
1147 .disable = sparc_pmu_disable, 1184 .pmu_disable = sparc_pmu_disable,
1185 .event_init = sparc_pmu_event_init,
1186 .add = sparc_pmu_add,
1187 .del = sparc_pmu_del,
1188 .start = sparc_pmu_start,
1189 .stop = sparc_pmu_stop,
1148 .read = sparc_pmu_read, 1190 .read = sparc_pmu_read,
1149 .unthrottle = sparc_pmu_unthrottle,
1150 .start_txn = sparc_pmu_start_txn, 1191 .start_txn = sparc_pmu_start_txn,
1151 .cancel_txn = sparc_pmu_cancel_txn, 1192 .cancel_txn = sparc_pmu_cancel_txn,
1152 .commit_txn = sparc_pmu_commit_txn, 1193 .commit_txn = sparc_pmu_commit_txn,
1153}; 1194};
1154 1195
1155const struct pmu *hw_perf_event_init(struct perf_event *event)
1156{
1157 int err = __hw_perf_event_init(event);
1158
1159 if (err)
1160 return ERR_PTR(err);
1161 return &pmu;
1162}
1163
1164void perf_event_print_debug(void) 1196void perf_event_print_debug(void)
1165{ 1197{
1166 unsigned long flags; 1198 unsigned long flags;
@@ -1236,7 +1268,7 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
1236 continue; 1268 continue;
1237 1269
1238 if (perf_event_overflow(event, 1, &data, regs)) 1270 if (perf_event_overflow(event, 1, &data, regs))
1239 sparc_pmu_disable_event(cpuc, hwc, idx); 1271 sparc_pmu_stop(event, 0);
1240 } 1272 }
1241 1273
1242 return NOTIFY_STOP; 1274 return NOTIFY_STOP;
@@ -1277,28 +1309,21 @@ void __init init_hw_perf_events(void)
1277 1309
1278 pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type); 1310 pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type);
1279 1311
1280 /* All sparc64 PMUs currently have 2 events. */ 1312 perf_pmu_register(&pmu);
1281 perf_max_events = 2;
1282
1283 register_die_notifier(&perf_event_nmi_notifier); 1313 register_die_notifier(&perf_event_nmi_notifier);
1284} 1314}
1285 1315
1286static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip) 1316void perf_callchain_kernel(struct perf_callchain_entry *entry,
1287{ 1317 struct pt_regs *regs)
1288 if (entry->nr < PERF_MAX_STACK_DEPTH)
1289 entry->ip[entry->nr++] = ip;
1290}
1291
1292static void perf_callchain_kernel(struct pt_regs *regs,
1293 struct perf_callchain_entry *entry)
1294{ 1318{
1295 unsigned long ksp, fp; 1319 unsigned long ksp, fp;
1296#ifdef CONFIG_FUNCTION_GRAPH_TRACER 1320#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1297 int graph = 0; 1321 int graph = 0;
1298#endif 1322#endif
1299 1323
1300 callchain_store(entry, PERF_CONTEXT_KERNEL); 1324 stack_trace_flush();
1301 callchain_store(entry, regs->tpc); 1325
1326 perf_callchain_store(entry, regs->tpc);
1302 1327
1303 ksp = regs->u_regs[UREG_I6]; 1328 ksp = regs->u_regs[UREG_I6];
1304 fp = ksp + STACK_BIAS; 1329 fp = ksp + STACK_BIAS;
@@ -1322,13 +1347,13 @@ static void perf_callchain_kernel(struct pt_regs *regs,
1322 pc = sf->callers_pc; 1347 pc = sf->callers_pc;
1323 fp = (unsigned long)sf->fp + STACK_BIAS; 1348 fp = (unsigned long)sf->fp + STACK_BIAS;
1324 } 1349 }
1325 callchain_store(entry, pc); 1350 perf_callchain_store(entry, pc);
1326#ifdef CONFIG_FUNCTION_GRAPH_TRACER 1351#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1327 if ((pc + 8UL) == (unsigned long) &return_to_handler) { 1352 if ((pc + 8UL) == (unsigned long) &return_to_handler) {
1328 int index = current->curr_ret_stack; 1353 int index = current->curr_ret_stack;
1329 if (current->ret_stack && index >= graph) { 1354 if (current->ret_stack && index >= graph) {
1330 pc = current->ret_stack[index - graph].ret; 1355 pc = current->ret_stack[index - graph].ret;
1331 callchain_store(entry, pc); 1356 perf_callchain_store(entry, pc);
1332 graph++; 1357 graph++;
1333 } 1358 }
1334 } 1359 }
@@ -1336,13 +1361,12 @@ static void perf_callchain_kernel(struct pt_regs *regs,
1336 } while (entry->nr < PERF_MAX_STACK_DEPTH); 1361 } while (entry->nr < PERF_MAX_STACK_DEPTH);
1337} 1362}
1338 1363
1339static void perf_callchain_user_64(struct pt_regs *regs, 1364static void perf_callchain_user_64(struct perf_callchain_entry *entry,
1340 struct perf_callchain_entry *entry) 1365 struct pt_regs *regs)
1341{ 1366{
1342 unsigned long ufp; 1367 unsigned long ufp;
1343 1368
1344 callchain_store(entry, PERF_CONTEXT_USER); 1369 perf_callchain_store(entry, regs->tpc);
1345 callchain_store(entry, regs->tpc);
1346 1370
1347 ufp = regs->u_regs[UREG_I6] + STACK_BIAS; 1371 ufp = regs->u_regs[UREG_I6] + STACK_BIAS;
1348 do { 1372 do {
@@ -1355,17 +1379,16 @@ static void perf_callchain_user_64(struct pt_regs *regs,
1355 1379
1356 pc = sf.callers_pc; 1380 pc = sf.callers_pc;
1357 ufp = (unsigned long)sf.fp + STACK_BIAS; 1381 ufp = (unsigned long)sf.fp + STACK_BIAS;
1358 callchain_store(entry, pc); 1382 perf_callchain_store(entry, pc);
1359 } while (entry->nr < PERF_MAX_STACK_DEPTH); 1383 } while (entry->nr < PERF_MAX_STACK_DEPTH);
1360} 1384}
1361 1385
1362static void perf_callchain_user_32(struct pt_regs *regs, 1386static void perf_callchain_user_32(struct perf_callchain_entry *entry,
1363 struct perf_callchain_entry *entry) 1387 struct pt_regs *regs)
1364{ 1388{
1365 unsigned long ufp; 1389 unsigned long ufp;
1366 1390
1367 callchain_store(entry, PERF_CONTEXT_USER); 1391 perf_callchain_store(entry, regs->tpc);
1368 callchain_store(entry, regs->tpc);
1369 1392
1370 ufp = regs->u_regs[UREG_I6] & 0xffffffffUL; 1393 ufp = regs->u_regs[UREG_I6] & 0xffffffffUL;
1371 do { 1394 do {
@@ -1378,34 +1401,16 @@ static void perf_callchain_user_32(struct pt_regs *regs,
1378 1401
1379 pc = sf.callers_pc; 1402 pc = sf.callers_pc;
1380 ufp = (unsigned long)sf.fp; 1403 ufp = (unsigned long)sf.fp;
1381 callchain_store(entry, pc); 1404 perf_callchain_store(entry, pc);
1382 } while (entry->nr < PERF_MAX_STACK_DEPTH); 1405 } while (entry->nr < PERF_MAX_STACK_DEPTH);
1383} 1406}
1384 1407
1385/* Like powerpc we can't get PMU interrupts within the PMU handler, 1408void
1386 * so no need for separate NMI and IRQ chains as on x86. 1409perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
1387 */
1388static DEFINE_PER_CPU(struct perf_callchain_entry, callchain);
1389
1390struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
1391{ 1410{
1392 struct perf_callchain_entry *entry = &__get_cpu_var(callchain); 1411 flushw_user();
1393 1412 if (test_thread_flag(TIF_32BIT))
1394 entry->nr = 0; 1413 perf_callchain_user_32(entry, regs);
1395 if (!user_mode(regs)) { 1414 else
1396 stack_trace_flush(); 1415 perf_callchain_user_64(entry, regs);
1397 perf_callchain_kernel(regs, entry);
1398 if (current->mm)
1399 regs = task_pt_regs(current);
1400 else
1401 regs = NULL;
1402 }
1403 if (regs) {
1404 flushw_user();
1405 if (test_thread_flag(TIF_32BIT))
1406 perf_callchain_user_32(regs, entry);
1407 else
1408 perf_callchain_user_64(regs, entry);
1409 }
1410 return entry;
1411} 1416}
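On sparc the transaction callbacks gain explicit PMU gating: start_txn() now calls perf_pmu_disable() and commit_txn()/cancel_txn() call perf_pmu_enable(), so a group is collected and schedulability-tested with the PMU off. A compact sketch of that protocol ordering is below; the real constraint check and error handling are omitted.

/* Group-scheduling transaction sketch: disable, collect, test once,
 * re-enable.  Stand-in state only; not the sparc implementation. */
#include <stdio.h>

#define MAX_GROUP 4

struct txn_state {
	int pmu_disabled;
	int in_txn;
	int n_events;
};

static void start_txn(struct txn_state *t)
{
	t->pmu_disabled = 1;            /* perf_pmu_disable() */
	t->in_txn = 1;
}

static int add_event(struct txn_state *t)
{
	if (t->n_events >= MAX_GROUP)
		return -1;
	t->n_events++;                  /* schedulability test deferred */
	return 0;
}

static int commit_txn(struct txn_state *t)
{
	/* one schedulability test for the whole group would go here */
	t->in_txn = 0;
	t->pmu_disabled = 0;            /* perf_pmu_enable() */
	return 0;
}

int main(void)
{
	struct txn_state t = { 0, 0, 0 };

	start_txn(&t);
	add_event(&t);
	add_event(&t);
	printf("commit=%d events=%d pmu_disabled=%d\n",
	       commit_txn(&t), t.n_events, t.pmu_disabled);
	return 0;
}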
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index def500776b16..a70cd216be5d 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -36,19 +36,6 @@
36#define P4_ESCR_EMASK(v) ((v) << P4_ESCR_EVENTMASK_SHIFT) 36#define P4_ESCR_EMASK(v) ((v) << P4_ESCR_EVENTMASK_SHIFT)
37#define P4_ESCR_TAG(v) ((v) << P4_ESCR_TAG_SHIFT) 37#define P4_ESCR_TAG(v) ((v) << P4_ESCR_TAG_SHIFT)
38 38
39/* Non HT mask */
40#define P4_ESCR_MASK \
41 (P4_ESCR_EVENT_MASK | \
42 P4_ESCR_EVENTMASK_MASK | \
43 P4_ESCR_TAG_MASK | \
44 P4_ESCR_TAG_ENABLE | \
45 P4_ESCR_T0_OS | \
46 P4_ESCR_T0_USR)
47
48/* HT mask */
49#define P4_ESCR_MASK_HT \
50 (P4_ESCR_MASK | P4_ESCR_T1_OS | P4_ESCR_T1_USR)
51
52#define P4_CCCR_OVF 0x80000000U 39#define P4_CCCR_OVF 0x80000000U
53#define P4_CCCR_CASCADE 0x40000000U 40#define P4_CCCR_CASCADE 0x40000000U
54#define P4_CCCR_OVF_PMI_T0 0x04000000U 41#define P4_CCCR_OVF_PMI_T0 0x04000000U
@@ -70,23 +57,6 @@
70#define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT) 57#define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT)
71#define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT) 58#define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT)
72 59
73/* Non HT mask */
74#define P4_CCCR_MASK \
75 (P4_CCCR_OVF | \
76 P4_CCCR_CASCADE | \
77 P4_CCCR_OVF_PMI_T0 | \
78 P4_CCCR_FORCE_OVF | \
79 P4_CCCR_EDGE | \
80 P4_CCCR_THRESHOLD_MASK | \
81 P4_CCCR_COMPLEMENT | \
82 P4_CCCR_COMPARE | \
83 P4_CCCR_ESCR_SELECT_MASK | \
84 P4_CCCR_ENABLE)
85
86/* HT mask */
87#define P4_CCCR_MASK_HT \
88 (P4_CCCR_MASK | P4_CCCR_OVF_PMI_T1 | P4_CCCR_THREAD_ANY)
89
90#define P4_GEN_ESCR_EMASK(class, name, bit) \ 60#define P4_GEN_ESCR_EMASK(class, name, bit) \
91 class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT) 61 class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT)
92#define P4_ESCR_EMASK_BIT(class, name) class##__##name 62#define P4_ESCR_EMASK_BIT(class, name) class##__##name
@@ -127,6 +97,28 @@
127#define P4_CONFIG_HT_SHIFT 63 97#define P4_CONFIG_HT_SHIFT 63
128#define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) 98#define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT)
129 99
100/*
101 * The bits we allow to pass for RAW events
102 */
103#define P4_CONFIG_MASK_ESCR \
104 P4_ESCR_EVENT_MASK | \
105 P4_ESCR_EVENTMASK_MASK | \
106 P4_ESCR_TAG_MASK | \
107 P4_ESCR_TAG_ENABLE
108
109#define P4_CONFIG_MASK_CCCR \
110 P4_CCCR_EDGE | \
111 P4_CCCR_THRESHOLD_MASK | \
112 P4_CCCR_COMPLEMENT | \
113 P4_CCCR_COMPARE | \
114 P4_CCCR_THREAD_ANY | \
115 P4_CCCR_RESERVED
116
117/* some dangerous bits are reserved for kernel internals */
118#define P4_CONFIG_MASK \
119 (p4_config_pack_escr(P4_CONFIG_MASK_ESCR)) | \
120 (p4_config_pack_cccr(P4_CONFIG_MASK_CCCR))
121
130static inline bool p4_is_event_cascaded(u64 config) 122static inline bool p4_is_event_cascaded(u64 config)
131{ 123{
132 u32 cccr = p4_config_unpack_cccr(config); 124 u32 cccr = p4_config_unpack_cccr(config);
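The perf_event_p4.h hunk replaces the old per-thread P4_ESCR_MASK/P4_CCCR_MASK definitions with the P4_CONFIG_MASK_ESCR/P4_CONFIG_MASK_CCCR whitelists that together form P4_CONFIG_MASK: the set of bits a PERF_TYPE_RAW config is allowed to carry, with kernel-managed bits (overflow, PMI routing, enable) filtered out, as the new comments state. The sketch below shows the general whitelist-sanitizing idea with an invented bit layout; it does not reproduce the real P4 ESCR/CCCR encoding.

/* Whitelist sanitizing of a user-supplied raw config.  The bit layout
 * here is made up for the example. */
#include <stdio.h>

#define CFG_EVENT_MASK   0x00ff0000u   /* allowed: event select    */
#define CFG_UMASK_MASK   0x0000ff00u   /* allowed: event unit mask */
#define CFG_EDGE         0x00000004u   /* allowed: edge detect     */
#define CFG_ENABLE       0x00000001u   /* kernel-owned, filtered   */
#define CFG_PMI          0x00000002u   /* kernel-owned, filtered   */

#define CFG_ALLOWED (CFG_EVENT_MASK | CFG_UMASK_MASK | CFG_EDGE)

static unsigned int sanitize_raw_config(unsigned int user)
{
	return user & CFG_ALLOWED;
}

int main(void)
{
	unsigned int raw = 0x003c1207u;  /* user asked for ENABLE|PMI too */

	printf("raw=%#x sanitized=%#x\n", raw, sanitize_raw_config(raw));
	return 0;
}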
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 3efdf2870a35..0fb17050360f 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -530,7 +530,7 @@ static int x86_pmu_hw_config(struct perf_event *event)
530/* 530/*
531 * Setup the hardware configuration for a given attr_type 531 * Setup the hardware configuration for a given attr_type
532 */ 532 */
533static int __hw_perf_event_init(struct perf_event *event) 533static int __x86_pmu_event_init(struct perf_event *event)
534{ 534{
535 int err; 535 int err;
536 536
@@ -583,7 +583,7 @@ static void x86_pmu_disable_all(void)
583 } 583 }
584} 584}
585 585
586void hw_perf_disable(void) 586static void x86_pmu_disable(struct pmu *pmu)
587{ 587{
588 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 588 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
589 589
@@ -618,7 +618,7 @@ static void x86_pmu_enable_all(int added)
618 } 618 }
619} 619}
620 620
621static const struct pmu pmu; 621static struct pmu pmu;
622 622
623static inline int is_x86_event(struct perf_event *event) 623static inline int is_x86_event(struct perf_event *event)
624{ 624{
@@ -800,10 +800,10 @@ static inline int match_prev_assignment(struct hw_perf_event *hwc,
800 hwc->last_tag == cpuc->tags[i]; 800 hwc->last_tag == cpuc->tags[i];
801} 801}
802 802
803static int x86_pmu_start(struct perf_event *event); 803static void x86_pmu_start(struct perf_event *event, int flags);
804static void x86_pmu_stop(struct perf_event *event); 804static void x86_pmu_stop(struct perf_event *event, int flags);
805 805
806void hw_perf_enable(void) 806static void x86_pmu_enable(struct pmu *pmu)
807{ 807{
808 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 808 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
809 struct perf_event *event; 809 struct perf_event *event;
@@ -839,7 +839,14 @@ void hw_perf_enable(void)
839 match_prev_assignment(hwc, cpuc, i)) 839 match_prev_assignment(hwc, cpuc, i))
840 continue; 840 continue;
841 841
842 x86_pmu_stop(event); 842 /*
843 * Ensure we don't accidentally enable a stopped
844 * counter simply because we rescheduled.
845 */
846 if (hwc->state & PERF_HES_STOPPED)
847 hwc->state |= PERF_HES_ARCH;
848
849 x86_pmu_stop(event, PERF_EF_UPDATE);
843 } 850 }
844 851
845 for (i = 0; i < cpuc->n_events; i++) { 852 for (i = 0; i < cpuc->n_events; i++) {
@@ -851,7 +858,10 @@ void hw_perf_enable(void)
851 else if (i < n_running) 858 else if (i < n_running)
852 continue; 859 continue;
853 860
854 x86_pmu_start(event); 861 if (hwc->state & PERF_HES_ARCH)
862 continue;
863
864 x86_pmu_start(event, PERF_EF_RELOAD);
855 } 865 }
856 cpuc->n_added = 0; 866 cpuc->n_added = 0;
857 perf_events_lapic_init(); 867 perf_events_lapic_init();
@@ -952,15 +962,12 @@ static void x86_pmu_enable_event(struct perf_event *event)
952} 962}
953 963
954/* 964/*
955 * activate a single event 965 * Add a single event to the PMU.
956 * 966 *
957 * The event is added to the group of enabled events 967 * The event is added to the group of enabled events
958 * but only if it can be scehduled with existing events. 968 * but only if it can be scehduled with existing events.
959 *
960 * Called with PMU disabled. If successful and return value 1,
961 * then guaranteed to call perf_enable() and hw_perf_enable()
962 */ 969 */
963static int x86_pmu_enable(struct perf_event *event) 970static int x86_pmu_add(struct perf_event *event, int flags)
964{ 971{
965 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 972 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
966 struct hw_perf_event *hwc; 973 struct hw_perf_event *hwc;
@@ -969,57 +976,66 @@ static int x86_pmu_enable(struct perf_event *event)
969 976
970 hwc = &event->hw; 977 hwc = &event->hw;
971 978
979 perf_pmu_disable(event->pmu);
972 n0 = cpuc->n_events; 980 n0 = cpuc->n_events;
973 n = collect_events(cpuc, event, false); 981 ret = n = collect_events(cpuc, event, false);
974 if (n < 0) 982 if (ret < 0)
975 return n; 983 goto out;
984
985 hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
986 if (!(flags & PERF_EF_START))
987 hwc->state |= PERF_HES_ARCH;
976 988
977 /* 989 /*
978 * If group events scheduling transaction was started, 990 * If group events scheduling transaction was started,
979 * skip the schedulability test here, it will be peformed 991 * skip the schedulability test here, it will be peformed
980 * at commit time(->commit_txn) as a whole 992 * at commit time (->commit_txn) as a whole
981 */ 993 */
982 if (cpuc->group_flag & PERF_EVENT_TXN) 994 if (cpuc->group_flag & PERF_EVENT_TXN)
983 goto out; 995 goto done_collect;
984 996
985 ret = x86_pmu.schedule_events(cpuc, n, assign); 997 ret = x86_pmu.schedule_events(cpuc, n, assign);
986 if (ret) 998 if (ret)
987 return ret; 999 goto out;
988 /* 1000 /*
989 * copy new assignment, now we know it is possible 1001 * copy new assignment, now we know it is possible
990 * will be used by hw_perf_enable() 1002 * will be used by hw_perf_enable()
991 */ 1003 */
992 memcpy(cpuc->assign, assign, n*sizeof(int)); 1004 memcpy(cpuc->assign, assign, n*sizeof(int));
993 1005
994out: 1006done_collect:
995 cpuc->n_events = n; 1007 cpuc->n_events = n;
996 cpuc->n_added += n - n0; 1008 cpuc->n_added += n - n0;
997 cpuc->n_txn += n - n0; 1009 cpuc->n_txn += n - n0;
998 1010
999 return 0; 1011 ret = 0;
1012out:
1013 perf_pmu_enable(event->pmu);
1014 return ret;
1000} 1015}
1001 1016
1002static int x86_pmu_start(struct perf_event *event) 1017static void x86_pmu_start(struct perf_event *event, int flags)
1003{ 1018{
1004 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1019 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1005 int idx = event->hw.idx; 1020 int idx = event->hw.idx;
1006 1021
1007 if (idx == -1) 1022 if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
1008 return -EAGAIN; 1023 return;
1024
1025 if (WARN_ON_ONCE(idx == -1))
1026 return;
1027
1028 if (flags & PERF_EF_RELOAD) {
1029 WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
1030 x86_perf_event_set_period(event);
1031 }
1032
1033 event->hw.state = 0;
1009 1034
1010 x86_perf_event_set_period(event);
1011 cpuc->events[idx] = event; 1035 cpuc->events[idx] = event;
1012 __set_bit(idx, cpuc->active_mask); 1036 __set_bit(idx, cpuc->active_mask);
1013 x86_pmu.enable(event); 1037 x86_pmu.enable(event);
1014 perf_event_update_userpage(event); 1038 perf_event_update_userpage(event);
1015
1016 return 0;
1017}
1018
1019static void x86_pmu_unthrottle(struct perf_event *event)
1020{
1021 int ret = x86_pmu_start(event);
1022 WARN_ON_ONCE(ret);
1023} 1039}
1024 1040
1025void perf_event_print_debug(void) 1041void perf_event_print_debug(void)
@@ -1076,27 +1092,29 @@ void perf_event_print_debug(void)
1076 local_irq_restore(flags); 1092 local_irq_restore(flags);
1077} 1093}
1078 1094
1079static void x86_pmu_stop(struct perf_event *event) 1095static void x86_pmu_stop(struct perf_event *event, int flags)
1080{ 1096{
1081 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1097 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1082 struct hw_perf_event *hwc = &event->hw; 1098 struct hw_perf_event *hwc = &event->hw;
1083 int idx = hwc->idx;
1084 1099
1085 if (!__test_and_clear_bit(idx, cpuc->active_mask)) 1100 if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) {
1086 return; 1101 x86_pmu.disable(event);
1087 1102 cpuc->events[hwc->idx] = NULL;
1088 x86_pmu.disable(event); 1103 WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
1089 1104 hwc->state |= PERF_HES_STOPPED;
1090 /* 1105 }
1091 * Drain the remaining delta count out of a event
1092 * that we are disabling:
1093 */
1094 x86_perf_event_update(event);
1095 1106
1096 cpuc->events[idx] = NULL; 1107 if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
1108 /*
1109 * Drain the remaining delta count out of a event
1110 * that we are disabling:
1111 */
1112 x86_perf_event_update(event);
1113 hwc->state |= PERF_HES_UPTODATE;
1114 }
1097} 1115}
1098 1116
1099static void x86_pmu_disable(struct perf_event *event) 1117static void x86_pmu_del(struct perf_event *event, int flags)
1100{ 1118{
1101 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1119 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1102 int i; 1120 int i;
@@ -1109,7 +1127,7 @@ static void x86_pmu_disable(struct perf_event *event)
1109 if (cpuc->group_flag & PERF_EVENT_TXN) 1127 if (cpuc->group_flag & PERF_EVENT_TXN)
1110 return; 1128 return;
1111 1129
1112 x86_pmu_stop(event); 1130 x86_pmu_stop(event, PERF_EF_UPDATE);
1113 1131
1114 for (i = 0; i < cpuc->n_events; i++) { 1132 for (i = 0; i < cpuc->n_events; i++) {
1115 if (event == cpuc->event_list[i]) { 1133 if (event == cpuc->event_list[i]) {
@@ -1161,7 +1179,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
1161 continue; 1179 continue;
1162 1180
1163 if (perf_event_overflow(event, 1, &data, regs)) 1181 if (perf_event_overflow(event, 1, &data, regs))
1164 x86_pmu_stop(event); 1182 x86_pmu_stop(event, 0);
1165 } 1183 }
1166 1184
1167 if (handled) 1185 if (handled)
@@ -1378,7 +1396,6 @@ void __init init_hw_perf_events(void)
1378 x86_pmu.num_counters = X86_PMC_MAX_GENERIC; 1396 x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
1379 } 1397 }
1380 x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; 1398 x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
1381 perf_max_events = x86_pmu.num_counters;
1382 1399
1383 if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { 1400 if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
1384 WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", 1401 WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
@@ -1414,6 +1431,7 @@ void __init init_hw_perf_events(void)
1414 pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); 1431 pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
1415 pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); 1432 pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);
1416 1433
1434 perf_pmu_register(&pmu);
1417 perf_cpu_notifier(x86_pmu_notifier); 1435 perf_cpu_notifier(x86_pmu_notifier);
1418} 1436}
1419 1437
@@ -1427,10 +1445,11 @@ static inline void x86_pmu_read(struct perf_event *event)
1427 * Set the flag to make pmu::enable() not perform the 1445 * Set the flag to make pmu::enable() not perform the
1428 * schedulability test, it will be performed at commit time 1446 * schedulability test, it will be performed at commit time
1429 */ 1447 */
1430static void x86_pmu_start_txn(const struct pmu *pmu) 1448static void x86_pmu_start_txn(struct pmu *pmu)
1431{ 1449{
1432 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1450 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1433 1451
1452 perf_pmu_disable(pmu);
1434 cpuc->group_flag |= PERF_EVENT_TXN; 1453 cpuc->group_flag |= PERF_EVENT_TXN;
1435 cpuc->n_txn = 0; 1454 cpuc->n_txn = 0;
1436} 1455}
@@ -1440,7 +1459,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu)
1440 * Clear the flag and pmu::enable() will perform the 1459 * Clear the flag and pmu::enable() will perform the
1441 * schedulability test. 1460 * schedulability test.
1442 */ 1461 */
1443static void x86_pmu_cancel_txn(const struct pmu *pmu) 1462static void x86_pmu_cancel_txn(struct pmu *pmu)
1444{ 1463{
1445 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1464 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1446 1465
@@ -1450,6 +1469,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)
1450 */ 1469 */
1451 cpuc->n_added -= cpuc->n_txn; 1470 cpuc->n_added -= cpuc->n_txn;
1452 cpuc->n_events -= cpuc->n_txn; 1471 cpuc->n_events -= cpuc->n_txn;
1472 perf_pmu_enable(pmu);
1453} 1473}
1454 1474
1455/* 1475/*
@@ -1457,7 +1477,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)
1457 * Perform the group schedulability test as a whole 1477 * Perform the group schedulability test as a whole
1458 * Return 0 if success 1478 * Return 0 if success
1459 */ 1479 */
1460static int x86_pmu_commit_txn(const struct pmu *pmu) 1480static int x86_pmu_commit_txn(struct pmu *pmu)
1461{ 1481{
1462 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1482 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1463 int assign[X86_PMC_IDX_MAX]; 1483 int assign[X86_PMC_IDX_MAX];
@@ -1479,22 +1499,10 @@ static int x86_pmu_commit_txn(const struct pmu *pmu)
1479 memcpy(cpuc->assign, assign, n*sizeof(int)); 1499 memcpy(cpuc->assign, assign, n*sizeof(int));
1480 1500
1481 cpuc->group_flag &= ~PERF_EVENT_TXN; 1501 cpuc->group_flag &= ~PERF_EVENT_TXN;
1482 1502 perf_pmu_enable(pmu);
1483 return 0; 1503 return 0;
1484} 1504}
1485 1505
1486static const struct pmu pmu = {
1487 .enable = x86_pmu_enable,
1488 .disable = x86_pmu_disable,
1489 .start = x86_pmu_start,
1490 .stop = x86_pmu_stop,
1491 .read = x86_pmu_read,
1492 .unthrottle = x86_pmu_unthrottle,
1493 .start_txn = x86_pmu_start_txn,
1494 .cancel_txn = x86_pmu_cancel_txn,
1495 .commit_txn = x86_pmu_commit_txn,
1496};
1497
1498/* 1506/*
1499 * validate that we can schedule this event 1507 * validate that we can schedule this event
1500 */ 1508 */
@@ -1569,12 +1577,22 @@ out:
1569 return ret; 1577 return ret;
1570} 1578}
1571 1579
1572const struct pmu *hw_perf_event_init(struct perf_event *event) 1580int x86_pmu_event_init(struct perf_event *event)
1573{ 1581{
1574 const struct pmu *tmp; 1582 struct pmu *tmp;
1575 int err; 1583 int err;
1576 1584
1577 err = __hw_perf_event_init(event); 1585 switch (event->attr.type) {
1586 case PERF_TYPE_RAW:
1587 case PERF_TYPE_HARDWARE:
1588 case PERF_TYPE_HW_CACHE:
1589 break;
1590
1591 default:
1592 return -ENOENT;
1593 }
1594
1595 err = __x86_pmu_event_init(event);
1578 if (!err) { 1596 if (!err) {
1579 /* 1597 /*
1580 * we temporarily connect event to its pmu 1598 * we temporarily connect event to its pmu
@@ -1594,26 +1612,31 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
1594 if (err) { 1612 if (err) {
1595 if (event->destroy) 1613 if (event->destroy)
1596 event->destroy(event); 1614 event->destroy(event);
1597 return ERR_PTR(err);
1598 } 1615 }
1599 1616
1600 return &pmu; 1617 return err;
1601} 1618}
1602 1619
1603/* 1620static struct pmu pmu = {
1604 * callchain support 1621 .pmu_enable = x86_pmu_enable,
1605 */ 1622 .pmu_disable = x86_pmu_disable,
1606 1623
1607static inline 1624 .event_init = x86_pmu_event_init,
1608void callchain_store(struct perf_callchain_entry *entry, u64 ip)
1609{
1610 if (entry->nr < PERF_MAX_STACK_DEPTH)
1611 entry->ip[entry->nr++] = ip;
1612}
1613 1625
1614static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); 1626 .add = x86_pmu_add,
1615static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); 1627 .del = x86_pmu_del,
1628 .start = x86_pmu_start,
1629 .stop = x86_pmu_stop,
1630 .read = x86_pmu_read,
1631
1632 .start_txn = x86_pmu_start_txn,
1633 .cancel_txn = x86_pmu_cancel_txn,
1634 .commit_txn = x86_pmu_commit_txn,
1635};
1616 1636
1637/*
1638 * callchain support
1639 */
1617 1640
1618static void 1641static void
1619backtrace_warning_symbol(void *data, char *msg, unsigned long symbol) 1642backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
@@ -1635,7 +1658,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
1635{ 1658{
1636 struct perf_callchain_entry *entry = data; 1659 struct perf_callchain_entry *entry = data;
1637 1660
1638 callchain_store(entry, addr); 1661 perf_callchain_store(entry, addr);
1639} 1662}
1640 1663
1641static const struct stacktrace_ops backtrace_ops = { 1664static const struct stacktrace_ops backtrace_ops = {
@@ -1646,11 +1669,15 @@ static const struct stacktrace_ops backtrace_ops = {
1646 .walk_stack = print_context_stack_bp, 1669 .walk_stack = print_context_stack_bp,
1647}; 1670};
1648 1671
1649static void 1672void
1650perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) 1673perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
1651{ 1674{
1652 callchain_store(entry, PERF_CONTEXT_KERNEL); 1675 if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1653 callchain_store(entry, regs->ip); 1676 /* TODO: We don't support guest os callchain now */
1677 return;
1678 }
1679
1680 perf_callchain_store(entry, regs->ip);
1654 1681
1655 dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); 1682 dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
1656} 1683}
@@ -1679,7 +1706,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
1679 if (fp < compat_ptr(regs->sp)) 1706 if (fp < compat_ptr(regs->sp))
1680 break; 1707 break;
1681 1708
1682 callchain_store(entry, frame.return_address); 1709 perf_callchain_store(entry, frame.return_address);
1683 fp = compat_ptr(frame.next_frame); 1710 fp = compat_ptr(frame.next_frame);
1684 } 1711 }
1685 return 1; 1712 return 1;
@@ -1692,19 +1719,20 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
1692} 1719}
1693#endif 1720#endif
1694 1721
1695static void 1722void
1696perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) 1723perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
1697{ 1724{
1698 struct stack_frame frame; 1725 struct stack_frame frame;
1699 const void __user *fp; 1726 const void __user *fp;
1700 1727
1701 if (!user_mode(regs)) 1728 if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1702 regs = task_pt_regs(current); 1729 /* TODO: We don't support guest os callchain now */
1730 return;
1731 }
1703 1732
1704 fp = (void __user *)regs->bp; 1733 fp = (void __user *)regs->bp;
1705 1734
1706 callchain_store(entry, PERF_CONTEXT_USER); 1735 perf_callchain_store(entry, regs->ip);
1707 callchain_store(entry, regs->ip);
1708 1736
1709 if (perf_callchain_user32(regs, entry)) 1737 if (perf_callchain_user32(regs, entry))
1710 return; 1738 return;
@@ -1721,52 +1749,11 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
1721 if ((unsigned long)fp < regs->sp) 1749 if ((unsigned long)fp < regs->sp)
1722 break; 1750 break;
1723 1751
1724 callchain_store(entry, frame.return_address); 1752 perf_callchain_store(entry, frame.return_address);
1725 fp = frame.next_frame; 1753 fp = frame.next_frame;
1726 } 1754 }
1727} 1755}
1728 1756
1729static void
1730perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
1731{
1732 int is_user;
1733
1734 if (!regs)
1735 return;
1736
1737 is_user = user_mode(regs);
1738
1739 if (is_user && current->state != TASK_RUNNING)
1740 return;
1741
1742 if (!is_user)
1743 perf_callchain_kernel(regs, entry);
1744
1745 if (current->mm)
1746 perf_callchain_user(regs, entry);
1747}
1748
1749struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
1750{
1751 struct perf_callchain_entry *entry;
1752
1753 if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1754 /* TODO: We don't support guest os callchain now */
1755 return NULL;
1756 }
1757
1758 if (in_nmi())
1759 entry = &__get_cpu_var(pmc_nmi_entry);
1760 else
1761 entry = &__get_cpu_var(pmc_irq_entry);
1762
1763 entry->nr = 0;
1764
1765 perf_do_callchain(regs, entry);
1766
1767 return entry;
1768}
1769
1770unsigned long perf_instruction_pointer(struct pt_regs *regs) 1757unsigned long perf_instruction_pointer(struct pt_regs *regs)
1771{ 1758{
1772 unsigned long ip; 1759 unsigned long ip;
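In the x86 core, the reschedule path inside the renamed x86_pmu_enable() gains a subtle fix: events that were already stopped are tagged with PERF_HES_ARCH before being torn down, and the restart loop skips them, so a reschedule no longer silently restarts a counter that had been deliberately stopped. The sketch below isolates just that two-pass logic; the flag names match the diff, the scheduling itself is mocked.

/* Two-pass reschedule sketch: remember which counters were stopped,
 * then restart only the ones that were running.  Illustration only. */
#include <stdio.h>

#define PERF_HES_STOPPED  0x01
#define PERF_HES_ARCH     0x04

struct mock_counter {
	const char *name;
	int state;
};

int main(void)
{
	struct mock_counter c[2] = {
		{ "cycles", 0 },                 /* running            */
		{ "paused", PERF_HES_STOPPED },  /* user stopped it    */
	};
	int i;

	/* tear-down pass: mark counters that were already stopped */
	for (i = 0; i < 2; i++) {
		if (c[i].state & PERF_HES_STOPPED)
			c[i].state |= PERF_HES_ARCH;
		c[i].state |= PERF_HES_STOPPED;   /* x86_pmu_stop()  */
	}

	/* restart pass: skip what was stopped before the reshuffle */
	for (i = 0; i < 2; i++) {
		if (c[i].state & PERF_HES_ARCH) {
			printf("%s: left stopped\n", c[i].name);
			continue;
		}
		c[i].state &= ~PERF_HES_STOPPED;  /* x86_pmu_start() */
		printf("%s: restarted\n", c[i].name);
	}
	return 0;
}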
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index ee05c90012d2..c8f5c088cad1 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -713,18 +713,18 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
713 struct cpu_hw_events *cpuc; 713 struct cpu_hw_events *cpuc;
714 int bit, loops; 714 int bit, loops;
715 u64 status; 715 u64 status;
716 int handled = 0; 716 int handled;
717 717
718 perf_sample_data_init(&data, 0); 718 perf_sample_data_init(&data, 0);
719 719
720 cpuc = &__get_cpu_var(cpu_hw_events); 720 cpuc = &__get_cpu_var(cpu_hw_events);
721 721
722 intel_pmu_disable_all(); 722 intel_pmu_disable_all();
723 intel_pmu_drain_bts_buffer(); 723 handled = intel_pmu_drain_bts_buffer();
724 status = intel_pmu_get_status(); 724 status = intel_pmu_get_status();
725 if (!status) { 725 if (!status) {
726 intel_pmu_enable_all(0); 726 intel_pmu_enable_all(0);
727 return 0; 727 return handled;
728 } 728 }
729 729
730 loops = 0; 730 loops = 0;
@@ -763,7 +763,7 @@ again:
763 data.period = event->hw.last_period; 763 data.period = event->hw.last_period;
764 764
765 if (perf_event_overflow(event, 1, &data, regs)) 765 if (perf_event_overflow(event, 1, &data, regs))
766 x86_pmu_stop(event); 766 x86_pmu_stop(event, 0);
767 } 767 }
768 768
769 /* 769 /*
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 18018d1311cd..4977f9c400e5 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -214,7 +214,7 @@ static void intel_pmu_disable_bts(void)
214 update_debugctlmsr(debugctlmsr); 214 update_debugctlmsr(debugctlmsr);
215} 215}
216 216
217static void intel_pmu_drain_bts_buffer(void) 217static int intel_pmu_drain_bts_buffer(void)
218{ 218{
219 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 219 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
220 struct debug_store *ds = cpuc->ds; 220 struct debug_store *ds = cpuc->ds;
@@ -231,16 +231,16 @@ static void intel_pmu_drain_bts_buffer(void)
231 struct pt_regs regs; 231 struct pt_regs regs;
232 232
233 if (!event) 233 if (!event)
234 return; 234 return 0;
235 235
236 if (!ds) 236 if (!ds)
237 return; 237 return 0;
238 238
239 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; 239 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
240 top = (struct bts_record *)(unsigned long)ds->bts_index; 240 top = (struct bts_record *)(unsigned long)ds->bts_index;
241 241
242 if (top <= at) 242 if (top <= at)
243 return; 243 return 0;
244 244
245 ds->bts_index = ds->bts_buffer_base; 245 ds->bts_index = ds->bts_buffer_base;
246 246
@@ -256,7 +256,7 @@ static void intel_pmu_drain_bts_buffer(void)
256 perf_prepare_sample(&header, &data, event, &regs); 256 perf_prepare_sample(&header, &data, event, &regs);
257 257
258 if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1)) 258 if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1))
259 return; 259 return 1;
260 260
261 for (; at < top; at++) { 261 for (; at < top; at++) {
262 data.ip = at->from; 262 data.ip = at->from;
@@ -270,6 +270,7 @@ static void intel_pmu_drain_bts_buffer(void)
270 /* There's new data available. */ 270 /* There's new data available. */
271 event->hw.interrupts++; 271 event->hw.interrupts++;
272 event->pending_kill = POLL_IN; 272 event->pending_kill = POLL_IN;
273 return 1;
273} 274}
274 275
275/* 276/*
@@ -491,7 +492,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
491 regs.flags &= ~PERF_EFLAGS_EXACT; 492 regs.flags &= ~PERF_EFLAGS_EXACT;
492 493
493 if (perf_event_overflow(event, 1, &data, &regs)) 494 if (perf_event_overflow(event, 1, &data, &regs))
494 x86_pmu_stop(event); 495 x86_pmu_stop(event, 0);
495} 496}
496 497
497static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) 498static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
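
Taken together, the two Intel hunks above change the PMI bookkeeping: intel_pmu_drain_bts_buffer() now reports whether it flushed BTS records, and intel_pmu_handle_irq() folds that into its return value, so a PMI that only carried BTS data still counts as handled. A simplified sketch of the resulting flow (everything unrelated to the return value is paraphrased away):

static int intel_pmu_handle_irq(struct pt_regs *regs)
{
	u64 status;
	int handled;

	intel_pmu_disable_all();
	handled = intel_pmu_drain_bts_buffer();	/* 1 if BTS data was consumed */

	status = intel_pmu_get_status();
	if (!status) {
		intel_pmu_enable_all(0);
		return handled;		/* possibly a BTS-only PMI: still handled */
	}

	/* ... for each set GLOBAL_STATUS bit: account it as handled, take the
	 *     sample, and x86_pmu_stop(event, 0) if the event gets throttled ... */

	intel_pmu_enable_all(0);
	return handled;
}
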
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index b560db3305be..c70c878ee02a 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -18,6 +18,8 @@
18struct p4_event_bind { 18struct p4_event_bind {
19 unsigned int opcode; /* Event code and ESCR selector */ 19 unsigned int opcode; /* Event code and ESCR selector */
20 unsigned int escr_msr[2]; /* ESCR MSR for this event */ 20 unsigned int escr_msr[2]; /* ESCR MSR for this event */
21 unsigned int escr_emask; /* valid ESCR EventMask bits */
22 unsigned int shared; /* event is shared across threads */
21 char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on abscence */ 23 char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on abscence */
22}; 24};
23 25
@@ -66,231 +68,435 @@ static struct p4_event_bind p4_event_bind_map[] = {
66 [P4_EVENT_TC_DELIVER_MODE] = { 68 [P4_EVENT_TC_DELIVER_MODE] = {
67 .opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE), 69 .opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE),
68 .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, 70 .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
71 .escr_emask =
72 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD) |
73 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DB) |
74 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DI) |
75 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BD) |
76 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BB) |
77 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BI) |
78 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, ID),
79 .shared = 1,
69 .cntr = { {4, 5, -1}, {6, 7, -1} }, 80 .cntr = { {4, 5, -1}, {6, 7, -1} },
70 }, 81 },
71 [P4_EVENT_BPU_FETCH_REQUEST] = { 82 [P4_EVENT_BPU_FETCH_REQUEST] = {
72 .opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST), 83 .opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST),
73 .escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 }, 84 .escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 },
85 .escr_emask =
86 P4_ESCR_EMASK_BIT(P4_EVENT_BPU_FETCH_REQUEST, TCMISS),
74 .cntr = { {0, -1, -1}, {2, -1, -1} }, 87 .cntr = { {0, -1, -1}, {2, -1, -1} },
75 }, 88 },
76 [P4_EVENT_ITLB_REFERENCE] = { 89 [P4_EVENT_ITLB_REFERENCE] = {
77 .opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE), 90 .opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE),
78 .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 }, 91 .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 },
92 .escr_emask =
93 P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT) |
94 P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, MISS) |
95 P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT_UK),
79 .cntr = { {0, -1, -1}, {2, -1, -1} }, 96 .cntr = { {0, -1, -1}, {2, -1, -1} },
80 }, 97 },
81 [P4_EVENT_MEMORY_CANCEL] = { 98 [P4_EVENT_MEMORY_CANCEL] = {
82 .opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL), 99 .opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL),
83 .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, 100 .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
101 .escr_emask =
102 P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, ST_RB_FULL) |
103 P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, 64K_CONF),
84 .cntr = { {8, 9, -1}, {10, 11, -1} }, 104 .cntr = { {8, 9, -1}, {10, 11, -1} },
85 }, 105 },
86 [P4_EVENT_MEMORY_COMPLETE] = { 106 [P4_EVENT_MEMORY_COMPLETE] = {
87 .opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE), 107 .opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE),
88 .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, 108 .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 },
109 .escr_emask =
110 P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, LSC) |
111 P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, SSC),
89 .cntr = { {8, 9, -1}, {10, 11, -1} }, 112 .cntr = { {8, 9, -1}, {10, 11, -1} },
90 }, 113 },
91 [P4_EVENT_LOAD_PORT_REPLAY] = { 114 [P4_EVENT_LOAD_PORT_REPLAY] = {
92 .opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY), 115 .opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY),
93 .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 }, 116 .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 },
117 .escr_emask =
118 P4_ESCR_EMASK_BIT(P4_EVENT_LOAD_PORT_REPLAY, SPLIT_LD),
94 .cntr = { {8, 9, -1}, {10, 11, -1} }, 119 .cntr = { {8, 9, -1}, {10, 11, -1} },
95 }, 120 },
96 [P4_EVENT_STORE_PORT_REPLAY] = { 121 [P4_EVENT_STORE_PORT_REPLAY] = {
97 .opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY), 122 .opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY),
98 .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, 123 .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 },
124 .escr_emask =
125 P4_ESCR_EMASK_BIT(P4_EVENT_STORE_PORT_REPLAY, SPLIT_ST),
99 .cntr = { {8, 9, -1}, {10, 11, -1} }, 126 .cntr = { {8, 9, -1}, {10, 11, -1} },
100 }, 127 },
101 [P4_EVENT_MOB_LOAD_REPLAY] = { 128 [P4_EVENT_MOB_LOAD_REPLAY] = {
102 .opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY), 129 .opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY),
103 .escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 }, 130 .escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 },
131 .escr_emask =
132 P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STA) |
133 P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STD) |
134 P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, PARTIAL_DATA) |
135 P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, UNALGN_ADDR),
104 .cntr = { {0, -1, -1}, {2, -1, -1} }, 136 .cntr = { {0, -1, -1}, {2, -1, -1} },
105 }, 137 },
106 [P4_EVENT_PAGE_WALK_TYPE] = { 138 [P4_EVENT_PAGE_WALK_TYPE] = {
107 .opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE), 139 .opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE),
108 .escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 }, 140 .escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 },
141 .escr_emask =
142 P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, DTMISS) |
143 P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, ITMISS),
144 .shared = 1,
109 .cntr = { {0, -1, -1}, {2, -1, -1} }, 145 .cntr = { {0, -1, -1}, {2, -1, -1} },
110 }, 146 },
111 [P4_EVENT_BSQ_CACHE_REFERENCE] = { 147 [P4_EVENT_BSQ_CACHE_REFERENCE] = {
112 .opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE), 148 .opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE),
113 .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, 149 .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
150 .escr_emask =
151 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) |
152 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) |
153 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) |
154 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) |
155 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) |
156 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM) |
157 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) |
158 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) |
159 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS),
114 .cntr = { {0, -1, -1}, {2, -1, -1} }, 160 .cntr = { {0, -1, -1}, {2, -1, -1} },
115 }, 161 },
116 [P4_EVENT_IOQ_ALLOCATION] = { 162 [P4_EVENT_IOQ_ALLOCATION] = {
117 .opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION), 163 .opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION),
118 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 164 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
165 .escr_emask =
166 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, DEFAULT) |
167 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_READ) |
168 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_WRITE) |
169 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_UC) |
170 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WC) |
171 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WT) |
172 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WP) |
173 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WB) |
174 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OWN) |
175 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OTHER) |
176 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, PREFETCH),
119 .cntr = { {0, -1, -1}, {2, -1, -1} }, 177 .cntr = { {0, -1, -1}, {2, -1, -1} },
120 }, 178 },
121 [P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */ 179 [P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */
122 .opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES), 180 .opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES),
123 .escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 }, 181 .escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 },
182 .escr_emask =
183 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, DEFAULT) |
184 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_READ) |
185 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_WRITE) |
186 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_UC) |
187 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WC) |
188 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WT) |
189 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WP) |
190 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WB) |
191 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OWN) |
192 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OTHER) |
193 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, PREFETCH),
124 .cntr = { {2, -1, -1}, {3, -1, -1} }, 194 .cntr = { {2, -1, -1}, {3, -1, -1} },
125 }, 195 },
126 [P4_EVENT_FSB_DATA_ACTIVITY] = { 196 [P4_EVENT_FSB_DATA_ACTIVITY] = {
127 .opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY), 197 .opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY),
128 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 198 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
199 .escr_emask =
200 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) |
201 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN) |
202 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OTHER) |
203 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_DRV) |
204 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OWN) |
205 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OTHER),
206 .shared = 1,
129 .cntr = { {0, -1, -1}, {2, -1, -1} }, 207 .cntr = { {0, -1, -1}, {2, -1, -1} },
130 }, 208 },
131 [P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */ 209 [P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */
132 .opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION), 210 .opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION),
133 .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 }, 211 .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 },
212 .escr_emask =
213 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE0) |
214 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE1) |
215 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN0) |
216 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN1) |
217 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_IO_TYPE) |
218 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LOCK_TYPE) |
219 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_CACHE_TYPE) |
220 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_SPLIT_TYPE) |
221 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_DEM_TYPE) |
222 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_ORD_TYPE) |
223 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE0) |
224 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE1) |
225 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE2),
134 .cntr = { {0, -1, -1}, {1, -1, -1} }, 226 .cntr = { {0, -1, -1}, {1, -1, -1} },
135 }, 227 },
136 [P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */ 228 [P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */
137 .opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES), 229 .opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES),
138 .escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 }, 230 .escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 },
231 .escr_emask =
232 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE0) |
233 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE1) |
234 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN0) |
235 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN1) |
236 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE) |
237 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE) |
238 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE) |
239 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE) |
240 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE) |
241 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE) |
242 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE0) |
243 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE1) |
244 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE2),
139 .cntr = { {2, -1, -1}, {3, -1, -1} }, 245 .cntr = { {2, -1, -1}, {3, -1, -1} },
140 }, 246 },
141 [P4_EVENT_SSE_INPUT_ASSIST] = { 247 [P4_EVENT_SSE_INPUT_ASSIST] = {
142 .opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST), 248 .opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST),
143 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 249 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
250 .escr_emask =
251 P4_ESCR_EMASK_BIT(P4_EVENT_SSE_INPUT_ASSIST, ALL),
252 .shared = 1,
144 .cntr = { {8, 9, -1}, {10, 11, -1} }, 253 .cntr = { {8, 9, -1}, {10, 11, -1} },
145 }, 254 },
146 [P4_EVENT_PACKED_SP_UOP] = { 255 [P4_EVENT_PACKED_SP_UOP] = {
147 .opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP), 256 .opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP),
148 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 257 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
258 .escr_emask =
259 P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_SP_UOP, ALL),
260 .shared = 1,
149 .cntr = { {8, 9, -1}, {10, 11, -1} }, 261 .cntr = { {8, 9, -1}, {10, 11, -1} },
150 }, 262 },
151 [P4_EVENT_PACKED_DP_UOP] = { 263 [P4_EVENT_PACKED_DP_UOP] = {
152 .opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP), 264 .opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP),
153 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 265 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
266 .escr_emask =
267 P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_DP_UOP, ALL),
268 .shared = 1,
154 .cntr = { {8, 9, -1}, {10, 11, -1} }, 269 .cntr = { {8, 9, -1}, {10, 11, -1} },
155 }, 270 },
156 [P4_EVENT_SCALAR_SP_UOP] = { 271 [P4_EVENT_SCALAR_SP_UOP] = {
157 .opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP), 272 .opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP),
158 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 273 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
274 .escr_emask =
275 P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_SP_UOP, ALL),
276 .shared = 1,
159 .cntr = { {8, 9, -1}, {10, 11, -1} }, 277 .cntr = { {8, 9, -1}, {10, 11, -1} },
160 }, 278 },
161 [P4_EVENT_SCALAR_DP_UOP] = { 279 [P4_EVENT_SCALAR_DP_UOP] = {
162 .opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP), 280 .opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP),
163 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 281 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
282 .escr_emask =
283 P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_DP_UOP, ALL),
284 .shared = 1,
164 .cntr = { {8, 9, -1}, {10, 11, -1} }, 285 .cntr = { {8, 9, -1}, {10, 11, -1} },
165 }, 286 },
166 [P4_EVENT_64BIT_MMX_UOP] = { 287 [P4_EVENT_64BIT_MMX_UOP] = {
167 .opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP), 288 .opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP),
168 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 289 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
290 .escr_emask =
291 P4_ESCR_EMASK_BIT(P4_EVENT_64BIT_MMX_UOP, ALL),
292 .shared = 1,
169 .cntr = { {8, 9, -1}, {10, 11, -1} }, 293 .cntr = { {8, 9, -1}, {10, 11, -1} },
170 }, 294 },
171 [P4_EVENT_128BIT_MMX_UOP] = { 295 [P4_EVENT_128BIT_MMX_UOP] = {
172 .opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP), 296 .opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP),
173 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 297 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
298 .escr_emask =
299 P4_ESCR_EMASK_BIT(P4_EVENT_128BIT_MMX_UOP, ALL),
300 .shared = 1,
174 .cntr = { {8, 9, -1}, {10, 11, -1} }, 301 .cntr = { {8, 9, -1}, {10, 11, -1} },
175 }, 302 },
176 [P4_EVENT_X87_FP_UOP] = { 303 [P4_EVENT_X87_FP_UOP] = {
177 .opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP), 304 .opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP),
178 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 305 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
306 .escr_emask =
307 P4_ESCR_EMASK_BIT(P4_EVENT_X87_FP_UOP, ALL),
308 .shared = 1,
179 .cntr = { {8, 9, -1}, {10, 11, -1} }, 309 .cntr = { {8, 9, -1}, {10, 11, -1} },
180 }, 310 },
181 [P4_EVENT_TC_MISC] = { 311 [P4_EVENT_TC_MISC] = {
182 .opcode = P4_OPCODE(P4_EVENT_TC_MISC), 312 .opcode = P4_OPCODE(P4_EVENT_TC_MISC),
183 .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, 313 .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
314 .escr_emask =
315 P4_ESCR_EMASK_BIT(P4_EVENT_TC_MISC, FLUSH),
184 .cntr = { {4, 5, -1}, {6, 7, -1} }, 316 .cntr = { {4, 5, -1}, {6, 7, -1} },
185 }, 317 },
186 [P4_EVENT_GLOBAL_POWER_EVENTS] = { 318 [P4_EVENT_GLOBAL_POWER_EVENTS] = {
187 .opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS), 319 .opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS),
188 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 320 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
321 .escr_emask =
322 P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING),
189 .cntr = { {0, -1, -1}, {2, -1, -1} }, 323 .cntr = { {0, -1, -1}, {2, -1, -1} },
190 }, 324 },
191 [P4_EVENT_TC_MS_XFER] = { 325 [P4_EVENT_TC_MS_XFER] = {
192 .opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER), 326 .opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER),
193 .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, 327 .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
328 .escr_emask =
329 P4_ESCR_EMASK_BIT(P4_EVENT_TC_MS_XFER, CISC),
194 .cntr = { {4, 5, -1}, {6, 7, -1} }, 330 .cntr = { {4, 5, -1}, {6, 7, -1} },
195 }, 331 },
196 [P4_EVENT_UOP_QUEUE_WRITES] = { 332 [P4_EVENT_UOP_QUEUE_WRITES] = {
197 .opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES), 333 .opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES),
198 .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, 334 .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
335 .escr_emask =
336 P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_BUILD) |
337 P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_DELIVER) |
338 P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_ROM),
199 .cntr = { {4, 5, -1}, {6, 7, -1} }, 339 .cntr = { {4, 5, -1}, {6, 7, -1} },
200 }, 340 },
201 [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = { 341 [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = {
202 .opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE), 342 .opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE),
203 .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 }, 343 .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 },
344 .escr_emask =
345 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL) |
346 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CALL) |
347 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, RETURN) |
348 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT),
204 .cntr = { {4, 5, -1}, {6, 7, -1} }, 349 .cntr = { {4, 5, -1}, {6, 7, -1} },
205 }, 350 },
206 [P4_EVENT_RETIRED_BRANCH_TYPE] = { 351 [P4_EVENT_RETIRED_BRANCH_TYPE] = {
207 .opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE), 352 .opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE),
208 .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 }, 353 .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 },
354 .escr_emask =
355 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) |
356 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) |
357 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) |
358 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT),
209 .cntr = { {4, 5, -1}, {6, 7, -1} }, 359 .cntr = { {4, 5, -1}, {6, 7, -1} },
210 }, 360 },
211 [P4_EVENT_RESOURCE_STALL] = { 361 [P4_EVENT_RESOURCE_STALL] = {
212 .opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL), 362 .opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL),
213 .escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 }, 363 .escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 },
364 .escr_emask =
365 P4_ESCR_EMASK_BIT(P4_EVENT_RESOURCE_STALL, SBFULL),
214 .cntr = { {12, 13, 16}, {14, 15, 17} }, 366 .cntr = { {12, 13, 16}, {14, 15, 17} },
215 }, 367 },
216 [P4_EVENT_WC_BUFFER] = { 368 [P4_EVENT_WC_BUFFER] = {
217 .opcode = P4_OPCODE(P4_EVENT_WC_BUFFER), 369 .opcode = P4_OPCODE(P4_EVENT_WC_BUFFER),
218 .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, 370 .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
371 .escr_emask =
372 P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_EVICTS) |
373 P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_FULL_EVICTS),
374 .shared = 1,
219 .cntr = { {8, 9, -1}, {10, 11, -1} }, 375 .cntr = { {8, 9, -1}, {10, 11, -1} },
220 }, 376 },
221 [P4_EVENT_B2B_CYCLES] = { 377 [P4_EVENT_B2B_CYCLES] = {
222 .opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES), 378 .opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES),
223 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 379 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
380 .escr_emask = 0,
224 .cntr = { {0, -1, -1}, {2, -1, -1} }, 381 .cntr = { {0, -1, -1}, {2, -1, -1} },
225 }, 382 },
226 [P4_EVENT_BNR] = { 383 [P4_EVENT_BNR] = {
227 .opcode = P4_OPCODE(P4_EVENT_BNR), 384 .opcode = P4_OPCODE(P4_EVENT_BNR),
228 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 385 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
386 .escr_emask = 0,
229 .cntr = { {0, -1, -1}, {2, -1, -1} }, 387 .cntr = { {0, -1, -1}, {2, -1, -1} },
230 }, 388 },
231 [P4_EVENT_SNOOP] = { 389 [P4_EVENT_SNOOP] = {
232 .opcode = P4_OPCODE(P4_EVENT_SNOOP), 390 .opcode = P4_OPCODE(P4_EVENT_SNOOP),
233 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 391 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
392 .escr_emask = 0,
234 .cntr = { {0, -1, -1}, {2, -1, -1} }, 393 .cntr = { {0, -1, -1}, {2, -1, -1} },
235 }, 394 },
236 [P4_EVENT_RESPONSE] = { 395 [P4_EVENT_RESPONSE] = {
237 .opcode = P4_OPCODE(P4_EVENT_RESPONSE), 396 .opcode = P4_OPCODE(P4_EVENT_RESPONSE),
238 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 397 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
398 .escr_emask = 0,
239 .cntr = { {0, -1, -1}, {2, -1, -1} }, 399 .cntr = { {0, -1, -1}, {2, -1, -1} },
240 }, 400 },
241 [P4_EVENT_FRONT_END_EVENT] = { 401 [P4_EVENT_FRONT_END_EVENT] = {
242 .opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT), 402 .opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT),
243 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, 403 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
404 .escr_emask =
405 P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, NBOGUS) |
406 P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, BOGUS),
244 .cntr = { {12, 13, 16}, {14, 15, 17} }, 407 .cntr = { {12, 13, 16}, {14, 15, 17} },
245 }, 408 },
246 [P4_EVENT_EXECUTION_EVENT] = { 409 [P4_EVENT_EXECUTION_EVENT] = {
247 .opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT), 410 .opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT),
248 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, 411 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
412 .escr_emask =
413 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0) |
414 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1) |
415 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2) |
416 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3) |
417 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) |
418 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) |
419 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) |
420 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3),
249 .cntr = { {12, 13, 16}, {14, 15, 17} }, 421 .cntr = { {12, 13, 16}, {14, 15, 17} },
250 }, 422 },
251 [P4_EVENT_REPLAY_EVENT] = { 423 [P4_EVENT_REPLAY_EVENT] = {
252 .opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT), 424 .opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT),
253 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, 425 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
426 .escr_emask =
427 P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, NBOGUS) |
428 P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, BOGUS),
254 .cntr = { {12, 13, 16}, {14, 15, 17} }, 429 .cntr = { {12, 13, 16}, {14, 15, 17} },
255 }, 430 },
256 [P4_EVENT_INSTR_RETIRED] = { 431 [P4_EVENT_INSTR_RETIRED] = {
257 .opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED), 432 .opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED),
258 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, 433 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
434 .escr_emask =
435 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) |
436 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSTAG) |
437 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG) |
438 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSTAG),
259 .cntr = { {12, 13, 16}, {14, 15, 17} }, 439 .cntr = { {12, 13, 16}, {14, 15, 17} },
260 }, 440 },
261 [P4_EVENT_UOPS_RETIRED] = { 441 [P4_EVENT_UOPS_RETIRED] = {
262 .opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED), 442 .opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED),
263 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, 443 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
444 .escr_emask =
445 P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, NBOGUS) |
446 P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, BOGUS),
264 .cntr = { {12, 13, 16}, {14, 15, 17} }, 447 .cntr = { {12, 13, 16}, {14, 15, 17} },
265 }, 448 },
266 [P4_EVENT_UOP_TYPE] = { 449 [P4_EVENT_UOP_TYPE] = {
267 .opcode = P4_OPCODE(P4_EVENT_UOP_TYPE), 450 .opcode = P4_OPCODE(P4_EVENT_UOP_TYPE),
268 .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 }, 451 .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 },
452 .escr_emask =
453 P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGLOADS) |
454 P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGSTORES),
269 .cntr = { {12, 13, 16}, {14, 15, 17} }, 455 .cntr = { {12, 13, 16}, {14, 15, 17} },
270 }, 456 },
271 [P4_EVENT_BRANCH_RETIRED] = { 457 [P4_EVENT_BRANCH_RETIRED] = {
272 .opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED), 458 .opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED),
273 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, 459 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
460 .escr_emask =
461 P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNP) |
462 P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNM) |
463 P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTP) |
464 P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTM),
274 .cntr = { {12, 13, 16}, {14, 15, 17} }, 465 .cntr = { {12, 13, 16}, {14, 15, 17} },
275 }, 466 },
276 [P4_EVENT_MISPRED_BRANCH_RETIRED] = { 467 [P4_EVENT_MISPRED_BRANCH_RETIRED] = {
277 .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED), 468 .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED),
278 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, 469 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
470 .escr_emask =
471 P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS),
279 .cntr = { {12, 13, 16}, {14, 15, 17} }, 472 .cntr = { {12, 13, 16}, {14, 15, 17} },
280 }, 473 },
281 [P4_EVENT_X87_ASSIST] = { 474 [P4_EVENT_X87_ASSIST] = {
282 .opcode = P4_OPCODE(P4_EVENT_X87_ASSIST), 475 .opcode = P4_OPCODE(P4_EVENT_X87_ASSIST),
283 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, 476 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
477 .escr_emask =
478 P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSU) |
479 P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSO) |
480 P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAO) |
481 P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAU) |
482 P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, PREA),
284 .cntr = { {12, 13, 16}, {14, 15, 17} }, 483 .cntr = { {12, 13, 16}, {14, 15, 17} },
285 }, 484 },
286 [P4_EVENT_MACHINE_CLEAR] = { 485 [P4_EVENT_MACHINE_CLEAR] = {
287 .opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR), 486 .opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR),
288 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, 487 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
488 .escr_emask =
489 P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, CLEAR) |
490 P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, MOCLEAR) |
491 P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, SMCLEAR),
289 .cntr = { {12, 13, 16}, {14, 15, 17} }, 492 .cntr = { {12, 13, 16}, {14, 15, 17} },
290 }, 493 },
291 [P4_EVENT_INSTR_COMPLETED] = { 494 [P4_EVENT_INSTR_COMPLETED] = {
292 .opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED), 495 .opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED),
293 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, 496 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
497 .escr_emask =
498 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, NBOGUS) |
499 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, BOGUS),
294 .cntr = { {12, 13, 16}, {14, 15, 17} }, 500 .cntr = { {12, 13, 16}, {14, 15, 17} },
295 }, 501 },
296}; 502};
@@ -428,29 +634,73 @@ static u64 p4_pmu_event_map(int hw_event)
428 return config; 634 return config;
429} 635}
430 636
637/* check cpu model specifics */
638static bool p4_event_match_cpu_model(unsigned int event_idx)
639{
640	/* INSTR_COMPLETED event only exists for models 3, 4, 6 (Prescott) */

641 if (event_idx == P4_EVENT_INSTR_COMPLETED) {
642 if (boot_cpu_data.x86_model != 3 &&
643 boot_cpu_data.x86_model != 4 &&
644 boot_cpu_data.x86_model != 6)
645 return false;
646 }
647
648 /*
649 * For info
650 * - IQ_ESCR0, IQ_ESCR1 only for models 1 and 2
651 */
652
653 return true;
654}
655
431static int p4_validate_raw_event(struct perf_event *event) 656static int p4_validate_raw_event(struct perf_event *event)
432{ 657{
433 unsigned int v; 658 unsigned int v, emask;
434 659
435 /* user data may have out-of-bound event index */ 660 /* User data may have out-of-bound event index */
436 v = p4_config_unpack_event(event->attr.config); 661 v = p4_config_unpack_event(event->attr.config);
437 if (v >= ARRAY_SIZE(p4_event_bind_map)) { 662 if (v >= ARRAY_SIZE(p4_event_bind_map))
438 pr_warning("P4 PMU: Unknown event code: %d\n", v); 663 return -EINVAL;
664
665 /* It may be unsupported: */
666 if (!p4_event_match_cpu_model(v))
439 return -EINVAL; 667 return -EINVAL;
668
669 /*
 670	 * NOTE: P4_CCCR_THREAD_ANY does not have the same meaning as
 671	 * in Architectural Performance Monitoring: it selects not
 672	 * _which_ logical cpu to count on but rather _when_ to count,
 673	 * i.e. it depends on logical cpu state -- count the event if one
 674	 * cpu is active, none, both or any -- so we simply allow the
 675	 * user to pass any desired value.
 676	 *
 677	 * In turn we always set the Tx_OS/Tx_USR bits bound to the local
 678	 * logical cpu, without propagating them to the other cpu.
679 */
680
681 /*
 682	 * If an event is shared across the logical threads,
 683	 * the user needs special permissions to use it.
684 */
685 if (p4_event_bind_map[v].shared) {
686 if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
687 return -EACCES;
440 } 688 }
441 689
690 /* ESCR EventMask bits may be invalid */
691 emask = p4_config_unpack_escr(event->attr.config) & P4_ESCR_EVENTMASK_MASK;
692 if (emask & ~p4_event_bind_map[v].escr_emask)
693 return -EINVAL;
694
442 /* 695 /*
443 * it may have some screwed PEBS bits 696 * it may have some invalid PEBS bits
444 */ 697 */
445 if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) { 698 if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE))
446 pr_warning("P4 PMU: PEBS are not supported yet\n");
447 return -EINVAL; 699 return -EINVAL;
448 } 700
449 v = p4_config_unpack_metric(event->attr.config); 701 v = p4_config_unpack_metric(event->attr.config);
450 if (v >= ARRAY_SIZE(p4_pebs_bind_map)) { 702 if (v >= ARRAY_SIZE(p4_pebs_bind_map))
451 pr_warning("P4 PMU: Unknown metric code: %d\n", v);
452 return -EINVAL; 703 return -EINVAL;
453 }
454 704
455 return 0; 705 return 0;
456} 706}
@@ -478,27 +728,21 @@ static int p4_hw_config(struct perf_event *event)
478 728
479 if (event->attr.type == PERF_TYPE_RAW) { 729 if (event->attr.type == PERF_TYPE_RAW) {
480 730
731 /*
 732		 * Clear the bits we reserve to be managed by the kernel itself
 733		 * and that are never allowed from user space
734 */
735 event->attr.config &= P4_CONFIG_MASK;
736
481 rc = p4_validate_raw_event(event); 737 rc = p4_validate_raw_event(event);
482 if (rc) 738 if (rc)
483 goto out; 739 goto out;
484 740
485 /* 741 /*
486 * We don't control raw events so it's up to the caller
487 * to pass sane values (and we don't count the thread number
488 * on HT machine but allow HT-compatible specifics to be
489 * passed on)
490 *
491 * Note that for RAW events we allow user to use P4_CCCR_RESERVED 742 * Note that for RAW events we allow user to use P4_CCCR_RESERVED
492 * bits since we keep additional info here (for cache events and etc) 743 * bits since we keep additional info here (for cache events and etc)
493 *
494 * XXX: HT wide things should check perf_paranoid_cpu() &&
495 * CAP_SYS_ADMIN
496 */ 744 */
497 event->hw.config |= event->attr.config & 745 event->hw.config |= event->attr.config;
498 (p4_config_pack_escr(P4_ESCR_MASK_HT) |
499 p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED));
500
501 event->hw.config &= ~P4_CCCR_FORCE_OVF;
502 } 746 }
503 747
504 rc = x86_setup_perfctr(event); 748 rc = x86_setup_perfctr(event);
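
To make the new allow-list concrete: escr_emask in the binding table above is assembled from P4_ESCR_EMASK_BIT() values, and p4_validate_raw_event() now rejects any raw config whose unpacked ESCR EventMask falls outside it. The essential checks, written out schematically (assuming the unpack helpers behave as their names suggest):

	unsigned int ev, emask;

	ev = p4_config_unpack_event(event->attr.config);
	if (ev >= ARRAY_SIZE(p4_event_bind_map))
		return -EINVAL;			/* unknown event code */

	if (!p4_event_match_cpu_model(ev))
		return -EINVAL;			/* e.g. INSTR_COMPLETED on pre-Prescott */

	if (p4_event_bind_map[ev].shared &&
	    perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
		return -EACCES;			/* thread-shared ESCR needs privilege */

	emask = p4_config_unpack_escr(event->attr.config) & P4_ESCR_EVENTMASK_MASK;
	if (emask & ~p4_event_bind_map[ev].escr_emask)
		return -EINVAL;			/* EventMask bit not defined for this event */

So, for example, a raw config selecting P4_EVENT_BPU_FETCH_REQUEST with any EventMask bit other than TCMISS now fails with -EINVAL instead of silently programming an undefined mask.
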
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 770ebfb349e9..e05952af5d26 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -230,9 +230,6 @@ static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
230 return 0; 230 return 0;
231} 231}
232 232
233/* Dummy buffers for kallsyms_lookup */
234static char __dummy_buf[KSYM_NAME_LEN];
235
236/* Check if paddr is at an instruction boundary */ 233/* Check if paddr is at an instruction boundary */
237static int __kprobes can_probe(unsigned long paddr) 234static int __kprobes can_probe(unsigned long paddr)
238{ 235{
@@ -241,7 +238,7 @@ static int __kprobes can_probe(unsigned long paddr)
241 struct insn insn; 238 struct insn insn;
242 kprobe_opcode_t buf[MAX_INSN_SIZE]; 239 kprobe_opcode_t buf[MAX_INSN_SIZE];
243 240
244 if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf)) 241 if (!kallsyms_lookup_size_offset(paddr, NULL, &offset))
245 return 0; 242 return 0;
246 243
247 /* Decode instructions */ 244 /* Decode instructions */
@@ -1129,7 +1126,7 @@ static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr,
1129 *(unsigned long *)addr = val; 1126 *(unsigned long *)addr = val;
1130} 1127}
1131 1128
1132void __kprobes kprobes_optinsn_template_holder(void) 1129static void __used __kprobes kprobes_optinsn_template_holder(void)
1133{ 1130{
1134 asm volatile ( 1131 asm volatile (
1135 ".global optprobe_template_entry\n" 1132 ".global optprobe_template_entry\n"
@@ -1269,11 +1266,9 @@ static int __kprobes can_optimize(unsigned long paddr)
1269 unsigned long addr, size = 0, offset = 0; 1266 unsigned long addr, size = 0, offset = 0;
1270 struct insn insn; 1267 struct insn insn;
1271 kprobe_opcode_t buf[MAX_INSN_SIZE]; 1268 kprobe_opcode_t buf[MAX_INSN_SIZE];
1272 /* Dummy buffers for lookup_symbol_attrs */
1273 static char __dummy_buf[KSYM_NAME_LEN];
1274 1269
1275 /* Lookup symbol including addr */ 1270 /* Lookup symbol including addr */
1276 if (!kallsyms_lookup(paddr, &size, &offset, NULL, __dummy_buf)) 1271 if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
1277 return 0; 1272 return 0;
1278 1273
1279 /* Check there is enough space for a relative jump. */ 1274 /* Check there is enough space for a relative jump. */
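
The kprobes change is mechanical but worth spelling out: kallsyms_lookup() demanded a name buffer even though only the size and offset were wanted, hence the static __dummy_buf arrays; kallsyms_lookup_size_offset() returns just those two values, so the dummies go away. A minimal sketch of the boundary check built on it:

	unsigned long size = 0, offset = 0;

	/* is paddr inside a known symbol at all? */
	if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
		return 0;

	/* then decode forward from the symbol start (paddr - offset) and
	 * verify paddr lands on an instruction boundary within size bytes */
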
diff --git a/include/asm-generic/hardirq.h b/include/asm-generic/hardirq.h
index 62f59080e5cc..04d0a977cd43 100644
--- a/include/asm-generic/hardirq.h
+++ b/include/asm-generic/hardirq.h
@@ -3,13 +3,13 @@
3 3
4#include <linux/cache.h> 4#include <linux/cache.h>
5#include <linux/threads.h> 5#include <linux/threads.h>
6#include <linux/irq.h>
7 6
8typedef struct { 7typedef struct {
9 unsigned int __softirq_pending; 8 unsigned int __softirq_pending;
10} ____cacheline_aligned irq_cpustat_t; 9} ____cacheline_aligned irq_cpustat_t;
11 10
12#include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */ 11#include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */
12#include <linux/irq.h>
13 13
14#ifndef ack_bad_irq 14#ifndef ack_bad_irq
15static inline void ack_bad_irq(unsigned int irq) 15static inline void ack_bad_irq(unsigned int irq)
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 02b8b24f8f51..8beabb958f61 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -191,8 +191,8 @@ struct ftrace_event_call {
191 unsigned int flags; 191 unsigned int flags;
192 192
193#ifdef CONFIG_PERF_EVENTS 193#ifdef CONFIG_PERF_EVENTS
194 int perf_refcount; 194 int perf_refcount;
195 struct hlist_head *perf_events; 195 struct hlist_head __percpu *perf_events;
196#endif 196#endif
197}; 197};
198 198
@@ -252,8 +252,8 @@ DECLARE_PER_CPU(struct pt_regs, perf_trace_regs);
252 252
253extern int perf_trace_init(struct perf_event *event); 253extern int perf_trace_init(struct perf_event *event);
254extern void perf_trace_destroy(struct perf_event *event); 254extern void perf_trace_destroy(struct perf_event *event);
255extern int perf_trace_enable(struct perf_event *event); 255extern int perf_trace_add(struct perf_event *event, int flags);
256extern void perf_trace_disable(struct perf_event *event); 256extern void perf_trace_del(struct perf_event *event, int flags);
257extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, 257extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
258 char *filter_str); 258 char *filter_str);
259extern void ftrace_profile_free_filter(struct perf_event *event); 259extern void ftrace_profile_free_filter(struct perf_event *event);
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index a0384a4d1e6f..531495db1708 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -18,6 +18,7 @@
18#include <asm/atomic.h> 18#include <asm/atomic.h>
19#include <asm/ptrace.h> 19#include <asm/ptrace.h>
20#include <asm/system.h> 20#include <asm/system.h>
21#include <trace/events/irq.h>
21 22
22/* 23/*
23 * These correspond to the IORESOURCE_IRQ_* defines in 24 * These correspond to the IORESOURCE_IRQ_* defines in
@@ -407,7 +408,12 @@ asmlinkage void do_softirq(void);
407asmlinkage void __do_softirq(void); 408asmlinkage void __do_softirq(void);
408extern void open_softirq(int nr, void (*action)(struct softirq_action *)); 409extern void open_softirq(int nr, void (*action)(struct softirq_action *));
409extern void softirq_init(void); 410extern void softirq_init(void);
410#define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0) 411static inline void __raise_softirq_irqoff(unsigned int nr)
412{
413 trace_softirq_raise((struct softirq_action *)(unsigned long)nr, NULL);
414 or_softirq_pending(1UL << nr);
415}
416
411extern void raise_softirq_irqoff(unsigned int nr); 417extern void raise_softirq_irqoff(unsigned int nr);
412extern void raise_softirq(unsigned int nr); 418extern void raise_softirq(unsigned int nr);
413extern void wakeup_softirqd(void); 419extern void wakeup_softirqd(void);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 716f99b682c1..61b1e2d760fd 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -529,7 +529,6 @@ struct hw_perf_event {
529 int last_cpu; 529 int last_cpu;
530 }; 530 };
531 struct { /* software */ 531 struct { /* software */
532 s64 remaining;
533 struct hrtimer hrtimer; 532 struct hrtimer hrtimer;
534 }; 533 };
535#ifdef CONFIG_HAVE_HW_BREAKPOINT 534#ifdef CONFIG_HAVE_HW_BREAKPOINT
@@ -539,6 +538,7 @@ struct hw_perf_event {
539 }; 538 };
540#endif 539#endif
541 }; 540 };
541 int state;
542 local64_t prev_count; 542 local64_t prev_count;
543 u64 sample_period; 543 u64 sample_period;
544 u64 last_period; 544 u64 last_period;
@@ -550,6 +550,13 @@ struct hw_perf_event {
550#endif 550#endif
551}; 551};
552 552
553/*
554 * hw_perf_event::state flags
555 */
556#define PERF_HES_STOPPED 0x01 /* the counter is stopped */
557#define PERF_HES_UPTODATE 0x02 /* event->count up-to-date */
558#define PERF_HES_ARCH 0x04
559
553struct perf_event; 560struct perf_event;
554 561
555/* 562/*
@@ -561,36 +568,70 @@ struct perf_event;
561 * struct pmu - generic performance monitoring unit 568 * struct pmu - generic performance monitoring unit
562 */ 569 */
563struct pmu { 570struct pmu {
564 int (*enable) (struct perf_event *event); 571 struct list_head entry;
565 void (*disable) (struct perf_event *event); 572
566 int (*start) (struct perf_event *event); 573 int * __percpu pmu_disable_count;
567 void (*stop) (struct perf_event *event); 574 struct perf_cpu_context * __percpu pmu_cpu_context;
568 void (*read) (struct perf_event *event); 575 int task_ctx_nr;
569 void (*unthrottle) (struct perf_event *event);
570 576
571 /* 577 /*
572 * Group events scheduling is treated as a transaction, add group 578 * Fully disable/enable this PMU, can be used to protect from the PMI
573 * events as a whole and perform one schedulability test. If the test 579 * as well as for lazy/batch writing of the MSRs.
574 * fails, roll back the whole group
575 */ 580 */
581 void (*pmu_enable) (struct pmu *pmu); /* optional */
582 void (*pmu_disable) (struct pmu *pmu); /* optional */
576 583
577 /* 584 /*
578 * Start the transaction, after this ->enable() doesn't need 585 * Try and initialize the event for this PMU.
579 * to do schedulability tests. 586 * Should return -ENOENT when the @event doesn't match this PMU.
580 */ 587 */
581 void (*start_txn) (const struct pmu *pmu); 588 int (*event_init) (struct perf_event *event);
589
590#define PERF_EF_START 0x01 /* start the counter when adding */
591#define PERF_EF_RELOAD 0x02 /* reload the counter when starting */
592#define PERF_EF_UPDATE 0x04 /* update the counter when stopping */
593
582 /* 594 /*
583 * If ->start_txn() disabled the ->enable() schedulability test 595 * Adds/Removes a counter to/from the PMU, can be done inside
596 * a transaction, see the ->*_txn() methods.
597 */
598 int (*add) (struct perf_event *event, int flags);
599 void (*del) (struct perf_event *event, int flags);
600
601 /*
602 * Starts/Stops a counter present on the PMU. The PMI handler
603 * should stop the counter when perf_event_overflow() returns
604 * !0. ->start() will be used to continue.
605 */
606 void (*start) (struct perf_event *event, int flags);
607 void (*stop) (struct perf_event *event, int flags);
608
609 /*
610 * Updates the counter value of the event.
611 */
612 void (*read) (struct perf_event *event);
613
614 /*
615 * Group events scheduling is treated as a transaction, add
616 * group events as a whole and perform one schedulability test.
617 * If the test fails, roll back the whole group
618 *
619 * Start the transaction, after this ->add() doesn't need to
620 * do schedulability tests.
621 */
622 void (*start_txn) (struct pmu *pmu); /* optional */
623 /*
624 * If ->start_txn() disabled the ->add() schedulability test
584 * then ->commit_txn() is required to perform one. On success 625 * then ->commit_txn() is required to perform one. On success
585 * the transaction is closed. On error the transaction is kept 626 * the transaction is closed. On error the transaction is kept
586 * open until ->cancel_txn() is called. 627 * open until ->cancel_txn() is called.
587 */ 628 */
588 int (*commit_txn) (const struct pmu *pmu); 629 int (*commit_txn) (struct pmu *pmu); /* optional */
589 /* 630 /*
590 * Will cancel the transaction, assumes ->disable() is called for 631 * Will cancel the transaction, assumes ->del() is called
591 * each successfull ->enable() during the transaction. 632 * for each successfull ->add() during the transaction.
592 */ 633 */
593 void (*cancel_txn) (const struct pmu *pmu); 634 void (*cancel_txn) (struct pmu *pmu); /* optional */
594}; 635};
595 636
596/** 637/**
@@ -669,7 +710,7 @@ struct perf_event {
669 int nr_siblings; 710 int nr_siblings;
670 int group_flags; 711 int group_flags;
671 struct perf_event *group_leader; 712 struct perf_event *group_leader;
672 const struct pmu *pmu; 713 struct pmu *pmu;
673 714
674 enum perf_event_active_state state; 715 enum perf_event_active_state state;
675 unsigned int attach_state; 716 unsigned int attach_state;
@@ -763,12 +804,19 @@ struct perf_event {
763#endif /* CONFIG_PERF_EVENTS */ 804#endif /* CONFIG_PERF_EVENTS */
764}; 805};
765 806
807enum perf_event_context_type {
808 task_context,
809 cpu_context,
810};
811
766/** 812/**
767 * struct perf_event_context - event context structure 813 * struct perf_event_context - event context structure
768 * 814 *
769 * Used as a container for task events and CPU events as well: 815 * Used as a container for task events and CPU events as well:
770 */ 816 */
771struct perf_event_context { 817struct perf_event_context {
818 enum perf_event_context_type type;
819 struct pmu *pmu;
772 /* 820 /*
773 * Protect the states of the events in the list, 821 * Protect the states of the events in the list,
774 * nr_active, and the list: 822 * nr_active, and the list:
@@ -808,6 +856,12 @@ struct perf_event_context {
808 struct rcu_head rcu_head; 856 struct rcu_head rcu_head;
809}; 857};
810 858
859/*
860 * Number of contexts where an event can trigger:
861 * task, softirq, hardirq, nmi.
862 */
863#define PERF_NR_CONTEXTS 4
864
811/** 865/**
812 * struct perf_event_cpu_context - per cpu event context structure 866 * struct perf_event_cpu_context - per cpu event context structure
813 */ 867 */
@@ -815,18 +869,9 @@ struct perf_cpu_context {
815 struct perf_event_context ctx; 869 struct perf_event_context ctx;
816 struct perf_event_context *task_ctx; 870 struct perf_event_context *task_ctx;
817 int active_oncpu; 871 int active_oncpu;
818 int max_pertask;
819 int exclusive; 872 int exclusive;
820 struct swevent_hlist *swevent_hlist; 873 struct list_head rotation_list;
821 struct mutex hlist_mutex; 874 int jiffies_interval;
822 int hlist_refcount;
823
824 /*
825 * Recursion avoidance:
826 *
827 * task, softirq, irq, nmi context
828 */
829 int recursion[4];
830}; 875};
831 876
832struct perf_output_handle { 877struct perf_output_handle {
@@ -842,26 +887,20 @@ struct perf_output_handle {
842 887
843#ifdef CONFIG_PERF_EVENTS 888#ifdef CONFIG_PERF_EVENTS
844 889
845/* 890extern int perf_pmu_register(struct pmu *pmu);
846 * Set by architecture code: 891extern void perf_pmu_unregister(struct pmu *pmu);
847 */
848extern int perf_max_events;
849
850extern const struct pmu *hw_perf_event_init(struct perf_event *event);
851 892
852extern void perf_event_task_sched_in(struct task_struct *task); 893extern void perf_event_task_sched_in(struct task_struct *task);
853extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next); 894extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next);
854extern void perf_event_task_tick(struct task_struct *task);
855extern int perf_event_init_task(struct task_struct *child); 895extern int perf_event_init_task(struct task_struct *child);
856extern void perf_event_exit_task(struct task_struct *child); 896extern void perf_event_exit_task(struct task_struct *child);
857extern void perf_event_free_task(struct task_struct *task); 897extern void perf_event_free_task(struct task_struct *task);
898extern void perf_event_delayed_put(struct task_struct *task);
858extern void set_perf_event_pending(void); 899extern void set_perf_event_pending(void);
859extern void perf_event_do_pending(void); 900extern void perf_event_do_pending(void);
860extern void perf_event_print_debug(void); 901extern void perf_event_print_debug(void);
861extern void __perf_disable(void); 902extern void perf_pmu_disable(struct pmu *pmu);
862extern bool __perf_enable(void); 903extern void perf_pmu_enable(struct pmu *pmu);
863extern void perf_disable(void);
864extern void perf_enable(void);
865extern int perf_event_task_disable(void); 904extern int perf_event_task_disable(void);
866extern int perf_event_task_enable(void); 905extern int perf_event_task_enable(void);
867extern void perf_event_update_userpage(struct perf_event *event); 906extern void perf_event_update_userpage(struct perf_event *event);
@@ -869,7 +908,7 @@ extern int perf_event_release_kernel(struct perf_event *event);
869extern struct perf_event * 908extern struct perf_event *
870perf_event_create_kernel_counter(struct perf_event_attr *attr, 909perf_event_create_kernel_counter(struct perf_event_attr *attr,
871 int cpu, 910 int cpu,
872 pid_t pid, 911 struct task_struct *task,
873 perf_overflow_handler_t callback); 912 perf_overflow_handler_t callback);
874extern u64 perf_event_read_value(struct perf_event *event, 913extern u64 perf_event_read_value(struct perf_event *event,
875 u64 *enabled, u64 *running); 914 u64 *enabled, u64 *running);
@@ -920,14 +959,7 @@ extern int perf_event_overflow(struct perf_event *event, int nmi,
920 */ 959 */
921static inline int is_software_event(struct perf_event *event) 960static inline int is_software_event(struct perf_event *event)
922{ 961{
923 switch (event->attr.type) { 962 return event->pmu->task_ctx_nr == perf_sw_context;
924 case PERF_TYPE_SOFTWARE:
925 case PERF_TYPE_TRACEPOINT:
926 /* for now the breakpoint stuff also works as software event */
927 case PERF_TYPE_BREAKPOINT:
928 return 1;
929 }
930 return 0;
931} 963}
932 964
933extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; 965extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
@@ -976,7 +1008,21 @@ extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks
976extern void perf_event_comm(struct task_struct *tsk); 1008extern void perf_event_comm(struct task_struct *tsk);
977extern void perf_event_fork(struct task_struct *tsk); 1009extern void perf_event_fork(struct task_struct *tsk);
978 1010
979extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); 1011/* Callchains */
1012DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry);
1013
1014extern void perf_callchain_user(struct perf_callchain_entry *entry,
1015 struct pt_regs *regs);
1016extern void perf_callchain_kernel(struct perf_callchain_entry *entry,
1017 struct pt_regs *regs);
1018
1019
1020static inline void
1021perf_callchain_store(struct perf_callchain_entry *entry, u64 ip)
1022{
1023 if (entry->nr < PERF_MAX_STACK_DEPTH)
1024 entry->ip[entry->nr++] = ip;
1025}
980 1026
981extern int sysctl_perf_event_paranoid; 1027extern int sysctl_perf_event_paranoid;
982extern int sysctl_perf_event_mlock; 1028extern int sysctl_perf_event_mlock;
@@ -1019,21 +1065,19 @@ extern int perf_swevent_get_recursion_context(void);
1019extern void perf_swevent_put_recursion_context(int rctx); 1065extern void perf_swevent_put_recursion_context(int rctx);
1020extern void perf_event_enable(struct perf_event *event); 1066extern void perf_event_enable(struct perf_event *event);
1021extern void perf_event_disable(struct perf_event *event); 1067extern void perf_event_disable(struct perf_event *event);
1068extern void perf_event_task_tick(void);
1022#else 1069#else
1023static inline void 1070static inline void
1024perf_event_task_sched_in(struct task_struct *task) { } 1071perf_event_task_sched_in(struct task_struct *task) { }
1025static inline void 1072static inline void
1026perf_event_task_sched_out(struct task_struct *task, 1073perf_event_task_sched_out(struct task_struct *task,
1027 struct task_struct *next) { } 1074 struct task_struct *next) { }
1028static inline void
1029perf_event_task_tick(struct task_struct *task) { }
1030static inline int perf_event_init_task(struct task_struct *child) { return 0; } 1075static inline int perf_event_init_task(struct task_struct *child) { return 0; }
1031static inline void perf_event_exit_task(struct task_struct *child) { } 1076static inline void perf_event_exit_task(struct task_struct *child) { }
1032static inline void perf_event_free_task(struct task_struct *task) { } 1077static inline void perf_event_free_task(struct task_struct *task) { }
1078static inline void perf_event_delayed_put(struct task_struct *task) { }
1033static inline void perf_event_do_pending(void) { } 1079static inline void perf_event_do_pending(void) { }
1034static inline void perf_event_print_debug(void) { } 1080static inline void perf_event_print_debug(void) { }
1035static inline void perf_disable(void) { }
1036static inline void perf_enable(void) { }
1037static inline int perf_event_task_disable(void) { return -EINVAL; } 1081static inline int perf_event_task_disable(void) { return -EINVAL; }
1038static inline int perf_event_task_enable(void) { return -EINVAL; } 1082static inline int perf_event_task_enable(void) { return -EINVAL; }
1039 1083
@@ -1056,6 +1100,7 @@ static inline int perf_swevent_get_recursion_context(void) { return -1; }
1056static inline void perf_swevent_put_recursion_context(int rctx) { } 1100static inline void perf_swevent_put_recursion_context(int rctx) { }
1057static inline void perf_event_enable(struct perf_event *event) { } 1101static inline void perf_event_enable(struct perf_event *event) { }
1058static inline void perf_event_disable(struct perf_event *event) { } 1102static inline void perf_event_disable(struct perf_event *event) { }
1103static inline void perf_event_task_tick(void) { }
1059#endif 1104#endif
1060 1105
1061#define perf_output_put(handle, x) \ 1106#define perf_output_put(handle, x) \
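
The comments above describe the new contract; a minimal, purely illustrative PMU written against it might look as follows. Every demo_* name is made up, error handling and the actual counter programming are elided; PERF_EF_*/PERF_HES_* are the flags introduced in this hunk.

static void demo_pmu_read(struct perf_event *event)
{
	/* fold the current hardware count into event->count here */
}

static void demo_pmu_start(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_RELOAD)
		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));

	event->hw.state = 0;		/* counting, and count is live again */
	/* program and start the hardware counter here */
}

static void demo_pmu_stop(struct perf_event *event, int flags)
{
	/* stop the hardware counter here */
	event->hw.state |= PERF_HES_STOPPED;

	if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) {
		demo_pmu_read(event);
		event->hw.state |= PERF_HES_UPTODATE;
	}
}

static int demo_pmu_add(struct perf_event *event, int flags)
{
	event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
	if (flags & PERF_EF_START)
		demo_pmu_start(event, PERF_EF_RELOAD);
	return 0;			/* or an error if no counter is free */
}

static void demo_pmu_del(struct perf_event *event, int flags)
{
	demo_pmu_stop(event, PERF_EF_UPDATE);
}

static int demo_pmu_event_init(struct perf_event *event)
{
	/* -ENOENT tells the core this PMU does not handle the event,
	 * so it moves on to the next registered pmu */
	if (event->attr.type != PERF_TYPE_RAW)	/* arbitrary choice for the sketch */
		return -ENOENT;
	return 0;
}

static struct pmu demo_pmu = {
	.task_ctx_nr	= perf_sw_context,	/* see the sched.h hunk below */
	.event_init	= demo_pmu_event_init,
	.add		= demo_pmu_add,
	.del		= demo_pmu_del,
	.start		= demo_pmu_start,
	.stop		= demo_pmu_stop,
	.read		= demo_pmu_read,
};

static int __init demo_pmu_init(void)
{
	return perf_pmu_register(&demo_pmu);
}

Mapping from the old interface: ->enable()/->disable() become ->add()/->del() with explicit flags, starting and stopping the counter is split out into ->start()/->stop(), and hw.state with the PERF_HES_* bits tracks whether event->count is current while the counter is stopped.
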
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1e2a6db2d7dd..eb3c1ceec06e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1160,6 +1160,13 @@ struct sched_rt_entity {
1160 1160
1161struct rcu_node; 1161struct rcu_node;
1162 1162
1163enum perf_event_task_context {
1164 perf_invalid_context = -1,
1165 perf_hw_context = 0,
1166 perf_sw_context,
1167 perf_nr_task_contexts,
1168};
1169
1163struct task_struct { 1170struct task_struct {
1164 volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ 1171 volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
1165 void *stack; 1172 void *stack;
@@ -1431,7 +1438,7 @@ struct task_struct {
1431 struct futex_pi_state *pi_state_cache; 1438 struct futex_pi_state *pi_state_cache;
1432#endif 1439#endif
1433#ifdef CONFIG_PERF_EVENTS 1440#ifdef CONFIG_PERF_EVENTS
1434 struct perf_event_context *perf_event_ctxp; 1441 struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
1435 struct mutex perf_event_mutex; 1442 struct mutex perf_event_mutex;
1436 struct list_head perf_event_list; 1443 struct list_head perf_event_list;
1437#endif 1444#endif
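
For reference, a minimal self-contained C sketch of the new per-task context lookup introduced by the hunk above: each task now carries one perf_event_ctxp[] slot per PMU class instead of a single pointer. Only the enum values and the array field name come from the patch; the struct bodies and task_ctx() helper below are simplified stand-ins.

#include <stddef.h>
#include <stdio.h>

enum perf_event_task_context {
	perf_invalid_context = -1,
	perf_hw_context = 0,
	perf_sw_context,
	perf_nr_task_contexts,
};

/* simplified stand-ins for the kernel structures */
struct perf_event_context { const char *name; };
struct task_struct {
	struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
};

/* each PMU class selects which per-task slot it uses */
static struct perf_event_context *
task_ctx(struct task_struct *tsk, int ctxn)
{
	if (ctxn < 0)			/* perf_invalid_context: no task slot */
		return NULL;
	return tsk->perf_event_ctxp[ctxn];
}

int main(void)
{
	struct perf_event_context hw = { "hw" }, sw = { "sw" };
	struct task_struct tsk = { { &hw, &sw } };

	for (int ctxn = 0; ctxn < perf_nr_task_contexts; ctxn++)
		printf("ctx %d -> %s\n", ctxn, task_ctx(&tsk, ctxn)->name);
	return 0;
}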
diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h
index 0e4cfb694fe7..6fa7cbab7d93 100644
--- a/include/trace/events/irq.h
+++ b/include/trace/events/irq.h
@@ -5,7 +5,9 @@
5#define _TRACE_IRQ_H 5#define _TRACE_IRQ_H
6 6
7#include <linux/tracepoint.h> 7#include <linux/tracepoint.h>
8#include <linux/interrupt.h> 8
9struct irqaction;
10struct softirq_action;
9 11
10#define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq } 12#define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq }
11#define show_softirq_name(val) \ 13#define show_softirq_name(val) \
@@ -93,7 +95,10 @@ DECLARE_EVENT_CLASS(softirq,
93 ), 95 ),
94 96
95 TP_fast_assign( 97 TP_fast_assign(
96 __entry->vec = (int)(h - vec); 98 if (vec)
99 __entry->vec = (int)(h - vec);
100 else
101 __entry->vec = (int)(long)h;
97 ), 102 ),
98 103
99 TP_printk("vec=%d [action=%s]", __entry->vec, 104 TP_printk("vec=%d [action=%s]", __entry->vec,
@@ -136,6 +141,23 @@ DEFINE_EVENT(softirq, softirq_exit,
136 TP_ARGS(h, vec) 141 TP_ARGS(h, vec)
137); 142);
138 143
144/**
145 * softirq_raise - called immediately when a softirq is raised
146 * @h: pointer to struct softirq_action
147 * @vec: pointer to first struct softirq_action in softirq_vec array
148 *
149 * Here @h does not point to a softirq_action: @vec is NULL, which indicates
150 * that @h instead carries the number of the softirq vector being raised.
151 * When used in combination with the softirq_entry tracepoint we can
152 * determine the softirq raise latency.
153 */
154DEFINE_EVENT(softirq, softirq_raise,
155
156 TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
157
158 TP_ARGS(h, vec)
159);
160
139#endif /* _TRACE_IRQ_H */ 161#endif /* _TRACE_IRQ_H */
140 162
141/* This part must be outside protection */ 163/* This part must be outside protection */
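
As the new kernel-doc notes, pairing softirq_raise with softirq_entry yields the raise-to-entry latency. A simplified, hypothetical post-processing sketch in plain C; the NR_SOFTIRQS bound, the nanosecond timestamps and the on_* callback names are illustrative only, not part of the patch (a real consumer would feed them from the trace stream).

#include <stdio.h>

#define NR_SOFTIRQS 10

static unsigned long long raise_ts[NR_SOFTIRQS];	/* last softirq_raise per vec */

/* record the softirq_raise timestamp for a vector */
static void on_softirq_raise(int vec, unsigned long long ts)
{
	if (vec >= 0 && vec < NR_SOFTIRQS)
		raise_ts[vec] = ts;
}

/* on softirq_entry, report how long the raise was pending */
static void on_softirq_entry(int vec, unsigned long long ts)
{
	if (vec >= 0 && vec < NR_SOFTIRQS && raise_ts[vec])
		printf("vec=%d raise latency=%llu ns\n", vec, ts - raise_ts[vec]);
}

int main(void)
{
	on_softirq_raise(3, 1000);	/* e.g. NET_RX raised at t=1000 */
	on_softirq_entry(3, 1750);	/* prints a 750 ns latency */
	return 0;
}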
diff --git a/include/trace/events/napi.h b/include/trace/events/napi.h
index 188deca2f3c7..8fe1e93f531d 100644
--- a/include/trace/events/napi.h
+++ b/include/trace/events/napi.h
@@ -6,10 +6,31 @@
6 6
7#include <linux/netdevice.h> 7#include <linux/netdevice.h>
8#include <linux/tracepoint.h> 8#include <linux/tracepoint.h>
9#include <linux/ftrace.h>
10
11#define NO_DEV "(no_device)"
12
13TRACE_EVENT(napi_poll,
9 14
10DECLARE_TRACE(napi_poll,
11 TP_PROTO(struct napi_struct *napi), 15 TP_PROTO(struct napi_struct *napi),
12 TP_ARGS(napi)); 16
17 TP_ARGS(napi),
18
19 TP_STRUCT__entry(
20 __field( struct napi_struct *, napi)
21 __string( dev_name, napi->dev ? napi->dev->name : NO_DEV)
22 ),
23
24 TP_fast_assign(
25 __entry->napi = napi;
26 __assign_str(dev_name, napi->dev ? napi->dev->name : NO_DEV);
27 ),
28
29 TP_printk("napi poll on napi struct %p for device %s",
30 __entry->napi, __get_str(dev_name))
31);
32
33#undef NO_DEV
13 34
14#endif /* _TRACE_NAPI_H_ */ 35#endif /* _TRACE_NAPI_H_ */
15 36
diff --git a/include/trace/events/net.h b/include/trace/events/net.h
new file mode 100644
index 000000000000..5f247f5ffc56
--- /dev/null
+++ b/include/trace/events/net.h
@@ -0,0 +1,82 @@
1#undef TRACE_SYSTEM
2#define TRACE_SYSTEM net
3
4#if !defined(_TRACE_NET_H) || defined(TRACE_HEADER_MULTI_READ)
5#define _TRACE_NET_H
6
7#include <linux/skbuff.h>
8#include <linux/netdevice.h>
9#include <linux/ip.h>
10#include <linux/tracepoint.h>
11
12TRACE_EVENT(net_dev_xmit,
13
14 TP_PROTO(struct sk_buff *skb,
15 int rc),
16
17 TP_ARGS(skb, rc),
18
19 TP_STRUCT__entry(
20 __field( void *, skbaddr )
21 __field( unsigned int, len )
22 __field( int, rc )
23 __string( name, skb->dev->name )
24 ),
25
26 TP_fast_assign(
27 __entry->skbaddr = skb;
28 __entry->len = skb->len;
29 __entry->rc = rc;
30 __assign_str(name, skb->dev->name);
31 ),
32
33 TP_printk("dev=%s skbaddr=%p len=%u rc=%d",
34 __get_str(name), __entry->skbaddr, __entry->len, __entry->rc)
35);
36
37DECLARE_EVENT_CLASS(net_dev_template,
38
39 TP_PROTO(struct sk_buff *skb),
40
41 TP_ARGS(skb),
42
43 TP_STRUCT__entry(
44 __field( void *, skbaddr )
45 __field( unsigned int, len )
46 __string( name, skb->dev->name )
47 ),
48
49 TP_fast_assign(
50 __entry->skbaddr = skb;
51 __entry->len = skb->len;
52 __assign_str(name, skb->dev->name);
53 ),
54
55 TP_printk("dev=%s skbaddr=%p len=%u",
56 __get_str(name), __entry->skbaddr, __entry->len)
57);
58
59DEFINE_EVENT(net_dev_template, net_dev_queue,
60
61 TP_PROTO(struct sk_buff *skb),
62
63 TP_ARGS(skb)
64);
65
66DEFINE_EVENT(net_dev_template, netif_receive_skb,
67
68 TP_PROTO(struct sk_buff *skb),
69
70 TP_ARGS(skb)
71);
72
73DEFINE_EVENT(net_dev_template, netif_rx,
74
75 TP_PROTO(struct sk_buff *skb),
76
77 TP_ARGS(skb)
78);
79#endif /* _TRACE_NET_H */
80
81/* This part must be outside protection */
82#include <trace/define_trace.h>
diff --git a/include/trace/events/power.h b/include/trace/events/power.h
index 35a2a6e7bf1e..286784d69b8f 100644
--- a/include/trace/events/power.h
+++ b/include/trace/events/power.h
@@ -10,12 +10,17 @@
10#ifndef _TRACE_POWER_ENUM_ 10#ifndef _TRACE_POWER_ENUM_
11#define _TRACE_POWER_ENUM_ 11#define _TRACE_POWER_ENUM_
12enum { 12enum {
13 POWER_NONE = 0, 13 POWER_NONE = 0,
14 POWER_CSTATE = 1, 14 POWER_CSTATE = 1, /* C-State */
15	POWER_PSTATE = 2, 15	POWER_PSTATE = 2,	/* Frequency change or DVFS */
16 POWER_SSTATE = 3, /* Suspend */
16}; 17};
17#endif 18#endif
18 19
20/*
21 * The power events are used for cpuidle & suspend (power_start, power_end)
22 * and for cpufreq (power_frequency)
23 */
19DECLARE_EVENT_CLASS(power, 24DECLARE_EVENT_CLASS(power,
20 25
21 TP_PROTO(unsigned int type, unsigned int state, unsigned int cpu_id), 26 TP_PROTO(unsigned int type, unsigned int state, unsigned int cpu_id),
@@ -70,6 +75,85 @@ TRACE_EVENT(power_end,
70 75
71); 76);
72 77
78/*
79 * The clock events are used for clock enable/disable and for
80 * clock rate change
81 */
82DECLARE_EVENT_CLASS(clock,
83
84 TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id),
85
86 TP_ARGS(name, state, cpu_id),
87
88 TP_STRUCT__entry(
89 __string( name, name )
90 __field( u64, state )
91 __field( u64, cpu_id )
92 ),
93
94 TP_fast_assign(
95 __assign_str(name, name);
96 __entry->state = state;
97 __entry->cpu_id = cpu_id;
98 ),
99
100 TP_printk("%s state=%lu cpu_id=%lu", __get_str(name),
101 (unsigned long)__entry->state, (unsigned long)__entry->cpu_id)
102);
103
104DEFINE_EVENT(clock, clock_enable,
105
106 TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id),
107
108 TP_ARGS(name, state, cpu_id)
109);
110
111DEFINE_EVENT(clock, clock_disable,
112
113 TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id),
114
115 TP_ARGS(name, state, cpu_id)
116);
117
118DEFINE_EVENT(clock, clock_set_rate,
119
120 TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id),
121
122 TP_ARGS(name, state, cpu_id)
123);
124
125/*
126 * The power domain events are used for power domain transitions
127 */
128DECLARE_EVENT_CLASS(power_domain,
129
130 TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id),
131
132 TP_ARGS(name, state, cpu_id),
133
134 TP_STRUCT__entry(
135 __string( name, name )
136 __field( u64, state )
137 __field( u64, cpu_id )
138 ),
139
140 TP_fast_assign(
141 __assign_str(name, name);
142 __entry->state = state;
143 __entry->cpu_id = cpu_id;
144),
145
146 TP_printk("%s state=%lu cpu_id=%lu", __get_str(name),
147 (unsigned long)__entry->state, (unsigned long)__entry->cpu_id)
148);
149
150DEFINE_EVENT(power_domain, power_domain_target,
151
152 TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id),
153
154 TP_ARGS(name, state, cpu_id)
155);
156
73#endif /* _TRACE_POWER_H */ 157#endif /* _TRACE_POWER_H */
74 158
75/* This part must be outside protection */ 159/* This part must be outside protection */
diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
index 4b2be6dc76f0..75ce9d500d8e 100644
--- a/include/trace/events/skb.h
+++ b/include/trace/events/skb.h
@@ -35,6 +35,23 @@ TRACE_EVENT(kfree_skb,
35 __entry->skbaddr, __entry->protocol, __entry->location) 35 __entry->skbaddr, __entry->protocol, __entry->location)
36); 36);
37 37
38TRACE_EVENT(consume_skb,
39
40 TP_PROTO(struct sk_buff *skb),
41
42 TP_ARGS(skb),
43
44 TP_STRUCT__entry(
45 __field( void *, skbaddr )
46 ),
47
48 TP_fast_assign(
49 __entry->skbaddr = skb;
50 ),
51
52 TP_printk("skbaddr=%p", __entry->skbaddr)
53);
54
38TRACE_EVENT(skb_copy_datagram_iovec, 55TRACE_EVENT(skb_copy_datagram_iovec,
39 56
40 TP_PROTO(const struct sk_buff *skb, int len), 57 TP_PROTO(const struct sk_buff *skb, int len),
diff --git a/kernel/exit.c b/kernel/exit.c
index 03120229db28..e2bdf37f9fde 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -149,9 +149,7 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
149{ 149{
150 struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); 150 struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
151 151
152#ifdef CONFIG_PERF_EVENTS 152 perf_event_delayed_put(tsk);
153 WARN_ON_ONCE(tsk->perf_event_ctxp);
154#endif
155 trace_sched_process_free(tsk); 153 trace_sched_process_free(tsk);
156 put_task_struct(tsk); 154 put_task_struct(tsk);
157} 155}
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index d71a987fd2bf..3b714e839c10 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -433,7 +433,7 @@ register_user_hw_breakpoint(struct perf_event_attr *attr,
433 perf_overflow_handler_t triggered, 433 perf_overflow_handler_t triggered,
434 struct task_struct *tsk) 434 struct task_struct *tsk)
435{ 435{
436 return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered); 436 return perf_event_create_kernel_counter(attr, -1, tsk, triggered);
437} 437}
438EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); 438EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
439 439
@@ -515,7 +515,7 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr,
515 get_online_cpus(); 515 get_online_cpus();
516 for_each_online_cpu(cpu) { 516 for_each_online_cpu(cpu) {
517 pevent = per_cpu_ptr(cpu_events, cpu); 517 pevent = per_cpu_ptr(cpu_events, cpu);
518 bp = perf_event_create_kernel_counter(attr, cpu, -1, triggered); 518 bp = perf_event_create_kernel_counter(attr, cpu, NULL, triggered);
519 519
520 *pevent = bp; 520 *pevent = bp;
521 521
@@ -565,6 +565,61 @@ static struct notifier_block hw_breakpoint_exceptions_nb = {
565 .priority = 0x7fffffff 565 .priority = 0x7fffffff
566}; 566};
567 567
568static void bp_perf_event_destroy(struct perf_event *event)
569{
570 release_bp_slot(event);
571}
572
573static int hw_breakpoint_event_init(struct perf_event *bp)
574{
575 int err;
576
577 if (bp->attr.type != PERF_TYPE_BREAKPOINT)
578 return -ENOENT;
579
580 err = register_perf_hw_breakpoint(bp);
581 if (err)
582 return err;
583
584 bp->destroy = bp_perf_event_destroy;
585
586 return 0;
587}
588
589static int hw_breakpoint_add(struct perf_event *bp, int flags)
590{
591 if (!(flags & PERF_EF_START))
592 bp->hw.state = PERF_HES_STOPPED;
593
594 return arch_install_hw_breakpoint(bp);
595}
596
597static void hw_breakpoint_del(struct perf_event *bp, int flags)
598{
599 arch_uninstall_hw_breakpoint(bp);
600}
601
602static void hw_breakpoint_start(struct perf_event *bp, int flags)
603{
604 bp->hw.state = 0;
605}
606
607static void hw_breakpoint_stop(struct perf_event *bp, int flags)
608{
609 bp->hw.state = PERF_HES_STOPPED;
610}
611
612static struct pmu perf_breakpoint = {
613 .task_ctx_nr = perf_sw_context, /* could eventually get its own */
614
615 .event_init = hw_breakpoint_event_init,
616 .add = hw_breakpoint_add,
617 .del = hw_breakpoint_del,
618 .start = hw_breakpoint_start,
619 .stop = hw_breakpoint_stop,
620 .read = hw_breakpoint_pmu_read,
621};
622
568static int __init init_hw_breakpoint(void) 623static int __init init_hw_breakpoint(void)
569{ 624{
570 unsigned int **task_bp_pinned; 625 unsigned int **task_bp_pinned;
@@ -586,6 +641,8 @@ static int __init init_hw_breakpoint(void)
586 641
587 constraints_initialized = 1; 642 constraints_initialized = 1;
588 643
644 perf_pmu_register(&perf_breakpoint);
645
589 return register_die_notifier(&hw_breakpoint_exceptions_nb); 646 return register_die_notifier(&hw_breakpoint_exceptions_nb);
590 647
591 err_alloc: 648 err_alloc:
@@ -601,8 +658,3 @@ static int __init init_hw_breakpoint(void)
601core_initcall(init_hw_breakpoint); 658core_initcall(init_hw_breakpoint);
602 659
603 660
604struct pmu perf_ops_bp = {
605 .enable = arch_install_hw_breakpoint,
606 .disable = arch_uninstall_hw_breakpoint,
607 .read = hw_breakpoint_pmu_read,
608};
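
The replacement of the old perf_ops_bp enable/disable pair with event_init/add/del/start/stop callbacks mirrors the reworked struct pmu interface used throughout this series. A toy, self-contained C sketch of that callback-table shape follows; the toy_* names and types are stand-ins, only the callback names track the patch.

#include <stdio.h>

struct toy_event { int state; };

/* mirrors the shape of the new struct pmu callbacks */
struct toy_pmu {
	int  (*event_init)(struct toy_event *e);
	int  (*add)(struct toy_event *e, int flags);	/* install on the PMU */
	void (*del)(struct toy_event *e, int flags);	/* remove from the PMU */
	void (*start)(struct toy_event *e, int flags);	/* start counting */
	void (*stop)(struct toy_event *e, int flags);	/* stop counting */
};

static int  toy_init(struct toy_event *e)         { e->state = 0; return 0; }
static int  toy_add(struct toy_event *e, int f)   { puts("add");   return 0; }
static void toy_del(struct toy_event *e, int f)   { puts("del"); }
static void toy_start(struct toy_event *e, int f) { puts("start"); }
static void toy_stop(struct toy_event *e, int f)  { puts("stop"); }

static struct toy_pmu pmu = {
	.event_init = toy_init,
	.add        = toy_add,
	.del        = toy_del,
	.start      = toy_start,
	.stop       = toy_stop,
};

int main(void)
{
	struct toy_event e;

	pmu.event_init(&e);
	pmu.add(&e, 1 /* PERF_EF_START-like flag, illustrative */);
	pmu.stop(&e, 0);
	pmu.start(&e, 0);
	pmu.del(&e, 0);
	return 0;
}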
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 282035f3ae96..6dd5359e1f0e 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -399,7 +399,7 @@ static inline int kprobe_optready(struct kprobe *p)
399 * Return an optimized kprobe whose optimizing code replaces 399 * Return an optimized kprobe whose optimizing code replaces
400 * instructions including addr (exclude breakpoint). 400 * instructions including addr (exclude breakpoint).
401 */ 401 */
402struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr) 402static struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr)
403{ 403{
404 int i; 404 int i;
405 struct kprobe *p = NULL; 405 struct kprobe *p = NULL;
@@ -831,6 +831,7 @@ void __kprobes recycle_rp_inst(struct kretprobe_instance *ri,
831 831
832void __kprobes kretprobe_hash_lock(struct task_struct *tsk, 832void __kprobes kretprobe_hash_lock(struct task_struct *tsk,
833 struct hlist_head **head, unsigned long *flags) 833 struct hlist_head **head, unsigned long *flags)
834__acquires(hlist_lock)
834{ 835{
835 unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); 836 unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
836 spinlock_t *hlist_lock; 837 spinlock_t *hlist_lock;
@@ -842,6 +843,7 @@ void __kprobes kretprobe_hash_lock(struct task_struct *tsk,
842 843
843static void __kprobes kretprobe_table_lock(unsigned long hash, 844static void __kprobes kretprobe_table_lock(unsigned long hash,
844 unsigned long *flags) 845 unsigned long *flags)
846__acquires(hlist_lock)
845{ 847{
846 spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); 848 spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
847 spin_lock_irqsave(hlist_lock, *flags); 849 spin_lock_irqsave(hlist_lock, *flags);
@@ -849,6 +851,7 @@ static void __kprobes kretprobe_table_lock(unsigned long hash,
849 851
850void __kprobes kretprobe_hash_unlock(struct task_struct *tsk, 852void __kprobes kretprobe_hash_unlock(struct task_struct *tsk,
851 unsigned long *flags) 853 unsigned long *flags)
854__releases(hlist_lock)
852{ 855{
853 unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); 856 unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
854 spinlock_t *hlist_lock; 857 spinlock_t *hlist_lock;
@@ -857,7 +860,9 @@ void __kprobes kretprobe_hash_unlock(struct task_struct *tsk,
857 spin_unlock_irqrestore(hlist_lock, *flags); 860 spin_unlock_irqrestore(hlist_lock, *flags);
858} 861}
859 862
860void __kprobes kretprobe_table_unlock(unsigned long hash, unsigned long *flags) 863static void __kprobes kretprobe_table_unlock(unsigned long hash,
864 unsigned long *flags)
865__releases(hlist_lock)
861{ 866{
862 spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); 867 spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
863 spin_unlock_irqrestore(hlist_lock, *flags); 868 spin_unlock_irqrestore(hlist_lock, *flags);
@@ -1339,18 +1344,19 @@ int __kprobes register_jprobes(struct jprobe **jps, int num)
1339 if (num <= 0) 1344 if (num <= 0)
1340 return -EINVAL; 1345 return -EINVAL;
1341 for (i = 0; i < num; i++) { 1346 for (i = 0; i < num; i++) {
1342 unsigned long addr; 1347 unsigned long addr, offset;
1343 jp = jps[i]; 1348 jp = jps[i];
1344 addr = arch_deref_entry_point(jp->entry); 1349 addr = arch_deref_entry_point(jp->entry);
1345 1350
1346 if (!kernel_text_address(addr)) 1351 /* Verify probepoint is a function entry point */
1347 ret = -EINVAL; 1352 if (kallsyms_lookup_size_offset(addr, NULL, &offset) &&
1348 else { 1353 offset == 0) {
1349 /* Todo: Verify probepoint is a function entry point */
1350 jp->kp.pre_handler = setjmp_pre_handler; 1354 jp->kp.pre_handler = setjmp_pre_handler;
1351 jp->kp.break_handler = longjmp_break_handler; 1355 jp->kp.break_handler = longjmp_break_handler;
1352 ret = register_kprobe(&jp->kp); 1356 ret = register_kprobe(&jp->kp);
1353 } 1357 } else
1358 ret = -EINVAL;
1359
1354 if (ret < 0) { 1360 if (ret < 0) {
1355 if (i > 0) 1361 if (i > 0)
1356 unregister_jprobes(jps, i); 1362 unregister_jprobes(jps, i);
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index db5b56064687..baae1367e945 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -31,24 +31,18 @@
31#include <linux/kernel_stat.h> 31#include <linux/kernel_stat.h>
32#include <linux/perf_event.h> 32#include <linux/perf_event.h>
33#include <linux/ftrace_event.h> 33#include <linux/ftrace_event.h>
34#include <linux/hw_breakpoint.h>
35 34
36#include <asm/irq_regs.h> 35#include <asm/irq_regs.h>
37 36
38/*
39 * Each CPU has a list of per CPU events:
40 */
41static DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context);
42
43int perf_max_events __read_mostly = 1;
44static int perf_reserved_percpu __read_mostly;
45static int perf_overcommit __read_mostly = 1;
46
47static atomic_t nr_events __read_mostly; 37static atomic_t nr_events __read_mostly;
48static atomic_t nr_mmap_events __read_mostly; 38static atomic_t nr_mmap_events __read_mostly;
49static atomic_t nr_comm_events __read_mostly; 39static atomic_t nr_comm_events __read_mostly;
50static atomic_t nr_task_events __read_mostly; 40static atomic_t nr_task_events __read_mostly;
51 41
42static LIST_HEAD(pmus);
43static DEFINE_MUTEX(pmus_lock);
44static struct srcu_struct pmus_srcu;
45
52/* 46/*
53 * perf event paranoia level: 47 * perf event paranoia level:
54 * -1 - not paranoid at all 48 * -1 - not paranoid at all
@@ -67,36 +61,38 @@ int sysctl_perf_event_sample_rate __read_mostly = 100000;
67 61
68static atomic64_t perf_event_id; 62static atomic64_t perf_event_id;
69 63
70/* 64void __weak perf_event_print_debug(void) { }
71 * Lock for (sysadmin-configurable) event reservations:
72 */
73static DEFINE_SPINLOCK(perf_resource_lock);
74 65
75/* 66void perf_pmu_disable(struct pmu *pmu)
76 * Architecture provided APIs - weak aliases:
77 */
78extern __weak const struct pmu *hw_perf_event_init(struct perf_event *event)
79{ 67{
80 return NULL; 68 int *count = this_cpu_ptr(pmu->pmu_disable_count);
69 if (!(*count)++)
70 pmu->pmu_disable(pmu);
81} 71}
82 72
83void __weak hw_perf_disable(void) { barrier(); } 73void perf_pmu_enable(struct pmu *pmu)
84void __weak hw_perf_enable(void) { barrier(); }
85
86void __weak perf_event_print_debug(void) { }
87
88static DEFINE_PER_CPU(int, perf_disable_count);
89
90void perf_disable(void)
91{ 74{
92 if (!__get_cpu_var(perf_disable_count)++) 75 int *count = this_cpu_ptr(pmu->pmu_disable_count);
93 hw_perf_disable(); 76 if (!--(*count))
77 pmu->pmu_enable(pmu);
94} 78}
95 79
96void perf_enable(void) 80static DEFINE_PER_CPU(struct list_head, rotation_list);
81
82/*
83 * perf_pmu_rotate_start() and perf_rotate_context() are fully serialized
84 * because they're strictly cpu affine and rotate_start is called with IRQs
85 * disabled, while rotate_context is called from IRQ context.
86 */
87static void perf_pmu_rotate_start(struct pmu *pmu)
97{ 88{
98 if (!--__get_cpu_var(perf_disable_count)) 89 struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
99 hw_perf_enable(); 90 struct list_head *head = &__get_cpu_var(rotation_list);
91
92 WARN_ON(!irqs_disabled());
93
94 if (list_empty(&cpuctx->rotation_list))
95 list_add(&cpuctx->rotation_list, head);
100} 96}
101 97
102static void get_ctx(struct perf_event_context *ctx) 98static void get_ctx(struct perf_event_context *ctx)
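
perf_pmu_disable()/perf_pmu_enable() replace the global perf_disable()/perf_enable() with a per-PMU nesting count: only the outermost disable and the matching final enable reach the hardware. A small single-threaded C sketch of that counting discipline; the per-CPU storage is omitted and hw_disable()/hw_enable() are placeholders.

#include <stdio.h>

struct pmu {
	int disable_count;		/* stands in for pmu->pmu_disable_count */
};

static void hw_disable(void) { puts("hardware disabled"); }
static void hw_enable(void)  { puts("hardware enabled"); }

static void pmu_disable(struct pmu *pmu)
{
	if (!pmu->disable_count++)	/* only the first caller hits the hw */
		hw_disable();
}

static void pmu_enable(struct pmu *pmu)
{
	if (!--pmu->disable_count)	/* only the last caller re-enables */
		hw_enable();
}

int main(void)
{
	struct pmu pmu = { 0 };

	pmu_disable(&pmu);		/* "hardware disabled" */
	pmu_disable(&pmu);		/* nested: no output */
	pmu_enable(&pmu);		/* still nested: no output */
	pmu_enable(&pmu);		/* "hardware enabled" */
	return 0;
}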
@@ -151,13 +147,13 @@ static u64 primary_event_id(struct perf_event *event)
151 * the context could get moved to another task. 147 * the context could get moved to another task.
152 */ 148 */
153static struct perf_event_context * 149static struct perf_event_context *
154perf_lock_task_context(struct task_struct *task, unsigned long *flags) 150perf_lock_task_context(struct task_struct *task, int ctxn, unsigned long *flags)
155{ 151{
156 struct perf_event_context *ctx; 152 struct perf_event_context *ctx;
157 153
158 rcu_read_lock(); 154 rcu_read_lock();
159 retry: 155retry:
160 ctx = rcu_dereference(task->perf_event_ctxp); 156 ctx = rcu_dereference(task->perf_event_ctxp[ctxn]);
161 if (ctx) { 157 if (ctx) {
162 /* 158 /*
163 * If this context is a clone of another, it might 159 * If this context is a clone of another, it might
@@ -170,7 +166,7 @@ perf_lock_task_context(struct task_struct *task, unsigned long *flags)
170 * can't get swapped on us any more. 166 * can't get swapped on us any more.
171 */ 167 */
172 raw_spin_lock_irqsave(&ctx->lock, *flags); 168 raw_spin_lock_irqsave(&ctx->lock, *flags);
173 if (ctx != rcu_dereference(task->perf_event_ctxp)) { 169 if (ctx != rcu_dereference(task->perf_event_ctxp[ctxn])) {
174 raw_spin_unlock_irqrestore(&ctx->lock, *flags); 170 raw_spin_unlock_irqrestore(&ctx->lock, *flags);
175 goto retry; 171 goto retry;
176 } 172 }
@@ -189,12 +185,13 @@ perf_lock_task_context(struct task_struct *task, unsigned long *flags)
189 * can't get swapped to another task. This also increments its 185 * can't get swapped to another task. This also increments its
190 * reference count so that the context can't get freed. 186 * reference count so that the context can't get freed.
191 */ 187 */
192static struct perf_event_context *perf_pin_task_context(struct task_struct *task) 188static struct perf_event_context *
189perf_pin_task_context(struct task_struct *task, int ctxn)
193{ 190{
194 struct perf_event_context *ctx; 191 struct perf_event_context *ctx;
195 unsigned long flags; 192 unsigned long flags;
196 193
197 ctx = perf_lock_task_context(task, &flags); 194 ctx = perf_lock_task_context(task, ctxn, &flags);
198 if (ctx) { 195 if (ctx) {
199 ++ctx->pin_count; 196 ++ctx->pin_count;
200 raw_spin_unlock_irqrestore(&ctx->lock, flags); 197 raw_spin_unlock_irqrestore(&ctx->lock, flags);
@@ -302,6 +299,8 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
302 } 299 }
303 300
304 list_add_rcu(&event->event_entry, &ctx->event_list); 301 list_add_rcu(&event->event_entry, &ctx->event_list);
302 if (!ctx->nr_events)
303 perf_pmu_rotate_start(ctx->pmu);
305 ctx->nr_events++; 304 ctx->nr_events++;
306 if (event->attr.inherit_stat) 305 if (event->attr.inherit_stat)
307 ctx->nr_stat++; 306 ctx->nr_stat++;
@@ -436,7 +435,7 @@ event_sched_out(struct perf_event *event,
436 event->state = PERF_EVENT_STATE_OFF; 435 event->state = PERF_EVENT_STATE_OFF;
437 } 436 }
438 event->tstamp_stopped = ctx->time; 437 event->tstamp_stopped = ctx->time;
439 event->pmu->disable(event); 438 event->pmu->del(event, 0);
440 event->oncpu = -1; 439 event->oncpu = -1;
441 440
442 if (!is_software_event(event)) 441 if (!is_software_event(event))
@@ -466,6 +465,12 @@ group_sched_out(struct perf_event *group_event,
466 cpuctx->exclusive = 0; 465 cpuctx->exclusive = 0;
467} 466}
468 467
468static inline struct perf_cpu_context *
469__get_cpu_context(struct perf_event_context *ctx)
470{
471 return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
472}
473
469/* 474/*
470 * Cross CPU call to remove a performance event 475 * Cross CPU call to remove a performance event
471 * 476 *
@@ -474,9 +479,9 @@ group_sched_out(struct perf_event *group_event,
474 */ 479 */
475static void __perf_event_remove_from_context(void *info) 480static void __perf_event_remove_from_context(void *info)
476{ 481{
477 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
478 struct perf_event *event = info; 482 struct perf_event *event = info;
479 struct perf_event_context *ctx = event->ctx; 483 struct perf_event_context *ctx = event->ctx;
484 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
480 485
481 /* 486 /*
482 * If this is a task context, we need to check whether it is 487 * If this is a task context, we need to check whether it is
@@ -487,27 +492,11 @@ static void __perf_event_remove_from_context(void *info)
487 return; 492 return;
488 493
489 raw_spin_lock(&ctx->lock); 494 raw_spin_lock(&ctx->lock);
490 /*
491 * Protect the list operation against NMI by disabling the
492 * events on a global level.
493 */
494 perf_disable();
495 495
496 event_sched_out(event, cpuctx, ctx); 496 event_sched_out(event, cpuctx, ctx);
497 497
498 list_del_event(event, ctx); 498 list_del_event(event, ctx);
499 499
500 if (!ctx->task) {
501 /*
502 * Allow more per task events with respect to the
503 * reservation:
504 */
505 cpuctx->max_pertask =
506 min(perf_max_events - ctx->nr_events,
507 perf_max_events - perf_reserved_percpu);
508 }
509
510 perf_enable();
511 raw_spin_unlock(&ctx->lock); 500 raw_spin_unlock(&ctx->lock);
512} 501}
513 502
@@ -572,8 +561,8 @@ retry:
572static void __perf_event_disable(void *info) 561static void __perf_event_disable(void *info)
573{ 562{
574 struct perf_event *event = info; 563 struct perf_event *event = info;
575 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
576 struct perf_event_context *ctx = event->ctx; 564 struct perf_event_context *ctx = event->ctx;
565 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
577 566
578 /* 567 /*
579 * If this is a per-task event, need to check whether this 568 * If this is a per-task event, need to check whether this
@@ -628,7 +617,7 @@ void perf_event_disable(struct perf_event *event)
628 return; 617 return;
629 } 618 }
630 619
631 retry: 620retry:
632 task_oncpu_function_call(task, __perf_event_disable, event); 621 task_oncpu_function_call(task, __perf_event_disable, event);
633 622
634 raw_spin_lock_irq(&ctx->lock); 623 raw_spin_lock_irq(&ctx->lock);
@@ -667,7 +656,7 @@ event_sched_in(struct perf_event *event,
667 */ 656 */
668 smp_wmb(); 657 smp_wmb();
669 658
670 if (event->pmu->enable(event)) { 659 if (event->pmu->add(event, PERF_EF_START)) {
671 event->state = PERF_EVENT_STATE_INACTIVE; 660 event->state = PERF_EVENT_STATE_INACTIVE;
672 event->oncpu = -1; 661 event->oncpu = -1;
673 return -EAGAIN; 662 return -EAGAIN;
@@ -691,22 +680,15 @@ group_sched_in(struct perf_event *group_event,
691 struct perf_event_context *ctx) 680 struct perf_event_context *ctx)
692{ 681{
693 struct perf_event *event, *partial_group = NULL; 682 struct perf_event *event, *partial_group = NULL;
694 const struct pmu *pmu = group_event->pmu; 683 struct pmu *pmu = group_event->pmu;
695 bool txn = false;
696 684
697 if (group_event->state == PERF_EVENT_STATE_OFF) 685 if (group_event->state == PERF_EVENT_STATE_OFF)
698 return 0; 686 return 0;
699 687
700 /* Check if group transaction availabe */ 688 pmu->start_txn(pmu);
701 if (pmu->start_txn)
702 txn = true;
703
704 if (txn)
705 pmu->start_txn(pmu);
706 689
707 if (event_sched_in(group_event, cpuctx, ctx)) { 690 if (event_sched_in(group_event, cpuctx, ctx)) {
708 if (txn) 691 pmu->cancel_txn(pmu);
709 pmu->cancel_txn(pmu);
710 return -EAGAIN; 692 return -EAGAIN;
711 } 693 }
712 694
@@ -720,7 +702,7 @@ group_sched_in(struct perf_event *group_event,
720 } 702 }
721 } 703 }
722 704
723 if (!txn || !pmu->commit_txn(pmu)) 705 if (!pmu->commit_txn(pmu))
724 return 0; 706 return 0;
725 707
726group_error: 708group_error:
@@ -735,8 +717,7 @@ group_error:
735 } 717 }
736 event_sched_out(group_event, cpuctx, ctx); 718 event_sched_out(group_event, cpuctx, ctx);
737 719
738 if (txn) 720 pmu->cancel_txn(pmu);
739 pmu->cancel_txn(pmu);
740 721
741 return -EAGAIN; 722 return -EAGAIN;
742} 723}
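
group_sched_in() now drives the transaction callbacks unconditionally: start_txn, then schedule each member, then commit_txn, rolling back with cancel_txn on any failure. A hedged all-or-nothing sketch of that pattern in plain C; event_sched_in()/event_sched_out() and the txn functions here are stand-ins, and that every pmu now provides these callbacks is assumed from the removed txn bookkeeping rather than shown in this excerpt.

#include <stdio.h>

/* stand-ins: try to schedule one event on the hardware */
static int event_sched_in(int id)
{
	return id == 2 ? -1 : 0;	/* pretend event 2 does not fit */
}
static void event_sched_out(int id) { printf("undo event %d\n", id); }

static void start_txn(void)  { puts("txn start"); }
static int  commit_txn(void) { puts("txn commit"); return 0; }
static void cancel_txn(void) { puts("txn cancel"); }

/* schedule a whole group or nothing, mirroring group_sched_in() */
static int group_sched_in(const int *ids, int n)
{
	int i;

	start_txn();
	for (i = 0; i < n; i++) {
		if (event_sched_in(ids[i]))
			goto fail;
	}
	if (!commit_txn())
		return 0;
fail:
	while (i--)			/* roll back whatever was scheduled */
		event_sched_out(ids[i]);
	cancel_txn();
	return -1;
}

int main(void)
{
	int ok[]  = { 0, 1 };
	int bad[] = { 0, 1, 2 };

	printf("ok group: %d\n", group_sched_in(ok, 2));
	printf("bad group: %d\n", group_sched_in(bad, 3));
	return 0;
}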
@@ -789,10 +770,10 @@ static void add_event_to_ctx(struct perf_event *event,
789 */ 770 */
790static void __perf_install_in_context(void *info) 771static void __perf_install_in_context(void *info)
791{ 772{
792 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
793 struct perf_event *event = info; 773 struct perf_event *event = info;
794 struct perf_event_context *ctx = event->ctx; 774 struct perf_event_context *ctx = event->ctx;
795 struct perf_event *leader = event->group_leader; 775 struct perf_event *leader = event->group_leader;
776 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
796 int err; 777 int err;
797 778
798 /* 779 /*
@@ -812,12 +793,6 @@ static void __perf_install_in_context(void *info)
812 ctx->is_active = 1; 793 ctx->is_active = 1;
813 update_context_time(ctx); 794 update_context_time(ctx);
814 795
815 /*
816 * Protect the list operation against NMI by disabling the
817 * events on a global level. NOP for non NMI based events.
818 */
819 perf_disable();
820
821 add_event_to_ctx(event, ctx); 796 add_event_to_ctx(event, ctx);
822 797
823 if (event->cpu != -1 && event->cpu != smp_processor_id()) 798 if (event->cpu != -1 && event->cpu != smp_processor_id())
@@ -855,12 +830,7 @@ static void __perf_install_in_context(void *info)
855 } 830 }
856 } 831 }
857 832
858 if (!err && !ctx->task && cpuctx->max_pertask) 833unlock:
859 cpuctx->max_pertask--;
860
861 unlock:
862 perf_enable();
863
864 raw_spin_unlock(&ctx->lock); 834 raw_spin_unlock(&ctx->lock);
865} 835}
866 836
@@ -883,6 +853,8 @@ perf_install_in_context(struct perf_event_context *ctx,
883{ 853{
884 struct task_struct *task = ctx->task; 854 struct task_struct *task = ctx->task;
885 855
856 event->ctx = ctx;
857
886 if (!task) { 858 if (!task) {
887 /* 859 /*
888 * Per cpu events are installed via an smp call and 860 * Per cpu events are installed via an smp call and
@@ -931,10 +903,12 @@ static void __perf_event_mark_enabled(struct perf_event *event,
931 903
932 event->state = PERF_EVENT_STATE_INACTIVE; 904 event->state = PERF_EVENT_STATE_INACTIVE;
933 event->tstamp_enabled = ctx->time - event->total_time_enabled; 905 event->tstamp_enabled = ctx->time - event->total_time_enabled;
934 list_for_each_entry(sub, &event->sibling_list, group_entry) 906 list_for_each_entry(sub, &event->sibling_list, group_entry) {
935 if (sub->state >= PERF_EVENT_STATE_INACTIVE) 907 if (sub->state >= PERF_EVENT_STATE_INACTIVE) {
936 sub->tstamp_enabled = 908 sub->tstamp_enabled =
937 ctx->time - sub->total_time_enabled; 909 ctx->time - sub->total_time_enabled;
910 }
911 }
938} 912}
939 913
940/* 914/*
@@ -943,9 +917,9 @@ static void __perf_event_mark_enabled(struct perf_event *event,
943static void __perf_event_enable(void *info) 917static void __perf_event_enable(void *info)
944{ 918{
945 struct perf_event *event = info; 919 struct perf_event *event = info;
946 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
947 struct perf_event_context *ctx = event->ctx; 920 struct perf_event_context *ctx = event->ctx;
948 struct perf_event *leader = event->group_leader; 921 struct perf_event *leader = event->group_leader;
922 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
949 int err; 923 int err;
950 924
951 /* 925 /*
@@ -979,12 +953,10 @@ static void __perf_event_enable(void *info)
979 if (!group_can_go_on(event, cpuctx, 1)) { 953 if (!group_can_go_on(event, cpuctx, 1)) {
980 err = -EEXIST; 954 err = -EEXIST;
981 } else { 955 } else {
982 perf_disable();
983 if (event == leader) 956 if (event == leader)
984 err = group_sched_in(event, cpuctx, ctx); 957 err = group_sched_in(event, cpuctx, ctx);
985 else 958 else
986 err = event_sched_in(event, cpuctx, ctx); 959 err = event_sched_in(event, cpuctx, ctx);
987 perf_enable();
988 } 960 }
989 961
990 if (err) { 962 if (err) {
@@ -1000,7 +972,7 @@ static void __perf_event_enable(void *info)
1000 } 972 }
1001 } 973 }
1002 974
1003 unlock: 975unlock:
1004 raw_spin_unlock(&ctx->lock); 976 raw_spin_unlock(&ctx->lock);
1005} 977}
1006 978
@@ -1041,7 +1013,7 @@ void perf_event_enable(struct perf_event *event)
1041 if (event->state == PERF_EVENT_STATE_ERROR) 1013 if (event->state == PERF_EVENT_STATE_ERROR)
1042 event->state = PERF_EVENT_STATE_OFF; 1014 event->state = PERF_EVENT_STATE_OFF;
1043 1015
1044 retry: 1016retry:
1045 raw_spin_unlock_irq(&ctx->lock); 1017 raw_spin_unlock_irq(&ctx->lock);
1046 task_oncpu_function_call(task, __perf_event_enable, event); 1018 task_oncpu_function_call(task, __perf_event_enable, event);
1047 1019
@@ -1061,7 +1033,7 @@ void perf_event_enable(struct perf_event *event)
1061 if (event->state == PERF_EVENT_STATE_OFF) 1033 if (event->state == PERF_EVENT_STATE_OFF)
1062 __perf_event_mark_enabled(event, ctx); 1034 __perf_event_mark_enabled(event, ctx);
1063 1035
1064 out: 1036out:
1065 raw_spin_unlock_irq(&ctx->lock); 1037 raw_spin_unlock_irq(&ctx->lock);
1066} 1038}
1067 1039
@@ -1092,26 +1064,26 @@ static void ctx_sched_out(struct perf_event_context *ctx,
1092 struct perf_event *event; 1064 struct perf_event *event;
1093 1065
1094 raw_spin_lock(&ctx->lock); 1066 raw_spin_lock(&ctx->lock);
1067 perf_pmu_disable(ctx->pmu);
1095 ctx->is_active = 0; 1068 ctx->is_active = 0;
1096 if (likely(!ctx->nr_events)) 1069 if (likely(!ctx->nr_events))
1097 goto out; 1070 goto out;
1098 update_context_time(ctx); 1071 update_context_time(ctx);
1099 1072
1100 perf_disable();
1101 if (!ctx->nr_active) 1073 if (!ctx->nr_active)
1102 goto out_enable; 1074 goto out;
1103 1075
1104 if (event_type & EVENT_PINNED) 1076 if (event_type & EVENT_PINNED) {
1105 list_for_each_entry(event, &ctx->pinned_groups, group_entry) 1077 list_for_each_entry(event, &ctx->pinned_groups, group_entry)
1106 group_sched_out(event, cpuctx, ctx); 1078 group_sched_out(event, cpuctx, ctx);
1079 }
1107 1080
1108 if (event_type & EVENT_FLEXIBLE) 1081 if (event_type & EVENT_FLEXIBLE) {
1109 list_for_each_entry(event, &ctx->flexible_groups, group_entry) 1082 list_for_each_entry(event, &ctx->flexible_groups, group_entry)
1110 group_sched_out(event, cpuctx, ctx); 1083 group_sched_out(event, cpuctx, ctx);
1111 1084 }
1112 out_enable: 1085out:
1113 perf_enable(); 1086 perf_pmu_enable(ctx->pmu);
1114 out:
1115 raw_spin_unlock(&ctx->lock); 1087 raw_spin_unlock(&ctx->lock);
1116} 1088}
1117 1089
@@ -1209,34 +1181,25 @@ static void perf_event_sync_stat(struct perf_event_context *ctx,
1209 } 1181 }
1210} 1182}
1211 1183
1212/* 1184void perf_event_context_sched_out(struct task_struct *task, int ctxn,
1213 * Called from scheduler to remove the events of the current task, 1185 struct task_struct *next)
1214 * with interrupts disabled.
1215 *
1216 * We stop each event and update the event value in event->count.
1217 *
1218 * This does not protect us against NMI, but disable()
1219 * sets the disabled bit in the control field of event _before_
1220 * accessing the event control register. If a NMI hits, then it will
1221 * not restart the event.
1222 */
1223void perf_event_task_sched_out(struct task_struct *task,
1224 struct task_struct *next)
1225{ 1186{
1226 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); 1187 struct perf_event_context *ctx = task->perf_event_ctxp[ctxn];
1227 struct perf_event_context *ctx = task->perf_event_ctxp;
1228 struct perf_event_context *next_ctx; 1188 struct perf_event_context *next_ctx;
1229 struct perf_event_context *parent; 1189 struct perf_event_context *parent;
1190 struct perf_cpu_context *cpuctx;
1230 int do_switch = 1; 1191 int do_switch = 1;
1231 1192
1232 perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); 1193 if (likely(!ctx))
1194 return;
1233 1195
1234 if (likely(!ctx || !cpuctx->task_ctx)) 1196 cpuctx = __get_cpu_context(ctx);
1197 if (!cpuctx->task_ctx)
1235 return; 1198 return;
1236 1199
1237 rcu_read_lock(); 1200 rcu_read_lock();
1238 parent = rcu_dereference(ctx->parent_ctx); 1201 parent = rcu_dereference(ctx->parent_ctx);
1239 next_ctx = next->perf_event_ctxp; 1202 next_ctx = next->perf_event_ctxp[ctxn];
1240 if (parent && next_ctx && 1203 if (parent && next_ctx &&
1241 rcu_dereference(next_ctx->parent_ctx) == parent) { 1204 rcu_dereference(next_ctx->parent_ctx) == parent) {
1242 /* 1205 /*
@@ -1255,8 +1218,8 @@ void perf_event_task_sched_out(struct task_struct *task,
1255 * XXX do we need a memory barrier of sorts 1218 * XXX do we need a memory barrier of sorts
1256 * wrt to rcu_dereference() of perf_event_ctxp 1219 * wrt to rcu_dereference() of perf_event_ctxp
1257 */ 1220 */
1258 task->perf_event_ctxp = next_ctx; 1221 task->perf_event_ctxp[ctxn] = next_ctx;
1259 next->perf_event_ctxp = ctx; 1222 next->perf_event_ctxp[ctxn] = ctx;
1260 ctx->task = next; 1223 ctx->task = next;
1261 next_ctx->task = task; 1224 next_ctx->task = task;
1262 do_switch = 0; 1225 do_switch = 0;
@@ -1274,10 +1237,35 @@ void perf_event_task_sched_out(struct task_struct *task,
1274 } 1237 }
1275} 1238}
1276 1239
1240#define for_each_task_context_nr(ctxn) \
1241 for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++)
1242
1243/*
1244 * Called from scheduler to remove the events of the current task,
1245 * with interrupts disabled.
1246 *
1247 * We stop each event and update the event value in event->count.
1248 *
1249 * This does not protect us against NMI, but disable()
1250 * sets the disabled bit in the control field of event _before_
1251 * accessing the event control register. If a NMI hits, then it will
1252 * not restart the event.
1253 */
1254void perf_event_task_sched_out(struct task_struct *task,
1255 struct task_struct *next)
1256{
1257 int ctxn;
1258
1259 perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0);
1260
1261 for_each_task_context_nr(ctxn)
1262 perf_event_context_sched_out(task, ctxn, next);
1263}
1264
1277static void task_ctx_sched_out(struct perf_event_context *ctx, 1265static void task_ctx_sched_out(struct perf_event_context *ctx,
1278 enum event_type_t event_type) 1266 enum event_type_t event_type)
1279{ 1267{
1280 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); 1268 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
1281 1269
1282 if (!cpuctx->task_ctx) 1270 if (!cpuctx->task_ctx)
1283 return; 1271 return;
@@ -1350,9 +1338,10 @@ ctx_flexible_sched_in(struct perf_event_context *ctx,
1350 if (event->cpu != -1 && event->cpu != smp_processor_id()) 1338 if (event->cpu != -1 && event->cpu != smp_processor_id())
1351 continue; 1339 continue;
1352 1340
1353 if (group_can_go_on(event, cpuctx, can_add_hw)) 1341 if (group_can_go_on(event, cpuctx, can_add_hw)) {
1354 if (group_sched_in(event, cpuctx, ctx)) 1342 if (group_sched_in(event, cpuctx, ctx))
1355 can_add_hw = 0; 1343 can_add_hw = 0;
1344 }
1356 } 1345 }
1357} 1346}
1358 1347
@@ -1368,8 +1357,6 @@ ctx_sched_in(struct perf_event_context *ctx,
1368 1357
1369 ctx->timestamp = perf_clock(); 1358 ctx->timestamp = perf_clock();
1370 1359
1371 perf_disable();
1372
1373 /* 1360 /*
1374 * First go through the list and put on any pinned groups 1361 * First go through the list and put on any pinned groups
1375 * in order to give them the best chance of going on. 1362 * in order to give them the best chance of going on.
@@ -1381,8 +1368,7 @@ ctx_sched_in(struct perf_event_context *ctx,
1381 if (event_type & EVENT_FLEXIBLE) 1368 if (event_type & EVENT_FLEXIBLE)
1382 ctx_flexible_sched_in(ctx, cpuctx); 1369 ctx_flexible_sched_in(ctx, cpuctx);
1383 1370
1384 perf_enable(); 1371out:
1385 out:
1386 raw_spin_unlock(&ctx->lock); 1372 raw_spin_unlock(&ctx->lock);
1387} 1373}
1388 1374
@@ -1394,43 +1380,28 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
1394 ctx_sched_in(ctx, cpuctx, event_type); 1380 ctx_sched_in(ctx, cpuctx, event_type);
1395} 1381}
1396 1382
1397static void task_ctx_sched_in(struct task_struct *task, 1383static void task_ctx_sched_in(struct perf_event_context *ctx,
1398 enum event_type_t event_type) 1384 enum event_type_t event_type)
1399{ 1385{
1400 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); 1386 struct perf_cpu_context *cpuctx;
1401 struct perf_event_context *ctx = task->perf_event_ctxp;
1402 1387
1403 if (likely(!ctx)) 1388 cpuctx = __get_cpu_context(ctx);
1404 return;
1405 if (cpuctx->task_ctx == ctx) 1389 if (cpuctx->task_ctx == ctx)
1406 return; 1390 return;
1391
1407 ctx_sched_in(ctx, cpuctx, event_type); 1392 ctx_sched_in(ctx, cpuctx, event_type);
1408 cpuctx->task_ctx = ctx; 1393 cpuctx->task_ctx = ctx;
1409} 1394}
1410/*
1411 * Called from scheduler to add the events of the current task
1412 * with interrupts disabled.
1413 *
1414 * We restore the event value and then enable it.
1415 *
1416 * This does not protect us against NMI, but enable()
1417 * sets the enabled bit in the control field of event _before_
1418 * accessing the event control register. If a NMI hits, then it will
1419 * keep the event running.
1420 */
1421void perf_event_task_sched_in(struct task_struct *task)
1422{
1423 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
1424 struct perf_event_context *ctx = task->perf_event_ctxp;
1425 1395
1426 if (likely(!ctx)) 1396void perf_event_context_sched_in(struct perf_event_context *ctx)
1427 return; 1397{
1398 struct perf_cpu_context *cpuctx;
1428 1399
1400 cpuctx = __get_cpu_context(ctx);
1429 if (cpuctx->task_ctx == ctx) 1401 if (cpuctx->task_ctx == ctx)
1430 return; 1402 return;
1431 1403
1432 perf_disable(); 1404 perf_pmu_disable(ctx->pmu);
1433
1434 /* 1405 /*
1435 * We want to keep the following priority order: 1406 * We want to keep the following priority order:
1436 * cpu pinned (that don't need to move), task pinned, 1407 * cpu pinned (that don't need to move), task pinned,
@@ -1444,7 +1415,37 @@ void perf_event_task_sched_in(struct task_struct *task)
1444 1415
1445 cpuctx->task_ctx = ctx; 1416 cpuctx->task_ctx = ctx;
1446 1417
1447 perf_enable(); 1418 /*
1419 * Since these rotations are per-cpu, we need to ensure the
1420 * cpu-context we got scheduled on is actually rotating.
1421 */
1422 perf_pmu_rotate_start(ctx->pmu);
1423 perf_pmu_enable(ctx->pmu);
1424}
1425
1426/*
1427 * Called from scheduler to add the events of the current task
1428 * with interrupts disabled.
1429 *
1430 * We restore the event value and then enable it.
1431 *
1432 * This does not protect us against NMI, but enable()
1433 * sets the enabled bit in the control field of event _before_
1434 * accessing the event control register. If a NMI hits, then it will
1435 * keep the event running.
1436 */
1437void perf_event_task_sched_in(struct task_struct *task)
1438{
1439 struct perf_event_context *ctx;
1440 int ctxn;
1441
1442 for_each_task_context_nr(ctxn) {
1443 ctx = task->perf_event_ctxp[ctxn];
1444 if (likely(!ctx))
1445 continue;
1446
1447 perf_event_context_sched_in(ctx);
1448 }
1448} 1449}
1449 1450
1450#define MAX_INTERRUPTS (~0ULL) 1451#define MAX_INTERRUPTS (~0ULL)
@@ -1524,22 +1525,6 @@ do { \
1524 return div64_u64(dividend, divisor); 1525 return div64_u64(dividend, divisor);
1525} 1526}
1526 1527
1527static void perf_event_stop(struct perf_event *event)
1528{
1529 if (!event->pmu->stop)
1530 return event->pmu->disable(event);
1531
1532 return event->pmu->stop(event);
1533}
1534
1535static int perf_event_start(struct perf_event *event)
1536{
1537 if (!event->pmu->start)
1538 return event->pmu->enable(event);
1539
1540 return event->pmu->start(event);
1541}
1542
1543static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) 1528static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
1544{ 1529{
1545 struct hw_perf_event *hwc = &event->hw; 1530 struct hw_perf_event *hwc = &event->hw;
@@ -1559,15 +1544,13 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
1559 hwc->sample_period = sample_period; 1544 hwc->sample_period = sample_period;
1560 1545
1561 if (local64_read(&hwc->period_left) > 8*sample_period) { 1546 if (local64_read(&hwc->period_left) > 8*sample_period) {
1562 perf_disable(); 1547 event->pmu->stop(event, PERF_EF_UPDATE);
1563 perf_event_stop(event);
1564 local64_set(&hwc->period_left, 0); 1548 local64_set(&hwc->period_left, 0);
1565 perf_event_start(event); 1549 event->pmu->start(event, PERF_EF_RELOAD);
1566 perf_enable();
1567 } 1550 }
1568} 1551}
1569 1552
1570static void perf_ctx_adjust_freq(struct perf_event_context *ctx) 1553static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
1571{ 1554{
1572 struct perf_event *event; 1555 struct perf_event *event;
1573 struct hw_perf_event *hwc; 1556 struct hw_perf_event *hwc;
@@ -1592,23 +1575,19 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
1592 */ 1575 */
1593 if (interrupts == MAX_INTERRUPTS) { 1576 if (interrupts == MAX_INTERRUPTS) {
1594 perf_log_throttle(event, 1); 1577 perf_log_throttle(event, 1);
1595 perf_disable(); 1578 event->pmu->start(event, 0);
1596 event->pmu->unthrottle(event);
1597 perf_enable();
1598 } 1579 }
1599 1580
1600 if (!event->attr.freq || !event->attr.sample_freq) 1581 if (!event->attr.freq || !event->attr.sample_freq)
1601 continue; 1582 continue;
1602 1583
1603 perf_disable();
1604 event->pmu->read(event); 1584 event->pmu->read(event);
1605 now = local64_read(&event->count); 1585 now = local64_read(&event->count);
1606 delta = now - hwc->freq_count_stamp; 1586 delta = now - hwc->freq_count_stamp;
1607 hwc->freq_count_stamp = now; 1587 hwc->freq_count_stamp = now;
1608 1588
1609 if (delta > 0) 1589 if (delta > 0)
1610 perf_adjust_period(event, TICK_NSEC, delta); 1590 perf_adjust_period(event, period, delta);
1611 perf_enable();
1612 } 1591 }
1613 raw_spin_unlock(&ctx->lock); 1592 raw_spin_unlock(&ctx->lock);
1614} 1593}
@@ -1626,32 +1605,38 @@ static void rotate_ctx(struct perf_event_context *ctx)
1626 raw_spin_unlock(&ctx->lock); 1605 raw_spin_unlock(&ctx->lock);
1627} 1606}
1628 1607
1629void perf_event_task_tick(struct task_struct *curr) 1608/*
1609 * perf_pmu_rotate_start() and perf_rotate_context() are fully serialized
1610 * because they're strictly cpu affine and rotate_start is called with IRQs
1611 * disabled, while rotate_context is called from IRQ context.
1612 */
1613static void perf_rotate_context(struct perf_cpu_context *cpuctx)
1630{ 1614{
1631 struct perf_cpu_context *cpuctx; 1615 u64 interval = (u64)cpuctx->jiffies_interval * TICK_NSEC;
1632 struct perf_event_context *ctx; 1616 struct perf_event_context *ctx = NULL;
1633 int rotate = 0; 1617 int rotate = 0, remove = 1;
1634 1618
1635 if (!atomic_read(&nr_events)) 1619 if (cpuctx->ctx.nr_events) {
1636 return; 1620 remove = 0;
1637 1621 if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
1638 cpuctx = &__get_cpu_var(perf_cpu_context); 1622 rotate = 1;
1639 if (cpuctx->ctx.nr_events && 1623 }
1640 cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
1641 rotate = 1;
1642 1624
1643 ctx = curr->perf_event_ctxp; 1625 ctx = cpuctx->task_ctx;
1644 if (ctx && ctx->nr_events && ctx->nr_events != ctx->nr_active) 1626 if (ctx && ctx->nr_events) {
1645 rotate = 1; 1627 remove = 0;
1628 if (ctx->nr_events != ctx->nr_active)
1629 rotate = 1;
1630 }
1646 1631
1647 perf_ctx_adjust_freq(&cpuctx->ctx); 1632 perf_pmu_disable(cpuctx->ctx.pmu);
1633 perf_ctx_adjust_freq(&cpuctx->ctx, interval);
1648 if (ctx) 1634 if (ctx)
1649 perf_ctx_adjust_freq(ctx); 1635 perf_ctx_adjust_freq(ctx, interval);
1650 1636
1651 if (!rotate) 1637 if (!rotate)
1652 return; 1638 goto done;
1653 1639
1654 perf_disable();
1655 cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); 1640 cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
1656 if (ctx) 1641 if (ctx)
1657 task_ctx_sched_out(ctx, EVENT_FLEXIBLE); 1642 task_ctx_sched_out(ctx, EVENT_FLEXIBLE);
@@ -1662,8 +1647,27 @@ void perf_event_task_tick(struct task_struct *curr)
1662 1647
1663 cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); 1648 cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE);
1664 if (ctx) 1649 if (ctx)
1665 task_ctx_sched_in(curr, EVENT_FLEXIBLE); 1650 task_ctx_sched_in(ctx, EVENT_FLEXIBLE);
1666 perf_enable(); 1651
1652done:
1653 if (remove)
1654 list_del_init(&cpuctx->rotation_list);
1655
1656 perf_pmu_enable(cpuctx->ctx.pmu);
1657}
1658
1659void perf_event_task_tick(void)
1660{
1661 struct list_head *head = &__get_cpu_var(rotation_list);
1662 struct perf_cpu_context *cpuctx, *tmp;
1663
1664 WARN_ON(!irqs_disabled());
1665
1666 list_for_each_entry_safe(cpuctx, tmp, head, rotation_list) {
1667 if (cpuctx->jiffies_interval == 1 ||
1668 !(jiffies % cpuctx->jiffies_interval))
1669 perf_rotate_context(cpuctx);
1670 }
1667} 1671}
1668 1672
1669static int event_enable_on_exec(struct perf_event *event, 1673static int event_enable_on_exec(struct perf_event *event,
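
perf_event_task_tick() now walks the per-CPU rotation_list and rotates a context only every jiffies_interval ticks. A tiny C sketch of that gating; the cpuctx struct below and the plain tick counter standing in for jiffies are illustrative.

#include <stdio.h>

struct cpuctx { const char *name; unsigned int jiffies_interval; };

/* rotate only every jiffies_interval ticks, like perf_event_task_tick() */
static void tick(struct cpuctx *ctxs, int n, unsigned long jiffies)
{
	for (int i = 0; i < n; i++) {
		struct cpuctx *c = &ctxs[i];

		if (c->jiffies_interval == 1 ||
		    !(jiffies % c->jiffies_interval))
			printf("jiffies=%lu rotate %s\n", jiffies, c->name);
	}
}

int main(void)
{
	struct cpuctx ctxs[] = { { "fast", 1 }, { "slow", 4 } };

	for (unsigned long j = 1; j <= 8; j++)
		tick(ctxs, 2, j);	/* "slow" rotates only on ticks 4 and 8 */
	return 0;
}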
@@ -1685,20 +1689,18 @@ static int event_enable_on_exec(struct perf_event *event,
1685 * Enable all of a task's events that have been marked enable-on-exec. 1689 * Enable all of a task's events that have been marked enable-on-exec.
1686 * This expects task == current. 1690 * This expects task == current.
1687 */ 1691 */
1688static void perf_event_enable_on_exec(struct task_struct *task) 1692static void perf_event_enable_on_exec(struct perf_event_context *ctx)
1689{ 1693{
1690 struct perf_event_context *ctx;
1691 struct perf_event *event; 1694 struct perf_event *event;
1692 unsigned long flags; 1695 unsigned long flags;
1693 int enabled = 0; 1696 int enabled = 0;
1694 int ret; 1697 int ret;
1695 1698
1696 local_irq_save(flags); 1699 local_irq_save(flags);
1697 ctx = task->perf_event_ctxp;
1698 if (!ctx || !ctx->nr_events) 1700 if (!ctx || !ctx->nr_events)
1699 goto out; 1701 goto out;
1700 1702
1701 __perf_event_task_sched_out(ctx); 1703 task_ctx_sched_out(ctx, EVENT_ALL);
1702 1704
1703 raw_spin_lock(&ctx->lock); 1705 raw_spin_lock(&ctx->lock);
1704 1706
@@ -1722,8 +1724,8 @@ static void perf_event_enable_on_exec(struct task_struct *task)
1722 1724
1723 raw_spin_unlock(&ctx->lock); 1725 raw_spin_unlock(&ctx->lock);
1724 1726
1725 perf_event_task_sched_in(task); 1727 perf_event_context_sched_in(ctx);
1726 out: 1728out:
1727 local_irq_restore(flags); 1729 local_irq_restore(flags);
1728} 1730}
1729 1731
@@ -1732,9 +1734,9 @@ static void perf_event_enable_on_exec(struct task_struct *task)
1732 */ 1734 */
1733static void __perf_event_read(void *info) 1735static void __perf_event_read(void *info)
1734{ 1736{
1735 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
1736 struct perf_event *event = info; 1737 struct perf_event *event = info;
1737 struct perf_event_context *ctx = event->ctx; 1738 struct perf_event_context *ctx = event->ctx;
1739 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
1738 1740
1739 /* 1741 /*
1740 * If this is a task context, we need to check whether it is 1742 * If this is a task context, we need to check whether it is
@@ -1782,11 +1784,219 @@ static u64 perf_event_read(struct perf_event *event)
1782} 1784}
1783 1785
1784/* 1786/*
1785 * Initialize the perf_event context in a task_struct: 1787 * Callchain support
1786 */ 1788 */
1789
1790struct callchain_cpus_entries {
1791 struct rcu_head rcu_head;
1792 struct perf_callchain_entry *cpu_entries[0];
1793};
1794
1795static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]);
1796static atomic_t nr_callchain_events;
1797static DEFINE_MUTEX(callchain_mutex);
1798struct callchain_cpus_entries *callchain_cpus_entries;
1799
1800
1801__weak void perf_callchain_kernel(struct perf_callchain_entry *entry,
1802 struct pt_regs *regs)
1803{
1804}
1805
1806__weak void perf_callchain_user(struct perf_callchain_entry *entry,
1807 struct pt_regs *regs)
1808{
1809}
1810
1811static void release_callchain_buffers_rcu(struct rcu_head *head)
1812{
1813 struct callchain_cpus_entries *entries;
1814 int cpu;
1815
1816 entries = container_of(head, struct callchain_cpus_entries, rcu_head);
1817
1818 for_each_possible_cpu(cpu)
1819 kfree(entries->cpu_entries[cpu]);
1820
1821 kfree(entries);
1822}
1823
1824static void release_callchain_buffers(void)
1825{
1826 struct callchain_cpus_entries *entries;
1827
1828 entries = callchain_cpus_entries;
1829 rcu_assign_pointer(callchain_cpus_entries, NULL);
1830 call_rcu(&entries->rcu_head, release_callchain_buffers_rcu);
1831}
1832
1833static int alloc_callchain_buffers(void)
1834{
1835 int cpu;
1836 int size;
1837 struct callchain_cpus_entries *entries;
1838
1839 /*
1840 * We can't use the percpu allocation API for data that can be
1841 * accessed from NMI. Use a temporary manual per cpu allocation
1842 * until that gets sorted out.
1843 */
1844 size = sizeof(*entries) + sizeof(struct perf_callchain_entry *) *
1845 num_possible_cpus();
1846
1847 entries = kzalloc(size, GFP_KERNEL);
1848 if (!entries)
1849 return -ENOMEM;
1850
1851 size = sizeof(struct perf_callchain_entry) * PERF_NR_CONTEXTS;
1852
1853 for_each_possible_cpu(cpu) {
1854 entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL,
1855 cpu_to_node(cpu));
1856 if (!entries->cpu_entries[cpu])
1857 goto fail;
1858 }
1859
1860 rcu_assign_pointer(callchain_cpus_entries, entries);
1861
1862 return 0;
1863
1864fail:
1865 for_each_possible_cpu(cpu)
1866 kfree(entries->cpu_entries[cpu]);
1867 kfree(entries);
1868
1869 return -ENOMEM;
1870}
1871
1872static int get_callchain_buffers(void)
1873{
1874 int err = 0;
1875 int count;
1876
1877 mutex_lock(&callchain_mutex);
1878
1879 count = atomic_inc_return(&nr_callchain_events);
1880 if (WARN_ON_ONCE(count < 1)) {
1881 err = -EINVAL;
1882 goto exit;
1883 }
1884
1885 if (count > 1) {
1886 /* If the allocation failed, give up */
1887 if (!callchain_cpus_entries)
1888 err = -ENOMEM;
1889 goto exit;
1890 }
1891
1892 err = alloc_callchain_buffers();
1893 if (err)
1894 release_callchain_buffers();
1895exit:
1896 mutex_unlock(&callchain_mutex);
1897
1898 return err;
1899}
1900
1901static void put_callchain_buffers(void)
1902{
1903 if (atomic_dec_and_mutex_lock(&nr_callchain_events, &callchain_mutex)) {
1904 release_callchain_buffers();
1905 mutex_unlock(&callchain_mutex);
1906 }
1907}
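
get_callchain_buffers()/put_callchain_buffers() implement allocate-on-first-user, free-on-last-user semantics under callchain_mutex. A condensed single-threaded C sketch of the same reference-counting pattern; malloc() stands in for the per-CPU allocation and the locking and RCU handling are left out.

#include <stdio.h>
#include <stdlib.h>

static int nr_users;
static void *buffers;			/* stands in for callchain_cpus_entries */

/* first user allocates the shared buffers */
static int get_buffers(void)
{
	if (++nr_users > 1) {
		if (buffers)
			return 0;
		nr_users--;		/* earlier allocation failed */
		return -1;
	}

	buffers = malloc(4096);
	if (!buffers) {
		nr_users--;
		return -1;
	}
	return 0;
}

/* last user frees them again */
static void put_buffers(void)
{
	if (--nr_users == 0) {
		free(buffers);
		buffers = NULL;
	}
}

int main(void)
{
	get_buffers();		/* allocates */
	get_buffers();		/* reuses */
	put_buffers();		/* still one user left */
	put_buffers();		/* frees */
	printf("users=%d buffers=%p\n", nr_users, buffers);
	return 0;
}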
1908
1909static int get_recursion_context(int *recursion)
1910{
1911 int rctx;
1912
1913 if (in_nmi())
1914 rctx = 3;
1915 else if (in_irq())
1916 rctx = 2;
1917 else if (in_softirq())
1918 rctx = 1;
1919 else
1920 rctx = 0;
1921
1922 if (recursion[rctx])
1923 return -1;
1924
1925 recursion[rctx]++;
1926 barrier();
1927
1928 return rctx;
1929}
1930
1931static inline void put_recursion_context(int *recursion, int rctx)
1932{
1933 barrier();
1934 recursion[rctx]--;
1935}
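
get_recursion_context()/put_recursion_context() keep one callchain slot per execution level (task, softirq, hardirq, NMI) and refuse re-entry within the same level. A userspace-flavoured sketch of that guard; the in_nmi()/in_irq()/in_softirq() tests are replaced by an explicit level argument for illustration.

#include <stdio.h>

#define NR_CONTEXTS 4			/* task, softirq, hardirq, nmi */

static int recursion[NR_CONTEXTS];

/* claim the slot for the current level, or -1 if already in use */
static int get_recursion_context(int level)
{
	if (recursion[level])
		return -1;
	recursion[level]++;
	return level;
}

static void put_recursion_context(int rctx)
{
	recursion[rctx]--;
}

int main(void)
{
	int rctx = get_recursion_context(2);	/* e.g. hardirq level */

	printf("first claim: %d\n", rctx);
	printf("nested claim: %d\n", get_recursion_context(2));	/* -1 */
	put_recursion_context(rctx);
	printf("after release: %d\n", get_recursion_context(2));
	return 0;
}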
1936
1937static struct perf_callchain_entry *get_callchain_entry(int *rctx)
1938{
1939 int cpu;
1940 struct callchain_cpus_entries *entries;
1941
1942 *rctx = get_recursion_context(__get_cpu_var(callchain_recursion));
1943 if (*rctx == -1)
1944 return NULL;
1945
1946 entries = rcu_dereference(callchain_cpus_entries);
1947 if (!entries)
1948 return NULL;
1949
1950 cpu = smp_processor_id();
1951
1952 return &entries->cpu_entries[cpu][*rctx];
1953}
1954
1787static void 1955static void
1788__perf_event_init_context(struct perf_event_context *ctx, 1956put_callchain_entry(int rctx)
1789 struct task_struct *task) 1957{
1958 put_recursion_context(__get_cpu_var(callchain_recursion), rctx);
1959}
1960
1961static struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
1962{
1963 int rctx;
1964 struct perf_callchain_entry *entry;
1965
1966
1967 entry = get_callchain_entry(&rctx);
1968 if (rctx == -1)
1969 return NULL;
1970
1971 if (!entry)
1972 goto exit_put;
1973
1974 entry->nr = 0;
1975
1976 if (!user_mode(regs)) {
1977 perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
1978 perf_callchain_kernel(entry, regs);
1979 if (current->mm)
1980 regs = task_pt_regs(current);
1981 else
1982 regs = NULL;
1983 }
1984
1985 if (regs) {
1986 perf_callchain_store(entry, PERF_CONTEXT_USER);
1987 perf_callchain_user(entry, regs);
1988 }
1989
1990exit_put:
1991 put_callchain_entry(rctx);
1992
1993 return entry;
1994}
1995
1996/*
1997 * Initialize the perf_event context in a task_struct:
1998 */
1999static void __perf_event_init_context(struct perf_event_context *ctx)
1790{ 2000{
1791 raw_spin_lock_init(&ctx->lock); 2001 raw_spin_lock_init(&ctx->lock);
1792 mutex_init(&ctx->mutex); 2002 mutex_init(&ctx->mutex);
@@ -1794,45 +2004,38 @@ __perf_event_init_context(struct perf_event_context *ctx,
1794 INIT_LIST_HEAD(&ctx->flexible_groups); 2004 INIT_LIST_HEAD(&ctx->flexible_groups);
1795 INIT_LIST_HEAD(&ctx->event_list); 2005 INIT_LIST_HEAD(&ctx->event_list);
1796 atomic_set(&ctx->refcount, 1); 2006 atomic_set(&ctx->refcount, 1);
1797 ctx->task = task;
1798} 2007}
1799 2008
1800static struct perf_event_context *find_get_context(pid_t pid, int cpu) 2009static struct perf_event_context *
2010alloc_perf_context(struct pmu *pmu, struct task_struct *task)
1801{ 2011{
1802 struct perf_event_context *ctx; 2012 struct perf_event_context *ctx;
1803 struct perf_cpu_context *cpuctx;
1804 struct task_struct *task;
1805 unsigned long flags;
1806 int err;
1807
1808 if (pid == -1 && cpu != -1) {
1809 /* Must be root to operate on a CPU event: */
1810 if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
1811 return ERR_PTR(-EACCES);
1812 2013
1813 if (cpu < 0 || cpu >= nr_cpumask_bits) 2014 ctx = kzalloc(sizeof(struct perf_event_context), GFP_KERNEL);
1814 return ERR_PTR(-EINVAL); 2015 if (!ctx)
2016 return NULL;
1815 2017
1816 /* 2018 __perf_event_init_context(ctx);
1817 * We could be clever and allow to attach a event to an 2019 if (task) {
1818 * offline CPU and activate it when the CPU comes up, but 2020 ctx->task = task;
1819 * that's for later. 2021 get_task_struct(task);
1820 */ 2022 }
1821 if (!cpu_online(cpu)) 2023 ctx->pmu = pmu;
1822 return ERR_PTR(-ENODEV);
1823 2024
1824 cpuctx = &per_cpu(perf_cpu_context, cpu); 2025 return ctx;
1825 ctx = &cpuctx->ctx; 2026}
1826 get_ctx(ctx);
1827 2027
1828 return ctx; 2028static struct task_struct *
1829 } 2029find_lively_task_by_vpid(pid_t vpid)
2030{
2031 struct task_struct *task;
2032 int err;
1830 2033
1831 rcu_read_lock(); 2034 rcu_read_lock();
1832 if (!pid) 2035 if (!vpid)
1833 task = current; 2036 task = current;
1834 else 2037 else
1835 task = find_task_by_vpid(pid); 2038 task = find_task_by_vpid(vpid);
1836 if (task) 2039 if (task)
1837 get_task_struct(task); 2040 get_task_struct(task);
1838 rcu_read_unlock(); 2041 rcu_read_unlock();
@@ -1852,35 +2055,79 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu)
1852 if (!ptrace_may_access(task, PTRACE_MODE_READ)) 2055 if (!ptrace_may_access(task, PTRACE_MODE_READ))
1853 goto errout; 2056 goto errout;
1854 2057
1855 retry: 2058 return task;
1856 ctx = perf_lock_task_context(task, &flags); 2059errout:
2060 put_task_struct(task);
2061 return ERR_PTR(err);
2062
2063}
2064
2065static struct perf_event_context *
2066find_get_context(struct pmu *pmu, struct task_struct *task, int cpu)
2067{
2068 struct perf_event_context *ctx;
2069 struct perf_cpu_context *cpuctx;
2070 unsigned long flags;
2071 int ctxn, err;
2072
2073 if (!task && cpu != -1) {
2074 /* Must be root to operate on a CPU event: */
2075 if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
2076 return ERR_PTR(-EACCES);
2077
2078 if (cpu < 0 || cpu >= nr_cpumask_bits)
2079 return ERR_PTR(-EINVAL);
2080
2081 /*
	 2082	 * We could be clever and allow attaching an event to an
2083 * offline CPU and activate it when the CPU comes up, but
2084 * that's for later.
2085 */
2086 if (!cpu_online(cpu))
2087 return ERR_PTR(-ENODEV);
2088
2089 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
2090 ctx = &cpuctx->ctx;
2091 get_ctx(ctx);
2092
2093 return ctx;
2094 }
2095
2096 err = -EINVAL;
2097 ctxn = pmu->task_ctx_nr;
2098 if (ctxn < 0)
2099 goto errout;
2100
2101retry:
2102 ctx = perf_lock_task_context(task, ctxn, &flags);
1857 if (ctx) { 2103 if (ctx) {
1858 unclone_ctx(ctx); 2104 unclone_ctx(ctx);
1859 raw_spin_unlock_irqrestore(&ctx->lock, flags); 2105 raw_spin_unlock_irqrestore(&ctx->lock, flags);
1860 } 2106 }
1861 2107
1862 if (!ctx) { 2108 if (!ctx) {
1863 ctx = kzalloc(sizeof(struct perf_event_context), GFP_KERNEL); 2109 ctx = alloc_perf_context(pmu, task);
1864 err = -ENOMEM; 2110 err = -ENOMEM;
1865 if (!ctx) 2111 if (!ctx)
1866 goto errout; 2112 goto errout;
1867 __perf_event_init_context(ctx, task); 2113
1868 get_ctx(ctx); 2114 get_ctx(ctx);
1869 if (cmpxchg(&task->perf_event_ctxp, NULL, ctx)) { 2115
2116 if (cmpxchg(&task->perf_event_ctxp[ctxn], NULL, ctx)) {
1870 /* 2117 /*
1871 * We raced with some other task; use 2118 * We raced with some other task; use
1872 * the context they set. 2119 * the context they set.
1873 */ 2120 */
2121 put_task_struct(task);
1874 kfree(ctx); 2122 kfree(ctx);
1875 goto retry; 2123 goto retry;
1876 } 2124 }
1877 get_task_struct(task);
1878 } 2125 }
1879 2126
1880 put_task_struct(task); 2127 put_task_struct(task);
1881 return ctx; 2128 return ctx;
1882 2129
1883 errout: 2130errout:
1884 put_task_struct(task); 2131 put_task_struct(task);
1885 return ERR_PTR(err); 2132 return ERR_PTR(err);
1886} 2133}
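find_get_context() publishes a freshly allocated per-task context with cmpxchg(): whoever installs first wins, the loser frees its copy and retries. A condensed user-space sketch of that install-or-lose pattern, using a GCC atomic builtin in place of cmpxchg; everything here is illustrative, not kernel code:

#include <stdio.h>
#include <stdlib.h>

struct ctx { int refcount; };

static struct ctx *task_ctx;    /* stands in for task->perf_event_ctxp[ctxn] */

static struct ctx *get_or_alloc_ctx(void)
{
        struct ctx *ctx;

retry:
        ctx = task_ctx;
        if (ctx)
                return ctx;                     /* already installed by someone */

        ctx = calloc(1, sizeof(*ctx));
        if (!ctx)
                return NULL;
        ctx->refcount = 1;

        /* like cmpxchg(&task->perf_event_ctxp[ctxn], NULL, ctx) */
        if (!__sync_bool_compare_and_swap(&task_ctx, NULL, ctx)) {
                free(ctx);                      /* raced: drop ours, take theirs */
                goto retry;
        }
        return ctx;
}

int main(void)
{
        printf("ctx at %p\n", (void *)get_or_alloc_ctx());
        return 0;
}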
@@ -1913,6 +2160,8 @@ static void free_event(struct perf_event *event)
1913 atomic_dec(&nr_comm_events); 2160 atomic_dec(&nr_comm_events);
1914 if (event->attr.task) 2161 if (event->attr.task)
1915 atomic_dec(&nr_task_events); 2162 atomic_dec(&nr_task_events);
2163 if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
2164 put_callchain_buffers();
1916 } 2165 }
1917 2166
1918 if (event->buffer) { 2167 if (event->buffer) {
@@ -1923,7 +2172,9 @@ static void free_event(struct perf_event *event)
1923 if (event->destroy) 2172 if (event->destroy)
1924 event->destroy(event); 2173 event->destroy(event);
1925 2174
1926 put_ctx(event->ctx); 2175 if (event->ctx)
2176 put_ctx(event->ctx);
2177
1927 call_rcu(&event->rcu_head, free_event_rcu); 2178 call_rcu(&event->rcu_head, free_event_rcu);
1928} 2179}
1929 2180
@@ -2344,6 +2595,9 @@ int perf_event_task_disable(void)
2344 2595
2345static int perf_event_index(struct perf_event *event) 2596static int perf_event_index(struct perf_event *event)
2346{ 2597{
2598 if (event->hw.state & PERF_HES_STOPPED)
2599 return 0;
2600
2347 if (event->state != PERF_EVENT_STATE_ACTIVE) 2601 if (event->state != PERF_EVENT_STATE_ACTIVE)
2348 return 0; 2602 return 0;
2349 2603
@@ -2956,16 +3210,6 @@ void perf_event_do_pending(void)
2956} 3210}
2957 3211
2958/* 3212/*
2959 * Callchain support -- arch specific
2960 */
2961
2962__weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
2963{
2964 return NULL;
2965}
2966
2967
2968/*
2969 * We assume there is only KVM supporting the callbacks. 3213 * We assume there is only KVM supporting the callbacks.
2970 * Later on, we might change it to a list if there is 3214 * Later on, we might change it to a list if there is
2971 * another virtualization implementation supporting the callbacks. 3215 * another virtualization implementation supporting the callbacks.
@@ -3071,7 +3315,7 @@ again:
3071 if (handle->wakeup != local_read(&buffer->wakeup)) 3315 if (handle->wakeup != local_read(&buffer->wakeup))
3072 perf_output_wakeup(handle); 3316 perf_output_wakeup(handle);
3073 3317
3074 out: 3318out:
3075 preempt_enable(); 3319 preempt_enable();
3076} 3320}
3077 3321
@@ -3459,14 +3703,20 @@ static void perf_event_output(struct perf_event *event, int nmi,
3459 struct perf_output_handle handle; 3703 struct perf_output_handle handle;
3460 struct perf_event_header header; 3704 struct perf_event_header header;
3461 3705
3706 /* protect the callchain buffers */
3707 rcu_read_lock();
3708
3462 perf_prepare_sample(&header, data, event, regs); 3709 perf_prepare_sample(&header, data, event, regs);
3463 3710
3464 if (perf_output_begin(&handle, event, header.size, nmi, 1)) 3711 if (perf_output_begin(&handle, event, header.size, nmi, 1))
3465 return; 3712 goto exit;
3466 3713
3467 perf_output_sample(&handle, &header, data, event); 3714 perf_output_sample(&handle, &header, data, event);
3468 3715
3469 perf_output_end(&handle); 3716 perf_output_end(&handle);
3717
3718exit:
3719 rcu_read_unlock();
3470} 3720}
3471 3721
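The new rcu_read_lock()/rcu_read_unlock() pair pins the per-cpu callchain buffers for the whole prepare/output sequence, because the release path (not shown in this hunk) defers the actual free until after an RCU grace period. As a rough user-space analogy only, with RCU replaced by a reader-writer lock and all names invented for illustration:

#include <pthread.h>
#include <stdlib.h>

static pthread_rwlock_t buf_lock = PTHREAD_RWLOCK_INITIALIZER;
static int *callchain_buf;

/* Reader: the buffer cannot disappear between "prepare" and "output". */
static void output_sample(void)
{
        pthread_rwlock_rdlock(&buf_lock);
        if (callchain_buf)
                callchain_buf[0]++;             /* "use" the buffer */
        pthread_rwlock_unlock(&buf_lock);
}

/* Updater: unpublish first, free only once no reader can still see it. */
static void release_buffers(void)
{
        int *old;

        pthread_rwlock_wrlock(&buf_lock);
        old = callchain_buf;
        callchain_buf = NULL;
        pthread_rwlock_unlock(&buf_lock);
        free(old);
}

int main(void)
{
        callchain_buf = calloc(16, sizeof(int));
        output_sample();
        release_buffers();
        return 0;
}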
3472/* 3722/*
@@ -3580,16 +3830,25 @@ static void perf_event_task_ctx(struct perf_event_context *ctx,
3580static void perf_event_task_event(struct perf_task_event *task_event) 3830static void perf_event_task_event(struct perf_task_event *task_event)
3581{ 3831{
3582 struct perf_cpu_context *cpuctx; 3832 struct perf_cpu_context *cpuctx;
3583 struct perf_event_context *ctx = task_event->task_ctx; 3833 struct perf_event_context *ctx;
3834 struct pmu *pmu;
3835 int ctxn;
3584 3836
3585 rcu_read_lock(); 3837 rcu_read_lock();
3586 cpuctx = &get_cpu_var(perf_cpu_context); 3838 list_for_each_entry_rcu(pmu, &pmus, entry) {
3587 perf_event_task_ctx(&cpuctx->ctx, task_event); 3839 cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
3588 if (!ctx) 3840 perf_event_task_ctx(&cpuctx->ctx, task_event);
3589 ctx = rcu_dereference(current->perf_event_ctxp); 3841
3590 if (ctx) 3842 ctx = task_event->task_ctx;
3591 perf_event_task_ctx(ctx, task_event); 3843 if (!ctx) {
3592 put_cpu_var(perf_cpu_context); 3844 ctxn = pmu->task_ctx_nr;
3845 if (ctxn < 0)
3846 continue;
3847 ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
3848 }
3849 if (ctx)
3850 perf_event_task_ctx(ctx, task_event);
3851 }
3593 rcu_read_unlock(); 3852 rcu_read_unlock();
3594} 3853}
3595 3854
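Because each PMU now carries its own per-cpu context and, optionally, a per-task context slot, side-band records such as the task event above are delivered by walking every registered PMU. A condensed sketch of that fan-out shape in plain C; the structures and names are illustrative, and the real code walks the pmus list under RCU:

#include <stdio.h>

#define NR_TASK_CTX 2           /* e.g. a hardware slot and a software slot */

struct ctx { const char *name; };
struct pmu { const char *name; int task_ctx_nr; struct ctx cpu_ctx; };

static struct ctx task_ctxs[NR_TASK_CTX] = { { "task-hw" }, { "task-sw" } };
static struct pmu pmus[] = {
        { "cpu",      0, { "cpu-hw" } },
        { "software", 1, { "cpu-sw" } },
};

static void deliver(struct ctx *ctx)
{
        printf("deliver to %s\n", ctx->name);
}

/* One side-band event fans out to every PMU's cpu context and task context. */
static void sideband_event(void)
{
        for (unsigned i = 0; i < sizeof(pmus) / sizeof(pmus[0]); i++) {
                deliver(&pmus[i].cpu_ctx);
                if (pmus[i].task_ctx_nr >= 0)
                        deliver(&task_ctxs[pmus[i].task_ctx_nr]);
        }
}

int main(void)
{
        sideband_event();
        return 0;
}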
@@ -3694,8 +3953,10 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
3694{ 3953{
3695 struct perf_cpu_context *cpuctx; 3954 struct perf_cpu_context *cpuctx;
3696 struct perf_event_context *ctx; 3955 struct perf_event_context *ctx;
3697 unsigned int size;
3698 char comm[TASK_COMM_LEN]; 3956 char comm[TASK_COMM_LEN];
3957 unsigned int size;
3958 struct pmu *pmu;
3959 int ctxn;
3699 3960
3700 memset(comm, 0, sizeof(comm)); 3961 memset(comm, 0, sizeof(comm));
3701 strlcpy(comm, comm_event->task->comm, sizeof(comm)); 3962 strlcpy(comm, comm_event->task->comm, sizeof(comm));
@@ -3707,21 +3968,34 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
3707 comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; 3968 comm_event->event_id.header.size = sizeof(comm_event->event_id) + size;
3708 3969
3709 rcu_read_lock(); 3970 rcu_read_lock();
3710 cpuctx = &get_cpu_var(perf_cpu_context); 3971 list_for_each_entry_rcu(pmu, &pmus, entry) {
3711 perf_event_comm_ctx(&cpuctx->ctx, comm_event); 3972 cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
3712 ctx = rcu_dereference(current->perf_event_ctxp); 3973 perf_event_comm_ctx(&cpuctx->ctx, comm_event);
3713 if (ctx) 3974
3714 perf_event_comm_ctx(ctx, comm_event); 3975 ctxn = pmu->task_ctx_nr;
3715 put_cpu_var(perf_cpu_context); 3976 if (ctxn < 0)
3977 continue;
3978
3979 ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
3980 if (ctx)
3981 perf_event_comm_ctx(ctx, comm_event);
3982 }
3716 rcu_read_unlock(); 3983 rcu_read_unlock();
3717} 3984}
3718 3985
3719void perf_event_comm(struct task_struct *task) 3986void perf_event_comm(struct task_struct *task)
3720{ 3987{
3721 struct perf_comm_event comm_event; 3988 struct perf_comm_event comm_event;
3989 struct perf_event_context *ctx;
3990 int ctxn;
3991
3992 for_each_task_context_nr(ctxn) {
3993 ctx = task->perf_event_ctxp[ctxn];
3994 if (!ctx)
3995 continue;
3722 3996
3723 if (task->perf_event_ctxp) 3997 perf_event_enable_on_exec(ctx);
3724 perf_event_enable_on_exec(task); 3998 }
3725 3999
3726 if (!atomic_read(&nr_comm_events)) 4000 if (!atomic_read(&nr_comm_events))
3727 return; 4001 return;
@@ -3823,6 +4097,8 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
3823 char tmp[16]; 4097 char tmp[16];
3824 char *buf = NULL; 4098 char *buf = NULL;
3825 const char *name; 4099 const char *name;
4100 struct pmu *pmu;
4101 int ctxn;
3826 4102
3827 memset(tmp, 0, sizeof(tmp)); 4103 memset(tmp, 0, sizeof(tmp));
3828 4104
@@ -3875,12 +4151,21 @@ got_name:
3875 mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; 4151 mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size;
3876 4152
3877 rcu_read_lock(); 4153 rcu_read_lock();
3878 cpuctx = &get_cpu_var(perf_cpu_context); 4154 list_for_each_entry_rcu(pmu, &pmus, entry) {
3879 perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, vma->vm_flags & VM_EXEC); 4155 cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
3880 ctx = rcu_dereference(current->perf_event_ctxp); 4156 perf_event_mmap_ctx(&cpuctx->ctx, mmap_event,
3881 if (ctx) 4157 vma->vm_flags & VM_EXEC);
3882 perf_event_mmap_ctx(ctx, mmap_event, vma->vm_flags & VM_EXEC); 4158
3883 put_cpu_var(perf_cpu_context); 4159 ctxn = pmu->task_ctx_nr;
4160 if (ctxn < 0)
4161 continue;
4162
4163 ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
4164 if (ctx) {
4165 perf_event_mmap_ctx(ctx, mmap_event,
4166 vma->vm_flags & VM_EXEC);
4167 }
4168 }
3884 rcu_read_unlock(); 4169 rcu_read_unlock();
3885 4170
3886 kfree(buf); 4171 kfree(buf);
@@ -3962,8 +4247,6 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
3962 struct hw_perf_event *hwc = &event->hw; 4247 struct hw_perf_event *hwc = &event->hw;
3963 int ret = 0; 4248 int ret = 0;
3964 4249
3965 throttle = (throttle && event->pmu->unthrottle != NULL);
3966
3967 if (!throttle) { 4250 if (!throttle) {
3968 hwc->interrupts++; 4251 hwc->interrupts++;
3969 } else { 4252 } else {
@@ -4031,6 +4314,17 @@ int perf_event_overflow(struct perf_event *event, int nmi,
4031 * Generic software event infrastructure 4314 * Generic software event infrastructure
4032 */ 4315 */
4033 4316
4317struct swevent_htable {
4318 struct swevent_hlist *swevent_hlist;
4319 struct mutex hlist_mutex;
4320 int hlist_refcount;
4321
4322 /* Recursion avoidance in each contexts */
4323 int recursion[PERF_NR_CONTEXTS];
4324};
4325
4326static DEFINE_PER_CPU(struct swevent_htable, swevent_htable);
4327
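struct swevent_htable gathers, per CPU, the software-event hash list plus the mutex, refcount and recursion counters that used to live in perf_cpu_context. A user-space sketch of the same refcounted get/put shape around a lazily allocated per-"CPU" table; all names and sizes here are illustrative:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS 4

struct swhash {
        pthread_mutex_t lock;
        int refcount;
        void *hlist;                            /* allocated on first use */
};

static struct swhash swhash[NR_CPUS];

static int hlist_get(int cpu)
{
        struct swhash *sh = &swhash[cpu];
        int ret = 0;

        pthread_mutex_lock(&sh->lock);
        if (!sh->hlist)
                sh->hlist = calloc(1, 256);     /* first user allocates */
        if (!sh->hlist)
                ret = -1;
        else
                sh->refcount++;
        pthread_mutex_unlock(&sh->lock);
        return ret;
}

static void hlist_put(int cpu)
{
        struct swhash *sh = &swhash[cpu];

        pthread_mutex_lock(&sh->lock);
        if (--sh->refcount == 0) {              /* last user frees */
                free(sh->hlist);
                sh->hlist = NULL;
        }
        pthread_mutex_unlock(&sh->lock);
}

int main(void)
{
        for (int i = 0; i < NR_CPUS; i++)
                pthread_mutex_init(&swhash[i].lock, NULL);
        printf("get: %d\n", hlist_get(0));
        hlist_put(0);
        return 0;
}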
4034/* 4328/*
4035 * We directly increment event->count and keep a second value in 4329 * We directly increment event->count and keep a second value in
4036 * event->hw.period_left to count intervals. This period event 4330 * event->hw.period_left to count intervals. This period event
@@ -4088,7 +4382,7 @@ static void perf_swevent_overflow(struct perf_event *event, u64 overflow,
4088 } 4382 }
4089} 4383}
4090 4384
4091static void perf_swevent_add(struct perf_event *event, u64 nr, 4385static void perf_swevent_event(struct perf_event *event, u64 nr,
4092 int nmi, struct perf_sample_data *data, 4386 int nmi, struct perf_sample_data *data,
4093 struct pt_regs *regs) 4387 struct pt_regs *regs)
4094{ 4388{
@@ -4114,6 +4408,9 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,
4114static int perf_exclude_event(struct perf_event *event, 4408static int perf_exclude_event(struct perf_event *event,
4115 struct pt_regs *regs) 4409 struct pt_regs *regs)
4116{ 4410{
4411 if (event->hw.state & PERF_HES_STOPPED)
4412 return 0;
4413
4117 if (regs) { 4414 if (regs) {
4118 if (event->attr.exclude_user && user_mode(regs)) 4415 if (event->attr.exclude_user && user_mode(regs))
4119 return 1; 4416 return 1;
@@ -4160,11 +4457,11 @@ __find_swevent_head(struct swevent_hlist *hlist, u64 type, u32 event_id)
4160 4457
4161/* For the read side: events when they trigger */ 4458/* For the read side: events when they trigger */
4162static inline struct hlist_head * 4459static inline struct hlist_head *
4163find_swevent_head_rcu(struct perf_cpu_context *ctx, u64 type, u32 event_id) 4460find_swevent_head_rcu(struct swevent_htable *swhash, u64 type, u32 event_id)
4164{ 4461{
4165 struct swevent_hlist *hlist; 4462 struct swevent_hlist *hlist;
4166 4463
4167 hlist = rcu_dereference(ctx->swevent_hlist); 4464 hlist = rcu_dereference(swhash->swevent_hlist);
4168 if (!hlist) 4465 if (!hlist)
4169 return NULL; 4466 return NULL;
4170 4467
@@ -4173,7 +4470,7 @@ find_swevent_head_rcu(struct perf_cpu_context *ctx, u64 type, u32 event_id)
4173 4470
4174/* For the event head insertion and removal in the hlist */ 4471/* For the event head insertion and removal in the hlist */
4175static inline struct hlist_head * 4472static inline struct hlist_head *
4176find_swevent_head(struct perf_cpu_context *ctx, struct perf_event *event) 4473find_swevent_head(struct swevent_htable *swhash, struct perf_event *event)
4177{ 4474{
4178 struct swevent_hlist *hlist; 4475 struct swevent_hlist *hlist;
4179 u32 event_id = event->attr.config; 4476 u32 event_id = event->attr.config;
@@ -4184,7 +4481,7 @@ find_swevent_head(struct perf_cpu_context *ctx, struct perf_event *event)
4184 * and release. Which makes the protected version suitable here. 4481 * and release. Which makes the protected version suitable here.
4185 * The context lock guarantees that. 4482 * The context lock guarantees that.
4186 */ 4483 */
4187 hlist = rcu_dereference_protected(ctx->swevent_hlist, 4484 hlist = rcu_dereference_protected(swhash->swevent_hlist,
4188 lockdep_is_held(&event->ctx->lock)); 4485 lockdep_is_held(&event->ctx->lock));
4189 if (!hlist) 4486 if (!hlist)
4190 return NULL; 4487 return NULL;
@@ -4197,23 +4494,19 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
4197 struct perf_sample_data *data, 4494 struct perf_sample_data *data,
4198 struct pt_regs *regs) 4495 struct pt_regs *regs)
4199{ 4496{
4200 struct perf_cpu_context *cpuctx; 4497 struct swevent_htable *swhash = &__get_cpu_var(swevent_htable);
4201 struct perf_event *event; 4498 struct perf_event *event;
4202 struct hlist_node *node; 4499 struct hlist_node *node;
4203 struct hlist_head *head; 4500 struct hlist_head *head;
4204 4501
4205 cpuctx = &__get_cpu_var(perf_cpu_context);
4206
4207 rcu_read_lock(); 4502 rcu_read_lock();
4208 4503 head = find_swevent_head_rcu(swhash, type, event_id);
4209 head = find_swevent_head_rcu(cpuctx, type, event_id);
4210
4211 if (!head) 4504 if (!head)
4212 goto end; 4505 goto end;
4213 4506
4214 hlist_for_each_entry_rcu(event, node, head, hlist_entry) { 4507 hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
4215 if (perf_swevent_match(event, type, event_id, data, regs)) 4508 if (perf_swevent_match(event, type, event_id, data, regs))
4216 perf_swevent_add(event, nr, nmi, data, regs); 4509 perf_swevent_event(event, nr, nmi, data, regs);
4217 } 4510 }
4218end: 4511end:
4219 rcu_read_unlock(); 4512 rcu_read_unlock();
@@ -4221,33 +4514,17 @@ end:
4221 4514
4222int perf_swevent_get_recursion_context(void) 4515int perf_swevent_get_recursion_context(void)
4223{ 4516{
4224 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); 4517 struct swevent_htable *swhash = &__get_cpu_var(swevent_htable);
4225 int rctx;
4226 4518
4227 if (in_nmi()) 4519 return get_recursion_context(swhash->recursion);
4228 rctx = 3;
4229 else if (in_irq())
4230 rctx = 2;
4231 else if (in_softirq())
4232 rctx = 1;
4233 else
4234 rctx = 0;
4235
4236 if (cpuctx->recursion[rctx])
4237 return -1;
4238
4239 cpuctx->recursion[rctx]++;
4240 barrier();
4241
4242 return rctx;
4243} 4520}
4244EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); 4521EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);
4245 4522
4246void inline perf_swevent_put_recursion_context(int rctx) 4523void inline perf_swevent_put_recursion_context(int rctx)
4247{ 4524{
4248 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); 4525 struct swevent_htable *swhash = &__get_cpu_var(swevent_htable);
4249 barrier(); 4526
4250 cpuctx->recursion[rctx]--; 4527 put_recursion_context(swhash->recursion, rctx);
4251} 4528}
4252 4529
4253void __perf_sw_event(u32 event_id, u64 nr, int nmi, 4530void __perf_sw_event(u32 event_id, u64 nr, int nmi,
@@ -4273,20 +4550,20 @@ static void perf_swevent_read(struct perf_event *event)
4273{ 4550{
4274} 4551}
4275 4552
4276static int perf_swevent_enable(struct perf_event *event) 4553static int perf_swevent_add(struct perf_event *event, int flags)
4277{ 4554{
4555 struct swevent_htable *swhash = &__get_cpu_var(swevent_htable);
4278 struct hw_perf_event *hwc = &event->hw; 4556 struct hw_perf_event *hwc = &event->hw;
4279 struct perf_cpu_context *cpuctx;
4280 struct hlist_head *head; 4557 struct hlist_head *head;
4281 4558
4282 cpuctx = &__get_cpu_var(perf_cpu_context);
4283
4284 if (hwc->sample_period) { 4559 if (hwc->sample_period) {
4285 hwc->last_period = hwc->sample_period; 4560 hwc->last_period = hwc->sample_period;
4286 perf_swevent_set_period(event); 4561 perf_swevent_set_period(event);
4287 } 4562 }
4288 4563
4289 head = find_swevent_head(cpuctx, event); 4564 hwc->state = !(flags & PERF_EF_START);
4565
4566 head = find_swevent_head(swhash, event);
4290 if (WARN_ON_ONCE(!head)) 4567 if (WARN_ON_ONCE(!head))
4291 return -EINVAL; 4568 return -EINVAL;
4292 4569
@@ -4295,202 +4572,27 @@ static int perf_swevent_enable(struct perf_event *event)
4295 return 0; 4572 return 0;
4296} 4573}
4297 4574
4298static void perf_swevent_disable(struct perf_event *event) 4575static void perf_swevent_del(struct perf_event *event, int flags)
4299{ 4576{
4300 hlist_del_rcu(&event->hlist_entry); 4577 hlist_del_rcu(&event->hlist_entry);
4301} 4578}
4302 4579
4303static void perf_swevent_void(struct perf_event *event) 4580static void perf_swevent_start(struct perf_event *event, int flags)
4304{ 4581{
4582 event->hw.state = 0;
4305} 4583}
4306 4584
4307static int perf_swevent_int(struct perf_event *event) 4585static void perf_swevent_stop(struct perf_event *event, int flags)
4308{ 4586{
4309 return 0; 4587 event->hw.state = PERF_HES_STOPPED;
4310} 4588}
4311 4589
4312static const struct pmu perf_ops_generic = {
4313 .enable = perf_swevent_enable,
4314 .disable = perf_swevent_disable,
4315 .start = perf_swevent_int,
4316 .stop = perf_swevent_void,
4317 .read = perf_swevent_read,
4318 .unthrottle = perf_swevent_void, /* hwc->interrupts already reset */
4319};
4320
4321/*
4322 * hrtimer based swevent callback
4323 */
4324
4325static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
4326{
4327 enum hrtimer_restart ret = HRTIMER_RESTART;
4328 struct perf_sample_data data;
4329 struct pt_regs *regs;
4330 struct perf_event *event;
4331 u64 period;
4332
4333 event = container_of(hrtimer, struct perf_event, hw.hrtimer);
4334 event->pmu->read(event);
4335
4336 perf_sample_data_init(&data, 0);
4337 data.period = event->hw.last_period;
4338 regs = get_irq_regs();
4339
4340 if (regs && !perf_exclude_event(event, regs)) {
4341 if (!(event->attr.exclude_idle && current->pid == 0))
4342 if (perf_event_overflow(event, 0, &data, regs))
4343 ret = HRTIMER_NORESTART;
4344 }
4345
4346 period = max_t(u64, 10000, event->hw.sample_period);
4347 hrtimer_forward_now(hrtimer, ns_to_ktime(period));
4348
4349 return ret;
4350}
4351
4352static void perf_swevent_start_hrtimer(struct perf_event *event)
4353{
4354 struct hw_perf_event *hwc = &event->hw;
4355
4356 hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
4357 hwc->hrtimer.function = perf_swevent_hrtimer;
4358 if (hwc->sample_period) {
4359 u64 period;
4360
4361 if (hwc->remaining) {
4362 if (hwc->remaining < 0)
4363 period = 10000;
4364 else
4365 period = hwc->remaining;
4366 hwc->remaining = 0;
4367 } else {
4368 period = max_t(u64, 10000, hwc->sample_period);
4369 }
4370 __hrtimer_start_range_ns(&hwc->hrtimer,
4371 ns_to_ktime(period), 0,
4372 HRTIMER_MODE_REL, 0);
4373 }
4374}
4375
4376static void perf_swevent_cancel_hrtimer(struct perf_event *event)
4377{
4378 struct hw_perf_event *hwc = &event->hw;
4379
4380 if (hwc->sample_period) {
4381 ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer);
4382 hwc->remaining = ktime_to_ns(remaining);
4383
4384 hrtimer_cancel(&hwc->hrtimer);
4385 }
4386}
4387
4388/*
4389 * Software event: cpu wall time clock
4390 */
4391
4392static void cpu_clock_perf_event_update(struct perf_event *event)
4393{
4394 int cpu = raw_smp_processor_id();
4395 s64 prev;
4396 u64 now;
4397
4398 now = cpu_clock(cpu);
4399 prev = local64_xchg(&event->hw.prev_count, now);
4400 local64_add(now - prev, &event->count);
4401}
4402
4403static int cpu_clock_perf_event_enable(struct perf_event *event)
4404{
4405 struct hw_perf_event *hwc = &event->hw;
4406 int cpu = raw_smp_processor_id();
4407
4408 local64_set(&hwc->prev_count, cpu_clock(cpu));
4409 perf_swevent_start_hrtimer(event);
4410
4411 return 0;
4412}
4413
4414static void cpu_clock_perf_event_disable(struct perf_event *event)
4415{
4416 perf_swevent_cancel_hrtimer(event);
4417 cpu_clock_perf_event_update(event);
4418}
4419
4420static void cpu_clock_perf_event_read(struct perf_event *event)
4421{
4422 cpu_clock_perf_event_update(event);
4423}
4424
4425static const struct pmu perf_ops_cpu_clock = {
4426 .enable = cpu_clock_perf_event_enable,
4427 .disable = cpu_clock_perf_event_disable,
4428 .read = cpu_clock_perf_event_read,
4429};
4430
4431/*
4432 * Software event: task time clock
4433 */
4434
4435static void task_clock_perf_event_update(struct perf_event *event, u64 now)
4436{
4437 u64 prev;
4438 s64 delta;
4439
4440 prev = local64_xchg(&event->hw.prev_count, now);
4441 delta = now - prev;
4442 local64_add(delta, &event->count);
4443}
4444
4445static int task_clock_perf_event_enable(struct perf_event *event)
4446{
4447 struct hw_perf_event *hwc = &event->hw;
4448 u64 now;
4449
4450 now = event->ctx->time;
4451
4452 local64_set(&hwc->prev_count, now);
4453
4454 perf_swevent_start_hrtimer(event);
4455
4456 return 0;
4457}
4458
4459static void task_clock_perf_event_disable(struct perf_event *event)
4460{
4461 perf_swevent_cancel_hrtimer(event);
4462 task_clock_perf_event_update(event, event->ctx->time);
4463
4464}
4465
4466static void task_clock_perf_event_read(struct perf_event *event)
4467{
4468 u64 time;
4469
4470 if (!in_nmi()) {
4471 update_context_time(event->ctx);
4472 time = event->ctx->time;
4473 } else {
4474 u64 now = perf_clock();
4475 u64 delta = now - event->ctx->timestamp;
4476 time = event->ctx->time + delta;
4477 }
4478
4479 task_clock_perf_event_update(event, time);
4480}
4481
4482static const struct pmu perf_ops_task_clock = {
4483 .enable = task_clock_perf_event_enable,
4484 .disable = task_clock_perf_event_disable,
4485 .read = task_clock_perf_event_read,
4486};
4487
4488/* Deref the hlist from the update side */ 4590/* Deref the hlist from the update side */
4489static inline struct swevent_hlist * 4591static inline struct swevent_hlist *
4490swevent_hlist_deref(struct perf_cpu_context *cpuctx) 4592swevent_hlist_deref(struct swevent_htable *swhash)
4491{ 4593{
4492 return rcu_dereference_protected(cpuctx->swevent_hlist, 4594 return rcu_dereference_protected(swhash->swevent_hlist,
4493 lockdep_is_held(&cpuctx->hlist_mutex)); 4595 lockdep_is_held(&swhash->hlist_mutex));
4494} 4596}
4495 4597
4496static void swevent_hlist_release_rcu(struct rcu_head *rcu_head) 4598static void swevent_hlist_release_rcu(struct rcu_head *rcu_head)
@@ -4501,27 +4603,27 @@ static void swevent_hlist_release_rcu(struct rcu_head *rcu_head)
4501 kfree(hlist); 4603 kfree(hlist);
4502} 4604}
4503 4605
4504static void swevent_hlist_release(struct perf_cpu_context *cpuctx) 4606static void swevent_hlist_release(struct swevent_htable *swhash)
4505{ 4607{
4506 struct swevent_hlist *hlist = swevent_hlist_deref(cpuctx); 4608 struct swevent_hlist *hlist = swevent_hlist_deref(swhash);
4507 4609
4508 if (!hlist) 4610 if (!hlist)
4509 return; 4611 return;
4510 4612
4511 rcu_assign_pointer(cpuctx->swevent_hlist, NULL); 4613 rcu_assign_pointer(swhash->swevent_hlist, NULL);
4512 call_rcu(&hlist->rcu_head, swevent_hlist_release_rcu); 4614 call_rcu(&hlist->rcu_head, swevent_hlist_release_rcu);
4513} 4615}
4514 4616
4515static void swevent_hlist_put_cpu(struct perf_event *event, int cpu) 4617static void swevent_hlist_put_cpu(struct perf_event *event, int cpu)
4516{ 4618{
4517 struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); 4619 struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
4518 4620
4519 mutex_lock(&cpuctx->hlist_mutex); 4621 mutex_lock(&swhash->hlist_mutex);
4520 4622
4521 if (!--cpuctx->hlist_refcount) 4623 if (!--swhash->hlist_refcount)
4522 swevent_hlist_release(cpuctx); 4624 swevent_hlist_release(swhash);
4523 4625
4524 mutex_unlock(&cpuctx->hlist_mutex); 4626 mutex_unlock(&swhash->hlist_mutex);
4525} 4627}
4526 4628
4527static void swevent_hlist_put(struct perf_event *event) 4629static void swevent_hlist_put(struct perf_event *event)
@@ -4539,12 +4641,12 @@ static void swevent_hlist_put(struct perf_event *event)
4539 4641
4540static int swevent_hlist_get_cpu(struct perf_event *event, int cpu) 4642static int swevent_hlist_get_cpu(struct perf_event *event, int cpu)
4541{ 4643{
4542 struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); 4644 struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
4543 int err = 0; 4645 int err = 0;
4544 4646
4545 mutex_lock(&cpuctx->hlist_mutex); 4647 mutex_lock(&swhash->hlist_mutex);
4546 4648
4547 if (!swevent_hlist_deref(cpuctx) && cpu_online(cpu)) { 4649 if (!swevent_hlist_deref(swhash) && cpu_online(cpu)) {
4548 struct swevent_hlist *hlist; 4650 struct swevent_hlist *hlist;
4549 4651
4550 hlist = kzalloc(sizeof(*hlist), GFP_KERNEL); 4652 hlist = kzalloc(sizeof(*hlist), GFP_KERNEL);
@@ -4552,11 +4654,11 @@ static int swevent_hlist_get_cpu(struct perf_event *event, int cpu)
4552 err = -ENOMEM; 4654 err = -ENOMEM;
4553 goto exit; 4655 goto exit;
4554 } 4656 }
4555 rcu_assign_pointer(cpuctx->swevent_hlist, hlist); 4657 rcu_assign_pointer(swhash->swevent_hlist, hlist);
4556 } 4658 }
4557 cpuctx->hlist_refcount++; 4659 swhash->hlist_refcount++;
4558 exit: 4660exit:
4559 mutex_unlock(&cpuctx->hlist_mutex); 4661 mutex_unlock(&swhash->hlist_mutex);
4560 4662
4561 return err; 4663 return err;
4562} 4664}
@@ -4580,7 +4682,7 @@ static int swevent_hlist_get(struct perf_event *event)
4580 put_online_cpus(); 4682 put_online_cpus();
4581 4683
4582 return 0; 4684 return 0;
4583 fail: 4685fail:
4584 for_each_possible_cpu(cpu) { 4686 for_each_possible_cpu(cpu) {
4585 if (cpu == failed_cpu) 4687 if (cpu == failed_cpu)
4586 break; 4688 break;
@@ -4591,17 +4693,64 @@ static int swevent_hlist_get(struct perf_event *event)
4591 return err; 4693 return err;
4592} 4694}
4593 4695
4594#ifdef CONFIG_EVENT_TRACING 4696atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
4697
4698static void sw_perf_event_destroy(struct perf_event *event)
4699{
4700 u64 event_id = event->attr.config;
4701
4702 WARN_ON(event->parent);
4703
4704 atomic_dec(&perf_swevent_enabled[event_id]);
4705 swevent_hlist_put(event);
4706}
4707
4708static int perf_swevent_init(struct perf_event *event)
4709{
4710 int event_id = event->attr.config;
4711
4712 if (event->attr.type != PERF_TYPE_SOFTWARE)
4713 return -ENOENT;
4714
4715 switch (event_id) {
4716 case PERF_COUNT_SW_CPU_CLOCK:
4717 case PERF_COUNT_SW_TASK_CLOCK:
4718 return -ENOENT;
4719
4720 default:
4721 break;
4722 }
4723
	 4724	if (event_id >= PERF_COUNT_SW_MAX)
4725 return -ENOENT;
4726
4727 if (!event->parent) {
4728 int err;
4729
4730 err = swevent_hlist_get(event);
4731 if (err)
4732 return err;
4733
4734 atomic_inc(&perf_swevent_enabled[event_id]);
4735 event->destroy = sw_perf_event_destroy;
4736 }
4595 4737
4596static const struct pmu perf_ops_tracepoint = { 4738 return 0;
4597 .enable = perf_trace_enable, 4739}
4598 .disable = perf_trace_disable, 4740
4599 .start = perf_swevent_int, 4741static struct pmu perf_swevent = {
4600 .stop = perf_swevent_void, 4742 .task_ctx_nr = perf_sw_context,
4743
4744 .event_init = perf_swevent_init,
4745 .add = perf_swevent_add,
4746 .del = perf_swevent_del,
4747 .start = perf_swevent_start,
4748 .stop = perf_swevent_stop,
4601 .read = perf_swevent_read, 4749 .read = perf_swevent_read,
4602 .unthrottle = perf_swevent_void,
4603}; 4750};
4604 4751
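perf_swevent shows the reworked struct pmu shape: ->event_init claims the event (or refuses it with -ENOENT), ->add/->del attach and detach it, and ->start/->stop toggle the cheap stopped state without a full reschedule. A toy state machine with the same shape; the flag and state values stand in for PERF_EF_START and PERF_HES_STOPPED and are not the kernel's definitions:

#include <stdio.h>

#define EF_START     0x01       /* start counting immediately on add */
#define HES_STOPPED  0x01       /* on the PMU, but not counting */

struct event { int state; long count; };

static int ev_add(struct event *e, int flags)
{
        e->state = (flags & EF_START) ? 0 : HES_STOPPED;
        return 0;
}
static void ev_start(struct event *e, int flags) { e->state = 0; }
static void ev_stop (struct event *e, int flags) { e->state = HES_STOPPED; }
static void ev_del  (struct event *e, int flags) { ev_stop(e, flags); }

static void ev_hit(struct event *e)
{
        if (!(e->state & HES_STOPPED))          /* gate counting on the stopped bit */
                e->count++;
}

int main(void)
{
        struct event e = { 0, 0 };

        ev_add(&e, 0);   ev_hit(&e);            /* added but stopped: not counted */
        ev_start(&e, 0); ev_hit(&e);            /* counted */
        ev_stop(&e, 0);  ev_hit(&e);            /* not counted */
        ev_del(&e, 0);
        printf("%ld\n", e.count);               /* prints 1 */
        return 0;
}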
4752#ifdef CONFIG_EVENT_TRACING
4753
4605static int perf_tp_filter_match(struct perf_event *event, 4754static int perf_tp_filter_match(struct perf_event *event,
4606 struct perf_sample_data *data) 4755 struct perf_sample_data *data)
4607{ 4756{
@@ -4645,7 +4794,7 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
4645 4794
4646 hlist_for_each_entry_rcu(event, node, head, hlist_entry) { 4795 hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
4647 if (perf_tp_event_match(event, &data, regs)) 4796 if (perf_tp_event_match(event, &data, regs))
4648 perf_swevent_add(event, count, 1, &data, regs); 4797 perf_swevent_event(event, count, 1, &data, regs);
4649 } 4798 }
4650 4799
4651 perf_swevent_put_recursion_context(rctx); 4800 perf_swevent_put_recursion_context(rctx);
@@ -4657,10 +4806,13 @@ static void tp_perf_event_destroy(struct perf_event *event)
4657 perf_trace_destroy(event); 4806 perf_trace_destroy(event);
4658} 4807}
4659 4808
4660static const struct pmu *tp_perf_event_init(struct perf_event *event) 4809static int perf_tp_event_init(struct perf_event *event)
4661{ 4810{
4662 int err; 4811 int err;
4663 4812
4813 if (event->attr.type != PERF_TYPE_TRACEPOINT)
4814 return -ENOENT;
4815
4664 /* 4816 /*
4665 * Raw tracepoint data is a severe data leak, only allow root to 4817 * Raw tracepoint data is a severe data leak, only allow root to
4666 * have these. 4818 * have these.
@@ -4668,15 +4820,31 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event)
4668 if ((event->attr.sample_type & PERF_SAMPLE_RAW) && 4820 if ((event->attr.sample_type & PERF_SAMPLE_RAW) &&
4669 perf_paranoid_tracepoint_raw() && 4821 perf_paranoid_tracepoint_raw() &&
4670 !capable(CAP_SYS_ADMIN)) 4822 !capable(CAP_SYS_ADMIN))
4671 return ERR_PTR(-EPERM); 4823 return -EPERM;
4672 4824
4673 err = perf_trace_init(event); 4825 err = perf_trace_init(event);
4674 if (err) 4826 if (err)
4675 return NULL; 4827 return err;
4676 4828
4677 event->destroy = tp_perf_event_destroy; 4829 event->destroy = tp_perf_event_destroy;
4678 4830
4679 return &perf_ops_tracepoint; 4831 return 0;
4832}
4833
4834static struct pmu perf_tracepoint = {
4835 .task_ctx_nr = perf_sw_context,
4836
4837 .event_init = perf_tp_event_init,
4838 .add = perf_trace_add,
4839 .del = perf_trace_del,
4840 .start = perf_swevent_start,
4841 .stop = perf_swevent_stop,
4842 .read = perf_swevent_read,
4843};
4844
4845static inline void perf_tp_register(void)
4846{
4847 perf_pmu_register(&perf_tracepoint);
4680} 4848}
4681 4849
4682static int perf_event_set_filter(struct perf_event *event, void __user *arg) 4850static int perf_event_set_filter(struct perf_event *event, void __user *arg)
@@ -4704,9 +4872,8 @@ static void perf_event_free_filter(struct perf_event *event)
4704 4872
4705#else 4873#else
4706 4874
4707static const struct pmu *tp_perf_event_init(struct perf_event *event) 4875static inline void perf_tp_register(void)
4708{ 4876{
4709 return NULL;
4710} 4877}
4711 4878
4712static int perf_event_set_filter(struct perf_event *event, void __user *arg) 4879static int perf_event_set_filter(struct perf_event *event, void __user *arg)
@@ -4721,106 +4888,390 @@ static void perf_event_free_filter(struct perf_event *event)
4721#endif /* CONFIG_EVENT_TRACING */ 4888#endif /* CONFIG_EVENT_TRACING */
4722 4889
4723#ifdef CONFIG_HAVE_HW_BREAKPOINT 4890#ifdef CONFIG_HAVE_HW_BREAKPOINT
4724static void bp_perf_event_destroy(struct perf_event *event) 4891void perf_bp_event(struct perf_event *bp, void *data)
4725{ 4892{
4726 release_bp_slot(event); 4893 struct perf_sample_data sample;
4894 struct pt_regs *regs = data;
4895
4896 perf_sample_data_init(&sample, bp->attr.bp_addr);
4897
4898 if (!bp->hw.state && !perf_exclude_event(bp, regs))
4899 perf_swevent_event(bp, 1, 1, &sample, regs);
4727} 4900}
4901#endif
4902
4903/*
4904 * hrtimer based swevent callback
4905 */
4728 4906
4729static const struct pmu *bp_perf_event_init(struct perf_event *bp) 4907static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
4730{ 4908{
4731 int err; 4909 enum hrtimer_restart ret = HRTIMER_RESTART;
4910 struct perf_sample_data data;
4911 struct pt_regs *regs;
4912 struct perf_event *event;
4913 u64 period;
4732 4914
4733 err = register_perf_hw_breakpoint(bp); 4915 event = container_of(hrtimer, struct perf_event, hw.hrtimer);
4734 if (err) 4916 event->pmu->read(event);
4735 return ERR_PTR(err);
4736 4917
4737 bp->destroy = bp_perf_event_destroy; 4918 perf_sample_data_init(&data, 0);
4919 data.period = event->hw.last_period;
4920 regs = get_irq_regs();
4738 4921
4739 return &perf_ops_bp; 4922 if (regs && !perf_exclude_event(event, regs)) {
4923 if (!(event->attr.exclude_idle && current->pid == 0))
4924 if (perf_event_overflow(event, 0, &data, regs))
4925 ret = HRTIMER_NORESTART;
4926 }
4927
4928 period = max_t(u64, 10000, event->hw.sample_period);
4929 hrtimer_forward_now(hrtimer, ns_to_ktime(period));
4930
4931 return ret;
4740} 4932}
4741 4933
4742void perf_bp_event(struct perf_event *bp, void *data) 4934static void perf_swevent_start_hrtimer(struct perf_event *event)
4743{ 4935{
4744 struct perf_sample_data sample; 4936 struct hw_perf_event *hwc = &event->hw;
4745 struct pt_regs *regs = data;
4746 4937
4747 perf_sample_data_init(&sample, bp->attr.bp_addr); 4938 hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
4939 hwc->hrtimer.function = perf_swevent_hrtimer;
4940 if (hwc->sample_period) {
4941 s64 period = local64_read(&hwc->period_left);
4748 4942
4749 if (!perf_exclude_event(bp, regs)) 4943 if (period) {
4750 perf_swevent_add(bp, 1, 1, &sample, regs); 4944 if (period < 0)
4945 period = 10000;
4946
4947 local64_set(&hwc->period_left, 0);
4948 } else {
4949 period = max_t(u64, 10000, hwc->sample_period);
4950 }
4951 __hrtimer_start_range_ns(&hwc->hrtimer,
4952 ns_to_ktime(period), 0,
4953 HRTIMER_MODE_REL_PINNED, 0);
4954 }
4751} 4955}
4752#else 4956
4753static const struct pmu *bp_perf_event_init(struct perf_event *bp) 4957static void perf_swevent_cancel_hrtimer(struct perf_event *event)
4754{ 4958{
4755 return NULL; 4959 struct hw_perf_event *hwc = &event->hw;
4960
4961 if (hwc->sample_period) {
4962 ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer);
4963 local64_set(&hwc->period_left, ktime_to_ns(remaining));
4964
4965 hrtimer_cancel(&hwc->hrtimer);
4966 }
4756} 4967}
4757 4968
4758void perf_bp_event(struct perf_event *bp, void *regs) 4969/*
4970 * Software event: cpu wall time clock
4971 */
4972
4973static void cpu_clock_event_update(struct perf_event *event)
4759{ 4974{
4975 s64 prev;
4976 u64 now;
4977
4978 now = local_clock();
4979 prev = local64_xchg(&event->hw.prev_count, now);
4980 local64_add(now - prev, &event->count);
4760} 4981}
4761#endif
4762 4982
4763atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; 4983static void cpu_clock_event_start(struct perf_event *event, int flags)
4984{
4985 local64_set(&event->hw.prev_count, local_clock());
4986 perf_swevent_start_hrtimer(event);
4987}
4764 4988
4765static void sw_perf_event_destroy(struct perf_event *event) 4989static void cpu_clock_event_stop(struct perf_event *event, int flags)
4766{ 4990{
4767 u64 event_id = event->attr.config; 4991 perf_swevent_cancel_hrtimer(event);
4992 cpu_clock_event_update(event);
4993}
4768 4994
4769 WARN_ON(event->parent); 4995static int cpu_clock_event_add(struct perf_event *event, int flags)
4996{
4997 if (flags & PERF_EF_START)
4998 cpu_clock_event_start(event, flags);
4770 4999
4771 atomic_dec(&perf_swevent_enabled[event_id]); 5000 return 0;
4772 swevent_hlist_put(event);
4773} 5001}
4774 5002
4775static const struct pmu *sw_perf_event_init(struct perf_event *event) 5003static void cpu_clock_event_del(struct perf_event *event, int flags)
4776{ 5004{
4777 const struct pmu *pmu = NULL; 5005 cpu_clock_event_stop(event, flags);
4778 u64 event_id = event->attr.config; 5006}
5007
5008static void cpu_clock_event_read(struct perf_event *event)
5009{
5010 cpu_clock_event_update(event);
5011}
5012
5013static int cpu_clock_event_init(struct perf_event *event)
5014{
5015 if (event->attr.type != PERF_TYPE_SOFTWARE)
5016 return -ENOENT;
5017
5018 if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK)
5019 return -ENOENT;
5020
5021 return 0;
5022}
4779 5023
5024static struct pmu perf_cpu_clock = {
5025 .task_ctx_nr = perf_sw_context,
5026
5027 .event_init = cpu_clock_event_init,
5028 .add = cpu_clock_event_add,
5029 .del = cpu_clock_event_del,
5030 .start = cpu_clock_event_start,
5031 .stop = cpu_clock_event_stop,
5032 .read = cpu_clock_event_read,
5033};
5034
5035/*
5036 * Software event: task time clock
5037 */
5038
5039static void task_clock_event_update(struct perf_event *event, u64 now)
5040{
5041 u64 prev;
5042 s64 delta;
5043
5044 prev = local64_xchg(&event->hw.prev_count, now);
5045 delta = now - prev;
5046 local64_add(delta, &event->count);
5047}
5048
5049static void task_clock_event_start(struct perf_event *event, int flags)
5050{
5051 local64_set(&event->hw.prev_count, event->ctx->time);
5052 perf_swevent_start_hrtimer(event);
5053}
5054
5055static void task_clock_event_stop(struct perf_event *event, int flags)
5056{
5057 perf_swevent_cancel_hrtimer(event);
5058 task_clock_event_update(event, event->ctx->time);
5059}
5060
5061static int task_clock_event_add(struct perf_event *event, int flags)
5062{
5063 if (flags & PERF_EF_START)
5064 task_clock_event_start(event, flags);
5065
5066 return 0;
5067}
5068
5069static void task_clock_event_del(struct perf_event *event, int flags)
5070{
5071 task_clock_event_stop(event, PERF_EF_UPDATE);
5072}
5073
5074static void task_clock_event_read(struct perf_event *event)
5075{
5076 u64 time;
5077
5078 if (!in_nmi()) {
5079 update_context_time(event->ctx);
5080 time = event->ctx->time;
5081 } else {
5082 u64 now = perf_clock();
5083 u64 delta = now - event->ctx->timestamp;
5084 time = event->ctx->time + delta;
5085 }
5086
5087 task_clock_event_update(event, time);
5088}
5089
5090static int task_clock_event_init(struct perf_event *event)
5091{
5092 if (event->attr.type != PERF_TYPE_SOFTWARE)
5093 return -ENOENT;
5094
5095 if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK)
5096 return -ENOENT;
5097
5098 return 0;
5099}
5100
5101static struct pmu perf_task_clock = {
5102 .task_ctx_nr = perf_sw_context,
5103
5104 .event_init = task_clock_event_init,
5105 .add = task_clock_event_add,
5106 .del = task_clock_event_del,
5107 .start = task_clock_event_start,
5108 .stop = task_clock_event_stop,
5109 .read = task_clock_event_read,
5110};
5111
5112static void perf_pmu_nop_void(struct pmu *pmu)
5113{
5114}
5115
5116static int perf_pmu_nop_int(struct pmu *pmu)
5117{
5118 return 0;
5119}
5120
5121static void perf_pmu_start_txn(struct pmu *pmu)
5122{
5123 perf_pmu_disable(pmu);
5124}
5125
5126static int perf_pmu_commit_txn(struct pmu *pmu)
5127{
5128 perf_pmu_enable(pmu);
5129 return 0;
5130}
5131
5132static void perf_pmu_cancel_txn(struct pmu *pmu)
5133{
5134 perf_pmu_enable(pmu);
5135}
5136
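start_txn/commit_txn/cancel_txn let the core batch a whole group of ->add calls inside one pmu_disable()/pmu_enable() section and only then ask whether the group fits. A sketch of the calling pattern from the scheduling side; the pmu here is a stub, and real drivers check hardware constraints in commit_txn:

#include <stdio.h>

struct pmu {
        void (*start_txn)(struct pmu *);
        int  (*commit_txn)(struct pmu *);
        void (*cancel_txn)(struct pmu *);
        int  (*add)(struct pmu *, int event);
};

/* Stub callbacks standing in for a real driver. */
static void stub_start (struct pmu *p) { printf("txn start\n"); }
static int  stub_commit(struct pmu *p) { printf("txn commit\n"); return 0; }
static void stub_cancel(struct pmu *p) { printf("txn cancel\n"); }
static int  stub_add   (struct pmu *p, int ev) { printf("add %d\n", ev); return 0; }

/* Schedule a whole group onto the PMU, or nothing at all. */
static int group_sched_in(struct pmu *pmu, const int *events, int n)
{
        pmu->start_txn(pmu);
        for (int i = 0; i < n; i++) {
                if (pmu->add(pmu, events[i])) {
                        pmu->cancel_txn(pmu);   /* roll back the partial group */
                        return -1;
                }
        }
        return pmu->commit_txn(pmu);            /* constraints checked in one go */
}

int main(void)
{
        struct pmu p = { stub_start, stub_commit, stub_cancel, stub_add };
        int group[] = { 1, 2, 3 };

        return group_sched_in(&p, group, 3);
}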
5137/*
5138 * Ensures all contexts with the same task_ctx_nr have the same
5139 * pmu_cpu_context too.
5140 */
5141static void *find_pmu_context(int ctxn)
5142{
5143 struct pmu *pmu;
5144
5145 if (ctxn < 0)
5146 return NULL;
5147
5148 list_for_each_entry(pmu, &pmus, entry) {
5149 if (pmu->task_ctx_nr == ctxn)
5150 return pmu->pmu_cpu_context;
5151 }
5152
5153 return NULL;
5154}
5155
5156static void free_pmu_context(void * __percpu cpu_context)
5157{
5158 struct pmu *pmu;
5159
5160 mutex_lock(&pmus_lock);
4780 /* 5161 /*
4781 * Software events (currently) can't in general distinguish 5162 * Like a real lame refcount.
4782 * between user, kernel and hypervisor events.
4783 * However, context switches and cpu migrations are considered
4784 * to be kernel events, and page faults are never hypervisor
4785 * events.
4786 */ 5163 */
4787 switch (event_id) { 5164 list_for_each_entry(pmu, &pmus, entry) {
4788 case PERF_COUNT_SW_CPU_CLOCK: 5165 if (pmu->pmu_cpu_context == cpu_context)
4789 pmu = &perf_ops_cpu_clock; 5166 goto out;
5167 }
4790 5168
4791 break; 5169 free_percpu(cpu_context);
4792 case PERF_COUNT_SW_TASK_CLOCK: 5170out:
4793 /* 5171 mutex_unlock(&pmus_lock);
4794 * If the user instantiates this as a per-cpu event, 5172}
4795 * use the cpu_clock event instead.
4796 */
4797 if (event->ctx->task)
4798 pmu = &perf_ops_task_clock;
4799 else
4800 pmu = &perf_ops_cpu_clock;
4801 5173
4802 break; 5174int perf_pmu_register(struct pmu *pmu)
4803 case PERF_COUNT_SW_PAGE_FAULTS: 5175{
4804 case PERF_COUNT_SW_PAGE_FAULTS_MIN: 5176 int cpu, ret;
4805 case PERF_COUNT_SW_PAGE_FAULTS_MAJ: 5177
4806 case PERF_COUNT_SW_CONTEXT_SWITCHES: 5178 mutex_lock(&pmus_lock);
4807 case PERF_COUNT_SW_CPU_MIGRATIONS: 5179 ret = -ENOMEM;
4808 case PERF_COUNT_SW_ALIGNMENT_FAULTS: 5180 pmu->pmu_disable_count = alloc_percpu(int);
4809 case PERF_COUNT_SW_EMULATION_FAULTS: 5181 if (!pmu->pmu_disable_count)
4810 if (!event->parent) { 5182 goto unlock;
4811 int err; 5183
4812 5184 pmu->pmu_cpu_context = find_pmu_context(pmu->task_ctx_nr);
4813 err = swevent_hlist_get(event); 5185 if (pmu->pmu_cpu_context)
4814 if (err) 5186 goto got_cpu_context;
4815 return ERR_PTR(err); 5187
5188 pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context);
5189 if (!pmu->pmu_cpu_context)
5190 goto free_pdc;
5191
5192 for_each_possible_cpu(cpu) {
5193 struct perf_cpu_context *cpuctx;
5194
5195 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
5196 __perf_event_init_context(&cpuctx->ctx);
5197 cpuctx->ctx.type = cpu_context;
5198 cpuctx->ctx.pmu = pmu;
5199 cpuctx->jiffies_interval = 1;
5200 INIT_LIST_HEAD(&cpuctx->rotation_list);
5201 }
4816 5202
4817 atomic_inc(&perf_swevent_enabled[event_id]); 5203got_cpu_context:
4818 event->destroy = sw_perf_event_destroy; 5204 if (!pmu->start_txn) {
5205 if (pmu->pmu_enable) {
5206 /*
5207 * If we have pmu_enable/pmu_disable calls, install
5208 * transaction stubs that use that to try and batch
5209 * hardware accesses.
5210 */
5211 pmu->start_txn = perf_pmu_start_txn;
5212 pmu->commit_txn = perf_pmu_commit_txn;
5213 pmu->cancel_txn = perf_pmu_cancel_txn;
5214 } else {
5215 pmu->start_txn = perf_pmu_nop_void;
5216 pmu->commit_txn = perf_pmu_nop_int;
5217 pmu->cancel_txn = perf_pmu_nop_void;
4819 } 5218 }
4820 pmu = &perf_ops_generic;
4821 break;
4822 } 5219 }
4823 5220
5221 if (!pmu->pmu_enable) {
5222 pmu->pmu_enable = perf_pmu_nop_void;
5223 pmu->pmu_disable = perf_pmu_nop_void;
5224 }
5225
5226 list_add_rcu(&pmu->entry, &pmus);
5227 ret = 0;
5228unlock:
5229 mutex_unlock(&pmus_lock);
5230
5231 return ret;
5232
5233free_pdc:
5234 free_percpu(pmu->pmu_disable_count);
5235 goto unlock;
5236}
5237
5238void perf_pmu_unregister(struct pmu *pmu)
5239{
5240 mutex_lock(&pmus_lock);
5241 list_del_rcu(&pmu->entry);
5242 mutex_unlock(&pmus_lock);
5243
5244 /*
5245 * We dereference the pmu list under both SRCU and regular RCU, so
5246 * synchronize against both of those.
5247 */
5248 synchronize_srcu(&pmus_srcu);
5249 synchronize_rcu();
5250
5251 free_percpu(pmu->pmu_disable_count);
5252 free_pmu_context(pmu->pmu_cpu_context);
5253}
5254
5255struct pmu *perf_init_event(struct perf_event *event)
5256{
5257 struct pmu *pmu = NULL;
5258 int idx;
5259
5260 idx = srcu_read_lock(&pmus_srcu);
5261 list_for_each_entry_rcu(pmu, &pmus, entry) {
5262 int ret = pmu->event_init(event);
5263 if (!ret)
5264 goto unlock;
5265
5266 if (ret != -ENOENT) {
5267 pmu = ERR_PTR(ret);
5268 goto unlock;
5269 }
5270 }
5271 pmu = ERR_PTR(-ENOENT);
5272unlock:
5273 srcu_read_unlock(&pmus_srcu, idx);
5274
4824 return pmu; 5275 return pmu;
4825} 5276}
4826 5277
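perf_init_event() replaces the old switch on attr->type: each registered PMU's ->event_init either claims the event, returns -ENOENT so the walk continues, or returns a real error that aborts the search (the real walk happens under SRCU). A small sketch of that convention:

#include <errno.h>
#include <stdio.h>

struct pmu   { const char *name; int type; };
struct event { int type; const struct pmu *pmu; };

/* Claim the event if the type matches; -ENOENT means "not mine, keep looking". */
static int event_init(const struct pmu *pmu, struct event *event)
{
        if (event->type != pmu->type)
                return -ENOENT;
        event->pmu = pmu;
        return 0;
}

static const struct pmu pmus[] = {
        { "cpu", 0 }, { "software", 1 }, { "tracepoint", 2 },
};

static const struct pmu *init_event(struct event *event)
{
        for (unsigned i = 0; i < sizeof(pmus) / sizeof(pmus[0]); i++) {
                int ret = event_init(&pmus[i], event);

                if (!ret)
                        return &pmus[i];
                if (ret != -ENOENT)
                        return NULL;            /* real error: stop searching */
        }
        return NULL;                            /* nobody claimed it */
}

int main(void)
{
        struct event ev = { 1, NULL };
        const struct pmu *pmu = init_event(&ev);

        printf("claimed by: %s\n", pmu ? pmu->name : "(none)");
        return 0;
}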
@@ -4828,20 +5279,17 @@ static const struct pmu *sw_perf_event_init(struct perf_event *event)
4828	 * Allocate and initialize an event structure	5279
4829 */ 5280 */
4830static struct perf_event * 5281static struct perf_event *
4831perf_event_alloc(struct perf_event_attr *attr, 5282perf_event_alloc(struct perf_event_attr *attr, int cpu,
4832 int cpu,
4833 struct perf_event_context *ctx,
4834 struct perf_event *group_leader, 5283 struct perf_event *group_leader,
4835 struct perf_event *parent_event, 5284 struct perf_event *parent_event,
4836 perf_overflow_handler_t overflow_handler, 5285 perf_overflow_handler_t overflow_handler)
4837 gfp_t gfpflags)
4838{ 5286{
4839 const struct pmu *pmu; 5287 struct pmu *pmu;
4840 struct perf_event *event; 5288 struct perf_event *event;
4841 struct hw_perf_event *hwc; 5289 struct hw_perf_event *hwc;
4842 long err; 5290 long err;
4843 5291
4844 event = kzalloc(sizeof(*event), gfpflags); 5292 event = kzalloc(sizeof(*event), GFP_KERNEL);
4845 if (!event) 5293 if (!event)
4846 return ERR_PTR(-ENOMEM); 5294 return ERR_PTR(-ENOMEM);
4847 5295
@@ -4866,7 +5314,6 @@ perf_event_alloc(struct perf_event_attr *attr,
4866 event->attr = *attr; 5314 event->attr = *attr;
4867 event->group_leader = group_leader; 5315 event->group_leader = group_leader;
4868 event->pmu = NULL; 5316 event->pmu = NULL;
4869 event->ctx = ctx;
4870 event->oncpu = -1; 5317 event->oncpu = -1;
4871 5318
4872 event->parent = parent_event; 5319 event->parent = parent_event;
@@ -4900,29 +5347,8 @@ perf_event_alloc(struct perf_event_attr *attr,
4900 if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP)) 5347 if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP))
4901 goto done; 5348 goto done;
4902 5349
4903 switch (attr->type) { 5350 pmu = perf_init_event(event);
4904 case PERF_TYPE_RAW:
4905 case PERF_TYPE_HARDWARE:
4906 case PERF_TYPE_HW_CACHE:
4907 pmu = hw_perf_event_init(event);
4908 break;
4909
4910 case PERF_TYPE_SOFTWARE:
4911 pmu = sw_perf_event_init(event);
4912 break;
4913
4914 case PERF_TYPE_TRACEPOINT:
4915 pmu = tp_perf_event_init(event);
4916 break;
4917
4918 case PERF_TYPE_BREAKPOINT:
4919 pmu = bp_perf_event_init(event);
4920 break;
4921 5351
4922
4923 default:
4924 break;
4925 }
4926done: 5352done:
4927 err = 0; 5353 err = 0;
4928 if (!pmu) 5354 if (!pmu)
@@ -4947,6 +5373,13 @@ done:
4947 atomic_inc(&nr_comm_events); 5373 atomic_inc(&nr_comm_events);
4948 if (event->attr.task) 5374 if (event->attr.task)
4949 atomic_inc(&nr_task_events); 5375 atomic_inc(&nr_task_events);
5376 if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
5377 err = get_callchain_buffers();
5378 if (err) {
5379 free_event(event);
5380 return ERR_PTR(err);
5381 }
5382 }
4950 } 5383 }
4951 5384
4952 return event; 5385 return event;
@@ -5094,12 +5527,16 @@ SYSCALL_DEFINE5(perf_event_open,
5094 struct perf_event_attr __user *, attr_uptr, 5527 struct perf_event_attr __user *, attr_uptr,
5095 pid_t, pid, int, cpu, int, group_fd, unsigned long, flags) 5528 pid_t, pid, int, cpu, int, group_fd, unsigned long, flags)
5096{ 5529{
5097 struct perf_event *event, *group_leader = NULL, *output_event = NULL; 5530 struct perf_event *group_leader = NULL, *output_event = NULL;
5531 struct perf_event *event, *sibling;
5098 struct perf_event_attr attr; 5532 struct perf_event_attr attr;
5099 struct perf_event_context *ctx; 5533 struct perf_event_context *ctx;
5100 struct file *event_file = NULL; 5534 struct file *event_file = NULL;
5101 struct file *group_file = NULL; 5535 struct file *group_file = NULL;
5536 struct task_struct *task = NULL;
5537 struct pmu *pmu;
5102 int event_fd; 5538 int event_fd;
5539 int move_group = 0;
5103 int fput_needed = 0; 5540 int fput_needed = 0;
5104 int err; 5541 int err;
5105 5542
@@ -5125,20 +5562,11 @@ SYSCALL_DEFINE5(perf_event_open,
5125 if (event_fd < 0) 5562 if (event_fd < 0)
5126 return event_fd; 5563 return event_fd;
5127 5564
5128 /*
5129 * Get the target context (task or percpu):
5130 */
5131 ctx = find_get_context(pid, cpu);
5132 if (IS_ERR(ctx)) {
5133 err = PTR_ERR(ctx);
5134 goto err_fd;
5135 }
5136
5137 if (group_fd != -1) { 5565 if (group_fd != -1) {
5138 group_leader = perf_fget_light(group_fd, &fput_needed); 5566 group_leader = perf_fget_light(group_fd, &fput_needed);
5139 if (IS_ERR(group_leader)) { 5567 if (IS_ERR(group_leader)) {
5140 err = PTR_ERR(group_leader); 5568 err = PTR_ERR(group_leader);
5141 goto err_put_context; 5569 goto err_fd;
5142 } 5570 }
5143 group_file = group_leader->filp; 5571 group_file = group_leader->filp;
5144 if (flags & PERF_FLAG_FD_OUTPUT) 5572 if (flags & PERF_FLAG_FD_OUTPUT)
@@ -5147,6 +5575,53 @@ SYSCALL_DEFINE5(perf_event_open,
5147 group_leader = NULL; 5575 group_leader = NULL;
5148 } 5576 }
5149 5577
5578 event = perf_event_alloc(&attr, cpu, group_leader, NULL, NULL);
5579 if (IS_ERR(event)) {
5580 err = PTR_ERR(event);
5581 goto err_fd;
5582 }
5583
5584 /*
5585 * Special case software events and allow them to be part of
5586 * any hardware group.
5587 */
5588 pmu = event->pmu;
5589
5590 if (group_leader &&
5591 (is_software_event(event) != is_software_event(group_leader))) {
5592 if (is_software_event(event)) {
5593 /*
5594 * If event and group_leader are not both a software
5595 * event, and event is, then group leader is not.
5596 *
5597 * Allow the addition of software events to !software
5598 * groups, this is safe because software events never
5599 * fail to schedule.
5600 */
5601 pmu = group_leader->pmu;
5602 } else if (is_software_event(group_leader) &&
5603 (group_leader->group_flags & PERF_GROUP_SOFTWARE)) {
5604 /*
5605 * In case the group is a pure software group, and we
5606 * try to add a hardware event, move the whole group to
5607 * the hardware context.
5608 */
5609 move_group = 1;
5610 }
5611 }
5612
5613 if (pid != -1)
5614 task = find_lively_task_by_vpid(pid);
5615
5616 /*
5617 * Get the target context (task or percpu):
5618 */
5619 ctx = find_get_context(pmu, task, cpu);
5620 if (IS_ERR(ctx)) {
5621 err = PTR_ERR(ctx);
5622 goto err_group_fd;
5623 }
5624
5150 /* 5625 /*
5151 * Look up the group leader (we will attach this event to it): 5626 * Look up the group leader (we will attach this event to it):
5152 */ 5627 */
@@ -5158,42 +5633,66 @@ SYSCALL_DEFINE5(perf_event_open,
5158 * becoming part of another group-sibling): 5633 * becoming part of another group-sibling):
5159 */ 5634 */
5160 if (group_leader->group_leader != group_leader) 5635 if (group_leader->group_leader != group_leader)
5161 goto err_put_context; 5636 goto err_context;
5162 /* 5637 /*
5163 * Do not allow to attach to a group in a different 5638 * Do not allow to attach to a group in a different
5164 * task or CPU context: 5639 * task or CPU context:
5165 */ 5640 */
5166 if (group_leader->ctx != ctx) 5641 if (move_group) {
5167 goto err_put_context; 5642 if (group_leader->ctx->type != ctx->type)
5643 goto err_context;
5644 } else {
5645 if (group_leader->ctx != ctx)
5646 goto err_context;
5647 }
5648
5168 /* 5649 /*
5169 * Only a group leader can be exclusive or pinned 5650 * Only a group leader can be exclusive or pinned
5170 */ 5651 */
5171 if (attr.exclusive || attr.pinned) 5652 if (attr.exclusive || attr.pinned)
5172 goto err_put_context; 5653 goto err_context;
5173 }
5174
5175 event = perf_event_alloc(&attr, cpu, ctx, group_leader,
5176 NULL, NULL, GFP_KERNEL);
5177 if (IS_ERR(event)) {
5178 err = PTR_ERR(event);
5179 goto err_put_context;
5180 } 5654 }
5181 5655
5182 if (output_event) { 5656 if (output_event) {
5183 err = perf_event_set_output(event, output_event); 5657 err = perf_event_set_output(event, output_event);
5184 if (err) 5658 if (err)
5185 goto err_free_put_context; 5659 goto err_context;
5186 } 5660 }
5187 5661
5188 event_file = anon_inode_getfile("[perf_event]", &perf_fops, event, O_RDWR); 5662 event_file = anon_inode_getfile("[perf_event]", &perf_fops, event, O_RDWR);
5189 if (IS_ERR(event_file)) { 5663 if (IS_ERR(event_file)) {
5190 err = PTR_ERR(event_file); 5664 err = PTR_ERR(event_file);
5191 goto err_free_put_context; 5665 goto err_context;
5666 }
5667
5668 if (move_group) {
5669 struct perf_event_context *gctx = group_leader->ctx;
5670
5671 mutex_lock(&gctx->mutex);
5672 perf_event_remove_from_context(group_leader);
5673 list_for_each_entry(sibling, &group_leader->sibling_list,
5674 group_entry) {
5675 perf_event_remove_from_context(sibling);
5676 put_ctx(gctx);
5677 }
5678 mutex_unlock(&gctx->mutex);
5679 put_ctx(gctx);
5192 } 5680 }
5193 5681
5194 event->filp = event_file; 5682 event->filp = event_file;
5195 WARN_ON_ONCE(ctx->parent_ctx); 5683 WARN_ON_ONCE(ctx->parent_ctx);
5196 mutex_lock(&ctx->mutex); 5684 mutex_lock(&ctx->mutex);
5685
5686 if (move_group) {
5687 perf_install_in_context(ctx, group_leader, cpu);
5688 get_ctx(ctx);
5689 list_for_each_entry(sibling, &group_leader->sibling_list,
5690 group_entry) {
5691 perf_install_in_context(ctx, sibling, cpu);
5692 get_ctx(ctx);
5693 }
5694 }
5695
5197 perf_install_in_context(ctx, event, cpu); 5696 perf_install_in_context(ctx, event, cpu);
5198 ++ctx->generation; 5697 ++ctx->generation;
5199 mutex_unlock(&ctx->mutex); 5698 mutex_unlock(&ctx->mutex);
@@ -5214,11 +5713,11 @@ SYSCALL_DEFINE5(perf_event_open,
5214 fd_install(event_fd, event_file); 5713 fd_install(event_fd, event_file);
5215 return event_fd; 5714 return event_fd;
5216 5715
5217err_free_put_context: 5716err_context:
5218 free_event(event);
5219err_put_context:
5220 fput_light(group_file, fput_needed);
5221 put_ctx(ctx); 5717 put_ctx(ctx);
5718err_group_fd:
5719 fput_light(group_file, fput_needed);
5720 free_event(event);
5222err_fd: 5721err_fd:
5223 put_unused_fd(event_fd); 5722 put_unused_fd(event_fd);
5224 return err; 5723 return err;
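The move_group logic exists so that a call like the one below (a hardware group leader with a software sibling, or the other way round) still ends up with the whole group in one schedulable context. A hedged user-space illustration of such a mixed group via the syscall; error handling is minimal, and on kernels without this change the mixed group may be rejected:

#define _GNU_SOURCE
#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>
#include <stdint.h>

static int perf_open(struct perf_event_attr *attr, int group_fd)
{
        /* pid 0 = this task, cpu -1 = any cpu */
        return syscall(__NR_perf_event_open, attr, 0, -1, group_fd, 0);
}

int main(void)
{
        struct perf_event_attr hw, sw;
        uint64_t cycles, ctxsw;
        int leader, sibling;

        memset(&hw, 0, sizeof(hw));
        hw.size = sizeof(hw);
        hw.type = PERF_TYPE_HARDWARE;
        hw.config = PERF_COUNT_HW_CPU_CYCLES;
        hw.disabled = 1;                        /* enable the whole group later */

        memset(&sw, 0, sizeof(sw));
        sw.size = sizeof(sw);
        sw.type = PERF_TYPE_SOFTWARE;
        sw.config = PERF_COUNT_SW_CONTEXT_SWITCHES;

        leader = perf_open(&hw, -1);            /* hardware group leader */
        sibling = perf_open(&sw, leader);       /* software event in the hw group */
        if (leader < 0 || sibling < 0) {
                perror("perf_event_open");
                return 1;
        }

        ioctl(leader, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
        for (volatile int i = 0; i < 1000000; i++)
                ;                               /* something to measure */
        ioctl(leader, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP);

        if (read(leader, &cycles, sizeof(cycles)) != sizeof(cycles) ||
            read(sibling, &ctxsw, sizeof(ctxsw)) != sizeof(ctxsw))
                return 1;
        printf("cycles=%llu ctx-switches=%llu\n",
               (unsigned long long)cycles, (unsigned long long)ctxsw);
        return 0;
}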
@@ -5229,32 +5728,31 @@ err_fd:
5229 * 5728 *
5230 * @attr: attributes of the counter to create 5729 * @attr: attributes of the counter to create
5231 * @cpu: cpu in which the counter is bound 5730 * @cpu: cpu in which the counter is bound
5232 * @pid: task to profile 5731 * @task: task to profile (NULL for percpu)
5233 */ 5732 */
5234struct perf_event * 5733struct perf_event *
5235perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, 5734perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
5236 pid_t pid, 5735 struct task_struct *task,
5237 perf_overflow_handler_t overflow_handler) 5736 perf_overflow_handler_t overflow_handler)
5238{ 5737{
5239 struct perf_event *event;
5240 struct perf_event_context *ctx; 5738 struct perf_event_context *ctx;
5739 struct perf_event *event;
5241 int err; 5740 int err;
5242 5741
5243 /* 5742 /*
5244 * Get the target context (task or percpu): 5743 * Get the target context (task or percpu):
5245 */ 5744 */
5246 5745
5247 ctx = find_get_context(pid, cpu); 5746 event = perf_event_alloc(attr, cpu, NULL, NULL, overflow_handler);
5248 if (IS_ERR(ctx)) {
5249 err = PTR_ERR(ctx);
5250 goto err_exit;
5251 }
5252
5253 event = perf_event_alloc(attr, cpu, ctx, NULL,
5254 NULL, overflow_handler, GFP_KERNEL);
5255 if (IS_ERR(event)) { 5747 if (IS_ERR(event)) {
5256 err = PTR_ERR(event); 5748 err = PTR_ERR(event);
5257 goto err_put_context; 5749 goto err;
5750 }
5751
5752 ctx = find_get_context(event->pmu, task, cpu);
5753 if (IS_ERR(ctx)) {
5754 err = PTR_ERR(ctx);
5755 goto err_free;
5258 } 5756 }
5259 5757
5260 event->filp = NULL; 5758 event->filp = NULL;
@@ -5272,112 +5770,13 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
5272 5770
5273 return event; 5771 return event;
5274 5772
5275 err_put_context: 5773err_free:
5276 put_ctx(ctx); 5774 free_event(event);
5277 err_exit: 5775err:
5278 return ERR_PTR(err); 5776 return ERR_PTR(err);
5279} 5777}
5280EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter); 5778EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);
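/*
 * Illustrative sketch, not part of this patch: how a caller might use the
 * reworked in-kernel counter API, which now takes a task pointer instead
 * of a pid.  A NULL task selects the per-cpu context, mirroring the
 * watchdog call site further down in this diff.  The names demo_overflow
 * and demo_cpu_counter are hypothetical, and the handler prototype assumes
 * the four-argument perf_overflow_handler_t of this kernel series.
 */
#include <linux/perf_event.h>

static void demo_overflow(struct perf_event *event, int nmi,
			  struct perf_sample_data *data,
			  struct pt_regs *regs)
{
	/* react to the counter overflow here */
}

static struct perf_event *demo_cpu_counter(struct perf_event_attr *attr, int cpu)
{
	/* NULL task: bind the counter to the cpu rather than to a task */
	return perf_event_create_kernel_counter(attr, cpu, NULL, demo_overflow);
}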
5281 5779
5282/*
5283 * inherit a event from parent task to child task:
5284 */
5285static struct perf_event *
5286inherit_event(struct perf_event *parent_event,
5287 struct task_struct *parent,
5288 struct perf_event_context *parent_ctx,
5289 struct task_struct *child,
5290 struct perf_event *group_leader,
5291 struct perf_event_context *child_ctx)
5292{
5293 struct perf_event *child_event;
5294
5295 /*
5296 * Instead of creating recursive hierarchies of events,
5297 * we link inherited events back to the original parent,
5298 * which has a filp for sure, which we use as the reference
5299 * count:
5300 */
5301 if (parent_event->parent)
5302 parent_event = parent_event->parent;
5303
5304 child_event = perf_event_alloc(&parent_event->attr,
5305 parent_event->cpu, child_ctx,
5306 group_leader, parent_event,
5307 NULL, GFP_KERNEL);
5308 if (IS_ERR(child_event))
5309 return child_event;
5310 get_ctx(child_ctx);
5311
5312 /*
5313 * Make the child state follow the state of the parent event,
5314 * not its attr.disabled bit. We hold the parent's mutex,
5315 * so we won't race with perf_event_{en, dis}able_family.
5316 */
5317 if (parent_event->state >= PERF_EVENT_STATE_INACTIVE)
5318 child_event->state = PERF_EVENT_STATE_INACTIVE;
5319 else
5320 child_event->state = PERF_EVENT_STATE_OFF;
5321
5322 if (parent_event->attr.freq) {
5323 u64 sample_period = parent_event->hw.sample_period;
5324 struct hw_perf_event *hwc = &child_event->hw;
5325
5326 hwc->sample_period = sample_period;
5327 hwc->last_period = sample_period;
5328
5329 local64_set(&hwc->period_left, sample_period);
5330 }
5331
5332 child_event->overflow_handler = parent_event->overflow_handler;
5333
5334 /*
5335 * Link it up in the child's context:
5336 */
5337 add_event_to_ctx(child_event, child_ctx);
5338
5339 /*
5340 * Get a reference to the parent filp - we will fput it
5341 * when the child event exits. This is safe to do because
5342 * we are in the parent and we know that the filp still
5343 * exists and has a nonzero count:
5344 */
5345 atomic_long_inc(&parent_event->filp->f_count);
5346
5347 /*
5348 * Link this into the parent event's child list
5349 */
5350 WARN_ON_ONCE(parent_event->ctx->parent_ctx);
5351 mutex_lock(&parent_event->child_mutex);
5352 list_add_tail(&child_event->child_list, &parent_event->child_list);
5353 mutex_unlock(&parent_event->child_mutex);
5354
5355 return child_event;
5356}
5357
5358static int inherit_group(struct perf_event *parent_event,
5359 struct task_struct *parent,
5360 struct perf_event_context *parent_ctx,
5361 struct task_struct *child,
5362 struct perf_event_context *child_ctx)
5363{
5364 struct perf_event *leader;
5365 struct perf_event *sub;
5366 struct perf_event *child_ctr;
5367
5368 leader = inherit_event(parent_event, parent, parent_ctx,
5369 child, NULL, child_ctx);
5370 if (IS_ERR(leader))
5371 return PTR_ERR(leader);
5372 list_for_each_entry(sub, &parent_event->sibling_list, group_entry) {
5373 child_ctr = inherit_event(sub, parent, parent_ctx,
5374 child, leader, child_ctx);
5375 if (IS_ERR(child_ctr))
5376 return PTR_ERR(child_ctr);
5377 }
5378 return 0;
5379}
5380
5381static void sync_child_event(struct perf_event *child_event, 5780static void sync_child_event(struct perf_event *child_event,
5382 struct task_struct *child) 5781 struct task_struct *child)
5383{ 5782{
@@ -5434,16 +5833,13 @@ __perf_event_exit_task(struct perf_event *child_event,
5434 } 5833 }
5435} 5834}
5436 5835
5437/* 5836static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
5438 * When a child task exits, feed back event values to parent events.
5439 */
5440void perf_event_exit_task(struct task_struct *child)
5441{ 5837{
5442 struct perf_event *child_event, *tmp; 5838 struct perf_event *child_event, *tmp;
5443 struct perf_event_context *child_ctx; 5839 struct perf_event_context *child_ctx;
5444 unsigned long flags; 5840 unsigned long flags;
5445 5841
5446 if (likely(!child->perf_event_ctxp)) { 5842 if (likely(!child->perf_event_ctxp[ctxn])) {
5447 perf_event_task(child, NULL, 0); 5843 perf_event_task(child, NULL, 0);
5448 return; 5844 return;
5449 } 5845 }
@@ -5455,7 +5851,7 @@ void perf_event_exit_task(struct task_struct *child)
5455 * scheduled, so we are now safe from rescheduling changing 5851 * scheduled, so we are now safe from rescheduling changing
5456 * our context. 5852 * our context.
5457 */ 5853 */
5458 child_ctx = child->perf_event_ctxp; 5854 child_ctx = child->perf_event_ctxp[ctxn];
5459 __perf_event_task_sched_out(child_ctx); 5855 __perf_event_task_sched_out(child_ctx);
5460 5856
5461 /* 5857 /*
@@ -5464,7 +5860,7 @@ void perf_event_exit_task(struct task_struct *child)
5464 * incremented the context's refcount before we do put_ctx below. 5860 * incremented the context's refcount before we do put_ctx below.
5465 */ 5861 */
5466 raw_spin_lock(&child_ctx->lock); 5862 raw_spin_lock(&child_ctx->lock);
5467 child->perf_event_ctxp = NULL; 5863 child->perf_event_ctxp[ctxn] = NULL;
5468 /* 5864 /*
5469 * If this context is a clone; unclone it so it can't get 5865 * If this context is a clone; unclone it so it can't get
5470 * swapped to another process while we're removing all 5866 * swapped to another process while we're removing all
@@ -5517,6 +5913,17 @@ again:
5517 put_ctx(child_ctx); 5913 put_ctx(child_ctx);
5518} 5914}
5519 5915
5916/*
5917 * When a child task exits, feed back event values to parent events.
5918 */
5919void perf_event_exit_task(struct task_struct *child)
5920{
5921 int ctxn;
5922
5923 for_each_task_context_nr(ctxn)
5924 perf_event_exit_task_context(child, ctxn);
5925}
5926
5520static void perf_free_event(struct perf_event *event, 5927static void perf_free_event(struct perf_event *event,
5521 struct perf_event_context *ctx) 5928 struct perf_event_context *ctx)
5522{ 5929{
@@ -5538,48 +5945,165 @@ static void perf_free_event(struct perf_event *event,
5538 5945
5539/* 5946/*
5540 * free an unexposed, unused context as created by inheritance by 5947 * free an unexposed, unused context as created by inheritance by
5541 * init_task below, used by fork() in case of fail. 5948 * perf_event_init_task below, used by fork() in case of fail.
5542 */ 5949 */
5543void perf_event_free_task(struct task_struct *task) 5950void perf_event_free_task(struct task_struct *task)
5544{ 5951{
5545 struct perf_event_context *ctx = task->perf_event_ctxp; 5952 struct perf_event_context *ctx;
5546 struct perf_event *event, *tmp; 5953 struct perf_event *event, *tmp;
5954 int ctxn;
5547 5955
5548 if (!ctx) 5956 for_each_task_context_nr(ctxn) {
5549 return; 5957 ctx = task->perf_event_ctxp[ctxn];
5958 if (!ctx)
5959 continue;
5550 5960
5551 mutex_lock(&ctx->mutex); 5961 mutex_lock(&ctx->mutex);
5552again: 5962again:
5553 list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) 5963 list_for_each_entry_safe(event, tmp, &ctx->pinned_groups,
5554 perf_free_event(event, ctx); 5964 group_entry)
5965 perf_free_event(event, ctx);
5555 5966
5556 list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, 5967 list_for_each_entry_safe(event, tmp, &ctx->flexible_groups,
5557 group_entry) 5968 group_entry)
5558 perf_free_event(event, ctx); 5969 perf_free_event(event, ctx);
5559 5970
5560 if (!list_empty(&ctx->pinned_groups) || 5971 if (!list_empty(&ctx->pinned_groups) ||
5561 !list_empty(&ctx->flexible_groups)) 5972 !list_empty(&ctx->flexible_groups))
5562 goto again; 5973 goto again;
5563 5974
5564 mutex_unlock(&ctx->mutex); 5975 mutex_unlock(&ctx->mutex);
5565 5976
5566 put_ctx(ctx); 5977 put_ctx(ctx);
5978 }
5979}
5980
5981void perf_event_delayed_put(struct task_struct *task)
5982{
5983 int ctxn;
5984
5985 for_each_task_context_nr(ctxn)
5986 WARN_ON_ONCE(task->perf_event_ctxp[ctxn]);
5987}
5988
5989/*
5990 * inherit an event from parent task to child task:
5991 */
5992static struct perf_event *
5993inherit_event(struct perf_event *parent_event,
5994 struct task_struct *parent,
5995 struct perf_event_context *parent_ctx,
5996 struct task_struct *child,
5997 struct perf_event *group_leader,
5998 struct perf_event_context *child_ctx)
5999{
6000 struct perf_event *child_event;
6001 unsigned long flags;
6002
6003 /*
6004 * Instead of creating recursive hierarchies of events,
6005 * we link inherited events back to the original parent,
6006 * which has a filp for sure, which we use as the reference
6007 * count:
6008 */
6009 if (parent_event->parent)
6010 parent_event = parent_event->parent;
6011
6012 child_event = perf_event_alloc(&parent_event->attr,
6013 parent_event->cpu,
6014 group_leader, parent_event,
6015 NULL);
6016 if (IS_ERR(child_event))
6017 return child_event;
6018 get_ctx(child_ctx);
6019
6020 /*
6021 * Make the child state follow the state of the parent event,
6022 * not its attr.disabled bit. We hold the parent's mutex,
6023 * so we won't race with perf_event_{en, dis}able_family.
6024 */
6025 if (parent_event->state >= PERF_EVENT_STATE_INACTIVE)
6026 child_event->state = PERF_EVENT_STATE_INACTIVE;
6027 else
6028 child_event->state = PERF_EVENT_STATE_OFF;
6029
6030 if (parent_event->attr.freq) {
6031 u64 sample_period = parent_event->hw.sample_period;
6032 struct hw_perf_event *hwc = &child_event->hw;
6033
6034 hwc->sample_period = sample_period;
6035 hwc->last_period = sample_period;
6036
6037 local64_set(&hwc->period_left, sample_period);
6038 }
6039
6040 child_event->ctx = child_ctx;
6041 child_event->overflow_handler = parent_event->overflow_handler;
6042
6043 /*
6044 * Link it up in the child's context:
6045 */
6046 raw_spin_lock_irqsave(&child_ctx->lock, flags);
6047 add_event_to_ctx(child_event, child_ctx);
6048 raw_spin_unlock_irqrestore(&child_ctx->lock, flags);
6049
6050 /*
6051 * Get a reference to the parent filp - we will fput it
6052 * when the child event exits. This is safe to do because
6053 * we are in the parent and we know that the filp still
6054 * exists and has a nonzero count:
6055 */
6056 atomic_long_inc(&parent_event->filp->f_count);
6057
6058 /*
6059 * Link this into the parent event's child list
6060 */
6061 WARN_ON_ONCE(parent_event->ctx->parent_ctx);
6062 mutex_lock(&parent_event->child_mutex);
6063 list_add_tail(&child_event->child_list, &parent_event->child_list);
6064 mutex_unlock(&parent_event->child_mutex);
6065
6066 return child_event;
6067}
6068
6069static int inherit_group(struct perf_event *parent_event,
6070 struct task_struct *parent,
6071 struct perf_event_context *parent_ctx,
6072 struct task_struct *child,
6073 struct perf_event_context *child_ctx)
6074{
6075 struct perf_event *leader;
6076 struct perf_event *sub;
6077 struct perf_event *child_ctr;
6078
6079 leader = inherit_event(parent_event, parent, parent_ctx,
6080 child, NULL, child_ctx);
6081 if (IS_ERR(leader))
6082 return PTR_ERR(leader);
6083 list_for_each_entry(sub, &parent_event->sibling_list, group_entry) {
6084 child_ctr = inherit_event(sub, parent, parent_ctx,
6085 child, leader, child_ctx);
6086 if (IS_ERR(child_ctr))
6087 return PTR_ERR(child_ctr);
6088 }
6089 return 0;
5567} 6090}
5568 6091
5569static int 6092static int
5570inherit_task_group(struct perf_event *event, struct task_struct *parent, 6093inherit_task_group(struct perf_event *event, struct task_struct *parent,
5571 struct perf_event_context *parent_ctx, 6094 struct perf_event_context *parent_ctx,
5572 struct task_struct *child, 6095 struct task_struct *child, int ctxn,
5573 int *inherited_all) 6096 int *inherited_all)
5574{ 6097{
5575 int ret; 6098 int ret;
5576 struct perf_event_context *child_ctx = child->perf_event_ctxp; 6099 struct perf_event_context *child_ctx;
5577 6100
5578 if (!event->attr.inherit) { 6101 if (!event->attr.inherit) {
5579 *inherited_all = 0; 6102 *inherited_all = 0;
5580 return 0; 6103 return 0;
5581 } 6104 }
5582 6105
6106 child_ctx = child->perf_event_ctxp[ctxn];
5583 if (!child_ctx) { 6107 if (!child_ctx) {
5584 /* 6108 /*
5585 * This is executed from the parent task context, so 6109 * This is executed from the parent task context, so
@@ -5588,14 +6112,11 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent,
5588 * child. 6112 * child.
5589 */ 6113 */
5590 6114
5591 child_ctx = kzalloc(sizeof(struct perf_event_context), 6115 child_ctx = alloc_perf_context(event->pmu, child);
5592 GFP_KERNEL);
5593 if (!child_ctx) 6116 if (!child_ctx)
5594 return -ENOMEM; 6117 return -ENOMEM;
5595 6118
5596 __perf_event_init_context(child_ctx, child); 6119 child->perf_event_ctxp[ctxn] = child_ctx;
5597 child->perf_event_ctxp = child_ctx;
5598 get_task_struct(child);
5599 } 6120 }
5600 6121
5601 ret = inherit_group(event, parent, parent_ctx, 6122 ret = inherit_group(event, parent, parent_ctx,
@@ -5607,11 +6128,10 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent,
5607 return ret; 6128 return ret;
5608} 6129}
5609 6130
5610
5611/* 6131/*
5612 * Initialize the perf_event context in task_struct 6132 * Initialize the perf_event context in task_struct
5613 */ 6133 */
5614int perf_event_init_task(struct task_struct *child) 6134int perf_event_init_context(struct task_struct *child, int ctxn)
5615{ 6135{
5616 struct perf_event_context *child_ctx, *parent_ctx; 6136 struct perf_event_context *child_ctx, *parent_ctx;
5617 struct perf_event_context *cloned_ctx; 6137 struct perf_event_context *cloned_ctx;
@@ -5620,19 +6140,19 @@ int perf_event_init_task(struct task_struct *child)
5620 int inherited_all = 1; 6140 int inherited_all = 1;
5621 int ret = 0; 6141 int ret = 0;
5622 6142
5623 child->perf_event_ctxp = NULL; 6143 child->perf_event_ctxp[ctxn] = NULL;
5624 6144
5625 mutex_init(&child->perf_event_mutex); 6145 mutex_init(&child->perf_event_mutex);
5626 INIT_LIST_HEAD(&child->perf_event_list); 6146 INIT_LIST_HEAD(&child->perf_event_list);
5627 6147
5628 if (likely(!parent->perf_event_ctxp)) 6148 if (likely(!parent->perf_event_ctxp[ctxn]))
5629 return 0; 6149 return 0;
5630 6150
5631 /* 6151 /*
5632 * If the parent's context is a clone, pin it so it won't get 6152 * If the parent's context is a clone, pin it so it won't get
5633 * swapped under us. 6153 * swapped under us.
5634 */ 6154 */
5635 parent_ctx = perf_pin_task_context(parent); 6155 parent_ctx = perf_pin_task_context(parent, ctxn);
5636 6156
5637 /* 6157 /*
5638 * No need to check if parent_ctx != NULL here; since we saw 6158 * No need to check if parent_ctx != NULL here; since we saw
@@ -5652,20 +6172,20 @@ int perf_event_init_task(struct task_struct *child)
5652 * the list, not manipulating it: 6172 * the list, not manipulating it:
5653 */ 6173 */
5654 list_for_each_entry(event, &parent_ctx->pinned_groups, group_entry) { 6174 list_for_each_entry(event, &parent_ctx->pinned_groups, group_entry) {
5655 ret = inherit_task_group(event, parent, parent_ctx, child, 6175 ret = inherit_task_group(event, parent, parent_ctx,
5656 &inherited_all); 6176 child, ctxn, &inherited_all);
5657 if (ret) 6177 if (ret)
5658 break; 6178 break;
5659 } 6179 }
5660 6180
5661 list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) { 6181 list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) {
5662 ret = inherit_task_group(event, parent, parent_ctx, child, 6182 ret = inherit_task_group(event, parent, parent_ctx,
5663 &inherited_all); 6183 child, ctxn, &inherited_all);
5664 if (ret) 6184 if (ret)
5665 break; 6185 break;
5666 } 6186 }
5667 6187
5668 child_ctx = child->perf_event_ctxp; 6188 child_ctx = child->perf_event_ctxp[ctxn];
5669 6189
5670 if (child_ctx && inherited_all) { 6190 if (child_ctx && inherited_all) {
5671 /* 6191 /*
@@ -5694,63 +6214,98 @@ int perf_event_init_task(struct task_struct *child)
5694 return ret; 6214 return ret;
5695} 6215}
5696 6216
6217/*
6218 * Initialize the perf_event context in task_struct
6219 */
6220int perf_event_init_task(struct task_struct *child)
6221{
6222 int ctxn, ret;
6223
6224 for_each_task_context_nr(ctxn) {
6225 ret = perf_event_init_context(child, ctxn);
6226 if (ret)
6227 return ret;
6228 }
6229
6230 return 0;
6231}
6232
5697static void __init perf_event_init_all_cpus(void) 6233static void __init perf_event_init_all_cpus(void)
5698{ 6234{
6235 struct swevent_htable *swhash;
5699 int cpu; 6236 int cpu;
5700 struct perf_cpu_context *cpuctx;
5701 6237
5702 for_each_possible_cpu(cpu) { 6238 for_each_possible_cpu(cpu) {
5703 cpuctx = &per_cpu(perf_cpu_context, cpu); 6239 swhash = &per_cpu(swevent_htable, cpu);
5704 mutex_init(&cpuctx->hlist_mutex); 6240 mutex_init(&swhash->hlist_mutex);
5705 __perf_event_init_context(&cpuctx->ctx, NULL); 6241 INIT_LIST_HEAD(&per_cpu(rotation_list, cpu));
5706 } 6242 }
5707} 6243}
5708 6244
5709static void __cpuinit perf_event_init_cpu(int cpu) 6245static void __cpuinit perf_event_init_cpu(int cpu)
5710{ 6246{
5711 struct perf_cpu_context *cpuctx; 6247 struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
5712
5713 cpuctx = &per_cpu(perf_cpu_context, cpu);
5714
5715 spin_lock(&perf_resource_lock);
5716 cpuctx->max_pertask = perf_max_events - perf_reserved_percpu;
5717 spin_unlock(&perf_resource_lock);
5718 6248
5719 mutex_lock(&cpuctx->hlist_mutex); 6249 mutex_lock(&swhash->hlist_mutex);
5720 if (cpuctx->hlist_refcount > 0) { 6250 if (swhash->hlist_refcount > 0) {
5721 struct swevent_hlist *hlist; 6251 struct swevent_hlist *hlist;
5722 6252
5723 hlist = kzalloc(sizeof(*hlist), GFP_KERNEL); 6253 hlist = kzalloc_node(sizeof(*hlist), GFP_KERNEL, cpu_to_node(cpu));
5724 WARN_ON_ONCE(!hlist); 6254 WARN_ON(!hlist);
5725 rcu_assign_pointer(cpuctx->swevent_hlist, hlist); 6255 rcu_assign_pointer(swhash->swevent_hlist, hlist);
5726 } 6256 }
5727 mutex_unlock(&cpuctx->hlist_mutex); 6257 mutex_unlock(&swhash->hlist_mutex);
5728} 6258}
5729 6259
5730#ifdef CONFIG_HOTPLUG_CPU 6260#ifdef CONFIG_HOTPLUG_CPU
5731static void __perf_event_exit_cpu(void *info) 6261static void perf_pmu_rotate_stop(struct pmu *pmu)
5732{ 6262{
5733 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); 6263 struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
5734 struct perf_event_context *ctx = &cpuctx->ctx; 6264
6265 WARN_ON(!irqs_disabled());
6266
6267 list_del_init(&cpuctx->rotation_list);
6268}
6269
6270static void __perf_event_exit_context(void *__info)
6271{
6272 struct perf_event_context *ctx = __info;
5735 struct perf_event *event, *tmp; 6273 struct perf_event *event, *tmp;
5736 6274
6275 perf_pmu_rotate_stop(ctx->pmu);
6276
5737 list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) 6277 list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry)
5738 __perf_event_remove_from_context(event); 6278 __perf_event_remove_from_context(event);
5739 list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry) 6279 list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry)
5740 __perf_event_remove_from_context(event); 6280 __perf_event_remove_from_context(event);
5741} 6281}
6282
6283static void perf_event_exit_cpu_context(int cpu)
6284{
6285 struct perf_event_context *ctx;
6286 struct pmu *pmu;
6287 int idx;
6288
6289 idx = srcu_read_lock(&pmus_srcu);
6290 list_for_each_entry_rcu(pmu, &pmus, entry) {
6291 ctx = &per_cpu_ptr(pmu->pmu_cpu_context, cpu)->ctx;
6292
6293 mutex_lock(&ctx->mutex);
6294 smp_call_function_single(cpu, __perf_event_exit_context, ctx, 1);
6295 mutex_unlock(&ctx->mutex);
6296 }
6297 srcu_read_unlock(&pmus_srcu, idx);
6298}
6299
5742static void perf_event_exit_cpu(int cpu) 6300static void perf_event_exit_cpu(int cpu)
5743{ 6301{
5744 struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); 6302 struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
5745 struct perf_event_context *ctx = &cpuctx->ctx;
5746 6303
5747 mutex_lock(&cpuctx->hlist_mutex); 6304 mutex_lock(&swhash->hlist_mutex);
5748 swevent_hlist_release(cpuctx); 6305 swevent_hlist_release(swhash);
5749 mutex_unlock(&cpuctx->hlist_mutex); 6306 mutex_unlock(&swhash->hlist_mutex);
5750 6307
5751 mutex_lock(&ctx->mutex); 6308 perf_event_exit_cpu_context(cpu);
5752 smp_call_function_single(cpu, __perf_event_exit_cpu, NULL, 1);
5753 mutex_unlock(&ctx->mutex);
5754} 6309}
5755#else 6310#else
5756static inline void perf_event_exit_cpu(int cpu) { } 6311static inline void perf_event_exit_cpu(int cpu) { }
@@ -5780,118 +6335,13 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
5780 return NOTIFY_OK; 6335 return NOTIFY_OK;
5781} 6336}
5782 6337
5783/*
5784 * This has to have a higher priority than migration_notifier in sched.c.
5785 */
5786static struct notifier_block __cpuinitdata perf_cpu_nb = {
5787 .notifier_call = perf_cpu_notify,
5788 .priority = 20,
5789};
5790
5791void __init perf_event_init(void) 6338void __init perf_event_init(void)
5792{ 6339{
5793 perf_event_init_all_cpus(); 6340 perf_event_init_all_cpus();
5794 perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE, 6341 init_srcu_struct(&pmus_srcu);
5795 (void *)(long)smp_processor_id()); 6342 perf_pmu_register(&perf_swevent);
5796 perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE, 6343 perf_pmu_register(&perf_cpu_clock);
5797 (void *)(long)smp_processor_id()); 6344 perf_pmu_register(&perf_task_clock);
5798 register_cpu_notifier(&perf_cpu_nb); 6345 perf_tp_register();
5799} 6346 perf_cpu_notifier(perf_cpu_notify);
5800
5801static ssize_t perf_show_reserve_percpu(struct sysdev_class *class,
5802 struct sysdev_class_attribute *attr,
5803 char *buf)
5804{
5805 return sprintf(buf, "%d\n", perf_reserved_percpu);
5806}
5807
5808static ssize_t
5809perf_set_reserve_percpu(struct sysdev_class *class,
5810 struct sysdev_class_attribute *attr,
5811 const char *buf,
5812 size_t count)
5813{
5814 struct perf_cpu_context *cpuctx;
5815 unsigned long val;
5816 int err, cpu, mpt;
5817
5818 err = strict_strtoul(buf, 10, &val);
5819 if (err)
5820 return err;
5821 if (val > perf_max_events)
5822 return -EINVAL;
5823
5824 spin_lock(&perf_resource_lock);
5825 perf_reserved_percpu = val;
5826 for_each_online_cpu(cpu) {
5827 cpuctx = &per_cpu(perf_cpu_context, cpu);
5828 raw_spin_lock_irq(&cpuctx->ctx.lock);
5829 mpt = min(perf_max_events - cpuctx->ctx.nr_events,
5830 perf_max_events - perf_reserved_percpu);
5831 cpuctx->max_pertask = mpt;
5832 raw_spin_unlock_irq(&cpuctx->ctx.lock);
5833 }
5834 spin_unlock(&perf_resource_lock);
5835
5836 return count;
5837}
5838
5839static ssize_t perf_show_overcommit(struct sysdev_class *class,
5840 struct sysdev_class_attribute *attr,
5841 char *buf)
5842{
5843 return sprintf(buf, "%d\n", perf_overcommit);
5844}
5845
5846static ssize_t
5847perf_set_overcommit(struct sysdev_class *class,
5848 struct sysdev_class_attribute *attr,
5849 const char *buf, size_t count)
5850{
5851 unsigned long val;
5852 int err;
5853
5854 err = strict_strtoul(buf, 10, &val);
5855 if (err)
5856 return err;
5857 if (val > 1)
5858 return -EINVAL;
5859
5860 spin_lock(&perf_resource_lock);
5861 perf_overcommit = val;
5862 spin_unlock(&perf_resource_lock);
5863
5864 return count;
5865}
5866
5867static SYSDEV_CLASS_ATTR(
5868 reserve_percpu,
5869 0644,
5870 perf_show_reserve_percpu,
5871 perf_set_reserve_percpu
5872 );
5873
5874static SYSDEV_CLASS_ATTR(
5875 overcommit,
5876 0644,
5877 perf_show_overcommit,
5878 perf_set_overcommit
5879 );
5880
5881static struct attribute *perfclass_attrs[] = {
5882 &attr_reserve_percpu.attr,
5883 &attr_overcommit.attr,
5884 NULL
5885};
5886
5887static struct attribute_group perfclass_attr_group = {
5888 .attrs = perfclass_attrs,
5889 .name = "perf_events",
5890};
5891
5892static int __init perf_event_sysfs_init(void)
5893{
5894 return sysfs_create_group(&cpu_sysdev_class.kset.kobj,
5895 &perfclass_attr_group);
5896} 6347}
5897device_initcall(perf_event_sysfs_init);
diff --git a/kernel/sched.c b/kernel/sched.c
index ed09d4f2a69c..794819eab9ca 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3584,7 +3584,7 @@ void scheduler_tick(void)
3584 curr->sched_class->task_tick(rq, curr, 0); 3584 curr->sched_class->task_tick(rq, curr, 0);
3585 raw_spin_unlock(&rq->lock); 3585 raw_spin_unlock(&rq->lock);
3586 3586
3587 perf_event_task_tick(curr); 3587 perf_event_task_tick();
3588 3588
3589#ifdef CONFIG_SMP 3589#ifdef CONFIG_SMP
3590 rq->idle_at_tick = idle_cpu(cpu); 3590 rq->idle_at_tick = idle_cpu(cpu);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index fa7ece649fe1..65fb077ea79c 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -884,10 +884,8 @@ enum {
884 FTRACE_ENABLE_CALLS = (1 << 0), 884 FTRACE_ENABLE_CALLS = (1 << 0),
885 FTRACE_DISABLE_CALLS = (1 << 1), 885 FTRACE_DISABLE_CALLS = (1 << 1),
886 FTRACE_UPDATE_TRACE_FUNC = (1 << 2), 886 FTRACE_UPDATE_TRACE_FUNC = (1 << 2),
887 FTRACE_ENABLE_MCOUNT = (1 << 3), 887 FTRACE_START_FUNC_RET = (1 << 3),
888 FTRACE_DISABLE_MCOUNT = (1 << 4), 888 FTRACE_STOP_FUNC_RET = (1 << 4),
889 FTRACE_START_FUNC_RET = (1 << 5),
890 FTRACE_STOP_FUNC_RET = (1 << 6),
891}; 889};
892 890
893static int ftrace_filtered; 891static int ftrace_filtered;
@@ -1226,8 +1224,6 @@ static void ftrace_shutdown(int command)
1226 1224
1227static void ftrace_startup_sysctl(void) 1225static void ftrace_startup_sysctl(void)
1228{ 1226{
1229 int command = FTRACE_ENABLE_MCOUNT;
1230
1231 if (unlikely(ftrace_disabled)) 1227 if (unlikely(ftrace_disabled))
1232 return; 1228 return;
1233 1229
@@ -1235,23 +1231,17 @@ static void ftrace_startup_sysctl(void)
1235 saved_ftrace_func = NULL; 1231 saved_ftrace_func = NULL;
1236 /* ftrace_start_up is true if we want ftrace running */ 1232 /* ftrace_start_up is true if we want ftrace running */
1237 if (ftrace_start_up) 1233 if (ftrace_start_up)
1238 command |= FTRACE_ENABLE_CALLS; 1234 ftrace_run_update_code(FTRACE_ENABLE_CALLS);
1239
1240 ftrace_run_update_code(command);
1241} 1235}
1242 1236
1243static void ftrace_shutdown_sysctl(void) 1237static void ftrace_shutdown_sysctl(void)
1244{ 1238{
1245 int command = FTRACE_DISABLE_MCOUNT;
1246
1247 if (unlikely(ftrace_disabled)) 1239 if (unlikely(ftrace_disabled))
1248 return; 1240 return;
1249 1241
1250 /* ftrace_start_up is true if ftrace is running */ 1242 /* ftrace_start_up is true if ftrace is running */
1251 if (ftrace_start_up) 1243 if (ftrace_start_up)
1252 command |= FTRACE_DISABLE_CALLS; 1244 ftrace_run_update_code(FTRACE_DISABLE_CALLS);
1253
1254 ftrace_run_update_code(command);
1255} 1245}
1256 1246
1257static cycle_t ftrace_update_time; 1247static cycle_t ftrace_update_time;
@@ -1368,24 +1358,29 @@ enum {
1368#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ 1358#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
1369 1359
1370struct ftrace_iterator { 1360struct ftrace_iterator {
1371 struct ftrace_page *pg; 1361 loff_t pos;
1372 int hidx; 1362 loff_t func_pos;
1373 int idx; 1363 struct ftrace_page *pg;
1374 unsigned flags; 1364 struct dyn_ftrace *func;
1375 struct trace_parser parser; 1365 struct ftrace_func_probe *probe;
1366 struct trace_parser parser;
1367 int hidx;
1368 int idx;
1369 unsigned flags;
1376}; 1370};
1377 1371
1378static void * 1372static void *
1379t_hash_next(struct seq_file *m, void *v, loff_t *pos) 1373t_hash_next(struct seq_file *m, loff_t *pos)
1380{ 1374{
1381 struct ftrace_iterator *iter = m->private; 1375 struct ftrace_iterator *iter = m->private;
1382 struct hlist_node *hnd = v; 1376 struct hlist_node *hnd = NULL;
1383 struct hlist_head *hhd; 1377 struct hlist_head *hhd;
1384 1378
1385 WARN_ON(!(iter->flags & FTRACE_ITER_HASH));
1386
1387 (*pos)++; 1379 (*pos)++;
1380 iter->pos = *pos;
1388 1381
1382 if (iter->probe)
1383 hnd = &iter->probe->node;
1389 retry: 1384 retry:
1390 if (iter->hidx >= FTRACE_FUNC_HASHSIZE) 1385 if (iter->hidx >= FTRACE_FUNC_HASHSIZE)
1391 return NULL; 1386 return NULL;
@@ -1408,7 +1403,12 @@ t_hash_next(struct seq_file *m, void *v, loff_t *pos)
1408 } 1403 }
1409 } 1404 }
1410 1405
1411 return hnd; 1406 if (WARN_ON_ONCE(!hnd))
1407 return NULL;
1408
1409 iter->probe = hlist_entry(hnd, struct ftrace_func_probe, node);
1410
1411 return iter;
1412} 1412}
1413 1413
1414static void *t_hash_start(struct seq_file *m, loff_t *pos) 1414static void *t_hash_start(struct seq_file *m, loff_t *pos)
@@ -1417,26 +1417,32 @@ static void *t_hash_start(struct seq_file *m, loff_t *pos)
1417 void *p = NULL; 1417 void *p = NULL;
1418 loff_t l; 1418 loff_t l;
1419 1419
1420 if (!(iter->flags & FTRACE_ITER_HASH)) 1420 if (iter->func_pos > *pos)
1421 *pos = 0; 1421 return NULL;
1422
1423 iter->flags |= FTRACE_ITER_HASH;
1424 1422
1425 iter->hidx = 0; 1423 iter->hidx = 0;
1426 for (l = 0; l <= *pos; ) { 1424 for (l = 0; l <= (*pos - iter->func_pos); ) {
1427 p = t_hash_next(m, p, &l); 1425 p = t_hash_next(m, &l);
1428 if (!p) 1426 if (!p)
1429 break; 1427 break;
1430 } 1428 }
1431 return p; 1429 if (!p)
1430 return NULL;
1431
1432 /* Only set this if we have an item */
1433 iter->flags |= FTRACE_ITER_HASH;
1434
1435 return iter;
1432} 1436}
1433 1437
1434static int t_hash_show(struct seq_file *m, void *v) 1438static int
1439t_hash_show(struct seq_file *m, struct ftrace_iterator *iter)
1435{ 1440{
1436 struct ftrace_func_probe *rec; 1441 struct ftrace_func_probe *rec;
1437 struct hlist_node *hnd = v;
1438 1442
1439 rec = hlist_entry(hnd, struct ftrace_func_probe, node); 1443 rec = iter->probe;
1444 if (WARN_ON_ONCE(!rec))
1445 return -EIO;
1440 1446
1441 if (rec->ops->print) 1447 if (rec->ops->print)
1442 return rec->ops->print(m, rec->ip, rec->ops, rec->data); 1448 return rec->ops->print(m, rec->ip, rec->ops, rec->data);
@@ -1457,12 +1463,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
1457 struct dyn_ftrace *rec = NULL; 1463 struct dyn_ftrace *rec = NULL;
1458 1464
1459 if (iter->flags & FTRACE_ITER_HASH) 1465 if (iter->flags & FTRACE_ITER_HASH)
1460 return t_hash_next(m, v, pos); 1466 return t_hash_next(m, pos);
1461 1467
1462 (*pos)++; 1468 (*pos)++;
1469 iter->pos = *pos;
1463 1470
1464 if (iter->flags & FTRACE_ITER_PRINTALL) 1471 if (iter->flags & FTRACE_ITER_PRINTALL)
1465 return NULL; 1472 return t_hash_start(m, pos);
1466 1473
1467 retry: 1474 retry:
1468 if (iter->idx >= iter->pg->index) { 1475 if (iter->idx >= iter->pg->index) {
@@ -1491,7 +1498,20 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
1491 } 1498 }
1492 } 1499 }
1493 1500
1494 return rec; 1501 if (!rec)
1502 return t_hash_start(m, pos);
1503
1504 iter->func_pos = *pos;
1505 iter->func = rec;
1506
1507 return iter;
1508}
1509
1510static void reset_iter_read(struct ftrace_iterator *iter)
1511{
1512 iter->pos = 0;
1513 iter->func_pos = 0;
1514 iter->flags &= ~(FTRACE_ITER_PRINTALL | FTRACE_ITER_HASH);
1495} 1515}
1496 1516
1497static void *t_start(struct seq_file *m, loff_t *pos) 1517static void *t_start(struct seq_file *m, loff_t *pos)
@@ -1502,6 +1522,12 @@ static void *t_start(struct seq_file *m, loff_t *pos)
1502 1522
1503 mutex_lock(&ftrace_lock); 1523 mutex_lock(&ftrace_lock);
1504 /* 1524 /*
1525 * If an lseek was done, then reset and start from beginning.
1526 */
1527 if (*pos < iter->pos)
1528 reset_iter_read(iter);
1529
1530 /*
1505 * For set_ftrace_filter reading, if we have the filter 1531 * For set_ftrace_filter reading, if we have the filter
1506 * off, we can short cut and just print out that all 1532 * off, we can short cut and just print out that all
1507 * functions are enabled. 1533 * functions are enabled.
@@ -1518,6 +1544,11 @@ static void *t_start(struct seq_file *m, loff_t *pos)
1518 if (iter->flags & FTRACE_ITER_HASH) 1544 if (iter->flags & FTRACE_ITER_HASH)
1519 return t_hash_start(m, pos); 1545 return t_hash_start(m, pos);
1520 1546
1547 /*
1548 * Unfortunately, we need to restart at ftrace_pages_start
1549 * every time we let go of the ftrace_lock. This is because
1550 * those pointers can change without the lock.
1551 */
1521 iter->pg = ftrace_pages_start; 1552 iter->pg = ftrace_pages_start;
1522 iter->idx = 0; 1553 iter->idx = 0;
1523 for (l = 0; l <= *pos; ) { 1554 for (l = 0; l <= *pos; ) {
@@ -1526,10 +1557,14 @@ static void *t_start(struct seq_file *m, loff_t *pos)
1526 break; 1557 break;
1527 } 1558 }
1528 1559
1529 if (!p && iter->flags & FTRACE_ITER_FILTER) 1560 if (!p) {
1530 return t_hash_start(m, pos); 1561 if (iter->flags & FTRACE_ITER_FILTER)
1562 return t_hash_start(m, pos);
1531 1563
1532 return p; 1564 return NULL;
1565 }
1566
1567 return iter;
1533} 1568}
1534 1569
1535static void t_stop(struct seq_file *m, void *p) 1570static void t_stop(struct seq_file *m, void *p)
@@ -1540,16 +1575,18 @@ static void t_stop(struct seq_file *m, void *p)
1540static int t_show(struct seq_file *m, void *v) 1575static int t_show(struct seq_file *m, void *v)
1541{ 1576{
1542 struct ftrace_iterator *iter = m->private; 1577 struct ftrace_iterator *iter = m->private;
1543 struct dyn_ftrace *rec = v; 1578 struct dyn_ftrace *rec;
1544 1579
1545 if (iter->flags & FTRACE_ITER_HASH) 1580 if (iter->flags & FTRACE_ITER_HASH)
1546 return t_hash_show(m, v); 1581 return t_hash_show(m, iter);
1547 1582
1548 if (iter->flags & FTRACE_ITER_PRINTALL) { 1583 if (iter->flags & FTRACE_ITER_PRINTALL) {
1549 seq_printf(m, "#### all functions enabled ####\n"); 1584 seq_printf(m, "#### all functions enabled ####\n");
1550 return 0; 1585 return 0;
1551 } 1586 }
1552 1587
1588 rec = iter->func;
1589
1553 if (!rec) 1590 if (!rec)
1554 return 0; 1591 return 0;
1555 1592
@@ -2418,7 +2455,7 @@ static const struct file_operations ftrace_filter_fops = {
2418 .open = ftrace_filter_open, 2455 .open = ftrace_filter_open,
2419 .read = seq_read, 2456 .read = seq_read,
2420 .write = ftrace_filter_write, 2457 .write = ftrace_filter_write,
2421 .llseek = no_llseek, 2458 .llseek = ftrace_regex_lseek,
2422 .release = ftrace_filter_release, 2459 .release = ftrace_filter_release,
2423}; 2460};
2424 2461
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 492197e2f86c..4e2f03410377 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2606,6 +2606,19 @@ void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
2606} 2606}
2607EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu); 2607EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
2608 2608
2609/*
2610 * The total number of entries in the ring buffer is the running counter
2611 * of entries entered into the ring buffer, minus the sum of
2612 * the entries read from the ring buffer and the number of
2613 * entries that were overwritten.
2614 */
2615static inline unsigned long
2616rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer)
2617{
2618 return local_read(&cpu_buffer->entries) -
2619 (local_read(&cpu_buffer->overrun) + cpu_buffer->read);
2620}
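/*
 * Worked example for the accounting above (hypothetical numbers, not taken
 * from this patch): with entries == 1000 written, overrun == 150 overwritten
 * and read == 300 already consumed, rb_num_of_entries() returns
 * 1000 - (150 + 300) = 550 entries still unread in the buffer.
 */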
2621
2609/** 2622/**
2610 * ring_buffer_entries_cpu - get the number of entries in a cpu buffer 2623 * ring_buffer_entries_cpu - get the number of entries in a cpu buffer
2611 * @buffer: The ring buffer 2624 * @buffer: The ring buffer
@@ -2614,16 +2627,13 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
2614unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) 2627unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
2615{ 2628{
2616 struct ring_buffer_per_cpu *cpu_buffer; 2629 struct ring_buffer_per_cpu *cpu_buffer;
2617 unsigned long ret;
2618 2630
2619 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 2631 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2620 return 0; 2632 return 0;
2621 2633
2622 cpu_buffer = buffer->buffers[cpu]; 2634 cpu_buffer = buffer->buffers[cpu];
2623 ret = (local_read(&cpu_buffer->entries) - local_read(&cpu_buffer->overrun))
2624 - cpu_buffer->read;
2625 2635
2626 return ret; 2636 return rb_num_of_entries(cpu_buffer);
2627} 2637}
2628EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu); 2638EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu);
2629 2639
@@ -2684,8 +2694,7 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer)
2684 /* if you care about this being correct, lock the buffer */ 2694 /* if you care about this being correct, lock the buffer */
2685 for_each_buffer_cpu(buffer, cpu) { 2695 for_each_buffer_cpu(buffer, cpu) {
2686 cpu_buffer = buffer->buffers[cpu]; 2696 cpu_buffer = buffer->buffers[cpu];
2687 entries += (local_read(&cpu_buffer->entries) - 2697 entries += rb_num_of_entries(cpu_buffer);
2688 local_read(&cpu_buffer->overrun)) - cpu_buffer->read;
2689 } 2698 }
2690 2699
2691 return entries; 2700 return entries;
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 31cc4cb0dbf2..39c059ca670e 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -9,7 +9,7 @@
9#include <linux/kprobes.h> 9#include <linux/kprobes.h>
10#include "trace.h" 10#include "trace.h"
11 11
12static char *perf_trace_buf[4]; 12static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS];
13 13
14/* 14/*
15 * Force it to be aligned to unsigned long to avoid misaligned accesses 15 * Force it to be aligned to unsigned long to avoid misaligned accesses
@@ -24,7 +24,7 @@ static int total_ref_count;
24static int perf_trace_event_init(struct ftrace_event_call *tp_event, 24static int perf_trace_event_init(struct ftrace_event_call *tp_event,
25 struct perf_event *p_event) 25 struct perf_event *p_event)
26{ 26{
27 struct hlist_head *list; 27 struct hlist_head __percpu *list;
28 int ret = -ENOMEM; 28 int ret = -ENOMEM;
29 int cpu; 29 int cpu;
30 30
@@ -42,11 +42,11 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event,
42 tp_event->perf_events = list; 42 tp_event->perf_events = list;
43 43
44 if (!total_ref_count) { 44 if (!total_ref_count) {
45 char *buf; 45 char __percpu *buf;
46 int i; 46 int i;
47 47
48 for (i = 0; i < 4; i++) { 48 for (i = 0; i < PERF_NR_CONTEXTS; i++) {
49 buf = (char *)alloc_percpu(perf_trace_t); 49 buf = (char __percpu *)alloc_percpu(perf_trace_t);
50 if (!buf) 50 if (!buf)
51 goto fail; 51 goto fail;
52 52
@@ -65,7 +65,7 @@ fail:
65 if (!total_ref_count) { 65 if (!total_ref_count) {
66 int i; 66 int i;
67 67
68 for (i = 0; i < 4; i++) { 68 for (i = 0; i < PERF_NR_CONTEXTS; i++) {
69 free_percpu(perf_trace_buf[i]); 69 free_percpu(perf_trace_buf[i]);
70 perf_trace_buf[i] = NULL; 70 perf_trace_buf[i] = NULL;
71 } 71 }
@@ -101,22 +101,26 @@ int perf_trace_init(struct perf_event *p_event)
101 return ret; 101 return ret;
102} 102}
103 103
104int perf_trace_enable(struct perf_event *p_event) 104int perf_trace_add(struct perf_event *p_event, int flags)
105{ 105{
106 struct ftrace_event_call *tp_event = p_event->tp_event; 106 struct ftrace_event_call *tp_event = p_event->tp_event;
107 struct hlist_head __percpu *pcpu_list;
107 struct hlist_head *list; 108 struct hlist_head *list;
108 109
109 list = tp_event->perf_events; 110 pcpu_list = tp_event->perf_events;
110 if (WARN_ON_ONCE(!list)) 111 if (WARN_ON_ONCE(!pcpu_list))
111 return -EINVAL; 112 return -EINVAL;
112 113
113 list = this_cpu_ptr(list); 114 if (!(flags & PERF_EF_START))
115 p_event->hw.state = PERF_HES_STOPPED;
116
117 list = this_cpu_ptr(pcpu_list);
114 hlist_add_head_rcu(&p_event->hlist_entry, list); 118 hlist_add_head_rcu(&p_event->hlist_entry, list);
115 119
116 return 0; 120 return 0;
117} 121}
118 122
119void perf_trace_disable(struct perf_event *p_event) 123void perf_trace_del(struct perf_event *p_event, int flags)
120{ 124{
121 hlist_del_rcu(&p_event->hlist_entry); 125 hlist_del_rcu(&p_event->hlist_entry);
122} 126}
@@ -142,7 +146,7 @@ void perf_trace_destroy(struct perf_event *p_event)
142 tp_event->perf_events = NULL; 146 tp_event->perf_events = NULL;
143 147
144 if (!--total_ref_count) { 148 if (!--total_ref_count) {
145 for (i = 0; i < 4; i++) { 149 for (i = 0; i < PERF_NR_CONTEXTS; i++) {
146 free_percpu(perf_trace_buf[i]); 150 free_percpu(perf_trace_buf[i]);
147 perf_trace_buf[i] = NULL; 151 perf_trace_buf[i] = NULL;
148 } 152 }
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 4c758f146328..398c0e8b332c 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -600,21 +600,29 @@ out:
600 600
601enum { 601enum {
602 FORMAT_HEADER = 1, 602 FORMAT_HEADER = 1,
603 FORMAT_PRINTFMT = 2, 603 FORMAT_FIELD_SEPERATOR = 2,
604 FORMAT_PRINTFMT = 3,
604}; 605};
605 606
606static void *f_next(struct seq_file *m, void *v, loff_t *pos) 607static void *f_next(struct seq_file *m, void *v, loff_t *pos)
607{ 608{
608 struct ftrace_event_call *call = m->private; 609 struct ftrace_event_call *call = m->private;
609 struct ftrace_event_field *field; 610 struct ftrace_event_field *field;
610 struct list_head *head; 611 struct list_head *common_head = &ftrace_common_fields;
612 struct list_head *head = trace_get_fields(call);
611 613
612 (*pos)++; 614 (*pos)++;
613 615
614 switch ((unsigned long)v) { 616 switch ((unsigned long)v) {
615 case FORMAT_HEADER: 617 case FORMAT_HEADER:
616 head = &ftrace_common_fields; 618 if (unlikely(list_empty(common_head)))
619 return NULL;
620
621 field = list_entry(common_head->prev,
622 struct ftrace_event_field, link);
623 return field;
617 624
625 case FORMAT_FIELD_SEPERATOR:
618 if (unlikely(list_empty(head))) 626 if (unlikely(list_empty(head)))
619 return NULL; 627 return NULL;
620 628
@@ -626,31 +634,10 @@ static void *f_next(struct seq_file *m, void *v, loff_t *pos)
626 return NULL; 634 return NULL;
627 } 635 }
628 636
629 head = trace_get_fields(call);
630
631 /*
632 * To separate common fields from event fields, the
633 * LSB is set on the first event field. Clear it in case.
634 */
635 v = (void *)((unsigned long)v & ~1L);
636
637 field = v; 637 field = v;
638 /* 638 if (field->link.prev == common_head)
639 * If this is a common field, and at the end of the list, then 639 return (void *)FORMAT_FIELD_SEPERATOR;
640 * continue with main list. 640 else if (field->link.prev == head)
641 */
642 if (field->link.prev == &ftrace_common_fields) {
643 if (unlikely(list_empty(head)))
644 return NULL;
645 field = list_entry(head->prev, struct ftrace_event_field, link);
646 /* Set the LSB to notify f_show to print an extra newline */
647 field = (struct ftrace_event_field *)
648 ((unsigned long)field | 1);
649 return field;
650 }
651
652 /* If we are done tell f_show to print the format */
653 if (field->link.prev == head)
654 return (void *)FORMAT_PRINTFMT; 641 return (void *)FORMAT_PRINTFMT;
655 642
656 field = list_entry(field->link.prev, struct ftrace_event_field, link); 643 field = list_entry(field->link.prev, struct ftrace_event_field, link);
@@ -688,22 +675,16 @@ static int f_show(struct seq_file *m, void *v)
688 seq_printf(m, "format:\n"); 675 seq_printf(m, "format:\n");
689 return 0; 676 return 0;
690 677
678 case FORMAT_FIELD_SEPERATOR:
679 seq_putc(m, '\n');
680 return 0;
681
691 case FORMAT_PRINTFMT: 682 case FORMAT_PRINTFMT:
692 seq_printf(m, "\nprint fmt: %s\n", 683 seq_printf(m, "\nprint fmt: %s\n",
693 call->print_fmt); 684 call->print_fmt);
694 return 0; 685 return 0;
695 } 686 }
696 687
697 /*
698 * To separate common fields from event fields, the
699 * LSB is set on the first event field. Clear it and
700 * print a newline if it is set.
701 */
702 if ((unsigned long)v & 1) {
703 seq_putc(m, '\n');
704 v = (void *)((unsigned long)v & ~1L);
705 }
706
707 field = v; 688 field = v;
708 689
709 /* 690 /*
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 6f233698518e..02c708ae0d42 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -15,15 +15,19 @@
15#include "trace.h" 15#include "trace.h"
16#include "trace_output.h" 16#include "trace_output.h"
17 17
18/* When set, irq functions will be ignored */
19static int ftrace_graph_skip_irqs;
20
18struct fgraph_cpu_data { 21struct fgraph_cpu_data {
19 pid_t last_pid; 22 pid_t last_pid;
20 int depth; 23 int depth;
24 int depth_irq;
21 int ignore; 25 int ignore;
22 unsigned long enter_funcs[FTRACE_RETFUNC_DEPTH]; 26 unsigned long enter_funcs[FTRACE_RETFUNC_DEPTH];
23}; 27};
24 28
25struct fgraph_data { 29struct fgraph_data {
26 struct fgraph_cpu_data *cpu_data; 30 struct fgraph_cpu_data __percpu *cpu_data;
27 31
28 /* Place to preserve last processed entry. */ 32 /* Place to preserve last processed entry. */
29 struct ftrace_graph_ent_entry ent; 33 struct ftrace_graph_ent_entry ent;
@@ -41,6 +45,7 @@ struct fgraph_data {
41#define TRACE_GRAPH_PRINT_PROC 0x8 45#define TRACE_GRAPH_PRINT_PROC 0x8
42#define TRACE_GRAPH_PRINT_DURATION 0x10 46#define TRACE_GRAPH_PRINT_DURATION 0x10
43#define TRACE_GRAPH_PRINT_ABS_TIME 0x20 47#define TRACE_GRAPH_PRINT_ABS_TIME 0x20
48#define TRACE_GRAPH_PRINT_IRQS 0x40
44 49
45static struct tracer_opt trace_opts[] = { 50static struct tracer_opt trace_opts[] = {
46 /* Display overruns? (for self-debug purpose) */ 51 /* Display overruns? (for self-debug purpose) */
@@ -55,13 +60,15 @@ static struct tracer_opt trace_opts[] = {
55 { TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) }, 60 { TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) },
56 /* Display absolute time of an entry */ 61 /* Display absolute time of an entry */
57 { TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) }, 62 { TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) },
63 /* Display interrupts */
64 { TRACER_OPT(funcgraph-irqs, TRACE_GRAPH_PRINT_IRQS) },
58 { } /* Empty entry */ 65 { } /* Empty entry */
59}; 66};
60 67
61static struct tracer_flags tracer_flags = { 68static struct tracer_flags tracer_flags = {
62 /* Don't display overruns and proc by default */ 69 /* Don't display overruns and proc by default */
63 .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD | 70 .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD |
64 TRACE_GRAPH_PRINT_DURATION, 71 TRACE_GRAPH_PRINT_DURATION | TRACE_GRAPH_PRINT_IRQS,
65 .opts = trace_opts 72 .opts = trace_opts
66}; 73};
67 74
@@ -204,6 +211,14 @@ int __trace_graph_entry(struct trace_array *tr,
204 return 1; 211 return 1;
205} 212}
206 213
214static inline int ftrace_graph_ignore_irqs(void)
215{
216 if (!ftrace_graph_skip_irqs)
217 return 0;
218
219 return in_irq();
220}
221
207int trace_graph_entry(struct ftrace_graph_ent *trace) 222int trace_graph_entry(struct ftrace_graph_ent *trace)
208{ 223{
209 struct trace_array *tr = graph_array; 224 struct trace_array *tr = graph_array;
@@ -218,7 +233,8 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
218 return 0; 233 return 0;
219 234
220 /* trace it when it is-nested-in or is a function enabled. */ 235 /* trace it when it is-nested-in or is a function enabled. */
221 if (!(trace->depth || ftrace_graph_addr(trace->func))) 236 if (!(trace->depth || ftrace_graph_addr(trace->func)) ||
237 ftrace_graph_ignore_irqs())
222 return 0; 238 return 0;
223 239
224 local_irq_save(flags); 240 local_irq_save(flags);
@@ -855,6 +871,92 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s,
855 return 0; 871 return 0;
856} 872}
857 873
874/*
875 * Entry check for irq code
876 *
877 * returns 1 if
878 * - we are inside irq code
 879 * - we just entered irq code
 880 *
 881 * returns 0 if
 882 * - funcgraph-irqs option is set
883 * - we are not inside irq code
884 */
885static int
886check_irq_entry(struct trace_iterator *iter, u32 flags,
887 unsigned long addr, int depth)
888{
889 int cpu = iter->cpu;
890 struct fgraph_data *data = iter->private;
891 int *depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq);
892
893 if (flags & TRACE_GRAPH_PRINT_IRQS)
894 return 0;
895
896 /*
897 * We are inside the irq code
898 */
899 if (*depth_irq >= 0)
900 return 1;
901
902 if ((addr < (unsigned long)__irqentry_text_start) ||
903 (addr >= (unsigned long)__irqentry_text_end))
904 return 0;
905
906 /*
907 * We are entering irq code.
908 */
909 *depth_irq = depth;
910 return 1;
911}
912
913/*
914 * Return check for irq code
915 *
916 * returns 1 if
917 * - we are inside irq code
918 * - we just left irq code
919 *
920 * returns 0 if
 921 * - funcgraph-irqs option is set
922 * - we are not inside irq code
923 */
924static int
925check_irq_return(struct trace_iterator *iter, u32 flags, int depth)
926{
927 int cpu = iter->cpu;
928 struct fgraph_data *data = iter->private;
929 int *depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq);
930
931 if (flags & TRACE_GRAPH_PRINT_IRQS)
932 return 0;
933
934 /*
935 * We are not inside the irq code.
936 */
937 if (*depth_irq == -1)
938 return 0;
939
940 /*
 941 * We are inside the irq code, and this is the return entry.
 942 * Let's not trace it and clear the entry depth, since
 943 * we are out of irq code.
 944 *
 945 * This condition ensures that we 'leave the irq code' once
 946 * we are out of the entry depth, thus protecting us from
 947 * losing the RETURN entry.
948 */
949 if (*depth_irq >= depth) {
950 *depth_irq = -1;
951 return 1;
952 }
953
954 /*
955 * We are inside the irq code, and this is not the entry.
956 */
957 return 1;
958}
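/*
 * Illustration of the depth_irq bookkeeping above (hypothetical trace, not
 * from this patch): with funcgraph-irqs disabled, a function entry at depth 3
 * whose address lies in the __irqentry_text section sets depth_irq = 3; every
 * nested entry and return is then suppressed, until a return at depth <= 3
 * resets depth_irq to -1 and normal output resumes.
 */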
959
858static enum print_line_t 960static enum print_line_t
859print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, 961print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
860 struct trace_iterator *iter, u32 flags) 962 struct trace_iterator *iter, u32 flags)
@@ -865,6 +967,9 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
865 static enum print_line_t ret; 967 static enum print_line_t ret;
866 int cpu = iter->cpu; 968 int cpu = iter->cpu;
867 969
970 if (check_irq_entry(iter, flags, call->func, call->depth))
971 return TRACE_TYPE_HANDLED;
972
868 if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func, flags)) 973 if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func, flags))
869 return TRACE_TYPE_PARTIAL_LINE; 974 return TRACE_TYPE_PARTIAL_LINE;
870 975
@@ -902,6 +1007,9 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
902 int ret; 1007 int ret;
903 int i; 1008 int i;
904 1009
1010 if (check_irq_return(iter, flags, trace->depth))
1011 return TRACE_TYPE_HANDLED;
1012
905 if (data) { 1013 if (data) {
906 struct fgraph_cpu_data *cpu_data; 1014 struct fgraph_cpu_data *cpu_data;
907 int cpu = iter->cpu; 1015 int cpu = iter->cpu;
@@ -1210,9 +1318,12 @@ void graph_trace_open(struct trace_iterator *iter)
1210 pid_t *pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid); 1318 pid_t *pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
1211 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth); 1319 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
1212 int *ignore = &(per_cpu_ptr(data->cpu_data, cpu)->ignore); 1320 int *ignore = &(per_cpu_ptr(data->cpu_data, cpu)->ignore);
1321 int *depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq);
1322
1213 *pid = -1; 1323 *pid = -1;
1214 *depth = 0; 1324 *depth = 0;
1215 *ignore = 0; 1325 *ignore = 0;
1326 *depth_irq = -1;
1216 } 1327 }
1217 1328
1218 iter->private = data; 1329 iter->private = data;
@@ -1235,6 +1346,14 @@ void graph_trace_close(struct trace_iterator *iter)
1235 } 1346 }
1236} 1347}
1237 1348
1349static int func_graph_set_flag(u32 old_flags, u32 bit, int set)
1350{
1351 if (bit == TRACE_GRAPH_PRINT_IRQS)
1352 ftrace_graph_skip_irqs = !set;
1353
1354 return 0;
1355}
1356
1238static struct trace_event_functions graph_functions = { 1357static struct trace_event_functions graph_functions = {
1239 .trace = print_graph_function_event, 1358 .trace = print_graph_function_event,
1240}; 1359};
@@ -1261,6 +1380,7 @@ static struct tracer graph_trace __read_mostly = {
1261 .print_line = print_graph_function, 1380 .print_line = print_graph_function,
1262 .print_header = print_graph_headers, 1381 .print_header = print_graph_headers,
1263 .flags = &tracer_flags, 1382 .flags = &tracer_flags,
1383 .set_flag = func_graph_set_flag,
1264#ifdef CONFIG_FTRACE_SELFTEST 1384#ifdef CONFIG_FTRACE_SELFTEST
1265 .selftest = trace_selftest_startup_function_graph, 1385 .selftest = trace_selftest_startup_function_graph,
1266#endif 1386#endif
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 7f9c3c52ecc1..dc8e16824b51 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -43,7 +43,6 @@ static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
43static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); 43static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
44#endif 44#endif
45 45
46static int __read_mostly did_panic;
47static int __initdata no_watchdog; 46static int __initdata no_watchdog;
48 47
49 48
@@ -187,18 +186,6 @@ static int is_softlockup(unsigned long touch_ts)
187 return 0; 186 return 0;
188} 187}
189 188
190static int
191watchdog_panic(struct notifier_block *this, unsigned long event, void *ptr)
192{
193 did_panic = 1;
194
195 return NOTIFY_DONE;
196}
197
198static struct notifier_block panic_block = {
199 .notifier_call = watchdog_panic,
200};
201
202#ifdef CONFIG_HARDLOCKUP_DETECTOR 189#ifdef CONFIG_HARDLOCKUP_DETECTOR
203static struct perf_event_attr wd_hw_attr = { 190static struct perf_event_attr wd_hw_attr = {
204 .type = PERF_TYPE_HARDWARE, 191 .type = PERF_TYPE_HARDWARE,
@@ -371,14 +358,14 @@ static int watchdog_nmi_enable(int cpu)
371 /* Try to register using hardware perf events */ 358 /* Try to register using hardware perf events */
372 wd_attr = &wd_hw_attr; 359 wd_attr = &wd_hw_attr;
373 wd_attr->sample_period = hw_nmi_get_sample_period(); 360 wd_attr->sample_period = hw_nmi_get_sample_period();
374 event = perf_event_create_kernel_counter(wd_attr, cpu, -1, watchdog_overflow_callback); 361 event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback);
375 if (!IS_ERR(event)) { 362 if (!IS_ERR(event)) {
376 printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n"); 363 printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n");
377 goto out_save; 364 goto out_save;
378 } 365 }
379 366
380 printk(KERN_ERR "NMI watchdog failed to create perf event on cpu%i: %p\n", cpu, event); 367 printk(KERN_ERR "NMI watchdog failed to create perf event on cpu%i: %p\n", cpu, event);
381 return -1; 368 return PTR_ERR(event);
382 369
383 /* success path */ 370 /* success path */
384out_save: 371out_save:
@@ -422,17 +409,19 @@ static int watchdog_prepare_cpu(int cpu)
422static int watchdog_enable(int cpu) 409static int watchdog_enable(int cpu)
423{ 410{
424 struct task_struct *p = per_cpu(softlockup_watchdog, cpu); 411 struct task_struct *p = per_cpu(softlockup_watchdog, cpu);
412 int err;
425 413
426 /* enable the perf event */ 414 /* enable the perf event */
427 if (watchdog_nmi_enable(cpu) != 0) 415 err = watchdog_nmi_enable(cpu);
428 return -1; 416 if (err)
417 return err;
429 418
430 /* create the watchdog thread */ 419 /* create the watchdog thread */
431 if (!p) { 420 if (!p) {
432 p = kthread_create(watchdog, (void *)(unsigned long)cpu, "watchdog/%d", cpu); 421 p = kthread_create(watchdog, (void *)(unsigned long)cpu, "watchdog/%d", cpu);
433 if (IS_ERR(p)) { 422 if (IS_ERR(p)) {
434 printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu); 423 printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu);
435 return -1; 424 return PTR_ERR(p);
436 } 425 }
437 kthread_bind(p, cpu); 426 kthread_bind(p, cpu);
438 per_cpu(watchdog_touch_ts, cpu) = 0; 427 per_cpu(watchdog_touch_ts, cpu) = 0;
@@ -484,6 +473,9 @@ static void watchdog_disable_all_cpus(void)
484{ 473{
485 int cpu; 474 int cpu;
486 475
476 if (no_watchdog)
477 return;
478
487 for_each_online_cpu(cpu) 479 for_each_online_cpu(cpu)
488 watchdog_disable(cpu); 480 watchdog_disable(cpu);
489 481
@@ -526,17 +518,16 @@ static int __cpuinit
526cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) 518cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
527{ 519{
528 int hotcpu = (unsigned long)hcpu; 520 int hotcpu = (unsigned long)hcpu;
521 int err = 0;
529 522
530 switch (action) { 523 switch (action) {
531 case CPU_UP_PREPARE: 524 case CPU_UP_PREPARE:
532 case CPU_UP_PREPARE_FROZEN: 525 case CPU_UP_PREPARE_FROZEN:
533 if (watchdog_prepare_cpu(hotcpu)) 526 err = watchdog_prepare_cpu(hotcpu);
534 return NOTIFY_BAD;
535 break; 527 break;
536 case CPU_ONLINE: 528 case CPU_ONLINE:
537 case CPU_ONLINE_FROZEN: 529 case CPU_ONLINE_FROZEN:
538 if (watchdog_enable(hotcpu)) 530 err = watchdog_enable(hotcpu);
539 return NOTIFY_BAD;
540 break; 531 break;
541#ifdef CONFIG_HOTPLUG_CPU 532#ifdef CONFIG_HOTPLUG_CPU
542 case CPU_UP_CANCELED: 533 case CPU_UP_CANCELED:
@@ -549,7 +540,7 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
549 break; 540 break;
550#endif /* CONFIG_HOTPLUG_CPU */ 541#endif /* CONFIG_HOTPLUG_CPU */
551 } 542 }
552 return NOTIFY_OK; 543 return notifier_from_errno(err);
553} 544}
554 545
555static struct notifier_block __cpuinitdata cpu_nfb = { 546static struct notifier_block __cpuinitdata cpu_nfb = {
@@ -565,13 +556,11 @@ static int __init spawn_watchdog_task(void)
565 return 0; 556 return 0;
566 557
567 err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); 558 err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
568 WARN_ON(err == NOTIFY_BAD); 559 WARN_ON(notifier_to_errno(err));
569 560
570 cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); 561 cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
571 register_cpu_notifier(&cpu_nfb); 562 register_cpu_notifier(&cpu_nfb);
572 563
573 atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
574
575 return 0; 564 return 0;
576} 565}
577early_initcall(spawn_watchdog_task); 566early_initcall(spawn_watchdog_task);
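
The hotplug callback in this file now encodes its errno with notifier_from_errno() instead of returning NOTIFY_BAD, and spawn_watchdog_task() decodes the result with notifier_to_errno(). A minimal sketch of that round trip, using a hypothetical callback that is not part of this patch:

    #include <linux/notifier.h>
    #include <linux/cpu.h>
    #include <linux/errno.h>

    /* Hypothetical callback: report failure through the notifier return value. */
    static int sample_cpu_callback(struct notifier_block *nfb,
                                   unsigned long action, void *hcpu)
    {
            int err = 0;

            if (action == CPU_UP_PREPARE)
                    err = -ENOMEM;          /* pretend preparation failed */

            /* 0 becomes NOTIFY_OK; a negative errno becomes a "bad" value */
            return notifier_from_errno(err);
    }

    /* Caller side, as spawn_watchdog_task() does above. */
    static int sample_bringup(void)
    {
            int ret = sample_cpu_callback(NULL, CPU_UP_PREPARE, NULL);

            return notifier_to_errno(ret);  /* back to 0 or -ENOMEM */
    }
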
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 1b4afd2e6ca0..e85d549b6eac 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -482,6 +482,7 @@ config PROVE_LOCKING
482 select DEBUG_SPINLOCK 482 select DEBUG_SPINLOCK
483 select DEBUG_MUTEXES 483 select DEBUG_MUTEXES
484 select DEBUG_LOCK_ALLOC 484 select DEBUG_LOCK_ALLOC
485 select TRACE_IRQFLAGS
485 default n 486 default n
486 help 487 help
487 This feature enables the kernel to prove that all locking 488 This feature enables the kernel to prove that all locking
@@ -579,11 +580,10 @@ config DEBUG_LOCKDEP
579 of more runtime overhead. 580 of more runtime overhead.
580 581
581config TRACE_IRQFLAGS 582config TRACE_IRQFLAGS
582 depends on DEBUG_KERNEL
583 bool 583 bool
584 default y 584 help
585 depends on TRACE_IRQFLAGS_SUPPORT 585 Enables hooks to interrupt enabling and disabling for
586 depends on PROVE_LOCKING 586 either tracing or lock debugging.
587 587
588config DEBUG_SPINLOCK_SLEEP 588config DEBUG_SPINLOCK_SLEEP
589 bool "Spinlock debugging: sleep-inside-spinlock checking" 589 bool "Spinlock debugging: sleep-inside-spinlock checking"
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 251997a95483..282806ba7a57 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -243,6 +243,7 @@ void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
243 unlock_sock_fast(sk, slow); 243 unlock_sock_fast(sk, slow);
244 244
245 /* skb is now orphaned, can be freed outside of locked section */ 245 /* skb is now orphaned, can be freed outside of locked section */
246 trace_kfree_skb(skb, skb_free_datagram_locked);
246 __kfree_skb(skb); 247 __kfree_skb(skb);
247} 248}
248EXPORT_SYMBOL(skb_free_datagram_locked); 249EXPORT_SYMBOL(skb_free_datagram_locked);
diff --git a/net/core/dev.c b/net/core/dev.c
index 660dd41aaaa6..7ec85e27beed 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -128,6 +128,8 @@
128#include <linux/jhash.h> 128#include <linux/jhash.h>
129#include <linux/random.h> 129#include <linux/random.h>
130#include <trace/events/napi.h> 130#include <trace/events/napi.h>
131#include <trace/events/net.h>
132#include <trace/events/skb.h>
131#include <linux/pci.h> 133#include <linux/pci.h>
132 134
133#include "net-sysfs.h" 135#include "net-sysfs.h"
@@ -1978,6 +1980,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1978 } 1980 }
1979 1981
1980 rc = ops->ndo_start_xmit(skb, dev); 1982 rc = ops->ndo_start_xmit(skb, dev);
1983 trace_net_dev_xmit(skb, rc);
1981 if (rc == NETDEV_TX_OK) 1984 if (rc == NETDEV_TX_OK)
1982 txq_trans_update(txq); 1985 txq_trans_update(txq);
1983 return rc; 1986 return rc;
@@ -1998,6 +2001,7 @@ gso:
1998 skb_dst_drop(nskb); 2001 skb_dst_drop(nskb);
1999 2002
2000 rc = ops->ndo_start_xmit(nskb, dev); 2003 rc = ops->ndo_start_xmit(nskb, dev);
2004 trace_net_dev_xmit(nskb, rc);
2001 if (unlikely(rc != NETDEV_TX_OK)) { 2005 if (unlikely(rc != NETDEV_TX_OK)) {
2002 if (rc & ~NETDEV_TX_MASK) 2006 if (rc & ~NETDEV_TX_MASK)
2003 goto out_kfree_gso_skb; 2007 goto out_kfree_gso_skb;
@@ -2186,6 +2190,7 @@ int dev_queue_xmit(struct sk_buff *skb)
2186#ifdef CONFIG_NET_CLS_ACT 2190#ifdef CONFIG_NET_CLS_ACT
2187 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS); 2191 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
2188#endif 2192#endif
2193 trace_net_dev_queue(skb);
2189 if (q->enqueue) { 2194 if (q->enqueue) {
2190 rc = __dev_xmit_skb(skb, q, dev, txq); 2195 rc = __dev_xmit_skb(skb, q, dev, txq);
2191 goto out; 2196 goto out;
@@ -2512,6 +2517,7 @@ int netif_rx(struct sk_buff *skb)
2512 if (netdev_tstamp_prequeue) 2517 if (netdev_tstamp_prequeue)
2513 net_timestamp_check(skb); 2518 net_timestamp_check(skb);
2514 2519
2520 trace_netif_rx(skb);
2515#ifdef CONFIG_RPS 2521#ifdef CONFIG_RPS
2516 { 2522 {
2517 struct rps_dev_flow voidflow, *rflow = &voidflow; 2523 struct rps_dev_flow voidflow, *rflow = &voidflow;
@@ -2571,6 +2577,7 @@ static void net_tx_action(struct softirq_action *h)
2571 clist = clist->next; 2577 clist = clist->next;
2572 2578
2573 WARN_ON(atomic_read(&skb->users)); 2579 WARN_ON(atomic_read(&skb->users));
2580 trace_kfree_skb(skb, net_tx_action);
2574 __kfree_skb(skb); 2581 __kfree_skb(skb);
2575 } 2582 }
2576 } 2583 }
@@ -2828,6 +2835,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
2828 if (!netdev_tstamp_prequeue) 2835 if (!netdev_tstamp_prequeue)
2829 net_timestamp_check(skb); 2836 net_timestamp_check(skb);
2830 2837
2838 trace_netif_receive_skb(skb);
2831 if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb)) 2839 if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb))
2832 return NET_RX_SUCCESS; 2840 return NET_RX_SUCCESS;
2833 2841
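
The trace_net_dev_queue()/trace_net_dev_xmit()/trace_netif_rx()/trace_netif_receive_skb() calls added above use tracepoints declared in the new include/trace/events/net.h, which appears in the diffstat but not in this excerpt. As a rough illustration only (the real field layout may differ), a tracepoint matching the trace_net_dev_xmit(skb, rc) call site and the skbaddr/len/rc/dev_name fields consumed by netdev-times.py further down could look like this:

    /* Illustrative sketch -- the authoritative definition lives in
     * include/trace/events/net.h added by this series. */
    #undef TRACE_SYSTEM
    #define TRACE_SYSTEM net

    #include <linux/skbuff.h>
    #include <linux/netdevice.h>
    #include <linux/tracepoint.h>

    TRACE_EVENT(net_dev_xmit,

            TP_PROTO(struct sk_buff *skb, int rc),

            TP_ARGS(skb, rc),

            TP_STRUCT__entry(
                    __field(void *,         skbaddr)
                    __field(unsigned int,   len)
                    __field(int,            rc)
                    __string(name,          skb->dev->name)
            ),

            TP_fast_assign(
                    __entry->skbaddr = skb;
                    __entry->len     = skb->len;
                    __entry->rc      = rc;
                    __assign_str(name, skb->dev->name);
            ),

            TP_printk("dev=%s skbaddr=%p len=%u rc=%d",
                      __get_str(name), __entry->skbaddr,
                      __entry->len, __entry->rc)
    );
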
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index afa6380ed88a..7f1bb2aba03b 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -26,6 +26,7 @@
26 26
27#define CREATE_TRACE_POINTS 27#define CREATE_TRACE_POINTS
28#include <trace/events/skb.h> 28#include <trace/events/skb.h>
29#include <trace/events/net.h>
29#include <trace/events/napi.h> 30#include <trace/events/napi.h>
30 31
31EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); 32EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c83b421341c0..56ba3c4e4761 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -466,6 +466,7 @@ void consume_skb(struct sk_buff *skb)
466 smp_rmb(); 466 smp_rmb();
467 else if (likely(!atomic_dec_and_test(&skb->users))) 467 else if (likely(!atomic_dec_and_test(&skb->users)))
468 return; 468 return;
469 trace_consume_skb(skb);
469 __kfree_skb(skb); 470 __kfree_skb(skb);
470} 471}
471EXPORT_SYMBOL(consume_skb); 472EXPORT_SYMBOL(consume_skb);
diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt
index 5164a655c39f..b2c63309a651 100644
--- a/tools/perf/Documentation/perf-annotate.txt
+++ b/tools/perf/Documentation/perf-annotate.txt
@@ -8,7 +8,7 @@ perf-annotate - Read perf.data (created by perf record) and display annotated co
8SYNOPSIS 8SYNOPSIS
9-------- 9--------
10[verse] 10[verse]
11'perf annotate' [-i <file> | --input=file] symbol_name 11'perf annotate' [-i <file> | --input=file] [symbol_name]
12 12
13DESCRIPTION 13DESCRIPTION
14----------- 14-----------
@@ -24,6 +24,13 @@ OPTIONS
24--input=:: 24--input=::
25 Input file name. (default: perf.data) 25 Input file name. (default: perf.data)
26 26
27--stdio:: Use the stdio interface.
28
 29--tui:: Use the TUI interface. Use of --tui requires a tty; if one is not
 30 present, as when piping to other commands, the stdio interface is
 31 used. This interface starts by centering on the line with the most
 32 samples; TAB/UNTAB cycles through the lines with the most samples.
33
27SEE ALSO 34SEE ALSO
28-------- 35--------
29linkperf:perf-record[1] 36linkperf:perf-record[1], linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index abfabe9147a4..12052c9ed0ba 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -65,6 +65,13 @@ OPTIONS
65 the tree is considered as a new profiled object. + 65 the tree is considered as a new profiled object. +
66 Default: fractal,0.5. 66 Default: fractal,0.5.
67 67
68--stdio:: Use the stdio interface.
69
 70--tui:: Use the TUI interface, which is integrated with annotate and allows
 71 zooming into DSOs or threads, among other features. Use of --tui
 72 requires a tty; if one is not present, as when piping to other
 73 commands, the stdio interface is used.
74
68SEE ALSO 75SEE ALSO
69-------- 76--------
70linkperf:perf-stat[1] 77linkperf:perf-stat[1]
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 4f1fa77c1feb..fe1e30722f3b 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -313,6 +313,9 @@ TEST_PROGRAMS =
313 313
314SCRIPT_SH += perf-archive.sh 314SCRIPT_SH += perf-archive.sh
315 315
316grep-libs = $(filter -l%,$(1))
317strip-libs = $(filter-out -l%,$(1))
318
316# 319#
317# No Perl scripts right now: 320# No Perl scripts right now:
318# 321#
@@ -588,14 +591,17 @@ endif
588ifdef NO_LIBPERL 591ifdef NO_LIBPERL
589 BASIC_CFLAGS += -DNO_LIBPERL 592 BASIC_CFLAGS += -DNO_LIBPERL
590else 593else
591 PERL_EMBED_LDOPTS = `perl -MExtUtils::Embed -e ldopts 2>/dev/null` 594 PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null)
595 PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS))
596 PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS))
592 PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null` 597 PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null`
593 FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS) 598 FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS)
594 599
595 ifneq ($(call try-cc,$(SOURCE_PERL_EMBED),$(FLAGS_PERL_EMBED)),y) 600 ifneq ($(call try-cc,$(SOURCE_PERL_EMBED),$(FLAGS_PERL_EMBED)),y)
596 BASIC_CFLAGS += -DNO_LIBPERL 601 BASIC_CFLAGS += -DNO_LIBPERL
597 else 602 else
598 ALL_LDFLAGS += $(PERL_EMBED_LDOPTS) 603 ALL_LDFLAGS += $(PERL_EMBED_LDFLAGS)
604 EXTLIBS += $(PERL_EMBED_LIBADD)
599 LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-perl.o 605 LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-perl.o
600 LIB_OBJS += $(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o 606 LIB_OBJS += $(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o
601 endif 607 endif
@@ -604,13 +610,16 @@ endif
604ifdef NO_LIBPYTHON 610ifdef NO_LIBPYTHON
605 BASIC_CFLAGS += -DNO_LIBPYTHON 611 BASIC_CFLAGS += -DNO_LIBPYTHON
606else 612else
607 PYTHON_EMBED_LDOPTS = `python-config --ldflags 2>/dev/null` 613 PYTHON_EMBED_LDOPTS = $(shell python-config --ldflags 2>/dev/null)
614 PYTHON_EMBED_LDFLAGS = $(call strip-libs,$(PYTHON_EMBED_LDOPTS))
615 PYTHON_EMBED_LIBADD = $(call grep-libs,$(PYTHON_EMBED_LDOPTS))
608 PYTHON_EMBED_CCOPTS = `python-config --cflags 2>/dev/null` 616 PYTHON_EMBED_CCOPTS = `python-config --cflags 2>/dev/null`
609 FLAGS_PYTHON_EMBED=$(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS) 617 FLAGS_PYTHON_EMBED=$(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
610 ifneq ($(call try-cc,$(SOURCE_PYTHON_EMBED),$(FLAGS_PYTHON_EMBED)),y) 618 ifneq ($(call try-cc,$(SOURCE_PYTHON_EMBED),$(FLAGS_PYTHON_EMBED)),y)
611 BASIC_CFLAGS += -DNO_LIBPYTHON 619 BASIC_CFLAGS += -DNO_LIBPYTHON
612 else 620 else
613 ALL_LDFLAGS += $(PYTHON_EMBED_LDOPTS) 621 ALL_LDFLAGS += $(PYTHON_EMBED_LDFLAGS)
622 EXTLIBS += $(PYTHON_EMBED_LIBADD)
614 LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o 623 LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o
615 LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o 624 LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o
616 endif 625 endif
@@ -653,6 +662,15 @@ else
653 endif 662 endif
654endif 663endif
655 664
665
666ifdef NO_STRLCPY
667 BASIC_CFLAGS += -DNO_STRLCPY
668else
669 ifneq ($(call try-cc,$(SOURCE_STRLCPY),),y)
670 BASIC_CFLAGS += -DNO_STRLCPY
671 endif
672endif
673
656ifndef CC_LD_DYNPATH 674ifndef CC_LD_DYNPATH
657 ifdef NO_R_TO_GCC_LINKER 675 ifdef NO_R_TO_GCC_LINKER
658 # Some gcc does not accept and pass -R to the linker to specify 676 # Some gcc does not accept and pass -R to the linker to specify
@@ -910,8 +928,8 @@ $(OUTPUT)perf.o: perf.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
910 $(ALL_CFLAGS) -c $(filter %.c,$^) -o $@ 928 $(ALL_CFLAGS) -c $(filter %.c,$^) -o $@
911 929
912$(OUTPUT)perf$X: $(OUTPUT)perf.o $(BUILTIN_OBJS) $(PERFLIBS) 930$(OUTPUT)perf$X: $(OUTPUT)perf.o $(BUILTIN_OBJS) $(PERFLIBS)
913 $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(OUTPUT)perf.o \ 931 $(QUIET_LINK)$(CC) $(ALL_CFLAGS) $(ALL_LDFLAGS) $(OUTPUT)perf.o \
914 $(BUILTIN_OBJS) $(ALL_LDFLAGS) $(LIBS) 932 $(BUILTIN_OBJS) $(LIBS) -o $@
915 933
916$(OUTPUT)builtin-help.o: builtin-help.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS 934$(OUTPUT)builtin-help.o: builtin-help.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
917 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) \ 935 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) \
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 1478dc64bf15..6d5604d8df95 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -28,7 +28,7 @@
28 28
29static char const *input_name = "perf.data"; 29static char const *input_name = "perf.data";
30 30
31static bool force; 31static bool force, use_tui, use_stdio;
32 32
33static bool full_paths; 33static bool full_paths;
34 34
@@ -321,7 +321,7 @@ static int hist_entry__tty_annotate(struct hist_entry *he)
321 321
322static void hists__find_annotations(struct hists *self) 322static void hists__find_annotations(struct hists *self)
323{ 323{
324 struct rb_node *first = rb_first(&self->entries), *nd = first; 324 struct rb_node *nd = rb_first(&self->entries), *next;
325 int key = KEY_RIGHT; 325 int key = KEY_RIGHT;
326 326
327 while (nd) { 327 while (nd) {
@@ -343,20 +343,19 @@ find_next:
343 343
344 if (use_browser > 0) { 344 if (use_browser > 0) {
345 key = hist_entry__tui_annotate(he); 345 key = hist_entry__tui_annotate(he);
346 if (is_exit_key(key))
347 break;
348 switch (key) { 346 switch (key) {
349 case KEY_RIGHT: 347 case KEY_RIGHT:
350 case '\t': 348 next = rb_next(nd);
351 nd = rb_next(nd);
352 break; 349 break;
353 case KEY_LEFT: 350 case KEY_LEFT:
354 if (nd == first) 351 next = rb_prev(nd);
355 continue;
356 nd = rb_prev(nd);
357 default:
358 break; 352 break;
353 default:
354 return;
359 } 355 }
356
357 if (next != NULL)
358 nd = next;
360 } else { 359 } else {
361 hist_entry__tty_annotate(he); 360 hist_entry__tty_annotate(he);
362 nd = rb_next(nd); 361 nd = rb_next(nd);
@@ -428,6 +427,8 @@ static const struct option options[] = {
428 "be more verbose (show symbol address, etc)"), 427 "be more verbose (show symbol address, etc)"),
429 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 428 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
430 "dump raw trace in ASCII"), 429 "dump raw trace in ASCII"),
430 OPT_BOOLEAN(0, "tui", &use_tui, "Use the TUI interface"),
431 OPT_BOOLEAN(0, "stdio", &use_stdio, "Use the stdio interface"),
431 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, 432 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
432 "file", "vmlinux pathname"), 433 "file", "vmlinux pathname"),
433 OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, 434 OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
@@ -443,6 +444,11 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __used)
443{ 444{
444 argc = parse_options(argc, argv, options, annotate_usage, 0); 445 argc = parse_options(argc, argv, options, annotate_usage, 0);
445 446
447 if (use_stdio)
448 use_browser = 0;
449 else if (use_tui)
450 use_browser = 1;
451
446 setup_browser(); 452 setup_browser();
447 453
448 symbol_conf.priv_size = sizeof(struct sym_priv); 454 symbol_conf.priv_size = sizeof(struct sym_priv);
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 55fc1f46892a..5de405d45230 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -32,7 +32,7 @@
32 32
33static char const *input_name = "perf.data"; 33static char const *input_name = "perf.data";
34 34
35static bool force; 35static bool force, use_tui, use_stdio;
36static bool hide_unresolved; 36static bool hide_unresolved;
37static bool dont_use_callchains; 37static bool dont_use_callchains;
38 38
@@ -107,7 +107,8 @@ static int perf_session__add_hist_entry(struct perf_session *self,
107 goto out_free_syms; 107 goto out_free_syms;
108 err = 0; 108 err = 0;
109 if (symbol_conf.use_callchain) { 109 if (symbol_conf.use_callchain) {
110 err = append_chain(he->callchain, data->callchain, syms, data->period); 110 err = callchain_append(he->callchain, data->callchain, syms,
111 data->period);
111 if (err) 112 if (err)
112 goto out_free_syms; 113 goto out_free_syms;
113 } 114 }
@@ -450,6 +451,8 @@ static const struct option options[] = {
450 "Show per-thread event counters"), 451 "Show per-thread event counters"),
451 OPT_STRING(0, "pretty", &pretty_printing_style, "key", 452 OPT_STRING(0, "pretty", &pretty_printing_style, "key",
452 "pretty printing style key: normal raw"), 453 "pretty printing style key: normal raw"),
454 OPT_BOOLEAN(0, "tui", &use_tui, "Use the TUI interface"),
455 OPT_BOOLEAN(0, "stdio", &use_stdio, "Use the stdio interface"),
453 OPT_STRING('s', "sort", &sort_order, "key[,key2...]", 456 OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
454 "sort by key(s): pid, comm, dso, symbol, parent"), 457 "sort by key(s): pid, comm, dso, symbol, parent"),
455 OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, 458 OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization,
@@ -482,8 +485,15 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
482{ 485{
483 argc = parse_options(argc, argv, options, report_usage, 0); 486 argc = parse_options(argc, argv, options, report_usage, 0);
484 487
488 if (use_stdio)
489 use_browser = 0;
490 else if (use_tui)
491 use_browser = 1;
492
485 if (strcmp(input_name, "-") != 0) 493 if (strcmp(input_name, "-") != 0)
486 setup_browser(); 494 setup_browser();
495 else
496 use_browser = 0;
487 /* 497 /*
488 * Only in the newt browser we are doing integrated annotation, 498 * Only in the newt browser we are doing integrated annotation,
489 * so don't allocate extra space that won't be used in the stdio 499 * so don't allocate extra space that won't be used in the stdio
diff --git a/tools/perf/feature-tests.mak b/tools/perf/feature-tests.mak
index 7a7b60859053..b253db634f04 100644
--- a/tools/perf/feature-tests.mak
+++ b/tools/perf/feature-tests.mak
@@ -110,6 +110,17 @@ int main(void)
110} 110}
111endef 111endef
112 112
113define SOURCE_STRLCPY
114#include <stdlib.h>
115extern size_t strlcpy(char *dest, const char *src, size_t size);
116
117int main(void)
118{
119 strlcpy(NULL, NULL, 0);
120 return 0;
121}
122endef
123
113# try-cc 124# try-cc
114# Usage: option = $(call try-cc, source-to-build, cc-options) 125# Usage: option = $(call try-cc, source-to-build, cc-options)
115try-cc = $(shell sh -c \ 126try-cc = $(shell sh -c \
diff --git a/tools/perf/scripts/python/bin/netdev-times-record b/tools/perf/scripts/python/bin/netdev-times-record
new file mode 100644
index 000000000000..d931a828126b
--- /dev/null
+++ b/tools/perf/scripts/python/bin/netdev-times-record
@@ -0,0 +1,8 @@
1#!/bin/bash
2perf record -a -e net:net_dev_xmit -e net:net_dev_queue \
3 -e net:netif_receive_skb -e net:netif_rx \
4 -e skb:consume_skb -e skb:kfree_skb \
5 -e skb:skb_copy_datagram_iovec -e napi:napi_poll \
6 -e irq:irq_handler_entry -e irq:irq_handler_exit \
7 -e irq:softirq_entry -e irq:softirq_exit \
8 -e irq:softirq_raise $@
diff --git a/tools/perf/scripts/python/bin/netdev-times-report b/tools/perf/scripts/python/bin/netdev-times-report
new file mode 100644
index 000000000000..c3d0a638123d
--- /dev/null
+++ b/tools/perf/scripts/python/bin/netdev-times-report
@@ -0,0 +1,5 @@
1#!/bin/bash
 2# description: display packet processing flow and processing time
3# args: [tx] [rx] [dev=] [debug]
4
5perf trace -s ~/libexec/perf-core/scripts/python/netdev-times.py $@
diff --git a/tools/perf/scripts/python/netdev-times.py b/tools/perf/scripts/python/netdev-times.py
new file mode 100644
index 000000000000..9aa0a32972e8
--- /dev/null
+++ b/tools/perf/scripts/python/netdev-times.py
@@ -0,0 +1,464 @@
 1# Display the processing flow of packets and the time spent in each stage.
 2# It helps to investigate networking or network device behavior.
3#
4# options
5# tx: show only tx chart
6# rx: show only rx chart
7# dev=: show only thing related to specified device
 8# debug: run in debug mode; it shows buffer status.
9
10import os
11import sys
12
13sys.path.append(os.environ['PERF_EXEC_PATH'] + \
14 '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
15
16from perf_trace_context import *
17from Core import *
18from Util import *
19
 20all_event_list = []; # all tracepoint events related to this script are inserted here
21irq_dic = {}; # key is cpu and value is a list which stacks irqs
22 # which raise NET_RX softirq
 23net_rx_dic = {}; # key is cpu and value includes the time of NET_RX softirq-entry
 24 # and a list which stacks receive events
 25receive_hunk_list = []; # a list which includes a sequence of receive events
26rx_skb_list = []; # received packet list for matching
27 # skb_copy_datagram_iovec
28
29buffer_budget = 65536; # the budget of rx_skb_list, tx_queue_list and
30 # tx_xmit_list
31of_count_rx_skb_list = 0; # overflow count
32
33tx_queue_list = []; # list of packets which pass through dev_queue_xmit
34of_count_tx_queue_list = 0; # overflow count
35
36tx_xmit_list = []; # list of packets which pass through dev_hard_start_xmit
37of_count_tx_xmit_list = 0; # overflow count
38
 39tx_free_list = []; # list of packets which are freed
40
41# options
42show_tx = 0;
43show_rx = 0;
 44dev = 0; # stores the name of the device specified by the "dev=" option
45debug = 0;
46
47# indices of event_info tuple
48EINFO_IDX_NAME= 0
49EINFO_IDX_CONTEXT=1
50EINFO_IDX_CPU= 2
51EINFO_IDX_TIME= 3
52EINFO_IDX_PID= 4
53EINFO_IDX_COMM= 5
54
55# Calculate a time interval(msec) from src(nsec) to dst(nsec)
56def diff_msec(src, dst):
57 return (dst - src) / 1000000.0
58
59# Display a process of transmitting a packet
60def print_transmit(hunk):
61 if dev != 0 and hunk['dev'].find(dev) < 0:
62 return
63 print "%7s %5d %6d.%06dsec %12.3fmsec %12.3fmsec" % \
64 (hunk['dev'], hunk['len'],
65 nsecs_secs(hunk['queue_t']),
66 nsecs_nsecs(hunk['queue_t'])/1000,
67 diff_msec(hunk['queue_t'], hunk['xmit_t']),
68 diff_msec(hunk['xmit_t'], hunk['free_t']))
69
70# Format for displaying rx packet processing
71PF_IRQ_ENTRY= " irq_entry(+%.3fmsec irq=%d:%s)"
72PF_SOFT_ENTRY=" softirq_entry(+%.3fmsec)"
73PF_NAPI_POLL= " napi_poll_exit(+%.3fmsec %s)"
74PF_JOINT= " |"
75PF_WJOINT= " | |"
76PF_NET_RECV= " |---netif_receive_skb(+%.3fmsec skb=%x len=%d)"
77PF_NET_RX= " |---netif_rx(+%.3fmsec skb=%x)"
78PF_CPY_DGRAM= " | skb_copy_datagram_iovec(+%.3fmsec %d:%s)"
79PF_KFREE_SKB= " | kfree_skb(+%.3fmsec location=%x)"
80PF_CONS_SKB= " | consume_skb(+%.3fmsec)"
81
 82# Display the processing of received packets and interrupts associated with
83# a NET_RX softirq
84def print_receive(hunk):
85 show_hunk = 0
86 irq_list = hunk['irq_list']
87 cpu = irq_list[0]['cpu']
88 base_t = irq_list[0]['irq_ent_t']
 89 # check if this hunk should be shown
90 if dev != 0:
91 for i in range(len(irq_list)):
92 if irq_list[i]['name'].find(dev) >= 0:
93 show_hunk = 1
94 break
95 else:
96 show_hunk = 1
97 if show_hunk == 0:
98 return
99
100 print "%d.%06dsec cpu=%d" % \
101 (nsecs_secs(base_t), nsecs_nsecs(base_t)/1000, cpu)
102 for i in range(len(irq_list)):
103 print PF_IRQ_ENTRY % \
104 (diff_msec(base_t, irq_list[i]['irq_ent_t']),
105 irq_list[i]['irq'], irq_list[i]['name'])
106 print PF_JOINT
107 irq_event_list = irq_list[i]['event_list']
108 for j in range(len(irq_event_list)):
109 irq_event = irq_event_list[j]
110 if irq_event['event'] == 'netif_rx':
111 print PF_NET_RX % \
112 (diff_msec(base_t, irq_event['time']),
113 irq_event['skbaddr'])
114 print PF_JOINT
115 print PF_SOFT_ENTRY % \
116 diff_msec(base_t, hunk['sirq_ent_t'])
117 print PF_JOINT
118 event_list = hunk['event_list']
119 for i in range(len(event_list)):
120 event = event_list[i]
121 if event['event_name'] == 'napi_poll':
122 print PF_NAPI_POLL % \
123 (diff_msec(base_t, event['event_t']), event['dev'])
124 if i == len(event_list) - 1:
125 print ""
126 else:
127 print PF_JOINT
128 else:
129 print PF_NET_RECV % \
130 (diff_msec(base_t, event['event_t']), event['skbaddr'],
131 event['len'])
132 if 'comm' in event.keys():
133 print PF_WJOINT
134 print PF_CPY_DGRAM % \
135 (diff_msec(base_t, event['comm_t']),
136 event['pid'], event['comm'])
137 elif 'handle' in event.keys():
138 print PF_WJOINT
139 if event['handle'] == "kfree_skb":
140 print PF_KFREE_SKB % \
141 (diff_msec(base_t,
142 event['comm_t']),
143 event['location'])
144 elif event['handle'] == "consume_skb":
145 print PF_CONS_SKB % \
146 diff_msec(base_t,
147 event['comm_t'])
148 print PF_JOINT
149
150def trace_begin():
151 global show_tx
152 global show_rx
153 global dev
154 global debug
155
156 for i in range(len(sys.argv)):
157 if i == 0:
158 continue
159 arg = sys.argv[i]
160 if arg == 'tx':
161 show_tx = 1
162 elif arg =='rx':
163 show_rx = 1
164 elif arg.find('dev=',0, 4) >= 0:
165 dev = arg[4:]
166 elif arg == 'debug':
167 debug = 1
168 if show_tx == 0 and show_rx == 0:
169 show_tx = 1
170 show_rx = 1
171
172def trace_end():
173 # order all events in time
174 all_event_list.sort(lambda a,b :cmp(a[EINFO_IDX_TIME],
175 b[EINFO_IDX_TIME]))
176 # process all events
177 for i in range(len(all_event_list)):
178 event_info = all_event_list[i]
179 name = event_info[EINFO_IDX_NAME]
180 if name == 'irq__softirq_exit':
181 handle_irq_softirq_exit(event_info)
182 elif name == 'irq__softirq_entry':
183 handle_irq_softirq_entry(event_info)
184 elif name == 'irq__softirq_raise':
185 handle_irq_softirq_raise(event_info)
186 elif name == 'irq__irq_handler_entry':
187 handle_irq_handler_entry(event_info)
188 elif name == 'irq__irq_handler_exit':
189 handle_irq_handler_exit(event_info)
190 elif name == 'napi__napi_poll':
191 handle_napi_poll(event_info)
192 elif name == 'net__netif_receive_skb':
193 handle_netif_receive_skb(event_info)
194 elif name == 'net__netif_rx':
195 handle_netif_rx(event_info)
196 elif name == 'skb__skb_copy_datagram_iovec':
197 handle_skb_copy_datagram_iovec(event_info)
198 elif name == 'net__net_dev_queue':
199 handle_net_dev_queue(event_info)
200 elif name == 'net__net_dev_xmit':
201 handle_net_dev_xmit(event_info)
202 elif name == 'skb__kfree_skb':
203 handle_kfree_skb(event_info)
204 elif name == 'skb__consume_skb':
205 handle_consume_skb(event_info)
206 # display receive hunks
207 if show_rx:
208 for i in range(len(receive_hunk_list)):
209 print_receive(receive_hunk_list[i])
210 # display transmit hunks
211 if show_tx:
212 print " dev len Qdisc " \
213 " netdevice free"
214 for i in range(len(tx_free_list)):
215 print_transmit(tx_free_list[i])
216 if debug:
217 print "debug buffer status"
218 print "----------------------------"
219 print "xmit Qdisc:remain:%d overflow:%d" % \
220 (len(tx_queue_list), of_count_tx_queue_list)
221 print "xmit netdevice:remain:%d overflow:%d" % \
222 (len(tx_xmit_list), of_count_tx_xmit_list)
223 print "receive:remain:%d overflow:%d" % \
224 (len(rx_skb_list), of_count_rx_skb_list)
225
 226# called from perf, when it finds a corresponding event
227def irq__softirq_entry(name, context, cpu, sec, nsec, pid, comm, vec):
228 if symbol_str("irq__softirq_entry", "vec", vec) != "NET_RX":
229 return
230 event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, vec)
231 all_event_list.append(event_info)
232
233def irq__softirq_exit(name, context, cpu, sec, nsec, pid, comm, vec):
234 if symbol_str("irq__softirq_entry", "vec", vec) != "NET_RX":
235 return
236 event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, vec)
237 all_event_list.append(event_info)
238
239def irq__softirq_raise(name, context, cpu, sec, nsec, pid, comm, vec):
240 if symbol_str("irq__softirq_entry", "vec", vec) != "NET_RX":
241 return
242 event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, vec)
243 all_event_list.append(event_info)
244
245def irq__irq_handler_entry(name, context, cpu, sec, nsec, pid, comm,
246 irq, irq_name):
247 event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
248 irq, irq_name)
249 all_event_list.append(event_info)
250
251def irq__irq_handler_exit(name, context, cpu, sec, nsec, pid, comm, irq, ret):
252 event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, irq, ret)
253 all_event_list.append(event_info)
254
255def napi__napi_poll(name, context, cpu, sec, nsec, pid, comm, napi, dev_name):
256 event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
257 napi, dev_name)
258 all_event_list.append(event_info)
259
260def net__netif_receive_skb(name, context, cpu, sec, nsec, pid, comm, skbaddr,
261 skblen, dev_name):
262 event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
263 skbaddr, skblen, dev_name)
264 all_event_list.append(event_info)
265
266def net__netif_rx(name, context, cpu, sec, nsec, pid, comm, skbaddr,
267 skblen, dev_name):
268 event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
269 skbaddr, skblen, dev_name)
270 all_event_list.append(event_info)
271
272def net__net_dev_queue(name, context, cpu, sec, nsec, pid, comm,
273 skbaddr, skblen, dev_name):
274 event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
275 skbaddr, skblen, dev_name)
276 all_event_list.append(event_info)
277
278def net__net_dev_xmit(name, context, cpu, sec, nsec, pid, comm,
279 skbaddr, skblen, rc, dev_name):
280 event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
281 skbaddr, skblen, rc ,dev_name)
282 all_event_list.append(event_info)
283
284def skb__kfree_skb(name, context, cpu, sec, nsec, pid, comm,
285 skbaddr, protocol, location):
286 event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
287 skbaddr, protocol, location)
288 all_event_list.append(event_info)
289
290def skb__consume_skb(name, context, cpu, sec, nsec, pid, comm, skbaddr):
291 event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
292 skbaddr)
293 all_event_list.append(event_info)
294
295def skb__skb_copy_datagram_iovec(name, context, cpu, sec, nsec, pid, comm,
296 skbaddr, skblen):
297 event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
298 skbaddr, skblen)
299 all_event_list.append(event_info)
300
301def handle_irq_handler_entry(event_info):
302 (name, context, cpu, time, pid, comm, irq, irq_name) = event_info
303 if cpu not in irq_dic.keys():
304 irq_dic[cpu] = []
305 irq_record = {'irq':irq, 'name':irq_name, 'cpu':cpu, 'irq_ent_t':time}
306 irq_dic[cpu].append(irq_record)
307
308def handle_irq_handler_exit(event_info):
309 (name, context, cpu, time, pid, comm, irq, ret) = event_info
310 if cpu not in irq_dic.keys():
311 return
312 irq_record = irq_dic[cpu].pop()
313 if irq != irq_record['irq']:
314 return
315 irq_record.update({'irq_ext_t':time})
316 # if an irq doesn't include NET_RX softirq, drop.
317 if 'event_list' in irq_record.keys():
318 irq_dic[cpu].append(irq_record)
319
320def handle_irq_softirq_raise(event_info):
321 (name, context, cpu, time, pid, comm, vec) = event_info
322 if cpu not in irq_dic.keys() \
323 or len(irq_dic[cpu]) == 0:
324 return
325 irq_record = irq_dic[cpu].pop()
326 if 'event_list' in irq_record.keys():
327 irq_event_list = irq_record['event_list']
328 else:
329 irq_event_list = []
330 irq_event_list.append({'time':time, 'event':'sirq_raise'})
331 irq_record.update({'event_list':irq_event_list})
332 irq_dic[cpu].append(irq_record)
333
334def handle_irq_softirq_entry(event_info):
335 (name, context, cpu, time, pid, comm, vec) = event_info
336 net_rx_dic[cpu] = {'sirq_ent_t':time, 'event_list':[]}
337
338def handle_irq_softirq_exit(event_info):
339 (name, context, cpu, time, pid, comm, vec) = event_info
340 irq_list = []
341 event_list = 0
342 if cpu in irq_dic.keys():
343 irq_list = irq_dic[cpu]
344 del irq_dic[cpu]
345 if cpu in net_rx_dic.keys():
346 sirq_ent_t = net_rx_dic[cpu]['sirq_ent_t']
347 event_list = net_rx_dic[cpu]['event_list']
348 del net_rx_dic[cpu]
349 if irq_list == [] or event_list == 0:
350 return
351 rec_data = {'sirq_ent_t':sirq_ent_t, 'sirq_ext_t':time,
352 'irq_list':irq_list, 'event_list':event_list}
 353 # merge information related to a NET_RX softirq
354 receive_hunk_list.append(rec_data)
355
356def handle_napi_poll(event_info):
357 (name, context, cpu, time, pid, comm, napi, dev_name) = event_info
358 if cpu in net_rx_dic.keys():
359 event_list = net_rx_dic[cpu]['event_list']
360 rec_data = {'event_name':'napi_poll',
361 'dev':dev_name, 'event_t':time}
362 event_list.append(rec_data)
363
364def handle_netif_rx(event_info):
365 (name, context, cpu, time, pid, comm,
366 skbaddr, skblen, dev_name) = event_info
367 if cpu not in irq_dic.keys() \
368 or len(irq_dic[cpu]) == 0:
369 return
370 irq_record = irq_dic[cpu].pop()
371 if 'event_list' in irq_record.keys():
372 irq_event_list = irq_record['event_list']
373 else:
374 irq_event_list = []
375 irq_event_list.append({'time':time, 'event':'netif_rx',
376 'skbaddr':skbaddr, 'skblen':skblen, 'dev_name':dev_name})
377 irq_record.update({'event_list':irq_event_list})
378 irq_dic[cpu].append(irq_record)
379
380def handle_netif_receive_skb(event_info):
381 global of_count_rx_skb_list
382
383 (name, context, cpu, time, pid, comm,
384 skbaddr, skblen, dev_name) = event_info
385 if cpu in net_rx_dic.keys():
386 rec_data = {'event_name':'netif_receive_skb',
387 'event_t':time, 'skbaddr':skbaddr, 'len':skblen}
388 event_list = net_rx_dic[cpu]['event_list']
389 event_list.append(rec_data)
390 rx_skb_list.insert(0, rec_data)
391 if len(rx_skb_list) > buffer_budget:
392 rx_skb_list.pop()
393 of_count_rx_skb_list += 1
394
395def handle_net_dev_queue(event_info):
396 global of_count_tx_queue_list
397
398 (name, context, cpu, time, pid, comm,
399 skbaddr, skblen, dev_name) = event_info
400 skb = {'dev':dev_name, 'skbaddr':skbaddr, 'len':skblen, 'queue_t':time}
401 tx_queue_list.insert(0, skb)
402 if len(tx_queue_list) > buffer_budget:
403 tx_queue_list.pop()
404 of_count_tx_queue_list += 1
405
406def handle_net_dev_xmit(event_info):
407 global of_count_tx_xmit_list
408
409 (name, context, cpu, time, pid, comm,
410 skbaddr, skblen, rc, dev_name) = event_info
411 if rc == 0: # NETDEV_TX_OK
412 for i in range(len(tx_queue_list)):
413 skb = tx_queue_list[i]
414 if skb['skbaddr'] == skbaddr:
415 skb['xmit_t'] = time
416 tx_xmit_list.insert(0, skb)
417 del tx_queue_list[i]
418 if len(tx_xmit_list) > buffer_budget:
419 tx_xmit_list.pop()
420 of_count_tx_xmit_list += 1
421 return
422
423def handle_kfree_skb(event_info):
424 (name, context, cpu, time, pid, comm,
425 skbaddr, protocol, location) = event_info
426 for i in range(len(tx_queue_list)):
427 skb = tx_queue_list[i]
428 if skb['skbaddr'] == skbaddr:
429 del tx_queue_list[i]
430 return
431 for i in range(len(tx_xmit_list)):
432 skb = tx_xmit_list[i]
433 if skb['skbaddr'] == skbaddr:
434 skb['free_t'] = time
435 tx_free_list.append(skb)
436 del tx_xmit_list[i]
437 return
438 for i in range(len(rx_skb_list)):
439 rec_data = rx_skb_list[i]
440 if rec_data['skbaddr'] == skbaddr:
441 rec_data.update({'handle':"kfree_skb",
442 'comm':comm, 'pid':pid, 'comm_t':time})
443 del rx_skb_list[i]
444 return
445
446def handle_consume_skb(event_info):
447 (name, context, cpu, time, pid, comm, skbaddr) = event_info
448 for i in range(len(tx_xmit_list)):
449 skb = tx_xmit_list[i]
450 if skb['skbaddr'] == skbaddr:
451 skb['free_t'] = time
452 tx_free_list.append(skb)
453 del tx_xmit_list[i]
454 return
455
456def handle_skb_copy_datagram_iovec(event_info):
457 (name, context, cpu, time, pid, comm, skbaddr, skblen) = event_info
458 for i in range(len(rx_skb_list)):
459 rec_data = rx_skb_list[i]
460 if skbaddr == rec_data['skbaddr']:
461 rec_data.update({'handle':"skb_copy_datagram_iovec",
462 'comm':comm, 'pid':pid, 'comm_t':time})
463 del rx_skb_list[i]
464 return
diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h
index 27e9ebe4076e..a7729797fd96 100644
--- a/tools/perf/util/cache.h
+++ b/tools/perf/util/cache.h
@@ -82,6 +82,8 @@ extern char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2
82extern char *perf_pathdup(const char *fmt, ...) 82extern char *perf_pathdup(const char *fmt, ...)
83 __attribute__((format (printf, 1, 2))); 83 __attribute__((format (printf, 1, 2)));
84 84
85#ifdef NO_STRLCPY
85extern size_t strlcpy(char *dest, const char *src, size_t size); 86extern size_t strlcpy(char *dest, const char *src, size_t size);
87#endif
86 88
87#endif /* __PERF_CACHE_H */ 89#endif /* __PERF_CACHE_H */
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index f231f43424d2..e12d539417b2 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -28,6 +28,9 @@ bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event)
28#define chain_for_each_child(child, parent) \ 28#define chain_for_each_child(child, parent) \
29 list_for_each_entry(child, &parent->children, brothers) 29 list_for_each_entry(child, &parent->children, brothers)
30 30
31#define chain_for_each_child_safe(child, next, parent) \
32 list_for_each_entry_safe(child, next, &parent->children, brothers)
33
31static void 34static void
32rb_insert_callchain(struct rb_root *root, struct callchain_node *chain, 35rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
33 enum chain_mode mode) 36 enum chain_mode mode)
@@ -86,10 +89,10 @@ __sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node,
86 * sort them by hit 89 * sort them by hit
87 */ 90 */
88static void 91static void
89sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node, 92sort_chain_flat(struct rb_root *rb_root, struct callchain_root *root,
90 u64 min_hit, struct callchain_param *param __used) 93 u64 min_hit, struct callchain_param *param __used)
91{ 94{
92 __sort_chain_flat(rb_root, node, min_hit); 95 __sort_chain_flat(rb_root, &root->node, min_hit);
93} 96}
94 97
95static void __sort_chain_graph_abs(struct callchain_node *node, 98static void __sort_chain_graph_abs(struct callchain_node *node,
@@ -108,11 +111,11 @@ static void __sort_chain_graph_abs(struct callchain_node *node,
108} 111}
109 112
110static void 113static void
111sort_chain_graph_abs(struct rb_root *rb_root, struct callchain_node *chain_root, 114sort_chain_graph_abs(struct rb_root *rb_root, struct callchain_root *chain_root,
112 u64 min_hit, struct callchain_param *param __used) 115 u64 min_hit, struct callchain_param *param __used)
113{ 116{
114 __sort_chain_graph_abs(chain_root, min_hit); 117 __sort_chain_graph_abs(&chain_root->node, min_hit);
115 rb_root->rb_node = chain_root->rb_root.rb_node; 118 rb_root->rb_node = chain_root->node.rb_root.rb_node;
116} 119}
117 120
118static void __sort_chain_graph_rel(struct callchain_node *node, 121static void __sort_chain_graph_rel(struct callchain_node *node,
@@ -133,11 +136,11 @@ static void __sort_chain_graph_rel(struct callchain_node *node,
133} 136}
134 137
135static void 138static void
136sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_node *chain_root, 139sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_root *chain_root,
137 u64 min_hit __used, struct callchain_param *param) 140 u64 min_hit __used, struct callchain_param *param)
138{ 141{
139 __sort_chain_graph_rel(chain_root, param->min_percent / 100.0); 142 __sort_chain_graph_rel(&chain_root->node, param->min_percent / 100.0);
140 rb_root->rb_node = chain_root->rb_root.rb_node; 143 rb_root->rb_node = chain_root->node.rb_root.rb_node;
141} 144}
142 145
143int register_callchain_param(struct callchain_param *param) 146int register_callchain_param(struct callchain_param *param)
@@ -284,19 +287,18 @@ split_add_child(struct callchain_node *parent, struct resolved_chain *chain,
284} 287}
285 288
286static int 289static int
287__append_chain(struct callchain_node *root, struct resolved_chain *chain, 290append_chain(struct callchain_node *root, struct resolved_chain *chain,
288 unsigned int start, u64 period); 291 unsigned int start, u64 period);
289 292
290static void 293static void
291__append_chain_children(struct callchain_node *root, 294append_chain_children(struct callchain_node *root, struct resolved_chain *chain,
292 struct resolved_chain *chain, 295 unsigned int start, u64 period)
293 unsigned int start, u64 period)
294{ 296{
295 struct callchain_node *rnode; 297 struct callchain_node *rnode;
296 298
297 /* lookup in childrens */ 299 /* lookup in childrens */
298 chain_for_each_child(rnode, root) { 300 chain_for_each_child(rnode, root) {
299 unsigned int ret = __append_chain(rnode, chain, start, period); 301 unsigned int ret = append_chain(rnode, chain, start, period);
300 302
301 if (!ret) 303 if (!ret)
302 goto inc_children_hit; 304 goto inc_children_hit;
@@ -309,8 +311,8 @@ inc_children_hit:
309} 311}
310 312
311static int 313static int
312__append_chain(struct callchain_node *root, struct resolved_chain *chain, 314append_chain(struct callchain_node *root, struct resolved_chain *chain,
313 unsigned int start, u64 period) 315 unsigned int start, u64 period)
314{ 316{
315 struct callchain_list *cnode; 317 struct callchain_list *cnode;
316 unsigned int i = start; 318 unsigned int i = start;
@@ -357,7 +359,7 @@ __append_chain(struct callchain_node *root, struct resolved_chain *chain,
357 } 359 }
358 360
359 /* We match the node and still have a part remaining */ 361 /* We match the node and still have a part remaining */
360 __append_chain_children(root, chain, i, period); 362 append_chain_children(root, chain, i, period);
361 363
362 return 0; 364 return 0;
363} 365}
@@ -380,8 +382,8 @@ static void filter_context(struct ip_callchain *old, struct resolved_chain *new,
380} 382}
381 383
382 384
383int append_chain(struct callchain_node *root, struct ip_callchain *chain, 385int callchain_append(struct callchain_root *root, struct ip_callchain *chain,
384 struct map_symbol *syms, u64 period) 386 struct map_symbol *syms, u64 period)
385{ 387{
386 struct resolved_chain *filtered; 388 struct resolved_chain *filtered;
387 389
@@ -398,9 +400,65 @@ int append_chain(struct callchain_node *root, struct ip_callchain *chain,
398 if (!filtered->nr) 400 if (!filtered->nr)
399 goto end; 401 goto end;
400 402
401 __append_chain_children(root, filtered, 0, period); 403 append_chain_children(&root->node, filtered, 0, period);
404
405 if (filtered->nr > root->max_depth)
406 root->max_depth = filtered->nr;
402end: 407end:
403 free(filtered); 408 free(filtered);
404 409
405 return 0; 410 return 0;
406} 411}
412
413static int
414merge_chain_branch(struct callchain_node *dst, struct callchain_node *src,
415 struct resolved_chain *chain)
416{
417 struct callchain_node *child, *next_child;
418 struct callchain_list *list, *next_list;
419 int old_pos = chain->nr;
420 int err = 0;
421
422 list_for_each_entry_safe(list, next_list, &src->val, list) {
423 chain->ips[chain->nr].ip = list->ip;
424 chain->ips[chain->nr].ms = list->ms;
425 chain->nr++;
426 list_del(&list->list);
427 free(list);
428 }
429
430 if (src->hit)
431 append_chain_children(dst, chain, 0, src->hit);
432
433 chain_for_each_child_safe(child, next_child, src) {
434 err = merge_chain_branch(dst, child, chain);
435 if (err)
436 break;
437
438 list_del(&child->brothers);
439 free(child);
440 }
441
442 chain->nr = old_pos;
443
444 return err;
445}
446
447int callchain_merge(struct callchain_root *dst, struct callchain_root *src)
448{
449 struct resolved_chain *chain;
450 int err;
451
452 chain = malloc(sizeof(*chain) +
453 src->max_depth * sizeof(struct resolved_ip));
454 if (!chain)
455 return -ENOMEM;
456
457 chain->nr = 0;
458
459 err = merge_chain_branch(&dst->node, &src->node, chain);
460
461 free(chain);
462
463 return err;
464}
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 6de4313924fb..c15fb8c24ad2 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -26,9 +26,14 @@ struct callchain_node {
26 u64 children_hit; 26 u64 children_hit;
27}; 27};
28 28
29struct callchain_root {
30 u64 max_depth;
31 struct callchain_node node;
32};
33
29struct callchain_param; 34struct callchain_param;
30 35
31typedef void (*sort_chain_func_t)(struct rb_root *, struct callchain_node *, 36typedef void (*sort_chain_func_t)(struct rb_root *, struct callchain_root *,
32 u64, struct callchain_param *); 37 u64, struct callchain_param *);
33 38
34struct callchain_param { 39struct callchain_param {
@@ -44,15 +49,16 @@ struct callchain_list {
44 struct list_head list; 49 struct list_head list;
45}; 50};
46 51
47static inline void callchain_init(struct callchain_node *node) 52static inline void callchain_init(struct callchain_root *root)
48{ 53{
49 INIT_LIST_HEAD(&node->brothers); 54 INIT_LIST_HEAD(&root->node.brothers);
50 INIT_LIST_HEAD(&node->children); 55 INIT_LIST_HEAD(&root->node.children);
51 INIT_LIST_HEAD(&node->val); 56 INIT_LIST_HEAD(&root->node.val);
52 57
53 node->children_hit = 0; 58 root->node.parent = NULL;
54 node->parent = NULL; 59 root->node.hit = 0;
55 node->hit = 0; 60 root->node.children_hit = 0;
61 root->max_depth = 0;
56} 62}
57 63
58static inline u64 cumul_hits(struct callchain_node *node) 64static inline u64 cumul_hits(struct callchain_node *node)
@@ -61,8 +67,9 @@ static inline u64 cumul_hits(struct callchain_node *node)
61} 67}
62 68
63int register_callchain_param(struct callchain_param *param); 69int register_callchain_param(struct callchain_param *param);
64int append_chain(struct callchain_node *root, struct ip_callchain *chain, 70int callchain_append(struct callchain_root *root, struct ip_callchain *chain,
65 struct map_symbol *syms, u64 period); 71 struct map_symbol *syms, u64 period);
72int callchain_merge(struct callchain_root *dst, struct callchain_root *src);
66 73
67bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event); 74bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event);
68#endif /* __PERF_CALLCHAIN_H */ 75#endif /* __PERF_CALLCHAIN_H */
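
With the callchain_node/callchain_root split above, callers initialize a callchain_root, append resolved chains to it, and merge two roots when histogram entries collapse (as hist.c does below, using root->max_depth to size the scratch buffer). A rough usage sketch against the API declared here; the helper names are hypothetical:

    #include "util/callchain.h"

    /* Hypothetical helpers showing the reworked callchain_root API. */
    static void sample_init(struct callchain_root *root)
    {
            callchain_init(root);           /* empty node, max_depth = 0 */
    }

    static int sample_record(struct callchain_root *root,
                             struct ip_callchain *chain,
                             struct map_symbol *syms, u64 period)
    {
            /* also bumps root->max_depth, which callchain_merge() relies on */
            return callchain_append(root, chain, syms, period);
    }

    static int sample_collapse(struct callchain_root *kept,
                               struct callchain_root *duplicate)
    {
            /* fold the duplicate entry's chains into the entry we keep */
            return callchain_merge(kept, duplicate);
    }
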
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index be22ae6ef055..2022e8740994 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -87,7 +87,7 @@ static void hist_entry__add_cpumode_period(struct hist_entry *self,
87 87
88static struct hist_entry *hist_entry__new(struct hist_entry *template) 88static struct hist_entry *hist_entry__new(struct hist_entry *template)
89{ 89{
90 size_t callchain_size = symbol_conf.use_callchain ? sizeof(struct callchain_node) : 0; 90 size_t callchain_size = symbol_conf.use_callchain ? sizeof(struct callchain_root) : 0;
91 struct hist_entry *self = malloc(sizeof(*self) + callchain_size); 91 struct hist_entry *self = malloc(sizeof(*self) + callchain_size);
92 92
93 if (self != NULL) { 93 if (self != NULL) {
@@ -226,6 +226,8 @@ static bool collapse__insert_entry(struct rb_root *root, struct hist_entry *he)
226 226
227 if (!cmp) { 227 if (!cmp) {
228 iter->period += he->period; 228 iter->period += he->period;
229 if (symbol_conf.use_callchain)
230 callchain_merge(iter->callchain, he->callchain);
229 hist_entry__free(he); 231 hist_entry__free(he);
230 return false; 232 return false;
231 } 233 }
diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c
index 58a470d036dd..bd7497711424 100644
--- a/tools/perf/util/path.c
+++ b/tools/perf/util/path.c
@@ -22,6 +22,7 @@ static const char *get_perf_dir(void)
22 return "."; 22 return ".";
23} 23}
24 24
25#ifdef NO_STRLCPY
25size_t strlcpy(char *dest, const char *src, size_t size) 26size_t strlcpy(char *dest, const char *src, size_t size)
26{ 27{
27 size_t ret = strlen(src); 28 size_t ret = strlen(src);
@@ -33,7 +34,7 @@ size_t strlcpy(char *dest, const char *src, size_t size)
33 } 34 }
34 return ret; 35 return ret;
35} 36}
36 37#endif
37 38
38static char *get_pathname(void) 39static char *get_pathname(void)
39{ 40{
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 46e531d09e8b..0b91053a7d11 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -70,7 +70,7 @@ struct hist_entry {
70 struct hist_entry *pair; 70 struct hist_entry *pair;
71 struct rb_root sorted_chain; 71 struct rb_root sorted_chain;
72 }; 72 };
73 struct callchain_node callchain[0]; 73 struct callchain_root callchain[0];
74}; 74};
75 75
76enum sort_type { 76enum sort_type {
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index b2f5ae97f33d..b39f499e575a 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -388,6 +388,20 @@ size_t dso__fprintf_buildid(struct dso *self, FILE *fp)
388 return fprintf(fp, "%s", sbuild_id); 388 return fprintf(fp, "%s", sbuild_id);
389} 389}
390 390
391size_t dso__fprintf_symbols_by_name(struct dso *self, enum map_type type, FILE *fp)
392{
393 size_t ret = 0;
394 struct rb_node *nd;
395 struct symbol_name_rb_node *pos;
396
397 for (nd = rb_first(&self->symbol_names[type]); nd; nd = rb_next(nd)) {
398 pos = rb_entry(nd, struct symbol_name_rb_node, rb_node);
399 fprintf(fp, "%s\n", pos->sym.name);
400 }
401
402 return ret;
403}
404
391size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp) 405size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp)
392{ 406{
393 struct rb_node *nd; 407 struct rb_node *nd;
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index ea95c2756f05..038f2201ee09 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -182,6 +182,7 @@ size_t machines__fprintf_dsos(struct rb_root *self, FILE *fp);
182size_t machines__fprintf_dsos_buildid(struct rb_root *self, FILE *fp, bool with_hits); 182size_t machines__fprintf_dsos_buildid(struct rb_root *self, FILE *fp, bool with_hits);
183 183
184size_t dso__fprintf_buildid(struct dso *self, FILE *fp); 184size_t dso__fprintf_buildid(struct dso *self, FILE *fp);
185size_t dso__fprintf_symbols_by_name(struct dso *self, enum map_type type, FILE *fp);
185size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp); 186size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp);
186 187
187enum dso_origin { 188enum dso_origin {
diff --git a/tools/perf/util/ui/browser.c b/tools/perf/util/ui/browser.c
index 66f2d583d8c4..6d0df809a2ed 100644
--- a/tools/perf/util/ui/browser.c
+++ b/tools/perf/util/ui/browser.c
@@ -1,16 +1,6 @@
1#define _GNU_SOURCE
2#include <stdio.h>
3#undef _GNU_SOURCE
4/*
5 * slang versions <= 2.0.6 have a "#if HAVE_LONG_LONG" that breaks
6 * the build if it isn't defined. Use the equivalent one that glibc
7 * has on features.h.
8 */
9#include <features.h>
10#ifndef HAVE_LONG_LONG
11#define HAVE_LONG_LONG __GLIBC_HAVE_LONG_LONG
12#endif
13#include <slang.h> 1#include <slang.h>
2#include "libslang.h"
3#include <linux/compiler.h>
14#include <linux/list.h> 4#include <linux/list.h>
15#include <linux/rbtree.h> 5#include <linux/rbtree.h>
16#include <stdlib.h> 6#include <stdlib.h>
@@ -19,17 +9,9 @@
19#include "helpline.h" 9#include "helpline.h"
20#include "../color.h" 10#include "../color.h"
21#include "../util.h" 11#include "../util.h"
12#include <stdio.h>
22 13
23#if SLANG_VERSION < 20104 14static int ui_browser__percent_color(double percent, bool current)
24#define sltt_set_color(obj, name, fg, bg) \
25 SLtt_set_color(obj,(char *)name, (char *)fg, (char *)bg)
26#else
27#define sltt_set_color SLtt_set_color
28#endif
29
30newtComponent newt_form__new(void);
31
32int ui_browser__percent_color(double percent, bool current)
33{ 15{
34 if (current) 16 if (current)
35 return HE_COLORSET_SELECTED; 17 return HE_COLORSET_SELECTED;
@@ -40,6 +22,23 @@ int ui_browser__percent_color(double percent, bool current)
40 return HE_COLORSET_NORMAL; 22 return HE_COLORSET_NORMAL;
41} 23}
42 24
25void ui_browser__set_color(struct ui_browser *self __used, int color)
26{
27 SLsmg_set_color(color);
28}
29
30void ui_browser__set_percent_color(struct ui_browser *self,
31 double percent, bool current)
32{
33 int color = ui_browser__percent_color(percent, current);
34 ui_browser__set_color(self, color);
35}
36
37void ui_browser__gotorc(struct ui_browser *self, int y, int x)
38{
39 SLsmg_gotorc(self->y + y, self->x + x);
40}
41
43void ui_browser__list_head_seek(struct ui_browser *self, off_t offset, int whence) 42void ui_browser__list_head_seek(struct ui_browser *self, off_t offset, int whence)
44{ 43{
45 struct list_head *head = self->entries; 44 struct list_head *head = self->entries;
@@ -111,7 +110,7 @@ unsigned int ui_browser__rb_tree_refresh(struct ui_browser *self)
111 nd = self->top; 110 nd = self->top;
112 111
113 while (nd != NULL) { 112 while (nd != NULL) {
114 SLsmg_gotorc(self->y + row, self->x); 113 ui_browser__gotorc(self, row, 0);
115 self->write(self, nd, row); 114 self->write(self, nd, row);
116 if (++row == self->height) 115 if (++row == self->height)
117 break; 116 break;
@@ -131,13 +130,10 @@ void ui_browser__refresh_dimensions(struct ui_browser *self)
131 int cols, rows; 130 int cols, rows;
132 newtGetScreenSize(&cols, &rows); 131 newtGetScreenSize(&cols, &rows);
133 132
134 if (self->width > cols - 4) 133 self->width = cols - 1;
135 self->width = cols - 4; 134 self->height = rows - 2;
136 self->height = rows - 5; 135 self->y = 1;
137 if (self->height > self->nr_entries) 136 self->x = 0;
138 self->height = self->nr_entries;
139 self->y = (rows - self->height) / 2;
140 self->x = (cols - self->width) / 2;
141} 137}
142 138
143void ui_browser__reset_index(struct ui_browser *self) 139void ui_browser__reset_index(struct ui_browser *self)
@@ -146,34 +142,48 @@ void ui_browser__reset_index(struct ui_browser *self)
146 self->seek(self, 0, SEEK_SET); 142 self->seek(self, 0, SEEK_SET);
147} 143}
148 144
145void ui_browser__add_exit_key(struct ui_browser *self, int key)
146{
147 newtFormAddHotKey(self->form, key);
148}
149
150void ui_browser__add_exit_keys(struct ui_browser *self, int keys[])
151{
152 int i = 0;
153
154 while (keys[i] && i < 64) {
155 ui_browser__add_exit_key(self, keys[i]);
156 ++i;
157 }
158}
159
149int ui_browser__show(struct ui_browser *self, const char *title, 160int ui_browser__show(struct ui_browser *self, const char *title,
150 const char *helpline, ...) 161 const char *helpline, ...)
151{ 162{
152 va_list ap; 163 va_list ap;
164 int keys[] = { NEWT_KEY_UP, NEWT_KEY_DOWN, NEWT_KEY_PGUP,
165 NEWT_KEY_PGDN, NEWT_KEY_HOME, NEWT_KEY_END, ' ',
166 NEWT_KEY_LEFT, NEWT_KEY_ESCAPE, 'q', CTRL('c'), 0 };
153 167
154 if (self->form != NULL) { 168 if (self->form != NULL)
155 newtFormDestroy(self->form); 169 newtFormDestroy(self->form);
156 newtPopWindow(); 170
157 }
158 ui_browser__refresh_dimensions(self); 171 ui_browser__refresh_dimensions(self);
159 newtCenteredWindow(self->width, self->height, title); 172 self->form = newtForm(NULL, NULL, 0);
160 self->form = newt_form__new();
161 if (self->form == NULL) 173 if (self->form == NULL)
162 return -1; 174 return -1;
163 175
164 self->sb = newtVerticalScrollbar(self->width, 0, self->height, 176 self->sb = newtVerticalScrollbar(self->width, 1, self->height,
165 HE_COLORSET_NORMAL, 177 HE_COLORSET_NORMAL,
166 HE_COLORSET_SELECTED); 178 HE_COLORSET_SELECTED);
167 if (self->sb == NULL) 179 if (self->sb == NULL)
168 return -1; 180 return -1;
169 181
170 newtFormAddHotKey(self->form, NEWT_KEY_UP); 182 SLsmg_gotorc(0, 0);
171 newtFormAddHotKey(self->form, NEWT_KEY_DOWN); 183 ui_browser__set_color(self, NEWT_COLORSET_ROOT);
172 newtFormAddHotKey(self->form, NEWT_KEY_PGUP); 184 slsmg_write_nstring(title, self->width);
173 newtFormAddHotKey(self->form, NEWT_KEY_PGDN); 185
174 newtFormAddHotKey(self->form, NEWT_KEY_HOME); 186 ui_browser__add_exit_keys(self, keys);
175 newtFormAddHotKey(self->form, NEWT_KEY_END);
176 newtFormAddHotKey(self->form, ' ');
177 newtFormAddComponent(self->form, self->sb); 187 newtFormAddComponent(self->form, self->sb);
178 188
179 va_start(ap, helpline); 189 va_start(ap, helpline);
@@ -185,7 +195,6 @@ int ui_browser__show(struct ui_browser *self, const char *title,
185void ui_browser__hide(struct ui_browser *self) 195void ui_browser__hide(struct ui_browser *self)
186{ 196{
187 newtFormDestroy(self->form); 197 newtFormDestroy(self->form);
188 newtPopWindow();
189 self->form = NULL; 198 self->form = NULL;
190 ui_helpline__pop(); 199 ui_helpline__pop();
191} 200}
@@ -196,28 +205,28 @@ int ui_browser__refresh(struct ui_browser *self)
196 205
197 newtScrollbarSet(self->sb, self->index, self->nr_entries - 1); 206 newtScrollbarSet(self->sb, self->index, self->nr_entries - 1);
198 row = self->refresh(self); 207 row = self->refresh(self);
199 SLsmg_set_color(HE_COLORSET_NORMAL); 208 ui_browser__set_color(self, HE_COLORSET_NORMAL);
200 SLsmg_fill_region(self->y + row, self->x, 209 SLsmg_fill_region(self->y + row, self->x,
201 self->height - row, self->width, ' '); 210 self->height - row, self->width, ' ');
202 211
203 return 0; 212 return 0;
204} 213}
205 214
206int ui_browser__run(struct ui_browser *self, struct newtExitStruct *es) 215int ui_browser__run(struct ui_browser *self)
207{ 216{
217 struct newtExitStruct es;
218
208 if (ui_browser__refresh(self) < 0) 219 if (ui_browser__refresh(self) < 0)
209 return -1; 220 return -1;
210 221
211 while (1) { 222 while (1) {
212 off_t offset; 223 off_t offset;
213 224
214 newtFormRun(self->form, es); 225 newtFormRun(self->form, &es);
215 226
216 if (es->reason != NEWT_EXIT_HOTKEY) 227 if (es.reason != NEWT_EXIT_HOTKEY)
217 break; 228 break;
218 if (is_exit_key(es->u.key)) 229 switch (es.u.key) {
219 return es->u.key;
220 switch (es->u.key) {
221 case NEWT_KEY_DOWN: 230 case NEWT_KEY_DOWN:
222 if (self->index == self->nr_entries - 1) 231 if (self->index == self->nr_entries - 1)
223 break; 232 break;
@@ -274,12 +283,12 @@ int ui_browser__run(struct ui_browser *self, struct newtExitStruct *es)
274 self->seek(self, -offset, SEEK_END); 283 self->seek(self, -offset, SEEK_END);
275 break; 284 break;
276 default: 285 default:
277 return es->u.key; 286 return es.u.key;
278 } 287 }
279 if (ui_browser__refresh(self) < 0) 288 if (ui_browser__refresh(self) < 0)
280 return -1; 289 return -1;
281 } 290 }
282 return 0; 291 return -1;
283} 292}
284 293
285unsigned int ui_browser__list_head_refresh(struct ui_browser *self) 294unsigned int ui_browser__list_head_refresh(struct ui_browser *self)
@@ -294,7 +303,7 @@ unsigned int ui_browser__list_head_refresh(struct ui_browser *self)
294 pos = self->top; 303 pos = self->top;
295 304
296 list_for_each_from(pos, head) { 305 list_for_each_from(pos, head) {
297 SLsmg_gotorc(self->y + row, self->x); 306 ui_browser__gotorc(self, row, 0);
298 self->write(self, pos, row); 307 self->write(self, pos, row);
299 if (++row == self->height) 308 if (++row == self->height)
300 break; 309 break;
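Editor's sketch: the browser.c changes replace the long run of per-key newtFormAddHotKey() calls with a zero-terminated key array walked by ui_browser__add_exit_keys(). The following standalone C sketch shows that pattern; register_key() is a hypothetical stand-in for newtFormAddHotKey() so the example runs without libnewt.

#include <stdio.h>

/* Stand-in for newtFormAddHotKey(); prints instead of touching newt. */
static void register_key(int key)
{
	printf("hotkey registered: %d\n", key);
}

/*
 * Same shape as the new ui_browser__add_exit_keys(): walk a
 * zero-terminated array, capped at 64 entries.
 */
static void add_exit_keys(int keys[])
{
	int i = 0;

	while (keys[i] && i < 64) {
		register_key(keys[i]);
		++i;
	}
}

int main(void)
{
	int keys[] = { 'q', 27 /* ESC */, ' ', 0 };

	add_exit_keys(keys);
	return 0;
}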
diff --git a/tools/perf/util/ui/browser.h b/tools/perf/util/ui/browser.h
index 0b9f829214f7..0dc7e4da36f5 100644
--- a/tools/perf/util/ui/browser.h
+++ b/tools/perf/util/ui/browser.h
@@ -25,16 +25,21 @@ struct ui_browser {
25}; 25};
26 26
27 27
28int ui_browser__percent_color(double percent, bool current); 28void ui_browser__set_color(struct ui_browser *self, int color);
29void ui_browser__set_percent_color(struct ui_browser *self,
30 double percent, bool current);
29bool ui_browser__is_current_entry(struct ui_browser *self, unsigned row); 31bool ui_browser__is_current_entry(struct ui_browser *self, unsigned row);
30void ui_browser__refresh_dimensions(struct ui_browser *self); 32void ui_browser__refresh_dimensions(struct ui_browser *self);
31void ui_browser__reset_index(struct ui_browser *self); 33void ui_browser__reset_index(struct ui_browser *self);
32 34
35void ui_browser__gotorc(struct ui_browser *self, int y, int x);
36void ui_browser__add_exit_key(struct ui_browser *self, int key);
37void ui_browser__add_exit_keys(struct ui_browser *self, int keys[]);
33int ui_browser__show(struct ui_browser *self, const char *title, 38int ui_browser__show(struct ui_browser *self, const char *title,
34 const char *helpline, ...); 39 const char *helpline, ...);
35void ui_browser__hide(struct ui_browser *self); 40void ui_browser__hide(struct ui_browser *self);
36int ui_browser__refresh(struct ui_browser *self); 41int ui_browser__refresh(struct ui_browser *self);
37int ui_browser__run(struct ui_browser *self, struct newtExitStruct *es); 42int ui_browser__run(struct ui_browser *self);
38 43
39void ui_browser__rb_tree_seek(struct ui_browser *self, off_t offset, int whence); 44void ui_browser__rb_tree_seek(struct ui_browser *self, off_t offset, int whence);
40unsigned int ui_browser__rb_tree_refresh(struct ui_browser *self); 45unsigned int ui_browser__rb_tree_refresh(struct ui_browser *self);
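Editor's sketch: with ui_browser__run() losing its newtExitStruct argument and returning the pressed key directly, callers follow a show / add extra exit keys / run / hide lifecycle. A hedged caller-side sketch against the prototypes declared above; it only builds inside tools/perf with libnewt, and the function name, the chosen extra keys and the include path are illustrative, not part of this patch.

/*
 * Caller-side sketch of the reworked API.  Assumes libnewt and the perf
 * tree's browser.h (path shown relative to tools/perf).
 */
#include <newt.h>
#include "util/ui/browser.h"

static int example_browser_loop(struct ui_browser *self)
{
	/* ui_browser__show() already registers the movement keys plus
	 * NEWT_KEY_LEFT, ESC, 'q' and CTRL+C as exit keys; callers only
	 * add their own extras. */
	int extra_keys[] = { 'a', 'd', 0 };
	int key;

	if (ui_browser__show(self, "example", "Press 'q' to exit") < 0)
		return -1;

	ui_browser__add_exit_keys(self, extra_keys);

	while (1) {
		/* Movement is handled internally; only exit keys return. */
		key = ui_browser__run(self);

		if (key == 'a' || key == 'd')
			continue;	/* a real caller would act on these */
		break;			/* LEFT/ESC/'q'/CTRL+C: leave */
	}

	ui_browser__hide(self);
	return key;
}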
diff --git a/tools/perf/util/ui/browsers/annotate.c b/tools/perf/util/ui/browsers/annotate.c
index a90273e63f4f..82b78f99251b 100644
--- a/tools/perf/util/ui/browsers/annotate.c
+++ b/tools/perf/util/ui/browsers/annotate.c
@@ -40,14 +40,12 @@ static void annotate_browser__write(struct ui_browser *self, void *entry, int ro
40 40
41 if (ol->offset != -1) { 41 if (ol->offset != -1) {
42 struct objdump_line_rb_node *olrb = objdump_line__rb(ol); 42 struct objdump_line_rb_node *olrb = objdump_line__rb(ol);
43 int color = ui_browser__percent_color(olrb->percent, current_entry); 43 ui_browser__set_percent_color(self, olrb->percent, current_entry);
44 SLsmg_set_color(color);
45 slsmg_printf(" %7.2f ", olrb->percent); 44 slsmg_printf(" %7.2f ", olrb->percent);
46 if (!current_entry) 45 if (!current_entry)
47 SLsmg_set_color(HE_COLORSET_CODE); 46 ui_browser__set_color(self, HE_COLORSET_CODE);
48 } else { 47 } else {
49 int color = ui_browser__percent_color(0, current_entry); 48 ui_browser__set_percent_color(self, 0, current_entry);
50 SLsmg_set_color(color);
51 slsmg_write_nstring(" ", 9); 49 slsmg_write_nstring(" ", 9);
52 } 50 }
53 51
@@ -135,32 +133,31 @@ static void annotate_browser__set_top(struct annotate_browser *self,
135 self->curr_hot = nd; 133 self->curr_hot = nd;
136} 134}
137 135
138static int annotate_browser__run(struct annotate_browser *self, 136static int annotate_browser__run(struct annotate_browser *self)
139 struct newtExitStruct *es)
140{ 137{
141 struct rb_node *nd; 138 struct rb_node *nd;
142 struct hist_entry *he = self->b.priv; 139 struct hist_entry *he = self->b.priv;
140 int key;
143 141
144 if (ui_browser__show(&self->b, he->ms.sym->name, 142 if (ui_browser__show(&self->b, he->ms.sym->name,
145 "<- or ESC: exit, TAB/shift+TAB: cycle thru samples") < 0) 143 "<-, -> or ESC: exit, TAB/shift+TAB: cycle thru samples") < 0)
146 return -1; 144 return -1;
147 145 /*
148 newtFormAddHotKey(self->b.form, NEWT_KEY_LEFT); 146 * To allow builtin-annotate to cycle thru multiple symbols by
149 newtFormAddHotKey(self->b.form, NEWT_KEY_RIGHT); 147 * examining the exit key for this function.
148 */
149 ui_browser__add_exit_key(&self->b, NEWT_KEY_RIGHT);
150 150
151 nd = self->curr_hot; 151 nd = self->curr_hot;
152 if (nd) { 152 if (nd) {
153 newtFormAddHotKey(self->b.form, NEWT_KEY_TAB); 153 int tabs[] = { NEWT_KEY_TAB, NEWT_KEY_UNTAB, 0 };
154 newtFormAddHotKey(self->b.form, NEWT_KEY_UNTAB); 154 ui_browser__add_exit_keys(&self->b, tabs);
155 } 155 }
156 156
157 while (1) { 157 while (1) {
158 ui_browser__run(&self->b, es); 158 key = ui_browser__run(&self->b);
159
160 if (es->reason != NEWT_EXIT_HOTKEY)
161 break;
162 159
163 switch (es->u.key) { 160 switch (key) {
164 case NEWT_KEY_TAB: 161 case NEWT_KEY_TAB:
165 nd = rb_prev(nd); 162 nd = rb_prev(nd);
166 if (nd == NULL) 163 if (nd == NULL)
@@ -179,12 +176,11 @@ static int annotate_browser__run(struct annotate_browser *self,
179 } 176 }
180out: 177out:
181 ui_browser__hide(&self->b); 178 ui_browser__hide(&self->b);
182 return es->u.key; 179 return key;
183} 180}
184 181
185int hist_entry__tui_annotate(struct hist_entry *self) 182int hist_entry__tui_annotate(struct hist_entry *self)
186{ 183{
187 struct newtExitStruct es;
188 struct objdump_line *pos, *n; 184 struct objdump_line *pos, *n;
189 struct objdump_line_rb_node *rbpos; 185 struct objdump_line_rb_node *rbpos;
190 LIST_HEAD(head); 186 LIST_HEAD(head);
@@ -232,7 +228,7 @@ int hist_entry__tui_annotate(struct hist_entry *self)
232 annotate_browser__set_top(&browser, browser.curr_hot); 228 annotate_browser__set_top(&browser, browser.curr_hot);
233 229
234 browser.b.width += 18; /* Percentage */ 230 browser.b.width += 18; /* Percentage */
235 ret = annotate_browser__run(&browser, &es); 231 ret = annotate_browser__run(&browser);
236 list_for_each_entry_safe(pos, n, &head, node) { 232 list_for_each_entry_safe(pos, n, &head, node) {
237 list_del(&pos->node); 233 list_del(&pos->node);
238 objdump_line__free(pos); 234 objdump_line__free(pos);
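Editor's sketch: annotate_browser__run() now receives TAB/shift+TAB as plain return values from ui_browser__run() and steps through the rb-tree of hottest lines with rb_prev()/rb_next(). A toy, array-based model of that cycling follows; the wrap-around at either end is illustrative, since the hunk above only shows the rb_prev() step.

#include <stdio.h>

/*
 * An array stands in for the rb-tree the annotate browser walks with
 * rb_next()/rb_prev(); cycle() wraps around at both ends.
 */
static int cycle(int cur, int nr, int forward)
{
	if (forward)
		return (cur + 1) % nr;
	return cur == 0 ? nr - 1 : cur - 1;
}

int main(void)
{
	const char *syms[] = { "hot_a", "hot_b", "hot_c" };
	int cur = 0, i;

	for (i = 0; i < 4; i++) {		/* four TAB presses */
		cur = cycle(cur, 3, 1);
		printf("TAB   -> %s\n", syms[cur]);
	}
	cur = cycle(cur, 3, 0);			/* one shift+TAB */
	printf("UNTAB -> %s\n", syms[cur]);
	return 0;
}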
diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c
index dafdf6775d77..ebda8c3fde9e 100644
--- a/tools/perf/util/ui/browsers/hists.c
+++ b/tools/perf/util/ui/browsers/hists.c
@@ -58,6 +58,11 @@ static char callchain_list__folded(const struct callchain_list *self)
58 return map_symbol__folded(&self->ms); 58 return map_symbol__folded(&self->ms);
59} 59}
60 60
61static void map_symbol__set_folding(struct map_symbol *self, bool unfold)
62{
63 self->unfolded = unfold ? self->has_children : false;
64}
65
61static int callchain_node__count_rows_rb_tree(struct callchain_node *self) 66static int callchain_node__count_rows_rb_tree(struct callchain_node *self)
62{ 67{
63 int n = 0; 68 int n = 0;
@@ -129,16 +134,16 @@ static void callchain_node__init_have_children_rb_tree(struct callchain_node *se
129 for (nd = rb_first(&self->rb_root); nd; nd = rb_next(nd)) { 134 for (nd = rb_first(&self->rb_root); nd; nd = rb_next(nd)) {
130 struct callchain_node *child = rb_entry(nd, struct callchain_node, rb_node); 135 struct callchain_node *child = rb_entry(nd, struct callchain_node, rb_node);
131 struct callchain_list *chain; 136 struct callchain_list *chain;
132 int first = true; 137 bool first = true;
133 138
134 list_for_each_entry(chain, &child->val, list) { 139 list_for_each_entry(chain, &child->val, list) {
135 if (first) { 140 if (first) {
136 first = false; 141 first = false;
137 chain->ms.has_children = chain->list.next != &child->val || 142 chain->ms.has_children = chain->list.next != &child->val ||
138 rb_first(&child->rb_root) != NULL; 143 !RB_EMPTY_ROOT(&child->rb_root);
139 } else 144 } else
140 chain->ms.has_children = chain->list.next == &child->val && 145 chain->ms.has_children = chain->list.next == &child->val &&
141 rb_first(&child->rb_root) != NULL; 146 !RB_EMPTY_ROOT(&child->rb_root);
142 } 147 }
143 148
144 callchain_node__init_have_children_rb_tree(child); 149 callchain_node__init_have_children_rb_tree(child);
@@ -150,7 +155,7 @@ static void callchain_node__init_have_children(struct callchain_node *self)
150 struct callchain_list *chain; 155 struct callchain_list *chain;
151 156
152 list_for_each_entry(chain, &self->val, list) 157 list_for_each_entry(chain, &self->val, list)
153 chain->ms.has_children = rb_first(&self->rb_root) != NULL; 158 chain->ms.has_children = !RB_EMPTY_ROOT(&self->rb_root);
154 159
155 callchain_node__init_have_children_rb_tree(self); 160 callchain_node__init_have_children_rb_tree(self);
156} 161}
@@ -168,6 +173,7 @@ static void callchain__init_have_children(struct rb_root *self)
168static void hist_entry__init_have_children(struct hist_entry *self) 173static void hist_entry__init_have_children(struct hist_entry *self)
169{ 174{
170 if (!self->init_have_children) { 175 if (!self->init_have_children) {
176 self->ms.has_children = !RB_EMPTY_ROOT(&self->sorted_chain);
171 callchain__init_have_children(&self->sorted_chain); 177 callchain__init_have_children(&self->sorted_chain);
172 self->init_have_children = true; 178 self->init_have_children = true;
173 } 179 }
@@ -195,43 +201,114 @@ static bool hist_browser__toggle_fold(struct hist_browser *self)
195 return false; 201 return false;
196} 202}
197 203
198static int hist_browser__run(struct hist_browser *self, const char *title, 204static int callchain_node__set_folding_rb_tree(struct callchain_node *self, bool unfold)
199 struct newtExitStruct *es) 205{
206 int n = 0;
207 struct rb_node *nd;
208
209 for (nd = rb_first(&self->rb_root); nd; nd = rb_next(nd)) {
210 struct callchain_node *child = rb_entry(nd, struct callchain_node, rb_node);
211 struct callchain_list *chain;
212 bool has_children = false;
213
214 list_for_each_entry(chain, &child->val, list) {
215 ++n;
216 map_symbol__set_folding(&chain->ms, unfold);
217 has_children = chain->ms.has_children;
218 }
219
220 if (has_children)
221 n += callchain_node__set_folding_rb_tree(child, unfold);
222 }
223
224 return n;
225}
226
227static int callchain_node__set_folding(struct callchain_node *node, bool unfold)
228{
229 struct callchain_list *chain;
230 bool has_children = false;
231 int n = 0;
232
233 list_for_each_entry(chain, &node->val, list) {
234 ++n;
235 map_symbol__set_folding(&chain->ms, unfold);
236 has_children = chain->ms.has_children;
237 }
238
239 if (has_children)
240 n += callchain_node__set_folding_rb_tree(node, unfold);
241
242 return n;
243}
244
245static int callchain__set_folding(struct rb_root *chain, bool unfold)
246{
247 struct rb_node *nd;
248 int n = 0;
249
250 for (nd = rb_first(chain); nd; nd = rb_next(nd)) {
251 struct callchain_node *node = rb_entry(nd, struct callchain_node, rb_node);
252 n += callchain_node__set_folding(node, unfold);
253 }
254
255 return n;
256}
257
258static void hist_entry__set_folding(struct hist_entry *self, bool unfold)
259{
260 hist_entry__init_have_children(self);
261 map_symbol__set_folding(&self->ms, unfold);
262
263 if (self->ms.has_children) {
264 int n = callchain__set_folding(&self->sorted_chain, unfold);
265 self->nr_rows = unfold ? n : 0;
266 } else
267 self->nr_rows = 0;
268}
269
270static void hists__set_folding(struct hists *self, bool unfold)
271{
272 struct rb_node *nd;
273
274 self->nr_entries = 0;
275
276 for (nd = rb_first(&self->entries); nd; nd = rb_next(nd)) {
277 struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
278 hist_entry__set_folding(he, unfold);
279 self->nr_entries += 1 + he->nr_rows;
280 }
281}
282
283static void hist_browser__set_folding(struct hist_browser *self, bool unfold)
284{
285 hists__set_folding(self->hists, unfold);
286 self->b.nr_entries = self->hists->nr_entries;
287 /* Go to the start, we may be way after valid entries after a collapse */
288 ui_browser__reset_index(&self->b);
289}
290
291static int hist_browser__run(struct hist_browser *self, const char *title)
200{ 292{
201 char str[256], unit; 293 int key;
202 unsigned long nr_events = self->hists->stats.nr_events[PERF_RECORD_SAMPLE]; 294 int exit_keys[] = { 'a', '?', 'h', 'C', 'd', 'D', 'E', 't',
295 NEWT_KEY_ENTER, NEWT_KEY_RIGHT, NEWT_KEY_LEFT, 0, };
203 296
204 self->b.entries = &self->hists->entries; 297 self->b.entries = &self->hists->entries;
205 self->b.nr_entries = self->hists->nr_entries; 298 self->b.nr_entries = self->hists->nr_entries;
206 299
207 hist_browser__refresh_dimensions(self); 300 hist_browser__refresh_dimensions(self);
208 301
209 nr_events = convert_unit(nr_events, &unit);
210 snprintf(str, sizeof(str), "Events: %lu%c ",
211 nr_events, unit);
212 newtDrawRootText(0, 0, str);
213
214 if (ui_browser__show(&self->b, title, 302 if (ui_browser__show(&self->b, title,
215 "Press '?' for help on key bindings") < 0) 303 "Press '?' for help on key bindings") < 0)
216 return -1; 304 return -1;
217 305
218 newtFormAddHotKey(self->b.form, 'a'); 306 ui_browser__add_exit_keys(&self->b, exit_keys);
219 newtFormAddHotKey(self->b.form, '?');
220 newtFormAddHotKey(self->b.form, 'h');
221 newtFormAddHotKey(self->b.form, 'd');
222 newtFormAddHotKey(self->b.form, 'D');
223 newtFormAddHotKey(self->b.form, 't');
224
225 newtFormAddHotKey(self->b.form, NEWT_KEY_LEFT);
226 newtFormAddHotKey(self->b.form, NEWT_KEY_RIGHT);
227 newtFormAddHotKey(self->b.form, NEWT_KEY_ENTER);
228 307
229 while (1) { 308 while (1) {
230 ui_browser__run(&self->b, es); 309 key = ui_browser__run(&self->b);
231 310
232 if (es->reason != NEWT_EXIT_HOTKEY) 311 switch (key) {
233 break;
234 switch (es->u.key) {
235 case 'D': { /* Debug */ 312 case 'D': { /* Debug */
236 static int seq; 313 static int seq;
237 struct hist_entry *h = rb_entry(self->b.top, 314 struct hist_entry *h = rb_entry(self->b.top,
@@ -245,18 +322,26 @@ static int hist_browser__run(struct hist_browser *self, const char *title,
245 self->b.top_idx, 322 self->b.top_idx,
246 h->row_offset, h->nr_rows); 323 h->row_offset, h->nr_rows);
247 } 324 }
248 continue; 325 break;
326 case 'C':
327 /* Collapse the whole world. */
328 hist_browser__set_folding(self, false);
329 break;
330 case 'E':
331 /* Expand the whole world. */
332 hist_browser__set_folding(self, true);
333 break;
249 case NEWT_KEY_ENTER: 334 case NEWT_KEY_ENTER:
250 if (hist_browser__toggle_fold(self)) 335 if (hist_browser__toggle_fold(self))
251 break; 336 break;
252 /* fall thru */ 337 /* fall thru */
253 default: 338 default:
254 return 0; 339 goto out;
255 } 340 }
256 } 341 }
257 342out:
258 ui_browser__hide(&self->b); 343 ui_browser__hide(&self->b);
259 return 0; 344 return key;
260} 345}
261 346
262static char *callchain_list__sym_name(struct callchain_list *self, 347static char *callchain_list__sym_name(struct callchain_list *self,
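Editor's sketch: the new folding helpers above walk the callchain recursively, flipping each map_symbol's unfolded flag and counting how many rows become visible, so hist_entry__set_folding() can set nr_rows (zero when collapsing) and hists__set_folding() can recompute nr_entries for 'C' and 'E'. A self-contained toy with the same shape; struct node is a stand-in for callchain_node, not perf's type.

#include <stdio.h>
#include <stdbool.h>

/* Toy callchain node: a few lines of its own plus an array of children. */
struct node {
	int nr_lines;		/* callchain_list entries on this node */
	int nr_children;
	struct node *child;
	bool unfolded;
};

/*
 * Same counting idea as callchain_node__set_folding(): flip the fold
 * state everywhere and return the number of rows in the subtree, so the
 * caller can do "nr_rows = unfold ? n : 0" like hist_entry__set_folding().
 */
static int set_folding(struct node *self, bool unfold)
{
	int i, n = self->nr_lines;

	self->unfolded = unfold;
	for (i = 0; i < self->nr_children; i++)
		n += set_folding(&self->child[i], unfold);

	return n;
}

int main(void)
{
	struct node leaves[2] = { { 2, 0, NULL, false }, { 3, 0, NULL, false } };
	struct node root = { 1, 2, leaves, false };
	int n = set_folding(&root, true);

	printf("rows when expanded:  %d\n", n);	/* 1 + 2 + 3 = 6 */
	printf("rows when collapsed: %d\n", 0);
	return 0;
}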
@@ -306,15 +391,10 @@ static int hist_browser__show_callchain_node_rb_tree(struct hist_browser *self,
306 int color; 391 int color;
307 bool was_first = first; 392 bool was_first = first;
308 393
309 if (first) { 394 if (first)
310 first = false; 395 first = false;
311 chain->ms.has_children = chain->list.next != &child->val || 396 else
312 rb_first(&child->rb_root) != NULL;
313 } else {
314 extra_offset = LEVEL_OFFSET_STEP; 397 extra_offset = LEVEL_OFFSET_STEP;
315 chain->ms.has_children = chain->list.next == &child->val &&
316 rb_first(&child->rb_root) != NULL;
317 }
318 398
319 folded_sign = callchain_list__folded(chain); 399 folded_sign = callchain_list__folded(chain);
320 if (*row_offset != 0) { 400 if (*row_offset != 0) {
@@ -341,8 +421,8 @@ static int hist_browser__show_callchain_node_rb_tree(struct hist_browser *self,
341 *is_current_entry = true; 421 *is_current_entry = true;
342 } 422 }
343 423
344 SLsmg_set_color(color); 424 ui_browser__set_color(&self->b, color);
345 SLsmg_gotorc(self->b.y + row, self->b.x); 425 ui_browser__gotorc(&self->b, row, 0);
346 slsmg_write_nstring(" ", offset + extra_offset); 426 slsmg_write_nstring(" ", offset + extra_offset);
347 slsmg_printf("%c ", folded_sign); 427 slsmg_printf("%c ", folded_sign);
348 slsmg_write_nstring(str, width); 428 slsmg_write_nstring(str, width);
@@ -384,12 +464,7 @@ static int hist_browser__show_callchain_node(struct hist_browser *self,
384 list_for_each_entry(chain, &node->val, list) { 464 list_for_each_entry(chain, &node->val, list) {
385 char ipstr[BITS_PER_LONG / 4 + 1], *s; 465 char ipstr[BITS_PER_LONG / 4 + 1], *s;
386 int color; 466 int color;
387 /* 467
388 * FIXME: This should be moved to somewhere else,
389 * probably when the callchain is created, so as not to
390 * traverse it all over again
391 */
392 chain->ms.has_children = rb_first(&node->rb_root) != NULL;
393 folded_sign = callchain_list__folded(chain); 468 folded_sign = callchain_list__folded(chain);
394 469
395 if (*row_offset != 0) { 470 if (*row_offset != 0) {
@@ -405,8 +480,8 @@ static int hist_browser__show_callchain_node(struct hist_browser *self,
405 } 480 }
406 481
407 s = callchain_list__sym_name(chain, ipstr, sizeof(ipstr)); 482 s = callchain_list__sym_name(chain, ipstr, sizeof(ipstr));
408 SLsmg_gotorc(self->b.y + row, self->b.x); 483 ui_browser__gotorc(&self->b, row, 0);
409 SLsmg_set_color(color); 484 ui_browser__set_color(&self->b, color);
410 slsmg_write_nstring(" ", offset); 485 slsmg_write_nstring(" ", offset);
411 slsmg_printf("%c ", folded_sign); 486 slsmg_printf("%c ", folded_sign);
412 slsmg_write_nstring(s, width - 2); 487 slsmg_write_nstring(s, width - 2);
@@ -465,7 +540,7 @@ static int hist_browser__show_entry(struct hist_browser *self,
465 } 540 }
466 541
467 if (symbol_conf.use_callchain) { 542 if (symbol_conf.use_callchain) {
468 entry->ms.has_children = !RB_EMPTY_ROOT(&entry->sorted_chain); 543 hist_entry__init_have_children(entry);
469 folded_sign = hist_entry__folded(entry); 544 folded_sign = hist_entry__folded(entry);
470 } 545 }
471 546
@@ -484,8 +559,8 @@ static int hist_browser__show_entry(struct hist_browser *self,
484 color = HE_COLORSET_NORMAL; 559 color = HE_COLORSET_NORMAL;
485 } 560 }
486 561
487 SLsmg_set_color(color); 562 ui_browser__set_color(&self->b, color);
488 SLsmg_gotorc(self->b.y + row, self->b.x); 563 ui_browser__gotorc(&self->b, row, 0);
489 if (symbol_conf.use_callchain) { 564 if (symbol_conf.use_callchain) {
490 slsmg_printf("%c ", folded_sign); 565 slsmg_printf("%c ", folded_sign);
491 width -= 2; 566 width -= 2;
@@ -687,8 +762,6 @@ static struct hist_browser *hist_browser__new(struct hists *hists)
687 762
688static void hist_browser__delete(struct hist_browser *self) 763static void hist_browser__delete(struct hist_browser *self)
689{ 764{
690 newtFormDestroy(self->b.form);
691 newtPopWindow();
692 free(self); 765 free(self);
693} 766}
694 767
@@ -702,21 +775,26 @@ static struct thread *hist_browser__selected_thread(struct hist_browser *self)
702 return self->he_selection->thread; 775 return self->he_selection->thread;
703} 776}
704 777
705static int hist_browser__title(char *bf, size_t size, const char *ev_name, 778static int hists__browser_title(struct hists *self, char *bf, size_t size,
706 const struct dso *dso, const struct thread *thread) 779 const char *ev_name, const struct dso *dso,
780 const struct thread *thread)
707{ 781{
708 int printed = 0; 782 char unit;
783 int printed;
784 unsigned long nr_events = self->stats.nr_events[PERF_RECORD_SAMPLE];
785
786 nr_events = convert_unit(nr_events, &unit);
787 printed = snprintf(bf, size, "Events: %lu%c %s", nr_events, unit, ev_name);
709 788
710 if (thread) 789 if (thread)
711 printed += snprintf(bf + printed, size - printed, 790 printed += snprintf(bf + printed, size - printed,
712 "Thread: %s(%d)", 791 ", Thread: %s(%d)",
713 (thread->comm_set ? thread->comm : ""), 792 (thread->comm_set ? thread->comm : ""),
714 thread->pid); 793 thread->pid);
715 if (dso) 794 if (dso)
716 printed += snprintf(bf + printed, size - printed, 795 printed += snprintf(bf + printed, size - printed,
717 "%sDSO: %s", thread ? " " : "", 796 ", DSO: %s", dso->short_name);
718 dso->short_name); 797 return printed;
719 return printed ?: snprintf(bf, size, "Event: %s", ev_name);
720} 798}
721 799
722int hists__browse(struct hists *self, const char *helpline, const char *ev_name) 800int hists__browse(struct hists *self, const char *helpline, const char *ev_name)
@@ -725,7 +803,6 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name)
725 struct pstack *fstack; 803 struct pstack *fstack;
726 const struct thread *thread_filter = NULL; 804 const struct thread *thread_filter = NULL;
727 const struct dso *dso_filter = NULL; 805 const struct dso *dso_filter = NULL;
728 struct newtExitStruct es;
729 char msg[160]; 806 char msg[160];
730 int key = -1; 807 int key = -1;
731 808
@@ -738,9 +815,8 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name)
738 815
739 ui_helpline__push(helpline); 816 ui_helpline__push(helpline);
740 817
741 hist_browser__title(msg, sizeof(msg), ev_name, 818 hists__browser_title(self, msg, sizeof(msg), ev_name,
742 dso_filter, thread_filter); 819 dso_filter, thread_filter);
743
744 while (1) { 820 while (1) {
745 const struct thread *thread; 821 const struct thread *thread;
746 const struct dso *dso; 822 const struct dso *dso;
@@ -749,70 +825,63 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name)
749 annotate = -2, zoom_dso = -2, zoom_thread = -2, 825 annotate = -2, zoom_dso = -2, zoom_thread = -2,
750 browse_map = -2; 826 browse_map = -2;
751 827
752 if (hist_browser__run(browser, msg, &es)) 828 key = hist_browser__run(browser, msg);
753 break;
754 829
755 thread = hist_browser__selected_thread(browser); 830 thread = hist_browser__selected_thread(browser);
756 dso = browser->selection->map ? browser->selection->map->dso : NULL; 831 dso = browser->selection->map ? browser->selection->map->dso : NULL;
757 832
758 if (es.reason == NEWT_EXIT_HOTKEY) { 833 switch (key) {
759 key = es.u.key; 834 case NEWT_KEY_TAB:
760 835 case NEWT_KEY_UNTAB:
761 switch (key) { 836 /*
762 case NEWT_KEY_F1: 837 * Exit the browser, let hists__browser_tree
763 goto do_help; 838 * go to the next or previous
764 case NEWT_KEY_TAB: 839 */
765 case NEWT_KEY_UNTAB: 840 goto out_free_stack;
766 /* 841 case 'a':
767 * Exit the browser, let hists__browser_tree 842 if (browser->selection->map == NULL &&
768 * go to the next or previous 843 browser->selection->map->dso->annotate_warned)
769 */
770 goto out_free_stack;
771 default:;
772 }
773
774 switch (key) {
775 case 'a':
776 if (browser->selection->map == NULL &&
777 browser->selection->map->dso->annotate_warned)
778 continue;
779 goto do_annotate;
780 case 'd':
781 goto zoom_dso;
782 case 't':
783 goto zoom_thread;
784 case 'h':
785 case '?':
786do_help:
787 ui__help_window("-> Zoom into DSO/Threads & Annotate current symbol\n"
788 "<- Zoom out\n"
789 "a Annotate current symbol\n"
790 "h/?/F1 Show this window\n"
791 "d Zoom into current DSO\n"
792 "t Zoom into current Thread\n"
793 "q/CTRL+C Exit browser");
794 continue; 844 continue;
795 default:; 845 goto do_annotate;
796 } 846 case 'd':
797 if (is_exit_key(key)) { 847 goto zoom_dso;
798 if (key == NEWT_KEY_ESCAPE && 848 case 't':
799 !ui__dialog_yesno("Do you really want to exit?")) 849 goto zoom_thread;
800 continue; 850 case NEWT_KEY_F1:
801 break; 851 case 'h':
802 } 852 case '?':
803 853 ui__help_window("-> Zoom into DSO/Threads & Annotate current symbol\n"
804 if (es.u.key == NEWT_KEY_LEFT) { 854 "<- Zoom out\n"
805 const void *top; 855 "a Annotate current symbol\n"
856 "h/?/F1 Show this window\n"
857 "C Collapse all callchains\n"
858 "E Expand all callchains\n"
859 "d Zoom into current DSO\n"
860 "t Zoom into current Thread\n"
861 "q/CTRL+C Exit browser");
862 continue;
863 case NEWT_KEY_ENTER:
864 case NEWT_KEY_RIGHT:
865 /* menu */
866 break;
867 case NEWT_KEY_LEFT: {
868 const void *top;
806 869
807 if (pstack__empty(fstack)) 870 if (pstack__empty(fstack))
808 continue;
809 top = pstack__pop(fstack);
810 if (top == &dso_filter)
811 goto zoom_out_dso;
812 if (top == &thread_filter)
813 goto zoom_out_thread;
814 continue; 871 continue;
815 } 872 top = pstack__pop(fstack);
873 if (top == &dso_filter)
874 goto zoom_out_dso;
875 if (top == &thread_filter)
876 goto zoom_out_thread;
877 continue;
878 }
879 case NEWT_KEY_ESCAPE:
880 if (!ui__dialog_yesno("Do you really want to exit?"))
881 continue;
882 /* Fall thru */
883 default:
884 goto out_free_stack;
816 } 885 }
817 886
818 if (browser->selection->sym != NULL && 887 if (browser->selection->sym != NULL &&
@@ -885,8 +954,8 @@ zoom_out_dso:
885 pstack__push(fstack, &dso_filter); 954 pstack__push(fstack, &dso_filter);
886 } 955 }
887 hists__filter_by_dso(self, dso_filter); 956 hists__filter_by_dso(self, dso_filter);
888 hist_browser__title(msg, sizeof(msg), ev_name, 957 hists__browser_title(self, msg, sizeof(msg), ev_name,
889 dso_filter, thread_filter); 958 dso_filter, thread_filter);
890 hist_browser__reset(browser); 959 hist_browser__reset(browser);
891 } else if (choice == zoom_thread) { 960 } else if (choice == zoom_thread) {
892zoom_thread: 961zoom_thread:
@@ -903,8 +972,8 @@ zoom_out_thread:
903 pstack__push(fstack, &thread_filter); 972 pstack__push(fstack, &thread_filter);
904 } 973 }
905 hists__filter_by_thread(self, thread_filter); 974 hists__filter_by_thread(self, thread_filter);
906 hist_browser__title(msg, sizeof(msg), ev_name, 975 hists__browser_title(self, msg, sizeof(msg), ev_name,
907 dso_filter, thread_filter); 976 dso_filter, thread_filter);
908 hist_browser__reset(browser); 977 hist_browser__reset(browser);
909 } 978 }
910 } 979 }
@@ -925,10 +994,6 @@ int hists__tui_browse_tree(struct rb_root *self, const char *help)
925 const char *ev_name = __event_name(hists->type, hists->config); 994 const char *ev_name = __event_name(hists->type, hists->config);
926 995
927 key = hists__browse(hists, help, ev_name); 996 key = hists__browse(hists, help, ev_name);
928
929 if (is_exit_key(key))
930 break;
931
932 switch (key) { 997 switch (key) {
933 case NEWT_KEY_TAB: 998 case NEWT_KEY_TAB:
934 next = rb_next(nd); 999 next = rb_next(nd);
@@ -940,7 +1005,7 @@ int hists__tui_browse_tree(struct rb_root *self, const char *help)
940 continue; 1005 continue;
941 nd = rb_prev(nd); 1006 nd = rb_prev(nd);
942 default: 1007 default:
943 break; 1008 return key;
944 } 1009 }
945 } 1010 }
946 1011
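Editor's sketch: hists__browser_title() now folds the scaled event count and any active thread/DSO zoom filter into one header string through incremental snprintf() calls, replacing the old newtDrawRootText() banner. A standalone sketch of that accumulation; scale_events() is a local stand-in for perf's convert_unit(), and its thresholds are illustrative.

#include <stdio.h>

/* Local stand-in for convert_unit(): scale a count and pick a suffix. */
static unsigned long scale_events(unsigned long value, char *unit)
{
	*unit = ' ';
	if (value > 1000) { value /= 1000; *unit = 'K'; }
	if (value > 1000) { value /= 1000; *unit = 'M'; }
	return value;
}

/*
 * Same accumulation pattern as hists__browser_title(): start with the
 * event count, then append ", Thread: ..." / ", DSO: ..." only when the
 * corresponding zoom filter is active.
 */
static int browser_title(char *bf, size_t size, const char *ev_name,
			 unsigned long nr_events,
			 const char *thread, const char *dso)
{
	char unit;
	int printed;

	nr_events = scale_events(nr_events, &unit);
	printed = snprintf(bf, size, "Events: %lu%c %s", nr_events, unit, ev_name);

	if (thread)
		printed += snprintf(bf + printed, size - printed,
				    ", Thread: %s", thread);
	if (dso)
		printed += snprintf(bf + printed, size - printed,
				    ", DSO: %s", dso);
	return printed;
}

int main(void)
{
	char msg[160];

	browser_title(msg, sizeof(msg), "cycles", 1234567, "firefox(1234)", NULL);
	printf("%s\n", msg);
	return 0;
}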
diff --git a/tools/perf/util/ui/browsers/map.c b/tools/perf/util/ui/browsers/map.c
index 142b825b42bf..e35437dfa5b4 100644
--- a/tools/perf/util/ui/browsers/map.c
+++ b/tools/perf/util/ui/browsers/map.c
@@ -1,6 +1,5 @@
1#include "../libslang.h" 1#include "../libslang.h"
2#include <elf.h> 2#include <elf.h>
3#include <newt.h>
4#include <sys/ttydefaults.h> 3#include <sys/ttydefaults.h>
5#include <ctype.h> 4#include <ctype.h>
6#include <string.h> 5#include <string.h>
@@ -47,7 +46,6 @@ out_free_form:
47struct map_browser { 46struct map_browser {
48 struct ui_browser b; 47 struct ui_browser b;
49 struct map *map; 48 struct map *map;
50 u16 namelen;
51 u8 addrlen; 49 u8 addrlen;
52}; 50};
53 51
@@ -56,14 +54,16 @@ static void map_browser__write(struct ui_browser *self, void *nd, int row)
56 struct symbol *sym = rb_entry(nd, struct symbol, rb_node); 54 struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
57 struct map_browser *mb = container_of(self, struct map_browser, b); 55 struct map_browser *mb = container_of(self, struct map_browser, b);
58 bool current_entry = ui_browser__is_current_entry(self, row); 56 bool current_entry = ui_browser__is_current_entry(self, row);
59 int color = ui_browser__percent_color(0, current_entry); 57 int width;
60 58
61 SLsmg_set_color(color); 59 ui_browser__set_percent_color(self, 0, current_entry);
62 slsmg_printf("%*llx %*llx %c ", 60 slsmg_printf("%*llx %*llx %c ",
63 mb->addrlen, sym->start, mb->addrlen, sym->end, 61 mb->addrlen, sym->start, mb->addrlen, sym->end,
64 sym->binding == STB_GLOBAL ? 'g' : 62 sym->binding == STB_GLOBAL ? 'g' :
65 sym->binding == STB_LOCAL ? 'l' : 'w'); 63 sym->binding == STB_LOCAL ? 'l' : 'w');
66 slsmg_write_nstring(sym->name, mb->namelen); 64 width = self->width - ((mb->addrlen * 2) + 4);
65 if (width > 0)
66 slsmg_write_nstring(sym->name, width);
67} 67}
68 68
69/* FIXME uber-kludgy, see comment on cmd_report... */ 69/* FIXME uber-kludgy, see comment on cmd_report... */
@@ -98,31 +98,29 @@ static int map_browser__search(struct map_browser *self)
98 return 0; 98 return 0;
99} 99}
100 100
101static int map_browser__run(struct map_browser *self, struct newtExitStruct *es) 101static int map_browser__run(struct map_browser *self)
102{ 102{
103 int key;
104
103 if (ui_browser__show(&self->b, self->map->dso->long_name, 105 if (ui_browser__show(&self->b, self->map->dso->long_name,
104 "Press <- or ESC to exit, %s / to search", 106 "Press <- or ESC to exit, %s / to search",
105 verbose ? "" : "restart with -v to use") < 0) 107 verbose ? "" : "restart with -v to use") < 0)
106 return -1; 108 return -1;
107 109
108 newtFormAddHotKey(self->b.form, NEWT_KEY_LEFT);
109 newtFormAddHotKey(self->b.form, NEWT_KEY_ENTER);
110 if (verbose) 110 if (verbose)
111 newtFormAddHotKey(self->b.form, '/'); 111 ui_browser__add_exit_key(&self->b, '/');
112 112
113 while (1) { 113 while (1) {
114 ui_browser__run(&self->b, es); 114 key = ui_browser__run(&self->b);
115 115
116 if (es->reason != NEWT_EXIT_HOTKEY) 116 if (verbose && key == '/')
117 break;
118 if (verbose && es->u.key == '/')
119 map_browser__search(self); 117 map_browser__search(self);
120 else 118 else
121 break; 119 break;
122 } 120 }
123 121
124 ui_browser__hide(&self->b); 122 ui_browser__hide(&self->b);
125 return 0; 123 return key;
126} 124}
127 125
128int map__browse(struct map *self) 126int map__browse(struct map *self)
@@ -136,7 +134,6 @@ int map__browse(struct map *self)
136 }, 134 },
137 .map = self, 135 .map = self,
138 }; 136 };
139 struct newtExitStruct es;
140 struct rb_node *nd; 137 struct rb_node *nd;
141 char tmp[BITS_PER_LONG / 4]; 138 char tmp[BITS_PER_LONG / 4];
142 u64 maxaddr = 0; 139 u64 maxaddr = 0;
@@ -144,8 +141,6 @@ int map__browse(struct map *self)
144 for (nd = rb_first(mb.b.entries); nd; nd = rb_next(nd)) { 141 for (nd = rb_first(mb.b.entries); nd; nd = rb_next(nd)) {
145 struct symbol *pos = rb_entry(nd, struct symbol, rb_node); 142 struct symbol *pos = rb_entry(nd, struct symbol, rb_node);
146 143
147 if (mb.namelen < pos->namelen)
148 mb.namelen = pos->namelen;
149 if (maxaddr < pos->end) 144 if (maxaddr < pos->end)
150 maxaddr = pos->end; 145 maxaddr = pos->end;
151 if (verbose) { 146 if (verbose) {
@@ -156,6 +151,5 @@ int map__browse(struct map *self)
156 } 151 }
157 152
158 mb.addrlen = snprintf(tmp, sizeof(tmp), "%llx", maxaddr); 153 mb.addrlen = snprintf(tmp, sizeof(tmp), "%llx", maxaddr);
159 mb.b.width += mb.addrlen * 2 + 4 + mb.namelen; 154 return map_browser__run(&mb);
160 return map_browser__run(&mb, &es);
161} 155}
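Editor's sketch: map_browser__write() stops tracking the longest symbol name (the dropped namelen field) and instead gives the name column whatever width remains after the two address columns plus the binding character and spacing (2 * addrlen + 4), clamped at zero. A standalone approximation using printf() in place of slsmg_write_nstring().

#include <stdio.h>

/*
 * Sketch of the new width handling: compute the leftover width per row
 * and only print the name when something remains.
 */
static void write_row(int browser_width, int addrlen,
		      unsigned long long start, unsigned long long end,
		      char binding, const char *name)
{
	int width = browser_width - ((addrlen * 2) + 4);

	printf("%*llx %*llx %c ", addrlen, start, addrlen, end, binding);
	if (width > 0)
		printf("%-*.*s", width, width, name);
	putchar('\n');
}

int main(void)
{
	write_row(60, 8, 0x400100ULL, 0x4001f0ULL, 'g', "main");
	write_row(60, 8, 0x4001f0ULL, 0x400300ULL, 'l', "very_long_static_helper_name");
	return 0;
}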
diff --git a/tools/perf/util/ui/util.c b/tools/perf/util/ui/util.c
index 04600e26ceea..9706d9d40279 100644
--- a/tools/perf/util/ui/util.c
+++ b/tools/perf/util/ui/util.c
@@ -11,8 +11,6 @@
11#include "helpline.h" 11#include "helpline.h"
12#include "util.h" 12#include "util.h"
13 13
14newtComponent newt_form__new(void);
15
16static void newt_form__set_exit_keys(newtComponent self) 14static void newt_form__set_exit_keys(newtComponent self)
17{ 15{
18 newtFormAddHotKey(self, NEWT_KEY_LEFT); 16 newtFormAddHotKey(self, NEWT_KEY_LEFT);
@@ -22,7 +20,7 @@ static void newt_form__set_exit_keys(newtComponent self)
22 newtFormAddHotKey(self, CTRL('c')); 20 newtFormAddHotKey(self, CTRL('c'));
23} 21}
24 22
25newtComponent newt_form__new(void) 23static newtComponent newt_form__new(void)
26{ 24{
27 newtComponent self = newtForm(NULL, NULL, 0); 25 newtComponent self = newtForm(NULL, NULL, 0);
28 if (self) 26 if (self)
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index f380fed74359..7562707ddd1c 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -266,19 +266,6 @@ bool strglobmatch(const char *str, const char *pat);
266bool strlazymatch(const char *str, const char *pat); 266bool strlazymatch(const char *str, const char *pat);
267unsigned long convert_unit(unsigned long value, char *unit); 267unsigned long convert_unit(unsigned long value, char *unit);
268 268
269#ifndef ESC
270#define ESC 27
271#endif
272
273static inline bool is_exit_key(int key)
274{
275 char up;
276 if (key == CTRL('c') || key == ESC)
277 return true;
278 up = toupper(key);
279 return up == 'Q';
280}
281
282#define _STR(x) #x 269#define _STR(x) #x
283#define STR(x) _STR(x) 270#define STR(x) _STR(x)
284 271
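Editor's sketch: with is_exit_key() removed from util.h, the equivalent keys are now registered per form by ui_browser__show()'s default exit-key list, and each caller inspects whatever key ui_browser__run() returns (hists__browse, for example, confirms on ESC before leaving). The toy predicate below merely documents the printable/control keys in that default list; it is not part of the patch, and NEWT_KEY_LEFT, also in the list, is omitted so the example builds without newt.h.

#include <stdio.h>
#include <sys/ttydefaults.h>	/* CTRL() */

#ifndef ESC
#define ESC 27
#endif

/* Keys that ui_browser__show() now adds to every form by default. */
static int is_default_exit_key(int key)
{
	return key == 'q' || key == ESC || key == CTRL('c');
}

int main(void)
{
	printf("'q'    -> %d\n", is_default_exit_key('q'));
	printf("ESC    -> %d\n", is_default_exit_key(ESC));
	printf("Ctrl+C -> %d\n", is_default_exit_key(CTRL('c')));
	printf("'x'    -> %d\n", is_default_exit_key('x'));
	return 0;
}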