about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Peter Zijlstra <a.p.zijlstra@chello.nl> 2010-06-11 07:35:08 -0400
committer Ingo Molnar <mingo@elte.hu> 2010-09-09 14:46:28 -0400
commit b0a873ebbf87bf38bf70b5e39a7cadc96099fa13 (patch)
tree 63ab672b847aed295f99b9b2a3bbcfd5d3d35bd9
parent 51b0fe39549a04858001922919ab355dee9bdfcf (diff)
perf: Register PMU implementations
Simple registration interface for struct pmu; this provides the infrastructure for removing all the weak functions.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: paulus <paulus@samba.org>
Cc: stephane eranian <eranian@googlemail.com>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Lin Ming <ming.m.lin@intel.com>
Cc: Yanmin <yanmin_zhang@linux.intel.com>
Cc: Deng-Cheng Zhu <dengcheng.zhu@gmail.com>
Cc: David Miller <davem@davemloft.net>
Cc: Michael Cree <mcree@orcon.net.nz>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- arch/alpha/kernel/perf_event.c 37
-rw-r--r-- arch/arm/kernel/perf_event.c 38
-rw-r--r-- arch/powerpc/kernel/perf_event.c 46
-rw-r--r-- arch/powerpc/kernel/perf_event_fsl_emb.c 37
-rw-r--r-- arch/sh/kernel/perf_event.c 35
-rw-r--r-- arch/sparc/kernel/perf_event.c 29
-rw-r--r-- arch/x86/kernel/cpu/perf_event.c 45
-rw-r--r-- include/linux/perf_event.h 10
-rw-r--r-- kernel/hw_breakpoint.c 35
-rw-r--r-- kernel/perf_event.c 588
10 files changed, 488 insertions, 412 deletions
diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c
index 56fa4159038..19660b5c298 100644
--- a/arch/alpha/kernel/perf_event.c
+++ b/arch/alpha/kernel/perf_event.c
@@ -642,34 +642,39 @@ static int __hw_perf_event_init(struct perf_event *event)
642 return 0; 642 return 0;
643} 643}
644 644
645static struct pmu pmu = {
646 .enable = alpha_pmu_enable,
647 .disable = alpha_pmu_disable,
648 .read = alpha_pmu_read,
649 .unthrottle = alpha_pmu_unthrottle,
650};
651
652
653/* 645/*
654 * Main entry point to initialise a HW performance event. 646 * Main entry point to initialise a HW performance event.
655 */ 647 */
656struct pmu *hw_perf_event_init(struct perf_event *event) 648static int alpha_pmu_event_init(struct perf_event *event)
657{ 649{
658 int err; 650 int err;
659 651
652 switch (event->attr.type) {
653 case PERF_TYPE_RAW:
654 case PERF_TYPE_HARDWARE:
655 case PERF_TYPE_HW_CACHE:
656 break;
657
658 default:
659 return -ENOENT;
660 }
661
660 if (!alpha_pmu) 662 if (!alpha_pmu)
661 return ERR_PTR(-ENODEV); 663 return -ENODEV;
662 664
663 /* Do the real initialisation work. */ 665 /* Do the real initialisation work. */
664 err = __hw_perf_event_init(event); 666 err = __hw_perf_event_init(event);
665 667
666 if (err) 668 return err;
667 return ERR_PTR(err);
668
669 return &pmu;
670} 669}
671 670
672 671static struct pmu pmu = {
672 .event_init = alpha_pmu_event_init,
673 .enable = alpha_pmu_enable,
674 .disable = alpha_pmu_disable,
675 .read = alpha_pmu_read,
676 .unthrottle = alpha_pmu_unthrottle,
677};
673 678
674/* 679/*
675 * Main entry point - enable HW performance counters. 680 * Main entry point - enable HW performance counters.
@@ -838,5 +843,7 @@ void __init init_hw_perf_events(void)
838 /* And set up PMU specification */ 843 /* And set up PMU specification */
839 alpha_pmu = &ev67_pmu; 844 alpha_pmu = &ev67_pmu;
840 perf_max_events = alpha_pmu->num_pmcs; 845 perf_max_events = alpha_pmu->num_pmcs;
846
847 perf_pmu_register(&pmu);
841} 848}
842 849
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 0671e92c511..f62f9db35db 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -306,12 +306,7 @@ out:
306 return err; 306 return err;
307} 307}
308 308
309static struct pmu pmu = { 309static struct pmu pmu;
310 .enable = armpmu_enable,
311 .disable = armpmu_disable,
312 .unthrottle = armpmu_unthrottle,
313 .read = armpmu_read,
314};
315 310
316static int 311static int
317validate_event(struct cpu_hw_events *cpuc, 312validate_event(struct cpu_hw_events *cpuc,
@@ -491,20 +486,29 @@ __hw_perf_event_init(struct perf_event *event)
491 return err; 486 return err;
492} 487}
493 488
494struct pmu * 489static int armpmu_event_init(struct perf_event *event)
495hw_perf_event_init(struct perf_event *event)
496{ 490{
497 int err = 0; 491 int err = 0;
498 492
493 switch (event->attr.type) {
494 case PERF_TYPE_RAW:
495 case PERF_TYPE_HARDWARE:
496 case PERF_TYPE_HW_CACHE:
497 break;
498
499 default:
500 return -ENOENT;
501 }
502
499 if (!armpmu) 503 if (!armpmu)
500 return ERR_PTR(-ENODEV); 504 return -ENODEV;
501 505
502 event->destroy = hw_perf_event_destroy; 506 event->destroy = hw_perf_event_destroy;
503 507
504 if (!atomic_inc_not_zero(&active_events)) { 508 if (!atomic_inc_not_zero(&active_events)) {
505 if (atomic_read(&active_events) > perf_max_events) { 509 if (atomic_read(&active_events) > perf_max_events) {
506 atomic_dec(&active_events); 510 atomic_dec(&active_events);
507 return ERR_PTR(-ENOSPC); 511 return -ENOSPC;
508 } 512 }
509 513
510 mutex_lock(&pmu_reserve_mutex); 514 mutex_lock(&pmu_reserve_mutex);
@@ -518,15 +522,23 @@ hw_perf_event_init(struct perf_event *event)
518 } 522 }
519 523
520 if (err) 524 if (err)
521 return ERR_PTR(err); 525 return err;
522 526
523 err = __hw_perf_event_init(event); 527 err = __hw_perf_event_init(event);
524 if (err) 528 if (err)
525 hw_perf_event_destroy(event); 529 hw_perf_event_destroy(event);
526 530
527 return err ? ERR_PTR(err) : &pmu; 531 return err;
528} 532}
529 533
534static struct pmu pmu = {
535 .event_init = armpmu_event_init,
536 .enable = armpmu_enable,
537 .disable = armpmu_disable,
538 .unthrottle = armpmu_unthrottle,
539 .read = armpmu_read,
540};
541
530void 542void
531hw_perf_enable(void) 543hw_perf_enable(void)
532{ 544{
@@ -2994,6 +3006,8 @@ init_hw_perf_events(void)
2994 perf_max_events = -1; 3006 perf_max_events = -1;
2995 } 3007 }
2996 3008
3009 perf_pmu_register(&pmu);
3010
2997 return 0; 3011 return 0;
2998} 3012}
2999arch_initcall(init_hw_perf_events); 3013arch_initcall(init_hw_perf_events);
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index 5f78681ad90..19131b2614b 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -904,16 +904,6 @@ int power_pmu_commit_txn(struct pmu *pmu)
904 return 0; 904 return 0;
905} 905}
906 906
907struct pmu power_pmu = {
908 .enable = power_pmu_enable,
909 .disable = power_pmu_disable,
910 .read = power_pmu_read,
911 .unthrottle = power_pmu_unthrottle,
912 .start_txn = power_pmu_start_txn,
913 .cancel_txn = power_pmu_cancel_txn,
914 .commit_txn = power_pmu_commit_txn,
915};
916
917/* 907/*
918 * Return 1 if we might be able to put event on a limited PMC, 908 * Return 1 if we might be able to put event on a limited PMC,
919 * or 0 if not. 909 * or 0 if not.
@@ -1014,7 +1004,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp)
1014 return 0; 1004 return 0;
1015} 1005}
1016 1006
1017struct pmu *hw_perf_event_init(struct perf_event *event) 1007static int power_pmu_event_init(struct perf_event *event)
1018{ 1008{
1019 u64 ev; 1009 u64 ev;
1020 unsigned long flags; 1010 unsigned long flags;
@@ -1026,25 +1016,27 @@ struct pmu *hw_perf_event_init(struct perf_event *event)
1026 struct cpu_hw_events *cpuhw; 1016 struct cpu_hw_events *cpuhw;
1027 1017
1028 if (!ppmu) 1018 if (!ppmu)
1029 return ERR_PTR(-ENXIO); 1019 return -ENOENT;
1020
1030 switch (event->attr.type) { 1021 switch (event->attr.type) {
1031 case PERF_TYPE_HARDWARE: 1022 case PERF_TYPE_HARDWARE:
1032 ev = event->attr.config; 1023 ev = event->attr.config;
1033 if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) 1024 if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
1034 return ERR_PTR(-EOPNOTSUPP); 1025 return -EOPNOTSUPP;
1035 ev = ppmu->generic_events[ev]; 1026 ev = ppmu->generic_events[ev];
1036 break; 1027 break;
1037 case PERF_TYPE_HW_CACHE: 1028 case PERF_TYPE_HW_CACHE:
1038 err = hw_perf_cache_event(event->attr.config, &ev); 1029 err = hw_perf_cache_event(event->attr.config, &ev);
1039 if (err) 1030 if (err)
1040 return ERR_PTR(err); 1031 return err;
1041 break; 1032 break;
1042 case PERF_TYPE_RAW: 1033 case PERF_TYPE_RAW:
1043 ev = event->attr.config; 1034 ev = event->attr.config;
1044 break; 1035 break;
1045 default: 1036 default:
1046 return ERR_PTR(-EINVAL); 1037 return -ENOENT;
1047 } 1038 }
1039
1048 event->hw.config_base = ev; 1040 event->hw.config_base = ev;
1049 event->hw.idx = 0; 1041 event->hw.idx = 0;
1050 1042
@@ -1081,7 +1073,7 @@ struct pmu *hw_perf_event_init(struct perf_event *event)
1081 */ 1073 */
1082 ev = normal_pmc_alternative(ev, flags); 1074 ev = normal_pmc_alternative(ev, flags);
1083 if (!ev) 1075 if (!ev)
1084 return ERR_PTR(-EINVAL); 1076 return -EINVAL;
1085 } 1077 }
1086 } 1078 }
1087 1079
@@ -1095,19 +1087,19 @@ struct pmu *hw_perf_event_init(struct perf_event *event)
1095 n = collect_events(event->group_leader, ppmu->n_counter - 1, 1087 n = collect_events(event->group_leader, ppmu->n_counter - 1,
1096 ctrs, events, cflags); 1088 ctrs, events, cflags);
1097 if (n < 0) 1089 if (n < 0)
1098 return ERR_PTR(-EINVAL); 1090 return -EINVAL;
1099 } 1091 }
1100 events[n] = ev; 1092 events[n] = ev;
1101 ctrs[n] = event; 1093 ctrs[n] = event;
1102 cflags[n] = flags; 1094 cflags[n] = flags;
1103 if (check_excludes(ctrs, cflags, n, 1)) 1095 if (check_excludes(ctrs, cflags, n, 1))
1104 return ERR_PTR(-EINVAL); 1096 return -EINVAL;
1105 1097
1106 cpuhw = &get_cpu_var(cpu_hw_events); 1098 cpuhw = &get_cpu_var(cpu_hw_events);
1107 err = power_check_constraints(cpuhw, events, cflags, n + 1); 1099 err = power_check_constraints(cpuhw, events, cflags, n + 1);
1108 put_cpu_var(cpu_hw_events); 1100 put_cpu_var(cpu_hw_events);
1109 if (err) 1101 if (err)
1110 return ERR_PTR(-EINVAL); 1102 return -EINVAL;
1111 1103
1112 event->hw.config = events[n]; 1104 event->hw.config = events[n];
1113 event->hw.event_base = cflags[n]; 1105 event->hw.event_base = cflags[n];
@@ -1132,11 +1124,20 @@ struct pmu *hw_perf_event_init(struct perf_event *event)
1132 } 1124 }
1133 event->destroy = hw_perf_event_destroy; 1125 event->destroy = hw_perf_event_destroy;
1134 1126
1135 if (err) 1127 return err;
1136 return ERR_PTR(err);
1137 return &power_pmu;
1138} 1128}
1139 1129
1130struct pmu power_pmu = {
1131 .event_init = power_pmu_event_init,
1132 .enable = power_pmu_enable,
1133 .disable = power_pmu_disable,
1134 .read = power_pmu_read,
1135 .unthrottle = power_pmu_unthrottle,
1136 .start_txn = power_pmu_start_txn,
1137 .cancel_txn = power_pmu_cancel_txn,
1138 .commit_txn = power_pmu_commit_txn,
1139};
1140
1140/* 1141/*
1141 * A counter has overflowed; update its count and record 1142 * A counter has overflowed; update its count and record
1142 * things if requested. Note that interrupts are hard-disabled 1143 * things if requested. Note that interrupts are hard-disabled
@@ -1342,6 +1343,7 @@ int register_power_pmu(struct power_pmu *pmu)
1342 freeze_events_kernel = MMCR0_FCHV; 1343 freeze_events_kernel = MMCR0_FCHV;
1343#endif /* CONFIG_PPC64 */ 1344#endif /* CONFIG_PPC64 */
1344 1345
1346 perf_pmu_register(&power_pmu);
1345 perf_cpu_notifier(power_pmu_notifier); 1347 perf_cpu_notifier(power_pmu_notifier);
1346 1348
1347 return 0; 1349 return 0;
diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c
index d7619b5e7a6..ea6a804e43f 100644
--- a/arch/powerpc/kernel/perf_event_fsl_emb.c
+++ b/arch/powerpc/kernel/perf_event_fsl_emb.c
@@ -378,13 +378,6 @@ static void fsl_emb_pmu_unthrottle(struct perf_event *event)
378 local_irq_restore(flags); 378 local_irq_restore(flags);
379} 379}
380 380
381static struct pmu fsl_emb_pmu = {
382 .enable = fsl_emb_pmu_enable,
383 .disable = fsl_emb_pmu_disable,
384 .read = fsl_emb_pmu_read,
385 .unthrottle = fsl_emb_pmu_unthrottle,
386};
387
388/* 381/*
389 * Release the PMU if this is the last perf_event. 382 * Release the PMU if this is the last perf_event.
390 */ 383 */
@@ -428,7 +421,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp)
428 return 0; 421 return 0;
429} 422}
430 423
431struct pmu *hw_perf_event_init(struct perf_event *event) 424static int fsl_emb_pmu_event_init(struct perf_event *event)
432{ 425{
433 u64 ev; 426 u64 ev;
434 struct perf_event *events[MAX_HWEVENTS]; 427 struct perf_event *events[MAX_HWEVENTS];
@@ -441,14 +434,14 @@ struct pmu *hw_perf_event_init(struct perf_event *event)
441 case PERF_TYPE_HARDWARE: 434 case PERF_TYPE_HARDWARE:
442 ev = event->attr.config; 435 ev = event->attr.config;
443 if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) 436 if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
444 return ERR_PTR(-EOPNOTSUPP); 437 return -EOPNOTSUPP;
445 ev = ppmu->generic_events[ev]; 438 ev = ppmu->generic_events[ev];
446 break; 439 break;
447 440
448 case PERF_TYPE_HW_CACHE: 441 case PERF_TYPE_HW_CACHE:
449 err = hw_perf_cache_event(event->attr.config, &ev); 442 err = hw_perf_cache_event(event->attr.config, &ev);
450 if (err) 443 if (err)
451 return ERR_PTR(err); 444 return err;
452 break; 445 break;
453 446
454 case PERF_TYPE_RAW: 447 case PERF_TYPE_RAW:
@@ -456,12 +449,12 @@ struct pmu *hw_perf_event_init(struct perf_event *event)
456 break; 449 break;
457 450
458 default: 451 default:
459 return ERR_PTR(-EINVAL); 452 return -ENOENT;
460 } 453 }
461 454
462 event->hw.config = ppmu->xlate_event(ev); 455 event->hw.config = ppmu->xlate_event(ev);
463 if (!(event->hw.config & FSL_EMB_EVENT_VALID)) 456 if (!(event->hw.config & FSL_EMB_EVENT_VALID))
464 return ERR_PTR(-EINVAL); 457 return -EINVAL;
465 458
466 /* 459 /*
467 * If this is in a group, check if it can go on with all the 460 * If this is in a group, check if it can go on with all the
@@ -473,7 +466,7 @@ struct pmu *hw_perf_event_init(struct perf_event *event)
473 n = collect_events(event->group_leader, 466 n = collect_events(event->group_leader,
474 ppmu->n_counter - 1, events); 467 ppmu->n_counter - 1, events);
475 if (n < 0) 468 if (n < 0)
476 return ERR_PTR(-EINVAL); 469 return -EINVAL;
477 } 470 }
478 471
479 if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) { 472 if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) {
@@ -484,7 +477,7 @@ struct pmu *hw_perf_event_init(struct perf_event *event)
484 } 477 }
485 478
486 if (num_restricted >= ppmu->n_restricted) 479 if (num_restricted >= ppmu->n_restricted)
487 return ERR_PTR(-EINVAL); 480 return -EINVAL;
488 } 481 }
489 482
490 event->hw.idx = -1; 483 event->hw.idx = -1;
@@ -497,7 +490,7 @@ struct pmu *hw_perf_event_init(struct perf_event *event)
497 if (event->attr.exclude_kernel) 490 if (event->attr.exclude_kernel)
498 event->hw.config_base |= PMLCA_FCS; 491 event->hw.config_base |= PMLCA_FCS;
499 if (event->attr.exclude_idle) 492 if (event->attr.exclude_idle)
500 return ERR_PTR(-ENOTSUPP); 493 return -ENOTSUPP;
501 494
502 event->hw.last_period = event->hw.sample_period; 495 event->hw.last_period = event->hw.sample_period;
503 local64_set(&event->hw.period_left, event->hw.last_period); 496 local64_set(&event->hw.period_left, event->hw.last_period);
@@ -523,11 +516,17 @@ struct pmu *hw_perf_event_init(struct perf_event *event)
523 } 516 }
524 event->destroy = hw_perf_event_destroy; 517 event->destroy = hw_perf_event_destroy;
525 518
526 if (err) 519 return err;
527 return ERR_PTR(err);
528 return &fsl_emb_pmu;
529} 520}
530 521
522static struct pmu fsl_emb_pmu = {
523 .event_init = fsl_emb_pmu_event_init,
524 .enable = fsl_emb_pmu_enable,
525 .disable = fsl_emb_pmu_disable,
526 .read = fsl_emb_pmu_read,
527 .unthrottle = fsl_emb_pmu_unthrottle,
528};
529
531/* 530/*
532 * A counter has overflowed; update its count and record 531 * A counter has overflowed; update its count and record
533 * things if requested. Note that interrupts are hard-disabled 532 * things if requested. Note that interrupts are hard-disabled
@@ -651,5 +650,7 @@ int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu)
651 pr_info("%s performance monitor hardware support registered\n", 650 pr_info("%s performance monitor hardware support registered\n",
652 pmu->name); 651 pmu->name);
653 652
653 perf_pmu_register(&fsl_emb_pmu);
654
654 return 0; 655 return 0;
655} 656}
diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c
index 395572c94c6..8cb206597e0 100644
--- a/arch/sh/kernel/perf_event.c
+++ b/arch/sh/kernel/perf_event.c
@@ -257,26 +257,38 @@ static void sh_pmu_read(struct perf_event *event)
257 sh_perf_event_update(event, &event->hw, event->hw.idx); 257 sh_perf_event_update(event, &event->hw, event->hw.idx);
258} 258}
259 259
260static struct pmu pmu = { 260static int sh_pmu_event_init(struct perf_event *event)
261 .enable = sh_pmu_enable,
262 .disable = sh_pmu_disable,
263 .read = sh_pmu_read,
264};
265
266struct pmu *hw_perf_event_init(struct perf_event *event)
267{ 261{
268 int err = __hw_perf_event_init(event); 262 int err;
263
264 switch (event->attr.type) {
265 case PERF_TYPE_RAW:
266 case PERF_TYPE_HW_CACHE:
267 case PERF_TYPE_HARDWARE:
268 err = __hw_perf_event_init(event);
269 break;
270
271 default:
272 return -ENOENT;
273 }
274
269 if (unlikely(err)) { 275 if (unlikely(err)) {
270 if (event->destroy) 276 if (event->destroy)
271 event->destroy(event); 277 event->destroy(event);
272 return ERR_PTR(err);
273 } 278 }
274 279
275 return &pmu; 280 return err;
276} 281}
277 282
283static struct pmu pmu = {
284 .event_init = sh_pmu_event_init,
285 .enable = sh_pmu_enable,
286 .disable = sh_pmu_disable,
287 .read = sh_pmu_read,
288};
289
278static void sh_pmu_setup(int cpu) 290static void sh_pmu_setup(int cpu)
279{ 291{
280 struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); 292 struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
281 293
282 memset(cpuhw, 0, sizeof(struct cpu_hw_events)); 294 memset(cpuhw, 0, sizeof(struct cpu_hw_events));
@@ -325,6 +337,7 @@ int __cpuinit register_sh_pmu(struct sh_pmu *pmu)
325 337
326 WARN_ON(pmu->num_events > MAX_HWEVENTS); 338 WARN_ON(pmu->num_events > MAX_HWEVENTS);
327 339
340 perf_pmu_register(&pmu);
328 perf_cpu_notifier(sh_pmu_notifier); 341 perf_cpu_notifier(sh_pmu_notifier);
329 return 0; 342 return 0;
330} 343}
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 481b894a501..bed4327f5a7 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -1025,7 +1025,7 @@ out:
1025 return ret; 1025 return ret;
1026} 1026}
1027 1027
1028static int __hw_perf_event_init(struct perf_event *event) 1028static int sparc_pmu_event_init(struct perf_event *event)
1029{ 1029{
1030 struct perf_event_attr *attr = &event->attr; 1030 struct perf_event_attr *attr = &event->attr;
1031 struct perf_event *evts[MAX_HWEVENTS]; 1031 struct perf_event *evts[MAX_HWEVENTS];
@@ -1038,17 +1038,27 @@ static int __hw_perf_event_init(struct perf_event *event)
1038 if (atomic_read(&nmi_active) < 0) 1038 if (atomic_read(&nmi_active) < 0)
1039 return -ENODEV; 1039 return -ENODEV;
1040 1040
1041 if (attr->type == PERF_TYPE_HARDWARE) { 1041 switch (attr->type) {
1042 case PERF_TYPE_HARDWARE:
1042 if (attr->config >= sparc_pmu->max_events) 1043 if (attr->config >= sparc_pmu->max_events)
1043 return -EINVAL; 1044 return -EINVAL;
1044 pmap = sparc_pmu->event_map(attr->config); 1045 pmap = sparc_pmu->event_map(attr->config);
1045 } else if (attr->type == PERF_TYPE_HW_CACHE) { 1046 break;
1047
1048 case PERF_TYPE_HW_CACHE:
1046 pmap = sparc_map_cache_event(attr->config); 1049 pmap = sparc_map_cache_event(attr->config);
1047 if (IS_ERR(pmap)) 1050 if (IS_ERR(pmap))
1048 return PTR_ERR(pmap); 1051 return PTR_ERR(pmap);
1049 } else 1052 break;
1053
1054 case PERF_TYPE_RAW:
1050 return -EOPNOTSUPP; 1055 return -EOPNOTSUPP;
1051 1056
1057 default:
1058 return -ENOENT;
1059
1060 }
1061
1052 /* We save the enable bits in the config_base. */ 1062 /* We save the enable bits in the config_base. */
1053 hwc->config_base = sparc_pmu->irq_bit; 1063 hwc->config_base = sparc_pmu->irq_bit;
1054 if (!attr->exclude_user) 1064 if (!attr->exclude_user)
@@ -1143,6 +1153,7 @@ static int sparc_pmu_commit_txn(struct pmu *pmu)
1143} 1153}
1144 1154
1145static struct pmu pmu = { 1155static struct pmu pmu = {
1156 .event_init = sparc_pmu_event_init,
1146 .enable = sparc_pmu_enable, 1157 .enable = sparc_pmu_enable,
1147 .disable = sparc_pmu_disable, 1158 .disable = sparc_pmu_disable,
1148 .read = sparc_pmu_read, 1159 .read = sparc_pmu_read,
@@ -1152,15 +1163,6 @@ static struct pmu pmu = {
1152 .commit_txn = sparc_pmu_commit_txn, 1163 .commit_txn = sparc_pmu_commit_txn,
1153}; 1164};
1154 1165
1155struct pmu *hw_perf_event_init(struct perf_event *event)
1156{
1157 int err = __hw_perf_event_init(event);
1158
1159 if (err)
1160 return ERR_PTR(err);
1161 return &pmu;
1162}
1163
1164void perf_event_print_debug(void) 1166void perf_event_print_debug(void)
1165{ 1167{
1166 unsigned long flags; 1168 unsigned long flags;
@@ -1280,6 +1282,7 @@ void __init init_hw_perf_events(void)
1280 /* All sparc64 PMUs currently have 2 events. */ 1282 /* All sparc64 PMUs currently have 2 events. */
1281 perf_max_events = 2; 1283 perf_max_events = 2;
1282 1284
1285 perf_pmu_register(&pmu);
1283 register_die_notifier(&perf_event_nmi_notifier); 1286 register_die_notifier(&perf_event_nmi_notifier);
1284} 1287}
1285 1288
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index fdd97f2e996..2c89264ee79 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -530,7 +530,7 @@ static int x86_pmu_hw_config(struct perf_event *event)
530/* 530/*
531 * Setup the hardware configuration for a given attr_type 531 * Setup the hardware configuration for a given attr_type
532 */ 532 */
533static int __hw_perf_event_init(struct perf_event *event) 533static int __x86_pmu_event_init(struct perf_event *event)
534{ 534{
535 int err; 535 int err;
536 536
@@ -1414,6 +1414,7 @@ void __init init_hw_perf_events(void)
1414 pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); 1414 pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
1415 pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); 1415 pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);
1416 1416
1417 perf_pmu_register(&pmu);
1417 perf_cpu_notifier(x86_pmu_notifier); 1418 perf_cpu_notifier(x86_pmu_notifier);
1418} 1419}
1419 1420
@@ -1483,18 +1484,6 @@ static int x86_pmu_commit_txn(struct pmu *pmu)
1483 return 0; 1484 return 0;
1484} 1485}
1485 1486
1486static struct pmu pmu = {
1487 .enable = x86_pmu_enable,
1488 .disable = x86_pmu_disable,
1489 .start = x86_pmu_start,
1490 .stop = x86_pmu_stop,
1491 .read = x86_pmu_read,
1492 .unthrottle = x86_pmu_unthrottle,
1493 .start_txn = x86_pmu_start_txn,
1494 .cancel_txn = x86_pmu_cancel_txn,
1495 .commit_txn = x86_pmu_commit_txn,
1496};
1497
1498/* 1487/*
1499 * validate that we can schedule this event 1488 * validate that we can schedule this event
1500 */ 1489 */
@@ -1569,12 +1558,22 @@ out:
1569 return ret; 1558 return ret;
1570} 1559}
1571 1560
1572struct pmu *hw_perf_event_init(struct perf_event *event) 1561int x86_pmu_event_init(struct perf_event *event)
1573{ 1562{
1574 struct pmu *tmp; 1563 struct pmu *tmp;
1575 int err; 1564 int err;
1576 1565
1577 err = __hw_perf_event_init(event); 1566 switch (event->attr.type) {
1567 case PERF_TYPE_RAW:
1568 case PERF_TYPE_HARDWARE:
1569 case PERF_TYPE_HW_CACHE:
1570 break;
1571
1572 default:
1573 return -ENOENT;
1574 }
1575
1576 err = __x86_pmu_event_init(event);
1578 if (!err) { 1577 if (!err) {
1579 /* 1578 /*
1580 * we temporarily connect event to its pmu 1579 * we temporarily connect event to its pmu
@@ -1594,12 +1593,24 @@ struct pmu *hw_perf_event_init(struct perf_event *event)
1594 if (err) { 1593 if (err) {
1595 if (event->destroy) 1594 if (event->destroy)
1596 event->destroy(event); 1595 event->destroy(event);
1597 return ERR_PTR(err);
1598 } 1596 }
1599 1597
1600 return &pmu; 1598 return err;
1601} 1599}
1602 1600
1601static struct pmu pmu = {
1602 .event_init = x86_pmu_event_init,
1603 .enable = x86_pmu_enable,
1604 .disable = x86_pmu_disable,
1605 .start = x86_pmu_start,
1606 .stop = x86_pmu_stop,
1607 .read = x86_pmu_read,
1608 .unthrottle = x86_pmu_unthrottle,
1609 .start_txn = x86_pmu_start_txn,
1610 .cancel_txn = x86_pmu_cancel_txn,
1611 .commit_txn = x86_pmu_commit_txn,
1612};
1613
1603/* 1614/*
1604 * callchain support 1615 * callchain support
1605 */ 1616 */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 09d048b5211..ab72f56eb37 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -561,6 +561,13 @@ struct perf_event;
561 * struct pmu - generic performance monitoring unit 561 * struct pmu - generic performance monitoring unit
562 */ 562 */
563struct pmu { 563struct pmu {
564 struct list_head entry;
565
566 /*
567 * Should return -ENOENT when the @event doesn't match this pmu
568 */
569 int (*event_init) (struct perf_event *event);
570
564 int (*enable) (struct perf_event *event); 571 int (*enable) (struct perf_event *event);
565 void (*disable) (struct perf_event *event); 572 void (*disable) (struct perf_event *event);
566 int (*start) (struct perf_event *event); 573 int (*start) (struct perf_event *event);
@@ -849,7 +856,8 @@ struct perf_output_handle {
849 */ 856 */
850extern int perf_max_events; 857extern int perf_max_events;
851 858
852extern struct pmu *hw_perf_event_init(struct perf_event *event); 859extern int perf_pmu_register(struct pmu *pmu);
860extern void perf_pmu_unregister(struct pmu *pmu);
853 861
854extern void perf_event_task_sched_in(struct task_struct *task); 862extern void perf_event_task_sched_in(struct task_struct *task);
855extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next); 863extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next);
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index d71a987fd2b..e9c5cfa1fd2 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -565,6 +565,34 @@ static struct notifier_block hw_breakpoint_exceptions_nb = {
565 .priority = 0x7fffffff 565 .priority = 0x7fffffff
566}; 566};
567 567
568static void bp_perf_event_destroy(struct perf_event *event)
569{
570 release_bp_slot(event);
571}
572
573static int hw_breakpoint_event_init(struct perf_event *bp)
574{
575 int err;
576
577 if (bp->attr.type != PERF_TYPE_BREAKPOINT)
578 return -ENOENT;
579
580 err = register_perf_hw_breakpoint(bp);
581 if (err)
582 return err;
583
584 bp->destroy = bp_perf_event_destroy;
585
586 return 0;
587}
588
589static struct pmu perf_breakpoint = {
590 .event_init = hw_breakpoint_event_init,
591 .enable = arch_install_hw_breakpoint,
592 .disable = arch_uninstall_hw_breakpoint,
593 .read = hw_breakpoint_pmu_read,
594};
595
568static int __init init_hw_breakpoint(void) 596static int __init init_hw_breakpoint(void)
569{ 597{
570 unsigned int **task_bp_pinned; 598 unsigned int **task_bp_pinned;
@@ -586,6 +614,8 @@ static int __init init_hw_breakpoint(void)
586 614
587 constraints_initialized = 1; 615 constraints_initialized = 1;
588 616
617 perf_pmu_register(&perf_breakpoint);
618
589 return register_die_notifier(&hw_breakpoint_exceptions_nb); 619 return register_die_notifier(&hw_breakpoint_exceptions_nb);
590 620
591 err_alloc: 621 err_alloc:
@@ -601,8 +631,3 @@ static int __init init_hw_breakpoint(void)
601core_initcall(init_hw_breakpoint); 631core_initcall(init_hw_breakpoint);
602 632
603 633
604struct pmu perf_ops_bp = {
605 .enable = arch_install_hw_breakpoint,
606 .disable = arch_uninstall_hw_breakpoint,
607 .read = hw_breakpoint_pmu_read,
608};
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index fb46fd13f31..288ce43de57 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -31,7 +31,6 @@
31#include <linux/kernel_stat.h> 31#include <linux/kernel_stat.h>
32#include <linux/perf_event.h> 32#include <linux/perf_event.h>
33#include <linux/ftrace_event.h> 33#include <linux/ftrace_event.h>
34#include <linux/hw_breakpoint.h>
35 34
36#include <asm/irq_regs.h> 35#include <asm/irq_regs.h>
37 36
@@ -72,14 +71,6 @@ static atomic64_t perf_event_id;
72 */ 71 */
73static DEFINE_SPINLOCK(perf_resource_lock); 72static DEFINE_SPINLOCK(perf_resource_lock);
74 73
75/*
76 * Architecture provided APIs - weak aliases:
77 */
78extern __weak struct pmu *hw_perf_event_init(struct perf_event *event)
79{
80 return NULL;
81}
82
83void __weak hw_perf_disable(void) { barrier(); } 74void __weak hw_perf_disable(void) { barrier(); }
84void __weak hw_perf_enable(void) { barrier(); } 75void __weak hw_perf_enable(void) { barrier(); }
85 76
@@ -4501,182 +4492,6 @@ static int perf_swevent_int(struct perf_event *event)
4501 return 0; 4492 return 0;
4502} 4493}
4503 4494
4504static struct pmu perf_ops_generic = {
4505 .enable = perf_swevent_enable,
4506 .disable = perf_swevent_disable,
4507 .start = perf_swevent_int,
4508 .stop = perf_swevent_void,
4509 .read = perf_swevent_read,
4510 .unthrottle = perf_swevent_void, /* hwc->interrupts already reset */
4511};
4512
4513/*
4514 * hrtimer based swevent callback
4515 */
4516
4517static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
4518{
4519 enum hrtimer_restart ret = HRTIMER_RESTART;
4520 struct perf_sample_data data;
4521 struct pt_regs *regs;
4522 struct perf_event *event;
4523 u64 period;
4524
4525 event = container_of(hrtimer, struct perf_event, hw.hrtimer);
4526 event->pmu->read(event);
4527
4528 perf_sample_data_init(&data, 0);
4529 data.period = event->hw.last_period;
4530 regs = get_irq_regs();
4531
4532 if (regs && !perf_exclude_event(event, regs)) {
4533 if (!(event->attr.exclude_idle && current->pid == 0))
4534 if (perf_event_overflow(event, 0, &data, regs))
4535 ret = HRTIMER_NORESTART;
4536 }
4537
4538 period = max_t(u64, 10000, event->hw.sample_period);
4539 hrtimer_forward_now(hrtimer, ns_to_ktime(period));
4540
4541 return ret;
4542}
4543
4544static void perf_swevent_start_hrtimer(struct perf_event *event)
4545{
4546 struct hw_perf_event *hwc = &event->hw;
4547
4548 hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
4549 hwc->hrtimer.function = perf_swevent_hrtimer;
4550 if (hwc->sample_period) {
4551 u64 period;
4552
4553 if (hwc->remaining) {
4554 if (hwc->remaining < 0)
4555 period = 10000;
4556 else
4557 period = hwc->remaining;
4558 hwc->remaining = 0;
4559 } else {
4560 period = max_t(u64, 10000, hwc->sample_period);
4561 }
4562 __hrtimer_start_range_ns(&hwc->hrtimer,
4563 ns_to_ktime(period), 0,
4564 HRTIMER_MODE_REL, 0);
4565 }
4566}
4567
4568static void perf_swevent_cancel_hrtimer(struct perf_event *event)
4569{
4570 struct hw_perf_event *hwc = &event->hw;
4571
4572 if (hwc->sample_period) {
4573 ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer);
4574 hwc->remaining = ktime_to_ns(remaining);
4575
4576 hrtimer_cancel(&hwc->hrtimer);
4577 }
4578}
4579
4580/*
4581 * Software event: cpu wall time clock
4582 */
4583
4584static void cpu_clock_perf_event_update(struct perf_event *event)
4585{
4586 int cpu = raw_smp_processor_id();
4587 s64 prev;
4588 u64 now;
4589
4590 now = cpu_clock(cpu);
4591 prev = local64_xchg(&event->hw.prev_count, now);
4592 local64_add(now - prev, &event->count);
4593}
4594
4595static int cpu_clock_perf_event_enable(struct perf_event *event)
4596{
4597 struct hw_perf_event *hwc = &event->hw;
4598 int cpu = raw_smp_processor_id();
4599
4600 local64_set(&hwc->prev_count, cpu_clock(cpu));
4601 perf_swevent_start_hrtimer(event);
4602
4603 return 0;
4604}
4605
4606static void cpu_clock_perf_event_disable(struct perf_event *event)
4607{
4608 perf_swevent_cancel_hrtimer(event);
4609 cpu_clock_perf_event_update(event);
4610}
4611
4612static void cpu_clock_perf_event_read(struct perf_event *event)
4613{
4614 cpu_clock_perf_event_update(event);
4615}
4616
4617static struct pmu perf_ops_cpu_clock = {
4618 .enable = cpu_clock_perf_event_enable,
4619 .disable = cpu_clock_perf_event_disable,
4620 .read = cpu_clock_perf_event_read,
4621};
4622
4623/*
4624 * Software event: task time clock
4625 */
4626
4627static void task_clock_perf_event_update(struct perf_event *event, u64 now)
4628{
4629 u64 prev;
4630 s64 delta;
4631
4632 prev = local64_xchg(&event->hw.prev_count, now);
4633 delta = now - prev;
4634 local64_add(delta, &event->count);
4635}
4636
4637static int task_clock_perf_event_enable(struct perf_event *event)
4638{
4639 struct hw_perf_event *hwc = &event->hw;
4640 u64 now;
4641
4642 now = event->ctx->time;
4643
4644 local64_set(&hwc->prev_count, now);
4645
4646 perf_swevent_start_hrtimer(event);
4647
4648 return 0;
4649}
4650
4651static void task_clock_perf_event_disable(struct perf_event *event)
4652{
4653 perf_swevent_cancel_hrtimer(event);
4654 task_clock_perf_event_update(event, event->ctx->time);
4655
4656}
4657
4658static void task_clock_perf_event_read(struct perf_event *event)
4659{
4660 u64 time;
4661
4662 if (!in_nmi()) {
4663 update_context_time(event->ctx);
4664 time = event->ctx->time;
4665 } else {
4666 u64 now = perf_clock();
4667 u64 delta = now - event->ctx->timestamp;
4668 time = event->ctx->time + delta;
4669 }
4670
4671 task_clock_perf_event_update(event, time);
4672}
4673
4674static struct pmu perf_ops_task_clock = {
4675 .enable = task_clock_perf_event_enable,
4676 .disable = task_clock_perf_event_disable,
4677 .read = task_clock_perf_event_read,
4678};
4679
4680/* Deref the hlist from the update side */ 4495/* Deref the hlist from the update side */
4681static inline struct swevent_hlist * 4496static inline struct swevent_hlist *
4682swevent_hlist_deref(struct perf_cpu_context *cpuctx) 4497swevent_hlist_deref(struct perf_cpu_context *cpuctx)
@@ -4783,17 +4598,63 @@ static int swevent_hlist_get(struct perf_event *event)
4783 return err; 4598 return err;
4784} 4599}
4785 4600
4786#ifdef CONFIG_EVENT_TRACING 4601atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
4787 4602
4788static struct pmu perf_ops_tracepoint = { 4603static void sw_perf_event_destroy(struct perf_event *event)
4789 .enable = perf_trace_enable, 4604{
4790 .disable = perf_trace_disable, 4605 u64 event_id = event->attr.config;
4606
4607 WARN_ON(event->parent);
4608
4609 atomic_dec(&perf_swevent_enabled[event_id]);
4610 swevent_hlist_put(event);
4611}
4612
4613static int perf_swevent_init(struct perf_event *event)
4614{
4615 int event_id = event->attr.config;
4616
4617 if (event->attr.type != PERF_TYPE_SOFTWARE)
4618 return -ENOENT;
4619
4620 switch (event_id) {
4621 case PERF_COUNT_SW_CPU_CLOCK:
4622 case PERF_COUNT_SW_TASK_CLOCK:
4623 return -ENOENT;
4624
4625 default:
4626 break;
4627 }
4628
4629 if (event_id > PERF_COUNT_SW_MAX)
4630 return -ENOENT;
4631
4632 if (!event->parent) {
4633 int err;
4634
4635 err = swevent_hlist_get(event);
4636 if (err)
4637 return err;
4638
4639 atomic_inc(&perf_swevent_enabled[event_id]);
4640 event->destroy = sw_perf_event_destroy;
4641 }
4642
4643 return 0;
4644}
4645
4646static struct pmu perf_swevent = {
4647 .event_init = perf_swevent_init,
4648 .enable = perf_swevent_enable,
4649 .disable = perf_swevent_disable,
4791 .start = perf_swevent_int, 4650 .start = perf_swevent_int,
4792 .stop = perf_swevent_void, 4651 .stop = perf_swevent_void,
4793 .read = perf_swevent_read, 4652 .read = perf_swevent_read,
4794 .unthrottle = perf_swevent_void, 4653 .unthrottle = perf_swevent_void, /* hwc->interrupts already reset */
4795}; 4654};
4796 4655
4656#ifdef CONFIG_EVENT_TRACING
4657
4797static int perf_tp_filter_match(struct perf_event *event, 4658static int perf_tp_filter_match(struct perf_event *event,
4798 struct perf_sample_data *data) 4659 struct perf_sample_data *data)
4799{ 4660{
@@ -4849,10 +4710,13 @@ static void tp_perf_event_destroy(struct perf_event *event)
4849 perf_trace_destroy(event); 4710 perf_trace_destroy(event);
4850} 4711}
4851 4712
4852static struct pmu *tp_perf_event_init(struct perf_event *event) 4713static int perf_tp_event_init(struct perf_event *event)
4853{ 4714{
4854 int err; 4715 int err;
4855 4716
4717 if (event->attr.type != PERF_TYPE_TRACEPOINT)
4718 return -ENOENT;
4719
4856 /* 4720 /*
4857 * Raw tracepoint data is a severe data leak, only allow root to 4721 * Raw tracepoint data is a severe data leak, only allow root to
4858 * have these. 4722 * have these.
@@ -4860,15 +4724,30 @@ static struct pmu *tp_perf_event_init(struct perf_event *event)
4860 if ((event->attr.sample_type & PERF_SAMPLE_RAW) && 4724 if ((event->attr.sample_type & PERF_SAMPLE_RAW) &&
4861 perf_paranoid_tracepoint_raw() && 4725 perf_paranoid_tracepoint_raw() &&
4862 !capable(CAP_SYS_ADMIN)) 4726 !capable(CAP_SYS_ADMIN))
4863 return ERR_PTR(-EPERM); 4727 return -EPERM;
4864 4728
4865 err = perf_trace_init(event); 4729 err = perf_trace_init(event);
4866 if (err) 4730 if (err)
4867 return NULL; 4731 return err;
4868 4732
4869 event->destroy = tp_perf_event_destroy; 4733 event->destroy = tp_perf_event_destroy;
4870 4734
4871 return &perf_ops_tracepoint; 4735 return 0;
4736}
4737
4738static struct pmu perf_tracepoint = {
4739 .event_init = perf_tp_event_init,
4740 .enable = perf_trace_enable,
4741 .disable = perf_trace_disable,
4742 .start = perf_swevent_int,
4743 .stop = perf_swevent_void,
4744 .read = perf_swevent_read,
4745 .unthrottle = perf_swevent_void,
4746};
4747
4748static inline void perf_tp_register(void)
4749{
4750 perf_pmu_register(&perf_tracepoint);
4872} 4751}
4873 4752
4874static int perf_event_set_filter(struct perf_event *event, void __user *arg) 4753static int perf_event_set_filter(struct perf_event *event, void __user *arg)
@@ -4896,9 +4775,8 @@ static void perf_event_free_filter(struct perf_event *event)
4896 4775
4897#else 4776#else
4898 4777
4899static struct pmu *tp_perf_event_init(struct perf_event *event) 4778static inline void perf_tp_register(void)
4900{ 4779{
4901 return NULL;
4902} 4780}
4903 4781
4904static int perf_event_set_filter(struct perf_event *event, void __user *arg) 4782static int perf_event_set_filter(struct perf_event *event, void __user *arg)
@@ -4913,105 +4791,247 @@ static void perf_event_free_filter(struct perf_event *event)
4913#endif /* CONFIG_EVENT_TRACING */ 4791#endif /* CONFIG_EVENT_TRACING */
4914 4792
4915#ifdef CONFIG_HAVE_HW_BREAKPOINT 4793#ifdef CONFIG_HAVE_HW_BREAKPOINT
4916static void bp_perf_event_destroy(struct perf_event *event) 4794void perf_bp_event(struct perf_event *bp, void *data)
4917{ 4795{
4918 release_bp_slot(event); 4796 struct perf_sample_data sample;
4797 struct pt_regs *regs = data;
4798
4799 perf_sample_data_init(&sample, bp->attr.bp_addr);
4800
4801 if (!perf_exclude_event(bp, regs))
4802 perf_swevent_add(bp, 1, 1, &sample, regs);
4919} 4803}
4804#endif
4805
4806/*
4807 * hrtimer based swevent callback
4808 */
4920 4809
4921static struct pmu *bp_perf_event_init(struct perf_event *bp) 4810static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
4922{ 4811{
4923 int err; 4812 enum hrtimer_restart ret = HRTIMER_RESTART;
4813 struct perf_sample_data data;
4814 struct pt_regs *regs;
4815 struct perf_event *event;
4816 u64 period;
4924 4817
4925 err = register_perf_hw_breakpoint(bp); 4818 event = container_of(hrtimer, struct perf_event, hw.hrtimer);
4926 if (err) 4819 event->pmu->read(event);
4927 return ERR_PTR(err); 4820
4821 perf_sample_data_init(&data, 0);
4822 data.period = event->hw.last_period;
4823 regs = get_irq_regs();
4824
4825 if (regs && !perf_exclude_event(event, regs)) {
4826 if (!(event->attr.exclude_idle && current->pid == 0))
4827 if (perf_event_overflow(event, 0, &data, regs))
4828 ret = HRTIMER_NORESTART;
4829 }
4928 4830
4929 bp->destroy = bp_perf_event_destroy; 4831 period = max_t(u64, 10000, event->hw.sample_period);
4832 hrtimer_forward_now(hrtimer, ns_to_ktime(period));
4930 4833
4931 return &perf_ops_bp; 4834 return ret;
4932} 4835}
4933 4836
4934void perf_bp_event(struct perf_event *bp, void *data) 4837static void perf_swevent_start_hrtimer(struct perf_event *event)
4935{ 4838{
4936 struct perf_sample_data sample; 4839 struct hw_perf_event *hwc = &event->hw;
4937 struct pt_regs *regs = data;
4938 4840
4939 perf_sample_data_init(&sample, bp->attr.bp_addr); 4841 hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
4842 hwc->hrtimer.function = perf_swevent_hrtimer;
4843 if (hwc->sample_period) {
4844 u64 period;
4940 4845
4941 if (!perf_exclude_event(bp, regs)) 4846 if (hwc->remaining) {
4942 perf_swevent_add(bp, 1, 1, &sample, regs); 4847 if (hwc->remaining < 0)
4848 period = 10000;
4849 else
4850 period = hwc->remaining;
4851 hwc->remaining = 0;
4852 } else {
4853 period = max_t(u64, 10000, hwc->sample_period);
4854 }
4855 __hrtimer_start_range_ns(&hwc->hrtimer,
4856 ns_to_ktime(period), 0,
4857 HRTIMER_MODE_REL, 0);
4858 }
4943} 4859}
4944#else 4860
4945static struct pmu *bp_perf_event_init(struct perf_event *bp) 4861static void perf_swevent_cancel_hrtimer(struct perf_event *event)
4946{ 4862{
4947 return NULL; 4863 struct hw_perf_event *hwc = &event->hw;
4864
4865 if (hwc->sample_period) {
4866 ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer);
4867 hwc->remaining = ktime_to_ns(remaining);
4868
4869 hrtimer_cancel(&hwc->hrtimer);
4870 }
4948} 4871}
4949 4872
4950void perf_bp_event(struct perf_event *bp, void *regs) 4873/*
4874 * Software event: cpu wall time clock
4875 */
4876
4877static void cpu_clock_event_update(struct perf_event *event)
4951{ 4878{
4879 int cpu = raw_smp_processor_id();
4880 s64 prev;
4881 u64 now;
4882
4883 now = cpu_clock(cpu);
4884 prev = local64_xchg(&event->hw.prev_count, now);
4885 local64_add(now - prev, &event->count);
4952} 4886}
4953#endif
4954 4887
4955atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; 4888static int cpu_clock_event_enable(struct perf_event *event)
4889{
4890 struct hw_perf_event *hwc = &event->hw;
4891 int cpu = raw_smp_processor_id();
4956 4892
4957static void sw_perf_event_destroy(struct perf_event *event) 4893 local64_set(&hwc->prev_count, cpu_clock(cpu));
4894 perf_swevent_start_hrtimer(event);
4895
4896 return 0;
4897}
4898
4899static void cpu_clock_event_disable(struct perf_event *event)
4958{ 4900{
4959 u64 event_id = event->attr.config; 4901 perf_swevent_cancel_hrtimer(event);
4902 cpu_clock_event_update(event);
4903}
4960 4904
4961 WARN_ON(event->parent); 4905static void cpu_clock_event_read(struct perf_event *event)
4906{
4907 cpu_clock_event_update(event);
4908}
4962 4909
4963 atomic_dec(&perf_swevent_enabled[event_id]); 4910static int cpu_clock_event_init(struct perf_event *event)
4964 swevent_hlist_put(event); 4911{
4912 if (event->attr.type != PERF_TYPE_SOFTWARE)
4913 return -ENOENT;
4914
4915 if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK)
4916 return -ENOENT;
4917
4918 return 0;
4965} 4919}
4966 4920
4967static struct pmu *sw_perf_event_init(struct perf_event *event) 4921static struct pmu perf_cpu_clock = {
4922 .event_init = cpu_clock_event_init,
4923 .enable = cpu_clock_event_enable,
4924 .disable = cpu_clock_event_disable,
4925 .read = cpu_clock_event_read,
4926};
4927
4928/*
4929 * Software event: task time clock
4930 */
4931
4932static void task_clock_event_update(struct perf_event *event, u64 now)
4968{ 4933{
4969 struct pmu *pmu = NULL; 4934 u64 prev;
4970 u64 event_id = event->attr.config; 4935 s64 delta;
4971 4936
4972 /* 4937 prev = local64_xchg(&event->hw.prev_count, now);
4973 * Software events (currently) can't in general distinguish 4938 delta = now - prev;
4974 * between user, kernel and hypervisor events. 4939 local64_add(delta, &event->count);
4975 * However, context switches and cpu migrations are considered 4940}
4976 * to be kernel events, and page faults are never hypervisor
4977 * events.
4978 */
4979 switch (event_id) {
4980 case PERF_COUNT_SW_CPU_CLOCK:
4981 pmu = &perf_ops_cpu_clock;
4982 4941
4983 break; 4942static int task_clock_event_enable(struct perf_event *event)
4984 case PERF_COUNT_SW_TASK_CLOCK: 4943{
4985 /* 4944 struct hw_perf_event *hwc = &event->hw;
4986 * If the user instantiates this as a per-cpu event, 4945 u64 now;
4987 * use the cpu_clock event instead.
4988 */
4989 if (event->ctx->task)
4990 pmu = &perf_ops_task_clock;
4991 else
4992 pmu = &perf_ops_cpu_clock;
4993 4946
4994 break; 4947 now = event->ctx->time;
4995 case PERF_COUNT_SW_PAGE_FAULTS:
4996 case PERF_COUNT_SW_PAGE_FAULTS_MIN:
4997 case PERF_COUNT_SW_PAGE_FAULTS_MAJ:
4998 case PERF_COUNT_SW_CONTEXT_SWITCHES:
4999 case PERF_COUNT_SW_CPU_MIGRATIONS:
5000 case PERF_COUNT_SW_ALIGNMENT_FAULTS:
5001 case PERF_COUNT_SW_EMULATION_FAULTS:
5002 if (!event->parent) {
5003 int err;
5004
5005 err = swevent_hlist_get(event);
5006 if (err)
5007 return ERR_PTR(err);
5008 4948
5009 atomic_inc(&perf_swevent_enabled[event_id]); 4949 local64_set(&hwc->prev_count, now);
5010 event->destroy = sw_perf_event_destroy; 4950
4951 perf_swevent_start_hrtimer(event);
4952
4953 return 0;
4954}
4955
4956static void task_clock_event_disable(struct perf_event *event)
4957{
4958 perf_swevent_cancel_hrtimer(event);
4959 task_clock_event_update(event, event->ctx->time);
4960
4961}
4962
4963static void task_clock_event_read(struct perf_event *event)
4964{
4965 u64 time;
4966
4967 if (!in_nmi()) {
4968 update_context_time(event->ctx);
4969 time = event->ctx->time;
4970 } else {
4971 u64 now = perf_clock();
4972 u64 delta = now - event->ctx->timestamp;
4973 time = event->ctx->time + delta;
4974 }
4975
4976 task_clock_event_update(event, time);
4977}
4978
4979static int task_clock_event_init(struct perf_event *event)
4980{
4981 if (event->attr.type != PERF_TYPE_SOFTWARE)
4982 return -ENOENT;
4983
4984 if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK)
4985 return -ENOENT;
4986
4987 return 0;
4988}
4989
4990static struct pmu perf_task_clock = {
4991 .event_init = task_clock_event_init,
4992 .enable = task_clock_event_enable,
4993 .disable = task_clock_event_disable,
4994 .read = task_clock_event_read,
4995};
4996
4997static LIST_HEAD(pmus);
4998static DEFINE_MUTEX(pmus_lock);
4999static struct srcu_struct pmus_srcu;
5000
5001int perf_pmu_register(struct pmu *pmu)
5002{
5003 mutex_lock(&pmus_lock);
5004 list_add_rcu(&pmu->entry, &pmus);
5005 mutex_unlock(&pmus_lock);
5006
5007 return 0;
5008}
5009
5010void perf_pmu_unregister(struct pmu *pmu)
5011{
5012 mutex_lock(&pmus_lock);
5013 list_del_rcu(&pmu->entry);
5014 mutex_unlock(&pmus_lock);
5015
5016 synchronize_srcu(&pmus_srcu);
5017}
5018
5019struct pmu *perf_init_event(struct perf_event *event)
5020{
5021 struct pmu *pmu = NULL;
5022 int idx;
5023
5024 idx = srcu_read_lock(&pmus_srcu);
5025 list_for_each_entry_rcu(pmu, &pmus, entry) {
5026 int ret = pmu->event_init(event);
5027 if (!ret)
5028 break;
5029 if (ret != -ENOENT) {
5030 pmu = ERR_PTR(ret);
5031 break;
5011 } 5032 }
5012 pmu = &perf_ops_generic;
5013 break;
5014 } 5033 }
5034 srcu_read_unlock(&pmus_srcu, idx);
5015 5035
5016 return pmu; 5036 return pmu;
5017} 5037}
@@ -5092,29 +5112,8 @@ perf_event_alloc(struct perf_event_attr *attr,
5092 if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP)) 5112 if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP))
5093 goto done; 5113 goto done;
5094 5114
5095 switch (attr->type) { 5115 pmu = perf_init_event(event);
5096 case PERF_TYPE_RAW:
5097 case PERF_TYPE_HARDWARE:
5098 case PERF_TYPE_HW_CACHE:
5099 pmu = hw_perf_event_init(event);
5100 break;
5101
5102 case PERF_TYPE_SOFTWARE:
5103 pmu = sw_perf_event_init(event);
5104 break;
5105
5106 case PERF_TYPE_TRACEPOINT:
5107 pmu = tp_perf_event_init(event);
5108 break;
5109 5116
5110 case PERF_TYPE_BREAKPOINT:
5111 pmu = bp_perf_event_init(event);
5112 break;
5113
5114
5115 default:
5116 break;
5117 }
5118done: 5117done:
5119 err = 0; 5118 err = 0;
5120 if (!pmu) 5119 if (!pmu)
@@ -5979,22 +5978,15 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
5979 return NOTIFY_OK; 5978 return NOTIFY_OK;
5980} 5979}
5981 5980
5982/*
5983 * This has to have a higher priority than migration_notifier in sched.c.
5984 */
5985static struct notifier_block __cpuinitdata perf_cpu_nb = {
5986 .notifier_call = perf_cpu_notify,
5987 .priority = 20,
5988};
5989
5990void __init perf_event_init(void) 5981void __init perf_event_init(void)
5991{ 5982{
5992 perf_event_init_all_cpus(); 5983 perf_event_init_all_cpus();
5993 perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE, 5984 init_srcu_struct(&pmus_srcu);
5994 (void *)(long)smp_processor_id()); 5985 perf_pmu_register(&perf_swevent);
5995 perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE, 5986 perf_pmu_register(&perf_cpu_clock);
5996 (void *)(long)smp_processor_id()); 5987 perf_pmu_register(&perf_task_clock);
5997 register_cpu_notifier(&perf_cpu_nb); 5988 perf_tp_register();
5989 perf_cpu_notifier(perf_cpu_notify);
5998} 5990}
5999 5991
6000static ssize_t perf_show_reserve_percpu(struct sysdev_class *class, 5992static ssize_t perf_show_reserve_percpu(struct sysdev_class *class,