author	Peter Zijlstra <a.p.zijlstra@chello.nl>	2010-06-11 07:35:08 -0400
committer	Ingo Molnar <mingo@elte.hu>	2010-09-09 14:46:28 -0400
commit	b0a873ebbf87bf38bf70b5e39a7cadc96099fa13 (patch)
tree	63ab672b847aed295f99b9b2a3bbcfd5d3d35bd9 /kernel
parent	51b0fe39549a04858001922919ab355dee9bdfcf (diff)
perf: Register PMU implementations
Simple registration interface for struct pmu; this provides the
infrastructure for removing all the weak functions.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: paulus <paulus@samba.org>
Cc: stephane eranian <eranian@googlemail.com>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Lin Ming <ming.m.lin@intel.com>
Cc: Yanmin <yanmin_zhang@linux.intel.com>
Cc: Deng-Cheng Zhu <dengcheng.zhu@gmail.com>
Cc: David Miller <davem@davemloft.net>
Cc: Michael Cree <mcree@orcon.net.nz>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
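
For illustration, a minimal sketch of how a PMU implementation is expected to use this interface; it is not part of the patch. The my_* names and the PERF_TYPE_RAW check are hypothetical, only struct pmu's event_init/enable/disable/read callbacks and perf_pmu_register() come from the patch itself.

#include <linux/perf_event.h>
#include <linux/init.h>

/* Claim an event, or return -ENOENT so the core offers it to the next pmu. */
static int my_pmu_event_init(struct perf_event *event)
{
	if (event->attr.type != PERF_TYPE_RAW)	/* illustrative type check */
		return -ENOENT;

	/* validate event->attr.config, set event->destroy, etc. */
	return 0;
}

static int my_pmu_enable(struct perf_event *event)
{
	/* program the counter described by event->hw */
	return 0;
}

static void my_pmu_disable(struct perf_event *event)
{
	/* stop the counter */
}

static void my_pmu_read(struct perf_event *event)
{
	/* fold the current hardware count into event->count */
}

static struct pmu my_pmu = {
	.event_init	= my_pmu_event_init,
	.enable		= my_pmu_enable,
	.disable	= my_pmu_disable,
	.read		= my_pmu_read,
};

static int __init my_pmu_register(void)
{
	/* after this, perf_init_event() can hand events to my_pmu */
	return perf_pmu_register(&my_pmu);
}
core_initcall(my_pmu_register);

With event_init returning -ENOENT for events it does not own, perf_init_event() simply walks the registered pmus until one accepts the event, replacing the old per-type switch and the weak hw_perf_event_init() hook.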
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/hw_breakpoint.c	 35
-rw-r--r--	kernel/perf_event.c	588
2 files changed, 320 insertions, 303 deletions
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index d71a987fd2bf..e9c5cfa1fd20 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -565,6 +565,34 @@ static struct notifier_block hw_breakpoint_exceptions_nb = {
 	.priority = 0x7fffffff
 };

+static void bp_perf_event_destroy(struct perf_event *event)
+{
+	release_bp_slot(event);
+}
+
+static int hw_breakpoint_event_init(struct perf_event *bp)
+{
+	int err;
+
+	if (bp->attr.type != PERF_TYPE_BREAKPOINT)
+		return -ENOENT;
+
+	err = register_perf_hw_breakpoint(bp);
+	if (err)
+		return err;
+
+	bp->destroy = bp_perf_event_destroy;
+
+	return 0;
+}
+
+static struct pmu perf_breakpoint = {
+	.event_init = hw_breakpoint_event_init,
+	.enable = arch_install_hw_breakpoint,
+	.disable = arch_uninstall_hw_breakpoint,
+	.read = hw_breakpoint_pmu_read,
+};
+
 static int __init init_hw_breakpoint(void)
 {
 	unsigned int **task_bp_pinned;
@@ -586,6 +614,8 @@ static int __init init_hw_breakpoint(void)

 	constraints_initialized = 1;

+	perf_pmu_register(&perf_breakpoint);
+
 	return register_die_notifier(&hw_breakpoint_exceptions_nb);

 err_alloc:
@@ -601,8 +631,3 @@ static int __init init_hw_breakpoint(void)
 core_initcall(init_hw_breakpoint);


-struct pmu perf_ops_bp = {
-	.enable = arch_install_hw_breakpoint,
-	.disable = arch_uninstall_hw_breakpoint,
-	.read = hw_breakpoint_pmu_read,
-};
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index fb46fd13f31f..288ce43de57c 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -31,7 +31,6 @@
 #include <linux/kernel_stat.h>
 #include <linux/perf_event.h>
 #include <linux/ftrace_event.h>
-#include <linux/hw_breakpoint.h>

 #include <asm/irq_regs.h>

@@ -72,14 +71,6 @@ static atomic64_t perf_event_id;
  */
 static DEFINE_SPINLOCK(perf_resource_lock);

-/*
- * Architecture provided APIs - weak aliases:
- */
-extern __weak struct pmu *hw_perf_event_init(struct perf_event *event)
-{
-	return NULL;
-}
-
 void __weak hw_perf_disable(void) { barrier(); }
 void __weak hw_perf_enable(void) { barrier(); }

@@ -4501,182 +4492,6 @@ static int perf_swevent_int(struct perf_event *event)
 	return 0;
 }

-static struct pmu perf_ops_generic = {
-	.enable = perf_swevent_enable,
-	.disable = perf_swevent_disable,
-	.start = perf_swevent_int,
-	.stop = perf_swevent_void,
-	.read = perf_swevent_read,
-	.unthrottle = perf_swevent_void, /* hwc->interrupts already reset */
-};
-
-/*
- * hrtimer based swevent callback
- */
-
-static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
-{
-	enum hrtimer_restart ret = HRTIMER_RESTART;
-	struct perf_sample_data data;
-	struct pt_regs *regs;
-	struct perf_event *event;
-	u64 period;
-
-	event = container_of(hrtimer, struct perf_event, hw.hrtimer);
-	event->pmu->read(event);
-
-	perf_sample_data_init(&data, 0);
-	data.period = event->hw.last_period;
-	regs = get_irq_regs();
-
-	if (regs && !perf_exclude_event(event, regs)) {
-		if (!(event->attr.exclude_idle && current->pid == 0))
-			if (perf_event_overflow(event, 0, &data, regs))
-				ret = HRTIMER_NORESTART;
-	}
-
-	period = max_t(u64, 10000, event->hw.sample_period);
-	hrtimer_forward_now(hrtimer, ns_to_ktime(period));
-
-	return ret;
-}
-
-static void perf_swevent_start_hrtimer(struct perf_event *event)
-{
-	struct hw_perf_event *hwc = &event->hw;
-
-	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-	hwc->hrtimer.function = perf_swevent_hrtimer;
-	if (hwc->sample_period) {
-		u64 period;
-
-		if (hwc->remaining) {
-			if (hwc->remaining < 0)
-				period = 10000;
-			else
-				period = hwc->remaining;
-			hwc->remaining = 0;
-		} else {
-			period = max_t(u64, 10000, hwc->sample_period);
-		}
-		__hrtimer_start_range_ns(&hwc->hrtimer,
-				ns_to_ktime(period), 0,
-				HRTIMER_MODE_REL, 0);
-	}
-}
-
-static void perf_swevent_cancel_hrtimer(struct perf_event *event)
-{
-	struct hw_perf_event *hwc = &event->hw;
-
-	if (hwc->sample_period) {
-		ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer);
-		hwc->remaining = ktime_to_ns(remaining);
-
-		hrtimer_cancel(&hwc->hrtimer);
-	}
-}
-
-/*
- * Software event: cpu wall time clock
- */
-
-static void cpu_clock_perf_event_update(struct perf_event *event)
-{
-	int cpu = raw_smp_processor_id();
-	s64 prev;
-	u64 now;
-
-	now = cpu_clock(cpu);
-	prev = local64_xchg(&event->hw.prev_count, now);
-	local64_add(now - prev, &event->count);
-}
-
-static int cpu_clock_perf_event_enable(struct perf_event *event)
-{
-	struct hw_perf_event *hwc = &event->hw;
-	int cpu = raw_smp_processor_id();
-
-	local64_set(&hwc->prev_count, cpu_clock(cpu));
-	perf_swevent_start_hrtimer(event);
-
-	return 0;
-}
-
-static void cpu_clock_perf_event_disable(struct perf_event *event)
-{
-	perf_swevent_cancel_hrtimer(event);
-	cpu_clock_perf_event_update(event);
-}
-
-static void cpu_clock_perf_event_read(struct perf_event *event)
-{
-	cpu_clock_perf_event_update(event);
-}
-
-static struct pmu perf_ops_cpu_clock = {
-	.enable = cpu_clock_perf_event_enable,
-	.disable = cpu_clock_perf_event_disable,
-	.read = cpu_clock_perf_event_read,
-};
-
-/*
- * Software event: task time clock
- */
-
-static void task_clock_perf_event_update(struct perf_event *event, u64 now)
-{
-	u64 prev;
-	s64 delta;
-
-	prev = local64_xchg(&event->hw.prev_count, now);
-	delta = now - prev;
-	local64_add(delta, &event->count);
-}
-
-static int task_clock_perf_event_enable(struct perf_event *event)
-{
-	struct hw_perf_event *hwc = &event->hw;
-	u64 now;
-
-	now = event->ctx->time;
-
-	local64_set(&hwc->prev_count, now);
-
-	perf_swevent_start_hrtimer(event);
-
-	return 0;
-}
-
-static void task_clock_perf_event_disable(struct perf_event *event)
-{
-	perf_swevent_cancel_hrtimer(event);
-	task_clock_perf_event_update(event, event->ctx->time);
-
-}
-
-static void task_clock_perf_event_read(struct perf_event *event)
-{
-	u64 time;
-
-	if (!in_nmi()) {
-		update_context_time(event->ctx);
-		time = event->ctx->time;
-	} else {
-		u64 now = perf_clock();
-		u64 delta = now - event->ctx->timestamp;
-		time = event->ctx->time + delta;
-	}
-
-	task_clock_perf_event_update(event, time);
-}
-
-static struct pmu perf_ops_task_clock = {
-	.enable = task_clock_perf_event_enable,
-	.disable = task_clock_perf_event_disable,
-	.read = task_clock_perf_event_read,
-};
-
 /* Deref the hlist from the update side */
 static inline struct swevent_hlist *
 swevent_hlist_deref(struct perf_cpu_context *cpuctx)
@@ -4783,17 +4598,63 @@ static int swevent_hlist_get(struct perf_event *event)
 	return err;
 }

-#ifdef CONFIG_EVENT_TRACING
-
-static struct pmu perf_ops_tracepoint = {
-	.enable = perf_trace_enable,
-	.disable = perf_trace_disable,
+atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
+
+static void sw_perf_event_destroy(struct perf_event *event)
+{
+	u64 event_id = event->attr.config;
+
+	WARN_ON(event->parent);
+
+	atomic_dec(&perf_swevent_enabled[event_id]);
+	swevent_hlist_put(event);
+}
+
+static int perf_swevent_init(struct perf_event *event)
+{
+	int event_id = event->attr.config;
+
+	if (event->attr.type != PERF_TYPE_SOFTWARE)
+		return -ENOENT;
+
+	switch (event_id) {
+	case PERF_COUNT_SW_CPU_CLOCK:
+	case PERF_COUNT_SW_TASK_CLOCK:
+		return -ENOENT;
+
+	default:
+		break;
+	}
+
+	if (event_id > PERF_COUNT_SW_MAX)
+		return -ENOENT;
+
+	if (!event->parent) {
+		int err;
+
+		err = swevent_hlist_get(event);
+		if (err)
+			return err;
+
+		atomic_inc(&perf_swevent_enabled[event_id]);
+		event->destroy = sw_perf_event_destroy;
+	}
+
+	return 0;
+}
+
+static struct pmu perf_swevent = {
+	.event_init = perf_swevent_init,
+	.enable = perf_swevent_enable,
+	.disable = perf_swevent_disable,
 	.start = perf_swevent_int,
 	.stop = perf_swevent_void,
 	.read = perf_swevent_read,
-	.unthrottle = perf_swevent_void,
+	.unthrottle = perf_swevent_void, /* hwc->interrupts already reset */
 };

+#ifdef CONFIG_EVENT_TRACING
+
 static int perf_tp_filter_match(struct perf_event *event,
 				struct perf_sample_data *data)
 {
@@ -4849,10 +4710,13 @@ static void tp_perf_event_destroy(struct perf_event *event)
 	perf_trace_destroy(event);
 }

-static struct pmu *tp_perf_event_init(struct perf_event *event)
+static int perf_tp_event_init(struct perf_event *event)
 {
 	int err;

+	if (event->attr.type != PERF_TYPE_TRACEPOINT)
+		return -ENOENT;
+
 	/*
 	 * Raw tracepoint data is a severe data leak, only allow root to
 	 * have these.
@@ -4860,15 +4724,30 @@ static struct pmu *tp_perf_event_init(struct perf_event *event)
 	if ((event->attr.sample_type & PERF_SAMPLE_RAW) &&
 	    perf_paranoid_tracepoint_raw() &&
 	    !capable(CAP_SYS_ADMIN))
-		return ERR_PTR(-EPERM);
+		return -EPERM;

 	err = perf_trace_init(event);
 	if (err)
-		return NULL;
+		return err;

 	event->destroy = tp_perf_event_destroy;

-	return &perf_ops_tracepoint;
+	return 0;
+}
+
+static struct pmu perf_tracepoint = {
+	.event_init = perf_tp_event_init,
+	.enable = perf_trace_enable,
+	.disable = perf_trace_disable,
+	.start = perf_swevent_int,
+	.stop = perf_swevent_void,
+	.read = perf_swevent_read,
+	.unthrottle = perf_swevent_void,
+};
+
+static inline void perf_tp_register(void)
+{
+	perf_pmu_register(&perf_tracepoint);
 }

 static int perf_event_set_filter(struct perf_event *event, void __user *arg)
@@ -4896,9 +4775,8 @@ static void perf_event_free_filter(struct perf_event *event)

 #else

-static struct pmu *tp_perf_event_init(struct perf_event *event)
+static inline void perf_tp_register(void)
 {
-	return NULL;
 }

 static int perf_event_set_filter(struct perf_event *event, void __user *arg)
@@ -4913,105 +4791,247 @@ static void perf_event_free_filter(struct perf_event *event)
 #endif /* CONFIG_EVENT_TRACING */

 #ifdef CONFIG_HAVE_HW_BREAKPOINT
-static void bp_perf_event_destroy(struct perf_event *event)
+void perf_bp_event(struct perf_event *bp, void *data)
 {
-	release_bp_slot(event);
+	struct perf_sample_data sample;
+	struct pt_regs *regs = data;
+
+	perf_sample_data_init(&sample, bp->attr.bp_addr);
+
+	if (!perf_exclude_event(bp, regs))
+		perf_swevent_add(bp, 1, 1, &sample, regs);
 }
+#endif
+
+/*
+ * hrtimer based swevent callback
+ */

-static struct pmu *bp_perf_event_init(struct perf_event *bp)
+static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
 {
-	int err;
+	enum hrtimer_restart ret = HRTIMER_RESTART;
+	struct perf_sample_data data;
+	struct pt_regs *regs;
+	struct perf_event *event;
+	u64 period;

-	err = register_perf_hw_breakpoint(bp);
-	if (err)
-		return ERR_PTR(err);
+	event = container_of(hrtimer, struct perf_event, hw.hrtimer);
+	event->pmu->read(event);
+
+	perf_sample_data_init(&data, 0);
+	data.period = event->hw.last_period;
+	regs = get_irq_regs();
+
+	if (regs && !perf_exclude_event(event, regs)) {
+		if (!(event->attr.exclude_idle && current->pid == 0))
+			if (perf_event_overflow(event, 0, &data, regs))
+				ret = HRTIMER_NORESTART;
+	}

-	bp->destroy = bp_perf_event_destroy;
+	period = max_t(u64, 10000, event->hw.sample_period);
+	hrtimer_forward_now(hrtimer, ns_to_ktime(period));

-	return &perf_ops_bp;
+	return ret;
 }

-void perf_bp_event(struct perf_event *bp, void *data)
+static void perf_swevent_start_hrtimer(struct perf_event *event)
 {
-	struct perf_sample_data sample;
-	struct pt_regs *regs = data;
+	struct hw_perf_event *hwc = &event->hw;

-	perf_sample_data_init(&sample, bp->attr.bp_addr);
+	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hwc->hrtimer.function = perf_swevent_hrtimer;
+	if (hwc->sample_period) {
+		u64 period;

-	if (!perf_exclude_event(bp, regs))
-		perf_swevent_add(bp, 1, 1, &sample, regs);
+		if (hwc->remaining) {
+			if (hwc->remaining < 0)
+				period = 10000;
+			else
+				period = hwc->remaining;
+			hwc->remaining = 0;
+		} else {
+			period = max_t(u64, 10000, hwc->sample_period);
+		}
+		__hrtimer_start_range_ns(&hwc->hrtimer,
+				ns_to_ktime(period), 0,
+				HRTIMER_MODE_REL, 0);
+	}
 }
-#else
-static struct pmu *bp_perf_event_init(struct perf_event *bp)
+
+static void perf_swevent_cancel_hrtimer(struct perf_event *event)
 {
-	return NULL;
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (hwc->sample_period) {
+		ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer);
+		hwc->remaining = ktime_to_ns(remaining);
+
+		hrtimer_cancel(&hwc->hrtimer);
+	}
 }

-void perf_bp_event(struct perf_event *bp, void *regs)
+/*
+ * Software event: cpu wall time clock
+ */
+
+static void cpu_clock_event_update(struct perf_event *event)
 {
+	int cpu = raw_smp_processor_id();
+	s64 prev;
+	u64 now;
+
+	now = cpu_clock(cpu);
+	prev = local64_xchg(&event->hw.prev_count, now);
+	local64_add(now - prev, &event->count);
 }
-#endif

-atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
+static int cpu_clock_event_enable(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int cpu = raw_smp_processor_id();

-static void sw_perf_event_destroy(struct perf_event *event)
+	local64_set(&hwc->prev_count, cpu_clock(cpu));
+	perf_swevent_start_hrtimer(event);
+
+	return 0;
+}
+
+static void cpu_clock_event_disable(struct perf_event *event)
 {
-	u64 event_id = event->attr.config;
+	perf_swevent_cancel_hrtimer(event);
+	cpu_clock_event_update(event);
+}

-	WARN_ON(event->parent);
+static void cpu_clock_event_read(struct perf_event *event)
+{
+	cpu_clock_event_update(event);
+}

-	atomic_dec(&perf_swevent_enabled[event_id]);
-	swevent_hlist_put(event);
+static int cpu_clock_event_init(struct perf_event *event)
+{
+	if (event->attr.type != PERF_TYPE_SOFTWARE)
+		return -ENOENT;
+
+	if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK)
+		return -ENOENT;
+
+	return 0;
 }

-static struct pmu *sw_perf_event_init(struct perf_event *event)
+static struct pmu perf_cpu_clock = {
+	.event_init = cpu_clock_event_init,
+	.enable = cpu_clock_event_enable,
+	.disable = cpu_clock_event_disable,
+	.read = cpu_clock_event_read,
+};
+
+/*
+ * Software event: task time clock
+ */
+
+static void task_clock_event_update(struct perf_event *event, u64 now)
 {
-	struct pmu *pmu = NULL;
-	u64 event_id = event->attr.config;
+	u64 prev;
+	s64 delta;

-	/*
-	 * Software events (currently) can't in general distinguish
-	 * between user, kernel and hypervisor events.
-	 * However, context switches and cpu migrations are considered
-	 * to be kernel events, and page faults are never hypervisor
-	 * events.
-	 */
-	switch (event_id) {
-	case PERF_COUNT_SW_CPU_CLOCK:
-		pmu = &perf_ops_cpu_clock;
+	prev = local64_xchg(&event->hw.prev_count, now);
+	delta = now - prev;
+	local64_add(delta, &event->count);
+}

-		break;
-	case PERF_COUNT_SW_TASK_CLOCK:
-		/*
-		 * If the user instantiates this as a per-cpu event,
-		 * use the cpu_clock event instead.
-		 */
-		if (event->ctx->task)
-			pmu = &perf_ops_task_clock;
-		else
-			pmu = &perf_ops_cpu_clock;
+static int task_clock_event_enable(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	u64 now;

-		break;
-	case PERF_COUNT_SW_PAGE_FAULTS:
-	case PERF_COUNT_SW_PAGE_FAULTS_MIN:
-	case PERF_COUNT_SW_PAGE_FAULTS_MAJ:
-	case PERF_COUNT_SW_CONTEXT_SWITCHES:
-	case PERF_COUNT_SW_CPU_MIGRATIONS:
-	case PERF_COUNT_SW_ALIGNMENT_FAULTS:
-	case PERF_COUNT_SW_EMULATION_FAULTS:
-		if (!event->parent) {
-			int err;
-
-			err = swevent_hlist_get(event);
-			if (err)
-				return ERR_PTR(err);
+	now = event->ctx->time;

-			atomic_inc(&perf_swevent_enabled[event_id]);
-			event->destroy = sw_perf_event_destroy;
+	local64_set(&hwc->prev_count, now);
+
+	perf_swevent_start_hrtimer(event);
+
+	return 0;
+}
+
+static void task_clock_event_disable(struct perf_event *event)
+{
+	perf_swevent_cancel_hrtimer(event);
+	task_clock_event_update(event, event->ctx->time);
+
+}
+
+static void task_clock_event_read(struct perf_event *event)
+{
+	u64 time;
+
+	if (!in_nmi()) {
+		update_context_time(event->ctx);
+		time = event->ctx->time;
+	} else {
+		u64 now = perf_clock();
+		u64 delta = now - event->ctx->timestamp;
+		time = event->ctx->time + delta;
+	}
+
+	task_clock_event_update(event, time);
+}
+
+static int task_clock_event_init(struct perf_event *event)
+{
+	if (event->attr.type != PERF_TYPE_SOFTWARE)
+		return -ENOENT;
+
+	if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK)
+		return -ENOENT;
+
+	return 0;
+}
+
+static struct pmu perf_task_clock = {
+	.event_init = task_clock_event_init,
+	.enable = task_clock_event_enable,
+	.disable = task_clock_event_disable,
+	.read = task_clock_event_read,
+};
+
+static LIST_HEAD(pmus);
+static DEFINE_MUTEX(pmus_lock);
+static struct srcu_struct pmus_srcu;
+
+int perf_pmu_register(struct pmu *pmu)
+{
+	mutex_lock(&pmus_lock);
+	list_add_rcu(&pmu->entry, &pmus);
+	mutex_unlock(&pmus_lock);
+
+	return 0;
+}
+
+void perf_pmu_unregister(struct pmu *pmu)
+{
+	mutex_lock(&pmus_lock);
+	list_del_rcu(&pmu->entry);
+	mutex_unlock(&pmus_lock);
+
+	synchronize_srcu(&pmus_srcu);
+}
+
+struct pmu *perf_init_event(struct perf_event *event)
+{
+	struct pmu *pmu = NULL;
+	int idx;
+
+	idx = srcu_read_lock(&pmus_srcu);
+	list_for_each_entry_rcu(pmu, &pmus, entry) {
+		int ret = pmu->event_init(event);
+		if (!ret)
+			break;
+		if (ret != -ENOENT) {
+			pmu = ERR_PTR(ret);
+			break;
 		}
-		pmu = &perf_ops_generic;
-		break;
 	}
+	srcu_read_unlock(&pmus_srcu, idx);

 	return pmu;
 }
@@ -5092,29 +5112,8 @@ perf_event_alloc(struct perf_event_attr *attr,
 	if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP))
 		goto done;

-	switch (attr->type) {
-	case PERF_TYPE_RAW:
-	case PERF_TYPE_HARDWARE:
-	case PERF_TYPE_HW_CACHE:
-		pmu = hw_perf_event_init(event);
-		break;
-
-	case PERF_TYPE_SOFTWARE:
-		pmu = sw_perf_event_init(event);
-		break;
-
-	case PERF_TYPE_TRACEPOINT:
-		pmu = tp_perf_event_init(event);
-		break;
+	pmu = perf_init_event(event);

-	case PERF_TYPE_BREAKPOINT:
-		pmu = bp_perf_event_init(event);
-		break;
-
-
-	default:
-		break;
-	}
 done:
 	err = 0;
 	if (!pmu)
@@ -5979,22 +5978,15 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
 	return NOTIFY_OK;
 }

-/*
- * This has to have a higher priority than migration_notifier in sched.c.
- */
-static struct notifier_block __cpuinitdata perf_cpu_nb = {
-	.notifier_call = perf_cpu_notify,
-	.priority = 20,
-};
-
 void __init perf_event_init(void)
 {
 	perf_event_init_all_cpus();
-	perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE,
-			(void *)(long)smp_processor_id());
-	perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE,
-			(void *)(long)smp_processor_id());
-	register_cpu_notifier(&perf_cpu_nb);
+	init_srcu_struct(&pmus_srcu);
+	perf_pmu_register(&perf_swevent);
+	perf_pmu_register(&perf_cpu_clock);
+	perf_pmu_register(&perf_task_clock);
+	perf_tp_register();
+	perf_cpu_notifier(perf_cpu_notify);
 }

 static ssize_t perf_show_reserve_percpu(struct sysdev_class *class,