author      Peter Zijlstra <a.p.zijlstra@chello.nl>   2010-06-11 07:35:08 -0400
committer   Ingo Molnar <mingo@elte.hu>               2010-09-09 14:46:28 -0400
commit      b0a873ebbf87bf38bf70b5e39a7cadc96099fa13 (patch)
tree        63ab672b847aed295f99b9b2a3bbcfd5d3d35bd9 /kernel/perf_event.c
parent      51b0fe39549a04858001922919ab355dee9bdfcf (diff)
perf: Register PMU implementations
Add a simple registration interface for struct pmu; this provides the
infrastructure for removing all the weak functions.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: paulus <paulus@samba.org>
Cc: stephane eranian <eranian@googlemail.com>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Lin Ming <ming.m.lin@intel.com>
Cc: Yanmin <yanmin_zhang@linux.intel.com>
Cc: Deng-Cheng Zhu <dengcheng.zhu@gmail.com>
Cc: David Miller <davem@davemloft.net>
Cc: Michael Cree <mcree@orcon.net.nz>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/perf_event.c')
-rw-r--r--   kernel/perf_event.c | 588
1 file changed, 290 insertions, 298 deletions
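
For readers following the conversion below, the registration pattern that every event class moves to can be summarised with a small sketch. It is illustrative only and assumes the interface exactly as introduced by this commit: a pmu fills in an event_init callback that either claims an event or returns -ENOENT so perf_init_event() goes on to try the next registered pmu, and it calls perf_pmu_register() once at init time. The my_pmu_* names are hypothetical and the callbacks are stubs.

#include <linux/perf_event.h>

/*
 * Illustrative sketch only -- my_pmu_* is made up. It mirrors the shape of
 * the software-clock pmus converted below: event_init() claims or declines
 * an event, perf_pmu_register() adds the pmu to the list that
 * perf_init_event() walks.
 */
static int my_pmu_event_enable(struct perf_event *event)
{
	return 0;			/* start counting for this event */
}

static void my_pmu_event_disable(struct perf_event *event)
{
	/* stop counting */
}

static void my_pmu_event_read(struct perf_event *event)
{
	/* fold the current count into event->count */
}

static int my_pmu_event_init(struct perf_event *event)
{
	/* Decline with -ENOENT so perf_init_event() tries the next pmu. */
	if (event->attr.type != PERF_TYPE_SOFTWARE)
		return -ENOENT;

	if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK)
		return -ENOENT;

	return 0;
}

static struct pmu my_pmu = {
	.event_init	= my_pmu_event_init,
	.enable		= my_pmu_event_enable,
	.disable	= my_pmu_event_disable,
	.read		= my_pmu_event_read,
};

static void __init my_pmu_register(void)
{
	perf_pmu_register(&my_pmu);	/* e.g. from perf_event_init() */
}

The hunks below apply exactly this shape to the software events, the cpu/task clock events and the tracepoint events in kernel/perf_event.c, replacing the old per-type switch in perf_event_alloc() with the perf_init_event() lookup.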
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index fb46fd13f31f..288ce43de57c 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -31,7 +31,6 @@ | |||
31 | #include <linux/kernel_stat.h> | 31 | #include <linux/kernel_stat.h> |
32 | #include <linux/perf_event.h> | 32 | #include <linux/perf_event.h> |
33 | #include <linux/ftrace_event.h> | 33 | #include <linux/ftrace_event.h> |
34 | #include <linux/hw_breakpoint.h> | ||
35 | 34 | ||
36 | #include <asm/irq_regs.h> | 35 | #include <asm/irq_regs.h> |
37 | 36 | ||
@@ -72,14 +71,6 @@ static atomic64_t perf_event_id; | |||
72 | */ | 71 | */ |
73 | static DEFINE_SPINLOCK(perf_resource_lock); | 72 | static DEFINE_SPINLOCK(perf_resource_lock); |
74 | 73 | ||
75 | /* | ||
76 | * Architecture provided APIs - weak aliases: | ||
77 | */ | ||
78 | extern __weak struct pmu *hw_perf_event_init(struct perf_event *event) | ||
79 | { | ||
80 | return NULL; | ||
81 | } | ||
82 | |||
83 | void __weak hw_perf_disable(void) { barrier(); } | 74 | void __weak hw_perf_disable(void) { barrier(); } |
84 | void __weak hw_perf_enable(void) { barrier(); } | 75 | void __weak hw_perf_enable(void) { barrier(); } |
85 | 76 | ||
@@ -4501,182 +4492,6 @@ static int perf_swevent_int(struct perf_event *event) | |||
4501 | return 0; | 4492 | return 0; |
4502 | } | 4493 | } |
4503 | 4494 | ||
4504 | static struct pmu perf_ops_generic = { | ||
4505 | .enable = perf_swevent_enable, | ||
4506 | .disable = perf_swevent_disable, | ||
4507 | .start = perf_swevent_int, | ||
4508 | .stop = perf_swevent_void, | ||
4509 | .read = perf_swevent_read, | ||
4510 | .unthrottle = perf_swevent_void, /* hwc->interrupts already reset */ | ||
4511 | }; | ||
4512 | |||
4513 | /* | ||
4514 | * hrtimer based swevent callback | ||
4515 | */ | ||
4516 | |||
4517 | static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) | ||
4518 | { | ||
4519 | enum hrtimer_restart ret = HRTIMER_RESTART; | ||
4520 | struct perf_sample_data data; | ||
4521 | struct pt_regs *regs; | ||
4522 | struct perf_event *event; | ||
4523 | u64 period; | ||
4524 | |||
4525 | event = container_of(hrtimer, struct perf_event, hw.hrtimer); | ||
4526 | event->pmu->read(event); | ||
4527 | |||
4528 | perf_sample_data_init(&data, 0); | ||
4529 | data.period = event->hw.last_period; | ||
4530 | regs = get_irq_regs(); | ||
4531 | |||
4532 | if (regs && !perf_exclude_event(event, regs)) { | ||
4533 | if (!(event->attr.exclude_idle && current->pid == 0)) | ||
4534 | if (perf_event_overflow(event, 0, &data, regs)) | ||
4535 | ret = HRTIMER_NORESTART; | ||
4536 | } | ||
4537 | |||
4538 | period = max_t(u64, 10000, event->hw.sample_period); | ||
4539 | hrtimer_forward_now(hrtimer, ns_to_ktime(period)); | ||
4540 | |||
4541 | return ret; | ||
4542 | } | ||
4543 | |||
4544 | static void perf_swevent_start_hrtimer(struct perf_event *event) | ||
4545 | { | ||
4546 | struct hw_perf_event *hwc = &event->hw; | ||
4547 | |||
4548 | hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
4549 | hwc->hrtimer.function = perf_swevent_hrtimer; | ||
4550 | if (hwc->sample_period) { | ||
4551 | u64 period; | ||
4552 | |||
4553 | if (hwc->remaining) { | ||
4554 | if (hwc->remaining < 0) | ||
4555 | period = 10000; | ||
4556 | else | ||
4557 | period = hwc->remaining; | ||
4558 | hwc->remaining = 0; | ||
4559 | } else { | ||
4560 | period = max_t(u64, 10000, hwc->sample_period); | ||
4561 | } | ||
4562 | __hrtimer_start_range_ns(&hwc->hrtimer, | ||
4563 | ns_to_ktime(period), 0, | ||
4564 | HRTIMER_MODE_REL, 0); | ||
4565 | } | ||
4566 | } | ||
4567 | |||
4568 | static void perf_swevent_cancel_hrtimer(struct perf_event *event) | ||
4569 | { | ||
4570 | struct hw_perf_event *hwc = &event->hw; | ||
4571 | |||
4572 | if (hwc->sample_period) { | ||
4573 | ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); | ||
4574 | hwc->remaining = ktime_to_ns(remaining); | ||
4575 | |||
4576 | hrtimer_cancel(&hwc->hrtimer); | ||
4577 | } | ||
4578 | } | ||
4579 | |||
4580 | /* | ||
4581 | * Software event: cpu wall time clock | ||
4582 | */ | ||
4583 | |||
4584 | static void cpu_clock_perf_event_update(struct perf_event *event) | ||
4585 | { | ||
4586 | int cpu = raw_smp_processor_id(); | ||
4587 | s64 prev; | ||
4588 | u64 now; | ||
4589 | |||
4590 | now = cpu_clock(cpu); | ||
4591 | prev = local64_xchg(&event->hw.prev_count, now); | ||
4592 | local64_add(now - prev, &event->count); | ||
4593 | } | ||
4594 | |||
4595 | static int cpu_clock_perf_event_enable(struct perf_event *event) | ||
4596 | { | ||
4597 | struct hw_perf_event *hwc = &event->hw; | ||
4598 | int cpu = raw_smp_processor_id(); | ||
4599 | |||
4600 | local64_set(&hwc->prev_count, cpu_clock(cpu)); | ||
4601 | perf_swevent_start_hrtimer(event); | ||
4602 | |||
4603 | return 0; | ||
4604 | } | ||
4605 | |||
4606 | static void cpu_clock_perf_event_disable(struct perf_event *event) | ||
4607 | { | ||
4608 | perf_swevent_cancel_hrtimer(event); | ||
4609 | cpu_clock_perf_event_update(event); | ||
4610 | } | ||
4611 | |||
4612 | static void cpu_clock_perf_event_read(struct perf_event *event) | ||
4613 | { | ||
4614 | cpu_clock_perf_event_update(event); | ||
4615 | } | ||
4616 | |||
4617 | static struct pmu perf_ops_cpu_clock = { | ||
4618 | .enable = cpu_clock_perf_event_enable, | ||
4619 | .disable = cpu_clock_perf_event_disable, | ||
4620 | .read = cpu_clock_perf_event_read, | ||
4621 | }; | ||
4622 | |||
4623 | /* | ||
4624 | * Software event: task time clock | ||
4625 | */ | ||
4626 | |||
4627 | static void task_clock_perf_event_update(struct perf_event *event, u64 now) | ||
4628 | { | ||
4629 | u64 prev; | ||
4630 | s64 delta; | ||
4631 | |||
4632 | prev = local64_xchg(&event->hw.prev_count, now); | ||
4633 | delta = now - prev; | ||
4634 | local64_add(delta, &event->count); | ||
4635 | } | ||
4636 | |||
4637 | static int task_clock_perf_event_enable(struct perf_event *event) | ||
4638 | { | ||
4639 | struct hw_perf_event *hwc = &event->hw; | ||
4640 | u64 now; | ||
4641 | |||
4642 | now = event->ctx->time; | ||
4643 | |||
4644 | local64_set(&hwc->prev_count, now); | ||
4645 | |||
4646 | perf_swevent_start_hrtimer(event); | ||
4647 | |||
4648 | return 0; | ||
4649 | } | ||
4650 | |||
4651 | static void task_clock_perf_event_disable(struct perf_event *event) | ||
4652 | { | ||
4653 | perf_swevent_cancel_hrtimer(event); | ||
4654 | task_clock_perf_event_update(event, event->ctx->time); | ||
4655 | |||
4656 | } | ||
4657 | |||
4658 | static void task_clock_perf_event_read(struct perf_event *event) | ||
4659 | { | ||
4660 | u64 time; | ||
4661 | |||
4662 | if (!in_nmi()) { | ||
4663 | update_context_time(event->ctx); | ||
4664 | time = event->ctx->time; | ||
4665 | } else { | ||
4666 | u64 now = perf_clock(); | ||
4667 | u64 delta = now - event->ctx->timestamp; | ||
4668 | time = event->ctx->time + delta; | ||
4669 | } | ||
4670 | |||
4671 | task_clock_perf_event_update(event, time); | ||
4672 | } | ||
4673 | |||
4674 | static struct pmu perf_ops_task_clock = { | ||
4675 | .enable = task_clock_perf_event_enable, | ||
4676 | .disable = task_clock_perf_event_disable, | ||
4677 | .read = task_clock_perf_event_read, | ||
4678 | }; | ||
4679 | |||
4680 | /* Deref the hlist from the update side */ | 4495 | /* Deref the hlist from the update side */ |
4681 | static inline struct swevent_hlist * | 4496 | static inline struct swevent_hlist * |
4682 | swevent_hlist_deref(struct perf_cpu_context *cpuctx) | 4497 | swevent_hlist_deref(struct perf_cpu_context *cpuctx) |
@@ -4783,17 +4598,63 @@ static int swevent_hlist_get(struct perf_event *event) | |||
4783 | return err; | 4598 | return err; |
4784 | } | 4599 | } |
4785 | 4600 | ||
4786 | #ifdef CONFIG_EVENT_TRACING | 4601 | atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; |
4787 | 4602 | ||
4788 | static struct pmu perf_ops_tracepoint = { | 4603 | static void sw_perf_event_destroy(struct perf_event *event) |
4789 | .enable = perf_trace_enable, | 4604 | { |
4790 | .disable = perf_trace_disable, | 4605 | u64 event_id = event->attr.config; |
4606 | |||
4607 | WARN_ON(event->parent); | ||
4608 | |||
4609 | atomic_dec(&perf_swevent_enabled[event_id]); | ||
4610 | swevent_hlist_put(event); | ||
4611 | } | ||
4612 | |||
4613 | static int perf_swevent_init(struct perf_event *event) | ||
4614 | { | ||
4615 | int event_id = event->attr.config; | ||
4616 | |||
4617 | if (event->attr.type != PERF_TYPE_SOFTWARE) | ||
4618 | return -ENOENT; | ||
4619 | |||
4620 | switch (event_id) { | ||
4621 | case PERF_COUNT_SW_CPU_CLOCK: | ||
4622 | case PERF_COUNT_SW_TASK_CLOCK: | ||
4623 | return -ENOENT; | ||
4624 | |||
4625 | default: | ||
4626 | break; | ||
4627 | } | ||
4628 | |||
4629 | if (event_id > PERF_COUNT_SW_MAX) | ||
4630 | return -ENOENT; | ||
4631 | |||
4632 | if (!event->parent) { | ||
4633 | int err; | ||
4634 | |||
4635 | err = swevent_hlist_get(event); | ||
4636 | if (err) | ||
4637 | return err; | ||
4638 | |||
4639 | atomic_inc(&perf_swevent_enabled[event_id]); | ||
4640 | event->destroy = sw_perf_event_destroy; | ||
4641 | } | ||
4642 | |||
4643 | return 0; | ||
4644 | } | ||
4645 | |||
4646 | static struct pmu perf_swevent = { | ||
4647 | .event_init = perf_swevent_init, | ||
4648 | .enable = perf_swevent_enable, | ||
4649 | .disable = perf_swevent_disable, | ||
4791 | .start = perf_swevent_int, | 4650 | .start = perf_swevent_int, |
4792 | .stop = perf_swevent_void, | 4651 | .stop = perf_swevent_void, |
4793 | .read = perf_swevent_read, | 4652 | .read = perf_swevent_read, |
4794 | .unthrottle = perf_swevent_void, | 4653 | .unthrottle = perf_swevent_void, /* hwc->interrupts already reset */ |
4795 | }; | 4654 | }; |
4796 | 4655 | ||
4656 | #ifdef CONFIG_EVENT_TRACING | ||
4657 | |||
4797 | static int perf_tp_filter_match(struct perf_event *event, | 4658 | static int perf_tp_filter_match(struct perf_event *event, |
4798 | struct perf_sample_data *data) | 4659 | struct perf_sample_data *data) |
4799 | { | 4660 | { |
@@ -4849,10 +4710,13 @@ static void tp_perf_event_destroy(struct perf_event *event) | |||
4849 | perf_trace_destroy(event); | 4710 | perf_trace_destroy(event); |
4850 | } | 4711 | } |
4851 | 4712 | ||
4852 | static struct pmu *tp_perf_event_init(struct perf_event *event) | 4713 | static int perf_tp_event_init(struct perf_event *event) |
4853 | { | 4714 | { |
4854 | int err; | 4715 | int err; |
4855 | 4716 | ||
4717 | if (event->attr.type != PERF_TYPE_TRACEPOINT) | ||
4718 | return -ENOENT; | ||
4719 | |||
4856 | /* | 4720 | /* |
4857 | * Raw tracepoint data is a severe data leak, only allow root to | 4721 | * Raw tracepoint data is a severe data leak, only allow root to |
4858 | * have these. | 4722 | * have these. |
@@ -4860,15 +4724,30 @@ static struct pmu *tp_perf_event_init(struct perf_event *event) | |||
4860 | if ((event->attr.sample_type & PERF_SAMPLE_RAW) && | 4724 | if ((event->attr.sample_type & PERF_SAMPLE_RAW) && |
4861 | perf_paranoid_tracepoint_raw() && | 4725 | perf_paranoid_tracepoint_raw() && |
4862 | !capable(CAP_SYS_ADMIN)) | 4726 | !capable(CAP_SYS_ADMIN)) |
4863 | return ERR_PTR(-EPERM); | 4727 | return -EPERM; |
4864 | 4728 | ||
4865 | err = perf_trace_init(event); | 4729 | err = perf_trace_init(event); |
4866 | if (err) | 4730 | if (err) |
4867 | return NULL; | 4731 | return err; |
4868 | 4732 | ||
4869 | event->destroy = tp_perf_event_destroy; | 4733 | event->destroy = tp_perf_event_destroy; |
4870 | 4734 | ||
4871 | return &perf_ops_tracepoint; | 4735 | return 0; |
4736 | } | ||
4737 | |||
4738 | static struct pmu perf_tracepoint = { | ||
4739 | .event_init = perf_tp_event_init, | ||
4740 | .enable = perf_trace_enable, | ||
4741 | .disable = perf_trace_disable, | ||
4742 | .start = perf_swevent_int, | ||
4743 | .stop = perf_swevent_void, | ||
4744 | .read = perf_swevent_read, | ||
4745 | .unthrottle = perf_swevent_void, | ||
4746 | }; | ||
4747 | |||
4748 | static inline void perf_tp_register(void) | ||
4749 | { | ||
4750 | perf_pmu_register(&perf_tracepoint); | ||
4872 | } | 4751 | } |
4873 | 4752 | ||
4874 | static int perf_event_set_filter(struct perf_event *event, void __user *arg) | 4753 | static int perf_event_set_filter(struct perf_event *event, void __user *arg) |
@@ -4896,9 +4775,8 @@ static void perf_event_free_filter(struct perf_event *event) | |||
4896 | 4775 | ||
4897 | #else | 4776 | #else |
4898 | 4777 | ||
4899 | static struct pmu *tp_perf_event_init(struct perf_event *event) | 4778 | static inline void perf_tp_register(void) |
4900 | { | 4779 | { |
4901 | return NULL; | ||
4902 | } | 4780 | } |
4903 | 4781 | ||
4904 | static int perf_event_set_filter(struct perf_event *event, void __user *arg) | 4782 | static int perf_event_set_filter(struct perf_event *event, void __user *arg) |
@@ -4913,105 +4791,247 @@ static void perf_event_free_filter(struct perf_event *event) | |||
4913 | #endif /* CONFIG_EVENT_TRACING */ | 4791 | #endif /* CONFIG_EVENT_TRACING */ |
4914 | 4792 | ||
4915 | #ifdef CONFIG_HAVE_HW_BREAKPOINT | 4793 | #ifdef CONFIG_HAVE_HW_BREAKPOINT |
4916 | static void bp_perf_event_destroy(struct perf_event *event) | 4794 | void perf_bp_event(struct perf_event *bp, void *data) |
4917 | { | 4795 | { |
4918 | release_bp_slot(event); | 4796 | struct perf_sample_data sample; |
4797 | struct pt_regs *regs = data; | ||
4798 | |||
4799 | perf_sample_data_init(&sample, bp->attr.bp_addr); | ||
4800 | |||
4801 | if (!perf_exclude_event(bp, regs)) | ||
4802 | perf_swevent_add(bp, 1, 1, &sample, regs); | ||
4919 | } | 4803 | } |
4804 | #endif | ||
4805 | |||
4806 | /* | ||
4807 | * hrtimer based swevent callback | ||
4808 | */ | ||
4920 | 4809 | ||
4921 | static struct pmu *bp_perf_event_init(struct perf_event *bp) | 4810 | static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) |
4922 | { | 4811 | { |
4923 | int err; | 4812 | enum hrtimer_restart ret = HRTIMER_RESTART; |
4813 | struct perf_sample_data data; | ||
4814 | struct pt_regs *regs; | ||
4815 | struct perf_event *event; | ||
4816 | u64 period; | ||
4924 | 4817 | ||
4925 | err = register_perf_hw_breakpoint(bp); | 4818 | event = container_of(hrtimer, struct perf_event, hw.hrtimer); |
4926 | if (err) | 4819 | event->pmu->read(event); |
4927 | return ERR_PTR(err); | 4820 | |
4821 | perf_sample_data_init(&data, 0); | ||
4822 | data.period = event->hw.last_period; | ||
4823 | regs = get_irq_regs(); | ||
4824 | |||
4825 | if (regs && !perf_exclude_event(event, regs)) { | ||
4826 | if (!(event->attr.exclude_idle && current->pid == 0)) | ||
4827 | if (perf_event_overflow(event, 0, &data, regs)) | ||
4828 | ret = HRTIMER_NORESTART; | ||
4829 | } | ||
4928 | 4830 | ||
4929 | bp->destroy = bp_perf_event_destroy; | 4831 | period = max_t(u64, 10000, event->hw.sample_period); |
4832 | hrtimer_forward_now(hrtimer, ns_to_ktime(period)); | ||
4930 | 4833 | ||
4931 | return &perf_ops_bp; | 4834 | return ret; |
4932 | } | 4835 | } |
4933 | 4836 | ||
4934 | void perf_bp_event(struct perf_event *bp, void *data) | 4837 | static void perf_swevent_start_hrtimer(struct perf_event *event) |
4935 | { | 4838 | { |
4936 | struct perf_sample_data sample; | 4839 | struct hw_perf_event *hwc = &event->hw; |
4937 | struct pt_regs *regs = data; | ||
4938 | 4840 | ||
4939 | perf_sample_data_init(&sample, bp->attr.bp_addr); | 4841 | hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
4842 | hwc->hrtimer.function = perf_swevent_hrtimer; | ||
4843 | if (hwc->sample_period) { | ||
4844 | u64 period; | ||
4940 | 4845 | ||
4941 | if (!perf_exclude_event(bp, regs)) | 4846 | if (hwc->remaining) { |
4942 | perf_swevent_add(bp, 1, 1, &sample, regs); | 4847 | if (hwc->remaining < 0) |
4848 | period = 10000; | ||
4849 | else | ||
4850 | period = hwc->remaining; | ||
4851 | hwc->remaining = 0; | ||
4852 | } else { | ||
4853 | period = max_t(u64, 10000, hwc->sample_period); | ||
4854 | } | ||
4855 | __hrtimer_start_range_ns(&hwc->hrtimer, | ||
4856 | ns_to_ktime(period), 0, | ||
4857 | HRTIMER_MODE_REL, 0); | ||
4858 | } | ||
4943 | } | 4859 | } |
4944 | #else | 4860 | |
4945 | static struct pmu *bp_perf_event_init(struct perf_event *bp) | 4861 | static void perf_swevent_cancel_hrtimer(struct perf_event *event) |
4946 | { | 4862 | { |
4947 | return NULL; | 4863 | struct hw_perf_event *hwc = &event->hw; |
4864 | |||
4865 | if (hwc->sample_period) { | ||
4866 | ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); | ||
4867 | hwc->remaining = ktime_to_ns(remaining); | ||
4868 | |||
4869 | hrtimer_cancel(&hwc->hrtimer); | ||
4870 | } | ||
4948 | } | 4871 | } |
4949 | 4872 | ||
4950 | void perf_bp_event(struct perf_event *bp, void *regs) | 4873 | /* |
4874 | * Software event: cpu wall time clock | ||
4875 | */ | ||
4876 | |||
4877 | static void cpu_clock_event_update(struct perf_event *event) | ||
4951 | { | 4878 | { |
4879 | int cpu = raw_smp_processor_id(); | ||
4880 | s64 prev; | ||
4881 | u64 now; | ||
4882 | |||
4883 | now = cpu_clock(cpu); | ||
4884 | prev = local64_xchg(&event->hw.prev_count, now); | ||
4885 | local64_add(now - prev, &event->count); | ||
4952 | } | 4886 | } |
4953 | #endif | ||
4954 | 4887 | ||
4955 | atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; | 4888 | static int cpu_clock_event_enable(struct perf_event *event) |
4889 | { | ||
4890 | struct hw_perf_event *hwc = &event->hw; | ||
4891 | int cpu = raw_smp_processor_id(); | ||
4956 | 4892 | ||
4957 | static void sw_perf_event_destroy(struct perf_event *event) | 4893 | local64_set(&hwc->prev_count, cpu_clock(cpu)); |
4894 | perf_swevent_start_hrtimer(event); | ||
4895 | |||
4896 | return 0; | ||
4897 | } | ||
4898 | |||
4899 | static void cpu_clock_event_disable(struct perf_event *event) | ||
4958 | { | 4900 | { |
4959 | u64 event_id = event->attr.config; | 4901 | perf_swevent_cancel_hrtimer(event); |
4902 | cpu_clock_event_update(event); | ||
4903 | } | ||
4960 | 4904 | ||
4961 | WARN_ON(event->parent); | 4905 | static void cpu_clock_event_read(struct perf_event *event) |
4906 | { | ||
4907 | cpu_clock_event_update(event); | ||
4908 | } | ||
4962 | 4909 | ||
4963 | atomic_dec(&perf_swevent_enabled[event_id]); | 4910 | static int cpu_clock_event_init(struct perf_event *event) |
4964 | swevent_hlist_put(event); | 4911 | { |
4912 | if (event->attr.type != PERF_TYPE_SOFTWARE) | ||
4913 | return -ENOENT; | ||
4914 | |||
4915 | if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK) | ||
4916 | return -ENOENT; | ||
4917 | |||
4918 | return 0; | ||
4965 | } | 4919 | } |
4966 | 4920 | ||
4967 | static struct pmu *sw_perf_event_init(struct perf_event *event) | 4921 | static struct pmu perf_cpu_clock = { |
4922 | .event_init = cpu_clock_event_init, | ||
4923 | .enable = cpu_clock_event_enable, | ||
4924 | .disable = cpu_clock_event_disable, | ||
4925 | .read = cpu_clock_event_read, | ||
4926 | }; | ||
4927 | |||
4928 | /* | ||
4929 | * Software event: task time clock | ||
4930 | */ | ||
4931 | |||
4932 | static void task_clock_event_update(struct perf_event *event, u64 now) | ||
4968 | { | 4933 | { |
4969 | struct pmu *pmu = NULL; | 4934 | u64 prev; |
4970 | u64 event_id = event->attr.config; | 4935 | s64 delta; |
4971 | 4936 | ||
4972 | /* | 4937 | prev = local64_xchg(&event->hw.prev_count, now); |
4973 | * Software events (currently) can't in general distinguish | 4938 | delta = now - prev; |
4974 | * between user, kernel and hypervisor events. | 4939 | local64_add(delta, &event->count); |
4975 | * However, context switches and cpu migrations are considered | 4940 | } |
4976 | * to be kernel events, and page faults are never hypervisor | ||
4977 | * events. | ||
4978 | */ | ||
4979 | switch (event_id) { | ||
4980 | case PERF_COUNT_SW_CPU_CLOCK: | ||
4981 | pmu = &perf_ops_cpu_clock; | ||
4982 | 4941 | ||
4983 | break; | 4942 | static int task_clock_event_enable(struct perf_event *event) |
4984 | case PERF_COUNT_SW_TASK_CLOCK: | 4943 | { |
4985 | /* | 4944 | struct hw_perf_event *hwc = &event->hw; |
4986 | * If the user instantiates this as a per-cpu event, | 4945 | u64 now; |
4987 | * use the cpu_clock event instead. | ||
4988 | */ | ||
4989 | if (event->ctx->task) | ||
4990 | pmu = &perf_ops_task_clock; | ||
4991 | else | ||
4992 | pmu = &perf_ops_cpu_clock; | ||
4993 | 4946 | ||
4994 | break; | 4947 | now = event->ctx->time; |
4995 | case PERF_COUNT_SW_PAGE_FAULTS: | ||
4996 | case PERF_COUNT_SW_PAGE_FAULTS_MIN: | ||
4997 | case PERF_COUNT_SW_PAGE_FAULTS_MAJ: | ||
4998 | case PERF_COUNT_SW_CONTEXT_SWITCHES: | ||
4999 | case PERF_COUNT_SW_CPU_MIGRATIONS: | ||
5000 | case PERF_COUNT_SW_ALIGNMENT_FAULTS: | ||
5001 | case PERF_COUNT_SW_EMULATION_FAULTS: | ||
5002 | if (!event->parent) { | ||
5003 | int err; | ||
5004 | |||
5005 | err = swevent_hlist_get(event); | ||
5006 | if (err) | ||
5007 | return ERR_PTR(err); | ||
5008 | 4948 | ||
5009 | atomic_inc(&perf_swevent_enabled[event_id]); | 4949 | local64_set(&hwc->prev_count, now); |
5010 | event->destroy = sw_perf_event_destroy; | 4950 | |
4951 | perf_swevent_start_hrtimer(event); | ||
4952 | |||
4953 | return 0; | ||
4954 | } | ||
4955 | |||
4956 | static void task_clock_event_disable(struct perf_event *event) | ||
4957 | { | ||
4958 | perf_swevent_cancel_hrtimer(event); | ||
4959 | task_clock_event_update(event, event->ctx->time); | ||
4960 | |||
4961 | } | ||
4962 | |||
4963 | static void task_clock_event_read(struct perf_event *event) | ||
4964 | { | ||
4965 | u64 time; | ||
4966 | |||
4967 | if (!in_nmi()) { | ||
4968 | update_context_time(event->ctx); | ||
4969 | time = event->ctx->time; | ||
4970 | } else { | ||
4971 | u64 now = perf_clock(); | ||
4972 | u64 delta = now - event->ctx->timestamp; | ||
4973 | time = event->ctx->time + delta; | ||
4974 | } | ||
4975 | |||
4976 | task_clock_event_update(event, time); | ||
4977 | } | ||
4978 | |||
4979 | static int task_clock_event_init(struct perf_event *event) | ||
4980 | { | ||
4981 | if (event->attr.type != PERF_TYPE_SOFTWARE) | ||
4982 | return -ENOENT; | ||
4983 | |||
4984 | if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK) | ||
4985 | return -ENOENT; | ||
4986 | |||
4987 | return 0; | ||
4988 | } | ||
4989 | |||
4990 | static struct pmu perf_task_clock = { | ||
4991 | .event_init = task_clock_event_init, | ||
4992 | .enable = task_clock_event_enable, | ||
4993 | .disable = task_clock_event_disable, | ||
4994 | .read = task_clock_event_read, | ||
4995 | }; | ||
4996 | |||
4997 | static LIST_HEAD(pmus); | ||
4998 | static DEFINE_MUTEX(pmus_lock); | ||
4999 | static struct srcu_struct pmus_srcu; | ||
5000 | |||
5001 | int perf_pmu_register(struct pmu *pmu) | ||
5002 | { | ||
5003 | mutex_lock(&pmus_lock); | ||
5004 | list_add_rcu(&pmu->entry, &pmus); | ||
5005 | mutex_unlock(&pmus_lock); | ||
5006 | |||
5007 | return 0; | ||
5008 | } | ||
5009 | |||
5010 | void perf_pmu_unregister(struct pmu *pmu) | ||
5011 | { | ||
5012 | mutex_lock(&pmus_lock); | ||
5013 | list_del_rcu(&pmu->entry); | ||
5014 | mutex_unlock(&pmus_lock); | ||
5015 | |||
5016 | synchronize_srcu(&pmus_srcu); | ||
5017 | } | ||
5018 | |||
5019 | struct pmu *perf_init_event(struct perf_event *event) | ||
5020 | { | ||
5021 | struct pmu *pmu = NULL; | ||
5022 | int idx; | ||
5023 | |||
5024 | idx = srcu_read_lock(&pmus_srcu); | ||
5025 | list_for_each_entry_rcu(pmu, &pmus, entry) { | ||
5026 | int ret = pmu->event_init(event); | ||
5027 | if (!ret) | ||
5028 | break; | ||
5029 | if (ret != -ENOENT) { | ||
5030 | pmu = ERR_PTR(ret); | ||
5031 | break; | ||
5011 | } | 5032 | } |
5012 | pmu = &perf_ops_generic; | ||
5013 | break; | ||
5014 | } | 5033 | } |
5034 | srcu_read_unlock(&pmus_srcu, idx); | ||
5015 | 5035 | ||
5016 | return pmu; | 5036 | return pmu; |
5017 | } | 5037 | } |
@@ -5092,29 +5112,8 @@ perf_event_alloc(struct perf_event_attr *attr, | |||
5092 | if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP)) | 5112 | if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP)) |
5093 | goto done; | 5113 | goto done; |
5094 | 5114 | ||
5095 | switch (attr->type) { | 5115 | pmu = perf_init_event(event); |
5096 | case PERF_TYPE_RAW: | ||
5097 | case PERF_TYPE_HARDWARE: | ||
5098 | case PERF_TYPE_HW_CACHE: | ||
5099 | pmu = hw_perf_event_init(event); | ||
5100 | break; | ||
5101 | |||
5102 | case PERF_TYPE_SOFTWARE: | ||
5103 | pmu = sw_perf_event_init(event); | ||
5104 | break; | ||
5105 | |||
5106 | case PERF_TYPE_TRACEPOINT: | ||
5107 | pmu = tp_perf_event_init(event); | ||
5108 | break; | ||
5109 | 5116 | ||
5110 | case PERF_TYPE_BREAKPOINT: | ||
5111 | pmu = bp_perf_event_init(event); | ||
5112 | break; | ||
5113 | |||
5114 | |||
5115 | default: | ||
5116 | break; | ||
5117 | } | ||
5118 | done: | 5117 | done: |
5119 | err = 0; | 5118 | err = 0; |
5120 | if (!pmu) | 5119 | if (!pmu) |
@@ -5979,22 +5978,15 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) | |||
5979 | return NOTIFY_OK; | 5978 | return NOTIFY_OK; |
5980 | } | 5979 | } |
5981 | 5980 | ||
5982 | /* | ||
5983 | * This has to have a higher priority than migration_notifier in sched.c. | ||
5984 | */ | ||
5985 | static struct notifier_block __cpuinitdata perf_cpu_nb = { | ||
5986 | .notifier_call = perf_cpu_notify, | ||
5987 | .priority = 20, | ||
5988 | }; | ||
5989 | |||
5990 | void __init perf_event_init(void) | 5981 | void __init perf_event_init(void) |
5991 | { | 5982 | { |
5992 | perf_event_init_all_cpus(); | 5983 | perf_event_init_all_cpus(); |
5993 | perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE, | 5984 | init_srcu_struct(&pmus_srcu); |
5994 | (void *)(long)smp_processor_id()); | 5985 | perf_pmu_register(&perf_swevent); |
5995 | perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE, | 5986 | perf_pmu_register(&perf_cpu_clock); |
5996 | (void *)(long)smp_processor_id()); | 5987 | perf_pmu_register(&perf_task_clock); |
5997 | register_cpu_notifier(&perf_cpu_nb); | 5988 | perf_tp_register(); |
5989 | perf_cpu_notifier(perf_cpu_notify); | ||
5998 | } | 5990 | } |
5999 | 5991 | ||
6000 | static ssize_t perf_show_reserve_percpu(struct sysdev_class *class, | 5992 | static ssize_t perf_show_reserve_percpu(struct sysdev_class *class, |