author     Arnd Bergmann <arnd@arndb.de>   2014-11-20 07:49:52 -0500
committer  Arnd Bergmann <arnd@arndb.de>   2014-11-20 07:49:52 -0500
commit     b9e0e5a9e075575cc47940da8271d4908d3ae9c3 (patch)
tree       1ad6cafc584265817fc7ff772dc6c27745c7e55d
parent     c3e6dc65f2ce83dacc0a18104bf44931e7eb8a5d (diff)
parent     af66abfe2ec8bd82211e9e4f036a64c902ff4cdb (diff)
Merge tag 'arm-perf-3.19' of git://git.kernel.org/pub/scm/linux/kernel/git/will/linux into next/drivers
Pull "ARM: perf: updates for 3.19" from Will Deacon:
This patch series takes us slightly further on the road to big.LITTLE
support in perf. The main change enabling this is moving the CCI PMU
driver away from the arm-pmu abstraction, allowing the arch code to
focus specifically on support for CPU PMUs.
* tag 'arm-perf-3.19' of git://git.kernel.org/pub/scm/linux/kernel/git/will/linux:
arm: perf: fold hotplug notifier into arm_pmu
arm: perf: dynamically allocate cpu hardware data
arm: perf: fold percpu_pmu into pmu_hw_events
arm: perf: kill get_hw_events()
arm: perf: limit size of accounting data
arm: perf: use IDR types for CPU PMUs
arm: perf: make PMU probing data-driven
arm: perf: add missing pr_info newlines
arm: perf: factor out callchain code
ARM: perf: use pr_* instead of printk
ARM: perf: remove useless return and check of idx in counter handling
bus: cci: move away from arm_pmu framework
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
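
As a rough illustration of the "arm: perf: make PMU probing data-driven" change pulled in above, here is a minimal user-space sketch of the cpuid/mask match loop that replaces the old switch statement in probe_current_pmu(). The struct layout mirrors the new pmu_probe_info; the part/mask constants are approximations and main() is purely illustrative, not code from the patch.

#include <stdio.h>

struct fake_pmu { const char *name; };

struct probe_info {
        unsigned int cpuid;
        unsigned int mask;
        int (*init)(struct fake_pmu *);
};

static int a8_init(struct fake_pmu *p) { p->name = "armv7_cortex_a8"; return 0; }
static int a9_init(struct fake_pmu *p) { p->name = "armv7_cortex_a9"; return 0; }

static const struct probe_info table[] = {
        { 0x4100c080, 0xff00fff0, a8_init },    /* approx. Cortex-A8 part/mask */
        { 0x4100c090, 0xff00fff0, a9_init },    /* approx. Cortex-A9 part/mask */
        { 0, 0, NULL },                         /* sentinel, as in pmu_probe_table */
};

int main(void)
{
        unsigned int cpuid = 0x413fc090;        /* made-up MIDR of a Cortex-A9 */
        struct fake_pmu pmu = { "unknown" };
        const struct probe_info *info;

        for (info = table; info->init != NULL; info++) {
                if ((cpuid & info->mask) != info->cpuid)
                        continue;
                info->init(&pmu);
                break;
        }
        printf("probed PMU: %s\n", pmu.name);
        return 0;
}

Adding a new CPU then means adding one table entry instead of another switch case, which is what lets the CCI PMU driver drop out of the arch code entirely.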
-rw-r--r--  arch/arm/include/asm/perf_event.h    |   2
-rw-r--r--  arch/arm/include/asm/pmu.h           |  36
-rw-r--r--  arch/arm/kernel/Makefile             |   2
-rw-r--r--  arch/arm/kernel/perf_callchain.c     | 136
-rw-r--r--  arch/arm/kernel/perf_event.c         | 162
-rw-r--r--  arch/arm/kernel/perf_event_cpu.c     | 181
-rw-r--r--  arch/arm/kernel/perf_event_v6.c      |  12
-rw-r--r--  arch/arm/kernel/perf_event_v7.c      |  72
-rw-r--r--  arch/arm/kernel/perf_event_xscale.c  |  20
-rw-r--r--  drivers/bus/arm-cci.c                | 552
10 files changed, 772 insertions, 403 deletions
diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h
index c3a83691af8e..d9cf138fd7d4 100644
--- a/arch/arm/include/asm/perf_event.h
+++ b/arch/arm/include/asm/perf_event.h
@@ -12,7 +12,7 @@ | |||
12 | #ifndef __ARM_PERF_EVENT_H__ | 12 | #ifndef __ARM_PERF_EVENT_H__ |
13 | #define __ARM_PERF_EVENT_H__ | 13 | #define __ARM_PERF_EVENT_H__ |
14 | 14 | ||
15 | #ifdef CONFIG_HW_PERF_EVENTS | 15 | #ifdef CONFIG_PERF_EVENTS |
16 | struct pt_regs; | 16 | struct pt_regs; |
17 | extern unsigned long perf_instruction_pointer(struct pt_regs *regs); | 17 | extern unsigned long perf_instruction_pointer(struct pt_regs *regs); |
18 | extern unsigned long perf_misc_flags(struct pt_regs *regs); | 18 | extern unsigned long perf_misc_flags(struct pt_regs *regs); |
diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h
index 0b648c541293..b1596bd59129 100644
--- a/arch/arm/include/asm/pmu.h
+++ b/arch/arm/include/asm/pmu.h
@@ -15,6 +15,8 @@ | |||
15 | #include <linux/interrupt.h> | 15 | #include <linux/interrupt.h> |
16 | #include <linux/perf_event.h> | 16 | #include <linux/perf_event.h> |
17 | 17 | ||
18 | #include <asm/cputype.h> | ||
19 | |||
18 | /* | 20 | /* |
19 | * struct arm_pmu_platdata - ARM PMU platform data | 21 | * struct arm_pmu_platdata - ARM PMU platform data |
20 | * | 22 | * |
@@ -66,19 +68,25 @@ struct pmu_hw_events { | |||
66 | /* | 68 | /* |
67 | * The events that are active on the PMU for the given index. | 69 | * The events that are active on the PMU for the given index. |
68 | */ | 70 | */ |
69 | struct perf_event **events; | 71 | struct perf_event *events[ARMPMU_MAX_HWEVENTS]; |
70 | 72 | ||
71 | /* | 73 | /* |
72 | * A 1 bit for an index indicates that the counter is being used for | 74 | * A 1 bit for an index indicates that the counter is being used for |
73 | * an event. A 0 means that the counter can be used. | 75 | * an event. A 0 means that the counter can be used. |
74 | */ | 76 | */ |
75 | unsigned long *used_mask; | 77 | DECLARE_BITMAP(used_mask, ARMPMU_MAX_HWEVENTS); |
76 | 78 | ||
77 | /* | 79 | /* |
78 | * Hardware lock to serialize accesses to PMU registers. Needed for the | 80 | * Hardware lock to serialize accesses to PMU registers. Needed for the |
79 | * read/modify/write sequences. | 81 | * read/modify/write sequences. |
80 | */ | 82 | */ |
81 | raw_spinlock_t pmu_lock; | 83 | raw_spinlock_t pmu_lock; |
84 | |||
85 | /* | ||
86 | * When using percpu IRQs, we need a percpu dev_id. Place it here as we | ||
87 | * already have to allocate this struct per cpu. | ||
88 | */ | ||
89 | struct arm_pmu *percpu_pmu; | ||
82 | }; | 90 | }; |
83 | 91 | ||
84 | struct arm_pmu { | 92 | struct arm_pmu { |
@@ -107,7 +115,8 @@ struct arm_pmu { | |||
107 | struct mutex reserve_mutex; | 115 | struct mutex reserve_mutex; |
108 | u64 max_period; | 116 | u64 max_period; |
109 | struct platform_device *plat_device; | 117 | struct platform_device *plat_device; |
110 | struct pmu_hw_events *(*get_hw_events)(void); | 118 | struct pmu_hw_events __percpu *hw_events; |
119 | struct notifier_block hotplug_nb; | ||
111 | }; | 120 | }; |
112 | 121 | ||
113 | #define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu)) | 122 | #define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu)) |
@@ -127,6 +136,27 @@ int armpmu_map_event(struct perf_event *event, | |||
127 | [PERF_COUNT_HW_CACHE_RESULT_MAX], | 136 | [PERF_COUNT_HW_CACHE_RESULT_MAX], |
128 | u32 raw_event_mask); | 137 | u32 raw_event_mask); |
129 | 138 | ||
139 | struct pmu_probe_info { | ||
140 | unsigned int cpuid; | ||
141 | unsigned int mask; | ||
142 | int (*init)(struct arm_pmu *); | ||
143 | }; | ||
144 | |||
145 | #define PMU_PROBE(_cpuid, _mask, _fn) \ | ||
146 | { \ | ||
147 | .cpuid = (_cpuid), \ | ||
148 | .mask = (_mask), \ | ||
149 | .init = (_fn), \ | ||
150 | } | ||
151 | |||
152 | #define ARM_PMU_PROBE(_cpuid, _fn) \ | ||
153 | PMU_PROBE(_cpuid, ARM_CPU_PART_MASK, _fn) | ||
154 | |||
155 | #define ARM_PMU_XSCALE_MASK ((0xff << 24) | ARM_CPU_XSCALE_ARCH_MASK) | ||
156 | |||
157 | #define XSCALE_PMU_PROBE(_version, _fn) \ | ||
158 | PMU_PROBE(ARM_CPU_IMP_INTEL << 24 | _version, ARM_PMU_XSCALE_MASK, _fn) | ||
159 | |||
130 | #endif /* CONFIG_HW_PERF_EVENTS */ | 160 | #endif /* CONFIG_HW_PERF_EVENTS */ |
131 | 161 | ||
132 | #endif /* __ARM_PMU_H__ */ | 162 | #endif /* __ARM_PMU_H__ */ |
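
The hw_events pointer added to struct arm_pmu above replaces the old get_hw_events() callback: a back-end now reaches its per-CPU state directly with this_cpu_ptr(). A minimal sketch of a hypothetical driver callback (example_pmu_stop is not part of the patch, but it mirrors what the v6/v7/xscale back-ends do later in this diff):

#include <linux/percpu.h>
#include <asm/pmu.h>

static void example_pmu_stop(struct arm_pmu *cpu_pmu)
{
        unsigned long flags;
        struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);

        /* serialize read/modify/write of this CPU's PMU registers */
        raw_spin_lock_irqsave(&events->pmu_lock, flags);
        /* ... poke this CPU's PMU control registers ... */
        raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}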
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 38ddd9f83d0e..8dcbed5016ac 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -82,7 +82,7 @@ obj-$(CONFIG_CPU_MOHAWK) += xscale-cp0.o | |||
82 | obj-$(CONFIG_CPU_PJ4) += pj4-cp0.o | 82 | obj-$(CONFIG_CPU_PJ4) += pj4-cp0.o |
83 | obj-$(CONFIG_CPU_PJ4B) += pj4-cp0.o | 83 | obj-$(CONFIG_CPU_PJ4B) += pj4-cp0.o |
84 | obj-$(CONFIG_IWMMXT) += iwmmxt.o | 84 | obj-$(CONFIG_IWMMXT) += iwmmxt.o |
85 | obj-$(CONFIG_PERF_EVENTS) += perf_regs.o | 85 | obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o |
86 | obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o perf_event_cpu.o | 86 | obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o perf_event_cpu.o |
87 | AFLAGS_iwmmxt.o := -Wa,-mcpu=iwmmxt | 87 | AFLAGS_iwmmxt.o := -Wa,-mcpu=iwmmxt |
88 | obj-$(CONFIG_ARM_CPU_TOPOLOGY) += topology.o | 88 | obj-$(CONFIG_ARM_CPU_TOPOLOGY) += topology.o |
diff --git a/arch/arm/kernel/perf_callchain.c b/arch/arm/kernel/perf_callchain.c
new file mode 100644
index 000000000000..4e02ae5950ff
--- /dev/null
+++ b/arch/arm/kernel/perf_callchain.c
@@ -0,0 +1,136 @@ | |||
1 | /* | ||
2 | * ARM callchain support | ||
3 | * | ||
4 | * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles | ||
5 | * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com> | ||
6 | * | ||
7 | * This code is based on the ARM OProfile backtrace code. | ||
8 | */ | ||
9 | #include <linux/perf_event.h> | ||
10 | #include <linux/uaccess.h> | ||
11 | |||
12 | #include <asm/stacktrace.h> | ||
13 | |||
14 | /* | ||
15 | * The registers we're interested in are at the end of the variable | ||
16 | * length saved register structure. The fp points at the end of this | ||
17 | * structure so the address of this struct is: | ||
18 | * (struct frame_tail *)(xxx->fp)-1 | ||
19 | * | ||
20 | * This code has been adapted from the ARM OProfile support. | ||
21 | */ | ||
22 | struct frame_tail { | ||
23 | struct frame_tail __user *fp; | ||
24 | unsigned long sp; | ||
25 | unsigned long lr; | ||
26 | } __attribute__((packed)); | ||
27 | |||
28 | /* | ||
29 | * Get the return address for a single stackframe and return a pointer to the | ||
30 | * next frame tail. | ||
31 | */ | ||
32 | static struct frame_tail __user * | ||
33 | user_backtrace(struct frame_tail __user *tail, | ||
34 | struct perf_callchain_entry *entry) | ||
35 | { | ||
36 | struct frame_tail buftail; | ||
37 | unsigned long err; | ||
38 | |||
39 | if (!access_ok(VERIFY_READ, tail, sizeof(buftail))) | ||
40 | return NULL; | ||
41 | |||
42 | pagefault_disable(); | ||
43 | err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail)); | ||
44 | pagefault_enable(); | ||
45 | |||
46 | if (err) | ||
47 | return NULL; | ||
48 | |||
49 | perf_callchain_store(entry, buftail.lr); | ||
50 | |||
51 | /* | ||
52 | * Frame pointers should strictly progress back up the stack | ||
53 | * (towards higher addresses). | ||
54 | */ | ||
55 | if (tail + 1 >= buftail.fp) | ||
56 | return NULL; | ||
57 | |||
58 | return buftail.fp - 1; | ||
59 | } | ||
60 | |||
61 | void | ||
62 | perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) | ||
63 | { | ||
64 | struct frame_tail __user *tail; | ||
65 | |||
66 | if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { | ||
67 | /* We don't support guest os callchain now */ | ||
68 | return; | ||
69 | } | ||
70 | |||
71 | perf_callchain_store(entry, regs->ARM_pc); | ||
72 | |||
73 | if (!current->mm) | ||
74 | return; | ||
75 | |||
76 | tail = (struct frame_tail __user *)regs->ARM_fp - 1; | ||
77 | |||
78 | while ((entry->nr < PERF_MAX_STACK_DEPTH) && | ||
79 | tail && !((unsigned long)tail & 0x3)) | ||
80 | tail = user_backtrace(tail, entry); | ||
81 | } | ||
82 | |||
83 | /* | ||
84 | * Gets called by walk_stackframe() for every stackframe. This will be called | ||
85 | * whist unwinding the stackframe and is like a subroutine return so we use | ||
86 | * the PC. | ||
87 | */ | ||
88 | static int | ||
89 | callchain_trace(struct stackframe *fr, | ||
90 | void *data) | ||
91 | { | ||
92 | struct perf_callchain_entry *entry = data; | ||
93 | perf_callchain_store(entry, fr->pc); | ||
94 | return 0; | ||
95 | } | ||
96 | |||
97 | void | ||
98 | perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) | ||
99 | { | ||
100 | struct stackframe fr; | ||
101 | |||
102 | if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { | ||
103 | /* We don't support guest os callchain now */ | ||
104 | return; | ||
105 | } | ||
106 | |||
107 | arm_get_current_stackframe(regs, &fr); | ||
108 | walk_stackframe(&fr, callchain_trace, entry); | ||
109 | } | ||
110 | |||
111 | unsigned long perf_instruction_pointer(struct pt_regs *regs) | ||
112 | { | ||
113 | if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) | ||
114 | return perf_guest_cbs->get_guest_ip(); | ||
115 | |||
116 | return instruction_pointer(regs); | ||
117 | } | ||
118 | |||
119 | unsigned long perf_misc_flags(struct pt_regs *regs) | ||
120 | { | ||
121 | int misc = 0; | ||
122 | |||
123 | if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { | ||
124 | if (perf_guest_cbs->is_user_mode()) | ||
125 | misc |= PERF_RECORD_MISC_GUEST_USER; | ||
126 | else | ||
127 | misc |= PERF_RECORD_MISC_GUEST_KERNEL; | ||
128 | } else { | ||
129 | if (user_mode(regs)) | ||
130 | misc |= PERF_RECORD_MISC_USER; | ||
131 | else | ||
132 | misc |= PERF_RECORD_MISC_KERNEL; | ||
133 | } | ||
134 | |||
135 | return misc; | ||
136 | } | ||
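
The user backtrace above relies on three termination rules: a bounded depth, word-aligned frame pointers, and frame pointers that strictly progress towards higher addresses. A small stand-alone model of that loop, under the assumption that the types and the depth limit are simplified stand-ins and the fake frames live in an array so their address ordering is guaranteed:

#include <stdio.h>
#include <stdint.h>

#define MAX_DEPTH 4     /* stand-in for PERF_MAX_STACK_DEPTH */

struct frame_tail {
        struct frame_tail *fp;
        unsigned long sp;
        unsigned long lr;
};

static struct frame_tail *user_backtrace(struct frame_tail *tail,
                                         unsigned long *chain, int *nr)
{
        chain[(*nr)++] = tail->lr;

        /* frame pointers must strictly progress towards higher addresses */
        if (tail + 1 >= tail->fp)
                return NULL;
        return tail->fp - 1;
}

int main(void)
{
        /* two fake frames; frames[1] is the caller, at a higher address */
        struct frame_tail frames[2];
        unsigned long chain[MAX_DEPTH];
        int nr = 0;
        struct frame_tail *tail;

        frames[1] = (struct frame_tail){ .fp = &frames[0],     .lr = 0x1000 };
        frames[0] = (struct frame_tail){ .fp = &frames[1] + 1, .lr = 0x2000 };

        tail = &frames[0];
        while (nr < MAX_DEPTH && tail && !((uintptr_t)tail & 0x3))
                tail = user_backtrace(tail, chain, &nr);

        for (int i = 0; i < nr; i++)
                printf("lr[%d] = 0x%lx\n", i, chain[i]);
        return 0;
}

The caller's fp deliberately points backwards, so the second iteration trips the "strictly progress" check and ends the walk after recording both return addresses.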
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 266cba46db3e..e34934f63a49 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c | |||
@@ -7,21 +7,18 @@ | |||
7 | * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com> | 7 | * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com> |
8 | * | 8 | * |
9 | * This code is based on the sparc64 perf event code, which is in turn based | 9 | * This code is based on the sparc64 perf event code, which is in turn based |
10 | * on the x86 code. Callchain code is based on the ARM OProfile backtrace | 10 | * on the x86 code. |
11 | * code. | ||
12 | */ | 11 | */ |
13 | #define pr_fmt(fmt) "hw perfevents: " fmt | 12 | #define pr_fmt(fmt) "hw perfevents: " fmt |
14 | 13 | ||
15 | #include <linux/kernel.h> | 14 | #include <linux/kernel.h> |
16 | #include <linux/platform_device.h> | 15 | #include <linux/platform_device.h> |
17 | #include <linux/pm_runtime.h> | 16 | #include <linux/pm_runtime.h> |
18 | #include <linux/uaccess.h> | ||
19 | #include <linux/irq.h> | 17 | #include <linux/irq.h> |
20 | #include <linux/irqdesc.h> | 18 | #include <linux/irqdesc.h> |
21 | 19 | ||
22 | #include <asm/irq_regs.h> | 20 | #include <asm/irq_regs.h> |
23 | #include <asm/pmu.h> | 21 | #include <asm/pmu.h> |
24 | #include <asm/stacktrace.h> | ||
25 | 22 | ||
26 | static int | 23 | static int |
27 | armpmu_map_cache_event(const unsigned (*cache_map) | 24 | armpmu_map_cache_event(const unsigned (*cache_map) |
@@ -80,8 +77,12 @@ armpmu_map_event(struct perf_event *event, | |||
80 | u32 raw_event_mask) | 77 | u32 raw_event_mask) |
81 | { | 78 | { |
82 | u64 config = event->attr.config; | 79 | u64 config = event->attr.config; |
80 | int type = event->attr.type; | ||
83 | 81 | ||
84 | switch (event->attr.type) { | 82 | if (type == event->pmu->type) |
83 | return armpmu_map_raw_event(raw_event_mask, config); | ||
84 | |||
85 | switch (type) { | ||
85 | case PERF_TYPE_HARDWARE: | 86 | case PERF_TYPE_HARDWARE: |
86 | return armpmu_map_hw_event(event_map, config); | 87 | return armpmu_map_hw_event(event_map, config); |
87 | case PERF_TYPE_HW_CACHE: | 88 | case PERF_TYPE_HW_CACHE: |
@@ -200,7 +201,7 @@ static void | |||
200 | armpmu_del(struct perf_event *event, int flags) | 201 | armpmu_del(struct perf_event *event, int flags) |
201 | { | 202 | { |
202 | struct arm_pmu *armpmu = to_arm_pmu(event->pmu); | 203 | struct arm_pmu *armpmu = to_arm_pmu(event->pmu); |
203 | struct pmu_hw_events *hw_events = armpmu->get_hw_events(); | 204 | struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events); |
204 | struct hw_perf_event *hwc = &event->hw; | 205 | struct hw_perf_event *hwc = &event->hw; |
205 | int idx = hwc->idx; | 206 | int idx = hwc->idx; |
206 | 207 | ||
@@ -217,7 +218,7 @@ static int | |||
217 | armpmu_add(struct perf_event *event, int flags) | 218 | armpmu_add(struct perf_event *event, int flags) |
218 | { | 219 | { |
219 | struct arm_pmu *armpmu = to_arm_pmu(event->pmu); | 220 | struct arm_pmu *armpmu = to_arm_pmu(event->pmu); |
220 | struct pmu_hw_events *hw_events = armpmu->get_hw_events(); | 221 | struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events); |
221 | struct hw_perf_event *hwc = &event->hw; | 222 | struct hw_perf_event *hwc = &event->hw; |
222 | int idx; | 223 | int idx; |
223 | int err = 0; | 224 | int err = 0; |
@@ -274,14 +275,12 @@ validate_group(struct perf_event *event) | |||
274 | { | 275 | { |
275 | struct perf_event *sibling, *leader = event->group_leader; | 276 | struct perf_event *sibling, *leader = event->group_leader; |
276 | struct pmu_hw_events fake_pmu; | 277 | struct pmu_hw_events fake_pmu; |
277 | DECLARE_BITMAP(fake_used_mask, ARMPMU_MAX_HWEVENTS); | ||
278 | 278 | ||
279 | /* | 279 | /* |
280 | * Initialise the fake PMU. We only need to populate the | 280 | * Initialise the fake PMU. We only need to populate the |
281 | * used_mask for the purposes of validation. | 281 | * used_mask for the purposes of validation. |
282 | */ | 282 | */ |
283 | memset(fake_used_mask, 0, sizeof(fake_used_mask)); | 283 | memset(&fake_pmu.used_mask, 0, sizeof(fake_pmu.used_mask)); |
284 | fake_pmu.used_mask = fake_used_mask; | ||
285 | 284 | ||
286 | if (!validate_event(&fake_pmu, leader)) | 285 | if (!validate_event(&fake_pmu, leader)) |
287 | return -EINVAL; | 286 | return -EINVAL; |
@@ -305,17 +304,21 @@ static irqreturn_t armpmu_dispatch_irq(int irq, void *dev) | |||
305 | int ret; | 304 | int ret; |
306 | u64 start_clock, finish_clock; | 305 | u64 start_clock, finish_clock; |
307 | 306 | ||
308 | if (irq_is_percpu(irq)) | 307 | /* |
309 | dev = *(void **)dev; | 308 | * we request the IRQ with a (possibly percpu) struct arm_pmu**, but |
310 | armpmu = dev; | 309 | * the handlers expect a struct arm_pmu*. The percpu_irq framework will |
310 | * do any necessary shifting, we just need to perform the first | ||
311 | * dereference. | ||
312 | */ | ||
313 | armpmu = *(void **)dev; | ||
311 | plat_device = armpmu->plat_device; | 314 | plat_device = armpmu->plat_device; |
312 | plat = dev_get_platdata(&plat_device->dev); | 315 | plat = dev_get_platdata(&plat_device->dev); |
313 | 316 | ||
314 | start_clock = sched_clock(); | 317 | start_clock = sched_clock(); |
315 | if (plat && plat->handle_irq) | 318 | if (plat && plat->handle_irq) |
316 | ret = plat->handle_irq(irq, dev, armpmu->handle_irq); | 319 | ret = plat->handle_irq(irq, armpmu, armpmu->handle_irq); |
317 | else | 320 | else |
318 | ret = armpmu->handle_irq(irq, dev); | 321 | ret = armpmu->handle_irq(irq, armpmu); |
319 | finish_clock = sched_clock(); | 322 | finish_clock = sched_clock(); |
320 | 323 | ||
321 | perf_sample_event_took(finish_clock - start_clock); | 324 | perf_sample_event_took(finish_clock - start_clock); |
@@ -468,7 +471,7 @@ static int armpmu_event_init(struct perf_event *event) | |||
468 | static void armpmu_enable(struct pmu *pmu) | 471 | static void armpmu_enable(struct pmu *pmu) |
469 | { | 472 | { |
470 | struct arm_pmu *armpmu = to_arm_pmu(pmu); | 473 | struct arm_pmu *armpmu = to_arm_pmu(pmu); |
471 | struct pmu_hw_events *hw_events = armpmu->get_hw_events(); | 474 | struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events); |
472 | int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events); | 475 | int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events); |
473 | 476 | ||
474 | if (enabled) | 477 | if (enabled) |
@@ -533,130 +536,3 @@ int armpmu_register(struct arm_pmu *armpmu, int type) | |||
533 | return perf_pmu_register(&armpmu->pmu, armpmu->name, type); | 536 | return perf_pmu_register(&armpmu->pmu, armpmu->name, type); |
534 | } | 537 | } |
535 | 538 | ||
536 | /* | ||
537 | * Callchain handling code. | ||
538 | */ | ||
539 | |||
540 | /* | ||
541 | * The registers we're interested in are at the end of the variable | ||
542 | * length saved register structure. The fp points at the end of this | ||
543 | * structure so the address of this struct is: | ||
544 | * (struct frame_tail *)(xxx->fp)-1 | ||
545 | * | ||
546 | * This code has been adapted from the ARM OProfile support. | ||
547 | */ | ||
548 | struct frame_tail { | ||
549 | struct frame_tail __user *fp; | ||
550 | unsigned long sp; | ||
551 | unsigned long lr; | ||
552 | } __attribute__((packed)); | ||
553 | |||
554 | /* | ||
555 | * Get the return address for a single stackframe and return a pointer to the | ||
556 | * next frame tail. | ||
557 | */ | ||
558 | static struct frame_tail __user * | ||
559 | user_backtrace(struct frame_tail __user *tail, | ||
560 | struct perf_callchain_entry *entry) | ||
561 | { | ||
562 | struct frame_tail buftail; | ||
563 | unsigned long err; | ||
564 | |||
565 | if (!access_ok(VERIFY_READ, tail, sizeof(buftail))) | ||
566 | return NULL; | ||
567 | |||
568 | pagefault_disable(); | ||
569 | err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail)); | ||
570 | pagefault_enable(); | ||
571 | |||
572 | if (err) | ||
573 | return NULL; | ||
574 | |||
575 | perf_callchain_store(entry, buftail.lr); | ||
576 | |||
577 | /* | ||
578 | * Frame pointers should strictly progress back up the stack | ||
579 | * (towards higher addresses). | ||
580 | */ | ||
581 | if (tail + 1 >= buftail.fp) | ||
582 | return NULL; | ||
583 | |||
584 | return buftail.fp - 1; | ||
585 | } | ||
586 | |||
587 | void | ||
588 | perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) | ||
589 | { | ||
590 | struct frame_tail __user *tail; | ||
591 | |||
592 | if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { | ||
593 | /* We don't support guest os callchain now */ | ||
594 | return; | ||
595 | } | ||
596 | |||
597 | perf_callchain_store(entry, regs->ARM_pc); | ||
598 | |||
599 | if (!current->mm) | ||
600 | return; | ||
601 | |||
602 | tail = (struct frame_tail __user *)regs->ARM_fp - 1; | ||
603 | |||
604 | while ((entry->nr < PERF_MAX_STACK_DEPTH) && | ||
605 | tail && !((unsigned long)tail & 0x3)) | ||
606 | tail = user_backtrace(tail, entry); | ||
607 | } | ||
608 | |||
609 | /* | ||
610 | * Gets called by walk_stackframe() for every stackframe. This will be called | ||
611 | * whist unwinding the stackframe and is like a subroutine return so we use | ||
612 | * the PC. | ||
613 | */ | ||
614 | static int | ||
615 | callchain_trace(struct stackframe *fr, | ||
616 | void *data) | ||
617 | { | ||
618 | struct perf_callchain_entry *entry = data; | ||
619 | perf_callchain_store(entry, fr->pc); | ||
620 | return 0; | ||
621 | } | ||
622 | |||
623 | void | ||
624 | perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) | ||
625 | { | ||
626 | struct stackframe fr; | ||
627 | |||
628 | if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { | ||
629 | /* We don't support guest os callchain now */ | ||
630 | return; | ||
631 | } | ||
632 | |||
633 | arm_get_current_stackframe(regs, &fr); | ||
634 | walk_stackframe(&fr, callchain_trace, entry); | ||
635 | } | ||
636 | |||
637 | unsigned long perf_instruction_pointer(struct pt_regs *regs) | ||
638 | { | ||
639 | if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) | ||
640 | return perf_guest_cbs->get_guest_ip(); | ||
641 | |||
642 | return instruction_pointer(regs); | ||
643 | } | ||
644 | |||
645 | unsigned long perf_misc_flags(struct pt_regs *regs) | ||
646 | { | ||
647 | int misc = 0; | ||
648 | |||
649 | if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { | ||
650 | if (perf_guest_cbs->is_user_mode()) | ||
651 | misc |= PERF_RECORD_MISC_GUEST_USER; | ||
652 | else | ||
653 | misc |= PERF_RECORD_MISC_GUEST_KERNEL; | ||
654 | } else { | ||
655 | if (user_mode(regs)) | ||
656 | misc |= PERF_RECORD_MISC_USER; | ||
657 | else | ||
658 | misc |= PERF_RECORD_MISC_KERNEL; | ||
659 | } | ||
660 | |||
661 | return misc; | ||
662 | } | ||
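
The reworked armpmu_dispatch_irq() above banks on one convention: whatever dev_id was passed to request_irq() or request_percpu_irq(), it is always the address of a slot that holds a struct arm_pmu pointer, so a single dereference suffices in the handler. A tiny user-space model of that convention (the types are simplified stand-ins for the kernel structures):

#include <stdio.h>
#include <assert.h>

struct fake_arm_pmu { const char *name; };

struct fake_hw_events {
        struct fake_arm_pmu *percpu_pmu;        /* one copy per CPU in the kernel */
};

static struct fake_arm_pmu *dispatch(void *dev)
{
        /* mirrors: armpmu = *(void **)dev; */
        return *(struct fake_arm_pmu **)dev;
}

int main(void)
{
        struct fake_arm_pmu pmu = { "cpu_pmu" };
        struct fake_hw_events cpu0 = { &pmu }, cpu1 = { &pmu };

        /* regular IRQ: dev_id is per_cpu_ptr(&hw_events->percpu_pmu, cpu) */
        assert(dispatch(&cpu0.percpu_pmu) == &pmu);
        /* percpu IRQ: the core hands the handler this CPU's copy of the slot */
        assert(dispatch(&cpu1.percpu_pmu) == &pmu);

        printf("both paths resolve to %s\n", dispatch(&cpu0.percpu_pmu)->name);
        return 0;
}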
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index eb2c4d55666b..dd9acc95ebc0 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -35,11 +35,6 @@ | |||
35 | /* Set at runtime when we know what CPU type we are. */ | 35 | /* Set at runtime when we know what CPU type we are. */ |
36 | static struct arm_pmu *cpu_pmu; | 36 | static struct arm_pmu *cpu_pmu; |
37 | 37 | ||
38 | static DEFINE_PER_CPU(struct arm_pmu *, percpu_pmu); | ||
39 | static DEFINE_PER_CPU(struct perf_event * [ARMPMU_MAX_HWEVENTS], hw_events); | ||
40 | static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)], used_mask); | ||
41 | static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events); | ||
42 | |||
43 | /* | 38 | /* |
44 | * Despite the names, these two functions are CPU-specific and are used | 39 | * Despite the names, these two functions are CPU-specific and are used |
45 | * by the OProfile/perf code. | 40 | * by the OProfile/perf code. |
@@ -69,11 +64,6 @@ EXPORT_SYMBOL_GPL(perf_num_counters); | |||
69 | #include "perf_event_v6.c" | 64 | #include "perf_event_v6.c" |
70 | #include "perf_event_v7.c" | 65 | #include "perf_event_v7.c" |
71 | 66 | ||
72 | static struct pmu_hw_events *cpu_pmu_get_cpu_events(void) | ||
73 | { | ||
74 | return this_cpu_ptr(&cpu_hw_events); | ||
75 | } | ||
76 | |||
77 | static void cpu_pmu_enable_percpu_irq(void *data) | 67 | static void cpu_pmu_enable_percpu_irq(void *data) |
78 | { | 68 | { |
79 | int irq = *(int *)data; | 69 | int irq = *(int *)data; |
@@ -92,20 +82,21 @@ static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu) | |||
92 | { | 82 | { |
93 | int i, irq, irqs; | 83 | int i, irq, irqs; |
94 | struct platform_device *pmu_device = cpu_pmu->plat_device; | 84 | struct platform_device *pmu_device = cpu_pmu->plat_device; |
85 | struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events; | ||
95 | 86 | ||
96 | irqs = min(pmu_device->num_resources, num_possible_cpus()); | 87 | irqs = min(pmu_device->num_resources, num_possible_cpus()); |
97 | 88 | ||
98 | irq = platform_get_irq(pmu_device, 0); | 89 | irq = platform_get_irq(pmu_device, 0); |
99 | if (irq >= 0 && irq_is_percpu(irq)) { | 90 | if (irq >= 0 && irq_is_percpu(irq)) { |
100 | on_each_cpu(cpu_pmu_disable_percpu_irq, &irq, 1); | 91 | on_each_cpu(cpu_pmu_disable_percpu_irq, &irq, 1); |
101 | free_percpu_irq(irq, &percpu_pmu); | 92 | free_percpu_irq(irq, &hw_events->percpu_pmu); |
102 | } else { | 93 | } else { |
103 | for (i = 0; i < irqs; ++i) { | 94 | for (i = 0; i < irqs; ++i) { |
104 | if (!cpumask_test_and_clear_cpu(i, &cpu_pmu->active_irqs)) | 95 | if (!cpumask_test_and_clear_cpu(i, &cpu_pmu->active_irqs)) |
105 | continue; | 96 | continue; |
106 | irq = platform_get_irq(pmu_device, i); | 97 | irq = platform_get_irq(pmu_device, i); |
107 | if (irq >= 0) | 98 | if (irq >= 0) |
108 | free_irq(irq, cpu_pmu); | 99 | free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, i)); |
109 | } | 100 | } |
110 | } | 101 | } |
111 | } | 102 | } |
@@ -114,19 +105,21 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) | |||
114 | { | 105 | { |
115 | int i, err, irq, irqs; | 106 | int i, err, irq, irqs; |
116 | struct platform_device *pmu_device = cpu_pmu->plat_device; | 107 | struct platform_device *pmu_device = cpu_pmu->plat_device; |
108 | struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events; | ||
117 | 109 | ||
118 | if (!pmu_device) | 110 | if (!pmu_device) |
119 | return -ENODEV; | 111 | return -ENODEV; |
120 | 112 | ||
121 | irqs = min(pmu_device->num_resources, num_possible_cpus()); | 113 | irqs = min(pmu_device->num_resources, num_possible_cpus()); |
122 | if (irqs < 1) { | 114 | if (irqs < 1) { |
123 | printk_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n"); | 115 | pr_warn_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n"); |
124 | return 0; | 116 | return 0; |
125 | } | 117 | } |
126 | 118 | ||
127 | irq = platform_get_irq(pmu_device, 0); | 119 | irq = platform_get_irq(pmu_device, 0); |
128 | if (irq >= 0 && irq_is_percpu(irq)) { | 120 | if (irq >= 0 && irq_is_percpu(irq)) { |
129 | err = request_percpu_irq(irq, handler, "arm-pmu", &percpu_pmu); | 121 | err = request_percpu_irq(irq, handler, "arm-pmu", |
122 | &hw_events->percpu_pmu); | ||
130 | if (err) { | 123 | if (err) { |
131 | pr_err("unable to request IRQ%d for ARM PMU counters\n", | 124 | pr_err("unable to request IRQ%d for ARM PMU counters\n", |
132 | irq); | 125 | irq); |
@@ -153,7 +146,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) | |||
153 | 146 | ||
154 | err = request_irq(irq, handler, | 147 | err = request_irq(irq, handler, |
155 | IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu", | 148 | IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu", |
156 | cpu_pmu); | 149 | per_cpu_ptr(&hw_events->percpu_pmu, i)); |
157 | if (err) { | 150 | if (err) { |
158 | pr_err("unable to request IRQ%d for ARM PMU counters\n", | 151 | pr_err("unable to request IRQ%d for ARM PMU counters\n", |
159 | irq); | 152 | irq); |
@@ -167,18 +160,50 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) | |||
167 | return 0; | 160 | return 0; |
168 | } | 161 | } |
169 | 162 | ||
170 | static void cpu_pmu_init(struct arm_pmu *cpu_pmu) | 163 | /* |
164 | * PMU hardware loses all context when a CPU goes offline. | ||
165 | * When a CPU is hotplugged back in, since some hardware registers are | ||
166 | * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading | ||
167 | * junk values out of them. | ||
168 | */ | ||
169 | static int cpu_pmu_notify(struct notifier_block *b, unsigned long action, | ||
170 | void *hcpu) | ||
171 | { | ||
172 | struct arm_pmu *pmu = container_of(b, struct arm_pmu, hotplug_nb); | ||
173 | |||
174 | if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING) | ||
175 | return NOTIFY_DONE; | ||
176 | |||
177 | if (pmu->reset) | ||
178 | pmu->reset(pmu); | ||
179 | else | ||
180 | return NOTIFY_DONE; | ||
181 | |||
182 | return NOTIFY_OK; | ||
183 | } | ||
184 | |||
185 | static int cpu_pmu_init(struct arm_pmu *cpu_pmu) | ||
171 | { | 186 | { |
187 | int err; | ||
172 | int cpu; | 188 | int cpu; |
189 | struct pmu_hw_events __percpu *cpu_hw_events; | ||
190 | |||
191 | cpu_hw_events = alloc_percpu(struct pmu_hw_events); | ||
192 | if (!cpu_hw_events) | ||
193 | return -ENOMEM; | ||
194 | |||
195 | cpu_pmu->hotplug_nb.notifier_call = cpu_pmu_notify; | ||
196 | err = register_cpu_notifier(&cpu_pmu->hotplug_nb); | ||
197 | if (err) | ||
198 | goto out_hw_events; | ||
199 | |||
173 | for_each_possible_cpu(cpu) { | 200 | for_each_possible_cpu(cpu) { |
174 | struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu); | 201 | struct pmu_hw_events *events = per_cpu_ptr(cpu_hw_events, cpu); |
175 | events->events = per_cpu(hw_events, cpu); | ||
176 | events->used_mask = per_cpu(used_mask, cpu); | ||
177 | raw_spin_lock_init(&events->pmu_lock); | 202 | raw_spin_lock_init(&events->pmu_lock); |
178 | per_cpu(percpu_pmu, cpu) = cpu_pmu; | 203 | events->percpu_pmu = cpu_pmu; |
179 | } | 204 | } |
180 | 205 | ||
181 | cpu_pmu->get_hw_events = cpu_pmu_get_cpu_events; | 206 | cpu_pmu->hw_events = cpu_hw_events; |
182 | cpu_pmu->request_irq = cpu_pmu_request_irq; | 207 | cpu_pmu->request_irq = cpu_pmu_request_irq; |
183 | cpu_pmu->free_irq = cpu_pmu_free_irq; | 208 | cpu_pmu->free_irq = cpu_pmu_free_irq; |
184 | 209 | ||
@@ -189,31 +214,19 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu) | |||
189 | /* If no interrupts available, set the corresponding capability flag */ | 214 | /* If no interrupts available, set the corresponding capability flag */ |
190 | if (!platform_get_irq(cpu_pmu->plat_device, 0)) | 215 | if (!platform_get_irq(cpu_pmu->plat_device, 0)) |
191 | cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; | 216 | cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; |
192 | } | ||
193 | |||
194 | /* | ||
195 | * PMU hardware loses all context when a CPU goes offline. | ||
196 | * When a CPU is hotplugged back in, since some hardware registers are | ||
197 | * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading | ||
198 | * junk values out of them. | ||
199 | */ | ||
200 | static int cpu_pmu_notify(struct notifier_block *b, unsigned long action, | ||
201 | void *hcpu) | ||
202 | { | ||
203 | if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING) | ||
204 | return NOTIFY_DONE; | ||
205 | 217 | ||
206 | if (cpu_pmu && cpu_pmu->reset) | 218 | return 0; |
207 | cpu_pmu->reset(cpu_pmu); | ||
208 | else | ||
209 | return NOTIFY_DONE; | ||
210 | 219 | ||
211 | return NOTIFY_OK; | 220 | out_hw_events: |
221 | free_percpu(cpu_hw_events); | ||
222 | return err; | ||
212 | } | 223 | } |
213 | 224 | ||
214 | static struct notifier_block cpu_pmu_hotplug_notifier = { | 225 | static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu) |
215 | .notifier_call = cpu_pmu_notify, | 226 | { |
216 | }; | 227 | unregister_cpu_notifier(&cpu_pmu->hotplug_nb); |
228 | free_percpu(cpu_pmu->hw_events); | ||
229 | } | ||
217 | 230 | ||
218 | /* | 231 | /* |
219 | * PMU platform driver and devicetree bindings. | 232 | * PMU platform driver and devicetree bindings. |
@@ -241,48 +254,34 @@ static struct platform_device_id cpu_pmu_plat_device_ids[] = { | |||
241 | {}, | 254 | {}, |
242 | }; | 255 | }; |
243 | 256 | ||
257 | static const struct pmu_probe_info pmu_probe_table[] = { | ||
258 | ARM_PMU_PROBE(ARM_CPU_PART_ARM1136, armv6_1136_pmu_init), | ||
259 | ARM_PMU_PROBE(ARM_CPU_PART_ARM1156, armv6_1156_pmu_init), | ||
260 | ARM_PMU_PROBE(ARM_CPU_PART_ARM1176, armv6_1176_pmu_init), | ||
261 | ARM_PMU_PROBE(ARM_CPU_PART_ARM11MPCORE, armv6mpcore_pmu_init), | ||
262 | ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A8, armv7_a8_pmu_init), | ||
263 | ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A9, armv7_a9_pmu_init), | ||
264 | XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V1, xscale1pmu_init), | ||
265 | XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V2, xscale2pmu_init), | ||
266 | { /* sentinel value */ } | ||
267 | }; | ||
268 | |||
244 | /* | 269 | /* |
245 | * CPU PMU identification and probing. | 270 | * CPU PMU identification and probing. |
246 | */ | 271 | */ |
247 | static int probe_current_pmu(struct arm_pmu *pmu) | 272 | static int probe_current_pmu(struct arm_pmu *pmu) |
248 | { | 273 | { |
249 | int cpu = get_cpu(); | 274 | int cpu = get_cpu(); |
275 | unsigned int cpuid = read_cpuid_id(); | ||
250 | int ret = -ENODEV; | 276 | int ret = -ENODEV; |
277 | const struct pmu_probe_info *info; | ||
251 | 278 | ||
252 | pr_info("probing PMU on CPU %d\n", cpu); | 279 | pr_info("probing PMU on CPU %d\n", cpu); |
253 | 280 | ||
254 | switch (read_cpuid_part()) { | 281 | for (info = pmu_probe_table; info->init != NULL; info++) { |
255 | /* ARM Ltd CPUs. */ | 282 | if ((cpuid & info->mask) != info->cpuid) |
256 | case ARM_CPU_PART_ARM1136: | 283 | continue; |
257 | ret = armv6_1136_pmu_init(pmu); | 284 | ret = info->init(pmu); |
258 | break; | ||
259 | case ARM_CPU_PART_ARM1156: | ||
260 | ret = armv6_1156_pmu_init(pmu); | ||
261 | break; | ||
262 | case ARM_CPU_PART_ARM1176: | ||
263 | ret = armv6_1176_pmu_init(pmu); | ||
264 | break; | ||
265 | case ARM_CPU_PART_ARM11MPCORE: | ||
266 | ret = armv6mpcore_pmu_init(pmu); | ||
267 | break; | ||
268 | case ARM_CPU_PART_CORTEX_A8: | ||
269 | ret = armv7_a8_pmu_init(pmu); | ||
270 | break; | ||
271 | case ARM_CPU_PART_CORTEX_A9: | ||
272 | ret = armv7_a9_pmu_init(pmu); | ||
273 | break; | ||
274 | |||
275 | default: | ||
276 | if (read_cpuid_implementor() == ARM_CPU_IMP_INTEL) { | ||
277 | switch (xscale_cpu_arch_version()) { | ||
278 | case ARM_CPU_XSCALE_ARCH_V1: | ||
279 | ret = xscale1pmu_init(pmu); | ||
280 | break; | ||
281 | case ARM_CPU_XSCALE_ARCH_V2: | ||
282 | ret = xscale2pmu_init(pmu); | ||
283 | break; | ||
284 | } | ||
285 | } | ||
286 | break; | 285 | break; |
287 | } | 286 | } |
288 | 287 | ||
@@ -299,13 +298,13 @@ static int cpu_pmu_device_probe(struct platform_device *pdev) | |||
299 | int ret = -ENODEV; | 298 | int ret = -ENODEV; |
300 | 299 | ||
301 | if (cpu_pmu) { | 300 | if (cpu_pmu) { |
302 | pr_info("attempt to register multiple PMU devices!"); | 301 | pr_info("attempt to register multiple PMU devices!\n"); |
303 | return -ENOSPC; | 302 | return -ENOSPC; |
304 | } | 303 | } |
305 | 304 | ||
306 | pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL); | 305 | pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL); |
307 | if (!pmu) { | 306 | if (!pmu) { |
308 | pr_info("failed to allocate PMU device!"); | 307 | pr_info("failed to allocate PMU device!\n"); |
309 | return -ENOMEM; | 308 | return -ENOMEM; |
310 | } | 309 | } |
311 | 310 | ||
@@ -320,18 +319,24 @@ static int cpu_pmu_device_probe(struct platform_device *pdev) | |||
320 | } | 319 | } |
321 | 320 | ||
322 | if (ret) { | 321 | if (ret) { |
323 | pr_info("failed to probe PMU!"); | 322 | pr_info("failed to probe PMU!\n"); |
324 | goto out_free; | 323 | goto out_free; |
325 | } | 324 | } |
326 | 325 | ||
327 | cpu_pmu_init(cpu_pmu); | 326 | ret = cpu_pmu_init(cpu_pmu); |
328 | ret = armpmu_register(cpu_pmu, PERF_TYPE_RAW); | 327 | if (ret) |
328 | goto out_free; | ||
329 | 329 | ||
330 | if (!ret) | 330 | ret = armpmu_register(cpu_pmu, -1); |
331 | return 0; | 331 | if (ret) |
332 | goto out_destroy; | ||
332 | 333 | ||
334 | return 0; | ||
335 | |||
336 | out_destroy: | ||
337 | cpu_pmu_destroy(cpu_pmu); | ||
333 | out_free: | 338 | out_free: |
334 | pr_info("failed to register PMU devices!"); | 339 | pr_info("failed to register PMU devices!\n"); |
335 | kfree(pmu); | 340 | kfree(pmu); |
336 | return ret; | 341 | return ret; |
337 | } | 342 | } |
@@ -348,16 +353,6 @@ static struct platform_driver cpu_pmu_driver = { | |||
348 | 353 | ||
349 | static int __init register_pmu_driver(void) | 354 | static int __init register_pmu_driver(void) |
350 | { | 355 | { |
351 | int err; | 356 | return platform_driver_register(&cpu_pmu_driver); |
352 | |||
353 | err = register_cpu_notifier(&cpu_pmu_hotplug_notifier); | ||
354 | if (err) | ||
355 | return err; | ||
356 | |||
357 | err = platform_driver_register(&cpu_pmu_driver); | ||
358 | if (err) | ||
359 | unregister_cpu_notifier(&cpu_pmu_hotplug_notifier); | ||
360 | |||
361 | return err; | ||
362 | } | 357 | } |
363 | device_initcall(register_pmu_driver); | 358 | device_initcall(register_pmu_driver); |
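
With the notifier_block folded into struct arm_pmu, cpu_pmu_notify() above recovers its PMU with container_of() instead of reaching for the cpu_pmu global. A minimal user-space model of that pattern, assuming simplified stand-in structs and with container_of() spelled out via offsetof():

#include <stdio.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct notifier_block { int (*notifier_call)(struct notifier_block *); };

struct fake_arm_pmu {
        const char *name;
        struct notifier_block hotplug_nb;       /* embedded, as in the patch */
};

static int pmu_notify(struct notifier_block *b)
{
        struct fake_arm_pmu *pmu = container_of(b, struct fake_arm_pmu, hotplug_nb);

        printf("resetting %s on CPU_STARTING\n", pmu->name);
        return 0;
}

int main(void)
{
        struct fake_arm_pmu pmu = { .name = "cpu_pmu" };

        pmu.hotplug_nb.notifier_call = pmu_notify;
        /* the CPU hotplug core would invoke the callback roughly like this */
        pmu.hotplug_nb.notifier_call(&pmu.hotplug_nb);
        return 0;
}

Embedding the notifier is what allows one registration per PMU instance, which is the stepping stone to multiple (big.LITTLE) CPU PMUs mentioned in the pull request.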
diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c
index abfeb04f3213..f2ffd5c542ed 100644
--- a/arch/arm/kernel/perf_event_v6.c
+++ b/arch/arm/kernel/perf_event_v6.c
@@ -262,7 +262,7 @@ static void armv6pmu_enable_event(struct perf_event *event) | |||
262 | unsigned long val, mask, evt, flags; | 262 | unsigned long val, mask, evt, flags; |
263 | struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); | 263 | struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); |
264 | struct hw_perf_event *hwc = &event->hw; | 264 | struct hw_perf_event *hwc = &event->hw; |
265 | struct pmu_hw_events *events = cpu_pmu->get_hw_events(); | 265 | struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); |
266 | int idx = hwc->idx; | 266 | int idx = hwc->idx; |
267 | 267 | ||
268 | if (ARMV6_CYCLE_COUNTER == idx) { | 268 | if (ARMV6_CYCLE_COUNTER == idx) { |
@@ -300,7 +300,7 @@ armv6pmu_handle_irq(int irq_num, | |||
300 | unsigned long pmcr = armv6_pmcr_read(); | 300 | unsigned long pmcr = armv6_pmcr_read(); |
301 | struct perf_sample_data data; | 301 | struct perf_sample_data data; |
302 | struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; | 302 | struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; |
303 | struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events(); | 303 | struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); |
304 | struct pt_regs *regs; | 304 | struct pt_regs *regs; |
305 | int idx; | 305 | int idx; |
306 | 306 | ||
@@ -356,7 +356,7 @@ armv6pmu_handle_irq(int irq_num, | |||
356 | static void armv6pmu_start(struct arm_pmu *cpu_pmu) | 356 | static void armv6pmu_start(struct arm_pmu *cpu_pmu) |
357 | { | 357 | { |
358 | unsigned long flags, val; | 358 | unsigned long flags, val; |
359 | struct pmu_hw_events *events = cpu_pmu->get_hw_events(); | 359 | struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); |
360 | 360 | ||
361 | raw_spin_lock_irqsave(&events->pmu_lock, flags); | 361 | raw_spin_lock_irqsave(&events->pmu_lock, flags); |
362 | val = armv6_pmcr_read(); | 362 | val = armv6_pmcr_read(); |
@@ -368,7 +368,7 @@ static void armv6pmu_start(struct arm_pmu *cpu_pmu) | |||
368 | static void armv6pmu_stop(struct arm_pmu *cpu_pmu) | 368 | static void armv6pmu_stop(struct arm_pmu *cpu_pmu) |
369 | { | 369 | { |
370 | unsigned long flags, val; | 370 | unsigned long flags, val; |
371 | struct pmu_hw_events *events = cpu_pmu->get_hw_events(); | 371 | struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); |
372 | 372 | ||
373 | raw_spin_lock_irqsave(&events->pmu_lock, flags); | 373 | raw_spin_lock_irqsave(&events->pmu_lock, flags); |
374 | val = armv6_pmcr_read(); | 374 | val = armv6_pmcr_read(); |
@@ -409,7 +409,7 @@ static void armv6pmu_disable_event(struct perf_event *event) | |||
409 | unsigned long val, mask, evt, flags; | 409 | unsigned long val, mask, evt, flags; |
410 | struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); | 410 | struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); |
411 | struct hw_perf_event *hwc = &event->hw; | 411 | struct hw_perf_event *hwc = &event->hw; |
412 | struct pmu_hw_events *events = cpu_pmu->get_hw_events(); | 412 | struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); |
413 | int idx = hwc->idx; | 413 | int idx = hwc->idx; |
414 | 414 | ||
415 | if (ARMV6_CYCLE_COUNTER == idx) { | 415 | if (ARMV6_CYCLE_COUNTER == idx) { |
@@ -444,7 +444,7 @@ static void armv6mpcore_pmu_disable_event(struct perf_event *event) | |||
444 | unsigned long val, mask, flags, evt = 0; | 444 | unsigned long val, mask, flags, evt = 0; |
445 | struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); | 445 | struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); |
446 | struct hw_perf_event *hwc = &event->hw; | 446 | struct hw_perf_event *hwc = &event->hw; |
447 | struct pmu_hw_events *events = cpu_pmu->get_hw_events(); | 447 | struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); |
448 | int idx = hwc->idx; | 448 | int idx = hwc->idx; |
449 | 449 | ||
450 | if (ARMV6_CYCLE_COUNTER == idx) { | 450 | if (ARMV6_CYCLE_COUNTER == idx) { |
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index 116758b77f93..8993770c47de 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -564,13 +564,11 @@ static inline int armv7_pmnc_counter_has_overflowed(u32 pmnc, int idx) | |||
564 | return pmnc & BIT(ARMV7_IDX_TO_COUNTER(idx)); | 564 | return pmnc & BIT(ARMV7_IDX_TO_COUNTER(idx)); |
565 | } | 565 | } |
566 | 566 | ||
567 | static inline int armv7_pmnc_select_counter(int idx) | 567 | static inline void armv7_pmnc_select_counter(int idx) |
568 | { | 568 | { |
569 | u32 counter = ARMV7_IDX_TO_COUNTER(idx); | 569 | u32 counter = ARMV7_IDX_TO_COUNTER(idx); |
570 | asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (counter)); | 570 | asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (counter)); |
571 | isb(); | 571 | isb(); |
572 | |||
573 | return idx; | ||
574 | } | 572 | } |
575 | 573 | ||
576 | static inline u32 armv7pmu_read_counter(struct perf_event *event) | 574 | static inline u32 armv7pmu_read_counter(struct perf_event *event) |
@@ -580,13 +578,15 @@ static inline u32 armv7pmu_read_counter(struct perf_event *event) | |||
580 | int idx = hwc->idx; | 578 | int idx = hwc->idx; |
581 | u32 value = 0; | 579 | u32 value = 0; |
582 | 580 | ||
583 | if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) | 581 | if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) { |
584 | pr_err("CPU%u reading wrong counter %d\n", | 582 | pr_err("CPU%u reading wrong counter %d\n", |
585 | smp_processor_id(), idx); | 583 | smp_processor_id(), idx); |
586 | else if (idx == ARMV7_IDX_CYCLE_COUNTER) | 584 | } else if (idx == ARMV7_IDX_CYCLE_COUNTER) { |
587 | asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value)); | 585 | asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value)); |
588 | else if (armv7_pmnc_select_counter(idx) == idx) | 586 | } else { |
587 | armv7_pmnc_select_counter(idx); | ||
589 | asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (value)); | 588 | asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (value)); |
589 | } | ||
590 | 590 | ||
591 | return value; | 591 | return value; |
592 | } | 592 | } |
@@ -597,45 +597,43 @@ static inline void armv7pmu_write_counter(struct perf_event *event, u32 value) | |||
597 | struct hw_perf_event *hwc = &event->hw; | 597 | struct hw_perf_event *hwc = &event->hw; |
598 | int idx = hwc->idx; | 598 | int idx = hwc->idx; |
599 | 599 | ||
600 | if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) | 600 | if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) { |
601 | pr_err("CPU%u writing wrong counter %d\n", | 601 | pr_err("CPU%u writing wrong counter %d\n", |
602 | smp_processor_id(), idx); | 602 | smp_processor_id(), idx); |
603 | else if (idx == ARMV7_IDX_CYCLE_COUNTER) | 603 | } else if (idx == ARMV7_IDX_CYCLE_COUNTER) { |
604 | asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value)); | 604 | asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value)); |
605 | else if (armv7_pmnc_select_counter(idx) == idx) | 605 | } else { |
606 | armv7_pmnc_select_counter(idx); | ||
606 | asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" (value)); | 607 | asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" (value)); |
608 | } | ||
607 | } | 609 | } |
608 | 610 | ||
609 | static inline void armv7_pmnc_write_evtsel(int idx, u32 val) | 611 | static inline void armv7_pmnc_write_evtsel(int idx, u32 val) |
610 | { | 612 | { |
611 | if (armv7_pmnc_select_counter(idx) == idx) { | 613 | armv7_pmnc_select_counter(idx); |
612 | val &= ARMV7_EVTYPE_MASK; | 614 | val &= ARMV7_EVTYPE_MASK; |
613 | asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val)); | 615 | asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val)); |
614 | } | ||
615 | } | 616 | } |
616 | 617 | ||
617 | static inline int armv7_pmnc_enable_counter(int idx) | 618 | static inline void armv7_pmnc_enable_counter(int idx) |
618 | { | 619 | { |
619 | u32 counter = ARMV7_IDX_TO_COUNTER(idx); | 620 | u32 counter = ARMV7_IDX_TO_COUNTER(idx); |
620 | asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (BIT(counter))); | 621 | asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (BIT(counter))); |
621 | return idx; | ||
622 | } | 622 | } |
623 | 623 | ||
624 | static inline int armv7_pmnc_disable_counter(int idx) | 624 | static inline void armv7_pmnc_disable_counter(int idx) |
625 | { | 625 | { |
626 | u32 counter = ARMV7_IDX_TO_COUNTER(idx); | 626 | u32 counter = ARMV7_IDX_TO_COUNTER(idx); |
627 | asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (BIT(counter))); | 627 | asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (BIT(counter))); |
628 | return idx; | ||
629 | } | 628 | } |
630 | 629 | ||
631 | static inline int armv7_pmnc_enable_intens(int idx) | 630 | static inline void armv7_pmnc_enable_intens(int idx) |
632 | { | 631 | { |
633 | u32 counter = ARMV7_IDX_TO_COUNTER(idx); | 632 | u32 counter = ARMV7_IDX_TO_COUNTER(idx); |
634 | asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (BIT(counter))); | 633 | asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (BIT(counter))); |
635 | return idx; | ||
636 | } | 634 | } |
637 | 635 | ||
638 | static inline int armv7_pmnc_disable_intens(int idx) | 636 | static inline void armv7_pmnc_disable_intens(int idx) |
639 | { | 637 | { |
640 | u32 counter = ARMV7_IDX_TO_COUNTER(idx); | 638 | u32 counter = ARMV7_IDX_TO_COUNTER(idx); |
641 | asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (BIT(counter))); | 639 | asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (BIT(counter))); |
@@ -643,8 +641,6 @@ static inline int armv7_pmnc_disable_intens(int idx) | |||
643 | /* Clear the overflow flag in case an interrupt is pending. */ | 641 | /* Clear the overflow flag in case an interrupt is pending. */ |
644 | asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (BIT(counter))); | 642 | asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (BIT(counter))); |
645 | isb(); | 643 | isb(); |
646 | |||
647 | return idx; | ||
648 | } | 644 | } |
649 | 645 | ||
650 | static inline u32 armv7_pmnc_getreset_flags(void) | 646 | static inline u32 armv7_pmnc_getreset_flags(void) |
@@ -667,34 +663,34 @@ static void armv7_pmnc_dump_regs(struct arm_pmu *cpu_pmu) | |||
667 | u32 val; | 663 | u32 val; |
668 | unsigned int cnt; | 664 | unsigned int cnt; |
669 | 665 | ||
670 | printk(KERN_INFO "PMNC registers dump:\n"); | 666 | pr_info("PMNC registers dump:\n"); |
671 | 667 | ||
672 | asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val)); | 668 | asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val)); |
673 | printk(KERN_INFO "PMNC =0x%08x\n", val); | 669 | pr_info("PMNC =0x%08x\n", val); |
674 | 670 | ||
675 | asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val)); | 671 | asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val)); |
676 | printk(KERN_INFO "CNTENS=0x%08x\n", val); | 672 | pr_info("CNTENS=0x%08x\n", val); |
677 | 673 | ||
678 | asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val)); | 674 | asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val)); |
679 | printk(KERN_INFO "INTENS=0x%08x\n", val); | 675 | pr_info("INTENS=0x%08x\n", val); |
680 | 676 | ||
681 | asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); | 677 | asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); |
682 | printk(KERN_INFO "FLAGS =0x%08x\n", val); | 678 | pr_info("FLAGS =0x%08x\n", val); |
683 | 679 | ||
684 | asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val)); | 680 | asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val)); |
685 | printk(KERN_INFO "SELECT=0x%08x\n", val); | 681 | pr_info("SELECT=0x%08x\n", val); |
686 | 682 | ||
687 | asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val)); | 683 | asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val)); |
688 | printk(KERN_INFO "CCNT =0x%08x\n", val); | 684 | pr_info("CCNT =0x%08x\n", val); |
689 | 685 | ||
690 | for (cnt = ARMV7_IDX_COUNTER0; | 686 | for (cnt = ARMV7_IDX_COUNTER0; |
691 | cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) { | 687 | cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) { |
692 | armv7_pmnc_select_counter(cnt); | 688 | armv7_pmnc_select_counter(cnt); |
693 | asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val)); | 689 | asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val)); |
694 | printk(KERN_INFO "CNT[%d] count =0x%08x\n", | 690 | pr_info("CNT[%d] count =0x%08x\n", |
695 | ARMV7_IDX_TO_COUNTER(cnt), val); | 691 | ARMV7_IDX_TO_COUNTER(cnt), val); |
696 | asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val)); | 692 | asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val)); |
697 | printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n", | 693 | pr_info("CNT[%d] evtsel=0x%08x\n", |
698 | ARMV7_IDX_TO_COUNTER(cnt), val); | 694 | ARMV7_IDX_TO_COUNTER(cnt), val); |
699 | } | 695 | } |
700 | } | 696 | } |
@@ -705,7 +701,7 @@ static void armv7pmu_enable_event(struct perf_event *event) | |||
705 | unsigned long flags; | 701 | unsigned long flags; |
706 | struct hw_perf_event *hwc = &event->hw; | 702 | struct hw_perf_event *hwc = &event->hw; |
707 | struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); | 703 | struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); |
708 | struct pmu_hw_events *events = cpu_pmu->get_hw_events(); | 704 | struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); |
709 | int idx = hwc->idx; | 705 | int idx = hwc->idx; |
710 | 706 | ||
711 | if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) { | 707 | if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) { |
@@ -751,7 +747,7 @@ static void armv7pmu_disable_event(struct perf_event *event) | |||
751 | unsigned long flags; | 747 | unsigned long flags; |
752 | struct hw_perf_event *hwc = &event->hw; | 748 | struct hw_perf_event *hwc = &event->hw; |
753 | struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); | 749 | struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); |
754 | struct pmu_hw_events *events = cpu_pmu->get_hw_events(); | 750 | struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); |
755 | int idx = hwc->idx; | 751 | int idx = hwc->idx; |
756 | 752 | ||
757 | if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) { | 753 | if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) { |
@@ -783,7 +779,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) | |||
783 | u32 pmnc; | 779 | u32 pmnc; |
784 | struct perf_sample_data data; | 780 | struct perf_sample_data data; |
785 | struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; | 781 | struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; |
786 | struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events(); | 782 | struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); |
787 | struct pt_regs *regs; | 783 | struct pt_regs *regs; |
788 | int idx; | 784 | int idx; |
789 | 785 | ||
@@ -843,7 +839,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) | |||
843 | static void armv7pmu_start(struct arm_pmu *cpu_pmu) | 839 | static void armv7pmu_start(struct arm_pmu *cpu_pmu) |
844 | { | 840 | { |
845 | unsigned long flags; | 841 | unsigned long flags; |
846 | struct pmu_hw_events *events = cpu_pmu->get_hw_events(); | 842 | struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); |
847 | 843 | ||
848 | raw_spin_lock_irqsave(&events->pmu_lock, flags); | 844 | raw_spin_lock_irqsave(&events->pmu_lock, flags); |
849 | /* Enable all counters */ | 845 | /* Enable all counters */ |
@@ -854,7 +850,7 @@ static void armv7pmu_start(struct arm_pmu *cpu_pmu) | |||
854 | static void armv7pmu_stop(struct arm_pmu *cpu_pmu) | 850 | static void armv7pmu_stop(struct arm_pmu *cpu_pmu) |
855 | { | 851 | { |
856 | unsigned long flags; | 852 | unsigned long flags; |
857 | struct pmu_hw_events *events = cpu_pmu->get_hw_events(); | 853 | struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); |
858 | 854 | ||
859 | raw_spin_lock_irqsave(&events->pmu_lock, flags); | 855 | raw_spin_lock_irqsave(&events->pmu_lock, flags); |
860 | /* Disable all counters */ | 856 | /* Disable all counters */ |
@@ -1287,7 +1283,7 @@ static void krait_pmu_disable_event(struct perf_event *event) | |||
1287 | struct hw_perf_event *hwc = &event->hw; | 1283 | struct hw_perf_event *hwc = &event->hw; |
1288 | int idx = hwc->idx; | 1284 | int idx = hwc->idx; |
1289 | struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); | 1285 | struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); |
1290 | struct pmu_hw_events *events = cpu_pmu->get_hw_events(); | 1286 | struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); |
1291 | 1287 | ||
1292 | /* Disable counter and interrupt */ | 1288 | /* Disable counter and interrupt */ |
1293 | raw_spin_lock_irqsave(&events->pmu_lock, flags); | 1289 | raw_spin_lock_irqsave(&events->pmu_lock, flags); |
@@ -1313,7 +1309,7 @@ static void krait_pmu_enable_event(struct perf_event *event) | |||
1313 | struct hw_perf_event *hwc = &event->hw; | 1309 | struct hw_perf_event *hwc = &event->hw; |
1314 | int idx = hwc->idx; | 1310 | int idx = hwc->idx; |
1315 | struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); | 1311 | struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); |
1316 | struct pmu_hw_events *events = cpu_pmu->get_hw_events(); | 1312 | struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); |
1317 | 1313 | ||
1318 | /* | 1314 | /* |
1319 | * Enable counter and interrupt, and set the counter to count | 1315 | * Enable counter and interrupt, and set the counter to count |
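
The v7 changes above drop the return values of armv7_pmnc_select_counter() and the enable/disable helpers because, once armv7_pmnc_counter_valid() has passed, selecting a counter cannot fail. A rough user-space model of that validate-once / void-helpers shape, with plain variables standing in for the PMSELR/PMXEVCNTR indirection:

#include <stdio.h>
#include <stdbool.h>

#define NUM_COUNTERS 4

static unsigned int pmselr;                     /* currently selected counter */
static unsigned int pmxevcntr[NUM_COUNTERS];    /* banked counter values */

static bool counter_valid(int idx)
{
        return idx >= 0 && idx < NUM_COUNTERS;
}

static void select_counter(int idx)     /* was: returned idx for a useless check */
{
        pmselr = idx;
}

static unsigned int read_counter(int idx)
{
        unsigned int value = 0;

        if (!counter_valid(idx)) {
                fprintf(stderr, "reading wrong counter %d\n", idx);
        } else {
                select_counter(idx);
                value = pmxevcntr[pmselr];
        }
        return value;
}

int main(void)
{
        pmxevcntr[2] = 42;
        printf("counter 2 = %u\n", read_counter(2));
        printf("counter 9 = %u\n", read_counter(9));
        return 0;
}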
diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c
index 08da0af550b7..8af9f1f82c68 100644
--- a/arch/arm/kernel/perf_event_xscale.c
+++ b/arch/arm/kernel/perf_event_xscale.c
@@ -138,7 +138,7 @@ xscale1pmu_handle_irq(int irq_num, void *dev) | |||
138 | unsigned long pmnc; | 138 | unsigned long pmnc; |
139 | struct perf_sample_data data; | 139 | struct perf_sample_data data; |
140 | struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; | 140 | struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; |
141 | struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events(); | 141 | struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); |
142 | struct pt_regs *regs; | 142 | struct pt_regs *regs; |
143 | int idx; | 143 | int idx; |
144 | 144 | ||
@@ -198,7 +198,7 @@ static void xscale1pmu_enable_event(struct perf_event *event) | |||
198 | unsigned long val, mask, evt, flags; | 198 | unsigned long val, mask, evt, flags; |
199 | struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); | 199 | struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); |
200 | struct hw_perf_event *hwc = &event->hw; | 200 | struct hw_perf_event *hwc = &event->hw; |
201 | struct pmu_hw_events *events = cpu_pmu->get_hw_events(); | 201 | struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); |
202 | int idx = hwc->idx; | 202 | int idx = hwc->idx; |
203 | 203 | ||
204 | switch (idx) { | 204 | switch (idx) { |
@@ -234,7 +234,7 @@ static void xscale1pmu_disable_event(struct perf_event *event) | |||
234 | unsigned long val, mask, evt, flags; | 234 | unsigned long val, mask, evt, flags; |
235 | struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); | 235 | struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); |
236 | struct hw_perf_event *hwc = &event->hw; | 236 | struct hw_perf_event *hwc = &event->hw; |
237 | struct pmu_hw_events *events = cpu_pmu->get_hw_events(); | 237 | struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); |
238 | int idx = hwc->idx; | 238 | int idx = hwc->idx; |
239 | 239 | ||
240 | switch (idx) { | 240 | switch (idx) { |
@@ -287,7 +287,7 @@ xscale1pmu_get_event_idx(struct pmu_hw_events *cpuc, | |||
287 | static void xscale1pmu_start(struct arm_pmu *cpu_pmu) | 287 | static void xscale1pmu_start(struct arm_pmu *cpu_pmu) |
288 | { | 288 | { |
289 | unsigned long flags, val; | 289 | unsigned long flags, val; |
290 | struct pmu_hw_events *events = cpu_pmu->get_hw_events(); | 290 | struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); |
291 | 291 | ||
292 | raw_spin_lock_irqsave(&events->pmu_lock, flags); | 292 | raw_spin_lock_irqsave(&events->pmu_lock, flags); |
293 | val = xscale1pmu_read_pmnc(); | 293 | val = xscale1pmu_read_pmnc(); |
@@ -299,7 +299,7 @@ static void xscale1pmu_start(struct arm_pmu *cpu_pmu) | |||
299 | static void xscale1pmu_stop(struct arm_pmu *cpu_pmu) | 299 | static void xscale1pmu_stop(struct arm_pmu *cpu_pmu) |
300 | { | 300 | { |
301 | unsigned long flags, val; | 301 | unsigned long flags, val; |
302 | struct pmu_hw_events *events = cpu_pmu->get_hw_events(); | 302 | struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); |
303 | 303 | ||
304 | raw_spin_lock_irqsave(&events->pmu_lock, flags); | 304 | raw_spin_lock_irqsave(&events->pmu_lock, flags); |
305 | val = xscale1pmu_read_pmnc(); | 305 | val = xscale1pmu_read_pmnc(); |
@@ -485,7 +485,7 @@ xscale2pmu_handle_irq(int irq_num, void *dev) | |||
485 | unsigned long pmnc, of_flags; | 485 | unsigned long pmnc, of_flags; |
486 | struct perf_sample_data data; | 486 | struct perf_sample_data data; |
487 | struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; | 487 | struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; |
488 | struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events(); | 488 | struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); |
489 | struct pt_regs *regs; | 489 | struct pt_regs *regs; |
490 | int idx; | 490 | int idx; |
491 | 491 | ||
@@ -539,7 +539,7 @@ static void xscale2pmu_enable_event(struct perf_event *event) | |||
539 | unsigned long flags, ien, evtsel; | 539 | unsigned long flags, ien, evtsel; |
540 | struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); | 540 | struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); |
541 | struct hw_perf_event *hwc = &event->hw; | 541 | struct hw_perf_event *hwc = &event->hw; |
542 | struct pmu_hw_events *events = cpu_pmu->get_hw_events(); | 542 | struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); |
543 | int idx = hwc->idx; | 543 | int idx = hwc->idx; |
544 | 544 | ||
545 | ien = xscale2pmu_read_int_enable(); | 545 | ien = xscale2pmu_read_int_enable(); |
@@ -585,7 +585,7 @@ static void xscale2pmu_disable_event(struct perf_event *event) | |||
585 | unsigned long flags, ien, evtsel, of_flags; | 585 | unsigned long flags, ien, evtsel, of_flags; |
586 | struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); | 586 | struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); |
587 | struct hw_perf_event *hwc = &event->hw; | 587 | struct hw_perf_event *hwc = &event->hw; |
588 | struct pmu_hw_events *events = cpu_pmu->get_hw_events(); | 588 | struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); |
589 | int idx = hwc->idx; | 589 | int idx = hwc->idx; |
590 | 590 | ||
591 | ien = xscale2pmu_read_int_enable(); | 591 | ien = xscale2pmu_read_int_enable(); |
@@ -651,7 +651,7 @@ out: | |||
651 | static void xscale2pmu_start(struct arm_pmu *cpu_pmu) | 651 | static void xscale2pmu_start(struct arm_pmu *cpu_pmu) |
652 | { | 652 | { |
653 | unsigned long flags, val; | 653 | unsigned long flags, val; |
654 | struct pmu_hw_events *events = cpu_pmu->get_hw_events(); | 654 | struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); |
655 | 655 | ||
656 | raw_spin_lock_irqsave(&events->pmu_lock, flags); | 656 | raw_spin_lock_irqsave(&events->pmu_lock, flags); |
657 | val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64; | 657 | val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64; |
@@ -663,7 +663,7 @@ static void xscale2pmu_start(struct arm_pmu *cpu_pmu) | |||
663 | static void xscale2pmu_stop(struct arm_pmu *cpu_pmu) | 663 | static void xscale2pmu_stop(struct arm_pmu *cpu_pmu) |
664 | { | 664 | { |
665 | unsigned long flags, val; | 665 | unsigned long flags, val; |
666 | struct pmu_hw_events *events = cpu_pmu->get_hw_events(); | 666 | struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); |
667 | 667 | ||
668 | raw_spin_lock_irqsave(&events->pmu_lock, flags); | 668 | raw_spin_lock_irqsave(&events->pmu_lock, flags); |
669 | val = xscale2pmu_read_pmnc(); | 669 | val = xscale2pmu_read_pmnc(); |
diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c index 7af78df241f2..860da40b78ef 100644 --- a/drivers/bus/arm-cci.c +++ b/drivers/bus/arm-cci.c | |||
@@ -16,17 +16,17 @@ | |||
16 | 16 | ||
17 | #include <linux/arm-cci.h> | 17 | #include <linux/arm-cci.h> |
18 | #include <linux/io.h> | 18 | #include <linux/io.h> |
19 | #include <linux/interrupt.h> | ||
19 | #include <linux/module.h> | 20 | #include <linux/module.h> |
20 | #include <linux/of_address.h> | 21 | #include <linux/of_address.h> |
21 | #include <linux/of_irq.h> | 22 | #include <linux/of_irq.h> |
22 | #include <linux/of_platform.h> | 23 | #include <linux/of_platform.h> |
24 | #include <linux/perf_event.h> | ||
23 | #include <linux/platform_device.h> | 25 | #include <linux/platform_device.h> |
24 | #include <linux/slab.h> | 26 | #include <linux/slab.h> |
25 | #include <linux/spinlock.h> | 27 | #include <linux/spinlock.h> |
26 | 28 | ||
27 | #include <asm/cacheflush.h> | 29 | #include <asm/cacheflush.h> |
28 | #include <asm/irq_regs.h> | ||
29 | #include <asm/pmu.h> | ||
30 | #include <asm/smp_plat.h> | 30 | #include <asm/smp_plat.h> |
31 | 31 | ||
32 | #define DRIVER_NAME "CCI-400" | 32 | #define DRIVER_NAME "CCI-400" |
@@ -98,6 +98,8 @@ static unsigned long cci_ctrl_phys; | |||
98 | 98 | ||
99 | #define CCI_PMU_CNTR_BASE(idx) ((idx) * SZ_4K) | 99 | #define CCI_PMU_CNTR_BASE(idx) ((idx) * SZ_4K) |
100 | 100 | ||
101 | #define CCI_PMU_CNTR_MASK ((1ULL << 32) -1) | ||
102 | |||
101 | /* | 103 | /* |
102 | * Instead of an event id to monitor CCI cycles, a dedicated counter is | 104 | * Instead of an event id to monitor CCI cycles, a dedicated counter is |
103 | * provided. Use 0xff to represent CCI cycles and hope that no future revisions | 105 | * provided. Use 0xff to represent CCI cycles and hope that no future revisions |
@@ -170,18 +172,29 @@ static char *const pmu_names[] = { | |||
170 | [CCI_REV_R1] = "CCI_400_r1", | 172 | [CCI_REV_R1] = "CCI_400_r1", |
171 | }; | 173 | }; |
172 | 174 | ||
173 | struct cci_pmu_drv_data { | 175 | struct cci_pmu_hw_events { |
176 | struct perf_event *events[CCI_PMU_MAX_HW_EVENTS]; | ||
177 | unsigned long used_mask[BITS_TO_LONGS(CCI_PMU_MAX_HW_EVENTS)]; | ||
178 | raw_spinlock_t pmu_lock; | ||
179 | }; | ||
180 | |||
181 | struct cci_pmu { | ||
174 | void __iomem *base; | 182 | void __iomem *base; |
175 | struct arm_pmu *cci_pmu; | 183 | struct pmu pmu; |
176 | int nr_irqs; | 184 | int nr_irqs; |
177 | int irqs[CCI_PMU_MAX_HW_EVENTS]; | 185 | int irqs[CCI_PMU_MAX_HW_EVENTS]; |
178 | unsigned long active_irqs; | 186 | unsigned long active_irqs; |
179 | struct perf_event *events[CCI_PMU_MAX_HW_EVENTS]; | ||
180 | unsigned long used_mask[BITS_TO_LONGS(CCI_PMU_MAX_HW_EVENTS)]; | ||
181 | struct pmu_port_event_ranges *port_ranges; | 187 | struct pmu_port_event_ranges *port_ranges; |
182 | struct pmu_hw_events hw_events; | 188 | struct cci_pmu_hw_events hw_events; |
189 | struct platform_device *plat_device; | ||
190 | int num_events; | ||
191 | atomic_t active_events; | ||
192 | struct mutex reserve_mutex; | ||
193 | cpumask_t cpus; | ||
183 | }; | 194 | }; |
184 | static struct cci_pmu_drv_data *pmu; | 195 | static struct cci_pmu *pmu; |
196 | |||
197 | #define to_cci_pmu(c) (container_of(c, struct cci_pmu, pmu)) | ||
185 | 198 | ||
186 | static bool is_duplicate_irq(int irq, int *irqs, int nr_irqs) | 199 | static bool is_duplicate_irq(int irq, int *irqs, int nr_irqs) |
187 | { | 200 | { |
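The rework above embeds struct pmu inside struct cci_pmu instead of keeping a separate arm_pmu pointer: perf core hands every callback the embedded struct pmu *, and to_cci_pmu() steps back to the enclosing structure with container_of(). The stand-alone sketch below shows the same idiom; the structure and field names are invented for the example, and container_of is re-derived from offsetof() rather than taken from the kernel headers.

#include <stddef.h>
#include <stdio.h>

/* Same trick as the kernel's container_of(): step back from a pointer to
 * a member to the structure that embeds it. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct pmu {
	int type;
};

struct cci_pmu_example {
	void *base;
	struct pmu pmu;		/* embedded, not a pointer */
	int num_events;
};

int main(void)
{
	struct cci_pmu_example cci = { .num_events = 4 };
	struct pmu *p = &cci.pmu;	/* what a perf callback receives */
	struct cci_pmu_example *back =
		container_of(p, struct cci_pmu_example, pmu);

	printf("recovered num_events = %d\n", back->num_events);
	return 0;
}

Embedding the struct pmu is what lets to_cci_pmu() work without any global lookup, even though the driver still keeps a single static struct cci_pmu *pmu for the one CCI-400 instance.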
@@ -252,7 +265,7 @@ static int pmu_validate_hw_event(u8 hw_event) | |||
252 | return -ENOENT; | 265 | return -ENOENT; |
253 | } | 266 | } |
254 | 267 | ||
255 | static int pmu_is_valid_counter(struct arm_pmu *cci_pmu, int idx) | 268 | static int pmu_is_valid_counter(struct cci_pmu *cci_pmu, int idx) |
256 | { | 269 | { |
257 | return CCI_PMU_CYCLE_CNTR_IDX <= idx && | 270 | return CCI_PMU_CYCLE_CNTR_IDX <= idx && |
258 | idx <= CCI_PMU_CNTR_LAST(cci_pmu); | 271 | idx <= CCI_PMU_CNTR_LAST(cci_pmu); |
@@ -293,14 +306,9 @@ static u32 pmu_get_max_counters(void) | |||
293 | return n_cnts + 1; | 306 | return n_cnts + 1; |
294 | } | 307 | } |
295 | 308 | ||
296 | static struct pmu_hw_events *pmu_get_hw_events(void) | 309 | static int pmu_get_event_idx(struct cci_pmu_hw_events *hw, struct perf_event *event) |
297 | { | ||
298 | return &pmu->hw_events; | ||
299 | } | ||
300 | |||
301 | static int pmu_get_event_idx(struct pmu_hw_events *hw, struct perf_event *event) | ||
302 | { | 310 | { |
303 | struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu); | 311 | struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); |
304 | struct hw_perf_event *hw_event = &event->hw; | 312 | struct hw_perf_event *hw_event = &event->hw; |
305 | unsigned long cci_event = hw_event->config_base & CCI_PMU_EVENT_MASK; | 313 | unsigned long cci_event = hw_event->config_base & CCI_PMU_EVENT_MASK; |
306 | int idx; | 314 | int idx; |
@@ -336,7 +344,7 @@ static int pmu_map_event(struct perf_event *event) | |||
336 | return mapping; | 344 | return mapping; |
337 | } | 345 | } |
338 | 346 | ||
339 | static int pmu_request_irq(struct arm_pmu *cci_pmu, irq_handler_t handler) | 347 | static int pmu_request_irq(struct cci_pmu *cci_pmu, irq_handler_t handler) |
340 | { | 348 | { |
341 | int i; | 349 | int i; |
342 | struct platform_device *pmu_device = cci_pmu->plat_device; | 350 | struct platform_device *pmu_device = cci_pmu->plat_device; |
@@ -371,17 +379,91 @@ static int pmu_request_irq(struct arm_pmu *cci_pmu, irq_handler_t handler) | |||
371 | return 0; | 379 | return 0; |
372 | } | 380 | } |
373 | 381 | ||
382 | static void pmu_free_irq(struct cci_pmu *cci_pmu) | ||
383 | { | ||
384 | int i; | ||
385 | |||
386 | for (i = 0; i < pmu->nr_irqs; i++) { | ||
387 | if (!test_and_clear_bit(i, &pmu->active_irqs)) | ||
388 | continue; | ||
389 | |||
390 | free_irq(pmu->irqs[i], cci_pmu); | ||
391 | } | ||
392 | } | ||
393 | |||
394 | static u32 pmu_read_counter(struct perf_event *event) | ||
395 | { | ||
396 | struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); | ||
397 | struct hw_perf_event *hw_counter = &event->hw; | ||
398 | int idx = hw_counter->idx; | ||
399 | u32 value; | ||
400 | |||
401 | if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) { | ||
402 | dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx); | ||
403 | return 0; | ||
404 | } | ||
405 | value = pmu_read_register(idx, CCI_PMU_CNTR); | ||
406 | |||
407 | return value; | ||
408 | } | ||
409 | |||
410 | static void pmu_write_counter(struct perf_event *event, u32 value) | ||
411 | { | ||
412 | struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); | ||
413 | struct hw_perf_event *hw_counter = &event->hw; | ||
414 | int idx = hw_counter->idx; | ||
415 | |||
416 | if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) | ||
417 | dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx); | ||
418 | else | ||
419 | pmu_write_register(value, idx, CCI_PMU_CNTR); | ||
420 | } | ||
421 | |||
422 | static u64 pmu_event_update(struct perf_event *event) | ||
423 | { | ||
424 | struct hw_perf_event *hwc = &event->hw; | ||
425 | u64 delta, prev_raw_count, new_raw_count; | ||
426 | |||
427 | do { | ||
428 | prev_raw_count = local64_read(&hwc->prev_count); | ||
429 | new_raw_count = pmu_read_counter(event); | ||
430 | } while (local64_cmpxchg(&hwc->prev_count, prev_raw_count, | ||
431 | new_raw_count) != prev_raw_count); | ||
432 | |||
433 | delta = (new_raw_count - prev_raw_count) & CCI_PMU_CNTR_MASK; | ||
434 | |||
435 | local64_add(delta, &event->count); | ||
436 | |||
437 | return new_raw_count; | ||
438 | } | ||
439 | |||
440 | static void pmu_read(struct perf_event *event) | ||
441 | { | ||
442 | pmu_event_update(event); | ||
443 | } | ||
444 | |||
445 | void pmu_event_set_period(struct perf_event *event) | ||
446 | { | ||
447 | struct hw_perf_event *hwc = &event->hw; | ||
448 | /* | ||
449 | * The CCI PMU counters have a period of 2^32. To account for the | ||
450 | * possibility of extreme interrupt latency we program for a period of | ||
451 | * half that. Hopefully we can handle the interrupt before another 2^31 | ||
452 | * events occur and the counter overtakes its previous value. | ||
453 | */ | ||
454 | u64 val = 1ULL << 31; | ||
455 | local64_set(&hwc->prev_count, val); | ||
456 | pmu_write_counter(event, val); | ||
457 | } | ||
458 | |||
374 | static irqreturn_t pmu_handle_irq(int irq_num, void *dev) | 459 | static irqreturn_t pmu_handle_irq(int irq_num, void *dev) |
375 | { | 460 | { |
376 | unsigned long flags; | 461 | unsigned long flags; |
377 | struct arm_pmu *cci_pmu = (struct arm_pmu *)dev; | 462 | struct cci_pmu *cci_pmu = dev; |
378 | struct pmu_hw_events *events = cci_pmu->get_hw_events(); | 463 | struct cci_pmu_hw_events *events = &pmu->hw_events; |
379 | struct perf_sample_data data; | ||
380 | struct pt_regs *regs; | ||
381 | int idx, handled = IRQ_NONE; | 464 | int idx, handled = IRQ_NONE; |
382 | 465 | ||
383 | raw_spin_lock_irqsave(&events->pmu_lock, flags); | 466 | raw_spin_lock_irqsave(&events->pmu_lock, flags); |
384 | regs = get_irq_regs(); | ||
385 | /* | 467 | /* |
386 | * Iterate over counters and update the corresponding perf events. | 468 | * Iterate over counters and update the corresponding perf events. |
387 | * This should work regardless of whether we have per-counter overflow | 469 | * This should work regardless of whether we have per-counter overflow |
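With the arm_pmu helpers gone, pmu_event_update() and pmu_event_set_period() above do the free-running-counter bookkeeping by hand: the delta since the last read is taken modulo the 32-bit counter width (CCI_PMU_CNTR_MASK), and the counter is reprogrammed to 2^31 so an overflow interrupt has half the counter range of slack before the hardware value could catch up with the saved prev_count. The wrap-around arithmetic in isolation looks like the following; this is ordinary user-space C for illustration, not driver code, and counter_delta() is an invented name.

#include <stdint.h>
#include <stdio.h>

#define CNTR_MASK ((1ULL << 32) - 1)	/* the CCI counters are 32 bits wide */

/* Events seen between two raw reads, tolerating a single wrap-around. */
static uint64_t counter_delta(uint64_t prev, uint64_t new_raw)
{
	return (new_raw - prev) & CNTR_MASK;
}

int main(void)
{
	/* The counter was (re)programmed to half range, as
	 * pmu_event_set_period() does, then read after wrapping past zero. */
	uint64_t prev = 1ULL << 31;
	uint64_t now = 0x123;

	/* prints 2147483939, i.e. 2^31 + 0x123 events */
	printf("delta = %llu\n", (unsigned long long)counter_delta(prev, now));
	return 0;
}

The local64_cmpxchg() loop in pmu_event_update() serves the same purpose as in the core perf code: if a read races with the overflow interrupt, only the path that succeeds in installing its new_raw_count accounts for the corresponding delta, so no delta is added twice.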
@@ -403,154 +485,407 @@ static irqreturn_t pmu_handle_irq(int irq_num, void *dev) | |||
403 | 485 | ||
404 | pmu_write_register(CCI_PMU_OVRFLW_FLAG, idx, CCI_PMU_OVRFLW); | 486 | pmu_write_register(CCI_PMU_OVRFLW_FLAG, idx, CCI_PMU_OVRFLW); |
405 | 487 | ||
488 | pmu_event_update(event); | ||
489 | pmu_event_set_period(event); | ||
406 | handled = IRQ_HANDLED; | 490 | handled = IRQ_HANDLED; |
407 | |||
408 | armpmu_event_update(event); | ||
409 | perf_sample_data_init(&data, 0, hw_counter->last_period); | ||
410 | if (!armpmu_event_set_period(event)) | ||
411 | continue; | ||
412 | |||
413 | if (perf_event_overflow(event, &data, regs)) | ||
414 | cci_pmu->disable(event); | ||
415 | } | 491 | } |
416 | raw_spin_unlock_irqrestore(&events->pmu_lock, flags); | 492 | raw_spin_unlock_irqrestore(&events->pmu_lock, flags); |
417 | 493 | ||
418 | return IRQ_RETVAL(handled); | 494 | return IRQ_RETVAL(handled); |
419 | } | 495 | } |
420 | 496 | ||
421 | static void pmu_free_irq(struct arm_pmu *cci_pmu) | 497 | static int cci_pmu_get_hw(struct cci_pmu *cci_pmu) |
422 | { | 498 | { |
423 | int i; | 499 | int ret = pmu_request_irq(cci_pmu, pmu_handle_irq); |
500 | if (ret) { | ||
501 | pmu_free_irq(cci_pmu); | ||
502 | return ret; | ||
503 | } | ||
504 | return 0; | ||
505 | } | ||
424 | 506 | ||
425 | for (i = 0; i < pmu->nr_irqs; i++) { | 507 | static void cci_pmu_put_hw(struct cci_pmu *cci_pmu) |
426 | if (!test_and_clear_bit(i, &pmu->active_irqs)) | 508 | { |
427 | continue; | 509 | pmu_free_irq(cci_pmu); |
510 | } | ||
428 | 511 | ||
429 | free_irq(pmu->irqs[i], cci_pmu); | 512 | static void hw_perf_event_destroy(struct perf_event *event) |
513 | { | ||
514 | struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); | ||
515 | atomic_t *active_events = &cci_pmu->active_events; | ||
516 | struct mutex *reserve_mutex = &cci_pmu->reserve_mutex; | ||
517 | |||
518 | if (atomic_dec_and_mutex_lock(active_events, reserve_mutex)) { | ||
519 | cci_pmu_put_hw(cci_pmu); | ||
520 | mutex_unlock(reserve_mutex); | ||
430 | } | 521 | } |
431 | } | 522 | } |
432 | 523 | ||
433 | static void pmu_enable_event(struct perf_event *event) | 524 | static void cci_pmu_enable(struct pmu *pmu) |
434 | { | 525 | { |
526 | struct cci_pmu *cci_pmu = to_cci_pmu(pmu); | ||
527 | struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events; | ||
528 | int enabled = bitmap_weight(hw_events->used_mask, cci_pmu->num_events); | ||
435 | unsigned long flags; | 529 | unsigned long flags; |
436 | struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu); | 530 | u32 val; |
437 | struct pmu_hw_events *events = cci_pmu->get_hw_events(); | 531 | |
438 | struct hw_perf_event *hw_counter = &event->hw; | 532 | if (!enabled) |
439 | int idx = hw_counter->idx; | 533 | return; |
534 | |||
535 | raw_spin_lock_irqsave(&hw_events->pmu_lock, flags); | ||
536 | |||
537 | /* Enable all the PMU counters. */ | ||
538 | val = readl_relaxed(cci_ctrl_base + CCI_PMCR) | CCI_PMCR_CEN; | ||
539 | writel(val, cci_ctrl_base + CCI_PMCR); | ||
540 | raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags); | ||
541 | |||
542 | } | ||
543 | |||
544 | static void cci_pmu_disable(struct pmu *pmu) | ||
545 | { | ||
546 | struct cci_pmu *cci_pmu = to_cci_pmu(pmu); | ||
547 | struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events; | ||
548 | unsigned long flags; | ||
549 | u32 val; | ||
550 | |||
551 | raw_spin_lock_irqsave(&hw_events->pmu_lock, flags); | ||
552 | |||
553 | /* Disable all the PMU counters. */ | ||
554 | val = readl_relaxed(cci_ctrl_base + CCI_PMCR) & ~CCI_PMCR_CEN; | ||
555 | writel(val, cci_ctrl_base + CCI_PMCR); | ||
556 | raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags); | ||
557 | } | ||
558 | |||
559 | static void cci_pmu_start(struct perf_event *event, int pmu_flags) | ||
560 | { | ||
561 | struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); | ||
562 | struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events; | ||
563 | struct hw_perf_event *hwc = &event->hw; | ||
564 | int idx = hwc->idx; | ||
565 | unsigned long flags; | ||
566 | |||
567 | /* | ||
568 | * To handle interrupt latency, we always reprogram the period | ||
569 | * regardless of PERF_EF_RELOAD. | ||
570 | */ | ||
571 | if (pmu_flags & PERF_EF_RELOAD) | ||
572 | WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); | ||
573 | |||
574 | hwc->state = 0; | ||
440 | 575 | ||
441 | if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) { | 576 | if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) { |
442 | dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx); | 577 | dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx); |
443 | return; | 578 | return; |
444 | } | 579 | } |
445 | 580 | ||
446 | raw_spin_lock_irqsave(&events->pmu_lock, flags); | 581 | raw_spin_lock_irqsave(&hw_events->pmu_lock, flags); |
447 | 582 | ||
448 | /* Configure the event to count, unless you are counting cycles */ | 583 | /* Configure the event to count, unless you are counting cycles */ |
449 | if (idx != CCI_PMU_CYCLE_CNTR_IDX) | 584 | if (idx != CCI_PMU_CYCLE_CNTR_IDX) |
450 | pmu_set_event(idx, hw_counter->config_base); | 585 | pmu_set_event(idx, hwc->config_base); |
451 | 586 | ||
587 | pmu_event_set_period(event); | ||
452 | pmu_enable_counter(idx); | 588 | pmu_enable_counter(idx); |
453 | 589 | ||
454 | raw_spin_unlock_irqrestore(&events->pmu_lock, flags); | 590 | raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags); |
455 | } | 591 | } |
456 | 592 | ||
457 | static void pmu_disable_event(struct perf_event *event) | 593 | static void cci_pmu_stop(struct perf_event *event, int pmu_flags) |
458 | { | 594 | { |
459 | struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu); | 595 | struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); |
460 | struct hw_perf_event *hw_counter = &event->hw; | 596 | struct hw_perf_event *hwc = &event->hw; |
461 | int idx = hw_counter->idx; | 597 | int idx = hwc->idx; |
598 | |||
599 | if (hwc->state & PERF_HES_STOPPED) | ||
600 | return; | ||
462 | 601 | ||
463 | if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) { | 602 | if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) { |
464 | dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx); | 603 | dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx); |
465 | return; | 604 | return; |
466 | } | 605 | } |
467 | 606 | ||
607 | /* | ||
608 | * We always reprogram the counter, so ignore PERF_EF_UPDATE. See | ||
609 | * cci_pmu_start() | ||
610 | */ | ||
468 | pmu_disable_counter(idx); | 611 | pmu_disable_counter(idx); |
612 | pmu_event_update(event); | ||
613 | hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; | ||
469 | } | 614 | } |
470 | 615 | ||
471 | static void pmu_start(struct arm_pmu *cci_pmu) | 616 | static int cci_pmu_add(struct perf_event *event, int flags) |
472 | { | 617 | { |
473 | u32 val; | 618 | struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); |
474 | unsigned long flags; | 619 | struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events; |
475 | struct pmu_hw_events *events = cci_pmu->get_hw_events(); | 620 | struct hw_perf_event *hwc = &event->hw; |
621 | int idx; | ||
622 | int err = 0; | ||
476 | 623 | ||
477 | raw_spin_lock_irqsave(&events->pmu_lock, flags); | 624 | perf_pmu_disable(event->pmu); |
478 | 625 | ||
479 | /* Enable all the PMU counters. */ | 626 | /* If we don't have a space for the counter then finish early. */ |
480 | val = readl_relaxed(cci_ctrl_base + CCI_PMCR) | CCI_PMCR_CEN; | 627 | idx = pmu_get_event_idx(hw_events, event); |
481 | writel(val, cci_ctrl_base + CCI_PMCR); | 628 | if (idx < 0) { |
629 | err = idx; | ||
630 | goto out; | ||
631 | } | ||
482 | 632 | ||
483 | raw_spin_unlock_irqrestore(&events->pmu_lock, flags); | 633 | event->hw.idx = idx; |
634 | hw_events->events[idx] = event; | ||
635 | |||
636 | hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; | ||
637 | if (flags & PERF_EF_START) | ||
638 | cci_pmu_start(event, PERF_EF_RELOAD); | ||
639 | |||
640 | /* Propagate our changes to the userspace mapping. */ | ||
641 | perf_event_update_userpage(event); | ||
642 | |||
643 | out: | ||
644 | perf_pmu_enable(event->pmu); | ||
645 | return err; | ||
484 | } | 646 | } |
485 | 647 | ||
486 | static void pmu_stop(struct arm_pmu *cci_pmu) | 648 | static void cci_pmu_del(struct perf_event *event, int flags) |
487 | { | 649 | { |
488 | u32 val; | 650 | struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); |
489 | unsigned long flags; | 651 | struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events; |
490 | struct pmu_hw_events *events = cci_pmu->get_hw_events(); | 652 | struct hw_perf_event *hwc = &event->hw; |
653 | int idx = hwc->idx; | ||
491 | 654 | ||
492 | raw_spin_lock_irqsave(&events->pmu_lock, flags); | 655 | cci_pmu_stop(event, PERF_EF_UPDATE); |
656 | hw_events->events[idx] = NULL; | ||
657 | clear_bit(idx, hw_events->used_mask); | ||
493 | 658 | ||
494 | /* Disable all the PMU counters. */ | 659 | perf_event_update_userpage(event); |
495 | val = readl_relaxed(cci_ctrl_base + CCI_PMCR) & ~CCI_PMCR_CEN; | 660 | } |
496 | writel(val, cci_ctrl_base + CCI_PMCR); | ||
497 | 661 | ||
498 | raw_spin_unlock_irqrestore(&events->pmu_lock, flags); | 662 | static int |
663 | validate_event(struct cci_pmu_hw_events *hw_events, | ||
664 | struct perf_event *event) | ||
665 | { | ||
666 | if (is_software_event(event)) | ||
667 | return 1; | ||
668 | |||
669 | if (event->state < PERF_EVENT_STATE_OFF) | ||
670 | return 1; | ||
671 | |||
672 | if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec) | ||
673 | return 1; | ||
674 | |||
675 | return pmu_get_event_idx(hw_events, event) >= 0; | ||
499 | } | 676 | } |
500 | 677 | ||
501 | static u32 pmu_read_counter(struct perf_event *event) | 678 | static int |
679 | validate_group(struct perf_event *event) | ||
502 | { | 680 | { |
503 | struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu); | 681 | struct perf_event *sibling, *leader = event->group_leader; |
504 | struct hw_perf_event *hw_counter = &event->hw; | 682 | struct cci_pmu_hw_events fake_pmu = { |
505 | int idx = hw_counter->idx; | 683 | /* |
506 | u32 value; | 684 | * Initialise the fake PMU. We only need to populate the |
685 | * used_mask for the purposes of validation. | ||
686 | */ | ||
687 | .used_mask = CPU_BITS_NONE, | ||
688 | }; | ||
507 | 689 | ||
508 | if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) { | 690 | if (!validate_event(&fake_pmu, leader)) |
509 | dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx); | 691 | return -EINVAL; |
510 | return 0; | 692 | |
693 | list_for_each_entry(sibling, &leader->sibling_list, group_entry) { | ||
694 | if (!validate_event(&fake_pmu, sibling)) | ||
695 | return -EINVAL; | ||
511 | } | 696 | } |
512 | value = pmu_read_register(idx, CCI_PMU_CNTR); | ||
513 | 697 | ||
514 | return value; | 698 | if (!validate_event(&fake_pmu, event)) |
699 | return -EINVAL; | ||
700 | |||
701 | return 0; | ||
515 | } | 702 | } |
516 | 703 | ||
517 | static void pmu_write_counter(struct perf_event *event, u32 value) | 704 | static int |
705 | __hw_perf_event_init(struct perf_event *event) | ||
518 | { | 706 | { |
519 | struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu); | 707 | struct hw_perf_event *hwc = &event->hw; |
520 | struct hw_perf_event *hw_counter = &event->hw; | 708 | int mapping; |
521 | int idx = hw_counter->idx; | ||
522 | 709 | ||
523 | if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) | 710 | mapping = pmu_map_event(event); |
524 | dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx); | 711 | |
525 | else | 712 | if (mapping < 0) { |
526 | pmu_write_register(value, idx, CCI_PMU_CNTR); | 713 | pr_debug("event %x:%llx not supported\n", event->attr.type, |
714 | event->attr.config); | ||
715 | return mapping; | ||
716 | } | ||
717 | |||
718 | /* | ||
719 | * We don't assign an index until we actually place the event onto | ||
720 | * hardware. Use -1 to signify that we haven't decided where to put it | ||
721 | * yet. | ||
722 | */ | ||
723 | hwc->idx = -1; | ||
724 | hwc->config_base = 0; | ||
725 | hwc->config = 0; | ||
726 | hwc->event_base = 0; | ||
727 | |||
728 | /* | ||
729 | * Store the event encoding into the config_base field. | ||
730 | */ | ||
731 | hwc->config_base |= (unsigned long)mapping; | ||
732 | |||
733 | /* | ||
734 | * Limit the sample_period to half of the counter width. That way, the | ||
735 | * new counter value is far less likely to overtake the previous one | ||
736 | * unless you have some serious IRQ latency issues. | ||
737 | */ | ||
738 | hwc->sample_period = CCI_PMU_CNTR_MASK >> 1; | ||
739 | hwc->last_period = hwc->sample_period; | ||
740 | local64_set(&hwc->period_left, hwc->sample_period); | ||
741 | |||
742 | if (event->group_leader != event) { | ||
743 | if (validate_group(event) != 0) | ||
744 | return -EINVAL; | ||
745 | } | ||
746 | |||
747 | return 0; | ||
748 | } | ||
749 | |||
750 | static int cci_pmu_event_init(struct perf_event *event) | ||
751 | { | ||
752 | struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu); | ||
753 | atomic_t *active_events = &cci_pmu->active_events; | ||
754 | int err = 0; | ||
755 | int cpu; | ||
756 | |||
757 | if (event->attr.type != event->pmu->type) | ||
758 | return -ENOENT; | ||
759 | |||
760 | /* Shared by all CPUs, no meaningful state to sample */ | ||
761 | if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) | ||
762 | return -EOPNOTSUPP; | ||
763 | |||
764 | /* We have no filtering of any kind */ | ||
765 | if (event->attr.exclude_user || | ||
766 | event->attr.exclude_kernel || | ||
767 | event->attr.exclude_hv || | ||
768 | event->attr.exclude_idle || | ||
769 | event->attr.exclude_host || | ||
770 | event->attr.exclude_guest) | ||
771 | return -EINVAL; | ||
772 | |||
773 | /* | ||
774 | * Following the example set by other "uncore" PMUs, we accept any CPU | ||
775 | * and rewrite its affinity dynamically rather than having perf core | ||
776 | * handle cpu == -1 and pid == -1 for this case. | ||
777 | * | ||
778 | * The perf core will pin online CPUs for the duration of this call and | ||
779 | * the event being installed into its context, so the PMU's CPU can't | ||
780 | * change under our feet. | ||
781 | */ | ||
782 | cpu = cpumask_first(&cci_pmu->cpus); | ||
783 | if (event->cpu < 0 || cpu < 0) | ||
784 | return -EINVAL; | ||
785 | event->cpu = cpu; | ||
786 | |||
787 | event->destroy = hw_perf_event_destroy; | ||
788 | if (!atomic_inc_not_zero(active_events)) { | ||
789 | mutex_lock(&cci_pmu->reserve_mutex); | ||
790 | if (atomic_read(active_events) == 0) | ||
791 | err = cci_pmu_get_hw(cci_pmu); | ||
792 | if (!err) | ||
793 | atomic_inc(active_events); | ||
794 | mutex_unlock(&cci_pmu->reserve_mutex); | ||
795 | } | ||
796 | if (err) | ||
797 | return err; | ||
798 | |||
799 | err = __hw_perf_event_init(event); | ||
800 | if (err) | ||
801 | hw_perf_event_destroy(event); | ||
802 | |||
803 | return err; | ||
527 | } | 804 | } |
528 | 805 | ||
529 | static int cci_pmu_init(struct arm_pmu *cci_pmu, struct platform_device *pdev) | 806 | static ssize_t pmu_attr_cpumask_show(struct device *dev, |
807 | struct device_attribute *attr, char *buf) | ||
530 | { | 808 | { |
531 | *cci_pmu = (struct arm_pmu){ | 809 | int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &pmu->cpus); |
532 | .name = pmu_names[probe_cci_revision()], | 810 | |
533 | .max_period = (1LLU << 32) - 1, | 811 | buf[n++] = '\n'; |
534 | .get_hw_events = pmu_get_hw_events, | 812 | buf[n] = '\0'; |
535 | .get_event_idx = pmu_get_event_idx, | 813 | return n; |
536 | .map_event = pmu_map_event, | 814 | } |
537 | .request_irq = pmu_request_irq, | 815 | |
538 | .handle_irq = pmu_handle_irq, | 816 | static DEVICE_ATTR(cpumask, S_IRUGO, pmu_attr_cpumask_show, NULL); |
539 | .free_irq = pmu_free_irq, | 817 | |
540 | .enable = pmu_enable_event, | 818 | static struct attribute *pmu_attrs[] = { |
541 | .disable = pmu_disable_event, | 819 | &dev_attr_cpumask.attr, |
542 | .start = pmu_start, | 820 | NULL, |
543 | .stop = pmu_stop, | 821 | }; |
544 | .read_counter = pmu_read_counter, | 822 | |
545 | .write_counter = pmu_write_counter, | 823 | static struct attribute_group pmu_attr_group = { |
824 | .attrs = pmu_attrs, | ||
825 | }; | ||
826 | |||
827 | static const struct attribute_group *pmu_attr_groups[] = { | ||
828 | &pmu_attr_group, | ||
829 | NULL | ||
830 | }; | ||
831 | |||
832 | static int cci_pmu_init(struct cci_pmu *cci_pmu, struct platform_device *pdev) | ||
833 | { | ||
834 | char *name = pmu_names[probe_cci_revision()]; | ||
835 | cci_pmu->pmu = (struct pmu) { | ||
836 | .name = pmu_names[probe_cci_revision()], | ||
837 | .task_ctx_nr = perf_invalid_context, | ||
838 | .pmu_enable = cci_pmu_enable, | ||
839 | .pmu_disable = cci_pmu_disable, | ||
840 | .event_init = cci_pmu_event_init, | ||
841 | .add = cci_pmu_add, | ||
842 | .del = cci_pmu_del, | ||
843 | .start = cci_pmu_start, | ||
844 | .stop = cci_pmu_stop, | ||
845 | .read = pmu_read, | ||
846 | .attr_groups = pmu_attr_groups, | ||
546 | }; | 847 | }; |
547 | 848 | ||
548 | cci_pmu->plat_device = pdev; | 849 | cci_pmu->plat_device = pdev; |
549 | cci_pmu->num_events = pmu_get_max_counters(); | 850 | cci_pmu->num_events = pmu_get_max_counters(); |
550 | 851 | ||
551 | return armpmu_register(cci_pmu, -1); | 852 | return perf_pmu_register(&cci_pmu->pmu, name, -1); |
552 | } | 853 | } |
553 | 854 | ||
855 | static int cci_pmu_cpu_notifier(struct notifier_block *self, | ||
856 | unsigned long action, void *hcpu) | ||
857 | { | ||
858 | unsigned int cpu = (long)hcpu; | ||
859 | unsigned int target; | ||
860 | |||
861 | switch (action & ~CPU_TASKS_FROZEN) { | ||
862 | case CPU_DOWN_PREPARE: | ||
863 | if (!cpumask_test_and_clear_cpu(cpu, &pmu->cpus)) | ||
864 | break; | ||
865 | target = cpumask_any_but(cpu_online_mask, cpu); | ||
866 | if (target < 0) // UP, last CPU | ||
867 | break; | ||
868 | /* | ||
869 | * TODO: migrate context once core races on event->ctx have | ||
870 | * been fixed. | ||
871 | */ | ||
872 | cpumask_set_cpu(target, &pmu->cpus); | ||
873 | default: | ||
874 | break; | ||
875 | } | ||
876 | |||
877 | return NOTIFY_OK; | ||
878 | } | ||
879 | |||
880 | static struct notifier_block cci_pmu_cpu_nb = { | ||
881 | .notifier_call = cci_pmu_cpu_notifier, | ||
882 | /* | ||
883 | * to migrate uncore events, our notifier should be executed | ||
884 | * before perf core's notifier. | ||
885 | */ | ||
886 | .priority = CPU_PRI_PERF + 1, | ||
887 | }; | ||
888 | |||
554 | static const struct of_device_id arm_cci_pmu_matches[] = { | 889 | static const struct of_device_id arm_cci_pmu_matches[] = { |
555 | { | 890 | { |
556 | .compatible = "arm,cci-400-pmu", | 891 | .compatible = "arm,cci-400-pmu", |
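Two patterns in the hunk above deserve a note. For affinity, the comment in cci_pmu_event_init() spells out the uncore convention: every event is rewritten onto the single CPU nominated in cci_pmu->cpus, that choice is exported to userspace through the cpumask attribute, and the hotplug notifier renominates another online CPU when the current one goes down (actual context migration is left as a TODO). For interrupt ownership, cci_pmu_event_init() and hw_perf_event_destroy() share the IRQ lines between all open events with a refcount: only the 0 -> 1 transition requests the interrupts and only the last teardown frees them. A stripped-down sketch of that refcount pattern follows; get_hw()/put_hw() are stand-in stubs, not the driver's pmu_request_irq()/pmu_free_irq().

#include <linux/atomic.h>
#include <linux/mutex.h>

static atomic_t active_events = ATOMIC_INIT(0);
static DEFINE_MUTEX(reserve_mutex);

static int get_hw(void)  { return 0; }	/* would request every IRQ line */
static void put_hw(void) { }		/* would free every IRQ line */

/* Called once per event creation (cf. cci_pmu_event_init()). */
static int example_reserve_hw(void)
{
	int err = 0;

	/* Fast path: some other event already holds the hardware. */
	if (atomic_inc_not_zero(&active_events))
		return 0;

	mutex_lock(&reserve_mutex);
	if (atomic_read(&active_events) == 0)
		err = get_hw();
	if (!err)
		atomic_inc(&active_events);
	mutex_unlock(&reserve_mutex);

	return err;
}

/* Called once per event teardown (cf. hw_perf_event_destroy()). */
static void example_release_hw(void)
{
	/* Only the transition back to zero takes the mutex and drops the IRQs. */
	if (atomic_dec_and_mutex_lock(&active_events, &reserve_mutex)) {
		put_hw();
		mutex_unlock(&reserve_mutex);
	}
}

The probe hunk below completes the picture: it seeds cci_pmu->cpus with the probing CPU and registers the hotplug notifier before registering the PMU itself.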
@@ -604,15 +939,16 @@ static int cci_pmu_probe(struct platform_device *pdev) | |||
604 | return -EINVAL; | 939 | return -EINVAL; |
605 | } | 940 | } |
606 | 941 | ||
607 | pmu->cci_pmu = devm_kzalloc(&pdev->dev, sizeof(*(pmu->cci_pmu)), GFP_KERNEL); | ||
608 | if (!pmu->cci_pmu) | ||
609 | return -ENOMEM; | ||
610 | |||
611 | pmu->hw_events.events = pmu->events; | ||
612 | pmu->hw_events.used_mask = pmu->used_mask; | ||
613 | raw_spin_lock_init(&pmu->hw_events.pmu_lock); | 942 | raw_spin_lock_init(&pmu->hw_events.pmu_lock); |
943 | mutex_init(&pmu->reserve_mutex); | ||
944 | atomic_set(&pmu->active_events, 0); | ||
945 | cpumask_set_cpu(smp_processor_id(), &pmu->cpus); | ||
946 | |||
947 | ret = register_cpu_notifier(&cci_pmu_cpu_nb); | ||
948 | if (ret) | ||
949 | return ret; | ||
614 | 950 | ||
615 | ret = cci_pmu_init(pmu->cci_pmu, pdev); | 951 | ret = cci_pmu_init(pmu, pdev); |
616 | if (ret) | 952 | if (ret) |
617 | return ret; | 953 | return ret; |
618 | 954 | ||