author     Mark Rutland <mark.rutland@arm.com>       2014-05-13 14:46:10 -0400
committer  Will Deacon <will.deacon@arm.com>         2014-10-30 08:17:00 -0400
commit     5ebd92003494a19ac5246ae385c073be16de1144
tree       8c9367b21854ea92a92a355978523a33abf48a3c
parent     116792508607002896b706fbad8310419fcc5742
arm: perf: fold percpu_pmu into pmu_hw_events
Currently the percpu_pmu pointers used as percpu_irq dev_id values are
defined separately from the other per-cpu accounting data, which makes
dynamically allocating the data (as will be required for systems with
heterogeneous CPUs) difficult.

This patch moves the percpu_pmu pointers into pmu_hw_events (which is
itself allocated per cpu), which will allow for easier dynamic
allocation. Both percpu and regular irqs are requested using percpu_pmu
pointers as tokens, freeing us from having to know whether an irq is
percpu within the handler, and thus avoiding a radix tree lookup on the
handler path.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Tested-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
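To make the dev_id scheme described above concrete, here is a minimal standalone
C sketch (plain userspace code, not kernel code) of the pattern the patch
adopts: each CPU's per-cpu accounting struct embeds a back-pointer to the
shared PMU, IRQs are registered with a pointer to one of those embedded
back-pointers (or with the percpu variable itself), and the dispatch routine
recovers the PMU with a single dereference either way. The names fake_pmu,
fake_hw_events and dispatch() are illustrative stand-ins, not the kernel API.

/*
 * Userspace model of the pattern: every "CPU" slot carries a back-pointer to
 * the shared PMU, and the handler only ever does one dereference on whatever
 * slot address it was registered with.
 */
#include <stdio.h>

#define NR_CPUS 4

struct fake_pmu {
	const char *name;
};

struct fake_hw_events {
	/* other per-cpu accounting data would live here */
	struct fake_pmu *percpu_pmu;	/* analogue of pmu_hw_events::percpu_pmu */
};

static struct fake_pmu pmu = { .name = "arm-pmu" };
static struct fake_hw_events hw_events[NR_CPUS];	/* analogue of DEFINE_PER_CPU */

/* analogue of armpmu_dispatch_irq(): dev is always a struct fake_pmu ** */
static void dispatch(int irq, void *dev)
{
	struct fake_pmu *p = *(struct fake_pmu **)dev;

	printf("irq %d handled for %s\n", irq, p->name);
}

int main(void)
{
	int cpu;

	/* analogue of cpu_pmu_init(): point every slot back at the pmu */
	for (cpu = 0; cpu < NR_CPUS; cpu++)
		hw_events[cpu].percpu_pmu = &pmu;

	/*
	 * A regular IRQ would be registered with &hw_events[cpu].percpu_pmu for
	 * one specific cpu; a percpu IRQ is registered with the percpu variable
	 * and the core hands the handler the calling CPU's instance. Either
	 * way the handler sees a pointer it can dereference exactly once.
	 */
	for (cpu = 0; cpu < NR_CPUS; cpu++)
		dispatch(100 + cpu, &hw_events[cpu].percpu_pmu);

	return 0;
}

Because both registration paths hand the handler the same kind of token, the
handler no longer needs to ask whether the interrupt is percpu, which is what
removes the radix tree lookup from the handler path.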
Diffstat (limited to 'arch/arm')
-rw-r--r--  arch/arm/include/asm/pmu.h        |  6
-rw-r--r--  arch/arm/kernel/perf_event.c      | 14
-rw-r--r--  arch/arm/kernel/perf_event_cpu.c  | 14
3 files changed, 23 insertions(+), 11 deletions(-)
diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h
index f273dd2285a1..cc0149835507 100644
--- a/arch/arm/include/asm/pmu.h
+++ b/arch/arm/include/asm/pmu.h
@@ -81,6 +81,12 @@ struct pmu_hw_events {
 	 * read/modify/write sequences.
 	 */
 	raw_spinlock_t		pmu_lock;
+
+	/*
+	 * When using percpu IRQs, we need a percpu dev_id. Place it here as we
+	 * already have to allocate this struct per cpu.
+	 */
+	struct arm_pmu		*percpu_pmu;
 };
 
 struct arm_pmu {
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 05ac5ee6e2bb..e34934f63a49 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -304,17 +304,21 @@ static irqreturn_t armpmu_dispatch_irq(int irq, void *dev)
 	int ret;
 	u64 start_clock, finish_clock;
 
-	if (irq_is_percpu(irq))
-		dev = *(void **)dev;
-	armpmu = dev;
+	/*
+	 * we request the IRQ with a (possibly percpu) struct arm_pmu**, but
+	 * the handlers expect a struct arm_pmu*. The percpu_irq framework will
+	 * do any necessary shifting, we just need to perform the first
+	 * dereference.
+	 */
+	armpmu = *(void **)dev;
 	plat_device = armpmu->plat_device;
 	plat = dev_get_platdata(&plat_device->dev);
 
 	start_clock = sched_clock();
 	if (plat && plat->handle_irq)
-		ret = plat->handle_irq(irq, dev, armpmu->handle_irq);
+		ret = plat->handle_irq(irq, armpmu, armpmu->handle_irq);
 	else
-		ret = armpmu->handle_irq(irq, dev);
+		ret = armpmu->handle_irq(irq, armpmu);
 	finish_clock = sched_clock();
 
 	perf_sample_event_took(finish_clock - start_clock);
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index fd24ad84dba6..b9391fa2368d 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -35,7 +35,6 @@
 /* Set at runtime when we know what CPU type we are. */
 static struct arm_pmu *cpu_pmu;
 
-static DEFINE_PER_CPU(struct arm_pmu *, percpu_pmu);
 static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events);
 
 /*
@@ -85,20 +84,21 @@ static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu)
 {
 	int i, irq, irqs;
 	struct platform_device *pmu_device = cpu_pmu->plat_device;
+	struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events;
 
 	irqs = min(pmu_device->num_resources, num_possible_cpus());
 
 	irq = platform_get_irq(pmu_device, 0);
 	if (irq >= 0 && irq_is_percpu(irq)) {
 		on_each_cpu(cpu_pmu_disable_percpu_irq, &irq, 1);
-		free_percpu_irq(irq, &percpu_pmu);
+		free_percpu_irq(irq, &hw_events->percpu_pmu);
 	} else {
 		for (i = 0; i < irqs; ++i) {
 			if (!cpumask_test_and_clear_cpu(i, &cpu_pmu->active_irqs))
 				continue;
 			irq = platform_get_irq(pmu_device, i);
 			if (irq >= 0)
-				free_irq(irq, cpu_pmu);
+				free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, i));
 		}
 	}
 }
@@ -107,6 +107,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
 {
 	int i, err, irq, irqs;
 	struct platform_device *pmu_device = cpu_pmu->plat_device;
+	struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events;
 
 	if (!pmu_device)
 		return -ENODEV;
@@ -119,7 +120,8 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
 
 	irq = platform_get_irq(pmu_device, 0);
 	if (irq >= 0 && irq_is_percpu(irq)) {
-		err = request_percpu_irq(irq, handler, "arm-pmu", &percpu_pmu);
+		err = request_percpu_irq(irq, handler, "arm-pmu",
+					 &hw_events->percpu_pmu);
 		if (err) {
 			pr_err("unable to request IRQ%d for ARM PMU counters\n",
 				irq);
@@ -146,7 +148,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
 
 		err = request_irq(irq, handler,
 				  IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu",
-				  cpu_pmu);
+				  per_cpu_ptr(&hw_events->percpu_pmu, i));
 		if (err) {
 			pr_err("unable to request IRQ%d for ARM PMU counters\n",
 				irq);
@@ -166,7 +168,7 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
 	for_each_possible_cpu(cpu) {
 		struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu);
 		raw_spin_lock_init(&events->pmu_lock);
-		per_cpu(percpu_pmu, cpu) = cpu_pmu;
+		events->percpu_pmu = cpu_pmu;
 	}
 
 	cpu_pmu->hw_events = &cpu_hw_events;