aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPaul Mackerras <paulus@samba.org>2009-01-09 04:21:55 -0500
committerPaul Mackerras <paulus@samba.org>2009-01-10 00:32:05 -0500
commit4574910e5087085a1f330ff8373cee4503f5c77c (patch)
treea3bb6c974c2314ca91ee2e3a33a7283187ad19ea
parent93a6d3ce6962044fe9badf528fed46b455d58292 (diff)
powerpc/perf_counter: Add generic support for POWER-family PMU hardware
This provides the architecture-specific functions needed to access PMU hardware on the 64-bit PowerPC processors. It has been designed for the IBM POWER family (POWER 4/4+/5/5+/6 and PPC970) but will hopefully also suit other 64-bit PowerPC machines (although probably not Cell given how different it is in this area). This doesn't include back-ends for any specific processors. This implements a system which allows back-ends to express the constraints that their hardware has on what events can be counted simultaneously. The constraints are expressed as a 64-bit mask + 64-bit value for each event, and the encoding is capable of expressing the constraints arising from having a set of multiplexers feeding an event bus, with some events being available through multiple multiplexer settings, such as we get on POWER4 and PPC970. Furthermore, the back-end can supply alternative event codes for each event, and the constraint checking code will try all possible combinations of alternative event codes to try to find a combination that will fit. Signed-off-by: Paul Mackerras <paulus@samba.org>
-rw-r--r--arch/powerpc/include/asm/perf_counter.h62
-rw-r--r--arch/powerpc/kernel/Makefile1
-rw-r--r--arch/powerpc/kernel/perf_counter.c754
3 files changed, 817 insertions, 0 deletions
diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h
index 59530ae1d53c..9d7ff6d7fb56 100644
--- a/arch/powerpc/include/asm/perf_counter.h
+++ b/arch/powerpc/include/asm/perf_counter.h
@@ -8,3 +8,65 @@
8 * as published by the Free Software Foundation; either version 8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 */ 10 */
11#include <linux/types.h>
12
/* Capacity of the per-CPU counter/event arrays. */
#define MAX_HWCOUNTERS		8
/* Capacity of the per-event alternative-code arrays. */
#define MAX_EVENT_ALTERNATIVES	8
15
16/*
17 * This struct provides the constants and functions needed to
18 * describe the PMU on a particular POWER-family CPU.
19 */
20struct power_pmu {
21 int n_counter;
22 int max_alternatives;
23 u64 add_fields;
24 u64 test_adder;
25 int (*compute_mmcr)(unsigned int events[], int n_ev,
26 unsigned int hwc[], u64 mmcr[]);
27 int (*get_constraint)(unsigned int event, u64 *mskp, u64 *valp);
28 int (*get_alternatives)(unsigned int event, unsigned int alt[]);
29 void (*disable_pmc)(unsigned int pmc, u64 mmcr[]);
30 int n_generic;
31 int *generic_events;
32};
33
34extern struct power_pmu *ppmu;
35
36/*
37 * The power_pmu.get_constraint function returns a 64-bit value and
38 * a 64-bit mask that express the constraints between this event and
39 * other events.
40 *
41 * The value and mask are divided up into (non-overlapping) bitfields
42 * of three different types:
43 *
44 * Select field: this expresses the constraint that some set of bits
45 * in MMCR* needs to be set to a specific value for this event. For a
46 * select field, the mask contains 1s in every bit of the field, and
47 * the value contains a unique value for each possible setting of the
48 * MMCR* bits. The constraint checking code will ensure that two events
49 * that set the same field in their masks have the same value in their
50 * value dwords.
51 *
52 * Add field: this expresses the constraint that there can be at most
53 * N events in a particular class. A field of k bits can be used for
54 * N <= 2^(k-1) - 1. The mask has the most significant bit of the field
55 * set (and the other bits 0), and the value has only the least significant
56 * bit of the field set. In addition, the 'add_fields' and 'test_adder'
57 * in the struct power_pmu for this processor come into play. The
58 * add_fields value contains 1 in the LSB of the field, and the
59 * test_adder contains 2^(k-1) - 1 - N in the field.
60 *
61 * NAND field: this expresses the constraint that you may not have events
62 * in all of a set of classes. (For example, on PPC970, you can't select
63 * events from the FPU, ISU and IDU simultaneously, although any two are
64 * possible.) For N classes, the field is N+1 bits wide, and each class
65 * is assigned one bit from the least-significant N bits. The mask has
66 * only the most-significant bit set, and the value has only the bit
67 * for the event's class set. The test_adder has the least significant
68 * bit set in the field.
69 *
70 * If an event is not subject to the constraint expressed by a particular
71 * field, then it will have 0 in both the mask and value for that field.
72 */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 1308a86e9070..fde190bbb2bd 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -94,6 +94,7 @@ obj-$(CONFIG_AUDIT) += audit.o
94obj64-$(CONFIG_AUDIT) += compat_audit.o 94obj64-$(CONFIG_AUDIT) += compat_audit.o
95 95
96obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o 96obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
97obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o
97 98
98obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o 99obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o
99 100
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
new file mode 100644
index 000000000000..c7d4c2966a5c
--- /dev/null
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -0,0 +1,754 @@
1/*
2 * Performance counter support - powerpc architecture code
3 *
4 * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/kernel.h>
12#include <linux/sched.h>
13#include <linux/perf_counter.h>
14#include <linux/percpu.h>
15#include <linux/hardirq.h>
16#include <asm/reg.h>
17#include <asm/pmc.h>
18
19struct cpu_hw_counters {
20 int n_counters;
21 int n_percpu;
22 int disabled;
23 int n_added;
24 struct perf_counter *counter[MAX_HWCOUNTERS];
25 unsigned int events[MAX_HWCOUNTERS];
26 u64 mmcr[3];
27};
28DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters);
29
30struct power_pmu *ppmu;
31
/* Currently a no-op: there is nothing to print here. */
void perf_counter_print_debug(void)
{
	/* intentionally empty */
}
35
36/*
37 * Return 1 for a software counter, 0 for a hardware counter
38 */
39static inline int is_software_counter(struct perf_counter *counter)
40{
41 return !counter->hw_event.raw && counter->hw_event.type < 0;
42}
43
44/*
45 * Read one performance monitor counter (PMC).
46 */
47static unsigned long read_pmc(int idx)
48{
49 unsigned long val;
50
51 switch (idx) {
52 case 1:
53 val = mfspr(SPRN_PMC1);
54 break;
55 case 2:
56 val = mfspr(SPRN_PMC2);
57 break;
58 case 3:
59 val = mfspr(SPRN_PMC3);
60 break;
61 case 4:
62 val = mfspr(SPRN_PMC4);
63 break;
64 case 5:
65 val = mfspr(SPRN_PMC5);
66 break;
67 case 6:
68 val = mfspr(SPRN_PMC6);
69 break;
70 case 7:
71 val = mfspr(SPRN_PMC7);
72 break;
73 case 8:
74 val = mfspr(SPRN_PMC8);
75 break;
76 default:
77 printk(KERN_ERR "oops trying to read PMC%d\n", idx);
78 val = 0;
79 }
80 return val;
81}
82
83/*
84 * Write one PMC.
85 */
86static void write_pmc(int idx, unsigned long val)
87{
88 switch (idx) {
89 case 1:
90 mtspr(SPRN_PMC1, val);
91 break;
92 case 2:
93 mtspr(SPRN_PMC2, val);
94 break;
95 case 3:
96 mtspr(SPRN_PMC3, val);
97 break;
98 case 4:
99 mtspr(SPRN_PMC4, val);
100 break;
101 case 5:
102 mtspr(SPRN_PMC5, val);
103 break;
104 case 6:
105 mtspr(SPRN_PMC6, val);
106 break;
107 case 7:
108 mtspr(SPRN_PMC7, val);
109 break;
110 case 8:
111 mtspr(SPRN_PMC8, val);
112 break;
113 default:
114 printk(KERN_ERR "oops trying to write PMC%d\n", idx);
115 }
116}
117
118/*
119 * Check if a set of events can all go on the PMU at once.
120 * If they can't, this will look at alternative codes for the events
121 * and see if any combination of alternative codes is feasible.
122 * The feasible set is returned in event[].
123 */
124static int power_check_constraints(unsigned int event[], int n_ev)
125{
126 u64 mask, value, nv;
127 unsigned int alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
128 u64 amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
129 u64 avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
130 u64 smasks[MAX_HWCOUNTERS], svalues[MAX_HWCOUNTERS];
131 int n_alt[MAX_HWCOUNTERS], choice[MAX_HWCOUNTERS];
132 int i, j;
133 u64 addf = ppmu->add_fields;
134 u64 tadd = ppmu->test_adder;
135
136 if (n_ev > ppmu->n_counter)
137 return -1;
138
139 /* First see if the events will go on as-is */
140 for (i = 0; i < n_ev; ++i) {
141 alternatives[i][0] = event[i];
142 if (ppmu->get_constraint(event[i], &amasks[i][0],
143 &avalues[i][0]))
144 return -1;
145 choice[i] = 0;
146 }
147 value = mask = 0;
148 for (i = 0; i < n_ev; ++i) {
149 nv = (value | avalues[i][0]) + (value & avalues[i][0] & addf);
150 if ((((nv + tadd) ^ value) & mask) != 0 ||
151 (((nv + tadd) ^ avalues[i][0]) & amasks[i][0]) != 0)
152 break;
153 value = nv;
154 mask |= amasks[i][0];
155 }
156 if (i == n_ev)
157 return 0; /* all OK */
158
159 /* doesn't work, gather alternatives... */
160 if (!ppmu->get_alternatives)
161 return -1;
162 for (i = 0; i < n_ev; ++i) {
163 n_alt[i] = ppmu->get_alternatives(event[i], alternatives[i]);
164 for (j = 1; j < n_alt[i]; ++j)
165 ppmu->get_constraint(alternatives[i][j],
166 &amasks[i][j], &avalues[i][j]);
167 }
168
169 /* enumerate all possibilities and see if any will work */
170 i = 0;
171 j = -1;
172 value = mask = nv = 0;
173 while (i < n_ev) {
174 if (j >= 0) {
175 /* we're backtracking, restore context */
176 value = svalues[i];
177 mask = smasks[i];
178 j = choice[i];
179 }
180 /*
181 * See if any alternative k for event i,
182 * where k > j, will satisfy the constraints.
183 */
184 while (++j < n_alt[i]) {
185 nv = (value | avalues[i][j]) +
186 (value & avalues[i][j] & addf);
187 if ((((nv + tadd) ^ value) & mask) == 0 &&
188 (((nv + tadd) ^ avalues[i][j])
189 & amasks[i][j]) == 0)
190 break;
191 }
192 if (j >= n_alt[i]) {
193 /*
194 * No feasible alternative, backtrack
195 * to event i-1 and continue enumerating its
196 * alternatives from where we got up to.
197 */
198 if (--i < 0)
199 return -1;
200 } else {
201 /*
202 * Found a feasible alternative for event i,
203 * remember where we got up to with this event,
204 * go on to the next event, and start with
205 * the first alternative for it.
206 */
207 choice[i] = j;
208 svalues[i] = value;
209 smasks[i] = mask;
210 value = nv;
211 mask |= amasks[i][j];
212 ++i;
213 j = -1;
214 }
215 }
216
217 /* OK, we have a feasible combination, tell the caller the solution */
218 for (i = 0; i < n_ev; ++i)
219 event[i] = alternatives[i][choice[i]];
220 return 0;
221}
222
223static void power_perf_read(struct perf_counter *counter)
224{
225 long val, delta, prev;
226
227 if (!counter->hw.idx)
228 return;
229 /*
230 * Performance monitor interrupts come even when interrupts
231 * are soft-disabled, as long as interrupts are hard-enabled.
232 * Therefore we treat them like NMIs.
233 */
234 do {
235 prev = atomic64_read(&counter->hw.prev_count);
236 barrier();
237 val = read_pmc(counter->hw.idx);
238 } while (atomic64_cmpxchg(&counter->hw.prev_count, prev, val) != prev);
239
240 /* The counters are only 32 bits wide */
241 delta = (val - prev) & 0xfffffffful;
242 atomic64_add(delta, &counter->count);
243 atomic64_sub(delta, &counter->hw.period_left);
244}
245
246/*
247 * Disable all counters to prevent PMU interrupts and to allow
248 * counters to be added or removed.
249 */
250u64 hw_perf_save_disable(void)
251{
252 struct cpu_hw_counters *cpuhw;
253 unsigned long ret;
254 unsigned long flags;
255
256 local_irq_save(flags);
257 cpuhw = &__get_cpu_var(cpu_hw_counters);
258
259 ret = cpuhw->disabled;
260 if (!ret) {
261 cpuhw->disabled = 1;
262 cpuhw->n_added = 0;
263
264 /*
265 * Set the 'freeze counters' bit.
266 * The barrier is to make sure the mtspr has been
267 * executed and the PMU has frozen the counters
268 * before we return.
269 */
270 mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC);
271 mb();
272 }
273 local_irq_restore(flags);
274 return ret;
275}
276
277/*
278 * Re-enable all counters if disable == 0.
279 * If we were previously disabled and counters were added, then
280 * put the new config on the PMU.
281 */
282void hw_perf_restore(u64 disable)
283{
284 struct perf_counter *counter;
285 struct cpu_hw_counters *cpuhw;
286 unsigned long flags;
287 long i;
288 unsigned long val;
289 s64 left;
290 unsigned int hwc_index[MAX_HWCOUNTERS];
291
292 if (disable)
293 return;
294 local_irq_save(flags);
295 cpuhw = &__get_cpu_var(cpu_hw_counters);
296 cpuhw->disabled = 0;
297
298 /*
299 * If we didn't change anything, or only removed counters,
300 * no need to recalculate MMCR* settings and reset the PMCs.
301 * Just reenable the PMU with the current MMCR* settings
302 * (possibly updated for removal of counters).
303 */
304 if (!cpuhw->n_added) {
305 mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
306 mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
307 mtspr(SPRN_MMCR0, cpuhw->mmcr[0]);
308 goto out;
309 }
310
311 /*
312 * Compute MMCR* values for the new set of counters
313 */
314 if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_counters, hwc_index,
315 cpuhw->mmcr)) {
316 /* shouldn't ever get here */
317 printk(KERN_ERR "oops compute_mmcr failed\n");
318 goto out;
319 }
320
321 /*
322 * Write the new configuration to MMCR* with the freeze
323 * bit set and set the hardware counters to their initial values.
324 * Then unfreeze the counters.
325 */
326 mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
327 mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
328 mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
329 | MMCR0_FC);
330
331 /*
332 * Read off any pre-existing counters that need to move
333 * to another PMC.
334 */
335 for (i = 0; i < cpuhw->n_counters; ++i) {
336 counter = cpuhw->counter[i];
337 if (counter->hw.idx && counter->hw.idx != hwc_index[i] + 1) {
338 power_perf_read(counter);
339 write_pmc(counter->hw.idx, 0);
340 counter->hw.idx = 0;
341 }
342 }
343
344 /*
345 * Initialize the PMCs for all the new and moved counters.
346 */
347 for (i = 0; i < cpuhw->n_counters; ++i) {
348 counter = cpuhw->counter[i];
349 if (counter->hw.idx)
350 continue;
351 val = 0;
352 if (counter->hw_event.irq_period) {
353 left = atomic64_read(&counter->hw.period_left);
354 if (left < 0x80000000L)
355 val = 0x80000000L - left;
356 }
357 atomic64_set(&counter->hw.prev_count, val);
358 counter->hw.idx = hwc_index[i] + 1;
359 write_pmc(counter->hw.idx, val);
360 }
361 mb();
362 cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;
363 mtspr(SPRN_MMCR0, cpuhw->mmcr[0]);
364
365 out:
366 local_irq_restore(flags);
367}
368
369static int collect_events(struct perf_counter *group, int max_count,
370 struct perf_counter *ctrs[], unsigned int *events)
371{
372 int n = 0;
373 struct perf_counter *counter;
374
375 if (!is_software_counter(group)) {
376 if (n >= max_count)
377 return -1;
378 ctrs[n] = group;
379 events[n++] = group->hw.config;
380 }
381 list_for_each_entry(counter, &group->sibling_list, list_entry) {
382 if (!is_software_counter(counter) &&
383 counter->state != PERF_COUNTER_STATE_OFF) {
384 if (n >= max_count)
385 return -1;
386 ctrs[n] = counter;
387 events[n++] = counter->hw.config;
388 }
389 }
390 return n;
391}
392
393static void counter_sched_in(struct perf_counter *counter, int cpu)
394{
395 counter->state = PERF_COUNTER_STATE_ACTIVE;
396 counter->oncpu = cpu;
397 if (is_software_counter(counter))
398 counter->hw_ops->enable(counter);
399}
400
401/*
402 * Called to enable a whole group of counters.
403 * Returns 1 if the group was enabled, or -EAGAIN if it could not be.
404 * Assumes the caller has disabled interrupts and has
405 * frozen the PMU with hw_perf_save_disable.
406 */
407int hw_perf_group_sched_in(struct perf_counter *group_leader,
408 struct perf_cpu_context *cpuctx,
409 struct perf_counter_context *ctx, int cpu)
410{
411 struct cpu_hw_counters *cpuhw;
412 long i, n, n0;
413 struct perf_counter *sub;
414
415 cpuhw = &__get_cpu_var(cpu_hw_counters);
416 n0 = cpuhw->n_counters;
417 n = collect_events(group_leader, ppmu->n_counter - n0,
418 &cpuhw->counter[n0], &cpuhw->events[n0]);
419 if (n < 0)
420 return -EAGAIN;
421 if (power_check_constraints(cpuhw->events, n + n0))
422 return -EAGAIN;
423 cpuhw->n_counters = n0 + n;
424 cpuhw->n_added += n;
425
426 /*
427 * OK, this group can go on; update counter states etc.,
428 * and enable any software counters
429 */
430 for (i = n0; i < n0 + n; ++i)
431 cpuhw->counter[i]->hw.config = cpuhw->events[i];
432 n = 1;
433 counter_sched_in(group_leader, cpu);
434 list_for_each_entry(sub, &group_leader->sibling_list, list_entry) {
435 if (sub->state != PERF_COUNTER_STATE_OFF) {
436 counter_sched_in(sub, cpu);
437 ++n;
438 }
439 }
440 cpuctx->active_oncpu += n;
441 ctx->nr_active += n;
442
443 return 1;
444}
445
446/*
447 * Add a counter to the PMU.
448 * If all counters are not already frozen, then we disable and
449 * re-enable the PMU in order to get hw_perf_restore to do the
450 * actual work of reconfiguring the PMU.
451 */
452static int power_perf_enable(struct perf_counter *counter)
453{
454 struct cpu_hw_counters *cpuhw;
455 unsigned long flags;
456 u64 pmudis;
457 int n0;
458 int ret = -EAGAIN;
459
460 local_irq_save(flags);
461 pmudis = hw_perf_save_disable();
462
463 /*
464 * Add the counter to the list (if there is room)
465 * and check whether the total set is still feasible.
466 */
467 cpuhw = &__get_cpu_var(cpu_hw_counters);
468 n0 = cpuhw->n_counters;
469 if (n0 >= ppmu->n_counter)
470 goto out;
471 cpuhw->counter[n0] = counter;
472 cpuhw->events[n0] = counter->hw.config;
473 if (power_check_constraints(cpuhw->events, n0 + 1))
474 goto out;
475
476 counter->hw.config = cpuhw->events[n0];
477 ++cpuhw->n_counters;
478 ++cpuhw->n_added;
479
480 ret = 0;
481 out:
482 hw_perf_restore(pmudis);
483 local_irq_restore(flags);
484 return ret;
485}
486
487/*
488 * Remove a counter from the PMU.
489 */
490static void power_perf_disable(struct perf_counter *counter)
491{
492 struct cpu_hw_counters *cpuhw;
493 long i;
494 u64 pmudis;
495 unsigned long flags;
496
497 local_irq_save(flags);
498 pmudis = hw_perf_save_disable();
499
500 power_perf_read(counter);
501
502 cpuhw = &__get_cpu_var(cpu_hw_counters);
503 for (i = 0; i < cpuhw->n_counters; ++i) {
504 if (counter == cpuhw->counter[i]) {
505 while (++i < cpuhw->n_counters)
506 cpuhw->counter[i-1] = cpuhw->counter[i];
507 --cpuhw->n_counters;
508 ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr);
509 write_pmc(counter->hw.idx, 0);
510 counter->hw.idx = 0;
511 break;
512 }
513 }
514 if (cpuhw->n_counters == 0) {
515 /* disable exceptions if no counters are running */
516 cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE);
517 }
518
519 hw_perf_restore(pmudis);
520 local_irq_restore(flags);
521}
522
523struct hw_perf_counter_ops power_perf_ops = {
524 .enable = power_perf_enable,
525 .disable = power_perf_disable,
526 .read = power_perf_read
527};
528
529const struct hw_perf_counter_ops *
530hw_perf_counter_init(struct perf_counter *counter)
531{
532 unsigned long ev;
533 struct perf_counter *ctrs[MAX_HWCOUNTERS];
534 unsigned int events[MAX_HWCOUNTERS];
535 int n;
536
537 if (!ppmu)
538 return NULL;
539 if ((s64)counter->hw_event.irq_period < 0)
540 return NULL;
541 ev = counter->hw_event.type;
542 if (!counter->hw_event.raw) {
543 if (ev >= ppmu->n_generic ||
544 ppmu->generic_events[ev] == 0)
545 return NULL;
546 ev = ppmu->generic_events[ev];
547 }
548 counter->hw.config_base = ev;
549 counter->hw.idx = 0;
550
551 /*
552 * If this is in a group, check if it can go on with all the
553 * other hardware counters in the group. We assume the counter
554 * hasn't been linked into its leader's sibling list at this point.
555 */
556 n = 0;
557 if (counter->group_leader != counter) {
558 n = collect_events(counter->group_leader, ppmu->n_counter - 1,
559 ctrs, events);
560 if (n < 0)
561 return NULL;
562 }
563 events[n++] = ev;
564 if (power_check_constraints(events, n))
565 return NULL;
566
567 counter->hw.config = events[n - 1];
568 atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period);
569 return &power_perf_ops;
570}
571
572/*
573 * Handle wakeups.
574 */
575void perf_counter_do_pending(void)
576{
577 int i;
578 struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters);
579 struct perf_counter *counter;
580
581 set_perf_counter_pending(0);
582 for (i = 0; i < cpuhw->n_counters; ++i) {
583 counter = cpuhw->counter[i];
584 if (counter && counter->wakeup_pending) {
585 counter->wakeup_pending = 0;
586 wake_up(&counter->waitq);
587 }
588 }
589}
590
591/*
592 * Record data for an irq counter.
593 * This function was lifted from the x86 code; maybe it should
594 * go in the core?
595 */
596static void perf_store_irq_data(struct perf_counter *counter, u64 data)
597{
598 struct perf_data *irqdata = counter->irqdata;
599
600 if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) {
601 irqdata->overrun++;
602 } else {
603 u64 *p = (u64 *) &irqdata->data[irqdata->len];
604
605 *p = data;
606 irqdata->len += sizeof(u64);
607 }
608}
609
610/*
611 * Record all the values of the counters in a group
612 */
613static void perf_handle_group(struct perf_counter *counter)
614{
615 struct perf_counter *leader, *sub;
616
617 leader = counter->group_leader;
618 list_for_each_entry(sub, &leader->sibling_list, list_entry) {
619 if (sub != counter)
620 sub->hw_ops->read(sub);
621 perf_store_irq_data(counter, sub->hw_event.type);
622 perf_store_irq_data(counter, atomic64_read(&sub->count));
623 }
624}
625
626/*
627 * A counter has overflowed; update its count and record
628 * things if requested. Note that interrupts are hard-disabled
629 * here so there is no possibility of being interrupted.
630 */
631static void record_and_restart(struct perf_counter *counter, long val,
632 struct pt_regs *regs)
633{
634 s64 prev, delta, left;
635 int record = 0;
636
637 /* we don't have to worry about interrupts here */
638 prev = atomic64_read(&counter->hw.prev_count);
639 delta = (val - prev) & 0xfffffffful;
640 atomic64_add(delta, &counter->count);
641
642 /*
643 * See if the total period for this counter has expired,
644 * and update for the next period.
645 */
646 val = 0;
647 left = atomic64_read(&counter->hw.period_left) - delta;
648 if (counter->hw_event.irq_period) {
649 if (left <= 0) {
650 left += counter->hw_event.irq_period;
651 if (left <= 0)
652 left = counter->hw_event.irq_period;
653 record = 1;
654 }
655 if (left < 0x80000000L)
656 val = 0x80000000L - left;
657 }
658 write_pmc(counter->hw.idx, val);
659 atomic64_set(&counter->hw.prev_count, val);
660 atomic64_set(&counter->hw.period_left, left);
661
662 /*
663 * Finally record data if requested.
664 */
665 if (record) {
666 switch (counter->hw_event.record_type) {
667 case PERF_RECORD_SIMPLE:
668 break;
669 case PERF_RECORD_IRQ:
670 perf_store_irq_data(counter, instruction_pointer(regs));
671 counter->wakeup_pending = 1;
672 break;
673 case PERF_RECORD_GROUP:
674 perf_handle_group(counter);
675 counter->wakeup_pending = 1;
676 break;
677 }
678 }
679}
680
681/*
682 * Performance monitor interrupt stuff
683 */
684static void perf_counter_interrupt(struct pt_regs *regs)
685{
686 int i;
687 struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters);
688 struct perf_counter *counter;
689 long val;
690 int need_wakeup = 0, found = 0;
691
692 for (i = 0; i < cpuhw->n_counters; ++i) {
693 counter = cpuhw->counter[i];
694 val = read_pmc(counter->hw.idx);
695 if ((int)val < 0) {
696 /* counter has overflowed */
697 found = 1;
698 record_and_restart(counter, val, regs);
699 if (counter->wakeup_pending)
700 need_wakeup = 1;
701 }
702 }
703
704 /*
705 * In case we didn't find and reset the counter that caused
706 * the interrupt, scan all counters and reset any that are
707 * negative, to avoid getting continual interrupts.
708 * Any that we processed in the previous loop will not be negative.
709 */
710 if (!found) {
711 for (i = 0; i < ppmu->n_counter; ++i) {
712 val = read_pmc(i + 1);
713 if ((int)val < 0)
714 write_pmc(i + 1, 0);
715 }
716 }
717
718 /*
719 * Reset MMCR0 to its normal value. This will set PMXE and
720 * clear FC (freeze counters) and PMAO (perf mon alert occurred)
721 * and thus allow interrupts to occur again.
722 * XXX might want to use MSR.PM to keep the counters frozen until
723 * we get back out of this interrupt.
724 */
725 mtspr(SPRN_MMCR0, cpuhw->mmcr[0]);
726
727 /*
728 * If we need a wakeup, check whether interrupts were soft-enabled
729 * when we took the interrupt. If they were, we can wake stuff up
730 * immediately; otherwise we'll have to set a flag and do the
731 * wakeup when interrupts get soft-enabled.
732 */
733 if (need_wakeup) {
734 if (regs->softe) {
735 irq_enter();
736 perf_counter_do_pending();
737 irq_exit();
738 } else {
739 set_perf_counter_pending(1);
740 }
741 }
742}
743
744static int init_perf_counters(void)
745{
746 if (reserve_pmc_hardware(perf_counter_interrupt)) {
747 printk(KERN_ERR "Couldn't init performance monitor subsystem\n");
748 return -EBUSY;
749 }
750
751 return 0;
752}
753
754arch_initcall(init_perf_counters);