author		Paul Mackerras <paulus@samba.org>	2009-04-29 08:38:51 -0400
committer	Ingo Molnar <mingo@elte.hu>		2009-04-29 08:58:35 -0400
commit		ab7ef2e50a557af92f4f90689f51fadadafc16b2 (patch)
tree		71ef1cbc279e5d2ad96b6c701617ac60ff36c363 /arch/powerpc/kernel/perf_counter.c
parent		98144511427c192e4249ff66a3f9debc55c59411 (diff)
perf_counter: powerpc: allow use of limited-function counters
POWER5+ and POWER6 have two hardware counters with limited functionality: PMC5 counts instructions completed in run state and PMC6 counts cycles in run state. (Run state is the state when a hardware RUN bit is 1; the idle task clears RUN while waiting for work to do and sets it when there is work to do.) These counters can't be written to by the kernel, can't generate interrupts, and don't obey the freeze conditions. That means we can only use them for per-task counters (where we know we'll always be in run state; we can't put a per-task counter on an idle task), and only if we don't want interrupts and we do want to count in all processor modes.

Obviously some counters can't go on a limited hardware counter, but there are also situations where we can only put a counter on a limited hardware counter - if there are already counters on the PMU that exclude some processor modes and we want to put on a per-task cycle or instruction counter that doesn't exclude any processor mode, it could go on if it can use a limited hardware counter.

To keep track of these constraints, this adds a flags argument to the processor-specific get_alternatives() functions, with three bits defined: one to say that we can accept alternative event codes that go on limited counters, one to say we only want alternatives on limited counters, and one to say that this is a per-task counter and therefore events that are gated by run state are equivalent to those that aren't (e.g. a "cycles" event is equivalent to a "cycles in run state" event). These flags are computed for each counter and stored in the counter->hw.counter_base field (slightly wonky name for what it does, but it was an existing unused field).

Since the limited counters don't freeze when we freeze the other counters, we need some special handling to avoid getting skew between things counted on the limited counters and those counted on normal counters. To minimize this skew, if we are using any limited counters, we read PMC5 and PMC6 immediately after setting and clearing the freeze bit. This is done in a single asm in the new write_mmcr0() function.

The code here is specific to PMC5 and PMC6 being the limited hardware counters. Being more general (e.g. having a bitmap of limited hardware counter numbers) would have meant more complex code to read the limited counters when freezing and unfreezing the normal counters, with conditional branches, which would have increased the skew. Since it isn't necessary for the code to be more general at this stage, it isn't.

This also extends the back-ends for POWER5+ and POWER6 to be able to handle up to 6 counters rather than the 4 they previously handled.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Robert Richter <robert.richter@amd.com>
LKML-Reference: <18936.19035.163066.892208@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
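For readers following the description above, the sketch below models how the three new flag bits might combine for a given counter. It is an illustrative, user-space-only approximation, not code from this patch: the flag names match the patch, but their numeric values, the struct sample_counter type and the compute_flags() helper are assumptions made for the example. The authoritative logic is hw_perf_counter_init(), can_go_on_limited_pmc() and check_excludes() in the diff below.

/*
 * Illustrative sketch only -- not part of the patch.  Flag values and
 * the helper below are assumed; see hw_perf_counter_init() in the diff
 * for the real decision.
 */
#include <stdio.h>

#define PPMU_LIMITED_PMC_OK	1	/* assumed value: a limited PMC may be used */
#define PPMU_LIMITED_PMC_REQD	2	/* assumed value: a limited PMC must be used */
#define PPMU_ONLY_COUNT_RUN	4	/* assumed value: RUN-gated events are equivalent */

/* Hypothetical stand-ins for the per-counter properties the patch inspects. */
struct sample_counter {
	int per_task;		/* attached to a task rather than a CPU */
	int wants_interrupts;	/* irq_period != 0 */
	int excludes_modes;	/* any of exclude_user/kernel/hv set */
	int has_limited_form;	/* event (or an alternative) exists on PMC5/PMC6 */
};

static unsigned int compute_flags(const struct sample_counter *c)
{
	unsigned int flags = 0;

	/* Per-task counters may use RUN-gated event codes interchangeably. */
	if (c->per_task)
		flags |= PPMU_ONLY_COUNT_RUN;
	/*
	 * A limited PMC is acceptable only if the counter needs no
	 * interrupts, excludes no processor mode, and the event can be
	 * counted (directly or via an alternative) on PMC5 or PMC6.
	 * PPMU_LIMITED_PMC_REQD is set later, by the equivalent of
	 * check_excludes(), when the already-scheduled counters exclude
	 * some processor mode.
	 */
	if (!c->wants_interrupts && !c->excludes_modes && c->has_limited_form)
		flags |= PPMU_LIMITED_PMC_OK;
	return flags;
}

int main(void)
{
	/* A per-task cycle counter with no exclusions and no sampling. */
	struct sample_counter c = { .per_task = 1, .has_limited_form = 1 };

	printf("flags = %#x\n", compute_flags(&c));
	return 0;
}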
Diffstat (limited to 'arch/powerpc/kernel/perf_counter.c')
-rw-r--r--	arch/powerpc/kernel/perf_counter.c	297
1 file changed, 262 insertions(+), 35 deletions(-)
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index d9bbe5efc649..15cdc8e67229 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -23,10 +23,14 @@ struct cpu_hw_counters {
 	int n_percpu;
 	int disabled;
 	int n_added;
+	int n_limited;
+	u8  pmcs_enabled;
 	struct perf_counter *counter[MAX_HWCOUNTERS];
 	unsigned int events[MAX_HWCOUNTERS];
+	unsigned int flags[MAX_HWCOUNTERS];
 	u64 mmcr[3];
-	u8  pmcs_enabled;
+	struct perf_counter *limited_counter[MAX_LIMITED_HWCOUNTERS];
+	u8  limited_hwidx[MAX_LIMITED_HWCOUNTERS];
 };
 DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters);
 
@@ -127,7 +131,8 @@ static void write_pmc(int idx, unsigned long val)
  * and see if any combination of alternative codes is feasible.
  * The feasible set is returned in event[].
  */
-static int power_check_constraints(unsigned int event[], int n_ev)
+static int power_check_constraints(unsigned int event[], unsigned int cflags[],
+				   int n_ev)
 {
 	u64 mask, value, nv;
 	unsigned int alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
@@ -144,11 +149,15 @@ static int power_check_constraints(unsigned int event[], int n_ev)
 
 	/* First see if the events will go on as-is */
 	for (i = 0; i < n_ev; ++i) {
-		alternatives[i][0] = event[i];
+		if ((cflags[i] & PPMU_LIMITED_PMC_REQD)
+		    && !ppmu->limited_pmc_event(event[i])) {
+			ppmu->get_alternatives(event[i], cflags[i],
+					       alternatives[i]);
+			event[i] = alternatives[i][0];
+		}
 		if (ppmu->get_constraint(event[i], &amasks[i][0],
 					 &avalues[i][0]))
 			return -1;
-		choice[i] = 0;
 	}
 	value = mask = 0;
 	for (i = 0; i < n_ev; ++i) {
@@ -166,7 +175,9 @@ static int power_check_constraints(unsigned int event[], int n_ev)
 	if (!ppmu->get_alternatives)
 		return -1;
 	for (i = 0; i < n_ev; ++i) {
-		n_alt[i] = ppmu->get_alternatives(event[i], alternatives[i]);
+		choice[i] = 0;
+		n_alt[i] = ppmu->get_alternatives(event[i], cflags[i],
+						  alternatives[i]);
 		for (j = 1; j < n_alt[i]; ++j)
 			ppmu->get_constraint(alternatives[i][j],
 					     &amasks[i][j], &avalues[i][j]);
@@ -231,28 +242,41 @@ static int power_check_constraints(unsigned int event[], int n_ev)
  * exclude_{user,kernel,hv} with each other and any previously
  * added counters.
  */
-static int check_excludes(struct perf_counter **ctrs, int n_prev, int n_new)
+static int check_excludes(struct perf_counter **ctrs, unsigned int cflags[],
+			  int n_prev, int n_new)
 {
-	int eu, ek, eh;
-	int i, n;
+	int eu = 0, ek = 0, eh = 0;
+	int i, n, first;
 	struct perf_counter *counter;
 
 	n = n_prev + n_new;
 	if (n <= 1)
 		return 0;
 
-	eu = ctrs[0]->hw_event.exclude_user;
-	ek = ctrs[0]->hw_event.exclude_kernel;
-	eh = ctrs[0]->hw_event.exclude_hv;
-	if (n_prev == 0)
-		n_prev = 1;
-	for (i = n_prev; i < n; ++i) {
+	first = 1;
+	for (i = 0; i < n; ++i) {
+		if (cflags[i] & PPMU_LIMITED_PMC_OK) {
+			cflags[i] &= ~PPMU_LIMITED_PMC_REQD;
+			continue;
+		}
 		counter = ctrs[i];
-		if (counter->hw_event.exclude_user != eu ||
-		    counter->hw_event.exclude_kernel != ek ||
-		    counter->hw_event.exclude_hv != eh)
+		if (first) {
+			eu = counter->hw_event.exclude_user;
+			ek = counter->hw_event.exclude_kernel;
+			eh = counter->hw_event.exclude_hv;
+			first = 0;
+		} else if (counter->hw_event.exclude_user != eu ||
+			   counter->hw_event.exclude_kernel != ek ||
+			   counter->hw_event.exclude_hv != eh) {
 			return -EAGAIN;
+		}
 	}
+
+	if (eu || ek || eh)
+		for (i = 0; i < n; ++i)
+			if (cflags[i] & PPMU_LIMITED_PMC_OK)
+				cflags[i] |= PPMU_LIMITED_PMC_REQD;
+
 	return 0;
 }
 
@@ -280,6 +304,85 @@ static void power_pmu_read(struct perf_counter *counter)
 }
 
 /*
+ * On some machines, PMC5 and PMC6 can't be written, don't respect
+ * the freeze conditions, and don't generate interrupts. This tells
+ * us if `counter' is using such a PMC.
+ */
+static int is_limited_pmc(int pmcnum)
+{
+	return ppmu->limited_pmc5_6 && (pmcnum == 5 || pmcnum == 6);
+}
+
+static void freeze_limited_counters(struct cpu_hw_counters *cpuhw,
+				    unsigned long pmc5, unsigned long pmc6)
+{
+	struct perf_counter *counter;
+	u64 val, prev, delta;
+	int i;
+
+	for (i = 0; i < cpuhw->n_limited; ++i) {
+		counter = cpuhw->limited_counter[i];
+		if (!counter->hw.idx)
+			continue;
+		val = (counter->hw.idx == 5) ? pmc5 : pmc6;
+		prev = atomic64_read(&counter->hw.prev_count);
+		counter->hw.idx = 0;
+		delta = (val - prev) & 0xfffffffful;
+		atomic64_add(delta, &counter->count);
+	}
+}
+
+static void thaw_limited_counters(struct cpu_hw_counters *cpuhw,
+				  unsigned long pmc5, unsigned long pmc6)
+{
+	struct perf_counter *counter;
+	u64 val;
+	int i;
+
+	for (i = 0; i < cpuhw->n_limited; ++i) {
+		counter = cpuhw->limited_counter[i];
+		counter->hw.idx = cpuhw->limited_hwidx[i];
+		val = (counter->hw.idx == 5) ? pmc5 : pmc6;
+		atomic64_set(&counter->hw.prev_count, val);
+		perf_counter_update_userpage(counter);
+	}
+}
+
+/*
+ * Since limited counters don't respect the freeze conditions, we
+ * have to read them immediately after freezing or unfreezing the
+ * other counters. We try to keep the values from the limited
+ * counters as consistent as possible by keeping the delay (in
+ * cycles and instructions) between freezing/unfreezing and reading
+ * the limited counters as small and consistent as possible.
+ * Therefore, if any limited counters are in use, we read them
+ * both, and always in the same order, to minimize variability,
+ * and do it inside the same asm that writes MMCR0.
+ */
+static void write_mmcr0(struct cpu_hw_counters *cpuhw, unsigned long mmcr0)
+{
+	unsigned long pmc5, pmc6;
+
+	if (!cpuhw->n_limited) {
+		mtspr(SPRN_MMCR0, mmcr0);
+		return;
+	}
+
+	/*
+	 * Write MMCR0, then read PMC5 and PMC6 immediately.
+	 */
+	asm volatile("mtspr %3,%2; mfspr %0,%4; mfspr %1,%5"
+		     : "=&r" (pmc5), "=&r" (pmc6)
+		     : "r" (mmcr0), "i" (SPRN_MMCR0),
+		       "i" (SPRN_PMC5), "i" (SPRN_PMC6));
+
+	if (mmcr0 & MMCR0_FC)
+		freeze_limited_counters(cpuhw, pmc5, pmc6);
+	else
+		thaw_limited_counters(cpuhw, pmc5, pmc6);
+}
+
+/*
  * Disable all counters to prevent PMU interrupts and to allow
  * counters to be added or removed.
  */
@@ -321,7 +424,7 @@ u64 hw_perf_save_disable(void)
 		 * executed and the PMU has frozen the counters
 		 * before we return.
 		 */
-		mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC);
+		write_mmcr0(cpuhw, mfspr(SPRN_MMCR0) | MMCR0_FC);
 		mb();
 	}
 	local_irq_restore(flags);
@@ -342,6 +445,8 @@ void hw_perf_restore(u64 disable)
 	unsigned long val;
 	s64 left;
 	unsigned int hwc_index[MAX_HWCOUNTERS];
+	int n_lim;
+	int idx;
 
 	if (disable)
 		return;
@@ -414,10 +519,18 @@ void hw_perf_restore(u64 disable)
 	/*
 	 * Initialize the PMCs for all the new and moved counters.
 	 */
+	cpuhw->n_limited = n_lim = 0;
 	for (i = 0; i < cpuhw->n_counters; ++i) {
 		counter = cpuhw->counter[i];
 		if (counter->hw.idx)
 			continue;
+		idx = hwc_index[i] + 1;
+		if (is_limited_pmc(idx)) {
+			cpuhw->limited_counter[n_lim] = counter;
+			cpuhw->limited_hwidx[n_lim] = idx;
+			++n_lim;
+			continue;
+		}
 		val = 0;
 		if (counter->hw_event.irq_period) {
 			left = atomic64_read(&counter->hw.period_left);
@@ -425,15 +538,16 @@ void hw_perf_restore(u64 disable)
 				val = 0x80000000L - left;
 		}
 		atomic64_set(&counter->hw.prev_count, val);
-		counter->hw.idx = hwc_index[i] + 1;
-		write_pmc(counter->hw.idx, val);
+		counter->hw.idx = idx;
+		write_pmc(idx, val);
 		perf_counter_update_userpage(counter);
 	}
+	cpuhw->n_limited = n_lim;
 	cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;
 
  out_enable:
 	mb();
-	mtspr(SPRN_MMCR0, cpuhw->mmcr[0]);
+	write_mmcr0(cpuhw, cpuhw->mmcr[0]);
 
 	/*
 	 * Enable instruction sampling if necessary
@@ -448,7 +562,8 @@ void hw_perf_restore(u64 disable)
 }
 
 static int collect_events(struct perf_counter *group, int max_count,
-			  struct perf_counter *ctrs[], unsigned int *events)
+			  struct perf_counter *ctrs[], unsigned int *events,
+			  unsigned int *flags)
 {
 	int n = 0;
 	struct perf_counter *counter;
@@ -457,6 +572,7 @@ static int collect_events(struct perf_counter *group, int max_count,
 		if (n >= max_count)
 			return -1;
 		ctrs[n] = group;
+		flags[n] = group->hw.counter_base;
 		events[n++] = group->hw.config;
 	}
 	list_for_each_entry(counter, &group->sibling_list, list_entry) {
@@ -465,6 +581,7 @@ static int collect_events(struct perf_counter *group, int max_count,
 			if (n >= max_count)
 				return -1;
 			ctrs[n] = counter;
+			flags[n] = counter->hw.counter_base;
 			events[n++] = counter->hw.config;
 		}
 	}
@@ -497,12 +614,14 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader,
 	cpuhw = &__get_cpu_var(cpu_hw_counters);
 	n0 = cpuhw->n_counters;
 	n = collect_events(group_leader, ppmu->n_counter - n0,
-			   &cpuhw->counter[n0], &cpuhw->events[n0]);
+			   &cpuhw->counter[n0], &cpuhw->events[n0],
+			   &cpuhw->flags[n0]);
 	if (n < 0)
 		return -EAGAIN;
-	if (check_excludes(cpuhw->counter, n0, n))
+	if (check_excludes(cpuhw->counter, cpuhw->flags, n0, n))
 		return -EAGAIN;
-	if (power_check_constraints(cpuhw->events, n + n0))
+	i = power_check_constraints(cpuhw->events, cpuhw->flags, n + n0);
+	if (i < 0)
 		return -EAGAIN;
 	cpuhw->n_counters = n0 + n;
 	cpuhw->n_added += n;
@@ -554,9 +673,10 @@ static int power_pmu_enable(struct perf_counter *counter)
 		goto out;
 	cpuhw->counter[n0] = counter;
 	cpuhw->events[n0] = counter->hw.config;
-	if (check_excludes(cpuhw->counter, n0, 1))
+	cpuhw->flags[n0] = counter->hw.counter_base;
+	if (check_excludes(cpuhw->counter, cpuhw->flags, n0, 1))
 		goto out;
-	if (power_check_constraints(cpuhw->events, n0 + 1))
+	if (power_check_constraints(cpuhw->events, cpuhw->flags, n0 + 1))
 		goto out;
 
 	counter->hw.config = cpuhw->events[n0];
@@ -592,12 +712,24 @@ static void power_pmu_disable(struct perf_counter *counter)
 				cpuhw->counter[i-1] = cpuhw->counter[i];
 			--cpuhw->n_counters;
 			ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr);
-			write_pmc(counter->hw.idx, 0);
-			counter->hw.idx = 0;
+			if (counter->hw.idx) {
+				write_pmc(counter->hw.idx, 0);
+				counter->hw.idx = 0;
+			}
 			perf_counter_update_userpage(counter);
 			break;
 		}
 	}
+	for (i = 0; i < cpuhw->n_limited; ++i)
+		if (counter == cpuhw->limited_counter[i])
+			break;
+	if (i < cpuhw->n_limited) {
+		while (++i < cpuhw->n_limited) {
+			cpuhw->limited_counter[i-1] = cpuhw->limited_counter[i];
+			cpuhw->limited_hwidx[i-1] = cpuhw->limited_hwidx[i];
+		}
+		--cpuhw->n_limited;
+	}
 	if (cpuhw->n_counters == 0) {
 		/* disable exceptions if no counters are running */
 		cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE);
@@ -613,6 +745,61 @@ struct pmu power_pmu = {
 	.read		= power_pmu_read,
 };
 
+/*
+ * Return 1 if we might be able to put counter on a limited PMC,
+ * or 0 if not.
+ * A counter can only go on a limited PMC if it counts something
+ * that a limited PMC can count, doesn't require interrupts, and
+ * doesn't exclude any processor mode.
+ */
+static int can_go_on_limited_pmc(struct perf_counter *counter, unsigned int ev,
+				 unsigned int flags)
+{
+	int n;
+	unsigned int alt[MAX_EVENT_ALTERNATIVES];
+
+	if (counter->hw_event.exclude_user
+	    || counter->hw_event.exclude_kernel
+	    || counter->hw_event.exclude_hv
+	    || counter->hw_event.irq_period)
+		return 0;
+
+	if (ppmu->limited_pmc_event(ev))
+		return 1;
+
+	/*
+	 * The requested event isn't on a limited PMC already;
+	 * see if any alternative code goes on a limited PMC.
+	 */
+	if (!ppmu->get_alternatives)
+		return 0;
+
+	flags |= PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD;
+	n = ppmu->get_alternatives(ev, flags, alt);
+	if (n)
+		return alt[0];
+
+	return 0;
+}
+
+/*
+ * Find an alternative event that goes on a normal PMC, if possible,
+ * and return the event code, or 0 if there is no such alternative.
+ * (Note: event code 0 is "don't count" on all machines.)
+ */
+static unsigned long normal_pmc_alternative(unsigned long ev,
+					    unsigned long flags)
+{
+	unsigned int alt[MAX_EVENT_ALTERNATIVES];
+	int n;
+
+	flags &= ~(PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD);
+	n = ppmu->get_alternatives(ev, flags, alt);
+	if (!n)
+		return 0;
+	return alt[0];
+}
+
 /* Number of perf_counters counting hardware events */
 static atomic_t num_counters;
 /* Used to avoid races in calling reserve/release_pmc_hardware */
@@ -633,9 +820,10 @@ static void hw_perf_counter_destroy(struct perf_counter *counter)
 
 const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 {
-	unsigned long ev;
+	unsigned long ev, flags;
 	struct perf_counter *ctrs[MAX_HWCOUNTERS];
 	unsigned int events[MAX_HWCOUNTERS];
+	unsigned int cflags[MAX_HWCOUNTERS];
 	int n;
 	int err;
 
@@ -661,7 +849,36 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 	 */
 	if (!firmware_has_feature(FW_FEATURE_LPAR))
 		counter->hw_event.exclude_hv = 0;
-	
+
+	/*
+	 * If this is a per-task counter, then we can use
+	 * PM_RUN_* events interchangeably with their non RUN_*
+	 * equivalents, e.g. PM_RUN_CYC instead of PM_CYC.
+	 * XXX we should check if the task is an idle task.
+	 */
+	flags = 0;
+	if (counter->ctx->task)
+		flags |= PPMU_ONLY_COUNT_RUN;
+
+	/*
+	 * If this machine has limited counters, check whether this
+	 * event could go on a limited counter.
+	 */
+	if (ppmu->limited_pmc5_6) {
+		if (can_go_on_limited_pmc(counter, ev, flags)) {
+			flags |= PPMU_LIMITED_PMC_OK;
+		} else if (ppmu->limited_pmc_event(ev)) {
+			/*
+			 * The requested event is on a limited PMC,
+			 * but we can't use a limited PMC; see if any
+			 * alternative goes on a normal PMC.
+			 */
+			ev = normal_pmc_alternative(ev, flags);
+			if (!ev)
+				return ERR_PTR(-EINVAL);
+		}
+	}
+
 	/*
 	 * If this is in a group, check if it can go on with all the
 	 * other hardware counters in the group. We assume the counter
@@ -670,18 +887,20 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 	n = 0;
 	if (counter->group_leader != counter) {
 		n = collect_events(counter->group_leader, ppmu->n_counter - 1,
-				   ctrs, events);
+				   ctrs, events, cflags);
 		if (n < 0)
 			return ERR_PTR(-EINVAL);
 	}
 	events[n] = ev;
 	ctrs[n] = counter;
-	if (check_excludes(ctrs, n, 1))
+	cflags[n] = flags;
+	if (check_excludes(ctrs, cflags, n, 1))
 		return ERR_PTR(-EINVAL);
-	if (power_check_constraints(events, n + 1))
+	if (power_check_constraints(events, cflags, n + 1))
 		return ERR_PTR(-EINVAL);
 
 	counter->hw.config = events[n];
+	counter->hw.counter_base = cflags[n];
 	atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period);
 
 	/*
@@ -763,6 +982,10 @@ static void perf_counter_interrupt(struct pt_regs *regs)
 	int found = 0;
 	int nmi;
 
+	if (cpuhw->n_limited)
+		freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5),
+					mfspr(SPRN_PMC6));
+
 	/*
 	 * If interrupts were soft-disabled when this PMU interrupt
 	 * occurred, treat it as an NMI.
@@ -775,6 +998,8 @@ static void perf_counter_interrupt(struct pt_regs *regs)
 
 	for (i = 0; i < cpuhw->n_counters; ++i) {
 		counter = cpuhw->counter[i];
+		if (is_limited_pmc(counter->hw.idx))
+			continue;
 		val = read_pmc(counter->hw.idx);
 		if ((int)val < 0) {
 			/* counter has overflowed */
@@ -791,6 +1016,8 @@ static void perf_counter_interrupt(struct pt_regs *regs)
 	 */
 	if (!found) {
 		for (i = 0; i < ppmu->n_counter; ++i) {
+			if (is_limited_pmc(i + 1))
+				continue;
 			val = read_pmc(i + 1);
 			if ((int)val < 0)
 				write_pmc(i + 1, 0);
@@ -804,7 +1031,7 @@ static void perf_counter_interrupt(struct pt_regs *regs)
 	 * XXX might want to use MSR.PM to keep the counters frozen until
 	 * we get back out of this interrupt.
 	 */
-	mtspr(SPRN_MMCR0, cpuhw->mmcr[0]);
+	write_mmcr0(cpuhw, cpuhw->mmcr[0]);
 
 	if (nmi)
 		nmi_exit();