about summary refs log tree commit diff stats
path: root/arch/powerpc/kernel
diff options
context:
space:
mode:
author Paul Mackerras <paulus@samba.org> 2009-04-29 08:38:51 -0400
committer Ingo Molnar <mingo@elte.hu> 2009-04-29 08:58:35 -0400
commit ab7ef2e50a557af92f4f90689f51fadadafc16b2 (patch)
tree 71ef1cbc279e5d2ad96b6c701617ac60ff36c363 /arch/powerpc/kernel
parent 98144511427c192e4249ff66a3f9debc55c59411 (diff)
perf_counter: powerpc: allow use of limited-function counters
POWER5+ and POWER6 have two hardware counters with limited functionality: PMC5 counts instructions completed in run state and PMC6 counts cycles in run state. (Run state is the state when a hardware RUN bit is 1; the idle task clears RUN while waiting for work to do and sets it when there is work to do.) These counters can't be written to by the kernel, can't generate interrupts, and don't obey the freeze conditions. That means we can only use them for per-task counters (where we know we'll always be in run state; we can't put a per-task counter on an idle task), and only if we don't want interrupts and we do want to count in all processor modes. Obviously some counters can't go on a limited hardware counter, but there are also situations where we can only put a counter on a limited hardware counter - if the counters already on the PMU exclude some processor modes and we want to add a per-task cycle or instruction counter that doesn't exclude any processor mode, the new counter can go on only if it can use a limited hardware counter. To keep track of these constraints, this adds a flags argument to the processor-specific get_alternatives() functions, with three bits defined: one to say that we can accept alternative event codes that go on limited counters, one to say we only want alternatives on limited counters, and one to say that this is a per-task counter and therefore events that are gated by run state are equivalent to those that aren't (e.g. a "cycles" event is equivalent to a "cycles in run state" event). These flags are computed for each counter and stored in the counter->hw.counter_base field (slightly wonky name for what it does, but it was an existing unused field). Since the limited counters don't freeze when we freeze the other counters, we need some special handling to avoid getting skew between things counted on the limited counters and those counted on normal counters. 
To minimize this skew, if we are using any limited counters, we read PMC5 and PMC6 immediately after setting and clearing the freeze bit. This is done in a single asm in the new write_mmcr0() function. The code here is specific to PMC5 and PMC6 being the limited hardware counters. Being more general (e.g. having a bitmap of limited hardware counter numbers) would have meant more complex code to read the limited counters when freezing and unfreezing the normal counters, with conditional branches, which would have increased the skew. Since it isn't necessary for the code to be more general at this stage, it isn't. This also extends the back-ends for POWER5+ and POWER6 to be able to handle up to 6 counters rather than the 4 they previously handled. Signed-off-by: Paul Mackerras <paulus@samba.org> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Robert Richter <robert.richter@amd.com> LKML-Reference: <18936.19035.163066.892208@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r-- arch/powerpc/kernel/perf_counter.c 297
-rw-r--r-- arch/powerpc/kernel/power4-pmu.c 3
-rw-r--r-- arch/powerpc/kernel/power5+-pmu.c 117
-rw-r--r-- arch/powerpc/kernel/power5-pmu.c 3
-rw-r--r-- arch/powerpc/kernel/power6-pmu.c 119
-rw-r--r-- arch/powerpc/kernel/ppc970-pmu.c 3
6 files changed, 467 insertions, 75 deletions
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index d9bbe5efc649..15cdc8e67229 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -23,10 +23,14 @@ struct cpu_hw_counters {
23 int n_percpu; 23 int n_percpu;
24 int disabled; 24 int disabled;
25 int n_added; 25 int n_added;
26 int n_limited;
27 u8 pmcs_enabled;
26 struct perf_counter *counter[MAX_HWCOUNTERS]; 28 struct perf_counter *counter[MAX_HWCOUNTERS];
27 unsigned int events[MAX_HWCOUNTERS]; 29 unsigned int events[MAX_HWCOUNTERS];
30 unsigned int flags[MAX_HWCOUNTERS];
28 u64 mmcr[3]; 31 u64 mmcr[3];
29 u8 pmcs_enabled; 32 struct perf_counter *limited_counter[MAX_LIMITED_HWCOUNTERS];
33 u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS];
30}; 34};
31DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters); 35DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters);
32 36
@@ -127,7 +131,8 @@ static void write_pmc(int idx, unsigned long val)
127 * and see if any combination of alternative codes is feasible. 131 * and see if any combination of alternative codes is feasible.
128 * The feasible set is returned in event[]. 132 * The feasible set is returned in event[].
129 */ 133 */
130static int power_check_constraints(unsigned int event[], int n_ev) 134static int power_check_constraints(unsigned int event[], unsigned int cflags[],
135 int n_ev)
131{ 136{
132 u64 mask, value, nv; 137 u64 mask, value, nv;
133 unsigned int alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; 138 unsigned int alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
@@ -144,11 +149,15 @@ static int power_check_constraints(unsigned int event[], int n_ev)
144 149
145 /* First see if the events will go on as-is */ 150 /* First see if the events will go on as-is */
146 for (i = 0; i < n_ev; ++i) { 151 for (i = 0; i < n_ev; ++i) {
147 alternatives[i][0] = event[i]; 152 if ((cflags[i] & PPMU_LIMITED_PMC_REQD)
153 && !ppmu->limited_pmc_event(event[i])) {
154 ppmu->get_alternatives(event[i], cflags[i],
155 alternatives[i]);
156 event[i] = alternatives[i][0];
157 }
148 if (ppmu->get_constraint(event[i], &amasks[i][0], 158 if (ppmu->get_constraint(event[i], &amasks[i][0],
149 &avalues[i][0])) 159 &avalues[i][0]))
150 return -1; 160 return -1;
151 choice[i] = 0;
152 } 161 }
153 value = mask = 0; 162 value = mask = 0;
154 for (i = 0; i < n_ev; ++i) { 163 for (i = 0; i < n_ev; ++i) {
@@ -166,7 +175,9 @@ static int power_check_constraints(unsigned int event[], int n_ev)
166 if (!ppmu->get_alternatives) 175 if (!ppmu->get_alternatives)
167 return -1; 176 return -1;
168 for (i = 0; i < n_ev; ++i) { 177 for (i = 0; i < n_ev; ++i) {
169 n_alt[i] = ppmu->get_alternatives(event[i], alternatives[i]); 178 choice[i] = 0;
179 n_alt[i] = ppmu->get_alternatives(event[i], cflags[i],
180 alternatives[i]);
170 for (j = 1; j < n_alt[i]; ++j) 181 for (j = 1; j < n_alt[i]; ++j)
171 ppmu->get_constraint(alternatives[i][j], 182 ppmu->get_constraint(alternatives[i][j],
172 &amasks[i][j], &avalues[i][j]); 183 &amasks[i][j], &avalues[i][j]);
@@ -231,28 +242,41 @@ static int power_check_constraints(unsigned int event[], int n_ev)
231 * exclude_{user,kernel,hv} with each other and any previously 242 * exclude_{user,kernel,hv} with each other and any previously
232 * added counters. 243 * added counters.
233 */ 244 */
234static int check_excludes(struct perf_counter **ctrs, int n_prev, int n_new) 245static int check_excludes(struct perf_counter **ctrs, unsigned int cflags[],
246 int n_prev, int n_new)
235{ 247{
236 int eu, ek, eh; 248 int eu = 0, ek = 0, eh = 0;
237 int i, n; 249 int i, n, first;
238 struct perf_counter *counter; 250 struct perf_counter *counter;
239 251
240 n = n_prev + n_new; 252 n = n_prev + n_new;
241 if (n <= 1) 253 if (n <= 1)
242 return 0; 254 return 0;
243 255
244 eu = ctrs[0]->hw_event.exclude_user; 256 first = 1;
245 ek = ctrs[0]->hw_event.exclude_kernel; 257 for (i = 0; i < n; ++i) {
246 eh = ctrs[0]->hw_event.exclude_hv; 258 if (cflags[i] & PPMU_LIMITED_PMC_OK) {
247 if (n_prev == 0) 259 cflags[i] &= ~PPMU_LIMITED_PMC_REQD;
248 n_prev = 1; 260 continue;
249 for (i = n_prev; i < n; ++i) { 261 }
250 counter = ctrs[i]; 262 counter = ctrs[i];
251 if (counter->hw_event.exclude_user != eu || 263 if (first) {
252 counter->hw_event.exclude_kernel != ek || 264 eu = counter->hw_event.exclude_user;
253 counter->hw_event.exclude_hv != eh) 265 ek = counter->hw_event.exclude_kernel;
266 eh = counter->hw_event.exclude_hv;
267 first = 0;
268 } else if (counter->hw_event.exclude_user != eu ||
269 counter->hw_event.exclude_kernel != ek ||
270 counter->hw_event.exclude_hv != eh) {
254 return -EAGAIN; 271 return -EAGAIN;
272 }
255 } 273 }
274
275 if (eu || ek || eh)
276 for (i = 0; i < n; ++i)
277 if (cflags[i] & PPMU_LIMITED_PMC_OK)
278 cflags[i] |= PPMU_LIMITED_PMC_REQD;
279
256 return 0; 280 return 0;
257} 281}
258 282
@@ -280,6 +304,85 @@ static void power_pmu_read(struct perf_counter *counter)
280} 304}
281 305
282/* 306/*
307 * On some machines, PMC5 and PMC6 can't be written, don't respect
308 * the freeze conditions, and don't generate interrupts. This tells
309 * us if PMC number `pmcnum' is such a PMC.
310 */
311static int is_limited_pmc(int pmcnum)
312{
313 return ppmu->limited_pmc5_6 && (pmcnum == 5 || pmcnum == 6);
314}
315
316static void freeze_limited_counters(struct cpu_hw_counters *cpuhw,
317 unsigned long pmc5, unsigned long pmc6)
318{
319 struct perf_counter *counter;
320 u64 val, prev, delta;
321 int i;
322
323 for (i = 0; i < cpuhw->n_limited; ++i) {
324 counter = cpuhw->limited_counter[i];
325 if (!counter->hw.idx)
326 continue;
327 val = (counter->hw.idx == 5) ? pmc5 : pmc6;
328 prev = atomic64_read(&counter->hw.prev_count);
329 counter->hw.idx = 0;
330 delta = (val - prev) & 0xfffffffful;
331 atomic64_add(delta, &counter->count);
332 }
333}
334
335static void thaw_limited_counters(struct cpu_hw_counters *cpuhw,
336 unsigned long pmc5, unsigned long pmc6)
337{
338 struct perf_counter *counter;
339 u64 val;
340 int i;
341
342 for (i = 0; i < cpuhw->n_limited; ++i) {
343 counter = cpuhw->limited_counter[i];
344 counter->hw.idx = cpuhw->limited_hwidx[i];
345 val = (counter->hw.idx == 5) ? pmc5 : pmc6;
346 atomic64_set(&counter->hw.prev_count, val);
347 perf_counter_update_userpage(counter);
348 }
349}
350
351/*
352 * Since limited counters don't respect the freeze conditions, we
353 * have to read them immediately after freezing or unfreezing the
354 * other counters. We try to keep the values from the limited
355 * counters as consistent as possible by keeping the delay (in
356 * cycles and instructions) between freezing/unfreezing and reading
357 * the limited counters as small and consistent as possible.
358 * Therefore, if any limited counters are in use, we read them
359 * both, and always in the same order, to minimize variability,
360 * and do it inside the same asm that writes MMCR0.
361 */
362static void write_mmcr0(struct cpu_hw_counters *cpuhw, unsigned long mmcr0)
363{
364 unsigned long pmc5, pmc6;
365
366 if (!cpuhw->n_limited) {
367 mtspr(SPRN_MMCR0, mmcr0);
368 return;
369 }
370
371 /*
372 * Write MMCR0, then read PMC5 and PMC6 immediately.
373 */
374 asm volatile("mtspr %3,%2; mfspr %0,%4; mfspr %1,%5"
375 : "=&r" (pmc5), "=&r" (pmc6)
376 : "r" (mmcr0), "i" (SPRN_MMCR0),
377 "i" (SPRN_PMC5), "i" (SPRN_PMC6));
378
379 if (mmcr0 & MMCR0_FC)
380 freeze_limited_counters(cpuhw, pmc5, pmc6);
381 else
382 thaw_limited_counters(cpuhw, pmc5, pmc6);
383}
384
385/*
283 * Disable all counters to prevent PMU interrupts and to allow 386 * Disable all counters to prevent PMU interrupts and to allow
284 * counters to be added or removed. 387 * counters to be added or removed.
285 */ 388 */
@@ -321,7 +424,7 @@ u64 hw_perf_save_disable(void)
321 * executed and the PMU has frozen the counters 424 * executed and the PMU has frozen the counters
322 * before we return. 425 * before we return.
323 */ 426 */
324 mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC); 427 write_mmcr0(cpuhw, mfspr(SPRN_MMCR0) | MMCR0_FC);
325 mb(); 428 mb();
326 } 429 }
327 local_irq_restore(flags); 430 local_irq_restore(flags);
@@ -342,6 +445,8 @@ void hw_perf_restore(u64 disable)
342 unsigned long val; 445 unsigned long val;
343 s64 left; 446 s64 left;
344 unsigned int hwc_index[MAX_HWCOUNTERS]; 447 unsigned int hwc_index[MAX_HWCOUNTERS];
448 int n_lim;
449 int idx;
345 450
346 if (disable) 451 if (disable)
347 return; 452 return;
@@ -414,10 +519,18 @@ void hw_perf_restore(u64 disable)
414 /* 519 /*
415 * Initialize the PMCs for all the new and moved counters. 520 * Initialize the PMCs for all the new and moved counters.
416 */ 521 */
522 cpuhw->n_limited = n_lim = 0;
417 for (i = 0; i < cpuhw->n_counters; ++i) { 523 for (i = 0; i < cpuhw->n_counters; ++i) {
418 counter = cpuhw->counter[i]; 524 counter = cpuhw->counter[i];
419 if (counter->hw.idx) 525 if (counter->hw.idx)
420 continue; 526 continue;
527 idx = hwc_index[i] + 1;
528 if (is_limited_pmc(idx)) {
529 cpuhw->limited_counter[n_lim] = counter;
530 cpuhw->limited_hwidx[n_lim] = idx;
531 ++n_lim;
532 continue;
533 }
421 val = 0; 534 val = 0;
422 if (counter->hw_event.irq_period) { 535 if (counter->hw_event.irq_period) {
423 left = atomic64_read(&counter->hw.period_left); 536 left = atomic64_read(&counter->hw.period_left);
@@ -425,15 +538,16 @@ void hw_perf_restore(u64 disable)
425 val = 0x80000000L - left; 538 val = 0x80000000L - left;
426 } 539 }
427 atomic64_set(&counter->hw.prev_count, val); 540 atomic64_set(&counter->hw.prev_count, val);
428 counter->hw.idx = hwc_index[i] + 1; 541 counter->hw.idx = idx;
429 write_pmc(counter->hw.idx, val); 542 write_pmc(idx, val);
430 perf_counter_update_userpage(counter); 543 perf_counter_update_userpage(counter);
431 } 544 }
545 cpuhw->n_limited = n_lim;
432 cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE; 546 cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;
433 547
434 out_enable: 548 out_enable:
435 mb(); 549 mb();
436 mtspr(SPRN_MMCR0, cpuhw->mmcr[0]); 550 write_mmcr0(cpuhw, cpuhw->mmcr[0]);
437 551
438 /* 552 /*
439 * Enable instruction sampling if necessary 553 * Enable instruction sampling if necessary
@@ -448,7 +562,8 @@ void hw_perf_restore(u64 disable)
448} 562}
449 563
450static int collect_events(struct perf_counter *group, int max_count, 564static int collect_events(struct perf_counter *group, int max_count,
451 struct perf_counter *ctrs[], unsigned int *events) 565 struct perf_counter *ctrs[], unsigned int *events,
566 unsigned int *flags)
452{ 567{
453 int n = 0; 568 int n = 0;
454 struct perf_counter *counter; 569 struct perf_counter *counter;
@@ -457,6 +572,7 @@ static int collect_events(struct perf_counter *group, int max_count,
457 if (n >= max_count) 572 if (n >= max_count)
458 return -1; 573 return -1;
459 ctrs[n] = group; 574 ctrs[n] = group;
575 flags[n] = group->hw.counter_base;
460 events[n++] = group->hw.config; 576 events[n++] = group->hw.config;
461 } 577 }
462 list_for_each_entry(counter, &group->sibling_list, list_entry) { 578 list_for_each_entry(counter, &group->sibling_list, list_entry) {
@@ -465,6 +581,7 @@ static int collect_events(struct perf_counter *group, int max_count,
465 if (n >= max_count) 581 if (n >= max_count)
466 return -1; 582 return -1;
467 ctrs[n] = counter; 583 ctrs[n] = counter;
584 flags[n] = counter->hw.counter_base;
468 events[n++] = counter->hw.config; 585 events[n++] = counter->hw.config;
469 } 586 }
470 } 587 }
@@ -497,12 +614,14 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader,
497 cpuhw = &__get_cpu_var(cpu_hw_counters); 614 cpuhw = &__get_cpu_var(cpu_hw_counters);
498 n0 = cpuhw->n_counters; 615 n0 = cpuhw->n_counters;
499 n = collect_events(group_leader, ppmu->n_counter - n0, 616 n = collect_events(group_leader, ppmu->n_counter - n0,
500 &cpuhw->counter[n0], &cpuhw->events[n0]); 617 &cpuhw->counter[n0], &cpuhw->events[n0],
618 &cpuhw->flags[n0]);
501 if (n < 0) 619 if (n < 0)
502 return -EAGAIN; 620 return -EAGAIN;
503 if (check_excludes(cpuhw->counter, n0, n)) 621 if (check_excludes(cpuhw->counter, cpuhw->flags, n0, n))
504 return -EAGAIN; 622 return -EAGAIN;
505 if (power_check_constraints(cpuhw->events, n + n0)) 623 i = power_check_constraints(cpuhw->events, cpuhw->flags, n + n0);
624 if (i < 0)
506 return -EAGAIN; 625 return -EAGAIN;
507 cpuhw->n_counters = n0 + n; 626 cpuhw->n_counters = n0 + n;
508 cpuhw->n_added += n; 627 cpuhw->n_added += n;
@@ -554,9 +673,10 @@ static int power_pmu_enable(struct perf_counter *counter)
554 goto out; 673 goto out;
555 cpuhw->counter[n0] = counter; 674 cpuhw->counter[n0] = counter;
556 cpuhw->events[n0] = counter->hw.config; 675 cpuhw->events[n0] = counter->hw.config;
557 if (check_excludes(cpuhw->counter, n0, 1)) 676 cpuhw->flags[n0] = counter->hw.counter_base;
677 if (check_excludes(cpuhw->counter, cpuhw->flags, n0, 1))
558 goto out; 678 goto out;
559 if (power_check_constraints(cpuhw->events, n0 + 1)) 679 if (power_check_constraints(cpuhw->events, cpuhw->flags, n0 + 1))
560 goto out; 680 goto out;
561 681
562 counter->hw.config = cpuhw->events[n0]; 682 counter->hw.config = cpuhw->events[n0];
@@ -592,12 +712,24 @@ static void power_pmu_disable(struct perf_counter *counter)
592 cpuhw->counter[i-1] = cpuhw->counter[i]; 712 cpuhw->counter[i-1] = cpuhw->counter[i];
593 --cpuhw->n_counters; 713 --cpuhw->n_counters;
594 ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr); 714 ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr);
595 write_pmc(counter->hw.idx, 0); 715 if (counter->hw.idx) {
596 counter->hw.idx = 0; 716 write_pmc(counter->hw.idx, 0);
717 counter->hw.idx = 0;
718 }
597 perf_counter_update_userpage(counter); 719 perf_counter_update_userpage(counter);
598 break; 720 break;
599 } 721 }
600 } 722 }
723 for (i = 0; i < cpuhw->n_limited; ++i)
724 if (counter == cpuhw->limited_counter[i])
725 break;
726 if (i < cpuhw->n_limited) {
727 while (++i < cpuhw->n_limited) {
728 cpuhw->limited_counter[i-1] = cpuhw->limited_counter[i];
729 cpuhw->limited_hwidx[i-1] = cpuhw->limited_hwidx[i];
730 }
731 --cpuhw->n_limited;
732 }
601 if (cpuhw->n_counters == 0) { 733 if (cpuhw->n_counters == 0) {
602 /* disable exceptions if no counters are running */ 734 /* disable exceptions if no counters are running */
603 cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE); 735 cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE);
@@ -613,6 +745,61 @@ struct pmu power_pmu = {
613 .read = power_pmu_read, 745 .read = power_pmu_read,
614}; 746};
615 747
748/*
749 * Return 1 if we might be able to put counter on a limited PMC,
750 * or 0 if not.
751 * A counter can only go on a limited PMC if it counts something
752 * that a limited PMC can count, doesn't require interrupts, and
753 * doesn't exclude any processor mode.
754 */
755static int can_go_on_limited_pmc(struct perf_counter *counter, unsigned int ev,
756 unsigned int flags)
757{
758 int n;
759 unsigned int alt[MAX_EVENT_ALTERNATIVES];
760
761 if (counter->hw_event.exclude_user
762 || counter->hw_event.exclude_kernel
763 || counter->hw_event.exclude_hv
764 || counter->hw_event.irq_period)
765 return 0;
766
767 if (ppmu->limited_pmc_event(ev))
768 return 1;
769
770 /*
771 * The requested event isn't on a limited PMC already;
772 * see if any alternative code goes on a limited PMC.
773 */
774 if (!ppmu->get_alternatives)
775 return 0;
776
777 flags |= PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD;
778 n = ppmu->get_alternatives(ev, flags, alt);
779 if (n)
780 return alt[0];
781
782 return 0;
783}
784
785/*
786 * Find an alternative event that goes on a normal PMC, if possible,
787 * and return the event code, or 0 if there is no such alternative.
788 * (Note: event code 0 is "don't count" on all machines.)
789 */
790static unsigned long normal_pmc_alternative(unsigned long ev,
791 unsigned long flags)
792{
793 unsigned int alt[MAX_EVENT_ALTERNATIVES];
794 int n;
795
796 flags &= ~(PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD);
797 n = ppmu->get_alternatives(ev, flags, alt);
798 if (!n)
799 return 0;
800 return alt[0];
801}
802
616/* Number of perf_counters counting hardware events */ 803/* Number of perf_counters counting hardware events */
617static atomic_t num_counters; 804static atomic_t num_counters;
618/* Used to avoid races in calling reserve/release_pmc_hardware */ 805/* Used to avoid races in calling reserve/release_pmc_hardware */
@@ -633,9 +820,10 @@ static void hw_perf_counter_destroy(struct perf_counter *counter)
633 820
634const struct pmu *hw_perf_counter_init(struct perf_counter *counter) 821const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
635{ 822{
636 unsigned long ev; 823 unsigned long ev, flags;
637 struct perf_counter *ctrs[MAX_HWCOUNTERS]; 824 struct perf_counter *ctrs[MAX_HWCOUNTERS];
638 unsigned int events[MAX_HWCOUNTERS]; 825 unsigned int events[MAX_HWCOUNTERS];
826 unsigned int cflags[MAX_HWCOUNTERS];
639 int n; 827 int n;
640 int err; 828 int err;
641 829
@@ -661,7 +849,36 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
661 */ 849 */
662 if (!firmware_has_feature(FW_FEATURE_LPAR)) 850 if (!firmware_has_feature(FW_FEATURE_LPAR))
663 counter->hw_event.exclude_hv = 0; 851 counter->hw_event.exclude_hv = 0;
664 852
853 /*
854 * If this is a per-task counter, then we can use
855 * PM_RUN_* events interchangeably with their non RUN_*
856 * equivalents, e.g. PM_RUN_CYC instead of PM_CYC.
857 * XXX we should check if the task is an idle task.
858 */
859 flags = 0;
860 if (counter->ctx->task)
861 flags |= PPMU_ONLY_COUNT_RUN;
862
863 /*
864 * If this machine has limited counters, check whether this
865 * event could go on a limited counter.
866 */
867 if (ppmu->limited_pmc5_6) {
868 if (can_go_on_limited_pmc(counter, ev, flags)) {
869 flags |= PPMU_LIMITED_PMC_OK;
870 } else if (ppmu->limited_pmc_event(ev)) {
871 /*
872 * The requested event is on a limited PMC,
873 * but we can't use a limited PMC; see if any
874 * alternative goes on a normal PMC.
875 */
876 ev = normal_pmc_alternative(ev, flags);
877 if (!ev)
878 return ERR_PTR(-EINVAL);
879 }
880 }
881
665 /* 882 /*
666 * If this is in a group, check if it can go on with all the 883 * If this is in a group, check if it can go on with all the
667 * other hardware counters in the group. We assume the counter 884 * other hardware counters in the group. We assume the counter
@@ -670,18 +887,20 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
670 n = 0; 887 n = 0;
671 if (counter->group_leader != counter) { 888 if (counter->group_leader != counter) {
672 n = collect_events(counter->group_leader, ppmu->n_counter - 1, 889 n = collect_events(counter->group_leader, ppmu->n_counter - 1,
673 ctrs, events); 890 ctrs, events, cflags);
674 if (n < 0) 891 if (n < 0)
675 return ERR_PTR(-EINVAL); 892 return ERR_PTR(-EINVAL);
676 } 893 }
677 events[n] = ev; 894 events[n] = ev;
678 ctrs[n] = counter; 895 ctrs[n] = counter;
679 if (check_excludes(ctrs, n, 1)) 896 cflags[n] = flags;
897 if (check_excludes(ctrs, cflags, n, 1))
680 return ERR_PTR(-EINVAL); 898 return ERR_PTR(-EINVAL);
681 if (power_check_constraints(events, n + 1)) 899 if (power_check_constraints(events, cflags, n + 1))
682 return ERR_PTR(-EINVAL); 900 return ERR_PTR(-EINVAL);
683 901
684 counter->hw.config = events[n]; 902 counter->hw.config = events[n];
903 counter->hw.counter_base = cflags[n];
685 atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period); 904 atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period);
686 905
687 /* 906 /*
@@ -763,6 +982,10 @@ static void perf_counter_interrupt(struct pt_regs *regs)
763 int found = 0; 982 int found = 0;
764 int nmi; 983 int nmi;
765 984
985 if (cpuhw->n_limited)
986 freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5),
987 mfspr(SPRN_PMC6));
988
766 /* 989 /*
767 * If interrupts were soft-disabled when this PMU interrupt 990 * If interrupts were soft-disabled when this PMU interrupt
768 * occurred, treat it as an NMI. 991 * occurred, treat it as an NMI.
@@ -775,6 +998,8 @@ static void perf_counter_interrupt(struct pt_regs *regs)
775 998
776 for (i = 0; i < cpuhw->n_counters; ++i) { 999 for (i = 0; i < cpuhw->n_counters; ++i) {
777 counter = cpuhw->counter[i]; 1000 counter = cpuhw->counter[i];
1001 if (is_limited_pmc(counter->hw.idx))
1002 continue;
778 val = read_pmc(counter->hw.idx); 1003 val = read_pmc(counter->hw.idx);
779 if ((int)val < 0) { 1004 if ((int)val < 0) {
780 /* counter has overflowed */ 1005 /* counter has overflowed */
@@ -791,6 +1016,8 @@ static void perf_counter_interrupt(struct pt_regs *regs)
791 */ 1016 */
792 if (!found) { 1017 if (!found) {
793 for (i = 0; i < ppmu->n_counter; ++i) { 1018 for (i = 0; i < ppmu->n_counter; ++i) {
1019 if (is_limited_pmc(i + 1))
1020 continue;
794 val = read_pmc(i + 1); 1021 val = read_pmc(i + 1);
795 if ((int)val < 0) 1022 if ((int)val < 0)
796 write_pmc(i + 1, 0); 1023 write_pmc(i + 1, 0);
@@ -804,7 +1031,7 @@ static void perf_counter_interrupt(struct pt_regs *regs)
804 * XXX might want to use MSR.PM to keep the counters frozen until 1031 * XXX might want to use MSR.PM to keep the counters frozen until
805 * we get back out of this interrupt. 1032 * we get back out of this interrupt.
806 */ 1033 */
807 mtspr(SPRN_MMCR0, cpuhw->mmcr[0]); 1034 write_mmcr0(cpuhw, cpuhw->mmcr[0]);
808 1035
809 if (nmi) 1036 if (nmi)
810 nmi_exit(); 1037 nmi_exit();
diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c
index 1407b19ab619..744a2756958e 100644
--- a/arch/powerpc/kernel/power4-pmu.c
+++ b/arch/powerpc/kernel/power4-pmu.c
@@ -320,7 +320,8 @@ static unsigned int ppc_inst_cmpl[] = {
320 0x1001, 0x4001, 0x6001, 0x7001, 0x8001 320 0x1001, 0x4001, 0x6001, 0x7001, 0x8001
321}; 321};
322 322
323static int p4_get_alternatives(unsigned int event, unsigned int alt[]) 323static int p4_get_alternatives(unsigned int event, unsigned int flags,
324 unsigned int alt[])
324{ 325{
325 int i, j, na; 326 int i, j, na;
326 327
diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c
index 1222c8ea3c26..8154eaa2404f 100644
--- a/arch/powerpc/kernel/power5+-pmu.c
+++ b/arch/powerpc/kernel/power5+-pmu.c
@@ -78,8 +78,8 @@
78 * Layout of constraint bits: 78 * Layout of constraint bits:
79 * 6666555555555544444444443333333333222222222211111111110000000000 79 * 6666555555555544444444443333333333222222222211111111110000000000
80 * 3210987654321098765432109876543210987654321098765432109876543210 80 * 3210987654321098765432109876543210987654321098765432109876543210
81 * [ ><><>< ><> <><>[ > < >< >< >< ><><><><> 81 * [ ><><>< ><> <><>[ > < >< >< >< ><><><><><><>
82 * NC G0G1G2 G3 T0T1 UC B0 B1 B2 B3 P4P3P2P1 82 * NC G0G1G2 G3 T0T1 UC B0 B1 B2 B3 P6P5P4P3P2P1
83 * 83 *
84 * NC - number of counters 84 * NC - number of counters
85 * 51: NC error 0x0008_0000_0000_0000 85 * 51: NC error 0x0008_0000_0000_0000
@@ -105,18 +105,18 @@
105 * 30: IDU|GRS events needed 0x00_4000_0000 105 * 30: IDU|GRS events needed 0x00_4000_0000
106 * 106 *
107 * B0 107 * B0
108 * 20-23: Byte 0 event source 0x00f0_0000 108 * 24-27: Byte 0 event source 0x0f00_0000
109 * Encoding as for the event code 109 * Encoding as for the event code
110 * 110 *
111 * B1, B2, B3 111 * B1, B2, B3
112 * 16-19, 12-15, 8-11: Byte 1, 2, 3 event sources 112 * 20-23, 16-19, 12-15: Byte 1, 2, 3 event sources
113 * 113 *
114 * P4 114 * P6
115 * 7: P1 error 0x80 115 * 11: P6 error 0x800
116 * 6-7: Count of events needing PMC4 116 * 10-11: Count of events needing PMC6
117 * 117 *
118 * P1..P3 118 * P1..P5
119 * 0-6: Count of events needing PMC1..PMC3 119 * 0-9: Count of events needing PMC1..PMC5
120 */ 120 */
121 121
122static const int grsel_shift[8] = { 122static const int grsel_shift[8] = {
@@ -143,11 +143,13 @@ static int power5p_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
143 143
144 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; 144 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
145 if (pmc) { 145 if (pmc) {
146 if (pmc > 4) 146 if (pmc > 6)
147 return -1; 147 return -1;
148 sh = (pmc - 1) * 2; 148 sh = (pmc - 1) * 2;
149 mask |= 2 << sh; 149 mask |= 2 << sh;
150 value |= 1 << sh; 150 value |= 1 << sh;
151 if (pmc >= 5 && !(event == 0x500009 || event == 0x600005))
152 return -1;
151 } 153 }
152 if (event & PM_BUSEVENT_MSK) { 154 if (event & PM_BUSEVENT_MSK) {
153 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; 155 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
@@ -173,16 +175,26 @@ static int power5p_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
173 value |= (u64)((event >> PM_GRS_SH) & fmask) << sh; 175 value |= (u64)((event >> PM_GRS_SH) & fmask) << sh;
174 } 176 }
175 /* Set byte lane select field */ 177 /* Set byte lane select field */
176 mask |= 0xfULL << (20 - 4 * byte); 178 mask |= 0xfULL << (24 - 4 * byte);
177 value |= (u64)unit << (20 - 4 * byte); 179 value |= (u64)unit << (24 - 4 * byte);
180 }
181 if (pmc < 5) {
182 /* need a counter from PMC1-4 set */
183 mask |= 0x8000000000000ull;
184 value |= 0x1000000000000ull;
178 } 185 }
179 mask |= 0x8000000000000ull;
180 value |= 0x1000000000000ull;
181 *maskp = mask; 186 *maskp = mask;
182 *valp = value; 187 *valp = value;
183 return 0; 188 return 0;
184} 189}
185 190
191static int power5p_limited_pmc_event(unsigned int event)
192{
193 int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
194
195 return pmc == 5 || pmc == 6;
196}
197
186#define MAX_ALT 3 /* at most 3 alternatives for any event */ 198#define MAX_ALT 3 /* at most 3 alternatives for any event */
187 199
188static const unsigned int event_alternatives[][MAX_ALT] = { 200static const unsigned int event_alternatives[][MAX_ALT] = {
@@ -193,6 +205,7 @@ static const unsigned int event_alternatives[][MAX_ALT] = {
193 { 0x410c7, 0x441084 }, /* PM_THRD_L2MISS_BOTH_CYC */ 205 { 0x410c7, 0x441084 }, /* PM_THRD_L2MISS_BOTH_CYC */
194 { 0x800c4, 0xc20e0 }, /* PM_DTLB_MISS */ 206 { 0x800c4, 0xc20e0 }, /* PM_DTLB_MISS */
195 { 0xc50c6, 0xc60e0 }, /* PM_MRK_DTLB_MISS */ 207 { 0xc50c6, 0xc60e0 }, /* PM_MRK_DTLB_MISS */
208 { 0x100005, 0x600005 }, /* PM_RUN_CYC */
196 { 0x100009, 0x200009 }, /* PM_INST_CMPL */ 209 { 0x100009, 0x200009 }, /* PM_INST_CMPL */
197 { 0x200015, 0x300015 }, /* PM_LSU_LMQ_SRQ_EMPTY_CYC */ 210 { 0x200015, 0x300015 }, /* PM_LSU_LMQ_SRQ_EMPTY_CYC */
198 { 0x300009, 0x400009 }, /* PM_INST_DISP */ 211 { 0x300009, 0x400009 }, /* PM_INST_DISP */
@@ -260,24 +273,85 @@ static int find_alternative_bdecode(unsigned int event)
260 return -1; 273 return -1;
261} 274}
262 275
263static int power5p_get_alternatives(unsigned int event, unsigned int alt[]) 276static int power5p_get_alternatives(unsigned int event, unsigned int flags,
277 unsigned int alt[])
264{ 278{
265 int i, j, ae, nalt = 1; 279 int i, j, ae, nalt = 1;
280 int nlim;
266 281
267 alt[0] = event; 282 alt[0] = event;
268 nalt = 1; 283 nalt = 1;
284 nlim = power5p_limited_pmc_event(event);
269 i = find_alternative(event); 285 i = find_alternative(event);
270 if (i >= 0) { 286 if (i >= 0) {
271 for (j = 0; j < MAX_ALT; ++j) { 287 for (j = 0; j < MAX_ALT; ++j) {
272 ae = event_alternatives[i][j]; 288 ae = event_alternatives[i][j];
273 if (ae && ae != event) 289 if (ae && ae != event)
274 alt[nalt++] = ae; 290 alt[nalt++] = ae;
291 nlim += power5p_limited_pmc_event(ae);
275 } 292 }
276 } else { 293 } else {
277 ae = find_alternative_bdecode(event); 294 ae = find_alternative_bdecode(event);
278 if (ae > 0) 295 if (ae > 0)
279 alt[nalt++] = ae; 296 alt[nalt++] = ae;
280 } 297 }
298
299 if (flags & PPMU_ONLY_COUNT_RUN) {
300 /*
301 * We're only counting in RUN state,
302 * so PM_CYC is equivalent to PM_RUN_CYC
303 * and PM_INST_CMPL === PM_RUN_INST_CMPL.
304 * This doesn't include alternatives that don't provide
305 * any extra flexibility in assigning PMCs (e.g.
306 * 0x100005 for PM_RUN_CYC vs. 0xf for PM_CYC).
307 * Note that even with these additional alternatives
308 * we never end up with more than 3 alternatives for any event.
309 */
310 j = nalt;
311 for (i = 0; i < nalt; ++i) {
312 switch (alt[i]) {
313 case 0xf: /* PM_CYC */
314 alt[j++] = 0x600005; /* PM_RUN_CYC */
315 ++nlim;
316 break;
317 case 0x600005: /* PM_RUN_CYC */
318 alt[j++] = 0xf;
319 break;
320 case 0x100009: /* PM_INST_CMPL */
321 alt[j++] = 0x500009; /* PM_RUN_INST_CMPL */
322 ++nlim;
323 break;
324 case 0x500009: /* PM_RUN_INST_CMPL */
325 alt[j++] = 0x100009; /* PM_INST_CMPL */
326 alt[j++] = 0x200009;
327 break;
328 }
329 }
330 nalt = j;
331 }
332
333 if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) {
334 /* remove the limited PMC events */
335 j = 0;
336 for (i = 0; i < nalt; ++i) {
337 if (!power5p_limited_pmc_event(alt[i])) {
338 alt[j] = alt[i];
339 ++j;
340 }
341 }
342 nalt = j;
343 } else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) {
344 /* remove all but the limited PMC events */
345 j = 0;
346 for (i = 0; i < nalt; ++i) {
347 if (power5p_limited_pmc_event(alt[i])) {
348 alt[j] = alt[i];
349 ++j;
350 }
351 }
352 nalt = j;
353 }
354
281 return nalt; 355 return nalt;
282} 356}
283 357
@@ -390,7 +464,7 @@ static int power5p_compute_mmcr(unsigned int event[], int n_ev,
390 unsigned char unituse[16]; 464 unsigned char unituse[16];
391 int ttmuse; 465 int ttmuse;
392 466
393 if (n_ev > 4) 467 if (n_ev > 6)
394 return -1; 468 return -1;
395 469
396 /* First pass to count resource use */ 470 /* First pass to count resource use */
@@ -399,7 +473,7 @@ static int power5p_compute_mmcr(unsigned int event[], int n_ev,
399 for (i = 0; i < n_ev; ++i) { 473 for (i = 0; i < n_ev; ++i) {
400 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; 474 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
401 if (pmc) { 475 if (pmc) {
402 if (pmc > 4) 476 if (pmc > 6)
403 return -1; 477 return -1;
404 if (pmc_inuse & (1 << (pmc - 1))) 478 if (pmc_inuse & (1 << (pmc - 1)))
405 return -1; 479 return -1;
@@ -488,13 +562,16 @@ static int power5p_compute_mmcr(unsigned int event[], int n_ev,
488 if (pmc >= 4) 562 if (pmc >= 4)
489 return -1; 563 return -1;
490 pmc_inuse |= 1 << pmc; 564 pmc_inuse |= 1 << pmc;
491 } else { 565 } else if (pmc <= 4) {
492 /* Direct event */ 566 /* Direct event */
493 --pmc; 567 --pmc;
494 if (isbus && (byte & 2) && 568 if (isbus && (byte & 2) &&
495 (psel == 8 || psel == 0x10 || psel == 0x28)) 569 (psel == 8 || psel == 0x10 || psel == 0x28))
496 /* add events on higher-numbered bus */ 570 /* add events on higher-numbered bus */
497 mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc); 571 mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
572 } else {
573 /* Instructions or run cycles on PMC5/6 */
574 --pmc;
498 } 575 }
499 if (isbus && unit == PM_GRS) { 576 if (isbus && unit == PM_GRS) {
500 bit = psel & 7; 577 bit = psel & 7;
@@ -538,7 +615,7 @@ static int power5p_generic_events[] = {
538}; 615};
539 616
540struct power_pmu power5p_pmu = { 617struct power_pmu power5p_pmu = {
541 .n_counter = 4, 618 .n_counter = 6,
542 .max_alternatives = MAX_ALT, 619 .max_alternatives = MAX_ALT,
543 .add_fields = 0x7000000000055ull, 620 .add_fields = 0x7000000000055ull,
544 .test_adder = 0x3000040000000ull, 621 .test_adder = 0x3000040000000ull,
@@ -548,4 +625,6 @@ struct power_pmu power5p_pmu = {
548 .disable_pmc = power5p_disable_pmc, 625 .disable_pmc = power5p_disable_pmc,
549 .n_generic = ARRAY_SIZE(power5p_generic_events), 626 .n_generic = ARRAY_SIZE(power5p_generic_events),
550 .generic_events = power5p_generic_events, 627 .generic_events = power5p_generic_events,
628 .limited_pmc5_6 = 1,
629 .limited_pmc_event = power5p_limited_pmc_event,
551}; 630};
diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c
index 116c4bb1809e..6e667dc86470 100644
--- a/arch/powerpc/kernel/power5-pmu.c
+++ b/arch/powerpc/kernel/power5-pmu.c
@@ -269,7 +269,8 @@ static int find_alternative_bdecode(unsigned int event)
269 return -1; 269 return -1;
270} 270}
271 271
272static int power5_get_alternatives(unsigned int event, unsigned int alt[]) 272static int power5_get_alternatives(unsigned int event, unsigned int flags,
273 unsigned int alt[])
273{ 274{
274 int i, j, ae, nalt = 1; 275 int i, j, ae, nalt = 1;
275 276
diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c
index fce1fc290a1d..d44049f0ae27 100644
--- a/arch/powerpc/kernel/power6-pmu.c
+++ b/arch/powerpc/kernel/power6-pmu.c
@@ -182,7 +182,7 @@ static int p6_compute_mmcr(unsigned int event[], int n_ev,
182 unsigned int ttmset = 0; 182 unsigned int ttmset = 0;
183 unsigned int pmc_inuse = 0; 183 unsigned int pmc_inuse = 0;
184 184
185 if (n_ev > 4) 185 if (n_ev > 6)
186 return -1; 186 return -1;
187 for (i = 0; i < n_ev; ++i) { 187 for (i = 0; i < n_ev; ++i) {
188 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; 188 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
@@ -202,6 +202,8 @@ static int p6_compute_mmcr(unsigned int event[], int n_ev,
202 for (pmc = 0; pmc < 4; ++pmc) 202 for (pmc = 0; pmc < 4; ++pmc)
203 if (!(pmc_inuse & (1 << pmc))) 203 if (!(pmc_inuse & (1 << pmc)))
204 break; 204 break;
205 if (pmc >= 4)
206 return -1;
205 pmc_inuse |= 1 << pmc; 207 pmc_inuse |= 1 << pmc;
206 } 208 }
207 hwc[i] = pmc; 209 hwc[i] = pmc;
@@ -240,7 +242,8 @@ static int p6_compute_mmcr(unsigned int event[], int n_ev,
240 } 242 }
241 if (power6_marked_instr_event(event[i])) 243 if (power6_marked_instr_event(event[i]))
242 mmcra |= MMCRA_SAMPLE_ENABLE; 244 mmcra |= MMCRA_SAMPLE_ENABLE;
243 mmcr1 |= (u64)psel << MMCR1_PMCSEL_SH(pmc); 245 if (pmc < 4)
246 mmcr1 |= (u64)psel << MMCR1_PMCSEL_SH(pmc);
244 } 247 }
245 mmcr[0] = 0; 248 mmcr[0] = 0;
246 if (pmc_inuse & 1) 249 if (pmc_inuse & 1)
@@ -256,19 +259,20 @@ static int p6_compute_mmcr(unsigned int event[], int n_ev,
256 * Layout of constraint bits: 259 * Layout of constraint bits:
257 * 260 *
258 * 0-1 add field: number of uses of PMC1 (max 1) 261 * 0-1 add field: number of uses of PMC1 (max 1)
259 * 2-3, 4-5, 6-7: ditto for PMC2, 3, 4 262 * 2-3, 4-5, 6-7, 8-9, 10-11: ditto for PMC2, 3, 4, 5, 6
260 * 8-10 select field: nest (subunit) event selector 263 * 12-15 add field: number of uses of PMC1-4 (max 4)
261 * 16-19 select field: unit on byte 0 of event bus 264 * 16-19 select field: unit on byte 0 of event bus
262 * 20-23, 24-27, 28-31 ditto for bytes 1, 2, 3 265 * 20-23, 24-27, 28-31 ditto for bytes 1, 2, 3
266 * 32-34 select field: nest (subunit) event selector
263 */ 267 */
264static int p6_get_constraint(unsigned int event, u64 *maskp, u64 *valp) 268static int p6_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
265{ 269{
266 int pmc, byte, sh; 270 int pmc, byte, sh, subunit;
267 unsigned int mask = 0, value = 0; 271 u64 mask = 0, value = 0;
268 272
269 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; 273 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
270 if (pmc) { 274 if (pmc) {
271 if (pmc > 4) 275 if (pmc > 4 && !(event == 0x500009 || event == 0x600005))
272 return -1; 276 return -1;
273 sh = (pmc - 1) * 2; 277 sh = (pmc - 1) * 2;
274 mask |= 2 << sh; 278 mask |= 2 << sh;
@@ -276,26 +280,38 @@ static int p6_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
276 } 280 }
277 if (event & PM_BUSEVENT_MSK) { 281 if (event & PM_BUSEVENT_MSK) {
278 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; 282 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
279 sh = byte * 4; 283 sh = byte * 4 + (16 - PM_UNIT_SH);
280 mask |= PM_UNIT_MSKS << sh; 284 mask |= PM_UNIT_MSKS << sh;
281 value |= (event & PM_UNIT_MSKS) << sh; 285 value |= (u64)(event & PM_UNIT_MSKS) << sh;
282 if ((event & PM_UNIT_MSKS) == (5 << PM_UNIT_SH)) { 286 if ((event & PM_UNIT_MSKS) == (5 << PM_UNIT_SH)) {
283 mask |= PM_SUBUNIT_MSKS; 287 subunit = (event >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK;
284 value |= event & PM_SUBUNIT_MSKS; 288 mask |= (u64)PM_SUBUNIT_MSK << 32;
289 value |= (u64)subunit << 32;
285 } 290 }
286 } 291 }
292 if (pmc <= 4) {
293 mask |= 0x8000; /* add field for count of PMC1-4 uses */
294 value |= 0x1000;
295 }
287 *maskp = mask; 296 *maskp = mask;
288 *valp = value; 297 *valp = value;
289 return 0; 298 return 0;
290} 299}
291 300
301static int p6_limited_pmc_event(unsigned int event)
302{
303 int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
304
305 return pmc == 5 || pmc == 6;
306}
307
292#define MAX_ALT 4 /* at most 4 alternatives for any event */ 308#define MAX_ALT 4 /* at most 4 alternatives for any event */
293 309
294static const unsigned int event_alternatives[][MAX_ALT] = { 310static const unsigned int event_alternatives[][MAX_ALT] = {
295 { 0x0130e8, 0x2000f6, 0x3000fc }, /* PM_PTEG_RELOAD_VALID */ 311 { 0x0130e8, 0x2000f6, 0x3000fc }, /* PM_PTEG_RELOAD_VALID */
296 { 0x080080, 0x10000d, 0x30000c, 0x4000f0 }, /* PM_LD_MISS_L1 */ 312 { 0x080080, 0x10000d, 0x30000c, 0x4000f0 }, /* PM_LD_MISS_L1 */
297 { 0x080088, 0x200054, 0x3000f0 }, /* PM_ST_MISS_L1 */ 313 { 0x080088, 0x200054, 0x3000f0 }, /* PM_ST_MISS_L1 */
298 { 0x10000a, 0x2000f4 }, /* PM_RUN_CYC */ 314 { 0x10000a, 0x2000f4, 0x600005 }, /* PM_RUN_CYC */
299 { 0x10000b, 0x2000f5 }, /* PM_RUN_COUNT */ 315 { 0x10000b, 0x2000f5 }, /* PM_RUN_COUNT */
300 { 0x10000e, 0x400010 }, /* PM_PURR */ 316 { 0x10000e, 0x400010 }, /* PM_PURR */
301 { 0x100010, 0x4000f8 }, /* PM_FLUSH */ 317 { 0x100010, 0x4000f8 }, /* PM_FLUSH */
@@ -340,13 +356,15 @@ static int find_alternatives_list(unsigned int event)
340 return -1; 356 return -1;
341} 357}
342 358
343static int p6_get_alternatives(unsigned int event, unsigned int alt[]) 359static int p6_get_alternatives(unsigned int event, unsigned int flags,
360 unsigned int alt[])
344{ 361{
345 int i, j; 362 int i, j, nlim;
346 unsigned int aevent, psel, pmc; 363 unsigned int aevent, psel, pmc;
347 unsigned int nalt = 1; 364 unsigned int nalt = 1;
348 365
349 alt[0] = event; 366 alt[0] = event;
367 nlim = p6_limited_pmc_event(event);
350 368
351 /* check the alternatives table */ 369 /* check the alternatives table */
352 i = find_alternatives_list(event); 370 i = find_alternatives_list(event);
@@ -358,6 +376,7 @@ static int p6_get_alternatives(unsigned int event, unsigned int alt[])
358 break; 376 break;
359 if (aevent != event) 377 if (aevent != event)
360 alt[nalt++] = aevent; 378 alt[nalt++] = aevent;
379 nlim += p6_limited_pmc_event(aevent);
361 } 380 }
362 381
363 } else { 382 } else {
@@ -375,13 +394,75 @@ static int p6_get_alternatives(unsigned int event, unsigned int alt[])
375 ((pmc > 2? pmc - 2: pmc + 2) << PM_PMC_SH); 394 ((pmc > 2? pmc - 2: pmc + 2) << PM_PMC_SH);
376 } 395 }
377 396
397 if (flags & PPMU_ONLY_COUNT_RUN) {
398 /*
399 * We're only counting in RUN state,
400 * so PM_CYC is equivalent to PM_RUN_CYC,
401 * PM_INST_CMPL === PM_RUN_INST_CMPL, PM_PURR === PM_RUN_PURR.
402 * This doesn't include alternatives that don't provide
403 * any extra flexibility in assigning PMCs (e.g.
404 * 0x10000a for PM_RUN_CYC vs. 0x1e for PM_CYC).
405 * Note that even with these additional alternatives
406 * we never end up with more than 4 alternatives for any event.
407 */
408 j = nalt;
409 for (i = 0; i < nalt; ++i) {
410 switch (alt[i]) {
411 case 0x1e: /* PM_CYC */
412 alt[j++] = 0x600005; /* PM_RUN_CYC */
413 ++nlim;
414 break;
415 case 0x10000a: /* PM_RUN_CYC */
416 alt[j++] = 0x1e; /* PM_CYC */
417 break;
418 case 2: /* PM_INST_CMPL */
419 alt[j++] = 0x500009; /* PM_RUN_INST_CMPL */
420 ++nlim;
421 break;
422 case 0x500009: /* PM_RUN_INST_CMPL */
423 alt[j++] = 2; /* PM_INST_CMPL */
424 break;
425 case 0x10000e: /* PM_PURR */
426 alt[j++] = 0x4000f4; /* PM_RUN_PURR */
427 break;
428 case 0x4000f4: /* PM_RUN_PURR */
429 alt[j++] = 0x10000e; /* PM_PURR */
430 break;
431 }
432 }
433 nalt = j;
434 }
435
436 if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) {
437 /* remove the limited PMC events */
438 j = 0;
439 for (i = 0; i < nalt; ++i) {
440 if (!p6_limited_pmc_event(alt[i])) {
441 alt[j] = alt[i];
442 ++j;
443 }
444 }
445 nalt = j;
446 } else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) {
447 /* remove all but the limited PMC events */
448 j = 0;
449 for (i = 0; i < nalt; ++i) {
450 if (p6_limited_pmc_event(alt[i])) {
451 alt[j] = alt[i];
452 ++j;
453 }
454 }
455 nalt = j;
456 }
457
378 return nalt; 458 return nalt;
379} 459}
380 460
381static void p6_disable_pmc(unsigned int pmc, u64 mmcr[]) 461static void p6_disable_pmc(unsigned int pmc, u64 mmcr[])
382{ 462{
383 /* Set PMCxSEL to 0 to disable PMCx */ 463 /* Set PMCxSEL to 0 to disable PMCx */
384 mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc)); 464 if (pmc <= 3)
465 mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc));
385} 466}
386 467
387static int power6_generic_events[] = { 468static int power6_generic_events[] = {
@@ -394,14 +475,16 @@ static int power6_generic_events[] = {
394}; 475};
395 476
396struct power_pmu power6_pmu = { 477struct power_pmu power6_pmu = {
397 .n_counter = 4, 478 .n_counter = 6,
398 .max_alternatives = MAX_ALT, 479 .max_alternatives = MAX_ALT,
399 .add_fields = 0x55, 480 .add_fields = 0x1555,
400 .test_adder = 0, 481 .test_adder = 0x3000,
401 .compute_mmcr = p6_compute_mmcr, 482 .compute_mmcr = p6_compute_mmcr,
402 .get_constraint = p6_get_constraint, 483 .get_constraint = p6_get_constraint,
403 .get_alternatives = p6_get_alternatives, 484 .get_alternatives = p6_get_alternatives,
404 .disable_pmc = p6_disable_pmc, 485 .disable_pmc = p6_disable_pmc,
405 .n_generic = ARRAY_SIZE(power6_generic_events), 486 .n_generic = ARRAY_SIZE(power6_generic_events),
406 .generic_events = power6_generic_events, 487 .generic_events = power6_generic_events,
488 .limited_pmc5_6 = 1,
489 .limited_pmc_event = p6_limited_pmc_event,
407}; 490};
diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c
index aed8ccd7c077..af2d1884058c 100644
--- a/arch/powerpc/kernel/ppc970-pmu.c
+++ b/arch/powerpc/kernel/ppc970-pmu.c
@@ -243,7 +243,8 @@ static int p970_get_constraint(unsigned int event, u64 *maskp, u64 *valp)
243 return 0; 243 return 0;
244} 244}
245 245
246static int p970_get_alternatives(unsigned int event, unsigned int alt[]) 246static int p970_get_alternatives(unsigned int event, unsigned int flags,
247 unsigned int alt[])
247{ 248{
248 alt[0] = event; 249 alt[0] = event;
249 250