Diffstat (limited to 'arch/sparc/kernel/perf_event.c')
-rw-r--r--      arch/sparc/kernel/perf_event.c      516
1 file changed, 414 insertions, 102 deletions
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 5713957dcb8a..e48651dace1b 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -25,36 +25,48 @@
 #include <linux/atomic.h>
 #include <asm/nmi.h>
 #include <asm/pcr.h>
-#include <asm/perfctr.h>
 #include <asm/cacheflush.h>

 #include "kernel.h"
 #include "kstack.h"

-/* Sparc64 chips have two performance counters, 32-bits each, with
- * overflow interrupts generated on transition from 0xffffffff to 0.
- * The counters are accessed in one go using a 64-bit register.
+/* Two classes of sparc64 chips currently exist.  All of which have
+ * 32-bit counters which can generate overflow interrupts on the
+ * transition from 0xffffffff to 0.
  *
- * Both counters are controlled using a single control register.  The
- * only way to stop all sampling is to clear all of the context (user,
- * supervisor, hypervisor) sampling enable bits.  But these bits apply
- * to both counters, thus the two counters can't be enabled/disabled
- * individually.
+ * All chips upto and including SPARC-T3 have two performance
+ * counters.  The two 32-bit counters are accessed in one go using a
+ * single 64-bit register.
  *
- * The control register has two event fields, one for each of the two
- * counters.  It's thus nearly impossible to have one counter going
- * while keeping the other one stopped.  Therefore it is possible to
- * get overflow interrupts for counters not currently "in use" and
- * that condition must be checked in the overflow interrupt handler.
+ * On these older chips both counters are controlled using a single
+ * control register.  The only way to stop all sampling is to clear
+ * all of the context (user, supervisor, hypervisor) sampling enable
+ * bits.  But these bits apply to both counters, thus the two counters
+ * can't be enabled/disabled individually.
+ *
+ * Furthermore, the control register on these older chips have two
+ * event fields, one for each of the two counters.  It's thus nearly
+ * impossible to have one counter going while keeping the other one
+ * stopped.  Therefore it is possible to get overflow interrupts for
+ * counters not currently "in use" and that condition must be checked
+ * in the overflow interrupt handler.
  *
  * So we use a hack, in that we program inactive counters with the
  * "sw_count0" and "sw_count1" events.  These count how many times
  * the instruction "sethi %hi(0xfc000), %g0" is executed.  It's an
  * unusual way to encode a NOP and therefore will not trigger in
  * normal code.
+ *
+ * Starting with SPARC-T4 we have one control register per counter.
+ * And the counters are stored in individual registers.  The registers
+ * for the counters are 64-bit but only a 32-bit counter is
+ * implemented.  The event selections on SPARC-T4 lack any
+ * restrictions, therefore we can elide all of the complicated
+ * conflict resolution code we have for SPARC-T3 and earlier chips.
  */

-#define MAX_HWEVENTS 2
+#define MAX_HWEVENTS 4
+#define MAX_PCRS 4
 #define MAX_PERIOD ((1UL << 32) - 1)

 #define PIC_UPPER_INDEX 0
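
Note: the overflow behavior spelled out in the rewritten comment above is what the sampling code later in this patch builds on: sparc_perf_event_set_period() primes a counter with the two's complement of the sampling period, so the 0xffffffff -> 0 transition (and thus the overflow interrupt) fires after exactly "period" events. A minimal sketch of that arithmetic, assuming the kernel's u32/u64 types; the helper name is illustrative, not part of the patch:

        /* Illustrative only: prime a 32-bit PIC so it overflows after
         * exactly "period" events, mirroring the (u64)(-left) & 0xffffffff
         * idiom used by sparc_perf_event_set_period() below.
         */
        static u32 prime_pic(u64 period)
        {
                return (u32)(-period & 0xffffffff);     /* 2^32 - period */
        }
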
@@ -90,8 +102,8 @@ struct cpu_hw_events {
         */
        int current_idx[MAX_HWEVENTS];

-       /* Software copy of %pcr register on this cpu.  */
-       u64 pcr;
+       /* Software copy of %pcr register(s) on this cpu.  */
+       u64 pcr[MAX_HWEVENTS];

        /* Enabled/disable state.  */
        int enabled;
@@ -103,6 +115,8 @@ DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, };
 /* An event map describes the characteristics of a performance
  * counter event.  In particular it gives the encoding as well as
  * a mask telling which counters the event can be measured on.
+ *
+ * The mask is unused on SPARC-T4 and later.
  */
 struct perf_event_map {
        u16 encoding;
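
Note: on the pre-T4 chips the mask mentioned in this comment is a bitmask of PIC_UPPER/PIC_LOWER naming the counters the event can run on. A hypothetical consultation of such an entry, for illustration only (pic_mask stands for the mask field of struct perf_event_map in this file):

        /* Illustrative only: can this event run on the given counter? */
        static bool event_allowed_on(const struct perf_event_map *pmap, int idx)
        {
                u8 need = (idx == PIC_UPPER_INDEX) ? PIC_UPPER : PIC_LOWER;

                return (pmap->pic_mask & need) != 0;
        }
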
@@ -142,15 +156,53 @@ struct sparc_pmu {
        const struct perf_event_map *(*event_map)(int);
        const cache_map_t *cache_map;
        int max_events;
+       u32 (*read_pmc)(int);
+       void (*write_pmc)(int, u64);
        int upper_shift;
        int lower_shift;
        int event_mask;
+       int user_bit;
+       int priv_bit;
        int hv_bit;
        int irq_bit;
        int upper_nop;
        int lower_nop;
+       unsigned int flags;
+#define SPARC_PMU_ALL_EXCLUDES_SAME 0x00000001
+#define SPARC_PMU_HAS_CONFLICTS     0x00000002
+       int max_hw_events;
+       int num_pcrs;
+       int num_pic_regs;
 };

+static u32 sparc_default_read_pmc(int idx)
+{
+       u64 val;
+
+       val = pcr_ops->read_pic(0);
+       if (idx == PIC_UPPER_INDEX)
+               val >>= 32;
+
+       return val & 0xffffffff;
+}
+
+static void sparc_default_write_pmc(int idx, u64 val)
+{
+       u64 shift, mask, pic;
+
+       shift = 0;
+       if (idx == PIC_UPPER_INDEX)
+               shift = 32;
+
+       mask = ((u64) 0xffffffff) << shift;
+       val <<= shift;
+
+       pic = pcr_ops->read_pic(0);
+       pic &= ~mask;
+       pic |= val;
+       pcr_ops->write_pic(0, pic);
+}
+
 static const struct perf_event_map ultra3_perfmon_event_map[] = {
        [PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER },
        [PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER },
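
Note: the two default helpers added above encode the pre-T4 register layout: both 32-bit counters live in a single 64-bit %pic, the upper counter in bits 63:32 and the lower in bits 31:0, which is why touching one counter takes the read-modify-write seen in sparc_default_write_pmc(). A standalone sketch of that packing (helper names are illustrative, not part of the patch):

        /* Illustrative only: the shared pre-T4 %pic layout assumed by
         * sparc_default_read_pmc()/sparc_default_write_pmc() above.
         */
        static u64 pic_pack(u32 upper, u32 lower)
        {
                return ((u64)upper << 32) | lower;
        }

        static u32 pic_extract(u64 pic, int idx)
        {
                /* PIC_UPPER_INDEX is 0 in this file. */
                return (idx == PIC_UPPER_INDEX) ? (u32)(pic >> 32) : (u32)pic;
        }
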
@@ -268,11 +320,20 @@ static const struct sparc_pmu ultra3_pmu = {
        .event_map = ultra3_event_map,
        .cache_map = &ultra3_cache_map,
        .max_events = ARRAY_SIZE(ultra3_perfmon_event_map),
+       .read_pmc = sparc_default_read_pmc,
+       .write_pmc = sparc_default_write_pmc,
        .upper_shift = 11,
        .lower_shift = 4,
        .event_mask = 0x3f,
+       .user_bit = PCR_UTRACE,
+       .priv_bit = PCR_STRACE,
        .upper_nop = 0x1c,
        .lower_nop = 0x14,
+       .flags = (SPARC_PMU_ALL_EXCLUDES_SAME |
+                 SPARC_PMU_HAS_CONFLICTS),
+       .max_hw_events = 2,
+       .num_pcrs = 1,
+       .num_pic_regs = 1,
 };

 /* Niagara1 is very limited.  The upper PIC is hard-locked to count
@@ -397,11 +458,20 @@ static const struct sparc_pmu niagara1_pmu = {
        .event_map = niagara1_event_map,
        .cache_map = &niagara1_cache_map,
        .max_events = ARRAY_SIZE(niagara1_perfmon_event_map),
+       .read_pmc = sparc_default_read_pmc,
+       .write_pmc = sparc_default_write_pmc,
        .upper_shift = 0,
        .lower_shift = 4,
        .event_mask = 0x7,
+       .user_bit = PCR_UTRACE,
+       .priv_bit = PCR_STRACE,
        .upper_nop = 0x0,
        .lower_nop = 0x0,
+       .flags = (SPARC_PMU_ALL_EXCLUDES_SAME |
+                 SPARC_PMU_HAS_CONFLICTS),
+       .max_hw_events = 2,
+       .num_pcrs = 1,
+       .num_pic_regs = 1,
 };

 static const struct perf_event_map niagara2_perfmon_event_map[] = {
@@ -523,13 +593,203 @@ static const struct sparc_pmu niagara2_pmu = {
        .event_map = niagara2_event_map,
        .cache_map = &niagara2_cache_map,
        .max_events = ARRAY_SIZE(niagara2_perfmon_event_map),
+       .read_pmc = sparc_default_read_pmc,
+       .write_pmc = sparc_default_write_pmc,
        .upper_shift = 19,
        .lower_shift = 6,
        .event_mask = 0xfff,
-       .hv_bit = 0x8,
+       .user_bit = PCR_UTRACE,
+       .priv_bit = PCR_STRACE,
+       .hv_bit = PCR_N2_HTRACE,
        .irq_bit = 0x30,
        .upper_nop = 0x220,
        .lower_nop = 0x220,
+       .flags = (SPARC_PMU_ALL_EXCLUDES_SAME |
+                 SPARC_PMU_HAS_CONFLICTS),
+       .max_hw_events = 2,
+       .num_pcrs = 1,
+       .num_pic_regs = 1,
+};
+
+static const struct perf_event_map niagara4_perfmon_event_map[] = {
+       [PERF_COUNT_HW_CPU_CYCLES] = { (26 << 6) },
+       [PERF_COUNT_HW_INSTRUCTIONS] = { (3 << 6) | 0x3f },
+       [PERF_COUNT_HW_CACHE_REFERENCES] = { (3 << 6) | 0x04 },
+       [PERF_COUNT_HW_CACHE_MISSES] = { (16 << 6) | 0x07 },
+       [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { (4 << 6) | 0x01 },
+       [PERF_COUNT_HW_BRANCH_MISSES] = { (25 << 6) | 0x0f },
+};
+
+static const struct perf_event_map *niagara4_event_map(int event_id)
+{
+       return &niagara4_perfmon_event_map[event_id];
+}
+
+static const cache_map_t niagara4_cache_map = {
+[C(L1D)] = {
+       [C(OP_READ)] = {
+               [C(RESULT_ACCESS)] = { (3 << 6) | 0x04 },
+               [C(RESULT_MISS)] = { (16 << 6) | 0x07 },
+       },
+       [C(OP_WRITE)] = {
+               [C(RESULT_ACCESS)] = { (3 << 6) | 0x08 },
+               [C(RESULT_MISS)] = { (16 << 6) | 0x07 },
+       },
+       [C(OP_PREFETCH)] = {
+               [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+               [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
+       },
+},
+[C(L1I)] = {
+       [C(OP_READ)] = {
+               [C(RESULT_ACCESS)] = { (3 << 6) | 0x3f },
+               [C(RESULT_MISS)] = { (11 << 6) | 0x03 },
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE },
+               [ C(RESULT_MISS) ] = { CACHE_OP_NONSENSE },
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+               [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
+       },
+},
+[C(LL)] = {
+       [C(OP_READ)] = {
+               [C(RESULT_ACCESS)] = { (3 << 6) | 0x04 },
+               [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
+       },
+       [C(OP_WRITE)] = {
+               [C(RESULT_ACCESS)] = { (3 << 6) | 0x08 },
+               [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
+       },
+       [C(OP_PREFETCH)] = {
+               [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+               [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
+       },
+},
+[C(DTLB)] = {
+       [C(OP_READ)] = {
+               [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+               [C(RESULT_MISS)] = { (17 << 6) | 0x3f },
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+               [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+               [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
+       },
+},
+[C(ITLB)] = {
+       [C(OP_READ)] = {
+               [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+               [C(RESULT_MISS)] = { (6 << 6) | 0x3f },
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+               [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+               [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
+       },
+},
+[C(BPU)] = {
+       [C(OP_READ)] = {
+               [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+               [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+               [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+               [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
+       },
+},
+[C(NODE)] = {
+       [C(OP_READ)] = {
+               [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+               [C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+               [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+               [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
+       },
+},
+};
+
+static u32 sparc_vt_read_pmc(int idx)
+{
+       u64 val = pcr_ops->read_pic(idx);
+
+       return val & 0xffffffff;
+}
+
+static void sparc_vt_write_pmc(int idx, u64 val)
+{
+       u64 pcr;
+
+       /* There seems to be an internal latch on the overflow event
+        * on SPARC-T4 that prevents it from triggering unless you
+        * update the PIC exactly as we do here.  The requirement
+        * seems to be that you have to turn off event counting in the
+        * PCR around the PIC update.
+        *
+        * For example, after the following sequence:
+        *
+        * 1) set PIC to -1
+        * 2) enable event counting and overflow reporting in PCR
+        * 3) overflow triggers, softint 15 handler invoked
+        * 4) clear OV bit in PCR
+        * 5) write PIC to -1
+        *
+        * a subsequent overflow event will not trigger.  This
+        * sequence works on SPARC-T3 and previous chips.
+        */
+       pcr = pcr_ops->read_pcr(idx);
+       pcr_ops->write_pcr(idx, PCR_N4_PICNPT);
+
+       pcr_ops->write_pic(idx, val & 0xffffffff);
+
+       pcr_ops->write_pcr(idx, pcr);
+}
+
+static const struct sparc_pmu niagara4_pmu = {
+       .event_map = niagara4_event_map,
+       .cache_map = &niagara4_cache_map,
+       .max_events = ARRAY_SIZE(niagara4_perfmon_event_map),
+       .read_pmc = sparc_vt_read_pmc,
+       .write_pmc = sparc_vt_write_pmc,
+       .upper_shift = 5,
+       .lower_shift = 5,
+       .event_mask = 0x7ff,
+       .user_bit = PCR_N4_UTRACE,
+       .priv_bit = PCR_N4_STRACE,
+
+       /* We explicitly don't support hypervisor tracing.  The T4
+        * generates the overflow event for precise events via a trap
+        * which will not be generated (ie. it's completely lost) if
+        * we happen to be in the hypervisor when the event triggers.
+        * Essentially, the overflow event reporting is completely
+        * unusable when you have hypervisor mode tracing enabled.
+        */
+       .hv_bit = 0,
+
+       .irq_bit = PCR_N4_TOE,
+       .upper_nop = 0,
+       .lower_nop = 0,
+       .flags = 0,
+       .max_hw_events = 4,
+       .num_pcrs = 4,
+       .num_pic_regs = 4,
 };

 static const struct sparc_pmu *sparc_pmu __read_mostly;
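
Note: the comment inside sparc_vt_write_pmc() above is the central hardware quirk of this hunk: on SPARC-T4 the PIC may only be rewritten while event counting is disabled in that counter's PCR, otherwise the next overflow interrupt is silently swallowed. Restated as a compact sketch using only the pcr_ops calls the patch itself uses (the helper name is illustrative):

        /* Illustrative only: the latch-safe PIC update sequence that
         * sparc_vt_write_pmc() implements.
         */
        static void t4_reprogram_pic(int idx, u64 count)
        {
                u64 saved = pcr_ops->read_pcr(idx);     /* save enable bits */

                pcr_ops->write_pcr(idx, PCR_N4_PICNPT); /* counting off     */
                pcr_ops->write_pic(idx, count & 0xffffffff);
                pcr_ops->write_pcr(idx, saved);         /* counting back on */
        }
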
@@ -558,55 +818,35 @@ static u64 nop_for_index(int idx)
 static inline void sparc_pmu_enable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx)
 {
        u64 val, mask = mask_for_index(idx);
+       int pcr_index = 0;

-       val = cpuc->pcr;
+       if (sparc_pmu->num_pcrs > 1)
+               pcr_index = idx;
+
+       val = cpuc->pcr[pcr_index];
        val &= ~mask;
        val |= hwc->config;
-       cpuc->pcr = val;
+       cpuc->pcr[pcr_index] = val;

-       pcr_ops->write(cpuc->pcr);
+       pcr_ops->write_pcr(pcr_index, cpuc->pcr[pcr_index]);
 }

 static inline void sparc_pmu_disable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx)
 {
        u64 mask = mask_for_index(idx);
        u64 nop = nop_for_index(idx);
+       int pcr_index = 0;
        u64 val;

-       val = cpuc->pcr;
+       if (sparc_pmu->num_pcrs > 1)
+               pcr_index = idx;
+
+       val = cpuc->pcr[pcr_index];
        val &= ~mask;
        val |= nop;
-       cpuc->pcr = val;
+       cpuc->pcr[pcr_index] = val;

-       pcr_ops->write(cpuc->pcr);
-}
-
-static u32 read_pmc(int idx)
-{
-       u64 val;
-
-       read_pic(val);
-       if (idx == PIC_UPPER_INDEX)
-               val >>= 32;
-
-       return val & 0xffffffff;
-}
-
-static void write_pmc(int idx, u64 val)
-{
-       u64 shift, mask, pic;
-
-       shift = 0;
-       if (idx == PIC_UPPER_INDEX)
-               shift = 32;
-
-       mask = ((u64) 0xffffffff) << shift;
-       val <<= shift;
-
-       read_pic(pic);
-       pic &= ~mask;
-       pic |= val;
-       write_pic(pic);
+       pcr_ops->write_pcr(pcr_index, cpuc->pcr[pcr_index]);
 }

 static u64 sparc_perf_event_update(struct perf_event *event,
@@ -618,7 +858,7 @@ static u64 sparc_perf_event_update(struct perf_event *event,

 again:
        prev_raw_count = local64_read(&hwc->prev_count);
-       new_raw_count = read_pmc(idx);
+       new_raw_count = sparc_pmu->read_pmc(idx);

        if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
                            new_raw_count) != prev_raw_count)
@@ -658,25 +898,17 @@ static int sparc_perf_event_set_period(struct perf_event *event,

        local64_set(&hwc->prev_count, (u64)-left);

-       write_pmc(idx, (u64)(-left) & 0xffffffff);
+       sparc_pmu->write_pmc(idx, (u64)(-left) & 0xffffffff);

        perf_event_update_userpage(event);

        return ret;
 }

-/* If performance event entries have been added, move existing
- * events around (if necessary) and then assign new entries to
- * counters.
- */
-static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr)
+static void read_in_all_counters(struct cpu_hw_events *cpuc)
 {
        int i;

-       if (!cpuc->n_added)
-               goto out;
-
-       /* Read in the counters which are moving.  */
        for (i = 0; i < cpuc->n_events; i++) {
                struct perf_event *cp = cpuc->event[i];

@@ -687,6 +919,20 @@ static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr)
                        cpuc->current_idx[i] = PIC_NO_INDEX;
                }
        }
+}
+
+/* On this PMU all PICs are programmed using a single PCR.  Calculate
+ * the combined control register value.
+ *
+ * For such chips we require that all of the events have the same
+ * configuration, so just fetch the settings from the first entry.
+ */
+static void calculate_single_pcr(struct cpu_hw_events *cpuc)
+{
+       int i;
+
+       if (!cpuc->n_added)
+               goto out;

        /* Assign to counters all unassigned events.  */
        for (i = 0; i < cpuc->n_events; i++) {
@@ -702,20 +948,71 @@ static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr)
                cpuc->current_idx[i] = idx;

                enc = perf_event_get_enc(cpuc->events[i]);
-               pcr &= ~mask_for_index(idx);
+               cpuc->pcr[0] &= ~mask_for_index(idx);
                if (hwc->state & PERF_HES_STOPPED)
-                       pcr |= nop_for_index(idx);
+                       cpuc->pcr[0] |= nop_for_index(idx);
                else
-                       pcr |= event_encoding(enc, idx);
+                       cpuc->pcr[0] |= event_encoding(enc, idx);
        }
 out:
-       return pcr;
+       cpuc->pcr[0] |= cpuc->event[0]->hw.config_base;
+}
+
+/* On this PMU each PIC has it's own PCR control register.  */
+static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc)
+{
+       int i;
+
+       if (!cpuc->n_added)
+               goto out;
+
+       for (i = 0; i < cpuc->n_events; i++) {
+               struct perf_event *cp = cpuc->event[i];
+               struct hw_perf_event *hwc = &cp->hw;
+               int idx = hwc->idx;
+               u64 enc;
+
+               if (cpuc->current_idx[i] != PIC_NO_INDEX)
+                       continue;
+
+               sparc_perf_event_set_period(cp, hwc, idx);
+               cpuc->current_idx[i] = idx;
+
+               enc = perf_event_get_enc(cpuc->events[i]);
+               cpuc->pcr[idx] &= ~mask_for_index(idx);
+               if (hwc->state & PERF_HES_STOPPED)
+                       cpuc->pcr[idx] |= nop_for_index(idx);
+               else
+                       cpuc->pcr[idx] |= event_encoding(enc, idx);
+       }
+out:
+       for (i = 0; i < cpuc->n_events; i++) {
+               struct perf_event *cp = cpuc->event[i];
+               int idx = cp->hw.idx;
+
+               cpuc->pcr[idx] |= cp->hw.config_base;
+       }
+}
+
+/* If performance event entries have been added, move existing events
+ * around (if necessary) and then assign new entries to counters.
+ */
+static void update_pcrs_for_enable(struct cpu_hw_events *cpuc)
+{
+       if (cpuc->n_added)
+               read_in_all_counters(cpuc);
+
+       if (sparc_pmu->num_pcrs == 1) {
+               calculate_single_pcr(cpuc);
+       } else {
+               calculate_multiple_pcrs(cpuc);
+       }
 }

 static void sparc_pmu_enable(struct pmu *pmu)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-       u64 pcr;
+       int i;

        if (cpuc->enabled)
                return;
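
Note: the split above means an event's counter index plays two roles. On a single-PCR chip every event's control bits are merged into pcr[0]; on SPARC-T4 each counter idx owns pcr[idx]. The slot selection that sparc_pmu_enable_event()/sparc_pmu_disable_event() perform with their pcr_index variable reduces to this (hypothetical helper, for illustration only):

        /* Illustrative only: which software PCR slot a counter maps to,
         * following the num_pcrs convention used throughout this patch.
         */
        static int pcr_slot(const struct sparc_pmu *pmu, int counter_idx)
        {
                return (pmu->num_pcrs > 1) ? counter_idx : 0;
        }
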
@@ -723,26 +1020,17 @@
        cpuc->enabled = 1;
        barrier();

-       pcr = cpuc->pcr;
-       if (!cpuc->n_events) {
-               pcr = 0;
-       } else {
-               pcr = maybe_change_configuration(cpuc, pcr);
-
-               /* We require that all of the events have the same
-                * configuration, so just fetch the settings from the
-                * first entry.
-                */
-               cpuc->pcr = pcr | cpuc->event[0]->hw.config_base;
-       }
+       if (cpuc->n_events)
+               update_pcrs_for_enable(cpuc);

-       pcr_ops->write(cpuc->pcr);
+       for (i = 0; i < sparc_pmu->num_pcrs; i++)
+               pcr_ops->write_pcr(i, cpuc->pcr[i]);
 }

 static void sparc_pmu_disable(struct pmu *pmu)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-       u64 val;
+       int i;

        if (!cpuc->enabled)
                return;
@@ -750,12 +1038,14 @@ static void sparc_pmu_disable(struct pmu *pmu)
        cpuc->enabled = 0;
        cpuc->n_added = 0;

-       val = cpuc->pcr;
-       val &= ~(PCR_UTRACE | PCR_STRACE |
-                sparc_pmu->hv_bit | sparc_pmu->irq_bit);
-       cpuc->pcr = val;
+       for (i = 0; i < sparc_pmu->num_pcrs; i++) {
+               u64 val = cpuc->pcr[i];

-       pcr_ops->write(cpuc->pcr);
+               val &= ~(sparc_pmu->user_bit | sparc_pmu->priv_bit |
+                        sparc_pmu->hv_bit | sparc_pmu->irq_bit);
+               cpuc->pcr[i] = val;
+               pcr_ops->write_pcr(i, cpuc->pcr[i]);
+       }
 }

 static int active_event_index(struct cpu_hw_events *cpuc,
@@ -854,9 +1144,11 @@ static DEFINE_MUTEX(pmc_grab_mutex);
 static void perf_stop_nmi_watchdog(void *unused)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+       int i;

        stop_nmi_watchdog(NULL);
-       cpuc->pcr = pcr_ops->read();
+       for (i = 0; i < sparc_pmu->num_pcrs; i++)
+               cpuc->pcr[i] = pcr_ops->read_pcr(i);
 }

 void perf_event_grab_pmc(void)
@@ -942,9 +1234,17 @@ static int sparc_check_constraints(struct perf_event **evts,
        if (!n_ev)
                return 0;

-       if (n_ev > MAX_HWEVENTS)
+       if (n_ev > sparc_pmu->max_hw_events)
                return -1;

+       if (!(sparc_pmu->flags & SPARC_PMU_HAS_CONFLICTS)) {
+               int i;
+
+               for (i = 0; i < n_ev; i++)
+                       evts[i]->hw.idx = i;
+               return 0;
+       }
+
        msk0 = perf_event_get_msk(events[0]);
        if (n_ev == 1) {
                if (msk0 & PIC_LOWER)
@@ -1000,6 +1300,9 @@ static int check_excludes(struct perf_event **evts, int n_prev, int n_new)
        struct perf_event *event;
        int i, n, first;

+       if (!(sparc_pmu->flags & SPARC_PMU_ALL_EXCLUDES_SAME))
+               return 0;
+
        n = n_prev + n_new;
        if (n <= 1)
                return 0;
@@ -1059,7 +1362,7 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags)
        perf_pmu_disable(event->pmu);

        n0 = cpuc->n_events;
-       if (n0 >= MAX_HWEVENTS)
+       if (n0 >= sparc_pmu->max_hw_events)
                goto out;

        cpuc->event[n0] = event;
@@ -1146,16 +1449,16 @@ static int sparc_pmu_event_init(struct perf_event *event)
        /* We save the enable bits in the config_base.  */
        hwc->config_base = sparc_pmu->irq_bit;
        if (!attr->exclude_user)
-               hwc->config_base |= PCR_UTRACE;
+               hwc->config_base |= sparc_pmu->user_bit;
        if (!attr->exclude_kernel)
-               hwc->config_base |= PCR_STRACE;
+               hwc->config_base |= sparc_pmu->priv_bit;
        if (!attr->exclude_hv)
                hwc->config_base |= sparc_pmu->hv_bit;

        n = 0;
        if (event->group_leader != event) {
                n = collect_events(event->group_leader,
-                                  MAX_HWEVENTS - 1,
+                                  sparc_pmu->max_hw_events - 1,
                                   evts, events, current_idx_dmy);
                if (n < 0)
                        return -EINVAL;
@@ -1254,8 +1557,7 @@ static struct pmu pmu = {
 void perf_event_print_debug(void)
 {
        unsigned long flags;
-       u64 pcr, pic;
-       int cpu;
+       int cpu, i;

        if (!sparc_pmu)
                return;
@@ -1264,12 +1566,13 @@

        cpu = smp_processor_id();

-       pcr = pcr_ops->read();
-       read_pic(pic);
-
        pr_info("\n");
-       pr_info("CPU#%d: PCR[%016llx] PIC[%016llx]\n",
-               cpu, pcr, pic);
+       for (i = 0; i < sparc_pmu->num_pcrs; i++)
+               pr_info("CPU#%d: PCR%d[%016llx]\n",
+                       cpu, i, pcr_ops->read_pcr(i));
+       for (i = 0; i < sparc_pmu->num_pic_regs; i++)
+               pr_info("CPU#%d: PIC%d[%016llx]\n",
+                       cpu, i, pcr_ops->read_pic(i));

        local_irq_restore(flags);
 }
@@ -1305,8 +1608,9 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
         * Do this before we peek at the counters to determine
         * overflow so we don't lose any events.
         */
-       if (sparc_pmu->irq_bit)
-               pcr_ops->write(cpuc->pcr);
+       if (sparc_pmu->irq_bit &&
+           sparc_pmu->num_pcrs == 1)
+               pcr_ops->write_pcr(0, cpuc->pcr[0]);

        for (i = 0; i < cpuc->n_events; i++) {
                struct perf_event *event = cpuc->event[i];
@@ -1314,6 +1618,10 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
                struct hw_perf_event *hwc;
                u64 val;

+               if (sparc_pmu->irq_bit &&
+                   sparc_pmu->num_pcrs > 1)
+                       pcr_ops->write_pcr(idx, cpuc->pcr[idx]);
+
                hwc = &event->hw;
                val = sparc_perf_event_update(event, hwc, idx);
                if (val & (1ULL << 31))
@@ -1352,6 +1660,10 @@ static bool __init supported_pmu(void)
                sparc_pmu = &niagara2_pmu;
                return true;
        }
+       if (!strcmp(sparc_pmu_type, "niagara4")) {
+               sparc_pmu = &niagara4_pmu;
+               return true;
+       }
        return false;
 }
