aboutsummaryrefslogtreecommitdiffstats
path: root/arch/sparc/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-10-02 15:57:42 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2012-10-02 15:57:42 -0400
commita20acf99f75e49271381d65db097c9763060a1e8 (patch)
tree3cf661125e86b7625171b96b885bf5395f62e684 /arch/sparc/kernel
parent437589a74b6a590d175f86cf9f7b2efcee7765e7 (diff)
parent42a4172b6ebb4a419085c6caee7c135e51cae5ea (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-next
Pull sparc updates from David Miller: "Largely this is simply adding support for the Niagara 4 cpu. Major areas are perf events (chip now supports 4 counters and can monitor any event on each counter), crypto (opcodes are availble for sha1, sha256, sha512, md5, crc32c, AES, DES, CAMELLIA, and Kasumi although the last is unsupported since we lack a generic crypto layer Kasumi implementation), and an optimized memcpy. Finally some cleanups by Peter Senna Tschudin." * git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-next: (47 commits) sparc64: Fix trailing whitespace in NG4 memcpy. sparc64: Fix comment type in NG4 copy from user. sparc64: Add SPARC-T4 optimized memcpy. drivers/sbus/char: removes unnecessary semicolon arch/sparc/kernel/pci_sun4v.c: removes unnecessary semicolon sparc64: Fix function argument comment in camellia_sparc64_key_expand asm. sparc64: Fix IV handling bug in des_sparc64_cbc_decrypt sparc64: Add auto-loading mechanism to crypto-opcode drivers. sparc64: Add missing pr_fmt define to crypto opcode drivers. sparc64: Adjust crypto priorities. sparc64: Use cpu_pgsz_mask for linear kernel mapping config. sparc64: Probe cpu page size support more portably. sparc64: Support 2GB and 16GB page sizes for kernel linear mappings. sparc64: Fix bugs in unrolled 256-bit loops. sparc64: Avoid code duplication in crypto assembler. sparc64: Unroll CTR crypt loops in AES driver. sparc64: Unroll ECB decryption loops in AES driver. sparc64: Unroll ECB encryption loops in AES driver. sparc64: Add ctr mode support to AES driver. sparc64: Move AES driver over to a methods based implementation. ...
Diffstat (limited to 'arch/sparc/kernel')
-rw-r--r--arch/sparc/kernel/head_64.S14
-rw-r--r--arch/sparc/kernel/hvapi.c1
-rw-r--r--arch/sparc/kernel/hvcalls.S16
-rw-r--r--arch/sparc/kernel/ktlb.S25
-rw-r--r--arch/sparc/kernel/mdesc.c24
-rw-r--r--arch/sparc/kernel/nmi.c21
-rw-r--r--arch/sparc/kernel/pci_sun4v.c2
-rw-r--r--arch/sparc/kernel/pcr.c172
-rw-r--r--arch/sparc/kernel/perf_event.c516
-rw-r--r--arch/sparc/kernel/setup_64.c67
10 files changed, 690 insertions, 168 deletions
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index b42ddbf9651e..ee5dcced2499 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -559,10 +559,10 @@ niagara_tlb_fixup:
559 be,pt %xcc, niagara2_patch 559 be,pt %xcc, niagara2_patch
560 nop 560 nop
561 cmp %g1, SUN4V_CHIP_NIAGARA4 561 cmp %g1, SUN4V_CHIP_NIAGARA4
562 be,pt %xcc, niagara2_patch 562 be,pt %xcc, niagara4_patch
563 nop 563 nop
564 cmp %g1, SUN4V_CHIP_NIAGARA5 564 cmp %g1, SUN4V_CHIP_NIAGARA5
565 be,pt %xcc, niagara2_patch 565 be,pt %xcc, niagara4_patch
566 nop 566 nop
567 567
568 call generic_patch_copyops 568 call generic_patch_copyops
@@ -573,6 +573,16 @@ niagara_tlb_fixup:
573 nop 573 nop
574 574
575 ba,a,pt %xcc, 80f 575 ba,a,pt %xcc, 80f
576niagara4_patch:
577 call niagara4_patch_copyops
578 nop
579 call niagara_patch_bzero
580 nop
581 call niagara4_patch_pageops
582 nop
583
584 ba,a,pt %xcc, 80f
585
576niagara2_patch: 586niagara2_patch:
577 call niagara2_patch_copyops 587 call niagara2_patch_copyops
578 nop 588 nop
diff --git a/arch/sparc/kernel/hvapi.c b/arch/sparc/kernel/hvapi.c
index 8593672838fd..1032df43ec95 100644
--- a/arch/sparc/kernel/hvapi.c
+++ b/arch/sparc/kernel/hvapi.c
@@ -45,6 +45,7 @@ static struct api_info api_table[] = {
45 { .group = HV_GRP_NIU, }, 45 { .group = HV_GRP_NIU, },
46 { .group = HV_GRP_VF_CPU, }, 46 { .group = HV_GRP_VF_CPU, },
47 { .group = HV_GRP_KT_CPU, }, 47 { .group = HV_GRP_KT_CPU, },
48 { .group = HV_GRP_VT_CPU, },
48 { .group = HV_GRP_DIAG, .flags = FLAG_PRE_API }, 49 { .group = HV_GRP_DIAG, .flags = FLAG_PRE_API },
49}; 50};
50 51
diff --git a/arch/sparc/kernel/hvcalls.S b/arch/sparc/kernel/hvcalls.S
index 58d60de4d65b..f3ab509b76a8 100644
--- a/arch/sparc/kernel/hvcalls.S
+++ b/arch/sparc/kernel/hvcalls.S
@@ -805,3 +805,19 @@ ENTRY(sun4v_reboot_data_set)
805 retl 805 retl
806 nop 806 nop
807ENDPROC(sun4v_reboot_data_set) 807ENDPROC(sun4v_reboot_data_set)
808
809ENTRY(sun4v_vt_get_perfreg)
810 mov %o1, %o4
811 mov HV_FAST_VT_GET_PERFREG, %o5
812 ta HV_FAST_TRAP
813 stx %o1, [%o4]
814 retl
815 nop
816ENDPROC(sun4v_vt_get_perfreg)
817
818ENTRY(sun4v_vt_set_perfreg)
819 mov HV_FAST_VT_SET_PERFREG, %o5
820 ta HV_FAST_TRAP
821 retl
822 nop
823ENDPROC(sun4v_vt_set_perfreg)
diff --git a/arch/sparc/kernel/ktlb.S b/arch/sparc/kernel/ktlb.S
index 79f310364849..0746e5e32b37 100644
--- a/arch/sparc/kernel/ktlb.S
+++ b/arch/sparc/kernel/ktlb.S
@@ -188,31 +188,26 @@ valid_addr_bitmap_patch:
188 be,pn %xcc, kvmap_dtlb_longpath 188 be,pn %xcc, kvmap_dtlb_longpath
189 189
1902: sethi %hi(kpte_linear_bitmap), %g2 1902: sethi %hi(kpte_linear_bitmap), %g2
191 or %g2, %lo(kpte_linear_bitmap), %g2
192 191
193 /* Get the 256MB physical address index. */ 192 /* Get the 256MB physical address index. */
194 sllx %g4, 21, %g5 193 sllx %g4, 21, %g5
195 mov 1, %g7 194 or %g2, %lo(kpte_linear_bitmap), %g2
196 srlx %g5, 21 + 28, %g5 195 srlx %g5, 21 + 28, %g5
196 and %g5, (32 - 1), %g7
197 197
198 /* Don't try this at home kids... this depends upon srlx 198 /* Divide by 32 to get the offset into the bitmask. */
199 * only taking the low 6 bits of the shift count in %g5. 199 srlx %g5, 5, %g5
200 */ 200 add %g7, %g7, %g7
201 sllx %g7, %g5, %g7
202
203 /* Divide by 64 to get the offset into the bitmask. */
204 srlx %g5, 6, %g5
205 sllx %g5, 3, %g5 201 sllx %g5, 3, %g5
206 202
207 /* kern_linear_pte_xor[((mask & bit) ? 1 : 0)] */ 203 /* kern_linear_pte_xor[(mask >> shift) & 3)] */
208 ldx [%g2 + %g5], %g2 204 ldx [%g2 + %g5], %g2
209 andcc %g2, %g7, %g0 205 srlx %g2, %g7, %g7
210 sethi %hi(kern_linear_pte_xor), %g5 206 sethi %hi(kern_linear_pte_xor), %g5
207 and %g7, 3, %g7
211 or %g5, %lo(kern_linear_pte_xor), %g5 208 or %g5, %lo(kern_linear_pte_xor), %g5
212 bne,a,pt %xcc, 1f 209 sllx %g7, 3, %g7
213 add %g5, 8, %g5 210 ldx [%g5 + %g7], %g2
214
2151: ldx [%g5], %g2
216 211
217 .globl kvmap_linear_patch 212 .globl kvmap_linear_patch
218kvmap_linear_patch: 213kvmap_linear_patch:
diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c
index 6dc796280589..831c001604e8 100644
--- a/arch/sparc/kernel/mdesc.c
+++ b/arch/sparc/kernel/mdesc.c
@@ -817,6 +817,30 @@ void __cpuinit mdesc_populate_present_mask(cpumask_t *mask)
817 mdesc_iterate_over_cpus(record_one_cpu, NULL, mask); 817 mdesc_iterate_over_cpus(record_one_cpu, NULL, mask);
818} 818}
819 819
820static void * __init check_one_pgsz(struct mdesc_handle *hp, u64 mp, int cpuid, void *arg)
821{
822 const u64 *pgsz_prop = mdesc_get_property(hp, mp, "mmu-page-size-list", NULL);
823 unsigned long *pgsz_mask = arg;
824 u64 val;
825
826 val = (HV_PGSZ_MASK_8K | HV_PGSZ_MASK_64K |
827 HV_PGSZ_MASK_512K | HV_PGSZ_MASK_4MB);
828 if (pgsz_prop)
829 val = *pgsz_prop;
830
831 if (!*pgsz_mask)
832 *pgsz_mask = val;
833 else
834 *pgsz_mask &= val;
835 return NULL;
836}
837
838void __init mdesc_get_page_sizes(cpumask_t *mask, unsigned long *pgsz_mask)
839{
840 *pgsz_mask = 0;
841 mdesc_iterate_over_cpus(check_one_pgsz, pgsz_mask, mask);
842}
843
820static void * __cpuinit fill_in_one_cpu(struct mdesc_handle *hp, u64 mp, int cpuid, void *arg) 844static void * __cpuinit fill_in_one_cpu(struct mdesc_handle *hp, u64 mp, int cpuid, void *arg)
821{ 845{
822 const u64 *cfreq = mdesc_get_property(hp, mp, "clock-frequency", NULL); 846 const u64 *cfreq = mdesc_get_property(hp, mp, "clock-frequency", NULL);
diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c
index eb1c1f010a47..6479256fd5a4 100644
--- a/arch/sparc/kernel/nmi.c
+++ b/arch/sparc/kernel/nmi.c
@@ -22,7 +22,6 @@
22#include <asm/perf_event.h> 22#include <asm/perf_event.h>
23#include <asm/ptrace.h> 23#include <asm/ptrace.h>
24#include <asm/pcr.h> 24#include <asm/pcr.h>
25#include <asm/perfctr.h>
26 25
27#include "kstack.h" 26#include "kstack.h"
28 27
@@ -109,7 +108,7 @@ notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs)
109 pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP) 108 pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP)
110 touched = 1; 109 touched = 1;
111 else 110 else
112 pcr_ops->write(PCR_PIC_PRIV); 111 pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);
113 112
114 sum = local_cpu_data().irq0_irqs; 113 sum = local_cpu_data().irq0_irqs;
115 if (__get_cpu_var(nmi_touch)) { 114 if (__get_cpu_var(nmi_touch)) {
@@ -126,8 +125,8 @@ notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs)
126 __this_cpu_write(alert_counter, 0); 125 __this_cpu_write(alert_counter, 0);
127 } 126 }
128 if (__get_cpu_var(wd_enabled)) { 127 if (__get_cpu_var(wd_enabled)) {
129 write_pic(picl_value(nmi_hz)); 128 pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz));
130 pcr_ops->write(pcr_enable); 129 pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable);
131 } 130 }
132 131
133 restore_hardirq_stack(orig_sp); 132 restore_hardirq_stack(orig_sp);
@@ -166,7 +165,7 @@ static void report_broken_nmi(int cpu, int *prev_nmi_count)
166 165
167void stop_nmi_watchdog(void *unused) 166void stop_nmi_watchdog(void *unused)
168{ 167{
169 pcr_ops->write(PCR_PIC_PRIV); 168 pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);
170 __get_cpu_var(wd_enabled) = 0; 169 __get_cpu_var(wd_enabled) = 0;
171 atomic_dec(&nmi_active); 170 atomic_dec(&nmi_active);
172} 171}
@@ -223,10 +222,10 @@ void start_nmi_watchdog(void *unused)
223 __get_cpu_var(wd_enabled) = 1; 222 __get_cpu_var(wd_enabled) = 1;
224 atomic_inc(&nmi_active); 223 atomic_inc(&nmi_active);
225 224
226 pcr_ops->write(PCR_PIC_PRIV); 225 pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);
227 write_pic(picl_value(nmi_hz)); 226 pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz));
228 227
229 pcr_ops->write(pcr_enable); 228 pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable);
230} 229}
231 230
232static void nmi_adjust_hz_one(void *unused) 231static void nmi_adjust_hz_one(void *unused)
@@ -234,10 +233,10 @@ static void nmi_adjust_hz_one(void *unused)
234 if (!__get_cpu_var(wd_enabled)) 233 if (!__get_cpu_var(wd_enabled))
235 return; 234 return;
236 235
237 pcr_ops->write(PCR_PIC_PRIV); 236 pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);
238 write_pic(picl_value(nmi_hz)); 237 pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz));
239 238
240 pcr_ops->write(pcr_enable); 239 pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable);
241} 240}
242 241
243void nmi_adjust_hz(unsigned int new_hz) 242void nmi_adjust_hz(unsigned int new_hz)
diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c
index 7661e84a05a0..051b69caeffd 100644
--- a/arch/sparc/kernel/pci_sun4v.c
+++ b/arch/sparc/kernel/pci_sun4v.c
@@ -594,7 +594,7 @@ static int __devinit pci_sun4v_iommu_init(struct pci_pbm_info *pbm)
594 printk(KERN_ERR PFX "Strange virtual-dma[%08x:%08x].\n", 594 printk(KERN_ERR PFX "Strange virtual-dma[%08x:%08x].\n",
595 vdma[0], vdma[1]); 595 vdma[0], vdma[1]);
596 return -EINVAL; 596 return -EINVAL;
597 }; 597 }
598 598
599 dma_mask = (roundup_pow_of_two(vdma[1]) - 1UL); 599 dma_mask = (roundup_pow_of_two(vdma[1]) - 1UL);
600 num_tsb_entries = vdma[1] / IO_PAGE_SIZE; 600 num_tsb_entries = vdma[1] / IO_PAGE_SIZE;
diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c
index 0ce0dd2332aa..269af58497aa 100644
--- a/arch/sparc/kernel/pcr.c
+++ b/arch/sparc/kernel/pcr.c
@@ -13,23 +13,14 @@
13#include <asm/pil.h> 13#include <asm/pil.h>
14#include <asm/pcr.h> 14#include <asm/pcr.h>
15#include <asm/nmi.h> 15#include <asm/nmi.h>
16#include <asm/asi.h>
16#include <asm/spitfire.h> 17#include <asm/spitfire.h>
17#include <asm/perfctr.h>
18 18
19/* This code is shared between various users of the performance 19/* This code is shared between various users of the performance
20 * counters. Users will be oprofile, pseudo-NMI watchdog, and the 20 * counters. Users will be oprofile, pseudo-NMI watchdog, and the
21 * perf_event support layer. 21 * perf_event support layer.
22 */ 22 */
23 23
24#define PCR_SUN4U_ENABLE (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE)
25#define PCR_N2_ENABLE (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE | \
26 PCR_N2_TOE_OV1 | \
27 (2 << PCR_N2_SL1_SHIFT) | \
28 (0xff << PCR_N2_MASK1_SHIFT))
29
30u64 pcr_enable;
31unsigned int picl_shift;
32
33/* Performance counter interrupts run unmasked at PIL level 15. 24/* Performance counter interrupts run unmasked at PIL level 15.
34 * Therefore we can't do things like wakeups and other work 25 * Therefore we can't do things like wakeups and other work
35 * that expects IRQ disabling to be adhered to in locking etc. 26 * that expects IRQ disabling to be adhered to in locking etc.
@@ -60,39 +51,144 @@ void arch_irq_work_raise(void)
60const struct pcr_ops *pcr_ops; 51const struct pcr_ops *pcr_ops;
61EXPORT_SYMBOL_GPL(pcr_ops); 52EXPORT_SYMBOL_GPL(pcr_ops);
62 53
63static u64 direct_pcr_read(void) 54static u64 direct_pcr_read(unsigned long reg_num)
64{ 55{
65 u64 val; 56 u64 val;
66 57
67 read_pcr(val); 58 WARN_ON_ONCE(reg_num != 0);
59 __asm__ __volatile__("rd %%pcr, %0" : "=r" (val));
68 return val; 60 return val;
69} 61}
70 62
71static void direct_pcr_write(u64 val) 63static void direct_pcr_write(unsigned long reg_num, u64 val)
64{
65 WARN_ON_ONCE(reg_num != 0);
66 __asm__ __volatile__("wr %0, 0x0, %%pcr" : : "r" (val));
67}
68
69static u64 direct_pic_read(unsigned long reg_num)
72{ 70{
73 write_pcr(val); 71 u64 val;
72
73 WARN_ON_ONCE(reg_num != 0);
74 __asm__ __volatile__("rd %%pic, %0" : "=r" (val));
75 return val;
76}
77
78static void direct_pic_write(unsigned long reg_num, u64 val)
79{
80 WARN_ON_ONCE(reg_num != 0);
81
82 /* Blackbird errata workaround. See commentary in
83 * arch/sparc64/kernel/smp.c:smp_percpu_timer_interrupt()
84 * for more information.
85 */
86 __asm__ __volatile__("ba,pt %%xcc, 99f\n\t"
87 " nop\n\t"
88 ".align 64\n"
89 "99:wr %0, 0x0, %%pic\n\t"
90 "rd %%pic, %%g0" : : "r" (val));
91}
92
93static u64 direct_picl_value(unsigned int nmi_hz)
94{
95 u32 delta = local_cpu_data().clock_tick / nmi_hz;
96
97 return ((u64)((0 - delta) & 0xffffffff)) << 32;
74} 98}
75 99
76static const struct pcr_ops direct_pcr_ops = { 100static const struct pcr_ops direct_pcr_ops = {
77 .read = direct_pcr_read, 101 .read_pcr = direct_pcr_read,
78 .write = direct_pcr_write, 102 .write_pcr = direct_pcr_write,
103 .read_pic = direct_pic_read,
104 .write_pic = direct_pic_write,
105 .nmi_picl_value = direct_picl_value,
106 .pcr_nmi_enable = (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE),
107 .pcr_nmi_disable = PCR_PIC_PRIV,
79}; 108};
80 109
81static void n2_pcr_write(u64 val) 110static void n2_pcr_write(unsigned long reg_num, u64 val)
82{ 111{
83 unsigned long ret; 112 unsigned long ret;
84 113
114 WARN_ON_ONCE(reg_num != 0);
85 if (val & PCR_N2_HTRACE) { 115 if (val & PCR_N2_HTRACE) {
86 ret = sun4v_niagara2_setperf(HV_N2_PERF_SPARC_CTL, val); 116 ret = sun4v_niagara2_setperf(HV_N2_PERF_SPARC_CTL, val);
87 if (ret != HV_EOK) 117 if (ret != HV_EOK)
88 write_pcr(val); 118 direct_pcr_write(reg_num, val);
89 } else 119 } else
90 write_pcr(val); 120 direct_pcr_write(reg_num, val);
121}
122
123static u64 n2_picl_value(unsigned int nmi_hz)
124{
125 u32 delta = local_cpu_data().clock_tick / (nmi_hz << 2);
126
127 return ((u64)((0 - delta) & 0xffffffff)) << 32;
91} 128}
92 129
93static const struct pcr_ops n2_pcr_ops = { 130static const struct pcr_ops n2_pcr_ops = {
94 .read = direct_pcr_read, 131 .read_pcr = direct_pcr_read,
95 .write = n2_pcr_write, 132 .write_pcr = n2_pcr_write,
133 .read_pic = direct_pic_read,
134 .write_pic = direct_pic_write,
135 .nmi_picl_value = n2_picl_value,
136 .pcr_nmi_enable = (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE |
137 PCR_N2_TOE_OV1 |
138 (2 << PCR_N2_SL1_SHIFT) |
139 (0xff << PCR_N2_MASK1_SHIFT)),
140 .pcr_nmi_disable = PCR_PIC_PRIV,
141};
142
143static u64 n4_pcr_read(unsigned long reg_num)
144{
145 unsigned long val;
146
147 (void) sun4v_vt_get_perfreg(reg_num, &val);
148
149 return val;
150}
151
152static void n4_pcr_write(unsigned long reg_num, u64 val)
153{
154 (void) sun4v_vt_set_perfreg(reg_num, val);
155}
156
157static u64 n4_pic_read(unsigned long reg_num)
158{
159 unsigned long val;
160
161 __asm__ __volatile__("ldxa [%1] %2, %0"
162 : "=r" (val)
163 : "r" (reg_num * 0x8UL), "i" (ASI_PIC));
164
165 return val;
166}
167
168static void n4_pic_write(unsigned long reg_num, u64 val)
169{
170 __asm__ __volatile__("stxa %0, [%1] %2"
171 : /* no outputs */
172 : "r" (val), "r" (reg_num * 0x8UL), "i" (ASI_PIC));
173}
174
175static u64 n4_picl_value(unsigned int nmi_hz)
176{
177 u32 delta = local_cpu_data().clock_tick / (nmi_hz << 2);
178
179 return ((u64)((0 - delta) & 0xffffffff));
180}
181
182static const struct pcr_ops n4_pcr_ops = {
183 .read_pcr = n4_pcr_read,
184 .write_pcr = n4_pcr_write,
185 .read_pic = n4_pic_read,
186 .write_pic = n4_pic_write,
187 .nmi_picl_value = n4_picl_value,
188 .pcr_nmi_enable = (PCR_N4_PICNPT | PCR_N4_STRACE |
189 PCR_N4_UTRACE | PCR_N4_TOE |
190 (26 << PCR_N4_SL_SHIFT)),
191 .pcr_nmi_disable = PCR_N4_PICNPT,
96}; 192};
97 193
98static unsigned long perf_hsvc_group; 194static unsigned long perf_hsvc_group;
@@ -115,6 +211,10 @@ static int __init register_perf_hsvc(void)
115 perf_hsvc_group = HV_GRP_KT_CPU; 211 perf_hsvc_group = HV_GRP_KT_CPU;
116 break; 212 break;
117 213
214 case SUN4V_CHIP_NIAGARA4:
215 perf_hsvc_group = HV_GRP_VT_CPU;
216 break;
217
118 default: 218 default:
119 return -ENODEV; 219 return -ENODEV;
120 } 220 }
@@ -139,6 +239,29 @@ static void __init unregister_perf_hsvc(void)
139 sun4v_hvapi_unregister(perf_hsvc_group); 239 sun4v_hvapi_unregister(perf_hsvc_group);
140} 240}
141 241
242static int __init setup_sun4v_pcr_ops(void)
243{
244 int ret = 0;
245
246 switch (sun4v_chip_type) {
247 case SUN4V_CHIP_NIAGARA1:
248 case SUN4V_CHIP_NIAGARA2:
249 case SUN4V_CHIP_NIAGARA3:
250 pcr_ops = &n2_pcr_ops;
251 break;
252
253 case SUN4V_CHIP_NIAGARA4:
254 pcr_ops = &n4_pcr_ops;
255 break;
256
257 default:
258 ret = -ENODEV;
259 break;
260 }
261
262 return ret;
263}
264
142int __init pcr_arch_init(void) 265int __init pcr_arch_init(void)
143{ 266{
144 int err = register_perf_hsvc(); 267 int err = register_perf_hsvc();
@@ -148,15 +271,14 @@ int __init pcr_arch_init(void)
148 271
149 switch (tlb_type) { 272 switch (tlb_type) {
150 case hypervisor: 273 case hypervisor:
151 pcr_ops = &n2_pcr_ops; 274 err = setup_sun4v_pcr_ops();
152 pcr_enable = PCR_N2_ENABLE; 275 if (err)
153 picl_shift = 2; 276 goto out_unregister;
154 break; 277 break;
155 278
156 case cheetah: 279 case cheetah:
157 case cheetah_plus: 280 case cheetah_plus:
158 pcr_ops = &direct_pcr_ops; 281 pcr_ops = &direct_pcr_ops;
159 pcr_enable = PCR_SUN4U_ENABLE;
160 break; 282 break;
161 283
162 case spitfire: 284 case spitfire:
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 5713957dcb8a..e48651dace1b 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -25,36 +25,48 @@
25#include <linux/atomic.h> 25#include <linux/atomic.h>
26#include <asm/nmi.h> 26#include <asm/nmi.h>
27#include <asm/pcr.h> 27#include <asm/pcr.h>
28#include <asm/perfctr.h>
29#include <asm/cacheflush.h> 28#include <asm/cacheflush.h>
30 29
31#include "kernel.h" 30#include "kernel.h"
32#include "kstack.h" 31#include "kstack.h"
33 32
34/* Sparc64 chips have two performance counters, 32-bits each, with 33/* Two classes of sparc64 chips currently exist. All of which have
35 * overflow interrupts generated on transition from 0xffffffff to 0. 34 * 32-bit counters which can generate overflow interrupts on the
36 * The counters are accessed in one go using a 64-bit register. 35 * transition from 0xffffffff to 0.
37 * 36 *
38 * Both counters are controlled using a single control register. The 37 * All chips upto and including SPARC-T3 have two performance
39 * only way to stop all sampling is to clear all of the context (user, 38 * counters. The two 32-bit counters are accessed in one go using a
40 * supervisor, hypervisor) sampling enable bits. But these bits apply 39 * single 64-bit register.
41 * to both counters, thus the two counters can't be enabled/disabled
42 * individually.
43 * 40 *
44 * The control register has two event fields, one for each of the two 41 * On these older chips both counters are controlled using a single
45 * counters. It's thus nearly impossible to have one counter going 42 * control register. The only way to stop all sampling is to clear
46 * while keeping the other one stopped. Therefore it is possible to 43 * all of the context (user, supervisor, hypervisor) sampling enable
47 * get overflow interrupts for counters not currently "in use" and 44 * bits. But these bits apply to both counters, thus the two counters
48 * that condition must be checked in the overflow interrupt handler. 45 * can't be enabled/disabled individually.
46 *
47 * Furthermore, the control register on these older chips have two
48 * event fields, one for each of the two counters. It's thus nearly
49 * impossible to have one counter going while keeping the other one
50 * stopped. Therefore it is possible to get overflow interrupts for
51 * counters not currently "in use" and that condition must be checked
52 * in the overflow interrupt handler.
49 * 53 *
50 * So we use a hack, in that we program inactive counters with the 54 * So we use a hack, in that we program inactive counters with the
51 * "sw_count0" and "sw_count1" events. These count how many times 55 * "sw_count0" and "sw_count1" events. These count how many times
52 * the instruction "sethi %hi(0xfc000), %g0" is executed. It's an 56 * the instruction "sethi %hi(0xfc000), %g0" is executed. It's an
53 * unusual way to encode a NOP and therefore will not trigger in 57 * unusual way to encode a NOP and therefore will not trigger in
54 * normal code. 58 * normal code.
59 *
60 * Starting with SPARC-T4 we have one control register per counter.
61 * And the counters are stored in individual registers. The registers
62 * for the counters are 64-bit but only a 32-bit counter is
63 * implemented. The event selections on SPARC-T4 lack any
64 * restrictions, therefore we can elide all of the complicated
65 * conflict resolution code we have for SPARC-T3 and earlier chips.
55 */ 66 */
56 67
57#define MAX_HWEVENTS 2 68#define MAX_HWEVENTS 4
69#define MAX_PCRS 4
58#define MAX_PERIOD ((1UL << 32) - 1) 70#define MAX_PERIOD ((1UL << 32) - 1)
59 71
60#define PIC_UPPER_INDEX 0 72#define PIC_UPPER_INDEX 0
@@ -90,8 +102,8 @@ struct cpu_hw_events {
90 */ 102 */
91 int current_idx[MAX_HWEVENTS]; 103 int current_idx[MAX_HWEVENTS];
92 104
93 /* Software copy of %pcr register on this cpu. */ 105 /* Software copy of %pcr register(s) on this cpu. */
94 u64 pcr; 106 u64 pcr[MAX_HWEVENTS];
95 107
96 /* Enabled/disable state. */ 108 /* Enabled/disable state. */
97 int enabled; 109 int enabled;
@@ -103,6 +115,8 @@ DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, };
103/* An event map describes the characteristics of a performance 115/* An event map describes the characteristics of a performance
104 * counter event. In particular it gives the encoding as well as 116 * counter event. In particular it gives the encoding as well as
105 * a mask telling which counters the event can be measured on. 117 * a mask telling which counters the event can be measured on.
118 *
119 * The mask is unused on SPARC-T4 and later.
106 */ 120 */
107struct perf_event_map { 121struct perf_event_map {
108 u16 encoding; 122 u16 encoding;
@@ -142,15 +156,53 @@ struct sparc_pmu {
142 const struct perf_event_map *(*event_map)(int); 156 const struct perf_event_map *(*event_map)(int);
143 const cache_map_t *cache_map; 157 const cache_map_t *cache_map;
144 int max_events; 158 int max_events;
159 u32 (*read_pmc)(int);
160 void (*write_pmc)(int, u64);
145 int upper_shift; 161 int upper_shift;
146 int lower_shift; 162 int lower_shift;
147 int event_mask; 163 int event_mask;
164 int user_bit;
165 int priv_bit;
148 int hv_bit; 166 int hv_bit;
149 int irq_bit; 167 int irq_bit;
150 int upper_nop; 168 int upper_nop;
151 int lower_nop; 169 int lower_nop;
170 unsigned int flags;
171#define SPARC_PMU_ALL_EXCLUDES_SAME 0x00000001
172#define SPARC_PMU_HAS_CONFLICTS 0x00000002
173 int max_hw_events;
174 int num_pcrs;
175 int num_pic_regs;
152}; 176};
153 177
178static u32 sparc_default_read_pmc(int idx)
179{
180 u64 val;
181
182 val = pcr_ops->read_pic(0);
183 if (idx == PIC_UPPER_INDEX)
184 val >>= 32;
185
186 return val & 0xffffffff;
187}
188
189static void sparc_default_write_pmc(int idx, u64 val)
190{
191 u64 shift, mask, pic;
192
193 shift = 0;
194 if (idx == PIC_UPPER_INDEX)
195 shift = 32;
196
197 mask = ((u64) 0xffffffff) << shift;
198 val <<= shift;
199
200 pic = pcr_ops->read_pic(0);
201 pic &= ~mask;
202 pic |= val;
203 pcr_ops->write_pic(0, pic);
204}
205
154static const struct perf_event_map ultra3_perfmon_event_map[] = { 206static const struct perf_event_map ultra3_perfmon_event_map[] = {
155 [PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER }, 207 [PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER },
156 [PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER }, 208 [PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER },
@@ -268,11 +320,20 @@ static const struct sparc_pmu ultra3_pmu = {
268 .event_map = ultra3_event_map, 320 .event_map = ultra3_event_map,
269 .cache_map = &ultra3_cache_map, 321 .cache_map = &ultra3_cache_map,
270 .max_events = ARRAY_SIZE(ultra3_perfmon_event_map), 322 .max_events = ARRAY_SIZE(ultra3_perfmon_event_map),
323 .read_pmc = sparc_default_read_pmc,
324 .write_pmc = sparc_default_write_pmc,
271 .upper_shift = 11, 325 .upper_shift = 11,
272 .lower_shift = 4, 326 .lower_shift = 4,
273 .event_mask = 0x3f, 327 .event_mask = 0x3f,
328 .user_bit = PCR_UTRACE,
329 .priv_bit = PCR_STRACE,
274 .upper_nop = 0x1c, 330 .upper_nop = 0x1c,
275 .lower_nop = 0x14, 331 .lower_nop = 0x14,
332 .flags = (SPARC_PMU_ALL_EXCLUDES_SAME |
333 SPARC_PMU_HAS_CONFLICTS),
334 .max_hw_events = 2,
335 .num_pcrs = 1,
336 .num_pic_regs = 1,
276}; 337};
277 338
278/* Niagara1 is very limited. The upper PIC is hard-locked to count 339/* Niagara1 is very limited. The upper PIC is hard-locked to count
@@ -397,11 +458,20 @@ static const struct sparc_pmu niagara1_pmu = {
397 .event_map = niagara1_event_map, 458 .event_map = niagara1_event_map,
398 .cache_map = &niagara1_cache_map, 459 .cache_map = &niagara1_cache_map,
399 .max_events = ARRAY_SIZE(niagara1_perfmon_event_map), 460 .max_events = ARRAY_SIZE(niagara1_perfmon_event_map),
461 .read_pmc = sparc_default_read_pmc,
462 .write_pmc = sparc_default_write_pmc,
400 .upper_shift = 0, 463 .upper_shift = 0,
401 .lower_shift = 4, 464 .lower_shift = 4,
402 .event_mask = 0x7, 465 .event_mask = 0x7,
466 .user_bit = PCR_UTRACE,
467 .priv_bit = PCR_STRACE,
403 .upper_nop = 0x0, 468 .upper_nop = 0x0,
404 .lower_nop = 0x0, 469 .lower_nop = 0x0,
470 .flags = (SPARC_PMU_ALL_EXCLUDES_SAME |
471 SPARC_PMU_HAS_CONFLICTS),
472 .max_hw_events = 2,
473 .num_pcrs = 1,
474 .num_pic_regs = 1,
405}; 475};
406 476
407static const struct perf_event_map niagara2_perfmon_event_map[] = { 477static const struct perf_event_map niagara2_perfmon_event_map[] = {
@@ -523,13 +593,203 @@ static const struct sparc_pmu niagara2_pmu = {
523 .event_map = niagara2_event_map, 593 .event_map = niagara2_event_map,
524 .cache_map = &niagara2_cache_map, 594 .cache_map = &niagara2_cache_map,
525 .max_events = ARRAY_SIZE(niagara2_perfmon_event_map), 595 .max_events = ARRAY_SIZE(niagara2_perfmon_event_map),
596 .read_pmc = sparc_default_read_pmc,
597 .write_pmc = sparc_default_write_pmc,
526 .upper_shift = 19, 598 .upper_shift = 19,
527 .lower_shift = 6, 599 .lower_shift = 6,
528 .event_mask = 0xfff, 600 .event_mask = 0xfff,
529 .hv_bit = 0x8, 601 .user_bit = PCR_UTRACE,
602 .priv_bit = PCR_STRACE,
603 .hv_bit = PCR_N2_HTRACE,
530 .irq_bit = 0x30, 604 .irq_bit = 0x30,
531 .upper_nop = 0x220, 605 .upper_nop = 0x220,
532 .lower_nop = 0x220, 606 .lower_nop = 0x220,
607 .flags = (SPARC_PMU_ALL_EXCLUDES_SAME |
608 SPARC_PMU_HAS_CONFLICTS),
609 .max_hw_events = 2,
610 .num_pcrs = 1,
611 .num_pic_regs = 1,
612};
613
614static const struct perf_event_map niagara4_perfmon_event_map[] = {
615 [PERF_COUNT_HW_CPU_CYCLES] = { (26 << 6) },
616 [PERF_COUNT_HW_INSTRUCTIONS] = { (3 << 6) | 0x3f },
617 [PERF_COUNT_HW_CACHE_REFERENCES] = { (3 << 6) | 0x04 },
618 [PERF_COUNT_HW_CACHE_MISSES] = { (16 << 6) | 0x07 },
619 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { (4 << 6) | 0x01 },
620 [PERF_COUNT_HW_BRANCH_MISSES] = { (25 << 6) | 0x0f },
621};
622
623static const struct perf_event_map *niagara4_event_map(int event_id)
624{
625 return &niagara4_perfmon_event_map[event_id];
626}
627
628static const cache_map_t niagara4_cache_map = {
629[C(L1D)] = {
630 [C(OP_READ)] = {
631 [C(RESULT_ACCESS)] = { (3 << 6) | 0x04 },
632 [C(RESULT_MISS)] = { (16 << 6) | 0x07 },
633 },
634 [C(OP_WRITE)] = {
635 [C(RESULT_ACCESS)] = { (3 << 6) | 0x08 },
636 [C(RESULT_MISS)] = { (16 << 6) | 0x07 },
637 },
638 [C(OP_PREFETCH)] = {
639 [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
640 [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
641 },
642},
643[C(L1I)] = {
644 [C(OP_READ)] = {
645 [C(RESULT_ACCESS)] = { (3 << 6) | 0x3f },
646 [C(RESULT_MISS)] = { (11 << 6) | 0x03 },
647 },
648 [ C(OP_WRITE) ] = {
649 [ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE },
650 [ C(RESULT_MISS) ] = { CACHE_OP_NONSENSE },
651 },
652 [ C(OP_PREFETCH) ] = {
653 [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
654 [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
655 },
656},
657[C(LL)] = {
658 [C(OP_READ)] = {
659 [C(RESULT_ACCESS)] = { (3 << 6) | 0x04 },
660 [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
661 },
662 [C(OP_WRITE)] = {
663 [C(RESULT_ACCESS)] = { (3 << 6) | 0x08 },
664 [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
665 },
666 [C(OP_PREFETCH)] = {
667 [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
668 [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
669 },
670},
671[C(DTLB)] = {
672 [C(OP_READ)] = {
673 [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
674 [C(RESULT_MISS)] = { (17 << 6) | 0x3f },
675 },
676 [ C(OP_WRITE) ] = {
677 [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
678 [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
679 },
680 [ C(OP_PREFETCH) ] = {
681 [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
682 [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
683 },
684},
685[C(ITLB)] = {
686 [C(OP_READ)] = {
687 [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
688 [C(RESULT_MISS)] = { (6 << 6) | 0x3f },
689 },
690 [ C(OP_WRITE) ] = {
691 [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
692 [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
693 },
694 [ C(OP_PREFETCH) ] = {
695 [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
696 [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
697 },
698},
699[C(BPU)] = {
700 [C(OP_READ)] = {
701 [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
702 [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
703 },
704 [ C(OP_WRITE) ] = {
705 [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
706 [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
707 },
708 [ C(OP_PREFETCH) ] = {
709 [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
710 [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
711 },
712},
713[C(NODE)] = {
714 [C(OP_READ)] = {
715 [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
716 [C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
717 },
718 [ C(OP_WRITE) ] = {
719 [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
720 [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
721 },
722 [ C(OP_PREFETCH) ] = {
723 [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
724 [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
725 },
726},
727};
728
729static u32 sparc_vt_read_pmc(int idx)
730{
731 u64 val = pcr_ops->read_pic(idx);
732
733 return val & 0xffffffff;
734}
735
736static void sparc_vt_write_pmc(int idx, u64 val)
737{
738 u64 pcr;
739
740 /* There seems to be an internal latch on the overflow event
741 * on SPARC-T4 that prevents it from triggering unless you
742 * update the PIC exactly as we do here. The requirement
743 * seems to be that you have to turn off event counting in the
744 * PCR around the PIC update.
745 *
746 * For example, after the following sequence:
747 *
748 * 1) set PIC to -1
749 * 2) enable event counting and overflow reporting in PCR
750 * 3) overflow triggers, softint 15 handler invoked
751 * 4) clear OV bit in PCR
752 * 5) write PIC to -1
753 *
754 * a subsequent overflow event will not trigger. This
755 * sequence works on SPARC-T3 and previous chips.
756 */
757 pcr = pcr_ops->read_pcr(idx);
758 pcr_ops->write_pcr(idx, PCR_N4_PICNPT);
759
760 pcr_ops->write_pic(idx, val & 0xffffffff);
761
762 pcr_ops->write_pcr(idx, pcr);
763}
764
765static const struct sparc_pmu niagara4_pmu = {
766 .event_map = niagara4_event_map,
767 .cache_map = &niagara4_cache_map,
768 .max_events = ARRAY_SIZE(niagara4_perfmon_event_map),
769 .read_pmc = sparc_vt_read_pmc,
770 .write_pmc = sparc_vt_write_pmc,
771 .upper_shift = 5,
772 .lower_shift = 5,
773 .event_mask = 0x7ff,
774 .user_bit = PCR_N4_UTRACE,
775 .priv_bit = PCR_N4_STRACE,
776
777 /* We explicitly don't support hypervisor tracing. The T4
778 * generates the overflow event for precise events via a trap
779 * which will not be generated (ie. it's completely lost) if
780 * we happen to be in the hypervisor when the event triggers.
781 * Essentially, the overflow event reporting is completely
782 * unusable when you have hypervisor mode tracing enabled.
783 */
784 .hv_bit = 0,
785
786 .irq_bit = PCR_N4_TOE,
787 .upper_nop = 0,
788 .lower_nop = 0,
789 .flags = 0,
790 .max_hw_events = 4,
791 .num_pcrs = 4,
792 .num_pic_regs = 4,
533}; 793};
534 794
535static const struct sparc_pmu *sparc_pmu __read_mostly; 795static const struct sparc_pmu *sparc_pmu __read_mostly;
@@ -558,55 +818,35 @@ static u64 nop_for_index(int idx)
558static inline void sparc_pmu_enable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx) 818static inline void sparc_pmu_enable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx)
559{ 819{
560 u64 val, mask = mask_for_index(idx); 820 u64 val, mask = mask_for_index(idx);
821 int pcr_index = 0;
561 822
562 val = cpuc->pcr; 823 if (sparc_pmu->num_pcrs > 1)
824 pcr_index = idx;
825
826 val = cpuc->pcr[pcr_index];
563 val &= ~mask; 827 val &= ~mask;
564 val |= hwc->config; 828 val |= hwc->config;
565 cpuc->pcr = val; 829 cpuc->pcr[pcr_index] = val;
566 830
567 pcr_ops->write(cpuc->pcr); 831 pcr_ops->write_pcr(pcr_index, cpuc->pcr[pcr_index]);
568} 832}
569 833
570static inline void sparc_pmu_disable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx) 834static inline void sparc_pmu_disable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx)
571{ 835{
572 u64 mask = mask_for_index(idx); 836 u64 mask = mask_for_index(idx);
573 u64 nop = nop_for_index(idx); 837 u64 nop = nop_for_index(idx);
838 int pcr_index = 0;
574 u64 val; 839 u64 val;
575 840
576 val = cpuc->pcr; 841 if (sparc_pmu->num_pcrs > 1)
842 pcr_index = idx;
843
844 val = cpuc->pcr[pcr_index];
577 val &= ~mask; 845 val &= ~mask;
578 val |= nop; 846 val |= nop;
579 cpuc->pcr = val; 847 cpuc->pcr[pcr_index] = val;
580 848
581 pcr_ops->write(cpuc->pcr); 849 pcr_ops->write_pcr(pcr_index, cpuc->pcr[pcr_index]);
582}
583
584static u32 read_pmc(int idx)
585{
586 u64 val;
587
588 read_pic(val);
589 if (idx == PIC_UPPER_INDEX)
590 val >>= 32;
591
592 return val & 0xffffffff;
593}
594
595static void write_pmc(int idx, u64 val)
596{
597 u64 shift, mask, pic;
598
599 shift = 0;
600 if (idx == PIC_UPPER_INDEX)
601 shift = 32;
602
603 mask = ((u64) 0xffffffff) << shift;
604 val <<= shift;
605
606 read_pic(pic);
607 pic &= ~mask;
608 pic |= val;
609 write_pic(pic);
610} 850}
611 851
612static u64 sparc_perf_event_update(struct perf_event *event, 852static u64 sparc_perf_event_update(struct perf_event *event,
@@ -618,7 +858,7 @@ static u64 sparc_perf_event_update(struct perf_event *event,
618 858
619again: 859again:
620 prev_raw_count = local64_read(&hwc->prev_count); 860 prev_raw_count = local64_read(&hwc->prev_count);
621 new_raw_count = read_pmc(idx); 861 new_raw_count = sparc_pmu->read_pmc(idx);
622 862
623 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, 863 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
624 new_raw_count) != prev_raw_count) 864 new_raw_count) != prev_raw_count)
@@ -658,25 +898,17 @@ static int sparc_perf_event_set_period(struct perf_event *event,
658 898
659 local64_set(&hwc->prev_count, (u64)-left); 899 local64_set(&hwc->prev_count, (u64)-left);
660 900
661 write_pmc(idx, (u64)(-left) & 0xffffffff); 901 sparc_pmu->write_pmc(idx, (u64)(-left) & 0xffffffff);
662 902
663 perf_event_update_userpage(event); 903 perf_event_update_userpage(event);
664 904
665 return ret; 905 return ret;
666} 906}
667 907
668/* If performance event entries have been added, move existing 908static void read_in_all_counters(struct cpu_hw_events *cpuc)
669 * events around (if necessary) and then assign new entries to
670 * counters.
671 */
672static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr)
673{ 909{
674 int i; 910 int i;
675 911
676 if (!cpuc->n_added)
677 goto out;
678
679 /* Read in the counters which are moving. */
680 for (i = 0; i < cpuc->n_events; i++) { 912 for (i = 0; i < cpuc->n_events; i++) {
681 struct perf_event *cp = cpuc->event[i]; 913 struct perf_event *cp = cpuc->event[i];
682 914
@@ -687,6 +919,20 @@ static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr)
687 cpuc->current_idx[i] = PIC_NO_INDEX; 919 cpuc->current_idx[i] = PIC_NO_INDEX;
688 } 920 }
689 } 921 }
922}
923
924/* On this PMU all PICs are programmed using a single PCR. Calculate
925 * the combined control register value.
926 *
927 * For such chips we require that all of the events have the same
928 * configuration, so just fetch the settings from the first entry.
929 */
930static void calculate_single_pcr(struct cpu_hw_events *cpuc)
931{
932 int i;
933
934 if (!cpuc->n_added)
935 goto out;
690 936
691 /* Assign to counters all unassigned events. */ 937 /* Assign to counters all unassigned events. */
692 for (i = 0; i < cpuc->n_events; i++) { 938 for (i = 0; i < cpuc->n_events; i++) {
@@ -702,20 +948,71 @@ static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr)
702 cpuc->current_idx[i] = idx; 948 cpuc->current_idx[i] = idx;
703 949
704 enc = perf_event_get_enc(cpuc->events[i]); 950 enc = perf_event_get_enc(cpuc->events[i]);
705 pcr &= ~mask_for_index(idx); 951 cpuc->pcr[0] &= ~mask_for_index(idx);
706 if (hwc->state & PERF_HES_STOPPED) 952 if (hwc->state & PERF_HES_STOPPED)
707 pcr |= nop_for_index(idx); 953 cpuc->pcr[0] |= nop_for_index(idx);
708 else 954 else
709 pcr |= event_encoding(enc, idx); 955 cpuc->pcr[0] |= event_encoding(enc, idx);
710 } 956 }
711out: 957out:
712 return pcr; 958 cpuc->pcr[0] |= cpuc->event[0]->hw.config_base;
959}
960
961/* On this PMU each PIC has it's own PCR control register. */
962static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc)
963{
964 int i;
965
966 if (!cpuc->n_added)
967 goto out;
968
969 for (i = 0; i < cpuc->n_events; i++) {
970 struct perf_event *cp = cpuc->event[i];
971 struct hw_perf_event *hwc = &cp->hw;
972 int idx = hwc->idx;
973 u64 enc;
974
975 if (cpuc->current_idx[i] != PIC_NO_INDEX)
976 continue;
977
978 sparc_perf_event_set_period(cp, hwc, idx);
979 cpuc->current_idx[i] = idx;
980
981 enc = perf_event_get_enc(cpuc->events[i]);
982 cpuc->pcr[idx] &= ~mask_for_index(idx);
983 if (hwc->state & PERF_HES_STOPPED)
984 cpuc->pcr[idx] |= nop_for_index(idx);
985 else
986 cpuc->pcr[idx] |= event_encoding(enc, idx);
987 }
988out:
989 for (i = 0; i < cpuc->n_events; i++) {
990 struct perf_event *cp = cpuc->event[i];
991 int idx = cp->hw.idx;
992
993 cpuc->pcr[idx] |= cp->hw.config_base;
994 }
995}
996
997/* If performance event entries have been added, move existing events
998 * around (if necessary) and then assign new entries to counters.
999 */
1000static void update_pcrs_for_enable(struct cpu_hw_events *cpuc)
1001{
1002 if (cpuc->n_added)
1003 read_in_all_counters(cpuc);
1004
1005 if (sparc_pmu->num_pcrs == 1) {
1006 calculate_single_pcr(cpuc);
1007 } else {
1008 calculate_multiple_pcrs(cpuc);
1009 }
713} 1010}
714 1011
715static void sparc_pmu_enable(struct pmu *pmu) 1012static void sparc_pmu_enable(struct pmu *pmu)
716{ 1013{
717 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1014 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
718 u64 pcr; 1015 int i;
719 1016
720 if (cpuc->enabled) 1017 if (cpuc->enabled)
721 return; 1018 return;
@@ -723,26 +1020,17 @@ static void sparc_pmu_enable(struct pmu *pmu)
723 cpuc->enabled = 1; 1020 cpuc->enabled = 1;
724 barrier(); 1021 barrier();
725 1022
726 pcr = cpuc->pcr; 1023 if (cpuc->n_events)
727 if (!cpuc->n_events) { 1024 update_pcrs_for_enable(cpuc);
728 pcr = 0;
729 } else {
730 pcr = maybe_change_configuration(cpuc, pcr);
731
732 /* We require that all of the events have the same
733 * configuration, so just fetch the settings from the
734 * first entry.
735 */
736 cpuc->pcr = pcr | cpuc->event[0]->hw.config_base;
737 }
738 1025
739 pcr_ops->write(cpuc->pcr); 1026 for (i = 0; i < sparc_pmu->num_pcrs; i++)
1027 pcr_ops->write_pcr(i, cpuc->pcr[i]);
740} 1028}
741 1029
742static void sparc_pmu_disable(struct pmu *pmu) 1030static void sparc_pmu_disable(struct pmu *pmu)
743{ 1031{
744 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1032 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
745 u64 val; 1033 int i;
746 1034
747 if (!cpuc->enabled) 1035 if (!cpuc->enabled)
748 return; 1036 return;
@@ -750,12 +1038,14 @@ static void sparc_pmu_disable(struct pmu *pmu)
750 cpuc->enabled = 0; 1038 cpuc->enabled = 0;
751 cpuc->n_added = 0; 1039 cpuc->n_added = 0;
752 1040
753 val = cpuc->pcr; 1041 for (i = 0; i < sparc_pmu->num_pcrs; i++) {
754 val &= ~(PCR_UTRACE | PCR_STRACE | 1042 u64 val = cpuc->pcr[i];
755 sparc_pmu->hv_bit | sparc_pmu->irq_bit);
756 cpuc->pcr = val;
757 1043
758 pcr_ops->write(cpuc->pcr); 1044 val &= ~(sparc_pmu->user_bit | sparc_pmu->priv_bit |
1045 sparc_pmu->hv_bit | sparc_pmu->irq_bit);
1046 cpuc->pcr[i] = val;
1047 pcr_ops->write_pcr(i, cpuc->pcr[i]);
1048 }
759} 1049}
760 1050
761static int active_event_index(struct cpu_hw_events *cpuc, 1051static int active_event_index(struct cpu_hw_events *cpuc,
@@ -854,9 +1144,11 @@ static DEFINE_MUTEX(pmc_grab_mutex);
854static void perf_stop_nmi_watchdog(void *unused) 1144static void perf_stop_nmi_watchdog(void *unused)
855{ 1145{
856 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1146 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1147 int i;
857 1148
858 stop_nmi_watchdog(NULL); 1149 stop_nmi_watchdog(NULL);
859 cpuc->pcr = pcr_ops->read(); 1150 for (i = 0; i < sparc_pmu->num_pcrs; i++)
1151 cpuc->pcr[i] = pcr_ops->read_pcr(i);
860} 1152}
861 1153
862void perf_event_grab_pmc(void) 1154void perf_event_grab_pmc(void)
@@ -942,9 +1234,17 @@ static int sparc_check_constraints(struct perf_event **evts,
942 if (!n_ev) 1234 if (!n_ev)
943 return 0; 1235 return 0;
944 1236
945 if (n_ev > MAX_HWEVENTS) 1237 if (n_ev > sparc_pmu->max_hw_events)
946 return -1; 1238 return -1;
947 1239
1240 if (!(sparc_pmu->flags & SPARC_PMU_HAS_CONFLICTS)) {
1241 int i;
1242
1243 for (i = 0; i < n_ev; i++)
1244 evts[i]->hw.idx = i;
1245 return 0;
1246 }
1247
948 msk0 = perf_event_get_msk(events[0]); 1248 msk0 = perf_event_get_msk(events[0]);
949 if (n_ev == 1) { 1249 if (n_ev == 1) {
950 if (msk0 & PIC_LOWER) 1250 if (msk0 & PIC_LOWER)
@@ -1000,6 +1300,9 @@ static int check_excludes(struct perf_event **evts, int n_prev, int n_new)
1000 struct perf_event *event; 1300 struct perf_event *event;
1001 int i, n, first; 1301 int i, n, first;
1002 1302
1303 if (!(sparc_pmu->flags & SPARC_PMU_ALL_EXCLUDES_SAME))
1304 return 0;
1305
1003 n = n_prev + n_new; 1306 n = n_prev + n_new;
1004 if (n <= 1) 1307 if (n <= 1)
1005 return 0; 1308 return 0;
@@ -1059,7 +1362,7 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags)
1059 perf_pmu_disable(event->pmu); 1362 perf_pmu_disable(event->pmu);
1060 1363
1061 n0 = cpuc->n_events; 1364 n0 = cpuc->n_events;
1062 if (n0 >= MAX_HWEVENTS) 1365 if (n0 >= sparc_pmu->max_hw_events)
1063 goto out; 1366 goto out;
1064 1367
1065 cpuc->event[n0] = event; 1368 cpuc->event[n0] = event;
@@ -1146,16 +1449,16 @@ static int sparc_pmu_event_init(struct perf_event *event)
1146 /* We save the enable bits in the config_base. */ 1449 /* We save the enable bits in the config_base. */
1147 hwc->config_base = sparc_pmu->irq_bit; 1450 hwc->config_base = sparc_pmu->irq_bit;
1148 if (!attr->exclude_user) 1451 if (!attr->exclude_user)
1149 hwc->config_base |= PCR_UTRACE; 1452 hwc->config_base |= sparc_pmu->user_bit;
1150 if (!attr->exclude_kernel) 1453 if (!attr->exclude_kernel)
1151 hwc->config_base |= PCR_STRACE; 1454 hwc->config_base |= sparc_pmu->priv_bit;
1152 if (!attr->exclude_hv) 1455 if (!attr->exclude_hv)
1153 hwc->config_base |= sparc_pmu->hv_bit; 1456 hwc->config_base |= sparc_pmu->hv_bit;
1154 1457
1155 n = 0; 1458 n = 0;
1156 if (event->group_leader != event) { 1459 if (event->group_leader != event) {
1157 n = collect_events(event->group_leader, 1460 n = collect_events(event->group_leader,
1158 MAX_HWEVENTS - 1, 1461 sparc_pmu->max_hw_events - 1,
1159 evts, events, current_idx_dmy); 1462 evts, events, current_idx_dmy);
1160 if (n < 0) 1463 if (n < 0)
1161 return -EINVAL; 1464 return -EINVAL;
@@ -1254,8 +1557,7 @@ static struct pmu pmu = {
1254void perf_event_print_debug(void) 1557void perf_event_print_debug(void)
1255{ 1558{
1256 unsigned long flags; 1559 unsigned long flags;
1257 u64 pcr, pic; 1560 int cpu, i;
1258 int cpu;
1259 1561
1260 if (!sparc_pmu) 1562 if (!sparc_pmu)
1261 return; 1563 return;
@@ -1264,12 +1566,13 @@ void perf_event_print_debug(void)
1264 1566
1265 cpu = smp_processor_id(); 1567 cpu = smp_processor_id();
1266 1568
1267 pcr = pcr_ops->read();
1268 read_pic(pic);
1269
1270 pr_info("\n"); 1569 pr_info("\n");
1271 pr_info("CPU#%d: PCR[%016llx] PIC[%016llx]\n", 1570 for (i = 0; i < sparc_pmu->num_pcrs; i++)
1272 cpu, pcr, pic); 1571 pr_info("CPU#%d: PCR%d[%016llx]\n",
1572 cpu, i, pcr_ops->read_pcr(i));
1573 for (i = 0; i < sparc_pmu->num_pic_regs; i++)
1574 pr_info("CPU#%d: PIC%d[%016llx]\n",
1575 cpu, i, pcr_ops->read_pic(i));
1273 1576
1274 local_irq_restore(flags); 1577 local_irq_restore(flags);
1275} 1578}
@@ -1305,8 +1608,9 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
1305 * Do this before we peek at the counters to determine 1608 * Do this before we peek at the counters to determine
1306 * overflow so we don't lose any events. 1609 * overflow so we don't lose any events.
1307 */ 1610 */
1308 if (sparc_pmu->irq_bit) 1611 if (sparc_pmu->irq_bit &&
1309 pcr_ops->write(cpuc->pcr); 1612 sparc_pmu->num_pcrs == 1)
1613 pcr_ops->write_pcr(0, cpuc->pcr[0]);
1310 1614
1311 for (i = 0; i < cpuc->n_events; i++) { 1615 for (i = 0; i < cpuc->n_events; i++) {
1312 struct perf_event *event = cpuc->event[i]; 1616 struct perf_event *event = cpuc->event[i];
@@ -1314,6 +1618,10 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
1314 struct hw_perf_event *hwc; 1618 struct hw_perf_event *hwc;
1315 u64 val; 1619 u64 val;
1316 1620
1621 if (sparc_pmu->irq_bit &&
1622 sparc_pmu->num_pcrs > 1)
1623 pcr_ops->write_pcr(idx, cpuc->pcr[idx]);
1624
1317 hwc = &event->hw; 1625 hwc = &event->hw;
1318 val = sparc_perf_event_update(event, hwc, idx); 1626 val = sparc_perf_event_update(event, hwc, idx);
1319 if (val & (1ULL << 31)) 1627 if (val & (1ULL << 31))
@@ -1352,6 +1660,10 @@ static bool __init supported_pmu(void)
1352 sparc_pmu = &niagara2_pmu; 1660 sparc_pmu = &niagara2_pmu;
1353 return true; 1661 return true;
1354 } 1662 }
1663 if (!strcmp(sparc_pmu_type, "niagara4")) {
1664 sparc_pmu = &niagara4_pmu;
1665 return true;
1666 }
1355 return false; 1667 return false;
1356} 1668}
1357 1669
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c
index 1414d16712b2..0800e71d8a88 100644
--- a/arch/sparc/kernel/setup_64.c
+++ b/arch/sparc/kernel/setup_64.c
@@ -340,7 +340,12 @@ static const char *hwcaps[] = {
340 */ 340 */
341 "mul32", "div32", "fsmuld", "v8plus", "popc", "vis", "vis2", 341 "mul32", "div32", "fsmuld", "v8plus", "popc", "vis", "vis2",
342 "ASIBlkInit", "fmaf", "vis3", "hpc", "random", "trans", "fjfmau", 342 "ASIBlkInit", "fmaf", "vis3", "hpc", "random", "trans", "fjfmau",
343 "ima", "cspare", 343 "ima", "cspare", "pause", "cbcond",
344};
345
346static const char *crypto_hwcaps[] = {
347 "aes", "des", "kasumi", "camellia", "md5", "sha1", "sha256",
348 "sha512", "mpmul", "montmul", "montsqr", "crc32c",
344}; 349};
345 350
346void cpucap_info(struct seq_file *m) 351void cpucap_info(struct seq_file *m)
@@ -357,27 +362,61 @@ void cpucap_info(struct seq_file *m)
357 printed++; 362 printed++;
358 } 363 }
359 } 364 }
365 if (caps & HWCAP_SPARC_CRYPTO) {
366 unsigned long cfr;
367
368 __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
369 for (i = 0; i < ARRAY_SIZE(crypto_hwcaps); i++) {
370 unsigned long bit = 1UL << i;
371 if (cfr & bit) {
372 seq_printf(m, "%s%s",
373 printed ? "," : "", crypto_hwcaps[i]);
374 printed++;
375 }
376 }
377 }
360 seq_putc(m, '\n'); 378 seq_putc(m, '\n');
361} 379}
362 380
381static void __init report_one_hwcap(int *printed, const char *name)
382{
383 if ((*printed) == 0)
384 printk(KERN_INFO "CPU CAPS: [");
385 printk(KERN_CONT "%s%s",
386 (*printed) ? "," : "", name);
387 if (++(*printed) == 8) {
388 printk(KERN_CONT "]\n");
389 *printed = 0;
390 }
391}
392
393static void __init report_crypto_hwcaps(int *printed)
394{
395 unsigned long cfr;
396 int i;
397
398 __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
399
400 for (i = 0; i < ARRAY_SIZE(crypto_hwcaps); i++) {
401 unsigned long bit = 1UL << i;
402 if (cfr & bit)
403 report_one_hwcap(printed, crypto_hwcaps[i]);
404 }
405}
406
363static void __init report_hwcaps(unsigned long caps) 407static void __init report_hwcaps(unsigned long caps)
364{ 408{
365 int i, printed = 0; 409 int i, printed = 0;
366 410
367 printk(KERN_INFO "CPU CAPS: [");
368 for (i = 0; i < ARRAY_SIZE(hwcaps); i++) { 411 for (i = 0; i < ARRAY_SIZE(hwcaps); i++) {
369 unsigned long bit = 1UL << i; 412 unsigned long bit = 1UL << i;
370 if (caps & bit) { 413 if (caps & bit)
371 printk(KERN_CONT "%s%s", 414 report_one_hwcap(&printed, hwcaps[i]);
372 printed ? "," : "", hwcaps[i]);
373 if (++printed == 8) {
374 printk(KERN_CONT "]\n");
375 printk(KERN_INFO "CPU CAPS: [");
376 printed = 0;
377 }
378 }
379 } 415 }
380 printk(KERN_CONT "]\n"); 416 if (caps & HWCAP_SPARC_CRYPTO)
417 report_crypto_hwcaps(&printed);
418 if (printed != 0)
419 printk(KERN_CONT "]\n");
381} 420}
382 421
383static unsigned long __init mdesc_cpu_hwcap_list(void) 422static unsigned long __init mdesc_cpu_hwcap_list(void)
@@ -411,6 +450,10 @@ static unsigned long __init mdesc_cpu_hwcap_list(void)
411 break; 450 break;
412 } 451 }
413 } 452 }
453 for (i = 0; i < ARRAY_SIZE(crypto_hwcaps); i++) {
454 if (!strcmp(prop, crypto_hwcaps[i]))
455 caps |= HWCAP_SPARC_CRYPTO;
456 }
414 457
415 plen = strlen(prop) + 1; 458 plen = strlen(prop) + 1;
416 prop += plen; 459 prop += plen;