author     Dan Williams <dan.j.williams@intel.com>    2009-09-08 20:55:21 -0400
committer  Dan Williams <dan.j.williams@intel.com>    2009-09-08 20:55:21 -0400
commit     bbb20089a3275a19e475dbc21320c3742e3ca423
tree       216fdc1cbef450ca688135c5b8969169482d9a48  /arch/powerpc/kernel
parent     3e48e656903e9fd8bc805c6a2c4264d7808d315b
parent     657a77fa7284d8ae28dfa48f1dc5d919bf5b2843
Merge branch 'dmaengine' into async-tx-next
Conflicts:
	crypto/async_tx/async_xor.c
	drivers/dma/ioat/dma_v2.h
	drivers/dma/ioat/pci.c
	drivers/md/raid5.c
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--  arch/powerpc/kernel/Makefile                 13
-rw-r--r--  arch/powerpc/kernel/align.c                  20
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c            33
-rw-r--r--  arch/powerpc/kernel/cpu_setup_6xx.S           3
-rw-r--r--  arch/powerpc/kernel/cpu_setup_fsl_booke.S    49
-rw-r--r--  arch/powerpc/kernel/cputable.c                6
-rw-r--r--  arch/powerpc/kernel/dma-swiotlb.c           163
-rw-r--r--  arch/powerpc/kernel/dma.c                     2
-rw-r--r--  arch/powerpc/kernel/entry_64.S                9
-rw-r--r--  arch/powerpc/kernel/exceptions-64s.S        978
-rw-r--r--  arch/powerpc/kernel/ftrace.c                 31
-rw-r--r--  arch/powerpc/kernel/head_32.S               101
-rw-r--r--  arch/powerpc/kernel/head_64.S              1095
-rw-r--r--  arch/powerpc/kernel/head_booke.h             10
-rw-r--r--  arch/powerpc/kernel/init_task.c               4
-rw-r--r--  arch/powerpc/kernel/irq.c                   129
-rw-r--r--  arch/powerpc/kernel/lparcfg.c                40
-rw-r--r--  arch/powerpc/kernel/misc_64.S                92
-rw-r--r--  arch/powerpc/kernel/module.c                  2
-rw-r--r--  arch/powerpc/kernel/mpc7450-pmu.c           417
-rw-r--r--  arch/powerpc/kernel/paca.c                   14
-rw-r--r--  arch/powerpc/kernel/pci-common.c              3
-rw-r--r--  arch/powerpc/kernel/pci_32.c                 19
-rw-r--r--  arch/powerpc/kernel/pci_64.c                 17
-rw-r--r--  arch/powerpc/kernel/pci_dn.c                 28
-rw-r--r--  arch/powerpc/kernel/perf_counter.c         1306
-rw-r--r--  arch/powerpc/kernel/power4-pmu.c            615
-rw-r--r--  arch/powerpc/kernel/power5+-pmu.c           688
-rw-r--r--  arch/powerpc/kernel/power5-pmu.c            627
-rw-r--r--  arch/powerpc/kernel/power6-pmu.c            546
-rw-r--r--  arch/powerpc/kernel/power7-pmu.c            374
-rw-r--r--  arch/powerpc/kernel/ppc970-pmu.c            499
-rw-r--r--  arch/powerpc/kernel/process.c                 2
-rw-r--r--  arch/powerpc/kernel/prom.c                    2
-rw-r--r--  arch/powerpc/kernel/prom_init.c              43
-rw-r--r--  arch/powerpc/kernel/ptrace.c                 23
-rw-r--r--  arch/powerpc/kernel/rtas_pci.c               10
-rw-r--r--  arch/powerpc/kernel/setup_32.c                6
-rw-r--r--  arch/powerpc/kernel/setup_64.c               15
-rw-r--r--  arch/powerpc/kernel/time.c                   56
-rw-r--r--  arch/powerpc/kernel/traps.c                 130
-rw-r--r--  arch/powerpc/kernel/vector.S                210
42 files changed, 6955 insertions, 1475 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 71901fbda4a5..b73396b93905 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -4,6 +4,8 @@
 
 CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"'
 
+subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
+
 ifeq ($(CONFIG_PPC64),y)
 CFLAGS_prom_init.o += -mno-minimal-toc
 endif
@@ -36,7 +38,7 @@ obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \
 				   firmware.o nvram_64.o
 obj64-$(CONFIG_RELOCATABLE) += reloc_64.o
 obj-$(CONFIG_PPC64) += vdso64/
-obj-$(CONFIG_ALTIVEC) += vecemu.o vector.o
+obj-$(CONFIG_ALTIVEC) += vecemu.o
 obj-$(CONFIG_PPC_970_NAP) += idle_power4.o
 obj-$(CONFIG_PPC_OF) += of_device.o of_platform.o prom_parse.o
 obj-$(CONFIG_PPC_CLOCK) += clock.o
@@ -82,6 +84,7 @@ obj-$(CONFIG_SMP) += smp.o
 obj-$(CONFIG_KPROBES) += kprobes.o
 obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
+obj-$(CONFIG_SWIOTLB) += dma-swiotlb.o
 
 pci64-$(CONFIG_PPC64) += pci_dn.o isa-bridge.o
 obj-$(CONFIG_PCI) += pci_$(CONFIG_WORD_SIZE).o $(pci64-y) \
@@ -94,6 +97,10 @@ obj64-$(CONFIG_AUDIT) += compat_audit.o
 
 obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
+obj-$(CONFIG_PPC_PERF_CTRS) += perf_counter.o
+obj64-$(CONFIG_PPC_PERF_CTRS) += power4-pmu.o ppc970-pmu.o power5-pmu.o \
+				   power5+-pmu.o power6-pmu.o power7-pmu.o
+obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o
 
 obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o
 
@@ -102,12 +109,14 @@ obj-y += iomap.o
 endif
 
 obj-$(CONFIG_PPC64) += $(obj64-y)
+obj-$(CONFIG_PPC32) += $(obj32-y)
 
 ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC),)
 obj-y += ppc_save_regs.o
 endif
 
 extra-$(CONFIG_PPC_FPU) += fpu.o
+extra-$(CONFIG_ALTIVEC) += vector.o
 extra-$(CONFIG_PPC64) += entry_64.o
 
 extra-y += systbl_chk.i
@@ -120,6 +129,7 @@ PHONY += systbl_chk
 systbl_chk: $(src)/systbl_chk.sh $(obj)/systbl_chk.i
 	$(call cmd,systbl_chk)
 
+ifeq ($(CONFIG_PPC_OF_BOOT_TRAMPOLINE),y)
 $(obj)/built-in.o: prom_init_check
 
 quiet_cmd_prom_init_check = CALL $<
@@ -128,5 +138,6 @@ quiet_cmd_prom_init_check = CALL $<
 PHONY += prom_init_check
 prom_init_check: $(src)/prom_init_check.sh $(obj)/prom_init.o
 	$(call cmd,prom_init_check)
+endif
 
 clean-files := vmlinux.lds
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index 5ffcfaa77d6a..a5b632e52fae 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -24,6 +24,7 @@
24#include <asm/system.h> 24#include <asm/system.h>
25#include <asm/cache.h> 25#include <asm/cache.h>
26#include <asm/cputable.h> 26#include <asm/cputable.h>
27#include <asm/emulated_ops.h>
27 28
28struct aligninfo { 29struct aligninfo {
29 unsigned char len; 30 unsigned char len;
@@ -730,8 +731,10 @@ int fix_alignment(struct pt_regs *regs)
730 areg = dsisr & 0x1f; /* register to update */ 731 areg = dsisr & 0x1f; /* register to update */
731 732
732#ifdef CONFIG_SPE 733#ifdef CONFIG_SPE
733 if ((instr >> 26) == 0x4) 734 if ((instr >> 26) == 0x4) {
735 PPC_WARN_EMULATED(spe);
734 return emulate_spe(regs, reg, instr); 736 return emulate_spe(regs, reg, instr);
737 }
735#endif 738#endif
736 739
737 instr = (dsisr >> 10) & 0x7f; 740 instr = (dsisr >> 10) & 0x7f;
@@ -783,23 +786,28 @@ int fix_alignment(struct pt_regs *regs)
783 flags |= SPLT; 786 flags |= SPLT;
784 nb = 8; 787 nb = 8;
785 } 788 }
789 PPC_WARN_EMULATED(vsx);
786 return emulate_vsx(addr, reg, areg, regs, flags, nb); 790 return emulate_vsx(addr, reg, areg, regs, flags, nb);
787 } 791 }
788#endif 792#endif
789 /* A size of 0 indicates an instruction we don't support, with 793 /* A size of 0 indicates an instruction we don't support, with
790 * the exception of DCBZ which is handled as a special case here 794 * the exception of DCBZ which is handled as a special case here
791 */ 795 */
792 if (instr == DCBZ) 796 if (instr == DCBZ) {
797 PPC_WARN_EMULATED(dcbz);
793 return emulate_dcbz(regs, addr); 798 return emulate_dcbz(regs, addr);
799 }
794 if (unlikely(nb == 0)) 800 if (unlikely(nb == 0))
795 return 0; 801 return 0;
796 802
797 /* Load/Store Multiple instructions are handled in their own 803 /* Load/Store Multiple instructions are handled in their own
798 * function 804 * function
799 */ 805 */
800 if (flags & M) 806 if (flags & M) {
807 PPC_WARN_EMULATED(multiple);
801 return emulate_multiple(regs, addr, reg, nb, 808 return emulate_multiple(regs, addr, reg, nb,
802 flags, instr, swiz); 809 flags, instr, swiz);
810 }
803 811
804 /* Verify the address of the operand */ 812 /* Verify the address of the operand */
805 if (unlikely(user_mode(regs) && 813 if (unlikely(user_mode(regs) &&
@@ -816,8 +824,12 @@ int fix_alignment(struct pt_regs *regs)
816 } 824 }
817 825
818 /* Special case for 16-byte FP loads and stores */ 826 /* Special case for 16-byte FP loads and stores */
819 if (nb == 16) 827 if (nb == 16) {
828 PPC_WARN_EMULATED(fp_pair);
820 return emulate_fp_pair(addr, reg, flags); 829 return emulate_fp_pair(addr, reg, flags);
830 }
831
832 PPC_WARN_EMULATED(unaligned);
821 833
822 /* If we are loading, get the data from user space, else 834 /* If we are loading, get the data from user space, else
823 * get it from register values 835 * get it from register values
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 1e40bc053946..561b64652311 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -122,8 +122,6 @@ int main(void)
122 DEFINE(PACAKSAVE, offsetof(struct paca_struct, kstack)); 122 DEFINE(PACAKSAVE, offsetof(struct paca_struct, kstack));
123 DEFINE(PACACURRENT, offsetof(struct paca_struct, __current)); 123 DEFINE(PACACURRENT, offsetof(struct paca_struct, __current));
124 DEFINE(PACASAVEDMSR, offsetof(struct paca_struct, saved_msr)); 124 DEFINE(PACASAVEDMSR, offsetof(struct paca_struct, saved_msr));
125 DEFINE(PACASTABREAL, offsetof(struct paca_struct, stab_real));
126 DEFINE(PACASTABVIRT, offsetof(struct paca_struct, stab_addr));
127 DEFINE(PACASTABRR, offsetof(struct paca_struct, stab_rr)); 125 DEFINE(PACASTABRR, offsetof(struct paca_struct, stab_rr));
128 DEFINE(PACAR1, offsetof(struct paca_struct, saved_r1)); 126 DEFINE(PACAR1, offsetof(struct paca_struct, saved_r1));
129 DEFINE(PACATOC, offsetof(struct paca_struct, kernel_toc)); 127 DEFINE(PACATOC, offsetof(struct paca_struct, kernel_toc));
@@ -131,35 +129,31 @@ int main(void)
131 DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr)); 129 DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr));
132 DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled)); 130 DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled));
133 DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled)); 131 DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled));
134 DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache)); 132 DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_counter_pending));
135 DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
136 DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); 133 DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
137 DEFINE(PACAVMALLOCSLLP, offsetof(struct paca_struct, vmalloc_sllp));
138#ifdef CONFIG_PPC_MM_SLICES 134#ifdef CONFIG_PPC_MM_SLICES
139 DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct, 135 DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct,
140 context.low_slices_psize)); 136 context.low_slices_psize));
141 DEFINE(PACAHIGHSLICEPSIZE, offsetof(struct paca_struct, 137 DEFINE(PACAHIGHSLICEPSIZE, offsetof(struct paca_struct,
142 context.high_slices_psize)); 138 context.high_slices_psize));
143 DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def)); 139 DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def));
140#endif /* CONFIG_PPC_MM_SLICES */
141#ifdef CONFIG_PPC_STD_MMU_64
142 DEFINE(PACASTABREAL, offsetof(struct paca_struct, stab_real));
143 DEFINE(PACASTABVIRT, offsetof(struct paca_struct, stab_addr));
144 DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache));
145 DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
146 DEFINE(PACAVMALLOCSLLP, offsetof(struct paca_struct, vmalloc_sllp));
147#ifdef CONFIG_PPC_MM_SLICES
144 DEFINE(MMUPSIZESLLP, offsetof(struct mmu_psize_def, sllp)); 148 DEFINE(MMUPSIZESLLP, offsetof(struct mmu_psize_def, sllp));
145#else 149#else
146 DEFINE(PACACONTEXTSLLP, offsetof(struct paca_struct, context.sllp)); 150 DEFINE(PACACONTEXTSLLP, offsetof(struct paca_struct, context.sllp));
147
148#endif /* CONFIG_PPC_MM_SLICES */ 151#endif /* CONFIG_PPC_MM_SLICES */
149 DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen)); 152 DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen));
150 DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc)); 153 DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc));
151 DEFINE(PACA_EXSLB, offsetof(struct paca_struct, exslb)); 154 DEFINE(PACA_EXSLB, offsetof(struct paca_struct, exslb));
152 DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp));
153 DEFINE(PACALPPACAPTR, offsetof(struct paca_struct, lppaca_ptr)); 155 DEFINE(PACALPPACAPTR, offsetof(struct paca_struct, lppaca_ptr));
154 DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id));
155 DEFINE(PACA_STARTPURR, offsetof(struct paca_struct, startpurr));
156 DEFINE(PACA_STARTSPURR, offsetof(struct paca_struct, startspurr));
157 DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time));
158 DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time));
159 DEFINE(PACA_SLBSHADOWPTR, offsetof(struct paca_struct, slb_shadow_ptr)); 156 DEFINE(PACA_SLBSHADOWPTR, offsetof(struct paca_struct, slb_shadow_ptr));
160 DEFINE(PACA_DATA_OFFSET, offsetof(struct paca_struct, data_offset));
161 DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save));
162
163 DEFINE(SLBSHADOW_STACKVSID, 157 DEFINE(SLBSHADOW_STACKVSID,
164 offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid)); 158 offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid));
165 DEFINE(SLBSHADOW_STACKESID, 159 DEFINE(SLBSHADOW_STACKESID,
@@ -169,6 +163,15 @@ int main(void)
169 DEFINE(LPPACAANYINT, offsetof(struct lppaca, int_dword.any_int)); 163 DEFINE(LPPACAANYINT, offsetof(struct lppaca, int_dword.any_int));
170 DEFINE(LPPACADECRINT, offsetof(struct lppaca, int_dword.fields.decr_int)); 164 DEFINE(LPPACADECRINT, offsetof(struct lppaca, int_dword.fields.decr_int));
171 DEFINE(SLBSHADOW_SAVEAREA, offsetof(struct slb_shadow, save_area)); 165 DEFINE(SLBSHADOW_SAVEAREA, offsetof(struct slb_shadow, save_area));
166#endif /* CONFIG_PPC_STD_MMU_64 */
167 DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp));
168 DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id));
169 DEFINE(PACA_STARTPURR, offsetof(struct paca_struct, startpurr));
170 DEFINE(PACA_STARTSPURR, offsetof(struct paca_struct, startspurr));
171 DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time));
172 DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time));
173 DEFINE(PACA_DATA_OFFSET, offsetof(struct paca_struct, data_offset));
174 DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save));
172#endif /* CONFIG_PPC64 */ 175#endif /* CONFIG_PPC64 */
173 176
174 /* RTAS */ 177 /* RTAS */
diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S
index 54f767e31a1a..1e9949e68856 100644
--- a/arch/powerpc/kernel/cpu_setup_6xx.S
+++ b/arch/powerpc/kernel/cpu_setup_6xx.S
@@ -239,6 +239,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_L3CR)
 	ori	r11,r11,HID0_SGE | HID0_FOLD | HID0_BHTE
 	ori	r11,r11,HID0_LRSTK | HID0_BTIC
 	oris	r11,r11,HID0_DPM@h
+BEGIN_MMU_FTR_SECTION
+	oris	r11,r11,HID0_HIGH_BAT@h
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
 BEGIN_FTR_SECTION
 	xori	r11,r11,HID0_BTIC
 END_FTR_SECTION_IFSET(CPU_FTR_NO_BTIC)
diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
index eb4b9adcedb4..0adb50ad8031 100644
--- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S
+++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
@@ -17,6 +17,40 @@
17#include <asm/cputable.h> 17#include <asm/cputable.h>
18#include <asm/ppc_asm.h> 18#include <asm/ppc_asm.h>
19 19
20_GLOBAL(__e500_icache_setup)
21 mfspr r0, SPRN_L1CSR1
22 andi. r3, r0, L1CSR1_ICE
23 bnelr /* Already enabled */
24 oris r0, r0, L1CSR1_CPE@h
25 ori r0, r0, (L1CSR1_ICFI | L1CSR1_ICLFR | L1CSR1_ICE)
26 mtspr SPRN_L1CSR1, r0 /* Enable I-Cache */
27 isync
28 blr
29
30_GLOBAL(__e500_dcache_setup)
31 mfspr r0, SPRN_L1CSR0
32 andi. r3, r0, L1CSR0_DCE
33 bnelr /* Already enabled */
34 msync
35 isync
36 li r0, 0
37 mtspr SPRN_L1CSR0, r0 /* Disable */
38 msync
39 isync
40 li r0, (L1CSR0_DCFI | L1CSR0_CLFC)
41 mtspr SPRN_L1CSR0, r0 /* Invalidate */
42 isync
431: mfspr r0, SPRN_L1CSR0
44 andi. r3, r0, L1CSR0_CLFC
45 bne+ 1b /* Wait for lock bits reset */
46 oris r0, r0, L1CSR0_CPE@h
47 ori r0, r0, L1CSR0_DCE
48 msync
49 isync
50 mtspr SPRN_L1CSR0, r0 /* Enable */
51 isync
52 blr
53
20_GLOBAL(__setup_cpu_e200) 54_GLOBAL(__setup_cpu_e200)
21 /* enable dedicated debug exception handling resources (Debug APU) */ 55 /* enable dedicated debug exception handling resources (Debug APU) */
22 mfspr r3,SPRN_HID0 56 mfspr r3,SPRN_HID0
@@ -25,7 +59,16 @@ _GLOBAL(__setup_cpu_e200)
25 b __setup_e200_ivors 59 b __setup_e200_ivors
26_GLOBAL(__setup_cpu_e500v1) 60_GLOBAL(__setup_cpu_e500v1)
27_GLOBAL(__setup_cpu_e500v2) 61_GLOBAL(__setup_cpu_e500v2)
28 b __setup_e500_ivors 62 mflr r4
63 bl __e500_icache_setup
64 bl __e500_dcache_setup
65 bl __setup_e500_ivors
66 mtlr r4
67 blr
29_GLOBAL(__setup_cpu_e500mc) 68_GLOBAL(__setup_cpu_e500mc)
30 b __setup_e500mc_ivors 69 mflr r4
31 70 bl __e500_icache_setup
71 bl __e500_dcache_setup
72 bl __setup_e500mc_ivors
73 mtlr r4
74 blr
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 3e33fb933d99..4a24a2fc4574 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -427,7 +427,8 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "POWER7 (architected)",
 		.cpu_features		= CPU_FTRS_POWER7,
 		.cpu_user_features	= COMMON_USER_POWER7,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE |
+					  MMU_FTR_TLBIE_206,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.machine_check		= machine_check_generic,
@@ -441,7 +442,8 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "POWER7 (raw)",
 		.cpu_features		= CPU_FTRS_POWER7,
 		.cpu_user_features	= COMMON_USER_POWER7,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE |
+					  MMU_FTR_TLBIE_206,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 6,
diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c
new file mode 100644
index 000000000000..68ccf11e4f19
--- /dev/null
+++ b/arch/powerpc/kernel/dma-swiotlb.c
@@ -0,0 +1,163 @@
1/*
2 * Contains routines needed to support swiotlb for ppc.
3 *
4 * Copyright (C) 2009 Becky Bruce, Freescale Semiconductor
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2 of the License, or (at your
9 * option) any later version.
10 *
11 */
12
13#include <linux/dma-mapping.h>
14#include <linux/pfn.h>
15#include <linux/of_platform.h>
16#include <linux/platform_device.h>
17#include <linux/pci.h>
18
19#include <asm/machdep.h>
20#include <asm/swiotlb.h>
21#include <asm/dma.h>
22#include <asm/abs_addr.h>
23
24int swiotlb __read_mostly;
25unsigned int ppc_swiotlb_enable;
26
27void *swiotlb_bus_to_virt(struct device *hwdev, dma_addr_t addr)
28{
29 unsigned long pfn = PFN_DOWN(swiotlb_bus_to_phys(hwdev, addr));
30 void *pageaddr = page_address(pfn_to_page(pfn));
31
32 if (pageaddr != NULL)
33 return pageaddr + (addr % PAGE_SIZE);
34 return NULL;
35}
36
37dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr)
38{
39 return paddr + get_dma_direct_offset(hwdev);
40}
41
42phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr)
43
44{
45 return baddr - get_dma_direct_offset(hwdev);
46}
47
48/*
49 * Determine if an address needs bounce buffering via swiotlb.
50 * Going forward I expect the swiotlb code to generalize on using
51 * a dma_ops->addr_needs_map, and this function will move from here to the
52 * generic swiotlb code.
53 */
54int
55swiotlb_arch_address_needs_mapping(struct device *hwdev, dma_addr_t addr,
56 size_t size)
57{
58 struct dma_mapping_ops *dma_ops = get_dma_ops(hwdev);
59
60 BUG_ON(!dma_ops);
61 return dma_ops->addr_needs_map(hwdev, addr, size);
62}
63
64/*
65 * Determine if an address is reachable by a pci device, or if we must bounce.
66 */
67static int
68swiotlb_pci_addr_needs_map(struct device *hwdev, dma_addr_t addr, size_t size)
69{
70 u64 mask = dma_get_mask(hwdev);
71 dma_addr_t max;
72 struct pci_controller *hose;
73 struct pci_dev *pdev = to_pci_dev(hwdev);
74
75 hose = pci_bus_to_host(pdev->bus);
76 max = hose->dma_window_base_cur + hose->dma_window_size;
77
78 /* check that we're within mapped pci window space */
79 if ((addr + size > max) | (addr < hose->dma_window_base_cur))
80 return 1;
81
82 return !is_buffer_dma_capable(mask, addr, size);
83}
84
85static int
86swiotlb_addr_needs_map(struct device *hwdev, dma_addr_t addr, size_t size)
87{
88 return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size);
89}
90
91
92/*
93 * At the moment, all platforms that use this code only require
94 * swiotlb to be used if we're operating on HIGHMEM. Since
95 * we don't ever call anything other than map_sg, unmap_sg,
96 * map_page, and unmap_page on highmem, use normal dma_ops
97 * for everything else.
98 */
99struct dma_mapping_ops swiotlb_dma_ops = {
100 .alloc_coherent = dma_direct_alloc_coherent,
101 .free_coherent = dma_direct_free_coherent,
102 .map_sg = swiotlb_map_sg_attrs,
103 .unmap_sg = swiotlb_unmap_sg_attrs,
104 .dma_supported = swiotlb_dma_supported,
105 .map_page = swiotlb_map_page,
106 .unmap_page = swiotlb_unmap_page,
107 .addr_needs_map = swiotlb_addr_needs_map,
108 .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu,
109 .sync_single_range_for_device = swiotlb_sync_single_range_for_device,
110 .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
111 .sync_sg_for_device = swiotlb_sync_sg_for_device
112};
113
114struct dma_mapping_ops swiotlb_pci_dma_ops = {
115 .alloc_coherent = dma_direct_alloc_coherent,
116 .free_coherent = dma_direct_free_coherent,
117 .map_sg = swiotlb_map_sg_attrs,
118 .unmap_sg = swiotlb_unmap_sg_attrs,
119 .dma_supported = swiotlb_dma_supported,
120 .map_page = swiotlb_map_page,
121 .unmap_page = swiotlb_unmap_page,
122 .addr_needs_map = swiotlb_pci_addr_needs_map,
123 .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu,
124 .sync_single_range_for_device = swiotlb_sync_single_range_for_device,
125 .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
126 .sync_sg_for_device = swiotlb_sync_sg_for_device
127};
128
129static int ppc_swiotlb_bus_notify(struct notifier_block *nb,
130 unsigned long action, void *data)
131{
132 struct device *dev = data;
133
134	/* We are only interested in device addition */
135 if (action != BUS_NOTIFY_ADD_DEVICE)
136 return 0;
137
138 /* May need to bounce if the device can't address all of DRAM */
139 if (dma_get_mask(dev) < lmb_end_of_DRAM())
140 set_dma_ops(dev, &swiotlb_dma_ops);
141
142 return NOTIFY_DONE;
143}
144
145static struct notifier_block ppc_swiotlb_plat_bus_notifier = {
146 .notifier_call = ppc_swiotlb_bus_notify,
147 .priority = 0,
148};
149
150static struct notifier_block ppc_swiotlb_of_bus_notifier = {
151 .notifier_call = ppc_swiotlb_bus_notify,
152 .priority = 0,
153};
154
155int __init swiotlb_setup_bus_notifier(void)
156{
157 bus_register_notifier(&platform_bus_type,
158 &ppc_swiotlb_plat_bus_notifier);
159 bus_register_notifier(&of_platform_bus_type,
160 &ppc_swiotlb_of_bus_notifier);
161
162 return 0;
163}
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index 6b02793dc75b..20a60d661ba8 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -19,7 +19,7 @@
  * default the offset is PCI_DRAM_OFFSET.
  */
 
-static unsigned long get_dma_direct_offset(struct device *dev)
+unsigned long get_dma_direct_offset(struct device *dev)
 {
 	if (dev)
 		return (unsigned long)dev->archdata.dma_data;
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index abfc32330479..43e073477c34 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -526,6 +526,15 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES)
 2:
 	TRACE_AND_RESTORE_IRQ(r5);
 
+#ifdef CONFIG_PERF_COUNTERS
+	/* check paca->perf_counter_pending if we're enabling ints */
+	lbz	r3,PACAPERFPEND(r13)
+	and.	r3,r3,r5
+	beq	27f
+	bl	.perf_counter_do_pending
+27:
+#endif /* CONFIG_PERF_COUNTERS */
+
 	/* extract EE bit and use it to restore paca->hard_enabled */
 	ld	r3,_MSR(r1)
 	rldicl	r4,r3,49,63		/* r0 = (r3 >> 15) & 1 */
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
new file mode 100644
index 000000000000..eb898112e577
--- /dev/null
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -0,0 +1,978 @@
1/*
2 * This file contains the 64-bit "server" PowerPC variant
3 * of the low level exception handling including exception
4 * vectors, exception return, part of the slb and stab
5 * handling and other fixed offset specific things.
6 *
7 * This file is meant to be #included from head_64.S due to
8 * position dependant assembly.
9 *
10 * Most of this originates from head_64.S and thus has the same
11 * copyright history.
12 *
13 */
14
15/*
16 * We layout physical memory as follows:
17 * 0x0000 - 0x00ff : Secondary processor spin code
18 * 0x0100 - 0x2fff : pSeries Interrupt prologs
19 * 0x3000 - 0x5fff : interrupt support, iSeries and common interrupt prologs
20 * 0x6000 - 0x6fff : Initial (CPU0) segment table
21 * 0x7000 - 0x7fff : FWNMI data area
22 * 0x8000 - : Early init and support code
23 */
24
25
26/*
27 * SPRG Usage
28 *
29 * Register Definition
30 *
31 * SPRG0 reserved for hypervisor
32 * SPRG1 temp - used to save gpr
33 * SPRG2 temp - used to save gpr
34 * SPRG3 virt addr of paca
35 */
36
37/*
38 * This is the start of the interrupt handlers for pSeries
39 * This code runs with relocation off.
40 * Code from here to __end_interrupts gets copied down to real
41 * address 0x100 when we are running a relocatable kernel.
42 * Therefore any relative branches in this section must only
43 * branch to labels in this section.
44 */
45 . = 0x100
46 .globl __start_interrupts
47__start_interrupts:
48
49 STD_EXCEPTION_PSERIES(0x100, system_reset)
50
51 . = 0x200
52_machine_check_pSeries:
53 HMT_MEDIUM
54 mtspr SPRN_SPRG1,r13 /* save r13 */
55 EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common)
56
57 . = 0x300
58 .globl data_access_pSeries
59data_access_pSeries:
60 HMT_MEDIUM
61 mtspr SPRN_SPRG1,r13
62BEGIN_FTR_SECTION
63 mtspr SPRN_SPRG2,r12
64 mfspr r13,SPRN_DAR
65 mfspr r12,SPRN_DSISR
66 srdi r13,r13,60
67 rlwimi r13,r12,16,0x20
68 mfcr r12
69 cmpwi r13,0x2c
70 beq do_stab_bolted_pSeries
71 mtcrf 0x80,r12
72 mfspr r12,SPRN_SPRG2
73END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
74 EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common)
75
76 . = 0x380
77 .globl data_access_slb_pSeries
78data_access_slb_pSeries:
79 HMT_MEDIUM
80 mtspr SPRN_SPRG1,r13
81 mfspr r13,SPRN_SPRG3 /* get paca address into r13 */
82 std r3,PACA_EXSLB+EX_R3(r13)
83 mfspr r3,SPRN_DAR
84 std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */
85 mfcr r9
86#ifdef __DISABLED__
87 /* Keep that around for when we re-implement dynamic VSIDs */
88 cmpdi r3,0
89 bge slb_miss_user_pseries
90#endif /* __DISABLED__ */
91 std r10,PACA_EXSLB+EX_R10(r13)
92 std r11,PACA_EXSLB+EX_R11(r13)
93 std r12,PACA_EXSLB+EX_R12(r13)
94 mfspr r10,SPRN_SPRG1
95 std r10,PACA_EXSLB+EX_R13(r13)
96 mfspr r12,SPRN_SRR1 /* and SRR1 */
97#ifndef CONFIG_RELOCATABLE
98 b .slb_miss_realmode
99#else
100 /*
101 * We can't just use a direct branch to .slb_miss_realmode
102 * because the distance from here to there depends on where
103 * the kernel ends up being put.
104 */
105 mfctr r11
106 ld r10,PACAKBASE(r13)
107 LOAD_HANDLER(r10, .slb_miss_realmode)
108 mtctr r10
109 bctr
110#endif
111
112 STD_EXCEPTION_PSERIES(0x400, instruction_access)
113
114 . = 0x480
115 .globl instruction_access_slb_pSeries
116instruction_access_slb_pSeries:
117 HMT_MEDIUM
118 mtspr SPRN_SPRG1,r13
119 mfspr r13,SPRN_SPRG3 /* get paca address into r13 */
120 std r3,PACA_EXSLB+EX_R3(r13)
121 mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
122 std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */
123 mfcr r9
124#ifdef __DISABLED__
125 /* Keep that around for when we re-implement dynamic VSIDs */
126 cmpdi r3,0
127 bge slb_miss_user_pseries
128#endif /* __DISABLED__ */
129 std r10,PACA_EXSLB+EX_R10(r13)
130 std r11,PACA_EXSLB+EX_R11(r13)
131 std r12,PACA_EXSLB+EX_R12(r13)
132 mfspr r10,SPRN_SPRG1
133 std r10,PACA_EXSLB+EX_R13(r13)
134 mfspr r12,SPRN_SRR1 /* and SRR1 */
135#ifndef CONFIG_RELOCATABLE
136 b .slb_miss_realmode
137#else
138 mfctr r11
139 ld r10,PACAKBASE(r13)
140 LOAD_HANDLER(r10, .slb_miss_realmode)
141 mtctr r10
142 bctr
143#endif
144
145 MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt)
146 STD_EXCEPTION_PSERIES(0x600, alignment)
147 STD_EXCEPTION_PSERIES(0x700, program_check)
148 STD_EXCEPTION_PSERIES(0x800, fp_unavailable)
149 MASKABLE_EXCEPTION_PSERIES(0x900, decrementer)
150 STD_EXCEPTION_PSERIES(0xa00, trap_0a)
151 STD_EXCEPTION_PSERIES(0xb00, trap_0b)
152
153 . = 0xc00
154 .globl system_call_pSeries
155system_call_pSeries:
156 HMT_MEDIUM
157BEGIN_FTR_SECTION
158 cmpdi r0,0x1ebe
159 beq- 1f
160END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
161 mr r9,r13
162 mfspr r13,SPRN_SPRG3
163 mfspr r11,SPRN_SRR0
164 ld r12,PACAKBASE(r13)
165 ld r10,PACAKMSR(r13)
166 LOAD_HANDLER(r12, system_call_entry)
167 mtspr SPRN_SRR0,r12
168 mfspr r12,SPRN_SRR1
169 mtspr SPRN_SRR1,r10
170 rfid
171 b . /* prevent speculative execution */
172
173/* Fast LE/BE switch system call */
1741: mfspr r12,SPRN_SRR1
175 xori r12,r12,MSR_LE
176 mtspr SPRN_SRR1,r12
177 rfid /* return to userspace */
178 b .
179
180 STD_EXCEPTION_PSERIES(0xd00, single_step)
181 STD_EXCEPTION_PSERIES(0xe00, trap_0e)
182
183 /* We need to deal with the Altivec unavailable exception
184 * here which is at 0xf20, thus in the middle of the
185 * prolog code of the PerformanceMonitor one. A little
186 * trickery is thus necessary
187 */
188 . = 0xf00
189 b performance_monitor_pSeries
190
191 . = 0xf20
192 b altivec_unavailable_pSeries
193
194 . = 0xf40
195 b vsx_unavailable_pSeries
196
197#ifdef CONFIG_CBE_RAS
198 HSTD_EXCEPTION_PSERIES(0x1200, cbe_system_error)
199#endif /* CONFIG_CBE_RAS */
200 STD_EXCEPTION_PSERIES(0x1300, instruction_breakpoint)
201#ifdef CONFIG_CBE_RAS
202 HSTD_EXCEPTION_PSERIES(0x1600, cbe_maintenance)
203#endif /* CONFIG_CBE_RAS */
204 STD_EXCEPTION_PSERIES(0x1700, altivec_assist)
205#ifdef CONFIG_CBE_RAS
206 HSTD_EXCEPTION_PSERIES(0x1800, cbe_thermal)
207#endif /* CONFIG_CBE_RAS */
208
209 . = 0x3000
210
211/*** pSeries interrupt support ***/
212
213 /* moved from 0xf00 */
214 STD_EXCEPTION_PSERIES(., performance_monitor)
215 STD_EXCEPTION_PSERIES(., altivec_unavailable)
216 STD_EXCEPTION_PSERIES(., vsx_unavailable)
217
218/*
219 * An interrupt came in while soft-disabled; clear EE in SRR1,
220 * clear paca->hard_enabled and return.
221 */
222masked_interrupt:
223 stb r10,PACAHARDIRQEN(r13)
224 mtcrf 0x80,r9
225 ld r9,PACA_EXGEN+EX_R9(r13)
226 mfspr r10,SPRN_SRR1
227 rldicl r10,r10,48,1 /* clear MSR_EE */
228 rotldi r10,r10,16
229 mtspr SPRN_SRR1,r10
230 ld r10,PACA_EXGEN+EX_R10(r13)
231 mfspr r13,SPRN_SPRG1
232 rfid
233 b .
234
235 .align 7
236do_stab_bolted_pSeries:
237 mtcrf 0x80,r12
238 mfspr r12,SPRN_SPRG2
239 EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted)
240
241#ifdef CONFIG_PPC_PSERIES
242/*
243 * Vectors for the FWNMI option. Share common code.
244 */
245 .globl system_reset_fwnmi
246 .align 7
247system_reset_fwnmi:
248 HMT_MEDIUM
249 mtspr SPRN_SPRG1,r13 /* save r13 */
250 EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common)
251
252 .globl machine_check_fwnmi
253 .align 7
254machine_check_fwnmi:
255 HMT_MEDIUM
256 mtspr SPRN_SPRG1,r13 /* save r13 */
257 EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common)
258
259#endif /* CONFIG_PPC_PSERIES */
260
261#ifdef __DISABLED__
262/*
263 * This is used for when the SLB miss handler has to go virtual,
264 * which doesn't happen for now anymore but will once we re-implement
265 * dynamic VSIDs for shared page tables
266 */
267slb_miss_user_pseries:
268 std r10,PACA_EXGEN+EX_R10(r13)
269 std r11,PACA_EXGEN+EX_R11(r13)
270 std r12,PACA_EXGEN+EX_R12(r13)
271 mfspr r10,SPRG1
272 ld r11,PACA_EXSLB+EX_R9(r13)
273 ld r12,PACA_EXSLB+EX_R3(r13)
274 std r10,PACA_EXGEN+EX_R13(r13)
275 std r11,PACA_EXGEN+EX_R9(r13)
276 std r12,PACA_EXGEN+EX_R3(r13)
277 clrrdi r12,r13,32
278 mfmsr r10
279 mfspr r11,SRR0 /* save SRR0 */
280 ori r12,r12,slb_miss_user_common@l /* virt addr of handler */
281 ori r10,r10,MSR_IR|MSR_DR|MSR_RI
282 mtspr SRR0,r12
283 mfspr r12,SRR1 /* and SRR1 */
284 mtspr SRR1,r10
285 rfid
286 b . /* prevent spec. execution */
287#endif /* __DISABLED__ */
288
289 .align 7
290 .globl __end_interrupts
291__end_interrupts:
292
293/*
294 * Code from here down to __end_handlers is invoked from the
295 * exception prologs above. Because the prologs assemble the
296 * addresses of these handlers using the LOAD_HANDLER macro,
297 * which uses an addi instruction, these handlers must be in
298 * the first 32k of the kernel image.
299 */
300
301/*** Common interrupt handlers ***/
302
303 STD_EXCEPTION_COMMON(0x100, system_reset, .system_reset_exception)
304
305 /*
306 * Machine check is different because we use a different
307 * save area: PACA_EXMC instead of PACA_EXGEN.
308 */
309 .align 7
310 .globl machine_check_common
311machine_check_common:
312 EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC)
313 FINISH_NAP
314 DISABLE_INTS
315 bl .save_nvgprs
316 addi r3,r1,STACK_FRAME_OVERHEAD
317 bl .machine_check_exception
318 b .ret_from_except
319
320 STD_EXCEPTION_COMMON_LITE(0x900, decrementer, .timer_interrupt)
321 STD_EXCEPTION_COMMON(0xa00, trap_0a, .unknown_exception)
322 STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception)
323 STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception)
324 STD_EXCEPTION_COMMON(0xe00, trap_0e, .unknown_exception)
325 STD_EXCEPTION_COMMON_IDLE(0xf00, performance_monitor, .performance_monitor_exception)
326 STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, .instruction_breakpoint_exception)
327#ifdef CONFIG_ALTIVEC
328 STD_EXCEPTION_COMMON(0x1700, altivec_assist, .altivec_assist_exception)
329#else
330 STD_EXCEPTION_COMMON(0x1700, altivec_assist, .unknown_exception)
331#endif
332#ifdef CONFIG_CBE_RAS
333 STD_EXCEPTION_COMMON(0x1200, cbe_system_error, .cbe_system_error_exception)
334 STD_EXCEPTION_COMMON(0x1600, cbe_maintenance, .cbe_maintenance_exception)
335 STD_EXCEPTION_COMMON(0x1800, cbe_thermal, .cbe_thermal_exception)
336#endif /* CONFIG_CBE_RAS */
337
338 .align 7
339system_call_entry:
340 b system_call_common
341
342/*
343 * Here we have detected that the kernel stack pointer is bad.
344 * R9 contains the saved CR, r13 points to the paca,
345 * r10 contains the (bad) kernel stack pointer,
346 * r11 and r12 contain the saved SRR0 and SRR1.
347 * We switch to using an emergency stack, save the registers there,
348 * and call kernel_bad_stack(), which panics.
349 */
350bad_stack:
351 ld r1,PACAEMERGSP(r13)
352 subi r1,r1,64+INT_FRAME_SIZE
353 std r9,_CCR(r1)
354 std r10,GPR1(r1)
355 std r11,_NIP(r1)
356 std r12,_MSR(r1)
357 mfspr r11,SPRN_DAR
358 mfspr r12,SPRN_DSISR
359 std r11,_DAR(r1)
360 std r12,_DSISR(r1)
361 mflr r10
362 mfctr r11
363 mfxer r12
364 std r10,_LINK(r1)
365 std r11,_CTR(r1)
366 std r12,_XER(r1)
367 SAVE_GPR(0,r1)
368 SAVE_GPR(2,r1)
369 SAVE_4GPRS(3,r1)
370 SAVE_2GPRS(7,r1)
371 SAVE_10GPRS(12,r1)
372 SAVE_10GPRS(22,r1)
373 lhz r12,PACA_TRAP_SAVE(r13)
374 std r12,_TRAP(r1)
375 addi r11,r1,INT_FRAME_SIZE
376 std r11,0(r1)
377 li r12,0
378 std r12,0(r11)
379 ld r2,PACATOC(r13)
3801: addi r3,r1,STACK_FRAME_OVERHEAD
381 bl .kernel_bad_stack
382 b 1b
383
384/*
385 * Here r13 points to the paca, r9 contains the saved CR,
386 * SRR0 and SRR1 are saved in r11 and r12,
387 * r9 - r13 are saved in paca->exgen.
388 */
389 .align 7
390 .globl data_access_common
391data_access_common:
392 mfspr r10,SPRN_DAR
393 std r10,PACA_EXGEN+EX_DAR(r13)
394 mfspr r10,SPRN_DSISR
395 stw r10,PACA_EXGEN+EX_DSISR(r13)
396 EXCEPTION_PROLOG_COMMON(0x300, PACA_EXGEN)
397 ld r3,PACA_EXGEN+EX_DAR(r13)
398 lwz r4,PACA_EXGEN+EX_DSISR(r13)
399 li r5,0x300
400 b .do_hash_page /* Try to handle as hpte fault */
401
402 .align 7
403 .globl instruction_access_common
404instruction_access_common:
405 EXCEPTION_PROLOG_COMMON(0x400, PACA_EXGEN)
406 ld r3,_NIP(r1)
407 andis. r4,r12,0x5820
408 li r5,0x400
409 b .do_hash_page /* Try to handle as hpte fault */
410
411/*
412 * Here is the common SLB miss user that is used when going to virtual
413 * mode for SLB misses, that is currently not used
414 */
415#ifdef __DISABLED__
416 .align 7
417 .globl slb_miss_user_common
418slb_miss_user_common:
419 mflr r10
420 std r3,PACA_EXGEN+EX_DAR(r13)
421 stw r9,PACA_EXGEN+EX_CCR(r13)
422 std r10,PACA_EXGEN+EX_LR(r13)
423 std r11,PACA_EXGEN+EX_SRR0(r13)
424 bl .slb_allocate_user
425
426 ld r10,PACA_EXGEN+EX_LR(r13)
427 ld r3,PACA_EXGEN+EX_R3(r13)
428 lwz r9,PACA_EXGEN+EX_CCR(r13)
429 ld r11,PACA_EXGEN+EX_SRR0(r13)
430 mtlr r10
431 beq- slb_miss_fault
432
433 andi. r10,r12,MSR_RI /* check for unrecoverable exception */
434 beq- unrecov_user_slb
435 mfmsr r10
436
437.machine push
438.machine "power4"
439 mtcrf 0x80,r9
440.machine pop
441
442 clrrdi r10,r10,2 /* clear RI before setting SRR0/1 */
443 mtmsrd r10,1
444
445 mtspr SRR0,r11
446 mtspr SRR1,r12
447
448 ld r9,PACA_EXGEN+EX_R9(r13)
449 ld r10,PACA_EXGEN+EX_R10(r13)
450 ld r11,PACA_EXGEN+EX_R11(r13)
451 ld r12,PACA_EXGEN+EX_R12(r13)
452 ld r13,PACA_EXGEN+EX_R13(r13)
453 rfid
454 b .
455
456slb_miss_fault:
457 EXCEPTION_PROLOG_COMMON(0x380, PACA_EXGEN)
458 ld r4,PACA_EXGEN+EX_DAR(r13)
459 li r5,0
460 std r4,_DAR(r1)
461 std r5,_DSISR(r1)
462 b handle_page_fault
463
464unrecov_user_slb:
465 EXCEPTION_PROLOG_COMMON(0x4200, PACA_EXGEN)
466 DISABLE_INTS
467 bl .save_nvgprs
4681: addi r3,r1,STACK_FRAME_OVERHEAD
469 bl .unrecoverable_exception
470 b 1b
471
472#endif /* __DISABLED__ */
473
474
475/*
476 * r13 points to the PACA, r9 contains the saved CR,
477 * r12 contain the saved SRR1, SRR0 is still ready for return
478 * r3 has the faulting address
479 * r9 - r13 are saved in paca->exslb.
480 * r3 is saved in paca->slb_r3
481 * We assume we aren't going to take any exceptions during this procedure.
482 */
483_GLOBAL(slb_miss_realmode)
484 mflr r10
485#ifdef CONFIG_RELOCATABLE
486 mtctr r11
487#endif
488
489 stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
490 std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
491
492 bl .slb_allocate_realmode
493
494 /* All done -- return from exception. */
495
496 ld r10,PACA_EXSLB+EX_LR(r13)
497 ld r3,PACA_EXSLB+EX_R3(r13)
498 lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
499#ifdef CONFIG_PPC_ISERIES
500BEGIN_FW_FTR_SECTION
501 ld r11,PACALPPACAPTR(r13)
502 ld r11,LPPACASRR0(r11) /* get SRR0 value */
503END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
504#endif /* CONFIG_PPC_ISERIES */
505
506 mtlr r10
507
508 andi. r10,r12,MSR_RI /* check for unrecoverable exception */
509 beq- 2f
510
511.machine push
512.machine "power4"
513 mtcrf 0x80,r9
514 mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
515.machine pop
516
517#ifdef CONFIG_PPC_ISERIES
518BEGIN_FW_FTR_SECTION
519 mtspr SPRN_SRR0,r11
520 mtspr SPRN_SRR1,r12
521END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
522#endif /* CONFIG_PPC_ISERIES */
523 ld r9,PACA_EXSLB+EX_R9(r13)
524 ld r10,PACA_EXSLB+EX_R10(r13)
525 ld r11,PACA_EXSLB+EX_R11(r13)
526 ld r12,PACA_EXSLB+EX_R12(r13)
527 ld r13,PACA_EXSLB+EX_R13(r13)
528 rfid
529 b . /* prevent speculative execution */
530
5312:
532#ifdef CONFIG_PPC_ISERIES
533BEGIN_FW_FTR_SECTION
534 b unrecov_slb
535END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
536#endif /* CONFIG_PPC_ISERIES */
537 mfspr r11,SPRN_SRR0
538 ld r10,PACAKBASE(r13)
539 LOAD_HANDLER(r10,unrecov_slb)
540 mtspr SPRN_SRR0,r10
541 ld r10,PACAKMSR(r13)
542 mtspr SPRN_SRR1,r10
543 rfid
544 b .
545
546unrecov_slb:
547 EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB)
548 DISABLE_INTS
549 bl .save_nvgprs
5501: addi r3,r1,STACK_FRAME_OVERHEAD
551 bl .unrecoverable_exception
552 b 1b
553
554 .align 7
555 .globl hardware_interrupt_common
556 .globl hardware_interrupt_entry
557hardware_interrupt_common:
558 EXCEPTION_PROLOG_COMMON(0x500, PACA_EXGEN)
559 FINISH_NAP
560hardware_interrupt_entry:
561 DISABLE_INTS
562BEGIN_FTR_SECTION
563 bl .ppc64_runlatch_on
564END_FTR_SECTION_IFSET(CPU_FTR_CTRL)
565 addi r3,r1,STACK_FRAME_OVERHEAD
566 bl .do_IRQ
567 b .ret_from_except_lite
568
569#ifdef CONFIG_PPC_970_NAP
570power4_fixup_nap:
571 andc r9,r9,r10
572 std r9,TI_LOCAL_FLAGS(r11)
573 ld r10,_LINK(r1) /* make idle task do the */
574 std r10,_NIP(r1) /* equivalent of a blr */
575 blr
576#endif
577
578 .align 7
579 .globl alignment_common
580alignment_common:
581 mfspr r10,SPRN_DAR
582 std r10,PACA_EXGEN+EX_DAR(r13)
583 mfspr r10,SPRN_DSISR
584 stw r10,PACA_EXGEN+EX_DSISR(r13)
585 EXCEPTION_PROLOG_COMMON(0x600, PACA_EXGEN)
586 ld r3,PACA_EXGEN+EX_DAR(r13)
587 lwz r4,PACA_EXGEN+EX_DSISR(r13)
588 std r3,_DAR(r1)
589 std r4,_DSISR(r1)
590 bl .save_nvgprs
591 addi r3,r1,STACK_FRAME_OVERHEAD
592 ENABLE_INTS
593 bl .alignment_exception
594 b .ret_from_except
595
596 .align 7
597 .globl program_check_common
598program_check_common:
599 EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN)
600 bl .save_nvgprs
601 addi r3,r1,STACK_FRAME_OVERHEAD
602 ENABLE_INTS
603 bl .program_check_exception
604 b .ret_from_except
605
606 .align 7
607 .globl fp_unavailable_common
608fp_unavailable_common:
609 EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN)
610 bne 1f /* if from user, just load it up */
611 bl .save_nvgprs
612 addi r3,r1,STACK_FRAME_OVERHEAD
613 ENABLE_INTS
614 bl .kernel_fp_unavailable_exception
615 BUG_OPCODE
6161: bl .load_up_fpu
617 b fast_exception_return
618
619 .align 7
620 .globl altivec_unavailable_common
621altivec_unavailable_common:
622 EXCEPTION_PROLOG_COMMON(0xf20, PACA_EXGEN)
623#ifdef CONFIG_ALTIVEC
624BEGIN_FTR_SECTION
625 beq 1f
626 bl .load_up_altivec
627 b fast_exception_return
6281:
629END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
630#endif
631 bl .save_nvgprs
632 addi r3,r1,STACK_FRAME_OVERHEAD
633 ENABLE_INTS
634 bl .altivec_unavailable_exception
635 b .ret_from_except
636
637 .align 7
638 .globl vsx_unavailable_common
639vsx_unavailable_common:
640 EXCEPTION_PROLOG_COMMON(0xf40, PACA_EXGEN)
641#ifdef CONFIG_VSX
642BEGIN_FTR_SECTION
643 bne .load_up_vsx
6441:
645END_FTR_SECTION_IFSET(CPU_FTR_VSX)
646#endif
647 bl .save_nvgprs
648 addi r3,r1,STACK_FRAME_OVERHEAD
649 ENABLE_INTS
650 bl .vsx_unavailable_exception
651 b .ret_from_except
652
653 .align 7
654 .globl __end_handlers
655__end_handlers:
656
657/*
658 * Return from an exception with minimal checks.
659 * The caller is assumed to have done EXCEPTION_PROLOG_COMMON.
660 * If interrupts have been enabled, or anything has been
661 * done that might have changed the scheduling status of
662 * any task or sent any task a signal, you should use
663 * ret_from_except or ret_from_except_lite instead of this.
664 */
665fast_exc_return_irq: /* restores irq state too */
666 ld r3,SOFTE(r1)
667 TRACE_AND_RESTORE_IRQ(r3);
668 ld r12,_MSR(r1)
669 rldicl r4,r12,49,63 /* get MSR_EE to LSB */
670 stb r4,PACAHARDIRQEN(r13) /* restore paca->hard_enabled */
671 b 1f
672
673 .globl fast_exception_return
674fast_exception_return:
675 ld r12,_MSR(r1)
6761: ld r11,_NIP(r1)
677 andi. r3,r12,MSR_RI /* check if RI is set */
678 beq- unrecov_fer
679
680#ifdef CONFIG_VIRT_CPU_ACCOUNTING
681 andi. r3,r12,MSR_PR
682 beq 2f
683 ACCOUNT_CPU_USER_EXIT(r3, r4)
6842:
685#endif
686
687 ld r3,_CCR(r1)
688 ld r4,_LINK(r1)
689 ld r5,_CTR(r1)
690 ld r6,_XER(r1)
691 mtcr r3
692 mtlr r4
693 mtctr r5
694 mtxer r6
695 REST_GPR(0, r1)
696 REST_8GPRS(2, r1)
697
698 mfmsr r10
699 rldicl r10,r10,48,1 /* clear EE */
700 rldicr r10,r10,16,61 /* clear RI (LE is 0 already) */
701 mtmsrd r10,1
702
703 mtspr SPRN_SRR1,r12
704 mtspr SPRN_SRR0,r11
705 REST_4GPRS(10, r1)
706 ld r1,GPR1(r1)
707 rfid
708 b . /* prevent speculative execution */
709
710unrecov_fer:
711 bl .save_nvgprs
7121: addi r3,r1,STACK_FRAME_OVERHEAD
713 bl .unrecoverable_exception
714 b 1b
715
716
717/*
718 * Hash table stuff
719 */
720 .align 7
721_STATIC(do_hash_page)
722 std r3,_DAR(r1)
723 std r4,_DSISR(r1)
724
725 andis. r0,r4,0xa450 /* weird error? */
726 bne- handle_page_fault /* if not, try to insert a HPTE */
727BEGIN_FTR_SECTION
728 andis. r0,r4,0x0020 /* Is it a segment table fault? */
729 bne- do_ste_alloc /* If so handle it */
730END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
731
732 /*
733 * On iSeries, we soft-disable interrupts here, then
734 * hard-enable interrupts so that the hash_page code can spin on
735 * the hash_table_lock without problems on a shared processor.
736 */
737 DISABLE_INTS
738
739 /*
740 * Currently, trace_hardirqs_off() will be called by DISABLE_INTS
741 * and will clobber volatile registers when irq tracing is enabled
742 * so we need to reload them. It may be possible to be smarter here
743 * and move the irq tracing elsewhere but let's keep it simple for
744 * now
745 */
746#ifdef CONFIG_TRACE_IRQFLAGS
747 ld r3,_DAR(r1)
748 ld r4,_DSISR(r1)
749 ld r5,_TRAP(r1)
750 ld r12,_MSR(r1)
751 clrrdi r5,r5,4
752#endif /* CONFIG_TRACE_IRQFLAGS */
753 /*
754 * We need to set the _PAGE_USER bit if MSR_PR is set or if we are
755 * accessing a userspace segment (even from the kernel). We assume
756 * kernel addresses always have the high bit set.
757 */
758 rlwinm r4,r4,32-25+9,31-9,31-9 /* DSISR_STORE -> _PAGE_RW */
759 rotldi r0,r3,15 /* Move high bit into MSR_PR posn */
760 orc r0,r12,r0 /* MSR_PR | ~high_bit */
761 rlwimi r4,r0,32-13,30,30 /* becomes _PAGE_USER access bit */
762 ori r4,r4,1 /* add _PAGE_PRESENT */
763 rlwimi r4,r5,22+2,31-2,31-2 /* Set _PAGE_EXEC if trap is 0x400 */
764
765 /*
766 * r3 contains the faulting address
767 * r4 contains the required access permissions
768 * r5 contains the trap number
769 *
770 * at return r3 = 0 for success
771 */
772 bl .hash_page /* build HPTE if possible */
773 cmpdi r3,0 /* see if hash_page succeeded */
774
775BEGIN_FW_FTR_SECTION
776 /*
777 * If we had interrupts soft-enabled at the point where the
778 * DSI/ISI occurred, and an interrupt came in during hash_page,
779 * handle it now.
780 * We jump to ret_from_except_lite rather than fast_exception_return
781 * because ret_from_except_lite will check for and handle pending
782 * interrupts if necessary.
783 */
784 beq 13f
785END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
786
787BEGIN_FW_FTR_SECTION
788 /*
789 * Here we have interrupts hard-disabled, so it is sufficient
790 * to restore paca->{soft,hard}_enable and get out.
791 */
792 beq fast_exc_return_irq /* Return from exception on success */
793END_FW_FTR_SECTION_IFCLR(FW_FEATURE_ISERIES)
794
795 /* For a hash failure, we don't bother re-enabling interrupts */
796 ble- 12f
797
798 /*
799 * hash_page couldn't handle it, set soft interrupt enable back
800 * to what it was before the trap. Note that .raw_local_irq_restore
801 * handles any interrupts pending at this point.
802 */
803 ld r3,SOFTE(r1)
804 TRACE_AND_RESTORE_IRQ_PARTIAL(r3, 11f)
805 bl .raw_local_irq_restore
806 b 11f
807
808/* Here we have a page fault that hash_page can't handle. */
809handle_page_fault:
810 ENABLE_INTS
81111: ld r4,_DAR(r1)
812 ld r5,_DSISR(r1)
813 addi r3,r1,STACK_FRAME_OVERHEAD
814 bl .do_page_fault
815 cmpdi r3,0
816 beq+ 13f
817 bl .save_nvgprs
818 mr r5,r3
819 addi r3,r1,STACK_FRAME_OVERHEAD
820 lwz r4,_DAR(r1)
821 bl .bad_page_fault
822 b .ret_from_except
823
82413: b .ret_from_except_lite
825
826/* We have a page fault that hash_page could handle but HV refused
827 * the PTE insertion
828 */
82912: bl .save_nvgprs
830 mr r5,r3
831 addi r3,r1,STACK_FRAME_OVERHEAD
832 ld r4,_DAR(r1)
833 bl .low_hash_fault
834 b .ret_from_except
835
836 /* here we have a segment miss */
837do_ste_alloc:
838 bl .ste_allocate /* try to insert stab entry */
839 cmpdi r3,0
840 bne- handle_page_fault
841 b fast_exception_return
842
843/*
844 * r13 points to the PACA, r9 contains the saved CR,
845 * r11 and r12 contain the saved SRR0 and SRR1.
846 * r9 - r13 are saved in paca->exslb.
847 * We assume we aren't going to take any exceptions during this procedure.
848 * We assume (DAR >> 60) == 0xc.
849 */
850 .align 7
851_GLOBAL(do_stab_bolted)
852 stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
853 std r11,PACA_EXSLB+EX_SRR0(r13) /* save SRR0 in exc. frame */
854
855 /* Hash to the primary group */
856 ld r10,PACASTABVIRT(r13)
857 mfspr r11,SPRN_DAR
858 srdi r11,r11,28
859 rldimi r10,r11,7,52 /* r10 = first ste of the group */
860
861 /* Calculate VSID */
862 /* This is a kernel address, so protovsid = ESID */
863 ASM_VSID_SCRAMBLE(r11, r9, 256M)
864 rldic r9,r11,12,16 /* r9 = vsid << 12 */
865
866 /* Search the primary group for a free entry */
8671: ld r11,0(r10) /* Test valid bit of the current ste */
868 andi. r11,r11,0x80
869 beq 2f
870 addi r10,r10,16
871 andi. r11,r10,0x70
872 bne 1b
873
874 /* Stick for only searching the primary group for now. */
875 /* At least for now, we use a very simple random castout scheme */
876 /* Use the TB as a random number ; OR in 1 to avoid entry 0 */
877 mftb r11
878 rldic r11,r11,4,57 /* r11 = (r11 << 4) & 0x70 */
879 ori r11,r11,0x10
880
881 /* r10 currently points to an ste one past the group of interest */
882 /* make it point to the randomly selected entry */
883 subi r10,r10,128
884 or r10,r10,r11 /* r10 is the entry to invalidate */
885
886 isync /* mark the entry invalid */
887 ld r11,0(r10)
888 rldicl r11,r11,56,1 /* clear the valid bit */
889 rotldi r11,r11,8
890 std r11,0(r10)
891 sync
892
893 clrrdi r11,r11,28 /* Get the esid part of the ste */
894 slbie r11
895
8962: std r9,8(r10) /* Store the vsid part of the ste */
897 eieio
898
899 mfspr r11,SPRN_DAR /* Get the new esid */
900 clrrdi r11,r11,28 /* Permits a full 32b of ESID */
901 ori r11,r11,0x90 /* Turn on valid and kp */
902 std r11,0(r10) /* Put new entry back into the stab */
903
904 sync
905
906 /* All done -- return from exception. */
907 lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
908 ld r11,PACA_EXSLB+EX_SRR0(r13) /* get saved SRR0 */
909
910 andi. r10,r12,MSR_RI
911 beq- unrecov_slb
912
913 mtcrf 0x80,r9 /* restore CR */
914
915 mfmsr r10
916 clrrdi r10,r10,2
917 mtmsrd r10,1
918
919 mtspr SPRN_SRR0,r11
920 mtspr SPRN_SRR1,r12
921 ld r9,PACA_EXSLB+EX_R9(r13)
922 ld r10,PACA_EXSLB+EX_R10(r13)
923 ld r11,PACA_EXSLB+EX_R11(r13)
924 ld r12,PACA_EXSLB+EX_R12(r13)
925 ld r13,PACA_EXSLB+EX_R13(r13)
926 rfid
927 b . /* prevent speculative execution */
928
929/*
930 * Space for CPU0's segment table.
931 *
932 * On iSeries, the hypervisor must fill in at least one entry before
933 * we get control (with relocate on). The address is given to the hv
934 * as a page number (see xLparMap below), so this must be at a
935 * fixed address (the linker can't compute (u64)&initial_stab >>
936 * PAGE_SHIFT).
937 */
938 . = STAB0_OFFSET /* 0x6000 */
939 .globl initial_stab
940initial_stab:
941 .space 4096
942
943#ifdef CONFIG_PPC_PSERIES
944/*
945 * Data area reserved for FWNMI option.
946 * This address (0x7000) is fixed by the RPA.
947 */
948	. = 0x7000
949 .globl fwnmi_data_area
950fwnmi_data_area:
951#endif /* CONFIG_PPC_PSERIES */
952
953 /* iSeries does not use the FWNMI stuff, so it is safe to put
954 * this here, even if we later allow kernels that will boot on
955 * both pSeries and iSeries */
956#ifdef CONFIG_PPC_ISERIES
957 . = LPARMAP_PHYS
958 .globl xLparMap
959xLparMap:
960 .quad HvEsidsToMap /* xNumberEsids */
961 .quad HvRangesToMap /* xNumberRanges */
962 .quad STAB0_PAGE /* xSegmentTableOffs */
963 .zero 40 /* xRsvd */
964 /* xEsids (HvEsidsToMap entries of 2 quads) */
965 .quad PAGE_OFFSET_ESID /* xKernelEsid */
966 .quad PAGE_OFFSET_VSID /* xKernelVsid */
967 .quad VMALLOC_START_ESID /* xKernelEsid */
968 .quad VMALLOC_START_VSID /* xKernelVsid */
969 /* xRanges (HvRangesToMap entries of 3 quads) */
970 .quad HvPagesToMap /* xPages */
971 .quad 0 /* xOffset */
972 .quad PAGE_OFFSET_VSID << (SID_SHIFT - HW_PAGE_SHIFT) /* xVPN */
973
974#endif /* CONFIG_PPC_ISERIES */
975
976#ifdef CONFIG_PPC_PSERIES
977 . = 0x8000
978#endif /* CONFIG_PPC_PSERIES */
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
index 2d182f119d1d..ce1f3e44c24f 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/ftrace.c
@@ -23,25 +23,14 @@
23#include <asm/code-patching.h> 23#include <asm/code-patching.h>
24#include <asm/ftrace.h> 24#include <asm/ftrace.h>
25 25
26#ifdef CONFIG_PPC32
27# define GET_ADDR(addr) addr
28#else
29/* PowerPC64's functions are data that points to the functions */
30# define GET_ADDR(addr) (*(unsigned long *)addr)
31#endif
32 26
33#ifdef CONFIG_DYNAMIC_FTRACE 27#ifdef CONFIG_DYNAMIC_FTRACE
34static unsigned int ftrace_nop_replace(void)
35{
36 return PPC_INST_NOP;
37}
38
39static unsigned int 28static unsigned int
40ftrace_call_replace(unsigned long ip, unsigned long addr, int link) 29ftrace_call_replace(unsigned long ip, unsigned long addr, int link)
41{ 30{
42 unsigned int op; 31 unsigned int op;
43 32
44 addr = GET_ADDR(addr); 33 addr = ppc_function_entry((void *)addr);
45 34
46 /* if (link) set op to 'bl' else 'b' */ 35 /* if (link) set op to 'bl' else 'b' */
47 op = create_branch((unsigned int *)ip, addr, link ? 1 : 0); 36 op = create_branch((unsigned int *)ip, addr, link ? 1 : 0);
@@ -49,14 +38,6 @@ ftrace_call_replace(unsigned long ip, unsigned long addr, int link)
49 return op; 38 return op;
50} 39}
51 40
52#ifdef CONFIG_PPC64
53# define _ASM_ALIGN " .align 3 "
54# define _ASM_PTR " .llong "
55#else
56# define _ASM_ALIGN " .align 2 "
57# define _ASM_PTR " .long "
58#endif
59
60static int 41static int
61ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new) 42ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new)
62{ 43{
@@ -197,7 +178,7 @@ __ftrace_make_nop(struct module *mod,
197 ptr = ((unsigned long)jmp[0] << 32) + jmp[1]; 178 ptr = ((unsigned long)jmp[0] << 32) + jmp[1];
198 179
199 /* This should match what was called */ 180 /* This should match what was called */
200 if (ptr != GET_ADDR(addr)) { 181 if (ptr != ppc_function_entry((void *)addr)) {
201 printk(KERN_ERR "addr does not match %lx\n", ptr); 182 printk(KERN_ERR "addr does not match %lx\n", ptr);
202 return -EINVAL; 183 return -EINVAL;
203 } 184 }
@@ -328,7 +309,7 @@ int ftrace_make_nop(struct module *mod,
328 if (test_24bit_addr(ip, addr)) { 309 if (test_24bit_addr(ip, addr)) {
329 /* within range */ 310 /* within range */
330 old = ftrace_call_replace(ip, addr, 1); 311 old = ftrace_call_replace(ip, addr, 1);
331 new = ftrace_nop_replace(); 312 new = PPC_INST_NOP;
332 return ftrace_modify_code(ip, old, new); 313 return ftrace_modify_code(ip, old, new);
333 } 314 }
334 315
@@ -466,7 +447,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
466 */ 447 */
467 if (test_24bit_addr(ip, addr)) { 448 if (test_24bit_addr(ip, addr)) {
468 /* within range */ 449 /* within range */
469 old = ftrace_nop_replace(); 450 old = PPC_INST_NOP;
470 new = ftrace_call_replace(ip, addr, 1); 451 new = ftrace_call_replace(ip, addr, 1);
471 return ftrace_modify_code(ip, old, new); 452 return ftrace_modify_code(ip, old, new);
472 } 453 }
@@ -570,7 +551,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
570 return_hooker = (unsigned long)&mod_return_to_handler; 551 return_hooker = (unsigned long)&mod_return_to_handler;
571#endif 552#endif
572 553
573 return_hooker = GET_ADDR(return_hooker); 554 return_hooker = ppc_function_entry((void *)return_hooker);
574 555
575 /* 556 /*
576 * Protect against fault, even if it shouldn't 557 * Protect against fault, even if it shouldn't
@@ -605,7 +586,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
605 return; 586 return;
606 } 587 }
607 588
608 if (ftrace_push_return_trace(old, self_addr, &trace.depth) == -EBUSY) { 589 if (ftrace_push_return_trace(old, self_addr, &trace.depth, 0) == -EBUSY) {
609 *parent = old; 590 *parent = old;
610 return; 591 return;
611 } 592 }
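Context for the GET_ADDR removal above: on 64-bit powerpc the ABI makes a function symbol name an OPD function descriptor rather than the code itself, which is why the old macro dereferenced the address, while on 32-bit the symbol already is the entry point. ppc_function_entry() from <asm/code-patching.h> (already included at the top of this file) centralizes that lookup. A rough sketch of the idea, not the kernel's exact definition:

	/* Sketch only: resolve a function pointer to its text address. */
	static inline unsigned long example_function_entry(void *func)
	{
	#ifdef CONFIG_PPC64
		/* first word of the function descriptor is the entry point */
		return *(unsigned long *)func;
	#else
		return (unsigned long)func;
	#endif
	}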
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index c01467f952d3..48469463f89e 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -733,9 +733,11 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
733AltiVecUnavailable: 733AltiVecUnavailable:
734 EXCEPTION_PROLOG 734 EXCEPTION_PROLOG
735#ifdef CONFIG_ALTIVEC 735#ifdef CONFIG_ALTIVEC
736 bne load_up_altivec /* if from user, just load it up */ 736 beq 1f
737 bl load_up_altivec /* if from user, just load it up */
738 b fast_exception_return
737#endif /* CONFIG_ALTIVEC */ 739#endif /* CONFIG_ALTIVEC */
738 addi r3,r1,STACK_FRAME_OVERHEAD 7401: addi r3,r1,STACK_FRAME_OVERHEAD
739 EXC_XFER_EE_LITE(0xf20, altivec_unavailable_exception) 741 EXC_XFER_EE_LITE(0xf20, altivec_unavailable_exception)
740 742
741PerformanceMonitor: 743PerformanceMonitor:
@@ -743,101 +745,6 @@ PerformanceMonitor:
743 addi r3,r1,STACK_FRAME_OVERHEAD 745 addi r3,r1,STACK_FRAME_OVERHEAD
744 EXC_XFER_STD(0xf00, performance_monitor_exception) 746 EXC_XFER_STD(0xf00, performance_monitor_exception)
745 747
746#ifdef CONFIG_ALTIVEC
747/* Note that the AltiVec support is closely modeled after the FP
748 * support. Changes to one are likely to be applicable to the
749 * other! */
750load_up_altivec:
751/*
752 * Disable AltiVec for the task which had AltiVec previously,
753 * and save its AltiVec registers in its thread_struct.
754 * Enables AltiVec for use in the kernel on return.
755 * On SMP we know the AltiVec units are free, since we give it up every
756 * switch. -- Kumar
757 */
758 mfmsr r5
759 oris r5,r5,MSR_VEC@h
760 MTMSRD(r5) /* enable use of AltiVec now */
761 isync
762/*
763 * For SMP, we don't do lazy AltiVec switching because it just gets too
764 * horrendously complex, especially when a task switches from one CPU
765 * to another. Instead we call giveup_altivec in switch_to.
766 */
767#ifndef CONFIG_SMP
768 tophys(r6,0)
769 addis r3,r6,last_task_used_altivec@ha
770 lwz r4,last_task_used_altivec@l(r3)
771 cmpwi 0,r4,0
772 beq 1f
773 add r4,r4,r6
774 addi r4,r4,THREAD /* want THREAD of last_task_used_altivec */
775 SAVE_32VRS(0,r10,r4)
776 mfvscr vr0
777 li r10,THREAD_VSCR
778 stvx vr0,r10,r4
779 lwz r5,PT_REGS(r4)
780 add r5,r5,r6
781 lwz r4,_MSR-STACK_FRAME_OVERHEAD(r5)
782 lis r10,MSR_VEC@h
783 andc r4,r4,r10 /* disable altivec for previous task */
784 stw r4,_MSR-STACK_FRAME_OVERHEAD(r5)
7851:
786#endif /* CONFIG_SMP */
787 /* enable use of AltiVec after return */
788 oris r9,r9,MSR_VEC@h
789 mfspr r5,SPRN_SPRG3 /* current task's THREAD (phys) */
790 li r4,1
791 li r10,THREAD_VSCR
792 stw r4,THREAD_USED_VR(r5)
793 lvx vr0,r10,r5
794 mtvscr vr0
795 REST_32VRS(0,r10,r5)
796#ifndef CONFIG_SMP
797 subi r4,r5,THREAD
798 sub r4,r4,r6
799 stw r4,last_task_used_altivec@l(r3)
800#endif /* CONFIG_SMP */
801 /* restore registers and return */
802 /* we haven't used ctr or xer or lr */
803 b fast_exception_return
804
805/*
806 * giveup_altivec(tsk)
807 * Disable AltiVec for the task given as the argument,
808 * and save the AltiVec registers in its thread_struct.
809 * Enables AltiVec for use in the kernel on return.
810 */
811
812 .globl giveup_altivec
813giveup_altivec:
814 mfmsr r5
815 oris r5,r5,MSR_VEC@h
816 SYNC
817 MTMSRD(r5) /* enable use of AltiVec now */
818 isync
819 cmpwi 0,r3,0
820 beqlr- /* if no previous owner, done */
821 addi r3,r3,THREAD /* want THREAD of task */
822 lwz r5,PT_REGS(r3)
823 cmpwi 0,r5,0
824 SAVE_32VRS(0, r4, r3)
825 mfvscr vr0
826 li r4,THREAD_VSCR
827 stvx vr0,r4,r3
828 beq 1f
829 lwz r4,_MSR-STACK_FRAME_OVERHEAD(r5)
830 lis r3,MSR_VEC@h
831 andc r4,r4,r3 /* disable AltiVec for previous task */
832 stw r4,_MSR-STACK_FRAME_OVERHEAD(r5)
8331:
834#ifndef CONFIG_SMP
835 li r5,0
836 lis r4,last_task_used_altivec@ha
837 stw r5,last_task_used_altivec@l(r4)
838#endif /* CONFIG_SMP */
839 blr
840#endif /* CONFIG_ALTIVEC */
841 748
842/* 749/*
843 * This code is jumped to from the startup code to copy 750 * This code is jumped to from the startup code to copy
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 50ef505b8fb6..012505ebd9f9 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -12,8 +12,9 @@
12 * Adapted for 64bit PowerPC by Dave Engebretsen, Peter Bergner, and 12 * Adapted for 64bit PowerPC by Dave Engebretsen, Peter Bergner, and
13 * Mike Corrigan {engebret|bergner|mikejc}@us.ibm.com 13 * Mike Corrigan {engebret|bergner|mikejc}@us.ibm.com
14 * 14 *
15 * This file contains the low-level support and setup for the 15 * This file contains the entry point for the 64-bit kernel along
16 * PowerPC-64 platform, including trap and interrupt dispatch. 16 * with some early initialization code common to all 64-bit powerpc
17 * variants.
17 * 18 *
18 * This program is free software; you can redistribute it and/or 19 * This program is free software; you can redistribute it and/or
19 * modify it under the terms of the GNU General Public License 20 * modify it under the terms of the GNU General Public License
@@ -38,36 +39,25 @@
38#include <asm/exception.h> 39#include <asm/exception.h>
39#include <asm/irqflags.h> 40#include <asm/irqflags.h>
40 41
41/* 42/* The physical memory is layed out such that the secondary processor
42 * We layout physical memory as follows: 43 * spin code sits at 0x0000...0x00ff. On server, the vectors follow
43 * 0x0000 - 0x00ff : Secondary processor spin code 44 * using the layout described in exceptions-64s.S
44 * 0x0100 - 0x2fff : pSeries Interrupt prologs
45 * 0x3000 - 0x5fff : interrupt support, iSeries and common interrupt prologs
46 * 0x6000 - 0x6fff : Initial (CPU0) segment table
47 * 0x7000 - 0x7fff : FWNMI data area
48 * 0x8000 - : Early init and support code
49 */
50
51/*
52 * SPRG Usage
53 *
54 * Register Definition
55 *
56 * SPRG0 reserved for hypervisor
57 * SPRG1 temp - used to save gpr
58 * SPRG2 temp - used to save gpr
59 * SPRG3 virt addr of paca
60 */ 45 */
61 46
62/* 47/*
63 * Entering into this code we make the following assumptions: 48 * Entering into this code we make the following assumptions:
64 * For pSeries: 49 *
50 * For pSeries or server processors:
65 * 1. The MMU is off & open firmware is running in real mode. 51 * 1. The MMU is off & open firmware is running in real mode.
66 * 2. The kernel is entered at __start 52 * 2. The kernel is entered at __start
67 * 53 *
68 * For iSeries: 54 * For iSeries:
69 * 1. The MMU is on (as it always is for iSeries) 55 * 1. The MMU is on (as it always is for iSeries)
70 * 2. The kernel is entered at system_reset_iSeries 56 * 2. The kernel is entered at system_reset_iSeries
57 *
58 * For Book3E processors:
59 * 1. The MMU is on running in AS0 in a state defined in ePAPR
60 * 2. The kernel is entered at __start
71 */ 61 */
72 62
73 .text 63 .text
@@ -166,1065 +156,14 @@ exception_marker:
166 .text 156 .text
167 157
168/* 158/*
169 * This is the start of the interrupt handlers for pSeries 159 * On server, we include the exception vectors code here as it
170 * This code runs with relocation off. 160 * relies on absolute addressing which is only possible within
171 * Code from here to __end_interrupts gets copied down to real 161 * this compilation unit
172 * address 0x100 when we are running a relocatable kernel.
173 * Therefore any relative branches in this section must only
174 * branch to labels in this section.
175 */
176 . = 0x100
177 .globl __start_interrupts
178__start_interrupts:
179
180 STD_EXCEPTION_PSERIES(0x100, system_reset)
181
182 . = 0x200
183_machine_check_pSeries:
184 HMT_MEDIUM
185 mtspr SPRN_SPRG1,r13 /* save r13 */
186 EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common)
187
188 . = 0x300
189 .globl data_access_pSeries
190data_access_pSeries:
191 HMT_MEDIUM
192 mtspr SPRN_SPRG1,r13
193BEGIN_FTR_SECTION
194 mtspr SPRN_SPRG2,r12
195 mfspr r13,SPRN_DAR
196 mfspr r12,SPRN_DSISR
197 srdi r13,r13,60
198 rlwimi r13,r12,16,0x20
199 mfcr r12
200 cmpwi r13,0x2c
201 beq do_stab_bolted_pSeries
202 mtcrf 0x80,r12
203 mfspr r12,SPRN_SPRG2
204END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
205 EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common)
206
207 . = 0x380
208 .globl data_access_slb_pSeries
209data_access_slb_pSeries:
210 HMT_MEDIUM
211 mtspr SPRN_SPRG1,r13
212 mfspr r13,SPRN_SPRG3 /* get paca address into r13 */
213 std r3,PACA_EXSLB+EX_R3(r13)
214 mfspr r3,SPRN_DAR
215 std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */
216 mfcr r9
217#ifdef __DISABLED__
218 /* Keep that around for when we re-implement dynamic VSIDs */
219 cmpdi r3,0
220 bge slb_miss_user_pseries
221#endif /* __DISABLED__ */
222 std r10,PACA_EXSLB+EX_R10(r13)
223 std r11,PACA_EXSLB+EX_R11(r13)
224 std r12,PACA_EXSLB+EX_R12(r13)
225 mfspr r10,SPRN_SPRG1
226 std r10,PACA_EXSLB+EX_R13(r13)
227 mfspr r12,SPRN_SRR1 /* and SRR1 */
228#ifndef CONFIG_RELOCATABLE
229 b .slb_miss_realmode
230#else
231 /*
232 * We can't just use a direct branch to .slb_miss_realmode
233 * because the distance from here to there depends on where
234 * the kernel ends up being put.
235 */
236 mfctr r11
237 ld r10,PACAKBASE(r13)
238 LOAD_HANDLER(r10, .slb_miss_realmode)
239 mtctr r10
240 bctr
241#endif
242
243 STD_EXCEPTION_PSERIES(0x400, instruction_access)
244
245 . = 0x480
246 .globl instruction_access_slb_pSeries
247instruction_access_slb_pSeries:
248 HMT_MEDIUM
249 mtspr SPRN_SPRG1,r13
250 mfspr r13,SPRN_SPRG3 /* get paca address into r13 */
251 std r3,PACA_EXSLB+EX_R3(r13)
252 mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
253 std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */
254 mfcr r9
255#ifdef __DISABLED__
256 /* Keep that around for when we re-implement dynamic VSIDs */
257 cmpdi r3,0
258 bge slb_miss_user_pseries
259#endif /* __DISABLED__ */
260 std r10,PACA_EXSLB+EX_R10(r13)
261 std r11,PACA_EXSLB+EX_R11(r13)
262 std r12,PACA_EXSLB+EX_R12(r13)
263 mfspr r10,SPRN_SPRG1
264 std r10,PACA_EXSLB+EX_R13(r13)
265 mfspr r12,SPRN_SRR1 /* and SRR1 */
266#ifndef CONFIG_RELOCATABLE
267 b .slb_miss_realmode
268#else
269 mfctr r11
270 ld r10,PACAKBASE(r13)
271 LOAD_HANDLER(r10, .slb_miss_realmode)
272 mtctr r10
273 bctr
274#endif
275
276 MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt)
277 STD_EXCEPTION_PSERIES(0x600, alignment)
278 STD_EXCEPTION_PSERIES(0x700, program_check)
279 STD_EXCEPTION_PSERIES(0x800, fp_unavailable)
280 MASKABLE_EXCEPTION_PSERIES(0x900, decrementer)
281 STD_EXCEPTION_PSERIES(0xa00, trap_0a)
282 STD_EXCEPTION_PSERIES(0xb00, trap_0b)
283
284 . = 0xc00
285 .globl system_call_pSeries
286system_call_pSeries:
287 HMT_MEDIUM
288BEGIN_FTR_SECTION
289 cmpdi r0,0x1ebe
290 beq- 1f
291END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
292 mr r9,r13
293 mfspr r13,SPRN_SPRG3
294 mfspr r11,SPRN_SRR0
295 ld r12,PACAKBASE(r13)
296 ld r10,PACAKMSR(r13)
297 LOAD_HANDLER(r12, system_call_entry)
298 mtspr SPRN_SRR0,r12
299 mfspr r12,SPRN_SRR1
300 mtspr SPRN_SRR1,r10
301 rfid
302 b . /* prevent speculative execution */
303
304/* Fast LE/BE switch system call */
3051: mfspr r12,SPRN_SRR1
306 xori r12,r12,MSR_LE
307 mtspr SPRN_SRR1,r12
308 rfid /* return to userspace */
309 b .
310
311 STD_EXCEPTION_PSERIES(0xd00, single_step)
312 STD_EXCEPTION_PSERIES(0xe00, trap_0e)
313
314 /* We need to deal with the Altivec unavailable exception
315 * here which is at 0xf20, thus in the middle of the
316 * prolog code of the PerformanceMonitor one. A little
317 * trickery is thus necessary
318 */
319 . = 0xf00
320 b performance_monitor_pSeries
321
322 . = 0xf20
323 b altivec_unavailable_pSeries
324
325 . = 0xf40
326 b vsx_unavailable_pSeries
327
328#ifdef CONFIG_CBE_RAS
329 HSTD_EXCEPTION_PSERIES(0x1200, cbe_system_error)
330#endif /* CONFIG_CBE_RAS */
331 STD_EXCEPTION_PSERIES(0x1300, instruction_breakpoint)
332#ifdef CONFIG_CBE_RAS
333 HSTD_EXCEPTION_PSERIES(0x1600, cbe_maintenance)
334#endif /* CONFIG_CBE_RAS */
335 STD_EXCEPTION_PSERIES(0x1700, altivec_assist)
336#ifdef CONFIG_CBE_RAS
337 HSTD_EXCEPTION_PSERIES(0x1800, cbe_thermal)
338#endif /* CONFIG_CBE_RAS */
339
340 . = 0x3000
341
342/*** pSeries interrupt support ***/
343
344 /* moved from 0xf00 */
345 STD_EXCEPTION_PSERIES(., performance_monitor)
346 STD_EXCEPTION_PSERIES(., altivec_unavailable)
347 STD_EXCEPTION_PSERIES(., vsx_unavailable)
348
349/*
350 * An interrupt came in while soft-disabled; clear EE in SRR1,
351 * clear paca->hard_enabled and return.
352 */
353masked_interrupt:
354 stb r10,PACAHARDIRQEN(r13)
355 mtcrf 0x80,r9
356 ld r9,PACA_EXGEN+EX_R9(r13)
357 mfspr r10,SPRN_SRR1
358 rldicl r10,r10,48,1 /* clear MSR_EE */
359 rotldi r10,r10,16
360 mtspr SPRN_SRR1,r10
361 ld r10,PACA_EXGEN+EX_R10(r13)
362 mfspr r13,SPRN_SPRG1
363 rfid
364 b .
365
366 .align 7
367do_stab_bolted_pSeries:
368 mtcrf 0x80,r12
369 mfspr r12,SPRN_SPRG2
370 EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted)
371
372#ifdef CONFIG_PPC_PSERIES
373/*
374 * Vectors for the FWNMI option. Share common code.
375 */
376 .globl system_reset_fwnmi
377 .align 7
378system_reset_fwnmi:
379 HMT_MEDIUM
380 mtspr SPRN_SPRG1,r13 /* save r13 */
381 EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common)
382
383 .globl machine_check_fwnmi
384 .align 7
385machine_check_fwnmi:
386 HMT_MEDIUM
387 mtspr SPRN_SPRG1,r13 /* save r13 */
388 EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common)
389
390#endif /* CONFIG_PPC_PSERIES */
391
392#ifdef __DISABLED__
393/*
394 * This is used for when the SLB miss handler has to go virtual,
395 * which doesn't happen for now anymore but will once we re-implement
396 * dynamic VSIDs for shared page tables
397 */
398slb_miss_user_pseries:
399 std r10,PACA_EXGEN+EX_R10(r13)
400 std r11,PACA_EXGEN+EX_R11(r13)
401 std r12,PACA_EXGEN+EX_R12(r13)
402 mfspr r10,SPRG1
403 ld r11,PACA_EXSLB+EX_R9(r13)
404 ld r12,PACA_EXSLB+EX_R3(r13)
405 std r10,PACA_EXGEN+EX_R13(r13)
406 std r11,PACA_EXGEN+EX_R9(r13)
407 std r12,PACA_EXGEN+EX_R3(r13)
408 clrrdi r12,r13,32
409 mfmsr r10
410 mfspr r11,SRR0 /* save SRR0 */
411 ori r12,r12,slb_miss_user_common@l /* virt addr of handler */
412 ori r10,r10,MSR_IR|MSR_DR|MSR_RI
413 mtspr SRR0,r12
414 mfspr r12,SRR1 /* and SRR1 */
415 mtspr SRR1,r10
416 rfid
417 b . /* prevent spec. execution */
418#endif /* __DISABLED__ */
419
420 .align 7
421 .globl __end_interrupts
422__end_interrupts:
423
424/*
425 * Code from here down to __end_handlers is invoked from the
426 * exception prologs above. Because the prologs assemble the
427 * addresses of these handlers using the LOAD_HANDLER macro,
428 * which uses an addi instruction, these handlers must be in
429 * the first 32k of the kernel image.
430 */
431
432/*** Common interrupt handlers ***/
433
434 STD_EXCEPTION_COMMON(0x100, system_reset, .system_reset_exception)
435
436 /*
437 * Machine check is different because we use a different
438 * save area: PACA_EXMC instead of PACA_EXGEN.
439 */
440 .align 7
441 .globl machine_check_common
442machine_check_common:
443 EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC)
444 FINISH_NAP
445 DISABLE_INTS
446 bl .save_nvgprs
447 addi r3,r1,STACK_FRAME_OVERHEAD
448 bl .machine_check_exception
449 b .ret_from_except
450
451 STD_EXCEPTION_COMMON_LITE(0x900, decrementer, .timer_interrupt)
452 STD_EXCEPTION_COMMON(0xa00, trap_0a, .unknown_exception)
453 STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception)
454 STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception)
455 STD_EXCEPTION_COMMON(0xe00, trap_0e, .unknown_exception)
456 STD_EXCEPTION_COMMON_IDLE(0xf00, performance_monitor, .performance_monitor_exception)
457 STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, .instruction_breakpoint_exception)
458#ifdef CONFIG_ALTIVEC
459 STD_EXCEPTION_COMMON(0x1700, altivec_assist, .altivec_assist_exception)
460#else
461 STD_EXCEPTION_COMMON(0x1700, altivec_assist, .unknown_exception)
462#endif
463#ifdef CONFIG_CBE_RAS
464 STD_EXCEPTION_COMMON(0x1200, cbe_system_error, .cbe_system_error_exception)
465 STD_EXCEPTION_COMMON(0x1600, cbe_maintenance, .cbe_maintenance_exception)
466 STD_EXCEPTION_COMMON(0x1800, cbe_thermal, .cbe_thermal_exception)
467#endif /* CONFIG_CBE_RAS */
468
469 .align 7
470system_call_entry:
471 b system_call_common
472
473/*
474 * Here we have detected that the kernel stack pointer is bad.
475 * R9 contains the saved CR, r13 points to the paca,
476 * r10 contains the (bad) kernel stack pointer,
477 * r11 and r12 contain the saved SRR0 and SRR1.
478 * We switch to using an emergency stack, save the registers there,
479 * and call kernel_bad_stack(), which panics.
480 */
481bad_stack:
482 ld r1,PACAEMERGSP(r13)
483 subi r1,r1,64+INT_FRAME_SIZE
484 std r9,_CCR(r1)
485 std r10,GPR1(r1)
486 std r11,_NIP(r1)
487 std r12,_MSR(r1)
488 mfspr r11,SPRN_DAR
489 mfspr r12,SPRN_DSISR
490 std r11,_DAR(r1)
491 std r12,_DSISR(r1)
492 mflr r10
493 mfctr r11
494 mfxer r12
495 std r10,_LINK(r1)
496 std r11,_CTR(r1)
497 std r12,_XER(r1)
498 SAVE_GPR(0,r1)
499 SAVE_GPR(2,r1)
500 SAVE_4GPRS(3,r1)
501 SAVE_2GPRS(7,r1)
502 SAVE_10GPRS(12,r1)
503 SAVE_10GPRS(22,r1)
504 lhz r12,PACA_TRAP_SAVE(r13)
505 std r12,_TRAP(r1)
506 addi r11,r1,INT_FRAME_SIZE
507 std r11,0(r1)
508 li r12,0
509 std r12,0(r11)
510 ld r2,PACATOC(r13)
5111: addi r3,r1,STACK_FRAME_OVERHEAD
512 bl .kernel_bad_stack
513 b 1b
514
515/*
516 * Here r13 points to the paca, r9 contains the saved CR,
517 * SRR0 and SRR1 are saved in r11 and r12,
518 * r9 - r13 are saved in paca->exgen.
519 */
520 .align 7
521 .globl data_access_common
522data_access_common:
523 mfspr r10,SPRN_DAR
524 std r10,PACA_EXGEN+EX_DAR(r13)
525 mfspr r10,SPRN_DSISR
526 stw r10,PACA_EXGEN+EX_DSISR(r13)
527 EXCEPTION_PROLOG_COMMON(0x300, PACA_EXGEN)
528 ld r3,PACA_EXGEN+EX_DAR(r13)
529 lwz r4,PACA_EXGEN+EX_DSISR(r13)
530 li r5,0x300
531 b .do_hash_page /* Try to handle as hpte fault */
532
533 .align 7
534 .globl instruction_access_common
535instruction_access_common:
536 EXCEPTION_PROLOG_COMMON(0x400, PACA_EXGEN)
537 ld r3,_NIP(r1)
538 andis. r4,r12,0x5820
539 li r5,0x400
540 b .do_hash_page /* Try to handle as hpte fault */
541
542/*
543 * Here is the common SLB miss user that is used when going to virtual
544 * mode for SLB misses, that is currently not used
545 */
546#ifdef __DISABLED__
547 .align 7
548 .globl slb_miss_user_common
549slb_miss_user_common:
550 mflr r10
551 std r3,PACA_EXGEN+EX_DAR(r13)
552 stw r9,PACA_EXGEN+EX_CCR(r13)
553 std r10,PACA_EXGEN+EX_LR(r13)
554 std r11,PACA_EXGEN+EX_SRR0(r13)
555 bl .slb_allocate_user
556
557 ld r10,PACA_EXGEN+EX_LR(r13)
558 ld r3,PACA_EXGEN+EX_R3(r13)
559 lwz r9,PACA_EXGEN+EX_CCR(r13)
560 ld r11,PACA_EXGEN+EX_SRR0(r13)
561 mtlr r10
562 beq- slb_miss_fault
563
564 andi. r10,r12,MSR_RI /* check for unrecoverable exception */
565 beq- unrecov_user_slb
566 mfmsr r10
567
568.machine push
569.machine "power4"
570 mtcrf 0x80,r9
571.machine pop
572
573 clrrdi r10,r10,2 /* clear RI before setting SRR0/1 */
574 mtmsrd r10,1
575
576 mtspr SRR0,r11
577 mtspr SRR1,r12
578
579 ld r9,PACA_EXGEN+EX_R9(r13)
580 ld r10,PACA_EXGEN+EX_R10(r13)
581 ld r11,PACA_EXGEN+EX_R11(r13)
582 ld r12,PACA_EXGEN+EX_R12(r13)
583 ld r13,PACA_EXGEN+EX_R13(r13)
584 rfid
585 b .
586
587slb_miss_fault:
588 EXCEPTION_PROLOG_COMMON(0x380, PACA_EXGEN)
589 ld r4,PACA_EXGEN+EX_DAR(r13)
590 li r5,0
591 std r4,_DAR(r1)
592 std r5,_DSISR(r1)
593 b handle_page_fault
594
595unrecov_user_slb:
596 EXCEPTION_PROLOG_COMMON(0x4200, PACA_EXGEN)
597 DISABLE_INTS
598 bl .save_nvgprs
5991: addi r3,r1,STACK_FRAME_OVERHEAD
600 bl .unrecoverable_exception
601 b 1b
602
603#endif /* __DISABLED__ */
604
605
606/*
607 * r13 points to the PACA, r9 contains the saved CR,
608 * r12 contain the saved SRR1, SRR0 is still ready for return
609 * r3 has the faulting address
610 * r9 - r13 are saved in paca->exslb.
611 * r3 is saved in paca->slb_r3
612 * We assume we aren't going to take any exceptions during this procedure.
613 */
614_GLOBAL(slb_miss_realmode)
615 mflr r10
616#ifdef CONFIG_RELOCATABLE
617 mtctr r11
618#endif
619
620 stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
621 std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
622
623 bl .slb_allocate_realmode
624
625 /* All done -- return from exception. */
626
627 ld r10,PACA_EXSLB+EX_LR(r13)
628 ld r3,PACA_EXSLB+EX_R3(r13)
629 lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
630#ifdef CONFIG_PPC_ISERIES
631BEGIN_FW_FTR_SECTION
632 ld r11,PACALPPACAPTR(r13)
633 ld r11,LPPACASRR0(r11) /* get SRR0 value */
634END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
635#endif /* CONFIG_PPC_ISERIES */
636
637 mtlr r10
638
639 andi. r10,r12,MSR_RI /* check for unrecoverable exception */
640 beq- 2f
641
642.machine push
643.machine "power4"
644 mtcrf 0x80,r9
645 mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
646.machine pop
647
648#ifdef CONFIG_PPC_ISERIES
649BEGIN_FW_FTR_SECTION
650 mtspr SPRN_SRR0,r11
651 mtspr SPRN_SRR1,r12
652END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
653#endif /* CONFIG_PPC_ISERIES */
654 ld r9,PACA_EXSLB+EX_R9(r13)
655 ld r10,PACA_EXSLB+EX_R10(r13)
656 ld r11,PACA_EXSLB+EX_R11(r13)
657 ld r12,PACA_EXSLB+EX_R12(r13)
658 ld r13,PACA_EXSLB+EX_R13(r13)
659 rfid
660 b . /* prevent speculative execution */
661
6622:
663#ifdef CONFIG_PPC_ISERIES
664BEGIN_FW_FTR_SECTION
665 b unrecov_slb
666END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
667#endif /* CONFIG_PPC_ISERIES */
668 mfspr r11,SPRN_SRR0
669 ld r10,PACAKBASE(r13)
670 LOAD_HANDLER(r10,unrecov_slb)
671 mtspr SPRN_SRR0,r10
672 ld r10,PACAKMSR(r13)
673 mtspr SPRN_SRR1,r10
674 rfid
675 b .
676
677unrecov_slb:
678 EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB)
679 DISABLE_INTS
680 bl .save_nvgprs
6811: addi r3,r1,STACK_FRAME_OVERHEAD
682 bl .unrecoverable_exception
683 b 1b
684
685 .align 7
686 .globl hardware_interrupt_common
687 .globl hardware_interrupt_entry
688hardware_interrupt_common:
689 EXCEPTION_PROLOG_COMMON(0x500, PACA_EXGEN)
690 FINISH_NAP
691hardware_interrupt_entry:
692 DISABLE_INTS
693BEGIN_FTR_SECTION
694 bl .ppc64_runlatch_on
695END_FTR_SECTION_IFSET(CPU_FTR_CTRL)
696 addi r3,r1,STACK_FRAME_OVERHEAD
697 bl .do_IRQ
698 b .ret_from_except_lite
699
700#ifdef CONFIG_PPC_970_NAP
701power4_fixup_nap:
702 andc r9,r9,r10
703 std r9,TI_LOCAL_FLAGS(r11)
704 ld r10,_LINK(r1) /* make idle task do the */
705 std r10,_NIP(r1) /* equivalent of a blr */
706 blr
707#endif
708
709 .align 7
710 .globl alignment_common
711alignment_common:
712 mfspr r10,SPRN_DAR
713 std r10,PACA_EXGEN+EX_DAR(r13)
714 mfspr r10,SPRN_DSISR
715 stw r10,PACA_EXGEN+EX_DSISR(r13)
716 EXCEPTION_PROLOG_COMMON(0x600, PACA_EXGEN)
717 ld r3,PACA_EXGEN+EX_DAR(r13)
718 lwz r4,PACA_EXGEN+EX_DSISR(r13)
719 std r3,_DAR(r1)
720 std r4,_DSISR(r1)
721 bl .save_nvgprs
722 addi r3,r1,STACK_FRAME_OVERHEAD
723 ENABLE_INTS
724 bl .alignment_exception
725 b .ret_from_except
726
727 .align 7
728 .globl program_check_common
729program_check_common:
730 EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN)
731 bl .save_nvgprs
732 addi r3,r1,STACK_FRAME_OVERHEAD
733 ENABLE_INTS
734 bl .program_check_exception
735 b .ret_from_except
736
737 .align 7
738 .globl fp_unavailable_common
739fp_unavailable_common:
740 EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN)
741 bne 1f /* if from user, just load it up */
742 bl .save_nvgprs
743 addi r3,r1,STACK_FRAME_OVERHEAD
744 ENABLE_INTS
745 bl .kernel_fp_unavailable_exception
746 BUG_OPCODE
7471: bl .load_up_fpu
748 b fast_exception_return
749
750 .align 7
751 .globl altivec_unavailable_common
752altivec_unavailable_common:
753 EXCEPTION_PROLOG_COMMON(0xf20, PACA_EXGEN)
754#ifdef CONFIG_ALTIVEC
755BEGIN_FTR_SECTION
756 beq 1f
757 bl .load_up_altivec
758 b fast_exception_return
7591:
760END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
761#endif
762 bl .save_nvgprs
763 addi r3,r1,STACK_FRAME_OVERHEAD
764 ENABLE_INTS
765 bl .altivec_unavailable_exception
766 b .ret_from_except
767
768 .align 7
769 .globl vsx_unavailable_common
770vsx_unavailable_common:
771 EXCEPTION_PROLOG_COMMON(0xf40, PACA_EXGEN)
772#ifdef CONFIG_VSX
773BEGIN_FTR_SECTION
774 bne .load_up_vsx
7751:
776END_FTR_SECTION_IFSET(CPU_FTR_VSX)
777#endif
778 bl .save_nvgprs
779 addi r3,r1,STACK_FRAME_OVERHEAD
780 ENABLE_INTS
781 bl .vsx_unavailable_exception
782 b .ret_from_except
783
784 .align 7
785 .globl __end_handlers
786__end_handlers:
787
788/*
789 * Return from an exception with minimal checks.
790 * The caller is assumed to have done EXCEPTION_PROLOG_COMMON.
791 * If interrupts have been enabled, or anything has been
792 * done that might have changed the scheduling status of
793 * any task or sent any task a signal, you should use
794 * ret_from_except or ret_from_except_lite instead of this.
795 */ 162 */
796fast_exc_return_irq: /* restores irq state too */ 163#ifdef CONFIG_PPC_BOOK3S
797 ld r3,SOFTE(r1) 164#include "exceptions-64s.S"
798 TRACE_AND_RESTORE_IRQ(r3);
799 ld r12,_MSR(r1)
800 rldicl r4,r12,49,63 /* get MSR_EE to LSB */
801 stb r4,PACAHARDIRQEN(r13) /* restore paca->hard_enabled */
802 b 1f
803
804 .globl fast_exception_return
805fast_exception_return:
806 ld r12,_MSR(r1)
8071: ld r11,_NIP(r1)
808 andi. r3,r12,MSR_RI /* check if RI is set */
809 beq- unrecov_fer
810
811#ifdef CONFIG_VIRT_CPU_ACCOUNTING
812 andi. r3,r12,MSR_PR
813 beq 2f
814 ACCOUNT_CPU_USER_EXIT(r3, r4)
8152:
816#endif 165#endif
817 166
818 ld r3,_CCR(r1)
819 ld r4,_LINK(r1)
820 ld r5,_CTR(r1)
821 ld r6,_XER(r1)
822 mtcr r3
823 mtlr r4
824 mtctr r5
825 mtxer r6
826 REST_GPR(0, r1)
827 REST_8GPRS(2, r1)
828
829 mfmsr r10
830 rldicl r10,r10,48,1 /* clear EE */
831 rldicr r10,r10,16,61 /* clear RI (LE is 0 already) */
832 mtmsrd r10,1
833
834 mtspr SPRN_SRR1,r12
835 mtspr SPRN_SRR0,r11
836 REST_4GPRS(10, r1)
837 ld r1,GPR1(r1)
838 rfid
839 b . /* prevent speculative execution */
840
841unrecov_fer:
842 bl .save_nvgprs
8431: addi r3,r1,STACK_FRAME_OVERHEAD
844 bl .unrecoverable_exception
845 b 1b
846
847#ifdef CONFIG_ALTIVEC
848/*
849 * load_up_altivec(unused, unused, tsk)
850 * Disable VMX for the task which had it previously,
851 * and save its vector registers in its thread_struct.
852 * Enables the VMX for use in the kernel on return.
853 * On SMP we know the VMX is free, since we give it up every
854 * switch (ie, no lazy save of the vector registers).
855 * On entry: r13 == 'current' && last_task_used_altivec != 'current'
856 */
857_STATIC(load_up_altivec)
858 mfmsr r5 /* grab the current MSR */
859 oris r5,r5,MSR_VEC@h
860 mtmsrd r5 /* enable use of VMX now */
861 isync
862
863/*
864 * For SMP, we don't do lazy VMX switching because it just gets too
865 * horrendously complex, especially when a task switches from one CPU
866 * to another. Instead we call giveup_altvec in switch_to.
867 * VRSAVE isn't dealt with here, that is done in the normal context
868 * switch code. Note that we could rely on vrsave value to eventually
869 * avoid saving all of the VREGs here...
870 */
871#ifndef CONFIG_SMP
872 ld r3,last_task_used_altivec@got(r2)
873 ld r4,0(r3)
874 cmpdi 0,r4,0
875 beq 1f
876 /* Save VMX state to last_task_used_altivec's THREAD struct */
877 addi r4,r4,THREAD
878 SAVE_32VRS(0,r5,r4)
879 mfvscr vr0
880 li r10,THREAD_VSCR
881 stvx vr0,r10,r4
882 /* Disable VMX for last_task_used_altivec */
883 ld r5,PT_REGS(r4)
884 ld r4,_MSR-STACK_FRAME_OVERHEAD(r5)
885 lis r6,MSR_VEC@h
886 andc r4,r4,r6
887 std r4,_MSR-STACK_FRAME_OVERHEAD(r5)
8881:
889#endif /* CONFIG_SMP */
890 /* Hack: if we get an altivec unavailable trap with VRSAVE
891 * set to all zeros, we assume this is a broken application
892 * that fails to set it properly, and thus we switch it to
893 * all 1's
894 */
895 mfspr r4,SPRN_VRSAVE
896 cmpdi 0,r4,0
897 bne+ 1f
898 li r4,-1
899 mtspr SPRN_VRSAVE,r4
9001:
901 /* enable use of VMX after return */
902 ld r4,PACACURRENT(r13)
903 addi r5,r4,THREAD /* Get THREAD */
904 oris r12,r12,MSR_VEC@h
905 std r12,_MSR(r1)
906 li r4,1
907 li r10,THREAD_VSCR
908 stw r4,THREAD_USED_VR(r5)
909 lvx vr0,r10,r5
910 mtvscr vr0
911 REST_32VRS(0,r4,r5)
912#ifndef CONFIG_SMP
913 /* Update last_task_used_math to 'current' */
914 subi r4,r5,THREAD /* Back to 'current' */
915 std r4,0(r3)
916#endif /* CONFIG_SMP */
917 /* restore registers and return */
918 blr
919#endif /* CONFIG_ALTIVEC */
920
921#ifdef CONFIG_VSX
922/*
923 * load_up_vsx(unused, unused, tsk)
924 * Disable VSX for the task which had it previously,
925 * and save its vector registers in its thread_struct.
926 * Reuse the fp and vsx saves, but first check to see if they have
927 * been saved already.
928 * On entry: r13 == 'current' && last_task_used_vsx != 'current'
929 */
930_STATIC(load_up_vsx)
931/* Load FP and VSX registers if they haven't been done yet */
932 andi. r5,r12,MSR_FP
933 beql+ load_up_fpu /* skip if already loaded */
934 andis. r5,r12,MSR_VEC@h
935 beql+ load_up_altivec /* skip if already loaded */
936
937#ifndef CONFIG_SMP
938 ld r3,last_task_used_vsx@got(r2)
939 ld r4,0(r3)
940 cmpdi 0,r4,0
941 beq 1f
942 /* Disable VSX for last_task_used_vsx */
943 addi r4,r4,THREAD
944 ld r5,PT_REGS(r4)
945 ld r4,_MSR-STACK_FRAME_OVERHEAD(r5)
946 lis r6,MSR_VSX@h
947 andc r6,r4,r6
948 std r6,_MSR-STACK_FRAME_OVERHEAD(r5)
9491:
950#endif /* CONFIG_SMP */
951 ld r4,PACACURRENT(r13)
952 addi r4,r4,THREAD /* Get THREAD */
953 li r6,1
954 stw r6,THREAD_USED_VSR(r4) /* ... also set thread used vsr */
955 /* enable use of VSX after return */
956 oris r12,r12,MSR_VSX@h
957 std r12,_MSR(r1)
958#ifndef CONFIG_SMP
959 /* Update last_task_used_math to 'current' */
960 ld r4,PACACURRENT(r13)
961 std r4,0(r3)
962#endif /* CONFIG_SMP */
963 b fast_exception_return
964#endif /* CONFIG_VSX */
965
966/*
967 * Hash table stuff
968 */
969 .align 7
970_STATIC(do_hash_page)
971 std r3,_DAR(r1)
972 std r4,_DSISR(r1)
973
974 andis. r0,r4,0xa450 /* weird error? */
975 bne- handle_page_fault /* if not, try to insert a HPTE */
976BEGIN_FTR_SECTION
977 andis. r0,r4,0x0020 /* Is it a segment table fault? */
978 bne- do_ste_alloc /* If so handle it */
979END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
980
981 /*
982 * On iSeries, we soft-disable interrupts here, then
983 * hard-enable interrupts so that the hash_page code can spin on
984 * the hash_table_lock without problems on a shared processor.
985 */
986 DISABLE_INTS
987
988 /*
989 * Currently, trace_hardirqs_off() will be called by DISABLE_INTS
990 * and will clobber volatile registers when irq tracing is enabled
991 * so we need to reload them. It may be possible to be smarter here
992 * and move the irq tracing elsewhere but let's keep it simple for
993 * now
994 */
995#ifdef CONFIG_TRACE_IRQFLAGS
996 ld r3,_DAR(r1)
997 ld r4,_DSISR(r1)
998 ld r5,_TRAP(r1)
999 ld r12,_MSR(r1)
1000 clrrdi r5,r5,4
1001#endif /* CONFIG_TRACE_IRQFLAGS */
1002 /*
1003 * We need to set the _PAGE_USER bit if MSR_PR is set or if we are
1004 * accessing a userspace segment (even from the kernel). We assume
1005 * kernel addresses always have the high bit set.
1006 */
1007 rlwinm r4,r4,32-25+9,31-9,31-9 /* DSISR_STORE -> _PAGE_RW */
1008 rotldi r0,r3,15 /* Move high bit into MSR_PR posn */
1009 orc r0,r12,r0 /* MSR_PR | ~high_bit */
1010 rlwimi r4,r0,32-13,30,30 /* becomes _PAGE_USER access bit */
1011 ori r4,r4,1 /* add _PAGE_PRESENT */
1012 rlwimi r4,r5,22+2,31-2,31-2 /* Set _PAGE_EXEC if trap is 0x400 */
1013
1014 /*
1015 * r3 contains the faulting address
1016 * r4 contains the required access permissions
1017 * r5 contains the trap number
1018 *
1019 * at return r3 = 0 for success
1020 */
1021 bl .hash_page /* build HPTE if possible */
1022 cmpdi r3,0 /* see if hash_page succeeded */
1023
1024BEGIN_FW_FTR_SECTION
1025 /*
1026 * If we had interrupts soft-enabled at the point where the
1027 * DSI/ISI occurred, and an interrupt came in during hash_page,
1028 * handle it now.
1029 * We jump to ret_from_except_lite rather than fast_exception_return
1030 * because ret_from_except_lite will check for and handle pending
1031 * interrupts if necessary.
1032 */
1033 beq 13f
1034END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
1035
1036BEGIN_FW_FTR_SECTION
1037 /*
1038 * Here we have interrupts hard-disabled, so it is sufficient
1039 * to restore paca->{soft,hard}_enable and get out.
1040 */
1041 beq fast_exc_return_irq /* Return from exception on success */
1042END_FW_FTR_SECTION_IFCLR(FW_FEATURE_ISERIES)
1043
1044 /* For a hash failure, we don't bother re-enabling interrupts */
1045 ble- 12f
1046
1047 /*
1048 * hash_page couldn't handle it, set soft interrupt enable back
1049 * to what it was before the trap. Note that .raw_local_irq_restore
1050 * handles any interrupts pending at this point.
1051 */
1052 ld r3,SOFTE(r1)
1053 TRACE_AND_RESTORE_IRQ_PARTIAL(r3, 11f)
1054 bl .raw_local_irq_restore
1055 b 11f
1056
1057/* Here we have a page fault that hash_page can't handle. */
1058handle_page_fault:
1059 ENABLE_INTS
106011: ld r4,_DAR(r1)
1061 ld r5,_DSISR(r1)
1062 addi r3,r1,STACK_FRAME_OVERHEAD
1063 bl .do_page_fault
1064 cmpdi r3,0
1065 beq+ 13f
1066 bl .save_nvgprs
1067 mr r5,r3
1068 addi r3,r1,STACK_FRAME_OVERHEAD
1069 lwz r4,_DAR(r1)
1070 bl .bad_page_fault
1071 b .ret_from_except
1072
107313: b .ret_from_except_lite
1074
1075/* We have a page fault that hash_page could handle but HV refused
1076 * the PTE insertion
1077 */
107812: bl .save_nvgprs
1079 mr r5,r3
1080 addi r3,r1,STACK_FRAME_OVERHEAD
1081 ld r4,_DAR(r1)
1082 bl .low_hash_fault
1083 b .ret_from_except
1084
1085 /* here we have a segment miss */
1086do_ste_alloc:
1087 bl .ste_allocate /* try to insert stab entry */
1088 cmpdi r3,0
1089 bne- handle_page_fault
1090 b fast_exception_return
1091
1092/*
1093 * r13 points to the PACA, r9 contains the saved CR,
1094 * r11 and r12 contain the saved SRR0 and SRR1.
1095 * r9 - r13 are saved in paca->exslb.
1096 * We assume we aren't going to take any exceptions during this procedure.
1097 * We assume (DAR >> 60) == 0xc.
1098 */
1099 .align 7
1100_GLOBAL(do_stab_bolted)
1101 stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
1102 std r11,PACA_EXSLB+EX_SRR0(r13) /* save SRR0 in exc. frame */
1103
1104 /* Hash to the primary group */
1105 ld r10,PACASTABVIRT(r13)
1106 mfspr r11,SPRN_DAR
1107 srdi r11,r11,28
1108 rldimi r10,r11,7,52 /* r10 = first ste of the group */
1109
1110 /* Calculate VSID */
1111 /* This is a kernel address, so protovsid = ESID */
1112 ASM_VSID_SCRAMBLE(r11, r9, 256M)
1113 rldic r9,r11,12,16 /* r9 = vsid << 12 */
1114
1115 /* Search the primary group for a free entry */
11161: ld r11,0(r10) /* Test valid bit of the current ste */
1117 andi. r11,r11,0x80
1118 beq 2f
1119 addi r10,r10,16
1120 andi. r11,r10,0x70
1121 bne 1b
1122
1123 /* Stick for only searching the primary group for now. */
1124 /* At least for now, we use a very simple random castout scheme */
1125 /* Use the TB as a random number ; OR in 1 to avoid entry 0 */
1126 mftb r11
1127 rldic r11,r11,4,57 /* r11 = (r11 << 4) & 0x70 */
1128 ori r11,r11,0x10
1129
1130 /* r10 currently points to an ste one past the group of interest */
1131 /* make it point to the randomly selected entry */
1132 subi r10,r10,128
1133 or r10,r10,r11 /* r10 is the entry to invalidate */
1134
1135 isync /* mark the entry invalid */
1136 ld r11,0(r10)
1137 rldicl r11,r11,56,1 /* clear the valid bit */
1138 rotldi r11,r11,8
1139 std r11,0(r10)
1140 sync
1141
1142 clrrdi r11,r11,28 /* Get the esid part of the ste */
1143 slbie r11
1144
11452: std r9,8(r10) /* Store the vsid part of the ste */
1146 eieio
1147
1148 mfspr r11,SPRN_DAR /* Get the new esid */
1149 clrrdi r11,r11,28 /* Permits a full 32b of ESID */
1150 ori r11,r11,0x90 /* Turn on valid and kp */
1151 std r11,0(r10) /* Put new entry back into the stab */
1152
1153 sync
1154
1155 /* All done -- return from exception. */
1156 lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
1157 ld r11,PACA_EXSLB+EX_SRR0(r13) /* get saved SRR0 */
1158
1159 andi. r10,r12,MSR_RI
1160 beq- unrecov_slb
1161
1162 mtcrf 0x80,r9 /* restore CR */
1163
1164 mfmsr r10
1165 clrrdi r10,r10,2
1166 mtmsrd r10,1
1167
1168 mtspr SPRN_SRR0,r11
1169 mtspr SPRN_SRR1,r12
1170 ld r9,PACA_EXSLB+EX_R9(r13)
1171 ld r10,PACA_EXSLB+EX_R10(r13)
1172 ld r11,PACA_EXSLB+EX_R11(r13)
1173 ld r12,PACA_EXSLB+EX_R12(r13)
1174 ld r13,PACA_EXSLB+EX_R13(r13)
1175 rfid
1176 b . /* prevent speculative execution */
1177
1178/*
1179 * Space for CPU0's segment table.
1180 *
1181 * On iSeries, the hypervisor must fill in at least one entry before
1182 * we get control (with relocate on). The address is given to the hv
1183 * as a page number (see xLparMap below), so this must be at a
1184 * fixed address (the linker can't compute (u64)&initial_stab >>
1185 * PAGE_SHIFT).
1186 */
1187 . = STAB0_OFFSET /* 0x6000 */
1188 .globl initial_stab
1189initial_stab:
1190 .space 4096
1191
1192#ifdef CONFIG_PPC_PSERIES
1193/*
1194 * Data area reserved for FWNMI option.
1195 * This address (0x7000) is fixed by the RPA.
1196 */
 1197 	. = 0x7000
1198 .globl fwnmi_data_area
1199fwnmi_data_area:
1200#endif /* CONFIG_PPC_PSERIES */
1201
1202 /* iSeries does not use the FWNMI stuff, so it is safe to put
1203 * this here, even if we later allow kernels that will boot on
1204 * both pSeries and iSeries */
1205#ifdef CONFIG_PPC_ISERIES
1206 . = LPARMAP_PHYS
1207 .globl xLparMap
1208xLparMap:
1209 .quad HvEsidsToMap /* xNumberEsids */
1210 .quad HvRangesToMap /* xNumberRanges */
1211 .quad STAB0_PAGE /* xSegmentTableOffs */
1212 .zero 40 /* xRsvd */
1213 /* xEsids (HvEsidsToMap entries of 2 quads) */
1214 .quad PAGE_OFFSET_ESID /* xKernelEsid */
1215 .quad PAGE_OFFSET_VSID /* xKernelVsid */
1216 .quad VMALLOC_START_ESID /* xKernelEsid */
1217 .quad VMALLOC_START_VSID /* xKernelVsid */
1218 /* xRanges (HvRangesToMap entries of 3 quads) */
1219 .quad HvPagesToMap /* xPages */
1220 .quad 0 /* xOffset */
1221 .quad PAGE_OFFSET_VSID << (SID_SHIFT - HW_PAGE_SHIFT) /* xVPN */
1222
1223#endif /* CONFIG_PPC_ISERIES */
1224
1225#ifdef CONFIG_PPC_PSERIES
1226 . = 0x8000
1227#endif /* CONFIG_PPC_PSERIES */
1228 167
1229/* 168/*
1230 * On pSeries and most other platforms, secondary processors spin 169 * On pSeries and most other platforms, secondary processors spin
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 95f39f1e68d4..5f9febc8d143 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -256,7 +256,7 @@ label:
256 * off DE in the DSRR1 value and clearing the debug status. \ 256 * off DE in the DSRR1 value and clearing the debug status. \
257 */ \ 257 */ \
258 mfspr r10,SPRN_DBSR; /* check single-step/branch taken */ \ 258 mfspr r10,SPRN_DBSR; /* check single-step/branch taken */ \
259 andis. r10,r10,DBSR_IC@h; \ 259 andis. r10,r10,(DBSR_IC|DBSR_BT)@h; \
260 beq+ 2f; \ 260 beq+ 2f; \
261 \ 261 \
262 lis r10,KERNELBASE@h; /* check if exception in vectors */ \ 262 lis r10,KERNELBASE@h; /* check if exception in vectors */ \
@@ -271,7 +271,7 @@ label:
271 \ 271 \
272 /* here it looks like we got an inappropriate debug exception. */ \ 272 /* here it looks like we got an inappropriate debug exception. */ \
2731: rlwinm r9,r9,0,~MSR_DE; /* clear DE in the CDRR1 value */ \ 2731: rlwinm r9,r9,0,~MSR_DE; /* clear DE in the CDRR1 value */ \
274 lis r10,DBSR_IC@h; /* clear the IC event */ \ 274 lis r10,(DBSR_IC|DBSR_BT)@h; /* clear the IC event */ \
275 mtspr SPRN_DBSR,r10; \ 275 mtspr SPRN_DBSR,r10; \
276 /* restore state and get out */ \ 276 /* restore state and get out */ \
277 lwz r10,_CCR(r11); \ 277 lwz r10,_CCR(r11); \
@@ -309,7 +309,7 @@ label:
309 * off DE in the CSRR1 value and clearing the debug status. \ 309 * off DE in the CSRR1 value and clearing the debug status. \
310 */ \ 310 */ \
311 mfspr r10,SPRN_DBSR; /* check single-step/branch taken */ \ 311 mfspr r10,SPRN_DBSR; /* check single-step/branch taken */ \
312 andis. r10,r10,DBSR_IC@h; \ 312 andis. r10,r10,(DBSR_IC|DBSR_BT)@h; \
313 beq+ 2f; \ 313 beq+ 2f; \
314 \ 314 \
315 lis r10,KERNELBASE@h; /* check if exception in vectors */ \ 315 lis r10,KERNELBASE@h; /* check if exception in vectors */ \
@@ -317,14 +317,14 @@ label:
317 cmplw r12,r10; \ 317 cmplw r12,r10; \
318 blt+ 2f; /* addr below exception vectors */ \ 318 blt+ 2f; /* addr below exception vectors */ \
319 \ 319 \
320 lis r10,DebugCrit@h; \ 320 lis r10,DebugCrit@h; \
321 ori r10,r10,DebugCrit@l; \ 321 ori r10,r10,DebugCrit@l; \
322 cmplw r12,r10; \ 322 cmplw r12,r10; \
323 bgt+ 2f; /* addr above exception vectors */ \ 323 bgt+ 2f; /* addr above exception vectors */ \
324 \ 324 \
325 /* here it looks like we got an inappropriate debug exception. */ \ 325 /* here it looks like we got an inappropriate debug exception. */ \
3261: rlwinm r9,r9,0,~MSR_DE; /* clear DE in the CSRR1 value */ \ 3261: rlwinm r9,r9,0,~MSR_DE; /* clear DE in the CSRR1 value */ \
327 lis r10,DBSR_IC@h; /* clear the IC event */ \ 327 lis r10,(DBSR_IC|DBSR_BT)@h; /* clear the IC event */ \
328 mtspr SPRN_DBSR,r10; \ 328 mtspr SPRN_DBSR,r10; \
329 /* restore state and get out */ \ 329 /* restore state and get out */ \
330 lwz r10,_CCR(r11); \ 330 lwz r10,_CCR(r11); \
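The head_booke.h hunks above widen the single-step handling to cover branch-taken debug events as well: both the DBSR test and the write-one-to-clear of DBSR now include DBSR_BT, otherwise a pending branch-taken status would immediately re-raise the debug exception. The intent of the macro change, sketched in C (constants from <asm/reg_booke.h>; this is an illustration, not kernel code):

	unsigned int dbsr = mfspr(SPRN_DBSR);
	if (dbsr & (DBSR_IC | DBSR_BT)) {
		/* ... decide whether this looks like a spurious event ... */
		mtspr(SPRN_DBSR, DBSR_IC | DBSR_BT);	/* write 1s to clear both */
	}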
diff --git a/arch/powerpc/kernel/init_task.c b/arch/powerpc/kernel/init_task.c
index 688b329800bd..ffc4253fef55 100644
--- a/arch/powerpc/kernel/init_task.c
+++ b/arch/powerpc/kernel/init_task.c
@@ -9,10 +9,6 @@
9 9
10static struct signal_struct init_signals = INIT_SIGNALS(init_signals); 10static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
11static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); 11static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
12struct mm_struct init_mm = INIT_MM(init_mm);
13
14EXPORT_SYMBOL(init_mm);
15
16/* 12/*
17 * Initial thread structure. 13 * Initial thread structure.
18 * 14 *
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 8c1a4966867e..f7f376ea7b17 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -53,6 +53,7 @@
53#include <linux/bootmem.h> 53#include <linux/bootmem.h>
54#include <linux/pci.h> 54#include <linux/pci.h>
55#include <linux/debugfs.h> 55#include <linux/debugfs.h>
56#include <linux/perf_counter.h>
56 57
57#include <asm/uaccess.h> 58#include <asm/uaccess.h>
58#include <asm/system.h> 59#include <asm/system.h>
@@ -117,6 +118,7 @@ notrace void raw_local_irq_restore(unsigned long en)
117 if (!en) 118 if (!en)
118 return; 119 return;
119 120
121#ifdef CONFIG_PPC_STD_MMU_64
120 if (firmware_has_feature(FW_FEATURE_ISERIES)) { 122 if (firmware_has_feature(FW_FEATURE_ISERIES)) {
121 /* 123 /*
122 * Do we need to disable preemption here? Not really: in the 124 * Do we need to disable preemption here? Not really: in the
@@ -134,6 +136,12 @@ notrace void raw_local_irq_restore(unsigned long en)
134 if (local_paca->lppaca_ptr->int_dword.any_int) 136 if (local_paca->lppaca_ptr->int_dword.any_int)
135 iseries_handle_interrupts(); 137 iseries_handle_interrupts();
136 } 138 }
139#endif /* CONFIG_PPC_STD_MMU_64 */
140
141 if (test_perf_counter_pending()) {
142 clear_perf_counter_pending();
143 perf_counter_do_pending();
144 }
137 145
138 /* 146 /*
139 * if (get_paca()->hard_enabled) return; 147 * if (get_paca()->hard_enabled) return;
@@ -248,77 +256,84 @@ void fixup_irqs(cpumask_t map)
248} 256}
249#endif 257#endif
250 258
251void do_IRQ(struct pt_regs *regs)
252{
253 struct pt_regs *old_regs = set_irq_regs(regs);
254 unsigned int irq;
255#ifdef CONFIG_IRQSTACKS 259#ifdef CONFIG_IRQSTACKS
260static inline void handle_one_irq(unsigned int irq)
261{
256 struct thread_info *curtp, *irqtp; 262 struct thread_info *curtp, *irqtp;
257#endif 263 unsigned long saved_sp_limit;
264 struct irq_desc *desc;
258 265
259 irq_enter(); 266 /* Switch to the irq stack to handle this */
267 curtp = current_thread_info();
268 irqtp = hardirq_ctx[smp_processor_id()];
269
270 if (curtp == irqtp) {
271 /* We're already on the irq stack, just handle it */
272 generic_handle_irq(irq);
273 return;
274 }
275
276 desc = irq_desc + irq;
277 saved_sp_limit = current->thread.ksp_limit;
278
279 irqtp->task = curtp->task;
280 irqtp->flags = 0;
281
282 /* Copy the softirq bits in preempt_count so that the
283 * softirq checks work in the hardirq context. */
284 irqtp->preempt_count = (irqtp->preempt_count & ~SOFTIRQ_MASK) |
285 (curtp->preempt_count & SOFTIRQ_MASK);
286
287 current->thread.ksp_limit = (unsigned long)irqtp +
288 _ALIGN_UP(sizeof(struct thread_info), 16);
289
290 call_handle_irq(irq, desc, irqtp, desc->handle_irq);
291 current->thread.ksp_limit = saved_sp_limit;
292 irqtp->task = NULL;
260 293
294 /* Set any flag that may have been set on the
295 * alternate stack
296 */
297 if (irqtp->flags)
298 set_bits(irqtp->flags, &curtp->flags);
299}
300#else
301static inline void handle_one_irq(unsigned int irq)
302{
303 generic_handle_irq(irq);
304}
305#endif
306
307static inline void check_stack_overflow(void)
308{
261#ifdef CONFIG_DEBUG_STACKOVERFLOW 309#ifdef CONFIG_DEBUG_STACKOVERFLOW
262 /* Debugging check for stack overflow: is there less than 2KB free? */ 310 long sp;
263 {
264 long sp;
265 311
266 sp = __get_SP() & (THREAD_SIZE-1); 312 sp = __get_SP() & (THREAD_SIZE-1);
267 313
268 if (unlikely(sp < (sizeof(struct thread_info) + 2048))) { 314 /* check for stack overflow: is there less than 2KB free? */
269 printk("do_IRQ: stack overflow: %ld\n", 315 if (unlikely(sp < (sizeof(struct thread_info) + 2048))) {
270 sp - sizeof(struct thread_info)); 316 printk("do_IRQ: stack overflow: %ld\n",
271 dump_stack(); 317 sp - sizeof(struct thread_info));
272 } 318 dump_stack();
273 } 319 }
274#endif 320#endif
321}
275 322
276 /* 323void do_IRQ(struct pt_regs *regs)
277 * Every platform is required to implement ppc_md.get_irq. 324{
278 * This function will either return an irq number or NO_IRQ to 325 struct pt_regs *old_regs = set_irq_regs(regs);
279 * indicate there are no more pending. 326 unsigned int irq;
280 * The value NO_IRQ_IGNORE is for buggy hardware and means that this
281 * IRQ has already been handled. -- Tom
282 */
283 irq = ppc_md.get_irq();
284 327
285 if (irq != NO_IRQ && irq != NO_IRQ_IGNORE) { 328 irq_enter();
286#ifdef CONFIG_IRQSTACKS
287 /* Switch to the irq stack to handle this */
288 curtp = current_thread_info();
289 irqtp = hardirq_ctx[smp_processor_id()];
290 if (curtp != irqtp) {
291 struct irq_desc *desc = irq_desc + irq;
292 void *handler = desc->handle_irq;
293 unsigned long saved_sp_limit = current->thread.ksp_limit;
294 if (handler == NULL)
295 handler = &__do_IRQ;
296 irqtp->task = curtp->task;
297 irqtp->flags = 0;
298
299 /* Copy the softirq bits in preempt_count so that the
300 * softirq checks work in the hardirq context.
301 */
302 irqtp->preempt_count =
303 (irqtp->preempt_count & ~SOFTIRQ_MASK) |
304 (curtp->preempt_count & SOFTIRQ_MASK);
305 329
306 current->thread.ksp_limit = (unsigned long)irqtp + 330 check_stack_overflow();
307 _ALIGN_UP(sizeof(struct thread_info), 16);
308 call_handle_irq(irq, desc, irqtp, handler);
309 current->thread.ksp_limit = saved_sp_limit;
310 irqtp->task = NULL;
311 331
332 irq = ppc_md.get_irq();
312 333
313 /* Set any flag that may have been set on the 334 if (irq != NO_IRQ && irq != NO_IRQ_IGNORE)
314 * alternate stack 335 handle_one_irq(irq);
315 */ 336 else if (irq != NO_IRQ_IGNORE)
316 if (irqtp->flags)
317 set_bits(irqtp->flags, &curtp->flags);
318 } else
319#endif
320 generic_handle_irq(irq);
321 } else if (irq != NO_IRQ_IGNORE)
322 /* That's not SMP safe ... but who cares ? */ 337 /* That's not SMP safe ... but who cares ? */
323 ppc_spurious_interrupts++; 338 ppc_spurious_interrupts++;
324 339
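The raw_local_irq_restore() hunk above adds a replay step for the new perf_counter support: a PMU interrupt that arrives while interrupts are only soft-disabled cannot do its work immediately, so it is recorded as pending and drained here once interrupts are re-enabled, via test_perf_counter_pending(), clear_perf_counter_pending() and perf_counter_do_pending(). The general pattern, as a minimal sketch (the names below are placeholders, not the kernel helpers):

	static int example_pending;

	static void example_hard_irq(void)	/* runs at interrupt time */
	{
		example_pending = 1;		/* too early to do the real work */
	}

	static void example_irq_restore(void)	/* soft-enable path */
	{
		if (example_pending) {
			example_pending = 0;
			/* do the deferred work, cf. perf_counter_do_pending() */
		}
	}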
diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c
index 78b3f7840ade..2419cc706ff1 100644
--- a/arch/powerpc/kernel/lparcfg.c
+++ b/arch/powerpc/kernel/lparcfg.c
@@ -169,6 +169,9 @@ struct hvcall_ppp_data {
169 u8 unallocated_weight; 169 u8 unallocated_weight;
170 u16 active_procs_in_pool; 170 u16 active_procs_in_pool;
171 u16 active_system_procs; 171 u16 active_system_procs;
172 u16 phys_platform_procs;
173 u32 max_proc_cap_avail;
174 u32 entitled_proc_cap_avail;
172}; 175};
173 176
174/* 177/*
@@ -190,13 +193,18 @@ struct hvcall_ppp_data {
190 * XX - Unallocated Variable Processor Capacity Weight. 193 * XX - Unallocated Variable Processor Capacity Weight.
191 * XXXX - Active processors in Physical Processor Pool. 194 * XXXX - Active processors in Physical Processor Pool.
192 * XXXX - Processors active on platform. 195 * XXXX - Processors active on platform.
196 * R8 (QQQQRRRRRRSSSSSS). if ibm,partition-performance-parameters-level >= 1
197 * XXXX - Physical platform procs allocated to virtualization.
198 * XXXXXX - Max procs capacity % available to the partitions pool.
199 * XXXXXX - Entitled procs capacity % available to the
200 * partitions pool.
193 */ 201 */
194static unsigned int h_get_ppp(struct hvcall_ppp_data *ppp_data) 202static unsigned int h_get_ppp(struct hvcall_ppp_data *ppp_data)
195{ 203{
196 unsigned long rc; 204 unsigned long rc;
197 unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; 205 unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
198 206
199 rc = plpar_hcall(H_GET_PPP, retbuf); 207 rc = plpar_hcall9(H_GET_PPP, retbuf);
200 208
201 ppp_data->entitlement = retbuf[0]; 209 ppp_data->entitlement = retbuf[0];
202 ppp_data->unallocated_entitlement = retbuf[1]; 210 ppp_data->unallocated_entitlement = retbuf[1];
@@ -210,6 +218,10 @@ static unsigned int h_get_ppp(struct hvcall_ppp_data *ppp_data)
210 ppp_data->active_procs_in_pool = (retbuf[3] >> 2 * 8) & 0xffff; 218 ppp_data->active_procs_in_pool = (retbuf[3] >> 2 * 8) & 0xffff;
211 ppp_data->active_system_procs = retbuf[3] & 0xffff; 219 ppp_data->active_system_procs = retbuf[3] & 0xffff;
212 220
221 ppp_data->phys_platform_procs = retbuf[4] >> 6 * 8;
222 ppp_data->max_proc_cap_avail = (retbuf[4] >> 3 * 8) & 0xffffff;
223 ppp_data->entitled_proc_cap_avail = retbuf[4] & 0xffffff;
224
213 return rc; 225 return rc;
214} 226}
215 227
@@ -234,6 +246,8 @@ static unsigned h_pic(unsigned long *pool_idle_time,
234static void parse_ppp_data(struct seq_file *m) 246static void parse_ppp_data(struct seq_file *m)
235{ 247{
236 struct hvcall_ppp_data ppp_data; 248 struct hvcall_ppp_data ppp_data;
249 struct device_node *root;
250 const int *perf_level;
237 int rc; 251 int rc;
238 252
239 rc = h_get_ppp(&ppp_data); 253 rc = h_get_ppp(&ppp_data);
@@ -267,6 +281,28 @@ static void parse_ppp_data(struct seq_file *m)
267 seq_printf(m, "capped=%d\n", ppp_data.capped); 281 seq_printf(m, "capped=%d\n", ppp_data.capped);
268 seq_printf(m, "unallocated_capacity=%lld\n", 282 seq_printf(m, "unallocated_capacity=%lld\n",
269 ppp_data.unallocated_entitlement); 283 ppp_data.unallocated_entitlement);
284
285 /* The last bits of information returned from h_get_ppp are only
286 * valid if the ibm,partition-performance-parameters-level
287 * property is >= 1.
288 */
289 root = of_find_node_by_path("/");
290 if (root) {
291 perf_level = of_get_property(root,
292 "ibm,partition-performance-parameters-level",
293 NULL);
294 if (perf_level && (*perf_level >= 1)) {
295 seq_printf(m,
296 "physical_procs_allocated_to_virtualization=%d\n",
297 ppp_data.phys_platform_procs);
298 seq_printf(m, "max_proc_capacity_available=%d\n",
299 ppp_data.max_proc_cap_avail);
300 seq_printf(m, "entitled_proc_capacity_available=%d\n",
301 ppp_data.entitled_proc_cap_avail);
302 }
303
304 of_node_put(root);
305 }
270} 306}
271 307
272/** 308/**
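The new fields come back packed in R8 (retbuf[4]) in the QQQQRRRRRRSSSSSS layout described above: a 2-byte processor count followed by two 3-byte capacity values, which is what the >> 6 * 8, >> 3 * 8 and & 0xffffff operations unpack. A hypothetical register value, only to make the shifts concrete:

	/* Illustration only; the value is made up. */
	unsigned long r8 = 0x0010000064000032UL;

	unsigned int phys_procs = r8 >> 48;			/* 0x0010   -> 16  */
	unsigned int max_cap    = (r8 >> 24) & 0xffffff;	/* 0x000064 -> 100 */
	unsigned int ent_cap    = r8 & 0xffffff;		/* 0x000032 -> 50  */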
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index b9530b2395a2..a5cf9c1356a6 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -457,98 +457,6 @@ _GLOBAL(disable_kernel_fp)
457 isync 457 isync
458 blr 458 blr
459 459
460#ifdef CONFIG_ALTIVEC
461
462#if 0 /* this has no callers for now */
463/*
464 * disable_kernel_altivec()
465 * Disable the VMX.
466 */
467_GLOBAL(disable_kernel_altivec)
468 mfmsr r3
469 rldicl r0,r3,(63-MSR_VEC_LG),1
470 rldicl r3,r0,(MSR_VEC_LG+1),0
471 mtmsrd r3 /* disable use of VMX now */
472 isync
473 blr
474#endif /* 0 */
475
476/*
477 * giveup_altivec(tsk)
478 * Disable VMX for the task given as the argument,
479 * and save the vector registers in its thread_struct.
480 * Enables the VMX for use in the kernel on return.
481 */
482_GLOBAL(giveup_altivec)
483 mfmsr r5
484 oris r5,r5,MSR_VEC@h
485 mtmsrd r5 /* enable use of VMX now */
486 isync
487 cmpdi 0,r3,0
488 beqlr- /* if no previous owner, done */
489 addi r3,r3,THREAD /* want THREAD of task */
490 ld r5,PT_REGS(r3)
491 cmpdi 0,r5,0
492 SAVE_32VRS(0,r4,r3)
493 mfvscr vr0
494 li r4,THREAD_VSCR
495 stvx vr0,r4,r3
496 beq 1f
497 ld r4,_MSR-STACK_FRAME_OVERHEAD(r5)
498#ifdef CONFIG_VSX
499BEGIN_FTR_SECTION
500 lis r3,(MSR_VEC|MSR_VSX)@h
501FTR_SECTION_ELSE
502 lis r3,MSR_VEC@h
503ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX)
504#else
505 lis r3,MSR_VEC@h
506#endif
507 andc r4,r4,r3 /* disable FP for previous task */
508 std r4,_MSR-STACK_FRAME_OVERHEAD(r5)
5091:
510#ifndef CONFIG_SMP
511 li r5,0
512 ld r4,last_task_used_altivec@got(r2)
513 std r5,0(r4)
514#endif /* CONFIG_SMP */
515 blr
516
517#endif /* CONFIG_ALTIVEC */
518
519#ifdef CONFIG_VSX
520/*
521 * __giveup_vsx(tsk)
522 * Disable VSX for the task given as the argument.
523 * Does NOT save vsx registers.
524 * Enables the VSX for use in the kernel on return.
525 */
526_GLOBAL(__giveup_vsx)
527 mfmsr r5
528 oris r5,r5,MSR_VSX@h
529 mtmsrd r5 /* enable use of VSX now */
530 isync
531
532 cmpdi 0,r3,0
533 beqlr- /* if no previous owner, done */
534 addi r3,r3,THREAD /* want THREAD of task */
535 ld r5,PT_REGS(r3)
536 cmpdi 0,r5,0
537 beq 1f
538 ld r4,_MSR-STACK_FRAME_OVERHEAD(r5)
539 lis r3,MSR_VSX@h
540 andc r4,r4,r3 /* disable VSX for previous task */
541 std r4,_MSR-STACK_FRAME_OVERHEAD(r5)
5421:
543#ifndef CONFIG_SMP
544 li r5,0
545 ld r4,last_task_used_vsx@got(r2)
546 std r5,0(r4)
547#endif /* CONFIG_SMP */
548 blr
549
550#endif /* CONFIG_VSX */
551
552/* kexec_wait(phys_cpu) 460/* kexec_wait(phys_cpu)
553 * 461 *
554 * wait for the flag to change, indicating this kernel is going away but 462 * wait for the flag to change, indicating this kernel is going away but
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
index 43e7e3a7f130..477c663e0140 100644
--- a/arch/powerpc/kernel/module.c
+++ b/arch/powerpc/kernel/module.c
@@ -43,8 +43,6 @@ void *module_alloc(unsigned long size)
43void module_free(struct module *mod, void *module_region) 43void module_free(struct module *mod, void *module_region)
44{ 44{
45 vfree(module_region); 45 vfree(module_region);
46 /* FIXME: If module_region == mod->init_region, trim exception
47 table entries. */
48} 46}
49 47
50static const Elf_Shdr *find_section(const Elf_Ehdr *hdr, 48static const Elf_Shdr *find_section(const Elf_Ehdr *hdr,
diff --git a/arch/powerpc/kernel/mpc7450-pmu.c b/arch/powerpc/kernel/mpc7450-pmu.c
new file mode 100644
index 000000000000..75ff47fed7bf
--- /dev/null
+++ b/arch/powerpc/kernel/mpc7450-pmu.c
@@ -0,0 +1,417 @@
1/*
2 * Performance counter support for MPC7450-family processors.
3 *
4 * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/string.h>
12#include <linux/perf_counter.h>
13#include <linux/string.h>
14#include <asm/reg.h>
15#include <asm/cputable.h>
16
17#define N_COUNTER 6 /* Number of hardware counters */
18#define MAX_ALT 3 /* Maximum number of event alternative codes */
19
20/*
21 * Bits in event code for MPC7450 family
22 */
23#define PM_THRMULT_MSKS 0x40000
24#define PM_THRESH_SH 12
25#define PM_THRESH_MSK 0x3f
26#define PM_PMC_SH 8
27#define PM_PMC_MSK 7
28#define PM_PMCSEL_MSK 0x7f
29
30/*
31 * Classify events according to how specific their PMC requirements are.
32 * Result is:
33 * 0: can go on any PMC
34 * 1: can go on PMCs 1-4
35 * 2: can go on PMCs 1,2,4
36 * 3: can go on PMCs 1 or 2
37 * 4: can only go on one PMC
38 * -1: event code is invalid
39 */
40#define N_CLASSES 5
41
42static int mpc7450_classify_event(u32 event)
43{
44 int pmc;
45
46 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
47 if (pmc) {
48 if (pmc > N_COUNTER)
49 return -1;
50 return 4;
51 }
52 event &= PM_PMCSEL_MSK;
53 if (event <= 1)
54 return 0;
55 if (event <= 7)
56 return 1;
57 if (event <= 13)
58 return 2;
59 if (event <= 22)
60 return 3;
61 return -1;
62}
63
64/*
65 * Events using threshold and possible threshold scale:
66 * code scale? name
67 * 11e N PM_INSTQ_EXCEED_CYC
68 * 11f N PM_ALTV_IQ_EXCEED_CYC
69 * 128 Y PM_DTLB_SEARCH_EXCEED_CYC
70 * 12b Y PM_LD_MISS_EXCEED_L1_CYC
71 * 220 N PM_CQ_EXCEED_CYC
72 * 30c N PM_GPR_RB_EXCEED_CYC
73 * 30d ? PM_FPR_IQ_EXCEED_CYC ?
74 * 311 Y PM_ITLB_SEARCH_EXCEED
75 * 410 N PM_GPR_IQ_EXCEED_CYC
76 */
77
78/*
79 * Return use of threshold and threshold scale bits:
80 * 0 = uses neither, 1 = uses threshold, 2 = uses both
81 */
82static int mpc7450_threshold_use(u32 event)
83{
84 int pmc, sel;
85
86 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
87 sel = event & PM_PMCSEL_MSK;
88 switch (pmc) {
89 case 1:
90 if (sel == 0x1e || sel == 0x1f)
91 return 1;
92 if (sel == 0x28 || sel == 0x2b)
93 return 2;
94 break;
95 case 2:
96 if (sel == 0x20)
97 return 1;
98 break;
99 case 3:
100 if (sel == 0xc || sel == 0xd)
101 return 1;
102 if (sel == 0x11)
103 return 2;
104 break;
105 case 4:
106 if (sel == 0x10)
107 return 1;
108 break;
109 }
110 return 0;
111}
112
113/*
114 * Layout of constraint bits:
115 * 33222222222211111111110000000000
116 * 10987654321098765432109876543210
117 * |< >< > < > < ><><><><><><>
118 * TS TV G4 G3 G2P6P5P4P3P2P1
119 *
120 * P1 - P6
121 * 0 - 11: Count of events needing PMC1 .. PMC6
122 *
123 * G2
124 * 12 - 14: Count of events needing PMC1 or PMC2
125 *
126 * G3
127 * 16 - 18: Count of events needing PMC1, PMC2 or PMC4
128 *
129 * G4
130 * 20 - 23: Count of events needing PMC1, PMC2, PMC3 or PMC4
131 *
132 * TV
133 * 24 - 29: Threshold value requested
134 *
135 * TS
136 * 30: Threshold scale value requested
137 */
138
139static u32 pmcbits[N_COUNTER][2] = {
140 { 0x00844002, 0x00111001 }, /* PMC1 mask, value: P1,G2,G3,G4 */
141 { 0x00844008, 0x00111004 }, /* PMC2: P2,G2,G3,G4 */
142 { 0x00800020, 0x00100010 }, /* PMC3: P3,G4 */
143 { 0x00840080, 0x00110040 }, /* PMC4: P4,G3,G4 */
144 { 0x00000200, 0x00000100 }, /* PMC5: P5 */
145 { 0x00000800, 0x00000400 } /* PMC6: P6 */
146};
147
148static u32 classbits[N_CLASSES - 1][2] = {
149 { 0x00000000, 0x00000000 }, /* class 0: no constraint */
150 { 0x00800000, 0x00100000 }, /* class 1: G4 */
151 { 0x00040000, 0x00010000 }, /* class 2: G3 */
152 { 0x00004000, 0x00001000 }, /* class 3: G2 */
153};
154
155static int mpc7450_get_constraint(u64 event, unsigned long *maskp,
156 unsigned long *valp)
157{
158 int pmc, class;
159 u32 mask, value;
160 int thresh, tuse;
161
162 class = mpc7450_classify_event(event);
163 if (class < 0)
164 return -1;
165 if (class == 4) {
166 pmc = ((unsigned int)event >> PM_PMC_SH) & PM_PMC_MSK;
167 mask = pmcbits[pmc - 1][0];
168 value = pmcbits[pmc - 1][1];
169 } else {
170 mask = classbits[class][0];
171 value = classbits[class][1];
172 }
173
174 tuse = mpc7450_threshold_use(event);
175 if (tuse) {
176 thresh = ((unsigned int)event >> PM_THRESH_SH) & PM_THRESH_MSK;
177 mask |= 0x3f << 24;
178 value |= thresh << 24;
179 if (tuse == 2) {
180 mask |= 0x40000000;
181 if ((unsigned int)event & PM_THRMULT_MSKS)
182 value |= 0x40000000;
183 }
184 }
185
186 *maskp = mask;
187 *valp = value;
188 return 0;
189}
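As a concrete reading of the encoding (an aside derived from the layout comment and pmcbits[] above, not new code in the patch): an event pinned to a specific PMC simply returns that PMC's row, so the constraint solver added in perf_counter.c later in this patch sees one unit charged to each count field that PMC participates in:

    /* event 0x217 -> class 4, pmc = 2:
     *   *maskp = pmcbits[1][0] = 0x00844008
     *   *valp  = pmcbits[1][1] = 0x00111004
     * i.e. a count of 1 in the P2 (bit 2), G2 (bit 12), G3 (bit 16) and G4 (bit 20)
     * fields, with the mask exposing the top bit of each field so that the solver
     * can detect an over-committed field.
     */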
190
191static const unsigned int event_alternatives[][MAX_ALT] = {
192 { 0x217, 0x317 }, /* PM_L1_DCACHE_MISS */
193 { 0x418, 0x50f, 0x60f }, /* PM_SNOOP_RETRY */
194 { 0x502, 0x602 }, /* PM_L2_HIT */
195 { 0x503, 0x603 }, /* PM_L3_HIT */
196 { 0x504, 0x604 }, /* PM_L2_ICACHE_MISS */
197 { 0x505, 0x605 }, /* PM_L3_ICACHE_MISS */
198 { 0x506, 0x606 }, /* PM_L2_DCACHE_MISS */
199 { 0x507, 0x607 }, /* PM_L3_DCACHE_MISS */
200 { 0x50a, 0x623 }, /* PM_LD_HIT_L3 */
201 { 0x50b, 0x624 }, /* PM_ST_HIT_L3 */
202 { 0x50d, 0x60d }, /* PM_L2_TOUCH_HIT */
203 { 0x50e, 0x60e }, /* PM_L3_TOUCH_HIT */
204 { 0x512, 0x612 }, /* PM_INT_LOCAL */
205 { 0x513, 0x61d }, /* PM_L2_MISS */
206 { 0x514, 0x61e }, /* PM_L3_MISS */
207};
208
209/*
210 * Scan the alternatives table for a match and return the
211 * index into the alternatives table if found, else -1.
212 */
213static int find_alternative(u32 event)
214{
215 int i, j;
216
217 for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
218 if (event < event_alternatives[i][0])
219 break;
220 for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j)
221 if (event == event_alternatives[i][j])
222 return i;
223 }
224 return -1;
225}
226
227static int mpc7450_get_alternatives(u64 event, unsigned int flags, u64 alt[])
228{
229 int i, j, nalt = 1;
230 u32 ae;
231
232 alt[0] = event;
233 nalt = 1;
234 i = find_alternative((u32)event);
235 if (i >= 0) {
236 for (j = 0; j < MAX_ALT; ++j) {
237 ae = event_alternatives[i][j];
238 if (ae && ae != (u32)event)
239 alt[nalt++] = ae;
240 }
241 }
242 return nalt;
243}
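The alternatives table is what gives the constraint solver room to manoeuvre: PM_L1_DCACHE_MISS, for instance, has both a PMC2 and a PMC3 encoding, so if one counter is already taken the other code can be used. A minimal sketch of what the function returns (illustrative, not part of the patch):

    u64 alt[MAX_ALT];
    int n = mpc7450_get_alternatives(0x217, 0, alt);
    /* n == 2, alt[0] == 0x217 (PMC2 encoding), alt[1] == 0x317 (PMC3 encoding) */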
244
245/*
246 * Bitmaps of which PMCs each class can use for classes 0 - 3.
247 * Bit i is set if PMC i+1 is usable.
248 */
249static const u8 classmap[N_CLASSES] = {
250 0x3f, 0x0f, 0x0b, 0x03, 0
251};
252
253/* Bit position and width of each PMCSEL field */
254static const int pmcsel_shift[N_COUNTER] = {
255 6, 0, 27, 22, 17, 11
256};
257static const u32 pmcsel_mask[N_COUNTER] = {
258 0x7f, 0x3f, 0x1f, 0x1f, 0x1f, 0x3f
259};
260
261/*
262 * Compute MMCR0/1/2 values for a set of events.
263 */
264static int mpc7450_compute_mmcr(u64 event[], int n_ev,
265 unsigned int hwc[], unsigned long mmcr[])
266{
267 u8 event_index[N_CLASSES][N_COUNTER];
268 int n_classevent[N_CLASSES];
269 int i, j, class, tuse;
270 u32 pmc_inuse = 0, pmc_avail;
271 u32 mmcr0 = 0, mmcr1 = 0, mmcr2 = 0;
272 u32 ev, pmc, thresh;
273
274 if (n_ev > N_COUNTER)
275 return -1;
276
277 /* First pass: count usage in each class */
278 for (i = 0; i < N_CLASSES; ++i)
279 n_classevent[i] = 0;
280 for (i = 0; i < n_ev; ++i) {
281 class = mpc7450_classify_event(event[i]);
282 if (class < 0)
283 return -1;
284 j = n_classevent[class]++;
285 event_index[class][j] = i;
286 }
287
288 /* Second pass: allocate PMCs from most specific event to least */
289 for (class = N_CLASSES - 1; class >= 0; --class) {
290 for (i = 0; i < n_classevent[class]; ++i) {
291 ev = event[event_index[class][i]];
292 if (class == 4) {
293 pmc = (ev >> PM_PMC_SH) & PM_PMC_MSK;
294 if (pmc_inuse & (1 << (pmc - 1)))
295 return -1;
296 } else {
297 /* Find a suitable PMC */
298 pmc_avail = classmap[class] & ~pmc_inuse;
299 if (!pmc_avail)
300 return -1;
301 pmc = ffs(pmc_avail);
302 }
303 pmc_inuse |= 1 << (pmc - 1);
304
305 tuse = mpc7450_threshold_use(ev);
306 if (tuse) {
307 thresh = (ev >> PM_THRESH_SH) & PM_THRESH_MSK;
308 mmcr0 |= thresh << 16;
309 if (tuse == 2 && (ev & PM_THRMULT_MSKS))
310 mmcr2 = 0x80000000;
311 }
312 ev &= pmcsel_mask[pmc - 1];
313 ev <<= pmcsel_shift[pmc - 1];
314 if (pmc <= 2)
315 mmcr0 |= ev;
316 else
317 mmcr1 |= ev;
318 hwc[event_index[class][i]] = pmc - 1;
319 }
320 }
321
322 if (pmc_inuse & 1)
323 mmcr0 |= MMCR0_PMC1CE;
324 if (pmc_inuse & 0x3e)
325 mmcr0 |= MMCR0_PMCnCE;
326
327 /* Return MMCRx values */
328 mmcr[0] = mmcr0;
329 mmcr[1] = mmcr1;
330 mmcr[2] = mmcr2;
331 return 0;
332}
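A rough sketch of how the core code (hw_perf_enable() in perf_counter.c, added later in this patch) drives this hook through ppmu->compute_mmcr; the two event codes are the generic cycles and instructions entries from mpc7450_generic_events below:

    u64 events[2] = { 1, 2 };       /* CPU cycles, instructions completed */
    unsigned int hwc[2];
    unsigned long mmcr[3];

    if (mpc7450_compute_mmcr(events, 2, hwc, mmcr) == 0) {
            /* mmcr[0..2] now hold MMCR0/MMCR1/MMCR2 images with the PMCSEL
             * fields filled in, and hwc[i] is the 0-based PMC chosen for
             * event i; the caller programs the registers and initial PMC
             * values, then lets the counters run by clearing MMCR0_FC. */
    }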
333
334/*
335 * Disable counting by a PMC.
336 * Note that the pmc argument is 0-based here, not 1-based.
337 */
338static void mpc7450_disable_pmc(unsigned int pmc, unsigned long mmcr[])
339{
340 if (pmc <= 1)
341 mmcr[0] &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]);
342 else
343 mmcr[1] &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]);
344}
345
346static int mpc7450_generic_events[] = {
347 [PERF_COUNT_HW_CPU_CYCLES] = 1,
348 [PERF_COUNT_HW_INSTRUCTIONS] = 2,
349 [PERF_COUNT_HW_CACHE_MISSES] = 0x217, /* PM_L1_DCACHE_MISS */
350 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x122, /* PM_BR_CMPL */
351 [PERF_COUNT_HW_BRANCH_MISSES] = 0x41c, /* PM_BR_MPRED */
352};
353
354#define C(x) PERF_COUNT_HW_CACHE_##x
355
356/*
357 * Table of generalized cache-related events.
358 * 0 means not supported, -1 means nonsensical, other values
359 * are event codes.
360 */
361static int mpc7450_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
362 [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
363 [C(OP_READ)] = { 0, 0x225 },
364 [C(OP_WRITE)] = { 0, 0x227 },
365 [C(OP_PREFETCH)] = { 0, 0 },
366 },
367 [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
368 [C(OP_READ)] = { 0x129, 0x115 },
369 [C(OP_WRITE)] = { -1, -1 },
370 [C(OP_PREFETCH)] = { 0x634, 0 },
371 },
372 [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
373 [C(OP_READ)] = { 0, 0 },
374 [C(OP_WRITE)] = { 0, 0 },
375 [C(OP_PREFETCH)] = { 0, 0 },
376 },
377 [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
378 [C(OP_READ)] = { 0, 0x312 },
379 [C(OP_WRITE)] = { -1, -1 },
380 [C(OP_PREFETCH)] = { -1, -1 },
381 },
382 [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
383 [C(OP_READ)] = { 0, 0x223 },
384 [C(OP_WRITE)] = { -1, -1 },
385 [C(OP_PREFETCH)] = { -1, -1 },
386 },
387 [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
388 [C(OP_READ)] = { 0x122, 0x41c },
389 [C(OP_WRITE)] = { -1, -1 },
390 [C(OP_PREFETCH)] = { -1, -1 },
391 },
392};
393
394struct power_pmu mpc7450_pmu = {
395 .name = "MPC7450 family",
396 .n_counter = N_COUNTER,
397 .max_alternatives = MAX_ALT,
398 .add_fields = 0x00111555ul,
399 .test_adder = 0x00301000ul,
400 .compute_mmcr = mpc7450_compute_mmcr,
401 .get_constraint = mpc7450_get_constraint,
402 .get_alternatives = mpc7450_get_alternatives,
403 .disable_pmc = mpc7450_disable_pmc,
404 .n_generic = ARRAY_SIZE(mpc7450_generic_events),
405 .generic_events = mpc7450_generic_events,
406 .cache_events = &mpc7450_cache_events,
407};
408
409static int init_mpc7450_pmu(void)
410{
411 if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/7450"))
412 return -ENODEV;
413
414 return register_power_pmu(&mpc7450_pmu);
415}
416
417arch_initcall(init_mpc7450_pmu);
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index c744b327bcab..e9962c7f8a09 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -18,6 +18,8 @@
18 * field correctly */ 18 * field correctly */
19extern unsigned long __toc_start; 19extern unsigned long __toc_start;
20 20
21#ifdef CONFIG_PPC_BOOK3S
22
21/* 23/*
22 * The structure which the hypervisor knows about - this structure 24 * The structure which the hypervisor knows about - this structure
23 * should not cross a page boundary. The vpa_init/register_vpa call 25 * should not cross a page boundary. The vpa_init/register_vpa call
@@ -41,6 +43,10 @@ struct lppaca lppaca[] = {
41 }, 43 },
42}; 44};
43 45
46#endif /* CONFIG_PPC_BOOK3S */
47
48#ifdef CONFIG_PPC_STD_MMU_64
49
44/* 50/*
45 * 3 persistent SLBs are registered here. The buffer will be zero 51 * 3 persistent SLBs are registered here. The buffer will be zero
 46 * initially, hence will all be invalid until we actually write them. 52 * initially, hence will all be invalid until we actually write them.
@@ -52,6 +58,8 @@ struct slb_shadow slb_shadow[] __cacheline_aligned = {
52 }, 58 },
53}; 59};
54 60
61#endif /* CONFIG_PPC_STD_MMU_64 */
62
55/* The Paca is an array with one entry per processor. Each contains an 63/* The Paca is an array with one entry per processor. Each contains an
56 * lppaca, which contains the information shared between the 64 * lppaca, which contains the information shared between the
57 * hypervisor and Linux. 65 * hypervisor and Linux.
@@ -77,15 +85,19 @@ void __init initialise_pacas(void)
77 for (cpu = 0; cpu < NR_CPUS; cpu++) { 85 for (cpu = 0; cpu < NR_CPUS; cpu++) {
78 struct paca_struct *new_paca = &paca[cpu]; 86 struct paca_struct *new_paca = &paca[cpu];
79 87
88#ifdef CONFIG_PPC_BOOK3S
80 new_paca->lppaca_ptr = &lppaca[cpu]; 89 new_paca->lppaca_ptr = &lppaca[cpu];
90#endif
81 new_paca->lock_token = 0x8000; 91 new_paca->lock_token = 0x8000;
82 new_paca->paca_index = cpu; 92 new_paca->paca_index = cpu;
83 new_paca->kernel_toc = kernel_toc; 93 new_paca->kernel_toc = kernel_toc;
84 new_paca->kernelbase = (unsigned long) _stext; 94 new_paca->kernelbase = (unsigned long) _stext;
85 new_paca->kernel_msr = MSR_KERNEL; 95 new_paca->kernel_msr = MSR_KERNEL;
86 new_paca->hw_cpu_id = 0xffff; 96 new_paca->hw_cpu_id = 0xffff;
87 new_paca->slb_shadow_ptr = &slb_shadow[cpu];
88 new_paca->__current = &init_task; 97 new_paca->__current = &init_task;
98#ifdef CONFIG_PPC_STD_MMU_64
99 new_paca->slb_shadow_ptr = &slb_shadow[cpu];
100#endif /* CONFIG_PPC_STD_MMU_64 */
89 101
90 } 102 }
91} 103}
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 4fee63cb53ff..5a56e97c5ac0 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -1505,7 +1505,7 @@ void __init pcibios_resource_survey(void)
1505 * rest of the code later, for now, keep it as-is as our main 1505 * rest of the code later, for now, keep it as-is as our main
1506 * resource allocation function doesn't deal with sub-trees yet. 1506 * resource allocation function doesn't deal with sub-trees yet.
1507 */ 1507 */
1508void __devinit pcibios_claim_one_bus(struct pci_bus *bus) 1508void pcibios_claim_one_bus(struct pci_bus *bus)
1509{ 1509{
1510 struct pci_dev *dev; 1510 struct pci_dev *dev;
1511 struct pci_bus *child_bus; 1511 struct pci_bus *child_bus;
@@ -1533,7 +1533,6 @@ void __devinit pcibios_claim_one_bus(struct pci_bus *bus)
1533 list_for_each_entry(child_bus, &bus->children, node) 1533 list_for_each_entry(child_bus, &bus->children, node)
1534 pcibios_claim_one_bus(child_bus); 1534 pcibios_claim_one_bus(child_bus);
1535} 1535}
1536EXPORT_SYMBOL_GPL(pcibios_claim_one_bus);
1537 1536
1538 1537
1539/* pcibios_finish_adding_to_bus 1538/* pcibios_finish_adding_to_bus
diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index d473634e39e3..3ae1c666ff92 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -33,7 +33,6 @@ int pcibios_assign_bus_offset = 1;
33 33
34void pcibios_make_OF_bus_map(void); 34void pcibios_make_OF_bus_map(void);
35 35
36static void fixup_broken_pcnet32(struct pci_dev* dev);
37static void fixup_cpc710_pci64(struct pci_dev* dev); 36static void fixup_cpc710_pci64(struct pci_dev* dev);
38#ifdef CONFIG_PPC_OF 37#ifdef CONFIG_PPC_OF
39static u8* pci_to_OF_bus_map; 38static u8* pci_to_OF_bus_map;
@@ -72,16 +71,6 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MOTOROLA, PCI_ANY_ID, fixup_hide_host_res
72DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_FREESCALE, PCI_ANY_ID, fixup_hide_host_resource_fsl); 71DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_FREESCALE, PCI_ANY_ID, fixup_hide_host_resource_fsl);
73 72
74static void 73static void
75fixup_broken_pcnet32(struct pci_dev* dev)
76{
77 if ((dev->class>>8 == PCI_CLASS_NETWORK_ETHERNET)) {
78 dev->vendor = PCI_VENDOR_ID_AMD;
79 pci_write_config_word(dev, PCI_VENDOR_ID, PCI_VENDOR_ID_AMD);
80 }
81}
82DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TRIDENT, PCI_ANY_ID, fixup_broken_pcnet32);
83
84static void
85fixup_cpc710_pci64(struct pci_dev* dev) 74fixup_cpc710_pci64(struct pci_dev* dev)
86{ 75{
87 /* Hide the PCI64 BARs from the kernel as their content doesn't 76 /* Hide the PCI64 BARs from the kernel as their content doesn't
@@ -447,14 +436,6 @@ static int __init pcibios_init(void)
447 436
448subsys_initcall(pcibios_init); 437subsys_initcall(pcibios_init);
449 438
450/* the next one is stolen from the alpha port... */
451void __init
452pcibios_update_irq(struct pci_dev *dev, int irq)
453{
454 pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
455 /* XXX FIXME - update OF device tree node interrupt property */
456}
457
458static struct pci_controller* 439static struct pci_controller*
459pci_bus_to_hose(int bus) 440pci_bus_to_hose(int bus)
460{ 441{
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index 96edb6f8babb..9e8902fa14c7 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -43,16 +43,6 @@ unsigned long pci_probe_only = 1;
43unsigned long pci_io_base = ISA_IO_BASE; 43unsigned long pci_io_base = ISA_IO_BASE;
44EXPORT_SYMBOL(pci_io_base); 44EXPORT_SYMBOL(pci_io_base);
45 45
46static void fixup_broken_pcnet32(struct pci_dev* dev)
47{
48 if ((dev->class>>8 == PCI_CLASS_NETWORK_ETHERNET)) {
49 dev->vendor = PCI_VENDOR_ID_AMD;
50 pci_write_config_word(dev, PCI_VENDOR_ID, PCI_VENDOR_ID_AMD);
51 }
52}
53DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TRIDENT, PCI_ANY_ID, fixup_broken_pcnet32);
54
55
56static u32 get_int_prop(struct device_node *np, const char *name, u32 def) 46static u32 get_int_prop(struct device_node *np, const char *name, u32 def)
57{ 47{
58 const u32 *prop; 48 const u32 *prop;
@@ -430,6 +420,9 @@ int pcibios_unmap_io_space(struct pci_bus *bus)
430 * so flushing the hash table is the only sane way to make sure 420 * so flushing the hash table is the only sane way to make sure
431 * that no hash entries are covering that removed bridge area 421 * that no hash entries are covering that removed bridge area
432 * while still allowing other busses overlapping those pages 422 * while still allowing other busses overlapping those pages
423 *
424 * Note: If we ever support P2P hotplug on Book3E, we'll have
425 * to do an appropriate TLB flush here too
433 */ 426 */
434 if (bus->self) { 427 if (bus->self) {
435 struct resource *res = bus->resource[0]; 428 struct resource *res = bus->resource[0];
@@ -437,8 +430,10 @@ int pcibios_unmap_io_space(struct pci_bus *bus)
437 pr_debug("IO unmapping for PCI-PCI bridge %s\n", 430 pr_debug("IO unmapping for PCI-PCI bridge %s\n",
438 pci_name(bus->self)); 431 pci_name(bus->self));
439 432
433#ifdef CONFIG_PPC_STD_MMU_64
440 __flush_hash_table_range(&init_mm, res->start + _IO_BASE, 434 __flush_hash_table_range(&init_mm, res->start + _IO_BASE,
441 res->end + _IO_BASE + 1); 435 res->end + _IO_BASE + 1);
436#endif
442 return 0; 437 return 0;
443 } 438 }
444 439
@@ -511,7 +506,7 @@ int __devinit pcibios_map_io_space(struct pci_bus *bus)
511 pr_debug("IO mapping for PHB %s\n", hose->dn->full_name); 506 pr_debug("IO mapping for PHB %s\n", hose->dn->full_name);
512 pr_debug(" phys=0x%016llx, virt=0x%p (alloc=0x%p)\n", 507 pr_debug(" phys=0x%016llx, virt=0x%p (alloc=0x%p)\n",
513 hose->io_base_phys, hose->io_base_virt, hose->io_base_alloc); 508 hose->io_base_phys, hose->io_base_virt, hose->io_base_alloc);
514 pr_debug(" size=0x%016lx (alloc=0x%016lx)\n", 509 pr_debug(" size=0x%016llx (alloc=0x%016lx)\n",
515 hose->pci_io_size, size_page); 510 hose->pci_io_size, size_page);
516 511
517 /* Establish the mapping */ 512 /* Establish the mapping */
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index 1c67de52e3ce..d5e36e5dc7c2 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -27,7 +27,6 @@
27#include <asm/io.h> 27#include <asm/io.h>
28#include <asm/prom.h> 28#include <asm/prom.h>
29#include <asm/pci-bridge.h> 29#include <asm/pci-bridge.h>
30#include <asm/pSeries_reconfig.h>
31#include <asm/ppc-pci.h> 30#include <asm/ppc-pci.h>
32#include <asm/firmware.h> 31#include <asm/firmware.h>
33 32
@@ -35,7 +34,7 @@
35 * Traverse_func that inits the PCI fields of the device node. 34 * Traverse_func that inits the PCI fields of the device node.
36 * NOTE: this *must* be done before read/write config to the device. 35 * NOTE: this *must* be done before read/write config to the device.
37 */ 36 */
38static void * __devinit update_dn_pci_info(struct device_node *dn, void *data) 37void * __devinit update_dn_pci_info(struct device_node *dn, void *data)
39{ 38{
40 struct pci_controller *phb = data; 39 struct pci_controller *phb = data;
41 const int *type = 40 const int *type =
@@ -184,29 +183,6 @@ struct device_node *fetch_dev_dn(struct pci_dev *dev)
184} 183}
185EXPORT_SYMBOL(fetch_dev_dn); 184EXPORT_SYMBOL(fetch_dev_dn);
186 185
187static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *node)
188{
189 struct device_node *np = node;
190 struct pci_dn *pci = NULL;
191 int err = NOTIFY_OK;
192
193 switch (action) {
194 case PSERIES_RECONFIG_ADD:
195 pci = np->parent->data;
196 if (pci)
197 update_dn_pci_info(np, pci->phb);
198 break;
199 default:
200 err = NOTIFY_DONE;
201 break;
202 }
203 return err;
204}
205
206static struct notifier_block pci_dn_reconfig_nb = {
207 .notifier_call = pci_dn_reconfig_notifier,
208};
209
210/** 186/**
211 * pci_devs_phb_init - Initialize phbs and pci devs under them. 187 * pci_devs_phb_init - Initialize phbs and pci devs under them.
212 * 188 *
@@ -223,6 +199,4 @@ void __init pci_devs_phb_init(void)
223 /* This must be done first so the device nodes have valid pci info! */ 199 /* This must be done first so the device nodes have valid pci info! */
224 list_for_each_entry_safe(phb, tmp, &hose_list, list_node) 200 list_for_each_entry_safe(phb, tmp, &hose_list, list_node)
225 pci_devs_phb_init_dynamic(phb); 201 pci_devs_phb_init_dynamic(phb);
226
227 pSeries_reconfig_notifier_register(&pci_dn_reconfig_nb);
228} 202}
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
new file mode 100644
index 000000000000..809fdf94b95f
--- /dev/null
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -0,0 +1,1306 @@
1/*
2 * Performance counter support - powerpc architecture code
3 *
4 * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/kernel.h>
12#include <linux/sched.h>
13#include <linux/perf_counter.h>
14#include <linux/percpu.h>
15#include <linux/hardirq.h>
16#include <asm/reg.h>
17#include <asm/pmc.h>
18#include <asm/machdep.h>
19#include <asm/firmware.h>
20#include <asm/ptrace.h>
21
22struct cpu_hw_counters {
23 int n_counters;
24 int n_percpu;
25 int disabled;
26 int n_added;
27 int n_limited;
28 u8 pmcs_enabled;
29 struct perf_counter *counter[MAX_HWCOUNTERS];
30 u64 events[MAX_HWCOUNTERS];
31 unsigned int flags[MAX_HWCOUNTERS];
32 unsigned long mmcr[3];
33 struct perf_counter *limited_counter[MAX_LIMITED_HWCOUNTERS];
34 u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS];
35};
36DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters);
37
38struct power_pmu *ppmu;
39
40/*
41 * Normally, to ignore kernel events we set the FCS (freeze counters
42 * in supervisor mode) bit in MMCR0, but if the kernel runs with the
43 * hypervisor bit set in the MSR, or if we are running on a processor
44 * where the hypervisor bit is forced to 1 (as on Apple G5 processors),
45 * then we need to use the FCHV bit to ignore kernel events.
46 */
47static unsigned int freeze_counters_kernel = MMCR0_FCS;
48
49/*
50 * 32-bit doesn't have MMCRA but does have an MMCR2,
51 * and a few other names are different.
52 */
53#ifdef CONFIG_PPC32
54
55#define MMCR0_FCHV 0
56#define MMCR0_PMCjCE MMCR0_PMCnCE
57
58#define SPRN_MMCRA SPRN_MMCR2
59#define MMCRA_SAMPLE_ENABLE 0
60
61static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
62{
63 return 0;
64}
65static inline void perf_set_pmu_inuse(int inuse) { }
66static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { }
67static inline u32 perf_get_misc_flags(struct pt_regs *regs)
68{
69 return 0;
70}
71static inline void perf_read_regs(struct pt_regs *regs) { }
72static inline int perf_intr_is_nmi(struct pt_regs *regs)
73{
74 return 0;
75}
76
77#endif /* CONFIG_PPC32 */
78
79/*
80 * Things that are specific to 64-bit implementations.
81 */
82#ifdef CONFIG_PPC64
83
84static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
85{
86 unsigned long mmcra = regs->dsisr;
87
88 if ((mmcra & MMCRA_SAMPLE_ENABLE) && !(ppmu->flags & PPMU_ALT_SIPR)) {
89 unsigned long slot = (mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT;
90 if (slot > 1)
91 return 4 * (slot - 1);
92 }
93 return 0;
94}
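For a concrete case of the adjustment above: with instruction sampling enabled and an MMCRA slot field of 3, perf_instruction_pointer() further down reports

    SIAR + perf_ip_adjust(regs) = SIAR + 4 * (3 - 1) = SIAR + 8

i.e. two 4-byte instructions past the address latched in SIAR.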
95
96static inline void perf_set_pmu_inuse(int inuse)
97{
98 get_lppaca()->pmcregs_in_use = inuse;
99}
100
101/*
102 * The user wants a data address recorded.
103 * If we're not doing instruction sampling, give them the SDAR
104 * (sampled data address). If we are doing instruction sampling, then
105 * only give them the SDAR if it corresponds to the instruction
106 * pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC
107 * bit in MMCRA.
108 */
109static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp)
110{
111 unsigned long mmcra = regs->dsisr;
112 unsigned long sdsync = (ppmu->flags & PPMU_ALT_SIPR) ?
113 POWER6_MMCRA_SDSYNC : MMCRA_SDSYNC;
114
115 if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync))
116 *addrp = mfspr(SPRN_SDAR);
117}
118
119static inline u32 perf_get_misc_flags(struct pt_regs *regs)
120{
121 unsigned long mmcra = regs->dsisr;
122
123 if (TRAP(regs) != 0xf00)
124 return 0; /* not a PMU interrupt */
125
126 if (ppmu->flags & PPMU_ALT_SIPR) {
127 if (mmcra & POWER6_MMCRA_SIHV)
128 return PERF_EVENT_MISC_HYPERVISOR;
129 return (mmcra & POWER6_MMCRA_SIPR) ?
130 PERF_EVENT_MISC_USER : PERF_EVENT_MISC_KERNEL;
131 }
132 if (mmcra & MMCRA_SIHV)
133 return PERF_EVENT_MISC_HYPERVISOR;
134 return (mmcra & MMCRA_SIPR) ? PERF_EVENT_MISC_USER :
135 PERF_EVENT_MISC_KERNEL;
136}
137
138/*
139 * Overload regs->dsisr to store MMCRA so we only need to read it once
140 * on each interrupt.
141 */
142static inline void perf_read_regs(struct pt_regs *regs)
143{
144 regs->dsisr = mfspr(SPRN_MMCRA);
145}
146
147/*
148 * If interrupts were soft-disabled when a PMU interrupt occurs, treat
149 * it as an NMI.
150 */
151static inline int perf_intr_is_nmi(struct pt_regs *regs)
152{
153 return !regs->softe;
154}
155
156#endif /* CONFIG_PPC64 */
157
158static void perf_counter_interrupt(struct pt_regs *regs);
159
160void perf_counter_print_debug(void)
161{
162}
163
164/*
165 * Read one performance monitor counter (PMC).
166 */
167static unsigned long read_pmc(int idx)
168{
169 unsigned long val;
170
171 switch (idx) {
172 case 1:
173 val = mfspr(SPRN_PMC1);
174 break;
175 case 2:
176 val = mfspr(SPRN_PMC2);
177 break;
178 case 3:
179 val = mfspr(SPRN_PMC3);
180 break;
181 case 4:
182 val = mfspr(SPRN_PMC4);
183 break;
184 case 5:
185 val = mfspr(SPRN_PMC5);
186 break;
187 case 6:
188 val = mfspr(SPRN_PMC6);
189 break;
190#ifdef CONFIG_PPC64
191 case 7:
192 val = mfspr(SPRN_PMC7);
193 break;
194 case 8:
195 val = mfspr(SPRN_PMC8);
196 break;
197#endif /* CONFIG_PPC64 */
198 default:
199 printk(KERN_ERR "oops trying to read PMC%d\n", idx);
200 val = 0;
201 }
202 return val;
203}
204
205/*
206 * Write one PMC.
207 */
208static void write_pmc(int idx, unsigned long val)
209{
210 switch (idx) {
211 case 1:
212 mtspr(SPRN_PMC1, val);
213 break;
214 case 2:
215 mtspr(SPRN_PMC2, val);
216 break;
217 case 3:
218 mtspr(SPRN_PMC3, val);
219 break;
220 case 4:
221 mtspr(SPRN_PMC4, val);
222 break;
223 case 5:
224 mtspr(SPRN_PMC5, val);
225 break;
226 case 6:
227 mtspr(SPRN_PMC6, val);
228 break;
229#ifdef CONFIG_PPC64
230 case 7:
231 mtspr(SPRN_PMC7, val);
232 break;
233 case 8:
234 mtspr(SPRN_PMC8, val);
235 break;
236#endif /* CONFIG_PPC64 */
237 default:
238 printk(KERN_ERR "oops trying to write PMC%d\n", idx);
239 }
240}
241
242/*
243 * Check if a set of events can all go on the PMU at once.
244 * If they can't, this will look at alternative codes for the events
245 * and see if any combination of alternative codes is feasible.
246 * The feasible set is returned in event[].
247 */
248static int power_check_constraints(u64 event[], unsigned int cflags[],
249 int n_ev)
250{
251 unsigned long mask, value, nv;
252 u64 alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
253 unsigned long amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
254 unsigned long avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
255 unsigned long smasks[MAX_HWCOUNTERS], svalues[MAX_HWCOUNTERS];
256 int n_alt[MAX_HWCOUNTERS], choice[MAX_HWCOUNTERS];
257 int i, j;
258 unsigned long addf = ppmu->add_fields;
259 unsigned long tadd = ppmu->test_adder;
260
261 if (n_ev > ppmu->n_counter)
262 return -1;
263
264 /* First see if the events will go on as-is */
265 for (i = 0; i < n_ev; ++i) {
266 if ((cflags[i] & PPMU_LIMITED_PMC_REQD)
267 && !ppmu->limited_pmc_event(event[i])) {
268 ppmu->get_alternatives(event[i], cflags[i],
269 alternatives[i]);
270 event[i] = alternatives[i][0];
271 }
272 if (ppmu->get_constraint(event[i], &amasks[i][0],
273 &avalues[i][0]))
274 return -1;
275 }
276 value = mask = 0;
277 for (i = 0; i < n_ev; ++i) {
278 nv = (value | avalues[i][0]) + (value & avalues[i][0] & addf);
279 if ((((nv + tadd) ^ value) & mask) != 0 ||
280 (((nv + tadd) ^ avalues[i][0]) & amasks[i][0]) != 0)
281 break;
282 value = nv;
283 mask |= amasks[i][0];
284 }
285 if (i == n_ev)
286 return 0; /* all OK */
287
288 /* doesn't work, gather alternatives... */
289 if (!ppmu->get_alternatives)
290 return -1;
291 for (i = 0; i < n_ev; ++i) {
292 choice[i] = 0;
293 n_alt[i] = ppmu->get_alternatives(event[i], cflags[i],
294 alternatives[i]);
295 for (j = 1; j < n_alt[i]; ++j)
296 ppmu->get_constraint(alternatives[i][j],
297 &amasks[i][j], &avalues[i][j]);
298 }
299
300 /* enumerate all possibilities and see if any will work */
301 i = 0;
302 j = -1;
303 value = mask = nv = 0;
304 while (i < n_ev) {
305 if (j >= 0) {
306 /* we're backtracking, restore context */
307 value = svalues[i];
308 mask = smasks[i];
309 j = choice[i];
310 }
311 /*
312 * See if any alternative k for event i,
313 * where k > j, will satisfy the constraints.
314 */
315 while (++j < n_alt[i]) {
316 nv = (value | avalues[i][j]) +
317 (value & avalues[i][j] & addf);
318 if ((((nv + tadd) ^ value) & mask) == 0 &&
319 (((nv + tadd) ^ avalues[i][j])
320 & amasks[i][j]) == 0)
321 break;
322 }
323 if (j >= n_alt[i]) {
324 /*
325 * No feasible alternative, backtrack
326 * to event i-1 and continue enumerating its
327 * alternatives from where we got up to.
328 */
329 if (--i < 0)
330 return -1;
331 } else {
332 /*
333 * Found a feasible alternative for event i,
334 * remember where we got up to with this event,
335 * go on to the next event, and start with
336 * the first alternative for it.
337 */
338 choice[i] = j;
339 svalues[i] = value;
340 smasks[i] = mask;
341 value = nv;
342 mask |= amasks[i][j];
343 ++i;
344 j = -1;
345 }
346 }
347
348 /* OK, we have a feasible combination, tell the caller the solution */
349 for (i = 0; i < n_ev; ++i)
350 event[i] = alternatives[i][choice[i]];
351 return 0;
352}
353
354/*
355 * Check if newly-added counters have consistent settings for
356 * exclude_{user,kernel,hv} with each other and any previously
357 * added counters.
358 */
359static int check_excludes(struct perf_counter **ctrs, unsigned int cflags[],
360 int n_prev, int n_new)
361{
362 int eu = 0, ek = 0, eh = 0;
363 int i, n, first;
364 struct perf_counter *counter;
365
366 n = n_prev + n_new;
367 if (n <= 1)
368 return 0;
369
370 first = 1;
371 for (i = 0; i < n; ++i) {
372 if (cflags[i] & PPMU_LIMITED_PMC_OK) {
373 cflags[i] &= ~PPMU_LIMITED_PMC_REQD;
374 continue;
375 }
376 counter = ctrs[i];
377 if (first) {
378 eu = counter->attr.exclude_user;
379 ek = counter->attr.exclude_kernel;
380 eh = counter->attr.exclude_hv;
381 first = 0;
382 } else if (counter->attr.exclude_user != eu ||
383 counter->attr.exclude_kernel != ek ||
384 counter->attr.exclude_hv != eh) {
385 return -EAGAIN;
386 }
387 }
388
389 if (eu || ek || eh)
390 for (i = 0; i < n; ++i)
391 if (cflags[i] & PPMU_LIMITED_PMC_OK)
392 cflags[i] |= PPMU_LIMITED_PMC_REQD;
393
394 return 0;
395}
396
397static void power_pmu_read(struct perf_counter *counter)
398{
399 s64 val, delta, prev;
400
401 if (!counter->hw.idx)
402 return;
403 /*
404 * Performance monitor interrupts come even when interrupts
405 * are soft-disabled, as long as interrupts are hard-enabled.
406 * Therefore we treat them like NMIs.
407 */
408 do {
409 prev = atomic64_read(&counter->hw.prev_count);
410 barrier();
411 val = read_pmc(counter->hw.idx);
412 } while (atomic64_cmpxchg(&counter->hw.prev_count, prev, val) != prev);
413
414 /* The counters are only 32 bits wide */
415 delta = (val - prev) & 0xfffffffful;
416 atomic64_add(delta, &counter->count);
417 atomic64_sub(delta, &counter->hw.period_left);
418}
419
420/*
421 * On some machines, PMC5 and PMC6 can't be written, don't respect
422 * the freeze conditions, and don't generate interrupts. This tells
423 * us if `counter' is using such a PMC.
424 */
425static int is_limited_pmc(int pmcnum)
426{
427 return (ppmu->flags & PPMU_LIMITED_PMC5_6)
428 && (pmcnum == 5 || pmcnum == 6);
429}
430
431static void freeze_limited_counters(struct cpu_hw_counters *cpuhw,
432 unsigned long pmc5, unsigned long pmc6)
433{
434 struct perf_counter *counter;
435 u64 val, prev, delta;
436 int i;
437
438 for (i = 0; i < cpuhw->n_limited; ++i) {
439 counter = cpuhw->limited_counter[i];
440 if (!counter->hw.idx)
441 continue;
442 val = (counter->hw.idx == 5) ? pmc5 : pmc6;
443 prev = atomic64_read(&counter->hw.prev_count);
444 counter->hw.idx = 0;
445 delta = (val - prev) & 0xfffffffful;
446 atomic64_add(delta, &counter->count);
447 }
448}
449
450static void thaw_limited_counters(struct cpu_hw_counters *cpuhw,
451 unsigned long pmc5, unsigned long pmc6)
452{
453 struct perf_counter *counter;
454 u64 val;
455 int i;
456
457 for (i = 0; i < cpuhw->n_limited; ++i) {
458 counter = cpuhw->limited_counter[i];
459 counter->hw.idx = cpuhw->limited_hwidx[i];
460 val = (counter->hw.idx == 5) ? pmc5 : pmc6;
461 atomic64_set(&counter->hw.prev_count, val);
462 perf_counter_update_userpage(counter);
463 }
464}
465
466/*
467 * Since limited counters don't respect the freeze conditions, we
468 * have to read them immediately after freezing or unfreezing the
469 * other counters. We try to keep the values from the limited
470 * counters as consistent as possible by keeping the delay (in
471 * cycles and instructions) between freezing/unfreezing and reading
472 * the limited counters as small and consistent as possible.
473 * Therefore, if any limited counters are in use, we read them
474 * both, and always in the same order, to minimize variability,
475 * and do it inside the same asm that writes MMCR0.
476 */
477static void write_mmcr0(struct cpu_hw_counters *cpuhw, unsigned long mmcr0)
478{
479 unsigned long pmc5, pmc6;
480
481 if (!cpuhw->n_limited) {
482 mtspr(SPRN_MMCR0, mmcr0);
483 return;
484 }
485
486 /*
487 * Write MMCR0, then read PMC5 and PMC6 immediately.
488 * To ensure we don't get a performance monitor interrupt
489 * between writing MMCR0 and freezing/thawing the limited
490 * counters, we first write MMCR0 with the counter overflow
491 * interrupt enable bits turned off.
492 */
493 asm volatile("mtspr %3,%2; mfspr %0,%4; mfspr %1,%5"
494 : "=&r" (pmc5), "=&r" (pmc6)
495 : "r" (mmcr0 & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)),
496 "i" (SPRN_MMCR0),
497 "i" (SPRN_PMC5), "i" (SPRN_PMC6));
498
499 if (mmcr0 & MMCR0_FC)
500 freeze_limited_counters(cpuhw, pmc5, pmc6);
501 else
502 thaw_limited_counters(cpuhw, pmc5, pmc6);
503
504 /*
505 * Write the full MMCR0 including the counter overflow interrupt
506 * enable bits, if necessary.
507 */
508 if (mmcr0 & (MMCR0_PMC1CE | MMCR0_PMCjCE))
509 mtspr(SPRN_MMCR0, mmcr0);
510}
511
512/*
513 * Disable all counters to prevent PMU interrupts and to allow
514 * counters to be added or removed.
515 */
516void hw_perf_disable(void)
517{
518 struct cpu_hw_counters *cpuhw;
519 unsigned long flags;
520
521 local_irq_save(flags);
522 cpuhw = &__get_cpu_var(cpu_hw_counters);
523
524 if (!cpuhw->disabled) {
525 cpuhw->disabled = 1;
526 cpuhw->n_added = 0;
527
528 /*
529 * Check if we ever enabled the PMU on this cpu.
530 */
531 if (!cpuhw->pmcs_enabled) {
532 if (ppc_md.enable_pmcs)
533 ppc_md.enable_pmcs();
534 cpuhw->pmcs_enabled = 1;
535 }
536
537 /*
538 * Disable instruction sampling if it was enabled
539 */
540 if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
541 mtspr(SPRN_MMCRA,
542 cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
543 mb();
544 }
545
546 /*
547 * Set the 'freeze counters' bit.
548 * The barrier is to make sure the mtspr has been
549 * executed and the PMU has frozen the counters
550 * before we return.
551 */
552 write_mmcr0(cpuhw, mfspr(SPRN_MMCR0) | MMCR0_FC);
553 mb();
554 }
555 local_irq_restore(flags);
556}
557
558/*
 559 * Re-enable all counters.
560 * If we were previously disabled and counters were added, then
561 * put the new config on the PMU.
562 */
563void hw_perf_enable(void)
564{
565 struct perf_counter *counter;
566 struct cpu_hw_counters *cpuhw;
567 unsigned long flags;
568 long i;
569 unsigned long val;
570 s64 left;
571 unsigned int hwc_index[MAX_HWCOUNTERS];
572 int n_lim;
573 int idx;
574
575 local_irq_save(flags);
576 cpuhw = &__get_cpu_var(cpu_hw_counters);
577 if (!cpuhw->disabled) {
578 local_irq_restore(flags);
579 return;
580 }
581 cpuhw->disabled = 0;
582
583 /*
584 * If we didn't change anything, or only removed counters,
585 * no need to recalculate MMCR* settings and reset the PMCs.
586 * Just reenable the PMU with the current MMCR* settings
587 * (possibly updated for removal of counters).
588 */
589 if (!cpuhw->n_added) {
590 mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
591 mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
592 if (cpuhw->n_counters == 0)
593 perf_set_pmu_inuse(0);
594 goto out_enable;
595 }
596
597 /*
598 * Compute MMCR* values for the new set of counters
599 */
600 if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_counters, hwc_index,
601 cpuhw->mmcr)) {
602 /* shouldn't ever get here */
603 printk(KERN_ERR "oops compute_mmcr failed\n");
604 goto out;
605 }
606
607 /*
608 * Add in MMCR0 freeze bits corresponding to the
609 * attr.exclude_* bits for the first counter.
610 * We have already checked that all counters have the
611 * same values for these bits as the first counter.
612 */
613 counter = cpuhw->counter[0];
614 if (counter->attr.exclude_user)
615 cpuhw->mmcr[0] |= MMCR0_FCP;
616 if (counter->attr.exclude_kernel)
617 cpuhw->mmcr[0] |= freeze_counters_kernel;
618 if (counter->attr.exclude_hv)
619 cpuhw->mmcr[0] |= MMCR0_FCHV;
620
621 /*
622 * Write the new configuration to MMCR* with the freeze
623 * bit set and set the hardware counters to their initial values.
624 * Then unfreeze the counters.
625 */
626 perf_set_pmu_inuse(1);
627 mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
628 mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
629 mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
630 | MMCR0_FC);
631
632 /*
633 * Read off any pre-existing counters that need to move
634 * to another PMC.
635 */
636 for (i = 0; i < cpuhw->n_counters; ++i) {
637 counter = cpuhw->counter[i];
638 if (counter->hw.idx && counter->hw.idx != hwc_index[i] + 1) {
639 power_pmu_read(counter);
640 write_pmc(counter->hw.idx, 0);
641 counter->hw.idx = 0;
642 }
643 }
644
645 /*
646 * Initialize the PMCs for all the new and moved counters.
647 */
648 cpuhw->n_limited = n_lim = 0;
649 for (i = 0; i < cpuhw->n_counters; ++i) {
650 counter = cpuhw->counter[i];
651 if (counter->hw.idx)
652 continue;
653 idx = hwc_index[i] + 1;
654 if (is_limited_pmc(idx)) {
655 cpuhw->limited_counter[n_lim] = counter;
656 cpuhw->limited_hwidx[n_lim] = idx;
657 ++n_lim;
658 continue;
659 }
660 val = 0;
661 if (counter->hw.sample_period) {
662 left = atomic64_read(&counter->hw.period_left);
663 if (left < 0x80000000L)
664 val = 0x80000000L - left;
665 }
666 atomic64_set(&counter->hw.prev_count, val);
667 counter->hw.idx = idx;
668 write_pmc(idx, val);
669 perf_counter_update_userpage(counter);
670 }
671 cpuhw->n_limited = n_lim;
672 cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;
673
674 out_enable:
675 mb();
676 write_mmcr0(cpuhw, cpuhw->mmcr[0]);
677
678 /*
679 * Enable instruction sampling if necessary
680 */
681 if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
682 mb();
683 mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
684 }
685
686 out:
687 local_irq_restore(flags);
688}
689
690static int collect_events(struct perf_counter *group, int max_count,
691 struct perf_counter *ctrs[], u64 *events,
692 unsigned int *flags)
693{
694 int n = 0;
695 struct perf_counter *counter;
696
697 if (!is_software_counter(group)) {
698 if (n >= max_count)
699 return -1;
700 ctrs[n] = group;
701 flags[n] = group->hw.counter_base;
702 events[n++] = group->hw.config;
703 }
704 list_for_each_entry(counter, &group->sibling_list, list_entry) {
705 if (!is_software_counter(counter) &&
706 counter->state != PERF_COUNTER_STATE_OFF) {
707 if (n >= max_count)
708 return -1;
709 ctrs[n] = counter;
710 flags[n] = counter->hw.counter_base;
711 events[n++] = counter->hw.config;
712 }
713 }
714 return n;
715}
716
717static void counter_sched_in(struct perf_counter *counter, int cpu)
718{
719 counter->state = PERF_COUNTER_STATE_ACTIVE;
720 counter->oncpu = cpu;
721 counter->tstamp_running += counter->ctx->time - counter->tstamp_stopped;
722 if (is_software_counter(counter))
723 counter->pmu->enable(counter);
724}
725
726/*
727 * Called to enable a whole group of counters.
728 * Returns 1 if the group was enabled, or -EAGAIN if it could not be.
729 * Assumes the caller has disabled interrupts and has
730 * frozen the PMU with hw_perf_save_disable.
731 */
732int hw_perf_group_sched_in(struct perf_counter *group_leader,
733 struct perf_cpu_context *cpuctx,
734 struct perf_counter_context *ctx, int cpu)
735{
736 struct cpu_hw_counters *cpuhw;
737 long i, n, n0;
738 struct perf_counter *sub;
739
740 cpuhw = &__get_cpu_var(cpu_hw_counters);
741 n0 = cpuhw->n_counters;
742 n = collect_events(group_leader, ppmu->n_counter - n0,
743 &cpuhw->counter[n0], &cpuhw->events[n0],
744 &cpuhw->flags[n0]);
745 if (n < 0)
746 return -EAGAIN;
747 if (check_excludes(cpuhw->counter, cpuhw->flags, n0, n))
748 return -EAGAIN;
749 i = power_check_constraints(cpuhw->events, cpuhw->flags, n + n0);
750 if (i < 0)
751 return -EAGAIN;
752 cpuhw->n_counters = n0 + n;
753 cpuhw->n_added += n;
754
755 /*
756 * OK, this group can go on; update counter states etc.,
757 * and enable any software counters
758 */
759 for (i = n0; i < n0 + n; ++i)
760 cpuhw->counter[i]->hw.config = cpuhw->events[i];
761 cpuctx->active_oncpu += n;
762 n = 1;
763 counter_sched_in(group_leader, cpu);
764 list_for_each_entry(sub, &group_leader->sibling_list, list_entry) {
765 if (sub->state != PERF_COUNTER_STATE_OFF) {
766 counter_sched_in(sub, cpu);
767 ++n;
768 }
769 }
770 ctx->nr_active += n;
771
772 return 1;
773}
774
775/*
776 * Add a counter to the PMU.
777 * If all counters are not already frozen, then we disable and
778 * re-enable the PMU in order to get hw_perf_enable to do the
779 * actual work of reconfiguring the PMU.
780 */
781static int power_pmu_enable(struct perf_counter *counter)
782{
783 struct cpu_hw_counters *cpuhw;
784 unsigned long flags;
785 int n0;
786 int ret = -EAGAIN;
787
788 local_irq_save(flags);
789 perf_disable();
790
791 /*
792 * Add the counter to the list (if there is room)
793 * and check whether the total set is still feasible.
794 */
795 cpuhw = &__get_cpu_var(cpu_hw_counters);
796 n0 = cpuhw->n_counters;
797 if (n0 >= ppmu->n_counter)
798 goto out;
799 cpuhw->counter[n0] = counter;
800 cpuhw->events[n0] = counter->hw.config;
801 cpuhw->flags[n0] = counter->hw.counter_base;
802 if (check_excludes(cpuhw->counter, cpuhw->flags, n0, 1))
803 goto out;
804 if (power_check_constraints(cpuhw->events, cpuhw->flags, n0 + 1))
805 goto out;
806
807 counter->hw.config = cpuhw->events[n0];
808 ++cpuhw->n_counters;
809 ++cpuhw->n_added;
810
811 ret = 0;
812 out:
813 perf_enable();
814 local_irq_restore(flags);
815 return ret;
816}
817
818/*
819 * Remove a counter from the PMU.
820 */
821static void power_pmu_disable(struct perf_counter *counter)
822{
823 struct cpu_hw_counters *cpuhw;
824 long i;
825 unsigned long flags;
826
827 local_irq_save(flags);
828 perf_disable();
829
830 power_pmu_read(counter);
831
832 cpuhw = &__get_cpu_var(cpu_hw_counters);
833 for (i = 0; i < cpuhw->n_counters; ++i) {
834 if (counter == cpuhw->counter[i]) {
835 while (++i < cpuhw->n_counters)
836 cpuhw->counter[i-1] = cpuhw->counter[i];
837 --cpuhw->n_counters;
838 ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr);
839 if (counter->hw.idx) {
840 write_pmc(counter->hw.idx, 0);
841 counter->hw.idx = 0;
842 }
843 perf_counter_update_userpage(counter);
844 break;
845 }
846 }
847 for (i = 0; i < cpuhw->n_limited; ++i)
848 if (counter == cpuhw->limited_counter[i])
849 break;
850 if (i < cpuhw->n_limited) {
851 while (++i < cpuhw->n_limited) {
852 cpuhw->limited_counter[i-1] = cpuhw->limited_counter[i];
853 cpuhw->limited_hwidx[i-1] = cpuhw->limited_hwidx[i];
854 }
855 --cpuhw->n_limited;
856 }
857 if (cpuhw->n_counters == 0) {
858 /* disable exceptions if no counters are running */
859 cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE);
860 }
861
862 perf_enable();
863 local_irq_restore(flags);
864}
865
866/*
867 * Re-enable interrupts on a counter after they were throttled
868 * because they were coming too fast.
869 */
870static void power_pmu_unthrottle(struct perf_counter *counter)
871{
872 s64 val, left;
873 unsigned long flags;
874
875 if (!counter->hw.idx || !counter->hw.sample_period)
876 return;
877 local_irq_save(flags);
878 perf_disable();
879 power_pmu_read(counter);
880 left = counter->hw.sample_period;
881 counter->hw.last_period = left;
882 val = 0;
883 if (left < 0x80000000L)
884 val = 0x80000000L - left;
885 write_pmc(counter->hw.idx, val);
886 atomic64_set(&counter->hw.prev_count, val);
887 atomic64_set(&counter->hw.period_left, left);
888 perf_counter_update_userpage(counter);
889 perf_enable();
890 local_irq_restore(flags);
891}
892
893struct pmu power_pmu = {
894 .enable = power_pmu_enable,
895 .disable = power_pmu_disable,
896 .read = power_pmu_read,
897 .unthrottle = power_pmu_unthrottle,
898};
899
900/*
901 * Return 1 if we might be able to put counter on a limited PMC,
902 * or 0 if not.
903 * A counter can only go on a limited PMC if it counts something
904 * that a limited PMC can count, doesn't require interrupts, and
905 * doesn't exclude any processor mode.
906 */
907static int can_go_on_limited_pmc(struct perf_counter *counter, u64 ev,
908 unsigned int flags)
909{
910 int n;
911 u64 alt[MAX_EVENT_ALTERNATIVES];
912
913 if (counter->attr.exclude_user
914 || counter->attr.exclude_kernel
915 || counter->attr.exclude_hv
916 || counter->attr.sample_period)
917 return 0;
918
919 if (ppmu->limited_pmc_event(ev))
920 return 1;
921
922 /*
923 * The requested event isn't on a limited PMC already;
924 * see if any alternative code goes on a limited PMC.
925 */
926 if (!ppmu->get_alternatives)
927 return 0;
928
929 flags |= PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD;
930 n = ppmu->get_alternatives(ev, flags, alt);
931
932 return n > 0;
933}
934
935/*
936 * Find an alternative event that goes on a normal PMC, if possible,
937 * and return the event code, or 0 if there is no such alternative.
938 * (Note: event code 0 is "don't count" on all machines.)
939 */
940static u64 normal_pmc_alternative(u64 ev, unsigned long flags)
941{
942 u64 alt[MAX_EVENT_ALTERNATIVES];
943 int n;
944
945 flags &= ~(PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD);
946 n = ppmu->get_alternatives(ev, flags, alt);
947 if (!n)
948 return 0;
949 return alt[0];
950}
951
952/* Number of perf_counters counting hardware events */
953static atomic_t num_counters;
954/* Used to avoid races in calling reserve/release_pmc_hardware */
955static DEFINE_MUTEX(pmc_reserve_mutex);
956
957/*
958 * Release the PMU if this is the last perf_counter.
959 */
960static void hw_perf_counter_destroy(struct perf_counter *counter)
961{
962 if (!atomic_add_unless(&num_counters, -1, 1)) {
963 mutex_lock(&pmc_reserve_mutex);
964 if (atomic_dec_return(&num_counters) == 0)
965 release_pmc_hardware();
966 mutex_unlock(&pmc_reserve_mutex);
967 }
968}
969
970/*
971 * Translate a generic cache event config to a raw event code.
972 */
973static int hw_perf_cache_event(u64 config, u64 *eventp)
974{
975 unsigned long type, op, result;
976 int ev;
977
978 if (!ppmu->cache_events)
979 return -EINVAL;
980
981 /* unpack config */
982 type = config & 0xff;
983 op = (config >> 8) & 0xff;
984 result = (config >> 16) & 0xff;
985
986 if (type >= PERF_COUNT_HW_CACHE_MAX ||
987 op >= PERF_COUNT_HW_CACHE_OP_MAX ||
988 result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
989 return -EINVAL;
990
991 ev = (*ppmu->cache_events)[type][op][result];
992 if (ev == 0)
993 return -EOPNOTSUPP;
994 if (ev == -1)
995 return -EINVAL;
996 *eventp = ev;
997 return 0;
998}
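To make the unpacking above concrete (a sketch relying on the generic perf_counter cache-event encoding, in which type, op and result occupy successive bytes of attr.config): an L1 data-cache read-miss request arrives as

    config = PERF_COUNT_HW_CACHE_L1D                   /* type   = 0 */
           | (PERF_COUNT_HW_CACHE_OP_READ     << 8)    /* op     = 0 */
           | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16);  /* result = 1 */

so config == 0x10000, and on MPC7450 the lookup through (*ppmu->cache_events)[0][0][1] yields the raw event code 0x225 from mpc7450_cache_events.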
999
1000const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
1001{
1002 u64 ev;
1003 unsigned long flags;
1004 struct perf_counter *ctrs[MAX_HWCOUNTERS];
1005 u64 events[MAX_HWCOUNTERS];
1006 unsigned int cflags[MAX_HWCOUNTERS];
1007 int n;
1008 int err;
1009
1010 if (!ppmu)
1011 return ERR_PTR(-ENXIO);
1012 switch (counter->attr.type) {
1013 case PERF_TYPE_HARDWARE:
1014 ev = counter->attr.config;
1015 if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
1016 return ERR_PTR(-EOPNOTSUPP);
1017 ev = ppmu->generic_events[ev];
1018 break;
1019 case PERF_TYPE_HW_CACHE:
1020 err = hw_perf_cache_event(counter->attr.config, &ev);
1021 if (err)
1022 return ERR_PTR(err);
1023 break;
1024 case PERF_TYPE_RAW:
1025 ev = counter->attr.config;
1026 break;
1027 default:
1028 return ERR_PTR(-EINVAL);
1029 }
1030 counter->hw.config_base = ev;
1031 counter->hw.idx = 0;
1032
1033 /*
1034 * If we are not running on a hypervisor, force the
1035 * exclude_hv bit to 0 so that we don't care what
1036 * the user set it to.
1037 */
1038 if (!firmware_has_feature(FW_FEATURE_LPAR))
1039 counter->attr.exclude_hv = 0;
1040
1041 /*
1042 * If this is a per-task counter, then we can use
1043 * PM_RUN_* events interchangeably with their non RUN_*
1044 * equivalents, e.g. PM_RUN_CYC instead of PM_CYC.
1045 * XXX we should check if the task is an idle task.
1046 */
1047 flags = 0;
1048 if (counter->ctx->task)
1049 flags |= PPMU_ONLY_COUNT_RUN;
1050
1051 /*
1052 * If this machine has limited counters, check whether this
1053 * event could go on a limited counter.
1054 */
1055 if (ppmu->flags & PPMU_LIMITED_PMC5_6) {
1056 if (can_go_on_limited_pmc(counter, ev, flags)) {
1057 flags |= PPMU_LIMITED_PMC_OK;
1058 } else if (ppmu->limited_pmc_event(ev)) {
1059 /*
1060 * The requested event is on a limited PMC,
1061 * but we can't use a limited PMC; see if any
1062 * alternative goes on a normal PMC.
1063 */
1064 ev = normal_pmc_alternative(ev, flags);
1065 if (!ev)
1066 return ERR_PTR(-EINVAL);
1067 }
1068 }
1069
1070 /*
1071 * If this is in a group, check if it can go on with all the
1072 * other hardware counters in the group. We assume the counter
1073 * hasn't been linked into its leader's sibling list at this point.
1074 */
1075 n = 0;
1076 if (counter->group_leader != counter) {
1077 n = collect_events(counter->group_leader, ppmu->n_counter - 1,
1078 ctrs, events, cflags);
1079 if (n < 0)
1080 return ERR_PTR(-EINVAL);
1081 }
1082 events[n] = ev;
1083 ctrs[n] = counter;
1084 cflags[n] = flags;
1085 if (check_excludes(ctrs, cflags, n, 1))
1086 return ERR_PTR(-EINVAL);
1087 if (power_check_constraints(events, cflags, n + 1))
1088 return ERR_PTR(-EINVAL);
1089
1090 counter->hw.config = events[n];
1091 counter->hw.counter_base = cflags[n];
1092 counter->hw.last_period = counter->hw.sample_period;
1093 atomic64_set(&counter->hw.period_left, counter->hw.last_period);
1094
1095 /*
1096 * See if we need to reserve the PMU.
1097 * If no counters are currently in use, then we have to take a
1098 * mutex to ensure that we don't race with another task doing
1099 * reserve_pmc_hardware or release_pmc_hardware.
1100 */
1101 err = 0;
1102 if (!atomic_inc_not_zero(&num_counters)) {
1103 mutex_lock(&pmc_reserve_mutex);
1104 if (atomic_read(&num_counters) == 0 &&
1105 reserve_pmc_hardware(perf_counter_interrupt))
1106 err = -EBUSY;
1107 else
1108 atomic_inc(&num_counters);
1109 mutex_unlock(&pmc_reserve_mutex);
1110 }
1111 counter->destroy = hw_perf_counter_destroy;
1112
1113 if (err)
1114 return ERR_PTR(err);
1115 return &power_pmu;
1116}
1117
1118/*
1119 * A counter has overflowed; update its count and record
1120 * things if requested. Note that interrupts are hard-disabled
1121 * here so there is no possibility of being interrupted.
1122 */
1123static void record_and_restart(struct perf_counter *counter, unsigned long val,
1124 struct pt_regs *regs, int nmi)
1125{
1126 u64 period = counter->hw.sample_period;
1127 s64 prev, delta, left;
1128 int record = 0;
1129
1130 /* we don't have to worry about interrupts here */
1131 prev = atomic64_read(&counter->hw.prev_count);
1132 delta = (val - prev) & 0xfffffffful;
1133 atomic64_add(delta, &counter->count);
1134
1135 /*
1136 * See if the total period for this counter has expired,
1137 * and update for the next period.
1138 */
1139 val = 0;
1140 left = atomic64_read(&counter->hw.period_left) - delta;
1141 if (period) {
1142 if (left <= 0) {
1143 left += period;
1144 if (left <= 0)
1145 left = period;
1146 record = 1;
1147 }
1148 if (left < 0x80000000LL)
1149 val = 0x80000000LL - left;
1150 }
1151
1152 /*
1153 * Finally record data if requested.
1154 */
1155 if (record) {
1156 struct perf_sample_data data = {
1157 .regs = regs,
1158 .addr = 0,
1159 .period = counter->hw.last_period,
1160 };
1161
1162 if (counter->attr.sample_type & PERF_SAMPLE_ADDR)
1163 perf_get_data_addr(regs, &data.addr);
1164
1165 if (perf_counter_overflow(counter, nmi, &data)) {
1166 /*
1167 * Interrupts are coming too fast - throttle them
1168 * by setting the counter to 0, so it will be
1169 * at least 2^30 cycles until the next interrupt
1170 * (assuming each counter counts at most 2 counts
1171 * per cycle).
1172 */
1173 val = 0;
1174 left = ~0ULL >> 1;
1175 }
1176 }
1177
1178 write_pmc(counter->hw.idx, val);
1179 atomic64_set(&counter->hw.prev_count, val);
1180 atomic64_set(&counter->hw.period_left, left);
1181 perf_counter_update_userpage(counter);
1182}
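A worked example of the reload arithmetic above (just the numbers, not new code): the PMCs are 32 bits wide and the overflow condition corresponds to bit 31 becoming set, so for a sample_period of 100000 the counter is reprogrammed to

    val = 0x80000000 - 100000 = 0x7ffe7960

and the next overflow, and hence the next sample, arrives after exactly 100000 further counts.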
1183
1184/*
1185 * Called from generic code to get the misc flags (i.e. processor mode)
1186 * for an event.
1187 */
1188unsigned long perf_misc_flags(struct pt_regs *regs)
1189{
1190 u32 flags = perf_get_misc_flags(regs);
1191
1192 if (flags)
1193 return flags;
1194 return user_mode(regs) ? PERF_EVENT_MISC_USER :
1195 PERF_EVENT_MISC_KERNEL;
1196}
1197
1198/*
1199 * Called from generic code to get the instruction pointer
1200 * for an event.
1201 */
1202unsigned long perf_instruction_pointer(struct pt_regs *regs)
1203{
1204 unsigned long ip;
1205
1206 if (TRAP(regs) != 0xf00)
1207 return regs->nip; /* not a PMU interrupt */
1208
1209 ip = mfspr(SPRN_SIAR) + perf_ip_adjust(regs);
1210 return ip;
1211}
1212
1213/*
1214 * Performance monitor interrupt stuff
1215 */
1216static void perf_counter_interrupt(struct pt_regs *regs)
1217{
1218 int i;
1219 struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters);
1220 struct perf_counter *counter;
1221 unsigned long val;
1222 int found = 0;
1223 int nmi;
1224
1225 if (cpuhw->n_limited)
1226 freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5),
1227 mfspr(SPRN_PMC6));
1228
1229 perf_read_regs(regs);
1230
1231 nmi = perf_intr_is_nmi(regs);
1232 if (nmi)
1233 nmi_enter();
1234 else
1235 irq_enter();
1236
1237 for (i = 0; i < cpuhw->n_counters; ++i) {
1238 counter = cpuhw->counter[i];
1239 if (!counter->hw.idx || is_limited_pmc(counter->hw.idx))
1240 continue;
1241 val = read_pmc(counter->hw.idx);
1242 if ((int)val < 0) {
1243 /* counter has overflowed */
1244 found = 1;
1245 record_and_restart(counter, val, regs, nmi);
1246 }
1247 }
1248
1249 /*
1250 * In case we didn't find and reset the counter that caused
1251 * the interrupt, scan all counters and reset any that are
1252 * negative, to avoid getting continual interrupts.
1253 * Any that we processed in the previous loop will not be negative.
1254 */
1255 if (!found) {
1256 for (i = 0; i < ppmu->n_counter; ++i) {
1257 if (is_limited_pmc(i + 1))
1258 continue;
1259 val = read_pmc(i + 1);
1260 if ((int)val < 0)
1261 write_pmc(i + 1, 0);
1262 }
1263 }
1264
1265 /*
1266 * Reset MMCR0 to its normal value. This will set PMXE and
1267 * clear FC (freeze counters) and PMAO (perf mon alert occurred)
1268 * and thus allow interrupts to occur again.
1269 * XXX might want to use MSR.PM to keep the counters frozen until
1270 * we get back out of this interrupt.
1271 */
1272 write_mmcr0(cpuhw, cpuhw->mmcr[0]);
1273
1274 if (nmi)
1275 nmi_exit();
1276 else
1277 irq_exit();
1278}
1279
1280void hw_perf_counter_setup(int cpu)
1281{
1282 struct cpu_hw_counters *cpuhw = &per_cpu(cpu_hw_counters, cpu);
1283
1284 memset(cpuhw, 0, sizeof(*cpuhw));
1285 cpuhw->mmcr[0] = MMCR0_FC;
1286}
1287
1288int register_power_pmu(struct power_pmu *pmu)
1289{
1290 if (ppmu)
1291 return -EBUSY; /* something's already registered */
1292
1293 ppmu = pmu;
1294 pr_info("%s performance monitor hardware support registered\n",
1295 pmu->name);
1296
1297#ifdef MSR_HV
1298 /*
1299 * Use FCHV to ignore kernel events if MSR.HV is set.
1300 */
1301 if (mfmsr() & MSR_HV)
1302 freeze_counters_kernel = MMCR0_FCHV;
 1303#endif /* MSR_HV */
1304
1305 return 0;
1306}
diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c
new file mode 100644
index 000000000000..db90b0c5c27b
--- /dev/null
+++ b/arch/powerpc/kernel/power4-pmu.c
@@ -0,0 +1,615 @@
1/*
2 * Performance counter support for POWER4 (GP) and POWER4+ (GQ) processors.
3 *
4 * Copyright 2009 Paul Mackerras, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/kernel.h>
12#include <linux/perf_counter.h>
13#include <linux/string.h>
14#include <asm/reg.h>
15#include <asm/cputable.h>
16
17/*
18 * Bits in event code for POWER4
19 */
20#define PM_PMC_SH 12 /* PMC number (1-based) for direct events */
21#define PM_PMC_MSK 0xf
22#define PM_UNIT_SH 8 /* TTMMUX number and setting - unit select */
23#define PM_UNIT_MSK 0xf
24#define PM_LOWER_SH 6
25#define PM_LOWER_MSK 1
26#define PM_LOWER_MSKS 0x40
27#define PM_BYTE_SH 4 /* Byte number of event bus to use */
28#define PM_BYTE_MSK 3
29#define PM_PMCSEL_MSK 7
30
31/*
32 * Unit code values
33 */
34#define PM_FPU 1
35#define PM_ISU1 2
36#define PM_IFU 3
37#define PM_IDU0 4
38#define PM_ISU1_ALT 6
39#define PM_ISU2 7
40#define PM_IFU_ALT 8
41#define PM_LSU0 9
42#define PM_LSU1 0xc
43#define PM_GPS 0xf
44
45/*
46 * Bits in MMCR0 for POWER4
47 */
48#define MMCR0_PMC1SEL_SH 8
49#define MMCR0_PMC2SEL_SH 1
50#define MMCR_PMCSEL_MSK 0x1f
51
52/*
53 * Bits in MMCR1 for POWER4
54 */
55#define MMCR1_TTM0SEL_SH 62
56#define MMCR1_TTC0SEL_SH 61
57#define MMCR1_TTM1SEL_SH 59
58#define MMCR1_TTC1SEL_SH 58
59#define MMCR1_TTM2SEL_SH 56
60#define MMCR1_TTC2SEL_SH 55
61#define MMCR1_TTM3SEL_SH 53
62#define MMCR1_TTC3SEL_SH 52
63#define MMCR1_TTMSEL_MSK 3
64#define MMCR1_TD_CP_DBG0SEL_SH 50
65#define MMCR1_TD_CP_DBG1SEL_SH 48
66#define MMCR1_TD_CP_DBG2SEL_SH 46
67#define MMCR1_TD_CP_DBG3SEL_SH 44
68#define MMCR1_DEBUG0SEL_SH 43
69#define MMCR1_DEBUG1SEL_SH 42
70#define MMCR1_DEBUG2SEL_SH 41
71#define MMCR1_DEBUG3SEL_SH 40
72#define MMCR1_PMC1_ADDER_SEL_SH 39
73#define MMCR1_PMC2_ADDER_SEL_SH 38
74#define MMCR1_PMC6_ADDER_SEL_SH 37
75#define MMCR1_PMC5_ADDER_SEL_SH 36
76#define MMCR1_PMC8_ADDER_SEL_SH 35
77#define MMCR1_PMC7_ADDER_SEL_SH 34
78#define MMCR1_PMC3_ADDER_SEL_SH 33
79#define MMCR1_PMC4_ADDER_SEL_SH 32
80#define MMCR1_PMC3SEL_SH 27
81#define MMCR1_PMC4SEL_SH 22
82#define MMCR1_PMC5SEL_SH 17
83#define MMCR1_PMC6SEL_SH 12
84#define MMCR1_PMC7SEL_SH 7
85#define MMCR1_PMC8SEL_SH 2 /* note bit 0 is in MMCRA for GP */
86
87static short mmcr1_adder_bits[8] = {
88 MMCR1_PMC1_ADDER_SEL_SH,
89 MMCR1_PMC2_ADDER_SEL_SH,
90 MMCR1_PMC3_ADDER_SEL_SH,
91 MMCR1_PMC4_ADDER_SEL_SH,
92 MMCR1_PMC5_ADDER_SEL_SH,
93 MMCR1_PMC6_ADDER_SEL_SH,
94 MMCR1_PMC7_ADDER_SEL_SH,
95 MMCR1_PMC8_ADDER_SEL_SH
96};
97
98/*
99 * Bits in MMCRA
100 */
101#define MMCRA_PMC8SEL0_SH 17 /* PMC8SEL bit 0 for GP */
102
103/*
104 * Layout of constraint bits:
105 * 6666555555555544444444443333333333222222222211111111110000000000
106 * 3210987654321098765432109876543210987654321098765432109876543210
107 * |[ >[ >[ >|||[ >[ >< >< >< >< ><><><><><><><><>
108 * | UC1 UC2 UC3 ||| PS1 PS2 B0 B1 B2 B3 P1P2P3P4P5P6P7P8
109 * \SMPL ||\TTC3SEL
110 * |\TTC_IFU_SEL
111 * \TTM2SEL0
112 *
113 * SMPL - SAMPLE_ENABLE constraint
114 * 56: SAMPLE_ENABLE value 0x0100_0000_0000_0000
115 *
116 * UC1 - unit constraint 1: can't have all three of FPU/ISU1/IDU0|ISU2
117 * 55: UC1 error 0x0080_0000_0000_0000
118 * 54: FPU events needed 0x0040_0000_0000_0000
119 * 53: ISU1 events needed 0x0020_0000_0000_0000
120 * 52: IDU0|ISU2 events needed 0x0010_0000_0000_0000
121 *
122 * UC2 - unit constraint 2: can't have all three of FPU/IFU/LSU0
123 * 51: UC2 error 0x0008_0000_0000_0000
124 * 50: FPU events needed 0x0004_0000_0000_0000
125 * 49: IFU events needed 0x0002_0000_0000_0000
126 * 48: LSU0 events needed 0x0001_0000_0000_0000
127 *
128 * UC3 - unit constraint 3: can't have all four of LSU0/IFU/IDU0|ISU2/ISU1
129 * 47: UC3 error 0x8000_0000_0000
130 * 46: LSU0 events needed 0x4000_0000_0000
131 * 45: IFU events needed 0x2000_0000_0000
132 * 44: IDU0|ISU2 events needed 0x1000_0000_0000
133 * 43: ISU1 events needed 0x0800_0000_0000
134 *
135 * TTM2SEL0
136 * 42: 0 = IDU0 events needed
137 * 1 = ISU2 events needed 0x0400_0000_0000
138 *
139 * TTC_IFU_SEL
140 * 41: 0 = IFU.U events needed
141 * 1 = IFU.L events needed 0x0200_0000_0000
142 *
143 * TTC3SEL
144 * 40: 0 = LSU1.U events needed
145 * 1 = LSU1.L events needed 0x0100_0000_0000
146 *
147 * PS1
148 * 39: PS1 error 0x0080_0000_0000
149 * 36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000
150 *
151 * PS2
152 * 35: PS2 error 0x0008_0000_0000
153 * 32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000
154 *
155 * B0
156 * 28-31: Byte 0 event source 0xf000_0000
157 * 1 = FPU
158 * 2 = ISU1
159 * 3 = IFU
160 * 4 = IDU0
161 * 7 = ISU2
162 * 9 = LSU0
163 * c = LSU1
164 * f = GPS
165 *
166 * B1, B2, B3
167 * 24-27, 20-23, 16-19: Byte 1, 2, 3 event sources
168 *
169 * P8
170 * 15: P8 error 0x8000
171 * 14-15: Count of events needing PMC8
172 *
173 * P1..P7
174 * 0-13: Count of events needing PMC1..PMC7
175 *
176 * Note: this doesn't allow events using IFU.U to be combined with events
177 * using IFU.L, though that is feasible (using TTM0 and TTM2). However
178 * there are no listed events for IFU.L (they are debug events not
179 * verified for performance monitoring) so this shouldn't cause a
180 * problem.
181 */
182
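As a loose standalone illustration of the encoding described above (not kernel code): each event that must run on a particular PMC contributes 1 to a small count field whose upper bit sits under the constraint mask, so adding the value words of two events that both demand PMC1 carries into a masked bit and the combination can be rejected. The helper below is hypothetical; the generic checker that consumes these masks, together with the PMU's add_fields and test_adder values, is more general than this.

#include <stdio.h>

/* hypothetical helper demonstrating the mask/value carry trick */
static int events_conflict(unsigned long va, unsigned long vb,
			   unsigned long mask)
{
	/* a carry out of a count field lands on a masked (error) bit */
	return ((va + vb) & mask) != 0;
}

int main(void)
{
	/* p4_get_constraint() below encodes "needs PMC1" as mask 2, value 1 */
	unsigned long mask = 2, value = 1;

	printf("one PMC1 user:  conflict=%d\n", events_conflict(value, 0, mask));
	printf("two PMC1 users: conflict=%d\n", events_conflict(value, value, mask));
	return 0;
}
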
183static struct unitinfo {
184 unsigned long value, mask;
185 int unit;
186 int lowerbit;
187} p4_unitinfo[16] = {
188 [PM_FPU] = { 0x44000000000000ul, 0x88000000000000ul, PM_FPU, 0 },
189 [PM_ISU1] = { 0x20080000000000ul, 0x88000000000000ul, PM_ISU1, 0 },
190 [PM_ISU1_ALT] =
191 { 0x20080000000000ul, 0x88000000000000ul, PM_ISU1, 0 },
192 [PM_IFU] = { 0x02200000000000ul, 0x08820000000000ul, PM_IFU, 41 },
193 [PM_IFU_ALT] =
194 { 0x02200000000000ul, 0x08820000000000ul, PM_IFU, 41 },
195 [PM_IDU0] = { 0x10100000000000ul, 0x80840000000000ul, PM_IDU0, 1 },
196 [PM_ISU2] = { 0x10140000000000ul, 0x80840000000000ul, PM_ISU2, 0 },
197 [PM_LSU0] = { 0x01400000000000ul, 0x08800000000000ul, PM_LSU0, 0 },
198 [PM_LSU1] = { 0x00000000000000ul, 0x00010000000000ul, PM_LSU1, 40 },
199 [PM_GPS] = { 0x00000000000000ul, 0x00000000000000ul, PM_GPS, 0 }
200};
201
202static unsigned char direct_marked_event[8] = {
203 (1<<2) | (1<<3), /* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */
204 (1<<3) | (1<<5), /* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */
205 (1<<3), /* PMC3: PM_MRK_ST_CMPL_INT */
206 (1<<4) | (1<<5), /* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */
207 (1<<4) | (1<<5), /* PMC5: PM_MRK_GRP_TIMEO */
208 (1<<3) | (1<<4) | (1<<5),
209 /* PMC6: PM_MRK_ST_GPS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */
210 (1<<4) | (1<<5), /* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */
211 (1<<4), /* PMC8: PM_MRK_LSU_FIN */
212};
213
214/*
215 * Returns 1 if event counts things relating to marked instructions
216 * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
217 */
218static int p4_marked_instr_event(u64 event)
219{
220 int pmc, psel, unit, byte, bit;
221 unsigned int mask;
222
223 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
224 psel = event & PM_PMCSEL_MSK;
225 if (pmc) {
226 if (direct_marked_event[pmc - 1] & (1 << psel))
227 return 1;
228 if (psel == 0) /* add events */
229 bit = (pmc <= 4)? pmc - 1: 8 - pmc;
230 else if (psel == 6) /* decode events */
231 bit = 4;
232 else
233 return 0;
234 } else
235 bit = psel;
236
237 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
238 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
239 mask = 0;
240 switch (unit) {
241 case PM_LSU1:
242 if (event & PM_LOWER_MSKS)
243 mask = 1 << 28; /* byte 7 bit 4 */
244 else
245 mask = 6 << 24; /* byte 3 bits 1 and 2 */
246 break;
247 case PM_LSU0:
248 /* byte 3, bit 3; byte 2 bits 0,2,3,4,5; byte 1 */
249 mask = 0x083dff00;
250 }
251 return (mask >> (byte * 8 + bit)) & 1;
252}
253
254static int p4_get_constraint(u64 event, unsigned long *maskp,
255 unsigned long *valp)
256{
257 int pmc, byte, unit, lower, sh;
258 unsigned long mask = 0, value = 0;
259 int grp = -1;
260
261 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
262 if (pmc) {
263 if (pmc > 8)
264 return -1;
265 sh = (pmc - 1) * 2;
266 mask |= 2 << sh;
267 value |= 1 << sh;
268 grp = ((pmc - 1) >> 1) & 1;
269 }
270 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
271 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
272 if (unit) {
273 lower = (event >> PM_LOWER_SH) & PM_LOWER_MSK;
274
275 /*
276 * Bus events on bytes 0 and 2 can be counted
277 * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8.
278 */
279 if (!pmc)
280 grp = byte & 1;
281
282 if (!p4_unitinfo[unit].unit)
283 return -1;
284 mask |= p4_unitinfo[unit].mask;
285 value |= p4_unitinfo[unit].value;
286 sh = p4_unitinfo[unit].lowerbit;
287 if (sh > 1)
288 value |= (unsigned long)lower << sh;
289 else if (lower != sh)
290 return -1;
291 unit = p4_unitinfo[unit].unit;
292
293 /* Set byte lane select field */
294 mask |= 0xfULL << (28 - 4 * byte);
295 value |= (unsigned long)unit << (28 - 4 * byte);
296 }
297 if (grp == 0) {
298 /* increment PMC1/2/5/6 field */
299 mask |= 0x8000000000ull;
300 value |= 0x1000000000ull;
301 } else {
302 /* increment PMC3/4/7/8 field */
303 mask |= 0x800000000ull;
304 value |= 0x100000000ull;
305 }
306
307 /* Marked instruction events need sample_enable set */
308 if (p4_marked_instr_event(event)) {
309 mask |= 1ull << 56;
310 value |= 1ull << 56;
311 }
312
313 /* PMCSEL=6 decode events on byte 2 need sample_enable clear */
314 if (pmc && (event & PM_PMCSEL_MSK) == 6 && byte == 2)
315 mask |= 1ull << 56;
316
317 *maskp = mask;
318 *valp = value;
319 return 0;
320}
321
322static unsigned int ppc_inst_cmpl[] = {
323 0x1001, 0x4001, 0x6001, 0x7001, 0x8001
324};
325
326static int p4_get_alternatives(u64 event, unsigned int flags, u64 alt[])
327{
328 int i, j, na;
329
330 alt[0] = event;
331 na = 1;
332
333 /* 2 possibilities for PM_GRP_DISP_REJECT */
334 if (event == 0x8003 || event == 0x0224) {
335 alt[1] = event ^ (0x8003 ^ 0x0224);
336 return 2;
337 }
338
339 /* 2 possibilities for PM_ST_MISS_L1 */
340 if (event == 0x0c13 || event == 0x0c23) {
341 alt[1] = event ^ (0x0c13 ^ 0x0c23);
342 return 2;
343 }
344
345 /* several possibilities for PM_INST_CMPL */
346 for (i = 0; i < ARRAY_SIZE(ppc_inst_cmpl); ++i) {
347 if (event == ppc_inst_cmpl[i]) {
348 for (j = 0; j < ARRAY_SIZE(ppc_inst_cmpl); ++j)
349 if (j != i)
350 alt[na++] = ppc_inst_cmpl[j];
351 break;
352 }
353 }
354
355 return na;
356}
357
358static int p4_compute_mmcr(u64 event[], int n_ev,
359 unsigned int hwc[], unsigned long mmcr[])
360{
361 unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0;
362 unsigned int pmc, unit, byte, psel, lower;
363 unsigned int ttm, grp;
364 unsigned int pmc_inuse = 0;
365 unsigned int pmc_grp_use[2];
366 unsigned char busbyte[4];
367 unsigned char unituse[16];
368 unsigned int unitlower = 0;
369 int i;
370
371 if (n_ev > 8)
372 return -1;
373
374 /* First pass to count resource use */
375 pmc_grp_use[0] = pmc_grp_use[1] = 0;
376 memset(busbyte, 0, sizeof(busbyte));
377 memset(unituse, 0, sizeof(unituse));
378 for (i = 0; i < n_ev; ++i) {
379 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
380 if (pmc) {
381 if (pmc_inuse & (1 << (pmc - 1)))
382 return -1;
383 pmc_inuse |= 1 << (pmc - 1);
384 /* count 1/2/5/6 vs 3/4/7/8 use */
385 ++pmc_grp_use[((pmc - 1) >> 1) & 1];
386 }
387 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
388 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
389 lower = (event[i] >> PM_LOWER_SH) & PM_LOWER_MSK;
390 if (unit) {
391 if (!pmc)
392 ++pmc_grp_use[byte & 1];
393 if (unit == 6 || unit == 8)
394 /* map alt ISU1/IFU codes: 6->2, 8->3 */
395 unit = (unit >> 1) - 1;
396 if (busbyte[byte] && busbyte[byte] != unit)
397 return -1;
398 busbyte[byte] = unit;
399 lower <<= unit;
400 if (unituse[unit] && lower != (unitlower & lower))
401 return -1;
402 unituse[unit] = 1;
403 unitlower |= lower;
404 }
405 }
406 if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4)
407 return -1;
408
409 /*
410 * Assign resources and set multiplexer selects.
411 *
 412 * Units 1,2,3 are on TTM0, 4,6,7 on TTM1, 8,9 on TTM2.
413 * Each TTMx can only select one unit, but since
414 * units 2 and 6 are both ISU1, and 3 and 8 are both IFU,
415 * we have some choices.
416 */
417 if (unituse[2] & (unituse[1] | (unituse[3] & unituse[9]))) {
418 unituse[6] = 1; /* Move 2 to 6 */
419 unituse[2] = 0;
420 }
421 if (unituse[3] & (unituse[1] | unituse[2])) {
422 unituse[8] = 1; /* Move 3 to 8 */
423 unituse[3] = 0;
424 unitlower = (unitlower & ~8) | ((unitlower & 8) << 5);
425 }
426 /* Check only one unit per TTMx */
427 if (unituse[1] + unituse[2] + unituse[3] > 1 ||
428 unituse[4] + unituse[6] + unituse[7] > 1 ||
429 unituse[8] + unituse[9] > 1 ||
430 (unituse[5] | unituse[10] | unituse[11] |
431 unituse[13] | unituse[14]))
432 return -1;
433
434 /* Set TTMxSEL fields. Note, units 1-3 => TTM0SEL codes 0-2 */
435 mmcr1 |= (unsigned long)(unituse[3] * 2 + unituse[2])
436 << MMCR1_TTM0SEL_SH;
437 mmcr1 |= (unsigned long)(unituse[7] * 3 + unituse[6] * 2)
438 << MMCR1_TTM1SEL_SH;
439 mmcr1 |= (unsigned long)unituse[9] << MMCR1_TTM2SEL_SH;
440
441 /* Set TTCxSEL fields. */
442 if (unitlower & 0xe)
443 mmcr1 |= 1ull << MMCR1_TTC0SEL_SH;
444 if (unitlower & 0xf0)
445 mmcr1 |= 1ull << MMCR1_TTC1SEL_SH;
446 if (unitlower & 0xf00)
447 mmcr1 |= 1ull << MMCR1_TTC2SEL_SH;
448 if (unitlower & 0x7000)
449 mmcr1 |= 1ull << MMCR1_TTC3SEL_SH;
450
451 /* Set byte lane select fields. */
452 for (byte = 0; byte < 4; ++byte) {
453 unit = busbyte[byte];
454 if (!unit)
455 continue;
456 if (unit == 0xf) {
457 /* special case for GPS */
458 mmcr1 |= 1ull << (MMCR1_DEBUG0SEL_SH - byte);
459 } else {
460 if (!unituse[unit])
461 ttm = unit - 1; /* 2->1, 3->2 */
462 else
463 ttm = unit >> 2;
464 mmcr1 |= (unsigned long)ttm
465 << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
466 }
467 }
468
469 /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
470 for (i = 0; i < n_ev; ++i) {
471 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
472 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
473 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
474 psel = event[i] & PM_PMCSEL_MSK;
475 if (!pmc) {
476 /* Bus event or 00xxx direct event (off or cycles) */
477 if (unit)
478 psel |= 0x10 | ((byte & 2) << 2);
479 for (pmc = 0; pmc < 8; ++pmc) {
480 if (pmc_inuse & (1 << pmc))
481 continue;
482 grp = (pmc >> 1) & 1;
483 if (unit) {
484 if (grp == (byte & 1))
485 break;
486 } else if (pmc_grp_use[grp] < 4) {
487 ++pmc_grp_use[grp];
488 break;
489 }
490 }
491 pmc_inuse |= 1 << pmc;
492 } else {
493 /* Direct event */
494 --pmc;
495 if (psel == 0 && (byte & 2))
496 /* add events on higher-numbered bus */
497 mmcr1 |= 1ull << mmcr1_adder_bits[pmc];
498 else if (psel == 6 && byte == 3)
499 /* seem to need to set sample_enable here */
500 mmcra |= MMCRA_SAMPLE_ENABLE;
501 psel |= 8;
502 }
503 if (pmc <= 1)
504 mmcr0 |= psel << (MMCR0_PMC1SEL_SH - 7 * pmc);
505 else
506 mmcr1 |= psel << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2));
507 if (pmc == 7) /* PMC8 */
508 mmcra |= (psel & 1) << MMCRA_PMC8SEL0_SH;
509 hwc[i] = pmc;
510 if (p4_marked_instr_event(event[i]))
511 mmcra |= MMCRA_SAMPLE_ENABLE;
512 }
513
514 if (pmc_inuse & 1)
515 mmcr0 |= MMCR0_PMC1CE;
516 if (pmc_inuse & 0xfe)
517 mmcr0 |= MMCR0_PMCjCE;
518
519 mmcra |= 0x2000; /* mark only one IOP per PPC instruction */
520
521 /* Return MMCRx values */
522 mmcr[0] = mmcr0;
523 mmcr[1] = mmcr1;
524 mmcr[2] = mmcra;
525 return 0;
526}
527
528static void p4_disable_pmc(unsigned int pmc, unsigned long mmcr[])
529{
530 /*
531 * Setting the PMCxSEL field to 0 disables PMC x.
532 * (Note that pmc is 0-based here, not 1-based.)
533 */
534 if (pmc <= 1) {
535 mmcr[0] &= ~(0x1fUL << (MMCR0_PMC1SEL_SH - 7 * pmc));
536 } else {
537 mmcr[1] &= ~(0x1fUL << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)));
538 if (pmc == 7)
539 mmcr[2] &= ~(1UL << MMCRA_PMC8SEL0_SH);
540 }
541}
542
543static int p4_generic_events[] = {
544 [PERF_COUNT_HW_CPU_CYCLES] = 7,
545 [PERF_COUNT_HW_INSTRUCTIONS] = 0x1001,
546 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x8c10, /* PM_LD_REF_L1 */
547 [PERF_COUNT_HW_CACHE_MISSES] = 0x3c10, /* PM_LD_MISS_L1 */
548 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x330, /* PM_BR_ISSUED */
549 [PERF_COUNT_HW_BRANCH_MISSES] = 0x331, /* PM_BR_MPRED_CR */
550};
551
552#define C(x) PERF_COUNT_HW_CACHE_##x
553
554/*
555 * Table of generalized cache-related events.
556 * 0 means not supported, -1 means nonsensical, other values
557 * are event codes.
558 */
559static int power4_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
560 [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
561 [C(OP_READ)] = { 0x8c10, 0x3c10 },
562 [C(OP_WRITE)] = { 0x7c10, 0xc13 },
563 [C(OP_PREFETCH)] = { 0xc35, 0 },
564 },
565 [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
566 [C(OP_READ)] = { 0, 0 },
567 [C(OP_WRITE)] = { -1, -1 },
568 [C(OP_PREFETCH)] = { 0, 0 },
569 },
570 [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
571 [C(OP_READ)] = { 0, 0 },
572 [C(OP_WRITE)] = { 0, 0 },
573 [C(OP_PREFETCH)] = { 0xc34, 0 },
574 },
575 [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
576 [C(OP_READ)] = { 0, 0x904 },
577 [C(OP_WRITE)] = { -1, -1 },
578 [C(OP_PREFETCH)] = { -1, -1 },
579 },
580 [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
581 [C(OP_READ)] = { 0, 0x900 },
582 [C(OP_WRITE)] = { -1, -1 },
583 [C(OP_PREFETCH)] = { -1, -1 },
584 },
585 [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
586 [C(OP_READ)] = { 0x330, 0x331 },
587 [C(OP_WRITE)] = { -1, -1 },
588 [C(OP_PREFETCH)] = { -1, -1 },
589 },
590};
591
592static struct power_pmu power4_pmu = {
593 .name = "POWER4/4+",
594 .n_counter = 8,
595 .max_alternatives = 5,
596 .add_fields = 0x0000001100005555ul,
597 .test_adder = 0x0011083300000000ul,
598 .compute_mmcr = p4_compute_mmcr,
599 .get_constraint = p4_get_constraint,
600 .get_alternatives = p4_get_alternatives,
601 .disable_pmc = p4_disable_pmc,
602 .n_generic = ARRAY_SIZE(p4_generic_events),
603 .generic_events = p4_generic_events,
604 .cache_events = &power4_cache_events,
605};
606
607static int init_power4_pmu(void)
608{
609 if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power4"))
610 return -ENODEV;
611
612 return register_power_pmu(&power4_pmu);
613}
614
615arch_initcall(init_power4_pmu);
diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c
new file mode 100644
index 000000000000..f4adca8e98a4
--- /dev/null
+++ b/arch/powerpc/kernel/power5+-pmu.c
@@ -0,0 +1,688 @@
1/*
2 * Performance counter support for POWER5+/++ (not POWER5) processors.
3 *
4 * Copyright 2009 Paul Mackerras, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/kernel.h>
12#include <linux/perf_counter.h>
13#include <linux/string.h>
14#include <asm/reg.h>
15#include <asm/cputable.h>
16
17/*
18 * Bits in event code for POWER5+ (POWER5 GS) and POWER5++ (POWER5 GS DD3)
19 */
20#define PM_PMC_SH 20 /* PMC number (1-based) for direct events */
21#define PM_PMC_MSK 0xf
22#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH)
23#define PM_UNIT_SH 16 /* TTMMUX number and setting - unit select */
24#define PM_UNIT_MSK 0xf
25#define PM_BYTE_SH 12 /* Byte number of event bus to use */
26#define PM_BYTE_MSK 7
27#define PM_GRS_SH 8 /* Storage subsystem mux select */
28#define PM_GRS_MSK 7
29#define PM_BUSEVENT_MSK 0x80 /* Set if event uses event bus */
30#define PM_PMCSEL_MSK 0x7f
31
32/* Values in PM_UNIT field */
33#define PM_FPU 0
34#define PM_ISU0 1
35#define PM_IFU 2
36#define PM_ISU1 3
37#define PM_IDU 4
38#define PM_ISU0_ALT 6
39#define PM_GRS 7
40#define PM_LSU0 8
41#define PM_LSU1 0xc
42#define PM_LASTUNIT 0xc
43
44/*
45 * Bits in MMCR1 for POWER5+
46 */
47#define MMCR1_TTM0SEL_SH 62
48#define MMCR1_TTM1SEL_SH 60
49#define MMCR1_TTM2SEL_SH 58
50#define MMCR1_TTM3SEL_SH 56
51#define MMCR1_TTMSEL_MSK 3
52#define MMCR1_TD_CP_DBG0SEL_SH 54
53#define MMCR1_TD_CP_DBG1SEL_SH 52
54#define MMCR1_TD_CP_DBG2SEL_SH 50
55#define MMCR1_TD_CP_DBG3SEL_SH 48
56#define MMCR1_GRS_L2SEL_SH 46
57#define MMCR1_GRS_L2SEL_MSK 3
58#define MMCR1_GRS_L3SEL_SH 44
59#define MMCR1_GRS_L3SEL_MSK 3
60#define MMCR1_GRS_MCSEL_SH 41
61#define MMCR1_GRS_MCSEL_MSK 7
62#define MMCR1_GRS_FABSEL_SH 39
63#define MMCR1_GRS_FABSEL_MSK 3
64#define MMCR1_PMC1_ADDER_SEL_SH 35
65#define MMCR1_PMC2_ADDER_SEL_SH 34
66#define MMCR1_PMC3_ADDER_SEL_SH 33
67#define MMCR1_PMC4_ADDER_SEL_SH 32
68#define MMCR1_PMC1SEL_SH 25
69#define MMCR1_PMC2SEL_SH 17
70#define MMCR1_PMC3SEL_SH 9
71#define MMCR1_PMC4SEL_SH 1
72#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
73#define MMCR1_PMCSEL_MSK 0x7f
74
75/*
76 * Bits in MMCRA
77 */
78
79/*
80 * Layout of constraint bits:
81 * 6666555555555544444444443333333333222222222211111111110000000000
82 * 3210987654321098765432109876543210987654321098765432109876543210
83 * [ ><><>< ><> <><>[ > < >< >< >< ><><><><><><>
84 * NC G0G1G2 G3 T0T1 UC B0 B1 B2 B3 P6P5P4P3P2P1
85 *
86 * NC - number of counters
87 * 51: NC error 0x0008_0000_0000_0000
88 * 48-50: number of events needing PMC1-4 0x0007_0000_0000_0000
89 *
90 * G0..G3 - GRS mux constraints
91 * 46-47: GRS_L2SEL value
92 * 44-45: GRS_L3SEL value
 93 * 41-43: GRS_MCSEL value
94 * 39-40: GRS_FABSEL value
95 * Note that these match up with their bit positions in MMCR1
96 *
97 * T0 - TTM0 constraint
98 * 36-37: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0x30_0000_0000
99 *
100 * T1 - TTM1 constraint
101 * 34-35: TTM1SEL value (0=IDU, 3=GRS) 0x0c_0000_0000
102 *
103 * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS
104 * 33: UC3 error 0x02_0000_0000
105 * 32: FPU|IFU|ISU1 events needed 0x01_0000_0000
 106 * 31: ISU0 events needed 0x00_8000_0000
107 * 30: IDU|GRS events needed 0x00_4000_0000
108 *
109 * B0
110 * 24-27: Byte 0 event source 0x0f00_0000
111 * Encoding as for the event code
112 *
113 * B1, B2, B3
114 * 20-23, 16-19, 12-15: Byte 1, 2, 3 event sources
115 *
116 * P6
117 * 11: P6 error 0x800
118 * 10-11: Count of events needing PMC6
119 *
120 * P1..P5
121 * 0-9: Count of events needing PMC1..PMC5
122 */
123
124static const int grsel_shift[8] = {
125 MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH,
126 MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH,
127 MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH
128};
129
130/* Masks and values for using events from the various units */
131static unsigned long unit_cons[PM_LASTUNIT+1][2] = {
132 [PM_FPU] = { 0x3200000000ul, 0x0100000000ul },
133 [PM_ISU0] = { 0x0200000000ul, 0x0080000000ul },
134 [PM_ISU1] = { 0x3200000000ul, 0x3100000000ul },
135 [PM_IFU] = { 0x3200000000ul, 0x2100000000ul },
136 [PM_IDU] = { 0x0e00000000ul, 0x0040000000ul },
137 [PM_GRS] = { 0x0e00000000ul, 0x0c40000000ul },
138};
139
140static int power5p_get_constraint(u64 event, unsigned long *maskp,
141 unsigned long *valp)
142{
143 int pmc, byte, unit, sh;
144 int bit, fmask;
145 unsigned long mask = 0, value = 0;
146
147 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
148 if (pmc) {
149 if (pmc > 6)
150 return -1;
151 sh = (pmc - 1) * 2;
152 mask |= 2 << sh;
153 value |= 1 << sh;
154 if (pmc >= 5 && !(event == 0x500009 || event == 0x600005))
155 return -1;
156 }
157 if (event & PM_BUSEVENT_MSK) {
158 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
159 if (unit > PM_LASTUNIT)
160 return -1;
161 if (unit == PM_ISU0_ALT)
162 unit = PM_ISU0;
163 mask |= unit_cons[unit][0];
164 value |= unit_cons[unit][1];
165 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
166 if (byte >= 4) {
167 if (unit != PM_LSU1)
168 return -1;
169 /* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */
170 ++unit;
171 byte &= 3;
172 }
173 if (unit == PM_GRS) {
174 bit = event & 7;
175 fmask = (bit == 6)? 7: 3;
176 sh = grsel_shift[bit];
177 mask |= (unsigned long)fmask << sh;
178 value |= (unsigned long)((event >> PM_GRS_SH) & fmask)
179 << sh;
180 }
181 /* Set byte lane select field */
182 mask |= 0xfUL << (24 - 4 * byte);
183 value |= (unsigned long)unit << (24 - 4 * byte);
184 }
185 if (pmc < 5) {
186 /* need a counter from PMC1-4 set */
187 mask |= 0x8000000000000ul;
188 value |= 0x1000000000000ul;
189 }
190 *maskp = mask;
191 *valp = value;
192 return 0;
193}
194
195static int power5p_limited_pmc_event(u64 event)
196{
197 int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
198
199 return pmc == 5 || pmc == 6;
200}
201
202#define MAX_ALT 3 /* at most 3 alternatives for any event */
203
204static const unsigned int event_alternatives[][MAX_ALT] = {
205 { 0x100c0, 0x40001f }, /* PM_GCT_FULL_CYC */
206 { 0x120e4, 0x400002 }, /* PM_GRP_DISP_REJECT */
207 { 0x230e2, 0x323087 }, /* PM_BR_PRED_CR */
208 { 0x230e3, 0x223087, 0x3230a0 }, /* PM_BR_PRED_TA */
209 { 0x410c7, 0x441084 }, /* PM_THRD_L2MISS_BOTH_CYC */
210 { 0x800c4, 0xc20e0 }, /* PM_DTLB_MISS */
211 { 0xc50c6, 0xc60e0 }, /* PM_MRK_DTLB_MISS */
212 { 0x100005, 0x600005 }, /* PM_RUN_CYC */
213 { 0x100009, 0x200009 }, /* PM_INST_CMPL */
214 { 0x200015, 0x300015 }, /* PM_LSU_LMQ_SRQ_EMPTY_CYC */
215 { 0x300009, 0x400009 }, /* PM_INST_DISP */
216};
217
218/*
219 * Scan the alternatives table for a match and return the
220 * index into the alternatives table if found, else -1.
221 */
222static int find_alternative(unsigned int event)
223{
224 int i, j;
225
226 for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
227 if (event < event_alternatives[i][0])
228 break;
229 for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j)
230 if (event == event_alternatives[i][j])
231 return i;
232 }
233 return -1;
234}
235
236static const unsigned char bytedecode_alternatives[4][4] = {
237 /* PMC 1 */ { 0x21, 0x23, 0x25, 0x27 },
238 /* PMC 2 */ { 0x07, 0x17, 0x0e, 0x1e },
239 /* PMC 3 */ { 0x20, 0x22, 0x24, 0x26 },
240 /* PMC 4 */ { 0x07, 0x17, 0x0e, 0x1e }
241};
242
243/*
244 * Some direct events for decodes of event bus byte 3 have alternative
245 * PMCSEL values on other counters. This returns the alternative
246 * event code for those that do, or -1 otherwise. This also handles
 247 * alternative PMCSEL values for add events.
248 */
249static s64 find_alternative_bdecode(u64 event)
250{
251 int pmc, altpmc, pp, j;
252
253 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
254 if (pmc == 0 || pmc > 4)
255 return -1;
256 altpmc = 5 - pmc; /* 1 <-> 4, 2 <-> 3 */
257 pp = event & PM_PMCSEL_MSK;
258 for (j = 0; j < 4; ++j) {
259 if (bytedecode_alternatives[pmc - 1][j] == pp) {
260 return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) |
261 (altpmc << PM_PMC_SH) |
262 bytedecode_alternatives[altpmc - 1][j];
263 }
264 }
265
266 /* new decode alternatives for power5+ */
267 if (pmc == 1 && (pp == 0x0d || pp == 0x0e))
268 return event + (2 << PM_PMC_SH) + (0x2e - 0x0d);
269 if (pmc == 3 && (pp == 0x2e || pp == 0x2f))
270 return event - (2 << PM_PMC_SH) - (0x2e - 0x0d);
271
272 /* alternative add event encodings */
273 if (pp == 0x10 || pp == 0x28)
274 return ((event ^ (0x10 ^ 0x28)) & ~PM_PMC_MSKS) |
275 (altpmc << PM_PMC_SH);
276
277 return -1;
278}
279
280static int power5p_get_alternatives(u64 event, unsigned int flags, u64 alt[])
281{
282 int i, j, nalt = 1;
283 int nlim;
284 s64 ae;
285
286 alt[0] = event;
287 nalt = 1;
288 nlim = power5p_limited_pmc_event(event);
289 i = find_alternative(event);
290 if (i >= 0) {
291 for (j = 0; j < MAX_ALT; ++j) {
292 ae = event_alternatives[i][j];
293 if (ae && ae != event)
294 alt[nalt++] = ae;
295 nlim += power5p_limited_pmc_event(ae);
296 }
297 } else {
298 ae = find_alternative_bdecode(event);
299 if (ae > 0)
300 alt[nalt++] = ae;
301 }
302
303 if (flags & PPMU_ONLY_COUNT_RUN) {
304 /*
305 * We're only counting in RUN state,
306 * so PM_CYC is equivalent to PM_RUN_CYC
307 * and PM_INST_CMPL === PM_RUN_INST_CMPL.
308 * This doesn't include alternatives that don't provide
309 * any extra flexibility in assigning PMCs (e.g.
310 * 0x100005 for PM_RUN_CYC vs. 0xf for PM_CYC).
311 * Note that even with these additional alternatives
312 * we never end up with more than 3 alternatives for any event.
313 */
314 j = nalt;
315 for (i = 0; i < nalt; ++i) {
316 switch (alt[i]) {
317 case 0xf: /* PM_CYC */
318 alt[j++] = 0x600005; /* PM_RUN_CYC */
319 ++nlim;
320 break;
321 case 0x600005: /* PM_RUN_CYC */
322 alt[j++] = 0xf;
323 break;
324 case 0x100009: /* PM_INST_CMPL */
325 alt[j++] = 0x500009; /* PM_RUN_INST_CMPL */
326 ++nlim;
327 break;
328 case 0x500009: /* PM_RUN_INST_CMPL */
329 alt[j++] = 0x100009; /* PM_INST_CMPL */
330 alt[j++] = 0x200009;
331 break;
332 }
333 }
334 nalt = j;
335 }
336
337 if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) {
338 /* remove the limited PMC events */
339 j = 0;
340 for (i = 0; i < nalt; ++i) {
341 if (!power5p_limited_pmc_event(alt[i])) {
342 alt[j] = alt[i];
343 ++j;
344 }
345 }
346 nalt = j;
347 } else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) {
348 /* remove all but the limited PMC events */
349 j = 0;
350 for (i = 0; i < nalt; ++i) {
351 if (power5p_limited_pmc_event(alt[i])) {
352 alt[j] = alt[i];
353 ++j;
354 }
355 }
356 nalt = j;
357 }
358
359 return nalt;
360}
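
A standalone sketch (not kernel code) of the RUN-state substitution performed in the loop inside power5p_get_alternatives() above: when only counting in RUN state, PM_CYC (0xf) gains PM_RUN_CYC (0x600005) as an extra alternative, and vice versa.

#include <stdio.h>

int main(void)
{
	unsigned long long alt[4] = { 0xf };	/* PM_CYC */
	int n = 1, i, j = n;

	for (i = 0; i < n; ++i) {
		if (alt[i] == 0xf)		/* PM_CYC */
			alt[j++] = 0x600005;	/* PM_RUN_CYC */
		else if (alt[i] == 0x600005)	/* PM_RUN_CYC */
			alt[j++] = 0xf;
	}
	n = j;

	for (i = 0; i < n; ++i)
		printf("alt[%d] = %#llx\n", i, alt[i]);
	return 0;
}
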
361
362/*
363 * Map of which direct events on which PMCs are marked instruction events.
364 * Indexed by PMCSEL value, bit i (LE) set if PMC i is a marked event.
365 * Bit 0 is set if it is marked for all PMCs.
366 * The 0x80 bit indicates a byte decode PMCSEL value.
367 */
368static unsigned char direct_event_is_marked[0x28] = {
369 0, /* 00 */
370 0x1f, /* 01 PM_IOPS_CMPL */
371 0x2, /* 02 PM_MRK_GRP_DISP */
372 0xe, /* 03 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
373 0, /* 04 */
374 0x1c, /* 05 PM_MRK_BRU_FIN, PM_MRK_INST_FIN, PM_MRK_CRU_FIN */
375 0x80, /* 06 */
376 0x80, /* 07 */
377 0, 0, 0,/* 08 - 0a */
378 0x18, /* 0b PM_THRESH_TIMEO, PM_MRK_GRP_TIMEO */
379 0, /* 0c */
380 0x80, /* 0d */
381 0x80, /* 0e */
382 0, /* 0f */
383 0, /* 10 */
384 0x14, /* 11 PM_MRK_GRP_BR_REDIR, PM_MRK_GRP_IC_MISS */
385 0, /* 12 */
386 0x10, /* 13 PM_MRK_GRP_CMPL */
387 0x1f, /* 14 PM_GRP_MRK, PM_MRK_{FXU,FPU,LSU}_FIN */
388 0x2, /* 15 PM_MRK_GRP_ISSUED */
389 0x80, /* 16 */
390 0x80, /* 17 */
391 0, 0, 0, 0, 0,
392 0x80, /* 1d */
393 0x80, /* 1e */
394 0, /* 1f */
395 0x80, /* 20 */
396 0x80, /* 21 */
397 0x80, /* 22 */
398 0x80, /* 23 */
399 0x80, /* 24 */
400 0x80, /* 25 */
401 0x80, /* 26 */
402 0x80, /* 27 */
403};
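
A minimal sketch of how one entry of the table above decodes, following the indexing rules in its comment (bit 0 = marked on all PMCs, bit i = marked on PMC i, 0x80 = byte-decode PMCSEL value). Reading entry 0x1c as "PMC2, PMC3, PMC4" is an inference from that comment, not something stated explicitly in the source.

#include <stdio.h>

int main(void)
{
	unsigned char entry = 0x1c;	/* PMCSEL 0x05: PM_MRK_BRU_FIN etc. */
	int pmc;

	if (entry & 0x80)
		printf("byte-decode PMCSEL value\n");
	if (entry & 1)
		printf("marked on all PMCs\n");
	for (pmc = 1; pmc <= 4; ++pmc)
		if (entry & (1 << pmc))
			printf("marked when counted on PMC%d\n", pmc);
	return 0;
}
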
404
405/*
406 * Returns 1 if event counts things relating to marked instructions
407 * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
408 */
409static int power5p_marked_instr_event(u64 event)
410{
411 int pmc, psel;
412 int bit, byte, unit;
413 u32 mask;
414
415 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
416 psel = event & PM_PMCSEL_MSK;
417 if (pmc >= 5)
418 return 0;
419
420 bit = -1;
421 if (psel < sizeof(direct_event_is_marked)) {
422 if (direct_event_is_marked[psel] & (1 << pmc))
423 return 1;
424 if (direct_event_is_marked[psel] & 0x80)
425 bit = 4;
426 else if (psel == 0x08)
427 bit = pmc - 1;
428 else if (psel == 0x10)
429 bit = 4 - pmc;
430 else if (psel == 0x1b && (pmc == 1 || pmc == 3))
431 bit = 4;
432 } else if ((psel & 0x48) == 0x40) {
433 bit = psel & 7;
434 } else if (psel == 0x28) {
435 bit = pmc - 1;
436 } else if (pmc == 3 && (psel == 0x2e || psel == 0x2f)) {
437 bit = 4;
438 }
439
440 if (!(event & PM_BUSEVENT_MSK) || bit == -1)
441 return 0;
442
443 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
444 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
445 if (unit == PM_LSU0) {
446 /* byte 1 bits 0-7, byte 2 bits 0,2-4,6 */
447 mask = 0x5dff00;
448 } else if (unit == PM_LSU1 && byte >= 4) {
449 byte -= 4;
450 /* byte 5 bits 6-7, byte 6 bits 0,4, byte 7 bits 0-4,6 */
451 mask = 0x5f11c000;
452 } else
453 return 0;
454
455 return (mask >> (byte * 8 + bit)) & 1;
456}
457
458static int power5p_compute_mmcr(u64 event[], int n_ev,
459 unsigned int hwc[], unsigned long mmcr[])
460{
461 unsigned long mmcr1 = 0;
462 unsigned long mmcra = 0;
463 unsigned int pmc, unit, byte, psel;
464 unsigned int ttm;
465 int i, isbus, bit, grsel;
466 unsigned int pmc_inuse = 0;
467 unsigned char busbyte[4];
468 unsigned char unituse[16];
469 int ttmuse;
470
471 if (n_ev > 6)
472 return -1;
473
474 /* First pass to count resource use */
475 memset(busbyte, 0, sizeof(busbyte));
476 memset(unituse, 0, sizeof(unituse));
477 for (i = 0; i < n_ev; ++i) {
478 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
479 if (pmc) {
480 if (pmc > 6)
481 return -1;
482 if (pmc_inuse & (1 << (pmc - 1)))
483 return -1;
484 pmc_inuse |= 1 << (pmc - 1);
485 }
486 if (event[i] & PM_BUSEVENT_MSK) {
487 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
488 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
489 if (unit > PM_LASTUNIT)
490 return -1;
491 if (unit == PM_ISU0_ALT)
492 unit = PM_ISU0;
493 if (byte >= 4) {
494 if (unit != PM_LSU1)
495 return -1;
496 ++unit;
497 byte &= 3;
498 }
499 if (busbyte[byte] && busbyte[byte] != unit)
500 return -1;
501 busbyte[byte] = unit;
502 unituse[unit] = 1;
503 }
504 }
505
506 /*
507 * Assign resources and set multiplexer selects.
508 *
509 * PM_ISU0 can go either on TTM0 or TTM1, but that's the only
510 * choice we have to deal with.
511 */
512 if (unituse[PM_ISU0] &
513 (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) {
514 unituse[PM_ISU0_ALT] = 1; /* move ISU to TTM1 */
515 unituse[PM_ISU0] = 0;
516 }
517 /* Set TTM[01]SEL fields. */
518 ttmuse = 0;
519 for (i = PM_FPU; i <= PM_ISU1; ++i) {
520 if (!unituse[i])
521 continue;
522 if (ttmuse++)
523 return -1;
524 mmcr1 |= (unsigned long)i << MMCR1_TTM0SEL_SH;
525 }
526 ttmuse = 0;
527 for (; i <= PM_GRS; ++i) {
528 if (!unituse[i])
529 continue;
530 if (ttmuse++)
531 return -1;
532 mmcr1 |= (unsigned long)(i & 3) << MMCR1_TTM1SEL_SH;
533 }
534 if (ttmuse > 1)
535 return -1;
536
537 /* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */
538 for (byte = 0; byte < 4; ++byte) {
539 unit = busbyte[byte];
540 if (!unit)
541 continue;
542 if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) {
543 /* get ISU0 through TTM1 rather than TTM0 */
544 unit = PM_ISU0_ALT;
545 } else if (unit == PM_LSU1 + 1) {
546 /* select lower word of LSU1 for this byte */
547 mmcr1 |= 1ul << (MMCR1_TTM3SEL_SH + 3 - byte);
548 }
549 ttm = unit >> 2;
550 mmcr1 |= (unsigned long)ttm
551 << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
552 }
553
554 /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
555 for (i = 0; i < n_ev; ++i) {
556 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
557 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
558 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
559 psel = event[i] & PM_PMCSEL_MSK;
560 isbus = event[i] & PM_BUSEVENT_MSK;
561 if (!pmc) {
562 /* Bus event or any-PMC direct event */
563 for (pmc = 0; pmc < 4; ++pmc) {
564 if (!(pmc_inuse & (1 << pmc)))
565 break;
566 }
567 if (pmc >= 4)
568 return -1;
569 pmc_inuse |= 1 << pmc;
570 } else if (pmc <= 4) {
571 /* Direct event */
572 --pmc;
573 if (isbus && (byte & 2) &&
574 (psel == 8 || psel == 0x10 || psel == 0x28))
575 /* add events on higher-numbered bus */
576 mmcr1 |= 1ul << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
577 } else {
578 /* Instructions or run cycles on PMC5/6 */
579 --pmc;
580 }
581 if (isbus && unit == PM_GRS) {
582 bit = psel & 7;
583 grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK;
584 mmcr1 |= (unsigned long)grsel << grsel_shift[bit];
585 }
586 if (power5p_marked_instr_event(event[i]))
587 mmcra |= MMCRA_SAMPLE_ENABLE;
588 if ((psel & 0x58) == 0x40 && (byte & 1) != ((pmc >> 1) & 1))
589 /* select alternate byte lane */
590 psel |= 0x10;
591 if (pmc <= 3)
592 mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
593 hwc[i] = pmc;
594 }
595
596 /* Return MMCRx values */
597 mmcr[0] = 0;
598 if (pmc_inuse & 1)
599 mmcr[0] = MMCR0_PMC1CE;
600 if (pmc_inuse & 0x3e)
601 mmcr[0] |= MMCR0_PMCjCE;
602 mmcr[1] = mmcr1;
603 mmcr[2] = mmcra;
604 return 0;
605}
606
607static void power5p_disable_pmc(unsigned int pmc, unsigned long mmcr[])
608{
609 if (pmc <= 3)
610 mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
611}
612
613static int power5p_generic_events[] = {
614 [PERF_COUNT_HW_CPU_CYCLES] = 0xf,
615 [PERF_COUNT_HW_INSTRUCTIONS] = 0x100009,
616 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x1c10a8, /* LD_REF_L1 */
617 [PERF_COUNT_HW_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */
618 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */
619 [PERF_COUNT_HW_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */
620};
621
622#define C(x) PERF_COUNT_HW_CACHE_##x
623
624/*
625 * Table of generalized cache-related events.
626 * 0 means not supported, -1 means nonsensical, other values
627 * are event codes.
628 */
629static int power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
630 [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
631 [C(OP_READ)] = { 0x1c10a8, 0x3c1088 },
632 [C(OP_WRITE)] = { 0x2c10a8, 0xc10c3 },
633 [C(OP_PREFETCH)] = { 0xc70e7, -1 },
634 },
635 [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
636 [C(OP_READ)] = { 0, 0 },
637 [C(OP_WRITE)] = { -1, -1 },
638 [C(OP_PREFETCH)] = { 0, 0 },
639 },
640 [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
641 [C(OP_READ)] = { 0, 0 },
642 [C(OP_WRITE)] = { 0, 0 },
643 [C(OP_PREFETCH)] = { 0xc50c3, 0 },
644 },
645 [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
646 [C(OP_READ)] = { 0xc20e4, 0x800c4 },
647 [C(OP_WRITE)] = { -1, -1 },
648 [C(OP_PREFETCH)] = { -1, -1 },
649 },
650 [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
651 [C(OP_READ)] = { 0, 0x800c0 },
652 [C(OP_WRITE)] = { -1, -1 },
653 [C(OP_PREFETCH)] = { -1, -1 },
654 },
655 [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
656 [C(OP_READ)] = { 0x230e4, 0x230e5 },
657 [C(OP_WRITE)] = { -1, -1 },
658 [C(OP_PREFETCH)] = { -1, -1 },
659 },
660};
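
The table above is presumably consumed by the generic code as a three-way lookup. A trimmed standalone sketch of that lookup follows, with the L1D row copied verbatim from the table (0 = not supported, -1 = nonsensical, anything else is a raw event code):

#include <stdio.h>

enum { OP_READ, OP_WRITE, OP_PREFETCH, OP_MAX };
enum { RES_ACCESS, RES_MISS, RES_MAX };

/* L1D row of the POWER5+ table above */
static const int l1d[OP_MAX][RES_MAX] = {
	[OP_READ]     = { 0x1c10a8, 0x3c1088 },
	[OP_WRITE]    = { 0x2c10a8, 0xc10c3 },
	[OP_PREFETCH] = { 0xc70e7,  -1 },
};

int main(void)
{
	int code = l1d[OP_READ][RES_MISS];

	if (code == 0)
		printf("not supported\n");
	else if (code == -1)
		printf("nonsensical combination\n");
	else
		printf("raw event code %#x (LD_MISS_L1)\n", code);
	return 0;
}
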
661
662static struct power_pmu power5p_pmu = {
663 .name = "POWER5+/++",
664 .n_counter = 6,
665 .max_alternatives = MAX_ALT,
666 .add_fields = 0x7000000000055ul,
667 .test_adder = 0x3000040000000ul,
668 .compute_mmcr = power5p_compute_mmcr,
669 .get_constraint = power5p_get_constraint,
670 .get_alternatives = power5p_get_alternatives,
671 .disable_pmc = power5p_disable_pmc,
672 .limited_pmc_event = power5p_limited_pmc_event,
673 .flags = PPMU_LIMITED_PMC5_6,
674 .n_generic = ARRAY_SIZE(power5p_generic_events),
675 .generic_events = power5p_generic_events,
676 .cache_events = &power5p_cache_events,
677};
678
679static int init_power5p_pmu(void)
680{
681 if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5+")
682 && strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5++"))
683 return -ENODEV;
684
685 return register_power_pmu(&power5p_pmu);
686}
687
688arch_initcall(init_power5p_pmu);
diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c
new file mode 100644
index 000000000000..29b2c6c0e83a
--- /dev/null
+++ b/arch/powerpc/kernel/power5-pmu.c
@@ -0,0 +1,627 @@
1/*
2 * Performance counter support for POWER5 (not POWER5++) processors.
3 *
4 * Copyright 2009 Paul Mackerras, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/kernel.h>
12#include <linux/perf_counter.h>
13#include <linux/string.h>
14#include <asm/reg.h>
15#include <asm/cputable.h>
16
17/*
18 * Bits in event code for POWER5 (not POWER5++)
19 */
20#define PM_PMC_SH 20 /* PMC number (1-based) for direct events */
21#define PM_PMC_MSK 0xf
22#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH)
23#define PM_UNIT_SH 16 /* TTMMUX number and setting - unit select */
24#define PM_UNIT_MSK 0xf
25#define PM_BYTE_SH 12 /* Byte number of event bus to use */
26#define PM_BYTE_MSK 7
27#define PM_GRS_SH 8 /* Storage subsystem mux select */
28#define PM_GRS_MSK 7
29#define PM_BUSEVENT_MSK 0x80 /* Set if event uses event bus */
30#define PM_PMCSEL_MSK 0x7f
31
32/* Values in PM_UNIT field */
33#define PM_FPU 0
34#define PM_ISU0 1
35#define PM_IFU 2
36#define PM_ISU1 3
37#define PM_IDU 4
38#define PM_ISU0_ALT 6
39#define PM_GRS 7
40#define PM_LSU0 8
41#define PM_LSU1 0xc
42#define PM_LASTUNIT 0xc
43
44/*
45 * Bits in MMCR1 for POWER5
46 */
47#define MMCR1_TTM0SEL_SH 62
48#define MMCR1_TTM1SEL_SH 60
49#define MMCR1_TTM2SEL_SH 58
50#define MMCR1_TTM3SEL_SH 56
51#define MMCR1_TTMSEL_MSK 3
52#define MMCR1_TD_CP_DBG0SEL_SH 54
53#define MMCR1_TD_CP_DBG1SEL_SH 52
54#define MMCR1_TD_CP_DBG2SEL_SH 50
55#define MMCR1_TD_CP_DBG3SEL_SH 48
56#define MMCR1_GRS_L2SEL_SH 46
57#define MMCR1_GRS_L2SEL_MSK 3
58#define MMCR1_GRS_L3SEL_SH 44
59#define MMCR1_GRS_L3SEL_MSK 3
60#define MMCR1_GRS_MCSEL_SH 41
61#define MMCR1_GRS_MCSEL_MSK 7
62#define MMCR1_GRS_FABSEL_SH 39
63#define MMCR1_GRS_FABSEL_MSK 3
64#define MMCR1_PMC1_ADDER_SEL_SH 35
65#define MMCR1_PMC2_ADDER_SEL_SH 34
66#define MMCR1_PMC3_ADDER_SEL_SH 33
67#define MMCR1_PMC4_ADDER_SEL_SH 32
68#define MMCR1_PMC1SEL_SH 25
69#define MMCR1_PMC2SEL_SH 17
70#define MMCR1_PMC3SEL_SH 9
71#define MMCR1_PMC4SEL_SH 1
72#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
73#define MMCR1_PMCSEL_MSK 0x7f
74
75/*
76 * Bits in MMCRA
77 */
78
79/*
80 * Layout of constraint bits:
81 * 6666555555555544444444443333333333222222222211111111110000000000
82 * 3210987654321098765432109876543210987654321098765432109876543210
83 * <><>[ ><><>< ><> [ >[ >[ >< >< >< >< ><><><><><><>
84 * T0T1 NC G0G1G2 G3 UC PS1PS2 B0 B1 B2 B3 P6P5P4P3P2P1
85 *
86 * T0 - TTM0 constraint
87 * 54-55: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0xc0_0000_0000_0000
88 *
89 * T1 - TTM1 constraint
90 * 52-53: TTM1SEL value (0=IDU, 3=GRS) 0x30_0000_0000_0000
91 *
92 * NC - number of counters
93 * 51: NC error 0x0008_0000_0000_0000
94 * 48-50: number of events needing PMC1-4 0x0007_0000_0000_0000
95 *
96 * G0..G3 - GRS mux constraints
97 * 46-47: GRS_L2SEL value
98 * 44-45: GRS_L3SEL value
 99 * 41-43: GRS_MCSEL value
100 * 39-40: GRS_FABSEL value
101 * Note that these match up with their bit positions in MMCR1
102 *
103 * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS
104 * 37: UC3 error 0x20_0000_0000
105 * 36: FPU|IFU|ISU1 events needed 0x10_0000_0000
106 * 35: ISU0 events needed 0x08_0000_0000
107 * 34: IDU|GRS events needed 0x04_0000_0000
108 *
109 * PS1
110 * 33: PS1 error 0x2_0000_0000
111 * 31-32: count of events needing PMC1/2 0x1_8000_0000
112 *
113 * PS2
114 * 30: PS2 error 0x4000_0000
115 * 28-29: count of events needing PMC3/4 0x3000_0000
116 *
117 * B0
118 * 24-27: Byte 0 event source 0x0f00_0000
119 * Encoding as for the event code
120 *
121 * B1, B2, B3
122 * 20-23, 16-19, 12-15: Byte 1, 2, 3 event sources
123 *
124 * P1..P6
125 * 0-11: Count of events needing PMC1..PMC6
126 */
127
128static const int grsel_shift[8] = {
129 MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH,
130 MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH,
131 MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH
132};
133
134/* Masks and values for using events from the various units */
135static unsigned long unit_cons[PM_LASTUNIT+1][2] = {
136 [PM_FPU] = { 0xc0002000000000ul, 0x00001000000000ul },
137 [PM_ISU0] = { 0x00002000000000ul, 0x00000800000000ul },
138 [PM_ISU1] = { 0xc0002000000000ul, 0xc0001000000000ul },
139 [PM_IFU] = { 0xc0002000000000ul, 0x80001000000000ul },
140 [PM_IDU] = { 0x30002000000000ul, 0x00000400000000ul },
141 [PM_GRS] = { 0x30002000000000ul, 0x30000400000000ul },
142};
143
144static int power5_get_constraint(u64 event, unsigned long *maskp,
145 unsigned long *valp)
146{
147 int pmc, byte, unit, sh;
148 int bit, fmask;
149 unsigned long mask = 0, value = 0;
150 int grp = -1;
151
152 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
153 if (pmc) {
154 if (pmc > 6)
155 return -1;
156 sh = (pmc - 1) * 2;
157 mask |= 2 << sh;
158 value |= 1 << sh;
159 if (pmc <= 4)
160 grp = (pmc - 1) >> 1;
161 else if (event != 0x500009 && event != 0x600005)
162 return -1;
163 }
164 if (event & PM_BUSEVENT_MSK) {
165 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
166 if (unit > PM_LASTUNIT)
167 return -1;
168 if (unit == PM_ISU0_ALT)
169 unit = PM_ISU0;
170 mask |= unit_cons[unit][0];
171 value |= unit_cons[unit][1];
172 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
173 if (byte >= 4) {
174 if (unit != PM_LSU1)
175 return -1;
176 /* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */
177 ++unit;
178 byte &= 3;
179 }
180 if (unit == PM_GRS) {
181 bit = event & 7;
182 fmask = (bit == 6)? 7: 3;
183 sh = grsel_shift[bit];
184 mask |= (unsigned long)fmask << sh;
185 value |= (unsigned long)((event >> PM_GRS_SH) & fmask)
186 << sh;
187 }
188 /*
189 * Bus events on bytes 0 and 2 can be counted
190 * on PMC1/2; bytes 1 and 3 on PMC3/4.
191 */
192 if (!pmc)
193 grp = byte & 1;
194 /* Set byte lane select field */
195 mask |= 0xfUL << (24 - 4 * byte);
196 value |= (unsigned long)unit << (24 - 4 * byte);
197 }
198 if (grp == 0) {
199 /* increment PMC1/2 field */
200 mask |= 0x200000000ul;
201 value |= 0x080000000ul;
202 } else if (grp == 1) {
203 /* increment PMC3/4 field */
204 mask |= 0x40000000ul;
205 value |= 0x10000000ul;
206 }
207 if (pmc < 5) {
208 /* need a counter from PMC1-4 set */
209 mask |= 0x8000000000000ul;
210 value |= 0x1000000000000ul;
211 }
212 *maskp = mask;
213 *valp = value;
214 return 0;
215}
216
217#define MAX_ALT 3 /* at most 3 alternatives for any event */
218
219static const unsigned int event_alternatives[][MAX_ALT] = {
220 { 0x120e4, 0x400002 }, /* PM_GRP_DISP_REJECT */
221 { 0x410c7, 0x441084 }, /* PM_THRD_L2MISS_BOTH_CYC */
222 { 0x100005, 0x600005 }, /* PM_RUN_CYC */
223 { 0x100009, 0x200009, 0x500009 }, /* PM_INST_CMPL */
224 { 0x300009, 0x400009 }, /* PM_INST_DISP */
225};
226
227/*
228 * Scan the alternatives table for a match and return the
229 * index into the alternatives table if found, else -1.
230 */
231static int find_alternative(u64 event)
232{
233 int i, j;
234
235 for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
236 if (event < event_alternatives[i][0])
237 break;
238 for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j)
239 if (event == event_alternatives[i][j])
240 return i;
241 }
242 return -1;
243}
244
245static const unsigned char bytedecode_alternatives[4][4] = {
246 /* PMC 1 */ { 0x21, 0x23, 0x25, 0x27 },
247 /* PMC 2 */ { 0x07, 0x17, 0x0e, 0x1e },
248 /* PMC 3 */ { 0x20, 0x22, 0x24, 0x26 },
249 /* PMC 4 */ { 0x07, 0x17, 0x0e, 0x1e }
250};
251
252/*
253 * Some direct events for decodes of event bus byte 3 have alternative
254 * PMCSEL values on other counters. This returns the alternative
255 * event code for those that do, or -1 otherwise.
256 */
257static s64 find_alternative_bdecode(u64 event)
258{
259 int pmc, altpmc, pp, j;
260
261 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
262 if (pmc == 0 || pmc > 4)
263 return -1;
264 altpmc = 5 - pmc; /* 1 <-> 4, 2 <-> 3 */
265 pp = event & PM_PMCSEL_MSK;
266 for (j = 0; j < 4; ++j) {
267 if (bytedecode_alternatives[pmc - 1][j] == pp) {
268 return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) |
269 (altpmc << PM_PMC_SH) |
270 bytedecode_alternatives[altpmc - 1][j];
271 }
272 }
273 return -1;
274}
275
276static int power5_get_alternatives(u64 event, unsigned int flags, u64 alt[])
277{
278 int i, j, nalt = 1;
279 s64 ae;
280
281 alt[0] = event;
282 nalt = 1;
283 i = find_alternative(event);
284 if (i >= 0) {
285 for (j = 0; j < MAX_ALT; ++j) {
286 ae = event_alternatives[i][j];
287 if (ae && ae != event)
288 alt[nalt++] = ae;
289 }
290 } else {
291 ae = find_alternative_bdecode(event);
292 if (ae > 0)
293 alt[nalt++] = ae;
294 }
295 return nalt;
296}
297
298/*
299 * Map of which direct events on which PMCs are marked instruction events.
300 * Indexed by PMCSEL value, bit i (LE) set if PMC i is a marked event.
301 * Bit 0 is set if it is marked for all PMCs.
302 * The 0x80 bit indicates a byte decode PMCSEL value.
303 */
304static unsigned char direct_event_is_marked[0x28] = {
305 0, /* 00 */
306 0x1f, /* 01 PM_IOPS_CMPL */
307 0x2, /* 02 PM_MRK_GRP_DISP */
308 0xe, /* 03 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
309 0, /* 04 */
310 0x1c, /* 05 PM_MRK_BRU_FIN, PM_MRK_INST_FIN, PM_MRK_CRU_FIN */
311 0x80, /* 06 */
312 0x80, /* 07 */
313 0, 0, 0,/* 08 - 0a */
314 0x18, /* 0b PM_THRESH_TIMEO, PM_MRK_GRP_TIMEO */
315 0, /* 0c */
316 0x80, /* 0d */
317 0x80, /* 0e */
318 0, /* 0f */
319 0, /* 10 */
320 0x14, /* 11 PM_MRK_GRP_BR_REDIR, PM_MRK_GRP_IC_MISS */
321 0, /* 12 */
322 0x10, /* 13 PM_MRK_GRP_CMPL */
323 0x1f, /* 14 PM_GRP_MRK, PM_MRK_{FXU,FPU,LSU}_FIN */
324 0x2, /* 15 PM_MRK_GRP_ISSUED */
325 0x80, /* 16 */
326 0x80, /* 17 */
327 0, 0, 0, 0, 0,
328 0x80, /* 1d */
329 0x80, /* 1e */
330 0, /* 1f */
331 0x80, /* 20 */
332 0x80, /* 21 */
333 0x80, /* 22 */
334 0x80, /* 23 */
335 0x80, /* 24 */
336 0x80, /* 25 */
337 0x80, /* 26 */
338 0x80, /* 27 */
339};
340
341/*
342 * Returns 1 if event counts things relating to marked instructions
343 * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
344 */
345static int power5_marked_instr_event(u64 event)
346{
347 int pmc, psel;
348 int bit, byte, unit;
349 u32 mask;
350
351 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
352 psel = event & PM_PMCSEL_MSK;
353 if (pmc >= 5)
354 return 0;
355
356 bit = -1;
357 if (psel < sizeof(direct_event_is_marked)) {
358 if (direct_event_is_marked[psel] & (1 << pmc))
359 return 1;
360 if (direct_event_is_marked[psel] & 0x80)
361 bit = 4;
362 else if (psel == 0x08)
363 bit = pmc - 1;
364 else if (psel == 0x10)
365 bit = 4 - pmc;
366 else if (psel == 0x1b && (pmc == 1 || pmc == 3))
367 bit = 4;
368 } else if ((psel & 0x58) == 0x40)
369 bit = psel & 7;
370
371 if (!(event & PM_BUSEVENT_MSK))
372 return 0;
373
374 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
375 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
376 if (unit == PM_LSU0) {
377 /* byte 1 bits 0-7, byte 2 bits 0,2-4,6 */
378 mask = 0x5dff00;
379 } else if (unit == PM_LSU1 && byte >= 4) {
380 byte -= 4;
381 /* byte 4 bits 1,3,5,7, byte 5 bits 6-7, byte 7 bits 0-4,6 */
382 mask = 0x5f00c0aa;
383 } else
384 return 0;
385
386 return (mask >> (byte * 8 + bit)) & 1;
387}
388
389static int power5_compute_mmcr(u64 event[], int n_ev,
390 unsigned int hwc[], unsigned long mmcr[])
391{
392 unsigned long mmcr1 = 0;
393 unsigned long mmcra = 0;
394 unsigned int pmc, unit, byte, psel;
395 unsigned int ttm, grp;
396 int i, isbus, bit, grsel;
397 unsigned int pmc_inuse = 0;
398 unsigned int pmc_grp_use[2];
399 unsigned char busbyte[4];
400 unsigned char unituse[16];
401 int ttmuse;
402
403 if (n_ev > 6)
404 return -1;
405
406 /* First pass to count resource use */
407 pmc_grp_use[0] = pmc_grp_use[1] = 0;
408 memset(busbyte, 0, sizeof(busbyte));
409 memset(unituse, 0, sizeof(unituse));
410 for (i = 0; i < n_ev; ++i) {
411 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
412 if (pmc) {
413 if (pmc > 6)
414 return -1;
415 if (pmc_inuse & (1 << (pmc - 1)))
416 return -1;
417 pmc_inuse |= 1 << (pmc - 1);
418 /* count 1/2 vs 3/4 use */
419 if (pmc <= 4)
420 ++pmc_grp_use[(pmc - 1) >> 1];
421 }
422 if (event[i] & PM_BUSEVENT_MSK) {
423 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
424 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
425 if (unit > PM_LASTUNIT)
426 return -1;
427 if (unit == PM_ISU0_ALT)
428 unit = PM_ISU0;
429 if (byte >= 4) {
430 if (unit != PM_LSU1)
431 return -1;
432 ++unit;
433 byte &= 3;
434 }
435 if (!pmc)
436 ++pmc_grp_use[byte & 1];
437 if (busbyte[byte] && busbyte[byte] != unit)
438 return -1;
439 busbyte[byte] = unit;
440 unituse[unit] = 1;
441 }
442 }
443 if (pmc_grp_use[0] > 2 || pmc_grp_use[1] > 2)
444 return -1;
445
446 /*
447 * Assign resources and set multiplexer selects.
448 *
449 * PM_ISU0 can go either on TTM0 or TTM1, but that's the only
450 * choice we have to deal with.
451 */
452 if (unituse[PM_ISU0] &
453 (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) {
454 unituse[PM_ISU0_ALT] = 1; /* move ISU to TTM1 */
455 unituse[PM_ISU0] = 0;
456 }
457 /* Set TTM[01]SEL fields. */
458 ttmuse = 0;
459 for (i = PM_FPU; i <= PM_ISU1; ++i) {
460 if (!unituse[i])
461 continue;
462 if (ttmuse++)
463 return -1;
464 mmcr1 |= (unsigned long)i << MMCR1_TTM0SEL_SH;
465 }
466 ttmuse = 0;
467 for (; i <= PM_GRS; ++i) {
468 if (!unituse[i])
469 continue;
470 if (ttmuse++)
471 return -1;
472 mmcr1 |= (unsigned long)(i & 3) << MMCR1_TTM1SEL_SH;
473 }
474 if (ttmuse > 1)
475 return -1;
476
477 /* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */
478 for (byte = 0; byte < 4; ++byte) {
479 unit = busbyte[byte];
480 if (!unit)
481 continue;
482 if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) {
483 /* get ISU0 through TTM1 rather than TTM0 */
484 unit = PM_ISU0_ALT;
485 } else if (unit == PM_LSU1 + 1) {
486 /* select lower word of LSU1 for this byte */
487 mmcr1 |= 1ul << (MMCR1_TTM3SEL_SH + 3 - byte);
488 }
489 ttm = unit >> 2;
490 mmcr1 |= (unsigned long)ttm
491 << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
492 }
493
494 /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
495 for (i = 0; i < n_ev; ++i) {
496 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
497 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
498 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
499 psel = event[i] & PM_PMCSEL_MSK;
500 isbus = event[i] & PM_BUSEVENT_MSK;
501 if (!pmc) {
502 /* Bus event or any-PMC direct event */
503 for (pmc = 0; pmc < 4; ++pmc) {
504 if (pmc_inuse & (1 << pmc))
505 continue;
506 grp = (pmc >> 1) & 1;
507 if (isbus) {
508 if (grp == (byte & 1))
509 break;
510 } else if (pmc_grp_use[grp] < 2) {
511 ++pmc_grp_use[grp];
512 break;
513 }
514 }
515 pmc_inuse |= 1 << pmc;
516 } else if (pmc <= 4) {
517 /* Direct event */
518 --pmc;
519 if ((psel == 8 || psel == 0x10) && isbus && (byte & 2))
520 /* add events on higher-numbered bus */
521 mmcr1 |= 1ul << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
522 } else {
523 /* Instructions or run cycles on PMC5/6 */
524 --pmc;
525 }
526 if (isbus && unit == PM_GRS) {
527 bit = psel & 7;
528 grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK;
529 mmcr1 |= (unsigned long)grsel << grsel_shift[bit];
530 }
531 if (power5_marked_instr_event(event[i]))
532 mmcra |= MMCRA_SAMPLE_ENABLE;
533 if (pmc <= 3)
534 mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
535 hwc[i] = pmc;
536 }
537
538 /* Return MMCRx values */
539 mmcr[0] = 0;
540 if (pmc_inuse & 1)
541 mmcr[0] = MMCR0_PMC1CE;
542 if (pmc_inuse & 0x3e)
543 mmcr[0] |= MMCR0_PMCjCE;
544 mmcr[1] = mmcr1;
545 mmcr[2] = mmcra;
546 return 0;
547}
548
549static void power5_disable_pmc(unsigned int pmc, unsigned long mmcr[])
550{
551 if (pmc <= 3)
552 mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
553}
554
555static int power5_generic_events[] = {
556 [PERF_COUNT_HW_CPU_CYCLES] = 0xf,
557 [PERF_COUNT_HW_INSTRUCTIONS] = 0x100009,
558 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4c1090, /* LD_REF_L1 */
559 [PERF_COUNT_HW_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */
560 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */
561 [PERF_COUNT_HW_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */
562};
563
564#define C(x) PERF_COUNT_HW_CACHE_##x
565
566/*
567 * Table of generalized cache-related events.
568 * 0 means not supported, -1 means nonsensical, other values
569 * are event codes.
570 */
571static int power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
572 [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
573 [C(OP_READ)] = { 0x4c1090, 0x3c1088 },
574 [C(OP_WRITE)] = { 0x3c1090, 0xc10c3 },
575 [C(OP_PREFETCH)] = { 0xc70e7, 0 },
576 },
577 [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
578 [C(OP_READ)] = { 0, 0 },
579 [C(OP_WRITE)] = { -1, -1 },
580 [C(OP_PREFETCH)] = { 0, 0 },
581 },
582 [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
583 [C(OP_READ)] = { 0, 0x3c309b },
584 [C(OP_WRITE)] = { 0, 0 },
585 [C(OP_PREFETCH)] = { 0xc50c3, 0 },
586 },
587 [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
588 [C(OP_READ)] = { 0x2c4090, 0x800c4 },
589 [C(OP_WRITE)] = { -1, -1 },
590 [C(OP_PREFETCH)] = { -1, -1 },
591 },
592 [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
593 [C(OP_READ)] = { 0, 0x800c0 },
594 [C(OP_WRITE)] = { -1, -1 },
595 [C(OP_PREFETCH)] = { -1, -1 },
596 },
597 [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
598 [C(OP_READ)] = { 0x230e4, 0x230e5 },
599 [C(OP_WRITE)] = { -1, -1 },
600 [C(OP_PREFETCH)] = { -1, -1 },
601 },
602};
603
604static struct power_pmu power5_pmu = {
605 .name = "POWER5",
606 .n_counter = 6,
607 .max_alternatives = MAX_ALT,
608 .add_fields = 0x7000090000555ul,
609 .test_adder = 0x3000490000000ul,
610 .compute_mmcr = power5_compute_mmcr,
611 .get_constraint = power5_get_constraint,
612 .get_alternatives = power5_get_alternatives,
613 .disable_pmc = power5_disable_pmc,
614 .n_generic = ARRAY_SIZE(power5_generic_events),
615 .generic_events = power5_generic_events,
616 .cache_events = &power5_cache_events,
617};
618
619static int init_power5_pmu(void)
620{
621 if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5"))
622 return -ENODEV;
623
624 return register_power_pmu(&power5_pmu);
625}
626
627arch_initcall(init_power5_pmu);
diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c
new file mode 100644
index 000000000000..09ae5bf5bda7
--- /dev/null
+++ b/arch/powerpc/kernel/power6-pmu.c
@@ -0,0 +1,546 @@
1/*
2 * Performance counter support for POWER6 processors.
3 *
4 * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/kernel.h>
12#include <linux/perf_counter.h>
13#include <linux/string.h>
14#include <asm/reg.h>
15#include <asm/cputable.h>
16
17/*
18 * Bits in event code for POWER6
19 */
20#define PM_PMC_SH 20 /* PMC number (1-based) for direct events */
21#define PM_PMC_MSK 0x7
22#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH)
 23#define PM_UNIT_SH 16 /* Unit event comes from (TTMxSEL encoding) */
24#define PM_UNIT_MSK 0xf
25#define PM_UNIT_MSKS (PM_UNIT_MSK << PM_UNIT_SH)
26#define PM_LLAV 0x8000 /* Load lookahead match value */
27#define PM_LLA 0x4000 /* Load lookahead match enable */
28#define PM_BYTE_SH 12 /* Byte of event bus to use */
29#define PM_BYTE_MSK 3
30#define PM_SUBUNIT_SH 8 /* Subunit event comes from (NEST_SEL enc.) */
31#define PM_SUBUNIT_MSK 7
32#define PM_SUBUNIT_MSKS (PM_SUBUNIT_MSK << PM_SUBUNIT_SH)
33#define PM_PMCSEL_MSK 0xff /* PMCxSEL value */
34#define PM_BUSEVENT_MSK 0xf3700
35
36/*
37 * Bits in MMCR1 for POWER6
38 */
39#define MMCR1_TTM0SEL_SH 60
40#define MMCR1_TTMSEL_SH(n) (MMCR1_TTM0SEL_SH - (n) * 4)
41#define MMCR1_TTMSEL_MSK 0xf
42#define MMCR1_TTMSEL(m, n) (((m) >> MMCR1_TTMSEL_SH(n)) & MMCR1_TTMSEL_MSK)
43#define MMCR1_NESTSEL_SH 45
44#define MMCR1_NESTSEL_MSK 0x7
45#define MMCR1_NESTSEL(m) (((m) >> MMCR1_NESTSEL_SH) & MMCR1_NESTSEL_MSK)
46#define MMCR1_PMC1_LLA (1ul << 44)
47#define MMCR1_PMC1_LLA_VALUE (1ul << 39)
48#define MMCR1_PMC1_ADDR_SEL (1ul << 35)
49#define MMCR1_PMC1SEL_SH 24
50#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
51#define MMCR1_PMCSEL_MSK 0xff
52
53/*
54 * Map of which direct events on which PMCs are marked instruction events.
55 * Indexed by PMCSEL value >> 1.
56 * Bottom 4 bits are a map of which PMCs are interesting,
57 * top 4 bits say what sort of event:
58 * 0 = direct marked event,
59 * 1 = byte decode event,
60 * 4 = add/and event (PMC1 -> bits 0 & 4),
61 * 5 = add/and event (PMC1 -> bits 1 & 5),
62 * 6 = add/and event (PMC1 -> bits 2 & 6),
63 * 7 = add/and event (PMC1 -> bits 3 & 7).
64 */
65static unsigned char direct_event_is_marked[0x60 >> 1] = {
66 0, /* 00 */
67 0, /* 02 */
68 0, /* 04 */
69 0x07, /* 06 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
70 0x04, /* 08 PM_MRK_DFU_FIN */
71 0x06, /* 0a PM_MRK_IFU_FIN, PM_MRK_INST_FIN */
72 0, /* 0c */
73 0, /* 0e */
74 0x02, /* 10 PM_MRK_INST_DISP */
75 0x08, /* 12 PM_MRK_LSU_DERAT_MISS */
76 0, /* 14 */
77 0, /* 16 */
78 0x0c, /* 18 PM_THRESH_TIMEO, PM_MRK_INST_FIN */
79 0x0f, /* 1a PM_MRK_INST_DISP, PM_MRK_{FXU,FPU,LSU}_FIN */
80 0x01, /* 1c PM_MRK_INST_ISSUED */
81 0, /* 1e */
82 0, /* 20 */
83 0, /* 22 */
84 0, /* 24 */
85 0, /* 26 */
86 0x15, /* 28 PM_MRK_DATA_FROM_L2MISS, PM_MRK_DATA_FROM_L3MISS */
87 0, /* 2a */
88 0, /* 2c */
89 0, /* 2e */
90 0x4f, /* 30 */
91 0x7f, /* 32 */
92 0x4f, /* 34 */
93 0x5f, /* 36 */
94 0x6f, /* 38 */
95 0x4f, /* 3a */
96 0, /* 3c */
97 0x08, /* 3e PM_MRK_INST_TIMEO */
98 0x1f, /* 40 */
99 0x1f, /* 42 */
100 0x1f, /* 44 */
101 0x1f, /* 46 */
102 0x1f, /* 48 */
103 0x1f, /* 4a */
104 0x1f, /* 4c */
105 0x1f, /* 4e */
106 0, /* 50 */
107 0x05, /* 52 PM_MRK_BR_TAKEN, PM_MRK_BR_MPRED */
108 0x1c, /* 54 PM_MRK_PTEG_FROM_L3MISS, PM_MRK_PTEG_FROM_L2MISS */
109 0x02, /* 56 PM_MRK_LD_MISS_L1 */
110 0, /* 58 */
111 0, /* 5a */
112 0, /* 5c */
113 0, /* 5e */
114};
115
116/*
117 * Masks showing for each unit which bits are marked events.
118 * These masks are in LE order, i.e. 0x00000001 is byte 0, bit 0.
119 */
120static u32 marked_bus_events[16] = {
121 0x01000000, /* direct events set 1: byte 3 bit 0 */
122 0x00010000, /* direct events set 2: byte 2 bit 0 */
123 0, 0, 0, 0, /* IDU, IFU, nest: nothing */
124 0x00000088, /* VMX set 1: byte 0 bits 3, 7 */
125 0x000000c0, /* VMX set 2: byte 0 bits 4-7 */
126 0x04010000, /* LSU set 1: byte 2 bit 0, byte 3 bit 2 */
127 0xff010000u, /* LSU set 2: byte 2 bit 0, all of byte 3 */
128 0, /* LSU set 3 */
129 0x00000010, /* VMX set 3: byte 0 bit 4 */
130 0, /* BFP set 1 */
131 0x00000022, /* BFP set 2: byte 0 bits 1, 5 */
132 0, 0
133};
134
135/*
136 * Returns 1 if event counts things relating to marked instructions
137 * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
138 */
139static int power6_marked_instr_event(u64 event)
140{
141 int pmc, psel, ptype;
142 int bit, byte, unit;
143 u32 mask;
144
145 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
146 psel = (event & PM_PMCSEL_MSK) >> 1; /* drop edge/level bit */
147 if (pmc >= 5)
148 return 0;
149
150 bit = -1;
151 if (psel < sizeof(direct_event_is_marked)) {
152 ptype = direct_event_is_marked[psel];
153 if (pmc == 0 || !(ptype & (1 << (pmc - 1))))
154 return 0;
155 ptype >>= 4;
156 if (ptype == 0)
157 return 1;
158 if (ptype == 1)
159 bit = 0;
160 else
161 bit = ptype ^ (pmc - 1);
162 } else if ((psel & 0x48) == 0x40)
163 bit = psel & 7;
164
165 if (!(event & PM_BUSEVENT_MSK) || bit == -1)
166 return 0;
167
168 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
169 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
170 mask = marked_bus_events[unit];
171 return (mask >> (byte * 8 + bit)) & 1;
172}
173
174/*
175 * Assign PMC numbers and compute MMCR1 value for a set of events
176 */
177static int p6_compute_mmcr(u64 event[], int n_ev,
178 unsigned int hwc[], unsigned long mmcr[])
179{
180 unsigned long mmcr1 = 0;
181 unsigned long mmcra = 0;
182 int i;
183 unsigned int pmc, ev, b, u, s, psel;
184 unsigned int ttmset = 0;
185 unsigned int pmc_inuse = 0;
186
187 if (n_ev > 6)
188 return -1;
189 for (i = 0; i < n_ev; ++i) {
190 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
191 if (pmc) {
192 if (pmc_inuse & (1 << (pmc - 1)))
193 return -1; /* collision! */
194 pmc_inuse |= 1 << (pmc - 1);
195 }
196 }
197 for (i = 0; i < n_ev; ++i) {
198 ev = event[i];
199 pmc = (ev >> PM_PMC_SH) & PM_PMC_MSK;
200 if (pmc) {
201 --pmc;
202 } else {
203 /* can go on any PMC; find a free one */
204 for (pmc = 0; pmc < 4; ++pmc)
205 if (!(pmc_inuse & (1 << pmc)))
206 break;
207 if (pmc >= 4)
208 return -1;
209 pmc_inuse |= 1 << pmc;
210 }
211 hwc[i] = pmc;
212 psel = ev & PM_PMCSEL_MSK;
213 if (ev & PM_BUSEVENT_MSK) {
214 /* this event uses the event bus */
215 b = (ev >> PM_BYTE_SH) & PM_BYTE_MSK;
216 u = (ev >> PM_UNIT_SH) & PM_UNIT_MSK;
217 /* check for conflict on this byte of event bus */
218 if ((ttmset & (1 << b)) && MMCR1_TTMSEL(mmcr1, b) != u)
219 return -1;
220 mmcr1 |= (unsigned long)u << MMCR1_TTMSEL_SH(b);
221 ttmset |= 1 << b;
222 if (u == 5) {
223 /* Nest events have a further mux */
224 s = (ev >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK;
225 if ((ttmset & 0x10) &&
226 MMCR1_NESTSEL(mmcr1) != s)
227 return -1;
228 ttmset |= 0x10;
229 mmcr1 |= (unsigned long)s << MMCR1_NESTSEL_SH;
230 }
231 if (0x30 <= psel && psel <= 0x3d) {
232 /* these need the PMCx_ADDR_SEL bits */
233 if (b >= 2)
234 mmcr1 |= MMCR1_PMC1_ADDR_SEL >> pmc;
235 }
236 /* bus select values are different for PMC3/4 */
237 if (pmc >= 2 && (psel & 0x90) == 0x80)
238 psel ^= 0x20;
239 }
240 if (ev & PM_LLA) {
241 mmcr1 |= MMCR1_PMC1_LLA >> pmc;
242 if (ev & PM_LLAV)
243 mmcr1 |= MMCR1_PMC1_LLA_VALUE >> pmc;
244 }
245 if (power6_marked_instr_event(event[i]))
246 mmcra |= MMCRA_SAMPLE_ENABLE;
247 if (pmc < 4)
248 mmcr1 |= (unsigned long)psel << MMCR1_PMCSEL_SH(pmc);
249 }
250 mmcr[0] = 0;
251 if (pmc_inuse & 1)
252 mmcr[0] = MMCR0_PMC1CE;
253 if (pmc_inuse & 0xe)
254 mmcr[0] |= MMCR0_PMCjCE;
255 mmcr[1] = mmcr1;
256 mmcr[2] = mmcra;
257 return 0;
258}
259
260/*
261 * Layout of constraint bits:
262 *
263 * 0-1 add field: number of uses of PMC1 (max 1)
264 * 2-3, 4-5, 6-7, 8-9, 10-11: ditto for PMC2, 3, 4, 5, 6
265 * 12-15 add field: number of uses of PMC1-4 (max 4)
266 * 16-19 select field: unit on byte 0 of event bus
267 * 20-23, 24-27, 28-31 ditto for bytes 1, 2, 3
268 * 32-34 select field: nest (subunit) event selector
269 */
270static int p6_get_constraint(u64 event, unsigned long *maskp,
271 unsigned long *valp)
272{
273 int pmc, byte, sh, subunit;
274 unsigned long mask = 0, value = 0;
275
276 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
277 if (pmc) {
278 if (pmc > 4 && !(event == 0x500009 || event == 0x600005))
279 return -1;
280 sh = (pmc - 1) * 2;
281 mask |= 2 << sh;
282 value |= 1 << sh;
283 }
284 if (event & PM_BUSEVENT_MSK) {
285 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
286 sh = byte * 4 + (16 - PM_UNIT_SH);
287 mask |= PM_UNIT_MSKS << sh;
288 value |= (unsigned long)(event & PM_UNIT_MSKS) << sh;
289 if ((event & PM_UNIT_MSKS) == (5 << PM_UNIT_SH)) {
290 subunit = (event >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK;
291 mask |= (unsigned long)PM_SUBUNIT_MSK << 32;
292 value |= (unsigned long)subunit << 32;
293 }
294 }
295 if (pmc <= 4) {
296 mask |= 0x8000; /* add field for count of PMC1-4 uses */
297 value |= 0x1000;
298 }
299 *maskp = mask;
300 *valp = value;
301 return 0;
302}
303
304static int p6_limited_pmc_event(u64 event)
305{
306 int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
307
308 return pmc == 5 || pmc == 6;
309}
310
311#define MAX_ALT 4 /* at most 4 alternatives for any event */
312
313static const unsigned int event_alternatives[][MAX_ALT] = {
314 { 0x0130e8, 0x2000f6, 0x3000fc }, /* PM_PTEG_RELOAD_VALID */
315 { 0x080080, 0x10000d, 0x30000c, 0x4000f0 }, /* PM_LD_MISS_L1 */
316 { 0x080088, 0x200054, 0x3000f0 }, /* PM_ST_MISS_L1 */
317 { 0x10000a, 0x2000f4, 0x600005 }, /* PM_RUN_CYC */
318 { 0x10000b, 0x2000f5 }, /* PM_RUN_COUNT */
319 { 0x10000e, 0x400010 }, /* PM_PURR */
320 { 0x100010, 0x4000f8 }, /* PM_FLUSH */
321 { 0x10001a, 0x200010 }, /* PM_MRK_INST_DISP */
322 { 0x100026, 0x3000f8 }, /* PM_TB_BIT_TRANS */
323 { 0x100054, 0x2000f0 }, /* PM_ST_FIN */
324 { 0x100056, 0x2000fc }, /* PM_L1_ICACHE_MISS */
325 { 0x1000f0, 0x40000a }, /* PM_INST_IMC_MATCH_CMPL */
326 { 0x1000f8, 0x200008 }, /* PM_GCT_EMPTY_CYC */
327 { 0x1000fc, 0x400006 }, /* PM_LSU_DERAT_MISS_CYC */
328 { 0x20000e, 0x400007 }, /* PM_LSU_DERAT_MISS */
329 { 0x200012, 0x300012 }, /* PM_INST_DISP */
330 { 0x2000f2, 0x3000f2 }, /* PM_INST_DISP */
331 { 0x2000f8, 0x300010 }, /* PM_EXT_INT */
332 { 0x2000fe, 0x300056 }, /* PM_DATA_FROM_L2MISS */
333 { 0x2d0030, 0x30001a }, /* PM_MRK_FPU_FIN */
334 { 0x30000a, 0x400018 }, /* PM_MRK_INST_FIN */
335 { 0x3000f6, 0x40000e }, /* PM_L1_DCACHE_RELOAD_VALID */
336 { 0x3000fe, 0x400056 }, /* PM_DATA_FROM_L3MISS */
337};
338
339/*
340 * This could be made more efficient with a binary search on
341 * a presorted list, if necessary
342 */
343static int find_alternatives_list(u64 event)
344{
345 int i, j;
346 unsigned int alt;
347
348 for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
349 if (event < event_alternatives[i][0])
350 return -1;
351 for (j = 0; j < MAX_ALT; ++j) {
352 alt = event_alternatives[i][j];
353 if (!alt || event < alt)
354 break;
355 if (event == alt)
356 return i;
357 }
358 }
359 return -1;
360}
361
362static int p6_get_alternatives(u64 event, unsigned int flags, u64 alt[])
363{
364 int i, j, nlim;
365 unsigned int psel, pmc;
366 unsigned int nalt = 1;
367 u64 aevent;
368
369 alt[0] = event;
370 nlim = p6_limited_pmc_event(event);
371
372 /* check the alternatives table */
373 i = find_alternatives_list(event);
374 if (i >= 0) {
375 /* copy out alternatives from list */
376 for (j = 0; j < MAX_ALT; ++j) {
377 aevent = event_alternatives[i][j];
378 if (!aevent)
379 break;
380 if (aevent != event)
381 alt[nalt++] = aevent;
382 nlim += p6_limited_pmc_event(aevent);
383 }
384
385 } else {
386 /* Check for alternative ways of computing sum events */
387 /* PMCSEL 0x32 counter N == PMCSEL 0x34 counter 5-N */
388 psel = event & (PM_PMCSEL_MSK & ~1); /* ignore edge bit */
389 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
390 if (pmc && (psel == 0x32 || psel == 0x34))
391 alt[nalt++] = ((event ^ 0x6) & ~PM_PMC_MSKS) |
392 ((5 - pmc) << PM_PMC_SH);
393
394 /* PMCSEL 0x38 counter N == PMCSEL 0x3a counter N+/-2 */
395 if (pmc && (psel == 0x38 || psel == 0x3a))
396 alt[nalt++] = ((event ^ 0x2) & ~PM_PMC_MSKS) |
397 ((pmc > 2? pmc - 2: pmc + 2) << PM_PMC_SH);
398 }
399
400 if (flags & PPMU_ONLY_COUNT_RUN) {
401 /*
402 * We're only counting in RUN state,
403 * so PM_CYC is equivalent to PM_RUN_CYC,
404 * PM_INST_CMPL === PM_RUN_INST_CMPL, PM_PURR === PM_RUN_PURR.
405 * This doesn't include alternatives that don't provide
406 * any extra flexibility in assigning PMCs (e.g.
407 * 0x10000a for PM_RUN_CYC vs. 0x1e for PM_CYC).
408 * Note that even with these additional alternatives
409 * we never end up with more than 4 alternatives for any event.
410 */
411 j = nalt;
412 for (i = 0; i < nalt; ++i) {
413 switch (alt[i]) {
414 case 0x1e: /* PM_CYC */
415 alt[j++] = 0x600005; /* PM_RUN_CYC */
416 ++nlim;
417 break;
418 case 0x10000a: /* PM_RUN_CYC */
419 alt[j++] = 0x1e; /* PM_CYC */
420 break;
421 case 2: /* PM_INST_CMPL */
422 alt[j++] = 0x500009; /* PM_RUN_INST_CMPL */
423 ++nlim;
424 break;
425 case 0x500009: /* PM_RUN_INST_CMPL */
426 alt[j++] = 2; /* PM_INST_CMPL */
427 break;
428 case 0x10000e: /* PM_PURR */
429 alt[j++] = 0x4000f4; /* PM_RUN_PURR */
430 break;
431 case 0x4000f4: /* PM_RUN_PURR */
432 alt[j++] = 0x10000e; /* PM_PURR */
433 break;
434 }
435 }
436 nalt = j;
437 }
438
439 if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) {
440 /* remove the limited PMC events */
441 j = 0;
442 for (i = 0; i < nalt; ++i) {
443 if (!p6_limited_pmc_event(alt[i])) {
444 alt[j] = alt[i];
445 ++j;
446 }
447 }
448 nalt = j;
449 } else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) {
450 /* remove all but the limited PMC events */
451 j = 0;
452 for (i = 0; i < nalt; ++i) {
453 if (p6_limited_pmc_event(alt[i])) {
454 alt[j] = alt[i];
455 ++j;
456 }
457 }
458 nalt = j;
459 }
460
461 return nalt;
462}
463
464static void p6_disable_pmc(unsigned int pmc, unsigned long mmcr[])
465{
466 /* Set PMCxSEL to 0 to disable PMCx */
467 if (pmc <= 3)
468 mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc));
469}
470
471static int power6_generic_events[] = {
472 [PERF_COUNT_HW_CPU_CYCLES] = 0x1e,
473 [PERF_COUNT_HW_INSTRUCTIONS] = 2,
474 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x280030, /* LD_REF_L1 */
475 [PERF_COUNT_HW_CACHE_MISSES] = 0x30000c, /* LD_MISS_L1 */
476 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x410a0, /* BR_PRED */
477 [PERF_COUNT_HW_BRANCH_MISSES] = 0x400052, /* BR_MPRED */
478};
479
480#define C(x) PERF_COUNT_HW_CACHE_##x
481
482/*
483 * Table of generalized cache-related events.
484 * 0 means not supported, -1 means nonsensical, other values
485 * are event codes.
486 * The "DTLB" and "ITLB" events relate to the DERAT and IERAT.
487 */
488static int power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
489 [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
490 [C(OP_READ)] = { 0x80082, 0x80080 },
491 [C(OP_WRITE)] = { 0x80086, 0x80088 },
492 [C(OP_PREFETCH)] = { 0x810a4, 0 },
493 },
494 [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
495 [C(OP_READ)] = { 0, 0x100056 },
496 [C(OP_WRITE)] = { -1, -1 },
497 [C(OP_PREFETCH)] = { 0x4008c, 0 },
498 },
499 [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
500 [C(OP_READ)] = { 0x150730, 0x250532 },
501 [C(OP_WRITE)] = { 0x250432, 0x150432 },
502 [C(OP_PREFETCH)] = { 0x810a6, 0 },
503 },
504 [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
505 [C(OP_READ)] = { 0, 0x20000e },
506 [C(OP_WRITE)] = { -1, -1 },
507 [C(OP_PREFETCH)] = { -1, -1 },
508 },
509 [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
510 [C(OP_READ)] = { 0, 0x420ce },
511 [C(OP_WRITE)] = { -1, -1 },
512 [C(OP_PREFETCH)] = { -1, -1 },
513 },
514 [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
515 [C(OP_READ)] = { 0x430e6, 0x400052 },
516 [C(OP_WRITE)] = { -1, -1 },
517 [C(OP_PREFETCH)] = { -1, -1 },
518 },
519};
520
521static struct power_pmu power6_pmu = {
522 .name = "POWER6",
523 .n_counter = 6,
524 .max_alternatives = MAX_ALT,
525 .add_fields = 0x1555,
526 .test_adder = 0x3000,
527 .compute_mmcr = p6_compute_mmcr,
528 .get_constraint = p6_get_constraint,
529 .get_alternatives = p6_get_alternatives,
530 .disable_pmc = p6_disable_pmc,
531 .limited_pmc_event = p6_limited_pmc_event,
532 .flags = PPMU_LIMITED_PMC5_6 | PPMU_ALT_SIPR,
533 .n_generic = ARRAY_SIZE(power6_generic_events),
534 .generic_events = power6_generic_events,
535 .cache_events = &power6_cache_events,
536};
537
538static int init_power6_pmu(void)
539{
540 if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power6"))
541 return -ENODEV;
542
543 return register_power_pmu(&power6_pmu);
544}
545
546arch_initcall(init_power6_pmu);
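
The find_alternatives_list() scan above can bail out as soon as the event is smaller than a row's first entry only because event_alternatives[] is kept sorted on its first column. Below is a self-contained restatement of that scan over a trimmed copy of the POWER6 table, purely to illustrate the early exit; for brevity it omits the original's per-entry "event < alt" break.

/*
 * Self-contained restatement of the alternatives scan, over a trimmed
 * copy of the table above.  Illustration only.
 */
#include <stdio.h>

#define MAX_ALT 4

static const unsigned int alts[][MAX_ALT] = {	/* sorted on column 0 */
	{ 0x10000a, 0x2000f4, 0x600005 },	/* PM_RUN_CYC */
	{ 0x10000e, 0x400010 },			/* PM_PURR */
	{ 0x100026, 0x3000f8 },			/* PM_TB_BIT_TRANS */
};

static int find_alt_row(unsigned int event)
{
	int i, j;

	for (i = 0; i < (int)(sizeof(alts) / sizeof(alts[0])); ++i) {
		if (event < alts[i][0])
			return -1;	/* sorted: no later row can match */
		for (j = 0; j < MAX_ALT && alts[i][j]; ++j)
			if (event == alts[i][j])
				return i;
	}
	return -1;
}

int main(void)
{
	printf("0x400010 -> row %d\n", find_alt_row(0x400010));	/* 1: PM_PURR */
	printf("0x123456 -> row %d\n", find_alt_row(0x123456));	/* -1: no match */
	return 0;
}
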
diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c
new file mode 100644
index 000000000000..5d755ef7ac8f
--- /dev/null
+++ b/arch/powerpc/kernel/power7-pmu.c
@@ -0,0 +1,374 @@
1/*
2 * Performance counter support for POWER7 processors.
3 *
4 * Copyright 2009 Paul Mackerras, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/kernel.h>
12#include <linux/perf_counter.h>
13#include <linux/string.h>
14#include <asm/reg.h>
15#include <asm/cputable.h>
16
17/*
18 * Bits in event code for POWER7
19 */
20#define PM_PMC_SH 16 /* PMC number (1-based) for direct events */
21#define PM_PMC_MSK 0xf
22#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH)
23#define PM_UNIT_SH 12 /* TTMMUX number and setting - unit select */
24#define PM_UNIT_MSK 0xf
25#define PM_COMBINE_SH 11 /* Combined event bit */
26#define PM_COMBINE_MSK 1
27#define PM_COMBINE_MSKS 0x800
28#define PM_L2SEL_SH 8 /* L2 event select */
29#define PM_L2SEL_MSK 7
30#define PM_PMCSEL_MSK 0xff
31
32/*
33 * Bits in MMCR1 for POWER7
34 */
35#define MMCR1_TTM0SEL_SH 60
36#define MMCR1_TTM1SEL_SH 56
37#define MMCR1_TTM2SEL_SH 52
38#define MMCR1_TTM3SEL_SH 48
39#define MMCR1_TTMSEL_MSK 0xf
40#define MMCR1_L2SEL_SH 45
41#define MMCR1_L2SEL_MSK 7
42#define MMCR1_PMC1_COMBINE_SH 35
43#define MMCR1_PMC2_COMBINE_SH 34
44#define MMCR1_PMC3_COMBINE_SH 33
45#define MMCR1_PMC4_COMBINE_SH 32
46#define MMCR1_PMC1SEL_SH 24
47#define MMCR1_PMC2SEL_SH 16
48#define MMCR1_PMC3SEL_SH 8
49#define MMCR1_PMC4SEL_SH 0
50#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8)
51#define MMCR1_PMCSEL_MSK 0xff
52
53/*
54 * Bits in MMCRA
55 */
56
57/*
58 * Layout of constraint bits:
59 * 6666555555555544444444443333333333222222222211111111110000000000
60 * 3210987654321098765432109876543210987654321098765432109876543210
61 * [ ><><><><><><>
62 * NC P6P5P4P3P2P1
63 *
64 * NC - number of counters
65 * 15: NC error 0x8000
66 * 12-14: number of events needing PMC1-4 0x7000
67 *
68 * P6
69 * 11: P6 error 0x800
70 * 10-11: Count of events needing PMC6
71 *
72 * P1..P5
73 * 0-9: Count of events needing PMC1..PMC5
74 */
75
76static int power7_get_constraint(u64 event, unsigned long *maskp,
77 unsigned long *valp)
78{
79 int pmc, sh;
80 unsigned long mask = 0, value = 0;
81
82 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
83 if (pmc) {
84 if (pmc > 6)
85 return -1;
86 sh = (pmc - 1) * 2;
87 mask |= 2 << sh;
88 value |= 1 << sh;
89 if (pmc >= 5 && !(event == 0x500fa || event == 0x600f4))
90 return -1;
91 }
92 if (pmc < 5) {
93 /* need a counter from PMC1-4 set */
94 mask |= 0x8000;
95 value |= 0x1000;
96 }
97 *maskp = mask;
98 *valp = value;
99 return 0;
100}
101
102#define MAX_ALT 2 /* at most 2 alternatives for any event */
103
104static const unsigned int event_alternatives[][MAX_ALT] = {
105 { 0x200f2, 0x300f2 }, /* PM_INST_DISP */
106 { 0x200f4, 0x600f4 }, /* PM_RUN_CYC */
107 { 0x400fa, 0x500fa }, /* PM_RUN_INST_CMPL */
108};
109
110/*
111 * Scan the alternatives table for a match and return the
112 * index into the alternatives table if found, else -1.
113 */
114static int find_alternative(u64 event)
115{
116 int i, j;
117
118 for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
119 if (event < event_alternatives[i][0])
120 break;
121 for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j)
122 if (event == event_alternatives[i][j])
123 return i;
124 }
125 return -1;
126}
127
128static s64 find_alternative_decode(u64 event)
129{
130 int pmc, psel;
131
132 /* this only handles the 4x decode events */
133 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
134 psel = event & PM_PMCSEL_MSK;
135 if ((pmc == 2 || pmc == 4) && (psel & ~7) == 0x40)
136 return event - (1 << PM_PMC_SH) + 8;
137 if ((pmc == 1 || pmc == 3) && (psel & ~7) == 0x48)
138 return event + (1 << PM_PMC_SH) - 8;
139 return -1;
140}
141
142static int power7_get_alternatives(u64 event, unsigned int flags, u64 alt[])
143{
144 int i, j, nalt = 1;
145 s64 ae;
146
147 alt[0] = event;
148 nalt = 1;
149 i = find_alternative(event);
150 if (i >= 0) {
151 for (j = 0; j < MAX_ALT; ++j) {
152 ae = event_alternatives[i][j];
153 if (ae && ae != event)
154 alt[nalt++] = ae;
155 }
156 } else {
157 ae = find_alternative_decode(event);
158 if (ae > 0)
159 alt[nalt++] = ae;
160 }
161
162 if (flags & PPMU_ONLY_COUNT_RUN) {
163 /*
164 * We're only counting in RUN state,
165 * so PM_CYC is equivalent to PM_RUN_CYC
166 * and PM_INST_CMPL === PM_RUN_INST_CMPL.
167 * This doesn't include alternatives that don't provide
168 * any extra flexibility in assigning PMCs.
169 */
170 j = nalt;
171 for (i = 0; i < nalt; ++i) {
172 switch (alt[i]) {
173 case 0x1e: /* PM_CYC */
174 alt[j++] = 0x600f4; /* PM_RUN_CYC */
175 break;
176 case 0x600f4: /* PM_RUN_CYC */
177 alt[j++] = 0x1e;
178 break;
179 case 0x2: /* PM_PPC_CMPL */
180 alt[j++] = 0x500fa; /* PM_RUN_INST_CMPL */
181 break;
182 case 0x500fa: /* PM_RUN_INST_CMPL */
183 alt[j++] = 0x2; /* PM_PPC_CMPL */
184 break;
185 }
186 }
187 nalt = j;
188 }
189
190 return nalt;
191}
192
193/*
194 * Returns 1 if event counts things relating to marked instructions
195 * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
196 */
197static int power7_marked_instr_event(u64 event)
198{
199 int pmc, psel;
200 int unit;
201
202 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
203 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
204 psel = event & PM_PMCSEL_MSK & ~1; /* trim off edge/level bit */
205 if (pmc >= 5)
206 return 0;
207
208 switch (psel >> 4) {
209 case 2:
210 return pmc == 2 || pmc == 4;
211 case 3:
212 if (psel == 0x3c)
213 return pmc == 1;
214 if (psel == 0x3e)
215 return pmc != 2;
216 return 1;
217 case 4:
218 case 5:
219 return unit == 0xd;
220 case 6:
221 if (psel == 0x64)
222 return pmc >= 3;
223 case 8:
224 return unit == 0xd;
225 }
226 return 0;
227}
228
229static int power7_compute_mmcr(u64 event[], int n_ev,
230 unsigned int hwc[], unsigned long mmcr[])
231{
232 unsigned long mmcr1 = 0;
233 unsigned long mmcra = 0;
234 unsigned int pmc, unit, combine, l2sel, psel;
235 unsigned int pmc_inuse = 0;
236 int i;
237
238 /* First pass to count resource use */
239 for (i = 0; i < n_ev; ++i) {
240 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
241 if (pmc) {
242 if (pmc > 6)
243 return -1;
244 if (pmc_inuse & (1 << (pmc - 1)))
245 return -1;
246 pmc_inuse |= 1 << (pmc - 1);
247 }
248 }
249
250 /* Second pass: assign PMCs, set all MMCR1 fields */
251 for (i = 0; i < n_ev; ++i) {
252 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
253 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
254 combine = (event[i] >> PM_COMBINE_SH) & PM_COMBINE_MSK;
255 l2sel = (event[i] >> PM_L2SEL_SH) & PM_L2SEL_MSK;
256 psel = event[i] & PM_PMCSEL_MSK;
257 if (!pmc) {
258 /* Bus event or any-PMC direct event */
259 for (pmc = 0; pmc < 4; ++pmc) {
260 if (!(pmc_inuse & (1 << pmc)))
261 break;
262 }
263 if (pmc >= 4)
264 return -1;
265 pmc_inuse |= 1 << pmc;
266 } else {
267 /* Direct or decoded event */
268 --pmc;
269 }
270 if (pmc <= 3) {
271 mmcr1 |= (unsigned long) unit
272 << (MMCR1_TTM0SEL_SH - 4 * pmc);
273 mmcr1 |= (unsigned long) combine
274 << (MMCR1_PMC1_COMBINE_SH - pmc);
275 mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
276 if (unit == 6) /* L2 events */
277 mmcr1 |= (unsigned long) l2sel
278 << MMCR1_L2SEL_SH;
279 }
280 if (power7_marked_instr_event(event[i]))
281 mmcra |= MMCRA_SAMPLE_ENABLE;
282 hwc[i] = pmc;
283 }
284
285 /* Return MMCRx values */
286 mmcr[0] = 0;
287 if (pmc_inuse & 1)
288 mmcr[0] = MMCR0_PMC1CE;
289 if (pmc_inuse & 0x3e)
290 mmcr[0] |= MMCR0_PMCjCE;
291 mmcr[1] = mmcr1;
292 mmcr[2] = mmcra;
293 return 0;
294}
295
296static void power7_disable_pmc(unsigned int pmc, unsigned long mmcr[])
297{
298 if (pmc <= 3)
299 mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc));
300}
301
302static int power7_generic_events[] = {
303 [PERF_COUNT_HW_CPU_CYCLES] = 0x1e,
304 [PERF_COUNT_HW_INSTRUCTIONS] = 2,
 305 [PERF_COUNT_HW_CACHE_REFERENCES] = 0xc880, /* LD_REF_L1_LSU */
306 [PERF_COUNT_HW_CACHE_MISSES] = 0x400f0, /* LD_MISS_L1 */
307 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x10068, /* BRU_FIN */
308 [PERF_COUNT_HW_BRANCH_MISSES] = 0x400f6, /* BR_MPRED */
309};
310
311#define C(x) PERF_COUNT_HW_CACHE_##x
312
313/*
314 * Table of generalized cache-related events.
315 * 0 means not supported, -1 means nonsensical, other values
316 * are event codes.
317 */
318static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
319 [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
320 [C(OP_READ)] = { 0x400f0, 0xc880 },
321 [C(OP_WRITE)] = { 0, 0x300f0 },
322 [C(OP_PREFETCH)] = { 0xd8b8, 0 },
323 },
324 [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
325 [C(OP_READ)] = { 0, 0x200fc },
326 [C(OP_WRITE)] = { -1, -1 },
327 [C(OP_PREFETCH)] = { 0x408a, 0 },
328 },
329 [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
330 [C(OP_READ)] = { 0x6080, 0x6084 },
331 [C(OP_WRITE)] = { 0x6082, 0x6086 },
332 [C(OP_PREFETCH)] = { 0, 0 },
333 },
334 [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
335 [C(OP_READ)] = { 0, 0x300fc },
336 [C(OP_WRITE)] = { -1, -1 },
337 [C(OP_PREFETCH)] = { -1, -1 },
338 },
339 [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
340 [C(OP_READ)] = { 0, 0x400fc },
341 [C(OP_WRITE)] = { -1, -1 },
342 [C(OP_PREFETCH)] = { -1, -1 },
343 },
344 [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
345 [C(OP_READ)] = { 0x10068, 0x400f6 },
346 [C(OP_WRITE)] = { -1, -1 },
347 [C(OP_PREFETCH)] = { -1, -1 },
348 },
349};
350
351static struct power_pmu power7_pmu = {
352 .name = "POWER7",
353 .n_counter = 6,
354 .max_alternatives = MAX_ALT + 1,
355 .add_fields = 0x1555ul,
356 .test_adder = 0x3000ul,
357 .compute_mmcr = power7_compute_mmcr,
358 .get_constraint = power7_get_constraint,
359 .get_alternatives = power7_get_alternatives,
360 .disable_pmc = power7_disable_pmc,
361 .n_generic = ARRAY_SIZE(power7_generic_events),
362 .generic_events = power7_generic_events,
363 .cache_events = &power7_cache_events,
364};
365
366static int init_power7_pmu(void)
367{
368 if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power7"))
369 return -ENODEV;
370
371 return register_power_pmu(&power7_pmu);
372}
373
374arch_initcall(init_power7_pmu);
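
To make the constraint layout described above concrete, here is a stand-alone walk-through of the PMC part of power7_get_constraint() for a hypothetical event that names PMC2 directly. The event code 0x20016 is invented for illustration; only its PMC field matters here.

/*
 * Stand-alone illustration of the PMC part of the constraint encoding.
 * The event code 0x20016 is hypothetical; only its PMC field matters.
 */
#include <stdio.h>

#define PM_PMC_SH	16
#define PM_PMC_MSK	0xf

int main(void)
{
	unsigned long long event = 0x20016ULL;	/* hypothetical event on PMC2 */
	unsigned long mask = 0, value = 0;
	int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;

	if (pmc) {
		int sh = (pmc - 1) * 2;

		mask  |= 2UL << sh;	/* "uses of PMC2" field, bits 2-3 */
		value |= 1UL << sh;
	}
	if (pmc < 5) {
		mask  |= 0x8000;	/* NC: events needing PMC1-4, bits 12-15 */
		value |= 0x1000;
	}
	printf("mask=%#lx value=%#lx\n", mask, value);	/* mask=0x8008 value=0x1004 */
	return 0;
}

Roughly speaking, the scheduling code can then sum the value words of all requested events, using add_fields and test_adder to notice when any per-PMC or NC field would exceed its allowed count.
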
diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c
new file mode 100644
index 000000000000..6637c87fe70e
--- /dev/null
+++ b/arch/powerpc/kernel/ppc970-pmu.c
@@ -0,0 +1,499 @@
1/*
2 * Performance counter support for PPC970-family processors.
3 *
4 * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11#include <linux/string.h>
12#include <linux/perf_counter.h>
13#include <linux/string.h>
14#include <asm/reg.h>
15#include <asm/cputable.h>
16
17/*
18 * Bits in event code for PPC970
19 */
20#define PM_PMC_SH 12 /* PMC number (1-based) for direct events */
21#define PM_PMC_MSK 0xf
22#define PM_UNIT_SH 8 /* TTMMUX number and setting - unit select */
23#define PM_UNIT_MSK 0xf
24#define PM_SPCSEL_SH 6
25#define PM_SPCSEL_MSK 3
26#define PM_BYTE_SH 4 /* Byte number of event bus to use */
27#define PM_BYTE_MSK 3
28#define PM_PMCSEL_MSK 0xf
29
30/* Values in PM_UNIT field */
31#define PM_NONE 0
32#define PM_FPU 1
33#define PM_VPU 2
34#define PM_ISU 3
35#define PM_IFU 4
36#define PM_IDU 5
37#define PM_STS 6
38#define PM_LSU0 7
39#define PM_LSU1U 8
40#define PM_LSU1L 9
41#define PM_LASTUNIT 9
42
43/*
44 * Bits in MMCR0 for PPC970
45 */
46#define MMCR0_PMC1SEL_SH 8
47#define MMCR0_PMC2SEL_SH 1
48#define MMCR_PMCSEL_MSK 0x1f
49
50/*
51 * Bits in MMCR1 for PPC970
52 */
53#define MMCR1_TTM0SEL_SH 62
54#define MMCR1_TTM1SEL_SH 59
55#define MMCR1_TTM3SEL_SH 53
56#define MMCR1_TTMSEL_MSK 3
57#define MMCR1_TD_CP_DBG0SEL_SH 50
58#define MMCR1_TD_CP_DBG1SEL_SH 48
59#define MMCR1_TD_CP_DBG2SEL_SH 46
60#define MMCR1_TD_CP_DBG3SEL_SH 44
61#define MMCR1_PMC1_ADDER_SEL_SH 39
62#define MMCR1_PMC2_ADDER_SEL_SH 38
63#define MMCR1_PMC6_ADDER_SEL_SH 37
64#define MMCR1_PMC5_ADDER_SEL_SH 36
65#define MMCR1_PMC8_ADDER_SEL_SH 35
66#define MMCR1_PMC7_ADDER_SEL_SH 34
67#define MMCR1_PMC3_ADDER_SEL_SH 33
68#define MMCR1_PMC4_ADDER_SEL_SH 32
69#define MMCR1_PMC3SEL_SH 27
70#define MMCR1_PMC4SEL_SH 22
71#define MMCR1_PMC5SEL_SH 17
72#define MMCR1_PMC6SEL_SH 12
73#define MMCR1_PMC7SEL_SH 7
74#define MMCR1_PMC8SEL_SH 2
75
76static short mmcr1_adder_bits[8] = {
77 MMCR1_PMC1_ADDER_SEL_SH,
78 MMCR1_PMC2_ADDER_SEL_SH,
79 MMCR1_PMC3_ADDER_SEL_SH,
80 MMCR1_PMC4_ADDER_SEL_SH,
81 MMCR1_PMC5_ADDER_SEL_SH,
82 MMCR1_PMC6_ADDER_SEL_SH,
83 MMCR1_PMC7_ADDER_SEL_SH,
84 MMCR1_PMC8_ADDER_SEL_SH
85};
86
87/*
88 * Bits in MMCRA
89 */
90
91/*
92 * Layout of constraint bits:
93 * 6666555555555544444444443333333333222222222211111111110000000000
94 * 3210987654321098765432109876543210987654321098765432109876543210
95 * <><><>[ >[ >[ >< >< >< >< ><><><><><><><><>
96 * SPT0T1 UC PS1 PS2 B0 B1 B2 B3 P1P2P3P4P5P6P7P8
97 *
98 * SP - SPCSEL constraint
99 * 48-49: SPCSEL value 0x3_0000_0000_0000
100 *
101 * T0 - TTM0 constraint
102 * 46-47: TTM0SEL value (0=FPU, 2=IFU, 3=VPU) 0xC000_0000_0000
103 *
104 * T1 - TTM1 constraint
105 * 44-45: TTM1SEL value (0=IDU, 3=STS) 0x3000_0000_0000
106 *
107 * UC - unit constraint: can't have all three of FPU|IFU|VPU, ISU, IDU|STS
108 * 43: UC3 error 0x0800_0000_0000
109 * 42: FPU|IFU|VPU events needed 0x0400_0000_0000
110 * 41: ISU events needed 0x0200_0000_0000
111 * 40: IDU|STS events needed 0x0100_0000_0000
112 *
113 * PS1
114 * 39: PS1 error 0x0080_0000_0000
115 * 36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000
116 *
117 * PS2
118 * 35: PS2 error 0x0008_0000_0000
119 * 32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000
120 *
121 * B0
122 * 28-31: Byte 0 event source 0xf000_0000
123 * Encoding as for the event code
124 *
125 * B1, B2, B3
126 * 24-27, 20-23, 16-19: Byte 1, 2, 3 event sources
127 *
128 * P1
129 * 15: P1 error 0x8000
130 * 14-15: Count of events needing PMC1
131 *
132 * P2..P8
133 * 0-13: Count of events needing PMC2..PMC8
134 */
135
136static unsigned char direct_marked_event[8] = {
137 (1<<2) | (1<<3), /* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */
138 (1<<3) | (1<<5), /* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */
139 (1<<3) | (1<<5), /* PMC3: PM_MRK_ST_CMPL_INT, PM_MRK_VMX_FIN */
140 (1<<4) | (1<<5), /* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */
141 (1<<4) | (1<<5), /* PMC5: PM_GRP_MRK, PM_MRK_GRP_TIMEO */
142 (1<<3) | (1<<4) | (1<<5),
143 /* PMC6: PM_MRK_ST_STS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */
144 (1<<4) | (1<<5), /* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */
145 (1<<4) /* PMC8: PM_MRK_LSU_FIN */
146};
147
148/*
149 * Returns 1 if event counts things relating to marked instructions
150 * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
151 */
152static int p970_marked_instr_event(u64 event)
153{
154 int pmc, psel, unit, byte, bit;
155 unsigned int mask;
156
157 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
158 psel = event & PM_PMCSEL_MSK;
159 if (pmc) {
160 if (direct_marked_event[pmc - 1] & (1 << psel))
161 return 1;
162 if (psel == 0) /* add events */
163 bit = (pmc <= 4)? pmc - 1: 8 - pmc;
164 else if (psel == 7 || psel == 13) /* decode events */
165 bit = 4;
166 else
167 return 0;
168 } else
169 bit = psel;
170
171 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
172 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
173 mask = 0;
174 switch (unit) {
175 case PM_VPU:
176 mask = 0x4c; /* byte 0 bits 2,3,6 */
177 case PM_LSU0:
178 /* byte 2 bits 0,2,3,4,6; all of byte 1 */
179 mask = 0x085dff00;
180 case PM_LSU1L:
181 mask = 0x50 << 24; /* byte 3 bits 4,6 */
182 break;
183 }
184 return (mask >> (byte * 8 + bit)) & 1;
185}
186
187/* Masks and values for using events from the various units */
188static unsigned long unit_cons[PM_LASTUNIT+1][2] = {
189 [PM_FPU] = { 0xc80000000000ull, 0x040000000000ull },
190 [PM_VPU] = { 0xc80000000000ull, 0xc40000000000ull },
191 [PM_ISU] = { 0x080000000000ull, 0x020000000000ull },
192 [PM_IFU] = { 0xc80000000000ull, 0x840000000000ull },
193 [PM_IDU] = { 0x380000000000ull, 0x010000000000ull },
194 [PM_STS] = { 0x380000000000ull, 0x310000000000ull },
195};
196
197static int p970_get_constraint(u64 event, unsigned long *maskp,
198 unsigned long *valp)
199{
200 int pmc, byte, unit, sh, spcsel;
201 unsigned long mask = 0, value = 0;
202 int grp = -1;
203
204 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
205 if (pmc) {
206 if (pmc > 8)
207 return -1;
208 sh = (pmc - 1) * 2;
209 mask |= 2 << sh;
210 value |= 1 << sh;
211 grp = ((pmc - 1) >> 1) & 1;
212 }
213 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
214 if (unit) {
215 if (unit > PM_LASTUNIT)
216 return -1;
217 mask |= unit_cons[unit][0];
218 value |= unit_cons[unit][1];
219 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
220 /*
221 * Bus events on bytes 0 and 2 can be counted
222 * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8.
223 */
224 if (!pmc)
225 grp = byte & 1;
226 /* Set byte lane select field */
227 mask |= 0xfULL << (28 - 4 * byte);
228 value |= (unsigned long)unit << (28 - 4 * byte);
229 }
230 if (grp == 0) {
231 /* increment PMC1/2/5/6 field */
232 mask |= 0x8000000000ull;
233 value |= 0x1000000000ull;
234 } else if (grp == 1) {
235 /* increment PMC3/4/7/8 field */
236 mask |= 0x800000000ull;
237 value |= 0x100000000ull;
238 }
239 spcsel = (event >> PM_SPCSEL_SH) & PM_SPCSEL_MSK;
240 if (spcsel) {
241 mask |= 3ull << 48;
242 value |= (unsigned long)spcsel << 48;
243 }
244 *maskp = mask;
245 *valp = value;
246 return 0;
247}
248
249static int p970_get_alternatives(u64 event, unsigned int flags, u64 alt[])
250{
251 alt[0] = event;
252
253 /* 2 alternatives for LSU empty */
254 if (event == 0x2002 || event == 0x3002) {
255 alt[1] = event ^ 0x1000;
256 return 2;
257 }
258
259 return 1;
260}
261
262static int p970_compute_mmcr(u64 event[], int n_ev,
263 unsigned int hwc[], unsigned long mmcr[])
264{
265 unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0;
266 unsigned int pmc, unit, byte, psel;
267 unsigned int ttm, grp;
268 unsigned int pmc_inuse = 0;
269 unsigned int pmc_grp_use[2];
270 unsigned char busbyte[4];
271 unsigned char unituse[16];
272 unsigned char unitmap[] = { 0, 0<<3, 3<<3, 1<<3, 2<<3, 0|4, 3|4 };
273 unsigned char ttmuse[2];
274 unsigned char pmcsel[8];
275 int i;
276 int spcsel;
277
278 if (n_ev > 8)
279 return -1;
280
281 /* First pass to count resource use */
282 pmc_grp_use[0] = pmc_grp_use[1] = 0;
283 memset(busbyte, 0, sizeof(busbyte));
284 memset(unituse, 0, sizeof(unituse));
285 for (i = 0; i < n_ev; ++i) {
286 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
287 if (pmc) {
288 if (pmc_inuse & (1 << (pmc - 1)))
289 return -1;
290 pmc_inuse |= 1 << (pmc - 1);
291 /* count 1/2/5/6 vs 3/4/7/8 use */
292 ++pmc_grp_use[((pmc - 1) >> 1) & 1];
293 }
294 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
295 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
296 if (unit) {
297 if (unit > PM_LASTUNIT)
298 return -1;
299 if (!pmc)
300 ++pmc_grp_use[byte & 1];
301 if (busbyte[byte] && busbyte[byte] != unit)
302 return -1;
303 busbyte[byte] = unit;
304 unituse[unit] = 1;
305 }
306 }
307 if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4)
308 return -1;
309
310 /*
311 * Assign resources and set multiplexer selects.
312 *
313 * PM_ISU can go either on TTM0 or TTM1, but that's the only
314 * choice we have to deal with.
315 */
316 if (unituse[PM_ISU] &
317 (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_VPU]))
318 unitmap[PM_ISU] = 2 | 4; /* move ISU to TTM1 */
319 /* Set TTM[01]SEL fields. */
320 ttmuse[0] = ttmuse[1] = 0;
321 for (i = PM_FPU; i <= PM_STS; ++i) {
322 if (!unituse[i])
323 continue;
324 ttm = unitmap[i];
325 ++ttmuse[(ttm >> 2) & 1];
326 mmcr1 |= (unsigned long)(ttm & ~4) << MMCR1_TTM1SEL_SH;
327 }
328 /* Check only one unit per TTMx */
329 if (ttmuse[0] > 1 || ttmuse[1] > 1)
330 return -1;
331
332 /* Set byte lane select fields and TTM3SEL. */
333 for (byte = 0; byte < 4; ++byte) {
334 unit = busbyte[byte];
335 if (!unit)
336 continue;
337 if (unit <= PM_STS)
338 ttm = (unitmap[unit] >> 2) & 1;
339 else if (unit == PM_LSU0)
340 ttm = 2;
341 else {
342 ttm = 3;
343 if (unit == PM_LSU1L && byte >= 2)
344 mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte);
345 }
346 mmcr1 |= (unsigned long)ttm
347 << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
348 }
349
350 /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
351 memset(pmcsel, 0x8, sizeof(pmcsel)); /* 8 means don't count */
352 for (i = 0; i < n_ev; ++i) {
353 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
354 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
355 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
356 psel = event[i] & PM_PMCSEL_MSK;
357 if (!pmc) {
358 /* Bus event or any-PMC direct event */
359 if (unit)
360 psel |= 0x10 | ((byte & 2) << 2);
361 else
362 psel |= 8;
363 for (pmc = 0; pmc < 8; ++pmc) {
364 if (pmc_inuse & (1 << pmc))
365 continue;
366 grp = (pmc >> 1) & 1;
367 if (unit) {
368 if (grp == (byte & 1))
369 break;
370 } else if (pmc_grp_use[grp] < 4) {
371 ++pmc_grp_use[grp];
372 break;
373 }
374 }
375 pmc_inuse |= 1 << pmc;
376 } else {
377 /* Direct event */
378 --pmc;
379 if (psel == 0 && (byte & 2))
380 /* add events on higher-numbered bus */
381 mmcr1 |= 1ull << mmcr1_adder_bits[pmc];
382 }
383 pmcsel[pmc] = psel;
384 hwc[i] = pmc;
385 spcsel = (event[i] >> PM_SPCSEL_SH) & PM_SPCSEL_MSK;
386 mmcr1 |= spcsel;
387 if (p970_marked_instr_event(event[i]))
388 mmcra |= MMCRA_SAMPLE_ENABLE;
389 }
390 for (pmc = 0; pmc < 2; ++pmc)
391 mmcr0 |= pmcsel[pmc] << (MMCR0_PMC1SEL_SH - 7 * pmc);
392 for (; pmc < 8; ++pmc)
393 mmcr1 |= (unsigned long)pmcsel[pmc]
394 << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2));
395 if (pmc_inuse & 1)
396 mmcr0 |= MMCR0_PMC1CE;
397 if (pmc_inuse & 0xfe)
398 mmcr0 |= MMCR0_PMCjCE;
399
400 mmcra |= 0x2000; /* mark only one IOP per PPC instruction */
401
402 /* Return MMCRx values */
403 mmcr[0] = mmcr0;
404 mmcr[1] = mmcr1;
405 mmcr[2] = mmcra;
406 return 0;
407}
408
409static void p970_disable_pmc(unsigned int pmc, unsigned long mmcr[])
410{
411 int shift, i;
412
413 if (pmc <= 1) {
414 shift = MMCR0_PMC1SEL_SH - 7 * pmc;
415 i = 0;
416 } else {
417 shift = MMCR1_PMC3SEL_SH - 5 * (pmc - 2);
418 i = 1;
419 }
420 /*
421 * Setting the PMCxSEL field to 0x08 disables PMC x.
422 */
423 mmcr[i] = (mmcr[i] & ~(0x1fUL << shift)) | (0x08UL << shift);
424}
425
426static int ppc970_generic_events[] = {
427 [PERF_COUNT_HW_CPU_CYCLES] = 7,
428 [PERF_COUNT_HW_INSTRUCTIONS] = 1,
429 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x8810, /* PM_LD_REF_L1 */
430 [PERF_COUNT_HW_CACHE_MISSES] = 0x3810, /* PM_LD_MISS_L1 */
431 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x431, /* PM_BR_ISSUED */
432 [PERF_COUNT_HW_BRANCH_MISSES] = 0x327, /* PM_GRP_BR_MPRED */
433};
434
435#define C(x) PERF_COUNT_HW_CACHE_##x
436
437/*
438 * Table of generalized cache-related events.
439 * 0 means not supported, -1 means nonsensical, other values
440 * are event codes.
441 */
442static int ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
443 [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
444 [C(OP_READ)] = { 0x8810, 0x3810 },
445 [C(OP_WRITE)] = { 0x7810, 0x813 },
446 [C(OP_PREFETCH)] = { 0x731, 0 },
447 },
448 [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
449 [C(OP_READ)] = { 0, 0 },
450 [C(OP_WRITE)] = { -1, -1 },
451 [C(OP_PREFETCH)] = { 0, 0 },
452 },
453 [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
454 [C(OP_READ)] = { 0, 0 },
455 [C(OP_WRITE)] = { 0, 0 },
456 [C(OP_PREFETCH)] = { 0x733, 0 },
457 },
458 [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
459 [C(OP_READ)] = { 0, 0x704 },
460 [C(OP_WRITE)] = { -1, -1 },
461 [C(OP_PREFETCH)] = { -1, -1 },
462 },
463 [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
464 [C(OP_READ)] = { 0, 0x700 },
465 [C(OP_WRITE)] = { -1, -1 },
466 [C(OP_PREFETCH)] = { -1, -1 },
467 },
468 [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
469 [C(OP_READ)] = { 0x431, 0x327 },
470 [C(OP_WRITE)] = { -1, -1 },
471 [C(OP_PREFETCH)] = { -1, -1 },
472 },
473};
474
475static struct power_pmu ppc970_pmu = {
476 .name = "PPC970/FX/MP",
477 .n_counter = 8,
478 .max_alternatives = 2,
479 .add_fields = 0x001100005555ull,
480 .test_adder = 0x013300000000ull,
481 .compute_mmcr = p970_compute_mmcr,
482 .get_constraint = p970_get_constraint,
483 .get_alternatives = p970_get_alternatives,
484 .disable_pmc = p970_disable_pmc,
485 .n_generic = ARRAY_SIZE(ppc970_generic_events),
486 .generic_events = ppc970_generic_events,
487 .cache_events = &ppc970_cache_events,
488};
489
490static int init_ppc970_pmu(void)
491{
492 if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/970")
493 && strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/970MP"))
494 return -ENODEV;
495
496 return register_power_pmu(&ppc970_pmu);
497}
498
499arch_initcall(init_ppc970_pmu);
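
A quick consistency check on the select-field arithmetic in p970_disable_pmc(): the throwaway program below recomputes, for each 0-based PMC index, which register and which 5-bit field its PMCxSEL value occupies, so the resulting shifts can be compared against the MMCR0/MMCR1 #defines at the top of the file. It is illustrative only and reuses two of those shift values verbatim.

/*
 * Throwaway consistency check, not kernel code.  Bit positions are
 * counted from the least-significant bit; each select field is 5 bits
 * wide (MMCR_PMCSEL_MSK is 0x1f).
 */
#include <stdio.h>

#define MMCR0_PMC1SEL_SH	8
#define MMCR1_PMC3SEL_SH	27

int main(void)
{
	unsigned int pmc;	/* 0-based, as in p970_disable_pmc() */

	for (pmc = 0; pmc < 8; ++pmc) {
		int reg, shift;

		if (pmc <= 1) {
			reg = 0;	/* PMC1/2 select fields live in MMCR0 */
			shift = MMCR0_PMC1SEL_SH - 7 * (int)pmc;
		} else {
			reg = 1;	/* PMC3-8 select fields live in MMCR1 */
			shift = MMCR1_PMC3SEL_SH - 5 * ((int)pmc - 2);
		}
		printf("PMC%u: MMCR%d bits %d-%d\n", pmc + 1, reg, shift, shift + 4);
	}
	return 0;
}
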
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 7b44a33f03c2..3e7135bbe40f 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -650,7 +650,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
650 p->thread.ksp_limit = (unsigned long)task_stack_page(p) + 650 p->thread.ksp_limit = (unsigned long)task_stack_page(p) +
651 _ALIGN_UP(sizeof(struct thread_info), 16); 651 _ALIGN_UP(sizeof(struct thread_info), 16);
652 652
653#ifdef CONFIG_PPC64 653#ifdef CONFIG_PPC_STD_MMU_64
654 if (cpu_has_feature(CPU_FTR_SLB)) { 654 if (cpu_has_feature(CPU_FTR_SLB)) {
655 unsigned long sp_vsid; 655 unsigned long sp_vsid;
656 unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp; 656 unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp;
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index ce01ff2474da..d4405b95bfaa 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -585,7 +585,7 @@ static void __init check_cpu_pa_features(unsigned long node)
585 ibm_pa_features, ARRAY_SIZE(ibm_pa_features)); 585 ibm_pa_features, ARRAY_SIZE(ibm_pa_features));
586} 586}
587 587
588#ifdef CONFIG_PPC64 588#ifdef CONFIG_PPC_STD_MMU_64
589static void __init check_cpu_slb_size(unsigned long node) 589static void __init check_cpu_slb_size(unsigned long node)
590{ 590{
591 u32 *slb_size_ptr; 591 u32 *slb_size_ptr;
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 2f0e64b53642..a538824616fd 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -44,10 +44,7 @@
44#include <asm/sections.h> 44#include <asm/sections.h>
45#include <asm/machdep.h> 45#include <asm/machdep.h>
46 46
47#ifdef CONFIG_LOGO_LINUX_CLUT224
48#include <linux/linux_logo.h> 47#include <linux/linux_logo.h>
49extern const struct linux_logo logo_linux_clut224;
50#endif
51 48
52/* 49/*
53 * Properties whose value is longer than this get excluded from our 50 * Properties whose value is longer than this get excluded from our
@@ -1950,8 +1947,47 @@ static void __init fixup_device_tree_maple(void)
1950 prom_setprop(isa, name, "ranges", 1947 prom_setprop(isa, name, "ranges",
1951 isa_ranges, sizeof(isa_ranges)); 1948 isa_ranges, sizeof(isa_ranges));
1952} 1949}
1950
1951#define CPC925_MC_START 0xf8000000
1952#define CPC925_MC_LENGTH 0x1000000
1953/* The values for memory-controller don't have right number of cells */
1954static void __init fixup_device_tree_maple_memory_controller(void)
1955{
1956 phandle mc;
1957 u32 mc_reg[4];
1958 char *name = "/hostbridge@f8000000";
1959 struct prom_t *_prom = &RELOC(prom);
1960 u32 ac, sc;
1961
1962 mc = call_prom("finddevice", 1, 1, ADDR(name));
1963 if (!PHANDLE_VALID(mc))
1964 return;
1965
1966 if (prom_getproplen(mc, "reg") != 8)
1967 return;
1968
1969 prom_getprop(_prom->root, "#address-cells", &ac, sizeof(ac));
1970 prom_getprop(_prom->root, "#size-cells", &sc, sizeof(sc));
1971 if ((ac != 2) || (sc != 2))
1972 return;
1973
1974 if (prom_getprop(mc, "reg", mc_reg, sizeof(mc_reg)) == PROM_ERROR)
1975 return;
1976
1977 if (mc_reg[0] != CPC925_MC_START || mc_reg[1] != CPC925_MC_LENGTH)
1978 return;
1979
1980 prom_printf("Fixing up bogus hostbridge on Maple...\n");
1981
1982 mc_reg[0] = 0x0;
1983 mc_reg[1] = CPC925_MC_START;
1984 mc_reg[2] = 0x0;
1985 mc_reg[3] = CPC925_MC_LENGTH;
1986 prom_setprop(mc, name, "reg", mc_reg, sizeof(mc_reg));
1987}
1953#else 1988#else
1954#define fixup_device_tree_maple() 1989#define fixup_device_tree_maple()
1990#define fixup_device_tree_maple_memory_controller()
1955#endif 1991#endif
1956 1992
1957#ifdef CONFIG_PPC_CHRP 1993#ifdef CONFIG_PPC_CHRP
@@ -2192,6 +2228,7 @@ static void __init fixup_device_tree_efika(void)
2192static void __init fixup_device_tree(void) 2228static void __init fixup_device_tree(void)
2193{ 2229{
2194 fixup_device_tree_maple(); 2230 fixup_device_tree_maple();
2231 fixup_device_tree_maple_memory_controller();
2195 fixup_device_tree_chrp(); 2232 fixup_device_tree_chrp();
2196 fixup_device_tree_pmac(); 2233 fixup_device_tree_pmac();
2197 fixup_device_tree_efika(); 2234 fixup_device_tree_efika();
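
The Maple fixup added above exists because the hostbridge's reg property was written with one cell per address and per size, while the tree's #address-cells and #size-cells are both 2. A minimal sketch of the widening it performs (high cell first, then low cell, for the address and again for the size) is shown below; the helper name is made up for illustration.

/*
 * Minimal sketch of the cell widening performed above.  With
 * #address-cells = #size-cells = 2, the address and the size each take
 * two 32-bit cells, high word first.  Helper name is made up.
 */
#include <stdio.h>
#include <stdint.h>

static void widen_reg(uint64_t addr, uint64_t size, uint32_t out[4])
{
	out[0] = addr >> 32;		/* address, high cell */
	out[1] = (uint32_t)addr;	/* address, low cell */
	out[2] = size >> 32;		/* size, high cell */
	out[3] = (uint32_t)size;	/* size, low cell */
}

int main(void)
{
	uint32_t reg[4];

	widen_reg(0xf8000000ULL, 0x1000000ULL, reg);	/* CPC925_MC_START/LENGTH */
	printf("reg = <%#x %#x %#x %#x>\n", reg[0], reg[1], reg[2], reg[3]);
	/* prints: reg = <0 0xf8000000 0 0x1000000> */
	return 0;
}
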
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 3635be61f899..9fa2c7dcd05a 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -704,15 +704,34 @@ void user_enable_single_step(struct task_struct *task)
704 704
705 if (regs != NULL) { 705 if (regs != NULL) {
706#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) 706#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
707 task->thread.dbcr0 &= ~DBCR0_BT;
707 task->thread.dbcr0 |= DBCR0_IDM | DBCR0_IC; 708 task->thread.dbcr0 |= DBCR0_IDM | DBCR0_IC;
708 regs->msr |= MSR_DE; 709 regs->msr |= MSR_DE;
709#else 710#else
711 regs->msr &= ~MSR_BE;
710 regs->msr |= MSR_SE; 712 regs->msr |= MSR_SE;
711#endif 713#endif
712 } 714 }
713 set_tsk_thread_flag(task, TIF_SINGLESTEP); 715 set_tsk_thread_flag(task, TIF_SINGLESTEP);
714} 716}
715 717
718void user_enable_block_step(struct task_struct *task)
719{
720 struct pt_regs *regs = task->thread.regs;
721
722 if (regs != NULL) {
723#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
724 task->thread.dbcr0 &= ~DBCR0_IC;
725 task->thread.dbcr0 = DBCR0_IDM | DBCR0_BT;
726 regs->msr |= MSR_DE;
727#else
728 regs->msr &= ~MSR_SE;
729 regs->msr |= MSR_BE;
730#endif
731 }
732 set_tsk_thread_flag(task, TIF_SINGLESTEP);
733}
734
716void user_disable_single_step(struct task_struct *task) 735void user_disable_single_step(struct task_struct *task)
717{ 736{
718 struct pt_regs *regs = task->thread.regs; 737 struct pt_regs *regs = task->thread.regs;
@@ -726,10 +745,10 @@ void user_disable_single_step(struct task_struct *task)
726 745
727 if (regs != NULL) { 746 if (regs != NULL) {
728#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) 747#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
729 task->thread.dbcr0 &= ~(DBCR0_IC | DBCR0_IDM); 748 task->thread.dbcr0 &= ~(DBCR0_IC | DBCR0_BT | DBCR0_IDM);
730 regs->msr &= ~MSR_DE; 749 regs->msr &= ~MSR_DE;
731#else 750#else
732 regs->msr &= ~MSR_SE; 751 regs->msr &= ~(MSR_SE | MSR_BE);
733#endif 752#endif
734 } 753 }
735 clear_tsk_thread_flag(task, TIF_SINGLESTEP); 754 clear_tsk_thread_flag(task, TIF_SINGLESTEP);
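
The two helpers above differ only in which trace facility they arm: classic server CPUs use MSR[SE] for per-instruction traps and MSR[BE] for per-branch traps, while 40x/BookE parts drive DBCR0[IC] and DBCR0[BT] under DBCR0[IDM]. The fragment below is a schematic restatement of the MSR side only, with made-up bit values and a stand-in function; it is not kernel code.

/*
 * Schematic restatement only, not kernel code: the MSR side of
 * per-instruction vs per-branch stepping.  Bit values and names here
 * are local stand-ins.
 */
#include <stdio.h>

#define FAKE_MSR_SE	0x400	/* stand-in for the single-step trace bit */
#define FAKE_MSR_BE	0x200	/* stand-in for the branch trace bit */

enum step_mode { STEP_NONE, STEP_INSN, STEP_BRANCH };

static unsigned long apply_step_mode(unsigned long msr, enum step_mode mode)
{
	msr &= ~(FAKE_MSR_SE | FAKE_MSR_BE);	/* as user_disable_single_step() */
	if (mode == STEP_INSN)
		msr |= FAKE_MSR_SE;		/* trap after every instruction */
	else if (mode == STEP_BRANCH)
		msr |= FAKE_MSR_BE;		/* trap after branch instructions */
	return msr;
}

int main(void)
{
	printf("%#lx\n", apply_step_mode(0x8000, STEP_BRANCH));	/* 0x8200 */
	return 0;
}
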
diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
index 8869001ab5d7..54e66da8f743 100644
--- a/arch/powerpc/kernel/rtas_pci.c
+++ b/arch/powerpc/kernel/rtas_pci.c
@@ -93,10 +93,7 @@ static int rtas_pci_read_config(struct pci_bus *bus,
93{ 93{
94 struct device_node *busdn, *dn; 94 struct device_node *busdn, *dn;
95 95
96 if (bus->self) 96 busdn = pci_bus_to_OF_node(bus);
97 busdn = pci_device_to_OF_node(bus->self);
98 else
99 busdn = bus->sysdata; /* must be a phb */
100 97
101 /* Search only direct children of the bus */ 98 /* Search only direct children of the bus */
102 for (dn = busdn->child; dn; dn = dn->sibling) { 99 for (dn = busdn->child; dn; dn = dn->sibling) {
@@ -140,10 +137,7 @@ static int rtas_pci_write_config(struct pci_bus *bus,
140{ 137{
141 struct device_node *busdn, *dn; 138 struct device_node *busdn, *dn;
142 139
143 if (bus->self) 140 busdn = pci_bus_to_OF_node(bus);
144 busdn = pci_device_to_OF_node(bus->self);
145 else
146 busdn = bus->sysdata; /* must be a phb */
147 141
148 /* Search only direct children of the bus */ 142 /* Search only direct children of the bus */
149 for (dn = busdn->child; dn; dn = dn->sibling) { 143 for (dn = busdn->child; dn; dn = dn->sibling) {
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 9e1ca745d8f0..1d154248cf40 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -39,6 +39,7 @@
39#include <asm/serial.h> 39#include <asm/serial.h>
40#include <asm/udbg.h> 40#include <asm/udbg.h>
41#include <asm/mmu_context.h> 41#include <asm/mmu_context.h>
42#include <asm/swiotlb.h>
42 43
43#include "setup.h" 44#include "setup.h"
44 45
@@ -332,6 +333,11 @@ void __init setup_arch(char **cmdline_p)
332 ppc_md.setup_arch(); 333 ppc_md.setup_arch();
333 if ( ppc_md.progress ) ppc_md.progress("arch: exit", 0x3eab); 334 if ( ppc_md.progress ) ppc_md.progress("arch: exit", 0x3eab);
334 335
336#ifdef CONFIG_SWIOTLB
337 if (ppc_swiotlb_enable)
338 swiotlb_init();
339#endif
340
335 paging_init(); 341 paging_init();
336 342
337 /* Initialize the MMU context management stuff */ 343 /* Initialize the MMU context management stuff */
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index c410c606955d..1f6816003ebe 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -61,6 +61,7 @@
61#include <asm/xmon.h> 61#include <asm/xmon.h>
62#include <asm/udbg.h> 62#include <asm/udbg.h>
63#include <asm/kexec.h> 63#include <asm/kexec.h>
64#include <asm/swiotlb.h>
64 65
65#include "setup.h" 66#include "setup.h"
66 67
@@ -417,12 +418,14 @@ void __init setup_system(void)
417 if (ppc64_caches.iline_size != 0x80) 418 if (ppc64_caches.iline_size != 0x80)
418 printk("ppc64_caches.icache_line_size = 0x%x\n", 419 printk("ppc64_caches.icache_line_size = 0x%x\n",
419 ppc64_caches.iline_size); 420 ppc64_caches.iline_size);
421#ifdef CONFIG_PPC_STD_MMU_64
420 if (htab_address) 422 if (htab_address)
421 printk("htab_address = 0x%p\n", htab_address); 423 printk("htab_address = 0x%p\n", htab_address);
422 printk("htab_hash_mask = 0x%lx\n", htab_hash_mask); 424 printk("htab_hash_mask = 0x%lx\n", htab_hash_mask);
425#endif /* CONFIG_PPC_STD_MMU_64 */
423 if (PHYSICAL_START > 0) 426 if (PHYSICAL_START > 0)
424 printk("physical_start = 0x%lx\n", 427 printk("physical_start = 0x%llx\n",
425 PHYSICAL_START); 428 (unsigned long long)PHYSICAL_START);
426 printk("-----------------------------------------------------\n"); 429 printk("-----------------------------------------------------\n");
427 430
428 DBG(" <- setup_system()\n"); 431 DBG(" <- setup_system()\n");
@@ -511,8 +514,9 @@ void __init setup_arch(char **cmdline_p)
511 irqstack_early_init(); 514 irqstack_early_init();
512 emergency_stack_init(); 515 emergency_stack_init();
513 516
517#ifdef CONFIG_PPC_STD_MMU_64
514 stabs_alloc(); 518 stabs_alloc();
515 519#endif
516 /* set up the bootmem stuff with available memory */ 520 /* set up the bootmem stuff with available memory */
517 do_init_bootmem(); 521 do_init_bootmem();
518 sparse_init(); 522 sparse_init();
@@ -524,6 +528,11 @@ void __init setup_arch(char **cmdline_p)
524 if (ppc_md.setup_arch) 528 if (ppc_md.setup_arch)
525 ppc_md.setup_arch(); 529 ppc_md.setup_arch();
526 530
531#ifdef CONFIG_SWIOTLB
532 if (ppc_swiotlb_enable)
533 swiotlb_init();
534#endif
535
527 paging_init(); 536 paging_init();
528 ppc64_boot_msg(0x15, "Setup Done"); 537 ppc64_boot_msg(0x15, "Setup Done");
529} 538}
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 48571ac56fb7..eae4511ceeac 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -52,6 +52,8 @@
52#include <linux/jiffies.h> 52#include <linux/jiffies.h>
53#include <linux/posix-timers.h> 53#include <linux/posix-timers.h>
54#include <linux/irq.h> 54#include <linux/irq.h>
55#include <linux/delay.h>
56#include <linux/perf_counter.h>
55 57
56#include <asm/io.h> 58#include <asm/io.h>
57#include <asm/processor.h> 59#include <asm/processor.h>
@@ -109,7 +111,7 @@ static void decrementer_set_mode(enum clock_event_mode mode,
109static struct clock_event_device decrementer_clockevent = { 111static struct clock_event_device decrementer_clockevent = {
110 .name = "decrementer", 112 .name = "decrementer",
111 .rating = 200, 113 .rating = 200,
112 .shift = 16, 114 .shift = 0, /* To be filled in */
113 .mult = 0, /* To be filled in */ 115 .mult = 0, /* To be filled in */
114 .irq = 0, 116 .irq = 0,
115 .set_next_event = decrementer_set_next_event, 117 .set_next_event = decrementer_set_next_event,
@@ -524,6 +526,26 @@ void __init iSeries_time_init_early(void)
524} 526}
525#endif /* CONFIG_PPC_ISERIES */ 527#endif /* CONFIG_PPC_ISERIES */
526 528
529#if defined(CONFIG_PERF_COUNTERS) && defined(CONFIG_PPC32)
530DEFINE_PER_CPU(u8, perf_counter_pending);
531
532void set_perf_counter_pending(void)
533{
534 get_cpu_var(perf_counter_pending) = 1;
535 set_dec(1);
536 put_cpu_var(perf_counter_pending);
537}
538
539#define test_perf_counter_pending() __get_cpu_var(perf_counter_pending)
540#define clear_perf_counter_pending() __get_cpu_var(perf_counter_pending) = 0
541
542#else /* CONFIG_PERF_COUNTERS && CONFIG_PPC32 */
543
544#define test_perf_counter_pending() 0
545#define clear_perf_counter_pending()
546
547#endif /* CONFIG_PERF_COUNTERS && CONFIG_PPC32 */
548
527/* 549/*
528 * For iSeries shared processors, we have to let the hypervisor 550 * For iSeries shared processors, we have to let the hypervisor
529 * set the hardware decrementer. We set a virtual decrementer 551 * set the hardware decrementer. We set a virtual decrementer
@@ -550,6 +572,10 @@ void timer_interrupt(struct pt_regs * regs)
550 set_dec(DECREMENTER_MAX); 572 set_dec(DECREMENTER_MAX);
551 573
552#ifdef CONFIG_PPC32 574#ifdef CONFIG_PPC32
575 if (test_perf_counter_pending()) {
576 clear_perf_counter_pending();
577 perf_counter_do_pending();
578 }
553 if (atomic_read(&ppc_n_lost_interrupts) != 0) 579 if (atomic_read(&ppc_n_lost_interrupts) != 0)
554 do_IRQ(regs); 580 do_IRQ(regs);
555#endif 581#endif
@@ -843,6 +869,22 @@ static void decrementer_set_mode(enum clock_event_mode mode,
843 decrementer_set_next_event(DECREMENTER_MAX, dev); 869 decrementer_set_next_event(DECREMENTER_MAX, dev);
844} 870}
845 871
872static void __init setup_clockevent_multiplier(unsigned long hz)
873{
874 u64 mult, shift = 32;
875
876 while (1) {
877 mult = div_sc(hz, NSEC_PER_SEC, shift);
878 if (mult && (mult >> 32UL) == 0UL)
879 break;
880
881 shift--;
882 }
883
884 decrementer_clockevent.shift = shift;
885 decrementer_clockevent.mult = mult;
886}
887
846static void register_decrementer_clockevent(int cpu) 888static void register_decrementer_clockevent(int cpu)
847{ 889{
848 struct clock_event_device *dec = &per_cpu(decrementers, cpu).event; 890 struct clock_event_device *dec = &per_cpu(decrementers, cpu).event;
@@ -860,8 +902,7 @@ static void __init init_decrementer_clockevent(void)
860{ 902{
861 int cpu = smp_processor_id(); 903 int cpu = smp_processor_id();
862 904
863 decrementer_clockevent.mult = div_sc(ppc_tb_freq, NSEC_PER_SEC, 905 setup_clockevent_multiplier(ppc_tb_freq);
864 decrementer_clockevent.shift);
865 decrementer_clockevent.max_delta_ns = 906 decrementer_clockevent.max_delta_ns =
866 clockevent_delta2ns(DECREMENTER_MAX, &decrementer_clockevent); 907 clockevent_delta2ns(DECREMENTER_MAX, &decrementer_clockevent);
867 decrementer_clockevent.min_delta_ns = 908 decrementer_clockevent.min_delta_ns =
@@ -1128,6 +1169,15 @@ void div128_by_32(u64 dividend_high, u64 dividend_low,
1128 1169
1129} 1170}
1130 1171
1172/* We don't need to calibrate delay, we use the CPU timebase for that */
1173void calibrate_delay(void)
1174{
1175 /* Some generic code (such as spinlock debug) use loops_per_jiffy
1176 * as the number of __delay(1) in a jiffy, so make it so
1177 */
1178 loops_per_jiffy = tb_ticks_per_jiffy;
1179}
1180
1131static int __init rtc_init(void) 1181static int __init rtc_init(void)
1132{ 1182{
1133 struct platform_device *pdev; 1183 struct platform_device *pdev;
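
setup_clockevent_multiplier() above starts at shift = 32 and walks downward until div_sc(hz, NSEC_PER_SEC, shift) fits in 32 bits. The stand-alone restatement below spells out the div_sc() arithmetic and, as an example, assumes a hypothetical 512 MHz timebase.

/*
 * Stand-alone restatement of the shift/mult search above, assuming a
 * hypothetical 512 MHz timebase.  div_sc() is written out explicitly
 * as (ticks << shift) / nsec.
 */
#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC	1000000000UL

static uint64_t div_sc(unsigned long ticks, unsigned long nsec, int shift)
{
	return ((uint64_t)ticks << shift) / nsec;
}

int main(void)
{
	unsigned long hz = 512000000UL;		/* hypothetical timebase */
	uint64_t mult;
	int shift = 32;

	while (1) {
		mult = div_sc(hz, NSEC_PER_SEC, shift);
		if (mult && (mult >> 32) == 0)
			break;
		shift--;
	}
	/* a delay of ns nanoseconds is then programmed as ~(ns * mult) >> shift */
	printf("shift=%d mult=%llu\n", shift, (unsigned long long)mult);
	return 0;
}

For any timebase below 1 GHz the loop exits immediately at shift = 32; the walk-down only matters for faster timebases, where a 32-bit mult would otherwise overflow.
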
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 678fbff0d206..6f0ae1a9bfae 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -33,7 +33,9 @@
33#include <linux/backlight.h> 33#include <linux/backlight.h>
34#include <linux/bug.h> 34#include <linux/bug.h>
35#include <linux/kdebug.h> 35#include <linux/kdebug.h>
36#include <linux/debugfs.h>
36 37
38#include <asm/emulated_ops.h>
37#include <asm/pgtable.h> 39#include <asm/pgtable.h>
38#include <asm/uaccess.h> 40#include <asm/uaccess.h>
39#include <asm/system.h> 41#include <asm/system.h>
@@ -757,36 +759,44 @@ static int emulate_instruction(struct pt_regs *regs)
757 759
758 /* Emulate the mfspr rD, PVR. */ 760 /* Emulate the mfspr rD, PVR. */
759 if ((instword & PPC_INST_MFSPR_PVR_MASK) == PPC_INST_MFSPR_PVR) { 761 if ((instword & PPC_INST_MFSPR_PVR_MASK) == PPC_INST_MFSPR_PVR) {
762 PPC_WARN_EMULATED(mfpvr);
760 rd = (instword >> 21) & 0x1f; 763 rd = (instword >> 21) & 0x1f;
761 regs->gpr[rd] = mfspr(SPRN_PVR); 764 regs->gpr[rd] = mfspr(SPRN_PVR);
762 return 0; 765 return 0;
763 } 766 }
764 767
765 /* Emulating the dcba insn is just a no-op. */ 768 /* Emulating the dcba insn is just a no-op. */
766 if ((instword & PPC_INST_DCBA_MASK) == PPC_INST_DCBA) 769 if ((instword & PPC_INST_DCBA_MASK) == PPC_INST_DCBA) {
770 PPC_WARN_EMULATED(dcba);
767 return 0; 771 return 0;
772 }
768 773
769 /* Emulate the mcrxr insn. */ 774 /* Emulate the mcrxr insn. */
770 if ((instword & PPC_INST_MCRXR_MASK) == PPC_INST_MCRXR) { 775 if ((instword & PPC_INST_MCRXR_MASK) == PPC_INST_MCRXR) {
771 int shift = (instword >> 21) & 0x1c; 776 int shift = (instword >> 21) & 0x1c;
772 unsigned long msk = 0xf0000000UL >> shift; 777 unsigned long msk = 0xf0000000UL >> shift;
773 778
779 PPC_WARN_EMULATED(mcrxr);
774 regs->ccr = (regs->ccr & ~msk) | ((regs->xer >> shift) & msk); 780 regs->ccr = (regs->ccr & ~msk) | ((regs->xer >> shift) & msk);
775 regs->xer &= ~0xf0000000UL; 781 regs->xer &= ~0xf0000000UL;
776 return 0; 782 return 0;
777 } 783 }
778 784
779 /* Emulate load/store string insn. */ 785 /* Emulate load/store string insn. */
780 if ((instword & PPC_INST_STRING_GEN_MASK) == PPC_INST_STRING) 786 if ((instword & PPC_INST_STRING_GEN_MASK) == PPC_INST_STRING) {
787 PPC_WARN_EMULATED(string);
781 return emulate_string_inst(regs, instword); 788 return emulate_string_inst(regs, instword);
789 }
782 790
783 /* Emulate the popcntb (Population Count Bytes) instruction. */ 791 /* Emulate the popcntb (Population Count Bytes) instruction. */
784 if ((instword & PPC_INST_POPCNTB_MASK) == PPC_INST_POPCNTB) { 792 if ((instword & PPC_INST_POPCNTB_MASK) == PPC_INST_POPCNTB) {
793 PPC_WARN_EMULATED(popcntb);
785 return emulate_popcntb_inst(regs, instword); 794 return emulate_popcntb_inst(regs, instword);
786 } 795 }
787 796
788 /* Emulate isel (Integer Select) instruction */ 797 /* Emulate isel (Integer Select) instruction */
789 if ((instword & PPC_INST_ISEL_MASK) == PPC_INST_ISEL) { 798 if ((instword & PPC_INST_ISEL_MASK) == PPC_INST_ISEL) {
799 PPC_WARN_EMULATED(isel);
790 return emulate_isel(regs, instword); 800 return emulate_isel(regs, instword);
791 } 801 }
792 802
@@ -984,6 +994,8 @@ void SoftwareEmulation(struct pt_regs *regs)
984 994
985#ifdef CONFIG_MATH_EMULATION 995#ifdef CONFIG_MATH_EMULATION
986 errcode = do_mathemu(regs); 996 errcode = do_mathemu(regs);
997 if (errcode >= 0)
998 PPC_WARN_EMULATED(math);
987 999
988 switch (errcode) { 1000 switch (errcode) {
989 case 0: 1001 case 0:
@@ -1005,6 +1017,9 @@ void SoftwareEmulation(struct pt_regs *regs)
1005 1017
1006#elif defined(CONFIG_8XX_MINIMAL_FPEMU) 1018#elif defined(CONFIG_8XX_MINIMAL_FPEMU)
1007 errcode = Soft_emulate_8xx(regs); 1019 errcode = Soft_emulate_8xx(regs);
1020 if (errcode >= 0)
1021 PPC_WARN_EMULATED(8xx);
1022
1008 switch (errcode) { 1023 switch (errcode) {
1009 case 0: 1024 case 0:
1010 emulate_single_step(regs); 1025 emulate_single_step(regs);
@@ -1026,7 +1041,34 @@ void SoftwareEmulation(struct pt_regs *regs)
1026 1041
1027void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status) 1042void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status)
1028{ 1043{
1029 if (debug_status & DBSR_IC) { /* instruction completion */ 1044 /* Hack alert: On BookE, Branch Taken stops on the branch itself, while
1045 * on server, it stops on the target of the branch. In order to simulate
1046 * the server behaviour, we thus restart right away with a single step
1047 * instead of stopping here when hitting a BT
1048 */
1049 if (debug_status & DBSR_BT) {
1050 regs->msr &= ~MSR_DE;
1051
1052 /* Disable BT */
1053 mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~DBCR0_BT);
1054 /* Clear the BT event */
1055 mtspr(SPRN_DBSR, DBSR_BT);
1056
1057 /* Do the single step trick only when coming from userspace */
1058 if (user_mode(regs)) {
1059 current->thread.dbcr0 &= ~DBCR0_BT;
1060 current->thread.dbcr0 |= DBCR0_IDM | DBCR0_IC;
1061 regs->msr |= MSR_DE;
1062 return;
1063 }
1064
1065 if (notify_die(DIE_SSTEP, "block_step", regs, 5,
1066 5, SIGTRAP) == NOTIFY_STOP) {
1067 return;
1068 }
1069 if (debugger_sstep(regs))
1070 return;
1071 } else if (debug_status & DBSR_IC) { /* Instruction complete */
1030 regs->msr &= ~MSR_DE; 1072 regs->msr &= ~MSR_DE;
1031 1073
1032 /* Disable instruction completion */ 1074 /* Disable instruction completion */
@@ -1042,9 +1084,8 @@ void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status)
1042 if (debugger_sstep(regs)) 1084 if (debugger_sstep(regs))
1043 return; 1085 return;
1044 1086
1045 if (user_mode(regs)) { 1087 if (user_mode(regs))
1046 current->thread.dbcr0 &= ~DBCR0_IC; 1088 current->thread.dbcr0 &= ~(DBCR0_IC);
1047 }
1048 1089
1049 _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); 1090 _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip);
1050 } else if (debug_status & (DBSR_DAC1R | DBSR_DAC1W)) { 1091 } else if (debug_status & (DBSR_DAC1R | DBSR_DAC1W)) {
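
To summarise the flow the two hunks above implement: a branch-taken (DBSR_BT) event from user space is re-armed as an instruction-complete single step so the trap is reported on the branch target, matching server-class behaviour, while a kernel-mode BT event is handed to the debugger; a plain instruction-complete (DBSR_IC) event is delivered as a trace trap. A compact sketch of just that decision logic (the flag values and messages are illustrative only):

#include <stdio.h>

#define DBSR_BT 0x1   /* illustrative values, not the real SPR bits */
#define DBSR_IC 0x2

static void debug_exception(unsigned int status, int user_mode)
{
	if (status & DBSR_BT) {
		if (user_mode) {
			printf("BT: re-arm as single step and resume\n");
			return;
		}
		printf("BT: notify the kernel debugger\n");
	} else if (status & DBSR_IC) {
		printf("IC: deliver SIGTRAP with TRAP_TRACE\n");
	}
}

int main(void)
{
	debug_exception(DBSR_BT, 1);   /* user-space branch-taken event   */
	debug_exception(DBSR_IC, 1);   /* ordinary single-step completion */
	return 0;
}
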
@@ -1088,6 +1129,7 @@ void altivec_assist_exception(struct pt_regs *regs)
1088 1129
1089 flush_altivec_to_thread(current); 1130 flush_altivec_to_thread(current);
1090 1131
1132 PPC_WARN_EMULATED(altivec);
1091 err = emulate_altivec(regs); 1133 err = emulate_altivec(regs);
1092 if (err == 0) { 1134 if (err == 0) {
1093 regs->nip += 4; /* skip emulated instruction */ 1135 regs->nip += 4; /* skip emulated instruction */
@@ -1286,3 +1328,79 @@ void kernel_bad_stack(struct pt_regs *regs)
1286void __init trap_init(void) 1328void __init trap_init(void)
1287{ 1329{
1288} 1330}
1331
1332
1333#ifdef CONFIG_PPC_EMULATED_STATS
1334
1335#define WARN_EMULATED_SETUP(type) .type = { .name = #type }
1336
1337struct ppc_emulated ppc_emulated = {
1338#ifdef CONFIG_ALTIVEC
1339 WARN_EMULATED_SETUP(altivec),
1340#endif
1341 WARN_EMULATED_SETUP(dcba),
1342 WARN_EMULATED_SETUP(dcbz),
1343 WARN_EMULATED_SETUP(fp_pair),
1344 WARN_EMULATED_SETUP(isel),
1345 WARN_EMULATED_SETUP(mcrxr),
1346 WARN_EMULATED_SETUP(mfpvr),
1347 WARN_EMULATED_SETUP(multiple),
1348 WARN_EMULATED_SETUP(popcntb),
1349 WARN_EMULATED_SETUP(spe),
1350 WARN_EMULATED_SETUP(string),
1351 WARN_EMULATED_SETUP(unaligned),
1352#ifdef CONFIG_MATH_EMULATION
1353 WARN_EMULATED_SETUP(math),
1354#elif defined(CONFIG_8XX_MINIMAL_FPEMU)
1355 WARN_EMULATED_SETUP(8xx),
1356#endif
1357#ifdef CONFIG_VSX
1358 WARN_EMULATED_SETUP(vsx),
1359#endif
1360};
1361
1362u32 ppc_warn_emulated;
1363
1364void ppc_warn_emulated_print(const char *type)
1365{
1366 if (printk_ratelimit())
1367 pr_warning("%s used emulated %s instruction\n", current->comm,
1368 type);
1369}
1370
1371static int __init ppc_warn_emulated_init(void)
1372{
1373 struct dentry *dir, *d;
1374 unsigned int i;
1375 struct ppc_emulated_entry *entries = (void *)&ppc_emulated;
1376
1377 if (!powerpc_debugfs_root)
1378 return -ENODEV;
1379
1380 dir = debugfs_create_dir("emulated_instructions",
1381 powerpc_debugfs_root);
1382 if (!dir)
1383 return -ENOMEM;
1384
1385 d = debugfs_create_u32("do_warn", S_IRUGO | S_IWUSR, dir,
1386 &ppc_warn_emulated);
1387 if (!d)
1388 goto fail;
1389
1390 for (i = 0; i < sizeof(ppc_emulated)/sizeof(*entries); i++) {
1391 d = debugfs_create_u32(entries[i].name, S_IRUGO | S_IWUSR, dir,
1392 (u32 *)&entries[i].val.counter);
1393 if (!d)
1394 goto fail;
1395 }
1396
1397 return 0;
1398
1399fail:
1400 debugfs_remove_recursive(dir);
1401 return -ENOMEM;
1402}
1403
1404device_initcall(ppc_warn_emulated_init);
1405
1406#endif /* CONFIG_PPC_EMULATED_STATS */
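
The statistics block above keeps one named counter per emulated instruction type, exposes each counter through debugfs, and prints a rate-limited warning when the do_warn switch is set. Below is a user-space sketch of that accounting pattern; all identifiers are illustrative, and the real code uses atomic_t counters plus debugfs files rather than plain globals.

#include <stdio.h>

struct emu_entry {
	const char *name;
	unsigned long count;
};

static struct emu_entry emu_stats[] = {
	{ "dcba", 0 }, { "mcrxr", 0 }, { "popcntb", 0 },
};
static int do_warn;   /* plays the role of the debugfs "do_warn" knob */

static void warn_emulated(struct emu_entry *e, const char *comm)
{
	e->count++;       /* the kernel bumps an atomic_t here */
	if (do_warn)
		printf("%s used emulated %s instruction\n", comm, e->name);
}

int main(void)
{
	do_warn = 1;
	warn_emulated(&emu_stats[2], "someprog");
	printf("popcntb emulated %lu time(s)\n", emu_stats[2].count);
	return 0;
}
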
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 49ac3d6e1399..ef36cbbc5882 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -1,5 +1,215 @@
1#include <asm/processor.h>
1#include <asm/ppc_asm.h> 2#include <asm/ppc_asm.h>
2#include <asm/reg.h> 3#include <asm/reg.h>
4#include <asm/asm-offsets.h>
5#include <asm/cputable.h>
6#include <asm/thread_info.h>
7#include <asm/page.h>
8
9/*
10 * load_up_altivec(unused, unused, tsk)
11 * Disable VMX for the task which had it previously,
12 * and save its vector registers in its thread_struct.
13 * Enables the VMX for use in the kernel on return.
14 * On SMP we know the VMX is free, since we give it up every
15 * switch (ie, no lazy save of the vector registers).
16 */
17_GLOBAL(load_up_altivec)
18 mfmsr r5 /* grab the current MSR */
19 oris r5,r5,MSR_VEC@h
20 MTMSRD(r5) /* enable use of AltiVec now */
21 isync
22
23/*
24 * For SMP, we don't do lazy VMX switching because it just gets too
25 * horrendously complex, especially when a task switches from one CPU
26 * to another. Instead we call giveup_altivec in switch_to.
27 * VRSAVE isn't dealt with here, that is done in the normal context
28 * switch code. Note that we could rely on vrsave value to eventually
29 * avoid saving all of the VREGs here...
30 */
31#ifndef CONFIG_SMP
32 LOAD_REG_ADDRBASE(r3, last_task_used_altivec)
33 toreal(r3)
34 PPC_LL r4,ADDROFF(last_task_used_altivec)(r3)
35 PPC_LCMPI 0,r4,0
36 beq 1f
37
38 /* Save VMX state to last_task_used_altivec's THREAD struct */
39 toreal(r4)
40 addi r4,r4,THREAD
41 SAVE_32VRS(0,r5,r4)
42 mfvscr vr0
43 li r10,THREAD_VSCR
44 stvx vr0,r10,r4
45 /* Disable VMX for last_task_used_altivec */
46 PPC_LL r5,PT_REGS(r4)
47 toreal(r5)
48 PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
49 lis r10,MSR_VEC@h
50 andc r4,r4,r10
51 PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
521:
53#endif /* CONFIG_SMP */
54
55 /* Hack: if we get an altivec unavailable trap with VRSAVE
56 * set to all zeros, we assume this is a broken application
57 * that fails to set it properly, and thus we switch it to
58 * all 1's
59 */
60 mfspr r4,SPRN_VRSAVE
61 cmpdi 0,r4,0
62 bne+ 1f
63 li r4,-1
64 mtspr SPRN_VRSAVE,r4
651:
66 /* enable use of VMX after return */
67#ifdef CONFIG_PPC32
68 mfspr r5,SPRN_SPRG3 /* current task's THREAD (phys) */
69 oris r9,r9,MSR_VEC@h
70#else
71 ld r4,PACACURRENT(r13)
72 addi r5,r4,THREAD /* Get THREAD */
73 oris r12,r12,MSR_VEC@h
74 std r12,_MSR(r1)
75#endif
76 li r4,1
77 li r10,THREAD_VSCR
78 stw r4,THREAD_USED_VR(r5)
79 lvx vr0,r10,r5
80 mtvscr vr0
81 REST_32VRS(0,r4,r5)
82#ifndef CONFIG_SMP
83 /* Update last_task_used_math to 'current' */
84 subi r4,r5,THREAD /* Back to 'current' */
85 fromreal(r4)
86 PPC_STL r4,ADDROFF(last_task_used_math)(r3)
87#endif /* CONFIG_SMP */
88 /* restore registers and return */
89 blr
90
91/*
92 * giveup_altivec(tsk)
93 * Disable VMX for the task given as the argument,
94 * and save the vector registers in its thread_struct.
95 * Enables the VMX for use in the kernel on return.
96 */
97_GLOBAL(giveup_altivec)
98 mfmsr r5
99 oris r5,r5,MSR_VEC@h
100 SYNC
101 MTMSRD(r5) /* enable use of VMX now */
102 isync
103 PPC_LCMPI 0,r3,0
104 beqlr- /* if no previous owner, done */
105 addi r3,r3,THREAD /* want THREAD of task */
106 PPC_LL r5,PT_REGS(r3)
107 PPC_LCMPI 0,r5,0
108 SAVE_32VRS(0,r4,r3)
109 mfvscr vr0
110 li r4,THREAD_VSCR
111 stvx vr0,r4,r3
112 beq 1f
113 PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
114#ifdef CONFIG_VSX
115BEGIN_FTR_SECTION
116 lis r3,(MSR_VEC|MSR_VSX)@h
117FTR_SECTION_ELSE
118 lis r3,MSR_VEC@h
119ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX)
120#else
121 lis r3,MSR_VEC@h
122#endif
123 andc r4,r4,r3 /* disable FP for previous task */
124 PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1251:
126#ifndef CONFIG_SMP
127 li r5,0
128 LOAD_REG_ADDRBASE(r4,last_task_used_altivec)
129 PPC_STL r5,ADDROFF(last_task_used_altivec)(r4)
130#endif /* CONFIG_SMP */
131 blr
132
133#ifdef CONFIG_VSX
134
135#ifdef CONFIG_PPC32
136#error This asm code isn't ready for 32-bit kernels
137#endif
138
139/*
140 * load_up_vsx(unused, unused, tsk)
141 * Disable VSX for the task which had it previously,
142 * and save its vector registers in its thread_struct.
143 * Reuse the fp and vsx saves, but first check to see if they have
144 * been saved already.
145 */
146_GLOBAL(load_up_vsx)
147/* Load FP and VSX registers if they haven't been done yet */
148 andi. r5,r12,MSR_FP
149 beql+ load_up_fpu /* skip if already loaded */
150 andis. r5,r12,MSR_VEC@h
151 beql+ load_up_altivec /* skip if already loaded */
152
153#ifndef CONFIG_SMP
154 ld r3,last_task_used_vsx@got(r2)
155 ld r4,0(r3)
156 cmpdi 0,r4,0
157 beq 1f
158 /* Disable VSX for last_task_used_vsx */
159 addi r4,r4,THREAD
160 ld r5,PT_REGS(r4)
161 ld r4,_MSR-STACK_FRAME_OVERHEAD(r5)
162 lis r6,MSR_VSX@h
163 andc r6,r4,r6
164 std r6,_MSR-STACK_FRAME_OVERHEAD(r5)
1651:
166#endif /* CONFIG_SMP */
167 ld r4,PACACURRENT(r13)
168 addi r4,r4,THREAD /* Get THREAD */
169 li r6,1
170 stw r6,THREAD_USED_VSR(r4) /* ... also set thread used vsr */
171 /* enable use of VSX after return */
172 oris r12,r12,MSR_VSX@h
173 std r12,_MSR(r1)
174#ifndef CONFIG_SMP
175 /* Update last_task_used_math to 'current' */
176 ld r4,PACACURRENT(r13)
177 std r4,0(r3)
178#endif /* CONFIG_SMP */
179 b fast_exception_return
180
181/*
182 * __giveup_vsx(tsk)
183 * Disable VSX for the task given as the argument.
184 * Does NOT save vsx registers.
185 * Enables the VSX for use in the kernel on return.
186 */
187_GLOBAL(__giveup_vsx)
188 mfmsr r5
189 oris r5,r5,MSR_VSX@h
190 mtmsrd r5 /* enable use of VSX now */
191 isync
192
193 cmpdi 0,r3,0
194 beqlr- /* if no previous owner, done */
195 addi r3,r3,THREAD /* want THREAD of task */
196 ld r5,PT_REGS(r3)
197 cmpdi 0,r5,0
198 beq 1f
199 ld r4,_MSR-STACK_FRAME_OVERHEAD(r5)
200 lis r3,MSR_VSX@h
201 andc r4,r4,r3 /* disable VSX for previous task */
202 std r4,_MSR-STACK_FRAME_OVERHEAD(r5)
2031:
204#ifndef CONFIG_SMP
205 li r5,0
206 ld r4,last_task_used_vsx@got(r2)
207 std r5,0(r4)
208#endif /* CONFIG_SMP */
209 blr
210
211#endif /* CONFIG_VSX */
212
3 213
4/* 214/*
5 * The routines below are in assembler so we can closely control the 215 * The routines below are in assembler so we can closely control the