Diffstat (limited to 'arch/arm/kernel')
-rw-r--r--  arch/arm/kernel/Makefile            |    9
-rw-r--r--  arch/arm/kernel/entry-armv.S        |   56
-rw-r--r--  arch/arm/kernel/entry-common.S      |  202
-rw-r--r--  arch/arm/kernel/entry-header.S      |   19
-rw-r--r--  arch/arm/kernel/fiq.c               |   10
-rw-r--r--  arch/arm/kernel/ftrace.c            |  103
-rw-r--r--  arch/arm/kernel/head.S              |   50
-rw-r--r--  arch/arm/kernel/hw_breakpoint.c     |  543
-rw-r--r--  arch/arm/kernel/irq.c               |   34
-rw-r--r--  arch/arm/kernel/iwmmxt.S            |   55
-rw-r--r--  arch/arm/kernel/machine_kexec.c     |   30
-rw-r--r--  arch/arm/kernel/module.c            |  109
-rw-r--r--  arch/arm/kernel/perf_event.c        | 2466
-rw-r--r--  arch/arm/kernel/perf_event_v6.c     |  672
-rw-r--r--  arch/arm/kernel/perf_event_v7.c     |  906
-rw-r--r--  arch/arm/kernel/perf_event_xscale.c |  807
-rw-r--r--  arch/arm/kernel/pj4-cp0.c           |   94
-rw-r--r--  arch/arm/kernel/ptrace.c            |    4
-rw-r--r--  arch/arm/kernel/sched_clock.c       |   69
-rw-r--r--  arch/arm/kernel/setup.c             |   37
-rw-r--r--  arch/arm/kernel/smp.c               |  448
-rw-r--r--  arch/arm/kernel/smp_tlb.c           |  139
-rw-r--r--  arch/arm/kernel/smp_twd.c           |   17
-rw-r--r--  arch/arm/kernel/swp_emulate.c       |  267
-rw-r--r--  arch/arm/kernel/time.c              |    4
-rw-r--r--  arch/arm/kernel/traps.c             |   26
-rw-r--r--  arch/arm/kernel/vmlinux.lds.S       |    2
27 files changed, 3989 insertions(+), 3189 deletions(-)
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 5b9b268f4fbb..185ee822c935 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -5,7 +5,7 @@
 CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET)
 AFLAGS_head.o        := -DTEXT_OFFSET=$(TEXT_OFFSET)
 
-ifdef CONFIG_DYNAMIC_FTRACE
+ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_ftrace.o = -pg
 endif
 
@@ -29,10 +29,12 @@ obj-$(CONFIG_MODULES) += armksyms.o module.o
 obj-$(CONFIG_ARTHUR)		+= arthur.o
 obj-$(CONFIG_ISA_DMA)		+= dma-isa.o
 obj-$(CONFIG_PCI)		+= bios32.o isa.o
-obj-$(CONFIG_SMP)		+= smp.o
+obj-$(CONFIG_HAVE_SCHED_CLOCK)	+= sched_clock.o
+obj-$(CONFIG_SMP)		+= smp.o smp_tlb.o
 obj-$(CONFIG_HAVE_ARM_SCU)	+= smp_scu.o
 obj-$(CONFIG_HAVE_ARM_TWD)	+= smp_twd.o
 obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
+obj-$(CONFIG_FUNCTION_GRAPH_TRACER)	+= ftrace.o
 obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o kprobes-decode.o
 obj-$(CONFIG_ATAGS_PROC)	+= atags.o
@@ -42,6 +44,8 @@ obj-$(CONFIG_KGDB) += kgdb.o
 obj-$(CONFIG_ARM_UNWIND)	+= unwind.o
 obj-$(CONFIG_HAVE_TCM)		+= tcm.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
+obj-$(CONFIG_SWP_EMULATE)	+= swp_emulate.o
+CFLAGS_swp_emulate.o		:= -Wa,-march=armv7-a
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o
 
 obj-$(CONFIG_CRUNCH)		+= crunch.o crunch-bits.o
@@ -50,6 +54,7 @@ AFLAGS_crunch-bits.o := -Wa,-mcpu=ep9312
 obj-$(CONFIG_CPU_XSCALE)	+= xscale-cp0.o
 obj-$(CONFIG_CPU_XSC3)		+= xscale-cp0.o
 obj-$(CONFIG_CPU_MOHAWK)	+= xscale-cp0.o
+obj-$(CONFIG_CPU_PJ4)		+= pj4-cp0.o
 obj-$(CONFIG_IWMMXT)		+= iwmmxt.o
 obj-$(CONFIG_CPU_HAS_PMU)	+= pmu.o
 obj-$(CONFIG_HW_PERF_EVENTS)	+= perf_event.o
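
A note on the first hunk: the guard around CFLAGS_REMOVE_ftrace.o widens from CONFIG_DYNAMIC_FTRACE to CONFIG_FUNCTION_TRACER because ftrace.o now also backs the (non-dynamic) function graph tracer. With CONFIG_FUNCTION_TRACER every kernel function is compiled with -pg, i.e. gets an mcount call in its prologue, so the tracer's own code must opt out or it would trace itself recursively. A minimal sketch of the two opt-out mechanisms (the helper name below is illustrative):

	#include <linux/compiler.h>	/* for notrace */

	/*
	 * With -pg, gcc emits roughly "push {lr}; bl __gnu_mcount_nc" at the
	 * top of every function.  'notrace' suppresses that per function; the
	 * CFLAGS_REMOVE_ftrace.o line above suppresses it for the whole file.
	 */
	static notrace void helper_used_by_the_tracer(void)
	{
		/* safe to call from ftrace internals: no mcount call emitted */
	}
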
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index bb96a7d4bbf5..2b46fea36c9f 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -25,42 +25,22 @@
 #include <asm/tls.h>
 
 #include "entry-header.S"
+#include <asm/entry-macro-multi.S>
 
 /*
  * Interrupt handling.  Preserves r7, r8, r9
  */
 	.macro	irq_handler
-	get_irqnr_preamble r5, lr
-1:	get_irqnr_and_base r0, r6, r5, lr
-	movne	r1, sp
-	@
-	@ routine called with r0 = irq number, r1 = struct pt_regs *
-	@
-	adrne	lr, BSYM(1b)
-	bne	asm_do_IRQ
-
-#ifdef CONFIG_SMP
-	/*
-	 * XXX
-	 *
-	 * this macro assumes that irqstat (r6) and base (r5) are
-	 * preserved from get_irqnr_and_base above
-	 */
-	ALT_SMP(test_for_ipi r0, r6, r5, lr)
-	ALT_UP_B(9997f)
-	movne	r0, sp
-	adrne	lr, BSYM(1b)
-	bne	do_IPI
-
-#ifdef CONFIG_LOCAL_TIMERS
-	test_for_ltirq r0, r6, r5, lr
-	movne	r0, sp
-	adrne	lr, BSYM(1b)
-	bne	do_local_timer
+#ifdef CONFIG_MULTI_IRQ_HANDLER
+	ldr	r5, =handle_arch_irq
+	mov	r0, sp
+	ldr	r5, [r5]
+	adr	lr, BSYM(9997f)
+	teq	r5, #0
+	movne	pc, r5
 #endif
-
+	arch_irq_handler_default
 9997:
-#endif
-
 	.endm
 
 #ifdef CONFIG_KPROBES
@@ -198,6 +178,7 @@ __dabt_svc:
 	@
 	@ set desired IRQ state, then call main handler
 	@
+	debug_entry r1
 	msr	cpsr_c, r9
 	mov	r2, sp
 	bl	do_DataAbort
@@ -324,6 +305,7 @@ __pabt_svc:
 #else
 	bl	CPU_PABORT_HANDLER
 #endif
+	debug_entry r1
 	msr	cpsr_c, r9			@ Maybe enable interrupts
 	mov	r2, sp				@ regs
 	bl	do_PrefetchAbort		@ call abort handler
@@ -439,6 +421,7 @@ __dabt_usr:
 	@
 	@ IRQs on, then call the main handler
 	@
+	debug_entry r1
 	enable_irq
 	mov	r2, sp
 	adr	lr, BSYM(ret_from_exception)
@@ -703,6 +686,7 @@ __pabt_usr:
 #else
 	bl	CPU_PABORT_HANDLER
 #endif
+	debug_entry r1
 	enable_irq				@ Enable interrupts
 	mov	r2, sp				@ regs
 	bl	do_PrefetchAbort		@ call abort handler
@@ -735,7 +719,7 @@ ENTRY(__switch_to)
 THUMB(	stmia	ip!, {r4 - sl, fp}	)	@ Store most regs on stack
 THUMB(	str	sp, [ip], #4		)
 THUMB(	str	lr, [ip], #4		)
-#ifdef CONFIG_MMU
+#ifdef CONFIG_CPU_USE_DOMAINS
 	ldr	r6, [r2, #TI_CPU_DOMAIN]
 #endif
 	set_tls	r3, r4, r5
@@ -744,7 +728,7 @@ ENTRY(__switch_to)
 	ldr	r8, =__stack_chk_guard
 	ldr	r7, [r7, #TSK_STACK_CANARY]
 #endif
-#ifdef CONFIG_MMU
+#ifdef CONFIG_CPU_USE_DOMAINS
 	mcr	p15, 0, r6, c3, c0, 0		@ Set domain register
 #endif
 	mov	r5, r0
@@ -842,7 +826,7 @@ __kuser_helper_start:
  */
 
 __kuser_memory_barrier:				@ 0xffff0fa0
-	smp_dmb
+	smp_dmb	arm
 	usr_ret	lr
 
 	.align	5
@@ -959,7 +943,7 @@ kuser_cmpxchg_fixup:
 
 #else
 
-	smp_dmb
+	smp_dmb	arm
1:	ldrex	r3, [r2]
 	subs	r3, r3, r0
 	strexeq	r3, r1, [r2]
@@ -1245,3 +1229,9 @@ cr_alignment:
 	.space	4
 cr_no_alignment:
 	.space	4
+
+#ifdef CONFIG_MULTI_IRQ_HANDLER
+	.globl	handle_arch_irq
+handle_arch_irq:
+	.space	4
+#endif
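
The handle_arch_irq word added above is what the irq_handler macro dereferences when CONFIG_MULTI_IRQ_HANDLER is set: the platform stores a C function pointer there at boot and the low-level entry code jumps to it with r0 = struct pt_regs *. A rough platform-side sketch, under stated assumptions — the my_soc_* names and the status register are made up, and real code would normally wire this up from the machine descriptor:

	#include <linux/bitops.h>
	#include <linux/io.h>

	extern void (*handle_arch_irq)(struct pt_regs *);
	extern asmlinkage void asm_do_IRQ(unsigned int irq, struct pt_regs *regs);

	static void __iomem *my_soc_intc_base;		/* assumed ioremap()ed base */
	#define MY_SOC_IRQ_STATUS	0x00		/* assumed pending register */

	static void my_soc_handle_irq(struct pt_regs *regs)
	{
		u32 stat;

		/* dispatch every pending source, lowest bit first */
		while ((stat = readl_relaxed(my_soc_intc_base + MY_SOC_IRQ_STATUS)))
			asm_do_IRQ(ffs(stat) - 1, regs);
	}

	static void __init my_soc_init_irq(void)
	{
		/* ...program the controller, then hook the entry path... */
		handle_arch_irq = my_soc_handle_irq;
	}
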
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 80bf8cd88d7c..1e7b04a40a31 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -147,98 +147,170 @@ ENDPROC(ret_from_fork)
 #endif
 #endif
 
-#ifdef CONFIG_DYNAMIC_FTRACE
-ENTRY(__gnu_mcount_nc)
-	mov	ip, lr
-	ldmia	sp!, {lr}
-	mov	pc, ip
-ENDPROC(__gnu_mcount_nc)
+.macro __mcount suffix
+	mcount_enter
+	ldr	r0, =ftrace_trace_function
+	ldr	r2, [r0]
+	adr	r0, .Lftrace_stub
+	cmp	r0, r2
+	bne	1f
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	ldr	r1, =ftrace_graph_return
+	ldr	r2, [r1]
+	cmp	r0, r2
+	bne	ftrace_graph_caller\suffix
+
+	ldr	r1, =ftrace_graph_entry
+	ldr	r2, [r1]
+	ldr	r0, =ftrace_graph_entry_stub
+	cmp	r0, r2
+	bne	ftrace_graph_caller\suffix
+#endif
 
-ENTRY(ftrace_caller)
-	stmdb	sp!, {r0-r3, lr}
-	mov	r0, lr
+	mcount_exit
+
+1:	mcount_get_lr	r1			@ lr of instrumented func
+	mov	r0, lr				@ instrumented function
+	sub	r0, r0, #MCOUNT_INSN_SIZE
+	adr	lr, BSYM(2f)
+	mov	pc, r2
+2:	mcount_exit
+.endm
+
+.macro __ftrace_caller suffix
+	mcount_enter
+
+	mcount_get_lr	r1			@ lr of instrumented func
+	mov	r0, lr				@ instrumented function
 	sub	r0, r0, #MCOUNT_INSN_SIZE
-	ldr	r1, [sp, #20]
 
-	.global	ftrace_call
-ftrace_call:
+	.globl ftrace_call\suffix
+ftrace_call\suffix:
 	bl	ftrace_stub
-	ldmia	sp!, {r0-r3, ip, lr}
-	mov	pc, ip
-ENDPROC(ftrace_caller)
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	.globl ftrace_graph_call\suffix
+ftrace_graph_call\suffix:
+	mov	r0, r0
+#endif
+
+	mcount_exit
+.endm
+
+.macro __ftrace_graph_caller
+	sub	r0, fp, #4		@ &lr of instrumented routine (&parent)
+#ifdef CONFIG_DYNAMIC_FTRACE
+	@ called from __ftrace_caller, saved in mcount_enter
+	ldr	r1, [sp, #16]		@ instrumented routine (func)
+#else
+	@ called from __mcount, untouched in lr
+	mov	r1, lr			@ instrumented routine (func)
+#endif
+	sub	r1, r1, #MCOUNT_INSN_SIZE
+	mov	r2, fp			@ frame pointer
+	bl	prepare_ftrace_return
+	mcount_exit
+.endm
 
 #ifdef CONFIG_OLD_MCOUNT
+/*
+ * mcount
+ */
+
+.macro mcount_enter
+	stmdb	sp!, {r0-r3, lr}
+.endm
+
+.macro mcount_get_lr reg
+	ldr	\reg, [fp, #-4]
+.endm
+
+.macro mcount_exit
+	ldr	lr, [fp, #-4]
+	ldmia	sp!, {r0-r3, pc}
+.endm
+
 ENTRY(mcount)
+#ifdef CONFIG_DYNAMIC_FTRACE
 	stmdb	sp!, {lr}
 	ldr	lr, [fp, #-4]
 	ldmia	sp!, {pc}
+#else
+	__mcount _old
+#endif
 ENDPROC(mcount)
 
+#ifdef CONFIG_DYNAMIC_FTRACE
 ENTRY(ftrace_caller_old)
-	stmdb	sp!, {r0-r3, lr}
-	ldr	r1, [fp, #-4]
-	mov	r0, lr
-	sub	r0, r0, #MCOUNT_INSN_SIZE
-
-	.globl ftrace_call_old
-ftrace_call_old:
-	bl	ftrace_stub
-	ldr	lr, [fp, #-4]	@ restore lr
-	ldmia	sp!, {r0-r3, pc}
+	__ftrace_caller _old
 ENDPROC(ftrace_caller_old)
 #endif
 
-#else
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ENTRY(ftrace_graph_caller_old)
+	__ftrace_graph_caller
+ENDPROC(ftrace_graph_caller_old)
+#endif
 
-ENTRY(__gnu_mcount_nc)
+.purgem mcount_enter
+.purgem mcount_get_lr
+.purgem mcount_exit
+#endif
+
+/*
+ * __gnu_mcount_nc
+ */
+
+.macro mcount_enter
 	stmdb	sp!, {r0-r3, lr}
-	ldr	r0, =ftrace_trace_function
-	ldr	r2, [r0]
-	adr	r0, .Lftrace_stub
-	cmp	r0, r2
-	bne	gnu_trace
+.endm
+
+.macro mcount_get_lr reg
+	ldr	\reg, [sp, #20]
+.endm
+
+.macro mcount_exit
 	ldmia	sp!, {r0-r3, ip, lr}
 	mov	pc, ip
+.endm
 
-gnu_trace:
-	ldr	r1, [sp, #20]			@ lr of instrumented routine
-	mov	r0, lr
-	sub	r0, r0, #MCOUNT_INSN_SIZE
-	adr	lr, BSYM(1f)
-	mov	pc, r2
-1:
-	ldmia	sp!, {r0-r3, ip, lr}
+ENTRY(__gnu_mcount_nc)
+#ifdef CONFIG_DYNAMIC_FTRACE
+	mov	ip, lr
+	ldmia	sp!, {lr}
 	mov	pc, ip
+#else
+	__mcount
+#endif
 ENDPROC(__gnu_mcount_nc)
 
-#ifdef CONFIG_OLD_MCOUNT
-/*
- * This is under an ifdef in order to force link-time errors for people trying
- * to build with !FRAME_POINTER with a GCC which doesn't use the new-style
- * mcount.
- */
-ENTRY(mcount)
-	stmdb	sp!, {r0-r3, lr}
-	ldr	r0, =ftrace_trace_function
-	ldr	r2, [r0]
-	adr	r0, ftrace_stub
-	cmp	r0, r2
-	bne	trace
-	ldr	lr, [fp, #-4]	@ restore lr
-	ldmia	sp!, {r0-r3, pc}
+#ifdef CONFIG_DYNAMIC_FTRACE
+ENTRY(ftrace_caller)
+	__ftrace_caller
+ENDPROC(ftrace_caller)
+#endif
 
-trace:
-	ldr	r1, [fp, #-4]	@ lr of instrumented routine
-	mov	r0, lr
-	sub	r0, r0, #MCOUNT_INSN_SIZE
-	mov	lr, pc
-	mov	pc, r2
-	ldr	lr, [fp, #-4]	@ restore lr
-	ldmia	sp!, {r0-r3, pc}
-ENDPROC(mcount)
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ENTRY(ftrace_graph_caller)
+	__ftrace_graph_caller
+ENDPROC(ftrace_graph_caller)
 #endif
 
-#endif /* CONFIG_DYNAMIC_FTRACE */
+.purgem mcount_enter
+.purgem mcount_get_lr
+.purgem mcount_exit
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	.globl return_to_handler
+return_to_handler:
+	stmdb	sp!, {r0-r3}
+	mov	r0, fp			@ frame pointer
+	bl	ftrace_return_to_handler
+	mov	lr, r0			@ r0 has real ret addr
+	ldmia	sp!, {r0-r3}
+	mov	pc, lr
+#endif
 
 ENTRY(ftrace_stub)
.Lftrace_stub:
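
A side note that makes the two mcount_get_lr variants above easier to follow. With __gnu_mcount_nc the instrumented function's prologue does "push {lr}; bl __gnu_mcount_nc", and mcount_enter then pushes five more words. A C sketch of the resulting frame, lowest address first:

	#include <stdint.h>

	/*
	 * Stack seen inside __gnu_mcount_nc after mcount_enter.
	 * mcount_get_lr's [sp, #20] is parent_lr; __ftrace_graph_caller's
	 * [sp, #16] is lr, the return address inside the instrumented function.
	 */
	struct gnu_mcount_frame {
		uint32_t r0, r1, r2, r3;	/* sp+0..sp+12, saved by stmdb */
		uint32_t lr;			/* sp+16: address in instrumented func */
		uint32_t parent_lr;		/* sp+20: pushed by "push {lr}" in the callee */
	};

	/* The OLD_MCOUNT variant relies on the APCS frame pointer instead:
	 * the instrumented function's saved lr lives at fp - 4. */
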
diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S
index d93f976fb389..ae9464900168 100644
--- a/arch/arm/kernel/entry-header.S
+++ b/arch/arm/kernel/entry-header.S
@@ -165,6 +165,25 @@
 	.endm
 #endif	/* !CONFIG_THUMB2_KERNEL */
 
+	@
+	@ Debug exceptions are taken as prefetch or data aborts.
+	@ We must disable preemption during the handler so that
+	@ we can access the debug registers safely.
+	@
+	.macro	debug_entry, fsr
+#if defined(CONFIG_HAVE_HW_BREAKPOINT) && defined(CONFIG_PREEMPT)
+	ldr	r4, =0x40f		@ mask out fsr.fs
+	and	r5, r4, \fsr
+	cmp	r5, #2			@ debug exception
+	bne	1f
+	get_thread_info r10
+	ldr	r6, [r10, #TI_PREEMPT]	@ get preempt count
+	add	r11, r6, #1		@ increment it
+	str	r11, [r10, #TI_PREEMPT]
+1:
+#endif
+	.endm
+
 /*
  * These are the registers used in the syscall handler, and allow us to
  * have in theory up to 7 arguments to a function - r0 to r6.
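
The fsr test in debug_entry encodes the check "is this abort a debug event?": the fault status field is split across FSR bits [3:0] and [10], hence the 0x40f mask, and status 0b00010 means a debug exception. The same test in C, a sketch mirroring the assembly above:

	/* Equivalent of: ldr r4, =0x40f; and r5, r4, \fsr; cmp r5, #2 */
	static inline int fsr_is_debug_event(unsigned int fsr)
	{
		return (fsr & 0x40f) == 2;
	}
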
diff --git a/arch/arm/kernel/fiq.c b/arch/arm/kernel/fiq.c
index 6ff7919613d7..e72dc34eea1c 100644
--- a/arch/arm/kernel/fiq.c
+++ b/arch/arm/kernel/fiq.c
@@ -45,6 +45,7 @@
 #include <asm/fiq.h>
 #include <asm/irq.h>
 #include <asm/system.h>
+#include <asm/traps.h>
 
 static unsigned long no_fiq_insn;
 
@@ -67,17 +68,22 @@ static struct fiq_handler default_owner = {
 
 static struct fiq_handler *current_fiq = &default_owner;
 
-int show_fiq_list(struct seq_file *p, void *v)
+int show_fiq_list(struct seq_file *p, int prec)
 {
 	if (current_fiq != &default_owner)
-		seq_printf(p, "FIQ: %s\n", current_fiq->name);
+		seq_printf(p, "%*s: %s\n", prec, "FIQ",
+			   current_fiq->name);
 
 	return 0;
 }
 
 void set_fiq_handler(void *start, unsigned int length)
 {
+#if defined(CONFIG_CPU_USE_DOMAINS)
 	memcpy((void *)0xffff001c, start, length);
+#else
+	memcpy(vectors_page + 0x1c, start, length);
+#endif
 	flush_icache_range(0xffff001c, 0xffff001c + length);
 	if (!vectors_high())
 		flush_icache_range(0x1c, 0x1c + length);
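
For context, set_fiq_handler() is one step of the FIQ claim sequence this file exports; the change above only picks the correct alias of the vector page to copy through when the kernel no longer runs with domain support. A hedged sketch of a typical caller — the handler symbols and FIQ number are illustrative:

	#include <linux/string.h>
	#include <asm/fiq.h>

	extern unsigned char my_fiq_start, my_fiq_end;	/* assembled FIQ routine */

	static struct fiq_handler my_fiq_owner = {
		.name = "my-fiq",
	};

	static int my_fiq_setup(int fiq)
	{
		struct pt_regs regs;

		if (claim_fiq(&my_fiq_owner))
			return -EBUSY;

		memset(&regs, 0, sizeof(regs));
		set_fiq_handler(&my_fiq_start, &my_fiq_end - &my_fiq_start);
		set_fiq_regs(&regs);		/* preload the banked FIQ registers */
		enable_fiq(fiq);
		return 0;
	}
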
diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
index 971ac8c36ea7..c0062ad1e847 100644
--- a/arch/arm/kernel/ftrace.c
+++ b/arch/arm/kernel/ftrace.c
@@ -24,6 +24,7 @@
 #define	NOP		0xe8bd4000	/* pop {lr} */
 #endif
 
+#ifdef CONFIG_DYNAMIC_FTRACE
 #ifdef CONFIG_OLD_MCOUNT
 #define OLD_MCOUNT_ADDR	((unsigned long) mcount)
 #define OLD_FTRACE_ADDR ((unsigned long) ftrace_caller_old)
@@ -59,9 +60,9 @@ static unsigned long adjust_address(struct dyn_ftrace *rec, unsigned long addr)
 }
 #endif
 
-/* construct a branch (BL) instruction to addr */
 #ifdef CONFIG_THUMB2_KERNEL
-static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr)
+static unsigned long ftrace_gen_branch(unsigned long pc, unsigned long addr,
+				       bool link)
 {
 	unsigned long s, j1, j2, i1, i2, imm10, imm11;
 	unsigned long first, second;
@@ -83,15 +84,22 @@ static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr)
 	j2 = (!i2) ^ s;
 
 	first = 0xf000 | (s << 10) | imm10;
-	second = 0xd000 | (j1 << 13) | (j2 << 11) | imm11;
+	second = 0x9000 | (j1 << 13) | (j2 << 11) | imm11;
+	if (link)
+		second |= 1 << 14;
 
 	return (second << 16) | first;
 }
 #else
-static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr)
+static unsigned long ftrace_gen_branch(unsigned long pc, unsigned long addr,
+				       bool link)
 {
+	unsigned long opcode = 0xea000000;
 	long offset;
 
+	if (link)
+		opcode |= 1 << 24;
+
 	offset = (long)addr - (long)(pc + 8);
 	if (unlikely(offset < -33554432 || offset > 33554428)) {
 		/* Can't generate branches that far (from ARM ARM). Ftrace
@@ -103,10 +111,15 @@ static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr)
 
 	offset = (offset >> 2) & 0x00ffffff;
 
-	return 0xeb000000 | offset;
+	return opcode | offset;
 }
 #endif
 
+static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr)
+{
+	return ftrace_gen_branch(pc, addr, true);
+}
+
 static int ftrace_modify_code(unsigned long pc, unsigned long old,
 			      unsigned long new)
 {
@@ -193,3 +206,83 @@ int __init ftrace_dyn_arch_init(void *data)
 
 	return 0;
 }
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
+			   unsigned long frame_pointer)
+{
+	unsigned long return_hooker = (unsigned long) &return_to_handler;
+	struct ftrace_graph_ent trace;
+	unsigned long old;
+	int err;
+
+	if (unlikely(atomic_read(&current->tracing_graph_pause)))
+		return;
+
+	old = *parent;
+	*parent = return_hooker;
+
+	err = ftrace_push_return_trace(old, self_addr, &trace.depth,
+				       frame_pointer);
+	if (err == -EBUSY) {
+		*parent = old;
+		return;
+	}
+
+	trace.func = self_addr;
+
+	/* Only trace if the calling function expects to */
+	if (!ftrace_graph_entry(&trace)) {
+		current->curr_ret_stack--;
+		*parent = old;
+	}
+}
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+extern unsigned long ftrace_graph_call;
+extern unsigned long ftrace_graph_call_old;
+extern void ftrace_graph_caller_old(void);
+
+static int __ftrace_modify_caller(unsigned long *callsite,
+				  void (*func) (void), bool enable)
+{
+	unsigned long caller_fn = (unsigned long) func;
+	unsigned long pc = (unsigned long) callsite;
+	unsigned long branch = ftrace_gen_branch(pc, caller_fn, false);
+	unsigned long nop = 0xe1a00000;	/* mov r0, r0 */
+	unsigned long old = enable ? nop : branch;
+	unsigned long new = enable ? branch : nop;
+
+	return ftrace_modify_code(pc, old, new);
+}
+
+static int ftrace_modify_graph_caller(bool enable)
+{
+	int ret;
+
+	ret = __ftrace_modify_caller(&ftrace_graph_call,
+				     ftrace_graph_caller,
+				     enable);
+
+#ifdef CONFIG_OLD_MCOUNT
+	if (!ret)
+		ret = __ftrace_modify_caller(&ftrace_graph_call_old,
+					     ftrace_graph_caller_old,
+					     enable);
+#endif
+
+	return ret;
+}
+
+int ftrace_enable_ftrace_graph_caller(void)
+{
+	return ftrace_modify_graph_caller(true);
+}
+
+int ftrace_disable_ftrace_graph_caller(void)
+{
+	return ftrace_modify_graph_caller(false);
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
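
The ARM-mode half of ftrace_gen_branch() is easy to check by hand: B/BL encodes a 24-bit signed word offset relative to pc + 8, and the link bit (bit 24) turns B into BL. A standalone, host-compilable sketch of the same arithmetic:

	#include <assert.h>
	#include <stdint.h>
	#include <stdio.h>

	static uint32_t arm_gen_branch(uint32_t pc, uint32_t addr, int link)
	{
		uint32_t opcode = 0xea000000;		/* B */
		int32_t offset = (int32_t)(addr - (pc + 8));

		if (link)
			opcode |= 1 << 24;		/* BL sets the L bit */

		/* same +/-32MB range check as ftrace_gen_branch() above */
		assert(offset >= -33554432 && offset <= 33554428);

		return opcode | (((uint32_t)offset >> 2) & 0x00ffffff);
	}

	int main(void)
	{
		/* BL from 0x8000 to 0x9000: offset 0xff8 >> 2 = 0x3fe */
		printf("0x%08x\n", arm_gen_branch(0x8000, 0x9000, 1));	/* 0xeb0003fe */
		return 0;
	}
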
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 6bd82d25683c..f17d9a09e8fb 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -91,6 +91,11 @@ ENTRY(stext)
 	movs	r8, r5				@ invalid machine (r5=0)?
 THUMB(	it	eq )		@ force fixup-able long branch encoding
 	beq	__error_a			@ yes, error 'a'
+
+	/*
+	 * r1 = machine no, r2 = atags,
+	 * r8 = machinfo, r9 = cpuid, r10 = procinfo
+	 */
 	bl	__vet_atags
 #ifdef CONFIG_SMP_ON_UP
 	bl	__fixup_smp
@@ -387,19 +392,19 @@ ENDPROC(__turn_mmu_on)
 
 #ifdef CONFIG_SMP_ON_UP
__fixup_smp:
-	mov	r7, #0x00070000
-	orr	r6, r7, #0xff000000	@ mask 0xff070000
-	orr	r7, r7, #0x41000000	@ val 0x41070000
-	and	r0, r9, r6
-	teq	r0, r7			@ ARM CPU and ARMv6/v7?
+	mov	r4, #0x00070000
+	orr	r3, r4, #0xff000000	@ mask 0xff070000
+	orr	r4, r4, #0x41000000	@ val 0x41070000
+	and	r0, r9, r3
+	teq	r0, r4			@ ARM CPU and ARMv6/v7?
 	bne	__fixup_smp_on_up	@ no, assume UP
 
-	orr	r6, r6, #0x0000ff00
-	orr	r6, r6, #0x000000f0	@ mask 0xff07fff0
-	orr	r7, r7, #0x0000b000
-	orr	r7, r7, #0x00000020	@ val 0x4107b020
-	and	r0, r9, r6
-	teq	r0, r7			@ ARM 11MPCore?
+	orr	r3, r3, #0x0000ff00
+	orr	r3, r3, #0x000000f0	@ mask 0xff07fff0
+	orr	r4, r4, #0x0000b000
+	orr	r4, r4, #0x00000020	@ val 0x4107b020
+	and	r0, r9, r3
+	teq	r0, r4			@ ARM 11MPCore?
 	moveq	pc, lr			@ yes, assume SMP
 
 	mrc	p15, 0, r0, c0, c0, 5	@ read MPIDR
@@ -408,15 +413,22 @@ __fixup_smp:
 
__fixup_smp_on_up:
 	adr	r0, 1f
-	ldmia	r0, {r3, r6, r7}
+	ldmia	r0, {r3 - r5}
 	sub	r3, r0, r3
-	add	r6, r6, r3
-	add	r7, r7, r3
-2:	cmp	r6, r7
-	ldmia	r6!, {r0, r4}
-	strlo	r4, [r0, r3]
-	blo	2b
-	mov	pc, lr
+	add	r4, r4, r3
+	add	r5, r5, r3
+2:	cmp	r4, r5
+	movhs	pc, lr
+	ldmia	r4!, {r0, r6}
+ ARM(	str	r6, [r0, r3]	)
+ THUMB(	add	r0, r0, r3	)
+#ifdef __ARMEB__
+ THUMB(	mov	r6, r6, ror #16	)	@ Convert word order for big-endian.
+#endif
+ THUMB(	strh	r6, [r0], #2	)	@ For Thumb-2, store as two halfwords
+ THUMB(	mov	r6, r6, lsr #16	)	@ to be robust against misaligned r3.
+ THUMB(	strh	r6, [r0]	)
+	b	2b
ENDPROC(__fixup_smp)
 
 	.align
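
The register renumbering in __fixup_smp (r6/r7 to r3/r4, freeing r6 for the new Thumb-2 store sequence) does not change the CPUID logic, which reads more easily in C. A host-side sketch of the test, constants taken from the masks above:

	#include <stdint.h>

	/*
	 * Mirrors the mask/value tests in __fixup_smp: returns 1 for a
	 * known-SMP core, 0 for definitely UP, -1 when MPIDR must be
	 * consulted (not modelled here).
	 */
	static int fixup_smp_check(uint32_t midr)
	{
		if ((midr & 0xff070000) != 0x41070000)
			return 0;	/* not an ARM Ltd. ARMv6/v7 part: assume UP */
		if ((midr & 0xff07fff0) == 0x4107b020)
			return 1;	/* ARM 11MPCore: assume SMP */
		return -1;		/* v6/v7: go on to read MPIDR */
	}
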
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index 21e3a4ab3b8c..c9f3f0467570 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -24,6 +24,7 @@
 #define pr_fmt(fmt) "hw-breakpoint: " fmt
 
 #include <linux/errno.h>
+#include <linux/hardirq.h>
 #include <linux/perf_event.h>
 #include <linux/hw_breakpoint.h>
 #include <linux/smp.h>
@@ -44,6 +45,7 @@ static DEFINE_PER_CPU(struct perf_event *, wp_on_reg[ARM_MAX_WRP]);
 
 /* Number of BRP/WRP registers on this CPU. */
 static int core_num_brps;
+static int core_num_reserved_brps;
 static int core_num_wrps;
 
 /* Debug architecture version. */
@@ -52,87 +54,6 @@ static u8 debug_arch;
 /* Maximum supported watchpoint length. */
 static u8 max_watchpoint_len;
 
-/* Determine number of BRP registers available. */
-static int get_num_brps(void)
-{
-	u32 didr;
-	ARM_DBG_READ(c0, 0, didr);
-	return ((didr >> 24) & 0xf) + 1;
-}
-
-/* Determine number of WRP registers available. */
-static int get_num_wrps(void)
-{
-	/*
-	 * FIXME: When a watchpoint fires, the only way to work out which
-	 * watchpoint it was is by disassembling the faulting instruction
-	 * and working out the address of the memory access.
-	 *
-	 * Furthermore, we can only do this if the watchpoint was precise
-	 * since imprecise watchpoints prevent us from calculating register
-	 * based addresses.
-	 *
-	 * For the time being, we only report 1 watchpoint register so we
-	 * always know which watchpoint fired. In the future we can either
-	 * add a disassembler and address generation emulator, or we can
-	 * insert a check to see if the DFAR is set on watchpoint exception
-	 * entry [the ARM ARM states that the DFAR is UNKNOWN, but
-	 * experience shows that it is set on some implementations].
-	 */
-
-#if 0
-	u32 didr, wrps;
-	ARM_DBG_READ(c0, 0, didr);
-	return ((didr >> 28) & 0xf) + 1;
-#endif
-
-	return 1;
-}
-
-int hw_breakpoint_slots(int type)
-{
-	/*
-	 * We can be called early, so don't rely on
-	 * our static variables being initialised.
-	 */
-	switch (type) {
-	case TYPE_INST:
-		return get_num_brps();
-	case TYPE_DATA:
-		return get_num_wrps();
-	default:
-		pr_warning("unknown slot type: %d\n", type);
-		return 0;
-	}
-}
-
-/* Determine debug architecture. */
-static u8 get_debug_arch(void)
-{
-	u32 didr;
-
-	/* Do we implement the extended CPUID interface? */
-	if (((read_cpuid_id() >> 16) & 0xf) != 0xf) {
-		pr_warning("CPUID feature registers not supported. "
-			   "Assuming v6 debug is present.\n");
-		return ARM_DEBUG_ARCH_V6;
-	}
-
-	ARM_DBG_READ(c0, 0, didr);
-	return (didr >> 16) & 0xf;
-}
-
-/* Does this core support mismatch breakpoints? */
-static int core_has_mismatch_bps(void)
-{
-	return debug_arch >= ARM_DEBUG_ARCH_V7_ECP14 && core_num_brps > 1;
-}
-
-u8 arch_get_debug_arch(void)
-{
-	return debug_arch;
-}
-
 #define READ_WB_REG_CASE(OP2, M, VAL)		\
 	case ((OP2 << 4) + M):			\
 		ARM_DBG_READ(c ## M, OP2, VAL); \
@@ -210,6 +131,94 @@ static void write_wb_reg(int n, u32 val)
 	isb();
 }
 
+/* Determine debug architecture. */
+static u8 get_debug_arch(void)
+{
+	u32 didr;
+
+	/* Do we implement the extended CPUID interface? */
+	if (((read_cpuid_id() >> 16) & 0xf) != 0xf) {
+		pr_warning("CPUID feature registers not supported. "
+			   "Assuming v6 debug is present.\n");
+		return ARM_DEBUG_ARCH_V6;
+	}
+
+	ARM_DBG_READ(c0, 0, didr);
+	return (didr >> 16) & 0xf;
+}
+
+u8 arch_get_debug_arch(void)
+{
+	return debug_arch;
+}
+
+/* Determine number of BRP register available. */
+static int get_num_brp_resources(void)
+{
+	u32 didr;
+	ARM_DBG_READ(c0, 0, didr);
+	return ((didr >> 24) & 0xf) + 1;
+}
+
+/* Does this core support mismatch breakpoints? */
+static int core_has_mismatch_brps(void)
+{
+	return (get_debug_arch() >= ARM_DEBUG_ARCH_V7_ECP14 &&
+		get_num_brp_resources() > 1);
+}
+
+/* Determine number of usable WRPs available. */
+static int get_num_wrps(void)
+{
+	/*
+	 * FIXME: When a watchpoint fires, the only way to work out which
+	 * watchpoint it was is by disassembling the faulting instruction
+	 * and working out the address of the memory access.
+	 *
+	 * Furthermore, we can only do this if the watchpoint was precise
+	 * since imprecise watchpoints prevent us from calculating register
+	 * based addresses.
+	 *
+	 * Providing we have more than 1 breakpoint register, we only report
+	 * a single watchpoint register for the time being. This way, we always
+	 * know which watchpoint fired. In the future we can either add a
+	 * disassembler and address generation emulator, or we can insert a
+	 * check to see if the DFAR is set on watchpoint exception entry
+	 * [the ARM ARM states that the DFAR is UNKNOWN, but experience shows
+	 * that it is set on some implementations].
+	 */
+
+#if 0
+	int wrps;
+	u32 didr;
+	ARM_DBG_READ(c0, 0, didr);
+	wrps = ((didr >> 28) & 0xf) + 1;
+#endif
+	int wrps = 1;
+
+	if (core_has_mismatch_brps() && wrps >= get_num_brp_resources())
+		wrps = get_num_brp_resources() - 1;
+
+	return wrps;
+}
+
+/* We reserve one breakpoint for each watchpoint. */
+static int get_num_reserved_brps(void)
+{
+	if (core_has_mismatch_brps())
+		return get_num_wrps();
+	return 0;
+}
+
+/* Determine number of usable BRPs available. */
+static int get_num_brps(void)
+{
+	int brps = get_num_brp_resources();
+	if (core_has_mismatch_brps())
+		brps -= get_num_reserved_brps();
+	return brps;
+}
+
 /*
  * In order to access the breakpoint/watchpoint control registers,
  * we must be running in debug monitor mode. Unfortunately, we can
@@ -230,8 +239,12 @@ static int enable_monitor_mode(void)
 		goto out;
 	}
 
+	/* If monitor mode is already enabled, just return. */
+	if (dscr & ARM_DSCR_MDBGEN)
+		goto out;
+
 	/* Write to the corresponding DSCR. */
-	switch (debug_arch) {
+	switch (get_debug_arch()) {
 	case ARM_DEBUG_ARCH_V6:
 	case ARM_DEBUG_ARCH_V6_1:
 		ARM_DBG_WRITE(c1, 0, (dscr | ARM_DSCR_MDBGEN));
@@ -246,15 +259,30 @@ static int enable_monitor_mode(void)
 
 	/* Check that the write made it through. */
 	ARM_DBG_READ(c1, 0, dscr);
-	if (WARN_ONCE(!(dscr & ARM_DSCR_MDBGEN),
-			"failed to enable monitor mode.")) {
+	if (!(dscr & ARM_DSCR_MDBGEN))
 		ret = -EPERM;
-	}
 
 out:
 	return ret;
 }
 
+int hw_breakpoint_slots(int type)
+{
+	/*
+	 * We can be called early, so don't rely on
+	 * our static variables being initialised.
+	 */
+	switch (type) {
+	case TYPE_INST:
+		return get_num_brps();
+	case TYPE_DATA:
+		return get_num_wrps();
+	default:
+		pr_warning("unknown slot type: %d\n", type);
+		return 0;
+	}
+}
+
 /*
  * Check if 8-bit byte-address select is available.
  * This clobbers WRP 0.
@@ -268,9 +296,6 @@ static u8 get_max_wp_len(void)
 	if (debug_arch < ARM_DEBUG_ARCH_V7_ECP14)
 		goto out;
 
-	if (enable_monitor_mode())
-		goto out;
-
 	memset(&ctrl, 0, sizeof(ctrl));
 	ctrl.len = ARM_BREAKPOINT_LEN_8;
 	ctrl_reg = encode_ctrl_reg(ctrl);
@@ -290,23 +315,6 @@ u8 arch_get_max_wp_len(void)
 }
 
 /*
- * Handler for reactivating a suspended watchpoint when the single
- * step `mismatch' breakpoint is triggered.
- */
-static void wp_single_step_handler(struct perf_event *bp, int unused,
-				   struct perf_sample_data *data,
-				   struct pt_regs *regs)
-{
-	perf_event_enable(counter_arch_bp(bp)->suspended_wp);
-	unregister_hw_breakpoint(bp);
-}
-
-static int bp_is_single_step(struct perf_event *bp)
-{
-	return bp->overflow_handler == wp_single_step_handler;
-}
-
-/*
  * Install a perf counter breakpoint.
  */
 int arch_install_hw_breakpoint(struct perf_event *bp)
@@ -314,30 +322,41 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
 	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
 	struct perf_event **slot, **slots;
 	int i, max_slots, ctrl_base, val_base, ret = 0;
+	u32 addr, ctrl;
 
 	/* Ensure that we are in monitor mode and halting mode is disabled. */
 	ret = enable_monitor_mode();
 	if (ret)
 		goto out;
 
+	addr = info->address;
+	ctrl = encode_ctrl_reg(info->ctrl) | 0x1;
+
 	if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) {
 		/* Breakpoint */
 		ctrl_base = ARM_BASE_BCR;
 		val_base = ARM_BASE_BVR;
-		slots = __get_cpu_var(bp_on_reg);
-		max_slots = core_num_brps - 1;
-
-		if (bp_is_single_step(bp)) {
-			info->ctrl.mismatch = 1;
-			i = max_slots;
-			slots[i] = bp;
-			goto setup;
+		slots = (struct perf_event **)__get_cpu_var(bp_on_reg);
+		max_slots = core_num_brps;
+		if (info->step_ctrl.enabled) {
+			/* Override the breakpoint data with the step data. */
+			addr = info->trigger & ~0x3;
+			ctrl = encode_ctrl_reg(info->step_ctrl);
 		}
 	} else {
 		/* Watchpoint */
-		ctrl_base = ARM_BASE_WCR;
-		val_base = ARM_BASE_WVR;
-		slots = __get_cpu_var(wp_on_reg);
+		if (info->step_ctrl.enabled) {
+			/* Install into the reserved breakpoint region. */
+			ctrl_base = ARM_BASE_BCR + core_num_brps;
+			val_base = ARM_BASE_BVR + core_num_brps;
+			/* Override the watchpoint data with the step data. */
+			addr = info->trigger & ~0x3;
+			ctrl = encode_ctrl_reg(info->step_ctrl);
+		} else {
+			ctrl_base = ARM_BASE_WCR;
+			val_base = ARM_BASE_WVR;
+		}
+		slots = (struct perf_event **)__get_cpu_var(wp_on_reg);
 		max_slots = core_num_wrps;
 	}
 
@@ -355,12 +374,11 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
 		goto out;
 	}
 
-setup:
 	/* Setup the address register. */
-	write_wb_reg(val_base + i, info->address);
+	write_wb_reg(val_base + i, addr);
 
 	/* Setup the control register. */
-	write_wb_reg(ctrl_base + i, encode_ctrl_reg(info->ctrl) | 0x1);
+	write_wb_reg(ctrl_base + i, ctrl);
 
 out:
 	return ret;
@@ -375,18 +393,15 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp)
 	if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) {
 		/* Breakpoint */
 		base = ARM_BASE_BCR;
-		slots = __get_cpu_var(bp_on_reg);
-		max_slots = core_num_brps - 1;
-
-		if (bp_is_single_step(bp)) {
-			i = max_slots;
-			slots[i] = NULL;
-			goto reset;
-		}
+		slots = (struct perf_event **)__get_cpu_var(bp_on_reg);
+		max_slots = core_num_brps;
 	} else {
 		/* Watchpoint */
-		base = ARM_BASE_WCR;
-		slots = __get_cpu_var(wp_on_reg);
+		if (info->step_ctrl.enabled)
+			base = ARM_BASE_BCR + core_num_brps;
+		else
+			base = ARM_BASE_WCR;
+		slots = (struct perf_event **)__get_cpu_var(wp_on_reg);
 		max_slots = core_num_wrps;
 	}
 
@@ -403,7 +418,6 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp)
 	if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot"))
 		return;
 
-reset:
 	/* Reset the control register. */
 	write_wb_reg(base + i, 0);
 }
@@ -537,12 +551,23 @@ static int arch_build_bp_info(struct perf_event *bp)
 		return -EINVAL;
 	}
 
+	/*
+	 * Breakpoints must be of length 2 (thumb) or 4 (ARM) bytes.
+	 * Watchpoints can be of length 1, 2, 4 or 8 bytes if supported
+	 * by the hardware and must be aligned to the appropriate number of
+	 * bytes.
+	 */
+	if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE &&
+	    info->ctrl.len != ARM_BREAKPOINT_LEN_2 &&
+	    info->ctrl.len != ARM_BREAKPOINT_LEN_4)
+		return -EINVAL;
+
 	/* Address */
 	info->address = bp->attr.bp_addr;
 
 	/* Privilege */
 	info->ctrl.privilege = ARM_BREAKPOINT_USER;
-	if (arch_check_bp_in_kernelspace(bp) && !bp_is_single_step(bp))
+	if (arch_check_bp_in_kernelspace(bp))
 		info->ctrl.privilege |= ARM_BREAKPOINT_PRIV;
 
 	/* Enabled? */
@@ -561,7 +586,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
 {
 	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
 	int ret = 0;
-	u32 bytelen, max_len, offset, alignment_mask = 0x3;
+	u32 offset, alignment_mask = 0x3;
 
 	/* Build the arch_hw_breakpoint. */
 	ret = arch_build_bp_info(bp);
@@ -571,84 +596,85 @@
 	/* Check address alignment. */
 	if (info->ctrl.len == ARM_BREAKPOINT_LEN_8)
 		alignment_mask = 0x7;
-	if (info->address & alignment_mask) {
-		/*
-		 * Try to fix the alignment. This may result in a length
-		 * that is too large, so we must check for that.
-		 */
-		bytelen = get_hbp_len(info->ctrl.len);
-		max_len = info->ctrl.type == ARM_BREAKPOINT_EXECUTE ? 4 :
-				max_watchpoint_len;
-
-		if (max_len >= 8)
-			offset = info->address & 0x7;
-		else
-			offset = info->address & 0x3;
-
-		if (bytelen > (1 << ((max_len - (offset + 1)) >> 1))) {
-			ret = -EFBIG;
-			goto out;
-		}
-
-		info->ctrl.len <<= offset;
-		info->address &= ~offset;
-
-		pr_debug("breakpoint alignment fixup: length = 0x%x, "
-			"address = 0x%x\n", info->ctrl.len, info->address);
+	offset = info->address & alignment_mask;
+	switch (offset) {
+	case 0:
+		/* Aligned */
+		break;
+	case 1:
+		/* Allow single byte watchpoint. */
+		if (info->ctrl.len == ARM_BREAKPOINT_LEN_1)
+			break;
+	case 2:
+		/* Allow halfword watchpoints and breakpoints. */
+		if (info->ctrl.len == ARM_BREAKPOINT_LEN_2)
+			break;
+	default:
+		ret = -EINVAL;
+		goto out;
 	}
 
+	info->address &= ~alignment_mask;
+	info->ctrl.len <<= offset;
+
 	/*
 	 * Currently we rely on an overflow handler to take
 	 * care of single-stepping the breakpoint when it fires.
 	 * In the case of userspace breakpoints on a core with V7 debug,
-	 * we can use the mismatch feature as a poor-man's hardware single-step.
+	 * we can use the mismatch feature as a poor-man's hardware
+	 * single-step, but this only works for per-task breakpoints.
 	 */
 	if (WARN_ONCE(!bp->overflow_handler &&
-		(arch_check_bp_in_kernelspace(bp) || !core_has_mismatch_bps()),
+		(arch_check_bp_in_kernelspace(bp) || !core_has_mismatch_brps()
+		 || !bp->hw.bp_target),
 			"overflow handler required but none found")) {
 		ret = -EINVAL;
-		goto out;
 	}
 out:
 	return ret;
 }
 
-static void update_mismatch_flag(int idx, int flag)
+/*
+ * Enable/disable single-stepping over the breakpoint bp at address addr.
+ */
+static void enable_single_step(struct perf_event *bp, u32 addr)
 {
-	struct perf_event *bp = __get_cpu_var(bp_on_reg[idx]);
-	struct arch_hw_breakpoint *info;
-
-	if (bp == NULL)
-		return;
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
 
-	info = counter_arch_bp(bp);
+	arch_uninstall_hw_breakpoint(bp);
+	info->step_ctrl.mismatch  = 1;
+	info->step_ctrl.len = ARM_BREAKPOINT_LEN_4;
+	info->step_ctrl.type = ARM_BREAKPOINT_EXECUTE;
+	info->step_ctrl.privilege = info->ctrl.privilege;
+	info->step_ctrl.enabled = 1;
+	info->trigger = addr;
+	arch_install_hw_breakpoint(bp);
+}
 
-	/* Update the mismatch field to enter/exit `single-step' mode */
-	if (!bp->overflow_handler && info->ctrl.mismatch != flag) {
-		info->ctrl.mismatch = flag;
-		write_wb_reg(ARM_BASE_BCR + idx, encode_ctrl_reg(info->ctrl) | 0x1);
-	}
+static void disable_single_step(struct perf_event *bp)
+{
+	arch_uninstall_hw_breakpoint(bp);
+	counter_arch_bp(bp)->step_ctrl.enabled = 0;
+	arch_install_hw_breakpoint(bp);
 }
 
 static void watchpoint_handler(unsigned long unknown, struct pt_regs *regs)
 {
 	int i;
-	struct perf_event *bp, **slots = __get_cpu_var(wp_on_reg);
+	struct perf_event *wp, **slots;
 	struct arch_hw_breakpoint *info;
-	struct perf_event_attr attr;
+
+	slots = (struct perf_event **)__get_cpu_var(wp_on_reg);
 
 	/* Without a disassembler, we can only handle 1 watchpoint. */
 	BUG_ON(core_num_wrps > 1);
 
-	hw_breakpoint_init(&attr);
-	attr.bp_addr = regs->ARM_pc & ~0x3;
-	attr.bp_len = HW_BREAKPOINT_LEN_4;
-	attr.bp_type = HW_BREAKPOINT_X;
-
 	for (i = 0; i < core_num_wrps; ++i) {
 		rcu_read_lock();
 
-		if (slots[i] == NULL) {
+		wp = slots[i];
+
+		if (wp == NULL) {
 			rcu_read_unlock();
 			continue;
 		}
@@ -658,24 +684,51 @@ static void watchpoint_handler(unsigned long unknown, struct pt_regs *regs)
 		 * single watchpoint, we can set the trigger to the lowest
 		 * possible faulting address.
 		 */
-		info = counter_arch_bp(slots[i]);
-		info->trigger = slots[i]->attr.bp_addr;
+		info = counter_arch_bp(wp);
+		info->trigger = wp->attr.bp_addr;
 		pr_debug("watchpoint fired: address = 0x%x\n", info->trigger);
-		perf_bp_event(slots[i], regs);
+		perf_bp_event(wp, regs);
 
 		/*
 		 * If no overflow handler is present, insert a temporary
 		 * mismatch breakpoint so we can single-step over the
 		 * watchpoint trigger.
 		 */
-		if (!slots[i]->overflow_handler) {
-			bp = register_user_hw_breakpoint(&attr,
-							 wp_single_step_handler,
-							 current);
-			counter_arch_bp(bp)->suspended_wp = slots[i];
-			perf_event_disable(slots[i]);
-		}
+		if (!wp->overflow_handler)
+			enable_single_step(wp, instruction_pointer(regs));
+
+		rcu_read_unlock();
+	}
+}
 
+static void watchpoint_single_step_handler(unsigned long pc)
+{
+	int i;
+	struct perf_event *wp, **slots;
+	struct arch_hw_breakpoint *info;
+
+	slots = (struct perf_event **)__get_cpu_var(wp_on_reg);
+
+	for (i = 0; i < core_num_reserved_brps; ++i) {
+		rcu_read_lock();
+
+		wp = slots[i];
+
+		if (wp == NULL)
+			goto unlock;
+
+		info = counter_arch_bp(wp);
+		if (!info->step_ctrl.enabled)
+			goto unlock;
+
+		/*
+		 * Restore the original watchpoint if we've completed the
+		 * single-step.
+		 */
+		if (info->trigger != pc)
+			disable_single_step(wp);
+
+unlock:
 		rcu_read_unlock();
 	}
 }
@@ -683,62 +736,69 @@ static void watchpoint_handler(unsigned long unknown, struct pt_regs *regs)
 static void breakpoint_handler(unsigned long unknown, struct pt_regs *regs)
 {
 	int i;
-	int mismatch;
 	u32 ctrl_reg, val, addr;
-	struct perf_event *bp, **slots = __get_cpu_var(bp_on_reg);
+	struct perf_event *bp, **slots;
 	struct arch_hw_breakpoint *info;
 	struct arch_hw_breakpoint_ctrl ctrl;
 
+	slots = (struct perf_event **)__get_cpu_var(bp_on_reg);
+
 	/* The exception entry code places the amended lr in the PC. */
 	addr = regs->ARM_pc;
 
+	/* Check the currently installed breakpoints first. */
 	for (i = 0; i < core_num_brps; ++i) {
 		rcu_read_lock();
 
 		bp = slots[i];
 
-		if (bp == NULL) {
-			rcu_read_unlock();
-			continue;
-		}
+		if (bp == NULL)
+			goto unlock;
 
-		mismatch = 0;
+		info = counter_arch_bp(bp);
 
 		/* Check if the breakpoint value matches. */
 		val = read_wb_reg(ARM_BASE_BVR + i);
 		if (val != (addr & ~0x3))
-			goto unlock;
+			goto mismatch;
 
 		/* Possible match, check the byte address select to confirm. */
 		ctrl_reg = read_wb_reg(ARM_BASE_BCR + i);
 		decode_ctrl_reg(ctrl_reg, &ctrl);
 		if ((1 << (addr & 0x3)) & ctrl.len) {
-			mismatch = 1;
-			info = counter_arch_bp(bp);
 			info->trigger = addr;
-		}
-
-unlock:
-		if ((mismatch && !info->ctrl.mismatch) || bp_is_single_step(bp)) {
 			pr_debug("breakpoint fired: address = 0x%x\n", addr);
 			perf_bp_event(bp, regs);
+			if (!bp->overflow_handler)
+				enable_single_step(bp, addr);
+			goto unlock;
 		}
 
-		update_mismatch_flag(i, mismatch);
+mismatch:
+		/* If we're stepping a breakpoint, it can now be restored. */
+		if (info->step_ctrl.enabled)
+			disable_single_step(bp);
+unlock:
 		rcu_read_unlock();
 	}
+
+	/* Handle any pending watchpoint single-step breakpoints. */
+	watchpoint_single_step_handler(addr);
 }
 
 /*
  * Called from either the Data Abort Handler [watchpoint] or the
- * Prefetch Abort Handler [breakpoint].
+ * Prefetch Abort Handler [breakpoint] with preemption disabled.
  */
 static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr,
 				 struct pt_regs *regs)
 {
-	int ret = 1; /* Unhandled fault. */
+	int ret = 0;
 	u32 dscr;
 
+	/* We must be called with preemption disabled. */
+	WARN_ON(preemptible());
+
 	/* We only handle watchpoints and hardware breakpoints. */
 	ARM_DBG_READ(c1, 0, dscr);
 
@@ -753,25 +813,47 @@ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr,
 		watchpoint_handler(addr, regs);
 		break;
 	default:
-		goto out;
+		ret = 1; /* Unhandled fault. */
 	}
 
-	ret = 0;
-out:
+	/*
+	 * Re-enable preemption after it was disabled in the
+	 * low-level exception handling code.
+	 */
+	preempt_enable();
+
 	return ret;
 }
 
 /*
  * One-time initialisation.
 */
-static void __init reset_ctrl_regs(void *unused)
+static void reset_ctrl_regs(void *unused)
 {
 	int i;
 
+	/*
+	 * v7 debug contains save and restore registers so that debug state
+	 * can be maintained across low-power modes without leaving
+	 * the debug logic powered up. It is IMPLEMENTATION DEFINED whether
+	 * we can write to the debug registers out of reset, so we must
+	 * unlock the OS Lock Access Register to avoid taking undefined
+	 * instruction exceptions later on.
+	 */
+	if (debug_arch >= ARM_DEBUG_ARCH_V7_ECP14) {
+		/*
+		 * Unconditionally clear the lock by writing a value
+		 * other than 0xC5ACCE55 to the access register.
+		 */
+		asm volatile("mcr p14, 0, %0, c1, c0, 4" : : "r" (0));
+		isb();
+	}
+
 	if (enable_monitor_mode())
 		return;
 
-	for (i = 0; i < core_num_brps; ++i) {
+	/* We must also reset any reserved registers. */
+	for (i = 0; i < core_num_brps + core_num_reserved_brps; ++i) {
 		write_wb_reg(ARM_BASE_BCR + i, 0UL);
 		write_wb_reg(ARM_BASE_BVR + i, 0UL);
 	}
@@ -782,45 +864,57 @@ static void reset_ctrl_regs(void *unused)
 	}
 }
 
+static int __cpuinit dbg_reset_notify(struct notifier_block *self,
+				      unsigned long action, void *cpu)
+{
+	if (action == CPU_ONLINE)
+		smp_call_function_single((int)cpu, reset_ctrl_regs, NULL, 1);
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata dbg_reset_nb = {
+	.notifier_call = dbg_reset_notify,
+};
+
 static int __init arch_hw_breakpoint_init(void)
 {
-	int ret = 0;
 	u32 dscr;
 
 	debug_arch = get_debug_arch();
 
 	if (debug_arch > ARM_DEBUG_ARCH_V7_ECP14) {
 		pr_info("debug architecture 0x%x unsupported.\n", debug_arch);
-		ret = -ENODEV;
-		goto out;
+		return 0;
 	}
 
 	/* Determine how many BRPs/WRPs are available. */
 	core_num_brps = get_num_brps();
+	core_num_reserved_brps = get_num_reserved_brps();
 	core_num_wrps = get_num_wrps();
 
 	pr_info("found %d breakpoint and %d watchpoint registers.\n",
-		core_num_brps, core_num_wrps);
+		core_num_brps + core_num_reserved_brps, core_num_wrps);
 
-	if (core_has_mismatch_bps())
-		pr_info("1 breakpoint reserved for watchpoint single-step.\n");
+	if (core_num_reserved_brps)
+		pr_info("%d breakpoint(s) reserved for watchpoint "
+				"single-step.\n", core_num_reserved_brps);
 
 	ARM_DBG_READ(c1, 0, dscr);
 	if (dscr & ARM_DSCR_HDBGEN) {
 		pr_warning("halting debug mode enabled. Assuming maximum "
 				"watchpoint size of 4 bytes.");
 	} else {
-		/* Work out the maximum supported watchpoint length. */
-		max_watchpoint_len = get_max_wp_len();
-		pr_info("maximum watchpoint size is %u bytes.\n",
-				max_watchpoint_len);
-
 		/*
 		 * Reset the breakpoint resources. We assume that a halting
 		 * debugger will leave the world in a nice state for us.
 		 */
 		smp_call_function(reset_ctrl_regs, NULL, 1);
 		reset_ctrl_regs(NULL);
+
+		/* Work out the maximum supported watchpoint length. */
+		max_watchpoint_len = get_max_wp_len();
+		pr_info("maximum watchpoint size is %u bytes.\n",
+			max_watchpoint_len);
 	}
 
 	/* Register debug fault handler. */
@@ -829,8 +923,9 @@ static int __init arch_hw_breakpoint_init(void)
 	hook_ifault_code(2, hw_breakpoint_pending, SIGTRAP, TRAP_HWBKPT,
 			"breakpoint debug exception");
 
-out:
-	return ret;
+	/* Register hotplug notifier. */
+	register_cpu_notifier(&dbg_reset_nb);
+	return 0;
 }
arch_initcall(arch_hw_breakpoint_init);
 
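
Since the removed wp_single_step_handler shows the consumer side of this backend, here is a hedged sketch of registering a watchpoint against it, using the same register_user_hw_breakpoint() call the old single-step path used. The watched variable and handler names are illustrative:

	#include <linux/err.h>
	#include <linux/hw_breakpoint.h>
	#include <linux/perf_event.h>
	#include <linux/sched.h>

	static unsigned long my_watched_var;		/* illustrative target */
	static struct perf_event *my_wp;

	static void my_wp_handler(struct perf_event *bp, int unused,
				  struct perf_sample_data *data, struct pt_regs *regs)
	{
		pr_info("watchpoint hit, pc = 0x%lx\n", instruction_pointer(regs));
	}

	static int my_wp_install(void)
	{
		struct perf_event_attr attr;

		hw_breakpoint_init(&attr);
		attr.bp_addr = (unsigned long)&my_watched_var;
		attr.bp_len = HW_BREAKPOINT_LEN_4;
		attr.bp_type = HW_BREAKPOINT_W;

		my_wp = register_user_hw_breakpoint(&attr, my_wp_handler, current);
		return IS_ERR(my_wp) ? PTR_ERR(my_wp) : 0;
	}
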
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index 36ad3be4692a..8135438b8818 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -35,8 +35,10 @@
35#include <linux/list.h> 35#include <linux/list.h>
36#include <linux/kallsyms.h> 36#include <linux/kallsyms.h>
37#include <linux/proc_fs.h> 37#include <linux/proc_fs.h>
38#include <linux/ftrace.h>
38 39
39#include <asm/system.h> 40#include <asm/system.h>
41#include <asm/mach/arch.h>
40#include <asm/mach/irq.h> 42#include <asm/mach/irq.h>
41#include <asm/mach/time.h> 43#include <asm/mach/time.h>
42 44
@@ -47,8 +49,6 @@
47#define irq_finish(irq) do { } while (0) 49#define irq_finish(irq) do { } while (0)
48#endif 50#endif
49 51
50unsigned int arch_nr_irqs;
51void (*init_arch_irq)(void) __initdata = NULL;
52unsigned long irq_err_count; 52unsigned long irq_err_count;
53 53
54int show_interrupts(struct seq_file *p, void *v) 54int show_interrupts(struct seq_file *p, void *v)
@@ -57,11 +57,20 @@ int show_interrupts(struct seq_file *p, void *v)
57 struct irq_desc *desc; 57 struct irq_desc *desc;
58 struct irqaction * action; 58 struct irqaction * action;
59 unsigned long flags; 59 unsigned long flags;
60 int prec, n;
61
62 for (prec = 3, n = 1000; prec < 10 && n <= nr_irqs; prec++)
63 n *= 10;
64
65#ifdef CONFIG_SMP
66 if (prec < 4)
67 prec = 4;
68#endif
60 69
61 if (i == 0) { 70 if (i == 0) {
62 char cpuname[12]; 71 char cpuname[12];
63 72
64 seq_printf(p, " "); 73 seq_printf(p, "%*s ", prec, "");
65 for_each_present_cpu(cpu) { 74 for_each_present_cpu(cpu) {
66 sprintf(cpuname, "CPU%d", cpu); 75 sprintf(cpuname, "CPU%d", cpu);
67 seq_printf(p, " %10s", cpuname); 76 seq_printf(p, " %10s", cpuname);
@@ -76,7 +85,7 @@ int show_interrupts(struct seq_file *p, void *v)
76 if (!action) 85 if (!action)
77 goto unlock; 86 goto unlock;
78 87
79 seq_printf(p, "%3d: ", i); 88 seq_printf(p, "%*d: ", prec, i);
80 for_each_present_cpu(cpu) 89 for_each_present_cpu(cpu)
81 seq_printf(p, "%10u ", kstat_irqs_cpu(i, cpu)); 90 seq_printf(p, "%10u ", kstat_irqs_cpu(i, cpu));
82 seq_printf(p, " %10s", desc->chip->name ? : "-"); 91 seq_printf(p, " %10s", desc->chip->name ? : "-");
@@ -89,13 +98,15 @@ unlock:
89 raw_spin_unlock_irqrestore(&desc->lock, flags); 98 raw_spin_unlock_irqrestore(&desc->lock, flags);
90 } else if (i == nr_irqs) { 99 } else if (i == nr_irqs) {
91#ifdef CONFIG_FIQ 100#ifdef CONFIG_FIQ
92 show_fiq_list(p, v); 101 show_fiq_list(p, prec);
93#endif 102#endif
94#ifdef CONFIG_SMP 103#ifdef CONFIG_SMP
95 show_ipi_list(p); 104 show_ipi_list(p, prec);
96 show_local_irqs(p); 105#endif
106#ifdef CONFIG_LOCAL_TIMERS
107 show_local_irqs(p, prec);
97#endif 108#endif
98 seq_printf(p, "Err: %10lu\n", irq_err_count); 109 seq_printf(p, "%*s: %10lu\n", prec, "Err", irq_err_count);
99 } 110 }
100 return 0; 111 return 0;
101} 112}
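The width computation added above amounts to the decimal digit count of nr_irqs clamped to the range 3..10, with a floor of 4 on SMP so the per-CPU rows line up. A standalone rendering, purely for illustration:

	/* illustrative helper, not part of the patch */
	static int irq_field_width(unsigned int nr_irqs)
	{
		int prec = 3;
		unsigned int n = 1000;

		while (prec < 10 && n <= nr_irqs) {
			n *= 10;
			prec++;
		}
		return prec;	/* e.g. nr_irqs = 1095 -> 4 columns */
	}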
@@ -105,7 +116,8 @@ unlock:
105 * come via this function. Instead, they should provide their 116 * come via this function. Instead, they should provide their
106 * own 'handler' 117 * own 'handler'
107 */ 118 */
108asmlinkage void __exception asm_do_IRQ(unsigned int irq, struct pt_regs *regs) 119asmlinkage void __exception_irq_entry
120asm_do_IRQ(unsigned int irq, struct pt_regs *regs)
109{ 121{
110 struct pt_regs *old_regs = set_irq_regs(regs); 122 struct pt_regs *old_regs = set_irq_regs(regs);
111 123
@@ -154,13 +166,13 @@ void set_irq_flags(unsigned int irq, unsigned int iflags)
154 166
155void __init init_IRQ(void) 167void __init init_IRQ(void)
156{ 168{
157 init_arch_irq(); 169 machine_desc->init_irq();
158} 170}
159 171
160#ifdef CONFIG_SPARSE_IRQ 172#ifdef CONFIG_SPARSE_IRQ
161int __init arch_probe_nr_irqs(void) 173int __init arch_probe_nr_irqs(void)
162{ 174{
163 nr_irqs = arch_nr_irqs ? arch_nr_irqs : NR_IRQS; 175 nr_irqs = machine_desc->nr_irqs ? machine_desc->nr_irqs : NR_IRQS;
164 return nr_irqs; 176 return nr_irqs;
165} 177}
166#endif 178#endif
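With arch_nr_irqs and init_arch_irq gone, a platform supplies these hooks through its machine descriptor instead. A hedged sketch with a made-up board, using the fields the code above dereferences:

	MACHINE_START(MYBOARD, "hypothetical board")
		.nr_irqs	= 96,			/* consumed by arch_probe_nr_irqs() */
		.init_irq	= myboard_init_irq,	/* called from init_IRQ() */
	MACHINE_END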
diff --git a/arch/arm/kernel/iwmmxt.S b/arch/arm/kernel/iwmmxt.S
index b63b528f22a6..7fa3bb0d2397 100644
--- a/arch/arm/kernel/iwmmxt.S
+++ b/arch/arm/kernel/iwmmxt.S
@@ -19,6 +19,14 @@
19#include <asm/thread_info.h> 19#include <asm/thread_info.h>
20#include <asm/asm-offsets.h> 20#include <asm/asm-offsets.h>
21 21
22#if defined(CONFIG_CPU_PJ4)
23#define PJ4(code...) code
24#define XSC(code...)
25#else
26#define PJ4(code...)
27#define XSC(code...) code
28#endif
29
22#define MMX_WR0 (0x00) 30#define MMX_WR0 (0x00)
23#define MMX_WR1 (0x08) 31#define MMX_WR1 (0x08)
24#define MMX_WR2 (0x10) 32#define MMX_WR2 (0x10)
@@ -58,11 +66,17 @@
58 66
59ENTRY(iwmmxt_task_enable) 67ENTRY(iwmmxt_task_enable)
60 68
61 mrc p15, 0, r2, c15, c1, 0 69 XSC(mrc p15, 0, r2, c15, c1, 0)
62 tst r2, #0x3 @ CP0 and CP1 accessible? 70 PJ4(mrc p15, 0, r2, c1, c0, 2)
71 @ CP0 and CP1 accessible?
72 XSC(tst r2, #0x3)
73 PJ4(tst r2, #0xf)
63 movne pc, lr @ if so no business here 74 movne pc, lr @ if so no business here
64 orr r2, r2, #0x3 @ enable access to CP0 and CP1 75 @ enable access to CP0 and CP1
65 mcr p15, 0, r2, c15, c1, 0 76 XSC(orr r2, r2, #0x3)
77 XSC(mcr p15, 0, r2, c15, c1, 0)
78 PJ4(orr r2, r2, #0xf)
79 PJ4(mcr p15, 0, r2, c1, c0, 2)
66 80
67 ldr r3, =concan_owner 81 ldr r3, =concan_owner
68 add r0, r10, #TI_IWMMXT_STATE @ get task Concan save area 82 add r0, r10, #TI_IWMMXT_STATE @ get task Concan save area
@@ -179,17 +193,26 @@ ENTRY(iwmmxt_task_disable)
179 teqne r1, r2 @ or specified one? 193 teqne r1, r2 @ or specified one?
180 bne 1f @ no: quit 194 bne 1f @ no: quit
181 195
182 mrc p15, 0, r4, c15, c1, 0 196 @ enable access to CP0 and CP1
183 orr r4, r4, #0x3 @ enable access to CP0 and CP1 197 XSC(mrc p15, 0, r4, c15, c1, 0)
184 mcr p15, 0, r4, c15, c1, 0 198 XSC(orr r4, r4, #0x3)
199 XSC(mcr p15, 0, r4, c15, c1, 0)
200 PJ4(mrc p15, 0, r4, c1, c0, 2)
201 PJ4(orr r4, r4, #0xf)
202 PJ4(mcr p15, 0, r4, c1, c0, 2)
203
185 mov r0, #0 @ nothing to load 204 mov r0, #0 @ nothing to load
186 str r0, [r3] @ no more current owner 205 str r0, [r3] @ no more current owner
187 mrc p15, 0, r2, c2, c0, 0 206 mrc p15, 0, r2, c2, c0, 0
188 mov r2, r2 @ cpwait 207 mov r2, r2 @ cpwait
189 bl concan_save 208 bl concan_save
190 209
191 bic r4, r4, #0x3 @ disable access to CP0 and CP1 210 @ disable access to CP0 and CP1
192 mcr p15, 0, r4, c15, c1, 0 211 XSC(bic r4, r4, #0x3)
212 XSC(mcr p15, 0, r4, c15, c1, 0)
213 PJ4(bic r4, r4, #0xf)
214 PJ4(mcr p15, 0, r4, c1, c0, 2)
215
193 mrc p15, 0, r2, c2, c0, 0 216 mrc p15, 0, r2, c2, c0, 0
194 mov r2, r2 @ cpwait 217 mov r2, r2 @ cpwait
195 218
@@ -277,8 +300,11 @@ ENTRY(iwmmxt_task_restore)
277 */ 300 */
278ENTRY(iwmmxt_task_switch) 301ENTRY(iwmmxt_task_switch)
279 302
280 mrc p15, 0, r1, c15, c1, 0 303 XSC(mrc p15, 0, r1, c15, c1, 0)
281 tst r1, #0x3 @ CP0 and CP1 accessible? 304 PJ4(mrc p15, 0, r1, c1, c0, 2)
305 @ CP0 and CP1 accessible?
306 XSC(tst r1, #0x3)
307 PJ4(tst r1, #0xf)
282 bne 1f @ yes: block them for next task 308 bne 1f @ yes: block them for next task
283 309
284 ldr r2, =concan_owner 310 ldr r2, =concan_owner
@@ -287,8 +313,11 @@ ENTRY(iwmmxt_task_switch)
287 teq r2, r3 @ next task owns it? 313 teq r2, r3 @ next task owns it?
288 movne pc, lr @ no: leave Concan disabled 314 movne pc, lr @ no: leave Concan disabled
289 315
2901: eor r1, r1, #3 @ flip Concan access 3161: @ flip Concan access
291 mcr p15, 0, r1, c15, c1, 0 317 XSC(eor r1, r1, #0x3)
318 XSC(mcr p15, 0, r1, c15, c1, 0)
319 PJ4(eor r1, r1, #0xf)
320 PJ4(mcr p15, 0, r1, c1, c0, 2)
292 321
293 mrc p15, 0, r1, c2, c0, 0 322 mrc p15, 0, r1, c2, c0, 0
294 sub pc, lr, r1, lsr #32 @ cpwait and return 323 sub pc, lr, r1, lsr #32 @ cpwait and return
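The XSC()/PJ4() wrappers above lean on variadic macros: both variants of every coprocessor-access sequence are written once, and the preprocessor keeps exactly one depending on CONFIG_CPU_PJ4. The same trick rendered in C, with hypothetical accessors, for illustration only:

	#ifdef CONFIG_CPU_PJ4
	#define PJ4(code...)	code	/* keep the PJ4 path (CP15 c1, c0, 2) */
	#define XSC(code...)		/* drop the XScale path (CP15 c15, c1, 0) */
	#else
	#define PJ4(code...)
	#define XSC(code...)	code
	#endif

	XSC(val = xscale_read_cpaccess());	/* hypothetical helpers; only one */
	PJ4(val = pj4_read_cpaccess());		/* line survives preprocessing    */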
diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c
index 3a8fd5140d7a..30ead135ff5f 100644
--- a/arch/arm/kernel/machine_kexec.c
+++ b/arch/arm/kernel/machine_kexec.c
@@ -23,6 +23,8 @@ extern unsigned long kexec_indirection_page;
23extern unsigned long kexec_mach_type; 23extern unsigned long kexec_mach_type;
24extern unsigned long kexec_boot_atags; 24extern unsigned long kexec_boot_atags;
25 25
26static atomic_t waiting_for_crash_ipi;
27
26/* 28/*
27 * Provide a dummy crash_notes definition while crash dump arrives to arm. 29 * Provide a dummy crash_notes definition while crash dump arrives to arm.
28 * This prevents breakage of crash_notes attribute in kernel/ksysfs.c. 30 * This prevents breakage of crash_notes attribute in kernel/ksysfs.c.
@@ -37,9 +39,37 @@ void machine_kexec_cleanup(struct kimage *image)
37{ 39{
38} 40}
39 41
42void machine_crash_nonpanic_core(void *unused)
43{
44 struct pt_regs regs;
45
46 crash_setup_regs(&regs, NULL);
47 printk(KERN_DEBUG "CPU %u will stop doing anything useful since another CPU has crashed\n",
48 smp_processor_id());
49 crash_save_cpu(&regs, smp_processor_id());
50 flush_cache_all();
51
52 atomic_dec(&waiting_for_crash_ipi);
53 while (1)
54 cpu_relax();
55}
56
40void machine_crash_shutdown(struct pt_regs *regs) 57void machine_crash_shutdown(struct pt_regs *regs)
41{ 58{
59 unsigned long msecs;
60
42 local_irq_disable(); 61 local_irq_disable();
62
63 atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
64 smp_call_function(machine_crash_nonpanic_core, NULL, false);
65 msecs = 1000; /* Wait at most a second for the other cpus to stop */
66 while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
67 mdelay(1);
68 msecs--;
69 }
70 if (atomic_read(&waiting_for_crash_ipi) > 0)
71 printk(KERN_WARNING "Non-crashing CPUs did not react to IPI\n");
72
43 crash_save_cpu(regs, smp_processor_id()); 73 crash_save_cpu(regs, smp_processor_id());
44 74
45 printk(KERN_INFO "Loading crashdump kernel...\n"); 75 printk(KERN_INFO "Loading crashdump kernel...\n");
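The shutdown path above bounds how long the crashing CPU waits for the others: broadcast work, then poll a shared counter for a fixed budget. The generic pattern, with hypothetical names, looks like:

	atomic_set(&pending, num_online_cpus() - 1);
	smp_call_function(stop_remote_cpu, NULL, false);	/* don't block in the IPI */

	for (msecs = 1000; atomic_read(&pending) > 0 && msecs; msecs--)
		mdelay(1);

	if (atomic_read(&pending) > 0)
		pr_warning("some CPUs did not respond to the IPI\n");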
diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
index d9bd786ce23d..0c1bb68ff4a8 100644
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -67,35 +67,6 @@ int module_frob_arch_sections(Elf_Ehdr *hdr,
67 char *secstrings, 67 char *secstrings,
68 struct module *mod) 68 struct module *mod)
69{ 69{
70#ifdef CONFIG_ARM_UNWIND
71 Elf_Shdr *s, *sechdrs_end = sechdrs + hdr->e_shnum;
72 struct arm_unwind_mapping *maps = mod->arch.map;
73
74 for (s = sechdrs; s < sechdrs_end; s++) {
75 char const *secname = secstrings + s->sh_name;
76
77 if (strcmp(".ARM.exidx.init.text", secname) == 0)
78 maps[ARM_SEC_INIT].unw_sec = s;
79 else if (strcmp(".ARM.exidx.devinit.text", secname) == 0)
80 maps[ARM_SEC_DEVINIT].unw_sec = s;
81 else if (strcmp(".ARM.exidx", secname) == 0)
82 maps[ARM_SEC_CORE].unw_sec = s;
83 else if (strcmp(".ARM.exidx.exit.text", secname) == 0)
84 maps[ARM_SEC_EXIT].unw_sec = s;
85 else if (strcmp(".ARM.exidx.devexit.text", secname) == 0)
86 maps[ARM_SEC_DEVEXIT].unw_sec = s;
87 else if (strcmp(".init.text", secname) == 0)
88 maps[ARM_SEC_INIT].sec_text = s;
89 else if (strcmp(".devinit.text", secname) == 0)
90 maps[ARM_SEC_DEVINIT].sec_text = s;
91 else if (strcmp(".text", secname) == 0)
92 maps[ARM_SEC_CORE].sec_text = s;
93 else if (strcmp(".exit.text", secname) == 0)
94 maps[ARM_SEC_EXIT].sec_text = s;
95 else if (strcmp(".devexit.text", secname) == 0)
96 maps[ARM_SEC_DEVEXIT].sec_text = s;
97 }
98#endif
99 return 0; 70 return 0;
100} 71}
101 72
@@ -300,41 +271,69 @@ apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab,
300 return -ENOEXEC; 271 return -ENOEXEC;
301} 272}
302 273
303#ifdef CONFIG_ARM_UNWIND 274struct mod_unwind_map {
304static void register_unwind_tables(struct module *mod) 275 const Elf_Shdr *unw_sec;
276 const Elf_Shdr *txt_sec;
277};
278
279int module_finalize(const Elf32_Ehdr *hdr, const Elf_Shdr *sechdrs,
280 struct module *mod)
305{ 281{
282#ifdef CONFIG_ARM_UNWIND
283 const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
284 const Elf_Shdr *s, *sechdrs_end = sechdrs + hdr->e_shnum;
285 struct mod_unwind_map maps[ARM_SEC_MAX];
306 int i; 286 int i;
307 for (i = 0; i < ARM_SEC_MAX; ++i) { 287
308 struct arm_unwind_mapping *map = &mod->arch.map[i]; 288 memset(maps, 0, sizeof(maps));
309 if (map->unw_sec && map->sec_text) 289
310 map->unwind = unwind_table_add(map->unw_sec->sh_addr, 290 for (s = sechdrs; s < sechdrs_end; s++) {
311 map->unw_sec->sh_size, 291 const char *secname = secstrs + s->sh_name;
312 map->sec_text->sh_addr, 292
313 map->sec_text->sh_size); 293 if (!(s->sh_flags & SHF_ALLOC))
294 continue;
295
296 if (strcmp(".ARM.exidx.init.text", secname) == 0)
297 maps[ARM_SEC_INIT].unw_sec = s;
298 else if (strcmp(".ARM.exidx.devinit.text", secname) == 0)
299 maps[ARM_SEC_DEVINIT].unw_sec = s;
300 else if (strcmp(".ARM.exidx", secname) == 0)
301 maps[ARM_SEC_CORE].unw_sec = s;
302 else if (strcmp(".ARM.exidx.exit.text", secname) == 0)
303 maps[ARM_SEC_EXIT].unw_sec = s;
304 else if (strcmp(".ARM.exidx.devexit.text", secname) == 0)
305 maps[ARM_SEC_DEVEXIT].unw_sec = s;
306 else if (strcmp(".init.text", secname) == 0)
307 maps[ARM_SEC_INIT].txt_sec = s;
308 else if (strcmp(".devinit.text", secname) == 0)
309 maps[ARM_SEC_DEVINIT].txt_sec = s;
310 else if (strcmp(".text", secname) == 0)
311 maps[ARM_SEC_CORE].txt_sec = s;
312 else if (strcmp(".exit.text", secname) == 0)
313 maps[ARM_SEC_EXIT].txt_sec = s;
314 else if (strcmp(".devexit.text", secname) == 0)
315 maps[ARM_SEC_DEVEXIT].txt_sec = s;
314 } 316 }
315}
316 317
317static void unregister_unwind_tables(struct module *mod) 318 for (i = 0; i < ARM_SEC_MAX; i++)
318{ 319 if (maps[i].unw_sec && maps[i].txt_sec)
319 int i = ARM_SEC_MAX; 320 mod->arch.unwind[i] =
320 while (--i >= 0) 321 unwind_table_add(maps[i].unw_sec->sh_addr,
321 unwind_table_del(mod->arch.map[i].unwind); 322 maps[i].unw_sec->sh_size,
322} 323 maps[i].txt_sec->sh_addr,
323#else 324 maps[i].txt_sec->sh_size);
324static inline void register_unwind_tables(struct module *mod) { }
325static inline void unregister_unwind_tables(struct module *mod) { }
326#endif 325#endif
327
328int
329module_finalize(const Elf32_Ehdr *hdr, const Elf_Shdr *sechdrs,
330 struct module *module)
331{
332 register_unwind_tables(module);
333 return 0; 326 return 0;
334} 327}
335 328
336void 329void
337module_arch_cleanup(struct module *mod) 330module_arch_cleanup(struct module *mod)
338{ 331{
339 unregister_unwind_tables(mod); 332#ifdef CONFIG_ARM_UNWIND
333 int i;
334
335 for (i = 0; i < ARM_SEC_MAX; i++)
336 if (mod->arch.unwind[i])
337 unwind_table_del(mod->arch.unwind[i]);
338#endif
340} 339}
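Since the rewritten module_finalize() keeps one unwind table handle per section class, the per-module arch state presumably shrinks to a plain array of table pointers. A sketch of the assumed asm/module.h layout (not shown in this diff):

	struct mod_arch_specific {
	#ifdef CONFIG_ARM_UNWIND
		struct unwind_table *unwind[ARM_SEC_MAX];
	#endif
	};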
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index fdfa4976b0bf..5efa2647a2fb 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -4,9 +4,7 @@
4 * ARM performance counter support. 4 * ARM performance counter support.
5 * 5 *
6 * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles 6 * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
7 * 7 * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com>
8 * ARMv7 support: Jean Pihet <jpihet@mvista.com>
9 * 2010 (c) MontaVista Software, LLC.
10 * 8 *
11 * This code is based on the sparc64 perf event code, which is in turn based 9 * This code is based on the sparc64 perf event code, which is in turn based
12 * on the x86 code. Callchain code is based on the ARM OProfile backtrace 10 * on the x86 code. Callchain code is based on the ARM OProfile backtrace
@@ -34,7 +32,7 @@ static struct platform_device *pmu_device;
34 * Hardware lock to serialize accesses to PMU registers. Needed for the 32 * Hardware lock to serialize accesses to PMU registers. Needed for the
35 * read/modify/write sequences. 33 * read/modify/write sequences.
36 */ 34 */
37DEFINE_SPINLOCK(pmu_lock); 35static DEFINE_RAW_SPINLOCK(pmu_lock);
38 36
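The lock guards classic read/modify/write sequences on the PMU control registers; every enable/disable path in this file follows the same shape, shown here with placeholder accessors:

	unsigned long flags, val;

	raw_spin_lock_irqsave(&pmu_lock, flags);
	val = pmu_ctrl_read();		/* placeholder for e.g. armv6_pmcr_read() */
	val |= ENABLE_BIT;		/* placeholder bit */
	pmu_ctrl_write(val);
	raw_spin_unlock_irqrestore(&pmu_lock, flags);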
39/* 37/*
40 * ARMv6 supports a maximum of 3 events, starting from index 1. If we add 38 * ARMv6 supports a maximum of 3 events, starting from index 1. If we add
@@ -67,31 +65,25 @@ struct cpu_hw_events {
67 */ 65 */
68 unsigned long active_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)]; 66 unsigned long active_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)];
69}; 67};
70DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); 68static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
71
72/* PMU names. */
73static const char *arm_pmu_names[] = {
74 [ARM_PERF_PMU_ID_XSCALE1] = "xscale1",
75 [ARM_PERF_PMU_ID_XSCALE2] = "xscale2",
76 [ARM_PERF_PMU_ID_V6] = "v6",
77 [ARM_PERF_PMU_ID_V6MP] = "v6mpcore",
78 [ARM_PERF_PMU_ID_CA8] = "ARMv7 Cortex-A8",
79 [ARM_PERF_PMU_ID_CA9] = "ARMv7 Cortex-A9",
80};
81 69
82struct arm_pmu { 70struct arm_pmu {
83 enum arm_perf_pmu_ids id; 71 enum arm_perf_pmu_ids id;
72 const char *name;
84 irqreturn_t (*handle_irq)(int irq_num, void *dev); 73 irqreturn_t (*handle_irq)(int irq_num, void *dev);
85 void (*enable)(struct hw_perf_event *evt, int idx); 74 void (*enable)(struct hw_perf_event *evt, int idx);
86 void (*disable)(struct hw_perf_event *evt, int idx); 75 void (*disable)(struct hw_perf_event *evt, int idx);
87 int (*event_map)(int evt);
88 u64 (*raw_event)(u64);
89 int (*get_event_idx)(struct cpu_hw_events *cpuc, 76 int (*get_event_idx)(struct cpu_hw_events *cpuc,
90 struct hw_perf_event *hwc); 77 struct hw_perf_event *hwc);
91 u32 (*read_counter)(int idx); 78 u32 (*read_counter)(int idx);
92 void (*write_counter)(int idx, u32 val); 79 void (*write_counter)(int idx, u32 val);
93 void (*start)(void); 80 void (*start)(void);
94 void (*stop)(void); 81 void (*stop)(void);
82 const unsigned (*cache_map)[PERF_COUNT_HW_CACHE_MAX]
83 [PERF_COUNT_HW_CACHE_OP_MAX]
84 [PERF_COUNT_HW_CACHE_RESULT_MAX];
85 const unsigned (*event_map)[PERF_COUNT_HW_MAX];
86 u32 raw_event_mask;
95 int num_events; 87 int num_events;
96 u64 max_period; 88 u64 max_period;
97}; 89};
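With the mapping callbacks replaced by table pointers, a PMU backend is now described almost entirely by data. A hedged sketch of how a backend fills in the reworked descriptor, reusing the v6 tables from the code being moved out of this file (the function-pointer members are elided):

	static const struct arm_pmu armv6pmu_sketch = {
		.id		= ARM_PERF_PMU_ID_V6,
		.name		= "v6",
		.cache_map	= &armv6_perf_cache_map,
		.event_map	= &armv6_perf_map,
		.raw_event_mask	= 0xFF,
		.num_events	= 3,
		.max_period	= (1LLU << 32) - 1,
		/* .handle_irq, .enable, .disable, ... omitted */
	};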
@@ -136,10 +128,6 @@ EXPORT_SYMBOL_GPL(perf_num_counters);
136 128
137#define CACHE_OP_UNSUPPORTED 0xFFFF 129#define CACHE_OP_UNSUPPORTED 0xFFFF
138 130
139static unsigned armpmu_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
140 [PERF_COUNT_HW_CACHE_OP_MAX]
141 [PERF_COUNT_HW_CACHE_RESULT_MAX];
142
143static int 131static int
144armpmu_map_cache_event(u64 config) 132armpmu_map_cache_event(u64 config)
145{ 133{
@@ -157,7 +145,7 @@ armpmu_map_cache_event(u64 config)
157 if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) 145 if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
158 return -EINVAL; 146 return -EINVAL;
159 147
160 ret = (int)armpmu_perf_cache_map[cache_type][cache_op][cache_result]; 148 ret = (int)(*armpmu->cache_map)[cache_type][cache_op][cache_result];
161 149
162 if (ret == CACHE_OP_UNSUPPORTED) 150 if (ret == CACHE_OP_UNSUPPORTED)
163 return -ENOENT; 151 return -ENOENT;
@@ -166,6 +154,19 @@ armpmu_map_cache_event(u64 config)
166} 154}
167 155
168static int 156static int
157armpmu_map_event(u64 config)
158{
159 int mapping = (*armpmu->event_map)[config];
160 return mapping == HW_OP_UNSUPPORTED ? -EOPNOTSUPP : mapping;
161}
162
163static int
164armpmu_map_raw_event(u64 config)
165{
166 return (int)(config & armpmu->raw_event_mask);
167}
168
169static int
169armpmu_event_set_period(struct perf_event *event, 170armpmu_event_set_period(struct perf_event *event,
170 struct hw_perf_event *hwc, 171 struct hw_perf_event *hwc,
171 int idx) 172 int idx)
@@ -458,11 +459,11 @@ __hw_perf_event_init(struct perf_event *event)
458 459
459 /* Decode the generic type into an ARM event identifier. */ 460 /* Decode the generic type into an ARM event identifier. */
460 if (PERF_TYPE_HARDWARE == event->attr.type) { 461 if (PERF_TYPE_HARDWARE == event->attr.type) {
461 mapping = armpmu->event_map(event->attr.config); 462 mapping = armpmu_map_event(event->attr.config);
462 } else if (PERF_TYPE_HW_CACHE == event->attr.type) { 463 } else if (PERF_TYPE_HW_CACHE == event->attr.type) {
463 mapping = armpmu_map_cache_event(event->attr.config); 464 mapping = armpmu_map_cache_event(event->attr.config);
464 } else if (PERF_TYPE_RAW == event->attr.type) { 465 } else if (PERF_TYPE_RAW == event->attr.type) {
465 mapping = armpmu->raw_event(event->attr.config); 466 mapping = armpmu_map_raw_event(event->attr.config);
466 } else { 467 } else {
467 pr_debug("event type %x not supported\n", event->attr.type); 468 pr_debug("event type %x not supported\n", event->attr.type);
468 return -EOPNOTSUPP; 469 return -EOPNOTSUPP;
@@ -603,2366 +604,10 @@ static struct pmu pmu = {
603 .read = armpmu_read, 604 .read = armpmu_read,
604}; 605};
605 606
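armpmu_map_cache_event() above unpacks attr.config using the standard perf cache-event encoding: one byte each for cache, op and result. For example, a user asking for L1 data-cache read misses would pass:

	struct perf_event_attr attr = {
		.type	= PERF_TYPE_HW_CACHE,
		.config	= PERF_COUNT_HW_CACHE_L1D |
			  (PERF_COUNT_HW_CACHE_OP_READ << 8) |
			  (PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
	};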
606/* 607/* Include the PMU-specific implementations. */
607 * ARMv6 Performance counter handling code. 608#include "perf_event_xscale.c"
608 * 609#include "perf_event_v6.c"
609 * ARMv6 has 2 configurable performance counters and a single cycle counter. 610#include "perf_event_v7.c"
610 * They all share a single reset bit but can be written to zero so we can use
611 * that for a reset.
612 *
613 * The counters can't be individually enabled or disabled so when we remove
614 * one event and replace it with another we could get spurious counts from the
615 * wrong event. However, we can take advantage of the fact that the
616 * performance counters can export events to the event bus, and the event bus
617 * itself can be monitored. This requires that we *don't* export the events to
618 * the event bus. The procedure for disabling a configurable counter is:
619 * - change the counter to count the ETMEXTOUT[0] signal (0x20). This
620 * effectively stops the counter from counting.
621 * - disable the counter's interrupt generation (each counter has its
622 * own interrupt enable bit).
623 * Once stopped, the counter value can be written as 0 to reset.
624 *
625 * To enable a counter:
626 * - enable the counter's interrupt generation.
627 * - set the new event type.
628 *
629 * Note: the dedicated cycle counter only counts cycles and can't be
630 * enabled/disabled independently of the others. When we want to disable the
631 * cycle counter, we have to just disable the interrupt reporting and start
632 * ignoring that counter. When re-enabling, we have to reset the value and
633 * enable the interrupt.
634 */
635
636enum armv6_perf_types {
637 ARMV6_PERFCTR_ICACHE_MISS = 0x0,
638 ARMV6_PERFCTR_IBUF_STALL = 0x1,
639 ARMV6_PERFCTR_DDEP_STALL = 0x2,
640 ARMV6_PERFCTR_ITLB_MISS = 0x3,
641 ARMV6_PERFCTR_DTLB_MISS = 0x4,
642 ARMV6_PERFCTR_BR_EXEC = 0x5,
643 ARMV6_PERFCTR_BR_MISPREDICT = 0x6,
644 ARMV6_PERFCTR_INSTR_EXEC = 0x7,
645 ARMV6_PERFCTR_DCACHE_HIT = 0x9,
646 ARMV6_PERFCTR_DCACHE_ACCESS = 0xA,
647 ARMV6_PERFCTR_DCACHE_MISS = 0xB,
648 ARMV6_PERFCTR_DCACHE_WBACK = 0xC,
649 ARMV6_PERFCTR_SW_PC_CHANGE = 0xD,
650 ARMV6_PERFCTR_MAIN_TLB_MISS = 0xF,
651 ARMV6_PERFCTR_EXPL_D_ACCESS = 0x10,
652 ARMV6_PERFCTR_LSU_FULL_STALL = 0x11,
653 ARMV6_PERFCTR_WBUF_DRAINED = 0x12,
654 ARMV6_PERFCTR_CPU_CYCLES = 0xFF,
655 ARMV6_PERFCTR_NOP = 0x20,
656};
657
658enum armv6_counters {
659 ARMV6_CYCLE_COUNTER = 1,
660 ARMV6_COUNTER0,
661 ARMV6_COUNTER1,
662};
663
664/*
665 * The hardware events that we support. We do support cache operations but
666 * we have Harvard caches and no way to combine instruction and data
667 * accesses/misses in hardware.
668 */
669static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
670 [PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES,
671 [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC,
672 [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
673 [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
674 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC,
675 [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT,
676 [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
677};
678
679static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
680 [PERF_COUNT_HW_CACHE_OP_MAX]
681 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
682 [C(L1D)] = {
683 /*
684 * The performance counters don't differentiate between read
685 * and write accesses/misses so this isn't strictly correct,
686 * but it's the best we can do. Writes and reads get
687 * combined.
688 */
689 [C(OP_READ)] = {
690 [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
691 [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
692 },
693 [C(OP_WRITE)] = {
694 [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
695 [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
696 },
697 [C(OP_PREFETCH)] = {
698 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
699 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
700 },
701 },
702 [C(L1I)] = {
703 [C(OP_READ)] = {
704 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
705 [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS,
706 },
707 [C(OP_WRITE)] = {
708 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
709 [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS,
710 },
711 [C(OP_PREFETCH)] = {
712 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
713 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
714 },
715 },
716 [C(LL)] = {
717 [C(OP_READ)] = {
718 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
719 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
720 },
721 [C(OP_WRITE)] = {
722 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
723 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
724 },
725 [C(OP_PREFETCH)] = {
726 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
727 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
728 },
729 },
730 [C(DTLB)] = {
731 /*
732 * The ARM performance counters can count micro DTLB misses,
733 * micro ITLB misses and main TLB misses. There isn't an event
734 * for TLB misses, so use the micro misses here and if users
735 * want the main TLB misses they can use a raw counter.
736 */
737 [C(OP_READ)] = {
738 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
739 [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
740 },
741 [C(OP_WRITE)] = {
742 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
743 [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
744 },
745 [C(OP_PREFETCH)] = {
746 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
747 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
748 },
749 },
750 [C(ITLB)] = {
751 [C(OP_READ)] = {
752 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
753 [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
754 },
755 [C(OP_WRITE)] = {
756 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
757 [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
758 },
759 [C(OP_PREFETCH)] = {
760 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
761 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
762 },
763 },
764 [C(BPU)] = {
765 [C(OP_READ)] = {
766 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
767 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
768 },
769 [C(OP_WRITE)] = {
770 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
771 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
772 },
773 [C(OP_PREFETCH)] = {
774 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
775 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
776 },
777 },
778};
779
780enum armv6mpcore_perf_types {
781 ARMV6MPCORE_PERFCTR_ICACHE_MISS = 0x0,
782 ARMV6MPCORE_PERFCTR_IBUF_STALL = 0x1,
783 ARMV6MPCORE_PERFCTR_DDEP_STALL = 0x2,
784 ARMV6MPCORE_PERFCTR_ITLB_MISS = 0x3,
785 ARMV6MPCORE_PERFCTR_DTLB_MISS = 0x4,
786 ARMV6MPCORE_PERFCTR_BR_EXEC = 0x5,
787 ARMV6MPCORE_PERFCTR_BR_NOTPREDICT = 0x6,
788 ARMV6MPCORE_PERFCTR_BR_MISPREDICT = 0x7,
789 ARMV6MPCORE_PERFCTR_INSTR_EXEC = 0x8,
790 ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA,
791 ARMV6MPCORE_PERFCTR_DCACHE_RDMISS = 0xB,
792 ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC,
793 ARMV6MPCORE_PERFCTR_DCACHE_WRMISS = 0xD,
794 ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE,
795 ARMV6MPCORE_PERFCTR_SW_PC_CHANGE = 0xF,
796 ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS = 0x10,
797 ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11,
798 ARMV6MPCORE_PERFCTR_LSU_FULL_STALL = 0x12,
799 ARMV6MPCORE_PERFCTR_WBUF_DRAINED = 0x13,
800 ARMV6MPCORE_PERFCTR_CPU_CYCLES = 0xFF,
801};
802
803/*
804 * The hardware events that we support. We do support cache operations but
805 * we have Harvard caches and no way to combine instruction and data
806 * accesses/misses in hardware.
807 */
808static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = {
809 [PERF_COUNT_HW_CPU_CYCLES] = ARMV6MPCORE_PERFCTR_CPU_CYCLES,
810 [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_INSTR_EXEC,
811 [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
812 [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
813 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC,
814 [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6MPCORE_PERFCTR_BR_MISPREDICT,
815 [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
816};
817
818static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
819 [PERF_COUNT_HW_CACHE_OP_MAX]
820 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
821 [C(L1D)] = {
822 [C(OP_READ)] = {
823 [C(RESULT_ACCESS)] =
824 ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS,
825 [C(RESULT_MISS)] =
826 ARMV6MPCORE_PERFCTR_DCACHE_RDMISS,
827 },
828 [C(OP_WRITE)] = {
829 [C(RESULT_ACCESS)] =
830 ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS,
831 [C(RESULT_MISS)] =
832 ARMV6MPCORE_PERFCTR_DCACHE_WRMISS,
833 },
834 [C(OP_PREFETCH)] = {
835 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
836 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
837 },
838 },
839 [C(L1I)] = {
840 [C(OP_READ)] = {
841 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
842 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
843 },
844 [C(OP_WRITE)] = {
845 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
846 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
847 },
848 [C(OP_PREFETCH)] = {
849 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
850 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
851 },
852 },
853 [C(LL)] = {
854 [C(OP_READ)] = {
855 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
856 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
857 },
858 [C(OP_WRITE)] = {
859 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
860 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
861 },
862 [C(OP_PREFETCH)] = {
863 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
864 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
865 },
866 },
867 [C(DTLB)] = {
868 /*
869 * The ARM performance counters can count micro DTLB misses,
870 * micro ITLB misses and main TLB misses. There isn't an event
871 * for TLB misses, so use the micro misses here and if users
872 * want the main TLB misses they can use a raw counter.
873 */
874 [C(OP_READ)] = {
875 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
876 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
877 },
878 [C(OP_WRITE)] = {
879 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
880 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
881 },
882 [C(OP_PREFETCH)] = {
883 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
884 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
885 },
886 },
887 [C(ITLB)] = {
888 [C(OP_READ)] = {
889 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
890 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
891 },
892 [C(OP_WRITE)] = {
893 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
894 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
895 },
896 [C(OP_PREFETCH)] = {
897 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
898 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
899 },
900 },
901 [C(BPU)] = {
902 [C(OP_READ)] = {
903 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
904 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
905 },
906 [C(OP_WRITE)] = {
907 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
908 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
909 },
910 [C(OP_PREFETCH)] = {
911 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
912 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
913 },
914 },
915};
916
917static inline unsigned long
918armv6_pmcr_read(void)
919{
920 u32 val;
921 asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val));
922 return val;
923}
924
925static inline void
926armv6_pmcr_write(unsigned long val)
927{
928 asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val));
929}
930
931#define ARMV6_PMCR_ENABLE (1 << 0)
932#define ARMV6_PMCR_CTR01_RESET (1 << 1)
933#define ARMV6_PMCR_CCOUNT_RESET (1 << 2)
934#define ARMV6_PMCR_CCOUNT_DIV (1 << 3)
935#define ARMV6_PMCR_COUNT0_IEN (1 << 4)
936#define ARMV6_PMCR_COUNT1_IEN (1 << 5)
937#define ARMV6_PMCR_CCOUNT_IEN (1 << 6)
938#define ARMV6_PMCR_COUNT0_OVERFLOW (1 << 8)
939#define ARMV6_PMCR_COUNT1_OVERFLOW (1 << 9)
940#define ARMV6_PMCR_CCOUNT_OVERFLOW (1 << 10)
941#define ARMV6_PMCR_EVT_COUNT0_SHIFT 20
942#define ARMV6_PMCR_EVT_COUNT0_MASK (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT)
943#define ARMV6_PMCR_EVT_COUNT1_SHIFT 12
944#define ARMV6_PMCR_EVT_COUNT1_MASK (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT)
945
946#define ARMV6_PMCR_OVERFLOWED_MASK \
947 (ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \
948 ARMV6_PMCR_CCOUNT_OVERFLOW)
949
950static inline int
951armv6_pmcr_has_overflowed(unsigned long pmcr)
952{
953 return (pmcr & ARMV6_PMCR_OVERFLOWED_MASK);
954}
955
956static inline int
957armv6_pmcr_counter_has_overflowed(unsigned long pmcr,
958 enum armv6_counters counter)
959{
960 int ret = 0;
961
962 if (ARMV6_CYCLE_COUNTER == counter)
963 ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW;
964 else if (ARMV6_COUNTER0 == counter)
965 ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW;
966 else if (ARMV6_COUNTER1 == counter)
967 ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW;
968 else
969 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
970
971 return ret;
972}
973
974static inline u32
975armv6pmu_read_counter(int counter)
976{
977 unsigned long value = 0;
978
979 if (ARMV6_CYCLE_COUNTER == counter)
980 asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value));
981 else if (ARMV6_COUNTER0 == counter)
982 asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value));
983 else if (ARMV6_COUNTER1 == counter)
984 asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value));
985 else
986 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
987
988 return value;
989}
990
991static inline void
992armv6pmu_write_counter(int counter,
993 u32 value)
994{
995 if (ARMV6_CYCLE_COUNTER == counter)
996 asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value));
997 else if (ARMV6_COUNTER0 == counter)
998 asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value));
999 else if (ARMV6_COUNTER1 == counter)
1000 asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r"(value));
1001 else
1002 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
1003}
1004
1005void
1006armv6pmu_enable_event(struct hw_perf_event *hwc,
1007 int idx)
1008{
1009 unsigned long val, mask, evt, flags;
1010
1011 if (ARMV6_CYCLE_COUNTER == idx) {
1012 mask = 0;
1013 evt = ARMV6_PMCR_CCOUNT_IEN;
1014 } else if (ARMV6_COUNTER0 == idx) {
1015 mask = ARMV6_PMCR_EVT_COUNT0_MASK;
1016 evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
1017 ARMV6_PMCR_COUNT0_IEN;
1018 } else if (ARMV6_COUNTER1 == idx) {
1019 mask = ARMV6_PMCR_EVT_COUNT1_MASK;
1020 evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) |
1021 ARMV6_PMCR_COUNT1_IEN;
1022 } else {
1023 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
1024 return;
1025 }
1026
1027 /*
1028 * Mask out the current event and set the counter to count the event
1029 * that we're interested in.
1030 */
1031 spin_lock_irqsave(&pmu_lock, flags);
1032 val = armv6_pmcr_read();
1033 val &= ~mask;
1034 val |= evt;
1035 armv6_pmcr_write(val);
1036 spin_unlock_irqrestore(&pmu_lock, flags);
1037}
1038
1039static irqreturn_t
1040armv6pmu_handle_irq(int irq_num,
1041 void *dev)
1042{
1043 unsigned long pmcr = armv6_pmcr_read();
1044 struct perf_sample_data data;
1045 struct cpu_hw_events *cpuc;
1046 struct pt_regs *regs;
1047 int idx;
1048
1049 if (!armv6_pmcr_has_overflowed(pmcr))
1050 return IRQ_NONE;
1051
1052 regs = get_irq_regs();
1053
1054 /*
1055 * The interrupts are cleared by writing the overflow flags back to
1056 * the control register. All of the other bits don't have any effect
1057 * if they are rewritten, so write the whole value back.
1058 */
1059 armv6_pmcr_write(pmcr);
1060
1061 perf_sample_data_init(&data, 0);
1062
1063 cpuc = &__get_cpu_var(cpu_hw_events);
1064 for (idx = 0; idx <= armpmu->num_events; ++idx) {
1065 struct perf_event *event = cpuc->events[idx];
1066 struct hw_perf_event *hwc;
1067
1068 if (!test_bit(idx, cpuc->active_mask))
1069 continue;
1070
1071 /*
1072 * We have a single interrupt for all counters. Check that
1073 * each counter has overflowed before we process it.
1074 */
1075 if (!armv6_pmcr_counter_has_overflowed(pmcr, idx))
1076 continue;
1077
1078 hwc = &event->hw;
1079 armpmu_event_update(event, hwc, idx);
1080 data.period = event->hw.last_period;
1081 if (!armpmu_event_set_period(event, hwc, idx))
1082 continue;
1083
1084 if (perf_event_overflow(event, 0, &data, regs))
1085 armpmu->disable(hwc, idx);
1086 }
1087
1088 /*
1089 * Handle the pending perf events.
1090 *
1091 * Note: this call *must* be run with interrupts disabled. For
1092 * platforms that can have the PMU interrupts raised as an NMI, this
1093 * will not work.
1094 */
1095 irq_work_run();
1096
1097 return IRQ_HANDLED;
1098}
1099
1100static void
1101armv6pmu_start(void)
1102{
1103 unsigned long flags, val;
1104
1105 spin_lock_irqsave(&pmu_lock, flags);
1106 val = armv6_pmcr_read();
1107 val |= ARMV6_PMCR_ENABLE;
1108 armv6_pmcr_write(val);
1109 spin_unlock_irqrestore(&pmu_lock, flags);
1110}
1111
1112void
1113armv6pmu_stop(void)
1114{
1115 unsigned long flags, val;
1116
1117 spin_lock_irqsave(&pmu_lock, flags);
1118 val = armv6_pmcr_read();
1119 val &= ~ARMV6_PMCR_ENABLE;
1120 armv6_pmcr_write(val);
1121 spin_unlock_irqrestore(&pmu_lock, flags);
1122}
1123
1124static inline int
1125armv6pmu_event_map(int config)
1126{
1127 int mapping = armv6_perf_map[config];
1128 if (HW_OP_UNSUPPORTED == mapping)
1129 mapping = -EOPNOTSUPP;
1130 return mapping;
1131}
1132
1133static inline int
1134armv6mpcore_pmu_event_map(int config)
1135{
1136 int mapping = armv6mpcore_perf_map[config];
1137 if (HW_OP_UNSUPPORTED == mapping)
1138 mapping = -EOPNOTSUPP;
1139 return mapping;
1140}
1141
1142static u64
1143armv6pmu_raw_event(u64 config)
1144{
1145 return config & 0xff;
1146}
1147
1148static int
1149armv6pmu_get_event_idx(struct cpu_hw_events *cpuc,
1150 struct hw_perf_event *event)
1151{
1152 /* Always place a cycle counter into the cycle counter. */
1153 if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) {
1154 if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask))
1155 return -EAGAIN;
1156
1157 return ARMV6_CYCLE_COUNTER;
1158 } else {
1159 /*
1160 * For anything other than a cycle counter, try and use
1161 * counter0 and counter1.
1162 */
1163 if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask)) {
1164 return ARMV6_COUNTER1;
1165 }
1166
1167 if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask)) {
1168 return ARMV6_COUNTER0;
1169 }
1170
1171 /* The counters are all in use. */
1172 return -EAGAIN;
1173 }
1174}
1175
1176static void
1177armv6pmu_disable_event(struct hw_perf_event *hwc,
1178 int idx)
1179{
1180 unsigned long val, mask, evt, flags;
1181
1182 if (ARMV6_CYCLE_COUNTER == idx) {
1183 mask = ARMV6_PMCR_CCOUNT_IEN;
1184 evt = 0;
1185 } else if (ARMV6_COUNTER0 == idx) {
1186 mask = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK;
1187 evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT;
1188 } else if (ARMV6_COUNTER1 == idx) {
1189 mask = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK;
1190 evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT;
1191 } else {
1192 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
1193 return;
1194 }
1195
1196 /*
1197 * Mask out the current event and set the counter to count the number
1198 * of ETM bus signal assertion cycles. The external reporting should
1199 * be disabled and so this should never increment.
1200 */
1201 spin_lock_irqsave(&pmu_lock, flags);
1202 val = armv6_pmcr_read();
1203 val &= ~mask;
1204 val |= evt;
1205 armv6_pmcr_write(val);
1206 spin_unlock_irqrestore(&pmu_lock, flags);
1207}
1208
1209static void
1210armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc,
1211 int idx)
1212{
1213 unsigned long val, mask, flags, evt = 0;
1214
1215 if (ARMV6_CYCLE_COUNTER == idx) {
1216 mask = ARMV6_PMCR_CCOUNT_IEN;
1217 } else if (ARMV6_COUNTER0 == idx) {
1218 mask = ARMV6_PMCR_COUNT0_IEN;
1219 } else if (ARMV6_COUNTER1 == idx) {
1220 mask = ARMV6_PMCR_COUNT1_IEN;
1221 } else {
1222 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
1223 return;
1224 }
1225
1226 /*
1227 * Unlike UP ARMv6, we don't have a way of stopping the counters. We
1228 * simply disable the interrupt reporting.
1229 */
1230 spin_lock_irqsave(&pmu_lock, flags);
1231 val = armv6_pmcr_read();
1232 val &= ~mask;
1233 val |= evt;
1234 armv6_pmcr_write(val);
1235 spin_unlock_irqrestore(&pmu_lock, flags);
1236}
1237
1238static const struct arm_pmu armv6pmu = {
1239 .id = ARM_PERF_PMU_ID_V6,
1240 .handle_irq = armv6pmu_handle_irq,
1241 .enable = armv6pmu_enable_event,
1242 .disable = armv6pmu_disable_event,
1243 .event_map = armv6pmu_event_map,
1244 .raw_event = armv6pmu_raw_event,
1245 .read_counter = armv6pmu_read_counter,
1246 .write_counter = armv6pmu_write_counter,
1247 .get_event_idx = armv6pmu_get_event_idx,
1248 .start = armv6pmu_start,
1249 .stop = armv6pmu_stop,
1250 .num_events = 3,
1251 .max_period = (1LLU << 32) - 1,
1252};
1253
1254/*
1255 * ARMv6mpcore is almost identical to single core ARMv6 with the exception
1256 * that some of the events have different enumerations and that there is no
1257 * *hack* to stop the programmable counters. To stop the counters we simply
1258 * disable the interrupt reporting and update the event. When unthrottling we
1259 * reset the period and enable the interrupt reporting.
1260 */
1261static const struct arm_pmu armv6mpcore_pmu = {
1262 .id = ARM_PERF_PMU_ID_V6MP,
1263 .handle_irq = armv6pmu_handle_irq,
1264 .enable = armv6pmu_enable_event,
1265 .disable = armv6mpcore_pmu_disable_event,
1266 .event_map = armv6mpcore_pmu_event_map,
1267 .raw_event = armv6pmu_raw_event,
1268 .read_counter = armv6pmu_read_counter,
1269 .write_counter = armv6pmu_write_counter,
1270 .get_event_idx = armv6pmu_get_event_idx,
1271 .start = armv6pmu_start,
1272 .stop = armv6pmu_stop,
1273 .num_events = 3,
1274 .max_period = (1LLU << 32) - 1,
1275};
1276
1277/*
1278 * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code.
1279 *
1280 * Copied from ARMv6 code, with the low level code inspired
1281 * by the ARMv7 Oprofile code.
1282 *
1283 * Cortex-A8 has up to 4 configurable performance counters and
1284 * a single cycle counter.
1285 * Cortex-A9 has up to 31 configurable performance counters and
1286 * a single cycle counter.
1287 *
1288 * All counters can be enabled/disabled and IRQ masked separately. The cycle
1289 * counter and all 4 performance counters together can be reset separately.
1290 */
1291
1292/* Common ARMv7 event types */
1293enum armv7_perf_types {
1294 ARMV7_PERFCTR_PMNC_SW_INCR = 0x00,
1295 ARMV7_PERFCTR_IFETCH_MISS = 0x01,
1296 ARMV7_PERFCTR_ITLB_MISS = 0x02,
1297 ARMV7_PERFCTR_DCACHE_REFILL = 0x03,
1298 ARMV7_PERFCTR_DCACHE_ACCESS = 0x04,
1299 ARMV7_PERFCTR_DTLB_REFILL = 0x05,
1300 ARMV7_PERFCTR_DREAD = 0x06,
1301 ARMV7_PERFCTR_DWRITE = 0x07,
1302
1303 ARMV7_PERFCTR_EXC_TAKEN = 0x09,
1304 ARMV7_PERFCTR_EXC_EXECUTED = 0x0A,
1305 ARMV7_PERFCTR_CID_WRITE = 0x0B,
1306 /* ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS.
1307 * It counts:
1308 * - all branch instructions,
1309 * - instructions that explicitly write the PC,
1310 * - exception generating instructions.
1311 */
1312 ARMV7_PERFCTR_PC_WRITE = 0x0C,
1313 ARMV7_PERFCTR_PC_IMM_BRANCH = 0x0D,
1314 ARMV7_PERFCTR_UNALIGNED_ACCESS = 0x0F,
1315 ARMV7_PERFCTR_PC_BRANCH_MIS_PRED = 0x10,
1316 ARMV7_PERFCTR_CLOCK_CYCLES = 0x11,
1317
1318 ARMV7_PERFCTR_PC_BRANCH_MIS_USED = 0x12,
1319
1320 ARMV7_PERFCTR_CPU_CYCLES = 0xFF
1321};
1322
1323/* ARMv7 Cortex-A8 specific event types */
1324enum armv7_a8_perf_types {
1325 ARMV7_PERFCTR_INSTR_EXECUTED = 0x08,
1326
1327 ARMV7_PERFCTR_PC_PROC_RETURN = 0x0E,
1328
1329 ARMV7_PERFCTR_WRITE_BUFFER_FULL = 0x40,
1330 ARMV7_PERFCTR_L2_STORE_MERGED = 0x41,
1331 ARMV7_PERFCTR_L2_STORE_BUFF = 0x42,
1332 ARMV7_PERFCTR_L2_ACCESS = 0x43,
1333 ARMV7_PERFCTR_L2_CACH_MISS = 0x44,
1334 ARMV7_PERFCTR_AXI_READ_CYCLES = 0x45,
1335 ARMV7_PERFCTR_AXI_WRITE_CYCLES = 0x46,
1336 ARMV7_PERFCTR_MEMORY_REPLAY = 0x47,
1337 ARMV7_PERFCTR_UNALIGNED_ACCESS_REPLAY = 0x48,
1338 ARMV7_PERFCTR_L1_DATA_MISS = 0x49,
1339 ARMV7_PERFCTR_L1_INST_MISS = 0x4A,
1340 ARMV7_PERFCTR_L1_DATA_COLORING = 0x4B,
1341 ARMV7_PERFCTR_L1_NEON_DATA = 0x4C,
1342 ARMV7_PERFCTR_L1_NEON_CACH_DATA = 0x4D,
1343 ARMV7_PERFCTR_L2_NEON = 0x4E,
1344 ARMV7_PERFCTR_L2_NEON_HIT = 0x4F,
1345 ARMV7_PERFCTR_L1_INST = 0x50,
1346 ARMV7_PERFCTR_PC_RETURN_MIS_PRED = 0x51,
1347 ARMV7_PERFCTR_PC_BRANCH_FAILED = 0x52,
1348 ARMV7_PERFCTR_PC_BRANCH_TAKEN = 0x53,
1349 ARMV7_PERFCTR_PC_BRANCH_EXECUTED = 0x54,
1350 ARMV7_PERFCTR_OP_EXECUTED = 0x55,
1351 ARMV7_PERFCTR_CYCLES_INST_STALL = 0x56,
1352 ARMV7_PERFCTR_CYCLES_INST = 0x57,
1353 ARMV7_PERFCTR_CYCLES_NEON_DATA_STALL = 0x58,
1354 ARMV7_PERFCTR_CYCLES_NEON_INST_STALL = 0x59,
1355 ARMV7_PERFCTR_NEON_CYCLES = 0x5A,
1356
1357 ARMV7_PERFCTR_PMU0_EVENTS = 0x70,
1358 ARMV7_PERFCTR_PMU1_EVENTS = 0x71,
1359 ARMV7_PERFCTR_PMU_EVENTS = 0x72,
1360};
1361
1362/* ARMv7 Cortex-A9 specific event types */
1363enum armv7_a9_perf_types {
1364 ARMV7_PERFCTR_JAVA_HW_BYTECODE_EXEC = 0x40,
1365 ARMV7_PERFCTR_JAVA_SW_BYTECODE_EXEC = 0x41,
1366 ARMV7_PERFCTR_JAZELLE_BRANCH_EXEC = 0x42,
1367
1368 ARMV7_PERFCTR_COHERENT_LINE_MISS = 0x50,
1369 ARMV7_PERFCTR_COHERENT_LINE_HIT = 0x51,
1370
1371 ARMV7_PERFCTR_ICACHE_DEP_STALL_CYCLES = 0x60,
1372 ARMV7_PERFCTR_DCACHE_DEP_STALL_CYCLES = 0x61,
1373 ARMV7_PERFCTR_TLB_MISS_DEP_STALL_CYCLES = 0x62,
1374 ARMV7_PERFCTR_STREX_EXECUTED_PASSED = 0x63,
1375 ARMV7_PERFCTR_STREX_EXECUTED_FAILED = 0x64,
1376 ARMV7_PERFCTR_DATA_EVICTION = 0x65,
1377 ARMV7_PERFCTR_ISSUE_STAGE_NO_INST = 0x66,
1378 ARMV7_PERFCTR_ISSUE_STAGE_EMPTY = 0x67,
1379 ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE = 0x68,
1380
1381 ARMV7_PERFCTR_PREDICTABLE_FUNCT_RETURNS = 0x6E,
1382
1383 ARMV7_PERFCTR_MAIN_UNIT_EXECUTED_INST = 0x70,
1384 ARMV7_PERFCTR_SECOND_UNIT_EXECUTED_INST = 0x71,
1385 ARMV7_PERFCTR_LD_ST_UNIT_EXECUTED_INST = 0x72,
1386 ARMV7_PERFCTR_FP_EXECUTED_INST = 0x73,
1387 ARMV7_PERFCTR_NEON_EXECUTED_INST = 0x74,
1388
1389 ARMV7_PERFCTR_PLD_FULL_DEP_STALL_CYCLES = 0x80,
1390 ARMV7_PERFCTR_DATA_WR_DEP_STALL_CYCLES = 0x81,
1391 ARMV7_PERFCTR_ITLB_MISS_DEP_STALL_CYCLES = 0x82,
1392 ARMV7_PERFCTR_DTLB_MISS_DEP_STALL_CYCLES = 0x83,
1393 ARMV7_PERFCTR_MICRO_ITLB_MISS_DEP_STALL_CYCLES = 0x84,
1394 ARMV7_PERFCTR_MICRO_DTLB_MISS_DEP_STALL_CYCLES = 0x85,
1395 ARMV7_PERFCTR_DMB_DEP_STALL_CYCLES = 0x86,
1396
1397 ARMV7_PERFCTR_INTGR_CLK_ENABLED_CYCLES = 0x8A,
1398 ARMV7_PERFCTR_DATA_ENGINE_CLK_EN_CYCLES = 0x8B,
1399
1400 ARMV7_PERFCTR_ISB_INST = 0x90,
1401 ARMV7_PERFCTR_DSB_INST = 0x91,
1402 ARMV7_PERFCTR_DMB_INST = 0x92,
1403 ARMV7_PERFCTR_EXT_INTERRUPTS = 0x93,
1404
1405 ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_COMPLETED = 0xA0,
1406 ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_SKIPPED = 0xA1,
1407 ARMV7_PERFCTR_PLE_FIFO_FLUSH = 0xA2,
1408 ARMV7_PERFCTR_PLE_RQST_COMPLETED = 0xA3,
1409 ARMV7_PERFCTR_PLE_FIFO_OVERFLOW = 0xA4,
1410 ARMV7_PERFCTR_PLE_RQST_PROG = 0xA5
1411};
1412
1413/*
1414 * Cortex-A8 HW events mapping
1415 *
1416 * The hardware events that we support. We do support cache operations but
1417 * we have Harvard caches and no way to combine instruction and data
1418 * accesses/misses in hardware.
1419 */
1420static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = {
1421 [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
1422 [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
1423 [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
1424 [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
1425 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
1426 [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1427 [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
1428};
1429
1430static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
1431 [PERF_COUNT_HW_CACHE_OP_MAX]
1432 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
1433 [C(L1D)] = {
1434 /*
1435 * The performance counters don't differentiate between read
1436 * and write accesses/misses so this isn't strictly correct,
1437 * but it's the best we can do. Writes and reads get
1438 * combined.
1439 */
1440 [C(OP_READ)] = {
1441 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS,
1442 [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL,
1443 },
1444 [C(OP_WRITE)] = {
1445 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS,
1446 [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL,
1447 },
1448 [C(OP_PREFETCH)] = {
1449 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1450 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
1451 },
1452 },
1453 [C(L1I)] = {
1454 [C(OP_READ)] = {
1455 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST,
1456 [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS,
1457 },
1458 [C(OP_WRITE)] = {
1459 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST,
1460 [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS,
1461 },
1462 [C(OP_PREFETCH)] = {
1463 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1464 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
1465 },
1466 },
1467 [C(LL)] = {
1468 [C(OP_READ)] = {
1469 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS,
1470 [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS,
1471 },
1472 [C(OP_WRITE)] = {
1473 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS,
1474 [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS,
1475 },
1476 [C(OP_PREFETCH)] = {
1477 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1478 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
1479 },
1480 },
1481 [C(DTLB)] = {
1482 /*
1483 * Only ITLB misses and DTLB refills are supported.
1484 * If users want the DTLB misses rather than the refills,
1485 * a raw counter must be used.
1486 */
1487 [C(OP_READ)] = {
1488 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1489 [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
1490 },
1491 [C(OP_WRITE)] = {
1492 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1493 [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
1494 },
1495 [C(OP_PREFETCH)] = {
1496 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1497 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
1498 },
1499 },
1500 [C(ITLB)] = {
1501 [C(OP_READ)] = {
1502 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1503 [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS,
1504 },
1505 [C(OP_WRITE)] = {
1506 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1507 [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS,
1508 },
1509 [C(OP_PREFETCH)] = {
1510 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1511 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
1512 },
1513 },
1514 [C(BPU)] = {
1515 [C(OP_READ)] = {
1516 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE,
1517 [C(RESULT_MISS)]
1518 = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1519 },
1520 [C(OP_WRITE)] = {
1521 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE,
1522 [C(RESULT_MISS)]
1523 = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1524 },
1525 [C(OP_PREFETCH)] = {
1526 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1527 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
1528 },
1529 },
1530};
1531
1532/*
1533 * Cortex-A9 HW events mapping
1534 */
1535static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = {
1536 [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
1537 [PERF_COUNT_HW_INSTRUCTIONS] =
1538 ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE,
1539 [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_COHERENT_LINE_HIT,
1540 [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_COHERENT_LINE_MISS,
1541 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
1542 [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1543 [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
1544};
1545
1546static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
1547 [PERF_COUNT_HW_CACHE_OP_MAX]
1548 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
1549 [C(L1D)] = {
1550 /*
1551 * The performance counters don't differentiate between read
1552 * and write accesses/misses so this isn't strictly correct,
1553 * but it's the best we can do. Writes and reads get
1554 * combined.
1555 */
1556 [C(OP_READ)] = {
1557 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS,
1558 [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL,
1559 },
1560 [C(OP_WRITE)] = {
1561 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS,
1562 [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL,
1563 },
1564 [C(OP_PREFETCH)] = {
1565 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1566 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
1567 },
1568 },
1569 [C(L1I)] = {
1570 [C(OP_READ)] = {
1571 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1572 [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS,
1573 },
1574 [C(OP_WRITE)] = {
1575 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1576 [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS,
1577 },
1578 [C(OP_PREFETCH)] = {
1579 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1580 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
1581 },
1582 },
1583 [C(LL)] = {
1584 [C(OP_READ)] = {
1585 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1586 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
1587 },
1588 [C(OP_WRITE)] = {
1589 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1590 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
1591 },
1592 [C(OP_PREFETCH)] = {
1593 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1594 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
1595 },
1596 },
1597 [C(DTLB)] = {
1598 /*
1599 * Only ITLB misses and DTLB refills are supported.
1600 * If users want the DTLB refills misses a raw counter
1601 * must be used.
1602 */
1603 [C(OP_READ)] = {
1604 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1605 [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
1606 },
1607 [C(OP_WRITE)] = {
1608 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1609 [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
1610 },
1611 [C(OP_PREFETCH)] = {
1612 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1613 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
1614 },
1615 },
1616 [C(ITLB)] = {
1617 [C(OP_READ)] = {
1618 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1619 [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS,
1620 },
1621 [C(OP_WRITE)] = {
1622 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1623 [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS,
1624 },
1625 [C(OP_PREFETCH)] = {
1626 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1627 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
1628 },
1629 },
1630 [C(BPU)] = {
1631 [C(OP_READ)] = {
1632 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE,
1633 [C(RESULT_MISS)]
1634 = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1635 },
1636 [C(OP_WRITE)] = {
1637 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE,
1638 [C(RESULT_MISS)]
1639 = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1640 },
1641 [C(OP_PREFETCH)] = {
1642 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1643 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
1644 },
1645 },
1646};
1647
1648/*
1649 * Perf Events counters
1650 */
1651enum armv7_counters {
1652 ARMV7_CYCLE_COUNTER = 1, /* Cycle counter */
1653 ARMV7_COUNTER0 = 2, /* First event counter */
1654};
1655
1656/*
1657 * The cycle counter is ARMV7_CYCLE_COUNTER.
1658 * The first event counter is ARMV7_COUNTER0.
1659 * The last event counter is (ARMV7_COUNTER0 + armpmu->num_events - 1).
1660 */
1661#define ARMV7_COUNTER_LAST (ARMV7_COUNTER0 + armpmu->num_events - 1)
1662
1663/*
1664 * ARMv7 low level PMNC access
1665 */
1666
1667/*
1668 * Per-CPU PMNC: config reg
1669 */
1670#define ARMV7_PMNC_E (1 << 0) /* Enable all counters */
1671#define ARMV7_PMNC_P (1 << 1) /* Reset all counters */
1672#define ARMV7_PMNC_C (1 << 2) /* Cycle counter reset */
1673#define ARMV7_PMNC_D (1 << 3) /* CCNT counts every 64th cpu cycle */
1674#define ARMV7_PMNC_X (1 << 4) /* Export to ETM */
1675#define ARMV7_PMNC_DP (1 << 5) /* Disable CCNT if non-invasive debug*/
1676#define ARMV7_PMNC_N_SHIFT 11 /* Number of counters supported */
1677#define ARMV7_PMNC_N_MASK 0x1f
1678#define ARMV7_PMNC_MASK 0x3f /* Mask for writable bits */
1679
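The N field of PMNC reports how many event counters the implementation provides; a hypothetical helper built on the accessors defined just below:

	static inline int armv7_pmnc_num_counters(void)
	{
		return (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK;
	}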
1680/*
1681 * Available counters
1682 */
1683#define ARMV7_CNT0 0 /* First event counter */
1684#define ARMV7_CCNT 31 /* Cycle counter */
1685
1686/* Perf Event to low level counters mapping */
1687#define ARMV7_EVENT_CNT_TO_CNTx (ARMV7_COUNTER0 - ARMV7_CNT0)
1688
1689/*
1690 * CNTENS: counters enable reg
1691 */
1692#define ARMV7_CNTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
1693#define ARMV7_CNTENS_C (1 << ARMV7_CCNT)
1694
1695/*
1696 * CNTENC: counters disable reg
1697 */
1698#define ARMV7_CNTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
1699#define ARMV7_CNTENC_C (1 << ARMV7_CCNT)
1700
1701/*
1702 * INTENS: counters overflow interrupt enable reg
1703 */
1704#define ARMV7_INTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
1705#define ARMV7_INTENS_C (1 << ARMV7_CCNT)
1706
1707/*
1708 * INTENC: counters overflow interrupt disable reg
1709 */
1710#define ARMV7_INTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
1711#define ARMV7_INTENC_C (1 << ARMV7_CCNT)
1712
1713/*
1714 * EVTSEL: Event selection reg
1715 */
1716#define ARMV7_EVTSEL_MASK 0xff /* Mask for writable bits */
1717
1718/*
1719 * SELECT: Counter selection reg
1720 */
1721#define ARMV7_SELECT_MASK 0x1f /* Mask for writable bits */
1722
1723/*
1724 * FLAG: counters overflow flag status reg
1725 */
1726#define ARMV7_FLAG_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
1727#define ARMV7_FLAG_C (1 << ARMV7_CCNT)
1728#define ARMV7_FLAG_MASK 0xffffffff /* Mask for writable bits */
1729#define ARMV7_OVERFLOWED_MASK ARMV7_FLAG_MASK
1730
1731static inline unsigned long armv7_pmnc_read(void)
1732{
1733 u32 val;
1734 asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val));
1735 return val;
1736}
1737
1738static inline void armv7_pmnc_write(unsigned long val)
1739{
1740 val &= ARMV7_PMNC_MASK;
1741 asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val));
1742}
1743
1744static inline int armv7_pmnc_has_overflowed(unsigned long pmnc)
1745{
1746 return pmnc & ARMV7_OVERFLOWED_MASK;
1747}
1748
1749static inline int armv7_pmnc_counter_has_overflowed(unsigned long pmnc,
1750 enum armv7_counters counter)
1751{
1752 int ret = 0;
1753
1754 if (counter == ARMV7_CYCLE_COUNTER)
1755 ret = pmnc & ARMV7_FLAG_C;
1756 else if ((counter >= ARMV7_COUNTER0) && (counter <= ARMV7_COUNTER_LAST))
1757 ret = pmnc & ARMV7_FLAG_P(counter);
1758 else
1759 pr_err("CPU%u checking wrong counter %d overflow status\n",
1760 smp_processor_id(), counter);
1761
1762 return ret;
1763}
1764
1765static inline int armv7_pmnc_select_counter(unsigned int idx)
1766{
1767 u32 val;
1768
1769 if ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST)) {
1770 pr_err("CPU%u selecting wrong PMNC counter"
1771 " %d\n", smp_processor_id(), idx);
1772 return -1;
1773 }
1774
1775 val = (idx - ARMV7_EVENT_CNT_TO_CNTx) & ARMV7_SELECT_MASK;
1776 asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val));
1777
1778 return idx;
1779}
1780
1781static inline u32 armv7pmu_read_counter(int idx)
1782{
1783 unsigned long value = 0;
1784
1785 if (idx == ARMV7_CYCLE_COUNTER)
1786 asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value));
1787 else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
1788 if (armv7_pmnc_select_counter(idx) == idx)
1789 asm volatile("mrc p15, 0, %0, c9, c13, 2"
1790 : "=r" (value));
1791 } else
1792 pr_err("CPU%u reading wrong counter %d\n",
1793 smp_processor_id(), idx);
1794
1795 return value;
1796}
1797
1798static inline void armv7pmu_write_counter(int idx, u32 value)
1799{
1800 if (idx == ARMV7_CYCLE_COUNTER)
1801 asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value));
1802 else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
1803 if (armv7_pmnc_select_counter(idx) == idx)
1804 asm volatile("mcr p15, 0, %0, c9, c13, 2"
1805 : : "r" (value));
1806 } else
1807 pr_err("CPU%u writing wrong counter %d\n",
1808 smp_processor_id(), idx);
1809}
1810
1811static inline void armv7_pmnc_write_evtsel(unsigned int idx, u32 val)
1812{
1813 if (armv7_pmnc_select_counter(idx) == idx) {
1814 val &= ARMV7_EVTSEL_MASK;
1815 asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val));
1816 }
1817}
1818
1819static inline u32 armv7_pmnc_enable_counter(unsigned int idx)
1820{
1821 u32 val;
1822
1823 if ((idx != ARMV7_CYCLE_COUNTER) &&
1824 ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
1825		pr_err("CPU%u enabling wrong PMNC counter %d\n",
1826			smp_processor_id(), idx);
1827 return -1;
1828 }
1829
1830 if (idx == ARMV7_CYCLE_COUNTER)
1831 val = ARMV7_CNTENS_C;
1832 else
1833 val = ARMV7_CNTENS_P(idx);
1834
1835 asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val));
1836
1837 return idx;
1838}
1839
1840static inline u32 armv7_pmnc_disable_counter(unsigned int idx)
1841{
1842 u32 val;
1843
1844
1845 if ((idx != ARMV7_CYCLE_COUNTER) &&
1846 ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
1847		pr_err("CPU%u disabling wrong PMNC counter %d\n",
1848			smp_processor_id(), idx);
1849 return -1;
1850 }
1851
1852 if (idx == ARMV7_CYCLE_COUNTER)
1853 val = ARMV7_CNTENC_C;
1854 else
1855 val = ARMV7_CNTENC_P(idx);
1856
1857 asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val));
1858
1859 return idx;
1860}
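
Note the design of CNTENS/CNTENC (and INTENS/INTENC below): these are write-one-to-set and write-one-to-clear register pairs, so a counter can be enabled or disabled with a single store and no read-modify-write of the other counters' state. A sketch of what that buys us (hypothetical helper, reusing the masks defined above):

/* Sketch: enable the cycle counter and hardware counter 0 in one store
 * to CNTENS (cp15, c9, c12, 1) without disturbing counters 1..N. */
static inline void armv7_enable_ccnt_and_cnt0(void)
{
	u32 set = ARMV7_CNTENS_C | ARMV7_CNTENS_P(ARMV7_COUNTER0);
	asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (set));
}
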
1861
1862static inline u32 armv7_pmnc_enable_intens(unsigned int idx)
1863{
1864 u32 val;
1865
1866 if ((idx != ARMV7_CYCLE_COUNTER) &&
1867 ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
1868		pr_err("CPU%u enabling wrong PMNC counter interrupt enable %d\n",
1869			smp_processor_id(), idx);
1870 return -1;
1871 }
1872
1873 if (idx == ARMV7_CYCLE_COUNTER)
1874 val = ARMV7_INTENS_C;
1875 else
1876 val = ARMV7_INTENS_P(idx);
1877
1878 asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val));
1879
1880 return idx;
1881}
1882
1883static inline u32 armv7_pmnc_disable_intens(unsigned int idx)
1884{
1885 u32 val;
1886
1887 if ((idx != ARMV7_CYCLE_COUNTER) &&
1888 ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
1889		pr_err("CPU%u disabling wrong PMNC counter interrupt enable %d\n",
1890			smp_processor_id(), idx);
1891 return -1;
1892 }
1893
1894 if (idx == ARMV7_CYCLE_COUNTER)
1895 val = ARMV7_INTENC_C;
1896 else
1897 val = ARMV7_INTENC_P(idx);
1898
1899 asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val));
1900
1901 return idx;
1902}
1903
1904static inline u32 armv7_pmnc_getreset_flags(void)
1905{
1906 u32 val;
1907
1908 /* Read */
1909 asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
1910
1911 /* Write to clear flags */
1912 val &= ARMV7_FLAG_MASK;
1913 asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val));
1914
1915 return val;
1916}
1917
1918#ifdef DEBUG
1919static void armv7_pmnc_dump_regs(void)
1920{
1921 u32 val;
1922 unsigned int cnt;
1923
1924 printk(KERN_INFO "PMNC registers dump:\n");
1925
1926 asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val));
1927 printk(KERN_INFO "PMNC =0x%08x\n", val);
1928
1929 asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val));
1930 printk(KERN_INFO "CNTENS=0x%08x\n", val);
1931
1932 asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val));
1933 printk(KERN_INFO "INTENS=0x%08x\n", val);
1934
1935 asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
1936 printk(KERN_INFO "FLAGS =0x%08x\n", val);
1937
1938 asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val));
1939 printk(KERN_INFO "SELECT=0x%08x\n", val);
1940
1941 asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val));
1942 printk(KERN_INFO "CCNT =0x%08x\n", val);
1943
1944	for (cnt = ARMV7_COUNTER0; cnt <= ARMV7_COUNTER_LAST; cnt++) {
1945 armv7_pmnc_select_counter(cnt);
1946 asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val));
1947 printk(KERN_INFO "CNT[%d] count =0x%08x\n",
1948 cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
1949 asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val));
1950 printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n",
1951 cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
1952 }
1953}
1954#endif
1955
1956static void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx)
1957{
1958 unsigned long flags;
1959
1960 /*
1961 * Enable counter and interrupt, and set the counter to count
1962 * the event that we're interested in.
1963 */
1964 spin_lock_irqsave(&pmu_lock, flags);
1965
1966 /*
1967 * Disable counter
1968 */
1969 armv7_pmnc_disable_counter(idx);
1970
1971 /*
1972 * Set event (if destined for PMNx counters)
1973	 * We don't need to set the event for the cycle counter
1974 */
1975 if (idx != ARMV7_CYCLE_COUNTER)
1976 armv7_pmnc_write_evtsel(idx, hwc->config_base);
1977
1978 /*
1979 * Enable interrupt for this counter
1980 */
1981 armv7_pmnc_enable_intens(idx);
1982
1983 /*
1984 * Enable counter
1985 */
1986 armv7_pmnc_enable_counter(idx);
1987
1988 spin_unlock_irqrestore(&pmu_lock, flags);
1989}
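
For illustration, a hypothetical caller that wanted a hardware counter to count L1 data cache accesses would look roughly like the sketch below; in the real driver this path is only reached through the generic armpmu code via the ->enable() hook, and the function name here is invented:

/* Hypothetical usage sketch, not reachable code in the driver itself. */
static void example_count_l1d_accesses(void)
{
	struct hw_perf_event hwc = {
		.config_base = ARMV7_PERFCTR_DCACHE_ACCESS,	/* 0x04 */
	};

	armv7pmu_enable_event(&hwc, ARMV7_COUNTER0);
}
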
1990
1991static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx)
1992{
1993 unsigned long flags;
1994
1995 /*
1996 * Disable counter and interrupt
1997 */
1998 spin_lock_irqsave(&pmu_lock, flags);
1999
2000 /*
2001 * Disable counter
2002 */
2003 armv7_pmnc_disable_counter(idx);
2004
2005 /*
2006 * Disable interrupt for this counter
2007 */
2008 armv7_pmnc_disable_intens(idx);
2009
2010 spin_unlock_irqrestore(&pmu_lock, flags);
2011}
2012
2013static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
2014{
2015 unsigned long pmnc;
2016 struct perf_sample_data data;
2017 struct cpu_hw_events *cpuc;
2018 struct pt_regs *regs;
2019 int idx;
2020
2021 /*
2022 * Get and reset the IRQ flags
2023 */
2024 pmnc = armv7_pmnc_getreset_flags();
2025
2026 /*
2027 * Did an overflow occur?
2028 */
2029 if (!armv7_pmnc_has_overflowed(pmnc))
2030 return IRQ_NONE;
2031
2032 /*
2033 * Handle the counter(s) overflow(s)
2034 */
2035 regs = get_irq_regs();
2036
2037 perf_sample_data_init(&data, 0);
2038
2039 cpuc = &__get_cpu_var(cpu_hw_events);
2040 for (idx = 0; idx <= armpmu->num_events; ++idx) {
2041 struct perf_event *event = cpuc->events[idx];
2042 struct hw_perf_event *hwc;
2043
2044 if (!test_bit(idx, cpuc->active_mask))
2045 continue;
2046
2047 /*
2048 * We have a single interrupt for all counters. Check that
2049 * each counter has overflowed before we process it.
2050 */
2051 if (!armv7_pmnc_counter_has_overflowed(pmnc, idx))
2052 continue;
2053
2054 hwc = &event->hw;
2055 armpmu_event_update(event, hwc, idx);
2056 data.period = event->hw.last_period;
2057 if (!armpmu_event_set_period(event, hwc, idx))
2058 continue;
2059
2060 if (perf_event_overflow(event, 0, &data, regs))
2061 armpmu->disable(hwc, idx);
2062 }
2063
2064 /*
2065 * Handle the pending perf events.
2066 *
2067 * Note: this call *must* be run with interrupts disabled. For
2068 * platforms that can have the PMU interrupts raised as an NMI, this
2069 * will not work.
2070 */
2071 irq_work_run();
2072
2073 return IRQ_HANDLED;
2074}
2075
2076static void armv7pmu_start(void)
2077{
2078 unsigned long flags;
2079
2080 spin_lock_irqsave(&pmu_lock, flags);
2081 /* Enable all counters */
2082 armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E);
2083 spin_unlock_irqrestore(&pmu_lock, flags);
2084}
2085
2086static void armv7pmu_stop(void)
2087{
2088 unsigned long flags;
2089
2090 spin_lock_irqsave(&pmu_lock, flags);
2091 /* Disable all counters */
2092 armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E);
2093 spin_unlock_irqrestore(&pmu_lock, flags);
2094}
2095
2096static inline int armv7_a8_pmu_event_map(int config)
2097{
2098 int mapping = armv7_a8_perf_map[config];
2099 if (HW_OP_UNSUPPORTED == mapping)
2100 mapping = -EOPNOTSUPP;
2101 return mapping;
2102}
2103
2104static inline int armv7_a9_pmu_event_map(int config)
2105{
2106 int mapping = armv7_a9_perf_map[config];
2107 if (HW_OP_UNSUPPORTED == mapping)
2108 mapping = -EOPNOTSUPP;
2109 return mapping;
2110}
2111
2112static u64 armv7pmu_raw_event(u64 config)
2113{
2114 return config & 0xff;
2115}
2116
2117static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc,
2118 struct hw_perf_event *event)
2119{
2120 int idx;
2121
2122	/* Always place a cycle-count event on the dedicated cycle counter. */
2123 if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) {
2124 if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask))
2125 return -EAGAIN;
2126
2127 return ARMV7_CYCLE_COUNTER;
2128 } else {
2129 /*
2130		 * For anything other than a cycle-count event, try to use
2131		 * the event counters.
2132 */
2133 for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) {
2134 if (!test_and_set_bit(idx, cpuc->used_mask))
2135 return idx;
2136 }
2137
2138 /* The counters are all in use. */
2139 return -EAGAIN;
2140 }
2141}
2142
2143static struct arm_pmu armv7pmu = {
2144 .handle_irq = armv7pmu_handle_irq,
2145 .enable = armv7pmu_enable_event,
2146 .disable = armv7pmu_disable_event,
2147 .raw_event = armv7pmu_raw_event,
2148 .read_counter = armv7pmu_read_counter,
2149 .write_counter = armv7pmu_write_counter,
2150 .get_event_idx = armv7pmu_get_event_idx,
2151 .start = armv7pmu_start,
2152 .stop = armv7pmu_stop,
2153 .max_period = (1LLU << 32) - 1,
2154};
2155
2156static u32 __init armv7_reset_read_pmnc(void)
2157{
2158 u32 nb_cnt;
2159
2160 /* Initialize & Reset PMNC: C and P bits */
2161 armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C);
2162
2163	/* Read the number of CNTx counters supported from the PMNC */
2164 nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK;
2165
2166 /* Add the CPU cycles counter and return */
2167 return nb_cnt + 1;
2168}
2169
2170/*
2171 * ARMv5 [xscale] Performance counter handling code.
2172 *
2173 * Based on xscale OProfile code.
2174 *
2175 * There are two variants of the xscale PMU that we support:
2176 * - xscale1pmu: 2 event counters and a cycle counter
2177 * - xscale2pmu: 4 event counters and a cycle counter
2178 * The two variants share event definitions, but have different
2179 * PMU structures.
2180 */
2181
2182enum xscale_perf_types {
2183 XSCALE_PERFCTR_ICACHE_MISS = 0x00,
2184 XSCALE_PERFCTR_ICACHE_NO_DELIVER = 0x01,
2185 XSCALE_PERFCTR_DATA_STALL = 0x02,
2186 XSCALE_PERFCTR_ITLB_MISS = 0x03,
2187 XSCALE_PERFCTR_DTLB_MISS = 0x04,
2188 XSCALE_PERFCTR_BRANCH = 0x05,
2189 XSCALE_PERFCTR_BRANCH_MISS = 0x06,
2190 XSCALE_PERFCTR_INSTRUCTION = 0x07,
2191 XSCALE_PERFCTR_DCACHE_FULL_STALL = 0x08,
2192 XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG = 0x09,
2193 XSCALE_PERFCTR_DCACHE_ACCESS = 0x0A,
2194 XSCALE_PERFCTR_DCACHE_MISS = 0x0B,
2195 XSCALE_PERFCTR_DCACHE_WRITE_BACK = 0x0C,
2196 XSCALE_PERFCTR_PC_CHANGED = 0x0D,
2197 XSCALE_PERFCTR_BCU_REQUEST = 0x10,
2198 XSCALE_PERFCTR_BCU_FULL = 0x11,
2199 XSCALE_PERFCTR_BCU_DRAIN = 0x12,
2200 XSCALE_PERFCTR_BCU_ECC_NO_ELOG = 0x14,
2201 XSCALE_PERFCTR_BCU_1_BIT_ERR = 0x15,
2202 XSCALE_PERFCTR_RMW = 0x16,
2203 /* XSCALE_PERFCTR_CCNT is not hardware defined */
2204 XSCALE_PERFCTR_CCNT = 0xFE,
2205 XSCALE_PERFCTR_UNUSED = 0xFF,
2206};
2207
2208enum xscale_counters {
2209 XSCALE_CYCLE_COUNTER = 1,
2210 XSCALE_COUNTER0,
2211 XSCALE_COUNTER1,
2212 XSCALE_COUNTER2,
2213 XSCALE_COUNTER3,
2214};
2215
2216static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = {
2217 [PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT,
2218 [PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION,
2219 [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
2220 [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
2221 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH,
2222 [PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS,
2223 [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
2224};
2225
2226static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
2227 [PERF_COUNT_HW_CACHE_OP_MAX]
2228 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
2229 [C(L1D)] = {
2230 [C(OP_READ)] = {
2231 [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
2232 [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
2233 },
2234 [C(OP_WRITE)] = {
2235 [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
2236 [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
2237 },
2238 [C(OP_PREFETCH)] = {
2239 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2240 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
2241 },
2242 },
2243 [C(L1I)] = {
2244 [C(OP_READ)] = {
2245 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2246 [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS,
2247 },
2248 [C(OP_WRITE)] = {
2249 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2250 [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS,
2251 },
2252 [C(OP_PREFETCH)] = {
2253 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2254 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
2255 },
2256 },
2257 [C(LL)] = {
2258 [C(OP_READ)] = {
2259 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2260 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
2261 },
2262 [C(OP_WRITE)] = {
2263 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2264 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
2265 },
2266 [C(OP_PREFETCH)] = {
2267 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2268 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
2269 },
2270 },
2271 [C(DTLB)] = {
2272 [C(OP_READ)] = {
2273 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2274 [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
2275 },
2276 [C(OP_WRITE)] = {
2277 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2278 [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
2279 },
2280 [C(OP_PREFETCH)] = {
2281 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2282 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
2283 },
2284 },
2285 [C(ITLB)] = {
2286 [C(OP_READ)] = {
2287 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2288 [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
2289 },
2290 [C(OP_WRITE)] = {
2291 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2292 [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
2293 },
2294 [C(OP_PREFETCH)] = {
2295 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2296 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
2297 },
2298 },
2299 [C(BPU)] = {
2300 [C(OP_READ)] = {
2301 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2302 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
2303 },
2304 [C(OP_WRITE)] = {
2305 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2306 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
2307 },
2308 [C(OP_PREFETCH)] = {
2309 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2310 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
2311 },
2312 },
2313};
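
The three-dimensional cache map is indexed straight from the perf ABI's hardware-cache event encoding, in which attr.config packs the cache id, operation and result into successive bytes. For example, a request for L1 data read misses resolves through xscale_perf_cache_map[C(L1D)][C(OP_READ)][C(RESULT_MISS)] to XSCALE_PERFCTR_DCACHE_MISS (0x0B). A userspace-side sketch of such a request, using the uapi definitions from <linux/perf_event.h>:

/* Userspace sketch: ask for L1D read misses via the generic cache
 * encoding; the driver's cache map turns this into event 0x0B. */
struct perf_event_attr attr = {
	.type	= PERF_TYPE_HW_CACHE,
	.config	= PERF_COUNT_HW_CACHE_L1D |
		  (PERF_COUNT_HW_CACHE_OP_READ << 8) |
		  (PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
};
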
2314
2315#define XSCALE_PMU_ENABLE 0x001
2316#define XSCALE_PMN_RESET 0x002
2317#define XSCALE_CCNT_RESET 0x004
2318#define XSCALE_PMU_RESET	(XSCALE_CCNT_RESET | XSCALE_PMN_RESET)
2319#define XSCALE_PMU_CNT64 0x008
2320
2321static inline int
2322xscalepmu_event_map(int config)
2323{
2324 int mapping = xscale_perf_map[config];
2325 if (HW_OP_UNSUPPORTED == mapping)
2326 mapping = -EOPNOTSUPP;
2327 return mapping;
2328}
2329
2330static u64
2331xscalepmu_raw_event(u64 config)
2332{
2333 return config & 0xff;
2334}
2335
2336#define XSCALE1_OVERFLOWED_MASK 0x700
2337#define XSCALE1_CCOUNT_OVERFLOW 0x400
2338#define XSCALE1_COUNT0_OVERFLOW 0x100
2339#define XSCALE1_COUNT1_OVERFLOW 0x200
2340#define XSCALE1_CCOUNT_INT_EN 0x040
2341#define XSCALE1_COUNT0_INT_EN 0x010
2342#define XSCALE1_COUNT1_INT_EN 0x020
2343#define XSCALE1_COUNT0_EVT_SHFT 12
2344#define XSCALE1_COUNT0_EVT_MASK (0xff << XSCALE1_COUNT0_EVT_SHFT)
2345#define XSCALE1_COUNT1_EVT_SHFT 20
2346#define XSCALE1_COUNT1_EVT_MASK (0xff << XSCALE1_COUNT1_EVT_SHFT)
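
Unlike the later PMUs, xscale1 packs everything into the one PMNC register: the enable bit, both event select fields, the interrupt enables and the overflow flags. That is why the enable/disable paths below are read-modify-write sequences under pmu_lock. As a worked example of the encoding, programming counter 0 to count retired instructions with its overflow interrupt enabled amounts to:

/* Sketch: PMNC fragment for counter 0 counting XSCALE_PERFCTR_INSTRUCTION
 * (0x07) with its interrupt enabled:
 *   (0x07 << 12) | 0x010 == 0x7010 */
u32 evt = (XSCALE_PERFCTR_INSTRUCTION << XSCALE1_COUNT0_EVT_SHFT) |
	  XSCALE1_COUNT0_INT_EN;
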
2347
2348static inline u32
2349xscale1pmu_read_pmnc(void)
2350{
2351 u32 val;
2352 asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val));
2353 return val;
2354}
2355
2356static inline void
2357xscale1pmu_write_pmnc(u32 val)
2358{
2359	/* the upper 4 bits and bits 7 and 11 are write-as-0 */
2360 val &= 0xffff77f;
2361 asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val));
2362}
2363
2364static inline int
2365xscale1_pmnc_counter_has_overflowed(unsigned long pmnc,
2366 enum xscale_counters counter)
2367{
2368 int ret = 0;
2369
2370 switch (counter) {
2371 case XSCALE_CYCLE_COUNTER:
2372 ret = pmnc & XSCALE1_CCOUNT_OVERFLOW;
2373 break;
2374 case XSCALE_COUNTER0:
2375 ret = pmnc & XSCALE1_COUNT0_OVERFLOW;
2376 break;
2377 case XSCALE_COUNTER1:
2378 ret = pmnc & XSCALE1_COUNT1_OVERFLOW;
2379 break;
2380 default:
2381 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
2382 }
2383
2384 return ret;
2385}
2386
2387static irqreturn_t
2388xscale1pmu_handle_irq(int irq_num, void *dev)
2389{
2390 unsigned long pmnc;
2391 struct perf_sample_data data;
2392 struct cpu_hw_events *cpuc;
2393 struct pt_regs *regs;
2394 int idx;
2395
2396 /*
2397	 * NOTE: there's an A stepping erratum whereby, if an overflow
2398	 *	 bit is already set and another overflow occurs, the
2399	 *	 previous overflow bit gets cleared. There's no workaround;
2400	 *	 this was fixed in the B stepping and later.
2401 */
2402 pmnc = xscale1pmu_read_pmnc();
2403
2404 /*
2405 * Write the value back to clear the overflow flags. Overflow
2406 * flags remain in pmnc for use below. We also disable the PMU
2407 * while we process the interrupt.
2408 */
2409 xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
2410
2411 if (!(pmnc & XSCALE1_OVERFLOWED_MASK))
2412 return IRQ_NONE;
2413
2414 regs = get_irq_regs();
2415
2416 perf_sample_data_init(&data, 0);
2417
2418 cpuc = &__get_cpu_var(cpu_hw_events);
2419 for (idx = 0; idx <= armpmu->num_events; ++idx) {
2420 struct perf_event *event = cpuc->events[idx];
2421 struct hw_perf_event *hwc;
2422
2423 if (!test_bit(idx, cpuc->active_mask))
2424 continue;
2425
2426 if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx))
2427 continue;
2428
2429 hwc = &event->hw;
2430 armpmu_event_update(event, hwc, idx);
2431 data.period = event->hw.last_period;
2432 if (!armpmu_event_set_period(event, hwc, idx))
2433 continue;
2434
2435 if (perf_event_overflow(event, 0, &data, regs))
2436 armpmu->disable(hwc, idx);
2437 }
2438
2439 irq_work_run();
2440
2441 /*
2442 * Re-enable the PMU.
2443 */
2444 pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE;
2445 xscale1pmu_write_pmnc(pmnc);
2446
2447 return IRQ_HANDLED;
2448}
2449
2450static void
2451xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx)
2452{
2453 unsigned long val, mask, evt, flags;
2454
2455 switch (idx) {
2456 case XSCALE_CYCLE_COUNTER:
2457 mask = 0;
2458 evt = XSCALE1_CCOUNT_INT_EN;
2459 break;
2460 case XSCALE_COUNTER0:
2461 mask = XSCALE1_COUNT0_EVT_MASK;
2462 evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) |
2463 XSCALE1_COUNT0_INT_EN;
2464 break;
2465 case XSCALE_COUNTER1:
2466 mask = XSCALE1_COUNT1_EVT_MASK;
2467 evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) |
2468 XSCALE1_COUNT1_INT_EN;
2469 break;
2470 default:
2471 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
2472 return;
2473 }
2474
2475 spin_lock_irqsave(&pmu_lock, flags);
2476 val = xscale1pmu_read_pmnc();
2477 val &= ~mask;
2478 val |= evt;
2479 xscale1pmu_write_pmnc(val);
2480 spin_unlock_irqrestore(&pmu_lock, flags);
2481}
2482
2483static void
2484xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx)
2485{
2486 unsigned long val, mask, evt, flags;
2487
2488 switch (idx) {
2489 case XSCALE_CYCLE_COUNTER:
2490 mask = XSCALE1_CCOUNT_INT_EN;
2491 evt = 0;
2492 break;
2493 case XSCALE_COUNTER0:
2494 mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK;
2495 evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT;
2496 break;
2497 case XSCALE_COUNTER1:
2498 mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK;
2499 evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT;
2500 break;
2501 default:
2502 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
2503 return;
2504 }
2505
2506 spin_lock_irqsave(&pmu_lock, flags);
2507 val = xscale1pmu_read_pmnc();
2508 val &= ~mask;
2509 val |= evt;
2510 xscale1pmu_write_pmnc(val);
2511 spin_unlock_irqrestore(&pmu_lock, flags);
2512}
2513
2514static int
2515xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc,
2516 struct hw_perf_event *event)
2517{
2518 if (XSCALE_PERFCTR_CCNT == event->config_base) {
2519 if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask))
2520 return -EAGAIN;
2521
2522 return XSCALE_CYCLE_COUNTER;
2523 } else {
2524 if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask)) {
2525 return XSCALE_COUNTER1;
2526 }
2527
2528 if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask)) {
2529 return XSCALE_COUNTER0;
2530 }
2531
2532 return -EAGAIN;
2533 }
2534}
2535
2536static void
2537xscale1pmu_start(void)
2538{
2539 unsigned long flags, val;
2540
2541 spin_lock_irqsave(&pmu_lock, flags);
2542 val = xscale1pmu_read_pmnc();
2543 val |= XSCALE_PMU_ENABLE;
2544 xscale1pmu_write_pmnc(val);
2545 spin_unlock_irqrestore(&pmu_lock, flags);
2546}
2547
2548static void
2549xscale1pmu_stop(void)
2550{
2551 unsigned long flags, val;
2552
2553 spin_lock_irqsave(&pmu_lock, flags);
2554 val = xscale1pmu_read_pmnc();
2555 val &= ~XSCALE_PMU_ENABLE;
2556 xscale1pmu_write_pmnc(val);
2557 spin_unlock_irqrestore(&pmu_lock, flags);
2558}
2559
2560static inline u32
2561xscale1pmu_read_counter(int counter)
2562{
2563 u32 val = 0;
2564
2565 switch (counter) {
2566 case XSCALE_CYCLE_COUNTER:
2567 asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val));
2568 break;
2569 case XSCALE_COUNTER0:
2570 asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val));
2571 break;
2572 case XSCALE_COUNTER1:
2573 asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val));
2574 break;
2575 }
2576
2577 return val;
2578}
2579
2580static inline void
2581xscale1pmu_write_counter(int counter, u32 val)
2582{
2583 switch (counter) {
2584 case XSCALE_CYCLE_COUNTER:
2585 asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val));
2586 break;
2587 case XSCALE_COUNTER0:
2588 asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val));
2589 break;
2590 case XSCALE_COUNTER1:
2591 asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val));
2592 break;
2593 }
2594}
2595
2596static const struct arm_pmu xscale1pmu = {
2597 .id = ARM_PERF_PMU_ID_XSCALE1,
2598 .handle_irq = xscale1pmu_handle_irq,
2599 .enable = xscale1pmu_enable_event,
2600 .disable = xscale1pmu_disable_event,
2601 .event_map = xscalepmu_event_map,
2602 .raw_event = xscalepmu_raw_event,
2603 .read_counter = xscale1pmu_read_counter,
2604 .write_counter = xscale1pmu_write_counter,
2605 .get_event_idx = xscale1pmu_get_event_idx,
2606 .start = xscale1pmu_start,
2607 .stop = xscale1pmu_stop,
2608 .num_events = 3,
2609 .max_period = (1LLU << 32) - 1,
2610};
2611
2612#define XSCALE2_OVERFLOWED_MASK 0x01f
2613#define XSCALE2_CCOUNT_OVERFLOW 0x001
2614#define XSCALE2_COUNT0_OVERFLOW 0x002
2615#define XSCALE2_COUNT1_OVERFLOW 0x004
2616#define XSCALE2_COUNT2_OVERFLOW 0x008
2617#define XSCALE2_COUNT3_OVERFLOW 0x010
2618#define XSCALE2_CCOUNT_INT_EN 0x001
2619#define XSCALE2_COUNT0_INT_EN 0x002
2620#define XSCALE2_COUNT1_INT_EN 0x004
2621#define XSCALE2_COUNT2_INT_EN 0x008
2622#define XSCALE2_COUNT3_INT_EN 0x010
2623#define XSCALE2_COUNT0_EVT_SHFT 0
2624#define XSCALE2_COUNT0_EVT_MASK (0xff << XSCALE2_COUNT0_EVT_SHFT)
2625#define XSCALE2_COUNT1_EVT_SHFT 8
2626#define XSCALE2_COUNT1_EVT_MASK (0xff << XSCALE2_COUNT1_EVT_SHFT)
2627#define XSCALE2_COUNT2_EVT_SHFT 16
2628#define XSCALE2_COUNT2_EVT_MASK (0xff << XSCALE2_COUNT2_EVT_SHFT)
2629#define XSCALE2_COUNT3_EVT_SHFT 24
2630#define XSCALE2_COUNT3_EVT_MASK (0xff << XSCALE2_COUNT3_EVT_SHFT)
2631
2632static inline u32
2633xscale2pmu_read_pmnc(void)
2634{
2635 u32 val;
2636 asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val));
2637 /* bits 1-2 and 4-23 are read-unpredictable */
2638 return val & 0xff000009;
2639}
2640
2641static inline void
2642xscale2pmu_write_pmnc(u32 val)
2643{
2644 /* bits 4-23 are write-as-0, 24-31 are write ignored */
2645 val &= 0xf;
2646 asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val));
2647}
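
xscale2 moves event selection, interrupt enables and overflow flags out to dedicated registers, leaving only a few control bits in the PMNC itself; the read accessor masks off the read-unpredictable bits and the write accessor keeps just the low nibble. A read-modify-write through this pair is therefore safe, as in this sketch (helper name invented; XSCALE_PMU_CNT64 is bit 3, which survives the 0xf write mask, and presumably selects the 64-cycle CCNT divider, as suggested by xscale2pmu_start() clearing it):

/* Sketch: set the CNT64 control bit via a safe read-modify-write. */
static inline void xscale2pmu_set_cnt64(void)
{
	xscale2pmu_write_pmnc(xscale2pmu_read_pmnc() | XSCALE_PMU_CNT64);
}
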
2648
2649static inline u32
2650xscale2pmu_read_overflow_flags(void)
2651{
2652 u32 val;
2653 asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val));
2654 return val;
2655}
2656
2657static inline void
2658xscale2pmu_write_overflow_flags(u32 val)
2659{
2660 asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val));
2661}
2662
2663static inline u32
2664xscale2pmu_read_event_select(void)
2665{
2666 u32 val;
2667 asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val));
2668 return val;
2669}
2670
2671static inline void
2672xscale2pmu_write_event_select(u32 val)
2673{
2674 asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val));
2675}
2676
2677static inline u32
2678xscale2pmu_read_int_enable(void)
2679{
2680 u32 val;
2681 asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val));
2682 return val;
2683}
2684
2685static void
2686xscale2pmu_write_int_enable(u32 val)
2687{
2688 asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val));
2689}
2690
2691static inline int
2692xscale2_pmnc_counter_has_overflowed(unsigned long of_flags,
2693 enum xscale_counters counter)
2694{
2695 int ret = 0;
2696
2697 switch (counter) {
2698 case XSCALE_CYCLE_COUNTER:
2699 ret = of_flags & XSCALE2_CCOUNT_OVERFLOW;
2700 break;
2701 case XSCALE_COUNTER0:
2702 ret = of_flags & XSCALE2_COUNT0_OVERFLOW;
2703 break;
2704 case XSCALE_COUNTER1:
2705 ret = of_flags & XSCALE2_COUNT1_OVERFLOW;
2706 break;
2707 case XSCALE_COUNTER2:
2708 ret = of_flags & XSCALE2_COUNT2_OVERFLOW;
2709 break;
2710 case XSCALE_COUNTER3:
2711 ret = of_flags & XSCALE2_COUNT3_OVERFLOW;
2712 break;
2713 default:
2714 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
2715 }
2716
2717 return ret;
2718}
2719
2720static irqreturn_t
2721xscale2pmu_handle_irq(int irq_num, void *dev)
2722{
2723 unsigned long pmnc, of_flags;
2724 struct perf_sample_data data;
2725 struct cpu_hw_events *cpuc;
2726 struct pt_regs *regs;
2727 int idx;
2728
2729 /* Disable the PMU. */
2730 pmnc = xscale2pmu_read_pmnc();
2731 xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
2732
2733 /* Check the overflow flag register. */
2734 of_flags = xscale2pmu_read_overflow_flags();
2735 if (!(of_flags & XSCALE2_OVERFLOWED_MASK))
2736 return IRQ_NONE;
2737
2738 /* Clear the overflow bits. */
2739 xscale2pmu_write_overflow_flags(of_flags);
2740
2741 regs = get_irq_regs();
2742
2743 perf_sample_data_init(&data, 0);
2744
2745 cpuc = &__get_cpu_var(cpu_hw_events);
2746 for (idx = 0; idx <= armpmu->num_events; ++idx) {
2747 struct perf_event *event = cpuc->events[idx];
2748 struct hw_perf_event *hwc;
2749
2750 if (!test_bit(idx, cpuc->active_mask))
2751 continue;
2752
2753		if (!xscale2_pmnc_counter_has_overflowed(of_flags, idx))
2754 continue;
2755
2756 hwc = &event->hw;
2757 armpmu_event_update(event, hwc, idx);
2758 data.period = event->hw.last_period;
2759 if (!armpmu_event_set_period(event, hwc, idx))
2760 continue;
2761
2762 if (perf_event_overflow(event, 0, &data, regs))
2763 armpmu->disable(hwc, idx);
2764 }
2765
2766 irq_work_run();
2767
2768 /*
2769 * Re-enable the PMU.
2770 */
2771 pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE;
2772 xscale2pmu_write_pmnc(pmnc);
2773
2774 return IRQ_HANDLED;
2775}
2776
2777static void
2778xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx)
2779{
2780 unsigned long flags, ien, evtsel;
2781
2782 ien = xscale2pmu_read_int_enable();
2783 evtsel = xscale2pmu_read_event_select();
2784
2785 switch (idx) {
2786 case XSCALE_CYCLE_COUNTER:
2787 ien |= XSCALE2_CCOUNT_INT_EN;
2788 break;
2789 case XSCALE_COUNTER0:
2790 ien |= XSCALE2_COUNT0_INT_EN;
2791 evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
2792 evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT;
2793 break;
2794 case XSCALE_COUNTER1:
2795 ien |= XSCALE2_COUNT1_INT_EN;
2796 evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
2797 evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT;
2798 break;
2799 case XSCALE_COUNTER2:
2800 ien |= XSCALE2_COUNT2_INT_EN;
2801 evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
2802 evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT;
2803 break;
2804 case XSCALE_COUNTER3:
2805 ien |= XSCALE2_COUNT3_INT_EN;
2806 evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
2807 evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT;
2808 break;
2809 default:
2810 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
2811 return;
2812 }
2813
2814 spin_lock_irqsave(&pmu_lock, flags);
2815 xscale2pmu_write_event_select(evtsel);
2816 xscale2pmu_write_int_enable(ien);
2817 spin_unlock_irqrestore(&pmu_lock, flags);
2818}
2819
2820static void
2821xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx)
2822{
2823 unsigned long flags, ien, evtsel;
2824
2825 ien = xscale2pmu_read_int_enable();
2826 evtsel = xscale2pmu_read_event_select();
2827
2828 switch (idx) {
2829 case XSCALE_CYCLE_COUNTER:
2830 ien &= ~XSCALE2_CCOUNT_INT_EN;
2831 break;
2832 case XSCALE_COUNTER0:
2833 ien &= ~XSCALE2_COUNT0_INT_EN;
2834 evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
2835 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT;
2836 break;
2837 case XSCALE_COUNTER1:
2838 ien &= ~XSCALE2_COUNT1_INT_EN;
2839 evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
2840 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT;
2841 break;
2842 case XSCALE_COUNTER2:
2843 ien &= ~XSCALE2_COUNT2_INT_EN;
2844 evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
2845 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT;
2846 break;
2847 case XSCALE_COUNTER3:
2848 ien &= ~XSCALE2_COUNT3_INT_EN;
2849 evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
2850 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT;
2851 break;
2852 default:
2853 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
2854 return;
2855 }
2856
2857 spin_lock_irqsave(&pmu_lock, flags);
2858 xscale2pmu_write_event_select(evtsel);
2859 xscale2pmu_write_int_enable(ien);
2860 spin_unlock_irqrestore(&pmu_lock, flags);
2861}
2862
2863static int
2864xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc,
2865 struct hw_perf_event *event)
2866{
2867 int idx = xscale1pmu_get_event_idx(cpuc, event);
2868 if (idx >= 0)
2869 goto out;
2870
2871 if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask))
2872 idx = XSCALE_COUNTER3;
2873 else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask))
2874 idx = XSCALE_COUNTER2;
2875out:
2876 return idx;
2877}
2878
2879static void
2880xscale2pmu_start(void)
2881{
2882 unsigned long flags, val;
2883
2884 spin_lock_irqsave(&pmu_lock, flags);
2885 val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64;
2886 val |= XSCALE_PMU_ENABLE;
2887 xscale2pmu_write_pmnc(val);
2888 spin_unlock_irqrestore(&pmu_lock, flags);
2889}
2890
2891static void
2892xscale2pmu_stop(void)
2893{
2894 unsigned long flags, val;
2895
2896 spin_lock_irqsave(&pmu_lock, flags);
2897 val = xscale2pmu_read_pmnc();
2898 val &= ~XSCALE_PMU_ENABLE;
2899 xscale2pmu_write_pmnc(val);
2900 spin_unlock_irqrestore(&pmu_lock, flags);
2901}
2902
2903static inline u32
2904xscale2pmu_read_counter(int counter)
2905{
2906 u32 val = 0;
2907
2908 switch (counter) {
2909 case XSCALE_CYCLE_COUNTER:
2910 asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val));
2911 break;
2912 case XSCALE_COUNTER0:
2913 asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val));
2914 break;
2915 case XSCALE_COUNTER1:
2916 asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val));
2917 break;
2918 case XSCALE_COUNTER2:
2919 asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val));
2920 break;
2921 case XSCALE_COUNTER3:
2922 asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val));
2923 break;
2924 }
2925
2926 return val;
2927}
2928
2929static inline void
2930xscale2pmu_write_counter(int counter, u32 val)
2931{
2932 switch (counter) {
2933 case XSCALE_CYCLE_COUNTER:
2934 asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val));
2935 break;
2936 case XSCALE_COUNTER0:
2937 asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val));
2938 break;
2939 case XSCALE_COUNTER1:
2940 asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val));
2941 break;
2942 case XSCALE_COUNTER2:
2943 asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val));
2944 break;
2945 case XSCALE_COUNTER3:
2946 asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val));
2947 break;
2948 }
2949}
2950
2951static const struct arm_pmu xscale2pmu = {
2952 .id = ARM_PERF_PMU_ID_XSCALE2,
2953 .handle_irq = xscale2pmu_handle_irq,
2954 .enable = xscale2pmu_enable_event,
2955 .disable = xscale2pmu_disable_event,
2956 .event_map = xscalepmu_event_map,
2957 .raw_event = xscalepmu_raw_event,
2958 .read_counter = xscale2pmu_read_counter,
2959 .write_counter = xscale2pmu_write_counter,
2960 .get_event_idx = xscale2pmu_get_event_idx,
2961 .start = xscale2pmu_start,
2962 .stop = xscale2pmu_stop,
2963 .num_events = 5,
2964 .max_period = (1LLU << 32) - 1,
2965};
2966 611
2967static int __init 612static int __init
2968init_hw_perf_events(void) 613init_hw_perf_events(void)
@@ -2977,37 +622,16 @@ init_hw_perf_events(void)
2977 case 0xB360: /* ARM1136 */ 622 case 0xB360: /* ARM1136 */
2978 case 0xB560: /* ARM1156 */ 623 case 0xB560: /* ARM1156 */
2979 case 0xB760: /* ARM1176 */ 624 case 0xB760: /* ARM1176 */
2980 armpmu = &armv6pmu; 625 armpmu = armv6pmu_init();
2981 memcpy(armpmu_perf_cache_map, armv6_perf_cache_map,
2982 sizeof(armv6_perf_cache_map));
2983 break; 626 break;
2984 case 0xB020: /* ARM11mpcore */ 627 case 0xB020: /* ARM11mpcore */
2985 armpmu = &armv6mpcore_pmu; 628 armpmu = armv6mpcore_pmu_init();
2986 memcpy(armpmu_perf_cache_map,
2987 armv6mpcore_perf_cache_map,
2988 sizeof(armv6mpcore_perf_cache_map));
2989 break; 629 break;
2990 case 0xC080: /* Cortex-A8 */ 630 case 0xC080: /* Cortex-A8 */
2991 armv7pmu.id = ARM_PERF_PMU_ID_CA8; 631 armpmu = armv7_a8_pmu_init();
2992 memcpy(armpmu_perf_cache_map, armv7_a8_perf_cache_map,
2993 sizeof(armv7_a8_perf_cache_map));
2994 armv7pmu.event_map = armv7_a8_pmu_event_map;
2995 armpmu = &armv7pmu;
2996
2997 /* Reset PMNC and read the nb of CNTx counters
2998 supported */
2999 armv7pmu.num_events = armv7_reset_read_pmnc();
3000 break; 632 break;
3001 case 0xC090: /* Cortex-A9 */ 633 case 0xC090: /* Cortex-A9 */
3002 armv7pmu.id = ARM_PERF_PMU_ID_CA9; 634 armpmu = armv7_a9_pmu_init();
3003 memcpy(armpmu_perf_cache_map, armv7_a9_perf_cache_map,
3004 sizeof(armv7_a9_perf_cache_map));
3005 armv7pmu.event_map = armv7_a9_pmu_event_map;
3006 armpmu = &armv7pmu;
3007
3008 /* Reset PMNC and read the nb of CNTx counters
3009 supported */
3010 armv7pmu.num_events = armv7_reset_read_pmnc();
3011 break; 635 break;
3012 } 636 }
3013 /* Intel CPUs [xscale]. */ 637 /* Intel CPUs [xscale]. */
@@ -3015,21 +639,17 @@ init_hw_perf_events(void)
3015 part_number = (cpuid >> 13) & 0x7; 639 part_number = (cpuid >> 13) & 0x7;
3016 switch (part_number) { 640 switch (part_number) {
3017 case 1: 641 case 1:
3018 armpmu = &xscale1pmu; 642 armpmu = xscale1pmu_init();
3019 memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
3020 sizeof(xscale_perf_cache_map));
3021 break; 643 break;
3022 case 2: 644 case 2:
3023 armpmu = &xscale2pmu; 645 armpmu = xscale2pmu_init();
3024 memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
3025 sizeof(xscale_perf_cache_map));
3026 break; 646 break;
3027 } 647 }
3028 } 648 }
3029 649
3030 if (armpmu) { 650 if (armpmu) {
3031 pr_info("enabled with %s PMU driver, %d counters available\n", 651 pr_info("enabled with %s PMU driver, %d counters available\n",
3032 arm_pmu_names[armpmu->id], armpmu->num_events); 652 armpmu->name, armpmu->num_events);
3033 } else { 653 } else {
3034 pr_info("no hardware support available\n"); 654 pr_info("no hardware support available\n");
3035 } 655 }
@@ -3053,17 +673,17 @@ early_initcall(init_hw_perf_events);
3053 * This code has been adapted from the ARM OProfile support. 673 * This code has been adapted from the ARM OProfile support.
3054 */ 674 */
3055struct frame_tail { 675struct frame_tail {
3056 struct frame_tail *fp; 676 struct frame_tail __user *fp;
3057 unsigned long sp; 677 unsigned long sp;
3058 unsigned long lr; 678 unsigned long lr;
3059} __attribute__((packed)); 679} __attribute__((packed));
3060 680
3061/* 681/*
3062 * Get the return address for a single stackframe and return a pointer to the 682 * Get the return address for a single stackframe and return a pointer to the
3063 * next frame tail. 683 * next frame tail.
3064 */ 684 */
3065static struct frame_tail * 685static struct frame_tail __user *
3066user_backtrace(struct frame_tail *tail, 686user_backtrace(struct frame_tail __user *tail,
3067 struct perf_callchain_entry *entry) 687 struct perf_callchain_entry *entry)
3068{ 688{
3069 struct frame_tail buftail; 689 struct frame_tail buftail;
@@ -3089,10 +709,10 @@ user_backtrace(struct frame_tail *tail,
3089void 709void
3090perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) 710perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
3091{ 711{
3092 struct frame_tail *tail; 712 struct frame_tail __user *tail;
3093 713
3094 714
3095 tail = (struct frame_tail *)regs->ARM_fp - 1; 715 tail = (struct frame_tail __user *)regs->ARM_fp - 1;
3096 716
3097 while (tail && !((unsigned long)tail & 0x3)) 717 while (tail && !((unsigned long)tail & 0x3))
3098 tail = user_backtrace(tail, entry); 718 tail = user_backtrace(tail, entry);
diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c
new file mode 100644
index 000000000000..c058bfc8532b
--- /dev/null
+++ b/arch/arm/kernel/perf_event_v6.c
@@ -0,0 +1,672 @@
1/*
2 * ARMv6 Performance counter handling code.
3 *
4 * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
5 *
6 * ARMv6 has 2 configurable performance counters and a single cycle counter.
7 * They all share a single reset bit but can be written to zero so we can use
8 * that for a reset.
9 *
10 * The counters can't be individually enabled or disabled so when we remove
11 * one event and replace it with another we could get spurious counts from the
12 * wrong event. However, we can take advantage of the fact that the
13 * performance counters can export events to the event bus, and the event bus
14 * itself can be monitored. This requires that we *don't* export the events to
15 * the event bus. The procedure for disabling a configurable counter is:
16 * - change the counter to count the ETMEXTOUT[0] signal (0x20). This
17 * effectively stops the counter from counting.
18 * - disable the counter's interrupt generation (each counter has its
19 * own interrupt enable bit).
20 * Once stopped, the counter value can be written as 0 to reset.
21 *
22 * To enable a counter:
23 * - enable the counter's interrupt generation.
24 * - set the new event type.
25 *
26 * Note: the dedicated cycle counter only counts cycles and can't be
27 * enabled/disabled independently of the others. When we want to disable the
28 * cycle counter, we have to just disable the interrupt reporting and start
29 * ignoring that counter. When re-enabling, we have to reset the value and
30 * enable the interrupt.
31 */
32
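A condensed sketch of the disable procedure just described (the real implementation is armv6pmu_disable_event() further down; ARMV6_PERFCTR_NOP is the 0x20 ETMEXTOUT[0] event, the helpers and masks used here are all defined below, and the function name is invented for illustration):

/* Sketch: stop configurable counter 0 by retargeting it at the quiet
 * ETMEXTOUT[0] event and masking its interrupt, then zero the counter. */
static inline void armv6_example_stop_counter0(void)
{
	unsigned long val = armv6_pmcr_read();

	val &= ~(ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK);
	val |= ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT;
	armv6_pmcr_write(val);
	armv6pmu_write_counter(ARMV6_COUNTER0, 0);
}
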
33#ifdef CONFIG_CPU_V6
34enum armv6_perf_types {
35 ARMV6_PERFCTR_ICACHE_MISS = 0x0,
36 ARMV6_PERFCTR_IBUF_STALL = 0x1,
37 ARMV6_PERFCTR_DDEP_STALL = 0x2,
38 ARMV6_PERFCTR_ITLB_MISS = 0x3,
39 ARMV6_PERFCTR_DTLB_MISS = 0x4,
40 ARMV6_PERFCTR_BR_EXEC = 0x5,
41 ARMV6_PERFCTR_BR_MISPREDICT = 0x6,
42 ARMV6_PERFCTR_INSTR_EXEC = 0x7,
43 ARMV6_PERFCTR_DCACHE_HIT = 0x9,
44 ARMV6_PERFCTR_DCACHE_ACCESS = 0xA,
45 ARMV6_PERFCTR_DCACHE_MISS = 0xB,
46 ARMV6_PERFCTR_DCACHE_WBACK = 0xC,
47 ARMV6_PERFCTR_SW_PC_CHANGE = 0xD,
48 ARMV6_PERFCTR_MAIN_TLB_MISS = 0xF,
49 ARMV6_PERFCTR_EXPL_D_ACCESS = 0x10,
50 ARMV6_PERFCTR_LSU_FULL_STALL = 0x11,
51 ARMV6_PERFCTR_WBUF_DRAINED = 0x12,
52 ARMV6_PERFCTR_CPU_CYCLES = 0xFF,
53 ARMV6_PERFCTR_NOP = 0x20,
54};
55
56enum armv6_counters {
57 ARMV6_CYCLE_COUNTER = 1,
58 ARMV6_COUNTER0,
59 ARMV6_COUNTER1,
60};
61
62/*
63 * The hardware events that we support. We do support cache operations but
64 * we have Harvard caches and no way to combine instruction and data
65 * accesses/misses in hardware.
66 */
67static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
68 [PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES,
69 [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC,
70 [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
71 [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
72 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC,
73 [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT,
74 [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
75};
76
77static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
78 [PERF_COUNT_HW_CACHE_OP_MAX]
79 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
80 [C(L1D)] = {
81 /*
82 * The performance counters don't differentiate between read
83 * and write accesses/misses so this isn't strictly correct,
84 * but it's the best we can do. Writes and reads get
85 * combined.
86 */
87 [C(OP_READ)] = {
88 [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
89 [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
90 },
91 [C(OP_WRITE)] = {
92 [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
93 [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
94 },
95 [C(OP_PREFETCH)] = {
96 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
97 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
98 },
99 },
100 [C(L1I)] = {
101 [C(OP_READ)] = {
102 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
103 [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS,
104 },
105 [C(OP_WRITE)] = {
106 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
107 [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS,
108 },
109 [C(OP_PREFETCH)] = {
110 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
111 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
112 },
113 },
114 [C(LL)] = {
115 [C(OP_READ)] = {
116 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
117 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
118 },
119 [C(OP_WRITE)] = {
120 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
121 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
122 },
123 [C(OP_PREFETCH)] = {
124 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
125 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
126 },
127 },
128 [C(DTLB)] = {
129 /*
130 * The ARM performance counters can count micro DTLB misses,
131 * micro ITLB misses and main TLB misses. There isn't an event
132 * for TLB misses, so use the micro misses here and if users
133 * want the main TLB misses they can use a raw counter.
134 */
135 [C(OP_READ)] = {
136 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
137 [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
138 },
139 [C(OP_WRITE)] = {
140 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
141 [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
142 },
143 [C(OP_PREFETCH)] = {
144 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
145 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
146 },
147 },
148 [C(ITLB)] = {
149 [C(OP_READ)] = {
150 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
151 [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
152 },
153 [C(OP_WRITE)] = {
154 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
155 [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
156 },
157 [C(OP_PREFETCH)] = {
158 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
159 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
160 },
161 },
162 [C(BPU)] = {
163 [C(OP_READ)] = {
164 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
165 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
166 },
167 [C(OP_WRITE)] = {
168 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
169 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
170 },
171 [C(OP_PREFETCH)] = {
172 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
173 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
174 },
175 },
176};
177
178enum armv6mpcore_perf_types {
179 ARMV6MPCORE_PERFCTR_ICACHE_MISS = 0x0,
180 ARMV6MPCORE_PERFCTR_IBUF_STALL = 0x1,
181 ARMV6MPCORE_PERFCTR_DDEP_STALL = 0x2,
182 ARMV6MPCORE_PERFCTR_ITLB_MISS = 0x3,
183 ARMV6MPCORE_PERFCTR_DTLB_MISS = 0x4,
184 ARMV6MPCORE_PERFCTR_BR_EXEC = 0x5,
185 ARMV6MPCORE_PERFCTR_BR_NOTPREDICT = 0x6,
186 ARMV6MPCORE_PERFCTR_BR_MISPREDICT = 0x7,
187 ARMV6MPCORE_PERFCTR_INSTR_EXEC = 0x8,
188 ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA,
189 ARMV6MPCORE_PERFCTR_DCACHE_RDMISS = 0xB,
190 ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC,
191 ARMV6MPCORE_PERFCTR_DCACHE_WRMISS = 0xD,
192 ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE,
193 ARMV6MPCORE_PERFCTR_SW_PC_CHANGE = 0xF,
194 ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS = 0x10,
195 ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11,
196 ARMV6MPCORE_PERFCTR_LSU_FULL_STALL = 0x12,
197 ARMV6MPCORE_PERFCTR_WBUF_DRAINED = 0x13,
198 ARMV6MPCORE_PERFCTR_CPU_CYCLES = 0xFF,
199};
200
201/*
202 * The hardware events that we support. We do support cache operations but
203 * we have harvard caches and no way to combine instruction and data
204 * accesses/misses in hardware.
205 */
206static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = {
207 [PERF_COUNT_HW_CPU_CYCLES] = ARMV6MPCORE_PERFCTR_CPU_CYCLES,
208 [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_INSTR_EXEC,
209 [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
210 [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
211 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC,
212 [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6MPCORE_PERFCTR_BR_MISPREDICT,
213 [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
214};
215
216static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
217 [PERF_COUNT_HW_CACHE_OP_MAX]
218 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
219 [C(L1D)] = {
220 [C(OP_READ)] = {
221 [C(RESULT_ACCESS)] =
222 ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS,
223 [C(RESULT_MISS)] =
224 ARMV6MPCORE_PERFCTR_DCACHE_RDMISS,
225 },
226 [C(OP_WRITE)] = {
227 [C(RESULT_ACCESS)] =
228 ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS,
229 [C(RESULT_MISS)] =
230 ARMV6MPCORE_PERFCTR_DCACHE_WRMISS,
231 },
232 [C(OP_PREFETCH)] = {
233 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
234 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
235 },
236 },
237 [C(L1I)] = {
238 [C(OP_READ)] = {
239 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
240 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
241 },
242 [C(OP_WRITE)] = {
243 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
244 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
245 },
246 [C(OP_PREFETCH)] = {
247 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
248 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
249 },
250 },
251 [C(LL)] = {
252 [C(OP_READ)] = {
253 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
254 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
255 },
256 [C(OP_WRITE)] = {
257 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
258 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
259 },
260 [C(OP_PREFETCH)] = {
261 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
262 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
263 },
264 },
265 [C(DTLB)] = {
266 /*
267 * The ARM performance counters can count micro DTLB misses,
268 * micro ITLB misses and main TLB misses. There isn't an event
269 * for TLB misses, so use the micro misses here and if users
270 * want the main TLB misses they can use a raw counter.
271 */
272 [C(OP_READ)] = {
273 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
274 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
275 },
276 [C(OP_WRITE)] = {
277 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
278 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
279 },
280 [C(OP_PREFETCH)] = {
281 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
282 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
283 },
284 },
285 [C(ITLB)] = {
286 [C(OP_READ)] = {
287 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
288 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
289 },
290 [C(OP_WRITE)] = {
291 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
292 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
293 },
294 [C(OP_PREFETCH)] = {
295 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
296 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
297 },
298 },
299 [C(BPU)] = {
300 [C(OP_READ)] = {
301 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
302 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
303 },
304 [C(OP_WRITE)] = {
305 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
306 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
307 },
308 [C(OP_PREFETCH)] = {
309 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
310 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
311 },
312 },
313};
314
315static inline unsigned long
316armv6_pmcr_read(void)
317{
318 u32 val;
319 asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val));
320 return val;
321}
322
323static inline void
324armv6_pmcr_write(unsigned long val)
325{
326 asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val));
327}
328
329#define ARMV6_PMCR_ENABLE (1 << 0)
330#define ARMV6_PMCR_CTR01_RESET (1 << 1)
331#define ARMV6_PMCR_CCOUNT_RESET (1 << 2)
332#define ARMV6_PMCR_CCOUNT_DIV (1 << 3)
333#define ARMV6_PMCR_COUNT0_IEN (1 << 4)
334#define ARMV6_PMCR_COUNT1_IEN (1 << 5)
335#define ARMV6_PMCR_CCOUNT_IEN (1 << 6)
336#define ARMV6_PMCR_COUNT0_OVERFLOW (1 << 8)
337#define ARMV6_PMCR_COUNT1_OVERFLOW (1 << 9)
338#define ARMV6_PMCR_CCOUNT_OVERFLOW (1 << 10)
339#define ARMV6_PMCR_EVT_COUNT0_SHIFT 20
340#define ARMV6_PMCR_EVT_COUNT0_MASK (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT)
341#define ARMV6_PMCR_EVT_COUNT1_SHIFT 12
342#define ARMV6_PMCR_EVT_COUNT1_MASK (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT)
343
344#define ARMV6_PMCR_OVERFLOWED_MASK \
345 (ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \
346 ARMV6_PMCR_CCOUNT_OVERFLOW)
347
348static inline int
349armv6_pmcr_has_overflowed(unsigned long pmcr)
350{
351 return pmcr & ARMV6_PMCR_OVERFLOWED_MASK;
352}
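
All three ARMv6 overflow flags live in the PMCR itself, and, as the interrupt handler below notes, writing the value back is what clears them. As a worked example (function name invented), a PMCR read of 0x500 means both counter 0 (bit 8) and the cycle counter (bit 10) have overflowed:

/* Sketch: decode a hypothetical PMCR value of 0x500.
 *   0x500 & ARMV6_PMCR_COUNT0_OVERFLOW (0x100) -> counter 0 overflowed
 *   0x500 & ARMV6_PMCR_CCOUNT_OVERFLOW (0x400) -> cycle counter overflowed */
static inline int armv6_pmcr_overflow_example(void)
{
	return armv6_pmcr_has_overflowed(0x500);	/* nonzero */
}
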
353
354static inline int
355armv6_pmcr_counter_has_overflowed(unsigned long pmcr,
356 enum armv6_counters counter)
357{
358 int ret = 0;
359
360 if (ARMV6_CYCLE_COUNTER == counter)
361 ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW;
362 else if (ARMV6_COUNTER0 == counter)
363 ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW;
364 else if (ARMV6_COUNTER1 == counter)
365 ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW;
366 else
367 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
368
369 return ret;
370}
371
372static inline u32
373armv6pmu_read_counter(int counter)
374{
375 unsigned long value = 0;
376
377 if (ARMV6_CYCLE_COUNTER == counter)
378 asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value));
379 else if (ARMV6_COUNTER0 == counter)
380 asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value));
381 else if (ARMV6_COUNTER1 == counter)
382 asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value));
383 else
384 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
385
386 return value;
387}
388
389static inline void
390armv6pmu_write_counter(int counter,
391 u32 value)
392{
393 if (ARMV6_CYCLE_COUNTER == counter)
394 asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value));
395 else if (ARMV6_COUNTER0 == counter)
396 asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value));
397 else if (ARMV6_COUNTER1 == counter)
398 asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r"(value));
399 else
400 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
401}
402
403static void
404armv6pmu_enable_event(struct hw_perf_event *hwc,
405 int idx)
406{
407 unsigned long val, mask, evt, flags;
408
409 if (ARMV6_CYCLE_COUNTER == idx) {
410 mask = 0;
411 evt = ARMV6_PMCR_CCOUNT_IEN;
412 } else if (ARMV6_COUNTER0 == idx) {
413 mask = ARMV6_PMCR_EVT_COUNT0_MASK;
414 evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
415 ARMV6_PMCR_COUNT0_IEN;
416 } else if (ARMV6_COUNTER1 == idx) {
417 mask = ARMV6_PMCR_EVT_COUNT1_MASK;
418 evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) |
419 ARMV6_PMCR_COUNT1_IEN;
420 } else {
421 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
422 return;
423 }
424
425 /*
426 * Mask out the current event and set the counter to count the event
427 * that we're interested in.
428 */
429 raw_spin_lock_irqsave(&pmu_lock, flags);
430 val = armv6_pmcr_read();
431 val &= ~mask;
432 val |= evt;
433 armv6_pmcr_write(val);
434 raw_spin_unlock_irqrestore(&pmu_lock, flags);
435}
436
437static irqreturn_t
438armv6pmu_handle_irq(int irq_num,
439 void *dev)
440{
441 unsigned long pmcr = armv6_pmcr_read();
442 struct perf_sample_data data;
443 struct cpu_hw_events *cpuc;
444 struct pt_regs *regs;
445 int idx;
446
447 if (!armv6_pmcr_has_overflowed(pmcr))
448 return IRQ_NONE;
449
450 regs = get_irq_regs();
451
452 /*
453 * The interrupts are cleared by writing the overflow flags back to
454 * the control register. All of the other bits don't have any effect
455 * if they are rewritten, so write the whole value back.
456 */
457 armv6_pmcr_write(pmcr);
458
459 perf_sample_data_init(&data, 0);
460
461 cpuc = &__get_cpu_var(cpu_hw_events);
462 for (idx = 0; idx <= armpmu->num_events; ++idx) {
463 struct perf_event *event = cpuc->events[idx];
464 struct hw_perf_event *hwc;
465
466 if (!test_bit(idx, cpuc->active_mask))
467 continue;
468
469 /*
470 * We have a single interrupt for all counters. Check that
471 * each counter has overflowed before we process it.
472 */
473 if (!armv6_pmcr_counter_has_overflowed(pmcr, idx))
474 continue;
475
476 hwc = &event->hw;
477 armpmu_event_update(event, hwc, idx);
478 data.period = event->hw.last_period;
479 if (!armpmu_event_set_period(event, hwc, idx))
480 continue;
481
482 if (perf_event_overflow(event, 0, &data, regs))
483 armpmu->disable(hwc, idx);
484 }
485
486 /*
487 * Handle the pending perf events.
488 *
489 * Note: this call *must* be run with interrupts disabled. For
490 * platforms that can have the PMU interrupts raised as an NMI, this
491 * will not work.
492 */
493 irq_work_run();
494
495 return IRQ_HANDLED;
496}
497
498static void
499armv6pmu_start(void)
500{
501 unsigned long flags, val;
502
503 raw_spin_lock_irqsave(&pmu_lock, flags);
504 val = armv6_pmcr_read();
505 val |= ARMV6_PMCR_ENABLE;
506 armv6_pmcr_write(val);
507 raw_spin_unlock_irqrestore(&pmu_lock, flags);
508}
509
510static void
511armv6pmu_stop(void)
512{
513 unsigned long flags, val;
514
515 raw_spin_lock_irqsave(&pmu_lock, flags);
516 val = armv6_pmcr_read();
517 val &= ~ARMV6_PMCR_ENABLE;
518 armv6_pmcr_write(val);
519 raw_spin_unlock_irqrestore(&pmu_lock, flags);
520}
521
522static int
523armv6pmu_get_event_idx(struct cpu_hw_events *cpuc,
524 struct hw_perf_event *event)
525{
526	/* Always place a cycle-count event on the dedicated cycle counter. */
527 if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) {
528 if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask))
529 return -EAGAIN;
530
531 return ARMV6_CYCLE_COUNTER;
532 } else {
533 /*
534		 * For anything other than a cycle-count event, try to use
535 * counter0 and counter1.
536 */
537 if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask))
538 return ARMV6_COUNTER1;
539
540 if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask))
541 return ARMV6_COUNTER0;
542
543 /* The counters are all in use. */
544 return -EAGAIN;
545 }
546}
547
548static void
549armv6pmu_disable_event(struct hw_perf_event *hwc,
550 int idx)
551{
552 unsigned long val, mask, evt, flags;
553
554 if (ARMV6_CYCLE_COUNTER == idx) {
555 mask = ARMV6_PMCR_CCOUNT_IEN;
556 evt = 0;
557 } else if (ARMV6_COUNTER0 == idx) {
558 mask = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK;
559 evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT;
560 } else if (ARMV6_COUNTER1 == idx) {
561 mask = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK;
562 evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT;
563 } else {
564 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
565 return;
566 }
567
568 /*
569 * Mask out the current event and set the counter to count the number
570 * of ETM bus signal assertion cycles. The external reporting should
571 * be disabled and so this should never increment.
572 */
573 raw_spin_lock_irqsave(&pmu_lock, flags);
574 val = armv6_pmcr_read();
575 val &= ~mask;
576 val |= evt;
577 armv6_pmcr_write(val);
578 raw_spin_unlock_irqrestore(&pmu_lock, flags);
579}
580
581static void
582armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc,
583 int idx)
584{
585 unsigned long val, mask, flags, evt = 0;
586
587 if (ARMV6_CYCLE_COUNTER == idx) {
588 mask = ARMV6_PMCR_CCOUNT_IEN;
589 } else if (ARMV6_COUNTER0 == idx) {
590 mask = ARMV6_PMCR_COUNT0_IEN;
591 } else if (ARMV6_COUNTER1 == idx) {
592 mask = ARMV6_PMCR_COUNT1_IEN;
593 } else {
594 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
595 return;
596 }
597
598 /*
599 * Unlike UP ARMv6, we don't have a way of stopping the counters. We
600 * simply disable the interrupt reporting.
601 */
602 raw_spin_lock_irqsave(&pmu_lock, flags);
603 val = armv6_pmcr_read();
604 val &= ~mask;
605 val |= evt;
606 armv6_pmcr_write(val);
607 raw_spin_unlock_irqrestore(&pmu_lock, flags);
608}
609
610static const struct arm_pmu armv6pmu = {
611 .id = ARM_PERF_PMU_ID_V6,
612 .name = "v6",
613 .handle_irq = armv6pmu_handle_irq,
614 .enable = armv6pmu_enable_event,
615 .disable = armv6pmu_disable_event,
616 .read_counter = armv6pmu_read_counter,
617 .write_counter = armv6pmu_write_counter,
618 .get_event_idx = armv6pmu_get_event_idx,
619 .start = armv6pmu_start,
620 .stop = armv6pmu_stop,
621 .cache_map = &armv6_perf_cache_map,
622 .event_map = &armv6_perf_map,
623 .raw_event_mask = 0xFF,
624 .num_events = 3,
625 .max_period = (1LLU << 32) - 1,
626};
627
628static const struct arm_pmu *__init armv6pmu_init(void)
629{
630 return &armv6pmu;
631}
632
633/*
634 * ARMv6mpcore is almost identical to single core ARMv6 with the exception
635 * that some of the events have different enumerations and that there is no
636 * *hack* to stop the programmable counters. To stop the counters we simply
637 * disable the interrupt reporting and update the event. When unthrottling we
638 * reset the period and enable the interrupt reporting.
639 */
640static const struct arm_pmu armv6mpcore_pmu = {
641 .id = ARM_PERF_PMU_ID_V6MP,
642 .name = "v6mpcore",
643 .handle_irq = armv6pmu_handle_irq,
644 .enable = armv6pmu_enable_event,
645 .disable = armv6mpcore_pmu_disable_event,
646 .read_counter = armv6pmu_read_counter,
647 .write_counter = armv6pmu_write_counter,
648 .get_event_idx = armv6pmu_get_event_idx,
649 .start = armv6pmu_start,
650 .stop = armv6pmu_stop,
651 .cache_map = &armv6mpcore_perf_cache_map,
652 .event_map = &armv6mpcore_perf_map,
653 .raw_event_mask = 0xFF,
654 .num_events = 3,
655 .max_period = (1LLU << 32) - 1,
656};
657
658static const struct arm_pmu *__init armv6mpcore_pmu_init(void)
659{
660 return &armv6mpcore_pmu;
661}
662#else
663static const struct arm_pmu *__init armv6pmu_init(void)
664{
665 return NULL;
666}
667
668static const struct arm_pmu *__init armv6mpcore_pmu_init(void)
669{
670 return NULL;
671}
672#endif /* CONFIG_CPU_V6 */
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
new file mode 100644
index 000000000000..2e1402556fa0
--- /dev/null
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -0,0 +1,906 @@
1/*
2 * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code.
3 *
4 * ARMv7 support: Jean Pihet <jpihet@mvista.com>
5 * 2010 (c) MontaVista Software, LLC.
6 *
7 * Copied from ARMv6 code, with the low-level code inspired
8 * by the ARMv7 Oprofile code.
9 *
10 * Cortex-A8 has up to 4 configurable performance counters and
11 * a single cycle counter.
12 * Cortex-A9 has up to 31 configurable performance counters and
13 * a single cycle counter.
14 *
15 * All counters can be enabled/disabled and IRQ masked separately. The cycle
16 * counter and the event counters (as a group) can be reset independently.
17 */
18
19#ifdef CONFIG_CPU_V7
20/* Common ARMv7 event types */
21enum armv7_perf_types {
22 ARMV7_PERFCTR_PMNC_SW_INCR = 0x00,
23 ARMV7_PERFCTR_IFETCH_MISS = 0x01,
24 ARMV7_PERFCTR_ITLB_MISS = 0x02,
25 ARMV7_PERFCTR_DCACHE_REFILL = 0x03,
26 ARMV7_PERFCTR_DCACHE_ACCESS = 0x04,
27 ARMV7_PERFCTR_DTLB_REFILL = 0x05,
28 ARMV7_PERFCTR_DREAD = 0x06,
29 ARMV7_PERFCTR_DWRITE = 0x07,
30
31 ARMV7_PERFCTR_EXC_TAKEN = 0x09,
32 ARMV7_PERFCTR_EXC_EXECUTED = 0x0A,
33 ARMV7_PERFCTR_CID_WRITE = 0x0B,
34 /* ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS.
35 * It counts:
36 * - all branch instructions,
37 * - instructions that explicitly write the PC,
38 * - exception generating instructions.
39 */
40 ARMV7_PERFCTR_PC_WRITE = 0x0C,
41 ARMV7_PERFCTR_PC_IMM_BRANCH = 0x0D,
42 ARMV7_PERFCTR_UNALIGNED_ACCESS = 0x0F,
43 ARMV7_PERFCTR_PC_BRANCH_MIS_PRED = 0x10,
44 ARMV7_PERFCTR_CLOCK_CYCLES = 0x11,
45
46 ARMV7_PERFCTR_PC_BRANCH_MIS_USED = 0x12,
47
48 ARMV7_PERFCTR_CPU_CYCLES = 0xFF
49};
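
/*
 * Editorial note (not from the original patch): ARMV7_PERFCTR_CPU_CYCLES
 * (0xFF) is a software marker rather than a hardware event number;
 * armv7pmu_get_event_idx() below recognises it and steers such events to
 * the dedicated cycle counter.
 */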
50
51/* ARMv7 Cortex-A8 specific event types */
52enum armv7_a8_perf_types {
53 ARMV7_PERFCTR_INSTR_EXECUTED = 0x08,
54
55 ARMV7_PERFCTR_PC_PROC_RETURN = 0x0E,
56
57 ARMV7_PERFCTR_WRITE_BUFFER_FULL = 0x40,
58 ARMV7_PERFCTR_L2_STORE_MERGED = 0x41,
59 ARMV7_PERFCTR_L2_STORE_BUFF = 0x42,
60 ARMV7_PERFCTR_L2_ACCESS = 0x43,
61 ARMV7_PERFCTR_L2_CACH_MISS = 0x44,
62 ARMV7_PERFCTR_AXI_READ_CYCLES = 0x45,
63 ARMV7_PERFCTR_AXI_WRITE_CYCLES = 0x46,
64 ARMV7_PERFCTR_MEMORY_REPLAY = 0x47,
65 ARMV7_PERFCTR_UNALIGNED_ACCESS_REPLAY = 0x48,
66 ARMV7_PERFCTR_L1_DATA_MISS = 0x49,
67 ARMV7_PERFCTR_L1_INST_MISS = 0x4A,
68 ARMV7_PERFCTR_L1_DATA_COLORING = 0x4B,
69 ARMV7_PERFCTR_L1_NEON_DATA = 0x4C,
70 ARMV7_PERFCTR_L1_NEON_CACH_DATA = 0x4D,
71 ARMV7_PERFCTR_L2_NEON = 0x4E,
72 ARMV7_PERFCTR_L2_NEON_HIT = 0x4F,
73 ARMV7_PERFCTR_L1_INST = 0x50,
74 ARMV7_PERFCTR_PC_RETURN_MIS_PRED = 0x51,
75 ARMV7_PERFCTR_PC_BRANCH_FAILED = 0x52,
76 ARMV7_PERFCTR_PC_BRANCH_TAKEN = 0x53,
77 ARMV7_PERFCTR_PC_BRANCH_EXECUTED = 0x54,
78 ARMV7_PERFCTR_OP_EXECUTED = 0x55,
79 ARMV7_PERFCTR_CYCLES_INST_STALL = 0x56,
80 ARMV7_PERFCTR_CYCLES_INST = 0x57,
81 ARMV7_PERFCTR_CYCLES_NEON_DATA_STALL = 0x58,
82 ARMV7_PERFCTR_CYCLES_NEON_INST_STALL = 0x59,
83 ARMV7_PERFCTR_NEON_CYCLES = 0x5A,
84
85 ARMV7_PERFCTR_PMU0_EVENTS = 0x70,
86 ARMV7_PERFCTR_PMU1_EVENTS = 0x71,
87 ARMV7_PERFCTR_PMU_EVENTS = 0x72,
88};
89
90/* ARMv7 Cortex-A9 specific event types */
91enum armv7_a9_perf_types {
92 ARMV7_PERFCTR_JAVA_HW_BYTECODE_EXEC = 0x40,
93 ARMV7_PERFCTR_JAVA_SW_BYTECODE_EXEC = 0x41,
94 ARMV7_PERFCTR_JAZELLE_BRANCH_EXEC = 0x42,
95
96 ARMV7_PERFCTR_COHERENT_LINE_MISS = 0x50,
97 ARMV7_PERFCTR_COHERENT_LINE_HIT = 0x51,
98
99 ARMV7_PERFCTR_ICACHE_DEP_STALL_CYCLES = 0x60,
100 ARMV7_PERFCTR_DCACHE_DEP_STALL_CYCLES = 0x61,
101 ARMV7_PERFCTR_TLB_MISS_DEP_STALL_CYCLES = 0x62,
102 ARMV7_PERFCTR_STREX_EXECUTED_PASSED = 0x63,
103 ARMV7_PERFCTR_STREX_EXECUTED_FAILED = 0x64,
104 ARMV7_PERFCTR_DATA_EVICTION = 0x65,
105 ARMV7_PERFCTR_ISSUE_STAGE_NO_INST = 0x66,
106 ARMV7_PERFCTR_ISSUE_STAGE_EMPTY = 0x67,
107 ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE = 0x68,
108
109 ARMV7_PERFCTR_PREDICTABLE_FUNCT_RETURNS = 0x6E,
110
111 ARMV7_PERFCTR_MAIN_UNIT_EXECUTED_INST = 0x70,
112 ARMV7_PERFCTR_SECOND_UNIT_EXECUTED_INST = 0x71,
113 ARMV7_PERFCTR_LD_ST_UNIT_EXECUTED_INST = 0x72,
114 ARMV7_PERFCTR_FP_EXECUTED_INST = 0x73,
115 ARMV7_PERFCTR_NEON_EXECUTED_INST = 0x74,
116
117 ARMV7_PERFCTR_PLD_FULL_DEP_STALL_CYCLES = 0x80,
118 ARMV7_PERFCTR_DATA_WR_DEP_STALL_CYCLES = 0x81,
119 ARMV7_PERFCTR_ITLB_MISS_DEP_STALL_CYCLES = 0x82,
120 ARMV7_PERFCTR_DTLB_MISS_DEP_STALL_CYCLES = 0x83,
121 ARMV7_PERFCTR_MICRO_ITLB_MISS_DEP_STALL_CYCLES = 0x84,
122 ARMV7_PERFCTR_MICRO_DTLB_MISS_DEP_STALL_CYCLES = 0x85,
123 ARMV7_PERFCTR_DMB_DEP_STALL_CYCLES = 0x86,
124
125 ARMV7_PERFCTR_INTGR_CLK_ENABLED_CYCLES = 0x8A,
126 ARMV7_PERFCTR_DATA_ENGINE_CLK_EN_CYCLES = 0x8B,
127
128 ARMV7_PERFCTR_ISB_INST = 0x90,
129 ARMV7_PERFCTR_DSB_INST = 0x91,
130 ARMV7_PERFCTR_DMB_INST = 0x92,
131 ARMV7_PERFCTR_EXT_INTERRUPTS = 0x93,
132
133 ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_COMPLETED = 0xA0,
134 ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_SKIPPED = 0xA1,
135 ARMV7_PERFCTR_PLE_FIFO_FLUSH = 0xA2,
136 ARMV7_PERFCTR_PLE_RQST_COMPLETED = 0xA3,
137 ARMV7_PERFCTR_PLE_FIFO_OVERFLOW = 0xA4,
138 ARMV7_PERFCTR_PLE_RQST_PROG = 0xA5
139};
140
141/*
142 * Cortex-A8 HW events mapping
143 *
144 * The hardware events that we support. We do support cache operations but
145 * we have Harvard caches and no way to combine instruction and data
146 * accesses/misses in hardware.
147 */
148static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = {
149 [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
150 [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
151 [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
152 [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
153 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
154 [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
155 [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
156};
157
158static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
159 [PERF_COUNT_HW_CACHE_OP_MAX]
160 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
161 [C(L1D)] = {
162 /*
163 * The performance counters don't differentiate between read
164 * and write accesses/misses so this isn't strictly correct,
165 * but it's the best we can do. Writes and reads get
166 * combined.
167 */
168 [C(OP_READ)] = {
169 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS,
170 [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL,
171 },
172 [C(OP_WRITE)] = {
173 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS,
174 [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL,
175 },
176 [C(OP_PREFETCH)] = {
177 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
178 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
179 },
180 },
181 [C(L1I)] = {
182 [C(OP_READ)] = {
183 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST,
184 [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS,
185 },
186 [C(OP_WRITE)] = {
187 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST,
188 [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS,
189 },
190 [C(OP_PREFETCH)] = {
191 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
192 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
193 },
194 },
195 [C(LL)] = {
196 [C(OP_READ)] = {
197 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS,
198 [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS,
199 },
200 [C(OP_WRITE)] = {
201 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS,
202 [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS,
203 },
204 [C(OP_PREFETCH)] = {
205 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
206 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
207 },
208 },
209 [C(DTLB)] = {
210 /*
211 * Only ITLB misses and DTLB refills are supported.
212	 * If users want DTLB accesses as well, a raw counter
213	 * must be used.
214 */
215 [C(OP_READ)] = {
216 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
217 [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
218 },
219 [C(OP_WRITE)] = {
220 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
221 [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
222 },
223 [C(OP_PREFETCH)] = {
224 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
225 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
226 },
227 },
228 [C(ITLB)] = {
229 [C(OP_READ)] = {
230 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
231 [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS,
232 },
233 [C(OP_WRITE)] = {
234 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
235 [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS,
236 },
237 [C(OP_PREFETCH)] = {
238 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
239 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
240 },
241 },
242 [C(BPU)] = {
243 [C(OP_READ)] = {
244 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE,
245 [C(RESULT_MISS)]
246 = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
247 },
248 [C(OP_WRITE)] = {
249 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE,
250 [C(RESULT_MISS)]
251 = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
252 },
253 [C(OP_PREFETCH)] = {
254 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
255 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
256 },
257 },
258};
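
/*
 * Illustrative lookup (editorial): a request for an L1 data-cache read
 * miss walks this table as
 *
 *	armv7_a8_perf_cache_map[C(L1D)][C(OP_READ)][C(RESULT_MISS)]
 *
 * and yields ARMV7_PERFCTR_DCACHE_REFILL (0x03) as the event to program.
 */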
259
260/*
261 * Cortex-A9 HW events mapping
262 */
263static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = {
264 [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
265 [PERF_COUNT_HW_INSTRUCTIONS] =
266 ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE,
267 [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_COHERENT_LINE_HIT,
268 [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_COHERENT_LINE_MISS,
269 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
270 [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
271 [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
272};
273
274static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
275 [PERF_COUNT_HW_CACHE_OP_MAX]
276 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
277 [C(L1D)] = {
278 /*
279 * The performance counters don't differentiate between read
280 * and write accesses/misses so this isn't strictly correct,
281 * but it's the best we can do. Writes and reads get
282 * combined.
283 */
284 [C(OP_READ)] = {
285 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS,
286 [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL,
287 },
288 [C(OP_WRITE)] = {
289 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS,
290 [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL,
291 },
292 [C(OP_PREFETCH)] = {
293 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
294 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
295 },
296 },
297 [C(L1I)] = {
298 [C(OP_READ)] = {
299 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
300 [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS,
301 },
302 [C(OP_WRITE)] = {
303 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
304 [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS,
305 },
306 [C(OP_PREFETCH)] = {
307 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
308 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
309 },
310 },
311 [C(LL)] = {
312 [C(OP_READ)] = {
313 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
314 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
315 },
316 [C(OP_WRITE)] = {
317 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
318 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
319 },
320 [C(OP_PREFETCH)] = {
321 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
322 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
323 },
324 },
325 [C(DTLB)] = {
326 /*
327 * Only ITLB misses and DTLB refills are supported.
328	 * If users want DTLB accesses as well, a raw counter
329	 * must be used.
330 */
331 [C(OP_READ)] = {
332 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
333 [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
334 },
335 [C(OP_WRITE)] = {
336 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
337 [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
338 },
339 [C(OP_PREFETCH)] = {
340 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
341 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
342 },
343 },
344 [C(ITLB)] = {
345 [C(OP_READ)] = {
346 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
347 [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS,
348 },
349 [C(OP_WRITE)] = {
350 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
351 [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS,
352 },
353 [C(OP_PREFETCH)] = {
354 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
355 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
356 },
357 },
358 [C(BPU)] = {
359 [C(OP_READ)] = {
360 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE,
361 [C(RESULT_MISS)]
362 = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
363 },
364 [C(OP_WRITE)] = {
365 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE,
366 [C(RESULT_MISS)]
367 = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
368 },
369 [C(OP_PREFETCH)] = {
370 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
371 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
372 },
373 },
374};
375
376/*
377 * Perf Events counters
378 */
379enum armv7_counters {
380 ARMV7_CYCLE_COUNTER = 1, /* Cycle counter */
381 ARMV7_COUNTER0 = 2, /* First event counter */
382};
383
384/*
385 * The cycle counter is ARMV7_CYCLE_COUNTER.
386 * The first event counter is ARMV7_COUNTER0.
387 * The last event counter is (ARMV7_COUNTER0 + armpmu->num_events - 1).
388 */
389#define ARMV7_COUNTER_LAST (ARMV7_COUNTER0 + armpmu->num_events - 1)
390
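/*
 * Editorial sketch (an assumption, not part of this patch): the helper
 * below is hypothetical and only spells out the range check that the
 * accessors in this file repeat inline.  With num_events == 5 on a
 * Cortex-A8 (cycle counter plus four event counters), valid perf indices
 * run from ARMV7_CYCLE_COUNTER (1) up to ARMV7_COUNTER_LAST.
 */
static inline int armv7_counter_valid(int idx)
{
	return idx == ARMV7_CYCLE_COUNTER ||
	       (idx >= ARMV7_COUNTER0 && idx <= ARMV7_COUNTER_LAST);
}
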
391/*
392 * ARMv7 low level PMNC access
393 */
394
395/*
396 * Per-CPU PMNC: config reg
397 */
398#define ARMV7_PMNC_E (1 << 0) /* Enable all counters */
399#define ARMV7_PMNC_P (1 << 1) /* Reset all counters */
400#define ARMV7_PMNC_C (1 << 2) /* Cycle counter reset */
401#define ARMV7_PMNC_D (1 << 3) /* CCNT counts every 64th cpu cycle */
402#define ARMV7_PMNC_X (1 << 4) /* Export to ETM */
403#define ARMV7_PMNC_DP (1 << 5) /* Disable CCNT if non-invasive debug*/
404#define ARMV7_PMNC_N_SHIFT 11 /* Number of counters supported */
405#define ARMV7_PMNC_N_MASK 0x1f
406#define ARMV7_PMNC_MASK 0x3f /* Mask for writable bits */
407
408/*
409 * Available counters
410 */
411#define ARMV7_CNT0 0 /* First event counter */
412#define ARMV7_CCNT 31 /* Cycle counter */
413
414/* Perf Event to low level counters mapping */
415#define ARMV7_EVENT_CNT_TO_CNTx (ARMV7_COUNTER0 - ARMV7_CNT0)
416
417/*
418 * CNTENS: counters enable reg
419 */
420#define ARMV7_CNTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
421#define ARMV7_CNTENS_C (1 << ARMV7_CCNT)
422
423/*
424 * CNTENC: counters disable reg
425 */
426#define ARMV7_CNTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
427#define ARMV7_CNTENC_C (1 << ARMV7_CCNT)
428
429/*
430 * INTENS: counters overflow interrupt enable reg
431 */
432#define ARMV7_INTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
433#define ARMV7_INTENS_C (1 << ARMV7_CCNT)
434
435/*
436 * INTENC: counters overflow interrupt disable reg
437 */
438#define ARMV7_INTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
439#define ARMV7_INTENC_C (1 << ARMV7_CCNT)
440
441/*
442 * EVTSEL: Event selection reg
443 */
444#define ARMV7_EVTSEL_MASK 0xff /* Mask for writable bits */
445
446/*
447 * SELECT: Counter selection reg
448 */
449#define ARMV7_SELECT_MASK 0x1f /* Mask for writable bits */
450
451/*
452 * FLAG: counters overflow flag status reg
453 */
454#define ARMV7_FLAG_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
455#define ARMV7_FLAG_C (1 << ARMV7_CCNT)
456#define ARMV7_FLAG_MASK 0xffffffff /* Mask for writable bits */
457#define ARMV7_OVERFLOWED_MASK ARMV7_FLAG_MASK
458
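/*
 * Worked example (editorial): a FLAG value of 0x80000001 means both the
 * cycle counter (ARMV7_FLAG_C, bit 31) and event counter CNT0
 * (ARMV7_FLAG_P(ARMV7_COUNTER0), bit 0) have overflowed since the flags
 * were last cleared.
 */
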
459static inline unsigned long armv7_pmnc_read(void)
460{
461 u32 val;
462 asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val));
463 return val;
464}
465
466static inline void armv7_pmnc_write(unsigned long val)
467{
468 val &= ARMV7_PMNC_MASK;
469 asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val));
470}
471
472static inline int armv7_pmnc_has_overflowed(unsigned long pmnc)
473{
474 return pmnc & ARMV7_OVERFLOWED_MASK;
475}
476
477static inline int armv7_pmnc_counter_has_overflowed(unsigned long pmnc,
478 enum armv7_counters counter)
479{
480 int ret = 0;
481
482 if (counter == ARMV7_CYCLE_COUNTER)
483 ret = pmnc & ARMV7_FLAG_C;
484 else if ((counter >= ARMV7_COUNTER0) && (counter <= ARMV7_COUNTER_LAST))
485 ret = pmnc & ARMV7_FLAG_P(counter);
486 else
487 pr_err("CPU%u checking wrong counter %d overflow status\n",
488 smp_processor_id(), counter);
489
490 return ret;
491}
492
493static inline int armv7_pmnc_select_counter(unsigned int idx)
494{
495 u32 val;
496
497 if ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST)) {
498 pr_err("CPU%u selecting wrong PMNC counter"
499 " %d\n", smp_processor_id(), idx);
500 return -1;
501 }
502
503 val = (idx - ARMV7_EVENT_CNT_TO_CNTx) & ARMV7_SELECT_MASK;
504 asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val));
505
506 return idx;
507}
508
509static inline u32 armv7pmu_read_counter(int idx)
510{
511 unsigned long value = 0;
512
513 if (idx == ARMV7_CYCLE_COUNTER)
514 asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value));
515 else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
516 if (armv7_pmnc_select_counter(idx) == idx)
517 asm volatile("mrc p15, 0, %0, c9, c13, 2"
518 : "=r" (value));
519 } else
520 pr_err("CPU%u reading wrong counter %d\n",
521 smp_processor_id(), idx);
522
523 return value;
524}
525
526static inline void armv7pmu_write_counter(int idx, u32 value)
527{
528 if (idx == ARMV7_CYCLE_COUNTER)
529 asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value));
530 else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
531 if (armv7_pmnc_select_counter(idx) == idx)
532 asm volatile("mcr p15, 0, %0, c9, c13, 2"
533 : : "r" (value));
534 } else
535 pr_err("CPU%u writing wrong counter %d\n",
536 smp_processor_id(), idx);
537}
538
539static inline void armv7_pmnc_write_evtsel(unsigned int idx, u32 val)
540{
541 if (armv7_pmnc_select_counter(idx) == idx) {
542 val &= ARMV7_EVTSEL_MASK;
543 asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val));
544 }
545}
546
547static inline u32 armv7_pmnc_enable_counter(unsigned int idx)
548{
549 u32 val;
550
551 if ((idx != ARMV7_CYCLE_COUNTER) &&
552 ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
553 pr_err("CPU%u enabling wrong PMNC counter"
554 " %d\n", smp_processor_id(), idx);
555 return -1;
556 }
557
558 if (idx == ARMV7_CYCLE_COUNTER)
559 val = ARMV7_CNTENS_C;
560 else
561 val = ARMV7_CNTENS_P(idx);
562
563 asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val));
564
565 return idx;
566}
567
568static inline u32 armv7_pmnc_disable_counter(unsigned int idx)
569{
570 u32 val;
571
572
573 if ((idx != ARMV7_CYCLE_COUNTER) &&
574 ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
575 pr_err("CPU%u disabling wrong PMNC counter"
576 " %d\n", smp_processor_id(), idx);
577 return -1;
578 }
579
580 if (idx == ARMV7_CYCLE_COUNTER)
581 val = ARMV7_CNTENC_C;
582 else
583 val = ARMV7_CNTENC_P(idx);
584
585 asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val));
586
587 return idx;
588}
589
590static inline u32 armv7_pmnc_enable_intens(unsigned int idx)
591{
592 u32 val;
593
594 if ((idx != ARMV7_CYCLE_COUNTER) &&
595 ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
596 pr_err("CPU%u enabling wrong PMNC counter"
597 " interrupt enable %d\n", smp_processor_id(), idx);
598 return -1;
599 }
600
601 if (idx == ARMV7_CYCLE_COUNTER)
602 val = ARMV7_INTENS_C;
603 else
604 val = ARMV7_INTENS_P(idx);
605
606 asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val));
607
608 return idx;
609}
610
611static inline u32 armv7_pmnc_disable_intens(unsigned int idx)
612{
613 u32 val;
614
615 if ((idx != ARMV7_CYCLE_COUNTER) &&
616 ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
617 pr_err("CPU%u disabling wrong PMNC counter"
618 " interrupt enable %d\n", smp_processor_id(), idx);
619 return -1;
620 }
621
622 if (idx == ARMV7_CYCLE_COUNTER)
623 val = ARMV7_INTENC_C;
624 else
625 val = ARMV7_INTENC_P(idx);
626
627 asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val));
628
629 return idx;
630}
631
632static inline u32 armv7_pmnc_getreset_flags(void)
633{
634 u32 val;
635
636 /* Read */
637 asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
638
639 /* Write to clear flags */
640 val &= ARMV7_FLAG_MASK;
641 asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val));
642
643 return val;
644}
645
646#ifdef DEBUG
647static void armv7_pmnc_dump_regs(void)
648{
649 u32 val;
650 unsigned int cnt;
651
652 printk(KERN_INFO "PMNC registers dump:\n");
653
654 asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val));
655 printk(KERN_INFO "PMNC =0x%08x\n", val);
656
657 asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val));
658 printk(KERN_INFO "CNTENS=0x%08x\n", val);
659
660 asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val));
661 printk(KERN_INFO "INTENS=0x%08x\n", val);
662
663 asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
664 printk(KERN_INFO "FLAGS =0x%08x\n", val);
665
666 asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val));
667 printk(KERN_INFO "SELECT=0x%08x\n", val);
668
669 asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val));
670 printk(KERN_INFO "CCNT =0x%08x\n", val);
671
672 for (cnt = ARMV7_COUNTER0; cnt < ARMV7_COUNTER_LAST; cnt++) {
673 armv7_pmnc_select_counter(cnt);
674 asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val));
675 printk(KERN_INFO "CNT[%d] count =0x%08x\n",
676 cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
677 asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val));
678 printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n",
679 cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
680 }
681}
682#endif
683
684static void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx)
685{
686 unsigned long flags;
687
688 /*
689 * Enable counter and interrupt, and set the counter to count
690 * the event that we're interested in.
691 */
692 raw_spin_lock_irqsave(&pmu_lock, flags);
693
694 /*
695 * Disable counter
696 */
697 armv7_pmnc_disable_counter(idx);
698
699 /*
700 * Set event (if destined for PMNx counters)
701 * We don't need to set the event if it's a cycle count
702 */
703 if (idx != ARMV7_CYCLE_COUNTER)
704 armv7_pmnc_write_evtsel(idx, hwc->config_base);
705
706 /*
707 * Enable interrupt for this counter
708 */
709 armv7_pmnc_enable_intens(idx);
710
711 /*
712 * Enable counter
713 */
714 armv7_pmnc_enable_counter(idx);
715
716 raw_spin_unlock_irqrestore(&pmu_lock, flags);
717}
718
719static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx)
720{
721 unsigned long flags;
722
723 /*
724 * Disable counter and interrupt
725 */
726 raw_spin_lock_irqsave(&pmu_lock, flags);
727
728 /*
729 * Disable counter
730 */
731 armv7_pmnc_disable_counter(idx);
732
733 /*
734 * Disable interrupt for this counter
735 */
736 armv7_pmnc_disable_intens(idx);
737
738 raw_spin_unlock_irqrestore(&pmu_lock, flags);
739}
740
741static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
742{
743 unsigned long pmnc;
744 struct perf_sample_data data;
745 struct cpu_hw_events *cpuc;
746 struct pt_regs *regs;
747 int idx;
748
749 /*
750 * Get and reset the IRQ flags
751 */
752 pmnc = armv7_pmnc_getreset_flags();
753
754 /*
755 * Did an overflow occur?
756 */
757 if (!armv7_pmnc_has_overflowed(pmnc))
758 return IRQ_NONE;
759
760 /*
761 * Handle the counter(s) overflow(s)
762 */
763 regs = get_irq_regs();
764
765 perf_sample_data_init(&data, 0);
766
767 cpuc = &__get_cpu_var(cpu_hw_events);
768 for (idx = 0; idx <= armpmu->num_events; ++idx) {
769 struct perf_event *event = cpuc->events[idx];
770 struct hw_perf_event *hwc;
771
772 if (!test_bit(idx, cpuc->active_mask))
773 continue;
774
775 /*
776 * We have a single interrupt for all counters. Check that
777 * each counter has overflowed before we process it.
778 */
779 if (!armv7_pmnc_counter_has_overflowed(pmnc, idx))
780 continue;
781
782 hwc = &event->hw;
783 armpmu_event_update(event, hwc, idx);
784 data.period = event->hw.last_period;
785 if (!armpmu_event_set_period(event, hwc, idx))
786 continue;
787
788 if (perf_event_overflow(event, 0, &data, regs))
789 armpmu->disable(hwc, idx);
790 }
791
792 /*
793 * Handle the pending perf events.
794 *
795 * Note: this call *must* be run with interrupts disabled. For
796 * platforms that can have the PMU interrupts raised as an NMI, this
797 * will not work.
798 */
799 irq_work_run();
800
801 return IRQ_HANDLED;
802}
803
804static void armv7pmu_start(void)
805{
806 unsigned long flags;
807
808 raw_spin_lock_irqsave(&pmu_lock, flags);
809 /* Enable all counters */
810 armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E);
811 raw_spin_unlock_irqrestore(&pmu_lock, flags);
812}
813
814static void armv7pmu_stop(void)
815{
816 unsigned long flags;
817
818 raw_spin_lock_irqsave(&pmu_lock, flags);
819 /* Disable all counters */
820 armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E);
821 raw_spin_unlock_irqrestore(&pmu_lock, flags);
822}
823
824static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc,
825 struct hw_perf_event *event)
826{
827 int idx;
828
829	/* Always place a cycle-count event on the cycle counter. */
830 if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) {
831 if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask))
832 return -EAGAIN;
833
834 return ARMV7_CYCLE_COUNTER;
835 } else {
836 /*
837		 * For anything other than a cycle counter, try to use
838		 * the event counters.
839 */
840 for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) {
841 if (!test_and_set_bit(idx, cpuc->used_mask))
842 return idx;
843 }
844
845 /* The counters are all in use. */
846 return -EAGAIN;
847 }
848}
849
850static struct arm_pmu armv7pmu = {
851 .handle_irq = armv7pmu_handle_irq,
852 .enable = armv7pmu_enable_event,
853 .disable = armv7pmu_disable_event,
854 .read_counter = armv7pmu_read_counter,
855 .write_counter = armv7pmu_write_counter,
856 .get_event_idx = armv7pmu_get_event_idx,
857 .start = armv7pmu_start,
858 .stop = armv7pmu_stop,
859 .raw_event_mask = 0xFF,
860 .max_period = (1LLU << 32) - 1,
861};
862
863static u32 __init armv7_reset_read_pmnc(void)
864{
865 u32 nb_cnt;
866
867 /* Initialize & Reset PMNC: C and P bits */
868 armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C);
869
870 /* Read the nb of CNTx counters supported from PMNC */
871 nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK;
872
873 /* Add the CPU cycles counter and return */
874 return nb_cnt + 1;
875}
876
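/*
 * Worked example (values editorial): if the PMNC reads 0x41002000 on a
 * Cortex-A8, then (0x41002000 >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK
 * is 4, so armv7_reset_read_pmnc() reports 4 + 1 == 5 counters (four
 * event counters plus the cycle counter).
 */
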
877static const struct arm_pmu *__init armv7_a8_pmu_init(void)
878{
879 armv7pmu.id = ARM_PERF_PMU_ID_CA8;
880 armv7pmu.name = "ARMv7 Cortex-A8";
881 armv7pmu.cache_map = &armv7_a8_perf_cache_map;
882 armv7pmu.event_map = &armv7_a8_perf_map;
883 armv7pmu.num_events = armv7_reset_read_pmnc();
884 return &armv7pmu;
885}
886
887static const struct arm_pmu *__init armv7_a9_pmu_init(void)
888{
889 armv7pmu.id = ARM_PERF_PMU_ID_CA9;
890 armv7pmu.name = "ARMv7 Cortex-A9";
891 armv7pmu.cache_map = &armv7_a9_perf_cache_map;
892 armv7pmu.event_map = &armv7_a9_perf_map;
893 armv7pmu.num_events = armv7_reset_read_pmnc();
894 return &armv7pmu;
895}
896#else
897static const struct arm_pmu *__init armv7_a8_pmu_init(void)
898{
899 return NULL;
900}
901
902static const struct arm_pmu *__init armv7_a9_pmu_init(void)
903{
904 return NULL;
905}
906#endif /* CONFIG_CPU_V7 */
diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c
new file mode 100644
index 000000000000..28cd3b025bc3
--- /dev/null
+++ b/arch/arm/kernel/perf_event_xscale.c
@@ -0,0 +1,807 @@
1/*
2 * ARMv5 [xscale] Performance counter handling code.
3 *
4 * Copyright (C) 2010, ARM Ltd., Will Deacon <will.deacon@arm.com>
5 *
6 * Based on the previous xscale OProfile code.
7 *
8 * There are two variants of the xscale PMU that we support:
9 * - xscale1pmu: 2 event counters and a cycle counter
10 * - xscale2pmu: 4 event counters and a cycle counter
11 * The two variants share event definitions, but have different
12 * PMU structures.
13 */
14
15#ifdef CONFIG_CPU_XSCALE
16enum xscale_perf_types {
17 XSCALE_PERFCTR_ICACHE_MISS = 0x00,
18 XSCALE_PERFCTR_ICACHE_NO_DELIVER = 0x01,
19 XSCALE_PERFCTR_DATA_STALL = 0x02,
20 XSCALE_PERFCTR_ITLB_MISS = 0x03,
21 XSCALE_PERFCTR_DTLB_MISS = 0x04,
22 XSCALE_PERFCTR_BRANCH = 0x05,
23 XSCALE_PERFCTR_BRANCH_MISS = 0x06,
24 XSCALE_PERFCTR_INSTRUCTION = 0x07,
25 XSCALE_PERFCTR_DCACHE_FULL_STALL = 0x08,
26 XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG = 0x09,
27 XSCALE_PERFCTR_DCACHE_ACCESS = 0x0A,
28 XSCALE_PERFCTR_DCACHE_MISS = 0x0B,
29 XSCALE_PERFCTR_DCACHE_WRITE_BACK = 0x0C,
30 XSCALE_PERFCTR_PC_CHANGED = 0x0D,
31 XSCALE_PERFCTR_BCU_REQUEST = 0x10,
32 XSCALE_PERFCTR_BCU_FULL = 0x11,
33 XSCALE_PERFCTR_BCU_DRAIN = 0x12,
34 XSCALE_PERFCTR_BCU_ECC_NO_ELOG = 0x14,
35 XSCALE_PERFCTR_BCU_1_BIT_ERR = 0x15,
36 XSCALE_PERFCTR_RMW = 0x16,
37 /* XSCALE_PERFCTR_CCNT is not hardware defined */
38 XSCALE_PERFCTR_CCNT = 0xFE,
39 XSCALE_PERFCTR_UNUSED = 0xFF,
40};
41
42enum xscale_counters {
43 XSCALE_CYCLE_COUNTER = 1,
44 XSCALE_COUNTER0,
45 XSCALE_COUNTER1,
46 XSCALE_COUNTER2,
47 XSCALE_COUNTER3,
48};
49
50static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = {
51 [PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT,
52 [PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION,
53 [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
54 [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
55 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH,
56 [PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS,
57 [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
58};
59
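/*
 * Editorial note: because XSCALE_PERFCTR_CCNT is a software marker,
 * xscale1pmu_get_event_idx() below matches on it and places the event on
 * the dedicated cycle counter instead of programming an event field.
 */
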
60static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
61 [PERF_COUNT_HW_CACHE_OP_MAX]
62 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
63 [C(L1D)] = {
64 [C(OP_READ)] = {
65 [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
66 [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
67 },
68 [C(OP_WRITE)] = {
69 [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
70 [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
71 },
72 [C(OP_PREFETCH)] = {
73 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
74 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
75 },
76 },
77 [C(L1I)] = {
78 [C(OP_READ)] = {
79 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
80 [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS,
81 },
82 [C(OP_WRITE)] = {
83 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
84 [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS,
85 },
86 [C(OP_PREFETCH)] = {
87 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
88 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
89 },
90 },
91 [C(LL)] = {
92 [C(OP_READ)] = {
93 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
94 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
95 },
96 [C(OP_WRITE)] = {
97 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
98 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
99 },
100 [C(OP_PREFETCH)] = {
101 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
102 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
103 },
104 },
105 [C(DTLB)] = {
106 [C(OP_READ)] = {
107 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
108 [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
109 },
110 [C(OP_WRITE)] = {
111 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
112 [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
113 },
114 [C(OP_PREFETCH)] = {
115 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
116 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
117 },
118 },
119 [C(ITLB)] = {
120 [C(OP_READ)] = {
121 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
122 [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
123 },
124 [C(OP_WRITE)] = {
125 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
126 [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
127 },
128 [C(OP_PREFETCH)] = {
129 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
130 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
131 },
132 },
133 [C(BPU)] = {
134 [C(OP_READ)] = {
135 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
136 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
137 },
138 [C(OP_WRITE)] = {
139 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
140 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
141 },
142 [C(OP_PREFETCH)] = {
143 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
144 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
145 },
146 },
147};
148
149#define XSCALE_PMU_ENABLE 0x001
150#define XSCALE_PMN_RESET 0x002
151#define XSCALE_CCNT_RESET 0x004
152#define XSCALE_PMU_RESET	(XSCALE_CCNT_RESET | XSCALE_PMN_RESET)
153#define XSCALE_PMU_CNT64 0x008
154
155#define XSCALE1_OVERFLOWED_MASK 0x700
156#define XSCALE1_CCOUNT_OVERFLOW 0x400
157#define XSCALE1_COUNT0_OVERFLOW 0x100
158#define XSCALE1_COUNT1_OVERFLOW 0x200
159#define XSCALE1_CCOUNT_INT_EN 0x040
160#define XSCALE1_COUNT0_INT_EN 0x010
161#define XSCALE1_COUNT1_INT_EN 0x020
162#define XSCALE1_COUNT0_EVT_SHFT 12
163#define XSCALE1_COUNT0_EVT_MASK (0xff << XSCALE1_COUNT0_EVT_SHFT)
164#define XSCALE1_COUNT1_EVT_SHFT 20
165#define XSCALE1_COUNT1_EVT_MASK (0xff << XSCALE1_COUNT1_EVT_SHFT)
166
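/*
 * Illustrative sketch (editorial, values assumed): programming counter 0
 * to count executed instructions is a read-modify-write of the PMNC:
 *
 *	val = xscale1pmu_read_pmnc();
 *	val &= ~XSCALE1_COUNT0_EVT_MASK;
 *	val |= (XSCALE_PERFCTR_INSTRUCTION << XSCALE1_COUNT0_EVT_SHFT) |
 *	       XSCALE1_COUNT0_INT_EN;
 *	xscale1pmu_write_pmnc(val);
 *
 * which is exactly what xscale1pmu_enable_event() does under pmu_lock
 * further down.
 */
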
167static inline u32
168xscale1pmu_read_pmnc(void)
169{
170 u32 val;
171 asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val));
172 return val;
173}
174
175static inline void
176xscale1pmu_write_pmnc(u32 val)
177{
178	/* the upper 4 bits and bits 7 and 11 are write-as-0 */
179 val &= 0xffff77f;
180 asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val));
181}
182
183static inline int
184xscale1_pmnc_counter_has_overflowed(unsigned long pmnc,
185 enum xscale_counters counter)
186{
187 int ret = 0;
188
189 switch (counter) {
190 case XSCALE_CYCLE_COUNTER:
191 ret = pmnc & XSCALE1_CCOUNT_OVERFLOW;
192 break;
193 case XSCALE_COUNTER0:
194 ret = pmnc & XSCALE1_COUNT0_OVERFLOW;
195 break;
196 case XSCALE_COUNTER1:
197 ret = pmnc & XSCALE1_COUNT1_OVERFLOW;
198 break;
199 default:
200 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
201 }
202
203 return ret;
204}
205
206static irqreturn_t
207xscale1pmu_handle_irq(int irq_num, void *dev)
208{
209 unsigned long pmnc;
210 struct perf_sample_data data;
211 struct cpu_hw_events *cpuc;
212 struct pt_regs *regs;
213 int idx;
214
215 /*
216	 * NOTE: an A-stepping erratum means that if an overflow bit is
217	 * already set when another overflow occurs, the earlier
218	 * overflow bit gets cleared. There is no workaround; this is
219	 * fixed in the B stepping and later.
220 */
221 pmnc = xscale1pmu_read_pmnc();
222
223 /*
224 * Write the value back to clear the overflow flags. Overflow
225 * flags remain in pmnc for use below. We also disable the PMU
226 * while we process the interrupt.
227 */
228 xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
229
230 if (!(pmnc & XSCALE1_OVERFLOWED_MASK))
231 return IRQ_NONE;
232
233 regs = get_irq_regs();
234
235 perf_sample_data_init(&data, 0);
236
237 cpuc = &__get_cpu_var(cpu_hw_events);
238 for (idx = 0; idx <= armpmu->num_events; ++idx) {
239 struct perf_event *event = cpuc->events[idx];
240 struct hw_perf_event *hwc;
241
242 if (!test_bit(idx, cpuc->active_mask))
243 continue;
244
245 if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx))
246 continue;
247
248 hwc = &event->hw;
249 armpmu_event_update(event, hwc, idx);
250 data.period = event->hw.last_period;
251 if (!armpmu_event_set_period(event, hwc, idx))
252 continue;
253
254 if (perf_event_overflow(event, 0, &data, regs))
255 armpmu->disable(hwc, idx);
256 }
257
258 irq_work_run();
259
260 /*
261 * Re-enable the PMU.
262 */
263 pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE;
264 xscale1pmu_write_pmnc(pmnc);
265
266 return IRQ_HANDLED;
267}
268
269static void
270xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx)
271{
272 unsigned long val, mask, evt, flags;
273
274 switch (idx) {
275 case XSCALE_CYCLE_COUNTER:
276 mask = 0;
277 evt = XSCALE1_CCOUNT_INT_EN;
278 break;
279 case XSCALE_COUNTER0:
280 mask = XSCALE1_COUNT0_EVT_MASK;
281 evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) |
282 XSCALE1_COUNT0_INT_EN;
283 break;
284 case XSCALE_COUNTER1:
285 mask = XSCALE1_COUNT1_EVT_MASK;
286 evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) |
287 XSCALE1_COUNT1_INT_EN;
288 break;
289 default:
290 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
291 return;
292 }
293
294 raw_spin_lock_irqsave(&pmu_lock, flags);
295 val = xscale1pmu_read_pmnc();
296 val &= ~mask;
297 val |= evt;
298 xscale1pmu_write_pmnc(val);
299 raw_spin_unlock_irqrestore(&pmu_lock, flags);
300}
301
302static void
303xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx)
304{
305 unsigned long val, mask, evt, flags;
306
307 switch (idx) {
308 case XSCALE_CYCLE_COUNTER:
309 mask = XSCALE1_CCOUNT_INT_EN;
310 evt = 0;
311 break;
312 case XSCALE_COUNTER0:
313 mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK;
314 evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT;
315 break;
316 case XSCALE_COUNTER1:
317 mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK;
318 evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT;
319 break;
320 default:
321 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
322 return;
323 }
324
325 raw_spin_lock_irqsave(&pmu_lock, flags);
326 val = xscale1pmu_read_pmnc();
327 val &= ~mask;
328 val |= evt;
329 xscale1pmu_write_pmnc(val);
330 raw_spin_unlock_irqrestore(&pmu_lock, flags);
331}
332
333static int
334xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc,
335 struct hw_perf_event *event)
336{
337 if (XSCALE_PERFCTR_CCNT == event->config_base) {
338 if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask))
339 return -EAGAIN;
340
341 return XSCALE_CYCLE_COUNTER;
342 } else {
343 if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask))
344 return XSCALE_COUNTER1;
345
346 if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask))
347 return XSCALE_COUNTER0;
348
349 return -EAGAIN;
350 }
351}
352
353static void
354xscale1pmu_start(void)
355{
356 unsigned long flags, val;
357
358 raw_spin_lock_irqsave(&pmu_lock, flags);
359 val = xscale1pmu_read_pmnc();
360 val |= XSCALE_PMU_ENABLE;
361 xscale1pmu_write_pmnc(val);
362 raw_spin_unlock_irqrestore(&pmu_lock, flags);
363}
364
365static void
366xscale1pmu_stop(void)
367{
368 unsigned long flags, val;
369
370 raw_spin_lock_irqsave(&pmu_lock, flags);
371 val = xscale1pmu_read_pmnc();
372 val &= ~XSCALE_PMU_ENABLE;
373 xscale1pmu_write_pmnc(val);
374 raw_spin_unlock_irqrestore(&pmu_lock, flags);
375}
376
377static inline u32
378xscale1pmu_read_counter(int counter)
379{
380 u32 val = 0;
381
382 switch (counter) {
383 case XSCALE_CYCLE_COUNTER:
384 asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val));
385 break;
386 case XSCALE_COUNTER0:
387 asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val));
388 break;
389 case XSCALE_COUNTER1:
390 asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val));
391 break;
392 }
393
394 return val;
395}
396
397static inline void
398xscale1pmu_write_counter(int counter, u32 val)
399{
400 switch (counter) {
401 case XSCALE_CYCLE_COUNTER:
402 asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val));
403 break;
404 case XSCALE_COUNTER0:
405 asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val));
406 break;
407 case XSCALE_COUNTER1:
408 asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val));
409 break;
410 }
411}
412
413static const struct arm_pmu xscale1pmu = {
414 .id = ARM_PERF_PMU_ID_XSCALE1,
415 .name = "xscale1",
416 .handle_irq = xscale1pmu_handle_irq,
417 .enable = xscale1pmu_enable_event,
418 .disable = xscale1pmu_disable_event,
419 .read_counter = xscale1pmu_read_counter,
420 .write_counter = xscale1pmu_write_counter,
421 .get_event_idx = xscale1pmu_get_event_idx,
422 .start = xscale1pmu_start,
423 .stop = xscale1pmu_stop,
424 .cache_map = &xscale_perf_cache_map,
425 .event_map = &xscale_perf_map,
426 .raw_event_mask = 0xFF,
427 .num_events = 3,
428 .max_period = (1LLU << 32) - 1,
429};
430
431static const struct arm_pmu *__init xscale1pmu_init(void)
432{
433 return &xscale1pmu;
434}
435
436#define XSCALE2_OVERFLOWED_MASK 0x01f
437#define XSCALE2_CCOUNT_OVERFLOW 0x001
438#define XSCALE2_COUNT0_OVERFLOW 0x002
439#define XSCALE2_COUNT1_OVERFLOW 0x004
440#define XSCALE2_COUNT2_OVERFLOW 0x008
441#define XSCALE2_COUNT3_OVERFLOW 0x010
442#define XSCALE2_CCOUNT_INT_EN 0x001
443#define XSCALE2_COUNT0_INT_EN 0x002
444#define XSCALE2_COUNT1_INT_EN 0x004
445#define XSCALE2_COUNT2_INT_EN 0x008
446#define XSCALE2_COUNT3_INT_EN 0x010
447#define XSCALE2_COUNT0_EVT_SHFT 0
448#define XSCALE2_COUNT0_EVT_MASK (0xff << XSCALE2_COUNT0_EVT_SHFT)
449#define XSCALE2_COUNT1_EVT_SHFT 8
450#define XSCALE2_COUNT1_EVT_MASK (0xff << XSCALE2_COUNT1_EVT_SHFT)
451#define XSCALE2_COUNT2_EVT_SHFT 16
452#define XSCALE2_COUNT2_EVT_MASK (0xff << XSCALE2_COUNT2_EVT_SHFT)
453#define XSCALE2_COUNT3_EVT_SHFT 24
454#define XSCALE2_COUNT3_EVT_MASK (0xff << XSCALE2_COUNT3_EVT_SHFT)
455
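/*
 * Illustrative sketch (editorial): unlike xscale1, the event numbers here
 * live in a dedicated event-select register, four 8-bit fields side by
 * side.  Selecting the D-cache miss event on counter 2 would be:
 *
 *	evtsel = xscale2pmu_read_event_select();
 *	evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
 *	evtsel |= XSCALE_PERFCTR_DCACHE_MISS << XSCALE2_COUNT2_EVT_SHFT;
 *	xscale2pmu_write_event_select(evtsel);
 *
 * mirroring xscale2pmu_enable_event() below.
 */
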
456static inline u32
457xscale2pmu_read_pmnc(void)
458{
459 u32 val;
460 asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val));
461 /* bits 1-2 and 4-23 are read-unpredictable */
462 return val & 0xff000009;
463}
464
465static inline void
466xscale2pmu_write_pmnc(u32 val)
467{
468 /* bits 4-23 are write-as-0, 24-31 are write ignored */
469 val &= 0xf;
470 asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val));
471}
472
473static inline u32
474xscale2pmu_read_overflow_flags(void)
475{
476 u32 val;
477 asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val));
478 return val;
479}
480
481static inline void
482xscale2pmu_write_overflow_flags(u32 val)
483{
484 asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val));
485}
486
487static inline u32
488xscale2pmu_read_event_select(void)
489{
490 u32 val;
491 asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val));
492 return val;
493}
494
495static inline void
496xscale2pmu_write_event_select(u32 val)
497{
498 asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val));
499}
500
501static inline u32
502xscale2pmu_read_int_enable(void)
503{
504 u32 val;
505 asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val));
506 return val;
507}
508
509static inline void
510xscale2pmu_write_int_enable(u32 val)
511{
512 asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val));
513}
514
515static inline int
516xscale2_pmnc_counter_has_overflowed(unsigned long of_flags,
517 enum xscale_counters counter)
518{
519 int ret = 0;
520
521 switch (counter) {
522 case XSCALE_CYCLE_COUNTER:
523 ret = of_flags & XSCALE2_CCOUNT_OVERFLOW;
524 break;
525 case XSCALE_COUNTER0:
526 ret = of_flags & XSCALE2_COUNT0_OVERFLOW;
527 break;
528 case XSCALE_COUNTER1:
529 ret = of_flags & XSCALE2_COUNT1_OVERFLOW;
530 break;
531 case XSCALE_COUNTER2:
532 ret = of_flags & XSCALE2_COUNT2_OVERFLOW;
533 break;
534 case XSCALE_COUNTER3:
535 ret = of_flags & XSCALE2_COUNT3_OVERFLOW;
536 break;
537 default:
538 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
539 }
540
541 return ret;
542}
543
544static irqreturn_t
545xscale2pmu_handle_irq(int irq_num, void *dev)
546{
547 unsigned long pmnc, of_flags;
548 struct perf_sample_data data;
549 struct cpu_hw_events *cpuc;
550 struct pt_regs *regs;
551 int idx;
552
553 /* Disable the PMU. */
554 pmnc = xscale2pmu_read_pmnc();
555 xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
556
557 /* Check the overflow flag register. */
558 of_flags = xscale2pmu_read_overflow_flags();
559 if (!(of_flags & XSCALE2_OVERFLOWED_MASK))
560 return IRQ_NONE;
561
562 /* Clear the overflow bits. */
563 xscale2pmu_write_overflow_flags(of_flags);
564
565 regs = get_irq_regs();
566
567 perf_sample_data_init(&data, 0);
568
569 cpuc = &__get_cpu_var(cpu_hw_events);
570 for (idx = 0; idx <= armpmu->num_events; ++idx) {
571 struct perf_event *event = cpuc->events[idx];
572 struct hw_perf_event *hwc;
573
574 if (!test_bit(idx, cpuc->active_mask))
575 continue;
576
577		if (!xscale2_pmnc_counter_has_overflowed(of_flags, idx))
578 continue;
579
580 hwc = &event->hw;
581 armpmu_event_update(event, hwc, idx);
582 data.period = event->hw.last_period;
583 if (!armpmu_event_set_period(event, hwc, idx))
584 continue;
585
586 if (perf_event_overflow(event, 0, &data, regs))
587 armpmu->disable(hwc, idx);
588 }
589
590 irq_work_run();
591
592 /*
593 * Re-enable the PMU.
594 */
595 pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE;
596 xscale2pmu_write_pmnc(pmnc);
597
598 return IRQ_HANDLED;
599}
600
601static void
602xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx)
603{
604 unsigned long flags, ien, evtsel;
605
606 ien = xscale2pmu_read_int_enable();
607 evtsel = xscale2pmu_read_event_select();
608
609 switch (idx) {
610 case XSCALE_CYCLE_COUNTER:
611 ien |= XSCALE2_CCOUNT_INT_EN;
612 break;
613 case XSCALE_COUNTER0:
614 ien |= XSCALE2_COUNT0_INT_EN;
615 evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
616 evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT;
617 break;
618 case XSCALE_COUNTER1:
619 ien |= XSCALE2_COUNT1_INT_EN;
620 evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
621 evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT;
622 break;
623 case XSCALE_COUNTER2:
624 ien |= XSCALE2_COUNT2_INT_EN;
625 evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
626 evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT;
627 break;
628 case XSCALE_COUNTER3:
629 ien |= XSCALE2_COUNT3_INT_EN;
630 evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
631 evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT;
632 break;
633 default:
634 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
635 return;
636 }
637
638 raw_spin_lock_irqsave(&pmu_lock, flags);
639 xscale2pmu_write_event_select(evtsel);
640 xscale2pmu_write_int_enable(ien);
641 raw_spin_unlock_irqrestore(&pmu_lock, flags);
642}
643
644static void
645xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx)
646{
647 unsigned long flags, ien, evtsel;
648
649 ien = xscale2pmu_read_int_enable();
650 evtsel = xscale2pmu_read_event_select();
651
652 switch (idx) {
653 case XSCALE_CYCLE_COUNTER:
654 ien &= ~XSCALE2_CCOUNT_INT_EN;
655 break;
656 case XSCALE_COUNTER0:
657 ien &= ~XSCALE2_COUNT0_INT_EN;
658 evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
659 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT;
660 break;
661 case XSCALE_COUNTER1:
662 ien &= ~XSCALE2_COUNT1_INT_EN;
663 evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
664 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT;
665 break;
666 case XSCALE_COUNTER2:
667 ien &= ~XSCALE2_COUNT2_INT_EN;
668 evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
669 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT;
670 break;
671 case XSCALE_COUNTER3:
672 ien &= ~XSCALE2_COUNT3_INT_EN;
673 evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
674 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT;
675 break;
676 default:
677 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
678 return;
679 }
680
681 raw_spin_lock_irqsave(&pmu_lock, flags);
682 xscale2pmu_write_event_select(evtsel);
683 xscale2pmu_write_int_enable(ien);
684 raw_spin_unlock_irqrestore(&pmu_lock, flags);
685}
686
687static int
688xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc,
689 struct hw_perf_event *event)
690{
691 int idx = xscale1pmu_get_event_idx(cpuc, event);
692 if (idx >= 0)
693 goto out;
694
695 if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask))
696 idx = XSCALE_COUNTER3;
697 else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask))
698 idx = XSCALE_COUNTER2;
699out:
700 return idx;
701}
702
703static void
704xscale2pmu_start(void)
705{
706 unsigned long flags, val;
707
708 raw_spin_lock_irqsave(&pmu_lock, flags);
709 val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64;
710 val |= XSCALE_PMU_ENABLE;
711 xscale2pmu_write_pmnc(val);
712 raw_spin_unlock_irqrestore(&pmu_lock, flags);
713}
714
715static void
716xscale2pmu_stop(void)
717{
718 unsigned long flags, val;
719
720 raw_spin_lock_irqsave(&pmu_lock, flags);
721 val = xscale2pmu_read_pmnc();
722 val &= ~XSCALE_PMU_ENABLE;
723 xscale2pmu_write_pmnc(val);
724 raw_spin_unlock_irqrestore(&pmu_lock, flags);
725}
726
727static inline u32
728xscale2pmu_read_counter(int counter)
729{
730 u32 val = 0;
731
732 switch (counter) {
733 case XSCALE_CYCLE_COUNTER:
734 asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val));
735 break;
736 case XSCALE_COUNTER0:
737 asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val));
738 break;
739 case XSCALE_COUNTER1:
740 asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val));
741 break;
742 case XSCALE_COUNTER2:
743 asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val));
744 break;
745 case XSCALE_COUNTER3:
746 asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val));
747 break;
748 }
749
750 return val;
751}
752
753static inline void
754xscale2pmu_write_counter(int counter, u32 val)
755{
756 switch (counter) {
757 case XSCALE_CYCLE_COUNTER:
758 asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val));
759 break;
760 case XSCALE_COUNTER0:
761 asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val));
762 break;
763 case XSCALE_COUNTER1:
764 asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val));
765 break;
766 case XSCALE_COUNTER2:
767 asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val));
768 break;
769 case XSCALE_COUNTER3:
770 asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val));
771 break;
772 }
773}
774
775static const struct arm_pmu xscale2pmu = {
776 .id = ARM_PERF_PMU_ID_XSCALE2,
777 .name = "xscale2",
778 .handle_irq = xscale2pmu_handle_irq,
779 .enable = xscale2pmu_enable_event,
780 .disable = xscale2pmu_disable_event,
781 .read_counter = xscale2pmu_read_counter,
782 .write_counter = xscale2pmu_write_counter,
783 .get_event_idx = xscale2pmu_get_event_idx,
784 .start = xscale2pmu_start,
785 .stop = xscale2pmu_stop,
786 .cache_map = &xscale_perf_cache_map,
787 .event_map = &xscale_perf_map,
788 .raw_event_mask = 0xFF,
789 .num_events = 5,
790 .max_period = (1LLU << 32) - 1,
791};
792
793static const struct arm_pmu *__init xscale2pmu_init(void)
794{
795 return &xscale2pmu;
796}
797#else
798static const struct arm_pmu *__init xscale1pmu_init(void)
799{
800 return NULL;
801}
802
803static const struct arm_pmu *__init xscale2pmu_init(void)
804{
805 return NULL;
806}
807#endif /* CONFIG_CPU_XSCALE */
diff --git a/arch/arm/kernel/pj4-cp0.c b/arch/arm/kernel/pj4-cp0.c
new file mode 100644
index 000000000000..a4b1b0748fd3
--- /dev/null
+++ b/arch/arm/kernel/pj4-cp0.c
@@ -0,0 +1,94 @@
1/*
2 * linux/arch/arm/kernel/pj4-cp0.c
3 *
4 * PJ4 iWMMXt coprocessor context switching and handling
5 *
6 * Copyright (c) 2010 Marvell International Inc.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13#include <linux/module.h>
14#include <linux/types.h>
15#include <linux/kernel.h>
16#include <linux/signal.h>
17#include <linux/sched.h>
18#include <linux/init.h>
19#include <linux/io.h>
20#include <asm/thread_notify.h>
21
22static int iwmmxt_do(struct notifier_block *self, unsigned long cmd, void *t)
23{
24 struct thread_info *thread = t;
25
26 switch (cmd) {
27 case THREAD_NOTIFY_FLUSH:
28 /*
29 * flush_thread() zeroes thread->fpstate, so no need
30 * to do anything here.
31 *
32 * FALLTHROUGH: Ensure we don't try to overwrite our newly
33 * initialised state information on the first fault.
34 */
35
36 case THREAD_NOTIFY_EXIT:
37 iwmmxt_task_release(thread);
38 break;
39
40 case THREAD_NOTIFY_SWITCH:
41 iwmmxt_task_switch(thread);
42 break;
43 }
44
45 return NOTIFY_DONE;
46}
47
48static struct notifier_block iwmmxt_notifier_block = {
49 .notifier_call = iwmmxt_do,
50};
51
52
53static u32 __init pj4_cp_access_read(void)
54{
55 u32 value;
56
57 __asm__ __volatile__ (
58 "mrc p15, 0, %0, c1, c0, 2\n\t"
59 : "=r" (value));
60 return value;
61}
62
63static void __init pj4_cp_access_write(u32 value)
64{
65 u32 temp;
66
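	/*
	 * Editorial note: this is the classic CPWAIT-style sequence:
	 * write the access register, read it back, then flush the
	 * pipeline so the new permissions take effect before any
	 * following coprocessor instruction.
	 */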
67 __asm__ __volatile__ (
68 "mcr p15, 0, %1, c1, c0, 2\n\t"
69 "mrc p15, 0, %0, c1, c0, 2\n\t"
70 "mov %0, %0\n\t"
71 "sub pc, pc, #4\n\t"
72 : "=r" (temp) : "r" (value));
73}
74
75
76/*
77 * Disable CP0/CP1 on boot, and let call_fpe() and the iWMMXt lazy
78 * switch code handle iWMMXt context switching.
79 */
80static int __init pj4_cp0_init(void)
81{
82 u32 cp_access;
83
84 cp_access = pj4_cp_access_read() & ~0xf;
85 pj4_cp_access_write(cp_access);
86
87 printk(KERN_INFO "PJ4 iWMMXt coprocessor enabled.\n");
88 elf_hwcap |= HWCAP_IWMMXT;
89 thread_register_notifier(&iwmmxt_notifier_block);
90
91 return 0;
92}
93
94late_initcall(pj4_cp0_init);
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 3e97483abcf0..19c6816db61e 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -1060,8 +1060,8 @@ static int ptrace_sethbpregs(struct task_struct *tsk, long num,
1060 goto out; 1060 goto out;
1061 1061
1062 if ((gen_type & implied_type) != gen_type) { 1062 if ((gen_type & implied_type) != gen_type) {
1063 ret = -EINVAL; 1063 ret = -EINVAL;
1064 goto out; 1064 goto out;
1065 } 1065 }
1066 1066
1067 attr.bp_len = gen_len; 1067 attr.bp_len = gen_len;
diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c
new file mode 100644
index 000000000000..2cdcc9287c74
--- /dev/null
+++ b/arch/arm/kernel/sched_clock.c
@@ -0,0 +1,69 @@
1/*
2 * sched_clock.c: support for extending counters to a full 64-bit ns counter
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#include <linux/clocksource.h>
9#include <linux/init.h>
10#include <linux/jiffies.h>
11#include <linux/kernel.h>
12#include <linux/sched.h>
13#include <linux/timer.h>
14
15#include <asm/sched_clock.h>
16
17static void sched_clock_poll(unsigned long wrap_ticks);
18static DEFINE_TIMER(sched_clock_timer, sched_clock_poll, 0, 0);
19static void (*sched_clock_update_fn)(void);
20
21static void sched_clock_poll(unsigned long wrap_ticks)
22{
23 mod_timer(&sched_clock_timer, round_jiffies(jiffies + wrap_ticks));
24 sched_clock_update_fn();
25}
26
27void __init init_sched_clock(struct clock_data *cd, void (*update)(void),
28 unsigned int clock_bits, unsigned long rate)
29{
30 unsigned long r, w;
31 u64 res, wrap;
32 char r_unit;
33
34 sched_clock_update_fn = update;
35
36 /* calculate the mult/shift to convert counter ticks to ns. */
37 clocks_calc_mult_shift(&cd->mult, &cd->shift, rate, NSEC_PER_SEC, 60);
38
39 r = rate;
40 if (r >= 4000000) {
41 r /= 1000000;
42 r_unit = 'M';
43 } else {
44 r /= 1000;
45 r_unit = 'k';
46 }
47
48	/* calculate how long until the counter wraps, in ms */
49 wrap = cyc_to_ns((1ULL << clock_bits) - 1, cd->mult, cd->shift);
50 do_div(wrap, NSEC_PER_MSEC);
51 w = wrap;
52
53 /* calculate the ns resolution of this counter */
54 res = cyc_to_ns(1ULL, cd->mult, cd->shift);
55 pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lums\n",
56 clock_bits, r, r_unit, res, w);
57
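	/*
	 * Worked example (editorial): a 32-bit counter at 24MHz wraps
	 * after (2^32 - 1) / 24000000 ~= 178.9s, so w comes out around
	 * 178956ms and the poll timer below refires roughly every 161s
	 * (w - w/10), comfortably before the counter wraps.
	 */
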
58 /*
59 * Start the timer to keep sched_clock() properly updated and
60	 * set the initial epoch.
61 */
62 sched_clock_timer.data = msecs_to_jiffies(w - (w / 10));
63 sched_clock_poll(sched_clock_timer.data);
64
65 /*
66 * Ensure that sched_clock() starts off at 0ns
67 */
68 cd->epoch_ns = 0;
69}
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 336f14e0e5c2..3455ad33de4c 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -75,9 +75,9 @@ extern void reboot_setup(char *str);
75 75
76unsigned int processor_id; 76unsigned int processor_id;
77EXPORT_SYMBOL(processor_id); 77EXPORT_SYMBOL(processor_id);
78unsigned int __machine_arch_type; 78unsigned int __machine_arch_type __read_mostly;
79EXPORT_SYMBOL(__machine_arch_type); 79EXPORT_SYMBOL(__machine_arch_type);
80unsigned int cacheid; 80unsigned int cacheid __read_mostly;
81EXPORT_SYMBOL(cacheid); 81EXPORT_SYMBOL(cacheid);
82 82
83unsigned int __atags_pointer __initdata; 83unsigned int __atags_pointer __initdata;
@@ -91,24 +91,24 @@ EXPORT_SYMBOL(system_serial_low);
91unsigned int system_serial_high; 91unsigned int system_serial_high;
92EXPORT_SYMBOL(system_serial_high); 92EXPORT_SYMBOL(system_serial_high);
93 93
94unsigned int elf_hwcap; 94unsigned int elf_hwcap __read_mostly;
95EXPORT_SYMBOL(elf_hwcap); 95EXPORT_SYMBOL(elf_hwcap);
96 96
97 97
98#ifdef MULTI_CPU 98#ifdef MULTI_CPU
99struct processor processor; 99struct processor processor __read_mostly;
100#endif 100#endif
101#ifdef MULTI_TLB 101#ifdef MULTI_TLB
102struct cpu_tlb_fns cpu_tlb; 102struct cpu_tlb_fns cpu_tlb __read_mostly;
103#endif 103#endif
104#ifdef MULTI_USER 104#ifdef MULTI_USER
105struct cpu_user_fns cpu_user; 105struct cpu_user_fns cpu_user __read_mostly;
106#endif 106#endif
107#ifdef MULTI_CACHE 107#ifdef MULTI_CACHE
108struct cpu_cache_fns cpu_cache; 108struct cpu_cache_fns cpu_cache __read_mostly;
109#endif 109#endif
110#ifdef CONFIG_OUTER_CACHE 110#ifdef CONFIG_OUTER_CACHE
111struct outer_cache_fns outer_cache; 111struct outer_cache_fns outer_cache __read_mostly;
112EXPORT_SYMBOL(outer_cache); 112EXPORT_SYMBOL(outer_cache);
113#endif 113#endif
114 114
@@ -126,6 +126,7 @@ EXPORT_SYMBOL(elf_platform);
126static const char *cpu_name; 126static const char *cpu_name;
127static const char *machine_name; 127static const char *machine_name;
128static char __initdata cmd_line[COMMAND_LINE_SIZE]; 128static char __initdata cmd_line[COMMAND_LINE_SIZE];
129struct machine_desc *machine_desc __initdata;
129 130
130static char default_command_line[COMMAND_LINE_SIZE] __initdata = CONFIG_CMDLINE; 131static char default_command_line[COMMAND_LINE_SIZE] __initdata = CONFIG_CMDLINE;
131static union { char c[4]; unsigned long l; } endian_test __initdata = { { 'l', '?', '?', 'b' } }; 132static union { char c[4]; unsigned long l; } endian_test __initdata = { { 'l', '?', '?', 'b' } };
@@ -708,13 +709,11 @@ static struct init_tags {
708 { 0, ATAG_NONE } 709 { 0, ATAG_NONE }
709}; 710};
710 711
711static void (*init_machine)(void) __initdata;
712
713static int __init customize_machine(void) 712static int __init customize_machine(void)
714{ 713{
715 /* customizes platform devices, or adds new ones */ 714 /* customizes platform devices, or adds new ones */
716 if (init_machine) 715 if (machine_desc->init_machine)
717 init_machine(); 716 machine_desc->init_machine();
718 return 0; 717 return 0;
719} 718}
720arch_initcall(customize_machine); 719arch_initcall(customize_machine);
@@ -809,6 +808,7 @@ void __init setup_arch(char **cmdline_p)
809 808
810 setup_processor(); 809 setup_processor();
811 mdesc = setup_machine(machine_arch_type); 810 mdesc = setup_machine(machine_arch_type);
811 machine_desc = mdesc;
812 machine_name = mdesc->name; 812 machine_name = mdesc->name;
813 813
814 if (mdesc->soft_reboot) 814 if (mdesc->soft_reboot)
@@ -868,13 +868,9 @@ void __init setup_arch(char **cmdline_p)
868 cpu_init(); 868 cpu_init();
869 tcm_init(); 869 tcm_init();
870 870
871 /* 871#ifdef CONFIG_MULTI_IRQ_HANDLER
872 * Set up various architecture-specific pointers 872 handle_arch_irq = mdesc->handle_irq;
873 */ 873#endif
874 arch_nr_irqs = mdesc->nr_irqs;
875 init_arch_irq = mdesc->init_irq;
876 system_timer = mdesc->timer;
877 init_machine = mdesc->init_machine;
878 874
879#ifdef CONFIG_VT 875#ifdef CONFIG_VT
880#if defined(CONFIG_VGA_CONSOLE) 876#if defined(CONFIG_VGA_CONSOLE)
@@ -884,6 +880,9 @@ void __init setup_arch(char **cmdline_p)
884#endif 880#endif
885#endif 881#endif
886 early_trap_init(); 882 early_trap_init();
883
884 if (mdesc->init_early)
885 mdesc->init_early();
887} 886}
888 887
889 888
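The net effect of the setup.c changes is that board-specific pointers now stay in (and are fetched on demand from) the machine descriptor instead of being copied into standalone variables during setup_arch(). A hedged sketch of a board file exercising the fields consumed above, with every board_* name hypothetical:

    #include <asm/mach/arch.h>

    static void __init board_init_early(void)
    {
    	/* early ioremaps, errata setup, etc. - runs at the end of setup_arch() */
    }

    MACHINE_START(MYBOARD, "My Board")
    	.boot_params	= 0x80000100,
    	.init_early	= board_init_early,
    	.init_irq	= board_init_irq,
    	.handle_irq	= board_handle_irq,	/* used iff CONFIG_MULTI_IRQ_HANDLER */
    	.timer		= &board_timer,		/* fetched by time_init(), see time.c below */
    	.init_machine	= board_init_machine,	/* called from customize_machine() */
    MACHINE_END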
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 9066473c0ebc..4539ebcb089f 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -16,6 +16,7 @@
16#include <linux/cache.h> 16#include <linux/cache.h>
17#include <linux/profile.h> 17#include <linux/profile.h>
18#include <linux/errno.h> 18#include <linux/errno.h>
19#include <linux/ftrace.h>
19#include <linux/mm.h> 20#include <linux/mm.h>
20#include <linux/err.h> 21#include <linux/err.h>
21#include <linux/cpu.h> 22#include <linux/cpu.h>
@@ -24,6 +25,7 @@
24#include <linux/irq.h> 25#include <linux/irq.h>
25#include <linux/percpu.h> 26#include <linux/percpu.h>
26#include <linux/clockchips.h> 27#include <linux/clockchips.h>
28#include <linux/completion.h>
27 29
28#include <asm/atomic.h> 30#include <asm/atomic.h>
29#include <asm/cacheflush.h> 31#include <asm/cacheflush.h>
@@ -37,7 +39,6 @@
37#include <asm/tlbflush.h> 39#include <asm/tlbflush.h>
38#include <asm/ptrace.h> 40#include <asm/ptrace.h>
39#include <asm/localtimer.h> 41#include <asm/localtimer.h>
40#include <asm/smp_plat.h>
41 42
42/* 43/*
43 * as from 2.5, kernels no longer have an init_tasks structure 44 * as from 2.5, kernels no longer have an init_tasks structure
@@ -46,64 +47,14 @@
46 */ 47 */
47struct secondary_data secondary_data; 48struct secondary_data secondary_data;
48 49
49/*
50 * structures for inter-processor calls
51 * - A collection of single bit ipi messages.
52 */
53struct ipi_data {
54 spinlock_t lock;
55 unsigned long ipi_count;
56 unsigned long bits;
57};
58
59static DEFINE_PER_CPU(struct ipi_data, ipi_data) = {
60 .lock = SPIN_LOCK_UNLOCKED,
61};
62
63enum ipi_msg_type { 50enum ipi_msg_type {
64 IPI_TIMER, 51 IPI_TIMER = 2,
65 IPI_RESCHEDULE, 52 IPI_RESCHEDULE,
66 IPI_CALL_FUNC, 53 IPI_CALL_FUNC,
67 IPI_CALL_FUNC_SINGLE, 54 IPI_CALL_FUNC_SINGLE,
68 IPI_CPU_STOP, 55 IPI_CPU_STOP,
69}; 56};
70 57
71static inline void identity_mapping_add(pgd_t *pgd, unsigned long start,
72 unsigned long end)
73{
74 unsigned long addr, prot;
75 pmd_t *pmd;
76
77 prot = PMD_TYPE_SECT | PMD_SECT_AP_WRITE;
78 if (cpu_architecture() <= CPU_ARCH_ARMv5TEJ && !cpu_is_xscale())
79 prot |= PMD_BIT4;
80
81 for (addr = start & PGDIR_MASK; addr < end;) {
82 pmd = pmd_offset(pgd + pgd_index(addr), addr);
83 pmd[0] = __pmd(addr | prot);
84 addr += SECTION_SIZE;
85 pmd[1] = __pmd(addr | prot);
86 addr += SECTION_SIZE;
87 flush_pmd_entry(pmd);
88 outer_clean_range(__pa(pmd), __pa(pmd + 1));
89 }
90}
91
92static inline void identity_mapping_del(pgd_t *pgd, unsigned long start,
93 unsigned long end)
94{
95 unsigned long addr;
96 pmd_t *pmd;
97
98 for (addr = start & PGDIR_MASK; addr < end; addr += PGDIR_SIZE) {
99 pmd = pmd_offset(pgd + pgd_index(addr), addr);
100 pmd[0] = __pmd(0);
101 pmd[1] = __pmd(0);
102 clean_pmd_entry(pmd);
103 outer_clean_range(__pa(pmd), __pa(pmd + 1));
104 }
105}
106
107int __cpuinit __cpu_up(unsigned int cpu) 58int __cpuinit __cpu_up(unsigned int cpu)
108{ 59{
109 struct cpuinfo_arm *ci = &per_cpu(cpu_data, cpu); 60 struct cpuinfo_arm *ci = &per_cpu(cpu_data, cpu);
@@ -177,8 +128,12 @@ int __cpuinit __cpu_up(unsigned int cpu)
177 barrier(); 128 barrier();
178 } 129 }
179 130
180 if (!cpu_online(cpu)) 131 if (!cpu_online(cpu)) {
132 pr_crit("CPU%u: failed to come online\n", cpu);
181 ret = -EIO; 133 ret = -EIO;
134 }
135 } else {
136 pr_err("CPU%u: failed to boot: %d\n", cpu, ret);
182 } 137 }
183 138
184 secondary_data.stack = NULL; 139 secondary_data.stack = NULL;
@@ -194,18 +149,12 @@ int __cpuinit __cpu_up(unsigned int cpu)
194 149
195 pgd_free(&init_mm, pgd); 150 pgd_free(&init_mm, pgd);
196 151
197 if (ret) {
198 printk(KERN_CRIT "CPU%u: processor failed to boot\n", cpu);
199
200 /*
201 * FIXME: We need to clean up the new idle thread. --rmk
202 */
203 }
204
205 return ret; 152 return ret;
206} 153}
207 154
208#ifdef CONFIG_HOTPLUG_CPU 155#ifdef CONFIG_HOTPLUG_CPU
156static void percpu_timer_stop(void);
157
209/* 158/*
210 * __cpu_disable runs on the processor to be shutdown. 159 * __cpu_disable runs on the processor to be shutdown.
211 */ 160 */
@@ -233,7 +182,7 @@ int __cpu_disable(void)
233 /* 182 /*
234 * Stop the local timer for this CPU. 183 * Stop the local timer for this CPU.
235 */ 184 */
236 local_timer_stop(); 185 percpu_timer_stop();
237 186
238 /* 187 /*
239 * Flush user cache and TLB mappings, and then remove this CPU 188 * Flush user cache and TLB mappings, and then remove this CPU
@@ -252,12 +201,20 @@ int __cpu_disable(void)
252 return 0; 201 return 0;
253} 202}
254 203
204static DECLARE_COMPLETION(cpu_died);
205
255/* 206/*
256 * called on the thread which is asking for a CPU to be shutdown - 207 * called on the thread which is asking for a CPU to be shutdown -
257 * waits until shutdown has completed, or it is timed out. 208 * waits until shutdown has completed, or it is timed out.
258 */ 209 */
259void __cpu_die(unsigned int cpu) 210void __cpu_die(unsigned int cpu)
260{ 211{
212 if (!wait_for_completion_timeout(&cpu_died, msecs_to_jiffies(5000))) {
213 pr_err("CPU%u: cpu didn't die\n", cpu);
214 return;
215 }
216 printk(KERN_NOTICE "CPU%u: shutdown\n", cpu);
217
261 if (!platform_cpu_kill(cpu)) 218 if (!platform_cpu_kill(cpu))
262 printk("CPU%u: unable to kill\n", cpu); 219 printk("CPU%u: unable to kill\n", cpu);
263} 220}
@@ -274,12 +231,17 @@ void __ref cpu_die(void)
274{ 231{
275 unsigned int cpu = smp_processor_id(); 232 unsigned int cpu = smp_processor_id();
276 233
277 local_irq_disable();
278 idle_task_exit(); 234 idle_task_exit();
279 235
236 local_irq_disable();
237 mb();
238
239 /* Tell __cpu_die() that this CPU is now safe to dispose of */
240 complete(&cpu_died);
241
280 /* 242 /*
281 * actual CPU shutdown procedure is at least platform (if not 243 * actual CPU shutdown procedure is at least platform (if not
282 * CPU) specific 244 * CPU) specific.
283 */ 245 */
284 platform_cpu_die(cpu); 246 platform_cpu_die(cpu);
285 247
@@ -289,6 +251,7 @@ void __ref cpu_die(void)
289 * to be repeated to undo the effects of taking the CPU offline. 251 * to be repeated to undo the effects of taking the CPU offline.
290 */ 252 */
291 __asm__("mov sp, %0\n" 253 __asm__("mov sp, %0\n"
254 " mov fp, #0\n"
292 " b secondary_start_kernel" 255 " b secondary_start_kernel"
293 : 256 :
294 : "r" (task_stack_page(current) + THREAD_SIZE - 8)); 257 : "r" (task_stack_page(current) + THREAD_SIZE - 8));
@@ -296,6 +259,17 @@ void __ref cpu_die(void)
296#endif /* CONFIG_HOTPLUG_CPU */ 259#endif /* CONFIG_HOTPLUG_CPU */
297 260
298/* 261/*
262 * Called by both boot and secondaries to move global data into
263 * per-processor storage.
264 */
265static void __cpuinit smp_store_cpu_info(unsigned int cpuid)
266{
267 struct cpuinfo_arm *cpu_info = &per_cpu(cpu_data, cpuid);
268
269 cpu_info->loops_per_jiffy = loops_per_jiffy;
270}
271
272/*
299 * This is the secondary CPU boot entry. We're using this CPU's 273 * This is the secondary CPU boot entry. We're using this CPU's
300 * idle thread stack, but a set of temporary page tables. 274 * idle thread stack, but a set of temporary page tables.
301 */ 275 */
@@ -319,6 +293,7 @@ asmlinkage void __cpuinit secondary_start_kernel(void)
319 293
320 cpu_init(); 294 cpu_init();
321 preempt_disable(); 295 preempt_disable();
296 trace_hardirqs_off();
322 297
323 /* 298 /*
324 * Give the platform a chance to do its own initialisation. 299 * Give the platform a chance to do its own initialisation.
@@ -352,17 +327,6 @@ asmlinkage void __cpuinit secondary_start_kernel(void)
352 cpu_idle(); 327 cpu_idle();
353} 328}
354 329
355/*
356 * Called by both boot and secondaries to move global data into
357 * per-processor storage.
358 */
359void __cpuinit smp_store_cpu_info(unsigned int cpuid)
360{
361 struct cpuinfo_arm *cpu_info = &per_cpu(cpu_data, cpuid);
362
363 cpu_info->loops_per_jiffy = loops_per_jiffy;
364}
365
366void __init smp_cpus_done(unsigned int max_cpus) 330void __init smp_cpus_done(unsigned int max_cpus)
367{ 331{
368 int cpu; 332 int cpu;
@@ -385,61 +349,80 @@ void __init smp_prepare_boot_cpu(void)
385 per_cpu(cpu_data, cpu).idle = current; 349 per_cpu(cpu_data, cpu).idle = current;
386} 350}
387 351
388static void send_ipi_message(const struct cpumask *mask, enum ipi_msg_type msg) 352void __init smp_prepare_cpus(unsigned int max_cpus)
389{ 353{
390 unsigned long flags; 354 unsigned int ncores = num_possible_cpus();
391 unsigned int cpu;
392 355
393 local_irq_save(flags); 356 smp_store_cpu_info(smp_processor_id());
394
395 for_each_cpu(cpu, mask) {
396 struct ipi_data *ipi = &per_cpu(ipi_data, cpu);
397
398 spin_lock(&ipi->lock);
399 ipi->bits |= 1 << msg;
400 spin_unlock(&ipi->lock);
401 }
402 357
403 /* 358 /*
404 * Call the platform specific cross-CPU call function. 359 * are we trying to boot more cores than exist?
405 */ 360 */
406 smp_cross_call(mask); 361 if (max_cpus > ncores)
362 max_cpus = ncores;
363
364 if (max_cpus > 1) {
365 /*
366 * Enable the local timer or broadcast device for the
367 * boot CPU, but only if we have more than one CPU.
368 */
369 percpu_timer_setup();
407 370
408 local_irq_restore(flags); 371 /*
372 * Initialise the SCU if there is more than one CPU
373 * and let them know where to start.
374 */
375 platform_smp_prepare_cpus(max_cpus);
376 }
409} 377}
410 378
411void arch_send_call_function_ipi_mask(const struct cpumask *mask) 379void arch_send_call_function_ipi_mask(const struct cpumask *mask)
412{ 380{
413 send_ipi_message(mask, IPI_CALL_FUNC); 381 smp_cross_call(mask, IPI_CALL_FUNC);
414} 382}
415 383
416void arch_send_call_function_single_ipi(int cpu) 384void arch_send_call_function_single_ipi(int cpu)
417{ 385{
418 send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE); 386 smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE);
419} 387}
420 388
421void show_ipi_list(struct seq_file *p) 389static const char *ipi_types[NR_IPI] = {
390#define S(x,s) [x - IPI_TIMER] = s
391 S(IPI_TIMER, "Timer broadcast interrupts"),
392 S(IPI_RESCHEDULE, "Rescheduling interrupts"),
393 S(IPI_CALL_FUNC, "Function call interrupts"),
394 S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"),
395 S(IPI_CPU_STOP, "CPU stop interrupts"),
396};
397
398void show_ipi_list(struct seq_file *p, int prec)
422{ 399{
423 unsigned int cpu; 400 unsigned int cpu, i;
424 401
425 seq_puts(p, "IPI:"); 402 for (i = 0; i < NR_IPI; i++) {
403 seq_printf(p, "%*s%u: ", prec - 1, "IPI", i);
426 404
427 for_each_present_cpu(cpu) 405 for_each_present_cpu(cpu)
428 seq_printf(p, " %10lu", per_cpu(ipi_data, cpu).ipi_count); 406 seq_printf(p, "%10u ",
407 __get_irq_stat(cpu, ipi_irqs[i]));
429 408
430 seq_putc(p, '\n'); 409 seq_printf(p, " %s\n", ipi_types[i]);
410 }
431} 411}
432 412
433void show_local_irqs(struct seq_file *p) 413u64 smp_irq_stat_cpu(unsigned int cpu)
434{ 414{
435 unsigned int cpu; 415 u64 sum = 0;
416 int i;
436 417
437 seq_printf(p, "LOC: "); 418 for (i = 0; i < NR_IPI; i++)
419 sum += __get_irq_stat(cpu, ipi_irqs[i]);
438 420
439 for_each_present_cpu(cpu) 421#ifdef CONFIG_LOCAL_TIMERS
440 seq_printf(p, "%10u ", irq_stat[cpu].local_timer_irqs); 422 sum += __get_irq_stat(cpu, local_timer_irqs);
423#endif
441 424
442 seq_putc(p, '\n'); 425 return sum;
443} 426}
444 427
445/* 428/*
@@ -456,24 +439,36 @@ static void ipi_timer(void)
456} 439}
457 440
458#ifdef CONFIG_LOCAL_TIMERS 441#ifdef CONFIG_LOCAL_TIMERS
459asmlinkage void __exception do_local_timer(struct pt_regs *regs) 442asmlinkage void __exception_irq_entry do_local_timer(struct pt_regs *regs)
460{ 443{
461 struct pt_regs *old_regs = set_irq_regs(regs); 444 struct pt_regs *old_regs = set_irq_regs(regs);
462 int cpu = smp_processor_id(); 445 int cpu = smp_processor_id();
463 446
464 if (local_timer_ack()) { 447 if (local_timer_ack()) {
465 irq_stat[cpu].local_timer_irqs++; 448 __inc_irq_stat(cpu, local_timer_irqs);
466 ipi_timer(); 449 ipi_timer();
467 } 450 }
468 451
469 set_irq_regs(old_regs); 452 set_irq_regs(old_regs);
470} 453}
454
455void show_local_irqs(struct seq_file *p, int prec)
456{
457 unsigned int cpu;
458
459 seq_printf(p, "%*s: ", prec, "LOC");
460
461 for_each_present_cpu(cpu)
462 seq_printf(p, "%10u ", __get_irq_stat(cpu, local_timer_irqs));
463
464 seq_printf(p, " Local timer interrupts\n");
465}
471#endif 466#endif
472 467
473#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST 468#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
474static void smp_timer_broadcast(const struct cpumask *mask) 469static void smp_timer_broadcast(const struct cpumask *mask)
475{ 470{
476 send_ipi_message(mask, IPI_TIMER); 471 smp_cross_call(mask, IPI_TIMER);
477} 472}
478#else 473#else
479#define smp_timer_broadcast NULL 474#define smp_timer_broadcast NULL
@@ -510,6 +505,21 @@ void __cpuinit percpu_timer_setup(void)
510 local_timer_setup(evt); 505 local_timer_setup(evt);
511} 506}
512 507
508#ifdef CONFIG_HOTPLUG_CPU
509/*
510 * The generic clock events code purposely does not stop the local timer
511 * on CPU_DEAD/CPU_DEAD_FROZEN hotplug events, so we have to do it
512 * manually here.
513 */
514static void percpu_timer_stop(void)
515{
516 unsigned int cpu = smp_processor_id();
517 struct clock_event_device *evt = &per_cpu(percpu_clockevent, cpu);
518
519 evt->set_mode(CLOCK_EVT_MODE_UNUSED, evt);
520}
521#endif
522
513static DEFINE_SPINLOCK(stop_lock); 523static DEFINE_SPINLOCK(stop_lock);
514 524
515/* 525/*
@@ -536,216 +546,76 @@ static void ipi_cpu_stop(unsigned int cpu)
536 546
537/* 547/*
538 * Main handler for inter-processor interrupts 548 * Main handler for inter-processor interrupts
539 *
540 * For ARM, the ipimask now only identifies a single
541 * category of IPI (Bit 1 IPIs have been replaced by a
542 * different mechanism):
543 *
544 * Bit 0 - Inter-processor function call
545 */ 549 */
546asmlinkage void __exception do_IPI(struct pt_regs *regs) 550asmlinkage void __exception_irq_entry do_IPI(int ipinr, struct pt_regs *regs)
547{ 551{
548 unsigned int cpu = smp_processor_id(); 552 unsigned int cpu = smp_processor_id();
549 struct ipi_data *ipi = &per_cpu(ipi_data, cpu);
550 struct pt_regs *old_regs = set_irq_regs(regs); 553 struct pt_regs *old_regs = set_irq_regs(regs);
551 554
552 ipi->ipi_count++; 555 if (ipinr >= IPI_TIMER && ipinr < IPI_TIMER + NR_IPI)
553 556 __inc_irq_stat(cpu, ipi_irqs[ipinr - IPI_TIMER]);
554 for (;;) {
555 unsigned long msgs;
556
557 spin_lock(&ipi->lock);
558 msgs = ipi->bits;
559 ipi->bits = 0;
560 spin_unlock(&ipi->lock);
561 557
562 if (!msgs) 558 switch (ipinr) {
563 break; 559 case IPI_TIMER:
564 560 ipi_timer();
565 do { 561 break;
566 unsigned nextmsg;
567
568 nextmsg = msgs & -msgs;
569 msgs &= ~nextmsg;
570 nextmsg = ffz(~nextmsg);
571
572 switch (nextmsg) {
573 case IPI_TIMER:
574 ipi_timer();
575 break;
576 562
577 case IPI_RESCHEDULE: 563 case IPI_RESCHEDULE:
578 /* 564 /*
579 * nothing more to do - everything is 565 * nothing more to do - everything is
580 * done on the interrupt return path 566 * done on the interrupt return path
581 */ 567 */
582 break; 568 break;
583 569
584 case IPI_CALL_FUNC: 570 case IPI_CALL_FUNC:
585 generic_smp_call_function_interrupt(); 571 generic_smp_call_function_interrupt();
586 break; 572 break;
587 573
588 case IPI_CALL_FUNC_SINGLE: 574 case IPI_CALL_FUNC_SINGLE:
589 generic_smp_call_function_single_interrupt(); 575 generic_smp_call_function_single_interrupt();
590 break; 576 break;
591 577
592 case IPI_CPU_STOP: 578 case IPI_CPU_STOP:
593 ipi_cpu_stop(cpu); 579 ipi_cpu_stop(cpu);
594 break; 580 break;
595 581
596 default: 582 default:
597 printk(KERN_CRIT "CPU%u: Unknown IPI message 0x%x\n", 583 printk(KERN_CRIT "CPU%u: Unknown IPI message 0x%x\n",
598 cpu, nextmsg); 584 cpu, ipinr);
599 break; 585 break;
600 }
601 } while (msgs);
602 } 586 }
603
604 set_irq_regs(old_regs); 587 set_irq_regs(old_regs);
605} 588}
606 589
607void smp_send_reschedule(int cpu) 590void smp_send_reschedule(int cpu)
608{ 591{
609 send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE); 592 smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE);
610} 593}
611 594
612void smp_send_stop(void) 595void smp_send_stop(void)
613{ 596{
614 cpumask_t mask = cpu_online_map; 597 unsigned long timeout;
615 cpu_clear(smp_processor_id(), mask);
616 if (!cpus_empty(mask))
617 send_ipi_message(&mask, IPI_CPU_STOP);
618}
619 598
620/* 599 if (num_online_cpus() > 1) {
621 * not supported here 600 cpumask_t mask = cpu_online_map;
622 */ 601 cpu_clear(smp_processor_id(), mask);
623int setup_profiling_timer(unsigned int multiplier)
624{
625 return -EINVAL;
626}
627 602
628static void 603 smp_cross_call(&mask, IPI_CPU_STOP);
629on_each_cpu_mask(void (*func)(void *), void *info, int wait, 604 }
630 const struct cpumask *mask)
631{
632 preempt_disable();
633 605
634 smp_call_function_many(mask, func, info, wait); 606 /* Wait up to one second for other CPUs to stop */
635 if (cpumask_test_cpu(smp_processor_id(), mask)) 607 timeout = USEC_PER_SEC;
636 func(info); 608 while (num_online_cpus() > 1 && timeout--)
609 udelay(1);
637 610
638 preempt_enable(); 611 if (num_online_cpus() > 1)
612 pr_warning("SMP: failed to stop secondary CPUs\n");
639} 613}
640 614
641/**********************************************************************/
642
643/* 615/*
644 * TLB operations 616 * not supported here
645 */ 617 */
646struct tlb_args { 618int setup_profiling_timer(unsigned int multiplier)
647 struct vm_area_struct *ta_vma;
648 unsigned long ta_start;
649 unsigned long ta_end;
650};
651
652static inline void ipi_flush_tlb_all(void *ignored)
653{
654 local_flush_tlb_all();
655}
656
657static inline void ipi_flush_tlb_mm(void *arg)
658{
659 struct mm_struct *mm = (struct mm_struct *)arg;
660
661 local_flush_tlb_mm(mm);
662}
663
664static inline void ipi_flush_tlb_page(void *arg)
665{
666 struct tlb_args *ta = (struct tlb_args *)arg;
667
668 local_flush_tlb_page(ta->ta_vma, ta->ta_start);
669}
670
671static inline void ipi_flush_tlb_kernel_page(void *arg)
672{
673 struct tlb_args *ta = (struct tlb_args *)arg;
674
675 local_flush_tlb_kernel_page(ta->ta_start);
676}
677
678static inline void ipi_flush_tlb_range(void *arg)
679{
680 struct tlb_args *ta = (struct tlb_args *)arg;
681
682 local_flush_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end);
683}
684
685static inline void ipi_flush_tlb_kernel_range(void *arg)
686{
687 struct tlb_args *ta = (struct tlb_args *)arg;
688
689 local_flush_tlb_kernel_range(ta->ta_start, ta->ta_end);
690}
691
692void flush_tlb_all(void)
693{
694 if (tlb_ops_need_broadcast())
695 on_each_cpu(ipi_flush_tlb_all, NULL, 1);
696 else
697 local_flush_tlb_all();
698}
699
700void flush_tlb_mm(struct mm_struct *mm)
701{
702 if (tlb_ops_need_broadcast())
703 on_each_cpu_mask(ipi_flush_tlb_mm, mm, 1, mm_cpumask(mm));
704 else
705 local_flush_tlb_mm(mm);
706}
707
708void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
709{
710 if (tlb_ops_need_broadcast()) {
711 struct tlb_args ta;
712 ta.ta_vma = vma;
713 ta.ta_start = uaddr;
714 on_each_cpu_mask(ipi_flush_tlb_page, &ta, 1, mm_cpumask(vma->vm_mm));
715 } else
716 local_flush_tlb_page(vma, uaddr);
717}
718
719void flush_tlb_kernel_page(unsigned long kaddr)
720{
721 if (tlb_ops_need_broadcast()) {
722 struct tlb_args ta;
723 ta.ta_start = kaddr;
724 on_each_cpu(ipi_flush_tlb_kernel_page, &ta, 1);
725 } else
726 local_flush_tlb_kernel_page(kaddr);
727}
728
729void flush_tlb_range(struct vm_area_struct *vma,
730 unsigned long start, unsigned long end)
731{
732 if (tlb_ops_need_broadcast()) {
733 struct tlb_args ta;
734 ta.ta_vma = vma;
735 ta.ta_start = start;
736 ta.ta_end = end;
737 on_each_cpu_mask(ipi_flush_tlb_range, &ta, 1, mm_cpumask(vma->vm_mm));
738 } else
739 local_flush_tlb_range(vma, start, end);
740}
741
742void flush_tlb_kernel_range(unsigned long start, unsigned long end)
743{ 619{
744 if (tlb_ops_need_broadcast()) { 620 return -EINVAL;
745 struct tlb_args ta;
746 ta.ta_start = start;
747 ta.ta_end = end;
748 on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1);
749 } else
750 local_flush_tlb_kernel_range(start, end);
751} 621}
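With send_ipi_message() and the per-CPU bitmask gone, smp_cross_call() now carries the IPI number directly, and the enum starts at IPI_TIMER = 2 so that software-generated interrupts 0 and 1 stay free for platform use. How smp_cross_call() reaches the hardware remains platform code, not part of this diff; on GIC-based platforms of this era it was roughly the following sketch:

    /* sketch: mach/include/mach/smp.h on a GIC-based platform */
    #include <asm/hardware/gic.h>

    static inline void smp_cross_call(const struct cpumask *mask, int ipi)
    {
    	gic_raise_softirq(mask, ipi);	/* maps IPI numbers onto SGIs 0-15 */
    }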
diff --git a/arch/arm/kernel/smp_tlb.c b/arch/arm/kernel/smp_tlb.c
new file mode 100644
index 000000000000..7dcb35285be7
--- /dev/null
+++ b/arch/arm/kernel/smp_tlb.c
@@ -0,0 +1,139 @@
1/*
2 * linux/arch/arm/kernel/smp_tlb.c
3 *
4 * Copyright (C) 2002 ARM Limited, All Rights Reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10#include <linux/preempt.h>
11#include <linux/smp.h>
12
13#include <asm/smp_plat.h>
14#include <asm/tlbflush.h>
15
16static void on_each_cpu_mask(void (*func)(void *), void *info, int wait,
17 const struct cpumask *mask)
18{
19 preempt_disable();
20
21 smp_call_function_many(mask, func, info, wait);
22 if (cpumask_test_cpu(smp_processor_id(), mask))
23 func(info);
24
25 preempt_enable();
26}
27
28/**********************************************************************/
29
30/*
31 * TLB operations
32 */
33struct tlb_args {
34 struct vm_area_struct *ta_vma;
35 unsigned long ta_start;
36 unsigned long ta_end;
37};
38
39static inline void ipi_flush_tlb_all(void *ignored)
40{
41 local_flush_tlb_all();
42}
43
44static inline void ipi_flush_tlb_mm(void *arg)
45{
46 struct mm_struct *mm = (struct mm_struct *)arg;
47
48 local_flush_tlb_mm(mm);
49}
50
51static inline void ipi_flush_tlb_page(void *arg)
52{
53 struct tlb_args *ta = (struct tlb_args *)arg;
54
55 local_flush_tlb_page(ta->ta_vma, ta->ta_start);
56}
57
58static inline void ipi_flush_tlb_kernel_page(void *arg)
59{
60 struct tlb_args *ta = (struct tlb_args *)arg;
61
62 local_flush_tlb_kernel_page(ta->ta_start);
63}
64
65static inline void ipi_flush_tlb_range(void *arg)
66{
67 struct tlb_args *ta = (struct tlb_args *)arg;
68
69 local_flush_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end);
70}
71
72static inline void ipi_flush_tlb_kernel_range(void *arg)
73{
74 struct tlb_args *ta = (struct tlb_args *)arg;
75
76 local_flush_tlb_kernel_range(ta->ta_start, ta->ta_end);
77}
78
79void flush_tlb_all(void)
80{
81 if (tlb_ops_need_broadcast())
82 on_each_cpu(ipi_flush_tlb_all, NULL, 1);
83 else
84 local_flush_tlb_all();
85}
86
87void flush_tlb_mm(struct mm_struct *mm)
88{
89 if (tlb_ops_need_broadcast())
90 on_each_cpu_mask(ipi_flush_tlb_mm, mm, 1, mm_cpumask(mm));
91 else
92 local_flush_tlb_mm(mm);
93}
94
95void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
96{
97 if (tlb_ops_need_broadcast()) {
98 struct tlb_args ta;
99 ta.ta_vma = vma;
100 ta.ta_start = uaddr;
101 on_each_cpu_mask(ipi_flush_tlb_page, &ta, 1, mm_cpumask(vma->vm_mm));
102 } else
103 local_flush_tlb_page(vma, uaddr);
104}
105
106void flush_tlb_kernel_page(unsigned long kaddr)
107{
108 if (tlb_ops_need_broadcast()) {
109 struct tlb_args ta;
110 ta.ta_start = kaddr;
111 on_each_cpu(ipi_flush_tlb_kernel_page, &ta, 1);
112 } else
113 local_flush_tlb_kernel_page(kaddr);
114}
115
116void flush_tlb_range(struct vm_area_struct *vma,
117 unsigned long start, unsigned long end)
118{
119 if (tlb_ops_need_broadcast()) {
120 struct tlb_args ta;
121 ta.ta_vma = vma;
122 ta.ta_start = start;
123 ta.ta_end = end;
124 on_each_cpu_mask(ipi_flush_tlb_range, &ta, 1, mm_cpumask(vma->vm_mm));
125 } else
126 local_flush_tlb_range(vma, start, end);
127}
128
129void flush_tlb_kernel_range(unsigned long start, unsigned long end)
130{
131 if (tlb_ops_need_broadcast()) {
132 struct tlb_args ta;
133 ta.ta_start = start;
134 ta.ta_end = end;
135 on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1);
136 } else
137 local_flush_tlb_kernel_range(start, end);
138}
139
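The broadcast-versus-local decision above hinges on tlb_ops_need_broadcast() from asm/smp_plat.h. As a rough reconstruction of that helper from the same era (not part of this diff): TLB maintenance operations only need a software IPI broadcast when the CPU does not broadcast them in hardware, which ID_MMFR3 advertises:

    /* sketch: asm/smp_plat.h */
    #include <asm/cputype.h>

    static inline int tlb_ops_need_broadcast(void)
    {
    	/* maintenance-broadcast field, ID_MMFR3[15:12]: a value >= 2
    	   means TLB ops are broadcast in hardware, so no IPI is needed */
    	return ((read_cpuid_ext(CPUID_EXT_MMFR3) >> 12) & 0xf) < 2;
    }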
diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c
index 35882fbf37f9..dd790745b3ef 100644
--- a/arch/arm/kernel/smp_twd.c
+++ b/arch/arm/kernel/smp_twd.c
@@ -127,8 +127,6 @@ static void __cpuinit twd_calibrate_rate(void)
127 */ 127 */
128void __cpuinit twd_timer_setup(struct clock_event_device *clk) 128void __cpuinit twd_timer_setup(struct clock_event_device *clk)
129{ 129{
130 unsigned long flags;
131
132 twd_calibrate_rate(); 130 twd_calibrate_rate();
133 131
134 clk->name = "local_timer"; 132 clk->name = "local_timer";
@@ -143,20 +141,7 @@ void __cpuinit twd_timer_setup(struct clock_event_device *clk)
143 clk->min_delta_ns = clockevent_delta2ns(0xf, clk); 141 clk->min_delta_ns = clockevent_delta2ns(0xf, clk);
144 142
145 /* Make sure our local interrupt controller has this enabled */ 143 /* Make sure our local interrupt controller has this enabled */
146 local_irq_save(flags); 144 gic_enable_ppi(clk->irq);
147 irq_to_desc(clk->irq)->status |= IRQ_NOPROBE;
148 get_irq_chip(clk->irq)->unmask(clk->irq);
149 local_irq_restore(flags);
150 145
151 clockevents_register_device(clk); 146 clockevents_register_device(clk);
152} 147}
153
154#ifdef CONFIG_HOTPLUG_CPU
155/*
156 * take a local timer down
157 */
158void twd_timer_stop(void)
159{
160 __raw_writel(0, twd_base + TWD_TIMER_CONTROL);
161}
162#endif
diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c
new file mode 100644
index 000000000000..7a5760922914
--- /dev/null
+++ b/arch/arm/kernel/swp_emulate.c
@@ -0,0 +1,267 @@
1/*
2 * linux/arch/arm/kernel/swp_emulate.c
3 *
4 * Copyright (C) 2009 ARM Limited
5 * __user_* functions adapted from include/asm/uaccess.h
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 *
11 * Implements emulation of the SWP/SWPB instructions using load-exclusive and
12 * store-exclusive for processors that have them disabled (or future ones that
13 * might not implement them).
14 *
15 * Syntax of SWP{B} instruction: SWP{B}<c> <Rt>, <Rt2>, [<Rn>]
16 * Where: Rt = destination
17 * Rt2 = source
18 * Rn = address
19 */
20
21#include <linux/init.h>
22#include <linux/kernel.h>
23#include <linux/proc_fs.h>
24#include <linux/sched.h>
25#include <linux/syscalls.h>
26#include <linux/perf_event.h>
27
28#include <asm/traps.h>
29#include <asm/uaccess.h>
30
31/*
32 * Error-checking SWP macros implemented using ldrex{b}/strex{b}
33 */
34#define __user_swpX_asm(data, addr, res, temp, B) \
35 __asm__ __volatile__( \
36 " mov %2, %1\n" \
37 "0: ldrex"B" %1, [%3]\n" \
38 "1: strex"B" %0, %2, [%3]\n" \
39 " cmp %0, #0\n" \
40 " movne %0, %4\n" \
41 "2:\n" \
42 " .section .fixup,\"ax\"\n" \
43 " .align 2\n" \
44 "3: mov %0, %5\n" \
45 " b 2b\n" \
46 " .previous\n" \
47 " .section __ex_table,\"a\"\n" \
48 " .align 3\n" \
49 " .long 0b, 3b\n" \
50 " .long 1b, 3b\n" \
51 " .previous" \
52 : "=&r" (res), "+r" (data), "=&r" (temp) \
53 : "r" (addr), "i" (-EAGAIN), "i" (-EFAULT) \
54 : "cc", "memory")
55
56#define __user_swp_asm(data, addr, res, temp) \
57 __user_swpX_asm(data, addr, res, temp, "")
58#define __user_swpb_asm(data, addr, res, temp) \
59 __user_swpX_asm(data, addr, res, temp, "b")
60
61/*
62 * Macros/defines for extracting register numbers from instruction.
63 */
64#define EXTRACT_REG_NUM(instruction, offset) \
65 (((instruction) & (0xf << (offset))) >> (offset))
66#define RN_OFFSET 16
67#define RT_OFFSET 12
68#define RT2_OFFSET 0
69/*
70 * Bit 22 of the instruction encoding distinguishes between
71 * the SWP and SWPB variants (bit set means SWPB).
72 */
73#define TYPE_SWPB (1 << 22)
74
75static unsigned long swpcounter;
76static unsigned long swpbcounter;
77static unsigned long abtcounter;
78static pid_t previous_pid;
79
80#ifdef CONFIG_PROC_FS
81static int proc_read_status(char *page, char **start, off_t off, int count,
82 int *eof, void *data)
83{
84 char *p = page;
85 int len;
86
87 p += sprintf(p, "Emulated SWP:\t\t%lu\n", swpcounter);
88 p += sprintf(p, "Emulated SWPB:\t\t%lu\n", swpbcounter);
89 p += sprintf(p, "Aborted SWP{B}:\t\t%lu\n", abtcounter);
90 if (previous_pid != 0)
91 p += sprintf(p, "Last process:\t\t%d\n", previous_pid);
92
93 len = (p - page) - off;
94 if (len < 0)
95 len = 0;
96
97 *eof = (len <= count) ? 1 : 0;
98 *start = page + off;
99
100 return len;
101}
102#endif
103
104/*
105 * Set up process info to signal segmentation fault - called on access error.
106 */
107static void set_segfault(struct pt_regs *regs, unsigned long addr)
108{
109 siginfo_t info;
110
111 if (find_vma(current->mm, addr) == NULL)
112 info.si_code = SEGV_MAPERR;
113 else
114 info.si_code = SEGV_ACCERR;
115
116 info.si_signo = SIGSEGV;
117 info.si_errno = 0;
118 info.si_addr = (void *) instruction_pointer(regs);
119
120 pr_debug("SWP{B} emulation: access caused memory abort!\n");
121 arm_notify_die("Illegal memory access", regs, &info, 0, 0);
122
123 abtcounter++;
124}
125
126static int emulate_swpX(unsigned int address, unsigned int *data,
127 unsigned int type)
128{
129 unsigned int res = 0;
130
131 if ((type != TYPE_SWPB) && (address & 0x3)) {
132 /* SWP to unaligned address not permitted */
133 pr_debug("SWP instruction on unaligned pointer!\n");
134 return -EFAULT;
135 }
136
137 while (1) {
138 unsigned long temp;
139
140 /*
141 * Barrier required between accessing a protected resource and
142 * releasing a lock for it. Legacy code might not have done
143 * this, and we cannot tell whether the code being
144 * emulated did, so always insert one.
145 */
146 smp_mb();
147
148 if (type == TYPE_SWPB)
149 __user_swpb_asm(*data, address, res, temp);
150 else
151 __user_swp_asm(*data, address, res, temp);
152
153 if (likely(res != -EAGAIN) || signal_pending(current))
154 break;
155
156 cond_resched();
157 }
158
159 if (res == 0) {
160 /*
161 * Barrier also required between acquiring a lock for a
162 * protected resource and accessing the resource. Inserted for the
163 * same reason as above.
164 */
165 smp_mb();
166
167 if (type == TYPE_SWPB)
168 swpbcounter++;
169 else
170 swpcounter++;
171 }
172
173 return res;
174}
175
176/*
177 * swp_handler logs the id of the calling process, dissects the instruction, sanity
178 * checks the memory location, calls emulate_swpX for the actual operation and
179 * deals with fixup/error handling before returning
180 */
181static int swp_handler(struct pt_regs *regs, unsigned int instr)
182{
183 unsigned int address, destreg, data, type;
184 unsigned int res = 0;
185
186 perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, regs->ARM_pc);
187
188 if (current->pid != previous_pid) {
189 pr_debug("\"%s\" (%ld) uses deprecated SWP{B} instruction\n",
190 current->comm, (unsigned long)current->pid);
191 previous_pid = current->pid;
192 }
193
194 address = regs->uregs[EXTRACT_REG_NUM(instr, RN_OFFSET)];
195 data = regs->uregs[EXTRACT_REG_NUM(instr, RT2_OFFSET)];
196 destreg = EXTRACT_REG_NUM(instr, RT_OFFSET);
197
198 type = instr & TYPE_SWPB;
199
200 pr_debug("addr in r%d->0x%08x, dest is r%d, source in r%d->0x%08x\n",
201 EXTRACT_REG_NUM(instr, RN_OFFSET), address,
202 destreg, EXTRACT_REG_NUM(instr, RT2_OFFSET), data);
203
204 /* Check the word-aligned address is in an accessible range for both SWP and SWPB */
205 if (!access_ok(VERIFY_WRITE, (address & ~3), 4)) {
206 pr_debug("SWP{B} emulation: access to %p not allowed!\n",
207 (void *)address);
208 res = -EFAULT;
209 } else {
210 res = emulate_swpX(address, &data, type);
211 }
212
213 if (res == 0) {
214 /*
215 * On successful emulation, revert the adjustment to the PC
216 * made in kernel/traps.c in order to resume execution at the
217 * instruction following the SWP{B}.
218 */
219 regs->ARM_pc += 4;
220 regs->uregs[destreg] = data;
221 } else if (res == -EFAULT) {
222 /*
223 * Memory errors do not mean emulation failed.
224 * Set up signal info to return SEGV, then return OK
225 */
226 set_segfault(regs, address);
227 }
228
229 return 0;
230}
231
232/*
233 * Only emulate SWP/SWPB executed in ARM state/User mode.
234 * The kernel must be SWP-free, and SWP{B} does not exist in Thumb/ThumbEE.
235 */
236static struct undef_hook swp_hook = {
237 .instr_mask = 0x0fb00ff0,
238 .instr_val = 0x01000090,
239 .cpsr_mask = MODE_MASK | PSR_T_BIT | PSR_J_BIT,
240 .cpsr_val = USR_MODE,
241 .fn = swp_handler
242};
243
244/*
245 * Register handler and create status file in /proc/cpu
247 * Invoked as a late_initcall, since it is not needed before init is spawned.
247 */
248static int __init swp_emulation_init(void)
249{
250#ifdef CONFIG_PROC_FS
251 struct proc_dir_entry *res;
252
253 res = create_proc_entry("cpu/swp_emulation", S_IRUGO, NULL);
254
255 if (!res)
256 return -ENOMEM;
257
258 res->read_proc = proc_read_status;
259#endif /* CONFIG_PROC_FS */
260
261 printk(KERN_NOTICE "Registering SWP/SWPB emulation handler\n");
262 register_undef_hook(&swp_hook);
263
264 return 0;
265}
266
267late_initcall(swp_emulation_init);
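The hook's mask/value pair is what restricts emulation to SWP{B}: in the ARM encoding (cond 0001 0B00 Rn Rt 0000 1001 Rt2) the mask 0x0fb00ff0 discards the condition field, the three register fields and bit 22 (the byte variant), so both instructions reduce to 0x01000090. An illustrative, self-contained check derived from the constants above:

    /* illustrative only: would this 32-bit ARM opcode reach swp_handler? */
    static int matches_swp_hook(unsigned int instr)
    {
    	return (instr & 0x0fb00ff0) == 0x01000090;
    }

    /* e.g. 0xe1012093 (SWP r2, r3, [r1]) and 0xe1412093 (SWPB r2, r3, [r1],
       bit 22 set) both match; the register fields fall out under the mask. */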
diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c
index 38c261f9951c..f1e2eb19a67d 100644
--- a/arch/arm/kernel/time.c
+++ b/arch/arm/kernel/time.c
@@ -30,12 +30,13 @@
30#include <asm/leds.h> 30#include <asm/leds.h>
31#include <asm/thread_info.h> 31#include <asm/thread_info.h>
32#include <asm/stacktrace.h> 32#include <asm/stacktrace.h>
33#include <asm/mach/arch.h>
33#include <asm/mach/time.h> 34#include <asm/mach/time.h>
34 35
35/* 36/*
36 * Our system timer. 37 * Our system timer.
37 */ 38 */
38struct sys_timer *system_timer; 39static struct sys_timer *system_timer;
39 40
40#if defined(CONFIG_RTC_DRV_CMOS) || defined(CONFIG_RTC_DRV_CMOS_MODULE) 41#if defined(CONFIG_RTC_DRV_CMOS) || defined(CONFIG_RTC_DRV_CMOS_MODULE)
41/* this needs a better home */ 42/* this needs a better home */
@@ -160,6 +161,7 @@ device_initcall(timer_init_sysfs);
160 161
161void __init time_init(void) 162void __init time_init(void)
162{ 163{
164 system_timer = machine_desc->timer;
163 system_timer->init(); 165 system_timer->init();
164} 166}
165 167
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 446aee97436f..ee57640ba2bb 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -37,6 +37,8 @@
37 37
38static const char *handler[]= { "prefetch abort", "data abort", "address exception", "interrupt" }; 38static const char *handler[]= { "prefetch abort", "data abort", "address exception", "interrupt" };
39 39
40void *vectors_page;
41
40#ifdef CONFIG_DEBUG_USER 42#ifdef CONFIG_DEBUG_USER
41unsigned int user_debug; 43unsigned int user_debug;
42 44
@@ -708,19 +710,19 @@ void __readwrite_bug(const char *fn)
708} 710}
709EXPORT_SYMBOL(__readwrite_bug); 711EXPORT_SYMBOL(__readwrite_bug);
710 712
711void __pte_error(const char *file, int line, unsigned long val) 713void __pte_error(const char *file, int line, pte_t pte)
712{ 714{
713 printk("%s:%d: bad pte %08lx.\n", file, line, val); 715 printk("%s:%d: bad pte %08lx.\n", file, line, pte_val(pte));
714} 716}
715 717
716void __pmd_error(const char *file, int line, unsigned long val) 718void __pmd_error(const char *file, int line, pmd_t pmd)
717{ 719{
718 printk("%s:%d: bad pmd %08lx.\n", file, line, val); 720 printk("%s:%d: bad pmd %08lx.\n", file, line, pmd_val(pmd));
719} 721}
720 722
721void __pgd_error(const char *file, int line, unsigned long val) 723void __pgd_error(const char *file, int line, pgd_t pgd)
722{ 724{
723 printk("%s:%d: bad pgd %08lx.\n", file, line, val); 725 printk("%s:%d: bad pgd %08lx.\n", file, line, pgd_val(pgd));
724} 726}
725 727
726asmlinkage void __div0(void) 728asmlinkage void __div0(void)
@@ -756,7 +758,11 @@ static void __init kuser_get_tls_init(unsigned long vectors)
756 758
757void __init early_trap_init(void) 759void __init early_trap_init(void)
758{ 760{
761#if defined(CONFIG_CPU_USE_DOMAINS)
759 unsigned long vectors = CONFIG_VECTORS_BASE; 762 unsigned long vectors = CONFIG_VECTORS_BASE;
763#else
764 unsigned long vectors = (unsigned long)vectors_page;
765#endif
760 extern char __stubs_start[], __stubs_end[]; 766 extern char __stubs_start[], __stubs_end[];
761 extern char __vectors_start[], __vectors_end[]; 767 extern char __vectors_start[], __vectors_end[];
762 extern char __kuser_helper_start[], __kuser_helper_end[]; 768 extern char __kuser_helper_start[], __kuser_helper_end[];
@@ -780,10 +786,10 @@ void __init early_trap_init(void)
780 * Copy signal return handlers into the vector page, and 786 * Copy signal return handlers into the vector page, and
781 * set sigreturn to be a pointer to these. 787 * set sigreturn to be a pointer to these.
782 */ 788 */
783 memcpy((void *)KERN_SIGRETURN_CODE, sigreturn_codes, 789 memcpy((void *)(vectors + KERN_SIGRETURN_CODE - CONFIG_VECTORS_BASE),
784 sizeof(sigreturn_codes)); 790 sigreturn_codes, sizeof(sigreturn_codes));
785 memcpy((void *)KERN_RESTART_CODE, syscall_restart_code, 791 memcpy((void *)(vectors + KERN_RESTART_CODE - CONFIG_VECTORS_BASE),
786 sizeof(syscall_restart_code)); 792 syscall_restart_code, sizeof(syscall_restart_code));
787 793
788 flush_icache_range(vectors, vectors + PAGE_SIZE); 794 flush_icache_range(vectors, vectors + PAGE_SIZE);
789 modify_domain(DOMAIN_USER, DOMAIN_CLIENT); 795 modify_domain(DOMAIN_USER, DOMAIN_CLIENT);
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index cead8893b46b..86b66f3f2031 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -101,6 +101,7 @@ SECTIONS
101 __exception_text_start = .; 101 __exception_text_start = .;
102 *(.exception.text) 102 *(.exception.text)
103 __exception_text_end = .; 103 __exception_text_end = .;
104 IRQENTRY_TEXT
104 TEXT_TEXT 105 TEXT_TEXT
105 SCHED_TEXT 106 SCHED_TEXT
106 LOCK_TEXT 107 LOCK_TEXT
@@ -167,6 +168,7 @@ SECTIONS
167 168
168 NOSAVE_DATA 169 NOSAVE_DATA
169 CACHELINE_ALIGNED_DATA(32) 170 CACHELINE_ALIGNED_DATA(32)
171 READ_MOSTLY_DATA(32)
170 172
171 /* 173 /*
172 * The exception fixup table (might need resorting at runtime) 174 * The exception fixup table (might need resorting at runtime)