Diffstat (limited to 'arch/arm')
-rw-r--r--  arch/arm/Kconfig | 26
-rw-r--r--  arch/arm/Kconfig.debug | 2
-rw-r--r--  arch/arm/common/timer-sp.c | 4
-rw-r--r--  arch/arm/include/asm/sched_clock.h | 118
-rw-r--r--  arch/arm/include/asm/system.h | 5
-rw-r--r--  arch/arm/include/asm/traps.h | 23
-rw-r--r--  arch/arm/kernel/Makefile | 4
-rw-r--r--  arch/arm/kernel/entry-common.S | 202
-rw-r--r--  arch/arm/kernel/ftrace.c | 103
-rw-r--r--  arch/arm/kernel/irq.c | 4
-rw-r--r--  arch/arm/kernel/perf_event.c | 2448
-rw-r--r--  arch/arm/kernel/perf_event_v6.c | 672
-rw-r--r--  arch/arm/kernel/perf_event_v7.c | 906
-rw-r--r--  arch/arm/kernel/perf_event_xscale.c | 807
-rw-r--r--  arch/arm/kernel/sched_clock.c | 69
-rw-r--r--  arch/arm/kernel/smp.c | 5
-rw-r--r--  arch/arm/kernel/vmlinux.lds.S | 1
-rw-r--r--  arch/arm/mach-at91/at91rm9200_time.c | 4
-rw-r--r--  arch/arm/mach-at91/at91sam926x_time.c | 4
-rw-r--r--  arch/arm/mach-bcmring/core.c | 14
-rw-r--r--  arch/arm/mach-davinci/time.c | 7
-rw-r--r--  arch/arm/mach-integrator/integrator_ap.c | 4
-rw-r--r--  arch/arm/mach-ixp4xx/common.c | 35
-rw-r--r--  arch/arm/mach-lpc32xx/timer.c | 5
-rw-r--r--  arch/arm/mach-mmp/time.c | 39
-rw-r--r--  arch/arm/mach-msm/Kconfig | 2
-rw-r--r--  arch/arm/mach-msm/timer.c | 5
-rw-r--r--  arch/arm/mach-netx/time.c | 5
-rw-r--r--  arch/arm/mach-ns9xxx/time-ns9360.c | 6
-rw-r--r--  arch/arm/mach-omap1/time.c | 6
-rw-r--r--  arch/arm/mach-omap2/timer-gp.c | 5
-rw-r--r--  arch/arm/mach-pxa/time.c | 35
-rw-r--r--  arch/arm/mach-realview/core.c | 10
-rw-r--r--  arch/arm/mach-s5pv310/time.c | 6
-rw-r--r--  arch/arm/mach-sa1100/generic.c | 23
-rw-r--r--  arch/arm/mach-sa1100/time.c | 36
-rw-r--r--  arch/arm/mach-tcc8k/time.c | 5
-rw-r--r--  arch/arm/mach-tegra/timer.c | 31
-rw-r--r--  arch/arm/mach-u300/timer.c | 22
-rw-r--r--  arch/arm/mach-versatile/core.c | 10
-rw-r--r--  arch/arm/mach-vexpress/v2m.c | 5
-rw-r--r--  arch/arm/mach-w90x900/time.c | 5
-rw-r--r--  arch/arm/plat-iop/time.c | 27
-rw-r--r--  arch/arm/plat-mxc/epit.c | 5
-rw-r--r--  arch/arm/plat-mxc/time.c | 5
-rw-r--r--  arch/arm/plat-nomadik/Kconfig | 1
-rw-r--r--  arch/arm/plat-nomadik/timer.c | 80
-rw-r--r--  arch/arm/plat-omap/counter_32k.c | 47
-rw-r--r--  arch/arm/plat-orion/time.c | 50
-rw-r--r--  arch/arm/plat-spear/time.c | 6
-rw-r--r--  arch/arm/plat-stmp3xxx/timer.c | 5
-rw-r--r--  arch/arm/plat-versatile/Makefile | 5
-rw-r--r--  arch/arm/plat-versatile/include/plat/sched_clock.h | 6
-rw-r--r--  arch/arm/plat-versatile/sched-clock.c | 51
54 files changed, 3172 insertions(+), 2844 deletions(-)
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 9d2608363390..2e86b8f62104 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -14,6 +14,7 @@ config ARM
14 select HAVE_FUNCTION_TRACER if (!XIP_KERNEL) 14 select HAVE_FUNCTION_TRACER if (!XIP_KERNEL)
15 select HAVE_FTRACE_MCOUNT_RECORD if (!XIP_KERNEL) 15 select HAVE_FTRACE_MCOUNT_RECORD if (!XIP_KERNEL)
16 select HAVE_DYNAMIC_FTRACE if (!XIP_KERNEL) 16 select HAVE_DYNAMIC_FTRACE if (!XIP_KERNEL)
17 select HAVE_FUNCTION_GRAPH_TRACER if (!THUMB2_KERNEL)
17 select HAVE_GENERIC_DMA_COHERENT 18 select HAVE_GENERIC_DMA_COHERENT
18 select HAVE_KERNEL_GZIP 19 select HAVE_KERNEL_GZIP
19 select HAVE_KERNEL_LZO 20 select HAVE_KERNEL_LZO
@@ -38,6 +39,9 @@ config HAVE_PWM
38config SYS_SUPPORTS_APM_EMULATION 39config SYS_SUPPORTS_APM_EMULATION
39 bool 40 bool
40 41
42config HAVE_SCHED_CLOCK
43 bool
44
41config GENERIC_GPIO 45config GENERIC_GPIO
42 bool 46 bool
43 47
@@ -233,6 +237,7 @@ config ARCH_REALVIEW
233 bool "ARM Ltd. RealView family" 237 bool "ARM Ltd. RealView family"
234 select ARM_AMBA 238 select ARM_AMBA
235 select COMMON_CLKDEV 239 select COMMON_CLKDEV
240 select HAVE_SCHED_CLOCK
236 select ICST 241 select ICST
237 select GENERIC_CLOCKEVENTS 242 select GENERIC_CLOCKEVENTS
238 select ARCH_WANT_OPTIONAL_GPIOLIB 243 select ARCH_WANT_OPTIONAL_GPIOLIB
@@ -247,6 +252,7 @@ config ARCH_VERSATILE
247 select ARM_AMBA 252 select ARM_AMBA
248 select ARM_VIC 253 select ARM_VIC
249 select COMMON_CLKDEV 254 select COMMON_CLKDEV
255 select HAVE_SCHED_CLOCK
250 select ICST 256 select ICST
251 select GENERIC_CLOCKEVENTS 257 select GENERIC_CLOCKEVENTS
252 select ARCH_WANT_OPTIONAL_GPIOLIB 258 select ARCH_WANT_OPTIONAL_GPIOLIB
@@ -263,6 +269,7 @@ config ARCH_VEXPRESS
263 select COMMON_CLKDEV 269 select COMMON_CLKDEV
264 select GENERIC_CLOCKEVENTS 270 select GENERIC_CLOCKEVENTS
265 select HAVE_CLK 271 select HAVE_CLK
272 select HAVE_SCHED_CLOCK
266 select ICST 273 select ICST
267 select PLAT_VERSATILE 274 select PLAT_VERSATILE
268 help 275 help
@@ -434,6 +441,7 @@ config ARCH_IXP4XX
434 select CPU_XSCALE 441 select CPU_XSCALE
435 select GENERIC_GPIO 442 select GENERIC_GPIO
436 select GENERIC_CLOCKEVENTS 443 select GENERIC_CLOCKEVENTS
444 select HAVE_SCHED_CLOCK
437 select DMABOUNCE if PCI 445 select DMABOUNCE if PCI
438 help 446 help
439 Support for Intel's IXP4XX (XScale) family of processors. 447 Support for Intel's IXP4XX (XScale) family of processors.
@@ -509,6 +517,7 @@ config ARCH_MMP
509 select ARCH_REQUIRE_GPIOLIB 517 select ARCH_REQUIRE_GPIOLIB
510 select COMMON_CLKDEV 518 select COMMON_CLKDEV
511 select GENERIC_CLOCKEVENTS 519 select GENERIC_CLOCKEVENTS
520 select HAVE_SCHED_CLOCK
512 select TICK_ONESHOT 521 select TICK_ONESHOT
513 select PLAT_PXA 522 select PLAT_PXA
514 select SPARSE_IRQ 523 select SPARSE_IRQ
@@ -565,6 +574,7 @@ config ARCH_TEGRA
565 select GENERIC_CLOCKEVENTS 574 select GENERIC_CLOCKEVENTS
566 select GENERIC_GPIO 575 select GENERIC_GPIO
567 select HAVE_CLK 576 select HAVE_CLK
577 select HAVE_SCHED_CLOCK
568 select COMMON_CLKDEV 578 select COMMON_CLKDEV
569 select ARCH_HAS_BARRIERS if CACHE_L2X0 579 select ARCH_HAS_BARRIERS if CACHE_L2X0
570 select ARCH_HAS_CPUFREQ 580 select ARCH_HAS_CPUFREQ
@@ -588,6 +598,7 @@ config ARCH_PXA
588 select COMMON_CLKDEV 598 select COMMON_CLKDEV
589 select ARCH_REQUIRE_GPIOLIB 599 select ARCH_REQUIRE_GPIOLIB
590 select GENERIC_CLOCKEVENTS 600 select GENERIC_CLOCKEVENTS
601 select HAVE_SCHED_CLOCK
591 select TICK_ONESHOT 602 select TICK_ONESHOT
592 select PLAT_PXA 603 select PLAT_PXA
593 select SPARSE_IRQ 604 select SPARSE_IRQ
@@ -636,6 +647,7 @@ config ARCH_SA1100
636 select CPU_FREQ 647 select CPU_FREQ
637 select GENERIC_CLOCKEVENTS 648 select GENERIC_CLOCKEVENTS
638 select HAVE_CLK 649 select HAVE_CLK
650 select HAVE_SCHED_CLOCK
639 select TICK_ONESHOT 651 select TICK_ONESHOT
640 select ARCH_REQUIRE_GPIOLIB 652 select ARCH_REQUIRE_GPIOLIB
641 help 653 help
@@ -782,6 +794,7 @@ config ARCH_U300
782 bool "ST-Ericsson U300 Series" 794 bool "ST-Ericsson U300 Series"
783 depends on MMU 795 depends on MMU
784 select CPU_ARM926T 796 select CPU_ARM926T
797 select HAVE_SCHED_CLOCK
785 select HAVE_TCM 798 select HAVE_TCM
786 select ARM_AMBA 799 select ARM_AMBA
787 select ARM_VIC 800 select ARM_VIC
@@ -830,6 +843,7 @@ config ARCH_OMAP
830 select ARCH_REQUIRE_GPIOLIB 843 select ARCH_REQUIRE_GPIOLIB
831 select ARCH_HAS_CPUFREQ 844 select ARCH_HAS_CPUFREQ
832 select GENERIC_CLOCKEVENTS 845 select GENERIC_CLOCKEVENTS
846 select HAVE_SCHED_CLOCK
833 select ARCH_HAS_HOLES_MEMORYMODEL 847 select ARCH_HAS_HOLES_MEMORYMODEL
834 help 848 help
835 Support for TI's OMAP platform (OMAP1/2/3/4). 849 Support for TI's OMAP platform (OMAP1/2/3/4).
@@ -983,9 +997,11 @@ config ARCH_ACORN
983config PLAT_IOP 997config PLAT_IOP
984 bool 998 bool
985 select GENERIC_CLOCKEVENTS 999 select GENERIC_CLOCKEVENTS
1000 select HAVE_SCHED_CLOCK
986 1001
987config PLAT_ORION 1002config PLAT_ORION
988 bool 1003 bool
1004 select HAVE_SCHED_CLOCK
989 1005
990config PLAT_PXA 1006config PLAT_PXA
991 bool 1007 bool
@@ -1212,10 +1228,11 @@ config SMP
1212 depends on EXPERIMENTAL 1228 depends on EXPERIMENTAL
1213 depends on GENERIC_CLOCKEVENTS 1229 depends on GENERIC_CLOCKEVENTS
1214 depends on REALVIEW_EB_ARM11MP || REALVIEW_EB_A9MP || \ 1230 depends on REALVIEW_EB_ARM11MP || REALVIEW_EB_A9MP || \
1215 MACH_REALVIEW_PB11MP || MACH_REALVIEW_PBX || ARCH_OMAP4 ||\ 1231 MACH_REALVIEW_PB11MP || MACH_REALVIEW_PBX || ARCH_OMAP4 || \
1216 ARCH_S5PV310 || ARCH_TEGRA || ARCH_U8500 || ARCH_VEXPRESS_CA9X4 1232 ARCH_S5PV310 || ARCH_TEGRA || ARCH_U8500 || ARCH_VEXPRESS_CA9X4 || \
1233 ARCH_MSM_SCORPIONMP
1217 select USE_GENERIC_SMP_HELPERS 1234 select USE_GENERIC_SMP_HELPERS
1218 select HAVE_ARM_SCU 1235 select HAVE_ARM_SCU if !ARCH_MSM_SCORPIONMP
1219 help 1236 help
1220 This enables support for systems with more than one CPU. If you have 1237 This enables support for systems with more than one CPU. If you have
1221 a system with only one CPU, like most personal computers, say N. If 1238 a system with only one CPU, like most personal computers, say N. If
@@ -1290,6 +1307,7 @@ config NR_CPUS
1290config HOTPLUG_CPU 1307config HOTPLUG_CPU
1291 bool "Support for hot-pluggable CPUs (EXPERIMENTAL)" 1308 bool "Support for hot-pluggable CPUs (EXPERIMENTAL)"
1292 depends on SMP && HOTPLUG && EXPERIMENTAL 1309 depends on SMP && HOTPLUG && EXPERIMENTAL
1310 depends on !ARCH_MSM
1293 help 1311 help
1294 Say Y here to experiment with turning CPUs off and on. CPUs 1312 Say Y here to experiment with turning CPUs off and on. CPUs
1295 can be controlled through /sys/devices/system/cpu. 1313 can be controlled through /sys/devices/system/cpu.
@@ -1298,7 +1316,7 @@ config LOCAL_TIMERS
1298 bool "Use local timer interrupts" 1316 bool "Use local timer interrupts"
1299 depends on SMP 1317 depends on SMP
1300 default y 1318 default y
1301 select HAVE_ARM_TWD 1319 select HAVE_ARM_TWD if !ARCH_MSM_SCORPIONMP
1302 help 1320 help
1303 Enable support for local timers on SMP platforms, rather then the 1321 Enable support for local timers on SMP platforms, rather then the
1304 legacy IPI broadcast method. Local timers allows the system 1322 legacy IPI broadcast method. Local timers allows the system
diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
index 2fd0b99afc4b..eac62085f5b2 100644
--- a/arch/arm/Kconfig.debug
+++ b/arch/arm/Kconfig.debug
@@ -23,7 +23,7 @@ config STRICT_DEVMEM
23config FRAME_POINTER 23config FRAME_POINTER
24 bool 24 bool
25 depends on !THUMB2_KERNEL 25 depends on !THUMB2_KERNEL
26 default y if !ARM_UNWIND 26 default y if !ARM_UNWIND || FUNCTION_GRAPH_TRACER
27 help 27 help
28 If you say N here, the resulting kernel will be slightly smaller and 28 If you say N here, the resulting kernel will be slightly smaller and
29 faster. However, if neither FRAME_POINTER nor ARM_UNWIND are enabled, 29 faster. However, if neither FRAME_POINTER nor ARM_UNWIND are enabled,
diff --git a/arch/arm/common/timer-sp.c b/arch/arm/common/timer-sp.c
index 4740313daa5b..6ef3342153b9 100644
--- a/arch/arm/common/timer-sp.c
+++ b/arch/arm/common/timer-sp.c
@@ -44,7 +44,6 @@ static struct clocksource clocksource_sp804 = {
44 .rating = 200, 44 .rating = 200,
45 .read = sp804_read, 45 .read = sp804_read,
46 .mask = CLOCKSOURCE_MASK(32), 46 .mask = CLOCKSOURCE_MASK(32),
47 .shift = 20,
48 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 47 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
49}; 48};
50 49
@@ -61,8 +60,7 @@ void __init sp804_clocksource_init(void __iomem *base)
61 writel(TIMER_CTRL_32BIT | TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC, 60 writel(TIMER_CTRL_32BIT | TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC,
62 clksrc_base + TIMER_CTRL); 61 clksrc_base + TIMER_CTRL);
63 62
64 cs->mult = clocksource_khz2mult(TIMER_FREQ_KHZ, cs->shift); 63 clocksource_register_khz(cs, TIMER_FREQ_KHZ);
65 clocksource_register(cs);
66} 64}
67 65
68 66
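The conversion above works because clocksource_register_khz() derives a suitable mult/shift pair from the supplied rate, so the driver no longer hard-codes .shift. A minimal before/after sketch of the driver-side change, assuming the 2.6.37-era clocksource API:

	/* Before: the driver picks a shift and derives mult by hand. */
	cs->shift = 20;
	cs->mult = clocksource_khz2mult(TIMER_FREQ_KHZ, cs->shift);
	clocksource_register(cs);

	/* After: the core computes an optimal mult/shift for the given rate. */
	clocksource_register_khz(cs, TIMER_FREQ_KHZ);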
diff --git a/arch/arm/include/asm/sched_clock.h b/arch/arm/include/asm/sched_clock.h
new file mode 100644
index 000000000000..a84628be1a7b
--- /dev/null
+++ b/arch/arm/include/asm/sched_clock.h
@@ -0,0 +1,118 @@
1/*
2 * sched_clock.h: support for extending counters to full 64-bit ns counter
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#ifndef ASM_SCHED_CLOCK
9#define ASM_SCHED_CLOCK
10
11#include <linux/kernel.h>
12#include <linux/types.h>
13
14struct clock_data {
15 u64 epoch_ns;
16 u32 epoch_cyc;
17 u32 epoch_cyc_copy;
18 u32 mult;
19 u32 shift;
20};
21
22#define DEFINE_CLOCK_DATA(name) struct clock_data name
23
24static inline u64 cyc_to_ns(u64 cyc, u32 mult, u32 shift)
25{
26 return (cyc * mult) >> shift;
27}
28
29/*
30 * Atomically update the sched_clock epoch. Your update callback will
31 * be called from a timer before the counter wraps - read the current
32 * counter value, and call this function to safely move the epochs
33 * forward. Only use this from the update callback.
34 */
35static inline void update_sched_clock(struct clock_data *cd, u32 cyc, u32 mask)
36{
37 unsigned long flags;
38 u64 ns = cd->epoch_ns +
39 cyc_to_ns((cyc - cd->epoch_cyc) & mask, cd->mult, cd->shift);
40
41 /*
42 * Write epoch_cyc and epoch_ns in a way that the update is
43 * detectable in cyc_to_fixed_sched_clock().
44 */
45 raw_local_irq_save(flags);
46 cd->epoch_cyc = cyc;
47 smp_wmb();
48 cd->epoch_ns = ns;
49 smp_wmb();
50 cd->epoch_cyc_copy = cyc;
51 raw_local_irq_restore(flags);
52}
53
54/*
55 * If your clock rate is known at compile time, using this will allow
56 * you to optimize the mult/shift loads away. This is paired with
57 * init_fixed_sched_clock() to ensure that your mult/shift are correct.
58 */
59static inline unsigned long long cyc_to_fixed_sched_clock(struct clock_data *cd,
60 u32 cyc, u32 mask, u32 mult, u32 shift)
61{
62 u64 epoch_ns;
63 u32 epoch_cyc;
64
65 /*
66 * Load the epoch_cyc and epoch_ns atomically. We do this by
67 * ensuring that we always write epoch_cyc, epoch_ns and
68 * epoch_cyc_copy in strict order, and read them in strict order.
69 * If epoch_cyc and epoch_cyc_copy are not equal, then we're in
70 * the middle of an update, and we should repeat the load.
71 */
72 do {
73 epoch_cyc = cd->epoch_cyc;
74 smp_rmb();
75 epoch_ns = cd->epoch_ns;
76 smp_rmb();
77 } while (epoch_cyc != cd->epoch_cyc_copy);
78
79 return epoch_ns + cyc_to_ns((cyc - epoch_cyc) & mask, mult, shift);
80}
81
82/*
83 * Otherwise, you need to use this, which will obtain the mult/shift
84 * from the clock_data structure. Use init_sched_clock() with this.
85 */
86static inline unsigned long long cyc_to_sched_clock(struct clock_data *cd,
87 u32 cyc, u32 mask)
88{
89 return cyc_to_fixed_sched_clock(cd, cyc, mask, cd->mult, cd->shift);
90}
91
92/*
93 * Initialize the clock data - calculate the appropriate multiplier
94 * and shift. Also setup a timer to ensure that the epoch is refreshed
95 * at the appropriate time interval, which will call your update
96 * handler.
97 */
98void init_sched_clock(struct clock_data *, void (*)(void),
99 unsigned int, unsigned long);
100
101/*
102 * Use this initialization function rather than init_sched_clock() if
103 * you're using cyc_to_fixed_sched_clock, which will warn if your
104 * constants are incorrect.
105 */
106static inline void init_fixed_sched_clock(struct clock_data *cd,
107 void (*update)(void), unsigned int bits, unsigned long rate,
108 u32 mult, u32 shift)
109{
110 init_sched_clock(cd, update, bits, rate);
111 if (cd->mult != mult || cd->shift != shift) {
112 pr_crit("sched_clock: wrong multiply/shift: %u>>%u vs calculated %u>>%u\n"
113 "sched_clock: fix multiply/shift to avoid scheduler hiccups\n",
114 mult, shift, cd->mult, cd->shift);
115 }
116}
117
118#endif
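The helpers above are meant to be driven by platform code: declare a clock_data, read the hardware counter in both sched_clock() and a periodic update callback, and register that callback with init_sched_clock(). A minimal sketch of that wiring for a free-running 32-bit counter; read_my_counter(), MY_TIMER_VA_BASE/MY_TIMER_VALUE and MY_TIMER_RATE_HZ are hypothetical placeholders, not part of this patch:

#include <linux/init.h>
#include <linux/io.h>
#include <asm/sched_clock.h>

static DEFINE_CLOCK_DATA(cd);

static inline u32 read_my_counter(void)
{
	/* hypothetical free-running 32-bit counter register */
	return readl(MY_TIMER_VA_BASE + MY_TIMER_VALUE);
}

unsigned long long notrace sched_clock(void)
{
	u32 cyc = read_my_counter();
	return cyc_to_sched_clock(&cd, cyc, (u32)~0);
}

static void notrace my_update_sched_clock(void)
{
	u32 cyc = read_my_counter();
	update_sched_clock(&cd, cyc, (u32)~0);
}

static void __init my_sched_clock_init(void)
{
	/* 32-bit counter running at MY_TIMER_RATE_HZ */
	init_sched_clock(&cd, my_update_sched_clock, 32, MY_TIMER_RATE_HZ);
}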
diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h
index 80025948b8ad..3222ab8b3447 100644
--- a/arch/arm/include/asm/system.h
+++ b/arch/arm/include/asm/system.h
@@ -63,6 +63,11 @@
63#include <asm/outercache.h> 63#include <asm/outercache.h>
64 64
65#define __exception __attribute__((section(".exception.text"))) 65#define __exception __attribute__((section(".exception.text")))
66#ifdef CONFIG_FUNCTION_GRAPH_TRACER
67#define __exception_irq_entry __irq_entry
68#else
69#define __exception_irq_entry __exception
70#endif
66 71
67struct thread_info; 72struct thread_info;
68struct task_struct; 73struct task_struct;
diff --git a/arch/arm/include/asm/traps.h b/arch/arm/include/asm/traps.h
index 491960bf4260..124475afb007 100644
--- a/arch/arm/include/asm/traps.h
+++ b/arch/arm/include/asm/traps.h
@@ -15,13 +15,32 @@ struct undef_hook {
15void register_undef_hook(struct undef_hook *hook); 15void register_undef_hook(struct undef_hook *hook);
16void unregister_undef_hook(struct undef_hook *hook); 16void unregister_undef_hook(struct undef_hook *hook);
17 17
18#ifdef CONFIG_FUNCTION_GRAPH_TRACER
19static inline int __in_irqentry_text(unsigned long ptr)
20{
21 extern char __irqentry_text_start[];
22 extern char __irqentry_text_end[];
23
24 return ptr >= (unsigned long)&__irqentry_text_start &&
25 ptr < (unsigned long)&__irqentry_text_end;
26}
27#else
28static inline int __in_irqentry_text(unsigned long ptr)
29{
30 return 0;
31}
32#endif
33
18static inline int in_exception_text(unsigned long ptr) 34static inline int in_exception_text(unsigned long ptr)
19{ 35{
20 extern char __exception_text_start[]; 36 extern char __exception_text_start[];
21 extern char __exception_text_end[]; 37 extern char __exception_text_end[];
38 int in;
39
40 in = ptr >= (unsigned long)&__exception_text_start &&
41 ptr < (unsigned long)&__exception_text_end;
22 42
23 return ptr >= (unsigned long)&__exception_text_start && 43 return in ? : __in_irqentry_text(ptr);
24 ptr < (unsigned long)&__exception_text_end;
25} 44}
26 45
27extern void __init early_trap_init(void); 46extern void __init early_trap_init(void);
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 5b9b268f4fbb..fd3ec49bfba6 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -5,7 +5,7 @@
5CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET) 5CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET)
6AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) 6AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
7 7
8ifdef CONFIG_DYNAMIC_FTRACE 8ifdef CONFIG_FUNCTION_TRACER
9CFLAGS_REMOVE_ftrace.o = -pg 9CFLAGS_REMOVE_ftrace.o = -pg
10endif 10endif
11 11
@@ -29,10 +29,12 @@ obj-$(CONFIG_MODULES) += armksyms.o module.o
29obj-$(CONFIG_ARTHUR) += arthur.o 29obj-$(CONFIG_ARTHUR) += arthur.o
30obj-$(CONFIG_ISA_DMA) += dma-isa.o 30obj-$(CONFIG_ISA_DMA) += dma-isa.o
31obj-$(CONFIG_PCI) += bios32.o isa.o 31obj-$(CONFIG_PCI) += bios32.o isa.o
32obj-$(CONFIG_HAVE_SCHED_CLOCK) += sched_clock.o
32obj-$(CONFIG_SMP) += smp.o 33obj-$(CONFIG_SMP) += smp.o
33obj-$(CONFIG_HAVE_ARM_SCU) += smp_scu.o 34obj-$(CONFIG_HAVE_ARM_SCU) += smp_scu.o
34obj-$(CONFIG_HAVE_ARM_TWD) += smp_twd.o 35obj-$(CONFIG_HAVE_ARM_TWD) += smp_twd.o
35obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o 36obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
37obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
36obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o 38obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
37obj-$(CONFIG_KPROBES) += kprobes.o kprobes-decode.o 39obj-$(CONFIG_KPROBES) += kprobes.o kprobes-decode.o
38obj-$(CONFIG_ATAGS_PROC) += atags.o 40obj-$(CONFIG_ATAGS_PROC) += atags.o
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 80bf8cd88d7c..1e7b04a40a31 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -147,98 +147,170 @@ ENDPROC(ret_from_fork)
147#endif 147#endif
148#endif 148#endif
149 149
150#ifdef CONFIG_DYNAMIC_FTRACE 150.macro __mcount suffix
151ENTRY(__gnu_mcount_nc) 151 mcount_enter
152 mov ip, lr 152 ldr r0, =ftrace_trace_function
153 ldmia sp!, {lr} 153 ldr r2, [r0]
154 mov pc, ip 154 adr r0, .Lftrace_stub
155ENDPROC(__gnu_mcount_nc) 155 cmp r0, r2
156 bne 1f
157
158#ifdef CONFIG_FUNCTION_GRAPH_TRACER
159 ldr r1, =ftrace_graph_return
160 ldr r2, [r1]
161 cmp r0, r2
162 bne ftrace_graph_caller\suffix
163
164 ldr r1, =ftrace_graph_entry
165 ldr r2, [r1]
166 ldr r0, =ftrace_graph_entry_stub
167 cmp r0, r2
168 bne ftrace_graph_caller\suffix
169#endif
156 170
157ENTRY(ftrace_caller) 171 mcount_exit
158 stmdb sp!, {r0-r3, lr} 172
159 mov r0, lr 1731: mcount_get_lr r1 @ lr of instrumented func
174 mov r0, lr @ instrumented function
175 sub r0, r0, #MCOUNT_INSN_SIZE
176 adr lr, BSYM(2f)
177 mov pc, r2
1782: mcount_exit
179.endm
180
181.macro __ftrace_caller suffix
182 mcount_enter
183
184 mcount_get_lr r1 @ lr of instrumented func
185 mov r0, lr @ instrumented function
160 sub r0, r0, #MCOUNT_INSN_SIZE 186 sub r0, r0, #MCOUNT_INSN_SIZE
161 ldr r1, [sp, #20]
162 187
163 .global ftrace_call 188 .globl ftrace_call\suffix
164ftrace_call: 189ftrace_call\suffix:
165 bl ftrace_stub 190 bl ftrace_stub
166 ldmia sp!, {r0-r3, ip, lr} 191
167 mov pc, ip 192#ifdef CONFIG_FUNCTION_GRAPH_TRACER
168ENDPROC(ftrace_caller) 193 .globl ftrace_graph_call\suffix
194ftrace_graph_call\suffix:
195 mov r0, r0
196#endif
197
198 mcount_exit
199.endm
200
201.macro __ftrace_graph_caller
202 sub r0, fp, #4 @ &lr of instrumented routine (&parent)
203#ifdef CONFIG_DYNAMIC_FTRACE
204 @ called from __ftrace_caller, saved in mcount_enter
205 ldr r1, [sp, #16] @ instrumented routine (func)
206#else
207 @ called from __mcount, untouched in lr
208 mov r1, lr @ instrumented routine (func)
209#endif
210 sub r1, r1, #MCOUNT_INSN_SIZE
211 mov r2, fp @ frame pointer
212 bl prepare_ftrace_return
213 mcount_exit
214.endm
169 215
170#ifdef CONFIG_OLD_MCOUNT 216#ifdef CONFIG_OLD_MCOUNT
217/*
218 * mcount
219 */
220
221.macro mcount_enter
222 stmdb sp!, {r0-r3, lr}
223.endm
224
225.macro mcount_get_lr reg
226 ldr \reg, [fp, #-4]
227.endm
228
229.macro mcount_exit
230 ldr lr, [fp, #-4]
231 ldmia sp!, {r0-r3, pc}
232.endm
233
171ENTRY(mcount) 234ENTRY(mcount)
235#ifdef CONFIG_DYNAMIC_FTRACE
172 stmdb sp!, {lr} 236 stmdb sp!, {lr}
173 ldr lr, [fp, #-4] 237 ldr lr, [fp, #-4]
174 ldmia sp!, {pc} 238 ldmia sp!, {pc}
239#else
240 __mcount _old
241#endif
175ENDPROC(mcount) 242ENDPROC(mcount)
176 243
244#ifdef CONFIG_DYNAMIC_FTRACE
177ENTRY(ftrace_caller_old) 245ENTRY(ftrace_caller_old)
178 stmdb sp!, {r0-r3, lr} 246 __ftrace_caller _old
179 ldr r1, [fp, #-4]
180 mov r0, lr
181 sub r0, r0, #MCOUNT_INSN_SIZE
182
183 .globl ftrace_call_old
184ftrace_call_old:
185 bl ftrace_stub
186 ldr lr, [fp, #-4] @ restore lr
187 ldmia sp!, {r0-r3, pc}
188ENDPROC(ftrace_caller_old) 247ENDPROC(ftrace_caller_old)
189#endif 248#endif
190 249
191#else 250#ifdef CONFIG_FUNCTION_GRAPH_TRACER
251ENTRY(ftrace_graph_caller_old)
252 __ftrace_graph_caller
253ENDPROC(ftrace_graph_caller_old)
254#endif
192 255
193ENTRY(__gnu_mcount_nc) 256.purgem mcount_enter
257.purgem mcount_get_lr
258.purgem mcount_exit
259#endif
260
261/*
262 * __gnu_mcount_nc
263 */
264
265.macro mcount_enter
194 stmdb sp!, {r0-r3, lr} 266 stmdb sp!, {r0-r3, lr}
195 ldr r0, =ftrace_trace_function 267.endm
196 ldr r2, [r0] 268
197 adr r0, .Lftrace_stub 269.macro mcount_get_lr reg
198 cmp r0, r2 270 ldr \reg, [sp, #20]
199 bne gnu_trace 271.endm
272
273.macro mcount_exit
200 ldmia sp!, {r0-r3, ip, lr} 274 ldmia sp!, {r0-r3, ip, lr}
201 mov pc, ip 275 mov pc, ip
276.endm
202 277
203gnu_trace: 278ENTRY(__gnu_mcount_nc)
204 ldr r1, [sp, #20] @ lr of instrumented routine 279#ifdef CONFIG_DYNAMIC_FTRACE
205 mov r0, lr 280 mov ip, lr
206 sub r0, r0, #MCOUNT_INSN_SIZE 281 ldmia sp!, {lr}
207 adr lr, BSYM(1f)
208 mov pc, r2
2091:
210 ldmia sp!, {r0-r3, ip, lr}
211 mov pc, ip 282 mov pc, ip
283#else
284 __mcount
285#endif
212ENDPROC(__gnu_mcount_nc) 286ENDPROC(__gnu_mcount_nc)
213 287
214#ifdef CONFIG_OLD_MCOUNT 288#ifdef CONFIG_DYNAMIC_FTRACE
215/* 289ENTRY(ftrace_caller)
216 * This is under an ifdef in order to force link-time errors for people trying 290 __ftrace_caller
217 * to build with !FRAME_POINTER with a GCC which doesn't use the new-style 291ENDPROC(ftrace_caller)
218 * mcount. 292#endif
219 */
220ENTRY(mcount)
221 stmdb sp!, {r0-r3, lr}
222 ldr r0, =ftrace_trace_function
223 ldr r2, [r0]
224 adr r0, ftrace_stub
225 cmp r0, r2
226 bne trace
227 ldr lr, [fp, #-4] @ restore lr
228 ldmia sp!, {r0-r3, pc}
229 293
230trace: 294#ifdef CONFIG_FUNCTION_GRAPH_TRACER
231 ldr r1, [fp, #-4] @ lr of instrumented routine 295ENTRY(ftrace_graph_caller)
232 mov r0, lr 296 __ftrace_graph_caller
233 sub r0, r0, #MCOUNT_INSN_SIZE 297ENDPROC(ftrace_graph_caller)
234 mov lr, pc
235 mov pc, r2
236 ldr lr, [fp, #-4] @ restore lr
237 ldmia sp!, {r0-r3, pc}
238ENDPROC(mcount)
239#endif 298#endif
240 299
241#endif /* CONFIG_DYNAMIC_FTRACE */ 300.purgem mcount_enter
301.purgem mcount_get_lr
302.purgem mcount_exit
303
304#ifdef CONFIG_FUNCTION_GRAPH_TRACER
305 .globl return_to_handler
306return_to_handler:
307 stmdb sp!, {r0-r3}
308 mov r0, fp @ frame pointer
309 bl ftrace_return_to_handler
310 mov lr, r0 @ r0 has real ret addr
311 ldmia sp!, {r0-r3}
312 mov pc, lr
313#endif
242 314
243ENTRY(ftrace_stub) 315ENTRY(ftrace_stub)
244.Lftrace_stub: 316.Lftrace_stub:
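In C terms, the __mcount macro defined above dispatches roughly as follows; this is only an illustrative rendering of the assembly's control flow (not compilable on its own), using the standard ftrace hook names:

/* Illustrative only: mirrors the branches in the __mcount macro above. */
static void notrace mcount_dispatch(unsigned long func, unsigned long parent)
{
	if (ftrace_trace_function != ftrace_stub) {
		/* a plain function tracer is registered: call it and return */
		ftrace_trace_function(func, parent);
		return;
	}
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	/* no function tracer: enter the graph tracer if either hook is set */
	if (ftrace_graph_return != (trace_func_graph_ret_t)ftrace_stub ||
	    ftrace_graph_entry != ftrace_graph_entry_stub)
		ftrace_graph_caller();	/* ends up in prepare_ftrace_return() */
#endif
}

Here func corresponds to the saved lr minus MCOUNT_INSN_SIZE (the probed function) and parent to the caller's lr as loaded by mcount_get_lr.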
diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
index 971ac8c36ea7..c0062ad1e847 100644
--- a/arch/arm/kernel/ftrace.c
+++ b/arch/arm/kernel/ftrace.c
@@ -24,6 +24,7 @@
24#define NOP 0xe8bd4000 /* pop {lr} */ 24#define NOP 0xe8bd4000 /* pop {lr} */
25#endif 25#endif
26 26
27#ifdef CONFIG_DYNAMIC_FTRACE
27#ifdef CONFIG_OLD_MCOUNT 28#ifdef CONFIG_OLD_MCOUNT
28#define OLD_MCOUNT_ADDR ((unsigned long) mcount) 29#define OLD_MCOUNT_ADDR ((unsigned long) mcount)
29#define OLD_FTRACE_ADDR ((unsigned long) ftrace_caller_old) 30#define OLD_FTRACE_ADDR ((unsigned long) ftrace_caller_old)
@@ -59,9 +60,9 @@ static unsigned long adjust_address(struct dyn_ftrace *rec, unsigned long addr)
59} 60}
60#endif 61#endif
61 62
62/* construct a branch (BL) instruction to addr */
63#ifdef CONFIG_THUMB2_KERNEL 63#ifdef CONFIG_THUMB2_KERNEL
64static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) 64static unsigned long ftrace_gen_branch(unsigned long pc, unsigned long addr,
65 bool link)
65{ 66{
66 unsigned long s, j1, j2, i1, i2, imm10, imm11; 67 unsigned long s, j1, j2, i1, i2, imm10, imm11;
67 unsigned long first, second; 68 unsigned long first, second;
@@ -83,15 +84,22 @@ static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr)
83 j2 = (!i2) ^ s; 84 j2 = (!i2) ^ s;
84 85
85 first = 0xf000 | (s << 10) | imm10; 86 first = 0xf000 | (s << 10) | imm10;
86 second = 0xd000 | (j1 << 13) | (j2 << 11) | imm11; 87 second = 0x9000 | (j1 << 13) | (j2 << 11) | imm11;
88 if (link)
89 second |= 1 << 14;
87 90
88 return (second << 16) | first; 91 return (second << 16) | first;
89} 92}
90#else 93#else
91static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) 94static unsigned long ftrace_gen_branch(unsigned long pc, unsigned long addr,
95 bool link)
92{ 96{
97 unsigned long opcode = 0xea000000;
93 long offset; 98 long offset;
94 99
100 if (link)
101 opcode |= 1 << 24;
102
95 offset = (long)addr - (long)(pc + 8); 103 offset = (long)addr - (long)(pc + 8);
96 if (unlikely(offset < -33554432 || offset > 33554428)) { 104 if (unlikely(offset < -33554432 || offset > 33554428)) {
97 /* Can't generate branches that far (from ARM ARM). Ftrace 105 /* Can't generate branches that far (from ARM ARM). Ftrace
@@ -103,10 +111,15 @@ static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr)
103 111
104 offset = (offset >> 2) & 0x00ffffff; 112 offset = (offset >> 2) & 0x00ffffff;
105 113
106 return 0xeb000000 | offset; 114 return opcode | offset;
107} 115}
108#endif 116#endif
109 117
118static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr)
119{
120 return ftrace_gen_branch(pc, addr, true);
121}
122
110static int ftrace_modify_code(unsigned long pc, unsigned long old, 123static int ftrace_modify_code(unsigned long pc, unsigned long old,
111 unsigned long new) 124 unsigned long new)
112{ 125{
@@ -193,3 +206,83 @@ int __init ftrace_dyn_arch_init(void *data)
193 206
194 return 0; 207 return 0;
195} 208}
209#endif /* CONFIG_DYNAMIC_FTRACE */
210
211#ifdef CONFIG_FUNCTION_GRAPH_TRACER
212void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
213 unsigned long frame_pointer)
214{
215 unsigned long return_hooker = (unsigned long) &return_to_handler;
216 struct ftrace_graph_ent trace;
217 unsigned long old;
218 int err;
219
220 if (unlikely(atomic_read(&current->tracing_graph_pause)))
221 return;
222
223 old = *parent;
224 *parent = return_hooker;
225
226 err = ftrace_push_return_trace(old, self_addr, &trace.depth,
227 frame_pointer);
228 if (err == -EBUSY) {
229 *parent = old;
230 return;
231 }
232
233 trace.func = self_addr;
234
235 /* Only trace if the calling function expects to */
236 if (!ftrace_graph_entry(&trace)) {
237 current->curr_ret_stack--;
238 *parent = old;
239 }
240}
241
242#ifdef CONFIG_DYNAMIC_FTRACE
243extern unsigned long ftrace_graph_call;
244extern unsigned long ftrace_graph_call_old;
245extern void ftrace_graph_caller_old(void);
246
247static int __ftrace_modify_caller(unsigned long *callsite,
248 void (*func) (void), bool enable)
249{
250 unsigned long caller_fn = (unsigned long) func;
251 unsigned long pc = (unsigned long) callsite;
252 unsigned long branch = ftrace_gen_branch(pc, caller_fn, false);
253 unsigned long nop = 0xe1a00000; /* mov r0, r0 */
254 unsigned long old = enable ? nop : branch;
255 unsigned long new = enable ? branch : nop;
256
257 return ftrace_modify_code(pc, old, new);
258}
259
260static int ftrace_modify_graph_caller(bool enable)
261{
262 int ret;
263
264 ret = __ftrace_modify_caller(&ftrace_graph_call,
265 ftrace_graph_caller,
266 enable);
267
268#ifdef CONFIG_OLD_MCOUNT
269 if (!ret)
270 ret = __ftrace_modify_caller(&ftrace_graph_call_old,
271 ftrace_graph_caller_old,
272 enable);
273#endif
274
275 return ret;
276}
277
278int ftrace_enable_ftrace_graph_caller(void)
279{
280 return ftrace_modify_graph_caller(true);
281}
282
283int ftrace_disable_ftrace_graph_caller(void)
284{
285 return ftrace_modify_graph_caller(false);
286}
287#endif /* CONFIG_DYNAMIC_FTRACE */
288#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
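As a concrete check of the ARM-mode encoding produced by ftrace_gen_branch() above: the instruction stores the signed word offset relative to pc+8 in its low 24 bits, with bit 24 turning a plain B into a BL. A small standalone sketch with made-up addresses; the function below only mirrors the arithmetic shown in the patch:

#include <assert.h>

/* Mirror of the ARM-mode ftrace_gen_branch() arithmetic, for a
 * self-contained sanity check with illustrative addresses. */
static unsigned long gen_branch(unsigned long pc, unsigned long addr, int link)
{
	unsigned long opcode = 0xea000000;	/* B, AL condition */
	long offset;

	if (link)
		opcode |= 1 << 24;		/* turn B into BL */

	offset = (long)addr - (long)(pc + 8);	/* ARM pc reads 8 bytes ahead */
	offset = (offset >> 2) & 0x00ffffff;	/* signed word offset, 24 bits */

	return opcode | offset;
}

int main(void)
{
	/* BL from 0x8000 to 0x9000: byte offset 0x9000 - 0x8008 = 0xff8,
	 * word offset 0x3fe, so the instruction is 0xeb0003fe. */
	assert(gen_branch(0x8000, 0x9000, 1) == 0xeb0003fe);

	/* Backward B from 0x9000 to 0x8000 wraps into the 24-bit field. */
	assert(gen_branch(0x9000, 0x8000, 0) == 0xeafffbfe);
	return 0;
}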
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index 36ad3be4692a..6d616333340f 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -35,6 +35,7 @@
35#include <linux/list.h> 35#include <linux/list.h>
36#include <linux/kallsyms.h> 36#include <linux/kallsyms.h>
37#include <linux/proc_fs.h> 37#include <linux/proc_fs.h>
38#include <linux/ftrace.h>
38 39
39#include <asm/system.h> 40#include <asm/system.h>
40#include <asm/mach/irq.h> 41#include <asm/mach/irq.h>
@@ -105,7 +106,8 @@ unlock:
105 * come via this function. Instead, they should provide their 106 * come via this function. Instead, they should provide their
106 * own 'handler' 107 * own 'handler'
107 */ 108 */
108asmlinkage void __exception asm_do_IRQ(unsigned int irq, struct pt_regs *regs) 109asmlinkage void __exception_irq_entry
110asm_do_IRQ(unsigned int irq, struct pt_regs *regs)
109{ 111{
110 struct pt_regs *old_regs = set_irq_regs(regs); 112 struct pt_regs *old_regs = set_irq_regs(regs);
111 113
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 07a50357492a..421a4bb88fed 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -4,9 +4,7 @@
4 * ARM performance counter support. 4 * ARM performance counter support.
5 * 5 *
6 * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles 6 * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
7 * 7 * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com>
8 * ARMv7 support: Jean Pihet <jpihet@mvista.com>
9 * 2010 (c) MontaVista Software, LLC.
10 * 8 *
11 * This code is based on the sparc64 perf event code, which is in turn based 9 * This code is based on the sparc64 perf event code, which is in turn based
12 * on the x86 code. Callchain code is based on the ARM OProfile backtrace 10 * on the x86 code. Callchain code is based on the ARM OProfile backtrace
@@ -69,29 +67,23 @@ struct cpu_hw_events {
69}; 67};
70DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); 68DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
71 69
72/* PMU names. */
73static const char *arm_pmu_names[] = {
74 [ARM_PERF_PMU_ID_XSCALE1] = "xscale1",
75 [ARM_PERF_PMU_ID_XSCALE2] = "xscale2",
76 [ARM_PERF_PMU_ID_V6] = "v6",
77 [ARM_PERF_PMU_ID_V6MP] = "v6mpcore",
78 [ARM_PERF_PMU_ID_CA8] = "ARMv7 Cortex-A8",
79 [ARM_PERF_PMU_ID_CA9] = "ARMv7 Cortex-A9",
80};
81
82struct arm_pmu { 70struct arm_pmu {
83 enum arm_perf_pmu_ids id; 71 enum arm_perf_pmu_ids id;
72 const char *name;
84 irqreturn_t (*handle_irq)(int irq_num, void *dev); 73 irqreturn_t (*handle_irq)(int irq_num, void *dev);
85 void (*enable)(struct hw_perf_event *evt, int idx); 74 void (*enable)(struct hw_perf_event *evt, int idx);
86 void (*disable)(struct hw_perf_event *evt, int idx); 75 void (*disable)(struct hw_perf_event *evt, int idx);
87 int (*event_map)(int evt);
88 u64 (*raw_event)(u64);
89 int (*get_event_idx)(struct cpu_hw_events *cpuc, 76 int (*get_event_idx)(struct cpu_hw_events *cpuc,
90 struct hw_perf_event *hwc); 77 struct hw_perf_event *hwc);
91 u32 (*read_counter)(int idx); 78 u32 (*read_counter)(int idx);
92 void (*write_counter)(int idx, u32 val); 79 void (*write_counter)(int idx, u32 val);
93 void (*start)(void); 80 void (*start)(void);
94 void (*stop)(void); 81 void (*stop)(void);
82 const unsigned (*cache_map)[PERF_COUNT_HW_CACHE_MAX]
83 [PERF_COUNT_HW_CACHE_OP_MAX]
84 [PERF_COUNT_HW_CACHE_RESULT_MAX];
85 const unsigned (*event_map)[PERF_COUNT_HW_MAX];
86 u32 raw_event_mask;
95 int num_events; 87 int num_events;
96 u64 max_period; 88 u64 max_period;
97}; 89};
@@ -136,10 +128,6 @@ EXPORT_SYMBOL_GPL(perf_num_counters);
136 128
137#define CACHE_OP_UNSUPPORTED 0xFFFF 129#define CACHE_OP_UNSUPPORTED 0xFFFF
138 130
139static unsigned armpmu_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
140 [PERF_COUNT_HW_CACHE_OP_MAX]
141 [PERF_COUNT_HW_CACHE_RESULT_MAX];
142
143static int 131static int
144armpmu_map_cache_event(u64 config) 132armpmu_map_cache_event(u64 config)
145{ 133{
@@ -157,7 +145,7 @@ armpmu_map_cache_event(u64 config)
157 if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) 145 if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
158 return -EINVAL; 146 return -EINVAL;
159 147
160 ret = (int)armpmu_perf_cache_map[cache_type][cache_op][cache_result]; 148 ret = (int)(*armpmu->cache_map)[cache_type][cache_op][cache_result];
161 149
162 if (ret == CACHE_OP_UNSUPPORTED) 150 if (ret == CACHE_OP_UNSUPPORTED)
163 return -ENOENT; 151 return -ENOENT;
@@ -166,6 +154,19 @@ armpmu_map_cache_event(u64 config)
166} 154}
167 155
168static int 156static int
157armpmu_map_event(u64 config)
158{
159 int mapping = (*armpmu->event_map)[config];
160 return mapping == HW_OP_UNSUPPORTED ? -EOPNOTSUPP : mapping;
161}
162
163static int
164armpmu_map_raw_event(u64 config)
165{
166 return (int)(config & armpmu->raw_event_mask);
167}
168
169static int
169armpmu_event_set_period(struct perf_event *event, 170armpmu_event_set_period(struct perf_event *event,
170 struct hw_perf_event *hwc, 171 struct hw_perf_event *hwc,
171 int idx) 172 int idx)
@@ -458,11 +459,11 @@ __hw_perf_event_init(struct perf_event *event)
458 459
459 /* Decode the generic type into an ARM event identifier. */ 460 /* Decode the generic type into an ARM event identifier. */
460 if (PERF_TYPE_HARDWARE == event->attr.type) { 461 if (PERF_TYPE_HARDWARE == event->attr.type) {
461 mapping = armpmu->event_map(event->attr.config); 462 mapping = armpmu_map_event(event->attr.config);
462 } else if (PERF_TYPE_HW_CACHE == event->attr.type) { 463 } else if (PERF_TYPE_HW_CACHE == event->attr.type) {
463 mapping = armpmu_map_cache_event(event->attr.config); 464 mapping = armpmu_map_cache_event(event->attr.config);
464 } else if (PERF_TYPE_RAW == event->attr.type) { 465 } else if (PERF_TYPE_RAW == event->attr.type) {
465 mapping = armpmu->raw_event(event->attr.config); 466 mapping = armpmu_map_raw_event(event->attr.config);
466 } else { 467 } else {
467 pr_debug("event type %x not supported\n", event->attr.type); 468 pr_debug("event type %x not supported\n", event->attr.type);
468 return -EOPNOTSUPP; 469 return -EOPNOTSUPP;
@@ -603,2366 +604,10 @@ static struct pmu pmu = {
603 .read = armpmu_read, 604 .read = armpmu_read,
604}; 605};
605 606
606/* 607/* Include the PMU-specific implementations. */
607 * ARMv6 Performance counter handling code. 608#include "perf_event_xscale.c"
608 * 609#include "perf_event_v6.c"
609 * ARMv6 has 2 configurable performance counters and a single cycle counter. 610#include "perf_event_v7.c"
610 * They all share a single reset bit but can be written to zero so we can use
611 * that for a reset.
612 *
613 * The counters can't be individually enabled or disabled so when we remove
614 * one event and replace it with another we could get spurious counts from the
615 * wrong event. However, we can take advantage of the fact that the
616 * performance counters can export events to the event bus, and the event bus
617 * itself can be monitored. This requires that we *don't* export the events to
618 * the event bus. The procedure for disabling a configurable counter is:
619 * - change the counter to count the ETMEXTOUT[0] signal (0x20). This
620 * effectively stops the counter from counting.
621 * - disable the counter's interrupt generation (each counter has it's
622 * own interrupt enable bit).
623 * Once stopped, the counter value can be written as 0 to reset.
624 *
625 * To enable a counter:
626 * - enable the counter's interrupt generation.
627 * - set the new event type.
628 *
629 * Note: the dedicated cycle counter only counts cycles and can't be
630 * enabled/disabled independently of the others. When we want to disable the
631 * cycle counter, we have to just disable the interrupt reporting and start
632 * ignoring that counter. When re-enabling, we have to reset the value and
633 * enable the interrupt.
634 */
635
636enum armv6_perf_types {
637 ARMV6_PERFCTR_ICACHE_MISS = 0x0,
638 ARMV6_PERFCTR_IBUF_STALL = 0x1,
639 ARMV6_PERFCTR_DDEP_STALL = 0x2,
640 ARMV6_PERFCTR_ITLB_MISS = 0x3,
641 ARMV6_PERFCTR_DTLB_MISS = 0x4,
642 ARMV6_PERFCTR_BR_EXEC = 0x5,
643 ARMV6_PERFCTR_BR_MISPREDICT = 0x6,
644 ARMV6_PERFCTR_INSTR_EXEC = 0x7,
645 ARMV6_PERFCTR_DCACHE_HIT = 0x9,
646 ARMV6_PERFCTR_DCACHE_ACCESS = 0xA,
647 ARMV6_PERFCTR_DCACHE_MISS = 0xB,
648 ARMV6_PERFCTR_DCACHE_WBACK = 0xC,
649 ARMV6_PERFCTR_SW_PC_CHANGE = 0xD,
650 ARMV6_PERFCTR_MAIN_TLB_MISS = 0xF,
651 ARMV6_PERFCTR_EXPL_D_ACCESS = 0x10,
652 ARMV6_PERFCTR_LSU_FULL_STALL = 0x11,
653 ARMV6_PERFCTR_WBUF_DRAINED = 0x12,
654 ARMV6_PERFCTR_CPU_CYCLES = 0xFF,
655 ARMV6_PERFCTR_NOP = 0x20,
656};
657
658enum armv6_counters {
659 ARMV6_CYCLE_COUNTER = 1,
660 ARMV6_COUNTER0,
661 ARMV6_COUNTER1,
662};
663
664/*
665 * The hardware events that we support. We do support cache operations but
666 * we have harvard caches and no way to combine instruction and data
667 * accesses/misses in hardware.
668 */
669static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
670 [PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES,
671 [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC,
672 [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
673 [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
674 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC,
675 [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT,
676 [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
677};
678
679static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
680 [PERF_COUNT_HW_CACHE_OP_MAX]
681 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
682 [C(L1D)] = {
683 /*
684 * The performance counters don't differentiate between read
685 * and write accesses/misses so this isn't strictly correct,
686 * but it's the best we can do. Writes and reads get
687 * combined.
688 */
689 [C(OP_READ)] = {
690 [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
691 [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
692 },
693 [C(OP_WRITE)] = {
694 [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
695 [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
696 },
697 [C(OP_PREFETCH)] = {
698 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
699 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
700 },
701 },
702 [C(L1I)] = {
703 [C(OP_READ)] = {
704 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
705 [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS,
706 },
707 [C(OP_WRITE)] = {
708 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
709 [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS,
710 },
711 [C(OP_PREFETCH)] = {
712 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
713 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
714 },
715 },
716 [C(LL)] = {
717 [C(OP_READ)] = {
718 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
719 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
720 },
721 [C(OP_WRITE)] = {
722 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
723 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
724 },
725 [C(OP_PREFETCH)] = {
726 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
727 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
728 },
729 },
730 [C(DTLB)] = {
731 /*
732 * The ARM performance counters can count micro DTLB misses,
733 * micro ITLB misses and main TLB misses. There isn't an event
734 * for TLB misses, so use the micro misses here and if users
735 * want the main TLB misses they can use a raw counter.
736 */
737 [C(OP_READ)] = {
738 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
739 [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
740 },
741 [C(OP_WRITE)] = {
742 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
743 [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
744 },
745 [C(OP_PREFETCH)] = {
746 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
747 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
748 },
749 },
750 [C(ITLB)] = {
751 [C(OP_READ)] = {
752 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
753 [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
754 },
755 [C(OP_WRITE)] = {
756 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
757 [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
758 },
759 [C(OP_PREFETCH)] = {
760 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
761 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
762 },
763 },
764 [C(BPU)] = {
765 [C(OP_READ)] = {
766 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
767 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
768 },
769 [C(OP_WRITE)] = {
770 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
771 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
772 },
773 [C(OP_PREFETCH)] = {
774 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
775 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
776 },
777 },
778};
779
780enum armv6mpcore_perf_types {
781 ARMV6MPCORE_PERFCTR_ICACHE_MISS = 0x0,
782 ARMV6MPCORE_PERFCTR_IBUF_STALL = 0x1,
783 ARMV6MPCORE_PERFCTR_DDEP_STALL = 0x2,
784 ARMV6MPCORE_PERFCTR_ITLB_MISS = 0x3,
785 ARMV6MPCORE_PERFCTR_DTLB_MISS = 0x4,
786 ARMV6MPCORE_PERFCTR_BR_EXEC = 0x5,
787 ARMV6MPCORE_PERFCTR_BR_NOTPREDICT = 0x6,
788 ARMV6MPCORE_PERFCTR_BR_MISPREDICT = 0x7,
789 ARMV6MPCORE_PERFCTR_INSTR_EXEC = 0x8,
790 ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA,
791 ARMV6MPCORE_PERFCTR_DCACHE_RDMISS = 0xB,
792 ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC,
793 ARMV6MPCORE_PERFCTR_DCACHE_WRMISS = 0xD,
794 ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE,
795 ARMV6MPCORE_PERFCTR_SW_PC_CHANGE = 0xF,
796 ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS = 0x10,
797 ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11,
798 ARMV6MPCORE_PERFCTR_LSU_FULL_STALL = 0x12,
799 ARMV6MPCORE_PERFCTR_WBUF_DRAINED = 0x13,
800 ARMV6MPCORE_PERFCTR_CPU_CYCLES = 0xFF,
801};
802
803/*
804 * The hardware events that we support. We do support cache operations but
805 * we have harvard caches and no way to combine instruction and data
806 * accesses/misses in hardware.
807 */
808static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = {
809 [PERF_COUNT_HW_CPU_CYCLES] = ARMV6MPCORE_PERFCTR_CPU_CYCLES,
810 [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_INSTR_EXEC,
811 [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
812 [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
813 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC,
814 [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6MPCORE_PERFCTR_BR_MISPREDICT,
815 [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
816};
817
818static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
819 [PERF_COUNT_HW_CACHE_OP_MAX]
820 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
821 [C(L1D)] = {
822 [C(OP_READ)] = {
823 [C(RESULT_ACCESS)] =
824 ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS,
825 [C(RESULT_MISS)] =
826 ARMV6MPCORE_PERFCTR_DCACHE_RDMISS,
827 },
828 [C(OP_WRITE)] = {
829 [C(RESULT_ACCESS)] =
830 ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS,
831 [C(RESULT_MISS)] =
832 ARMV6MPCORE_PERFCTR_DCACHE_WRMISS,
833 },
834 [C(OP_PREFETCH)] = {
835 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
836 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
837 },
838 },
839 [C(L1I)] = {
840 [C(OP_READ)] = {
841 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
842 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
843 },
844 [C(OP_WRITE)] = {
845 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
846 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
847 },
848 [C(OP_PREFETCH)] = {
849 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
850 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
851 },
852 },
853 [C(LL)] = {
854 [C(OP_READ)] = {
855 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
856 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
857 },
858 [C(OP_WRITE)] = {
859 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
860 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
861 },
862 [C(OP_PREFETCH)] = {
863 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
864 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
865 },
866 },
867 [C(DTLB)] = {
868 /*
869 * The ARM performance counters can count micro DTLB misses,
870 * micro ITLB misses and main TLB misses. There isn't an event
871 * for TLB misses, so use the micro misses here and if users
872 * want the main TLB misses they can use a raw counter.
873 */
874 [C(OP_READ)] = {
875 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
876 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
877 },
878 [C(OP_WRITE)] = {
879 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
880 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
881 },
882 [C(OP_PREFETCH)] = {
883 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
884 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
885 },
886 },
887 [C(ITLB)] = {
888 [C(OP_READ)] = {
889 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
890 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
891 },
892 [C(OP_WRITE)] = {
893 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
894 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
895 },
896 [C(OP_PREFETCH)] = {
897 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
898 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
899 },
900 },
901 [C(BPU)] = {
902 [C(OP_READ)] = {
903 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
904 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
905 },
906 [C(OP_WRITE)] = {
907 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
908 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
909 },
910 [C(OP_PREFETCH)] = {
911 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
912 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
913 },
914 },
915};
916
917static inline unsigned long
918armv6_pmcr_read(void)
919{
920 u32 val;
921 asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val));
922 return val;
923}
924
925static inline void
926armv6_pmcr_write(unsigned long val)
927{
928 asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val));
929}
930
931#define ARMV6_PMCR_ENABLE (1 << 0)
932#define ARMV6_PMCR_CTR01_RESET (1 << 1)
933#define ARMV6_PMCR_CCOUNT_RESET (1 << 2)
934#define ARMV6_PMCR_CCOUNT_DIV (1 << 3)
935#define ARMV6_PMCR_COUNT0_IEN (1 << 4)
936#define ARMV6_PMCR_COUNT1_IEN (1 << 5)
937#define ARMV6_PMCR_CCOUNT_IEN (1 << 6)
938#define ARMV6_PMCR_COUNT0_OVERFLOW (1 << 8)
939#define ARMV6_PMCR_COUNT1_OVERFLOW (1 << 9)
940#define ARMV6_PMCR_CCOUNT_OVERFLOW (1 << 10)
941#define ARMV6_PMCR_EVT_COUNT0_SHIFT 20
942#define ARMV6_PMCR_EVT_COUNT0_MASK (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT)
943#define ARMV6_PMCR_EVT_COUNT1_SHIFT 12
944#define ARMV6_PMCR_EVT_COUNT1_MASK (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT)
945
946#define ARMV6_PMCR_OVERFLOWED_MASK \
947 (ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \
948 ARMV6_PMCR_CCOUNT_OVERFLOW)
949
950static inline int
951armv6_pmcr_has_overflowed(unsigned long pmcr)
952{
953 return (pmcr & ARMV6_PMCR_OVERFLOWED_MASK);
954}
955
956static inline int
957armv6_pmcr_counter_has_overflowed(unsigned long pmcr,
958 enum armv6_counters counter)
959{
960 int ret = 0;
961
962 if (ARMV6_CYCLE_COUNTER == counter)
963 ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW;
964 else if (ARMV6_COUNTER0 == counter)
965 ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW;
966 else if (ARMV6_COUNTER1 == counter)
967 ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW;
968 else
969 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
970
971 return ret;
972}
973
974static inline u32
975armv6pmu_read_counter(int counter)
976{
977 unsigned long value = 0;
978
979 if (ARMV6_CYCLE_COUNTER == counter)
980 asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value));
981 else if (ARMV6_COUNTER0 == counter)
982 asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value));
983 else if (ARMV6_COUNTER1 == counter)
984 asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value));
985 else
986 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
987
988 return value;
989}
990
991static inline void
992armv6pmu_write_counter(int counter,
993 u32 value)
994{
995 if (ARMV6_CYCLE_COUNTER == counter)
996 asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value));
997 else if (ARMV6_COUNTER0 == counter)
998 asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value));
999 else if (ARMV6_COUNTER1 == counter)
1000 asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r"(value));
1001 else
1002 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
1003}
1004
1005void
1006armv6pmu_enable_event(struct hw_perf_event *hwc,
1007 int idx)
1008{
1009 unsigned long val, mask, evt, flags;
1010
1011 if (ARMV6_CYCLE_COUNTER == idx) {
1012 mask = 0;
1013 evt = ARMV6_PMCR_CCOUNT_IEN;
1014 } else if (ARMV6_COUNTER0 == idx) {
1015 mask = ARMV6_PMCR_EVT_COUNT0_MASK;
1016 evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
1017 ARMV6_PMCR_COUNT0_IEN;
1018 } else if (ARMV6_COUNTER1 == idx) {
1019 mask = ARMV6_PMCR_EVT_COUNT1_MASK;
1020 evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) |
1021 ARMV6_PMCR_COUNT1_IEN;
1022 } else {
1023 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
1024 return;
1025 }
1026
1027 /*
1028 * Mask out the current event and set the counter to count the event
1029 * that we're interested in.
1030 */
1031 spin_lock_irqsave(&pmu_lock, flags);
1032 val = armv6_pmcr_read();
1033 val &= ~mask;
1034 val |= evt;
1035 armv6_pmcr_write(val);
1036 spin_unlock_irqrestore(&pmu_lock, flags);
1037}
1038
1039static irqreturn_t
1040armv6pmu_handle_irq(int irq_num,
1041 void *dev)
1042{
1043 unsigned long pmcr = armv6_pmcr_read();
1044 struct perf_sample_data data;
1045 struct cpu_hw_events *cpuc;
1046 struct pt_regs *regs;
1047 int idx;
1048
1049 if (!armv6_pmcr_has_overflowed(pmcr))
1050 return IRQ_NONE;
1051
1052 regs = get_irq_regs();
1053
1054 /*
1055 * The interrupts are cleared by writing the overflow flags back to
1056 * the control register. All of the other bits don't have any effect
1057 * if they are rewritten, so write the whole value back.
1058 */
1059 armv6_pmcr_write(pmcr);
1060
1061 perf_sample_data_init(&data, 0);
1062
1063 cpuc = &__get_cpu_var(cpu_hw_events);
1064 for (idx = 0; idx <= armpmu->num_events; ++idx) {
1065 struct perf_event *event = cpuc->events[idx];
1066 struct hw_perf_event *hwc;
1067
1068 if (!test_bit(idx, cpuc->active_mask))
1069 continue;
1070
1071 /*
1072 * We have a single interrupt for all counters. Check that
1073 * each counter has overflowed before we process it.
1074 */
1075 if (!armv6_pmcr_counter_has_overflowed(pmcr, idx))
1076 continue;
1077
1078 hwc = &event->hw;
1079 armpmu_event_update(event, hwc, idx);
1080 data.period = event->hw.last_period;
1081 if (!armpmu_event_set_period(event, hwc, idx))
1082 continue;
1083
1084 if (perf_event_overflow(event, 0, &data, regs))
1085 armpmu->disable(hwc, idx);
1086 }
1087
1088 /*
1089 * Handle the pending perf events.
1090 *
1091 * Note: this call *must* be run with interrupts disabled. For
1092 * platforms that can have the PMU interrupts raised as an NMI, this
1093 * will not work.
1094 */
1095 irq_work_run();
1096
1097 return IRQ_HANDLED;
1098}
1099
1100static void
1101armv6pmu_start(void)
1102{
1103 unsigned long flags, val;
1104
1105 spin_lock_irqsave(&pmu_lock, flags);
1106 val = armv6_pmcr_read();
1107 val |= ARMV6_PMCR_ENABLE;
1108 armv6_pmcr_write(val);
1109 spin_unlock_irqrestore(&pmu_lock, flags);
1110}
1111
1112void
1113armv6pmu_stop(void)
1114{
1115 unsigned long flags, val;
1116
1117 spin_lock_irqsave(&pmu_lock, flags);
1118 val = armv6_pmcr_read();
1119 val &= ~ARMV6_PMCR_ENABLE;
1120 armv6_pmcr_write(val);
1121 spin_unlock_irqrestore(&pmu_lock, flags);
1122}
1123
1124static inline int
1125armv6pmu_event_map(int config)
1126{
1127 int mapping = armv6_perf_map[config];
1128 if (HW_OP_UNSUPPORTED == mapping)
1129 mapping = -EOPNOTSUPP;
1130 return mapping;
1131}
1132
1133static inline int
1134armv6mpcore_pmu_event_map(int config)
1135{
1136 int mapping = armv6mpcore_perf_map[config];
1137 if (HW_OP_UNSUPPORTED == mapping)
1138 mapping = -EOPNOTSUPP;
1139 return mapping;
1140}
1141
1142static u64
1143armv6pmu_raw_event(u64 config)
1144{
1145 return config & 0xff;
1146}
1147
1148static int
1149armv6pmu_get_event_idx(struct cpu_hw_events *cpuc,
1150 struct hw_perf_event *event)
1151{
1152 /* Always place a cycle counter into the cycle counter. */
1153 if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) {
1154 if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask))
1155 return -EAGAIN;
1156
1157 return ARMV6_CYCLE_COUNTER;
1158 } else {
1159 /*
1160 * For anything other than a cycle counter, try and use
1161 * counter0 and counter1.
1162 */
1163 if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask)) {
1164 return ARMV6_COUNTER1;
1165 }
1166
1167 if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask)) {
1168 return ARMV6_COUNTER0;
1169 }
1170
1171 /* The counters are all in use. */
1172 return -EAGAIN;
1173 }
1174}
1175
1176static void
1177armv6pmu_disable_event(struct hw_perf_event *hwc,
1178 int idx)
1179{
1180 unsigned long val, mask, evt, flags;
1181
1182 if (ARMV6_CYCLE_COUNTER == idx) {
1183 mask = ARMV6_PMCR_CCOUNT_IEN;
1184 evt = 0;
1185 } else if (ARMV6_COUNTER0 == idx) {
1186 mask = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK;
1187 evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT;
1188 } else if (ARMV6_COUNTER1 == idx) {
1189 mask = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK;
1190 evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT;
1191 } else {
1192 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
1193 return;
1194 }
1195
1196 /*
1197 * Mask out the current event and set the counter to count the number
1198 * of ETM bus signal assertion cycles. The external reporting should
1199 * be disabled and so this should never increment.
1200 */
1201 spin_lock_irqsave(&pmu_lock, flags);
1202 val = armv6_pmcr_read();
1203 val &= ~mask;
1204 val |= evt;
1205 armv6_pmcr_write(val);
1206 spin_unlock_irqrestore(&pmu_lock, flags);
1207}
1208
1209static void
1210armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc,
1211 int idx)
1212{
1213 unsigned long val, mask, flags, evt = 0;
1214
1215 if (ARMV6_CYCLE_COUNTER == idx) {
1216 mask = ARMV6_PMCR_CCOUNT_IEN;
1217 } else if (ARMV6_COUNTER0 == idx) {
1218 mask = ARMV6_PMCR_COUNT0_IEN;
1219 } else if (ARMV6_COUNTER1 == idx) {
1220 mask = ARMV6_PMCR_COUNT1_IEN;
1221 } else {
1222 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
1223 return;
1224 }
1225
1226 /*
1227 * Unlike UP ARMv6, we don't have a way of stopping the counters. We
1228 * simply disable the interrupt reporting.
1229 */
1230 spin_lock_irqsave(&pmu_lock, flags);
1231 val = armv6_pmcr_read();
1232 val &= ~mask;
1233 val |= evt;
1234 armv6_pmcr_write(val);
1235 spin_unlock_irqrestore(&pmu_lock, flags);
1236}
1237
1238static const struct arm_pmu armv6pmu = {
1239 .id = ARM_PERF_PMU_ID_V6,
1240 .handle_irq = armv6pmu_handle_irq,
1241 .enable = armv6pmu_enable_event,
1242 .disable = armv6pmu_disable_event,
1243 .event_map = armv6pmu_event_map,
1244 .raw_event = armv6pmu_raw_event,
1245 .read_counter = armv6pmu_read_counter,
1246 .write_counter = armv6pmu_write_counter,
1247 .get_event_idx = armv6pmu_get_event_idx,
1248 .start = armv6pmu_start,
1249 .stop = armv6pmu_stop,
1250 .num_events = 3,
1251 .max_period = (1LLU << 32) - 1,
1252};
1253
1254/*
1255 * ARMv6mpcore is almost identical to single core ARMv6 with the exception
1256 * that some of the events have different enumerations and that there is no
1257 * *hack* to stop the programmable counters. To stop the counters we simply
1258 * disable the interrupt reporting and update the event. When unthrottling we
1259 * reset the period and enable the interrupt reporting.
1260 */
1261static const struct arm_pmu armv6mpcore_pmu = {
1262 .id = ARM_PERF_PMU_ID_V6MP,
1263 .handle_irq = armv6pmu_handle_irq,
1264 .enable = armv6pmu_enable_event,
1265 .disable = armv6mpcore_pmu_disable_event,
1266 .event_map = armv6mpcore_pmu_event_map,
1267 .raw_event = armv6pmu_raw_event,
1268 .read_counter = armv6pmu_read_counter,
1269 .write_counter = armv6pmu_write_counter,
1270 .get_event_idx = armv6pmu_get_event_idx,
1271 .start = armv6pmu_start,
1272 .stop = armv6pmu_stop,
1273 .num_events = 3,
1274 .max_period = (1LLU << 32) - 1,
1275};
1276
1277/*
1278 * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code.
1279 *
1280 * Copied from ARMv6 code, with the low level code inspired
1281 * by the ARMv7 Oprofile code.
1282 *
1283 * Cortex-A8 has up to 4 configurable performance counters and
1284 * a single cycle counter.
1285 * Cortex-A9 has up to 31 configurable performance counters and
1286 * a single cycle counter.
1287 *
1288 * All counters can be enabled/disabled and IRQ masked separately. The cycle
1289	 * counter can be reset on its own; the event counters are reset as a group.
1290 */
1291
1292/* Common ARMv7 event types */
1293enum armv7_perf_types {
1294 ARMV7_PERFCTR_PMNC_SW_INCR = 0x00,
1295 ARMV7_PERFCTR_IFETCH_MISS = 0x01,
1296 ARMV7_PERFCTR_ITLB_MISS = 0x02,
1297 ARMV7_PERFCTR_DCACHE_REFILL = 0x03,
1298 ARMV7_PERFCTR_DCACHE_ACCESS = 0x04,
1299 ARMV7_PERFCTR_DTLB_REFILL = 0x05,
1300 ARMV7_PERFCTR_DREAD = 0x06,
1301 ARMV7_PERFCTR_DWRITE = 0x07,
1302
1303 ARMV7_PERFCTR_EXC_TAKEN = 0x09,
1304 ARMV7_PERFCTR_EXC_EXECUTED = 0x0A,
1305 ARMV7_PERFCTR_CID_WRITE = 0x0B,
1306 /* ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS.
1307 * It counts:
1308 * - all branch instructions,
1309 * - instructions that explicitly write the PC,
1310 * - exception generating instructions.
1311 */
1312 ARMV7_PERFCTR_PC_WRITE = 0x0C,
1313 ARMV7_PERFCTR_PC_IMM_BRANCH = 0x0D,
1314 ARMV7_PERFCTR_UNALIGNED_ACCESS = 0x0F,
1315 ARMV7_PERFCTR_PC_BRANCH_MIS_PRED = 0x10,
1316 ARMV7_PERFCTR_CLOCK_CYCLES = 0x11,
1317
1318 ARMV7_PERFCTR_PC_BRANCH_MIS_USED = 0x12,
1319
1320 ARMV7_PERFCTR_CPU_CYCLES = 0xFF
1321};
1322
1323/* ARMv7 Cortex-A8 specific event types */
1324enum armv7_a8_perf_types {
1325 ARMV7_PERFCTR_INSTR_EXECUTED = 0x08,
1326
1327 ARMV7_PERFCTR_PC_PROC_RETURN = 0x0E,
1328
1329 ARMV7_PERFCTR_WRITE_BUFFER_FULL = 0x40,
1330 ARMV7_PERFCTR_L2_STORE_MERGED = 0x41,
1331 ARMV7_PERFCTR_L2_STORE_BUFF = 0x42,
1332 ARMV7_PERFCTR_L2_ACCESS = 0x43,
1333 ARMV7_PERFCTR_L2_CACH_MISS = 0x44,
1334 ARMV7_PERFCTR_AXI_READ_CYCLES = 0x45,
1335 ARMV7_PERFCTR_AXI_WRITE_CYCLES = 0x46,
1336 ARMV7_PERFCTR_MEMORY_REPLAY = 0x47,
1337 ARMV7_PERFCTR_UNALIGNED_ACCESS_REPLAY = 0x48,
1338 ARMV7_PERFCTR_L1_DATA_MISS = 0x49,
1339 ARMV7_PERFCTR_L1_INST_MISS = 0x4A,
1340 ARMV7_PERFCTR_L1_DATA_COLORING = 0x4B,
1341 ARMV7_PERFCTR_L1_NEON_DATA = 0x4C,
1342 ARMV7_PERFCTR_L1_NEON_CACH_DATA = 0x4D,
1343 ARMV7_PERFCTR_L2_NEON = 0x4E,
1344 ARMV7_PERFCTR_L2_NEON_HIT = 0x4F,
1345 ARMV7_PERFCTR_L1_INST = 0x50,
1346 ARMV7_PERFCTR_PC_RETURN_MIS_PRED = 0x51,
1347 ARMV7_PERFCTR_PC_BRANCH_FAILED = 0x52,
1348 ARMV7_PERFCTR_PC_BRANCH_TAKEN = 0x53,
1349 ARMV7_PERFCTR_PC_BRANCH_EXECUTED = 0x54,
1350 ARMV7_PERFCTR_OP_EXECUTED = 0x55,
1351 ARMV7_PERFCTR_CYCLES_INST_STALL = 0x56,
1352 ARMV7_PERFCTR_CYCLES_INST = 0x57,
1353 ARMV7_PERFCTR_CYCLES_NEON_DATA_STALL = 0x58,
1354 ARMV7_PERFCTR_CYCLES_NEON_INST_STALL = 0x59,
1355 ARMV7_PERFCTR_NEON_CYCLES = 0x5A,
1356
1357 ARMV7_PERFCTR_PMU0_EVENTS = 0x70,
1358 ARMV7_PERFCTR_PMU1_EVENTS = 0x71,
1359 ARMV7_PERFCTR_PMU_EVENTS = 0x72,
1360};
1361
1362/* ARMv7 Cortex-A9 specific event types */
1363enum armv7_a9_perf_types {
1364 ARMV7_PERFCTR_JAVA_HW_BYTECODE_EXEC = 0x40,
1365 ARMV7_PERFCTR_JAVA_SW_BYTECODE_EXEC = 0x41,
1366 ARMV7_PERFCTR_JAZELLE_BRANCH_EXEC = 0x42,
1367
1368 ARMV7_PERFCTR_COHERENT_LINE_MISS = 0x50,
1369 ARMV7_PERFCTR_COHERENT_LINE_HIT = 0x51,
1370
1371 ARMV7_PERFCTR_ICACHE_DEP_STALL_CYCLES = 0x60,
1372 ARMV7_PERFCTR_DCACHE_DEP_STALL_CYCLES = 0x61,
1373 ARMV7_PERFCTR_TLB_MISS_DEP_STALL_CYCLES = 0x62,
1374 ARMV7_PERFCTR_STREX_EXECUTED_PASSED = 0x63,
1375 ARMV7_PERFCTR_STREX_EXECUTED_FAILED = 0x64,
1376 ARMV7_PERFCTR_DATA_EVICTION = 0x65,
1377 ARMV7_PERFCTR_ISSUE_STAGE_NO_INST = 0x66,
1378 ARMV7_PERFCTR_ISSUE_STAGE_EMPTY = 0x67,
1379 ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE = 0x68,
1380
1381 ARMV7_PERFCTR_PREDICTABLE_FUNCT_RETURNS = 0x6E,
1382
1383 ARMV7_PERFCTR_MAIN_UNIT_EXECUTED_INST = 0x70,
1384 ARMV7_PERFCTR_SECOND_UNIT_EXECUTED_INST = 0x71,
1385 ARMV7_PERFCTR_LD_ST_UNIT_EXECUTED_INST = 0x72,
1386 ARMV7_PERFCTR_FP_EXECUTED_INST = 0x73,
1387 ARMV7_PERFCTR_NEON_EXECUTED_INST = 0x74,
1388
1389 ARMV7_PERFCTR_PLD_FULL_DEP_STALL_CYCLES = 0x80,
1390 ARMV7_PERFCTR_DATA_WR_DEP_STALL_CYCLES = 0x81,
1391 ARMV7_PERFCTR_ITLB_MISS_DEP_STALL_CYCLES = 0x82,
1392 ARMV7_PERFCTR_DTLB_MISS_DEP_STALL_CYCLES = 0x83,
1393 ARMV7_PERFCTR_MICRO_ITLB_MISS_DEP_STALL_CYCLES = 0x84,
1394 ARMV7_PERFCTR_MICRO_DTLB_MISS_DEP_STALL_CYCLES = 0x85,
1395 ARMV7_PERFCTR_DMB_DEP_STALL_CYCLES = 0x86,
1396
1397 ARMV7_PERFCTR_INTGR_CLK_ENABLED_CYCLES = 0x8A,
1398 ARMV7_PERFCTR_DATA_ENGINE_CLK_EN_CYCLES = 0x8B,
1399
1400 ARMV7_PERFCTR_ISB_INST = 0x90,
1401 ARMV7_PERFCTR_DSB_INST = 0x91,
1402 ARMV7_PERFCTR_DMB_INST = 0x92,
1403 ARMV7_PERFCTR_EXT_INTERRUPTS = 0x93,
1404
1405 ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_COMPLETED = 0xA0,
1406 ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_SKIPPED = 0xA1,
1407 ARMV7_PERFCTR_PLE_FIFO_FLUSH = 0xA2,
1408 ARMV7_PERFCTR_PLE_RQST_COMPLETED = 0xA3,
1409 ARMV7_PERFCTR_PLE_FIFO_OVERFLOW = 0xA4,
1410 ARMV7_PERFCTR_PLE_RQST_PROG = 0xA5
1411};
1412
1413/*
1414 * Cortex-A8 HW events mapping
1415 *
1416 * The hardware events that we support. We do support cache operations but
1417	 * we have Harvard caches and no way to combine instruction and data
1418 * accesses/misses in hardware.
1419 */
1420static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = {
1421 [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
1422 [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
1423 [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
1424 [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
1425 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
1426 [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1427 [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
1428};
1429
1430static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
1431 [PERF_COUNT_HW_CACHE_OP_MAX]
1432 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
1433 [C(L1D)] = {
1434 /*
1435 * The performance counters don't differentiate between read
1436 * and write accesses/misses so this isn't strictly correct,
1437 * but it's the best we can do. Writes and reads get
1438 * combined.
1439 */
1440 [C(OP_READ)] = {
1441 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS,
1442 [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL,
1443 },
1444 [C(OP_WRITE)] = {
1445 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS,
1446 [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL,
1447 },
1448 [C(OP_PREFETCH)] = {
1449 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1450 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
1451 },
1452 },
1453 [C(L1I)] = {
1454 [C(OP_READ)] = {
1455 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST,
1456 [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS,
1457 },
1458 [C(OP_WRITE)] = {
1459 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST,
1460 [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS,
1461 },
1462 [C(OP_PREFETCH)] = {
1463 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1464 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
1465 },
1466 },
1467 [C(LL)] = {
1468 [C(OP_READ)] = {
1469 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS,
1470 [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS,
1471 },
1472 [C(OP_WRITE)] = {
1473 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS,
1474 [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS,
1475 },
1476 [C(OP_PREFETCH)] = {
1477 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1478 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
1479 },
1480 },
1481 [C(DTLB)] = {
1482 /*
1483 * Only ITLB misses and DTLB refills are supported.
1484	 * For a finer breakdown of DTLB misses, a raw counter
1485	 * must be used.
1486 */
1487 [C(OP_READ)] = {
1488 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1489 [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
1490 },
1491 [C(OP_WRITE)] = {
1492 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1493 [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
1494 },
1495 [C(OP_PREFETCH)] = {
1496 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1497 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
1498 },
1499 },
1500 [C(ITLB)] = {
1501 [C(OP_READ)] = {
1502 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1503 [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS,
1504 },
1505 [C(OP_WRITE)] = {
1506 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1507 [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS,
1508 },
1509 [C(OP_PREFETCH)] = {
1510 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1511 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
1512 },
1513 },
1514 [C(BPU)] = {
1515 [C(OP_READ)] = {
1516 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE,
1517 [C(RESULT_MISS)]
1518 = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1519 },
1520 [C(OP_WRITE)] = {
1521 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE,
1522 [C(RESULT_MISS)]
1523 = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1524 },
1525 [C(OP_PREFETCH)] = {
1526 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1527 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
1528 },
1529 },
1530};
1531
1532/*
1533 * Cortex-A9 HW events mapping
1534 */
1535static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = {
1536 [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
1537 [PERF_COUNT_HW_INSTRUCTIONS] =
1538 ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE,
1539 [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_COHERENT_LINE_HIT,
1540 [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_COHERENT_LINE_MISS,
1541 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
1542 [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1543 [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
1544};
1545
1546static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
1547 [PERF_COUNT_HW_CACHE_OP_MAX]
1548 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
1549 [C(L1D)] = {
1550 /*
1551 * The performance counters don't differentiate between read
1552 * and write accesses/misses so this isn't strictly correct,
1553 * but it's the best we can do. Writes and reads get
1554 * combined.
1555 */
1556 [C(OP_READ)] = {
1557 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS,
1558 [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL,
1559 },
1560 [C(OP_WRITE)] = {
1561 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS,
1562 [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL,
1563 },
1564 [C(OP_PREFETCH)] = {
1565 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1566 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
1567 },
1568 },
1569 [C(L1I)] = {
1570 [C(OP_READ)] = {
1571 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1572 [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS,
1573 },
1574 [C(OP_WRITE)] = {
1575 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1576 [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS,
1577 },
1578 [C(OP_PREFETCH)] = {
1579 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1580 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
1581 },
1582 },
1583 [C(LL)] = {
1584 [C(OP_READ)] = {
1585 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1586 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
1587 },
1588 [C(OP_WRITE)] = {
1589 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1590 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
1591 },
1592 [C(OP_PREFETCH)] = {
1593 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1594 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
1595 },
1596 },
1597 [C(DTLB)] = {
1598 /*
1599 * Only ITLB misses and DTLB refills are supported.
1600	 * For a finer breakdown of DTLB misses, a raw counter
1601	 * must be used.
1602 */
1603 [C(OP_READ)] = {
1604 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1605 [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
1606 },
1607 [C(OP_WRITE)] = {
1608 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1609 [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
1610 },
1611 [C(OP_PREFETCH)] = {
1612 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1613 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
1614 },
1615 },
1616 [C(ITLB)] = {
1617 [C(OP_READ)] = {
1618 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1619 [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS,
1620 },
1621 [C(OP_WRITE)] = {
1622 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1623 [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS,
1624 },
1625 [C(OP_PREFETCH)] = {
1626 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1627 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
1628 },
1629 },
1630 [C(BPU)] = {
1631 [C(OP_READ)] = {
1632 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE,
1633 [C(RESULT_MISS)]
1634 = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1635 },
1636 [C(OP_WRITE)] = {
1637 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE,
1638 [C(RESULT_MISS)]
1639 = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1640 },
1641 [C(OP_PREFETCH)] = {
1642 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
1643 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
1644 },
1645 },
1646};
1647
1648/*
1649 * Perf Events counters
1650 */
1651enum armv7_counters {
1652 ARMV7_CYCLE_COUNTER = 1, /* Cycle counter */
1653 ARMV7_COUNTER0 = 2, /* First event counter */
1654};
1655
1656/*
1657 * The cycle counter is ARMV7_CYCLE_COUNTER.
1658 * The first event counter is ARMV7_COUNTER0.
1659 * The last event counter is (ARMV7_COUNTER0 + armpmu->num_events - 1).
1660 */
1661#define ARMV7_COUNTER_LAST (ARMV7_COUNTER0 + armpmu->num_events - 1)
1662
1663/*
1664 * ARMv7 low level PMNC access
1665 */
1666
1667/*
1668 * Per-CPU PMNC: config reg
1669 */
1670#define ARMV7_PMNC_E (1 << 0) /* Enable all counters */
1671#define ARMV7_PMNC_P (1 << 1) /* Reset all counters */
1672#define ARMV7_PMNC_C (1 << 2) /* Cycle counter reset */
1673#define ARMV7_PMNC_D (1 << 3) /* CCNT counts every 64th cpu cycle */
1674#define ARMV7_PMNC_X (1 << 4) /* Export to ETM */
1675#define ARMV7_PMNC_DP	(1 << 5)	/* Disable CCNT if non-invasive debug */
1676#define ARMV7_PMNC_N_SHIFT 11 /* Number of counters supported */
1677#define ARMV7_PMNC_N_MASK 0x1f
1678#define ARMV7_PMNC_MASK 0x3f /* Mask for writable bits */
1679
1680/*
1681 * Available counters
1682 */
1683#define ARMV7_CNT0 0 /* First event counter */
1684#define ARMV7_CCNT 31 /* Cycle counter */
1685
1686/* Perf Event to low level counters mapping */
1687#define ARMV7_EVENT_CNT_TO_CNTx (ARMV7_COUNTER0 - ARMV7_CNT0)
1688
1689/*
1690 * CNTENS: counters enable reg
1691 */
1692#define ARMV7_CNTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
1693#define ARMV7_CNTENS_C (1 << ARMV7_CCNT)
1694
1695/*
1696 * CNTENC: counters disable reg
1697 */
1698#define ARMV7_CNTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
1699#define ARMV7_CNTENC_C (1 << ARMV7_CCNT)
1700
1701/*
1702 * INTENS: counters overflow interrupt enable reg
1703 */
1704#define ARMV7_INTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
1705#define ARMV7_INTENS_C (1 << ARMV7_CCNT)
1706
1707/*
1708 * INTENC: counters overflow interrupt disable reg
1709 */
1710#define ARMV7_INTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
1711#define ARMV7_INTENC_C (1 << ARMV7_CCNT)
1712
1713/*
1714 * EVTSEL: Event selection reg
1715 */
1716#define ARMV7_EVTSEL_MASK 0xff /* Mask for writable bits */
1717
1718/*
1719 * SELECT: Counter selection reg
1720 */
1721#define ARMV7_SELECT_MASK 0x1f /* Mask for writable bits */
1722
1723/*
1724 * FLAG: counters overflow flag status reg
1725 */
1726#define ARMV7_FLAG_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
1727#define ARMV7_FLAG_C (1 << ARMV7_CCNT)
1728#define ARMV7_FLAG_MASK 0xffffffff /* Mask for writable bits */
1729#define ARMV7_OVERFLOWED_MASK ARMV7_FLAG_MASK
1730
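
The index-to-bit mapping encoded by the macros above is easy to mis-read, so here is a small, self-contained illustration (not part of the patch): event counter indices start at ARMV7_COUNTER0 == 2 and map down to hardware counter bits starting at bit 0, while the cycle counter always uses bit 31. The constants are copied from the defines above; the helper itself is hypothetical.

    /* Illustration only -- mirrors the ARMV7_* macros above, not patch code. */
    #include <stdio.h>

    #define ARMV7_CYCLE_COUNTER     1
    #define ARMV7_COUNTER0          2
    #define ARMV7_CNT0              0
    #define ARMV7_CCNT              31
    #define ARMV7_EVENT_CNT_TO_CNTx (ARMV7_COUNTER0 - ARMV7_CNT0)

    /* Which CNTENS/CNTENC/INTENS/FLAG bit does a perf counter index use? */
    static unsigned int armv7_idx_to_bit(int idx)
    {
            if (idx == ARMV7_CYCLE_COUNTER)
                    return 1u << ARMV7_CCNT;                /* bit 31 */
            return 1u << (idx - ARMV7_EVENT_CNT_TO_CNTx);   /* bit (idx - 2) */
    }

    int main(void)
    {
            /* First event counter (idx 2) -> 0x1, cycle counter (idx 1) -> 0x80000000 */
            printf("%#x %#x\n", armv7_idx_to_bit(ARMV7_COUNTER0),
                   armv7_idx_to_bit(ARMV7_CYCLE_COUNTER));
            return 0;
    }
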
1731static inline unsigned long armv7_pmnc_read(void)
1732{
1733 u32 val;
1734 asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val));
1735 return val;
1736}
1737
1738static inline void armv7_pmnc_write(unsigned long val)
1739{
1740 val &= ARMV7_PMNC_MASK;
1741 asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val));
1742}
1743
1744static inline int armv7_pmnc_has_overflowed(unsigned long pmnc)
1745{
1746 return pmnc & ARMV7_OVERFLOWED_MASK;
1747}
1748
1749static inline int armv7_pmnc_counter_has_overflowed(unsigned long pmnc,
1750 enum armv7_counters counter)
1751{
1752 int ret = 0;
1753
1754 if (counter == ARMV7_CYCLE_COUNTER)
1755 ret = pmnc & ARMV7_FLAG_C;
1756 else if ((counter >= ARMV7_COUNTER0) && (counter <= ARMV7_COUNTER_LAST))
1757 ret = pmnc & ARMV7_FLAG_P(counter);
1758 else
1759 pr_err("CPU%u checking wrong counter %d overflow status\n",
1760 smp_processor_id(), counter);
1761
1762 return ret;
1763}
1764
1765static inline int armv7_pmnc_select_counter(unsigned int idx)
1766{
1767 u32 val;
1768
1769 if ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST)) {
1770 pr_err("CPU%u selecting wrong PMNC counter"
1771 " %d\n", smp_processor_id(), idx);
1772 return -1;
1773 }
1774
1775 val = (idx - ARMV7_EVENT_CNT_TO_CNTx) & ARMV7_SELECT_MASK;
1776 asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val));
1777
1778 return idx;
1779}
1780
1781static inline u32 armv7pmu_read_counter(int idx)
1782{
1783 unsigned long value = 0;
1784
1785 if (idx == ARMV7_CYCLE_COUNTER)
1786 asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value));
1787 else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
1788 if (armv7_pmnc_select_counter(idx) == idx)
1789 asm volatile("mrc p15, 0, %0, c9, c13, 2"
1790 : "=r" (value));
1791 } else
1792 pr_err("CPU%u reading wrong counter %d\n",
1793 smp_processor_id(), idx);
1794
1795 return value;
1796}
1797
1798static inline void armv7pmu_write_counter(int idx, u32 value)
1799{
1800 if (idx == ARMV7_CYCLE_COUNTER)
1801 asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value));
1802 else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
1803 if (armv7_pmnc_select_counter(idx) == idx)
1804 asm volatile("mcr p15, 0, %0, c9, c13, 2"
1805 : : "r" (value));
1806 } else
1807 pr_err("CPU%u writing wrong counter %d\n",
1808 smp_processor_id(), idx);
1809}
1810
1811static inline void armv7_pmnc_write_evtsel(unsigned int idx, u32 val)
1812{
1813 if (armv7_pmnc_select_counter(idx) == idx) {
1814 val &= ARMV7_EVTSEL_MASK;
1815 asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val));
1816 }
1817}
1818
1819static inline u32 armv7_pmnc_enable_counter(unsigned int idx)
1820{
1821 u32 val;
1822
1823 if ((idx != ARMV7_CYCLE_COUNTER) &&
1824 ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
1825 pr_err("CPU%u enabling wrong PMNC counter"
1826 " %d\n", smp_processor_id(), idx);
1827 return -1;
1828 }
1829
1830 if (idx == ARMV7_CYCLE_COUNTER)
1831 val = ARMV7_CNTENS_C;
1832 else
1833 val = ARMV7_CNTENS_P(idx);
1834
1835 asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val));
1836
1837 return idx;
1838}
1839
1840static inline u32 armv7_pmnc_disable_counter(unsigned int idx)
1841{
1842 u32 val;
1843
1844
1845 if ((idx != ARMV7_CYCLE_COUNTER) &&
1846 ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
1847 pr_err("CPU%u disabling wrong PMNC counter"
1848 " %d\n", smp_processor_id(), idx);
1849 return -1;
1850 }
1851
1852 if (idx == ARMV7_CYCLE_COUNTER)
1853 val = ARMV7_CNTENC_C;
1854 else
1855 val = ARMV7_CNTENC_P(idx);
1856
1857 asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val));
1858
1859 return idx;
1860}
1861
1862static inline u32 armv7_pmnc_enable_intens(unsigned int idx)
1863{
1864 u32 val;
1865
1866 if ((idx != ARMV7_CYCLE_COUNTER) &&
1867 ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
1868 pr_err("CPU%u enabling wrong PMNC counter"
1869 " interrupt enable %d\n", smp_processor_id(), idx);
1870 return -1;
1871 }
1872
1873 if (idx == ARMV7_CYCLE_COUNTER)
1874 val = ARMV7_INTENS_C;
1875 else
1876 val = ARMV7_INTENS_P(idx);
1877
1878 asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val));
1879
1880 return idx;
1881}
1882
1883static inline u32 armv7_pmnc_disable_intens(unsigned int idx)
1884{
1885 u32 val;
1886
1887 if ((idx != ARMV7_CYCLE_COUNTER) &&
1888 ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
1889 pr_err("CPU%u disabling wrong PMNC counter"
1890 " interrupt enable %d\n", smp_processor_id(), idx);
1891 return -1;
1892 }
1893
1894 if (idx == ARMV7_CYCLE_COUNTER)
1895 val = ARMV7_INTENC_C;
1896 else
1897 val = ARMV7_INTENC_P(idx);
1898
1899 asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val));
1900
1901 return idx;
1902}
1903
1904static inline u32 armv7_pmnc_getreset_flags(void)
1905{
1906 u32 val;
1907
1908 /* Read */
1909 asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
1910
1911 /* Write to clear flags */
1912 val &= ARMV7_FLAG_MASK;
1913 asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val));
1914
1915 return val;
1916}
1917
1918#ifdef DEBUG
1919static void armv7_pmnc_dump_regs(void)
1920{
1921 u32 val;
1922 unsigned int cnt;
1923
1924 printk(KERN_INFO "PMNC registers dump:\n");
1925
1926 asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val));
1927 printk(KERN_INFO "PMNC =0x%08x\n", val);
1928
1929 asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val));
1930 printk(KERN_INFO "CNTENS=0x%08x\n", val);
1931
1932 asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val));
1933 printk(KERN_INFO "INTENS=0x%08x\n", val);
1934
1935 asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
1936 printk(KERN_INFO "FLAGS =0x%08x\n", val);
1937
1938 asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val));
1939 printk(KERN_INFO "SELECT=0x%08x\n", val);
1940
1941 asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val));
1942 printk(KERN_INFO "CCNT =0x%08x\n", val);
1943
1944 for (cnt = ARMV7_COUNTER0; cnt < ARMV7_COUNTER_LAST; cnt++) {
1945 armv7_pmnc_select_counter(cnt);
1946 asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val));
1947 printk(KERN_INFO "CNT[%d] count =0x%08x\n",
1948 cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
1949 asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val));
1950 printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n",
1951 cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
1952 }
1953}
1954#endif
1955
1956void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx)
1957{
1958 unsigned long flags;
1959
1960 /*
1961 * Enable counter and interrupt, and set the counter to count
1962 * the event that we're interested in.
1963 */
1964 spin_lock_irqsave(&pmu_lock, flags);
1965
1966 /*
1967 * Disable counter
1968 */
1969 armv7_pmnc_disable_counter(idx);
1970
1971 /*
1972 * Set event (if destined for PMNx counters)
1973 * We don't need to set the event if it's a cycle count
1974 */
1975 if (idx != ARMV7_CYCLE_COUNTER)
1976 armv7_pmnc_write_evtsel(idx, hwc->config_base);
1977
1978 /*
1979 * Enable interrupt for this counter
1980 */
1981 armv7_pmnc_enable_intens(idx);
1982
1983 /*
1984 * Enable counter
1985 */
1986 armv7_pmnc_enable_counter(idx);
1987
1988 spin_unlock_irqrestore(&pmu_lock, flags);
1989}
1990
1991static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx)
1992{
1993 unsigned long flags;
1994
1995 /*
1996 * Disable counter and interrupt
1997 */
1998 spin_lock_irqsave(&pmu_lock, flags);
1999
2000 /*
2001 * Disable counter
2002 */
2003 armv7_pmnc_disable_counter(idx);
2004
2005 /*
2006 * Disable interrupt for this counter
2007 */
2008 armv7_pmnc_disable_intens(idx);
2009
2010 spin_unlock_irqrestore(&pmu_lock, flags);
2011}
2012
2013static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
2014{
2015 unsigned long pmnc;
2016 struct perf_sample_data data;
2017 struct cpu_hw_events *cpuc;
2018 struct pt_regs *regs;
2019 int idx;
2020
2021 /*
2022 * Get and reset the IRQ flags
2023 */
2024 pmnc = armv7_pmnc_getreset_flags();
2025
2026 /*
2027 * Did an overflow occur?
2028 */
2029 if (!armv7_pmnc_has_overflowed(pmnc))
2030 return IRQ_NONE;
2031
2032 /*
2033 * Handle the counter(s) overflow(s)
2034 */
2035 regs = get_irq_regs();
2036
2037 perf_sample_data_init(&data, 0);
2038
2039 cpuc = &__get_cpu_var(cpu_hw_events);
2040 for (idx = 0; idx <= armpmu->num_events; ++idx) {
2041 struct perf_event *event = cpuc->events[idx];
2042 struct hw_perf_event *hwc;
2043
2044 if (!test_bit(idx, cpuc->active_mask))
2045 continue;
2046
2047 /*
2048 * We have a single interrupt for all counters. Check that
2049 * each counter has overflowed before we process it.
2050 */
2051 if (!armv7_pmnc_counter_has_overflowed(pmnc, idx))
2052 continue;
2053
2054 hwc = &event->hw;
2055 armpmu_event_update(event, hwc, idx);
2056 data.period = event->hw.last_period;
2057 if (!armpmu_event_set_period(event, hwc, idx))
2058 continue;
2059
2060 if (perf_event_overflow(event, 0, &data, regs))
2061 armpmu->disable(hwc, idx);
2062 }
2063
2064 /*
2065 * Handle the pending perf events.
2066 *
2067 * Note: this call *must* be run with interrupts disabled. For
2068 * platforms that can have the PMU interrupts raised as an NMI, this
2069 * will not work.
2070 */
2071 irq_work_run();
2072
2073 return IRQ_HANDLED;
2074}
2075
2076static void armv7pmu_start(void)
2077{
2078 unsigned long flags;
2079
2080 spin_lock_irqsave(&pmu_lock, flags);
2081 /* Enable all counters */
2082 armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E);
2083 spin_unlock_irqrestore(&pmu_lock, flags);
2084}
2085
2086static void armv7pmu_stop(void)
2087{
2088 unsigned long flags;
2089
2090 spin_lock_irqsave(&pmu_lock, flags);
2091 /* Disable all counters */
2092 armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E);
2093 spin_unlock_irqrestore(&pmu_lock, flags);
2094}
2095
2096static inline int armv7_a8_pmu_event_map(int config)
2097{
2098 int mapping = armv7_a8_perf_map[config];
2099 if (HW_OP_UNSUPPORTED == mapping)
2100 mapping = -EOPNOTSUPP;
2101 return mapping;
2102}
2103
2104static inline int armv7_a9_pmu_event_map(int config)
2105{
2106 int mapping = armv7_a9_perf_map[config];
2107 if (HW_OP_UNSUPPORTED == mapping)
2108 mapping = -EOPNOTSUPP;
2109 return mapping;
2110}
2111
2112static u64 armv7pmu_raw_event(u64 config)
2113{
2114 return config & 0xff;
2115}
2116
2117static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc,
2118 struct hw_perf_event *event)
2119{
2120 int idx;
2121
2122	/* Always map the cycle-count event onto the cycle counter. */
2123 if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) {
2124 if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask))
2125 return -EAGAIN;
2126
2127 return ARMV7_CYCLE_COUNTER;
2128 } else {
2129 /*
2130	 * For anything other than a cycle counter, try to use
2131	 * the event counters.
2132 */
2133 for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) {
2134 if (!test_and_set_bit(idx, cpuc->used_mask))
2135 return idx;
2136 }
2137
2138 /* The counters are all in use. */
2139 return -EAGAIN;
2140 }
2141}
2142
2143static struct arm_pmu armv7pmu = {
2144 .handle_irq = armv7pmu_handle_irq,
2145 .enable = armv7pmu_enable_event,
2146 .disable = armv7pmu_disable_event,
2147 .raw_event = armv7pmu_raw_event,
2148 .read_counter = armv7pmu_read_counter,
2149 .write_counter = armv7pmu_write_counter,
2150 .get_event_idx = armv7pmu_get_event_idx,
2151 .start = armv7pmu_start,
2152 .stop = armv7pmu_stop,
2153 .max_period = (1LLU << 32) - 1,
2154};
2155
2156static u32 __init armv7_reset_read_pmnc(void)
2157{
2158 u32 nb_cnt;
2159
2160 /* Initialize & Reset PMNC: C and P bits */
2161 armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C);
2162
2163 /* Read the nb of CNTx counters supported from PMNC */
2164 nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK;
2165
2166 /* Add the CPU cycles counter and return */
2167 return nb_cnt + 1;
2168}
2169
2170/*
2171 * ARMv5 [xscale] Performance counter handling code.
2172 *
2173 * Based on xscale OProfile code.
2174 *
2175 * There are two variants of the xscale PMU that we support:
2176 * - xscale1pmu: 2 event counters and a cycle counter
2177 * - xscale2pmu: 4 event counters and a cycle counter
2178 * The two variants share event definitions, but have different
2179 * PMU structures.
2180 */
2181
2182enum xscale_perf_types {
2183 XSCALE_PERFCTR_ICACHE_MISS = 0x00,
2184 XSCALE_PERFCTR_ICACHE_NO_DELIVER = 0x01,
2185 XSCALE_PERFCTR_DATA_STALL = 0x02,
2186 XSCALE_PERFCTR_ITLB_MISS = 0x03,
2187 XSCALE_PERFCTR_DTLB_MISS = 0x04,
2188 XSCALE_PERFCTR_BRANCH = 0x05,
2189 XSCALE_PERFCTR_BRANCH_MISS = 0x06,
2190 XSCALE_PERFCTR_INSTRUCTION = 0x07,
2191 XSCALE_PERFCTR_DCACHE_FULL_STALL = 0x08,
2192 XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG = 0x09,
2193 XSCALE_PERFCTR_DCACHE_ACCESS = 0x0A,
2194 XSCALE_PERFCTR_DCACHE_MISS = 0x0B,
2195 XSCALE_PERFCTR_DCACHE_WRITE_BACK = 0x0C,
2196 XSCALE_PERFCTR_PC_CHANGED = 0x0D,
2197 XSCALE_PERFCTR_BCU_REQUEST = 0x10,
2198 XSCALE_PERFCTR_BCU_FULL = 0x11,
2199 XSCALE_PERFCTR_BCU_DRAIN = 0x12,
2200 XSCALE_PERFCTR_BCU_ECC_NO_ELOG = 0x14,
2201 XSCALE_PERFCTR_BCU_1_BIT_ERR = 0x15,
2202 XSCALE_PERFCTR_RMW = 0x16,
2203 /* XSCALE_PERFCTR_CCNT is not hardware defined */
2204 XSCALE_PERFCTR_CCNT = 0xFE,
2205 XSCALE_PERFCTR_UNUSED = 0xFF,
2206};
2207
2208enum xscale_counters {
2209 XSCALE_CYCLE_COUNTER = 1,
2210 XSCALE_COUNTER0,
2211 XSCALE_COUNTER1,
2212 XSCALE_COUNTER2,
2213 XSCALE_COUNTER3,
2214};
2215
2216static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = {
2217 [PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT,
2218 [PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION,
2219 [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
2220 [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
2221 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH,
2222 [PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS,
2223 [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
2224};
2225
2226static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
2227 [PERF_COUNT_HW_CACHE_OP_MAX]
2228 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
2229 [C(L1D)] = {
2230 [C(OP_READ)] = {
2231 [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
2232 [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
2233 },
2234 [C(OP_WRITE)] = {
2235 [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
2236 [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
2237 },
2238 [C(OP_PREFETCH)] = {
2239 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2240 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
2241 },
2242 },
2243 [C(L1I)] = {
2244 [C(OP_READ)] = {
2245 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2246 [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS,
2247 },
2248 [C(OP_WRITE)] = {
2249 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2250 [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS,
2251 },
2252 [C(OP_PREFETCH)] = {
2253 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2254 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
2255 },
2256 },
2257 [C(LL)] = {
2258 [C(OP_READ)] = {
2259 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2260 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
2261 },
2262 [C(OP_WRITE)] = {
2263 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2264 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
2265 },
2266 [C(OP_PREFETCH)] = {
2267 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2268 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
2269 },
2270 },
2271 [C(DTLB)] = {
2272 [C(OP_READ)] = {
2273 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2274 [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
2275 },
2276 [C(OP_WRITE)] = {
2277 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2278 [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
2279 },
2280 [C(OP_PREFETCH)] = {
2281 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2282 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
2283 },
2284 },
2285 [C(ITLB)] = {
2286 [C(OP_READ)] = {
2287 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2288 [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
2289 },
2290 [C(OP_WRITE)] = {
2291 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2292 [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
2293 },
2294 [C(OP_PREFETCH)] = {
2295 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2296 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
2297 },
2298 },
2299 [C(BPU)] = {
2300 [C(OP_READ)] = {
2301 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2302 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
2303 },
2304 [C(OP_WRITE)] = {
2305 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2306 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
2307 },
2308 [C(OP_PREFETCH)] = {
2309 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
2310 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
2311 },
2312 },
2313};
2314
2315#define XSCALE_PMU_ENABLE 0x001
2316#define XSCALE_PMN_RESET 0x002
2317#define XSCALE_CCNT_RESET 0x004
2318#define XSCALE_PMU_RESET	(XSCALE_CCNT_RESET | XSCALE_PMN_RESET)
2319#define XSCALE_PMU_CNT64 0x008
2320
2321static inline int
2322xscalepmu_event_map(int config)
2323{
2324 int mapping = xscale_perf_map[config];
2325 if (HW_OP_UNSUPPORTED == mapping)
2326 mapping = -EOPNOTSUPP;
2327 return mapping;
2328}
2329
2330static u64
2331xscalepmu_raw_event(u64 config)
2332{
2333 return config & 0xff;
2334}
2335
2336#define XSCALE1_OVERFLOWED_MASK 0x700
2337#define XSCALE1_CCOUNT_OVERFLOW 0x400
2338#define XSCALE1_COUNT0_OVERFLOW 0x100
2339#define XSCALE1_COUNT1_OVERFLOW 0x200
2340#define XSCALE1_CCOUNT_INT_EN 0x040
2341#define XSCALE1_COUNT0_INT_EN 0x010
2342#define XSCALE1_COUNT1_INT_EN 0x020
2343#define XSCALE1_COUNT0_EVT_SHFT 12
2344#define XSCALE1_COUNT0_EVT_MASK (0xff << XSCALE1_COUNT0_EVT_SHFT)
2345#define XSCALE1_COUNT1_EVT_SHFT 20
2346#define XSCALE1_COUNT1_EVT_MASK (0xff << XSCALE1_COUNT1_EVT_SHFT)
2347
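
As a quick aid to reading the layout above: on xscale1 a counter's event selector and its interrupt-enable bit both live in the single PMNC register, with counter 0's event field at bits 12-19 and counter 1's at bits 20-27. The following stand-alone sketch (not patch code; constants copied from the defines above) shows how an event number ends up in the PMNC, mirroring what xscale1pmu_enable_event() does further down.

    /* Illustration only: place event 0x07 (XSCALE_PERFCTR_INSTRUCTION) on counter 0. */
    #include <stdio.h>

    #define XSCALE1_COUNT0_INT_EN   0x010
    #define XSCALE1_COUNT0_EVT_SHFT 12
    #define XSCALE1_COUNT0_EVT_MASK (0xff << XSCALE1_COUNT0_EVT_SHFT)

    int main(void)
    {
            unsigned long pmnc  = 0;        /* pretend this was read from the PMNC */
            unsigned long event = 0x07;     /* instruction-executed event */

            pmnc &= ~XSCALE1_COUNT0_EVT_MASK;       /* clear the old event field */
            pmnc |= (event << XSCALE1_COUNT0_EVT_SHFT) | XSCALE1_COUNT0_INT_EN;

            printf("%#lx\n", pmnc);         /* 0x7010: event in bits 12-19, IRQ enable set */
            return 0;
    }
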
2348static inline u32
2349xscale1pmu_read_pmnc(void)
2350{
2351 u32 val;
2352 asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val));
2353 return val;
2354}
2355
2356static inline void
2357xscale1pmu_write_pmnc(u32 val)
2358{
2359	/* upper 4 bits and bits 7 and 11 are write-as-0 */
2360 val &= 0xffff77f;
2361 asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val));
2362}
2363
2364static inline int
2365xscale1_pmnc_counter_has_overflowed(unsigned long pmnc,
2366 enum xscale_counters counter)
2367{
2368 int ret = 0;
2369
2370 switch (counter) {
2371 case XSCALE_CYCLE_COUNTER:
2372 ret = pmnc & XSCALE1_CCOUNT_OVERFLOW;
2373 break;
2374 case XSCALE_COUNTER0:
2375 ret = pmnc & XSCALE1_COUNT0_OVERFLOW;
2376 break;
2377 case XSCALE_COUNTER1:
2378 ret = pmnc & XSCALE1_COUNT1_OVERFLOW;
2379 break;
2380 default:
2381 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
2382 }
2383
2384 return ret;
2385}
2386
2387static irqreturn_t
2388xscale1pmu_handle_irq(int irq_num, void *dev)
2389{
2390 unsigned long pmnc;
2391 struct perf_sample_data data;
2392 struct cpu_hw_events *cpuc;
2393 struct pt_regs *regs;
2394 int idx;
2395
2396 /*
2397 * NOTE: there's an A stepping erratum that states if an overflow
2398 * bit already exists and another occurs, the previous
2399 * Overflow bit gets cleared. There's no workaround.
2400 * Fixed in B stepping or later.
2401 */
2402 pmnc = xscale1pmu_read_pmnc();
2403
2404 /*
2405 * Write the value back to clear the overflow flags. Overflow
2406 * flags remain in pmnc for use below. We also disable the PMU
2407 * while we process the interrupt.
2408 */
2409 xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
2410
2411 if (!(pmnc & XSCALE1_OVERFLOWED_MASK))
2412 return IRQ_NONE;
2413
2414 regs = get_irq_regs();
2415
2416 perf_sample_data_init(&data, 0);
2417
2418 cpuc = &__get_cpu_var(cpu_hw_events);
2419 for (idx = 0; idx <= armpmu->num_events; ++idx) {
2420 struct perf_event *event = cpuc->events[idx];
2421 struct hw_perf_event *hwc;
2422
2423 if (!test_bit(idx, cpuc->active_mask))
2424 continue;
2425
2426 if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx))
2427 continue;
2428
2429 hwc = &event->hw;
2430 armpmu_event_update(event, hwc, idx);
2431 data.period = event->hw.last_period;
2432 if (!armpmu_event_set_period(event, hwc, idx))
2433 continue;
2434
2435 if (perf_event_overflow(event, 0, &data, regs))
2436 armpmu->disable(hwc, idx);
2437 }
2438
2439 irq_work_run();
2440
2441 /*
2442 * Re-enable the PMU.
2443 */
2444 pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE;
2445 xscale1pmu_write_pmnc(pmnc);
2446
2447 return IRQ_HANDLED;
2448}
2449
2450static void
2451xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx)
2452{
2453 unsigned long val, mask, evt, flags;
2454
2455 switch (idx) {
2456 case XSCALE_CYCLE_COUNTER:
2457 mask = 0;
2458 evt = XSCALE1_CCOUNT_INT_EN;
2459 break;
2460 case XSCALE_COUNTER0:
2461 mask = XSCALE1_COUNT0_EVT_MASK;
2462 evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) |
2463 XSCALE1_COUNT0_INT_EN;
2464 break;
2465 case XSCALE_COUNTER1:
2466 mask = XSCALE1_COUNT1_EVT_MASK;
2467 evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) |
2468 XSCALE1_COUNT1_INT_EN;
2469 break;
2470 default:
2471 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
2472 return;
2473 }
2474
2475 spin_lock_irqsave(&pmu_lock, flags);
2476 val = xscale1pmu_read_pmnc();
2477 val &= ~mask;
2478 val |= evt;
2479 xscale1pmu_write_pmnc(val);
2480 spin_unlock_irqrestore(&pmu_lock, flags);
2481}
2482
2483static void
2484xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx)
2485{
2486 unsigned long val, mask, evt, flags;
2487
2488 switch (idx) {
2489 case XSCALE_CYCLE_COUNTER:
2490 mask = XSCALE1_CCOUNT_INT_EN;
2491 evt = 0;
2492 break;
2493 case XSCALE_COUNTER0:
2494 mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK;
2495 evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT;
2496 break;
2497 case XSCALE_COUNTER1:
2498 mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK;
2499 evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT;
2500 break;
2501 default:
2502 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
2503 return;
2504 }
2505
2506 spin_lock_irqsave(&pmu_lock, flags);
2507 val = xscale1pmu_read_pmnc();
2508 val &= ~mask;
2509 val |= evt;
2510 xscale1pmu_write_pmnc(val);
2511 spin_unlock_irqrestore(&pmu_lock, flags);
2512}
2513
2514static int
2515xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc,
2516 struct hw_perf_event *event)
2517{
2518 if (XSCALE_PERFCTR_CCNT == event->config_base) {
2519 if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask))
2520 return -EAGAIN;
2521
2522 return XSCALE_CYCLE_COUNTER;
2523 } else {
2524 if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask)) {
2525 return XSCALE_COUNTER1;
2526 }
2527
2528 if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask)) {
2529 return XSCALE_COUNTER0;
2530 }
2531
2532 return -EAGAIN;
2533 }
2534}
2535
2536static void
2537xscale1pmu_start(void)
2538{
2539 unsigned long flags, val;
2540
2541 spin_lock_irqsave(&pmu_lock, flags);
2542 val = xscale1pmu_read_pmnc();
2543 val |= XSCALE_PMU_ENABLE;
2544 xscale1pmu_write_pmnc(val);
2545 spin_unlock_irqrestore(&pmu_lock, flags);
2546}
2547
2548static void
2549xscale1pmu_stop(void)
2550{
2551 unsigned long flags, val;
2552
2553 spin_lock_irqsave(&pmu_lock, flags);
2554 val = xscale1pmu_read_pmnc();
2555 val &= ~XSCALE_PMU_ENABLE;
2556 xscale1pmu_write_pmnc(val);
2557 spin_unlock_irqrestore(&pmu_lock, flags);
2558}
2559
2560static inline u32
2561xscale1pmu_read_counter(int counter)
2562{
2563 u32 val = 0;
2564
2565 switch (counter) {
2566 case XSCALE_CYCLE_COUNTER:
2567 asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val));
2568 break;
2569 case XSCALE_COUNTER0:
2570 asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val));
2571 break;
2572 case XSCALE_COUNTER1:
2573 asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val));
2574 break;
2575 }
2576
2577 return val;
2578}
2579
2580static inline void
2581xscale1pmu_write_counter(int counter, u32 val)
2582{
2583 switch (counter) {
2584 case XSCALE_CYCLE_COUNTER:
2585 asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val));
2586 break;
2587 case XSCALE_COUNTER0:
2588 asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val));
2589 break;
2590 case XSCALE_COUNTER1:
2591 asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val));
2592 break;
2593 }
2594}
2595
2596static const struct arm_pmu xscale1pmu = {
2597 .id = ARM_PERF_PMU_ID_XSCALE1,
2598 .handle_irq = xscale1pmu_handle_irq,
2599 .enable = xscale1pmu_enable_event,
2600 .disable = xscale1pmu_disable_event,
2601 .event_map = xscalepmu_event_map,
2602 .raw_event = xscalepmu_raw_event,
2603 .read_counter = xscale1pmu_read_counter,
2604 .write_counter = xscale1pmu_write_counter,
2605 .get_event_idx = xscale1pmu_get_event_idx,
2606 .start = xscale1pmu_start,
2607 .stop = xscale1pmu_stop,
2608 .num_events = 3,
2609 .max_period = (1LLU << 32) - 1,
2610};
2611
2612#define XSCALE2_OVERFLOWED_MASK 0x01f
2613#define XSCALE2_CCOUNT_OVERFLOW 0x001
2614#define XSCALE2_COUNT0_OVERFLOW 0x002
2615#define XSCALE2_COUNT1_OVERFLOW 0x004
2616#define XSCALE2_COUNT2_OVERFLOW 0x008
2617#define XSCALE2_COUNT3_OVERFLOW 0x010
2618#define XSCALE2_CCOUNT_INT_EN 0x001
2619#define XSCALE2_COUNT0_INT_EN 0x002
2620#define XSCALE2_COUNT1_INT_EN 0x004
2621#define XSCALE2_COUNT2_INT_EN 0x008
2622#define XSCALE2_COUNT3_INT_EN 0x010
2623#define XSCALE2_COUNT0_EVT_SHFT 0
2624#define XSCALE2_COUNT0_EVT_MASK (0xff << XSCALE2_COUNT0_EVT_SHFT)
2625#define XSCALE2_COUNT1_EVT_SHFT 8
2626#define XSCALE2_COUNT1_EVT_MASK (0xff << XSCALE2_COUNT1_EVT_SHFT)
2627#define XSCALE2_COUNT2_EVT_SHFT 16
2628#define XSCALE2_COUNT2_EVT_MASK (0xff << XSCALE2_COUNT2_EVT_SHFT)
2629#define XSCALE2_COUNT3_EVT_SHFT 24
2630#define XSCALE2_COUNT3_EVT_MASK (0xff << XSCALE2_COUNT3_EVT_SHFT)
2631
2632static inline u32
2633xscale2pmu_read_pmnc(void)
2634{
2635 u32 val;
2636 asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val));
2637 /* bits 1-2 and 4-23 are read-unpredictable */
2638 return val & 0xff000009;
2639}
2640
2641static inline void
2642xscale2pmu_write_pmnc(u32 val)
2643{
2644 /* bits 4-23 are write-as-0, 24-31 are write ignored */
2645 val &= 0xf;
2646 asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val));
2647}
2648
2649static inline u32
2650xscale2pmu_read_overflow_flags(void)
2651{
2652 u32 val;
2653 asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val));
2654 return val;
2655}
2656
2657static inline void
2658xscale2pmu_write_overflow_flags(u32 val)
2659{
2660 asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val));
2661}
2662
2663static inline u32
2664xscale2pmu_read_event_select(void)
2665{
2666 u32 val;
2667 asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val));
2668 return val;
2669}
2670
2671static inline void
2672xscale2pmu_write_event_select(u32 val)
2673{
2674 asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val));
2675}
2676
2677static inline u32
2678xscale2pmu_read_int_enable(void)
2679{
2680 u32 val;
2681 asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val));
2682 return val;
2683}
2684
2685static void
2686xscale2pmu_write_int_enable(u32 val)
2687{
2688 asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val));
2689}
2690
2691static inline int
2692xscale2_pmnc_counter_has_overflowed(unsigned long of_flags,
2693 enum xscale_counters counter)
2694{
2695 int ret = 0;
2696
2697 switch (counter) {
2698 case XSCALE_CYCLE_COUNTER:
2699 ret = of_flags & XSCALE2_CCOUNT_OVERFLOW;
2700 break;
2701 case XSCALE_COUNTER0:
2702 ret = of_flags & XSCALE2_COUNT0_OVERFLOW;
2703 break;
2704 case XSCALE_COUNTER1:
2705 ret = of_flags & XSCALE2_COUNT1_OVERFLOW;
2706 break;
2707 case XSCALE_COUNTER2:
2708 ret = of_flags & XSCALE2_COUNT2_OVERFLOW;
2709 break;
2710 case XSCALE_COUNTER3:
2711 ret = of_flags & XSCALE2_COUNT3_OVERFLOW;
2712 break;
2713 default:
2714 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
2715 }
2716
2717 return ret;
2718}
2719
2720static irqreturn_t
2721xscale2pmu_handle_irq(int irq_num, void *dev)
2722{
2723 unsigned long pmnc, of_flags;
2724 struct perf_sample_data data;
2725 struct cpu_hw_events *cpuc;
2726 struct pt_regs *regs;
2727 int idx;
2728
2729 /* Disable the PMU. */
2730 pmnc = xscale2pmu_read_pmnc();
2731 xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
2732
2733 /* Check the overflow flag register. */
2734 of_flags = xscale2pmu_read_overflow_flags();
2735 if (!(of_flags & XSCALE2_OVERFLOWED_MASK))
2736 return IRQ_NONE;
2737
2738 /* Clear the overflow bits. */
2739 xscale2pmu_write_overflow_flags(of_flags);
2740
2741 regs = get_irq_regs();
2742
2743 perf_sample_data_init(&data, 0);
2744
2745 cpuc = &__get_cpu_var(cpu_hw_events);
2746 for (idx = 0; idx <= armpmu->num_events; ++idx) {
2747 struct perf_event *event = cpuc->events[idx];
2748 struct hw_perf_event *hwc;
2749
2750 if (!test_bit(idx, cpuc->active_mask))
2751 continue;
2752
2753	if (!xscale2_pmnc_counter_has_overflowed(of_flags, idx))
2754 continue;
2755
2756 hwc = &event->hw;
2757 armpmu_event_update(event, hwc, idx);
2758 data.period = event->hw.last_period;
2759 if (!armpmu_event_set_period(event, hwc, idx))
2760 continue;
2761
2762 if (perf_event_overflow(event, 0, &data, regs))
2763 armpmu->disable(hwc, idx);
2764 }
2765
2766 irq_work_run();
2767
2768 /*
2769 * Re-enable the PMU.
2770 */
2771 pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE;
2772 xscale2pmu_write_pmnc(pmnc);
2773
2774 return IRQ_HANDLED;
2775}
2776
2777static void
2778xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx)
2779{
2780 unsigned long flags, ien, evtsel;
2781
2782 ien = xscale2pmu_read_int_enable();
2783 evtsel = xscale2pmu_read_event_select();
2784
2785 switch (idx) {
2786 case XSCALE_CYCLE_COUNTER:
2787 ien |= XSCALE2_CCOUNT_INT_EN;
2788 break;
2789 case XSCALE_COUNTER0:
2790 ien |= XSCALE2_COUNT0_INT_EN;
2791 evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
2792 evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT;
2793 break;
2794 case XSCALE_COUNTER1:
2795 ien |= XSCALE2_COUNT1_INT_EN;
2796 evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
2797 evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT;
2798 break;
2799 case XSCALE_COUNTER2:
2800 ien |= XSCALE2_COUNT2_INT_EN;
2801 evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
2802 evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT;
2803 break;
2804 case XSCALE_COUNTER3:
2805 ien |= XSCALE2_COUNT3_INT_EN;
2806 evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
2807 evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT;
2808 break;
2809 default:
2810 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
2811 return;
2812 }
2813
2814 spin_lock_irqsave(&pmu_lock, flags);
2815 xscale2pmu_write_event_select(evtsel);
2816 xscale2pmu_write_int_enable(ien);
2817 spin_unlock_irqrestore(&pmu_lock, flags);
2818}
2819
2820static void
2821xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx)
2822{
2823 unsigned long flags, ien, evtsel;
2824
2825 ien = xscale2pmu_read_int_enable();
2826 evtsel = xscale2pmu_read_event_select();
2827
2828 switch (idx) {
2829 case XSCALE_CYCLE_COUNTER:
2830 ien &= ~XSCALE2_CCOUNT_INT_EN;
2831 break;
2832 case XSCALE_COUNTER0:
2833 ien &= ~XSCALE2_COUNT0_INT_EN;
2834 evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
2835 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT;
2836 break;
2837 case XSCALE_COUNTER1:
2838 ien &= ~XSCALE2_COUNT1_INT_EN;
2839 evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
2840 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT;
2841 break;
2842 case XSCALE_COUNTER2:
2843 ien &= ~XSCALE2_COUNT2_INT_EN;
2844 evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
2845 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT;
2846 break;
2847 case XSCALE_COUNTER3:
2848 ien &= ~XSCALE2_COUNT3_INT_EN;
2849 evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
2850 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT;
2851 break;
2852 default:
2853 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
2854 return;
2855 }
2856
2857 spin_lock_irqsave(&pmu_lock, flags);
2858 xscale2pmu_write_event_select(evtsel);
2859 xscale2pmu_write_int_enable(ien);
2860 spin_unlock_irqrestore(&pmu_lock, flags);
2861}
2862
2863static int
2864xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc,
2865 struct hw_perf_event *event)
2866{
2867 int idx = xscale1pmu_get_event_idx(cpuc, event);
2868 if (idx >= 0)
2869 goto out;
2870
2871 if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask))
2872 idx = XSCALE_COUNTER3;
2873 else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask))
2874 idx = XSCALE_COUNTER2;
2875out:
2876 return idx;
2877}
2878
2879static void
2880xscale2pmu_start(void)
2881{
2882 unsigned long flags, val;
2883
2884 spin_lock_irqsave(&pmu_lock, flags);
2885 val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64;
2886 val |= XSCALE_PMU_ENABLE;
2887 xscale2pmu_write_pmnc(val);
2888 spin_unlock_irqrestore(&pmu_lock, flags);
2889}
2890
2891static void
2892xscale2pmu_stop(void)
2893{
2894 unsigned long flags, val;
2895
2896 spin_lock_irqsave(&pmu_lock, flags);
2897 val = xscale2pmu_read_pmnc();
2898 val &= ~XSCALE_PMU_ENABLE;
2899 xscale2pmu_write_pmnc(val);
2900 spin_unlock_irqrestore(&pmu_lock, flags);
2901}
2902
2903static inline u32
2904xscale2pmu_read_counter(int counter)
2905{
2906 u32 val = 0;
2907
2908 switch (counter) {
2909 case XSCALE_CYCLE_COUNTER:
2910 asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val));
2911 break;
2912 case XSCALE_COUNTER0:
2913 asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val));
2914 break;
2915 case XSCALE_COUNTER1:
2916 asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val));
2917 break;
2918 case XSCALE_COUNTER2:
2919 asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val));
2920 break;
2921 case XSCALE_COUNTER3:
2922 asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val));
2923 break;
2924 }
2925
2926 return val;
2927}
2928
2929static inline void
2930xscale2pmu_write_counter(int counter, u32 val)
2931{
2932 switch (counter) {
2933 case XSCALE_CYCLE_COUNTER:
2934 asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val));
2935 break;
2936 case XSCALE_COUNTER0:
2937 asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val));
2938 break;
2939 case XSCALE_COUNTER1:
2940 asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val));
2941 break;
2942 case XSCALE_COUNTER2:
2943 asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val));
2944 break;
2945 case XSCALE_COUNTER3:
2946 asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val));
2947 break;
2948 }
2949}
2950
2951static const struct arm_pmu xscale2pmu = {
2952 .id = ARM_PERF_PMU_ID_XSCALE2,
2953 .handle_irq = xscale2pmu_handle_irq,
2954 .enable = xscale2pmu_enable_event,
2955 .disable = xscale2pmu_disable_event,
2956 .event_map = xscalepmu_event_map,
2957 .raw_event = xscalepmu_raw_event,
2958 .read_counter = xscale2pmu_read_counter,
2959 .write_counter = xscale2pmu_write_counter,
2960 .get_event_idx = xscale2pmu_get_event_idx,
2961 .start = xscale2pmu_start,
2962 .stop = xscale2pmu_stop,
2963 .num_events = 5,
2964 .max_period = (1LLU << 32) - 1,
2965};
2966 611
2967static int __init 612static int __init
2968init_hw_perf_events(void) 613init_hw_perf_events(void)
@@ -2977,37 +622,16 @@ init_hw_perf_events(void)
2977 case 0xB360: /* ARM1136 */ 622 case 0xB360: /* ARM1136 */
2978 case 0xB560: /* ARM1156 */ 623 case 0xB560: /* ARM1156 */
2979 case 0xB760: /* ARM1176 */ 624 case 0xB760: /* ARM1176 */
2980 armpmu = &armv6pmu; 625 armpmu = armv6pmu_init();
2981 memcpy(armpmu_perf_cache_map, armv6_perf_cache_map,
2982 sizeof(armv6_perf_cache_map));
2983 break; 626 break;
2984 case 0xB020: /* ARM11mpcore */ 627 case 0xB020: /* ARM11mpcore */
2985 armpmu = &armv6mpcore_pmu; 628 armpmu = armv6mpcore_pmu_init();
2986 memcpy(armpmu_perf_cache_map,
2987 armv6mpcore_perf_cache_map,
2988 sizeof(armv6mpcore_perf_cache_map));
2989 break; 629 break;
2990 case 0xC080: /* Cortex-A8 */ 630 case 0xC080: /* Cortex-A8 */
2991 armv7pmu.id = ARM_PERF_PMU_ID_CA8; 631 armpmu = armv7_a8_pmu_init();
2992 memcpy(armpmu_perf_cache_map, armv7_a8_perf_cache_map,
2993 sizeof(armv7_a8_perf_cache_map));
2994 armv7pmu.event_map = armv7_a8_pmu_event_map;
2995 armpmu = &armv7pmu;
2996
2997 /* Reset PMNC and read the nb of CNTx counters
2998 supported */
2999 armv7pmu.num_events = armv7_reset_read_pmnc();
3000 break; 632 break;
3001 case 0xC090: /* Cortex-A9 */ 633 case 0xC090: /* Cortex-A9 */
3002 armv7pmu.id = ARM_PERF_PMU_ID_CA9; 634 armpmu = armv7_a9_pmu_init();
3003 memcpy(armpmu_perf_cache_map, armv7_a9_perf_cache_map,
3004 sizeof(armv7_a9_perf_cache_map));
3005 armv7pmu.event_map = armv7_a9_pmu_event_map;
3006 armpmu = &armv7pmu;
3007
3008 /* Reset PMNC and read the nb of CNTx counters
3009 supported */
3010 armv7pmu.num_events = armv7_reset_read_pmnc();
3011 break; 635 break;
3012 } 636 }
3013 /* Intel CPUs [xscale]. */ 637 /* Intel CPUs [xscale]. */
@@ -3015,21 +639,17 @@ init_hw_perf_events(void)
3015 part_number = (cpuid >> 13) & 0x7; 639 part_number = (cpuid >> 13) & 0x7;
3016 switch (part_number) { 640 switch (part_number) {
3017 case 1: 641 case 1:
3018 armpmu = &xscale1pmu; 642 armpmu = xscale1pmu_init();
3019 memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
3020 sizeof(xscale_perf_cache_map));
3021 break; 643 break;
3022 case 2: 644 case 2:
3023 armpmu = &xscale2pmu; 645 armpmu = xscale2pmu_init();
3024 memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
3025 sizeof(xscale_perf_cache_map));
3026 break; 646 break;
3027 } 647 }
3028 } 648 }
3029 649
3030 if (armpmu) { 650 if (armpmu) {
3031 pr_info("enabled with %s PMU driver, %d counters available\n", 651 pr_info("enabled with %s PMU driver, %d counters available\n",
3032 arm_pmu_names[armpmu->id], armpmu->num_events); 652 armpmu->name, armpmu->num_events);
3033 } else { 653 } else {
3034 pr_info("no hardware support available\n"); 654 pr_info("no hardware support available\n");
3035 } 655 }
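
The per-implementation init helpers introduced above (armv6pmu_init(), armv6mpcore_pmu_init(), armv7_a8_pmu_init(), armv7_a9_pmu_init(), xscale1pmu_init(), xscale2pmu_init()) live in the new perf_event_v6.c/v7.c/xscale.c files, and their bodies are not visible in this hunk. As a rough sketch of the shape implied by the removed Cortex-A8 case, such a helper presumably folds the old setup into a function and returns the PMU descriptor; the .name string and the way the cache map is wired up in the new layout are assumptions, not taken from this patch.

    /* Hypothetical sketch only -- the real armv7_a8_pmu_init() is defined in
     * perf_event_v7.c, which is not shown in this hunk. */
    static const struct arm_pmu *__init armv7_a8_pmu_init(void)
    {
            armv7pmu.id         = ARM_PERF_PMU_ID_CA8;
            armv7pmu.name       = "ARMv7 Cortex-A8";        /* assumed name string */
            armv7pmu.event_map  = armv7_a8_pmu_event_map;
            /* Reset the PMNC and read the number of CNTx counters supported. */
            armv7pmu.num_events = armv7_reset_read_pmnc();
            /* How armv7_a8_perf_cache_map is hooked up here is not visible in this diff. */
            return &armv7pmu;
    }
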
diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c
new file mode 100644
index 000000000000..7aeb07da9076
--- /dev/null
+++ b/arch/arm/kernel/perf_event_v6.c
@@ -0,0 +1,672 @@
1/*
2 * ARMv6 Performance counter handling code.
3 *
4 * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
5 *
6 * ARMv6 has 2 configurable performance counters and a single cycle counter.
7 * They all share a single reset bit but can be written to zero so we can use
8 * that for a reset.
9 *
10 * The counters can't be individually enabled or disabled so when we remove
11 * one event and replace it with another we could get spurious counts from the
12 * wrong event. However, we can take advantage of the fact that the
13 * performance counters can export events to the event bus, and the event bus
14 * itself can be monitored. This requires that we *don't* export the events to
15 * the event bus. The procedure for disabling a configurable counter is:
16 * - change the counter to count the ETMEXTOUT[0] signal (0x20). This
17 * effectively stops the counter from counting.
 18 * - disable the counter's interrupt generation (each counter has its
19 * own interrupt enable bit).
20 * Once stopped, the counter value can be written as 0 to reset.
21 *
22 * To enable a counter:
23 * - enable the counter's interrupt generation.
24 * - set the new event type.
25 *
26 * Note: the dedicated cycle counter only counts cycles and can't be
27 * enabled/disabled independently of the others. When we want to disable the
28 * cycle counter, we have to just disable the interrupt reporting and start
29 * ignoring that counter. When re-enabling, we have to reset the value and
30 * enable the interrupt.
31 */
32
33#ifdef CONFIG_CPU_V6
34enum armv6_perf_types {
35 ARMV6_PERFCTR_ICACHE_MISS = 0x0,
36 ARMV6_PERFCTR_IBUF_STALL = 0x1,
37 ARMV6_PERFCTR_DDEP_STALL = 0x2,
38 ARMV6_PERFCTR_ITLB_MISS = 0x3,
39 ARMV6_PERFCTR_DTLB_MISS = 0x4,
40 ARMV6_PERFCTR_BR_EXEC = 0x5,
41 ARMV6_PERFCTR_BR_MISPREDICT = 0x6,
42 ARMV6_PERFCTR_INSTR_EXEC = 0x7,
43 ARMV6_PERFCTR_DCACHE_HIT = 0x9,
44 ARMV6_PERFCTR_DCACHE_ACCESS = 0xA,
45 ARMV6_PERFCTR_DCACHE_MISS = 0xB,
46 ARMV6_PERFCTR_DCACHE_WBACK = 0xC,
47 ARMV6_PERFCTR_SW_PC_CHANGE = 0xD,
48 ARMV6_PERFCTR_MAIN_TLB_MISS = 0xF,
49 ARMV6_PERFCTR_EXPL_D_ACCESS = 0x10,
50 ARMV6_PERFCTR_LSU_FULL_STALL = 0x11,
51 ARMV6_PERFCTR_WBUF_DRAINED = 0x12,
52 ARMV6_PERFCTR_CPU_CYCLES = 0xFF,
53 ARMV6_PERFCTR_NOP = 0x20,
54};
55
56enum armv6_counters {
57 ARMV6_CYCLE_COUNTER = 1,
58 ARMV6_COUNTER0,
59 ARMV6_COUNTER1,
60};
61
62/*
63 * The hardware events that we support. We do support cache operations but
 64 * we have Harvard caches and no way to combine instruction and data
65 * accesses/misses in hardware.
66 */
67static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
68 [PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES,
69 [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC,
70 [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
71 [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
72 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC,
73 [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT,
74 [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
75};
76
77static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
78 [PERF_COUNT_HW_CACHE_OP_MAX]
79 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
80 [C(L1D)] = {
81 /*
82 * The performance counters don't differentiate between read
83 * and write accesses/misses so this isn't strictly correct,
84 * but it's the best we can do. Writes and reads get
85 * combined.
86 */
87 [C(OP_READ)] = {
88 [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
89 [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
90 },
91 [C(OP_WRITE)] = {
92 [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
93 [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
94 },
95 [C(OP_PREFETCH)] = {
96 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
97 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
98 },
99 },
100 [C(L1I)] = {
101 [C(OP_READ)] = {
102 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
103 [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS,
104 },
105 [C(OP_WRITE)] = {
106 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
107 [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS,
108 },
109 [C(OP_PREFETCH)] = {
110 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
111 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
112 },
113 },
114 [C(LL)] = {
115 [C(OP_READ)] = {
116 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
117 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
118 },
119 [C(OP_WRITE)] = {
120 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
121 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
122 },
123 [C(OP_PREFETCH)] = {
124 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
125 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
126 },
127 },
128 [C(DTLB)] = {
129 /*
130 * The ARM performance counters can count micro DTLB misses,
131 * micro ITLB misses and main TLB misses. There isn't an event
132	 * for TLB misses, so use the micro misses here and if users want the
133	 * main TLB misses they can use a raw counter (sketch below).
134 */
135 [C(OP_READ)] = {
136 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
137 [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
138 },
139 [C(OP_WRITE)] = {
140 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
141 [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
142 },
143 [C(OP_PREFETCH)] = {
144 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
145 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
146 },
147 },
148 [C(ITLB)] = {
149 [C(OP_READ)] = {
150 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
151 [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
152 },
153 [C(OP_WRITE)] = {
154 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
155 [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
156 },
157 [C(OP_PREFETCH)] = {
158 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
159 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
160 },
161 },
162 [C(BPU)] = {
163 [C(OP_READ)] = {
164 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
165 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
166 },
167 [C(OP_WRITE)] = {
168 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
169 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
170 },
171 [C(OP_PREFETCH)] = {
172 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
173 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
174 },
175 },
176};
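/*
 * As noted in the DTLB comment above, events without a generic mapping
 * (such as ARMV6_PERFCTR_MAIN_TLB_MISS, 0xF) are reached through a raw
 * counter. A minimal user-space sketch using the perf_event_open(2) syscall
 * follows; it assumes an ARMv6 target with perf events enabled, and the
 * variable names are illustrative only.
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
	struct perf_event_attr attr;
	uint64_t count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_RAW;	/* raw, CPU-specific event number */
	attr.config = 0xF;		/* ARMv6 main TLB miss */
	attr.disabled = 1;		/* start disabled, enable explicitly */
	attr.exclude_kernel = 1;

	/* Count for the calling task, on any CPU. */
	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	ioctl(fd, PERF_EVENT_IOC_RESET, 0);
	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	/* ... run the workload being measured ... */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("main TLB misses: %llu\n", (unsigned long long)count);

	close(fd);
	return 0;
}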
177
178enum armv6mpcore_perf_types {
179 ARMV6MPCORE_PERFCTR_ICACHE_MISS = 0x0,
180 ARMV6MPCORE_PERFCTR_IBUF_STALL = 0x1,
181 ARMV6MPCORE_PERFCTR_DDEP_STALL = 0x2,
182 ARMV6MPCORE_PERFCTR_ITLB_MISS = 0x3,
183 ARMV6MPCORE_PERFCTR_DTLB_MISS = 0x4,
184 ARMV6MPCORE_PERFCTR_BR_EXEC = 0x5,
185 ARMV6MPCORE_PERFCTR_BR_NOTPREDICT = 0x6,
186 ARMV6MPCORE_PERFCTR_BR_MISPREDICT = 0x7,
187 ARMV6MPCORE_PERFCTR_INSTR_EXEC = 0x8,
188 ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA,
189 ARMV6MPCORE_PERFCTR_DCACHE_RDMISS = 0xB,
190 ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC,
191 ARMV6MPCORE_PERFCTR_DCACHE_WRMISS = 0xD,
192 ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE,
193 ARMV6MPCORE_PERFCTR_SW_PC_CHANGE = 0xF,
194 ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS = 0x10,
195 ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11,
196 ARMV6MPCORE_PERFCTR_LSU_FULL_STALL = 0x12,
197 ARMV6MPCORE_PERFCTR_WBUF_DRAINED = 0x13,
198 ARMV6MPCORE_PERFCTR_CPU_CYCLES = 0xFF,
199};
200
201/*
202 * The hardware events that we support. We do support cache operations but
204 * we have Harvard caches and no way to combine instruction and data
204 * accesses/misses in hardware.
205 */
206static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = {
207 [PERF_COUNT_HW_CPU_CYCLES] = ARMV6MPCORE_PERFCTR_CPU_CYCLES,
208 [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_INSTR_EXEC,
209 [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
210 [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
211 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC,
212 [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6MPCORE_PERFCTR_BR_MISPREDICT,
213 [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
214};
215
216static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
217 [PERF_COUNT_HW_CACHE_OP_MAX]
218 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
219 [C(L1D)] = {
220 [C(OP_READ)] = {
221 [C(RESULT_ACCESS)] =
222 ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS,
223 [C(RESULT_MISS)] =
224 ARMV6MPCORE_PERFCTR_DCACHE_RDMISS,
225 },
226 [C(OP_WRITE)] = {
227 [C(RESULT_ACCESS)] =
228 ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS,
229 [C(RESULT_MISS)] =
230 ARMV6MPCORE_PERFCTR_DCACHE_WRMISS,
231 },
232 [C(OP_PREFETCH)] = {
233 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
234 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
235 },
236 },
237 [C(L1I)] = {
238 [C(OP_READ)] = {
239 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
240 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
241 },
242 [C(OP_WRITE)] = {
243 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
244 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
245 },
246 [C(OP_PREFETCH)] = {
247 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
248 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
249 },
250 },
251 [C(LL)] = {
252 [C(OP_READ)] = {
253 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
254 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
255 },
256 [C(OP_WRITE)] = {
257 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
258 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
259 },
260 [C(OP_PREFETCH)] = {
261 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
262 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
263 },
264 },
265 [C(DTLB)] = {
266 /*
267 * The ARM performance counters can count micro DTLB misses,
268 * micro ITLB misses and main TLB misses. There isn't an event
269 * for TLB misses, so use the micro misses here and if users
270 * want the main TLB misses they can use a raw counter.
271 */
272 [C(OP_READ)] = {
273 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
274 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
275 },
276 [C(OP_WRITE)] = {
277 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
278 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
279 },
280 [C(OP_PREFETCH)] = {
281 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
282 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
283 },
284 },
285 [C(ITLB)] = {
286 [C(OP_READ)] = {
287 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
288 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
289 },
290 [C(OP_WRITE)] = {
291 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
292 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
293 },
294 [C(OP_PREFETCH)] = {
295 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
296 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
297 },
298 },
299 [C(BPU)] = {
300 [C(OP_READ)] = {
301 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
302 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
303 },
304 [C(OP_WRITE)] = {
305 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
306 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
307 },
308 [C(OP_PREFETCH)] = {
309 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
310 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
311 },
312 },
313};
314
315static inline unsigned long
316armv6_pmcr_read(void)
317{
318 u32 val;
319 asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val));
320 return val;
321}
322
323static inline void
324armv6_pmcr_write(unsigned long val)
325{
326 asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val));
327}
328
329#define ARMV6_PMCR_ENABLE (1 << 0)
330#define ARMV6_PMCR_CTR01_RESET (1 << 1)
331#define ARMV6_PMCR_CCOUNT_RESET (1 << 2)
332#define ARMV6_PMCR_CCOUNT_DIV (1 << 3)
333#define ARMV6_PMCR_COUNT0_IEN (1 << 4)
334#define ARMV6_PMCR_COUNT1_IEN (1 << 5)
335#define ARMV6_PMCR_CCOUNT_IEN (1 << 6)
336#define ARMV6_PMCR_COUNT0_OVERFLOW (1 << 8)
337#define ARMV6_PMCR_COUNT1_OVERFLOW (1 << 9)
338#define ARMV6_PMCR_CCOUNT_OVERFLOW (1 << 10)
339#define ARMV6_PMCR_EVT_COUNT0_SHIFT 20
340#define ARMV6_PMCR_EVT_COUNT0_MASK (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT)
341#define ARMV6_PMCR_EVT_COUNT1_SHIFT 12
342#define ARMV6_PMCR_EVT_COUNT1_MASK (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT)
343
344#define ARMV6_PMCR_OVERFLOWED_MASK \
345 (ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \
346 ARMV6_PMCR_CCOUNT_OVERFLOW)
347
348static inline int
349armv6_pmcr_has_overflowed(unsigned long pmcr)
350{
351 return pmcr & ARMV6_PMCR_OVERFLOWED_MASK;
352}
353
354static inline int
355armv6_pmcr_counter_has_overflowed(unsigned long pmcr,
356 enum armv6_counters counter)
357{
358 int ret = 0;
359
360 if (ARMV6_CYCLE_COUNTER == counter)
361 ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW;
362 else if (ARMV6_COUNTER0 == counter)
363 ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW;
364 else if (ARMV6_COUNTER1 == counter)
365 ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW;
366 else
367 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
368
369 return ret;
370}
371
372static inline u32
373armv6pmu_read_counter(int counter)
374{
375 unsigned long value = 0;
376
377 if (ARMV6_CYCLE_COUNTER == counter)
378 asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value));
379 else if (ARMV6_COUNTER0 == counter)
380 asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value));
381 else if (ARMV6_COUNTER1 == counter)
382 asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value));
383 else
384 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
385
386 return value;
387}
388
389static inline void
390armv6pmu_write_counter(int counter,
391 u32 value)
392{
393 if (ARMV6_CYCLE_COUNTER == counter)
394 asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value));
395 else if (ARMV6_COUNTER0 == counter)
396 asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value));
397 else if (ARMV6_COUNTER1 == counter)
398 asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r"(value));
399 else
400 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
401}
402
403void
404armv6pmu_enable_event(struct hw_perf_event *hwc,
405 int idx)
406{
407 unsigned long val, mask, evt, flags;
408
409 if (ARMV6_CYCLE_COUNTER == idx) {
410 mask = 0;
411 evt = ARMV6_PMCR_CCOUNT_IEN;
412 } else if (ARMV6_COUNTER0 == idx) {
413 mask = ARMV6_PMCR_EVT_COUNT0_MASK;
414 evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
415 ARMV6_PMCR_COUNT0_IEN;
416 } else if (ARMV6_COUNTER1 == idx) {
417 mask = ARMV6_PMCR_EVT_COUNT1_MASK;
418 evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) |
419 ARMV6_PMCR_COUNT1_IEN;
420 } else {
421 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
422 return;
423 }
424
425 /*
426 * Mask out the current event and set the counter to count the event
427 * that we're interested in.
428 */
429 spin_lock_irqsave(&pmu_lock, flags);
430 val = armv6_pmcr_read();
431 val &= ~mask;
432 val |= evt;
433 armv6_pmcr_write(val);
434 spin_unlock_irqrestore(&pmu_lock, flags);
435}
436
437static irqreturn_t
438armv6pmu_handle_irq(int irq_num,
439 void *dev)
440{
441 unsigned long pmcr = armv6_pmcr_read();
442 struct perf_sample_data data;
443 struct cpu_hw_events *cpuc;
444 struct pt_regs *regs;
445 int idx;
446
447 if (!armv6_pmcr_has_overflowed(pmcr))
448 return IRQ_NONE;
449
450 regs = get_irq_regs();
451
452 /*
453 * The interrupts are cleared by writing the overflow flags back to
454 * the control register. All of the other bits don't have any effect
455 * if they are rewritten, so write the whole value back.
456 */
457 armv6_pmcr_write(pmcr);
458
459 perf_sample_data_init(&data, 0);
460
461 cpuc = &__get_cpu_var(cpu_hw_events);
462 for (idx = 0; idx <= armpmu->num_events; ++idx) {
463 struct perf_event *event = cpuc->events[idx];
464 struct hw_perf_event *hwc;
465
466 if (!test_bit(idx, cpuc->active_mask))
467 continue;
468
469 /*
470 * We have a single interrupt for all counters. Check that
471 * each counter has overflowed before we process it.
472 */
473 if (!armv6_pmcr_counter_has_overflowed(pmcr, idx))
474 continue;
475
476 hwc = &event->hw;
477 armpmu_event_update(event, hwc, idx);
478 data.period = event->hw.last_period;
479 if (!armpmu_event_set_period(event, hwc, idx))
480 continue;
481
482 if (perf_event_overflow(event, 0, &data, regs))
483 armpmu->disable(hwc, idx);
484 }
485
486 /*
487 * Handle the pending perf events.
488 *
489 * Note: this call *must* be run with interrupts disabled. For
490 * platforms that can have the PMU interrupts raised as an NMI, this
491 * will not work.
492 */
493 irq_work_run();
494
495 return IRQ_HANDLED;
496}
497
498static void
499armv6pmu_start(void)
500{
501 unsigned long flags, val;
502
503 spin_lock_irqsave(&pmu_lock, flags);
504 val = armv6_pmcr_read();
505 val |= ARMV6_PMCR_ENABLE;
506 armv6_pmcr_write(val);
507 spin_unlock_irqrestore(&pmu_lock, flags);
508}
509
510static void
511armv6pmu_stop(void)
512{
513 unsigned long flags, val;
514
515 spin_lock_irqsave(&pmu_lock, flags);
516 val = armv6_pmcr_read();
517 val &= ~ARMV6_PMCR_ENABLE;
518 armv6_pmcr_write(val);
519 spin_unlock_irqrestore(&pmu_lock, flags);
520}
521
522static int
523armv6pmu_get_event_idx(struct cpu_hw_events *cpuc,
524 struct hw_perf_event *event)
525{
526	/* Always place a cycle-counting event on the cycle counter. */
527 if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) {
528 if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask))
529 return -EAGAIN;
530
531 return ARMV6_CYCLE_COUNTER;
532 } else {
533 /*
534 * For anything other than a cycle counter, try and use
535 * counter0 and counter1.
536 */
537 if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask))
538 return ARMV6_COUNTER1;
539
540 if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask))
541 return ARMV6_COUNTER0;
542
543 /* The counters are all in use. */
544 return -EAGAIN;
545 }
546}
547
548static void
549armv6pmu_disable_event(struct hw_perf_event *hwc,
550 int idx)
551{
552 unsigned long val, mask, evt, flags;
553
554 if (ARMV6_CYCLE_COUNTER == idx) {
555 mask = ARMV6_PMCR_CCOUNT_IEN;
556 evt = 0;
557 } else if (ARMV6_COUNTER0 == idx) {
558 mask = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK;
559 evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT;
560 } else if (ARMV6_COUNTER1 == idx) {
561 mask = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK;
562 evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT;
563 } else {
564 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
565 return;
566 }
567
568 /*
569 * Mask out the current event and set the counter to count the number
570 * of ETM bus signal assertion cycles. The external reporting should
571 * be disabled and so this should never increment.
572 */
573 spin_lock_irqsave(&pmu_lock, flags);
574 val = armv6_pmcr_read();
575 val &= ~mask;
576 val |= evt;
577 armv6_pmcr_write(val);
578 spin_unlock_irqrestore(&pmu_lock, flags);
579}
580
581static void
582armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc,
583 int idx)
584{
585 unsigned long val, mask, flags, evt = 0;
586
587 if (ARMV6_CYCLE_COUNTER == idx) {
588 mask = ARMV6_PMCR_CCOUNT_IEN;
589 } else if (ARMV6_COUNTER0 == idx) {
590 mask = ARMV6_PMCR_COUNT0_IEN;
591 } else if (ARMV6_COUNTER1 == idx) {
592 mask = ARMV6_PMCR_COUNT1_IEN;
593 } else {
594 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
595 return;
596 }
597
598 /*
599 * Unlike UP ARMv6, we don't have a way of stopping the counters. We
600 * simply disable the interrupt reporting.
601 */
602 spin_lock_irqsave(&pmu_lock, flags);
603 val = armv6_pmcr_read();
604 val &= ~mask;
605 val |= evt;
606 armv6_pmcr_write(val);
607 spin_unlock_irqrestore(&pmu_lock, flags);
608}
609
610static const struct arm_pmu armv6pmu = {
611 .id = ARM_PERF_PMU_ID_V6,
612 .name = "v6",
613 .handle_irq = armv6pmu_handle_irq,
614 .enable = armv6pmu_enable_event,
615 .disable = armv6pmu_disable_event,
616 .read_counter = armv6pmu_read_counter,
617 .write_counter = armv6pmu_write_counter,
618 .get_event_idx = armv6pmu_get_event_idx,
619 .start = armv6pmu_start,
620 .stop = armv6pmu_stop,
621 .cache_map = &armv6_perf_cache_map,
622 .event_map = &armv6_perf_map,
623 .raw_event_mask = 0xFF,
624 .num_events = 3,
625 .max_period = (1LLU << 32) - 1,
626};
627
628const struct arm_pmu *__init armv6pmu_init(void)
629{
630 return &armv6pmu;
631}
632
633/*
634 * ARMv6mpcore is almost identical to single core ARMv6 with the exception
635 * that some of the events have different enumerations and that there is no
636 * *hack* to stop the programmable counters. To stop the counters we simply
637 * disable the interrupt reporting and update the event. When unthrottling we
638 * reset the period and enable the interrupt reporting.
639 */
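/*
 * A minimal sketch of the MPCore throttle/unthrottle path described above,
 * for counter 0: only the interrupt-enable bit is touched and the event
 * field is left alone (compare armv6mpcore_pmu_disable_event above). Same
 * conventions as the earlier ARMv6 sketch (PMCR value in an ordinary
 * variable, <stdint.h> types, illustrative helper names).
 */
static uint32_t armv6mpcore_throttle_counter0_sketch(uint32_t pmcr)
{
	return pmcr & ~(1u << 4);	/* clear COUNT0_IEN; counting continues */
}

static uint32_t armv6mpcore_unthrottle_counter0_sketch(uint32_t pmcr)
{
	return pmcr | (1u << 4);	/* set COUNT0_IEN; caller resets the period */
}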
640static const struct arm_pmu armv6mpcore_pmu = {
641 .id = ARM_PERF_PMU_ID_V6MP,
642 .name = "v6mpcore",
643 .handle_irq = armv6pmu_handle_irq,
644 .enable = armv6pmu_enable_event,
645 .disable = armv6mpcore_pmu_disable_event,
646 .read_counter = armv6pmu_read_counter,
647 .write_counter = armv6pmu_write_counter,
648 .get_event_idx = armv6pmu_get_event_idx,
649 .start = armv6pmu_start,
650 .stop = armv6pmu_stop,
651 .cache_map = &armv6mpcore_perf_cache_map,
652 .event_map = &armv6mpcore_perf_map,
653 .raw_event_mask = 0xFF,
654 .num_events = 3,
655 .max_period = (1LLU << 32) - 1,
656};
657
658const struct arm_pmu *__init armv6mpcore_pmu_init(void)
659{
660 return &armv6mpcore_pmu;
661}
662#else
663const struct arm_pmu *__init armv6pmu_init(void)
664{
665 return NULL;
666}
667
668const struct arm_pmu *__init armv6mpcore_pmu_init(void)
669{
670 return NULL;
671}
672#endif /* CONFIG_CPU_V6 */
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
new file mode 100644
index 000000000000..4d0423969df9
--- /dev/null
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -0,0 +1,906 @@
1/*
2 * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code.
3 *
4 * ARMv7 support: Jean Pihet <jpihet@mvista.com>
5 * 2010 (c) MontaVista Software, LLC.
6 *
7 * Copied from ARMv6 code, with the low level code inspired
8 * by the ARMv7 OProfile code.
9 *
10 * Cortex-A8 has up to 4 configurable performance counters and
11 * a single cycle counter.
12 * Cortex-A9 has up to 31 configurable performance counters and
13 * a single cycle counter.
14 *
15 * All counters can be enabled/disabled and IRQ masked separately. The cycle
16 * counter and the event counters (as a group) can each be reset separately.
17 */
18
19#ifdef CONFIG_CPU_V7
20/* Common ARMv7 event types */
21enum armv7_perf_types {
22 ARMV7_PERFCTR_PMNC_SW_INCR = 0x00,
23 ARMV7_PERFCTR_IFETCH_MISS = 0x01,
24 ARMV7_PERFCTR_ITLB_MISS = 0x02,
25 ARMV7_PERFCTR_DCACHE_REFILL = 0x03,
26 ARMV7_PERFCTR_DCACHE_ACCESS = 0x04,
27 ARMV7_PERFCTR_DTLB_REFILL = 0x05,
28 ARMV7_PERFCTR_DREAD = 0x06,
29 ARMV7_PERFCTR_DWRITE = 0x07,
30
31 ARMV7_PERFCTR_EXC_TAKEN = 0x09,
32 ARMV7_PERFCTR_EXC_EXECUTED = 0x0A,
33 ARMV7_PERFCTR_CID_WRITE = 0x0B,
34 /* ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS.
35 * It counts:
36 * - all branch instructions,
37 * - instructions that explicitly write the PC,
38 * - exception generating instructions.
39 */
40 ARMV7_PERFCTR_PC_WRITE = 0x0C,
41 ARMV7_PERFCTR_PC_IMM_BRANCH = 0x0D,
42 ARMV7_PERFCTR_UNALIGNED_ACCESS = 0x0F,
43 ARMV7_PERFCTR_PC_BRANCH_MIS_PRED = 0x10,
44 ARMV7_PERFCTR_CLOCK_CYCLES = 0x11,
45
46 ARMV7_PERFCTR_PC_BRANCH_MIS_USED = 0x12,
47
48 ARMV7_PERFCTR_CPU_CYCLES = 0xFF
49};
50
51/* ARMv7 Cortex-A8 specific event types */
52enum armv7_a8_perf_types {
53 ARMV7_PERFCTR_INSTR_EXECUTED = 0x08,
54
55 ARMV7_PERFCTR_PC_PROC_RETURN = 0x0E,
56
57 ARMV7_PERFCTR_WRITE_BUFFER_FULL = 0x40,
58 ARMV7_PERFCTR_L2_STORE_MERGED = 0x41,
59 ARMV7_PERFCTR_L2_STORE_BUFF = 0x42,
60 ARMV7_PERFCTR_L2_ACCESS = 0x43,
61 ARMV7_PERFCTR_L2_CACH_MISS = 0x44,
62 ARMV7_PERFCTR_AXI_READ_CYCLES = 0x45,
63 ARMV7_PERFCTR_AXI_WRITE_CYCLES = 0x46,
64 ARMV7_PERFCTR_MEMORY_REPLAY = 0x47,
65 ARMV7_PERFCTR_UNALIGNED_ACCESS_REPLAY = 0x48,
66 ARMV7_PERFCTR_L1_DATA_MISS = 0x49,
67 ARMV7_PERFCTR_L1_INST_MISS = 0x4A,
68 ARMV7_PERFCTR_L1_DATA_COLORING = 0x4B,
69 ARMV7_PERFCTR_L1_NEON_DATA = 0x4C,
70 ARMV7_PERFCTR_L1_NEON_CACH_DATA = 0x4D,
71 ARMV7_PERFCTR_L2_NEON = 0x4E,
72 ARMV7_PERFCTR_L2_NEON_HIT = 0x4F,
73 ARMV7_PERFCTR_L1_INST = 0x50,
74 ARMV7_PERFCTR_PC_RETURN_MIS_PRED = 0x51,
75 ARMV7_PERFCTR_PC_BRANCH_FAILED = 0x52,
76 ARMV7_PERFCTR_PC_BRANCH_TAKEN = 0x53,
77 ARMV7_PERFCTR_PC_BRANCH_EXECUTED = 0x54,
78 ARMV7_PERFCTR_OP_EXECUTED = 0x55,
79 ARMV7_PERFCTR_CYCLES_INST_STALL = 0x56,
80 ARMV7_PERFCTR_CYCLES_INST = 0x57,
81 ARMV7_PERFCTR_CYCLES_NEON_DATA_STALL = 0x58,
82 ARMV7_PERFCTR_CYCLES_NEON_INST_STALL = 0x59,
83 ARMV7_PERFCTR_NEON_CYCLES = 0x5A,
84
85 ARMV7_PERFCTR_PMU0_EVENTS = 0x70,
86 ARMV7_PERFCTR_PMU1_EVENTS = 0x71,
87 ARMV7_PERFCTR_PMU_EVENTS = 0x72,
88};
89
90/* ARMv7 Cortex-A9 specific event types */
91enum armv7_a9_perf_types {
92 ARMV7_PERFCTR_JAVA_HW_BYTECODE_EXEC = 0x40,
93 ARMV7_PERFCTR_JAVA_SW_BYTECODE_EXEC = 0x41,
94 ARMV7_PERFCTR_JAZELLE_BRANCH_EXEC = 0x42,
95
96 ARMV7_PERFCTR_COHERENT_LINE_MISS = 0x50,
97 ARMV7_PERFCTR_COHERENT_LINE_HIT = 0x51,
98
99 ARMV7_PERFCTR_ICACHE_DEP_STALL_CYCLES = 0x60,
100 ARMV7_PERFCTR_DCACHE_DEP_STALL_CYCLES = 0x61,
101 ARMV7_PERFCTR_TLB_MISS_DEP_STALL_CYCLES = 0x62,
102 ARMV7_PERFCTR_STREX_EXECUTED_PASSED = 0x63,
103 ARMV7_PERFCTR_STREX_EXECUTED_FAILED = 0x64,
104 ARMV7_PERFCTR_DATA_EVICTION = 0x65,
105 ARMV7_PERFCTR_ISSUE_STAGE_NO_INST = 0x66,
106 ARMV7_PERFCTR_ISSUE_STAGE_EMPTY = 0x67,
107 ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE = 0x68,
108
109 ARMV7_PERFCTR_PREDICTABLE_FUNCT_RETURNS = 0x6E,
110
111 ARMV7_PERFCTR_MAIN_UNIT_EXECUTED_INST = 0x70,
112 ARMV7_PERFCTR_SECOND_UNIT_EXECUTED_INST = 0x71,
113 ARMV7_PERFCTR_LD_ST_UNIT_EXECUTED_INST = 0x72,
114 ARMV7_PERFCTR_FP_EXECUTED_INST = 0x73,
115 ARMV7_PERFCTR_NEON_EXECUTED_INST = 0x74,
116
117 ARMV7_PERFCTR_PLD_FULL_DEP_STALL_CYCLES = 0x80,
118 ARMV7_PERFCTR_DATA_WR_DEP_STALL_CYCLES = 0x81,
119 ARMV7_PERFCTR_ITLB_MISS_DEP_STALL_CYCLES = 0x82,
120 ARMV7_PERFCTR_DTLB_MISS_DEP_STALL_CYCLES = 0x83,
121 ARMV7_PERFCTR_MICRO_ITLB_MISS_DEP_STALL_CYCLES = 0x84,
122 ARMV7_PERFCTR_MICRO_DTLB_MISS_DEP_STALL_CYCLES = 0x85,
123 ARMV7_PERFCTR_DMB_DEP_STALL_CYCLES = 0x86,
124
125 ARMV7_PERFCTR_INTGR_CLK_ENABLED_CYCLES = 0x8A,
126 ARMV7_PERFCTR_DATA_ENGINE_CLK_EN_CYCLES = 0x8B,
127
128 ARMV7_PERFCTR_ISB_INST = 0x90,
129 ARMV7_PERFCTR_DSB_INST = 0x91,
130 ARMV7_PERFCTR_DMB_INST = 0x92,
131 ARMV7_PERFCTR_EXT_INTERRUPTS = 0x93,
132
133 ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_COMPLETED = 0xA0,
134 ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_SKIPPED = 0xA1,
135 ARMV7_PERFCTR_PLE_FIFO_FLUSH = 0xA2,
136 ARMV7_PERFCTR_PLE_RQST_COMPLETED = 0xA3,
137 ARMV7_PERFCTR_PLE_FIFO_OVERFLOW = 0xA4,
138 ARMV7_PERFCTR_PLE_RQST_PROG = 0xA5
139};
140
141/*
142 * Cortex-A8 HW events mapping
143 *
144 * The hardware events that we support. We do support cache operations but
145 * we have harvard caches and no way to combine instruction and data
146 * accesses/misses in hardware.
147 */
148static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = {
149 [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
150 [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
151 [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
152 [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
153 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
154 [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
155 [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
156};
157
158static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
159 [PERF_COUNT_HW_CACHE_OP_MAX]
160 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
161 [C(L1D)] = {
162 /*
163 * The performance counters don't differentiate between read
164 * and write accesses/misses so this isn't strictly correct,
165 * but it's the best we can do. Writes and reads get
166 * combined.
167 */
168 [C(OP_READ)] = {
169 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS,
170 [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL,
171 },
172 [C(OP_WRITE)] = {
173 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS,
174 [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL,
175 },
176 [C(OP_PREFETCH)] = {
177 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
178 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
179 },
180 },
181 [C(L1I)] = {
182 [C(OP_READ)] = {
183 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST,
184 [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS,
185 },
186 [C(OP_WRITE)] = {
187 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST,
188 [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS,
189 },
190 [C(OP_PREFETCH)] = {
191 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
192 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
193 },
194 },
195 [C(LL)] = {
196 [C(OP_READ)] = {
197 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS,
198 [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS,
199 },
200 [C(OP_WRITE)] = {
201 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS,
202 [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS,
203 },
204 [C(OP_PREFETCH)] = {
205 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
206 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
207 },
208 },
209 [C(DTLB)] = {
210 /*
211 * Only ITLB misses and DTLB refills are supported.
212	 * If users want the DTLB refill misses, a raw counter
213 * must be used.
214 */
215 [C(OP_READ)] = {
216 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
217 [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
218 },
219 [C(OP_WRITE)] = {
220 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
221 [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
222 },
223 [C(OP_PREFETCH)] = {
224 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
225 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
226 },
227 },
228 [C(ITLB)] = {
229 [C(OP_READ)] = {
230 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
231 [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS,
232 },
233 [C(OP_WRITE)] = {
234 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
235 [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS,
236 },
237 [C(OP_PREFETCH)] = {
238 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
239 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
240 },
241 },
242 [C(BPU)] = {
243 [C(OP_READ)] = {
244 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE,
245 [C(RESULT_MISS)]
246 = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
247 },
248 [C(OP_WRITE)] = {
249 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE,
250 [C(RESULT_MISS)]
251 = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
252 },
253 [C(OP_PREFETCH)] = {
254 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
255 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
256 },
257 },
258};
259
260/*
261 * Cortex-A9 HW events mapping
262 */
263static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = {
264 [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
265 [PERF_COUNT_HW_INSTRUCTIONS] =
266 ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE,
267 [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_COHERENT_LINE_HIT,
268 [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_COHERENT_LINE_MISS,
269 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
270 [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
271 [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
272};
273
274static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
275 [PERF_COUNT_HW_CACHE_OP_MAX]
276 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
277 [C(L1D)] = {
278 /*
279 * The performance counters don't differentiate between read
280 * and write accesses/misses so this isn't strictly correct,
281 * but it's the best we can do. Writes and reads get
282 * combined.
283 */
284 [C(OP_READ)] = {
285 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS,
286 [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL,
287 },
288 [C(OP_WRITE)] = {
289 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS,
290 [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL,
291 },
292 [C(OP_PREFETCH)] = {
293 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
294 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
295 },
296 },
297 [C(L1I)] = {
298 [C(OP_READ)] = {
299 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
300 [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS,
301 },
302 [C(OP_WRITE)] = {
303 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
304 [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS,
305 },
306 [C(OP_PREFETCH)] = {
307 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
308 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
309 },
310 },
311 [C(LL)] = {
312 [C(OP_READ)] = {
313 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
314 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
315 },
316 [C(OP_WRITE)] = {
317 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
318 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
319 },
320 [C(OP_PREFETCH)] = {
321 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
322 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
323 },
324 },
325 [C(DTLB)] = {
326 /*
327 * Only ITLB misses and DTLB refills are supported.
328	 * If users want the DTLB refill misses, a raw counter
329 * must be used.
330 */
331 [C(OP_READ)] = {
332 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
333 [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
334 },
335 [C(OP_WRITE)] = {
336 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
337 [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
338 },
339 [C(OP_PREFETCH)] = {
340 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
341 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
342 },
343 },
344 [C(ITLB)] = {
345 [C(OP_READ)] = {
346 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
347 [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS,
348 },
349 [C(OP_WRITE)] = {
350 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
351 [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS,
352 },
353 [C(OP_PREFETCH)] = {
354 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
355 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
356 },
357 },
358 [C(BPU)] = {
359 [C(OP_READ)] = {
360 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE,
361 [C(RESULT_MISS)]
362 = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
363 },
364 [C(OP_WRITE)] = {
365 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE,
366 [C(RESULT_MISS)]
367 = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
368 },
369 [C(OP_PREFETCH)] = {
370 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
371 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
372 },
373 },
374};
375
376/*
377 * Perf Events counters
378 */
379enum armv7_counters {
380 ARMV7_CYCLE_COUNTER = 1, /* Cycle counter */
381 ARMV7_COUNTER0 = 2, /* First event counter */
382};
383
384/*
385 * The cycle counter is ARMV7_CYCLE_COUNTER.
386 * The first event counter is ARMV7_COUNTER0.
387 * The last event counter is (ARMV7_COUNTER0 + armpmu->num_events - 1).
388 */
389#define ARMV7_COUNTER_LAST (ARMV7_COUNTER0 + armpmu->num_events - 1)
390
391/*
392 * ARMv7 low level PMNC access
393 */
394
395/*
396 * Per-CPU PMNC: config reg
397 */
398#define ARMV7_PMNC_E (1 << 0) /* Enable all counters */
399#define ARMV7_PMNC_P (1 << 1) /* Reset all counters */
400#define ARMV7_PMNC_C (1 << 2) /* Cycle counter reset */
401#define ARMV7_PMNC_D (1 << 3) /* CCNT counts every 64th cpu cycle */
402#define ARMV7_PMNC_X (1 << 4) /* Export to ETM */
403#define ARMV7_PMNC_DP	(1 << 5) /* Disable CCNT if non-invasive debug */
404#define ARMV7_PMNC_N_SHIFT 11 /* Number of counters supported */
405#define ARMV7_PMNC_N_MASK 0x1f
406#define ARMV7_PMNC_MASK 0x3f /* Mask for writable bits */
407
408/*
409 * Available counters
410 */
411#define ARMV7_CNT0 0 /* First event counter */
412#define ARMV7_CCNT 31 /* Cycle counter */
413
414/* Perf Event to low level counters mapping */
415#define ARMV7_EVENT_CNT_TO_CNTx (ARMV7_COUNTER0 - ARMV7_CNT0)
416
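/*
 * A short worked example of the index mapping above, consistent with the
 * ARMV7_CNTENS_* and ARMV7_INTENS_* macros that follow: perf index 2
 * (ARMV7_COUNTER0) is hardware counter CNT0, so its enable/flag bit is
 * idx - 2 = 0, while the cycle counter always uses bit 31 (ARMV7_CCNT).
 * Illustrative helper only, using <stdint.h> types.
 */
static uint32_t armv7_counter_bit_sketch(unsigned int idx)
{
	if (idx == 1)			/* ARMV7_CYCLE_COUNTER */
		return 1u << 31;	/* ARMV7_CCNT */

	return 1u << (idx - 2);		/* idx - ARMV7_EVENT_CNT_TO_CNTx */
}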
417/*
418 * CNTENS: counters enable reg
419 */
420#define ARMV7_CNTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
421#define ARMV7_CNTENS_C (1 << ARMV7_CCNT)
422
423/*
424 * CNTENC: counters disable reg
425 */
426#define ARMV7_CNTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
427#define ARMV7_CNTENC_C (1 << ARMV7_CCNT)
428
429/*
430 * INTENS: counters overflow interrupt enable reg
431 */
432#define ARMV7_INTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
433#define ARMV7_INTENS_C (1 << ARMV7_CCNT)
434
435/*
436 * INTENC: counters overflow interrupt disable reg
437 */
438#define ARMV7_INTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
439#define ARMV7_INTENC_C (1 << ARMV7_CCNT)
440
441/*
442 * EVTSEL: Event selection reg
443 */
444#define ARMV7_EVTSEL_MASK 0xff /* Mask for writable bits */
445
446/*
447 * SELECT: Counter selection reg
448 */
449#define ARMV7_SELECT_MASK 0x1f /* Mask for writable bits */
450
451/*
452 * FLAG: counters overflow flag status reg
453 */
454#define ARMV7_FLAG_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
455#define ARMV7_FLAG_C (1 << ARMV7_CCNT)
456#define ARMV7_FLAG_MASK 0xffffffff /* Mask for writable bits */
457#define ARMV7_OVERFLOWED_MASK ARMV7_FLAG_MASK
458
459static inline unsigned long armv7_pmnc_read(void)
460{
461 u32 val;
462 asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val));
463 return val;
464}
465
466static inline void armv7_pmnc_write(unsigned long val)
467{
468 val &= ARMV7_PMNC_MASK;
469 asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val));
470}
471
472static inline int armv7_pmnc_has_overflowed(unsigned long pmnc)
473{
474 return pmnc & ARMV7_OVERFLOWED_MASK;
475}
476
477static inline int armv7_pmnc_counter_has_overflowed(unsigned long pmnc,
478 enum armv7_counters counter)
479{
480 int ret = 0;
481
482 if (counter == ARMV7_CYCLE_COUNTER)
483 ret = pmnc & ARMV7_FLAG_C;
484 else if ((counter >= ARMV7_COUNTER0) && (counter <= ARMV7_COUNTER_LAST))
485 ret = pmnc & ARMV7_FLAG_P(counter);
486 else
487 pr_err("CPU%u checking wrong counter %d overflow status\n",
488 smp_processor_id(), counter);
489
490 return ret;
491}
492
493static inline int armv7_pmnc_select_counter(unsigned int idx)
494{
495 u32 val;
496
497 if ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST)) {
498 pr_err("CPU%u selecting wrong PMNC counter"
499 " %d\n", smp_processor_id(), idx);
500 return -1;
501 }
502
503 val = (idx - ARMV7_EVENT_CNT_TO_CNTx) & ARMV7_SELECT_MASK;
504 asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val));
505
506 return idx;
507}
508
509static inline u32 armv7pmu_read_counter(int idx)
510{
511 unsigned long value = 0;
512
513 if (idx == ARMV7_CYCLE_COUNTER)
514 asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value));
515 else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
516 if (armv7_pmnc_select_counter(idx) == idx)
517 asm volatile("mrc p15, 0, %0, c9, c13, 2"
518 : "=r" (value));
519 } else
520 pr_err("CPU%u reading wrong counter %d\n",
521 smp_processor_id(), idx);
522
523 return value;
524}
525
526static inline void armv7pmu_write_counter(int idx, u32 value)
527{
528 if (idx == ARMV7_CYCLE_COUNTER)
529 asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value));
530 else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
531 if (armv7_pmnc_select_counter(idx) == idx)
532 asm volatile("mcr p15, 0, %0, c9, c13, 2"
533 : : "r" (value));
534 } else
535 pr_err("CPU%u writing wrong counter %d\n",
536 smp_processor_id(), idx);
537}
538
539static inline void armv7_pmnc_write_evtsel(unsigned int idx, u32 val)
540{
541 if (armv7_pmnc_select_counter(idx) == idx) {
542 val &= ARMV7_EVTSEL_MASK;
543 asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val));
544 }
545}
546
547static inline u32 armv7_pmnc_enable_counter(unsigned int idx)
548{
549 u32 val;
550
551 if ((idx != ARMV7_CYCLE_COUNTER) &&
552 ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
553 pr_err("CPU%u enabling wrong PMNC counter"
554 " %d\n", smp_processor_id(), idx);
555 return -1;
556 }
557
558 if (idx == ARMV7_CYCLE_COUNTER)
559 val = ARMV7_CNTENS_C;
560 else
561 val = ARMV7_CNTENS_P(idx);
562
563 asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val));
564
565 return idx;
566}
567
568static inline u32 armv7_pmnc_disable_counter(unsigned int idx)
569{
570 u32 val;
571
572
573 if ((idx != ARMV7_CYCLE_COUNTER) &&
574 ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
575 pr_err("CPU%u disabling wrong PMNC counter"
576 " %d\n", smp_processor_id(), idx);
577 return -1;
578 }
579
580 if (idx == ARMV7_CYCLE_COUNTER)
581 val = ARMV7_CNTENC_C;
582 else
583 val = ARMV7_CNTENC_P(idx);
584
585 asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val));
586
587 return idx;
588}
589
590static inline u32 armv7_pmnc_enable_intens(unsigned int idx)
591{
592 u32 val;
593
594 if ((idx != ARMV7_CYCLE_COUNTER) &&
595 ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
596 pr_err("CPU%u enabling wrong PMNC counter"
597 " interrupt enable %d\n", smp_processor_id(), idx);
598 return -1;
599 }
600
601 if (idx == ARMV7_CYCLE_COUNTER)
602 val = ARMV7_INTENS_C;
603 else
604 val = ARMV7_INTENS_P(idx);
605
606 asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val));
607
608 return idx;
609}
610
611static inline u32 armv7_pmnc_disable_intens(unsigned int idx)
612{
613 u32 val;
614
615 if ((idx != ARMV7_CYCLE_COUNTER) &&
616 ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
617 pr_err("CPU%u disabling wrong PMNC counter"
618 " interrupt enable %d\n", smp_processor_id(), idx);
619 return -1;
620 }
621
622 if (idx == ARMV7_CYCLE_COUNTER)
623 val = ARMV7_INTENC_C;
624 else
625 val = ARMV7_INTENC_P(idx);
626
627 asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val));
628
629 return idx;
630}
631
632static inline u32 armv7_pmnc_getreset_flags(void)
633{
634 u32 val;
635
636 /* Read */
637 asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
638
639 /* Write to clear flags */
640 val &= ARMV7_FLAG_MASK;
641 asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val));
642
643 return val;
644}
645
646#ifdef DEBUG
647static void armv7_pmnc_dump_regs(void)
648{
649 u32 val;
650 unsigned int cnt;
651
652 printk(KERN_INFO "PMNC registers dump:\n");
653
654 asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val));
655 printk(KERN_INFO "PMNC =0x%08x\n", val);
656
657 asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val));
658 printk(KERN_INFO "CNTENS=0x%08x\n", val);
659
660 asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val));
661 printk(KERN_INFO "INTENS=0x%08x\n", val);
662
663 asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
664 printk(KERN_INFO "FLAGS =0x%08x\n", val);
665
666 asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val));
667 printk(KERN_INFO "SELECT=0x%08x\n", val);
668
669 asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val));
670 printk(KERN_INFO "CCNT =0x%08x\n", val);
671
672 for (cnt = ARMV7_COUNTER0; cnt < ARMV7_COUNTER_LAST; cnt++) {
673 armv7_pmnc_select_counter(cnt);
674 asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val));
675 printk(KERN_INFO "CNT[%d] count =0x%08x\n",
676 cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
677 asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val));
678 printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n",
679 cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
680 }
681}
682#endif
683
684void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx)
685{
686 unsigned long flags;
687
688 /*
689 * Enable counter and interrupt, and set the counter to count
690 * the event that we're interested in.
691 */
692 spin_lock_irqsave(&pmu_lock, flags);
693
694 /*
695 * Disable counter
696 */
697 armv7_pmnc_disable_counter(idx);
698
699 /*
700 * Set event (if destined for PMNx counters)
701 * We don't need to set the event if it's a cycle count
702 */
703 if (idx != ARMV7_CYCLE_COUNTER)
704 armv7_pmnc_write_evtsel(idx, hwc->config_base);
705
706 /*
707 * Enable interrupt for this counter
708 */
709 armv7_pmnc_enable_intens(idx);
710
711 /*
712 * Enable counter
713 */
714 armv7_pmnc_enable_counter(idx);
715
716 spin_unlock_irqrestore(&pmu_lock, flags);
717}
718
719static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx)
720{
721 unsigned long flags;
722
723 /*
724 * Disable counter and interrupt
725 */
726 spin_lock_irqsave(&pmu_lock, flags);
727
728 /*
729 * Disable counter
730 */
731 armv7_pmnc_disable_counter(idx);
732
733 /*
734 * Disable interrupt for this counter
735 */
736 armv7_pmnc_disable_intens(idx);
737
738 spin_unlock_irqrestore(&pmu_lock, flags);
739}
740
741static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
742{
743 unsigned long pmnc;
744 struct perf_sample_data data;
745 struct cpu_hw_events *cpuc;
746 struct pt_regs *regs;
747 int idx;
748
749 /*
750 * Get and reset the IRQ flags
751 */
752 pmnc = armv7_pmnc_getreset_flags();
753
754 /*
755 * Did an overflow occur?
756 */
757 if (!armv7_pmnc_has_overflowed(pmnc))
758 return IRQ_NONE;
759
760 /*
761 * Handle the counter(s) overflow(s)
762 */
763 regs = get_irq_regs();
764
765 perf_sample_data_init(&data, 0);
766
767 cpuc = &__get_cpu_var(cpu_hw_events);
768 for (idx = 0; idx <= armpmu->num_events; ++idx) {
769 struct perf_event *event = cpuc->events[idx];
770 struct hw_perf_event *hwc;
771
772 if (!test_bit(idx, cpuc->active_mask))
773 continue;
774
775 /*
776 * We have a single interrupt for all counters. Check that
777 * each counter has overflowed before we process it.
778 */
779 if (!armv7_pmnc_counter_has_overflowed(pmnc, idx))
780 continue;
781
782 hwc = &event->hw;
783 armpmu_event_update(event, hwc, idx);
784 data.period = event->hw.last_period;
785 if (!armpmu_event_set_period(event, hwc, idx))
786 continue;
787
788 if (perf_event_overflow(event, 0, &data, regs))
789 armpmu->disable(hwc, idx);
790 }
791
792 /*
793 * Handle the pending perf events.
794 *
795 * Note: this call *must* be run with interrupts disabled. For
796 * platforms that can have the PMU interrupts raised as an NMI, this
797 * will not work.
798 */
799 irq_work_run();
800
801 return IRQ_HANDLED;
802}
803
804static void armv7pmu_start(void)
805{
806 unsigned long flags;
807
808 spin_lock_irqsave(&pmu_lock, flags);
809 /* Enable all counters */
810 armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E);
811 spin_unlock_irqrestore(&pmu_lock, flags);
812}
813
814static void armv7pmu_stop(void)
815{
816 unsigned long flags;
817
818 spin_lock_irqsave(&pmu_lock, flags);
819 /* Disable all counters */
820 armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E);
821 spin_unlock_irqrestore(&pmu_lock, flags);
822}
823
824static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc,
825 struct hw_perf_event *event)
826{
827 int idx;
828
829	/* Always place a cycle-counting event on the cycle counter. */
830 if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) {
831 if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask))
832 return -EAGAIN;
833
834 return ARMV7_CYCLE_COUNTER;
835 } else {
836 /*
837 * For anything other than a cycle counter, try and use
838 * the events counters
839 */
840 for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) {
841 if (!test_and_set_bit(idx, cpuc->used_mask))
842 return idx;
843 }
844
845 /* The counters are all in use. */
846 return -EAGAIN;
847 }
848}
849
850static struct arm_pmu armv7pmu = {
851 .handle_irq = armv7pmu_handle_irq,
852 .enable = armv7pmu_enable_event,
853 .disable = armv7pmu_disable_event,
854 .read_counter = armv7pmu_read_counter,
855 .write_counter = armv7pmu_write_counter,
856 .get_event_idx = armv7pmu_get_event_idx,
857 .start = armv7pmu_start,
858 .stop = armv7pmu_stop,
859 .raw_event_mask = 0xFF,
860 .max_period = (1LLU << 32) - 1,
861};
862
863static u32 __init armv7_reset_read_pmnc(void)
864{
865 u32 nb_cnt;
866
867 /* Initialize & Reset PMNC: C and P bits */
868 armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C);
869
870	/* Read the number of CNTx counters supported from PMNC */
871 nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK;
872
873 /* Add the CPU cycles counter and return */
874 return nb_cnt + 1;
875}
876
877const struct arm_pmu *__init armv7_a8_pmu_init(void)
878{
879 armv7pmu.id = ARM_PERF_PMU_ID_CA8;
880 armv7pmu.name = "ARMv7 Cortex-A8";
881 armv7pmu.cache_map = &armv7_a8_perf_cache_map;
882 armv7pmu.event_map = &armv7_a8_perf_map;
883 armv7pmu.num_events = armv7_reset_read_pmnc();
884 return &armv7pmu;
885}
886
887const struct arm_pmu *__init armv7_a9_pmu_init(void)
888{
889 armv7pmu.id = ARM_PERF_PMU_ID_CA9;
890 armv7pmu.name = "ARMv7 Cortex-A9";
891 armv7pmu.cache_map = &armv7_a9_perf_cache_map;
892 armv7pmu.event_map = &armv7_a9_perf_map;
893 armv7pmu.num_events = armv7_reset_read_pmnc();
894 return &armv7pmu;
895}
896#else
897const struct arm_pmu *__init armv7_a8_pmu_init(void)
898{
899 return NULL;
900}
901
902const struct arm_pmu *__init armv7_a9_pmu_init(void)
903{
904 return NULL;
905}
906#endif /* CONFIG_CPU_V7 */
diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c
new file mode 100644
index 000000000000..4e9592789d40
--- /dev/null
+++ b/arch/arm/kernel/perf_event_xscale.c
@@ -0,0 +1,807 @@
1/*
2 * ARMv5 [xscale] Performance counter handling code.
3 *
4 * Copyright (C) 2010, ARM Ltd., Will Deacon <will.deacon@arm.com>
5 *
6 * Based on the previous xscale OProfile code.
7 *
8 * There are two variants of the xscale PMU that we support:
9 * - xscale1pmu: 2 event counters and a cycle counter
10 * - xscale2pmu: 4 event counters and a cycle counter
11 * The two variants share event definitions, but have different
12 * PMU structures.
13 */
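/*
 * On xscale1pmu everything lives in the single PMNC register: the event
 * select fields for the two counters sit at bit 12 and bit 20, and the
 * per-counter interrupt enables at bits 4-6 (see the XSCALE1_* macros
 * further down). A minimal sketch of programming counter 0, operating on a
 * PMNC value held in an ordinary variable; <stdint.h> types, illustrative
 * helper name.
 */
static uint32_t xscale1_program_counter0_sketch(uint32_t pmnc, uint8_t event)
{
	pmnc &= ~(0xFFu << 12);			/* clear the COUNT0 event field */
	pmnc |= (uint32_t)event << 12;		/* select the new event */
	pmnc |= 1u << 4;			/* enable COUNT0 overflow interrupt */
	return pmnc;				/* caller also sets bit 0 to enable the PMU */
}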
14
15#ifdef CONFIG_CPU_XSCALE
16enum xscale_perf_types {
17 XSCALE_PERFCTR_ICACHE_MISS = 0x00,
18 XSCALE_PERFCTR_ICACHE_NO_DELIVER = 0x01,
19 XSCALE_PERFCTR_DATA_STALL = 0x02,
20 XSCALE_PERFCTR_ITLB_MISS = 0x03,
21 XSCALE_PERFCTR_DTLB_MISS = 0x04,
22 XSCALE_PERFCTR_BRANCH = 0x05,
23 XSCALE_PERFCTR_BRANCH_MISS = 0x06,
24 XSCALE_PERFCTR_INSTRUCTION = 0x07,
25 XSCALE_PERFCTR_DCACHE_FULL_STALL = 0x08,
26 XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG = 0x09,
27 XSCALE_PERFCTR_DCACHE_ACCESS = 0x0A,
28 XSCALE_PERFCTR_DCACHE_MISS = 0x0B,
29 XSCALE_PERFCTR_DCACHE_WRITE_BACK = 0x0C,
30 XSCALE_PERFCTR_PC_CHANGED = 0x0D,
31 XSCALE_PERFCTR_BCU_REQUEST = 0x10,
32 XSCALE_PERFCTR_BCU_FULL = 0x11,
33 XSCALE_PERFCTR_BCU_DRAIN = 0x12,
34 XSCALE_PERFCTR_BCU_ECC_NO_ELOG = 0x14,
35 XSCALE_PERFCTR_BCU_1_BIT_ERR = 0x15,
36 XSCALE_PERFCTR_RMW = 0x16,
37 /* XSCALE_PERFCTR_CCNT is not hardware defined */
38 XSCALE_PERFCTR_CCNT = 0xFE,
39 XSCALE_PERFCTR_UNUSED = 0xFF,
40};
41
42enum xscale_counters {
43 XSCALE_CYCLE_COUNTER = 1,
44 XSCALE_COUNTER0,
45 XSCALE_COUNTER1,
46 XSCALE_COUNTER2,
47 XSCALE_COUNTER3,
48};
49
50static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = {
51 [PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT,
52 [PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION,
53 [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
54 [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
55 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH,
56 [PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS,
57 [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
58};
59
60static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
61 [PERF_COUNT_HW_CACHE_OP_MAX]
62 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
63 [C(L1D)] = {
64 [C(OP_READ)] = {
65 [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
66 [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
67 },
68 [C(OP_WRITE)] = {
69 [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
70 [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
71 },
72 [C(OP_PREFETCH)] = {
73 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
74 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
75 },
76 },
77 [C(L1I)] = {
78 [C(OP_READ)] = {
79 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
80 [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS,
81 },
82 [C(OP_WRITE)] = {
83 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
84 [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS,
85 },
86 [C(OP_PREFETCH)] = {
87 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
88 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
89 },
90 },
91 [C(LL)] = {
92 [C(OP_READ)] = {
93 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
94 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
95 },
96 [C(OP_WRITE)] = {
97 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
98 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
99 },
100 [C(OP_PREFETCH)] = {
101 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
102 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
103 },
104 },
105 [C(DTLB)] = {
106 [C(OP_READ)] = {
107 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
108 [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
109 },
110 [C(OP_WRITE)] = {
111 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
112 [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
113 },
114 [C(OP_PREFETCH)] = {
115 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
116 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
117 },
118 },
119 [C(ITLB)] = {
120 [C(OP_READ)] = {
121 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
122 [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
123 },
124 [C(OP_WRITE)] = {
125 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
126 [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
127 },
128 [C(OP_PREFETCH)] = {
129 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
130 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
131 },
132 },
133 [C(BPU)] = {
134 [C(OP_READ)] = {
135 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
136 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
137 },
138 [C(OP_WRITE)] = {
139 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
140 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
141 },
142 [C(OP_PREFETCH)] = {
143 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
144 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
145 },
146 },
147};
148
149#define XSCALE_PMU_ENABLE 0x001
150#define XSCALE_PMN_RESET 0x002
151#define XSCALE_CCNT_RESET 0x004
152#define XSCALE_PMU_RESET	(XSCALE_CCNT_RESET | XSCALE_PMN_RESET)
153#define XSCALE_PMU_CNT64 0x008
154
155#define XSCALE1_OVERFLOWED_MASK 0x700
156#define XSCALE1_CCOUNT_OVERFLOW 0x400
157#define XSCALE1_COUNT0_OVERFLOW 0x100
158#define XSCALE1_COUNT1_OVERFLOW 0x200
159#define XSCALE1_CCOUNT_INT_EN 0x040
160#define XSCALE1_COUNT0_INT_EN 0x010
161#define XSCALE1_COUNT1_INT_EN 0x020
162#define XSCALE1_COUNT0_EVT_SHFT 12
163#define XSCALE1_COUNT0_EVT_MASK (0xff << XSCALE1_COUNT0_EVT_SHFT)
164#define XSCALE1_COUNT1_EVT_SHFT 20
165#define XSCALE1_COUNT1_EVT_MASK (0xff << XSCALE1_COUNT1_EVT_SHFT)
166
167static inline u32
168xscale1pmu_read_pmnc(void)
169{
170 u32 val;
171 asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val));
172 return val;
173}
174
175static inline void
176xscale1pmu_write_pmnc(u32 val)
177{
178	/* upper 4 bits and bits 7, 11 are write-as-0 */
179 val &= 0xffff77f;
180 asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val));
181}
182
183static inline int
184xscale1_pmnc_counter_has_overflowed(unsigned long pmnc,
185 enum xscale_counters counter)
186{
187 int ret = 0;
188
189 switch (counter) {
190 case XSCALE_CYCLE_COUNTER:
191 ret = pmnc & XSCALE1_CCOUNT_OVERFLOW;
192 break;
193 case XSCALE_COUNTER0:
194 ret = pmnc & XSCALE1_COUNT0_OVERFLOW;
195 break;
196 case XSCALE_COUNTER1:
197 ret = pmnc & XSCALE1_COUNT1_OVERFLOW;
198 break;
199 default:
200 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
201 }
202
203 return ret;
204}
205
206static irqreturn_t
207xscale1pmu_handle_irq(int irq_num, void *dev)
208{
209 unsigned long pmnc;
210 struct perf_sample_data data;
211 struct cpu_hw_events *cpuc;
212 struct pt_regs *regs;
213 int idx;
214
215 /*
216 * NOTE: there's an A stepping erratum that states if an overflow
217 * bit already exists and another occurs, the previous
218	 * overflow bit gets cleared. There's no workaround.
219 * Fixed in B stepping or later.
220 */
221 pmnc = xscale1pmu_read_pmnc();
222
223 /*
224 * Write the value back to clear the overflow flags. Overflow
225 * flags remain in pmnc for use below. We also disable the PMU
226 * while we process the interrupt.
227 */
228 xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
229
230 if (!(pmnc & XSCALE1_OVERFLOWED_MASK))
231 return IRQ_NONE;
232
233 regs = get_irq_regs();
234
235 perf_sample_data_init(&data, 0);
236
237 cpuc = &__get_cpu_var(cpu_hw_events);
238 for (idx = 0; idx <= armpmu->num_events; ++idx) {
239 struct perf_event *event = cpuc->events[idx];
240 struct hw_perf_event *hwc;
241
242 if (!test_bit(idx, cpuc->active_mask))
243 continue;
244
245 if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx))
246 continue;
247
248 hwc = &event->hw;
249 armpmu_event_update(event, hwc, idx);
250 data.period = event->hw.last_period;
251 if (!armpmu_event_set_period(event, hwc, idx))
252 continue;
253
254 if (perf_event_overflow(event, 0, &data, regs))
255 armpmu->disable(hwc, idx);
256 }
257
258 irq_work_run();
259
260 /*
261 * Re-enable the PMU.
262 */
263 pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE;
264 xscale1pmu_write_pmnc(pmnc);
265
266 return IRQ_HANDLED;
267}
268
269static void
270xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx)
271{
272 unsigned long val, mask, evt, flags;
273
274 switch (idx) {
275 case XSCALE_CYCLE_COUNTER:
276 mask = 0;
277 evt = XSCALE1_CCOUNT_INT_EN;
278 break;
279 case XSCALE_COUNTER0:
280 mask = XSCALE1_COUNT0_EVT_MASK;
281 evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) |
282 XSCALE1_COUNT0_INT_EN;
283 break;
284 case XSCALE_COUNTER1:
285 mask = XSCALE1_COUNT1_EVT_MASK;
286 evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) |
287 XSCALE1_COUNT1_INT_EN;
288 break;
289 default:
290 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
291 return;
292 }
293
294 spin_lock_irqsave(&pmu_lock, flags);
295 val = xscale1pmu_read_pmnc();
296 val &= ~mask;
297 val |= evt;
298 xscale1pmu_write_pmnc(val);
299 spin_unlock_irqrestore(&pmu_lock, flags);
300}
301
302static void
303xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx)
304{
305 unsigned long val, mask, evt, flags;
306
307 switch (idx) {
308 case XSCALE_CYCLE_COUNTER:
309 mask = XSCALE1_CCOUNT_INT_EN;
310 evt = 0;
311 break;
312 case XSCALE_COUNTER0:
313 mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK;
314 evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT;
315 break;
316 case XSCALE_COUNTER1:
317 mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK;
318 evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT;
319 break;
320 default:
321 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
322 return;
323 }
324
325 spin_lock_irqsave(&pmu_lock, flags);
326 val = xscale1pmu_read_pmnc();
327 val &= ~mask;
328 val |= evt;
329 xscale1pmu_write_pmnc(val);
330 spin_unlock_irqrestore(&pmu_lock, flags);
331}
332
333static int
334xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc,
335 struct hw_perf_event *event)
336{
337 if (XSCALE_PERFCTR_CCNT == event->config_base) {
338 if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask))
339 return -EAGAIN;
340
341 return XSCALE_CYCLE_COUNTER;
342 } else {
343 if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask))
344 return XSCALE_COUNTER1;
345
346 if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask))
347 return XSCALE_COUNTER0;
348
349 return -EAGAIN;
350 }
351}
352
353static void
354xscale1pmu_start(void)
355{
356 unsigned long flags, val;
357
358 spin_lock_irqsave(&pmu_lock, flags);
359 val = xscale1pmu_read_pmnc();
360 val |= XSCALE_PMU_ENABLE;
361 xscale1pmu_write_pmnc(val);
362 spin_unlock_irqrestore(&pmu_lock, flags);
363}
364
365static void
366xscale1pmu_stop(void)
367{
368 unsigned long flags, val;
369
370 spin_lock_irqsave(&pmu_lock, flags);
371 val = xscale1pmu_read_pmnc();
372 val &= ~XSCALE_PMU_ENABLE;
373 xscale1pmu_write_pmnc(val);
374 spin_unlock_irqrestore(&pmu_lock, flags);
375}
376
377static inline u32
378xscale1pmu_read_counter(int counter)
379{
380 u32 val = 0;
381
382 switch (counter) {
383 case XSCALE_CYCLE_COUNTER:
384 asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val));
385 break;
386 case XSCALE_COUNTER0:
387 asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val));
388 break;
389 case XSCALE_COUNTER1:
390 asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val));
391 break;
392 }
393
394 return val;
395}
396
397static inline void
398xscale1pmu_write_counter(int counter, u32 val)
399{
400 switch (counter) {
401 case XSCALE_CYCLE_COUNTER:
402 asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val));
403 break;
404 case XSCALE_COUNTER0:
405 asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val));
406 break;
407 case XSCALE_COUNTER1:
408 asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val));
409 break;
410 }
411}
412
413static const struct arm_pmu xscale1pmu = {
414 .id = ARM_PERF_PMU_ID_XSCALE1,
415 .name = "xscale1",
416 .handle_irq = xscale1pmu_handle_irq,
417 .enable = xscale1pmu_enable_event,
418 .disable = xscale1pmu_disable_event,
419 .read_counter = xscale1pmu_read_counter,
420 .write_counter = xscale1pmu_write_counter,
421 .get_event_idx = xscale1pmu_get_event_idx,
422 .start = xscale1pmu_start,
423 .stop = xscale1pmu_stop,
424 .cache_map = &xscale_perf_cache_map,
425 .event_map = &xscale_perf_map,
426 .raw_event_mask = 0xFF,
427 .num_events = 3,
428 .max_period = (1LLU << 32) - 1,
429};
430
431const struct arm_pmu *__init xscale1pmu_init(void)
432{
433 return &xscale1pmu;
434}
435
436#define XSCALE2_OVERFLOWED_MASK 0x01f
437#define XSCALE2_CCOUNT_OVERFLOW 0x001
438#define XSCALE2_COUNT0_OVERFLOW 0x002
439#define XSCALE2_COUNT1_OVERFLOW 0x004
440#define XSCALE2_COUNT2_OVERFLOW 0x008
441#define XSCALE2_COUNT3_OVERFLOW 0x010
442#define XSCALE2_CCOUNT_INT_EN 0x001
443#define XSCALE2_COUNT0_INT_EN 0x002
444#define XSCALE2_COUNT1_INT_EN 0x004
445#define XSCALE2_COUNT2_INT_EN 0x008
446#define XSCALE2_COUNT3_INT_EN 0x010
447#define XSCALE2_COUNT0_EVT_SHFT 0
448#define XSCALE2_COUNT0_EVT_MASK (0xff << XSCALE2_COUNT0_EVT_SHFT)
449#define XSCALE2_COUNT1_EVT_SHFT 8
450#define XSCALE2_COUNT1_EVT_MASK (0xff << XSCALE2_COUNT1_EVT_SHFT)
451#define XSCALE2_COUNT2_EVT_SHFT 16
452#define XSCALE2_COUNT2_EVT_MASK (0xff << XSCALE2_COUNT2_EVT_SHFT)
453#define XSCALE2_COUNT3_EVT_SHFT 24
454#define XSCALE2_COUNT3_EVT_MASK (0xff << XSCALE2_COUNT3_EVT_SHFT)
455
456static inline u32
457xscale2pmu_read_pmnc(void)
458{
459 u32 val;
460 asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val));
461 /* bits 1-2 and 4-23 are read-unpredictable */
462 return val & 0xff000009;
463}
464
465static inline void
466xscale2pmu_write_pmnc(u32 val)
467{
468 /* bits 4-23 are write-as-0, 24-31 are write ignored */
469 val &= 0xf;
470 asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val));
471}
472
473static inline u32
474xscale2pmu_read_overflow_flags(void)
475{
476 u32 val;
477 asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val));
478 return val;
479}
480
481static inline void
482xscale2pmu_write_overflow_flags(u32 val)
483{
484 asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val));
485}
486
487static inline u32
488xscale2pmu_read_event_select(void)
489{
490 u32 val;
491 asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val));
492 return val;
493}
494
495static inline void
496xscale2pmu_write_event_select(u32 val)
497{
498 asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val));
499}
500
501static inline u32
502xscale2pmu_read_int_enable(void)
503{
504 u32 val;
505 asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val));
506 return val;
507}
508
509static void
510xscale2pmu_write_int_enable(u32 val)
511{
512 asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val));
513}
514
515static inline int
516xscale2_pmnc_counter_has_overflowed(unsigned long of_flags,
517 enum xscale_counters counter)
518{
519 int ret = 0;
520
521 switch (counter) {
522 case XSCALE_CYCLE_COUNTER:
523 ret = of_flags & XSCALE2_CCOUNT_OVERFLOW;
524 break;
525 case XSCALE_COUNTER0:
526 ret = of_flags & XSCALE2_COUNT0_OVERFLOW;
527 break;
528 case XSCALE_COUNTER1:
529 ret = of_flags & XSCALE2_COUNT1_OVERFLOW;
530 break;
531 case XSCALE_COUNTER2:
532 ret = of_flags & XSCALE2_COUNT2_OVERFLOW;
533 break;
534 case XSCALE_COUNTER3:
535 ret = of_flags & XSCALE2_COUNT3_OVERFLOW;
536 break;
537 default:
538 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
539 }
540
541 return ret;
542}
543
544static irqreturn_t
545xscale2pmu_handle_irq(int irq_num, void *dev)
546{
547 unsigned long pmnc, of_flags;
548 struct perf_sample_data data;
549 struct cpu_hw_events *cpuc;
550 struct pt_regs *regs;
551 int idx;
552
553 /* Disable the PMU. */
554 pmnc = xscale2pmu_read_pmnc();
555 xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
556
557 /* Check the overflow flag register. */
558 of_flags = xscale2pmu_read_overflow_flags();
559 if (!(of_flags & XSCALE2_OVERFLOWED_MASK))
560 return IRQ_NONE;
561
562 /* Clear the overflow bits. */
563 xscale2pmu_write_overflow_flags(of_flags);
564
565 regs = get_irq_regs();
566
567 perf_sample_data_init(&data, 0);
568
569 cpuc = &__get_cpu_var(cpu_hw_events);
570 for (idx = 0; idx <= armpmu->num_events; ++idx) {
571 struct perf_event *event = cpuc->events[idx];
572 struct hw_perf_event *hwc;
573
574 if (!test_bit(idx, cpuc->active_mask))
575 continue;
576
577 if (!xscale2_pmnc_counter_has_overflowed(pmnc, idx))
578 continue;
579
580 hwc = &event->hw;
581 armpmu_event_update(event, hwc, idx);
582 data.period = event->hw.last_period;
583 if (!armpmu_event_set_period(event, hwc, idx))
584 continue;
585
586 if (perf_event_overflow(event, 0, &data, regs))
587 armpmu->disable(hwc, idx);
588 }
589
590 irq_work_run();
591
592 /*
593 * Re-enable the PMU.
594 */
595 pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE;
596 xscale2pmu_write_pmnc(pmnc);
597
598 return IRQ_HANDLED;
599}
600
601static void
602xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx)
603{
604 unsigned long flags, ien, evtsel;
605
606 ien = xscale2pmu_read_int_enable();
607 evtsel = xscale2pmu_read_event_select();
608
609 switch (idx) {
610 case XSCALE_CYCLE_COUNTER:
611 ien |= XSCALE2_CCOUNT_INT_EN;
612 break;
613 case XSCALE_COUNTER0:
614 ien |= XSCALE2_COUNT0_INT_EN;
615 evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
616 evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT;
617 break;
618 case XSCALE_COUNTER1:
619 ien |= XSCALE2_COUNT1_INT_EN;
620 evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
621 evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT;
622 break;
623 case XSCALE_COUNTER2:
624 ien |= XSCALE2_COUNT2_INT_EN;
625 evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
626 evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT;
627 break;
628 case XSCALE_COUNTER3:
629 ien |= XSCALE2_COUNT3_INT_EN;
630 evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
631 evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT;
632 break;
633 default:
634 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
635 return;
636 }
637
638 spin_lock_irqsave(&pmu_lock, flags);
639 xscale2pmu_write_event_select(evtsel);
640 xscale2pmu_write_int_enable(ien);
641 spin_unlock_irqrestore(&pmu_lock, flags);
642}
643
644static void
645xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx)
646{
647 unsigned long flags, ien, evtsel;
648
649 ien = xscale2pmu_read_int_enable();
650 evtsel = xscale2pmu_read_event_select();
651
652 switch (idx) {
653 case XSCALE_CYCLE_COUNTER:
654 ien &= ~XSCALE2_CCOUNT_INT_EN;
655 break;
656 case XSCALE_COUNTER0:
657 ien &= ~XSCALE2_COUNT0_INT_EN;
658 evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
659 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT;
660 break;
661 case XSCALE_COUNTER1:
662 ien &= ~XSCALE2_COUNT1_INT_EN;
663 evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
664 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT;
665 break;
666 case XSCALE_COUNTER2:
667 ien &= ~XSCALE2_COUNT2_INT_EN;
668 evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
669 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT;
670 break;
671 case XSCALE_COUNTER3:
672 ien &= ~XSCALE2_COUNT3_INT_EN;
673 evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
674 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT;
675 break;
676 default:
677 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
678 return;
679 }
680
681 spin_lock_irqsave(&pmu_lock, flags);
682 xscale2pmu_write_event_select(evtsel);
683 xscale2pmu_write_int_enable(ien);
684 spin_unlock_irqrestore(&pmu_lock, flags);
685}
686
687static int
688xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc,
689 struct hw_perf_event *event)
690{
691 int idx = xscale1pmu_get_event_idx(cpuc, event);
692 if (idx >= 0)
693 goto out;
694
695 if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask))
696 idx = XSCALE_COUNTER3;
697 else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask))
698 idx = XSCALE_COUNTER2;
699out:
700 return idx;
701}
702
703static void
704xscale2pmu_start(void)
705{
706 unsigned long flags, val;
707
708 spin_lock_irqsave(&pmu_lock, flags);
709 val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64;
710 val |= XSCALE_PMU_ENABLE;
711 xscale2pmu_write_pmnc(val);
712 spin_unlock_irqrestore(&pmu_lock, flags);
713}
714
715static void
716xscale2pmu_stop(void)
717{
718 unsigned long flags, val;
719
720 spin_lock_irqsave(&pmu_lock, flags);
721 val = xscale2pmu_read_pmnc();
722 val &= ~XSCALE_PMU_ENABLE;
723 xscale2pmu_write_pmnc(val);
724 spin_unlock_irqrestore(&pmu_lock, flags);
725}
726
727static inline u32
728xscale2pmu_read_counter(int counter)
729{
730 u32 val = 0;
731
732 switch (counter) {
733 case XSCALE_CYCLE_COUNTER:
734 asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val));
735 break;
736 case XSCALE_COUNTER0:
737 asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val));
738 break;
739 case XSCALE_COUNTER1:
740 asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val));
741 break;
742 case XSCALE_COUNTER2:
743 asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val));
744 break;
745 case XSCALE_COUNTER3:
746 asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val));
747 break;
748 }
749
750 return val;
751}
752
753static inline void
754xscale2pmu_write_counter(int counter, u32 val)
755{
756 switch (counter) {
757 case XSCALE_CYCLE_COUNTER:
758 asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val));
759 break;
760 case XSCALE_COUNTER0:
761 asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val));
762 break;
763 case XSCALE_COUNTER1:
764 asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val));
765 break;
766 case XSCALE_COUNTER2:
767 asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val));
768 break;
769 case XSCALE_COUNTER3:
770 asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val));
771 break;
772 }
773}
774
775static const struct arm_pmu xscale2pmu = {
776 .id = ARM_PERF_PMU_ID_XSCALE2,
777 .name = "xscale2",
778 .handle_irq = xscale2pmu_handle_irq,
779 .enable = xscale2pmu_enable_event,
780 .disable = xscale2pmu_disable_event,
781 .read_counter = xscale2pmu_read_counter,
782 .write_counter = xscale2pmu_write_counter,
783 .get_event_idx = xscale2pmu_get_event_idx,
784 .start = xscale2pmu_start,
785 .stop = xscale2pmu_stop,
786 .cache_map = &xscale_perf_cache_map,
787 .event_map = &xscale_perf_map,
788 .raw_event_mask = 0xFF,
789 .num_events = 5,
790 .max_period = (1LLU << 32) - 1,
791};
792
793const struct arm_pmu *__init xscale2pmu_init(void)
794{
795 return &xscale2pmu;
796}
797#else
798const struct arm_pmu *__init xscale1pmu_init(void)
799{
800 return NULL;
801}
802
803const struct arm_pmu *__init xscale2pmu_init(void)
804{
805 return NULL;
806}
807#endif /* CONFIG_CPU_XSCALE */
diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c
new file mode 100644
index 000000000000..2cdcc9287c74
--- /dev/null
+++ b/arch/arm/kernel/sched_clock.c
@@ -0,0 +1,69 @@
1/*
2 * sched_clock.c: support for extending counters to full 64-bit ns counter
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#include <linux/clocksource.h>
9#include <linux/init.h>
10#include <linux/jiffies.h>
11#include <linux/kernel.h>
12#include <linux/sched.h>
13#include <linux/timer.h>
14
15#include <asm/sched_clock.h>
16
17static void sched_clock_poll(unsigned long wrap_ticks);
18static DEFINE_TIMER(sched_clock_timer, sched_clock_poll, 0, 0);
19static void (*sched_clock_update_fn)(void);
20
21static void sched_clock_poll(unsigned long wrap_ticks)
22{
23 mod_timer(&sched_clock_timer, round_jiffies(jiffies + wrap_ticks));
24 sched_clock_update_fn();
25}
26
27void __init init_sched_clock(struct clock_data *cd, void (*update)(void),
28 unsigned int clock_bits, unsigned long rate)
29{
30 unsigned long r, w;
31 u64 res, wrap;
32 char r_unit;
33
34 sched_clock_update_fn = update;
35
36 /* calculate the mult/shift to convert counter ticks to ns. */
37 clocks_calc_mult_shift(&cd->mult, &cd->shift, rate, NSEC_PER_SEC, 60);
38
39 r = rate;
40 if (r >= 4000000) {
41 r /= 1000000;
42 r_unit = 'M';
43 } else {
44 r /= 1000;
45 r_unit = 'k';
46 }
47
48 /* calculate how many ns until we wrap */
49 wrap = cyc_to_ns((1ULL << clock_bits) - 1, cd->mult, cd->shift);
50 do_div(wrap, NSEC_PER_MSEC);
51 w = wrap;
52
53 /* calculate the ns resolution of this counter */
54 res = cyc_to_ns(1ULL, cd->mult, cd->shift);
55 pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lums\n",
56 clock_bits, r, r_unit, res, w);
57
58 /*
59 * Start the timer to keep sched_clock() properly updated and
 60 * set the initial epoch.
61 */
62 sched_clock_timer.data = msecs_to_jiffies(w - (w / 10));
63 sched_clock_poll(sched_clock_timer.data);
64
65 /*
66 * Ensure that sched_clock() starts off at 0ns
67 */
68 cd->epoch_ns = 0;
69}
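
The helpers used above and throughout the conversions below - DEFINE_CLOCK_DATA(), cyc_to_ns(), cyc_to_sched_clock(), cyc_to_fixed_sched_clock() and update_sched_clock() - come from the new asm/sched_clock.h header listed in the diffstat, whose hunk is not reproduced in this section. As a rough, hypothetical sketch of the idea (not the header's actual code): the extension records an epoch of matching (counter, ns) values and adds the masked counter delta since that epoch, while the poll timer set up above fires at roughly 90% of the wrap period so that the delta can never exceed one wrap.

#include <linux/types.h>

struct sketch_clock_data {		/* hypothetical, simplified */
	u64 epoch_ns;			/* ns value at the last update */
	u32 epoch_cyc;			/* counter value at the last update */
	u32 mult;			/* from clocks_calc_mult_shift() */
	u32 shift;
};

static inline u64 sketch_cyc_to_ns(u64 cyc, u32 mult, u32 shift)
{
	return (cyc * mult) >> shift;
}

/* Read path: extend the narrow counter relative to the current epoch. */
static u64 sketch_cyc_to_sched_clock(struct sketch_clock_data *cd,
				     u32 cyc, u32 mask)
{
	return cd->epoch_ns +
	       sketch_cyc_to_ns((cyc - cd->epoch_cyc) & mask,
				cd->mult, cd->shift);
}

/* Update path (poll timer): advance the epoch before the counter wraps. */
static void sketch_update_sched_clock(struct sketch_clock_data *cd,
				      u32 cyc, u32 mask)
{
	cd->epoch_ns = sketch_cyc_to_sched_clock(cd, cyc, mask);
	cd->epoch_cyc = cyc;
}

The real helpers additionally guard readers against racing with the updater; that detail is omitted from this sketch.
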
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 9066473c0ebc..b6b78b22031b 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -16,6 +16,7 @@
16#include <linux/cache.h> 16#include <linux/cache.h>
17#include <linux/profile.h> 17#include <linux/profile.h>
18#include <linux/errno.h> 18#include <linux/errno.h>
19#include <linux/ftrace.h>
19#include <linux/mm.h> 20#include <linux/mm.h>
20#include <linux/err.h> 21#include <linux/err.h>
21#include <linux/cpu.h> 22#include <linux/cpu.h>
@@ -456,7 +457,7 @@ static void ipi_timer(void)
456} 457}
457 458
458#ifdef CONFIG_LOCAL_TIMERS 459#ifdef CONFIG_LOCAL_TIMERS
459asmlinkage void __exception do_local_timer(struct pt_regs *regs) 460asmlinkage void __exception_irq_entry do_local_timer(struct pt_regs *regs)
460{ 461{
461 struct pt_regs *old_regs = set_irq_regs(regs); 462 struct pt_regs *old_regs = set_irq_regs(regs);
462 int cpu = smp_processor_id(); 463 int cpu = smp_processor_id();
@@ -543,7 +544,7 @@ static void ipi_cpu_stop(unsigned int cpu)
543 * 544 *
544 * Bit 0 - Inter-processor function call 545 * Bit 0 - Inter-processor function call
545 */ 546 */
546asmlinkage void __exception do_IPI(struct pt_regs *regs) 547asmlinkage void __exception_irq_entry do_IPI(struct pt_regs *regs)
547{ 548{
548 unsigned int cpu = smp_processor_id(); 549 unsigned int cpu = smp_processor_id();
549 struct ipi_data *ipi = &per_cpu(ipi_data, cpu); 550 struct ipi_data *ipi = &per_cpu(ipi_data, cpu);
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index cead8893b46b..897c1a8f1694 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -101,6 +101,7 @@ SECTIONS
101 __exception_text_start = .; 101 __exception_text_start = .;
102 *(.exception.text) 102 *(.exception.text)
103 __exception_text_end = .; 103 __exception_text_end = .;
104 IRQENTRY_TEXT
104 TEXT_TEXT 105 TEXT_TEXT
105 SCHED_TEXT 106 SCHED_TEXT
106 LOCK_TEXT 107 LOCK_TEXT
diff --git a/arch/arm/mach-at91/at91rm9200_time.c b/arch/arm/mach-at91/at91rm9200_time.c
index 2500f41d8d2d..1dd69c85dfec 100644
--- a/arch/arm/mach-at91/at91rm9200_time.c
+++ b/arch/arm/mach-at91/at91rm9200_time.c
@@ -101,7 +101,6 @@ static struct clocksource clk32k = {
101 .rating = 150, 101 .rating = 150,
102 .read = read_clk32k, 102 .read = read_clk32k,
103 .mask = CLOCKSOURCE_MASK(20), 103 .mask = CLOCKSOURCE_MASK(20),
104 .shift = 10,
105 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 104 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
106}; 105};
107 106
@@ -201,8 +200,7 @@ void __init at91rm9200_timer_init(void)
201 clockevents_register_device(&clkevt); 200 clockevents_register_device(&clkevt);
202 201
203 /* register clocksource */ 202 /* register clocksource */
204 clk32k.mult = clocksource_hz2mult(AT91_SLOW_CLOCK, clk32k.shift); 203 clocksource_register_hz(&clk32k, AT91_SLOW_CLOCK);
205 clocksource_register(&clk32k);
206} 204}
207 205
208struct sys_timer at91rm9200_timer = { 206struct sys_timer at91rm9200_timer = {
diff --git a/arch/arm/mach-at91/at91sam926x_time.c b/arch/arm/mach-at91/at91sam926x_time.c
index 608a63240b64..4ba85499fa97 100644
--- a/arch/arm/mach-at91/at91sam926x_time.c
+++ b/arch/arm/mach-at91/at91sam926x_time.c
@@ -51,7 +51,6 @@ static struct clocksource pit_clk = {
51 .name = "pit", 51 .name = "pit",
52 .rating = 175, 52 .rating = 175,
53 .read = read_pit_clk, 53 .read = read_pit_clk,
54 .shift = 20,
55 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 54 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
56}; 55};
57 56
@@ -163,10 +162,9 @@ static void __init at91sam926x_pit_init(void)
163 * Register clocksource. The high order bits of PIV are unused, 162 * Register clocksource. The high order bits of PIV are unused,
164 * so this isn't a 32-bit counter unless we get clockevent irqs. 163 * so this isn't a 32-bit counter unless we get clockevent irqs.
165 */ 164 */
166 pit_clk.mult = clocksource_hz2mult(pit_rate, pit_clk.shift);
167 bits = 12 /* PICNT */ + ilog2(pit_cycle) /* PIV */; 165 bits = 12 /* PICNT */ + ilog2(pit_cycle) /* PIV */;
168 pit_clk.mask = CLOCKSOURCE_MASK(bits); 166 pit_clk.mask = CLOCKSOURCE_MASK(bits);
169 clocksource_register(&pit_clk); 167 clocksource_register_hz(&pit_clk, pit_rate);
170 168
171 /* Set up irq handler */ 169 /* Set up irq handler */
172 setup_irq(AT91_ID_SYS, &at91sam926x_pit_irq); 170 setup_irq(AT91_ID_SYS, &at91sam926x_pit_irq);
diff --git a/arch/arm/mach-bcmring/core.c b/arch/arm/mach-bcmring/core.c
index d3f959e92b2d..b91b1b0fdff3 100644
--- a/arch/arm/mach-bcmring/core.c
+++ b/arch/arm/mach-bcmring/core.c
@@ -294,7 +294,6 @@ static struct clocksource clocksource_bcmring_timer1 = {
294 .rating = 200, 294 .rating = 200,
295 .read = bcmring_get_cycles_timer1, 295 .read = bcmring_get_cycles_timer1,
296 .mask = CLOCKSOURCE_MASK(32), 296 .mask = CLOCKSOURCE_MASK(32),
297 .shift = 20,
298 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 297 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
299}; 298};
300 299
@@ -303,7 +302,6 @@ static struct clocksource clocksource_bcmring_timer3 = {
303 .rating = 100, 302 .rating = 100,
304 .read = bcmring_get_cycles_timer3, 303 .read = bcmring_get_cycles_timer3,
305 .mask = CLOCKSOURCE_MASK(32), 304 .mask = CLOCKSOURCE_MASK(32),
306 .shift = 20,
307 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 305 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
308}; 306};
309 307
@@ -316,10 +314,8 @@ static int __init bcmring_clocksource_init(void)
316 writel(TIMER_CTRL_32BIT | TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC, 314 writel(TIMER_CTRL_32BIT | TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC,
317 TIMER1_VA_BASE + TIMER_CTRL); 315 TIMER1_VA_BASE + TIMER_CTRL);
318 316
319 clocksource_bcmring_timer1.mult = 317 clocksource_register_khz(&clocksource_bcmring_timer1,
320 clocksource_khz2mult(TIMER1_FREQUENCY_MHZ * 1000, 318 TIMER1_FREQUENCY_MHZ * 1000);
321 clocksource_bcmring_timer1.shift);
322 clocksource_register(&clocksource_bcmring_timer1);
323 319
324 /* setup timer3 as free-running clocksource */ 320 /* setup timer3 as free-running clocksource */
325 writel(0, TIMER3_VA_BASE + TIMER_CTRL); 321 writel(0, TIMER3_VA_BASE + TIMER_CTRL);
@@ -328,10 +324,8 @@ static int __init bcmring_clocksource_init(void)
328 writel(TIMER_CTRL_32BIT | TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC, 324 writel(TIMER_CTRL_32BIT | TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC,
329 TIMER3_VA_BASE + TIMER_CTRL); 325 TIMER3_VA_BASE + TIMER_CTRL);
330 326
331 clocksource_bcmring_timer3.mult = 327 clocksource_register_khz(&clocksource_bcmring_timer3,
332 clocksource_khz2mult(TIMER3_FREQUENCY_KHZ, 328 TIMER3_FREQUENCY_KHZ);
333 clocksource_bcmring_timer3.shift);
334 clocksource_register(&clocksource_bcmring_timer3);
335 329
336 return 0; 330 return 0;
337} 331}
diff --git a/arch/arm/mach-davinci/time.c b/arch/arm/mach-davinci/time.c
index 0f21c36e65dd..c1486716de77 100644
--- a/arch/arm/mach-davinci/time.c
+++ b/arch/arm/mach-davinci/time.c
@@ -276,7 +276,6 @@ static struct clocksource clocksource_davinci = {
276 .rating = 300, 276 .rating = 300,
277 .read = read_cycles, 277 .read = read_cycles,
278 .mask = CLOCKSOURCE_MASK(32), 278 .mask = CLOCKSOURCE_MASK(32),
279 .shift = 24,
280 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 279 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
281}; 280};
282 281
@@ -378,10 +377,8 @@ static void __init davinci_timer_init(void)
378 377
379 /* setup clocksource */ 378 /* setup clocksource */
380 clocksource_davinci.name = id_to_name[clocksource_id]; 379 clocksource_davinci.name = id_to_name[clocksource_id];
381 clocksource_davinci.mult = 380 if (clocksource_register_hz(&clocksource_davinci,
382 clocksource_khz2mult(davinci_clock_tick_rate/1000, 381 davinci_clock_tick_rate))
383 clocksource_davinci.shift);
384 if (clocksource_register(&clocksource_davinci))
385 printk(err, clocksource_davinci.name); 382 printk(err, clocksource_davinci.name);
386 383
387 /* setup clockevent */ 384 /* setup clockevent */
diff --git a/arch/arm/mach-integrator/integrator_ap.c b/arch/arm/mach-integrator/integrator_ap.c
index 548208f11179..2774df8021dc 100644
--- a/arch/arm/mach-integrator/integrator_ap.c
+++ b/arch/arm/mach-integrator/integrator_ap.c
@@ -372,7 +372,6 @@ static struct clocksource clocksource_timersp = {
372 .rating = 200, 372 .rating = 200,
373 .read = timersp_read, 373 .read = timersp_read,
374 .mask = CLOCKSOURCE_MASK(16), 374 .mask = CLOCKSOURCE_MASK(16),
375 .shift = 16,
376 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 375 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
377}; 376};
378 377
@@ -390,8 +389,7 @@ static void integrator_clocksource_init(u32 khz)
390 writel(ctrl, base + TIMER_CTRL); 389 writel(ctrl, base + TIMER_CTRL);
391 writel(0xffff, base + TIMER_LOAD); 390 writel(0xffff, base + TIMER_LOAD);
392 391
393 cs->mult = clocksource_khz2mult(khz, cs->shift); 392 clocksource_register_khz(cs, khz);
394 clocksource_register(cs);
395} 393}
396 394
397static void __iomem * const clkevt_base = (void __iomem *)TIMER1_VA_BASE; 395static void __iomem * const clkevt_base = (void __iomem *)TIMER1_VA_BASE;
diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c
index 0bce09799d18..4dbfcbb9163c 100644
--- a/arch/arm/mach-ixp4xx/common.c
+++ b/arch/arm/mach-ixp4xx/common.c
@@ -35,6 +35,7 @@
35#include <asm/pgtable.h> 35#include <asm/pgtable.h>
36#include <asm/page.h> 36#include <asm/page.h>
37#include <asm/irq.h> 37#include <asm/irq.h>
38#include <asm/sched_clock.h>
38 39
39#include <asm/mach/map.h> 40#include <asm/mach/map.h>
40#include <asm/mach/irq.h> 41#include <asm/mach/irq.h>
@@ -399,6 +400,23 @@ void __init ixp4xx_sys_init(void)
399} 400}
400 401
401/* 402/*
403 * sched_clock()
404 */
405static DEFINE_CLOCK_DATA(cd);
406
407unsigned long long notrace sched_clock(void)
408{
409 u32 cyc = *IXP4XX_OSTS;
410 return cyc_to_sched_clock(&cd, cyc, (u32)~0);
411}
412
413static void notrace ixp4xx_update_sched_clock(void)
414{
415 u32 cyc = *IXP4XX_OSTS;
416 update_sched_clock(&cd, cyc, (u32)~0);
417}
418
419/*
402 * clocksource 420 * clocksource
403 */ 421 */
404static cycle_t ixp4xx_get_cycles(struct clocksource *cs) 422static cycle_t ixp4xx_get_cycles(struct clocksource *cs)
@@ -411,7 +429,6 @@ static struct clocksource clocksource_ixp4xx = {
411 .rating = 200, 429 .rating = 200,
412 .read = ixp4xx_get_cycles, 430 .read = ixp4xx_get_cycles,
413 .mask = CLOCKSOURCE_MASK(32), 431 .mask = CLOCKSOURCE_MASK(32),
414 .shift = 20,
415 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 432 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
416}; 433};
417 434
@@ -419,21 +436,9 @@ unsigned long ixp4xx_timer_freq = FREQ;
419EXPORT_SYMBOL(ixp4xx_timer_freq); 436EXPORT_SYMBOL(ixp4xx_timer_freq);
420static void __init ixp4xx_clocksource_init(void) 437static void __init ixp4xx_clocksource_init(void)
421{ 438{
422 clocksource_ixp4xx.mult = 439 init_sched_clock(&cd, ixp4xx_update_sched_clock, 32, ixp4xx_timer_freq);
423 clocksource_hz2mult(ixp4xx_timer_freq,
424 clocksource_ixp4xx.shift);
425 clocksource_register(&clocksource_ixp4xx);
426}
427
428/*
429 * sched_clock()
430 */
431unsigned long long sched_clock(void)
432{
433 cycle_t cyc = ixp4xx_get_cycles(NULL);
434 struct clocksource *cs = &clocksource_ixp4xx;
435 440
436 return clocksource_cyc2ns(cyc, cs->mult, cs->shift); 441 clocksource_register_hz(&clocksource_ixp4xx, ixp4xx_timer_freq);
437} 442}
438 443
439/* 444/*
diff --git a/arch/arm/mach-lpc32xx/timer.c b/arch/arm/mach-lpc32xx/timer.c
index 630dd4a74b26..6162ac308c20 100644
--- a/arch/arm/mach-lpc32xx/timer.c
+++ b/arch/arm/mach-lpc32xx/timer.c
@@ -38,7 +38,6 @@ static cycle_t lpc32xx_clksrc_read(struct clocksource *cs)
38 38
39static struct clocksource lpc32xx_clksrc = { 39static struct clocksource lpc32xx_clksrc = {
40 .name = "lpc32xx_clksrc", 40 .name = "lpc32xx_clksrc",
41 .shift = 24,
42 .rating = 300, 41 .rating = 300,
43 .read = lpc32xx_clksrc_read, 42 .read = lpc32xx_clksrc_read,
44 .mask = CLOCKSOURCE_MASK(32), 43 .mask = CLOCKSOURCE_MASK(32),
@@ -171,9 +170,7 @@ static void __init lpc32xx_timer_init(void)
171 __raw_writel(0, LCP32XX_TIMER_MCR(LPC32XX_TIMER1_BASE)); 170 __raw_writel(0, LCP32XX_TIMER_MCR(LPC32XX_TIMER1_BASE));
172 __raw_writel(LCP32XX_TIMER_CNTR_TCR_EN, 171 __raw_writel(LCP32XX_TIMER_CNTR_TCR_EN,
173 LCP32XX_TIMER_TCR(LPC32XX_TIMER1_BASE)); 172 LCP32XX_TIMER_TCR(LPC32XX_TIMER1_BASE));
174 lpc32xx_clksrc.mult = clocksource_hz2mult(clkrate, 173 clocksource_register_hz(&lpc32xx_clksrc, clkrate);
175 lpc32xx_clksrc.shift);
176 clocksource_register(&lpc32xx_clksrc);
177} 174}
178 175
179struct sys_timer lpc32xx_timer = { 176struct sys_timer lpc32xx_timer = {
diff --git a/arch/arm/mach-mmp/time.c b/arch/arm/mach-mmp/time.c
index 66528193f939..aeb9ae23e6ce 100644
--- a/arch/arm/mach-mmp/time.c
+++ b/arch/arm/mach-mmp/time.c
@@ -26,8 +26,8 @@
26#include <linux/io.h> 26#include <linux/io.h>
27#include <linux/irq.h> 27#include <linux/irq.h>
28#include <linux/sched.h> 28#include <linux/sched.h>
29#include <linux/cnt32_to_63.h>
30 29
30#include <asm/sched_clock.h>
31#include <mach/addr-map.h> 31#include <mach/addr-map.h>
32#include <mach/regs-timers.h> 32#include <mach/regs-timers.h>
33#include <mach/regs-apbc.h> 33#include <mach/regs-apbc.h>
@@ -42,23 +42,7 @@
42#define MAX_DELTA (0xfffffffe) 42#define MAX_DELTA (0xfffffffe)
43#define MIN_DELTA (16) 43#define MIN_DELTA (16)
44 44
45#define TCR2NS_SCALE_FACTOR 10 45static DEFINE_CLOCK_DATA(cd);
46
47static unsigned long tcr2ns_scale;
48
49static void __init set_tcr2ns_scale(unsigned long tcr_rate)
50{
51 unsigned long long v = 1000000000ULL << TCR2NS_SCALE_FACTOR;
52 do_div(v, tcr_rate);
53 tcr2ns_scale = v;
54 /*
55 * We want an even value to automatically clear the top bit
56 * returned by cnt32_to_63() without an additional run time
57 * instruction. So if the LSB is 1 then round it up.
58 */
59 if (tcr2ns_scale & 1)
60 tcr2ns_scale++;
61}
62 46
63/* 47/*
 64 * FIXME: the timer needs some delay to stabilize the counter capture 48
@@ -75,10 +59,16 @@ static inline uint32_t timer_read(void)
75 return __raw_readl(TIMERS_VIRT_BASE + TMR_CVWR(0)); 59 return __raw_readl(TIMERS_VIRT_BASE + TMR_CVWR(0));
76} 60}
77 61
78unsigned long long sched_clock(void) 62unsigned long long notrace sched_clock(void)
79{ 63{
80 unsigned long long v = cnt32_to_63(timer_read()); 64 u32 cyc = timer_read();
81 return (v * tcr2ns_scale) >> TCR2NS_SCALE_FACTOR; 65 return cyc_to_sched_clock(&cd, cyc, (u32)~0);
66}
67
68static void notrace mmp_update_sched_clock(void)
69{
70 u32 cyc = timer_read();
71 update_sched_clock(&cd, cyc, (u32)~0);
82} 72}
83 73
84static irqreturn_t timer_interrupt(int irq, void *dev_id) 74static irqreturn_t timer_interrupt(int irq, void *dev_id)
@@ -146,7 +136,6 @@ static cycle_t clksrc_read(struct clocksource *cs)
146 136
147static struct clocksource cksrc = { 137static struct clocksource cksrc = {
148 .name = "clocksource", 138 .name = "clocksource",
149 .shift = 20,
150 .rating = 200, 139 .rating = 200,
151 .read = clksrc_read, 140 .read = clksrc_read,
152 .mask = CLOCKSOURCE_MASK(32), 141 .mask = CLOCKSOURCE_MASK(32),
@@ -186,17 +175,15 @@ void __init timer_init(int irq)
186{ 175{
187 timer_config(); 176 timer_config();
188 177
189 set_tcr2ns_scale(CLOCK_TICK_RATE); 178 init_sched_clock(&cd, mmp_update_sched_clock, 32, CLOCK_TICK_RATE);
190 179
191 ckevt.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, ckevt.shift); 180 ckevt.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, ckevt.shift);
192 ckevt.max_delta_ns = clockevent_delta2ns(MAX_DELTA, &ckevt); 181 ckevt.max_delta_ns = clockevent_delta2ns(MAX_DELTA, &ckevt);
193 ckevt.min_delta_ns = clockevent_delta2ns(MIN_DELTA, &ckevt); 182 ckevt.min_delta_ns = clockevent_delta2ns(MIN_DELTA, &ckevt);
194 ckevt.cpumask = cpumask_of(0); 183 ckevt.cpumask = cpumask_of(0);
195 184
196 cksrc.mult = clocksource_hz2mult(CLOCK_TICK_RATE, cksrc.shift);
197
198 setup_irq(irq, &timer_irq); 185 setup_irq(irq, &timer_irq);
199 186
200 clocksource_register(&cksrc); 187 clocksource_register_hz(&cksrc, CLOCK_TICK_RATE);
201 clockevents_register_device(&ckevt); 188 clockevents_register_device(&ckevt);
202} 189}
diff --git a/arch/arm/mach-msm/Kconfig b/arch/arm/mach-msm/Kconfig
index dbbcfeb919db..31e5fd63ec9a 100644
--- a/arch/arm/mach-msm/Kconfig
+++ b/arch/arm/mach-msm/Kconfig
@@ -49,6 +49,8 @@ endchoice
49 49
50config MSM_SOC_REV_A 50config MSM_SOC_REV_A
51 bool 51 bool
52config ARCH_MSM_SCORPIONMP
53 bool
52 54
53config ARCH_MSM_ARM11 55config ARCH_MSM_ARM11
54 bool 56 bool
diff --git a/arch/arm/mach-msm/timer.c b/arch/arm/mach-msm/timer.c
index 950100f19d07..595be7fea31a 100644
--- a/arch/arm/mach-msm/timer.c
+++ b/arch/arm/mach-msm/timer.c
@@ -137,7 +137,6 @@ static struct msm_clock msm_clocks[] = {
137 .rating = 200, 137 .rating = 200,
138 .read = msm_gpt_read, 138 .read = msm_gpt_read,
139 .mask = CLOCKSOURCE_MASK(32), 139 .mask = CLOCKSOURCE_MASK(32),
140 .shift = 17,
141 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 140 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
142 }, 141 },
143 .irq = { 142 .irq = {
@@ -164,7 +163,6 @@ static struct msm_clock msm_clocks[] = {
164 .rating = 300, 163 .rating = 300,
165 .read = msm_dgt_read, 164 .read = msm_dgt_read,
166 .mask = CLOCKSOURCE_MASK((32 - MSM_DGT_SHIFT)), 165 .mask = CLOCKSOURCE_MASK((32 - MSM_DGT_SHIFT)),
167 .shift = 24 - MSM_DGT_SHIFT,
168 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 166 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
169 }, 167 },
170 .irq = { 168 .irq = {
@@ -205,8 +203,7 @@ static void __init msm_timer_init(void)
205 ce->min_delta_ns = clockevent_delta2ns(4, ce); 203 ce->min_delta_ns = clockevent_delta2ns(4, ce);
206 ce->cpumask = cpumask_of(0); 204 ce->cpumask = cpumask_of(0);
207 205
208 cs->mult = clocksource_hz2mult(clock->freq, cs->shift); 206 res = clocksource_register_hz(cs, clock->freq);
209 res = clocksource_register(cs);
210 if (res) 207 if (res)
211 printk(KERN_ERR "msm_timer_init: clocksource_register " 208 printk(KERN_ERR "msm_timer_init: clocksource_register "
212 "failed for %s\n", cs->name); 209 "failed for %s\n", cs->name);
diff --git a/arch/arm/mach-netx/time.c b/arch/arm/mach-netx/time.c
index 82801dbf0579..f12f22d09b6c 100644
--- a/arch/arm/mach-netx/time.c
+++ b/arch/arm/mach-netx/time.c
@@ -114,7 +114,6 @@ static struct clocksource clocksource_netx = {
114 .rating = 200, 114 .rating = 200,
115 .read = netx_get_cycles, 115 .read = netx_get_cycles,
116 .mask = CLOCKSOURCE_MASK(32), 116 .mask = CLOCKSOURCE_MASK(32),
117 .shift = 20,
118 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 117 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
119}; 118};
120 119
@@ -151,9 +150,7 @@ static void __init netx_timer_init(void)
151 writel(NETX_GPIO_COUNTER_CTRL_RUN, 150 writel(NETX_GPIO_COUNTER_CTRL_RUN,
152 NETX_GPIO_COUNTER_CTRL(TIMER_CLOCKSOURCE)); 151 NETX_GPIO_COUNTER_CTRL(TIMER_CLOCKSOURCE));
153 152
154 clocksource_netx.mult = 153 clocksource_register_hz(&clocksource_netx, CLOCK_TICK_RATE);
155 clocksource_hz2mult(CLOCK_TICK_RATE, clocksource_netx.shift);
156 clocksource_register(&clocksource_netx);
157 154
158 netx_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, 155 netx_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC,
159 netx_clockevent.shift); 156 netx_clockevent.shift);
diff --git a/arch/arm/mach-ns9xxx/time-ns9360.c b/arch/arm/mach-ns9xxx/time-ns9360.c
index 77281260358a..9ca32f55728b 100644
--- a/arch/arm/mach-ns9xxx/time-ns9360.c
+++ b/arch/arm/mach-ns9xxx/time-ns9360.c
@@ -35,7 +35,6 @@ static struct clocksource ns9360_clocksource = {
35 .rating = 300, 35 .rating = 300,
36 .read = ns9360_clocksource_read, 36 .read = ns9360_clocksource_read,
37 .mask = CLOCKSOURCE_MASK(32), 37 .mask = CLOCKSOURCE_MASK(32),
38 .shift = 20,
39 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 38 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
40}; 39};
41 40
@@ -148,10 +147,7 @@ static void __init ns9360_timer_init(void)
148 147
149 __raw_writel(tc, SYS_TC(TIMER_CLOCKSOURCE)); 148 __raw_writel(tc, SYS_TC(TIMER_CLOCKSOURCE));
150 149
151 ns9360_clocksource.mult = clocksource_hz2mult(ns9360_cpuclock(), 150 clocksource_register_hz(&ns9360_clocksource, ns9360_cpuclock());
152 ns9360_clocksource.shift);
153
154 clocksource_register(&ns9360_clocksource);
155 151
156 latch = SH_DIV(ns9360_cpuclock(), HZ, 0); 152 latch = SH_DIV(ns9360_cpuclock(), HZ, 0);
157 153
diff --git a/arch/arm/mach-omap1/time.c b/arch/arm/mach-omap1/time.c
index 1be6a214d88d..abb34ff2041b 100644
--- a/arch/arm/mach-omap1/time.c
+++ b/arch/arm/mach-omap1/time.c
@@ -208,7 +208,6 @@ static struct clocksource clocksource_mpu = {
208 .rating = 300, 208 .rating = 300,
209 .read = mpu_read, 209 .read = mpu_read,
210 .mask = CLOCKSOURCE_MASK(32), 210 .mask = CLOCKSOURCE_MASK(32),
211 .shift = 24,
212 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 211 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
213}; 212};
214 213
@@ -217,13 +216,10 @@ static void __init omap_init_clocksource(unsigned long rate)
217 static char err[] __initdata = KERN_ERR 216 static char err[] __initdata = KERN_ERR
218 "%s: can't register clocksource!\n"; 217 "%s: can't register clocksource!\n";
219 218
220 clocksource_mpu.mult
221 = clocksource_khz2mult(rate/1000, clocksource_mpu.shift);
222
223 setup_irq(INT_TIMER2, &omap_mpu_timer2_irq); 219 setup_irq(INT_TIMER2, &omap_mpu_timer2_irq);
224 omap_mpu_timer_start(1, ~0, 1); 220 omap_mpu_timer_start(1, ~0, 1);
225 221
226 if (clocksource_register(&clocksource_mpu)) 222 if (clocksource_register_hz(&clocksource_mpu, rate))
227 printk(err, clocksource_mpu.name); 223 printk(err, clocksource_mpu.name);
228} 224}
229 225
diff --git a/arch/arm/mach-omap2/timer-gp.c b/arch/arm/mach-omap2/timer-gp.c
index e13c29eecf2b..a7816dbdc6b1 100644
--- a/arch/arm/mach-omap2/timer-gp.c
+++ b/arch/arm/mach-omap2/timer-gp.c
@@ -195,7 +195,6 @@ static struct clocksource clocksource_gpt = {
195 .rating = 300, 195 .rating = 300,
196 .read = clocksource_read_cycles, 196 .read = clocksource_read_cycles,
197 .mask = CLOCKSOURCE_MASK(32), 197 .mask = CLOCKSOURCE_MASK(32),
198 .shift = 24,
199 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 198 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
200}; 199};
201 200
@@ -220,9 +219,7 @@ static void __init omap2_gp_clocksource_init(void)
220 219
221 omap_dm_timer_set_load_start(gpt, 1, 0); 220 omap_dm_timer_set_load_start(gpt, 1, 0);
222 221
223 clocksource_gpt.mult = 222 if (clocksource_register_hz(&clocksource_gpt, tick_rate))
224 clocksource_khz2mult(tick_rate/1000, clocksource_gpt.shift);
225 if (clocksource_register(&clocksource_gpt))
226 printk(err2, clocksource_gpt.name); 223 printk(err2, clocksource_gpt.name);
227} 224}
228#endif 225#endif
diff --git a/arch/arm/mach-pxa/time.c b/arch/arm/mach-pxa/time.c
index 293e40aeaf29..e7f64d9b4f2d 100644
--- a/arch/arm/mach-pxa/time.c
+++ b/arch/arm/mach-pxa/time.c
@@ -17,11 +17,11 @@
17#include <linux/interrupt.h> 17#include <linux/interrupt.h>
18#include <linux/clockchips.h> 18#include <linux/clockchips.h>
19#include <linux/sched.h> 19#include <linux/sched.h>
20#include <linux/cnt32_to_63.h>
21 20
22#include <asm/div64.h> 21#include <asm/div64.h>
23#include <asm/mach/irq.h> 22#include <asm/mach/irq.h>
24#include <asm/mach/time.h> 23#include <asm/mach/time.h>
24#include <asm/sched_clock.h>
25#include <mach/regs-ost.h> 25#include <mach/regs-ost.h>
26 26
27/* 27/*
@@ -32,29 +32,18 @@
32 * long as there is always less than 582 seconds between successive 32 * long as there is always less than 582 seconds between successive
33 * calls to sched_clock() which should always be the case in practice. 33 * calls to sched_clock() which should always be the case in practice.
34 */ 34 */
35static DEFINE_CLOCK_DATA(cd);
35 36
36#define OSCR2NS_SCALE_FACTOR 10 37unsigned long long notrace sched_clock(void)
37
38static unsigned long oscr2ns_scale;
39
40static void __init set_oscr2ns_scale(unsigned long oscr_rate)
41{ 38{
42 unsigned long long v = 1000000000ULL << OSCR2NS_SCALE_FACTOR; 39 u32 cyc = OSCR;
43 do_div(v, oscr_rate); 40 return cyc_to_sched_clock(&cd, cyc, (u32)~0);
44 oscr2ns_scale = v;
45 /*
46 * We want an even value to automatically clear the top bit
47 * returned by cnt32_to_63() without an additional run time
48 * instruction. So if the LSB is 1 then round it up.
49 */
50 if (oscr2ns_scale & 1)
51 oscr2ns_scale++;
52} 41}
53 42
54unsigned long long sched_clock(void) 43static void notrace pxa_update_sched_clock(void)
55{ 44{
56 unsigned long long v = cnt32_to_63(OSCR); 45 u32 cyc = OSCR;
57 return (v * oscr2ns_scale) >> OSCR2NS_SCALE_FACTOR; 46 update_sched_clock(&cd, cyc, (u32)~0);
58} 47}
59 48
60 49
@@ -127,7 +116,6 @@ static struct clocksource cksrc_pxa_oscr0 = {
127 .rating = 200, 116 .rating = 200,
128 .read = pxa_read_oscr, 117 .read = pxa_read_oscr,
129 .mask = CLOCKSOURCE_MASK(32), 118 .mask = CLOCKSOURCE_MASK(32),
130 .shift = 20,
131 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 119 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
132}; 120};
133 121
@@ -145,7 +133,7 @@ static void __init pxa_timer_init(void)
145 OIER = 0; 133 OIER = 0;
146 OSSR = OSSR_M0 | OSSR_M1 | OSSR_M2 | OSSR_M3; 134 OSSR = OSSR_M0 | OSSR_M1 | OSSR_M2 | OSSR_M3;
147 135
148 set_oscr2ns_scale(clock_tick_rate); 136 init_sched_clock(&cd, pxa_update_sched_clock, 32, clock_tick_rate);
149 137
150 ckevt_pxa_osmr0.mult = 138 ckevt_pxa_osmr0.mult =
151 div_sc(clock_tick_rate, NSEC_PER_SEC, ckevt_pxa_osmr0.shift); 139 div_sc(clock_tick_rate, NSEC_PER_SEC, ckevt_pxa_osmr0.shift);
@@ -155,12 +143,9 @@ static void __init pxa_timer_init(void)
155 clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_pxa_osmr0) + 1; 143 clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_pxa_osmr0) + 1;
156 ckevt_pxa_osmr0.cpumask = cpumask_of(0); 144 ckevt_pxa_osmr0.cpumask = cpumask_of(0);
157 145
158 cksrc_pxa_oscr0.mult =
159 clocksource_hz2mult(clock_tick_rate, cksrc_pxa_oscr0.shift);
160
161 setup_irq(IRQ_OST0, &pxa_ost0_irq); 146 setup_irq(IRQ_OST0, &pxa_ost0_irq);
162 147
163 clocksource_register(&cksrc_pxa_oscr0); 148 clocksource_register_hz(&cksrc_pxa_oscr0, clock_tick_rate);
164 clockevents_register_device(&ckevt_pxa_osmr0); 149 clockevents_register_device(&ckevt_pxa_osmr0);
165} 150}
166 151
diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c
index 14fbe50376b6..aad806c5cb12 100644
--- a/arch/arm/mach-realview/core.c
+++ b/arch/arm/mach-realview/core.c
@@ -52,6 +52,8 @@
52#include <mach/irqs.h> 52#include <mach/irqs.h>
53#include <asm/hardware/timer-sp.h> 53#include <asm/hardware/timer-sp.h>
54 54
55#include <plat/sched_clock.h>
56
55#include "core.h" 57#include "core.h"
56 58
57#ifdef CONFIG_ZONE_DMA 59#ifdef CONFIG_ZONE_DMA
@@ -655,6 +657,12 @@ void realview_leds_event(led_event_t ledevt)
655#endif /* CONFIG_LEDS */ 657#endif /* CONFIG_LEDS */
656 658
657/* 659/*
660 * The sched_clock counter
661 */
662#define REFCOUNTER (__io_address(REALVIEW_SYS_BASE) + \
663 REALVIEW_SYS_24MHz_OFFSET)
664
665/*
658 * Where is the timer (VA)? 666 * Where is the timer (VA)?
659 */ 667 */
660void __iomem *timer0_va_base; 668void __iomem *timer0_va_base;
@@ -669,6 +677,8 @@ void __init realview_timer_init(unsigned int timer_irq)
669{ 677{
670 u32 val; 678 u32 val;
671 679
680 versatile_sched_clock_init(REFCOUNTER, 24000000);
681
672 /* 682 /*
673 * set clock frequency: 683 * set clock frequency:
674 * REALVIEW_REFCLK is 32KHz 684 * REALVIEW_REFCLK is 32KHz
diff --git a/arch/arm/mach-s5pv310/time.c b/arch/arm/mach-s5pv310/time.c
index 01b012ad1bfd..b262d4615331 100644
--- a/arch/arm/mach-s5pv310/time.c
+++ b/arch/arm/mach-s5pv310/time.c
@@ -211,7 +211,6 @@ struct clocksource pwm_clocksource = {
211 .rating = 250, 211 .rating = 250,
212 .read = s5pv310_pwm4_read, 212 .read = s5pv310_pwm4_read,
213 .mask = CLOCKSOURCE_MASK(32), 213 .mask = CLOCKSOURCE_MASK(32),
214 .shift = 20,
215 .flags = CLOCK_SOURCE_IS_CONTINUOUS , 214 .flags = CLOCK_SOURCE_IS_CONTINUOUS ,
216}; 215};
217 216
@@ -230,10 +229,7 @@ static void __init s5pv310_clocksource_init(void)
230 s5pv310_pwm_init(4, ~0); 229 s5pv310_pwm_init(4, ~0);
231 s5pv310_pwm_start(4, 1); 230 s5pv310_pwm_start(4, 1);
232 231
233 pwm_clocksource.mult = 232 if (clocksource_register_hz(&pwm_clocksource, clock_rate))
234 clocksource_khz2mult(clock_rate/1000, pwm_clocksource.shift);
235
236 if (clocksource_register(&pwm_clocksource))
237 panic("%s: can't register clocksource\n", pwm_clocksource.name); 233 panic("%s: can't register clocksource\n", pwm_clocksource.name);
238} 234}
239 235
diff --git a/arch/arm/mach-sa1100/generic.c b/arch/arm/mach-sa1100/generic.c
index 3555d616774c..59d14f0fdcf8 100644
--- a/arch/arm/mach-sa1100/generic.c
+++ b/arch/arm/mach-sa1100/generic.c
@@ -16,9 +16,7 @@
16#include <linux/pm.h> 16#include <linux/pm.h>
17#include <linux/cpufreq.h> 17#include <linux/cpufreq.h>
18#include <linux/ioport.h> 18#include <linux/ioport.h>
19#include <linux/sched.h> /* just for sched_clock() - funny that */
20#include <linux/platform_device.h> 19#include <linux/platform_device.h>
21#include <linux/cnt32_to_63.h>
22 20
23#include <asm/div64.h> 21#include <asm/div64.h>
24#include <mach/hardware.h> 22#include <mach/hardware.h>
@@ -110,27 +108,6 @@ unsigned int sa11x0_getspeed(unsigned int cpu)
110} 108}
111 109
112/* 110/*
113 * This is the SA11x0 sched_clock implementation. This has
114 * a resolution of 271ns, and a maximum value of 32025597s (370 days).
115 *
116 * The return value is guaranteed to be monotonic in that range as
117 * long as there is always less than 582 seconds between successive
118 * calls to this function.
119 *
120 * ( * 1E9 / 3686400 => * 78125 / 288)
121 */
122unsigned long long sched_clock(void)
123{
124 unsigned long long v = cnt32_to_63(OSCR);
125
126 /* the <<1 gets rid of the cnt_32_to_63 top bit saving on a bic insn */
127 v *= 78125<<1;
128 do_div(v, 288<<1);
129
130 return v;
131}
132
133/*
134 * Default power-off for SA1100 111 * Default power-off for SA1100
135 */ 112 */
136static void sa1100_power_off(void) 113static void sa1100_power_off(void)
diff --git a/arch/arm/mach-sa1100/time.c b/arch/arm/mach-sa1100/time.c
index 74b6e0e570b6..ae4f3d80416f 100644
--- a/arch/arm/mach-sa1100/time.c
+++ b/arch/arm/mach-sa1100/time.c
@@ -12,12 +12,39 @@
12#include <linux/errno.h> 12#include <linux/errno.h>
13#include <linux/interrupt.h> 13#include <linux/interrupt.h>
14#include <linux/irq.h> 14#include <linux/irq.h>
15#include <linux/sched.h> /* just for sched_clock() - funny that */
15#include <linux/timex.h> 16#include <linux/timex.h>
16#include <linux/clockchips.h> 17#include <linux/clockchips.h>
17 18
18#include <asm/mach/time.h> 19#include <asm/mach/time.h>
20#include <asm/sched_clock.h>
19#include <mach/hardware.h> 21#include <mach/hardware.h>
20 22
23/*
24 * This is the SA11x0 sched_clock implementation.
25 */
26static DEFINE_CLOCK_DATA(cd);
27
28/*
29 * Constants generated by clocks_calc_mult_shift(m, s, 3.6864MHz,
30 * NSEC_PER_SEC, 60).
31 * This gives a resolution of about 271ns and a wrap period of about 19min.
32 */
33#define SC_MULT 2275555556u
34#define SC_SHIFT 23
35
36unsigned long long notrace sched_clock(void)
37{
38 u32 cyc = OSCR;
39 return cyc_to_fixed_sched_clock(&cd, cyc, (u32)~0, SC_MULT, SC_SHIFT);
40}
41
42static void notrace sa1100_update_sched_clock(void)
43{
44 u32 cyc = OSCR;
45 update_sched_clock(&cd, cyc, (u32)~0);
46}
47
21#define MIN_OSCR_DELTA 2 48#define MIN_OSCR_DELTA 2
22 49
23static irqreturn_t sa1100_ost0_interrupt(int irq, void *dev_id) 50static irqreturn_t sa1100_ost0_interrupt(int irq, void *dev_id)
@@ -81,7 +108,6 @@ static struct clocksource cksrc_sa1100_oscr = {
81 .rating = 200, 108 .rating = 200,
82 .read = sa1100_read_oscr, 109 .read = sa1100_read_oscr,
83 .mask = CLOCKSOURCE_MASK(32), 110 .mask = CLOCKSOURCE_MASK(32),
84 .shift = 20,
85 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 111 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
86}; 112};
87 113
@@ -97,6 +123,9 @@ static void __init sa1100_timer_init(void)
97 OIER = 0; /* disable any timer interrupts */ 123 OIER = 0; /* disable any timer interrupts */
98 OSSR = 0xf; /* clear status on all timers */ 124 OSSR = 0xf; /* clear status on all timers */
99 125
126 init_fixed_sched_clock(&cd, sa1100_update_sched_clock, 32,
127 3686400, SC_MULT, SC_SHIFT);
128
100 ckevt_sa1100_osmr0.mult = 129 ckevt_sa1100_osmr0.mult =
101 div_sc(3686400, NSEC_PER_SEC, ckevt_sa1100_osmr0.shift); 130 div_sc(3686400, NSEC_PER_SEC, ckevt_sa1100_osmr0.shift);
102 ckevt_sa1100_osmr0.max_delta_ns = 131 ckevt_sa1100_osmr0.max_delta_ns =
@@ -105,12 +134,9 @@ static void __init sa1100_timer_init(void)
105 clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_sa1100_osmr0) + 1; 134 clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_sa1100_osmr0) + 1;
106 ckevt_sa1100_osmr0.cpumask = cpumask_of(0); 135 ckevt_sa1100_osmr0.cpumask = cpumask_of(0);
107 136
108 cksrc_sa1100_oscr.mult =
109 clocksource_hz2mult(CLOCK_TICK_RATE, cksrc_sa1100_oscr.shift);
110
111 setup_irq(IRQ_OST0, &sa1100_timer_irq); 137 setup_irq(IRQ_OST0, &sa1100_timer_irq);
112 138
113 clocksource_register(&cksrc_sa1100_oscr); 139 clocksource_register_hz(&cksrc_sa1100_oscr, CLOCK_TICK_RATE);
114 clockevents_register_device(&ckevt_sa1100_osmr0); 140 clockevents_register_device(&ckevt_sa1100_osmr0);
115} 141}
116 142
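
Where the counter rate is fixed and known at build time, the patch skips clocks_calc_mult_shift() at boot and bakes the scaling factors in as SC_MULT/SC_SHIFT, as in the SA11x0 hunk above. The constants are easy to sanity-check outside the kernel; the small user-space program below (an illustration, not part of the patch) reproduces 2275555556 for a 3.6864 MHz counter with shift 23, a resolution of about 271 ns, and a wrap of about 19 minutes for a 32-bit counter, matching the comment in the hunk.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint64_t rate = 3686400;	/* SA11x0 OSCR frequency in Hz */
	const unsigned int shift = 23;	/* SC_SHIFT from the patch */

	/* mult = 2^shift * NSEC_PER_SEC / rate, rounded to nearest */
	uint64_t mult = ((1ULL << shift) * 1000000000ULL + rate / 2) / rate;
	double resolution_ns = 1e9 / (double)rate;
	double wrap_s = 4294967296.0 / (double)rate;	/* 2^32 ticks */

	printf("mult = %llu (patch uses 2275555556)\n",
	       (unsigned long long)mult);
	printf("resolution ~ %.0f ns, wrap ~ %.1f s (~%.0f min)\n",
	       resolution_ns, wrap_s, wrap_s / 60.0);
	return 0;
}
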
diff --git a/arch/arm/mach-tcc8k/time.c b/arch/arm/mach-tcc8k/time.c
index 78d06008841d..e0a8d609afe1 100644
--- a/arch/arm/mach-tcc8k/time.c
+++ b/arch/arm/mach-tcc8k/time.c
@@ -35,7 +35,6 @@ static struct clocksource clocksource_tcc = {
35 .rating = 200, 35 .rating = 200,
36 .read = tcc_get_cycles, 36 .read = tcc_get_cycles,
37 .mask = CLOCKSOURCE_MASK(32), 37 .mask = CLOCKSOURCE_MASK(32),
38 .shift = 28,
39 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 38 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
40}; 39};
41 40
@@ -103,9 +102,7 @@ static int __init tcc_clockevent_init(struct clk *clock)
103{ 102{
104 unsigned int c = clk_get_rate(clock); 103 unsigned int c = clk_get_rate(clock);
105 104
106 clocksource_tcc.mult = clocksource_hz2mult(c, 105 clocksource_register_hz(&clocksource_tcc, c);
107 clocksource_tcc.shift);
108 clocksource_register(&clocksource_tcc);
109 106
110 clockevent_tcc.mult = div_sc(c, NSEC_PER_SEC, 107 clockevent_tcc.mult = div_sc(c, NSEC_PER_SEC,
111 clockevent_tcc.shift); 108 clockevent_tcc.shift);
diff --git a/arch/arm/mach-tegra/timer.c b/arch/arm/mach-tegra/timer.c
index 9057d6fd1d31..7b8ad1f98f44 100644
--- a/arch/arm/mach-tegra/timer.c
+++ b/arch/arm/mach-tegra/timer.c
@@ -18,6 +18,7 @@
18 */ 18 */
19 19
20#include <linux/init.h> 20#include <linux/init.h>
21#include <linux/sched.h>
21#include <linux/time.h> 22#include <linux/time.h>
22#include <linux/interrupt.h> 23#include <linux/interrupt.h>
23#include <linux/irq.h> 24#include <linux/irq.h>
@@ -25,10 +26,10 @@
25#include <linux/clocksource.h> 26#include <linux/clocksource.h>
26#include <linux/clk.h> 27#include <linux/clk.h>
27#include <linux/io.h> 28#include <linux/io.h>
28#include <linux/cnt32_to_63.h>
29 29
30#include <asm/mach/time.h> 30#include <asm/mach/time.h>
31#include <asm/localtimer.h> 31#include <asm/localtimer.h>
32#include <asm/sched_clock.h>
32 33
33#include <mach/iomap.h> 34#include <mach/iomap.h>
34#include <mach/irqs.h> 35#include <mach/irqs.h>
@@ -91,7 +92,7 @@ static void tegra_timer_set_mode(enum clock_event_mode mode,
91 92
92static cycle_t tegra_clocksource_read(struct clocksource *cs) 93static cycle_t tegra_clocksource_read(struct clocksource *cs)
93{ 94{
94 return cnt32_to_63(timer_readl(TIMERUS_CNTR_1US)); 95 return timer_readl(TIMERUS_CNTR_1US);
95} 96}
96 97
97static struct clock_event_device tegra_clockevent = { 98static struct clock_event_device tegra_clockevent = {
@@ -106,14 +107,29 @@ static struct clocksource tegra_clocksource = {
106 .name = "timer_us", 107 .name = "timer_us",
107 .rating = 300, 108 .rating = 300,
108 .read = tegra_clocksource_read, 109 .read = tegra_clocksource_read,
109 .mask = 0x7FFFFFFFFFFFFFFFULL, 110 .mask = CLOCKSOURCE_MASK(32),
110 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 111 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
111}; 112};
112 113
113unsigned long long sched_clock(void) 114static DEFINE_CLOCK_DATA(cd);
115
116/*
117 * Constants generated by clocks_calc_mult_shift(m, s, 1MHz, NSEC_PER_SEC, 60).
118 * This gives a resolution of about 1us and a wrap period of about 1h11min.
119 */
120#define SC_MULT 4194304000u
121#define SC_SHIFT 22
122
123unsigned long long notrace sched_clock(void)
114{ 124{
115 return clocksource_cyc2ns(tegra_clocksource.read(&tegra_clocksource), 125 u32 cyc = timer_readl(TIMERUS_CNTR_1US);
116 tegra_clocksource.mult, tegra_clocksource.shift); 126 return cyc_to_fixed_sched_clock(&cd, cyc, (u32)~0, SC_MULT, SC_SHIFT);
127}
128
129static void notrace tegra_update_sched_clock(void)
130{
131 u32 cyc = timer_readl(TIMERUS_CNTR_1US);
132 update_sched_clock(&cd, cyc, (u32)~0);
117} 133}
118 134
119static irqreturn_t tegra_timer_interrupt(int irq, void *dev_id) 135static irqreturn_t tegra_timer_interrupt(int irq, void *dev_id)
@@ -158,6 +174,9 @@ static void __init tegra_init_timer(void)
158 WARN(1, "Unknown clock rate"); 174 WARN(1, "Unknown clock rate");
159 } 175 }
160 176
177 init_fixed_sched_clock(&cd, tegra_update_sched_clock, 32,
178 1000000, SC_MULT, SC_SHIFT);
179
161 if (clocksource_register_hz(&tegra_clocksource, 1000000)) { 180 if (clocksource_register_hz(&tegra_clocksource, 1000000)) {
162 printk(KERN_ERR "Failed to register clocksource\n"); 181 printk(KERN_ERR "Failed to register clocksource\n");
163 BUG(); 182 BUG();
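
The Tegra hunk uses the same fixed-constant scheme for its 1 MHz microsecond counter: 2^22 * NSEC_PER_SEC / 1000000 is exactly 4194304000, so SC_SHIFT 22 needs no rounding, each tick is 1 us, and a 32-bit counter wraps after 2^32 us, roughly 4295 s, the "about 1h11min" quoted in the comment (re-running the check above with rate = 1000000 and shift = 22 reproduces these numbers). Note that tegra_clocksource_read() also stops applying cnt32_to_63() and the clocksource mask shrinks to 32 bits, since wrap handling now lives entirely in the sched_clock layer.
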
diff --git a/arch/arm/mach-u300/timer.c b/arch/arm/mach-u300/timer.c
index 3fc4472719be..3ec58bd2d6e4 100644
--- a/arch/arm/mach-u300/timer.c
+++ b/arch/arm/mach-u300/timer.c
@@ -9,6 +9,7 @@
9 * Author: Linus Walleij <linus.walleij@stericsson.com> 9 * Author: Linus Walleij <linus.walleij@stericsson.com>
10 */ 10 */
11#include <linux/interrupt.h> 11#include <linux/interrupt.h>
12#include <linux/sched.h>
12#include <linux/time.h> 13#include <linux/time.h>
13#include <linux/timex.h> 14#include <linux/timex.h>
14#include <linux/clockchips.h> 15#include <linux/clockchips.h>
@@ -21,6 +22,7 @@
21#include <mach/hardware.h> 22#include <mach/hardware.h>
22 23
23/* Generic stuff */ 24/* Generic stuff */
25#include <asm/sched_clock.h>
24#include <asm/mach/map.h> 26#include <asm/mach/map.h>
25#include <asm/mach/time.h> 27#include <asm/mach/time.h>
26#include <asm/mach/irq.h> 28#include <asm/mach/irq.h>
@@ -352,12 +354,18 @@ static struct clocksource clocksource_u300_1mhz = {
352 * this wraps around for now, since it is just a relative time 354 * this wraps around for now, since it is just a relative time
353 * stamp. (Inspired by OMAP implementation.) 355 * stamp. (Inspired by OMAP implementation.)
354 */ 356 */
357static DEFINE_CLOCK_DATA(cd);
358
355unsigned long long notrace sched_clock(void) 359unsigned long long notrace sched_clock(void)
356{ 360{
357 return clocksource_cyc2ns(clocksource_u300_1mhz.read( 361 u32 cyc = readl(U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT2CC);
358 &clocksource_u300_1mhz), 362 return cyc_to_sched_clock(&cd, cyc, (u32)~0);
359 clocksource_u300_1mhz.mult, 363}
360 clocksource_u300_1mhz.shift); 364
365static void notrace u300_update_sched_clock(void)
366{
367 u32 cyc = readl(U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT2CC);
368 update_sched_clock(&cd, cyc, (u32)~0);
361} 369}
362 370
363 371
@@ -375,6 +383,8 @@ static void __init u300_timer_init(void)
375 clk_enable(clk); 383 clk_enable(clk);
376 rate = clk_get_rate(clk); 384 rate = clk_get_rate(clk);
377 385
386 init_sched_clock(&cd, u300_update_sched_clock, 32, rate);
387
378 /* 388 /*
379 * Disable the "OS" and "DD" timers - these are designed for Symbian! 389 * Disable the "OS" and "DD" timers - these are designed for Symbian!
380 * Example usage in cnh1601578 cpu subsystem pd_timer_app.c 390 * Example usage in cnh1601578 cpu subsystem pd_timer_app.c
@@ -412,9 +422,7 @@ static void __init u300_timer_init(void)
412 writel(U300_TIMER_APP_EGPT2_TIMER_ENABLE, 422 writel(U300_TIMER_APP_EGPT2_TIMER_ENABLE,
413 U300_TIMER_APP_VBASE + U300_TIMER_APP_EGPT2); 423 U300_TIMER_APP_VBASE + U300_TIMER_APP_EGPT2);
414 424
415 clocksource_calc_mult_shift(&clocksource_u300_1mhz, 425 if (clocksource_register_hz(&clocksource_u300_1mhz, rate))
416 rate, APPTIMER_MIN_RANGE);
417 if (clocksource_register(&clocksource_u300_1mhz))
418 printk(KERN_ERR "timer: failed to initialize clock " 426 printk(KERN_ERR "timer: failed to initialize clock "
419 "source %s\n", clocksource_u300_1mhz.name); 427 "source %s\n", clocksource_u300_1mhz.name);
420 428
diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c
index 6b93bd600271..40a024c71e4b 100644
--- a/arch/arm/mach-versatile/core.c
+++ b/arch/arm/mach-versatile/core.c
@@ -51,6 +51,8 @@
51#include <mach/platform.h> 51#include <mach/platform.h>
52#include <asm/hardware/timer-sp.h> 52#include <asm/hardware/timer-sp.h>
53 53
54#include <plat/sched_clock.h>
55
54#include "core.h" 56#include "core.h"
55 57
56/* 58/*
@@ -886,6 +888,12 @@ void __init versatile_init(void)
886} 888}
887 889
888/* 890/*
891 * The sched_clock counter
892 */
893#define REFCOUNTER (__io_address(VERSATILE_SYS_BASE) + \
894 VERSATILE_SYS_24MHz_OFFSET)
895
896/*
889 * Where is the timer (VA)? 897 * Where is the timer (VA)?
890 */ 898 */
891#define TIMER0_VA_BASE __io_address(VERSATILE_TIMER0_1_BASE) 899#define TIMER0_VA_BASE __io_address(VERSATILE_TIMER0_1_BASE)
@@ -900,6 +908,8 @@ static void __init versatile_timer_init(void)
900{ 908{
901 u32 val; 909 u32 val;
902 910
911 versatile_sched_clock_init(REFCOUNTER, 24000000);
912
903 /* 913 /*
904 * set clock frequency: 914 * set clock frequency:
905 * VERSATILE_REFCLK is 32KHz 915 * VERSATILE_REFCLK is 32KHz
diff --git a/arch/arm/mach-vexpress/v2m.c b/arch/arm/mach-vexpress/v2m.c
index 91ff2e0df856..de13603dc028 100644
--- a/arch/arm/mach-vexpress/v2m.c
+++ b/arch/arm/mach-vexpress/v2m.c
@@ -18,11 +18,12 @@
18#include <asm/mach/map.h> 18#include <asm/mach/map.h>
19#include <asm/mach/time.h> 19#include <asm/mach/time.h>
20#include <asm/hardware/arm_timer.h> 20#include <asm/hardware/arm_timer.h>
21#include <asm/hardware/timer-sp.h>
21 22
22#include <mach/clkdev.h> 23#include <mach/clkdev.h>
23#include <mach/motherboard.h> 24#include <mach/motherboard.h>
24 25
25#include <asm/hardware/timer-sp.h> 26#include <plat/sched_clock.h>
26 27
27#include "core.h" 28#include "core.h"
28 29
@@ -50,6 +51,8 @@ void __init v2m_map_io(struct map_desc *tile, size_t num)
50 51
51static void __init v2m_timer_init(void) 52static void __init v2m_timer_init(void)
52{ 53{
54 versatile_sched_clock_init(MMIO_P2V(V2M_SYS_24MHZ), 24000000);
55
53 writel(0, MMIO_P2V(V2M_TIMER0) + TIMER_CTRL); 56 writel(0, MMIO_P2V(V2M_TIMER0) + TIMER_CTRL);
54 writel(0, MMIO_P2V(V2M_TIMER1) + TIMER_CTRL); 57 writel(0, MMIO_P2V(V2M_TIMER1) + TIMER_CTRL);
55 58
diff --git a/arch/arm/mach-w90x900/time.c b/arch/arm/mach-w90x900/time.c
index b80f769bc135..4b089cb930dc 100644
--- a/arch/arm/mach-w90x900/time.c
+++ b/arch/arm/mach-w90x900/time.c
@@ -153,7 +153,6 @@ static struct clocksource clocksource_nuc900 = {
153 .rating = 200, 153 .rating = 200,
154 .read = nuc900_get_cycles, 154 .read = nuc900_get_cycles,
155 .mask = CLOCKSOURCE_MASK(TDR_SHIFT), 155 .mask = CLOCKSOURCE_MASK(TDR_SHIFT),
156 .shift = 10,
157 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 156 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
158}; 157};
159 158
@@ -176,9 +175,7 @@ static void __init nuc900_clocksource_init(void)
176 val |= (COUNTEN | PERIOD | PRESCALE); 175 val |= (COUNTEN | PERIOD | PRESCALE);
177 __raw_writel(val, REG_TCSR1); 176 __raw_writel(val, REG_TCSR1);
178 177
179 clocksource_nuc900.mult = 178 clocksource_register_hz(&clocksource_nuc900, rate);
180 clocksource_khz2mult((rate / 1000), clocksource_nuc900.shift);
181 clocksource_register(&clocksource_nuc900);
182} 179}
183 180
184static void __init nuc900_timer_init(void) 181static void __init nuc900_timer_init(void)
diff --git a/arch/arm/plat-iop/time.c b/arch/arm/plat-iop/time.c
index 558cdfaf76b6..07f23bb42bed 100644
--- a/arch/arm/plat-iop/time.c
+++ b/arch/arm/plat-iop/time.c
@@ -17,6 +17,7 @@
17#include <linux/interrupt.h> 17#include <linux/interrupt.h>
18#include <linux/time.h> 18#include <linux/time.h>
19#include <linux/init.h> 19#include <linux/init.h>
20#include <linux/sched.h>
20#include <linux/timex.h> 21#include <linux/timex.h>
21#include <linux/sched.h> 22#include <linux/sched.h>
22#include <linux/io.h> 23#include <linux/io.h>
@@ -24,6 +25,7 @@
24#include <linux/clockchips.h> 25#include <linux/clockchips.h>
25#include <mach/hardware.h> 26#include <mach/hardware.h>
26#include <asm/irq.h> 27#include <asm/irq.h>
28#include <asm/sched_clock.h>
27#include <asm/uaccess.h> 29#include <asm/uaccess.h>
28#include <asm/mach/irq.h> 30#include <asm/mach/irq.h>
29#include <asm/mach/time.h> 31#include <asm/mach/time.h>
@@ -50,15 +52,21 @@ static struct clocksource iop_clocksource = {
50 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 52 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
51}; 53};
52 54
55static DEFINE_CLOCK_DATA(cd);
56
53/* 57/*
54 * IOP sched_clock() implementation via its clocksource. 58 * IOP sched_clock() implementation via its clocksource.
55 */ 59 */
56unsigned long long sched_clock(void) 60unsigned long long notrace sched_clock(void)
57{ 61{
58 cycle_t cyc = iop_clocksource_read(NULL); 62 u32 cyc = 0xffffffffu - read_tcr1();
59 struct clocksource *cs = &iop_clocksource; 63 return cyc_to_sched_clock(&cd, cyc, (u32)~0);
64}
60 65
61 return clocksource_cyc2ns(cyc, cs->mult, cs->shift); 66static void notrace iop_update_sched_clock(void)
67{
68 u32 cyc = 0xffffffffu - read_tcr1();
69 update_sched_clock(&cd, cyc, (u32)~0);
62} 70}
63 71
64/* 72/*
@@ -88,6 +96,7 @@ static void iop_set_mode(enum clock_event_mode mode,
88 case CLOCK_EVT_MODE_PERIODIC: 96 case CLOCK_EVT_MODE_PERIODIC:
89 write_tmr0(tmr & ~IOP_TMR_EN); 97 write_tmr0(tmr & ~IOP_TMR_EN);
90 write_tcr0(ticks_per_jiffy - 1); 98 write_tcr0(ticks_per_jiffy - 1);
99 write_trr0(ticks_per_jiffy - 1);
91 tmr |= (IOP_TMR_RELOAD | IOP_TMR_EN); 100 tmr |= (IOP_TMR_RELOAD | IOP_TMR_EN);
92 break; 101 break;
93 case CLOCK_EVT_MODE_ONESHOT: 102 case CLOCK_EVT_MODE_ONESHOT:
@@ -143,6 +152,8 @@ void __init iop_init_time(unsigned long tick_rate)
143{ 152{
144 u32 timer_ctl; 153 u32 timer_ctl;
145 154
155 init_sched_clock(&cd, iop_update_sched_clock, 32, tick_rate);
156
146 ticks_per_jiffy = DIV_ROUND_CLOSEST(tick_rate, HZ); 157 ticks_per_jiffy = DIV_ROUND_CLOSEST(tick_rate, HZ);
147 iop_tick_rate = tick_rate; 158 iop_tick_rate = tick_rate;
148 159
@@ -153,6 +164,7 @@ void __init iop_init_time(unsigned long tick_rate)
153 * Set up interrupting clockevent timer 0. 164 * Set up interrupting clockevent timer 0.
154 */ 165 */
155 write_tmr0(timer_ctl & ~IOP_TMR_EN); 166 write_tmr0(timer_ctl & ~IOP_TMR_EN);
167 write_tisr(1);
156 setup_irq(IRQ_IOP_TIMER0, &iop_timer_irq); 168 setup_irq(IRQ_IOP_TIMER0, &iop_timer_irq);
157 clockevents_calc_mult_shift(&iop_clockevent, 169 clockevents_calc_mult_shift(&iop_clockevent,
158 tick_rate, IOP_MIN_RANGE); 170 tick_rate, IOP_MIN_RANGE);
@@ -162,9 +174,6 @@ void __init iop_init_time(unsigned long tick_rate)
162 clockevent_delta2ns(0xf, &iop_clockevent); 174 clockevent_delta2ns(0xf, &iop_clockevent);
163 iop_clockevent.cpumask = cpumask_of(0); 175 iop_clockevent.cpumask = cpumask_of(0);
164 clockevents_register_device(&iop_clockevent); 176 clockevents_register_device(&iop_clockevent);
165 write_trr0(ticks_per_jiffy - 1);
166 write_tcr0(ticks_per_jiffy - 1);
167 write_tmr0(timer_ctl);
168 177
169 /* 178 /*
170 * Set up free-running clocksource timer 1. 179 * Set up free-running clocksource timer 1.
@@ -172,7 +181,5 @@ void __init iop_init_time(unsigned long tick_rate)
172 write_trr1(0xffffffff); 181 write_trr1(0xffffffff);
173 write_tcr1(0xffffffff); 182 write_tcr1(0xffffffff);
174 write_tmr1(timer_ctl); 183 write_tmr1(timer_ctl);
175 clocksource_calc_mult_shift(&iop_clocksource, tick_rate, 184 clocksource_register_hz(&iop_clocksource, tick_rate);
176 IOP_MIN_RANGE);
177 clocksource_register(&iop_clocksource);
178} 185}
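Besides the same clocksource_register_hz() conversion, the iop hunks above switch sched_clock() over to the new helpers from <asm/sched_clock.h>: the platform declares its state with DEFINE_CLOCK_DATA(), implements a notrace sched_clock() that feeds the raw counter value to cyc_to_sched_clock(), provides an update hook that calls update_sched_clock() with the same reading, and registers both with init_sched_clock() (counter width and rate) early in its timer init. A generic sketch built only from the calls visible in this patch; the counter variable is a stand-in, and the inversion mirrors what IOP and Nomadik do with their down-counters:

#include <linux/init.h>
#include <linux/io.h>
#include <asm/sched_clock.h>

static void __iomem *example_counter;	/* hypothetical free-running down-counter */

static DEFINE_CLOCK_DATA(cd);

static inline u32 example_read_counter(void)
{
	/* invert a down-counter so the returned value increases over time */
	return 0xffffffffu - readl(example_counter);
}

unsigned long long notrace sched_clock(void)
{
	u32 cyc = example_read_counter();
	return cyc_to_sched_clock(&cd, cyc, (u32)~0);
}

static void notrace example_update_sched_clock(void)
{
	u32 cyc = example_read_counter();
	update_sched_clock(&cd, cyc, (u32)~0);
}

static void __init example_timer_init(void __iomem *reg, unsigned long rate)
{
	example_counter = reg;
	/* 32 is the counter width in bits */
	init_sched_clock(&cd, example_update_sched_clock, 32, rate);
	/* clocksource/clockevent setup follows */
}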
diff --git a/arch/arm/plat-mxc/epit.c b/arch/arm/plat-mxc/epit.c
index ee9582f4972e..d69d343ff61f 100644
--- a/arch/arm/plat-mxc/epit.c
+++ b/arch/arm/plat-mxc/epit.c
@@ -93,7 +93,6 @@ static struct clocksource clocksource_epit = {
93 .rating = 200, 93 .rating = 200,
94 .read = epit_read, 94 .read = epit_read,
95 .mask = CLOCKSOURCE_MASK(32), 95 .mask = CLOCKSOURCE_MASK(32),
96 .shift = 20,
97 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 96 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
98}; 97};
99 98
@@ -101,9 +100,7 @@ static int __init epit_clocksource_init(struct clk *timer_clk)
101{ 100{
102 unsigned int c = clk_get_rate(timer_clk); 101 unsigned int c = clk_get_rate(timer_clk);
103 102
104 clocksource_epit.mult = clocksource_hz2mult(c, 103 clocksource_register_hz(&clocksource_epit, c);
105 clocksource_epit.shift);
106 clocksource_register(&clocksource_epit);
107 104
108 return 0; 105 return 0;
109} 106}
diff --git a/arch/arm/plat-mxc/time.c b/arch/arm/plat-mxc/time.c
index f9a1b059a76c..9f0c2610595e 100644
--- a/arch/arm/plat-mxc/time.c
+++ b/arch/arm/plat-mxc/time.c
@@ -120,7 +120,6 @@ static struct clocksource clocksource_mxc = {
120 .rating = 200, 120 .rating = 200,
121 .read = mx1_2_get_cycles, 121 .read = mx1_2_get_cycles,
122 .mask = CLOCKSOURCE_MASK(32), 122 .mask = CLOCKSOURCE_MASK(32),
123 .shift = 20,
124 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 123 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
125}; 124};
126 125
@@ -131,9 +130,7 @@ static int __init mxc_clocksource_init(struct clk *timer_clk)
131 if (timer_is_v2()) 130 if (timer_is_v2())
132 clocksource_mxc.read = v2_get_cycles; 131 clocksource_mxc.read = v2_get_cycles;
133 132
134 clocksource_mxc.mult = clocksource_hz2mult(c, 133 clocksource_register_hz(&clocksource_mxc, c);
135 clocksource_mxc.shift);
136 clocksource_register(&clocksource_mxc);
137 134
138 return 0; 135 return 0;
139} 136}
diff --git a/arch/arm/plat-nomadik/Kconfig b/arch/arm/plat-nomadik/Kconfig
index 5da3f97c537b..187f4e84bb22 100644
--- a/arch/arm/plat-nomadik/Kconfig
+++ b/arch/arm/plat-nomadik/Kconfig
@@ -14,6 +14,7 @@ if PLAT_NOMADIK
14 14
15config HAS_MTU 15config HAS_MTU
16 bool 16 bool
17 select HAVE_SCHED_CLOCK
17 help 18 help
18 Support for Multi Timer Unit. MTU provides access 19 Support for Multi Timer Unit. MTU provides access
19 to multiple interrupt generating programmable 20 to multiple interrupt generating programmable
diff --git a/arch/arm/plat-nomadik/timer.c b/arch/arm/plat-nomadik/timer.c
index 63cdc6025bd7..41723402006b 100644
--- a/arch/arm/plat-nomadik/timer.c
+++ b/arch/arm/plat-nomadik/timer.c
@@ -17,9 +17,9 @@
17#include <linux/clk.h> 17#include <linux/clk.h>
18#include <linux/jiffies.h> 18#include <linux/jiffies.h>
19#include <linux/err.h> 19#include <linux/err.h>
20#include <linux/cnt32_to_63.h> 20#include <linux/sched.h>
21#include <linux/timer.h>
22#include <asm/mach/time.h> 21#include <asm/mach/time.h>
22#include <asm/sched_clock.h>
23 23
24#include <plat/mtu.h> 24#include <plat/mtu.h>
25 25
@@ -52,81 +52,24 @@ static struct clocksource nmdk_clksrc = {
52 * Override the global weak sched_clock symbol with this 52 * Override the global weak sched_clock symbol with this
53 * local implementation which uses the clocksource to get some 53 * local implementation which uses the clocksource to get some
54 * better resolution when scheduling the kernel. 54 * better resolution when scheduling the kernel.
55 *
56 * Because the hardware timer period may be quite short
57 * (32.3 secs on the 133 MHz MTU timer selection on ux500)
58 * and because cnt32_to_63() needs to be called at least once per
59 * half period to work properly, a kernel keepwarm() timer is set up
60 * to ensure this requirement is always met.
61 *
62 * Also the sched_clock timer will wrap around at some point,
63 * here we set it to run continously for a year.
64 */ 55 */
65#define SCHED_CLOCK_MIN_WRAP 3600*24*365 56static DEFINE_CLOCK_DATA(cd);
66static struct timer_list cnt32_to_63_keepwarm_timer;
67static u32 sched_mult;
68static u32 sched_shift;
69 57
70unsigned long long notrace sched_clock(void) 58unsigned long long notrace sched_clock(void)
71{ 59{
72 u64 cycles; 60 u32 cyc;
73 61
74 if (unlikely(!mtu_base)) 62 if (unlikely(!mtu_base))
75 return 0; 63 return 0;
76 64
77 cycles = cnt32_to_63(-readl(mtu_base + MTU_VAL(0))); 65 cyc = -readl(mtu_base + MTU_VAL(0));
78 /* 66 return cyc_to_sched_clock(&cd, cyc, (u32)~0);
79 * sched_mult is guaranteed to be even so will
80 * shift out bit 63
81 */
82 return (cycles * sched_mult) >> sched_shift;
83} 67}
84 68
85/* Just kick sched_clock every so often */ 69static void notrace nomadik_update_sched_clock(void)
86static void cnt32_to_63_keepwarm(unsigned long data)
87{ 70{
88 mod_timer(&cnt32_to_63_keepwarm_timer, round_jiffies(jiffies + data)); 71 u32 cyc = -readl(mtu_base + MTU_VAL(0));
89 (void) sched_clock(); 72 update_sched_clock(&cd, cyc, (u32)~0);
90}
91
92/*
93 * Set up a timer to keep sched_clock():s 32_to_63 algorithm warm
94 * once in half a 32bit timer wrap interval.
95 */
96static void __init nmdk_sched_clock_init(unsigned long rate)
97{
98 u32 v;
99 unsigned long delta;
100 u64 days;
101
102 /* Find the apropriate mult and shift factors */
103 clocks_calc_mult_shift(&sched_mult, &sched_shift,
104 rate, NSEC_PER_SEC, SCHED_CLOCK_MIN_WRAP);
105 /* We need to multiply by an even number to get rid of bit 63 */
106 if (sched_mult & 1)
107 sched_mult++;
108
109 /* Let's see what we get, take max counter and scale it */
110 days = (0xFFFFFFFFFFFFFFFFLLU * sched_mult) >> sched_shift;
111 do_div(days, NSEC_PER_SEC);
112 do_div(days, (3600*24));
113
114 pr_info("sched_clock: using %d bits @ %lu Hz wrap in %lu days\n",
115 (64 - sched_shift), rate, (unsigned long) days);
116
117 /*
118 * Program a timer to kick us at half 32bit wraparound
119 * Formula: seconds per wrap = (2^32) / f
120 */
121 v = 0xFFFFFFFFUL / rate;
122 /* We want half of the wrap time to keep cnt32_to_63 warm */
123 v /= 2;
124 pr_debug("sched_clock: prescaled timer rate: %lu Hz, "
125 "initialize keepwarm timer every %d seconds\n", rate, v);
126 /* Convert seconds to jiffies */
127 delta = msecs_to_jiffies(v*1000);
128 setup_timer(&cnt32_to_63_keepwarm_timer, cnt32_to_63_keepwarm, delta);
129 mod_timer(&cnt32_to_63_keepwarm_timer, round_jiffies(jiffies + delta));
130} 73}
131 74
132/* Clockevent device: use one-shot mode */ 75/* Clockevent device: use one-shot mode */
@@ -222,7 +165,6 @@ void __init nmdk_timer_init(void)
222 } else { 165 } else {
223 cr |= MTU_CRn_PRESCALE_1; 166 cr |= MTU_CRn_PRESCALE_1;
224 } 167 }
225 clocksource_calc_mult_shift(&nmdk_clksrc, rate, MTU_MIN_RANGE);
226 168
227 /* Timer 0 is the free running clocksource */ 169 /* Timer 0 is the free running clocksource */
228 writel(cr, mtu_base + MTU_CR(0)); 170 writel(cr, mtu_base + MTU_CR(0));
@@ -233,11 +175,11 @@ void __init nmdk_timer_init(void)
233 /* Now the clock source is ready */ 175 /* Now the clock source is ready */
234 nmdk_clksrc.read = nmdk_read_timer; 176 nmdk_clksrc.read = nmdk_read_timer;
235 177
236 if (clocksource_register(&nmdk_clksrc)) 178 if (clocksource_register_hz(&nmdk_clksrc, rate))
237 pr_err("timer: failed to initialize clock source %s\n", 179 pr_err("timer: failed to initialize clock source %s\n",
238 nmdk_clksrc.name); 180 nmdk_clksrc.name);
239 181
240 nmdk_sched_clock_init(rate); 182 init_sched_clock(&cd, nomadik_update_sched_clock, 32, rate);
241 183
242 /* Timer 1 is used for events */ 184 /* Timer 1 is used for events */
243 185
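The Nomadik conversion shows most clearly what the shared code replaces. The old implementation needed cnt32_to_63() plus a dedicated "keepwarm" kernel timer because the raw counter wraps quickly; with the MTU prescaler selection mentioned in the removed comment the numbers work out to roughly

	wrap     = 2^32 cycles / 133 MHz ≈ 32.3 s
	keepwarm = wrap / 2             ≈ 16 s   (half the wrap period, as the removed code computed)

With init_sched_clock() the wrap handling moves into the common sched_clock code, which is expected to resample the counter through the registered update hook before each wrap, so the keepwarm timer, the hand-tuned mult/shift pair and the associated setup and pr_info() bookkeeping can all be deleted.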
diff --git a/arch/arm/plat-omap/counter_32k.c b/arch/arm/plat-omap/counter_32k.c
index 8722a136f3a5..ea4644021fb9 100644
--- a/arch/arm/plat-omap/counter_32k.c
+++ b/arch/arm/plat-omap/counter_32k.c
@@ -15,8 +15,11 @@
15#include <linux/kernel.h> 15#include <linux/kernel.h>
16#include <linux/init.h> 16#include <linux/init.h>
17#include <linux/clk.h> 17#include <linux/clk.h>
18#include <linux/io.h>
19#include <linux/err.h> 18#include <linux/err.h>
19#include <linux/io.h>
20#include <linux/sched.h>
21
22#include <asm/sched_clock.h>
20 23
21#include <plat/common.h> 24#include <plat/common.h>
22#include <plat/board.h> 25#include <plat/board.h>
@@ -45,7 +48,7 @@
45static u32 offset_32k __read_mostly; 48static u32 offset_32k __read_mostly;
46 49
47#ifdef CONFIG_ARCH_OMAP16XX 50#ifdef CONFIG_ARCH_OMAP16XX
48static cycle_t omap16xx_32k_read(struct clocksource *cs) 51static cycle_t notrace omap16xx_32k_read(struct clocksource *cs)
49{ 52{
50 return omap_readl(OMAP16XX_TIMER_32K_SYNCHRONIZED) - offset_32k; 53 return omap_readl(OMAP16XX_TIMER_32K_SYNCHRONIZED) - offset_32k;
51} 54}
@@ -54,7 +57,7 @@ static cycle_t omap16xx_32k_read(struct clocksource *cs)
54#endif 57#endif
55 58
56#ifdef CONFIG_ARCH_OMAP2420 59#ifdef CONFIG_ARCH_OMAP2420
57static cycle_t omap2420_32k_read(struct clocksource *cs) 60static cycle_t notrace omap2420_32k_read(struct clocksource *cs)
58{ 61{
59 return omap_readl(OMAP2420_32KSYNCT_BASE + 0x10) - offset_32k; 62 return omap_readl(OMAP2420_32KSYNCT_BASE + 0x10) - offset_32k;
60} 63}
@@ -63,7 +66,7 @@ static cycle_t omap2420_32k_read(struct clocksource *cs)
63#endif 66#endif
64 67
65#ifdef CONFIG_ARCH_OMAP2430 68#ifdef CONFIG_ARCH_OMAP2430
66static cycle_t omap2430_32k_read(struct clocksource *cs) 69static cycle_t notrace omap2430_32k_read(struct clocksource *cs)
67{ 70{
68 return omap_readl(OMAP2430_32KSYNCT_BASE + 0x10) - offset_32k; 71 return omap_readl(OMAP2430_32KSYNCT_BASE + 0x10) - offset_32k;
69} 72}
@@ -72,7 +75,7 @@ static cycle_t omap2430_32k_read(struct clocksource *cs)
72#endif 75#endif
73 76
74#ifdef CONFIG_ARCH_OMAP3 77#ifdef CONFIG_ARCH_OMAP3
75static cycle_t omap34xx_32k_read(struct clocksource *cs) 78static cycle_t notrace omap34xx_32k_read(struct clocksource *cs)
76{ 79{
77 return omap_readl(OMAP3430_32KSYNCT_BASE + 0x10) - offset_32k; 80 return omap_readl(OMAP3430_32KSYNCT_BASE + 0x10) - offset_32k;
78} 81}
@@ -81,7 +84,7 @@ static cycle_t omap34xx_32k_read(struct clocksource *cs)
81#endif 84#endif
82 85
83#ifdef CONFIG_ARCH_OMAP4 86#ifdef CONFIG_ARCH_OMAP4
84static cycle_t omap44xx_32k_read(struct clocksource *cs) 87static cycle_t notrace omap44xx_32k_read(struct clocksource *cs)
85{ 88{
86 return omap_readl(OMAP4430_32KSYNCT_BASE + 0x10) - offset_32k; 89 return omap_readl(OMAP4430_32KSYNCT_BASE + 0x10) - offset_32k;
87} 90}
@@ -93,7 +96,7 @@ static cycle_t omap44xx_32k_read(struct clocksource *cs)
93 * Kernel assumes that sched_clock can be called early but may not have 96 * Kernel assumes that sched_clock can be called early but may not have
94 * things ready yet. 97 * things ready yet.
95 */ 98 */
96static cycle_t omap_32k_read_dummy(struct clocksource *cs) 99static cycle_t notrace omap_32k_read_dummy(struct clocksource *cs)
97{ 100{
98 return 0; 101 return 0;
99} 102}
@@ -103,7 +106,6 @@ static struct clocksource clocksource_32k = {
103 .rating = 250, 106 .rating = 250,
104 .read = omap_32k_read_dummy, 107 .read = omap_32k_read_dummy,
105 .mask = CLOCKSOURCE_MASK(32), 108 .mask = CLOCKSOURCE_MASK(32),
106 .shift = 10,
107 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 109 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
108}; 110};
109 111
@@ -111,10 +113,25 @@ static struct clocksource clocksource_32k = {
111 * Returns current time from boot in nsecs. It's OK for this to wrap 113 * Returns current time from boot in nsecs. It's OK for this to wrap
112 * around for now, as it's just a relative time stamp. 114 * around for now, as it's just a relative time stamp.
113 */ 115 */
114unsigned long long sched_clock(void) 116static DEFINE_CLOCK_DATA(cd);
117
118/*
119 * Constants generated by clocks_calc_mult_shift(m, s, 32768, NSEC_PER_SEC, 60).
120 * This gives a resolution of about 30us and a wrap period of about 36hrs.
121 */
122#define SC_MULT 4000000000u
123#define SC_SHIFT 17
124
125unsigned long long notrace sched_clock(void)
126{
127 u32 cyc = clocksource_32k.read(&clocksource_32k);
128 return cyc_to_fixed_sched_clock(&cd, cyc, (u32)~0, SC_MULT, SC_SHIFT);
129}
130
131static void notrace omap_update_sched_clock(void)
115{ 132{
116 return clocksource_cyc2ns(clocksource_32k.read(&clocksource_32k), 133 u32 cyc = clocksource_32k.read(&clocksource_32k);
117 clocksource_32k.mult, clocksource_32k.shift); 134 update_sched_clock(&cd, cyc, (u32)~0);
118} 135}
119 136
120/** 137/**
@@ -168,13 +185,13 @@ static int __init omap_init_clocksource_32k(void)
168 if (!IS_ERR(sync_32k_ick)) 185 if (!IS_ERR(sync_32k_ick))
169 clk_enable(sync_32k_ick); 186 clk_enable(sync_32k_ick);
170 187
171 clocksource_32k.mult = clocksource_hz2mult(32768,
172 clocksource_32k.shift);
173
174 offset_32k = clocksource_32k.read(&clocksource_32k); 188 offset_32k = clocksource_32k.read(&clocksource_32k);
175 189
176 if (clocksource_register(&clocksource_32k)) 190 if (clocksource_register_hz(&clocksource_32k, 32768))
177 printk(err, clocksource_32k.name); 191 printk(err, clocksource_32k.name);
192
193 init_fixed_sched_clock(&cd, omap_update_sched_clock, 32,
194 32768, SC_MULT, SC_SHIFT);
178 } 195 }
179 return 0; 196 return 0;
180} 197}
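Two further details in the counter_32k conversion are worth noting. First, every 32k read function gains a notrace annotation: sched_clock() is called from the function tracer itself, so anything it may call has to be excluded from tracing to avoid recursion. Second, because the 32768 Hz rate is known at compile time, the fixed-constant variants cyc_to_fixed_sched_clock()/init_fixed_sched_clock() are used with a precomputed mult/shift pair rather than values derived at boot. The constants follow the usual scaling rule ns = (cyc * mult) >> shift, i.e. mult = (NSEC_PER_SEC << shift) / rate:

	SC_MULT    = (10^9 << 17) / 32768 = 4,000,000,000
	resolution = 10^9 / 32768 ≈ 30.5 us per tick
	wrap       = 2^32 / 32768 Hz = 131072 s ≈ 36.4 h

which matches the comment added in the hunk. The Versatile constants further down (2796202667 with shift 26, for the 24 MHz reference counter) come from the same calculation.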
diff --git a/arch/arm/plat-orion/time.c b/arch/arm/plat-orion/time.c
index 715a30177f28..c3da2478b2aa 100644
--- a/arch/arm/plat-orion/time.c
+++ b/arch/arm/plat-orion/time.c
@@ -13,11 +13,11 @@
13 13
14#include <linux/kernel.h> 14#include <linux/kernel.h>
15#include <linux/sched.h> 15#include <linux/sched.h>
16#include <linux/cnt32_to_63.h>
17#include <linux/timer.h> 16#include <linux/timer.h>
18#include <linux/clockchips.h> 17#include <linux/clockchips.h>
19#include <linux/interrupt.h> 18#include <linux/interrupt.h>
20#include <linux/irq.h> 19#include <linux/irq.h>
20#include <asm/sched_clock.h>
21#include <asm/mach/time.h> 21#include <asm/mach/time.h>
22#include <mach/bridge-regs.h> 22#include <mach/bridge-regs.h>
23#include <mach/hardware.h> 23#include <mach/hardware.h>
@@ -44,52 +44,26 @@ static u32 ticks_per_jiffy;
44 44
45/* 45/*
46 * Orion's sched_clock implementation. It has a resolution of 46 * Orion's sched_clock implementation. It has a resolution of
47 * at least 7.5ns (133MHz TCLK) and a maximum value of 834 days. 47 * at least 7.5ns (133MHz TCLK).
48 *
49 * Because the hardware timer period is quite short (21 secs if
50 * 200MHz TCLK) and because cnt32_to_63() needs to be called at
51 * least once per half period to work properly, a kernel timer is
52 * set up to ensure this requirement is always met.
53 */ 48 */
54#define TCLK2NS_SCALE_FACTOR 8 49static DEFINE_CLOCK_DATA(cd);
55
56static unsigned long tclk2ns_scale;
57 50
58unsigned long long sched_clock(void) 51unsigned long long notrace sched_clock(void)
59{ 52{
60 unsigned long long v = cnt32_to_63(0xffffffff - readl(TIMER0_VAL)); 53 u32 cyc = 0xffffffff - readl(TIMER0_VAL);
61 return (v * tclk2ns_scale) >> TCLK2NS_SCALE_FACTOR; 54 return cyc_to_sched_clock(&cd, cyc, (u32)~0);
62} 55}
63 56
64static struct timer_list cnt32_to_63_keepwarm_timer;
65 57
66static void cnt32_to_63_keepwarm(unsigned long data) 58static void notrace orion_update_sched_clock(void)
67{ 59{
68 mod_timer(&cnt32_to_63_keepwarm_timer, round_jiffies(jiffies + data)); 60 u32 cyc = 0xffffffff - readl(TIMER0_VAL);
69 (void) sched_clock(); 61 update_sched_clock(&cd, cyc, (u32)~0);
70} 62}
71 63
72static void __init setup_sched_clock(unsigned long tclk) 64static void __init setup_sched_clock(unsigned long tclk)
73{ 65{
74 unsigned long long v; 66 init_sched_clock(&cd, orion_update_sched_clock, 32, tclk);
75 unsigned long data;
76
77 v = NSEC_PER_SEC;
78 v <<= TCLK2NS_SCALE_FACTOR;
79 v += tclk/2;
80 do_div(v, tclk);
81 /*
82 * We want an even value to automatically clear the top bit
83 * returned by cnt32_to_63() without an additional run time
84 * instruction. So if the LSB is 1 then round it up.
85 */
86 if (v & 1)
87 v++;
88 tclk2ns_scale = v;
89
90 data = (0xffffffffUL / tclk / 2 - 2) * HZ;
91 setup_timer(&cnt32_to_63_keepwarm_timer, cnt32_to_63_keepwarm, data);
92 mod_timer(&cnt32_to_63_keepwarm_timer, round_jiffies(jiffies + data));
93} 67}
94 68
95/* 69/*
@@ -102,7 +76,6 @@ static cycle_t orion_clksrc_read(struct clocksource *cs)
102 76
103static struct clocksource orion_clksrc = { 77static struct clocksource orion_clksrc = {
104 .name = "orion_clocksource", 78 .name = "orion_clocksource",
105 .shift = 20,
106 .rating = 300, 79 .rating = 300,
107 .read = orion_clksrc_read, 80 .read = orion_clksrc_read,
108 .mask = CLOCKSOURCE_MASK(32), 81 .mask = CLOCKSOURCE_MASK(32),
@@ -245,8 +218,7 @@ void __init orion_time_init(unsigned int irq, unsigned int tclk)
245 writel(u & ~BRIDGE_INT_TIMER0, BRIDGE_MASK); 218 writel(u & ~BRIDGE_INT_TIMER0, BRIDGE_MASK);
246 u = readl(TIMER_CTRL); 219 u = readl(TIMER_CTRL);
247 writel(u | TIMER0_EN | TIMER0_RELOAD_EN, TIMER_CTRL); 220 writel(u | TIMER0_EN | TIMER0_RELOAD_EN, TIMER_CTRL);
248 orion_clksrc.mult = clocksource_hz2mult(tclk, orion_clksrc.shift); 221 clocksource_register_hz(&orion_clksrc, tclk);
249 clocksource_register(&orion_clksrc);
250 222
251 /* 223 /*
252 * Setup clockevent timer (interrupt-driven.) 224 * Setup clockevent timer (interrupt-driven.)
diff --git a/arch/arm/plat-spear/time.c b/arch/arm/plat-spear/time.c
index ab211652e4ca..839c88df9994 100644
--- a/arch/arm/plat-spear/time.c
+++ b/arch/arm/plat-spear/time.c
@@ -81,8 +81,6 @@ static struct clocksource clksrc = {
81 .rating = 200, /* its a pretty decent clock */ 81 .rating = 200, /* its a pretty decent clock */
82 .read = clocksource_read_cycles, 82 .read = clocksource_read_cycles,
83 .mask = 0xFFFF, /* 16 bits */ 83 .mask = 0xFFFF, /* 16 bits */
84 .mult = 0, /* to be computed */
85 .shift = 0, /* to be computed */
86 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 84 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
87}; 85};
88 86
@@ -105,10 +103,8 @@ static void spear_clocksource_init(void)
105 val |= CTRL_ENABLE ; 103 val |= CTRL_ENABLE ;
106 writew(val, gpt_base + CR(CLKSRC)); 104 writew(val, gpt_base + CR(CLKSRC));
107 105
108 clocksource_calc_mult_shift(&clksrc, tick_rate, SPEAR_MIN_RANGE);
109
110 /* register the clocksource */ 106 /* register the clocksource */
111 clocksource_register(&clksrc); 107 clocksource_register_hz(&clksrc, tick_rate);
112} 108}
113 109
114static struct clock_event_device clkevt = { 110static struct clock_event_device clkevt = {
diff --git a/arch/arm/plat-stmp3xxx/timer.c b/arch/arm/plat-stmp3xxx/timer.c
index 063c7bc0e740..c395630a6edc 100644
--- a/arch/arm/plat-stmp3xxx/timer.c
+++ b/arch/arm/plat-stmp3xxx/timer.c
@@ -89,7 +89,6 @@ static struct clocksource cksrc_stmp3xxx = {
89 .rating = 250, 89 .rating = 250,
90 .read = stmp3xxx_clock_read, 90 .read = stmp3xxx_clock_read,
91 .mask = CLOCKSOURCE_MASK(16), 91 .mask = CLOCKSOURCE_MASK(16),
92 .shift = 10,
93 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 92 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
94}; 93};
95 94
@@ -106,8 +105,6 @@ static struct irqaction stmp3xxx_timer_irq = {
106 */ 105 */
107static void __init stmp3xxx_init_timer(void) 106static void __init stmp3xxx_init_timer(void)
108{ 107{
109 cksrc_stmp3xxx.mult = clocksource_hz2mult(CLOCK_TICK_RATE,
110 cksrc_stmp3xxx.shift);
111 ckevt_timrot.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, 108 ckevt_timrot.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC,
112 ckevt_timrot.shift); 109 ckevt_timrot.shift);
113 ckevt_timrot.min_delta_ns = clockevent_delta2ns(2, &ckevt_timrot); 110 ckevt_timrot.min_delta_ns = clockevent_delta2ns(2, &ckevt_timrot);
@@ -140,7 +137,7 @@ static void __init stmp3xxx_init_timer(void)
140 137
141 setup_irq(IRQ_TIMER0, &stmp3xxx_timer_irq); 138 setup_irq(IRQ_TIMER0, &stmp3xxx_timer_irq);
142 139
143 clocksource_register(&cksrc_stmp3xxx); 140 clocksource_register_hz(&cksrc_stmp3xxx, CLOCK_TICK_RATE);
144 clockevents_register_device(&ckevt_timrot); 141 clockevents_register_device(&ckevt_timrot);
145} 142}
146 143
diff --git a/arch/arm/plat-versatile/Makefile b/arch/arm/plat-versatile/Makefile
index aaa571d17924..16dde0819934 100644
--- a/arch/arm/plat-versatile/Makefile
+++ b/arch/arm/plat-versatile/Makefile
@@ -1,6 +1,7 @@
1obj-y := clock.o 1obj-y := clock.o
2obj-$(CONFIG_ARCH_REALVIEW) += sched-clock.o 2ifneq ($(CONFIG_ARCH_INTEGRATOR),y)
3obj-$(CONFIG_ARCH_VERSATILE) += sched-clock.o 3obj-y += sched-clock.o
4endif
4ifeq ($(CONFIG_LEDS_CLASS),y) 5ifeq ($(CONFIG_LEDS_CLASS),y)
5obj-$(CONFIG_ARCH_REALVIEW) += leds.o 6obj-$(CONFIG_ARCH_REALVIEW) += leds.o
6obj-$(CONFIG_ARCH_VERSATILE) += leds.o 7obj-$(CONFIG_ARCH_VERSATILE) += leds.o
diff --git a/arch/arm/plat-versatile/include/plat/sched_clock.h b/arch/arm/plat-versatile/include/plat/sched_clock.h
new file mode 100644
index 000000000000..5c3e4fc9fa0c
--- /dev/null
+++ b/arch/arm/plat-versatile/include/plat/sched_clock.h
@@ -0,0 +1,6 @@
1#ifndef ARM_PLAT_SCHED_CLOCK_H
2#define ARM_PLAT_SCHED_CLOCK_H
3
4void versatile_sched_clock_init(void __iomem *, unsigned long);
5
6#endif
diff --git a/arch/arm/plat-versatile/sched-clock.c b/arch/arm/plat-versatile/sched-clock.c
index 9768cf7e83d7..3d6a4c292cab 100644
--- a/arch/arm/plat-versatile/sched-clock.c
+++ b/arch/arm/plat-versatile/sched-clock.c
@@ -18,36 +18,41 @@
18 * along with this program; if not, write to the Free Software 18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */ 20 */
21#include <linux/cnt32_to_63.h>
22#include <linux/io.h> 21#include <linux/io.h>
23#include <asm/div64.h> 22#include <linux/sched.h>
24 23
25#include <mach/hardware.h> 24#include <asm/sched_clock.h>
26#include <mach/platform.h> 25#include <plat/sched_clock.h>
27 26
28#ifdef VERSATILE_SYS_BASE 27static DEFINE_CLOCK_DATA(cd);
29#define REFCOUNTER (__io_address(VERSATILE_SYS_BASE) + VERSATILE_SYS_24MHz_OFFSET) 28static void __iomem *ctr;
30#endif
31
32#ifdef REALVIEW_SYS_BASE
33#define REFCOUNTER (__io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_24MHz_OFFSET)
34#endif
35 29
36/* 30/*
37 * This is the Realview and Versatile sched_clock implementation. This 31 * Constants generated by clocks_calc_mult_shift(m, s, 24MHz, NSEC_PER_SEC, 60).
38 * has a resolution of 41.7ns, and a maximum value of about 35583 days. 32 * This gives a resolution of about 41ns and a wrap period of about 178s.
39 *
40 * The return value is guaranteed to be monotonic in that range as
41 * long as there is always less than 89 seconds between successive
42 * calls to this function.
43 */ 33 */
44unsigned long long sched_clock(void) 34#define SC_MULT 2796202667u
35#define SC_SHIFT 26
36
37unsigned long long notrace sched_clock(void)
45{ 38{
46 unsigned long long v = cnt32_to_63(readl(REFCOUNTER)); 39 if (ctr) {
40 u32 cyc = readl(ctr);
41 return cyc_to_fixed_sched_clock(&cd, cyc, (u32)~0,
42 SC_MULT, SC_SHIFT);
43 } else
44 return 0;
45}
47 46
48 /* the <<1 gets rid of the cnt_32_to_63 top bit saving on a bic insn */ 47static void notrace versatile_update_sched_clock(void)
49 v *= 125<<1; 48{
50 do_div(v, 3<<1); 49 u32 cyc = readl(ctr);
50 update_sched_clock(&cd, cyc, (u32)~0);
51}
51 52
52 return v; 53void __init versatile_sched_clock_init(void __iomem *reg, unsigned long rate)
54{
55 ctr = reg;
56 init_fixed_sched_clock(&cd, versatile_update_sched_clock,
57 32, rate, SC_MULT, SC_SHIFT);
53} 58}
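Finally, the plat-versatile implementation stops hard-coding the 24 MHz reference counter address and instead takes it from the machine code through the new versatile_sched_clock_init() hook declared in <plat/sched_clock.h>; the Makefile change above builds sched-clock.o for every plat-versatile user except Integrator. A plausible call site, sketched from that prototype; the address expression and rate are illustrative only, and the actual callers are outside the hunks quoted here:

#include <linux/init.h>
#include <linux/io.h>
#include <mach/hardware.h>
#include <mach/platform.h>
#include <plat/sched_clock.h>

static void __init example_board_sched_clock_init(void)
{
	/* free-running 24 MHz reference counter in the system controller */
	versatile_sched_clock_init(__io_address(REALVIEW_SYS_BASE) +
				   REALVIEW_SYS_24MHz_OFFSET, 24000000);
}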