diff options
Diffstat (limited to 'arch/tile')
-rw-r--r-- | arch/tile/Kconfig | 6 | ||||
-rw-r--r-- | arch/tile/include/asm/perf_event.h | 22 | ||||
-rw-r--r-- | arch/tile/include/asm/pmc.h | 64 | ||||
-rw-r--r-- | arch/tile/kernel/Makefile | 2 | ||||
-rw-r--r-- | arch/tile/kernel/intvec_32.S | 24 | ||||
-rw-r--r-- | arch/tile/kernel/intvec_64.S | 24 | ||||
-rw-r--r-- | arch/tile/kernel/irq.c | 18 | ||||
-rw-r--r-- | arch/tile/kernel/messaging.c | 4 | ||||
-rw-r--r-- | arch/tile/kernel/pci.c | 2 | ||||
-rw-r--r-- | arch/tile/kernel/perf_event.c | 1005 | ||||
-rw-r--r-- | arch/tile/kernel/pmc.c | 121 | ||||
-rw-r--r-- | arch/tile/kernel/time.c | 10 | ||||
-rw-r--r-- | arch/tile/kernel/vdso/Makefile | 2 |
13 files changed, 1285 insertions, 19 deletions
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index b3692ce78f90..31c8c6223995 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig | |||
@@ -3,6 +3,8 @@ | |||
3 | 3 | ||
4 | config TILE | 4 | config TILE |
5 | def_bool y | 5 | def_bool y |
6 | select HAVE_PERF_EVENTS | ||
7 | select USE_PMC if PERF_EVENTS | ||
6 | select HAVE_DMA_ATTRS | 8 | select HAVE_DMA_ATTRS |
7 | select HAVE_DMA_API_DEBUG | 9 | select HAVE_DMA_API_DEBUG |
8 | select HAVE_KVM if !TILEGX | 10 | select HAVE_KVM if !TILEGX |
@@ -66,6 +68,10 @@ config HUGETLB_SUPER_PAGES | |||
66 | config GENERIC_TIME_VSYSCALL | 68 | config GENERIC_TIME_VSYSCALL |
67 | def_bool y | 69 | def_bool y |
68 | 70 | ||
71 | # Enable PMC if PERF_EVENTS, OPROFILE, or WATCHPOINTS are enabled. | ||
72 | config USE_PMC | ||
73 | bool | ||
74 | |||
69 | # FIXME: tilegx can implement a more efficient rwsem. | 75 | # FIXME: tilegx can implement a more efficient rwsem. |
70 | config RWSEM_GENERIC_SPINLOCK | 76 | config RWSEM_GENERIC_SPINLOCK |
71 | def_bool y | 77 | def_bool y |
diff --git a/arch/tile/include/asm/perf_event.h b/arch/tile/include/asm/perf_event.h new file mode 100644 index 000000000000..59c5b164e5b6 --- /dev/null +++ b/arch/tile/include/asm/perf_event.h | |||
@@ -0,0 +1,22 @@ | |||
1 | /* | ||
2 | * Copyright 2014 Tilera Corporation. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation, version 2. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
11 | * NON INFRINGEMENT. See the GNU General Public License for | ||
12 | * more details. | ||
13 | */ | ||
14 | |||
15 | #ifndef _ASM_TILE_PERF_EVENT_H | ||
16 | #define _ASM_TILE_PERF_EVENT_H | ||
17 | |||
18 | #include <linux/percpu.h> | ||
19 | DECLARE_PER_CPU(u64, perf_irqs); | ||
20 | |||
21 | unsigned long handle_syscall_link_address(void); | ||
22 | #endif /* _ASM_TILE_PERF_EVENT_H */ | ||
diff --git a/arch/tile/include/asm/pmc.h b/arch/tile/include/asm/pmc.h new file mode 100644 index 000000000000..7ae3956d9008 --- /dev/null +++ b/arch/tile/include/asm/pmc.h | |||
@@ -0,0 +1,64 @@ | |||
1 | /* | ||
2 | * Copyright 2014 Tilera Corporation. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation, version 2. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
11 | * NON INFRINGEMENT. See the GNU General Public License for | ||
12 | * more details. | ||
13 | */ | ||
14 | |||
15 | #ifndef _ASM_TILE_PMC_H | ||
16 | #define _ASM_TILE_PMC_H | ||
17 | |||
18 | #include <linux/ptrace.h> | ||
19 | |||
20 | #define TILE_BASE_COUNTERS 2 | ||
21 | |||
22 | /* Bitfields below are derived from SPR PERF_COUNT_CTL. */ | ||
23 | #ifndef __tilegx__ | ||
24 | /* PERF_COUNT_CTL on TILEPro */ | ||
25 | #define TILE_CTL_EXCL_USER (1 << 7) /* exclude user level */ | ||
26 | #define TILE_CTL_EXCL_KERNEL (1 << 8) /* exclude kernel level */ | ||
27 | #define TILE_CTL_EXCL_HV (1 << 9) /* exclude hypervisor level */ | ||
28 | |||
29 | #define TILE_SEL_MASK 0x7f /* 7 bits for event SEL, | ||
30 | COUNT_0_SEL */ | ||
31 | #define TILE_PLM_MASK 0x780 /* 4 bits priv level msks, | ||
32 | COUNT_0_MASK*/ | ||
33 | #define TILE_EVENT_MASK (TILE_SEL_MASK | TILE_PLM_MASK) | ||
34 | |||
35 | #else /* __tilegx__*/ | ||
36 | /* PERF_COUNT_CTL on TILEGx */ | ||
37 | #define TILE_CTL_EXCL_USER (1 << 10) /* exclude user level */ | ||
38 | #define TILE_CTL_EXCL_KERNEL (1 << 11) /* exclude kernel level */ | ||
39 | #define TILE_CTL_EXCL_HV (1 << 12) /* exclude hypervisor level */ | ||
40 | |||
41 | #define TILE_SEL_MASK 0x3f /* 6 bits for event SEL, | ||
42 | COUNT_0_SEL*/ | ||
43 | #define TILE_BOX_MASK 0x1c0 /* 3 bits box msks, | ||
44 | COUNT_0_BOX */ | ||
45 | #define TILE_PLM_MASK 0x3c00 /* 4 bits priv level msks, | ||
46 | COUNT_0_MASK */ | ||
47 | #define TILE_EVENT_MASK (TILE_SEL_MASK | TILE_BOX_MASK | TILE_PLM_MASK) | ||
48 | #endif /* __tilegx__*/ | ||
49 | |||
50 | /* Takes the pt_regs pointer and the fault number. Returns an error to disable the interrupt. */ | ||
51 | typedef int (*perf_irq_t)(struct pt_regs *, int); | ||
52 | |||
53 | int userspace_perf_handler(struct pt_regs *regs, int fault); | ||
54 | |||
55 | perf_irq_t reserve_pmc_hardware(perf_irq_t new_perf_irq); | ||
56 | void release_pmc_hardware(void); | ||
57 | |||
58 | unsigned long pmc_get_overflow(void); | ||
59 | void pmc_ack_overflow(unsigned long status); | ||
60 | |||
61 | void unmask_pmc_interrupts(void); | ||
62 | void mask_pmc_interrupts(void); | ||
63 | |||
64 | #endif /* _ASM_TILE_PMC_H */ | ||
diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile index 27a2bf39dae8..21f77bf68c69 100644 --- a/arch/tile/kernel/Makefile +++ b/arch/tile/kernel/Makefile | |||
@@ -25,6 +25,8 @@ obj-$(CONFIG_PCI) += pci_gx.o | |||
25 | else | 25 | else |
26 | obj-$(CONFIG_PCI) += pci.o | 26 | obj-$(CONFIG_PCI) += pci.o |
27 | endif | 27 | endif |
28 | obj-$(CONFIG_PERF_EVENTS) += perf_event.o | ||
29 | obj-$(CONFIG_USE_PMC) += pmc.o | ||
28 | obj-$(CONFIG_TILE_USB) += usb.o | 30 | obj-$(CONFIG_TILE_USB) += usb.o |
29 | obj-$(CONFIG_TILE_HVGLUE_TRACE) += hvglue_trace.o | 31 | obj-$(CONFIG_TILE_HVGLUE_TRACE) += hvglue_trace.o |
30 | obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o mcount_64.o | 32 | obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o mcount_64.o |
diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index 2cbe6d5dd6b0..cdbda45a4e4b 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S | |||
@@ -313,13 +313,13 @@ intvec_\vecname: | |||
313 | movei r3, 0 | 313 | movei r3, 0 |
314 | } | 314 | } |
315 | .else | 315 | .else |
316 | .ifc \c_routine, op_handle_perf_interrupt | 316 | .ifc \c_routine, handle_perf_interrupt |
317 | { | 317 | { |
318 | mfspr r2, PERF_COUNT_STS | 318 | mfspr r2, PERF_COUNT_STS |
319 | movei r3, -1 /* not used, but set for consistency */ | 319 | movei r3, -1 /* not used, but set for consistency */ |
320 | } | 320 | } |
321 | .else | 321 | .else |
322 | .ifc \c_routine, op_handle_aux_perf_interrupt | 322 | .ifc \c_routine, handle_perf_interrupt |
323 | { | 323 | { |
324 | mfspr r2, AUX_PERF_COUNT_STS | 324 | mfspr r2, AUX_PERF_COUNT_STS |
325 | movei r3, -1 /* not used, but set for consistency */ | 325 | movei r3, -1 /* not used, but set for consistency */ |
@@ -946,6 +946,13 @@ STD_ENTRY(interrupt_return) | |||
946 | bzt r30, .Lrestore_regs | 946 | bzt r30, .Lrestore_regs |
947 | 3: | 947 | 3: |
948 | 948 | ||
949 | /* We rely on INT_PERF_COUNT being 33 and INT_AUX_PERF_COUNT being 48, so both bits land in the _K_1 mask word. */ | ||
950 | { | ||
951 | moveli r0, lo16(1 << (INT_PERF_COUNT - 32)) | ||
952 | bz r31, .Lrestore_regs | ||
953 | } | ||
954 | auli r0, r0, ha16(1 << (INT_AUX_PERF_COUNT - 32)) | ||
955 | mtspr SPR_INTERRUPT_MASK_RESET_K_1, r0 | ||
949 | 956 | ||
950 | /* | 957 | /* |
951 | * We now commit to returning from this interrupt, since we will be | 958 | * We now commit to returning from this interrupt, since we will be |
@@ -1171,6 +1178,10 @@ handle_nmi: | |||
1171 | PTREGS_PTR(r0, PTREGS_OFFSET_BASE) | 1178 | PTREGS_PTR(r0, PTREGS_OFFSET_BASE) |
1172 | } | 1179 | } |
1173 | FEEDBACK_REENTER(handle_nmi) | 1180 | FEEDBACK_REENTER(handle_nmi) |
1181 | { | ||
1182 | movei r30, 1 | ||
1183 | seq r31, r0, zero | ||
1184 | } | ||
1174 | j interrupt_return | 1185 | j interrupt_return |
1175 | STD_ENDPROC(handle_nmi) | 1186 | STD_ENDPROC(handle_nmi) |
1176 | 1187 | ||
@@ -1835,8 +1846,9 @@ int_unalign: | |||
1835 | /* Include .intrpt array of interrupt vectors */ | 1846 | /* Include .intrpt array of interrupt vectors */ |
1836 | .section ".intrpt", "ax" | 1847 | .section ".intrpt", "ax" |
1837 | 1848 | ||
1838 | #define op_handle_perf_interrupt bad_intr | 1849 | #ifndef CONFIG_USE_PMC |
1839 | #define op_handle_aux_perf_interrupt bad_intr | 1850 | #define handle_perf_interrupt bad_intr |
1851 | #endif | ||
1840 | 1852 | ||
1841 | #ifndef CONFIG_HARDWALL | 1853 | #ifndef CONFIG_HARDWALL |
1842 | #define do_hardwall_trap bad_intr | 1854 | #define do_hardwall_trap bad_intr |
@@ -1877,7 +1889,7 @@ int_unalign: | |||
1877 | int_hand INT_IDN_AVAIL, IDN_AVAIL, bad_intr | 1889 | int_hand INT_IDN_AVAIL, IDN_AVAIL, bad_intr |
1878 | int_hand INT_UDN_AVAIL, UDN_AVAIL, bad_intr | 1890 | int_hand INT_UDN_AVAIL, UDN_AVAIL, bad_intr |
1879 | int_hand INT_PERF_COUNT, PERF_COUNT, \ | 1891 | int_hand INT_PERF_COUNT, PERF_COUNT, \ |
1880 | op_handle_perf_interrupt, handle_nmi | 1892 | handle_perf_interrupt, handle_nmi |
1881 | int_hand INT_INTCTRL_3, INTCTRL_3, bad_intr | 1893 | int_hand INT_INTCTRL_3, INTCTRL_3, bad_intr |
1882 | #if CONFIG_KERNEL_PL == 2 | 1894 | #if CONFIG_KERNEL_PL == 2 |
1883 | dc_dispatch INT_INTCTRL_2, INTCTRL_2 | 1895 | dc_dispatch INT_INTCTRL_2, INTCTRL_2 |
@@ -1902,7 +1914,7 @@ int_unalign: | |||
1902 | int_hand INT_SN_CPL, SN_CPL, bad_intr | 1914 | int_hand INT_SN_CPL, SN_CPL, bad_intr |
1903 | int_hand INT_DOUBLE_FAULT, DOUBLE_FAULT, do_trap | 1915 | int_hand INT_DOUBLE_FAULT, DOUBLE_FAULT, do_trap |
1904 | int_hand INT_AUX_PERF_COUNT, AUX_PERF_COUNT, \ | 1916 | int_hand INT_AUX_PERF_COUNT, AUX_PERF_COUNT, \ |
1905 | op_handle_aux_perf_interrupt, handle_nmi | 1917 | handle_perf_interrupt, handle_nmi |
1906 | 1918 | ||
1907 | /* Synthetic interrupt delivered only by the simulator */ | 1919 | /* Synthetic interrupt delivered only by the simulator */ |
1908 | int_hand INT_BREAKPOINT, BREAKPOINT, do_breakpoint | 1920 | int_hand INT_BREAKPOINT, BREAKPOINT, do_breakpoint |
diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S index b8fc497f2437..5b67efcecabd 100644 --- a/arch/tile/kernel/intvec_64.S +++ b/arch/tile/kernel/intvec_64.S | |||
@@ -509,10 +509,10 @@ intvec_\vecname: | |||
509 | .ifc \c_routine, do_trap | 509 | .ifc \c_routine, do_trap |
510 | mfspr r2, GPV_REASON | 510 | mfspr r2, GPV_REASON |
511 | .else | 511 | .else |
512 | .ifc \c_routine, op_handle_perf_interrupt | 512 | .ifc \c_routine, handle_perf_interrupt |
513 | mfspr r2, PERF_COUNT_STS | 513 | mfspr r2, PERF_COUNT_STS |
514 | .else | 514 | .else |
515 | .ifc \c_routine, op_handle_aux_perf_interrupt | 515 | .ifc \c_routine, handle_perf_interrupt |
516 | mfspr r2, AUX_PERF_COUNT_STS | 516 | mfspr r2, AUX_PERF_COUNT_STS |
517 | .endif | 517 | .endif |
518 | .endif | 518 | .endif |
@@ -971,6 +971,15 @@ STD_ENTRY(interrupt_return) | |||
971 | beqzt r30, .Lrestore_regs | 971 | beqzt r30, .Lrestore_regs |
972 | 3: | 972 | 3: |
973 | 973 | ||
974 | #if INT_PERF_COUNT + 1 != INT_AUX_PERF_COUNT | ||
975 | # error Bad interrupt assumption | ||
976 | #endif | ||
977 | { | ||
978 | movei r0, 3 /* two adjacent bits for the PERF_COUNT mask */ | ||
979 | beqz r31, .Lrestore_regs | ||
980 | } | ||
981 | shli r0, r0, INT_PERF_COUNT | ||
982 | mtspr SPR_INTERRUPT_MASK_RESET_K, r0 | ||
974 | 983 | ||
975 | /* | 984 | /* |
976 | * We now commit to returning from this interrupt, since we will be | 985 | * We now commit to returning from this interrupt, since we will be |
@@ -1187,7 +1196,7 @@ handle_nmi: | |||
1187 | FEEDBACK_REENTER(handle_nmi) | 1196 | FEEDBACK_REENTER(handle_nmi) |
1188 | { | 1197 | { |
1189 | movei r30, 1 | 1198 | movei r30, 1 |
1190 | move r31, r0 | 1199 | cmpeq r31, r0, zero |
1191 | } | 1200 | } |
1192 | j interrupt_return | 1201 | j interrupt_return |
1193 | STD_ENDPROC(handle_nmi) | 1202 | STD_ENDPROC(handle_nmi) |
@@ -1491,8 +1500,9 @@ STD_ENTRY(fill_ra_stack) | |||
1491 | .global intrpt_start | 1500 | .global intrpt_start |
1492 | intrpt_start: | 1501 | intrpt_start: |
1493 | 1502 | ||
1494 | #define op_handle_perf_interrupt bad_intr | 1503 | #ifndef CONFIG_USE_PMC |
1495 | #define op_handle_aux_perf_interrupt bad_intr | 1504 | #define handle_perf_interrupt bad_intr |
1505 | #endif | ||
1496 | 1506 | ||
1497 | #ifndef CONFIG_HARDWALL | 1507 | #ifndef CONFIG_HARDWALL |
1498 | #define do_hardwall_trap bad_intr | 1508 | #define do_hardwall_trap bad_intr |
@@ -1540,9 +1550,9 @@ intrpt_start: | |||
1540 | #endif | 1550 | #endif |
1541 | int_hand INT_IPI_0, IPI_0, bad_intr | 1551 | int_hand INT_IPI_0, IPI_0, bad_intr |
1542 | int_hand INT_PERF_COUNT, PERF_COUNT, \ | 1552 | int_hand INT_PERF_COUNT, PERF_COUNT, \ |
1543 | op_handle_perf_interrupt, handle_nmi | 1553 | handle_perf_interrupt, handle_nmi |
1544 | int_hand INT_AUX_PERF_COUNT, AUX_PERF_COUNT, \ | 1554 | int_hand INT_AUX_PERF_COUNT, AUX_PERF_COUNT, \ |
1545 | op_handle_perf_interrupt, handle_nmi | 1555 | handle_perf_interrupt, handle_nmi |
1546 | int_hand INT_INTCTRL_3, INTCTRL_3, bad_intr | 1556 | int_hand INT_INTCTRL_3, INTCTRL_3, bad_intr |
1547 | #if CONFIG_KERNEL_PL == 2 | 1557 | #if CONFIG_KERNEL_PL == 2 |
1548 | dc_dispatch INT_INTCTRL_2, INTCTRL_2 | 1558 | dc_dispatch INT_INTCTRL_2, INTCTRL_2 |
diff --git a/arch/tile/kernel/irq.c b/arch/tile/kernel/irq.c index 0586fdb9352d..906a76bdb31d 100644 --- a/arch/tile/kernel/irq.c +++ b/arch/tile/kernel/irq.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <hv/drv_pcie_rc_intf.h> | 21 | #include <hv/drv_pcie_rc_intf.h> |
22 | #include <arch/spr_def.h> | 22 | #include <arch/spr_def.h> |
23 | #include <asm/traps.h> | 23 | #include <asm/traps.h> |
24 | #include <linux/perf_event.h> | ||
24 | 25 | ||
25 | /* Bit-flag stored in irq_desc->chip_data to indicate HW-cleared irqs. */ | 26 | /* Bit-flag stored in irq_desc->chip_data to indicate HW-cleared irqs. */ |
26 | #define IS_HW_CLEARED 1 | 27 | #define IS_HW_CLEARED 1 |
@@ -261,6 +262,23 @@ void ack_bad_irq(unsigned int irq) | |||
261 | } | 262 | } |
262 | 263 | ||
263 | /* | 264 | /* |
265 | * /proc/interrupts printing: | ||
266 | */ | ||
267 | int arch_show_interrupts(struct seq_file *p, int prec) | ||
268 | { | ||
269 | #ifdef CONFIG_PERF_EVENTS | ||
270 | int i; | ||
271 | |||
272 | seq_printf(p, "%*s: ", prec, "PMI"); | ||
273 | |||
274 | for_each_online_cpu(i) | ||
275 | seq_printf(p, "%10llu ", per_cpu(perf_irqs, i)); | ||
276 | seq_puts(p, " perf_events\n"); | ||
277 | #endif | ||
278 | return 0; | ||
279 | } | ||
280 | |||
281 | /* | ||
264 | * Generic, controller-independent functions: | 282 | * Generic, controller-independent functions: |
265 | */ | 283 | */ |
266 | 284 | ||
diff --git a/arch/tile/kernel/messaging.c b/arch/tile/kernel/messaging.c index 00331af9525d..7867266f9716 100644 --- a/arch/tile/kernel/messaging.c +++ b/arch/tile/kernel/messaging.c | |||
@@ -68,8 +68,8 @@ void hv_message_intr(struct pt_regs *regs, int intnum) | |||
68 | #endif | 68 | #endif |
69 | 69 | ||
70 | while (1) { | 70 | while (1) { |
71 | rmi = hv_receive_message(__get_cpu_var(msg_state), | 71 | HV_MsgState *state = this_cpu_ptr(&msg_state); |
72 | (HV_VirtAddr) message, | 72 | rmi = hv_receive_message(*state, (HV_VirtAddr) message, |
73 | sizeof(message)); | 73 | sizeof(message)); |
74 | if (rmi.msglen == 0) | 74 | if (rmi.msglen == 0) |
75 | break; | 75 | break; |
diff --git a/arch/tile/kernel/pci.c b/arch/tile/kernel/pci.c index c45593db7718..1f80a88c75a6 100644 --- a/arch/tile/kernel/pci.c +++ b/arch/tile/kernel/pci.c | |||
@@ -250,8 +250,6 @@ static void fixup_read_and_payload_sizes(void) | |||
250 | 250 | ||
251 | /* Scan for the smallest maximum payload size. */ | 251 | /* Scan for the smallest maximum payload size. */ |
252 | for_each_pci_dev(dev) { | 252 | for_each_pci_dev(dev) { |
253 | u32 devcap; | ||
254 | |||
255 | if (!pci_is_pcie(dev)) | 253 | if (!pci_is_pcie(dev)) |
256 | continue; | 254 | continue; |
257 | 255 | ||
diff --git a/arch/tile/kernel/perf_event.c b/arch/tile/kernel/perf_event.c new file mode 100644 index 000000000000..2bf6c9c135c1 --- /dev/null +++ b/arch/tile/kernel/perf_event.c | |||
@@ -0,0 +1,1005 @@ | |||
1 | /* | ||
2 | * Copyright 2014 Tilera Corporation. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation, version 2. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
11 | * NON INFRINGEMENT. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | * | ||
15 | * Perf_events support for Tile processor. | ||
16 | * | ||
17 | * This code is based upon the x86 perf event | ||
18 | * code, which is: | ||
19 | * | ||
20 | * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> | ||
21 | * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar | ||
22 | * Copyright (C) 2009 Jaswinder Singh Rajput | ||
23 | * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter | ||
24 | * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | ||
25 | * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com> | ||
26 | * Copyright (C) 2009 Google, Inc., Stephane Eranian | ||
27 | */ | ||
28 | |||
29 | #include <linux/kprobes.h> | ||
30 | #include <linux/kernel.h> | ||
31 | #include <linux/kdebug.h> | ||
32 | #include <linux/mutex.h> | ||
33 | #include <linux/bitmap.h> | ||
34 | #include <linux/irq.h> | ||
35 | #include <linux/interrupt.h> | ||
36 | #include <linux/perf_event.h> | ||
37 | #include <linux/atomic.h> | ||
38 | #include <asm/traps.h> | ||
39 | #include <asm/stack.h> | ||
40 | #include <asm/pmc.h> | ||
41 | #include <hv/hypervisor.h> | ||
42 | |||
43 | #define TILE_MAX_COUNTERS 4 | ||
44 | |||
45 | #define PERF_COUNT_0_IDX 0 | ||
46 | #define PERF_COUNT_1_IDX 1 | ||
47 | #define AUX_PERF_COUNT_0_IDX 2 | ||
48 | #define AUX_PERF_COUNT_1_IDX 3 | ||
49 | |||
50 | struct cpu_hw_events { | ||
51 | int n_events; | ||
52 | struct perf_event *events[TILE_MAX_COUNTERS]; /* counter order */ | ||
53 | struct perf_event *event_list[TILE_MAX_COUNTERS]; /* enabled | ||
54 | order */ | ||
55 | int assign[TILE_MAX_COUNTERS]; | ||
56 | unsigned long active_mask[BITS_TO_LONGS(TILE_MAX_COUNTERS)]; | ||
57 | unsigned long used_mask; | ||
58 | }; | ||
59 | |||
60 | /* TILE arch specific performance monitor unit */ | ||
61 | struct tile_pmu { | ||
62 | const char *name; | ||
63 | int version; | ||
64 | const int *hw_events; /* generic hw events table */ | ||
65 | /* generic hw cache events table */ | ||
66 | const int (*cache_events)[PERF_COUNT_HW_CACHE_MAX] | ||
67 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
68 | [PERF_COUNT_HW_CACHE_RESULT_MAX]; | ||
69 | int (*map_hw_event)(u64); /*method used to map | ||
70 | hw events */ | ||
71 | int (*map_cache_event)(u64); /*method used to map | ||
72 | cache events */ | ||
73 | |||
74 | u64 max_period; /* max sampling period */ | ||
75 | u64 cntval_mask; /* counter width mask */ | ||
76 | int cntval_bits; /* counter width */ | ||
77 | int max_events; /* max generic hw events | ||
78 | in map */ | ||
79 | int num_counters; /* number base + aux counters */ | ||
80 | int num_base_counters; /* number base counters */ | ||
81 | }; | ||
82 | |||
83 | DEFINE_PER_CPU(u64, perf_irqs); | ||
84 | static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); | ||
85 | |||
86 | #define TILE_OP_UNSUPP (-1) | ||
87 | |||
88 | #ifndef __tilegx__ | ||
89 | /* TILEPro hardware events map */ | ||
90 | static const int tile_hw_event_map[] = { | ||
91 | [PERF_COUNT_HW_CPU_CYCLES] = 0x01, /* ONE */ | ||
92 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x06, /* MP_BUNDLE_RETIRED */ | ||
93 | [PERF_COUNT_HW_CACHE_REFERENCES] = TILE_OP_UNSUPP, | ||
94 | [PERF_COUNT_HW_CACHE_MISSES] = TILE_OP_UNSUPP, | ||
95 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x16, /* | ||
96 | MP_CONDITIONAL_BRANCH_ISSUED */ | ||
97 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x14, /* | ||
98 | MP_CONDITIONAL_BRANCH_MISSPREDICT */ | ||
99 | [PERF_COUNT_HW_BUS_CYCLES] = TILE_OP_UNSUPP, | ||
100 | }; | ||
101 | #else | ||
102 | /* TILEGx hardware events map */ | ||
103 | static const int tile_hw_event_map[] = { | ||
104 | [PERF_COUNT_HW_CPU_CYCLES] = 0x181, /* ONE */ | ||
105 | [PERF_COUNT_HW_INSTRUCTIONS] = 0xdb, /* INSTRUCTION_BUNDLE */ | ||
106 | [PERF_COUNT_HW_CACHE_REFERENCES] = TILE_OP_UNSUPP, | ||
107 | [PERF_COUNT_HW_CACHE_MISSES] = TILE_OP_UNSUPP, | ||
108 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0xd9, /* | ||
109 | COND_BRANCH_PRED_CORRECT */ | ||
110 | [PERF_COUNT_HW_BRANCH_MISSES] = 0xda, /* | ||
111 | COND_BRANCH_PRED_INCORRECT */ | ||
112 | [PERF_COUNT_HW_BUS_CYCLES] = TILE_OP_UNSUPP, | ||
113 | }; | ||
114 | #endif | ||
115 | |||
116 | #define C(x) PERF_COUNT_HW_CACHE_##x | ||
117 | |||
118 | /* | ||
119 | * Generalized hw caching related hw_event table, filled | ||
120 | * in on a per model basis. A value of -1 (TILE_OP_UNSUPP) | ||
121 | * means 'not supported'; any other value is the | ||
122 | * raw hw_event ID. | ||
123 | */ | ||
124 | #ifndef __tilegx__ | ||
125 | /* TILEPro hardware cache event map */ | ||
126 | static const int tile_cache_event_map[PERF_COUNT_HW_CACHE_MAX] | ||
127 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
128 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = { | ||
129 | [C(L1D)] = { | ||
130 | [C(OP_READ)] = { | ||
131 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
132 | [C(RESULT_MISS)] = 0x21, /* RD_MISS */ | ||
133 | }, | ||
134 | [C(OP_WRITE)] = { | ||
135 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
136 | [C(RESULT_MISS)] = 0x22, /* WR_MISS */ | ||
137 | }, | ||
138 | [C(OP_PREFETCH)] = { | ||
139 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
140 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
141 | }, | ||
142 | }, | ||
143 | [C(L1I)] = { | ||
144 | [C(OP_READ)] = { | ||
145 | [C(RESULT_ACCESS)] = 0x12, /* MP_ICACHE_HIT_ISSUED */ | ||
146 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
147 | }, | ||
148 | [C(OP_WRITE)] = { | ||
149 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
150 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
151 | }, | ||
152 | [C(OP_PREFETCH)] = { | ||
153 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
154 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
155 | }, | ||
156 | }, | ||
157 | [C(LL)] = { | ||
158 | [C(OP_READ)] = { | ||
159 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
160 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
161 | }, | ||
162 | [C(OP_WRITE)] = { | ||
163 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
164 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
165 | }, | ||
166 | [C(OP_PREFETCH)] = { | ||
167 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
168 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
169 | }, | ||
170 | }, | ||
171 | [C(DTLB)] = { | ||
172 | [C(OP_READ)] = { | ||
173 | [C(RESULT_ACCESS)] = 0x1d, /* TLB_CNT */ | ||
174 | [C(RESULT_MISS)] = 0x20, /* TLB_EXCEPTION */ | ||
175 | }, | ||
176 | [C(OP_WRITE)] = { | ||
177 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
178 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
179 | }, | ||
180 | [C(OP_PREFETCH)] = { | ||
181 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
182 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
183 | }, | ||
184 | }, | ||
185 | [C(ITLB)] = { | ||
186 | [C(OP_READ)] = { | ||
187 | [C(RESULT_ACCESS)] = 0x13, /* MP_ITLB_HIT_ISSUED */ | ||
188 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
189 | }, | ||
190 | [C(OP_WRITE)] = { | ||
191 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
192 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
193 | }, | ||
194 | [C(OP_PREFETCH)] = { | ||
195 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
196 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
197 | }, | ||
198 | }, | ||
199 | [C(BPU)] = { | ||
200 | [C(OP_READ)] = { | ||
201 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
202 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
203 | }, | ||
204 | [C(OP_WRITE)] = { | ||
205 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
206 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
207 | }, | ||
208 | [C(OP_PREFETCH)] = { | ||
209 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
210 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
211 | }, | ||
212 | }, | ||
213 | }; | ||
214 | #else | ||
215 | /* TILEGx hardware cache event map */ | ||
216 | static const int tile_cache_event_map[PERF_COUNT_HW_CACHE_MAX] | ||
217 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
218 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = { | ||
219 | [C(L1D)] = { | ||
220 | /* | ||
221 | * Like some other architectures (e.g. ARM), the performance | ||
222 | * counters don't differentiate between read and write | ||
223 | * accesses/misses, so this isn't strictly correct, but it's the | ||
224 | * best we can do. Writes and reads get combined. | ||
225 | */ | ||
226 | [C(OP_READ)] = { | ||
227 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
228 | [C(RESULT_MISS)] = 0x44, /* RD_MISS */ | ||
229 | }, | ||
230 | [C(OP_WRITE)] = { | ||
231 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
232 | [C(RESULT_MISS)] = 0x45, /* WR_MISS */ | ||
233 | }, | ||
234 | [C(OP_PREFETCH)] = { | ||
235 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
236 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
237 | }, | ||
238 | }, | ||
239 | [C(L1I)] = { | ||
240 | [C(OP_READ)] = { | ||
241 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
242 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
243 | }, | ||
244 | [C(OP_WRITE)] = { | ||
245 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
246 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
247 | }, | ||
248 | [C(OP_PREFETCH)] = { | ||
249 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
250 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
251 | }, | ||
252 | }, | ||
253 | [C(LL)] = { | ||
254 | [C(OP_READ)] = { | ||
255 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
256 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
257 | }, | ||
258 | [C(OP_WRITE)] = { | ||
259 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
260 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
261 | }, | ||
262 | [C(OP_PREFETCH)] = { | ||
263 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
264 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
265 | }, | ||
266 | }, | ||
267 | [C(DTLB)] = { | ||
268 | [C(OP_READ)] = { | ||
269 | [C(RESULT_ACCESS)] = 0x40, /* TLB_CNT */ | ||
270 | [C(RESULT_MISS)] = 0x43, /* TLB_EXCEPTION */ | ||
271 | }, | ||
272 | [C(OP_WRITE)] = { | ||
273 | [C(RESULT_ACCESS)] = 0x40, /* TLB_CNT */ | ||
274 | [C(RESULT_MISS)] = 0x43, /* TLB_EXCEPTION */ | ||
275 | }, | ||
276 | [C(OP_PREFETCH)] = { | ||
277 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
278 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
279 | }, | ||
280 | }, | ||
281 | [C(ITLB)] = { | ||
282 | [C(OP_READ)] = { | ||
283 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
284 | [C(RESULT_MISS)] = 0xd4, /* ITLB_MISS_INT */ | ||
285 | }, | ||
286 | [C(OP_WRITE)] = { | ||
287 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
288 | [C(RESULT_MISS)] = 0xd4, /* ITLB_MISS_INT */ | ||
289 | }, | ||
290 | [C(OP_PREFETCH)] = { | ||
291 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
292 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
293 | }, | ||
294 | }, | ||
295 | [C(BPU)] = { | ||
296 | [C(OP_READ)] = { | ||
297 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
298 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
299 | }, | ||
300 | [C(OP_WRITE)] = { | ||
301 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
302 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
303 | }, | ||
304 | [C(OP_PREFETCH)] = { | ||
305 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
306 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
307 | }, | ||
308 | }, | ||
309 | }; | ||
310 | #endif | ||
311 | |||
312 | static atomic_t tile_active_events; | ||
313 | static DEFINE_MUTEX(perf_intr_reserve_mutex); | ||
314 | |||
315 | static int tile_map_hw_event(u64 config); | ||
316 | static int tile_map_cache_event(u64 config); | ||
317 | |||
318 | static int tile_pmu_handle_irq(struct pt_regs *regs, int fault); | ||
319 | |||
320 | /* | ||
321 | * To avoid new_raw_count getting larger than pre_raw_count | ||
322 | * in tile_perf_event_update(), we limit the value of max_period to 2^31 - 1. | ||
323 | */ | ||
324 | static const struct tile_pmu tilepmu = { | ||
325 | #ifndef __tilegx__ | ||
326 | .name = "tilepro", | ||
327 | #else | ||
328 | .name = "tilegx", | ||
329 | #endif | ||
330 | .max_events = ARRAY_SIZE(tile_hw_event_map), | ||
331 | .map_hw_event = tile_map_hw_event, | ||
332 | .hw_events = tile_hw_event_map, | ||
333 | .map_cache_event = tile_map_cache_event, | ||
334 | .cache_events = &tile_cache_event_map, | ||
335 | .cntval_bits = 32, | ||
336 | .cntval_mask = (1ULL << 32) - 1, | ||
337 | .max_period = (1ULL << 31) - 1, | ||
338 | .num_counters = TILE_MAX_COUNTERS, | ||
339 | .num_base_counters = TILE_BASE_COUNTERS, | ||
340 | }; | ||
341 | |||
342 | static const struct tile_pmu *tile_pmu __read_mostly; | ||
343 | |||
344 | /* | ||
345 | * Check whether perf event is enabled. | ||
346 | */ | ||
347 | int tile_perf_enabled(void) | ||
348 | { | ||
349 | return atomic_read(&tile_active_events) != 0; | ||
350 | } | ||
351 | |||
352 | /* | ||
353 | * Read Performance Counters. | ||
354 | */ | ||
355 | static inline u64 read_counter(int idx) | ||
356 | { | ||
357 | u64 val = 0; | ||
358 | |||
359 | /* __insn_mfspr() only takes an immediate argument */ | ||
360 | switch (idx) { | ||
361 | case PERF_COUNT_0_IDX: | ||
362 | val = __insn_mfspr(SPR_PERF_COUNT_0); | ||
363 | break; | ||
364 | case PERF_COUNT_1_IDX: | ||
365 | val = __insn_mfspr(SPR_PERF_COUNT_1); | ||
366 | break; | ||
367 | case AUX_PERF_COUNT_0_IDX: | ||
368 | val = __insn_mfspr(SPR_AUX_PERF_COUNT_0); | ||
369 | break; | ||
370 | case AUX_PERF_COUNT_1_IDX: | ||
371 | val = __insn_mfspr(SPR_AUX_PERF_COUNT_1); | ||
372 | break; | ||
373 | default: | ||
374 | WARN_ON_ONCE(idx > AUX_PERF_COUNT_1_IDX || | ||
375 | idx < PERF_COUNT_0_IDX); | ||
376 | } | ||
377 | |||
378 | return val; | ||
379 | } | ||
380 | |||
381 | /* | ||
382 | * Write Performance Counters. | ||
383 | */ | ||
384 | static inline void write_counter(int idx, u64 value) | ||
385 | { | ||
386 | /* __insn_mtspr() only takes an immediate argument */ | ||
387 | switch (idx) { | ||
388 | case PERF_COUNT_0_IDX: | ||
389 | __insn_mtspr(SPR_PERF_COUNT_0, value); | ||
390 | break; | ||
391 | case PERF_COUNT_1_IDX: | ||
392 | __insn_mtspr(SPR_PERF_COUNT_1, value); | ||
393 | break; | ||
394 | case AUX_PERF_COUNT_0_IDX: | ||
395 | __insn_mtspr(SPR_AUX_PERF_COUNT_0, value); | ||
396 | break; | ||
397 | case AUX_PERF_COUNT_1_IDX: | ||
398 | __insn_mtspr(SPR_AUX_PERF_COUNT_1, value); | ||
399 | break; | ||
400 | default: | ||
401 | WARN_ON_ONCE(idx > AUX_PERF_COUNT_1_IDX || | ||
402 | idx < PERF_COUNT_0_IDX); | ||
403 | } | ||
404 | } | ||
405 | |||
406 | /* | ||
407 | * Enable performance event by setting | ||
408 | * Performance Counter Control registers. | ||
409 | */ | ||
410 | static inline void tile_pmu_enable_event(struct perf_event *event) | ||
411 | { | ||
412 | struct hw_perf_event *hwc = &event->hw; | ||
413 | unsigned long cfg, mask; | ||
414 | int shift, idx = hwc->idx; | ||
415 | |||
416 | /* | ||
417 | * Prevent early activation from tile_pmu_start() in hw_perf_enable: warn once and bail out if idx is still -1. | ||
418 | */ | ||
419 | |||
420 | if (WARN_ON_ONCE(idx == -1)) | ||
421 | return; | ||
422 | |||
423 | if (idx < tile_pmu->num_base_counters) | ||
424 | cfg = __insn_mfspr(SPR_PERF_COUNT_CTL); | ||
425 | else | ||
426 | cfg = __insn_mfspr(SPR_AUX_PERF_COUNT_CTL); | ||
427 | |||
428 | switch (idx) { | ||
429 | case PERF_COUNT_0_IDX: | ||
430 | case AUX_PERF_COUNT_0_IDX: | ||
431 | mask = TILE_EVENT_MASK; | ||
432 | shift = 0; | ||
433 | break; | ||
434 | case PERF_COUNT_1_IDX: | ||
435 | case AUX_PERF_COUNT_1_IDX: | ||
436 | mask = TILE_EVENT_MASK << 16; | ||
437 | shift = 16; | ||
438 | break; | ||
439 | default: | ||
440 | WARN_ON_ONCE(idx < PERF_COUNT_0_IDX || | ||
441 | idx > AUX_PERF_COUNT_1_IDX); | ||
442 | return; | ||
443 | } | ||
444 | |||
445 | /* Clear mask bits to enable the event. */ | ||
446 | cfg &= ~mask; | ||
447 | cfg |= hwc->config << shift; | ||
448 | |||
449 | if (idx < tile_pmu->num_base_counters) | ||
450 | __insn_mtspr(SPR_PERF_COUNT_CTL, cfg); | ||
451 | else | ||
452 | __insn_mtspr(SPR_AUX_PERF_COUNT_CTL, cfg); | ||
453 | } | ||
454 | |||
455 | /* | ||
456 | * Disable performance event by clearing | ||
457 | * Performance Counter Control registers. | ||
458 | */ | ||
459 | static inline void tile_pmu_disable_event(struct perf_event *event) | ||
460 | { | ||
461 | struct hw_perf_event *hwc = &event->hw; | ||
462 | unsigned long cfg, mask; | ||
463 | int idx = hwc->idx; | ||
464 | |||
465 | if (idx == -1) | ||
466 | return; | ||
467 | |||
468 | if (idx < tile_pmu->num_base_counters) | ||
469 | cfg = __insn_mfspr(SPR_PERF_COUNT_CTL); | ||
470 | else | ||
471 | cfg = __insn_mfspr(SPR_AUX_PERF_COUNT_CTL); | ||
472 | |||
473 | switch (idx) { | ||
474 | case PERF_COUNT_0_IDX: | ||
475 | case AUX_PERF_COUNT_0_IDX: | ||
476 | mask = TILE_PLM_MASK; | ||
477 | break; | ||
478 | case PERF_COUNT_1_IDX: | ||
479 | case AUX_PERF_COUNT_1_IDX: | ||
480 | mask = TILE_PLM_MASK << 16; | ||
481 | break; | ||
482 | default: | ||
483 | WARN_ON_ONCE(idx < PERF_COUNT_0_IDX || | ||
484 | idx > AUX_PERF_COUNT_1_IDX); | ||
485 | return; | ||
486 | } | ||
487 | |||
488 | /* Set mask bits to disable the event. */ | ||
489 | cfg |= mask; | ||
490 | |||
491 | if (idx < tile_pmu->num_base_counters) | ||
492 | __insn_mtspr(SPR_PERF_COUNT_CTL, cfg); | ||
493 | else | ||
494 | __insn_mtspr(SPR_AUX_PERF_COUNT_CTL, cfg); | ||
495 | } | ||
496 | |||
497 | /* | ||
498 | * Propagate event elapsed time into the generic event. | ||
499 | * Can only be executed on the CPU where the event is active. | ||
500 | * Returns the new raw counter value that was read. | ||
501 | */ | ||
502 | static u64 tile_perf_event_update(struct perf_event *event) | ||
503 | { | ||
504 | struct hw_perf_event *hwc = &event->hw; | ||
505 | int shift = 64 - tile_pmu->cntval_bits; | ||
506 | u64 prev_raw_count, new_raw_count; | ||
507 | u64 oldval; | ||
508 | int idx = hwc->idx; | ||
509 | u64 delta; | ||
510 | |||
511 | /* | ||
512 | * Careful: an NMI might modify the previous event value. | ||
513 | * | ||
514 | * Our tactic to handle this is to first atomically read and | ||
515 | * exchange a new raw count - then add that new-prev delta | ||
516 | * count to the generic event atomically: | ||
517 | */ | ||
518 | again: | ||
519 | prev_raw_count = local64_read(&hwc->prev_count); | ||
520 | new_raw_count = read_counter(idx); | ||
521 | |||
522 | oldval = local64_cmpxchg(&hwc->prev_count, prev_raw_count, | ||
523 | new_raw_count); | ||
524 | if (oldval != prev_raw_count) | ||
525 | goto again; | ||
526 | |||
527 | /* | ||
528 | * Now we have the new raw value and have updated the prev | ||
529 | * timestamp already. We can now calculate the elapsed delta | ||
530 | * (event-)time and add that to the generic event. | ||
531 | * | ||
532 | * Careful, not all hw sign-extends above the physical width | ||
533 | * of the count. | ||
534 | */ | ||
535 | delta = (new_raw_count << shift) - (prev_raw_count << shift); | ||
536 | delta >>= shift; | ||
537 | |||
538 | local64_add(delta, &event->count); | ||
539 | local64_sub(delta, &hwc->period_left); | ||
540 | |||
541 | return new_raw_count; | ||
542 | } | ||
543 | |||
544 | /* | ||
545 | * Set the next IRQ period, based on the hwc->period_left value. | ||
546 | * To be called with the event disabled in hw: | ||
547 | */ | ||
548 | static int tile_event_set_period(struct perf_event *event) | ||
549 | { | ||
550 | struct hw_perf_event *hwc = &event->hw; | ||
551 | int idx = hwc->idx; | ||
552 | s64 left = local64_read(&hwc->period_left); | ||
553 | s64 period = hwc->sample_period; | ||
554 | int ret = 0; | ||
555 | |||
556 | /* | ||
557 | * If we are way outside a reasonable range then just skip forward: | ||
558 | */ | ||
559 | if (unlikely(left <= -period)) { | ||
560 | left = period; | ||
561 | local64_set(&hwc->period_left, left); | ||
562 | hwc->last_period = period; | ||
563 | ret = 1; | ||
564 | } | ||
565 | |||
566 | if (unlikely(left <= 0)) { | ||
567 | left += period; | ||
568 | local64_set(&hwc->period_left, left); | ||
569 | hwc->last_period = period; | ||
570 | ret = 1; | ||
571 | } | ||
572 | if (left > tile_pmu->max_period) | ||
573 | left = tile_pmu->max_period; | ||
574 | |||
575 | /* | ||
576 | * The hw event starts counting from this event offset, | ||
577 | * mark it to be able to extra future deltas: | ||
578 | */ | ||
579 | local64_set(&hwc->prev_count, (u64)-left); | ||
580 | |||
581 | write_counter(idx, (u64)(-left) & tile_pmu->cntval_mask); | ||
582 | |||
583 | perf_event_update_userpage(event); | ||
584 | |||
585 | return ret; | ||
586 | } | ||
587 | |||
588 | /* | ||
589 | * Stop the event but do not release the PMU counter | ||
590 | */ | ||
591 | static void tile_pmu_stop(struct perf_event *event, int flags) | ||
592 | { | ||
593 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
594 | struct hw_perf_event *hwc = &event->hw; | ||
595 | int idx = hwc->idx; | ||
596 | |||
597 | if (__test_and_clear_bit(idx, cpuc->active_mask)) { | ||
598 | tile_pmu_disable_event(event); | ||
599 | cpuc->events[hwc->idx] = NULL; | ||
600 | WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); | ||
601 | hwc->state |= PERF_HES_STOPPED; | ||
602 | } | ||
603 | |||
604 | if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { | ||
605 | /* | ||
606 | * Drain the remaining delta count out of a event | ||
607 | * that we are disabling: | ||
608 | */ | ||
609 | tile_perf_event_update(event); | ||
610 | hwc->state |= PERF_HES_UPTODATE; | ||
611 | } | ||
612 | } | ||
613 | |||
614 | /* | ||
615 | * Start an event (without re-assigning counter) | ||
616 | */ | ||
617 | static void tile_pmu_start(struct perf_event *event, int flags) | ||
618 | { | ||
619 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
620 | int idx = event->hw.idx; | ||
621 | |||
622 | if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) | ||
623 | return; | ||
624 | |||
625 | if (WARN_ON_ONCE(idx == -1)) | ||
626 | return; | ||
627 | |||
628 | if (flags & PERF_EF_RELOAD) { | ||
629 | WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); | ||
630 | tile_event_set_period(event); | ||
631 | } | ||
632 | |||
633 | event->hw.state = 0; | ||
634 | |||
635 | cpuc->events[idx] = event; | ||
636 | __set_bit(idx, cpuc->active_mask); | ||
637 | |||
638 | unmask_pmc_interrupts(); | ||
639 | |||
640 | tile_pmu_enable_event(event); | ||
641 | |||
642 | perf_event_update_userpage(event); | ||
643 | } | ||
644 | |||
645 | /* | ||
646 | * Add a single event to the PMU. | ||
647 | * | ||
648 | * The event is added to the group of enabled events | ||
649 | * but only if it can be scheduled with existing events. | ||
650 | */ | ||
651 | static int tile_pmu_add(struct perf_event *event, int flags) | ||
652 | { | ||
653 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
654 | struct hw_perf_event *hwc; | ||
655 | unsigned long mask; | ||
656 | int b, max_cnt; | ||
657 | |||
658 | hwc = &event->hw; | ||
659 | |||
660 | /* | ||
661 | * We are full. | ||
662 | */ | ||
663 | if (cpuc->n_events == tile_pmu->num_counters) | ||
664 | return -ENOSPC; | ||
665 | |||
666 | cpuc->event_list[cpuc->n_events] = event; | ||
667 | cpuc->n_events++; | ||
668 | |||
669 | hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; | ||
670 | if (!(flags & PERF_EF_START)) | ||
671 | hwc->state |= PERF_HES_ARCH; | ||
672 | |||
673 | /* | ||
674 | * Find first empty counter. | ||
675 | */ | ||
676 | max_cnt = tile_pmu->num_counters; | ||
677 | mask = ~cpuc->used_mask; | ||
678 | |||
679 | /* Find next free counter. */ | ||
680 | b = find_next_bit(&mask, max_cnt, 0); | ||
681 | |||
682 | /* Should not happen. */ | ||
683 | if (WARN_ON_ONCE(b == max_cnt)) | ||
684 | return -ENOSPC; | ||
685 | |||
686 | /* | ||
687 | * Assign counter to event. | ||
688 | */ | ||
689 | event->hw.idx = b; | ||
690 | __set_bit(b, &cpuc->used_mask); | ||
691 | |||
692 | /* | ||
693 | * Start if requested. | ||
694 | */ | ||
695 | if (flags & PERF_EF_START) | ||
696 | tile_pmu_start(event, PERF_EF_RELOAD); | ||
697 | |||
698 | return 0; | ||
699 | } | ||
700 | |||
701 | /* | ||
702 | * Delete a single event from the PMU. | ||
703 | * | ||
704 | * The event is deleted from the group of enabled events. | ||
705 | * If it is the last event, disable PMU interrupt. | ||
706 | */ | ||
707 | static void tile_pmu_del(struct perf_event *event, int flags) | ||
708 | { | ||
709 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
710 | int i; | ||
711 | |||
712 | /* | ||
713 | * Remove event from list, compact list if necessary. | ||
714 | */ | ||
715 | for (i = 0; i < cpuc->n_events; i++) { | ||
716 | if (cpuc->event_list[i] == event) { | ||
717 | while (++i < cpuc->n_events) | ||
718 | cpuc->event_list[i-1] = cpuc->event_list[i]; | ||
719 | --cpuc->n_events; | ||
720 | cpuc->events[event->hw.idx] = NULL; | ||
721 | __clear_bit(event->hw.idx, &cpuc->used_mask); | ||
722 | tile_pmu_stop(event, PERF_EF_UPDATE); | ||
723 | break; | ||
724 | } | ||
725 | } | ||
726 | /* | ||
727 | * If there are no events left, then mask PMU interrupt. | ||
728 | */ | ||
729 | if (cpuc->n_events == 0) | ||
730 | mask_pmc_interrupts(); | ||
731 | perf_event_update_userpage(event); | ||
732 | } | ||
733 | |||
734 | /* | ||
735 | * Propagate event elapsed time into the event. | ||
736 | */ | ||
static inline void tile_pmu_read(struct perf_event *event)
{
	/* The raw counter value returned by the update is not needed here. */
	(void)tile_perf_event_update(event);
}
741 | |||
742 | /* | ||
743 | * Map generic events to Tile PMU. | ||
744 | */ | ||
745 | static int tile_map_hw_event(u64 config) | ||
746 | { | ||
747 | if (config >= tile_pmu->max_events) | ||
748 | return -EINVAL; | ||
749 | return tile_pmu->hw_events[config]; | ||
750 | } | ||
751 | |||
752 | /* | ||
753 | * Map generic hardware cache events to Tile PMU. | ||
754 | */ | ||
755 | static int tile_map_cache_event(u64 config) | ||
756 | { | ||
757 | unsigned int cache_type, cache_op, cache_result; | ||
758 | int code; | ||
759 | |||
760 | if (!tile_pmu->cache_events) | ||
761 | return -ENOENT; | ||
762 | |||
763 | cache_type = (config >> 0) & 0xff; | ||
764 | if (cache_type >= PERF_COUNT_HW_CACHE_MAX) | ||
765 | return -EINVAL; | ||
766 | |||
767 | cache_op = (config >> 8) & 0xff; | ||
768 | if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) | ||
769 | return -EINVAL; | ||
770 | |||
771 | cache_result = (config >> 16) & 0xff; | ||
772 | if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) | ||
773 | return -EINVAL; | ||
774 | |||
775 | code = (*tile_pmu->cache_events)[cache_type][cache_op][cache_result]; | ||
776 | if (code == TILE_OP_UNSUPP) | ||
777 | return -EINVAL; | ||
778 | |||
779 | return code; | ||
780 | } | ||
781 | |||
782 | static void tile_event_destroy(struct perf_event *event) | ||
783 | { | ||
784 | if (atomic_dec_return(&tile_active_events) == 0) | ||
785 | release_pmc_hardware(); | ||
786 | } | ||
787 | |||
788 | static int __tile_event_init(struct perf_event *event) | ||
789 | { | ||
790 | struct perf_event_attr *attr = &event->attr; | ||
791 | struct hw_perf_event *hwc = &event->hw; | ||
792 | int code; | ||
793 | |||
794 | switch (attr->type) { | ||
795 | case PERF_TYPE_HARDWARE: | ||
796 | code = tile_pmu->map_hw_event(attr->config); | ||
797 | break; | ||
798 | case PERF_TYPE_HW_CACHE: | ||
799 | code = tile_pmu->map_cache_event(attr->config); | ||
800 | break; | ||
801 | case PERF_TYPE_RAW: | ||
802 | code = attr->config & TILE_EVENT_MASK; | ||
803 | break; | ||
804 | default: | ||
805 | /* Should not happen. */ | ||
806 | return -EOPNOTSUPP; | ||
807 | } | ||
808 | |||
809 | if (code < 0) | ||
810 | return code; | ||
811 | |||
812 | hwc->config = code; | ||
813 | hwc->idx = -1; | ||
814 | |||
815 | if (attr->exclude_user) | ||
816 | hwc->config |= TILE_CTL_EXCL_USER; | ||
817 | |||
818 | if (attr->exclude_kernel) | ||
819 | hwc->config |= TILE_CTL_EXCL_KERNEL; | ||
820 | |||
821 | if (attr->exclude_hv) | ||
822 | hwc->config |= TILE_CTL_EXCL_HV; | ||
823 | |||
824 | if (!hwc->sample_period) { | ||
825 | hwc->sample_period = tile_pmu->max_period; | ||
826 | hwc->last_period = hwc->sample_period; | ||
827 | local64_set(&hwc->period_left, hwc->sample_period); | ||
828 | } | ||
829 | event->destroy = tile_event_destroy; | ||
830 | return 0; | ||
831 | } | ||
832 | |||
833 | static int tile_event_init(struct perf_event *event) | ||
834 | { | ||
835 | int err = 0; | ||
836 | perf_irq_t old_irq_handler = NULL; | ||
837 | |||
838 | if (atomic_inc_return(&tile_active_events) == 1) | ||
839 | old_irq_handler = reserve_pmc_hardware(tile_pmu_handle_irq); | ||
840 | |||
841 | if (old_irq_handler) { | ||
842 | pr_warn("PMC hardware busy (reserved by oprofile)\n"); | ||
843 | |||
844 | atomic_dec(&tile_active_events); | ||
845 | return -EBUSY; | ||
846 | } | ||
847 | |||
848 | switch (event->attr.type) { | ||
849 | case PERF_TYPE_RAW: | ||
850 | case PERF_TYPE_HARDWARE: | ||
851 | case PERF_TYPE_HW_CACHE: | ||
852 | break; | ||
853 | |||
854 | default: | ||
855 | return -ENOENT; | ||
856 | } | ||
857 | |||
858 | err = __tile_event_init(event); | ||
859 | if (err) { | ||
860 | if (event->destroy) | ||
861 | event->destroy(event); | ||
862 | } | ||
863 | return err; | ||
864 | } | ||
865 | |||
866 | static struct pmu tilera_pmu = { | ||
867 | .event_init = tile_event_init, | ||
868 | .add = tile_pmu_add, | ||
869 | .del = tile_pmu_del, | ||
870 | |||
871 | .start = tile_pmu_start, | ||
872 | .stop = tile_pmu_stop, | ||
873 | |||
874 | .read = tile_pmu_read, | ||
875 | }; | ||
876 | |||
877 | /* | ||
878 | * PMU's IRQ handler, PMU has 2 interrupts, they share the same handler. | ||
879 | */ | ||
880 | int tile_pmu_handle_irq(struct pt_regs *regs, int fault) | ||
881 | { | ||
882 | struct perf_sample_data data; | ||
883 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
884 | struct perf_event *event; | ||
885 | struct hw_perf_event *hwc; | ||
886 | u64 val; | ||
887 | unsigned long status; | ||
888 | int bit; | ||
889 | |||
890 | __get_cpu_var(perf_irqs)++; | ||
891 | |||
892 | if (!atomic_read(&tile_active_events)) | ||
893 | return 0; | ||
894 | |||
895 | status = pmc_get_overflow(); | ||
896 | pmc_ack_overflow(status); | ||
897 | |||
898 | for_each_set_bit(bit, &status, tile_pmu->num_counters) { | ||
899 | |||
900 | event = cpuc->events[bit]; | ||
901 | |||
902 | if (!event) | ||
903 | continue; | ||
904 | |||
905 | if (!test_bit(bit, cpuc->active_mask)) | ||
906 | continue; | ||
907 | |||
908 | hwc = &event->hw; | ||
909 | |||
910 | val = tile_perf_event_update(event); | ||
911 | if (val & (1ULL << (tile_pmu->cntval_bits - 1))) | ||
912 | continue; | ||
913 | |||
914 | perf_sample_data_init(&data, 0, event->hw.last_period); | ||
915 | if (!tile_event_set_period(event)) | ||
916 | continue; | ||
917 | |||
918 | if (perf_event_overflow(event, &data, regs)) | ||
919 | tile_pmu_stop(event, 0); | ||
920 | } | ||
921 | |||
922 | return 0; | ||
923 | } | ||
924 | |||
925 | static bool __init supported_pmu(void) | ||
926 | { | ||
927 | tile_pmu = &tilepmu; | ||
928 | return true; | ||
929 | } | ||
930 | |||
931 | int __init init_hw_perf_events(void) | ||
932 | { | ||
933 | supported_pmu(); | ||
934 | perf_pmu_register(&tilera_pmu, "cpu", PERF_TYPE_RAW); | ||
935 | return 0; | ||
936 | } | ||
937 | arch_initcall(init_hw_perf_events); | ||
938 | |||
939 | /* Callchain handling code. */ | ||
940 | |||
941 | /* | ||
942 | * Tile specific backtracing code for perf_events. | ||
943 | */ | ||
944 | static inline void perf_callchain(struct perf_callchain_entry *entry, | ||
945 | struct pt_regs *regs) | ||
946 | { | ||
947 | struct KBacktraceIterator kbt; | ||
948 | unsigned int i; | ||
949 | |||
950 | /* | ||
951 | * Get the address just after the "jalr" instruction that | ||
952 | * jumps to the handler for a syscall. When we find this | ||
953 | * address in a backtrace, we silently ignore it, which gives | ||
954 | * us a one-step backtrace connection from the sys_xxx() | ||
955 | * function in the kernel to the xxx() function in libc. | ||
956 | * Otherwise, we lose the ability to properly attribute time | ||
957 | * from the libc calls to the kernel implementations, since | ||
958 | * oprofile only considers PCs from backtraces a pair at a time. | ||
959 | */ | ||
960 | unsigned long handle_syscall_pc = handle_syscall_link_address(); | ||
961 | |||
962 | KBacktraceIterator_init(&kbt, NULL, regs); | ||
963 | kbt.profile = 1; | ||
964 | |||
965 | /* | ||
966 | * The sample for the pc is already recorded. Now we are adding the | ||
967 | * address of the callsites on the stack. Our iterator starts | ||
968 | * with the frame of the (already sampled) call site. If our | ||
969 | * iterator contained a "return address" field, we could have just | ||
970 | * used it and wouldn't have needed to skip the first | ||
971 | * frame. That's in effect what the arm and x86 versions do. | ||
972 | * Instead we peel off the first iteration to get the equivalent | ||
973 | * behavior. | ||
974 | */ | ||
975 | |||
976 | if (KBacktraceIterator_end(&kbt)) | ||
977 | return; | ||
978 | KBacktraceIterator_next(&kbt); | ||
979 | |||
980 | /* | ||
981 | * Set stack depth to 16 for user and kernel space respectively, that | ||
982 | * is, total 32 stack frames. | ||
983 | */ | ||
984 | for (i = 0; i < 16; ++i) { | ||
985 | unsigned long pc; | ||
986 | if (KBacktraceIterator_end(&kbt)) | ||
987 | break; | ||
988 | pc = kbt.it.pc; | ||
989 | if (pc != handle_syscall_pc) | ||
990 | perf_callchain_store(entry, pc); | ||
991 | KBacktraceIterator_next(&kbt); | ||
992 | } | ||
993 | } | ||
994 | |||
/* User-space callchain hook: delegates to the shared Tile backtracer. */
void perf_callchain_user(struct perf_callchain_entry *entry,
			 struct pt_regs *regs)
{
	perf_callchain(entry, regs);
}
1000 | |||
/* Kernel-space callchain hook: delegates to the shared Tile backtracer. */
void perf_callchain_kernel(struct perf_callchain_entry *entry,
			   struct pt_regs *regs)
{
	perf_callchain(entry, regs);
}
diff --git a/arch/tile/kernel/pmc.c b/arch/tile/kernel/pmc.c new file mode 100644 index 000000000000..db62cc34b955 --- /dev/null +++ b/arch/tile/kernel/pmc.c | |||
@@ -0,0 +1,121 @@ | |||
1 | /* | ||
2 | * Copyright 2014 Tilera Corporation. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation, version 2. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
11 | * NON INFRINGEMENT. See the GNU General Public License for | ||
12 | * more details. | ||
13 | */ | ||
14 | |||
15 | #include <linux/errno.h> | ||
16 | #include <linux/spinlock.h> | ||
17 | #include <linux/module.h> | ||
18 | #include <linux/atomic.h> | ||
19 | #include <linux/interrupt.h> | ||
20 | |||
21 | #include <asm/processor.h> | ||
22 | #include <asm/pmc.h> | ||
23 | |||
24 | perf_irq_t perf_irq = NULL; | ||
25 | int handle_perf_interrupt(struct pt_regs *regs, int fault) | ||
26 | { | ||
27 | int retval; | ||
28 | |||
29 | if (!perf_irq) | ||
30 | panic("Unexpected PERF_COUNT interrupt %d\n", fault); | ||
31 | |||
32 | nmi_enter(); | ||
33 | retval = perf_irq(regs, fault); | ||
34 | nmi_exit(); | ||
35 | return retval; | ||
36 | } | ||
37 | |||
38 | /* Reserve PMC hardware if it is available. */ | ||
39 | perf_irq_t reserve_pmc_hardware(perf_irq_t new_perf_irq) | ||
40 | { | ||
41 | return cmpxchg(&perf_irq, NULL, new_perf_irq); | ||
42 | } | ||
43 | EXPORT_SYMBOL(reserve_pmc_hardware); | ||
44 | |||
45 | /* Release PMC hardware. */ | ||
46 | void release_pmc_hardware(void) | ||
47 | { | ||
48 | perf_irq = NULL; | ||
49 | } | ||
50 | EXPORT_SYMBOL(release_pmc_hardware); | ||
51 | |||
52 | |||
53 | /* | ||
54 | * Get current overflow status of each performance counter, | ||
55 | * and auxiliary performance counter. | ||
56 | */ | ||
57 | unsigned long | ||
58 | pmc_get_overflow(void) | ||
59 | { | ||
60 | unsigned long status; | ||
61 | |||
62 | /* | ||
63 | * merge base+aux into a single vector | ||
64 | */ | ||
65 | status = __insn_mfspr(SPR_PERF_COUNT_STS); | ||
66 | status |= __insn_mfspr(SPR_AUX_PERF_COUNT_STS) << TILE_BASE_COUNTERS; | ||
67 | return status; | ||
68 | } | ||
69 | |||
70 | /* | ||
71 | * Clear the status bit for the corresponding counter, if written | ||
72 | * with a one. | ||
73 | */ | ||
74 | void | ||
75 | pmc_ack_overflow(unsigned long status) | ||
76 | { | ||
77 | /* | ||
78 | * clear overflow status by writing ones | ||
79 | */ | ||
80 | __insn_mtspr(SPR_PERF_COUNT_STS, status); | ||
81 | __insn_mtspr(SPR_AUX_PERF_COUNT_STS, status >> TILE_BASE_COUNTERS); | ||
82 | } | ||
83 | |||
84 | /* | ||
85 | * The perf count interrupts are masked and unmasked explicitly, | ||
86 | * and only here. The normal irq_enable() does not enable them, | ||
87 | * and irq_disable() does not disable them. That lets these | ||
88 | * routines drive the perf count interrupts orthogonally. | ||
89 | * | ||
90 | * We also mask the perf count interrupts on entry to the perf count | ||
91 | * interrupt handler in assembly code, and by default unmask them | ||
92 | * again (with interrupt critical section protection) just before | ||
93 | * returning from the interrupt. If the perf count handler returns | ||
94 | * a non-zero error code, then we don't re-enable them before returning. | ||
95 | * | ||
96 | * For Pro, we rely on both interrupts being in the same word to update | ||
97 | * them atomically so we never have one enabled and one disabled. | ||
98 | */ | ||
99 | |||
100 | #if CHIP_HAS_SPLIT_INTR_MASK() | ||
101 | # if INT_PERF_COUNT < 32 || INT_AUX_PERF_COUNT < 32 | ||
102 | # error Fix assumptions about which word PERF_COUNT interrupts are in | ||
103 | # endif | ||
104 | #endif | ||
105 | |||
106 | static inline unsigned long long pmc_mask(void) | ||
107 | { | ||
108 | unsigned long long mask = 1ULL << INT_PERF_COUNT; | ||
109 | mask |= 1ULL << INT_AUX_PERF_COUNT; | ||
110 | return mask; | ||
111 | } | ||
112 | |||
/* Unmask both perf count interrupts by resetting their mask bits. */
void unmask_pmc_interrupts(void)
{
	unsigned long long both = pmc_mask();

	interrupt_mask_reset_mask(both);
}
117 | |||
/* Mask both perf count interrupts by setting their mask bits. */
void mask_pmc_interrupts(void)
{
	unsigned long long both = pmc_mask();

	interrupt_mask_set_mask(both);
}
diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c index 5d10642db63e..462dcd0c1700 100644 --- a/arch/tile/kernel/time.c +++ b/arch/tile/kernel/time.c | |||
@@ -236,7 +236,15 @@ cycles_t ns2cycles(unsigned long nsecs) | |||
236 | * clock frequency. | 236 | * clock frequency. |
237 | */ | 237 | */ |
238 | struct clock_event_device *dev = &__raw_get_cpu_var(tile_timer); | 238 | struct clock_event_device *dev = &__raw_get_cpu_var(tile_timer); |
239 | return ((u64)nsecs * dev->mult) >> dev->shift; | 239 | |
240 | /* | ||
241 | * as in clocksource.h and x86's timer.h, we split the calculation | ||
242 | * into 2 parts to avoid unnecessary overflow of the intermediate | ||
243 | * value. This will not lead to any loss of precision. | ||
244 | */ | ||
245 | u64 quot = (u64)nsecs >> dev->shift; | ||
246 | u64 rem = (u64)nsecs & ((1ULL << dev->shift) - 1); | ||
247 | return quot * dev->mult + ((rem * dev->mult) >> dev->shift); | ||
240 | } | 248 | } |
241 | 249 | ||
242 | void update_vsyscall_tz(void) | 250 | void update_vsyscall_tz(void) |
diff --git a/arch/tile/kernel/vdso/Makefile b/arch/tile/kernel/vdso/Makefile index e2b7a2f4ee41..a025f63d54cd 100644 --- a/arch/tile/kernel/vdso/Makefile +++ b/arch/tile/kernel/vdso/Makefile | |||
@@ -104,7 +104,7 @@ $(obj-vdso32:%=%): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32) | |||
104 | $(obj-vdso32:%=%): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) | 104 | $(obj-vdso32:%=%): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) |
105 | 105 | ||
106 | $(obj)/vgettimeofday32.o: $(obj)/vgettimeofday.c | 106 | $(obj)/vgettimeofday32.o: $(obj)/vgettimeofday.c |
107 | $(call if_changed,cc_o_c) | 107 | $(call if_changed_rule,cc_o_c) |
108 | 108 | ||
109 | $(obj)/vrt_sigreturn32.o: $(obj)/vrt_sigreturn.S | 109 | $(obj)/vrt_sigreturn32.o: $(obj)/vrt_sigreturn.S |
110 | $(call if_changed,as_o_S) | 110 | $(call if_changed,as_o_S) |